mirror of
https://github.com/olivierkes/manuskript.git
synced 2024-05-12 08:52:30 +12:00
Implement async frequency analyzing
Signed-off-by: TheJackiMonster <thejackimonster@gmail.com>
This commit is contained in:
parent
13b6e26b13
commit
07a8197bcd
|
@ -4,9 +4,22 @@
|
|||
import gi
|
||||
|
||||
gi.require_version("Gtk", "3.0")
|
||||
from gi.repository import GObject, Gtk, Handy
|
||||
from gi.repository import GObject, Gtk
|
||||
|
||||
import re
|
||||
|
||||
from enum import Enum, unique
|
||||
|
||||
from manuskript.data import OutlineFolder, OutlineText, OutlineState, Project
|
||||
from manuskript.ui.abstractDialog import AbstractDialog
|
||||
from manuskript.util import validString, validInt
|
||||
|
||||
|
||||
@unique
class AnalyzeStatus(Enum):
    """Kind of frequency analysis a FrequencyWindow is currently running."""

    # No analysis is in progress.
    NONE = 0

    # Single words are being counted.
    WORDS = 1

    # Multi-word phrases are being counted.
    PHRASES = 2
|
||||
|
||||
|
||||
class FrequencyWindow(AbstractDialog):
|
||||
|
@ -14,6 +27,11 @@ class FrequencyWindow(AbstractDialog):
|
|||
def __init__(self, mainWindow):
|
||||
AbstractDialog.__init__(self, mainWindow, "ui/frequency.glade", "frequency_window")
|
||||
|
||||
self.analyzeStatus = AnalyzeStatus.NONE
|
||||
self.frequencies = dict()
|
||||
self.outlineCompletion = list()
|
||||
self.analyzeCompleted = 0
|
||||
|
||||
self.headerBar = None
|
||||
self.back = None
|
||||
self.wordLeaflet = None
|
||||
|
@ -30,6 +48,10 @@ class FrequencyWindow(AbstractDialog):
|
|||
self.wordsFrequencyStore = None
|
||||
self.phrasesFrequencyStore = None
|
||||
|
||||
self.phraseMinimum = None
|
||||
self.phraseMaximum = None
|
||||
self.wordSize = None
|
||||
|
||||
def initWindow(self, builder, window):
|
||||
self.headerBar = builder.get_object("header_bar")
|
||||
self.back = builder.get_object("back")
|
||||
|
@ -47,6 +69,10 @@ class FrequencyWindow(AbstractDialog):
|
|||
self.wordsFrequencyStore = builder.get_object("words_frequency_store")
|
||||
self.phrasesFrequencyStore = builder.get_object("phrases_frequency_store")
|
||||
|
||||
self.phraseMinimum = builder.get_object("phrase_minimum")
|
||||
self.phraseMaximum = builder.get_object("phrase_maximum")
|
||||
self.wordSize = builder.get_object("word_size")
|
||||
|
||||
self.wordLeaflet.bind_property("folded", self.back, "visible",
|
||||
GObject.BindingFlags.SYNC_CREATE)
|
||||
self.wordLeaflet.bind_property("folded", self.headerBar, "show-close-button",
|
||||
|
@ -59,18 +85,123 @@ class FrequencyWindow(AbstractDialog):
|
|||
self.excludeWordSelection.connect("changed", self._excludeWordSelectionChanged)
|
||||
self.removeWord.connect("clicked", self._removeWordClicked)
|
||||
self.addWord.connect("clicked", self._addWordClicked)
|
||||
|
||||
def getProject(self) -> Project:
    """Return the project reported by the main window.

    The value is passed through unchanged from ``mainWindow.getProject()``;
    ``analyze`` checks the result for ``None`` before using it.
    """
    mainWindow = self.mainWindow
    return mainWindow.getProject()
|
||||
|
||||
def _backClicked(self, button: Gtk.Button):
    """Step back one page, or hide the dialog when there is none to go back to.

    When the word list page is visible the leaflet returns to the word
    filter page; from any other page the dialog is hidden. ``button`` is
    not used (presumably the widget that emitted the click - the signal
    connection is not visible here).
    """
    leaflet = self.wordLeaflet

    if leaflet.get_visible_child_name() != "wordlist_view":
        self.hide()
        return

    leaflet.set_visible_child_name("wordfilter_view")
|
||||
|
||||
def __analyzeOutlineText(self, outlineText: OutlineText):
    """Add the word or phrase counts of one outline text to ``self.frequencies``.

    Depending on ``self.analyzeStatus`` the text is scanned either for words
    of at least the configured word size, or for phrases whose word count
    lies between the configured phrase minimum and maximum. Does nothing
    when no analysis is active or the text is empty.

    Matching is case-sensitive and each pattern counts non-overlapping
    matches only (``re.findall`` semantics).
    """
    text = outlineText.text

    # Nothing to scan. This also keeps findall() from raising TypeError
    # should the text be None (NOTE(review): whether an unloaded
    # OutlineText can carry None is not visible here - confirm).
    if not text:
        return

    if self.analyzeStatus == AnalyzeStatus.WORDS:
        # A size below 1 would build "\w{0,}", which matches the empty
        # string between tokens and pollutes the result with "" entries.
        # (assumes validInt returns an int, as str()/range() below require)
        word_size = max(1, validInt(self.wordSize.get_value()))

        patterns = [re.compile(r"\w{" + str(word_size) + r",}")]
    elif self.analyzeStatus == AnalyzeStatus.PHRASES:
        # A minimum below 1 would yield the single-word pattern more than
        # once (r"\s+\w+" * negative is ""), counting every word repeatedly.
        phrase_minimum = max(1, validInt(self.phraseMinimum.get_value()))
        phrase_maximum = validInt(self.phraseMaximum.get_value())

        # One pattern per phrase length: n words separated by whitespace.
        patterns = [
            re.compile(r"\w+" + r"\s+\w+" * (n - 1))
            for n in range(phrase_minimum, phrase_maximum + 1)
        ]
    else:
        return

    frequencies = self.frequencies

    for pattern in patterns:
        for match in pattern.findall(text):
            frequencies[match] = frequencies.get(match, 0) + 1
|
||||
|
||||
def __completeOutlineItem(self):
    """Process one queued outline item and publish the results when done.

    Registered by ``analyze`` as an idle callback. Each call takes the next
    item from ``self.outlineCompletion``, loads it if its state is not
    COMPLETE, queues the children of folders, analyzes texts, and updates
    the progress bar of the running analysis. Once the queue is empty the
    matching frequency store is refilled from ``self.frequencies`` and the
    analysis state is reset.

    Returns:
        True while items remain queued (so the callback is invoked again),
        False once the analysis has finished.
    """
    # Guard against an empty queue (e.g. a project without outline items):
    # popping from it would raise and leave analyzeStatus stuck on a
    # non-NONE value, which makes analyze() ignore every later request.
    if self.outlineCompletion:
        outline_item = self.outlineCompletion.pop(0)

        if outline_item.state != OutlineState.COMPLETE:
            outline_item.load(False)

        if isinstance(outline_item, OutlineFolder):
            # Children are appended to the end of the queue.
            self.outlineCompletion.extend(outline_item)
        elif isinstance(outline_item, OutlineText):
            self.__analyzeOutlineText(outline_item)

        self.analyzeCompleted = self.analyzeCompleted + 1

    incomplete = len(self.outlineCompletion)
    complete = self.analyzeCompleted
    total = complete + incomplete

    # With nothing processed and nothing queued the work is trivially done.
    fraction = complete / total if total > 0 else 1.0

    if self.analyzeStatus == AnalyzeStatus.WORDS:
        progress = self.wordsProgress
        store = self.wordsFrequencyStore
    elif self.analyzeStatus == AnalyzeStatus.PHRASES:
        progress = self.phrasesProgress
        store = self.phrasesFrequencyStore
    else:
        progress = None
        store = None

    if progress is not None:
        progress.set_fraction(fraction)

    if incomplete > 0:
        return True

    if store is not None:
        store.clear()

        for entry, frequency in self.frequencies.items():
            tree_iter = store.append()

            if tree_iter is None:
                continue

            # Column 0 holds the word/phrase, column 1 its count.
            store.set_value(tree_iter, 0, validString(entry))
            store.set_value(tree_iter, 1, validInt(frequency))

    if self.analyzeStatus == AnalyzeStatus.WORDS:
        # Only the word analysis switches pages, now that the list is filled.
        self.wordLeaflet.set_visible_child_name("wordlist_view")

    self.analyzeStatus = AnalyzeStatus.NONE
    self.frequencies = dict()
    return False
|
||||
|
||||
def analyze(self, status: AnalyzeStatus):
    """Start an asynchronous frequency analysis of the project outline.

    The top-level outline items are queued in ``self.outlineCompletion`` and
    an idle callback works through the queue one item per invocation. The
    request is ignored when ``status`` is ``AnalyzeStatus.NONE``, when an
    analysis is already running, or when there is no project.

    Args:
        status: Which analysis to run (``WORDS`` or ``PHRASES``).
    """
    # NONE marks "idle"; running with it would walk the whole outline
    # without counting anything while leaving the guard below open.
    if status == AnalyzeStatus.NONE:
        return

    if self.analyzeStatus != AnalyzeStatus.NONE:
        return

    project = self.getProject()

    if project is None:
        return

    items = list(project.outline)

    if not items and not self.outlineCompletion:
        # Nothing to walk. Scheduling the idle callback with an empty queue
        # would make it pop from an empty list; publish the (empty) result
        # right away instead and stay idle.
        if status == AnalyzeStatus.WORDS:
            self.wordsFrequencyStore.clear()
        elif status == AnalyzeStatus.PHRASES:
            self.phrasesFrequencyStore.clear()

        return

    self.analyzeStatus = status

    if not self.outlineCompletion:
        self.frequencies = dict()
        self.analyzeCompleted = 0

        if self.analyzeStatus == AnalyzeStatus.WORDS:
            self.wordsProgress.set_fraction(0.0)
        elif self.analyzeStatus == AnalyzeStatus.PHRASES:
            self.phrasesProgress.set_fraction(0.0)

        # The callback first runs after this method has returned, so the
        # queue is filled by then.
        GObject.idle_add(self.__completeOutlineItem)

    self.outlineCompletion.extend(items)
|
||||
|
||||
def _analyzeWordsClicked(self, button: Gtk.Button):
    """Start the word frequency analysis.

    The word list page is not shown here: the completion step of the
    analysis switches to "wordlist_view" itself once the store has been
    filled, so switching up-front would only present an empty or stale
    list while the analysis is still running. ``button`` is not used.
    """
    self.analyze(AnalyzeStatus.WORDS)
|
||||
|
||||
def _analyzePhrasesClicked(self, button: Gtk.Button):
    """Start the phrase frequency analysis. ``button`` is not used."""
    self.analyze(AnalyzeStatus.PHRASES)
|
||||
|
||||
def _excludeWordSelectionChanged(self, selection: Gtk.TreeSelection):
|
||||
model, tree_iter = selection.get_selected()
|
||||
|
|
|
@ -365,6 +365,8 @@ along with Manuskript. If not, see <http://www.gnu.org/licenses/>.
|
|||
<property name="visible">True</property>
|
||||
<property name="can-focus">True</property>
|
||||
<property name="model">words_frequency_store</property>
|
||||
<property name="search-column">0</property>
|
||||
<property name="show-expanders">False</property>
|
||||
<child internal-child="selection">
|
||||
<object class="GtkTreeSelection"/>
|
||||
</child>
|
||||
|
@ -373,6 +375,7 @@ along with Manuskript. If not, see <http://www.gnu.org/licenses/>.
|
|||
<property name="title" translatable="yes">Word</property>
|
||||
<property name="expand">True</property>
|
||||
<property name="sort-indicator">True</property>
|
||||
<property name="sort-column-id">0</property>
|
||||
<child>
|
||||
<object class="GtkCellRendererText"/>
|
||||
<attributes>
|
||||
|
@ -386,6 +389,7 @@ along with Manuskript. If not, see <http://www.gnu.org/licenses/>.
|
|||
<property name="title" translatable="yes">Frequency</property>
|
||||
<property name="expand">True</property>
|
||||
<property name="sort-indicator">True</property>
|
||||
<property name="sort-column-id">1</property>
|
||||
<child>
|
||||
<object class="GtkCellRendererText"/>
|
||||
<attributes>
|
||||
|
@ -571,6 +575,7 @@ along with Manuskript. If not, see <http://www.gnu.org/licenses/>.
|
|||
<property name="title" translatable="yes">Phrase</property>
|
||||
<property name="expand">True</property>
|
||||
<property name="sort-indicator">True</property>
|
||||
<property name="sort-column-id">0</property>
|
||||
<child>
|
||||
<object class="GtkCellRendererText"/>
|
||||
<attributes>
|
||||
|
@ -584,6 +589,7 @@ along with Manuskript. If not, see <http://www.gnu.org/licenses/>.
|
|||
<property name="title" translatable="yes">Frequency</property>
|
||||
<property name="expand">True</property>
|
||||
<property name="sort-indicator">True</property>
|
||||
<property name="sort-column-id">1</property>
|
||||
<child>
|
||||
<object class="GtkCellRendererText"/>
|
||||
<attributes>
|
||||
|
|
Loading…
Reference in a new issue