Implement async frequency analyzing

Signed-off-by: TheJackiMonster <thejackimonster@gmail.com>
This commit is contained in:
TheJackiMonster 2023-06-08 18:46:30 +02:00
parent 13b6e26b13
commit 07a8197bcd
No known key found for this signature in database
GPG key ID: D850A5F772E880F9
2 changed files with 140 additions and 3 deletions

View file

@ -4,9 +4,22 @@
import gi
gi.require_version("Gtk", "3.0")
from gi.repository import GObject, Gtk, Handy
from gi.repository import GObject, Gtk
import re
from enum import Enum, unique
from manuskript.data import OutlineFolder, OutlineText, OutlineState, Project
from manuskript.ui.abstractDialog import AbstractDialog
from manuskript.util import validString, validInt
@unique
class AnalyzeStatus(Enum):
    """Kind of frequency analysis that is currently in progress."""

    # No analysis is running.
    NONE = 0

    # Single words are being counted.
    WORDS = 1

    # Multi-word phrases are being counted.
    PHRASES = 2
class FrequencyWindow(AbstractDialog):
@ -14,6 +27,11 @@ class FrequencyWindow(AbstractDialog):
def __init__(self, mainWindow):
AbstractDialog.__init__(self, mainWindow, "ui/frequency.glade", "frequency_window")
self.analyzeStatus = AnalyzeStatus.NONE
self.frequencies = dict()
self.outlineCompletion = list()
self.analyzeCompleted = 0
self.headerBar = None
self.back = None
self.wordLeaflet = None
@ -30,6 +48,10 @@ class FrequencyWindow(AbstractDialog):
self.wordsFrequencyStore = None
self.phrasesFrequencyStore = None
self.phraseMinimum = None
self.phraseMaximum = None
self.wordSize = None
def initWindow(self, builder, window):
self.headerBar = builder.get_object("header_bar")
self.back = builder.get_object("back")
@ -47,6 +69,10 @@ class FrequencyWindow(AbstractDialog):
self.wordsFrequencyStore = builder.get_object("words_frequency_store")
self.phrasesFrequencyStore = builder.get_object("phrases_frequency_store")
self.phraseMinimum = builder.get_object("phrase_minimum")
self.phraseMaximum = builder.get_object("phrase_maximum")
self.wordSize = builder.get_object("word_size")
self.wordLeaflet.bind_property("folded", self.back, "visible",
GObject.BindingFlags.SYNC_CREATE)
self.wordLeaflet.bind_property("folded", self.headerBar, "show-close-button",
@ -59,18 +85,123 @@ class FrequencyWindow(AbstractDialog):
self.excludeWordSelection.connect("changed", self._excludeWordSelectionChanged)
self.removeWord.connect("clicked", self._removeWordClicked)
self.addWord.connect("clicked", self._addWordClicked)
def getProject(self) -> Project:
    """Return the project that the owning main window currently provides."""
    main_window = self.mainWindow
    return main_window.getProject()
def _backClicked(self, button: Gtk.Button):
    """Step back from the word list to the word filter, or hide the dialog.

    Args:
        button: The button passed by the signal; not used.
    """
    leaflet = self.wordLeaflet

    # Any page other than the word list has nothing to go back to.
    if leaflet.get_visible_child_name() != "wordlist_view":
        self.hide()
        return

    leaflet.set_visible_child_name("wordfilter_view")
def __analyzeOutlineText(self, outlineText: OutlineText):
    """Count the words or phrases of one outline text into self.frequencies.

    Which patterns are counted depends on self.analyzeStatus:

    * WORDS: every run of word characters that is at least as long as the
      value of the word size widget.
    * PHRASES: every sequence of n whitespace-separated words, for each n
      between the values of the phrase minimum and phrase maximum widgets.
    * anything else: nothing is counted.

    Args:
        outlineText: The outline item whose ``text`` attribute is scanned.
    """
    if self.analyzeStatus == AnalyzeStatus.WORDS:
        # A minimum length below one would let the pattern match empty
        # strings between every pair of characters.
        word_size = max(1, validInt(self.wordSize.get_value()))
        patterns = [re.compile(r"\w{" + str(word_size) + r",}")]
    elif self.analyzeStatus == AnalyzeStatus.PHRASES:
        # A phrase has at least one word; a lower bound of zero would
        # compile the one-word pattern twice and count every word double.
        phrase_minimum = max(1, validInt(self.phraseMinimum.get_value()))
        phrase_maximum = validInt(self.phraseMaximum.get_value())

        # findall() only reports non-overlapping matches, so matching the
        # phrase directly would skip every phrase that starts inside the
        # previous match ("a b c" -> only "a b"). Wrapping the phrase in a
        # lookahead makes each match zero-width, which lets the scan try
        # every word start; the lookbehind rejects positions in mid-word.
        patterns = [
            re.compile(r"(?<!\w)(?=(\w+" + r"\s+\w+" * (n - 1) + r"))")
            for n in range(phrase_minimum, phrase_maximum + 1)
        ]
    else:
        return

    text = outlineText.text
    if not text:
        # Nothing to scan; also avoids handing a missing text to findall().
        return

    frequencies = self.frequencies
    for pattern in patterns:
        for match in pattern.findall(text):
            frequencies[match] = frequencies.get(match, 0) + 1
def __completeOutlineItem(self):
    """Process one queued outline item and publish results when done.

    Registered by analyze() via GObject.idle_add. Each call takes the first
    item of self.outlineCompletion, loads it if its state is not COMPLETE,
    queues the children of a folder or analyzes a text, and updates the
    progress bar that belongs to the running analysis. Once the queue is
    empty, the collected frequencies are written to the matching store and
    the analysis state is reset.

    Returns:
        True while items remain queued (so the callback is invoked again),
        False once the results have been published.
    """
    # The queue can be empty on the very first call, e.g. when the outline
    # has no items. Popping then would raise and leave analyzeStatus set
    # forever, which would block every later analyze() call.
    if self.outlineCompletion:
        outline_item = self.outlineCompletion.pop(0)

        if outline_item.state != OutlineState.COMPLETE:
            outline_item.load(False)

        if isinstance(outline_item, OutlineFolder):
            # Children are appended to the same queue and handled by
            # later invocations of this callback.
            self.outlineCompletion.extend(outline_item)
        elif isinstance(outline_item, OutlineText):
            self.__analyzeOutlineText(outline_item)

        self.analyzeCompleted += 1

    incomplete = len(self.outlineCompletion)
    complete = self.analyzeCompleted
    total = complete + incomplete

    # With nothing processed and nothing queued the work counts as done.
    fraction = 1.0 * complete / total if total > 0 else 1.0

    if self.analyzeStatus == AnalyzeStatus.WORDS:
        self.wordsProgress.set_fraction(fraction)
    elif self.analyzeStatus == AnalyzeStatus.PHRASES:
        self.phrasesProgress.set_fraction(fraction)

    if incomplete > 0:
        return True

    if self.analyzeStatus == AnalyzeStatus.WORDS:
        store = self.wordsFrequencyStore
    elif self.analyzeStatus == AnalyzeStatus.PHRASES:
        store = self.phrasesFrequencyStore
    else:
        store = None

    if store is not None:
        store.clear()

        for text, frequency in self.frequencies.items():
            tree_iter = store.append()
            if tree_iter is None:
                continue

            # Column 0 holds the word or phrase, column 1 its count.
            store.set_value(tree_iter, 0, validString(text))
            store.set_value(tree_iter, 1, validInt(frequency))

    if self.analyzeStatus == AnalyzeStatus.WORDS:
        self.wordLeaflet.set_visible_child_name("wordlist_view")

    # Allow the next analyze() call and drop the published counts.
    self.analyzeStatus = AnalyzeStatus.NONE
    self.frequencies = dict()
    return False
def analyze(self, status: AnalyzeStatus):
# Start a frequency analysis of the given kind over the project's outline.
#
# status: the analysis to run; it is stored in self.analyzeStatus.
#
# NOTE(review): the indentation of this block is not preserved in this view,
# so the nesting of the statements after the queue-length check cannot be
# read off here - confirm it against the actual file before relying on the
# comments below.

# Only one analysis at a time: ignore the request while the status is not NONE.
if self.analyzeStatus != AnalyzeStatus.NONE:
return
# Without a project there is no outline to walk.
project = self.getProject()
if project is None:
return
self.analyzeStatus = status
# An empty work queue is treated as a fresh run: the counters are reset.
if len(self.outlineCompletion) == 0:
self.frequencies = dict()
self.analyzeCompleted = 0
# Reset the progress bar that belongs to the requested analysis.
if self.analyzeStatus == AnalyzeStatus.WORDS:
self.wordsProgress.set_fraction(0.0)
elif self.analyzeStatus == AnalyzeStatus.PHRASES:
self.phrasesProgress.set_fraction(0.0)
# The outline is processed through an idle callback rather than inline here.
GObject.idle_add(self.__completeOutlineItem)
# Queue the items obtained by iterating project.outline for the callback.
for outline_item in project.outline:
self.outlineCompletion.append(outline_item)
def _analyzeWordsClicked(self, button: Gtk.Button):
    """Show the word list page and request a word frequency analysis.

    Args:
        button: The button passed by the signal; not used.
    """
    leaflet = self.wordLeaflet
    leaflet.set_visible_child_name("wordlist_view")

    requested = AnalyzeStatus.WORDS
    self.analyze(requested)
def _analyzePhrasesClicked(self, button: Gtk.Button):
    """Request a phrase frequency analysis.

    Args:
        button: The button passed by the signal; not used.
    """
    # The leftover no-op ``pass`` placeholder in front of the call was removed.
    self.analyze(AnalyzeStatus.PHRASES)
def _excludeWordSelectionChanged(self, selection: Gtk.TreeSelection):
model, tree_iter = selection.get_selected()

View file

@ -365,6 +365,8 @@ along with Manuskript. If not, see <http://www.gnu.org/licenses/>.
<property name="visible">True</property>
<property name="can-focus">True</property>
<property name="model">words_frequency_store</property>
<property name="search-column">0</property>
<property name="show-expanders">False</property>
<child internal-child="selection">
<object class="GtkTreeSelection"/>
</child>
@ -373,6 +375,7 @@ along with Manuskript. If not, see <http://www.gnu.org/licenses/>.
<property name="title" translatable="yes">Word</property>
<property name="expand">True</property>
<property name="sort-indicator">True</property>
<property name="sort-column-id">0</property>
<child>
<object class="GtkCellRendererText"/>
<attributes>
@ -386,6 +389,7 @@ along with Manuskript. If not, see <http://www.gnu.org/licenses/>.
<property name="title" translatable="yes">Frequency</property>
<property name="expand">True</property>
<property name="sort-indicator">True</property>
<property name="sort-column-id">1</property>
<child>
<object class="GtkCellRendererText"/>
<attributes>
@ -571,6 +575,7 @@ along with Manuskript. If not, see <http://www.gnu.org/licenses/>.
<property name="title" translatable="yes">Phrase</property>
<property name="expand">True</property>
<property name="sort-indicator">True</property>
<property name="sort-column-id">0</property>
<child>
<object class="GtkCellRendererText"/>
<attributes>
@ -584,6 +589,7 @@ along with Manuskript. If not, see <http://www.gnu.org/licenses/>.
<property name="title" translatable="yes">Frequency</property>
<property name="expand">True</property>
<property name="sort-indicator">True</property>
<property name="sort-column-id">1</property>
<child>
<object class="GtkCellRendererText"/>
<attributes>