spellchecker: improve custom dictionary support

Improves the custom dictionary support by making it more generic
and moving it to the base class. Also makes PyEnchant use a custom
PWL (Personal Word List) file within manuskript's resources directory
and makes pyspellchecker detect available languages automatically.
This commit is contained in:
Youness Alaoui 2019-02-23 01:12:53 -05:00 committed by Curtis Gedak
parent 20c5586a6c
commit e1b063a953

View file

@ -1,5 +1,5 @@
import os, gzip, json import os, gzip, json, glob
from PyQt5.QtCore import QLocale from PyQt5.QtCore import QLocale
from collections import OrderedDict from collections import OrderedDict
from manuskript.functions import writablePath from manuskript.functions import writablePath
@ -23,6 +23,10 @@ class Spellchecker:
def __init__(self): def __init__(self):
pass pass
@staticmethod
def registerImplementation(impl):
Spellchecker.implementations.append(impl)
@staticmethod @staticmethod
def isInstalled(): def isInstalled():
for impl in Spellchecker.implementations: for impl in Spellchecker.implementations:
@ -73,14 +77,6 @@ class Spellchecker:
return urls.get(lib, None) return urls.get(lib, None)
return urls return urls
@staticmethod
def getResourcesPath(library):
path = os.path.join(writablePath(), "resources", "dictionaries", library)
if not os.path.exists(path):
os.makedirs(path)
return path
@staticmethod @staticmethod
def getDictionary(dictionary): def getDictionary(dictionary):
if not dictionary: if not dictionary:
@ -98,7 +94,7 @@ class Spellchecker:
d = Spellchecker.dictionaries.get(dictionary, None) d = Spellchecker.dictionaries.get(dictionary, None)
if d is None: if d is None:
for impl in Spellchecker.implementations: for impl in Spellchecker.implementations:
if lib == impl.getLibraryName(): if impl.isInstalled() and lib == impl.getLibraryName():
d = impl(name) d = impl(name)
Spellchecker.dictionaries[dictionary] = d Spellchecker.dictionaries[dictionary] = d
break break
@ -109,11 +105,24 @@ class Spellchecker:
class BasicDictionary: class BasicDictionary:
def __init__(self, name): def __init__(self, name):
pass self._lang = name
if not self._lang:
self._lang = self.getDefaultDictionary()
self._customDict = set()
customPath = self.getCustomDictionaryPath()
try:
with gzip.open(customPath, "rt", encoding='utf-8') as f:
self._customDict = set(json.loads(f.read()))
for word in self._customDict:
self._dict.create_dictionary_entry(word, self.CUSTOM_COUNT)
except:
# If error loading the file, overwrite with empty dictionary
self._saveCustomDict()
@property @property
def name(self): def name(self):
raise NotImplemented return self._lang
@staticmethod @staticmethod
def getLibraryName(): def getLibraryName():
@ -142,13 +151,35 @@ class BasicDictionary:
raise NotImplemented raise NotImplemented
def isCustomWord(self, word): def isCustomWord(self, word):
raise NotImplemented return word.lower() in self._customDict
def addWord(self, word): def addWord(self, word):
raise NotImplemented word = word.lower()
if not word in self._customDict:
self._customDict.add(word)
self._saveCustomDict()
def removeWord(self, word): def removeWord(self, word):
raise NotImplemented word = word.lower()
if word in self._customDict:
self._customDict.remove(word)
self._saveCustomDict()
@classmethod
def getResourcesPath(cls):
path = os.path.join(writablePath(), "resources", "dictionaries", cls.getLibraryName())
if not os.path.exists(path):
os.makedirs(path)
return path
def getCustomDictionaryPath(self):
return os.path.join(self.getResourcesPath(), "{}.json.gz".format(self._lang))
def _saveCustomDict(self):
customPath = self.getCustomDictionaryPath()
with gzip.open(customPath, "wt") as f:
f.write(json.dumps(list(self._customDict)))
class EnchantDictionary(BasicDictionary): class EnchantDictionary(BasicDictionary):
@ -157,11 +188,7 @@ class EnchantDictionary(BasicDictionary):
if not (self._lang and enchant.dict_exists(self._lang)): if not (self._lang and enchant.dict_exists(self._lang)):
self._lang = self.getDefaultDictionary() self._lang = self.getDefaultDictionary()
self._dict = enchant.Dict(self._lang) self._dict = enchant.DictWithPWL(self._lang, self.getCustomDictionaryPath())
@property
def name(self):
return self._lang
@staticmethod @staticmethod
def getLibraryName(): def getLibraryName():
@ -211,36 +238,19 @@ class EnchantDictionary(BasicDictionary):
def removeWord(self, word): def removeWord(self, word):
self._dict.remove(word) self._dict.remove(word)
Spellchecker.implementations.append(EnchantDictionary)
def getCustomDictionaryPath(self):
return os.path.join(self.getResourcesPath(), "{}.txt".format(self.name))
Spellchecker.implementations.append(EnchantDictionary)
class PySpellcheckerDictionary(BasicDictionary): class PySpellcheckerDictionary(BasicDictionary):
def __init__(self, name): def __init__(self, name):
self._lang = name BasicDictionary.__init__(self, name)
if not self._lang:
self._lang = self.getDefaultDictionary()
self._dict = pyspellchecker.SpellChecker(self._lang) self._dict = pyspellchecker.SpellChecker(self.name)
self._customDict = None self._dict.word_frequency.load_words(self._customDict)
customPath = self.getCustomDictionaryPath()
try:
self._customDict = pyspellchecker.SpellChecker(local_dictionary=customPath)
self._dict.word_frequency.load_dictionary(customPath)
except:
# If error loading the file, overwrite with empty dictionary
with gzip.open(customPath, "wt") as f:
f.write(json.dumps({}))
self._customDict = pyspellchecker.SpellChecker(local_dictionary=customPath)
self._dict.word_frequency.load_dictionary(customPath)
def getCustomDictionaryPath(self):
return os.path.join(Spellchecker.getResourcesPath(self.getLibraryName()), "{}.json.gz".format(self._lang))
@property
def name(self):
return self._lang
@staticmethod @staticmethod
def getLibraryName(): def getLibraryName():
@ -257,9 +267,11 @@ class PySpellcheckerDictionary(BasicDictionary):
@staticmethod @staticmethod
def availableDictionaries(): def availableDictionaries():
if PySpellcheckerDictionary.isInstalled(): if PySpellcheckerDictionary.isInstalled():
# TODO: If pyspellchecker eventually adds a way to get this list dictionaries = []
# programmatically or if the list changes, we need to update it here files = glob.glob(os.path.join(pyspellchecker.__path__[0], "resources", "*.json.gz"))
return ["de", "en", "es", "fr", "pt"] for file in files:
dictionaries.append(os.path.basename(file)[:-8])
return dictionaries
return [] return []
@staticmethod @staticmethod
@ -284,17 +296,14 @@ class PySpellcheckerDictionary(BasicDictionary):
candidates.remove(word) candidates.remove(word)
return candidates return candidates
def isCustomWord(self, word):
return len(self._customDict.known([word])) > 0
def addWord(self, word): def addWord(self, word):
self._dict.word_frequency.add(word) BasicDictionary.addWord(self, word)
self._customDict.word_frequency.add(word) self._dict.word_frequency.add(word.lower())
self._customDict.export(self.getCustomDictionaryPath(), gzipped=True)
def removeWord(self, word): def removeWord(self, word):
self._dict.word_frequency.remove(word) BasicDictionary.removeWord(self, word)
self._customDict.word_frequency.remove(word) self._dict.word_frequency.remove(word.lower())
self._customDict.export(self.getCustomDictionaryPath(), gzipped=True)
Spellchecker.registerImplementation(PySpellcheckerDictionary)
Spellchecker.implementations.append(PySpellcheckerDictionary)