mirror of
https://github.com/olivierkes/manuskript.git
synced 2024-06-26 18:20:53 +12:00
Add support for symspellpy 6.3.8, which has delete_dictionary_entry, and do not use gzipped pickle. Also give symspellpy higher priority than pyspellchecker. List symspellpy dictionaries with cached ones first, then non-cached ones. symspellpy 6.3.8 is now the minimum required version; add support for showing that requirement to the user. Also add support for spellcheck libraries that are installed but have no dictionaries.
428 lines
13 KiB
Python
428 lines
13 KiB
Python
|
|
import os, gzip, json, glob
|
|
from PyQt5.QtCore import QLocale
|
|
from collections import OrderedDict
|
|
from manuskript.functions import writablePath
|
|
|
|
try:
|
|
import enchant
|
|
except ImportError:
|
|
enchant = None
|
|
|
|
try:
|
|
import spellchecker as pyspellchecker
|
|
except ImportError:
|
|
pyspellchecker = None
|
|
|
|
SYMSPELLPY_MIN_VERSION = "6.3.8"


def _versionTuple(version):
    """Return the numeric components of a version string as a tuple of ints.

    Every run of digits becomes one component, so "6.3.8" gives (6, 3, 8) and
    "6.3.10" gives (6, 3, 10). Tuples compare component by component, which
    orders "6.3.10" after "6.3.8" (a plain string comparison would not).
    """
    import re  # function-scope import: the file-level imports stay untouched

    return tuple(int(number) for number in re.findall(r"\d+", str(version)))


try:
    import symspellpy

    # The version check used to rely on distutils.version.LooseVersion, but
    # distutils was removed from the standard library in Python 3.12. Importing
    # it inside this try block raised ImportError there, so the handler below
    # disabled symspellpy even when a suitable version was installed.
    # A module without __version__ is treated as too old instead of crashing.
    installed = getattr(symspellpy, "__version__", "0")
    if _versionTuple(installed) < _versionTuple(SYMSPELLPY_MIN_VERSION):
        symspellpy = None

except ImportError:
    symspellpy = None
|
|
|
|
|
|
class Spellchecker:
    """Facade over the registered spellchecking back ends.

    Back ends are dictionary classes added through registerImplementation().
    Dictionary instances created by getDictionary() are cached on the class
    under their normalized "library:dictionary" name.
    """

    # Cache of dictionary instances, keyed by normalized "library:dictionary" name
    dictionaries = {}
    # In order of priority
    implementations = []

    def __init__(self):
        pass

    @staticmethod
    def registerImplementation(impl):
        """Append a back-end class; earlier registrations have higher priority."""
        Spellchecker.implementations.append(impl)

    @staticmethod
    def isInstalled():
        """Return True if at least one registered back end reports itself installed."""
        return any(impl.isInstalled() for impl in Spellchecker.implementations)

    @staticmethod
    def supportedLibraries():
        """Return an OrderedDict mapping every library name to its requirement.

        The requirement is whatever the back end's getLibraryRequirement()
        returns; registration order is preserved.
        """
        libs = OrderedDict()
        for impl in Spellchecker.implementations:
            libs[impl.getLibraryName()] = impl.getLibraryRequirement()
        return libs

    @staticmethod
    def availableLibraries():
        """Return the names of the installed libraries, in priority order."""
        return [impl.getLibraryName()
                for impl in Spellchecker.implementations
                if impl.isInstalled()]

    @staticmethod
    def availableDictionaries():
        """Return an OrderedDict mapping each installed library to its dictionaries."""
        dictionaries = OrderedDict()
        for impl in Spellchecker.implementations:
            if impl.isInstalled():
                dictionaries[impl.getLibraryName()] = impl.availableDictionaries()
        return dictionaries

    @staticmethod
    def normalizeDictName(lib, dictionary):
        """Build the "library:dictionary" key used to identify a dictionary."""
        return "{}:{}".format(lib, dictionary)

    @staticmethod
    def getDefaultDictionary():
        """Return the normalized default dictionary of the first back end that has one.

        Returns None when no registered back end provides a default.
        """
        for impl in Spellchecker.implementations:
            default = impl.getDefaultDictionary()
            if default:
                return Spellchecker.normalizeDictName(impl.getLibraryName(), default)
        return None

    @staticmethod
    def getLibraryURL(lib=None):
        """Return the URL of library `lib`, or a dict of all URLs when `lib` is falsy.

        An unknown library name yields None.
        """
        urls = {}
        for impl in Spellchecker.implementations:
            urls[impl.getLibraryName()] = impl.getLibraryURL()
        if lib:
            return urls.get(lib, None)
        return urls

    @staticmethod
    def getDictionary(dictionary):
        """Return the (cached) dictionary object for `dictionary`, or None.

        `dictionary` is either a normalized "library:dictionary" name or a
        bare dictionary name, which is resolved against the highest-priority
        registered back end. A falsy value selects the default dictionary.
        None is returned when nothing matches or the back end fails to
        create the dictionary.
        """
        if not dictionary:
            dictionary = Spellchecker.getDefaultDictionary()
            if not dictionary:
                return None

        values = dictionary.split(":", 1)
        if len(values) == 1:
            # A bare name cannot be resolved without any registered back end;
            # indexing implementations[0] here used to raise IndexError.
            if not Spellchecker.implementations:
                return None
            lib = Spellchecker.implementations[0].getLibraryName()
            name = dictionary
            dictionary = Spellchecker.normalizeDictName(lib, name)
        else:
            lib, name = values

        try:
            d = Spellchecker.dictionaries.get(dictionary, None)
            if d is None:
                for impl in Spellchecker.implementations:
                    if impl.isInstalled() and lib == impl.getLibraryName():
                        d = impl(name)
                        Spellchecker.dictionaries[dictionary] = d
                        break
            return d
        except Exception:
            # Deliberately best-effort: a back end that cannot create the
            # dictionary simply means "no dictionary" for the caller.
            pass
        return None
|
|
|
|
class BasicDictionary:
    """Base class for spellchecker back ends.

    It keeps a set of lower-cased custom words that is persisted as a
    gzip-compressed JSON list (see getCustomDictionaryPath()). Subclasses
    provide the library-specific static methods and the actual spellchecking,
    and are responsible for feeding the custom words to their own back end.
    """

    def __init__(self, name):
        """Select dictionary `name` (or the default one) and load its custom words.

        If the custom word file cannot be read for any reason, the word set is
        left empty and the file is (re)written with that empty set.
        """
        self._lang = name
        if not self._lang:
            self._lang = self.getDefaultDictionary()

        self._customDict = set()
        customPath = self.getCustomDictionaryPath()
        try:
            with gzip.open(customPath, "rt", encoding='utf-8') as f:
                self._customDict = set(json.loads(f.read()))
            # NOTE: the words are intentionally not pushed to self._dict here.
            # The base class has neither _dict nor CUSTOM_COUNT, so doing it
            # raised AttributeError, which was swallowed and caused a
            # needless rewrite of the file on every successful load.
        except Exception:
            # If error loading the file, overwrite with empty dictionary
            self._customDict = set()
            self._saveCustomDict()

    @property
    def name(self):
        """Name of the selected dictionary."""
        return self._lang

    @staticmethod
    def getLibraryName():
        """Return the name of the underlying library (abstract)."""
        raise NotImplementedError

    @staticmethod
    def getLibraryRequirement():
        """Return a requirement string for the library, or None if there is none."""
        return None

    @staticmethod
    def getLibraryURL():
        """Return the URL of the underlying library (abstract)."""
        raise NotImplementedError

    @staticmethod
    def isInstalled():
        """Return whether the underlying library is usable (abstract)."""
        raise NotImplementedError

    @staticmethod
    def getDefaultDictionary():
        """Return the name of the default dictionary (abstract)."""
        raise NotImplementedError

    @staticmethod
    def availableDictionaries():
        """Return the list of available dictionary names (abstract)."""
        raise NotImplementedError

    def isMisspelled(self, word):
        """Return True if `word` is not known to the dictionary (abstract)."""
        raise NotImplementedError

    def getSuggestions(self, word):
        """Return spelling suggestions for `word` (abstract)."""
        raise NotImplementedError

    def isCustomWord(self, word):
        """Return True if `word` (case-insensitively) is in the custom word set."""
        return word.lower() in self._customDict

    def addWord(self, word):
        """Add `word`, lower-cased, to the custom word set and persist it."""
        word = word.lower()
        if word not in self._customDict:
            self._customDict.add(word)
            self._saveCustomDict()

    def removeWord(self, word):
        """Remove `word`, lower-cased, from the custom word set and persist it."""
        word = word.lower()
        if word in self._customDict:
            self._customDict.remove(word)
            self._saveCustomDict()

    @classmethod
    def getResourcesPath(cls):
        """Return the per-library dictionary directory, creating it if needed."""
        path = os.path.join(writablePath(), "resources", "dictionaries", cls.getLibraryName())
        # exist_ok avoids the race between an existence check and the creation
        os.makedirs(path, exist_ok=True)
        return path

    def getCustomDictionaryPath(self):
        """Return the path of the gzip/JSON file holding the custom words."""
        return os.path.join(self.getResourcesPath(), "{}.json.gz".format(self._lang))

    def _saveCustomDict(self):
        """Write the custom word set to disk as a gzip-compressed JSON list."""
        customPath = self.getCustomDictionaryPath()
        # Same explicit encoding as the reader in __init__
        with gzip.open(customPath, "wt", encoding='utf-8') as f:
            f.write(json.dumps(list(self._customDict)))
|
|
|
|
|
|
class EnchantDictionary(BasicDictionary):
    """Spellchecker back end based on PyEnchant.

    Custom words are delegated to enchant's personal word list
    (DictWithPWL) instead of the gzip/JSON set of BasicDictionary, which is
    why BasicDictionary.__init__ is not called.
    """

    def __init__(self, name):
        """Open dictionary `name`, or the default one if it does not exist."""
        self._lang = name
        if not (self._lang and enchant.dict_exists(self._lang)):
            self._lang = self.getDefaultDictionary()

        self._dict = enchant.DictWithPWL(self._lang, self.getCustomDictionaryPath())

    @staticmethod
    def getLibraryName():
        """Return the display name of the library."""
        return "PyEnchant"

    @staticmethod
    def getLibraryURL():
        """Return the project URL of the library."""
        return "https://pypi.org/project/pyenchant/"

    @staticmethod
    def isInstalled():
        """Return True if the enchant module could be imported."""
        return enchant is not None

    @staticmethod
    def availableDictionaries():
        """Return the dictionary names reported by enchant.list_dicts().

        Only the first element of each reported entry is used, as a string.
        """
        if EnchantDictionary.isInstalled():
            return [str(entry[0]) for entry in enchant.list_dicts()]
        return []

    @staticmethod
    def getDefaultDictionary():
        """Return the best default dictionary name, or None if there is none.

        Candidates are tried in order: enchant's default language, the Qt
        system locale, then the first available dictionary. A candidate is
        only accepted if enchant has a dictionary for it.
        """
        if not EnchantDictionary.isInstalled():
            return None

        default_locale = enchant.get_default_language()
        if not (default_locale and enchant.dict_exists(default_locale)):
            default_locale = None

        if default_locale is None:
            # The Qt locale name is a string, never None, so it must be
            # validated explicitly for the last fallback to be reachable.
            default_locale = QLocale.system().name()
            if not (default_locale and enchant.dict_exists(default_locale)):
                default_locale = None

        if default_locale is None:
            # This is a static method: there is no `self` to call through.
            available = EnchantDictionary.availableDictionaries()
            if available:
                default_locale = available[0]

        return default_locale

    def isMisspelled(self, word):
        """Return True if enchant's check() rejects `word`."""
        return not self._dict.check(word)

    def getSuggestions(self, word):
        """Return enchant's suggestions for `word`."""
        return self._dict.suggest(word)

    def isCustomWord(self, word):
        """Return the result of enchant's is_added() for `word`."""
        return self._dict.is_added(word)

    def addWord(self, word):
        """Add `word` through enchant's add()."""
        self._dict.add(word)

    def removeWord(self, word):
        """Remove `word` through enchant's remove()."""
        self._dict.remove(word)

    def getCustomDictionaryPath(self):
        """Return the path of the plain-text personal word list handed to enchant."""
        return os.path.join(self.getResourcesPath(), "{}.txt".format(self.name))
|
|
|
|
class PySpellcheckerDictionary(BasicDictionary):
    """Spellchecker back end based on pyspellchecker."""

    def __init__(self, name):
        """Load dictionary `name` and add the stored custom words to it."""
        BasicDictionary.__init__(self, name)

        self._dict = pyspellchecker.SpellChecker(self.name)
        self._dict.word_frequency.load_words(self._customDict)

    @staticmethod
    def getLibraryName():
        """Return the display name of the library."""
        return "pyspellchecker"

    @staticmethod
    def getLibraryURL():
        """Return the documentation URL of the library."""
        return "https://pyspellchecker.readthedocs.io/en/latest/"

    @staticmethod
    def isInstalled():
        """Return True if the spellchecker module could be imported."""
        return pyspellchecker is not None

    @staticmethod
    def availableDictionaries():
        """Return the names of the "*.json.gz" files in the package's resources directory."""
        if PySpellcheckerDictionary.isInstalled():
            pattern = os.path.join(pyspellchecker.__path__[0], "resources", "*.json.gz")
            suffix = len(".json.gz")
            return [os.path.basename(path)[:-suffix] for path in glob.glob(pattern)]
        return []

    @staticmethod
    def getDefaultDictionary():
        """Return the first two characters of the Qt system locale name.

        Falls back to "en" when the locale name is empty; returns None when
        the library is not installed.
        """
        if not PySpellcheckerDictionary.isInstalled():
            return None

        default_locale = QLocale.system().name()
        if default_locale:
            default_locale = default_locale[0:2]
        # Slicing never produces None, so test for "empty" rather than
        # "is None" to make the "en" fallback reachable.
        if not default_locale:
            default_locale = "en"

        return default_locale

    def isMisspelled(self, word):
        """Return True if `word` is reported as unknown."""
        return len(self._dict.unknown([word])) > 0

    def getSuggestions(self, word):
        """Return the set of candidate corrections for `word`, excluding `word` itself."""
        candidates = self._dict.candidates(word)
        if not candidates:
            # Newer pyspellchecker releases return None instead of a set
            # when there is no candidate.
            return set()
        candidates = set(candidates)
        candidates.discard(word)
        return candidates

    def addWord(self, word):
        """Persist `word` as a custom word and add it to the loaded dictionary."""
        BasicDictionary.addWord(self, word)
        self._dict.word_frequency.add(word.lower())

    def removeWord(self, word):
        """Remove custom word `word` from the persisted set and the loaded dictionary.

        Words that were never added as custom words are left untouched.
        """
        wasCustom = self.isCustomWord(word)
        BasicDictionary.removeWord(self, word)
        if wasCustom:
            # Only drop what addWord() put there: this keeps built-in words
            # intact. NOTE(review): presumably also avoids a KeyError that
            # some pyspellchecker versions raise for unknown words; verify.
            self._dict.word_frequency.remove(word.lower())
|
|
|
|
class SymSpellDictionary(BasicDictionary):
    """Spellchecker back end based on symspellpy.

    Word frequencies are taken from pyspellchecker's resource files, when
    that package is installed, and cached as a symspellpy pickle ("*.sym")
    in the resources directory.
    """

    # Count given to custom words when they are added to the dictionary
    CUSTOM_COUNT = 1
    # First argument of symspellpy.SymSpell (the maximum dictionary edit distance)
    DISTANCE = 2

    def __init__(self, name):
        """Load dictionary `name` from cache or rebuild it, then add custom words."""
        BasicDictionary.__init__(self, name)

        self._dict = symspellpy.SymSpell(self.DISTANCE)

        cachePath = self.getCachedDictionaryPath()
        try:
            # NOTE(security): this unpickles a file. It is a cache written by
            # this class into the application's own resources directory, so
            # it must never be pointed at data from an untrusted source.
            loaded = self._dict.load_pickle(cachePath, False)
        except Exception:
            # A missing, corrupted or incompatible cache is not an error:
            # the dictionary is simply rebuilt below.
            loaded = False

        if not loaded and pyspellchecker:
            path = os.path.join(pyspellchecker.__path__[0], "resources", "{}.json.gz".format(self.name))
            if os.path.exists(path):
                with gzip.open(path, "rt", encoding='utf-8') as f:
                    data = json.loads(f.read())
                for key in data:
                    self._dict.create_dictionary_entry(key, data[key])
                # Cache before the custom words are added, so the cache only
                # holds the stock dictionary.
                self._dict.save_pickle(cachePath, False)

        for word in self._customDict:
            self._dict.create_dictionary_entry(word, self.CUSTOM_COUNT)

    def getCachedDictionaryPath(self):
        """Return the path of the pickled cache for this dictionary."""
        return os.path.join(self.getResourcesPath(), "{}.sym".format(self.name))

    @staticmethod
    def getLibraryName():
        """Return the display name of the library."""
        return "symspellpy"

    @staticmethod
    def getLibraryRequirement():
        """Return the minimum version requirement as a ">= x.y.z" string."""
        return ">= " + SYMSPELLPY_MIN_VERSION

    @staticmethod
    def getLibraryURL():
        """Return the project URL of the library."""
        return "https://github.com/mammothb/symspellpy"

    @staticmethod
    def isInstalled():
        """Return True if a suitable symspellpy module could be imported."""
        return symspellpy is not None

    @classmethod
    def availableDictionaries(cls):
        """Return cached dictionaries first, then the remaining pyspellchecker ones."""
        if SymSpellDictionary.isInstalled():
            suffix = len(".sym")
            pattern = os.path.join(cls.getResourcesPath(), "*.sym")
            dictionaries = [os.path.basename(path)[:-suffix] for path in glob.glob(pattern)]
            for sp_dict in PySpellcheckerDictionary.availableDictionaries():
                if sp_dict not in dictionaries:
                    dictionaries.append(sp_dict)
            return dictionaries
        return []

    @staticmethod
    def getDefaultDictionary():
        """Return pyspellchecker's default dictionary name, or None if not installed."""
        if not SymSpellDictionary.isInstalled():
            return None

        return PySpellcheckerDictionary.getDefaultDictionary()

    def isMisspelled(self, word):
        """Return False if `word` has an exact (distance 0) match, True otherwise.

        The lower-cased form is tried first, then the word as is, since a
        dictionary might have uppercase letters as part of a word's spelling
        ("I'm" or "January" for example).
        """
        for candidate in (word.lower(), word):
            suggestions = self._dict.lookup(candidate, symspellpy.Verbosity.TOP)
            if len(suggestions) > 0 and suggestions[0].distance == 0:
                return False
        return True

    def getSuggestions(self, word):
        """Return the suggested replacements for `word`, without duplicates.

        Suggestions are re-cased to follow `word`: fully upper-cased when
        `word` is, capitalized when only its first letter is upper case.
        An empty `word` yields an empty list.
        """
        if not word:
            # word[0] below would raise IndexError
            return []

        upper = word.isupper()
        upper1 = word[0].isupper()
        suggestions = list(self._dict.lookup_compound(word, 2))
        suggestions.extend(self._dict.lookup(word, symspellpy.Verbosity.CLOSEST))
        candidates = []
        for sug in suggestions:
            if upper:
                term = sug.term.upper()
            elif upper1:
                # Slicing instead of indexing tolerates an empty term
                term = sug.term[:1].upper() + sug.term[1:]
            else:
                term = sug.term
            if sug.distance > 0 and term not in candidates:
                candidates.append(term)
        return candidates

    def addWord(self, word):
        """Persist `word` as a custom word and add it, lower-cased, to the dictionary."""
        BasicDictionary.addWord(self, word)
        self._dict.create_dictionary_entry(word.lower(), self.CUSTOM_COUNT)

    def removeWord(self, word):
        """Remove custom word `word` from the persisted set and the dictionary."""
        BasicDictionary.removeWord(self, word)
        # Since 6.3.8
        # addWord() stores the lower-cased form, so that is the entry to delete.
        self._dict.delete_dictionary_entry(word.lower())
|
|
|
|
|
|
# Register the implementations in order of priority.
# All of them go through registerImplementation() so that any future
# bookkeeping added there applies to every back end alike.
Spellchecker.registerImplementation(EnchantDictionary)
Spellchecker.registerImplementation(SymSpellDictionary)
Spellchecker.registerImplementation(PySpellcheckerDictionary)
|