mirror of
https://github.com/olivierkes/manuskript.git
synced 2024-05-16 10:52:29 +12:00
spellchecker: Add SymSpellPy support
SymSpell is a great spellchecker which works a lot faster than pyspellchecker for finding suggestions but is a bit slow at loading dictionaries (about 15 seconds initially, 2 seconds if using a cached version). SymSpell also doesn't come with dictionaries, so the code is currently using dictionaries from pyspellchecker, so if pyspellchecker isn't installed, then the user won't see any available dictionaries. Eventually, would need to have an interface for people to manage dictionaries for it.
This commit is contained in:
parent
e1b063a953
commit
e1edccc7d3
|
@ -14,6 +14,11 @@ try:
|
|||
except ImportError:
|
||||
pyspellchecker = None
|
||||
|
||||
try:
|
||||
import symspellpy
|
||||
except ImportError:
|
||||
symspellpy = None
|
||||
|
||||
|
||||
class Spellchecker:
|
||||
dictionaries = {}
|
||||
|
@ -307,3 +312,105 @@ class PySpellcheckerDictionary(BasicDictionary):
|
|||
Spellchecker.registerImplementation(PySpellcheckerDictionary)
|
||||
|
||||
|
||||
class SymSpellDictionary(BasicDictionary):
|
||||
CUSTOM_COUNT = 1
|
||||
DISTANCE = 2
|
||||
|
||||
def __init__(self, name):
|
||||
BasicDictionary.__init__(self, name)
|
||||
|
||||
self._dict = symspellpy.SymSpell(self.DISTANCE)
|
||||
|
||||
cachePath = self.getCachedDictionaryPath()
|
||||
try:
|
||||
self._dict.load_pickle(cachePath)
|
||||
except:
|
||||
if pyspellchecker:
|
||||
path = os.path.join(pyspellchecker.__path__[0], "resources", "{}.json.gz".format(self.name))
|
||||
if os.path.exists(path):
|
||||
with gzip.open(path, "rt", encoding='utf-8') as f:
|
||||
data = json.loads(f.read())
|
||||
for key in data:
|
||||
self._dict.create_dictionary_entry(key, data[key])
|
||||
self._dict.save_pickle(cachePath)
|
||||
for word in self._customDict:
|
||||
self._dict.create_dictionary_entry(word, self.CUSTOM_COUNT)
|
||||
|
||||
def getCachedDictionaryPath(self):
|
||||
return os.path.join(self.getResourcesPath(), "{}.sym.gz".format(self.name))
|
||||
|
||||
@staticmethod
|
||||
def getLibraryName():
|
||||
return "symspellpy"
|
||||
|
||||
@staticmethod
|
||||
def getLibraryURL():
|
||||
return "https://github.com/mammothb/symspellpy"
|
||||
|
||||
@staticmethod
|
||||
def isInstalled():
|
||||
return symspellpy is not None
|
||||
|
||||
@classmethod
|
||||
def availableDictionaries(cls):
|
||||
if SymSpellDictionary.isInstalled():
|
||||
files = glob.glob(os.path.join(cls.getResourcesPath(), "*.sym.gz"))
|
||||
dictionaries = set()
|
||||
for file in files:
|
||||
dictionaries.add(os.path.basename(file)[:-7])
|
||||
return list(dictionaries.union(PySpellcheckerDictionary.availableDictionaries()))
|
||||
return []
|
||||
|
||||
@staticmethod
|
||||
def getDefaultDictionary():
|
||||
if not SymSpellDictionary.isInstalled():
|
||||
return None
|
||||
|
||||
return PySpellcheckerDictionary.getDefaultDictionary()
|
||||
|
||||
def isMisspelled(self, word):
|
||||
suggestions = self._dict.lookup(word.lower(), symspellpy.Verbosity.TOP)
|
||||
if len(suggestions) > 0 and suggestions[0].distance == 0:
|
||||
return False
|
||||
# Try the word as is, since a dictionary might have uppercase letter as part
|
||||
# of it's spelling ("I'm" or "January" for example)
|
||||
suggestions = self._dict.lookup(word, symspellpy.Verbosity.TOP)
|
||||
if len(suggestions) > 0 and suggestions[0].distance == 0:
|
||||
return False
|
||||
return True
|
||||
|
||||
def getSuggestions(self, word):
|
||||
upper = word.isupper()
|
||||
upper1 = word[0].isupper()
|
||||
suggestions = self._dict.lookup_compound(word, 2)
|
||||
suggestions.extend(self._dict.lookup(word, symspellpy.Verbosity.CLOSEST))
|
||||
candidates = []
|
||||
for sug in suggestions:
|
||||
if upper:
|
||||
term = sug.term.upper()
|
||||
elif upper1:
|
||||
term = sug.term[0].upper() + sug.term[1:]
|
||||
else:
|
||||
term = sug.term
|
||||
if sug.distance > 0 and not term in candidates:
|
||||
candidates.append(term)
|
||||
return candidates
|
||||
|
||||
def addWord(self, word):
|
||||
BasicDictionary.addWord(self, word)
|
||||
self._dict.create_dictionary_entry(word.lower(), self.CUSTOM_COUNT)
|
||||
|
||||
def removeWord(self, word):
|
||||
BasicDictionary.removeWord(self, word)
|
||||
# Need to do this for now because library doesn't support removing a word
|
||||
self._reloadDict()
|
||||
|
||||
def _reloadDict(self):
|
||||
self._dict = symspellpy.SymSpell(self.DISTANCE)
|
||||
|
||||
cachePath = self.getCachedDictionaryPath()
|
||||
self._dict.load_pickle(cachePath)
|
||||
for word in self._customDict:
|
||||
self._dict.create_dictionary_entry(word, self.CUSTOM_COUNT)
|
||||
|
||||
Spellchecker.registerImplementation(SymSpellDictionary)
|
||||
|
|
Loading…
Reference in a new issue