From b473ead98ecc031a7ce279513c4e17dac747477a Mon Sep 17 00:00:00 2001 From: Curtis Gedak Date: Thu, 19 Sep 2019 14:40:16 -0600 Subject: [PATCH] Fix word recognition for spell checker, ignore active partial words MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See PR #651 This commit restores the functionality that prevents spell checking a word that is being actively typed at the end of a paragraph. The goals for the spell check word match regexp are: A. Words should include those with an apostrophe *E.g., can't* B. Words should exclude underscores *E.g., hello_world is two words* C. Words in other languages should be recognized *E.g., the French word familiarisé* D. Spell check should include a word at the absolute end of a line with no trailing space or punctuation *E.g., tezt* E. Spell check should ignore partial words in progress (user typing) *E.g., paragr while midway through typing paragraph* This commit addresses all five of the above goals. HISTORY: - See issue #166 and commit 6ec0c19 in the 0.5.0 release. - See issue #283 and commit 63b471e in the 0.7.0 release. Also fix the misspelled utf-8 encoding declaration (utf8 -> utf-8) at the top of the source file. 
--- manuskript/ui/highlighters/basicHighlighter.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/manuskript/ui/highlighters/basicHighlighter.py b/manuskript/ui/highlighters/basicHighlighter.py index ad3a8bbb..362ee5a2 100644 --- a/manuskript/ui/highlighters/basicHighlighter.py +++ b/manuskript/ui/highlighters/basicHighlighter.py @@ -1,5 +1,5 @@ #!/usr/bin/python -# -*- coding: utf8 -*- +# -*- coding: utf-8 -*- import re @@ -146,9 +146,12 @@ class BasicHighlighter(QSyntaxHighlighter): textedText = text + " " # Based on http://john.nachtimwald.com/2009/08/22/qplaintextedit-with-in-line-spell-check/ - WORDS = r'(?iu)(((?!_)[\w\'])+)' + WORDS = r'(?iu)((?:[^_\W]|\')+)[^A-Za-z0-9\']' # (?iu) means case insensitive and Unicode - # (?!_) means perform negative lookahead to exclude "_" from pattern match. See issue #283 + # ((?:[^_\W]|\')+) means words exclude underscores but include apostrophes + # [^A-Za-z0-9\'] used with above hack to prevent spellcheck while typing word + # + # See also https://stackoverflow.com/questions/2062169/regex-w-in-utf-8 if hasattr(self.editor, "spellcheck") and self.editor.spellcheck: for word_object in re.finditer(WORDS, textedText): if (self.editor._dict