Checkpoint: adding new markdown highlighter

2024-06-01 10:39:34 +12:00 · 2017-11-22 13:43:40 +01:00 · 2017-11-22 13:43:40 +01:00 · 9be2edeee7
parent 550f889251
commit 9be2edeee7
8 changed files with 1726 additions and 18 deletions
--- a/manuskript/exporter/manuskript/markdown.py
+++ b/manuskript/exporter/manuskript/markdown.py
@ -5,7 +5,7 @@ from PyQt5.QtWidgets import QPlainTextEdit, QGroupBox, qApp, QVBoxLayout, QCheck
 from manuskript.exporter.manuskript.plainText import plainText
 from manuskript.functions import mainWindow
-from manuskript.ui.editors.MMDHighlighter import MMDHighlighter
+from manuskript.ui.highlighters import MMDHighlighter
 from manuskript.ui.exporters.manuskript.plainTextSettings import exporterSettings
@ -72,4 +72,4 @@ class markdownSettings(exporterSettings):
        self.settings = exporterSettings.getSettings(self)
        self.settings["Preview"]["MarkdownHighlighter"] = self.chkMarkdownHighlighter.isChecked()
-        return self.settings
+        return self.settings
--- a/manuskript/ui/highlighters/MMDHighlighter.py
+++ b/manuskript/ui/highlighters/MMDHighlighter.py
@ -5,10 +5,10 @@ import re
 from PyQt5.QtCore import Qt
 from PyQt5.QtGui import QTextCharFormat, QFont, QTextCursor, QFontMetrics
-from manuskript.ui.editors.basicHighlighter import basicHighlighter
+from manuskript.ui.highlighters import BasicHighlighter
-class MMDHighlighter(basicHighlighter):
+class MMDHighlighter(BasicHighlighter):
    MARKDOWN_REGEX = {
        'Bold':             '(\*\*)(.+?)(\*\*)',
@ -27,7 +27,7 @@ class MMDHighlighter(basicHighlighter):
    }
    def __init__(self, editor, style="Default"):
-        basicHighlighter.__init__(self, editor)
+        BasicHighlighter.__init__(self, editor)
        self.editor = editor
@ -36,11 +36,11 @@ class MMDHighlighter(basicHighlighter):
            self.rules[key] = re.compile(self.MARKDOWN_REGEX[key])
    def highlightBlock(self, text):
-        basicHighlighter.highlightBlockBefore(self, text)
+        BasicHighlighter.highlightBlockBefore(self, text)
        self.doHighlightBlock(text)
-        basicHighlighter.highlightBlockAfter(self, text)
+        BasicHighlighter.highlightBlockAfter(self, text)
    def doHighlightBlock(self, text):
        """
--- a/manuskript/ui/highlighters/init.py
+++ b/manuskript/ui/highlighters/init.py
@ -0,0 +1,6 @@
 #!/usr/bin/python
 # -*- coding: utf8 -*-
 from manuskript.ui.highlighters.basicHighlighter import BasicHighlighter
 from manuskript.ui.highlighters.MMDHighlighter import MMDHighlighter
 from manuskript.ui.highlighters.markdownHighlighter import MarkdownHighlighter
--- a/manuskript/ui/highlighters/basicHighlighter.py
+++ b/manuskript/ui/highlighters/basicHighlighter.py
@ -4,12 +4,13 @@
 import re
 from PyQt5.QtCore import Qt
-from PyQt5.QtGui import QBrush, QTextCursor, QColor, QFont, QSyntaxHighlighter, QTextBlockFormat, QTextCharFormat
+from PyQt5.QtGui import QBrush, QTextCursor, QColor, QFont, QSyntaxHighlighter
 from PyQt5.QtGui import QTextBlockFormat, QTextCharFormat
 import manuskript.models.references as Ref
-class basicHighlighter(QSyntaxHighlighter):
+class BasicHighlighter(QSyntaxHighlighter):
    def __init__(self, editor):
        QSyntaxHighlighter.__init__(self, editor.document())
@ -38,7 +39,7 @@ class basicHighlighter(QSyntaxHighlighter):
    def highlightBlockBefore(self, text):
        """Highlighting to do before anything else.
-        When subclassing basicHighlighter, you must call highlightBlockBefore
+        When subclassing BasicHighlighter, you must call highlightBlockBefore
        before you do any custom highlighting.
        """
@ -56,7 +57,7 @@ class basicHighlighter(QSyntaxHighlighter):
    def highlightBlockAfter(self, text):
        """Highlighting to do after everything else.
-        When subclassing basicHighlighter, you must call highlightBlockAfter
+        When subclassing BasicHighlighter, you must call highlightBlockAfter
        after your custom highlighting.
        """
@ -91,13 +92,16 @@ class basicHighlighter(QSyntaxHighlighter):
            textedText = text + " "
        # Based on http://john.nachtimwald.com/2009/08/22/qplaintextedit-with-in-line-spell-check/
-        WORDS = '(?iu)([\w\']+)[^\'\w]'  # (?iu) means case insensitive and unicode
+        WORDS = r'(?iu)([\w\']+)[^\'\w]'
        #        (?iu) means case insensitive and unicode
        if hasattr(self.editor, "spellcheck") and self.editor.spellcheck:
            for word_object in re.finditer(WORDS, textedText):
-                if self.editor._dict and not self.editor._dict.check(word_object.group(1)):
+                if (self.editor._dict
                        and not self.editor._dict.check(word_object.group(1))):
                    format = self.format(word_object.start(1))
                    format.setUnderlineColor(self._misspelledColor)
                    # SpellCheckUnderline fails with some fonts
                    format.setUnderlineStyle(QTextCharFormat.WaveUnderline)
                    self.setFormat(word_object.start(1),
-                                   word_object.end(1) - word_object.start(1), format)
+                                   word_object.end(1) - word_object.start(1),
                                   format)
--- a/manuskript/ui/highlighters/markdownEnums.py
+++ b/manuskript/ui/highlighters/markdownEnums.py
@ -0,0 +1,94 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
 #==============================================================================
 #   MARKDOWN STATES
 #==============================================================================
 class MarkdownState:
    """Integer states a text block can be in, as reported by the tokenizer.

    The values are consecutive integers starting at -1 (unknown); the
    highlighter stores them as the block state of each text block.
    """
    (
        MarkdownStateUnknown,              # -1
        MarkdownStateParagraphBreak,       # 0
        MarkdownStateListLineBreak,        # 1
        MarkdownStateParagraph,            # 2
        MarkdownStateAtxHeading1,          # 3
        MarkdownStateAtxHeading2,          # 4
        MarkdownStateAtxHeading3,          # 5
        MarkdownStateAtxHeading4,          # 6
        MarkdownStateAtxHeading5,          # 7
        MarkdownStateAtxHeading6,          # 8
        MarkdownStateBlockquote,           # 9
        MarkdownStateCodeBlock,            # 10
        MarkdownStateInGithubCodeFence,    # 11
        MarkdownStateInPandocCodeFence,    # 12
        MarkdownStateCodeFenceEnd,         # 13
        MarkdownStateComment,              # 14
        MarkdownStateHorizontalRule,       # 15
        MarkdownStateNumberedList,         # 16
        MarkdownStateBulletPointList,      # 17
        MarkdownStateSetextHeading1Line1,  # 18
        MarkdownStateSetextHeading1Line2,  # 19
        MarkdownStateSetextHeading2Line1,  # 20
        MarkdownStateSetextHeading2Line2,  # 21
        MarkdownStatePipeTableHeader,      # 22
        MarkdownStatePipeTableDivider,     # 23
        MarkdownStatePipeTableRow,         # 24
    ) = range(-1, 25)
 #==============================================================================
 #   MARKDOWN TOKEN TYPE
 #==============================================================================
 class MarkdownTokenType:
    """Integer identifiers for the kinds of token the tokenizer can emit.

    The values are consecutive integers starting at -1 (unknown). ``TITLES``
    groups every heading token type.
    """
    (
        TokenUnknown,               # -1
        # Titles
        TokenAtxHeading1,           # 0
        TokenAtxHeading2,           # 1
        TokenAtxHeading3,           # 2
        TokenAtxHeading4,           # 3
        TokenAtxHeading5,           # 4
        TokenAtxHeading6,           # 5
        TokenSetextHeading1Line1,   # 6
        TokenSetextHeading1Line2,   # 7
        TokenSetextHeading2Line1,   # 8
        TokenSetextHeading2Line2,   # 9
        # Everything else
        TokenEmphasis,              # 10
        TokenStrong,                # 11
        TokenStrikethrough,         # 12
        TokenVerbatim,              # 13
        TokenHtmlTag,               # 14
        TokenHtmlEntity,            # 15
        TokenAutomaticLink,         # 16
        TokenInlineLink,            # 17
        TokenReferenceLink,         # 18
        TokenReferenceDefinition,   # 19
        TokenImage,                 # 20
        TokenHtmlComment,           # 21
        TokenNumberedList,          # 22
        TokenBulletPointList,       # 23
        TokenHorizontalRule,        # 24
        TokenLineBreak,             # 25
        TokenBlockquote,            # 26
        TokenCodeBlock,             # 27
        TokenGithubCodeFence,       # 28
        TokenPandocCodeFence,       # 29
        TokenCodeFenceEnd,          # 30
        TokenMention,               # 31
        TokenTableHeader,           # 32
        TokenTableDivider,          # 33
        TokenTablePipe,             # 34
        TokenSuperScript,           # 35
        TokenSubScript,             # 36
        TokenLast,                  # 37
    ) = range(-1, 38)

    # All heading token types, ATX first, then setext.
    TITLES = [
        TokenAtxHeading1, TokenAtxHeading2, TokenAtxHeading3,
        TokenAtxHeading4, TokenAtxHeading5, TokenAtxHeading6,
        TokenSetextHeading1Line1, TokenSetextHeading1Line2,
        TokenSetextHeading2Line1, TokenSetextHeading2Line2,
    ]
 class BlockquoteStyle:
    """Integer identifiers for the available blockquote rendering styles."""
    (
        BlockquoteStylePlain,   # 0
        BlockquoteStyleItalic,  # 1
        BlockquoteStyleFancy,   # 2
    ) = range(3)
--- a/manuskript/ui/highlighters/markdownHighlighter.py
+++ b/manuskript/ui/highlighters/markdownHighlighter.py
@ -0,0 +1,718 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
 """
 A QSyntaxHighlighter for markdown, using tokenizer. More accurate than simple
 regexp, but not yet perfect.
 """
 import re
 from PyQt5.QtCore import Qt, pyqtSignal, qWarning, QRegExp
 from PyQt5.QtGui import (QSyntaxHighlighter, QTextBlock, QColor, QFont,
                         QTextCharFormat, QBrush, QPalette)
 from PyQt5.QtWidgets import qApp, QStyle
 from manuskript.ui.highlighters.markdownTokenizer import MarkdownTokenizer
 from manuskript.ui.highlighters.markdownEnums import MarkdownState as MS
 from manuskript.ui.highlighters.markdownEnums import MarkdownTokenType as MTT
 from manuskript.ui.highlighters.markdownEnums import BlockquoteStyle as BS
 # Line-length ruler (about 79 columns): keep code lines within this width.
 GW_FADE_ALPHA = 140
 # Highlighter based on GhostWriter (http://wereturtle.github.io/ghostwriter/).
 # GPLV3+.
 # FIXME: Setext headings don't work anymore
 class MarkdownHighlighter(QSyntaxHighlighter):
    highlightBlockAtPosition = pyqtSignal(int)
    headingFound = pyqtSignal(int, str, QTextBlock)
    headingRemoved = pyqtSignal(int)
    def __init__(self, editor):
        """Create a highlighter attached to the document of ``editor``.

        :param editor: text widget being highlighted. Its ``document()`` is
            handed to QSyntaxHighlighter; its ``font()`` and ``textCursor()``
            are read later by other methods of this class.
        """
        QSyntaxHighlighter.__init__(self, editor.document())

        # Default values
        self.editor = editor
        self.tokenizer = MarkdownTokenizer()

        self.spellCheckEnabled = False
        # Both attributes are read by spellCheck(). They used to be left
        # undefined here, so spell checking raised AttributeError unless
        # onTypingPaused()/onTypingResumed() and setDictionary() had been
        # called first.
        self.typingPaused = True
        self.dictionary = None
        self.inBlockquote = False

        self.defaultTextColor = QColor(Qt.black)
        self.backgroundColor = QColor(Qt.white)
        self.markupColor = QColor(Qt.black)
        self.linkColor = QColor(Qt.blue)
        self.spellingErrorColor = QColor(Qt.red)
        self.blockquoteStyle = BS.BlockquoteStyleFancy

        # Settings
        # NOTE(review): attribute name is misspelled ("Undlerline"); it is
        # kept because setUseUnderlineForEmphasis() writes the same name.
        self.useUndlerlineForEmphasis = False
        self.highlightLineBreaks = True

        # Queued so that the requested block is re-highlighted after the
        # current highlightBlock() call has returned, not from within it.
        self.highlightBlockAtPosition.connect(self.onHighlightBlockAtPosition,
                                              Qt.QueuedConnection)

        font = self.document().defaultFont()
        font.setStyleStrategy(QFont.PreferAntialias)
        self.defaultFormat = QTextCharFormat()
        self.defaultFormat.setFont(font)
        self.defaultFormat.setForeground(QBrush(self.defaultTextColor))

        self.theme = self.defaultTheme()
        self.setupHeadingFontSize(True)

        self.highlightedWords = []
        self.highlightedTags = []
        self.searchExpression = ""
        self.searchExpressionRegExp = False
        self.searchExpressionCase = False

        # Each rule is (regular expression, theme entry). When the
        # expression has groups, highlightBlock() treats groups 1 and 2 as
        # the opening and closing markup.
        self.customRules = [
            ("(°).*?(°)", {"background": Qt.yellow,
                           "markupColor": Qt.lightGray}),
        ]
    def highlightBlock(self, text):
        """
        Highlight one block of text.

        The first block of the document is formatted as a title (double
        size, bold, light gray) and gets no other highlighting. Every other
        block is tokenized and formatted from the theme; hashtags,
        highlighted words/tags, the searched expression and the custom rules
        are then applied on top.

        :param text: the text of the current block.

        Note:  Never set the QTextBlockFormat for a QTextBlock from within
        the highlighter. Depending on how the block format is modified,
        a recursive call to the highlighter may be triggered, which will
        cause the application to crash.
        Likewise, don't try to set the QTextBlockFormat outside the highlighter
        (i.e., from within the text editor).  While the application will not
        crash, the format change will be added to the undo stack.  Attempting
        to undo from that point on will cause the undo stack to be virtually
        frozen, since undoing the format operation causes the text to be
        considered changed, thus triggering the slot that changes the text
        formatting to be triggered yet again.
        """
        if self.currentBlock().blockNumber() == 0:
            # This is the title
            bf = QTextCharFormat()
            bf.setFontPointSize(self.editor.font().pointSize() * 2)
            bf.setFontWeight(QFont.Bold)
            bf.setForeground(Qt.lightGray)
            self.setFormat(0, len(text), bf)
            return

        lastState = self.currentBlockState()
        self.setFormat(0, len(text), self.defaultFormat)

        if self.tokenizer is not None:
            self.tokenizer.clear()
            block = self.currentBlock()
            nextState = MS.MarkdownStateUnknown
            previousState = self.previousBlockState()

            if block.next().isValid():
                nextState = block.next().userState()

            self.tokenizer.tokenize(text, lastState, previousState, nextState)
            self.setCurrentBlockState(self.tokenizer.getState())
            self.inBlockquote = (self.tokenizer.getState()
                                 == MS.MarkdownStateBlockquote)

            # STATE FORMATTING
            # FIXME: generic
            if self.currentBlockState() in [
                    MS.MarkdownStatePipeTableHeader,
                    MS.MarkdownStatePipeTableDivider,
                    MS.MarkdownStatePipeTableRow]:
                fmt = QTextCharFormat()
                f = fmt.font()
                f.setFamily("Monospace")
                fmt.setFont(f)
                self.setFormat(0, len(text), fmt)

            # Monospace the leading blank chars
            i = 0
            while i <= len(text)-1 and text[i] in [" ", "\t"]:
                fmt = self.format(i)
                fmt.setFontFamily("Monospace")
                self.setFormat(i, 1, fmt)
                i += 1

            tokens = self.tokenizer.getTokens()

            for token in tokens:
                if token.type == MTT.TokenUnknown:
                    qWarning("Highlighter found unknown token type in text block.")
                    continue

                if token.type in [
                        MTT.TokenAtxHeading1,
                        MTT.TokenAtxHeading2,
                        MTT.TokenAtxHeading3,
                        MTT.TokenAtxHeading4,
                        MTT.TokenAtxHeading5,
                        MTT.TokenAtxHeading6,
                        MTT.TokenSetextHeading1Line1,
                        MTT.TokenSetextHeading2Line1,
                    ]:
                    self.storeHeadingData(token, text)

                self.applyFormattingForToken(token, text)

            if self.tokenizer.backtrackRequested():
                # Emitted rather than handled here: the signal is connected
                # to onHighlightBlockAtPosition(), which re-highlights the
                # previous block.
                previous = self.currentBlock().previous()
                self.highlightBlockAtPosition.emit(previous.position())

        if self.spellCheckEnabled:
            self.spellCheck(text)

        # HASHTAGS AND HIGHLIGHTS

        # Hashtags: group 1 is the (non-#) character before the hashtag, so
        # formatting starts one character after the match position.
        s = 0
        ht = QRegExp(r'([^#])(#[\w]+)')
        while ht.indexIn(text, s) >= 0:
            f = self.format(ht.pos()+1)
            f.setForeground(QColor("#07c"))
            f.setFontWeight(QFont.Bold)
            self.setFormat(ht.pos()+1, ht.matchedLength()-1, f)
            s = ht.pos() + 1

        # Highlighted words and tags (always case-insensitive)
        lowered = text.lower()
        for w in self.highlightedWords + self.highlightedTags:
            needle = w.lower()
            pos = lowered.find(needle)
            while pos >= 0:
                # Character by character, to keep each character's format
                for i in range(pos, pos + len(w)):
                    f = self.format(i)
                    f.setBackground(QBrush(QColor("#fAf")))
                    self.setFormat(i, 1, f)
                pos = lowered.find(needle, pos+1)

        # Searched
        if self.searchExpression:
            s = self.searchExpression

            if not self.searchExpressionRegExp:
                # Fold case on both sides only for case-insensitive searches.
                # Previously only the first occurrence honoured the case
                # setting; later ones were always matched case-insensitively.
                if self.searchExpressionCase:
                    haystack, needle = text, s
                else:
                    haystack, needle = lowered, s.lower()

                pos = haystack.find(needle)
                while pos >= 0:
                    for i in range(pos, pos + len(s)):
                        f = self.format(i)
                        f.setBackground(QBrush(QColor("#Aff")))
                        self.setFormat(i, 1, f)
                    pos = haystack.find(needle, pos+1)

            else:
                # Using QRegExp
                rx = QRegExp(s)
                if not self.searchExpressionCase:
                    rx.setCaseSensitivity(Qt.CaseInsensitive)
                p = rx.indexIn(text)
                while p != -1:
                    f = self.format(p)
                    f.setBackground(QBrush(QColor("#Aff")))
                    self.setFormat(p, rx.matchedLength(), f)
                    p = rx.indexIn(text, p + 1)

        # Custom rules
        for rule, theme in self.customRules:
            for m in re.finditer(rule, text):

                if not m.groups():  # No groups, therefore no markup
                    f = self.format(m.start())
                    f, garbage = self.formatsFromTheme(theme, f)
                    self.setFormat(m.start(), len(m.group()), f)

                else:
                    # Groups 1 and 2 are the opening and closing markup;
                    # whatever lies between them is the formatted text.
                    mf = self.format(m.start())
                    f = self.format(m.start() + len(m.group(1)))
                    f, mf = self.formatsFromTheme(theme, f, mf)
                    self.setFormat(m.start(1), len(m.group(1)), mf)
                    self.setFormat(m.start(2), len(m.group(2)), mf)
                    self.setFormat(m.start(1) + len(m.group(1)),
                                   len(m.group())
                                   - len(m.group(1))
                                   - len(m.group(2)), f)

        # If the block has transitioned from previously being a heading to now
        # being a non-heading, signal that the position in the document no
        # longer contains a heading.
        if self.isHeadingBlockState(lastState) and \
           not self.isHeadingBlockState(self.currentBlockState()):
            self.headingRemoved.emit(self.currentBlock().position())
    ###########################################################################
    # COLORS & FORMATTING
    ###########################################################################
    def defaultTheme(self):
        """Build and return the default theme.

        The theme is a dict. The key ``"markup"`` holds the default markup
        color; every other key is a token type mapped to a dict of
        formatting options (see formatsFromTheme() for the options read).
        Colors are derived from the application palette.
        """
        palette = qApp.palette()

        markup = palette.color(QPalette.Mid)
        if markup == Qt.black:
            markup = Qt.lightGray

        dark = palette.color(QPalette.Dark)
        if dark == Qt.black:
            dark = QColor(Qt.gray)
        darker = dark.darker(150)

        # Text background
        background = palette.color(QPalette.Base)
        lightBackground = background.darker(130)
        veryLightBackground = background.darker(105)

        # Example of the options a theme entry may contain:
        #   "color": Qt.red,
        #   "deltaSize": 10,
        #   "background": Qt.yellow,
        #   "monospace": True,
        #   "bold": True,
        #   "italic": True,
        #   "underline": True,
        #   "overline": True,
        #   "strike": True,
        #   "formatMarkup": True,
        #   "markupBold": True,
        #   "markupColor": Qt.blue,
        #   "markupBackground": Qt.green,
        #   "markupMonospace": True,
        #   "super": True,
        #   "sub": True
        theme = {"markup": markup}

        # Headings: bold, monospace, markup formatted like the heading text.
        for heading in MTT.TITLES:
            theme[heading] = {
                "formatMarkup": True,
                "bold": True,
                "monospace": True,
            }

        # ATX headings: dark blue, getting lighter with each level.
        lightnessBase = 100
        lightnessStep = 50
        color = QColor(Qt.darkBlue)
        theme[MTT.TokenAtxHeading1]["color"] = color
        lighterHeadings = [
            MTT.TokenAtxHeading2,
            MTT.TokenAtxHeading3,
            MTT.TokenAtxHeading4,
            MTT.TokenAtxHeading5,
            MTT.TokenAtxHeading6,
        ]
        for rank, heading in enumerate(lighterHeadings, start=1):
            theme[heading]["color"] = color.lighter(
                lightnessBase + rank * lightnessStep)

        # The second line of a setext heading replaces the generic heading
        # entry set above.
        for secondLine in (MTT.TokenSetextHeading1Line2,
                           MTT.TokenSetextHeading2Line2):
            theme[secondLine] = {
                "color": markup,
                "monospace": True,
            }

        theme.update({
            # Beautifiers
            MTT.TokenEmphasis: {"italic": True},
            MTT.TokenStrong: {"bold": True},
            MTT.TokenStrikethrough: {"strike": True},
            MTT.TokenVerbatim: {
                "monospace": True,
                "background": veryLightBackground,
                "formatMarkup": True,
                "markupColor": markup,
            },
            MTT.TokenSuperScript: {"super": True, "formatMarkup": True},
            MTT.TokenSubScript: {"sub": True, "formatMarkup": True},
            # HTML
            MTT.TokenHtmlTag: {"color": Qt.red},
            MTT.TokenHtmlEntity: {"color": Qt.red},
            # Links and images
            MTT.TokenAutomaticLink: {"color": palette.color(QPalette.Link)},
            MTT.TokenInlineLink: {"color": palette.color(QPalette.Link)},
            MTT.TokenReferenceLink: {"color": palette.color(QPalette.Link)},
            MTT.TokenReferenceDefinition: {
                "color": palette.color(QPalette.Link)},
            MTT.TokenImage: {"color": Qt.green},
            MTT.TokenHtmlComment: {"color": dark},
            # Lists
            MTT.TokenNumberedList: {
                "markupColor": QColor(Qt.red).lighter(),
                "markupBold": True,
                "markupMonospace": True,
            },
            MTT.TokenBulletPointList: {
                "markupColor": QColor(Qt.red).lighter(),
                "markupBold": True,
                "markupMonospace": True,
            },
            # Rules and breaks
            MTT.TokenHorizontalRule: {
                "overline": True,
                "underline": True,
                "monospace": True,
                "color": markup,
            },
            MTT.TokenLineBreak: {"background": markup},
            # Quotes and code
            MTT.TokenBlockquote: {
                "color": darker,
                "markupColor": lightBackground,
                "markupBackground": lightBackground,
            },
            MTT.TokenCodeBlock: {
                "color": darker,
                "markupBackground": veryLightBackground,
                "monospace": True,
            },
            MTT.TokenGithubCodeFence: {"color": markup},
            MTT.TokenPandocCodeFence: {"color": markup},
            MTT.TokenCodeFenceEnd: {"color": markup},
            MTT.TokenMention: {},  # FIXME
            # Tables
            MTT.TokenTableHeader: {"color": darker, "monospace": True},
            MTT.TokenTableDivider: {"color": markup, "monospace": True},
            MTT.TokenTablePipe: {"color": markup, "monospace": True},
        })

        return theme
    def setColorScheme(self, defaultTextColor, backgroundColor, markupColor,
                       linkColor, spellingErrorColor):
        """Store a new color scheme and re-highlight the document.

        Only the default text color is applied to a format here; the other
        colors are stored on the instance.
        """
        (self.defaultTextColor,
         self.backgroundColor,
         self.markupColor,
         self.linkColor,
         self.spellingErrorColor) = (defaultTextColor,
                                     backgroundColor,
                                     markupColor,
                                     linkColor,
                                     spellingErrorColor)

        textBrush = QBrush(defaultTextColor)
        self.defaultFormat.setForeground(textBrush)

        # FIXME: generate a theme based on that
        self.rehighlight()
    ###########################################################################
    # ACTUAL FORMATTING
    ###########################################################################
    def applyFormattingForToken(self, token, text):
        """Format the part of the current block covered by ``token``.

        The opening markup, the enclosed text and the closing markup are
        formatted separately. Formats start from what is already set at the
        token's position and are adjusted by the theme entry for the token
        type, if there is one.

        :param token: token with ``type``, ``position``, ``length``,
            ``openingMarkupLength`` and ``closingMarkupLength``.
        :param text: text of the current block (only used for debugging).
        """
        if token.type == MTT.TokenUnknown:
            qWarning("MarkdownHighlighter.applyFormattingForToken() was passed"
                     " in a token of unknown type.")
            return

        markupStart = token.position
        textStart = markupStart + token.openingMarkupLength
        textLength = (token.length
                      - token.openingMarkupLength
                      - token.closingMarkupLength)

        textFormat = self.format(textStart)
        markupFormat = self.format(markupStart)
        if self.theme.get("markup"):
            markupFormat.setForeground(self.theme["markup"])

        # Debugging aid, not called by default: prints the block with the
        # token's markup and extent marked underneath.
        def debug():
            print("{}\n{}{}{}{}   (state:{})".format(
                text,
                " "*token.position,
                "^"*token.openingMarkupLength,
                str(token.type).center(textLength, "-"),
                "^" * token.closingMarkupLength,
                self.currentBlockState(),)
                 )

        tokenTheme = self.theme.get(token.type)
        if tokenTheme:
            textFormat, markupFormat = self.formatsFromTheme(
                tokenTheme, textFormat, markupFormat)

        # Opening markup
        self.setFormat(markupStart, token.openingMarkupLength, markupFormat)

        # Text
        self.setFormat(textStart, textLength, textFormat)

        # Closing markup
        if token.closingMarkupLength > 0:
            closingStart = (markupStart + token.length
                            - token.closingMarkupLength)
            self.setFormat(closingStart, token.closingMarkupLength,
                           markupFormat)
    def formatsFromTheme(self, theme, format=None, markupFormat=None):
        """Apply a theme entry to a text format and a markup format.

        :param theme: dict of formatting options. Text options: "color",
            "deltaSize", "background", "monospace", "bold", "italic",
            "underline", "overline", "strike", "super", "sub". Markup
            options: "formatMarkup", "markupBold", "markupColor",
            "markupBackground", "markupMonospace".
        :param format: QTextCharFormat for the text; modified in place.
            A new one is created when omitted.
        :param markupFormat: QTextCharFormat for the markup. A new one is
            created when omitted.
        :return: tuple ``(format, markupFormat)``.
        """
        # The defaults used to be QTextCharFormat() instances created once,
        # when the method was defined. As they are modified below, changes
        # made by one call leaked into every later call relying on them.
        if format is None:
            format = QTextCharFormat()
        if markupFormat is None:
            markupFormat = QTextCharFormat()

        # Token
        if theme.get("color"):
            format.setForeground(theme["color"])
        if theme.get("deltaSize"):
            format.setFontPointSize(format.fontPointSize() + theme["deltaSize"])
        if theme.get("background"):
            format.setBackground(theme["background"])
        if theme.get("monospace"):
            format.setFontFamily("Monospace")
        if theme.get("bold"):
            format.setFontWeight(QFont.Bold)
        if theme.get("italic"):
            format.setFontItalic(theme["italic"])
        if theme.get("underline"):
            format.setFontUnderline(theme["underline"])
        if theme.get("overline"):
            format.setFontOverline(theme["overline"])
        if theme.get("strike"):
            format.setFontStrikeOut(theme["strike"])
        if theme.get("super"):
            format.setVerticalAlignment(QTextCharFormat.AlignSuperScript)
        if theme.get("sub"):
            format.setVerticalAlignment(QTextCharFormat.AlignSubScript)

        # Markup
        if theme.get("formatMarkup"):
            # The markup takes the text format but keeps its own color
            c = markupFormat.foreground()
            markupFormat = QTextCharFormat(format)
            markupFormat.setForeground(c)
        if theme.get("markupBold"):
            markupFormat.setFontWeight(QFont.Bold)
        if theme.get("markupColor"):
            markupFormat.setForeground(theme["markupColor"])
        if theme.get("markupBackground"):
            markupFormat.setBackground(theme["markupBackground"])
        if theme.get("markupMonospace"):
            markupFormat.setFontFamily("Monospace")

        return format, markupFormat
    ###########################################################################
    # SETTINGS
    ###########################################################################
    def setHighlighted(self, words, tags):
        """Set the words and tags to highlight.

        The document is re-highlighted only when either list differs from
        the one currently stored.
        """
        wordsChanged = self.highlightedWords != words
        tagsChanged = self.highlightedTags != tags

        self.highlightedWords = words
        self.highlightedTags = tags

        if wordsChanged or tagsChanged:
            self.rehighlight()
    def setSearched(self, expression, regExp=False, caseSensitivity=False):
        """
        Set the expression currently searched for, so it gets highlighted.

        ``regExp`` tells whether the expression is a regular expression,
        ``caseSensitivity`` whether matching is case-sensitive. The document
        is re-highlighted only when one of the three values changed.
        """
        expressionChanged = self.searchExpression != expression
        regExpChanged = self.searchExpressionRegExp != regExp
        caseChanged = self.searchExpressionCase != caseSensitivity

        self.searchExpression = expression
        self.searchExpressionRegExp = regExp
        self.searchExpressionCase = caseSensitivity

        if expressionChanged or regExpChanged or caseChanged:
            self.rehighlight()
    def setDictionary(self, dictionary):
        """Set the dictionary used by spellCheck().

        The document is re-highlighted only when spell checking is enabled.
        """
        self.dictionary = dictionary
        if not self.spellCheckEnabled:
            return
        self.rehighlight()
    def increaseFontSize(self):
        """Make the default format one point larger and re-highlight."""
        currentSize = self.defaultFormat.fontPointSize()
        self.defaultFormat.setFontPointSize(currentSize + 1.0)
        self.rehighlight()
    def decreaseFontSize(self):
        """Make the default format one point smaller and re-highlight."""
        currentSize = self.defaultFormat.fontPointSize()
        self.defaultFormat.setFontPointSize(currentSize - 1.0)
        self.rehighlight()
    def setEnableLargeHeadingSizes(self, enable):
        """Turn enlarged heading fonts on or off and re-highlight.

        :param enable: passed as-is to setupHeadingFontSize().
        """
        self.setupHeadingFontSize(enable)
        self.rehighlight()
    def setupHeadingFontSize(self, useLargeHeadings):
        """Set the "deltaSize" option of every heading entry in the theme.

        With ``useLargeHeadings`` each heading gets a size increase that
        shrinks as the heading level grows; otherwise all increases are 0.
        Does not re-highlight.
        """
        if not useLargeHeadings:
            for heading in MTT.TITLES:
                self.theme[heading]["deltaSize"] = 0
            return

        largeDeltas = (
            (MTT.TokenSetextHeading1Line1, 7),
            (MTT.TokenSetextHeading2Line1, 5),
            (MTT.TokenSetextHeading1Line2, 7),
            (MTT.TokenSetextHeading2Line2, 5),
            (MTT.TokenAtxHeading1, 7),
            (MTT.TokenAtxHeading2, 5),
            (MTT.TokenAtxHeading3, 3),
            (MTT.TokenAtxHeading4, 2),
            (MTT.TokenAtxHeading5, 1),
            (MTT.TokenAtxHeading6, 0),
        )
        for heading, delta in largeDeltas:
            self.theme[heading]["deltaSize"] = delta
    def setUseUnderlineForEmphasis(self, enable):
        """Store the "underline for emphasis" setting and re-highlight."""
        # NOTE(review): the attribute name is misspelled ("Undlerline"); it
        # matches the name initialized in __init__, so it is left unchanged.
        self.useUndlerlineForEmphasis = enable
        self.rehighlight()
    def setFont(self, fontFamily, fontSize):
        """Set the font of the default format and re-highlight.

        :param fontFamily: font family name.
        :param fontSize: font size, in points.
        """
        # Arguments are passed positionally: PyQt5 accepts keyword arguments
        # for optional parameters only, and the family is a required one.
        font = QFont(fontFamily, fontSize, QFont.Normal, False)
        self.defaultFormat.setFont(font)
        self.rehighlight()
    def setSpellCheckEnabled(self, enabled):
        """Enable or disable spell checking and re-highlight.

        When enabled, highlightBlock() calls spellCheck() on each block.
        """
        self.spellCheckEnabled = enabled
        self.rehighlight()
    def setBlockquoteStyle(self, style):
        """Set the blockquote style and re-highlight.

        :param style: one of the BlockquoteStyle values. With
            BlockquoteStyleItalic, blockquotes are rendered in italics.
        """
        self.blockquoteStyle = style

        # This class has no ``emphasizeToken`` attribute (writing to it
        # raised AttributeError); formatting is driven by the theme, whose
        # "italic" option is read by formatsFromTheme().
        blockquoteTheme = self.theme.setdefault(MTT.TokenBlockquote, {})
        blockquoteTheme["italic"] = (style == BS.BlockquoteStyleItalic)

        self.rehighlight()
    def setHighlightLineBreaks(self, enable):
        """Store the "highlight line breaks" setting and re-highlight."""
        self.highlightLineBreaks = enable
        self.rehighlight()
    ###########################################################################
    # GHOSTWRITER SPECIFIC?
    ###########################################################################
    def onTypingResumed(self):
        """Record that the user is typing again (read by spellCheck())."""
        self.typingPaused = False
    def onTypingPaused(self):
        """Record that typing has paused and re-highlight the cursor's block."""
        self.typingPaused = True
        cursorPosition = self.editor.textCursor().position()
        self.rehighlightBlock(self.document().findBlock(cursorPosition))
    def onHighlightBlockAtPosition(self, position):
        """Re-highlight the block containing document position ``position``.

        Connected to the highlightBlockAtPosition signal.
        """
        self.rehighlightBlock(self.document().findBlock(position))
    def onTextBlockRemoved(self, block):
        """Emit headingRemoved if the removed ``block`` was a heading.

        :param block: the QTextBlock that was removed.
        """
        # userState() must be called: passing the bound method itself never
        # compares equal to a state value, so the signal was never emitted.
        if self.isHeadingBlockState(block.userState()):
            self.headingRemoved.emit(block.position())
    ###########################################################################
    # SPELLCHECK
    ###########################################################################
    def spellCheck(self, text):
        """Underline the misspelled words of the current block.

        A misspelled word that ends exactly at the cursor is left alone
        while the user is typing, so the word being typed is not flagged.
        Does nothing when no dictionary has been set.

        :param text: text of the current block.
        """
        # NOTE(review): the dictionary is expected to provide
        # check(text, startIndex), returning an object with isNull(),
        # position() and length() -- confirm against the dictionary in use.
        dictionary = getattr(self, "dictionary", None)
        if dictionary is None:
            # Nothing to check against until setDictionary() is called.
            return

        # The attribute may not exist before onTypingPaused() or
        # onTypingResumed() has run; treat that as "not typing".
        typingPaused = getattr(self, "typingPaused", True)

        cursorPosition = self.editor.textCursor().position()
        cursorPosBlock = self.document().findBlock(cursorPosition)
        cursorPosInBlock = -1

        if self.currentBlock() == cursorPosBlock:
            cursorPosInBlock = cursorPosition - cursorPosBlock.position()

        # styleHint() returns a plain integer; convert it to the enum type
        # taken by setUnderlineStyle(). (Was misspelled ``qApp.stlye()``.)
        underlineStyle = QTextCharFormat.UnderlineStyle(
            qApp.style().styleHint(QStyle.SH_SpellCheckUnderlineStyle))

        misspelledWord = dictionary.check(text, 0)

        while not misspelledWord.isNull():
            startIndex = misspelledWord.position()
            length = misspelledWord.length()

            if typingPaused or cursorPosInBlock != startIndex + length:
                spellingErrorFormat = self.format(startIndex)
                spellingErrorFormat.setUnderlineColor(self.spellingErrorColor)
                spellingErrorFormat.setUnderlineStyle(underlineStyle)

                self.setFormat(startIndex, length, spellingErrorFormat)

            # Resume the search right after the word just handled
            startIndex += length
            misspelledWord = dictionary.check(text, startIndex)
    def storeHeadingData(self, token, text):
        """Emit headingFound for the heading described by ``token``.

        For ATX headings the heading text is the text between the opening
        and closing markup, stripped; the level follows from the token type.
        For the first line of a setext heading, the whole block text is
        used. Any other token type is reported with a warning and ignored.

        :param token: heading token found in the current block.
        :param text: text of the current block.
        """
        if token.type in [
                MTT.TokenAtxHeading1,
                MTT.TokenAtxHeading2,
                MTT.TokenAtxHeading3,
                MTT.TokenAtxHeading4,
                MTT.TokenAtxHeading5,
                MTT.TokenAtxHeading6]:
            # ATX token types are consecutive, so the offset gives the level
            level = token.type - MTT.TokenAtxHeading1 + 1
            start = token.position + token.openingMarkupLength
            length = (token.length
                      - token.openingMarkupLength
                      - token.closingMarkupLength)
            headingText = text[start:start+length].strip()

        elif token.type == MTT.TokenSetextHeading1Line1:
            level = 1
            headingText = text

        elif token.type == MTT.TokenSetextHeading2Line1:
            level = 2
            headingText = text

        else:
            # Tokens expose their type as the ``type`` attribute; the
            # previous ``token.getType()`` call raised AttributeError.
            qWarning("MarkdownHighlighter.storeHeadingData() encountered" +
                     " unexpected token: {}".format(token.type))
            return

        # FIXME: TypeError: could not convert 'TextBlockData' to 'QTextBlockUserData'
        # blockData = self.currentBlockUserData()
        # if blockData is None:
        #     blockData = TextBlockData(self.document(), self.currentBlock())
        #
        # self.setCurrentBlockUserData(blockData)
        self.headingFound.emit(level, headingText, self.currentBlock())
    def isHeadingBlockState(self, state):
        """Return True if ``state`` is the block state of a heading.

        Heading states are the six ATX heading states and the first line of
        both setext heading kinds.
        """
        headingStates = (
            MS.MarkdownStateAtxHeading1,
            MS.MarkdownStateAtxHeading2,
            MS.MarkdownStateAtxHeading3,
            MS.MarkdownStateAtxHeading4,
            MS.MarkdownStateAtxHeading5,
            MS.MarkdownStateAtxHeading6,
            MS.MarkdownStateSetextHeading1Line1,
            MS.MarkdownStateSetextHeading2Line1,
        )
        return state in headingStates
 def getLuminance(color):
    """Return the weighted sum of the color's channels.

    ``color`` must provide redF(), greenF() and blueF(); the weights are
    0.30, 0.59 and 0.11 respectively.
    """
    red = 0.30 * color.redF()
    green = 0.59 * color.greenF()
    blue = 0.11 * color.blueF()
    return red + green + blue
 def applyAlphaToChannel(foreground, background, alpha):
    """Blend one channel: ``alpha`` of foreground plus the rest of background.

    ``alpha`` is the foreground weight (1.0 gives the foreground value,
    0.0 the background value).
    """
    backgroundWeight = 1.0 - alpha
    return (foreground * alpha) + (background * backgroundWeight)
 def applyAlpha(foreground, background, alpha):
    """Return ``foreground`` blended over ``background``.

    :param foreground: color providing red(), green() and blue().
    :param background: color providing red(), green() and blue().
    :param alpha: opacity of the foreground, from 0 to 255.
    :return: a new QColor holding the blended channels.
    """
    blendedColor = QColor(0, 0, 0)
    normalizedAlpha = alpha / 255.0

    # The blended values are floats while the QColor channel setters take
    # integers, so they are truncated explicitly.
    blendedColor.setRed(int(applyAlphaToChannel(
        foreground.red(), background.red(), normalizedAlpha)))
    blendedColor.setGreen(int(applyAlphaToChannel(
        foreground.green(), background.green(), normalizedAlpha)))
    blendedColor.setBlue(int(applyAlphaToChannel(
        foreground.blue(), background.blue(), normalizedAlpha)))

    return blendedColor
--- a/manuskript/ui/highlighters/markdownTokenizer.py
+++ b/manuskript/ui/highlighters/markdownTokenizer.py
@ -0,0 +1,887 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
 import re
 from PyQt5.QtCore import *
 from PyQt5.QtGui import *
 from PyQt5.QtWidgets import *
 from noteflow.ui.views.markdownEnums import MarkdownState as MS
 from noteflow.ui.views.markdownEnums import MarkdownTokenType as MTT
 # This file is simply a python translation of GhostWriter's Tokenizer.
 # http://wereturtle.github.io/ghostwriter/
 # GPLV3+.
 # ==============================================================================
 #   TOKEN
 # ==============================================================================
 class Token:
    """A highlighted span of a line: a type, a position, a length, and the
    lengths of its opening and closing markup."""
    def __init__(self):
        # -1 marks a token whose type has not been assigned yet.
        self.type = -1
        # Span covered by the token within the line.
        self.position = self.length = 0
        # Number of markup characters at the start / end of the span.
        self.openingMarkupLength = self.closingMarkupLength = 0
 # ==============================================================================
 #   HIGHLIGHT TOKENIZER
 # ==============================================================================
 class HighlightTokenizer:
    """
    Base class for line tokenizers.  Collects the tokens found on a line,
    the resulting line state, and whether the previous line has to be
    tokenized again (backtracking).  Subclasses implement tokenize().
    """
    def __init__(self):
        self.tokens = []
        # Same defaults as clear(), so getState() and backtrackRequested()
        # also work on a tokenizer that has not been cleared yet.
        self.backtrack = False
        self.state = -1
    def tokenize(self, text, currentState, previousState, nextState):
        """Tokenizes one line of text.  Subclass me."""
        return 0
    def getTokens(self):
        """Returns the collected tokens sorted by position."""
        self.tokens = sorted(self.tokens, key=lambda t: t.position)
        return self.tokens
    def getState(self):
        """Returns the state set by the last setState() call, or -1."""
        return self.state
    def backtrackRequested(self):
        """Returns True if requestBacktrack() was called since clear()."""
        return self.backtrack
    def clear(self):
        """Resets tokens, state and backtrack flag before a new line."""
        self.tokens = []
        self.backtrack = False
        self.state = -1
    def addToken(self, token):
        """Appends token; reports tokens whose type was never assigned."""
        self.tokens.append(token)
        if token.type == -1:
            print("Error here", token.position, token.length)
    def setState(self, state):
        self.state = state
    def requestBacktrack(self):
        self.backtrack = True
    def tokenLessThan(self, t1, t2):
        """Returns True if t1 starts before t2."""
        # Tokens expose `position` as a plain attribute (this is what
        # getTokens() sorts on); they have no getPosition() accessor.
        return t1.position < t2.position
 class MarkdownTokenizer(HighlightTokenizer):
    """
    Markdown line tokenizer.  The regular expressions below are class-level
    QRegExp objects used by the tokenize*() methods.
    """
    # Character written over text that must not be matched again.
    DUMMY_CHAR = "$"
    MAX_MARKDOWN_HEADING_LEVEL = 6
    # --- Block-level patterns ---
    paragraphBreakRegex = QRegExp("^\\s*$")
    heading1SetextRegex = QRegExp("^===+\\s*$")
    heading2SetextRegex = QRegExp("^---+\\s*$")
    blockquoteRegex = QRegExp("^ {0,3}>.*$")
    githubCodeFenceStartRegex = QRegExp("^```+.*$")
    githubCodeFenceEndRegex = QRegExp("^```+\\s*$")
    pandocCodeFenceStartRegex = QRegExp("^~~~+.*$")
    pandocCodeFenceEndRegex = QRegExp("^~~~+\\s*$")
    numberedListRegex = QRegExp("^ {0,3}[0-9a-z]+[.)]\\s+.*$")
    numberedNestedListRegex = QRegExp("^\\s*[0-9a-z]+[.)]\\s+.*$")
    hruleRegex = QRegExp("\\s*(\\*\\s*){3,}|(\\s*(_\\s*){3,})|((\\s*(-\\s*){3,}))")
    lineBreakRegex = QRegExp(".*\\s{2,}$")
    # --- Inline patterns (minimal = non-greedy matching) ---
    emphasisRegex = QRegExp("(\\*(?![\\s*]).*[^\\s*]\\*)|_(?![\\s_]).*[^\\s_]_")
    emphasisRegex.setMinimal(True)
    strongRegex = QRegExp("\\*\\*(?=\\S).*\\S\\*\\*(?!\\*)|__(?=\\S).*\\S__(?!_)")
    strongRegex.setMinimal(True)
    strikethroughRegex = QRegExp("~~[^\\s]+.*[^\\s]+~~")
    strikethroughRegex.setMinimal(True)
    # The caret is written "\\^" (same pattern as before, but a valid Python
    # string escape).
    superScriptRegex = QRegExp("\\^([^\\s]|(\\\\\\s))+\\^")  # Spaces must be escaped "\ "
    superScriptRegex.setMinimal(True)
    subScriptRegex = QRegExp("~([^\\s]|(\\\\\\s))+~")  # Spaces must be escaped "\ "
    subScriptRegex.setMinimal(True)
    verbatimRegex = QRegExp("`+")
    htmlTagRegex = QRegExp("<[^<>]+>")
    htmlTagRegex.setMinimal(True)
    htmlEntityRegex = QRegExp("&[a-zA-Z]+;|&#x?[0-9]+;")
    automaticLinkRegex = QRegExp("(<[a-zA-Z]+\\:.+>)|(<.+@.+>)")
    automaticLinkRegex.setMinimal(True)
    inlineLinkRegex = QRegExp("\\[.+\\]\\(.+\\)")
    inlineLinkRegex.setMinimal(True)
    referenceLinkRegex = QRegExp("\\[(.+)\\]")
    referenceLinkRegex.setMinimal(True)
    referenceDefinitionRegex = QRegExp("^\\s*\\[.+\\]:")
    imageRegex = QRegExp("!\\[.*\\]\\(.+\\)")
    imageRegex.setMinimal(True)
    htmlInlineCommentRegex = QRegExp("<!--.*-->")
    htmlInlineCommentRegex.setMinimal(True)
    mentionRegex = QRegExp("\\B@\\w+(\\-\\w+)*(/\\w+(\\-\\w+)*)?")
    pipeTableDividerRegex = QRegExp("^ {0,3}(\\|[ :]?)?-{3,}([ :]?\\|[ :]?-{3,}([ :]?\\|)?)+\\s*$")
    def __init__(self):
        """Run the base tokenizer's initialisation."""
        super().__init__()
    def tokenize(self, text, currentState, previousState, nextState):
        """
        Tokenizes one line of text.  Stores the three states on the
        instance, tries the block-level tokenizers in order, then (for
        blocks that may contain inline markup) the line-break and inline
        tokenizers.  Results are reported through addToken(), setState()
        and requestBacktrack().
        """
        self.currentState = currentState
        self.previousState = previousState
        self.nextState = nextState
        if (self.previousState == MS.MarkdownStateInGithubCodeFence or \
            self.previousState == MS.MarkdownStateInPandocCodeFence) and \
                self.tokenizeCodeBlock(text):
            # No further tokenizing required
            pass
        elif self.previousState != MS.MarkdownStateComment \
            and self.paragraphBreakRegex.exactMatch(text):
            if previousState in [MS.MarkdownStateListLineBreak,
                                 MS.MarkdownStateNumberedList,
                                 MS.MarkdownStateBulletPointList]:
                self.setState(MS.MarkdownStateListLineBreak)
            elif previousState != MS.MarkdownStateCodeBlock or \
                (text[:1] != "\t" and text[-4:] != "    "):
                self.setState(MS.MarkdownStateParagraphBreak)
        elif self.tokenizeSetextHeadingLine2(text) or \
             self.tokenizeCodeBlock(text) or \
             self.tokenizeMultilineComment(text) or \
             self.tokenizeHorizontalRule(text) or \
             self.tokenizeTableDivider(text):
            # No further tokenizing required
            pass
        elif self.tokenizeSetextHeadingLine1(text) or \
             self.tokenizeAtxHeading(text) or \
             self.tokenizeBlockquote(text) or \
             self.tokenizeNumberedList(text) or \
             self.tokenizeBulletPointList(text):
            self.tokenizeLineBreak(text)
            self.tokenizeInline(text)
        else:
            # An indented line following any kind of list line (line break,
            # numbered or bullet point) continues that list; everything
            # else is a plain paragraph.
            if previousState in [MS.MarkdownStateListLineBreak,
                                 MS.MarkdownStateNumberedList,
                                 MS.MarkdownStateBulletPointList]:
                if not self.tokenizeNumberedList(text) and \
                   not self.tokenizeBulletPointList(text) and \
                   (text[:1] == "\t" or text[:4] == "    "):
                    self.setState(previousState)
                else:
                    self.setState(MS.MarkdownStateParagraph)
            else:
                self.setState(MS.MarkdownStateParagraph)
            self.tokenizeLineBreak(text)
            self.tokenizeInline(text)
        # Make sure that if the second line of a setext heading is removed the
        # first line is reprocessed.  Otherwise, it will still show up in the
        # document as a heading.
        if (previousState == MS.MarkdownStateSetextHeading1Line1 and \
           self.getState() != MS.MarkdownStateSetextHeading1Line2) or \
           (previousState == MS.MarkdownStateSetextHeading2Line1 and \
           self.getState() != MS.MarkdownStateSetextHeading2Line2):
            self.requestBacktrack()
    def tokenizeSetextHeadingLine1(self, text):
        """
        Tokenizes the first line of a setext heading.  The line only counts
        as a heading when the next line's state is already a setext
        "line 2" state.  Returns True if a heading token was added.
        """
        following = self.nextState
        if following == MS.MarkdownStateSetextHeading1Line2:
            lineState = MS.MarkdownStateSetextHeading1Line1
            tokenType = MTT.TokenSetextHeading1Line1
        elif following == MS.MarkdownStateSetextHeading2Line2:
            lineState = MS.MarkdownStateSetextHeading2Line1
            tokenType = MTT.TokenSetextHeading2Line1
        else:
            # The next line is not a setext underline.
            return False
        self.setState(lineState)
        heading = Token()
        heading.type = tokenType
        heading.length = len(text)
        heading.position = 0
        self.addToken(heading)
        return True
    def tokenizeSetextHeadingLine2(self, text):
        """
        Tokenizes the underline ("===" or "---") of a setext heading.
        Returns True if the line was tokenized as a setext "line 2".
        Requests a backtrack whenever the previous line has to be
        re-tokenized: either it was a paragraph that now becomes a heading,
        or it was a heading "line 1" whose underline no longer matches.
        """
        level = 0
        setextMatch = False
        token = Token()
        previousState = self.previousState
        if previousState == MS.MarkdownStateSetextHeading1Line1:
            level = 1
            setextMatch = self.heading1SetextRegex.exactMatch(text)
            self.setState(MS.MarkdownStateSetextHeading1Line2)
            token.type = MTT.TokenSetextHeading1Line2
        elif previousState == MS.MarkdownStateSetextHeading2Line1:
            level = 2
            setextMatch = self.heading2SetextRegex.exactMatch(text)
            self.setState(MS.MarkdownStateSetextHeading2Line2)
            token.type = MTT.TokenSetextHeading2Line2
        elif previousState == MS.MarkdownStateParagraph:
            h1Line2 = self.heading1SetextRegex.exactMatch(text)
            h2Line2 = self.heading2SetextRegex.exactMatch(text)
            if h1Line2 or h2Line2:
                # Restart tokenizing on the previous line.
                self.requestBacktrack()
                token.length = len(text)
                token.position = 0
                if h1Line2:
                    self.setState(MS.MarkdownStateSetextHeading1Line2)
                    token.type = MTT.TokenSetextHeading1Line2
                else:
                    self.setState(MS.MarkdownStateSetextHeading2Line2)
                    token.type = MTT.TokenSetextHeading2Line2
                self.addToken(token)
                return True
        if level > 0:
            if setextMatch:
                token.length = len(text)
                token.position = 0
                self.addToken(token)
                return True
            # The underline is gone or malformed: restart tokenizing on the
            # previous line so it stops being shown as a heading.
            self.requestBacktrack()
            return False
        return False
    def tokenizeAtxHeading(self, text):
        """
        Tokenizes an ATX ("# Heading") line.  The heading level is the
        number of leading pound signs, capped at MAX_MARKDOWN_HEADING_LEVEL.
        Returns True if a heading token was added.
        """
        escapedText = self.dummyOutEscapeCharacters(text)
        textLength = len(escapedText)
        # Count the number of pound signs at the front of the string,
        # up to the maximum allowed, to determine the heading level.
        # The bounds are checked first so that empty text cannot raise.
        level = 0
        while level < textLength and \
              level < self.MAX_MARKDOWN_HEADING_LEVEL and \
              escapedText[level] == "#":
            level += 1
        if level <= 0 or level >= len(text):
            return False
        # Count how many pound signs are at the end of the text.  The scan
        # stops before the opening markup so that a line made only of pound
        # signs cannot run past the start of the string.
        trailingPoundCount = 0
        i = textLength - 1
        while i > level and escapedText[i] == "#":
            trailingPoundCount += 1
            i -= 1
        token = Token()
        token.position = 0
        token.length = len(text)
        token.type = MTT.TokenAtxHeading1 + level -1
        token.openingMarkupLength = level
        token.closingMarkupLength = trailingPoundCount
        self.addToken(token)
        self.setState(MS.MarkdownStateAtxHeading1 + level -1)
        return True
    def tokenizeNumberedList(self, text):
        """
        Tokenizes a numbered list item ("1. item" or "a) item").  Which
        pattern is used depends on whether the previous line could start a
        new list or already belongs to one.  Returns True if a list token
        was added.
        """
        prior = self.previousState
        startsList = prior in (
            MS.MarkdownStateParagraphBreak,
            MS.MarkdownStateUnknown,
            MS.MarkdownStateCodeBlock,
            MS.MarkdownStateCodeFenceEnd,
        ) and self.numberedListRegex.exactMatch(text)
        continuesList = (not startsList) and prior in (
            MS.MarkdownStateListLineBreak,
            MS.MarkdownStateNumberedList,
            MS.MarkdownStateBulletPointList,
        ) and self.numberedNestedListRegex.exactMatch(text)
        if not (startsList or continuesList):
            return False
        # The marker ends at whichever of "." and ")" comes first; -1 when
        # neither is present.
        found = [pos for pos in (text.find("."), text.find(")")) if pos >= 0]
        markerEnd = min(found) if found else -1
        if markerEnd <= 0:
            return False
        item = Token()
        item.type = MTT.TokenNumberedList
        item.position = 0
        item.length = len(text)
        item.openingMarkupLength = markerEnd + 2
        self.addToken(item)
        self.setState(MS.MarkdownStateNumberedList)
        return True
    def tokenizeBulletPointList(self, text):
        """
        Tokenizes a bullet point list item.  The line must consist of
        optional leading whitespace, a '+', '-' or '*' character, and then
        a space or tab.  Returns True if a bullet point token was added and
        the state was set to MarkdownStateBulletPointList.
        """
        foundBulletChar = False
        bulletCharIndex = -1
        spaceCount = 0
        whitespaceFoundAfterBulletChar = False
        previousState = self.previousState
        # A bullet point can only follow one of these states.
        if previousState not in [MS.MarkdownStateUnknown,
                                 MS.MarkdownStateParagraphBreak,
                                 MS.MarkdownStateListLineBreak,
                                 MS.MarkdownStateNumberedList,
                                 MS.MarkdownStateBulletPointList,
                                 MS.MarkdownStateCodeBlock,
                                 MS.MarkdownStateCodeFenceEnd]:
            return False
        # Search for the bullet point character, which can
        # be either a '+', '-', or '*'.
        for i in range(len(text)):
            if text[i] == " ":
                if foundBulletChar:
                    # We've confirmed it's a bullet point by the whitespace that
                    # follows the bullet point character, and can now exit the
                    # loop.
                    whitespaceFoundAfterBulletChar = True
                    break
                else:
                    spaceCount += 1
                    # If this list item is the first in the list, ensure the
                    # number of spaces preceding the bullet point does not
                    # exceed three, as that would indicate a code block rather
                    # than a bullet point list.
                    if spaceCount > 3 and previousState not in [
                        MS.MarkdownStateNumberedList,
                        MS.MarkdownStateBulletPointList,
                        MS.MarkdownStateListLineBreak,] and \
                       previousState in [
                        MS.MarkdownStateParagraphBreak,
                        MS.MarkdownStateUnknown,
                        MS.MarkdownStateCodeBlock,
                        MS.MarkdownStateCodeFenceEnd,]:
                        return False
            elif text[i] == "\t":
                if foundBulletChar:
                    # We've confirmed it's a bullet point by the whitespace that
                    # follows the bullet point character, and can now exit the
                    # loop.
                    whitespaceFoundAfterBulletChar = True
                    break
                elif previousState in [
                    MS.MarkdownStateParagraphBreak,
                    MS.MarkdownStateUnknown]:
                    # If this list item is the first in the list, ensure that
                    # no tab character precedes the bullet point, as that would
                    # indicate a code block rather than a bullet point list.
                    return False
            elif text[i] in ["+", "-", "*"]:
                # Remember the most recent bullet character seen; the loop
                # only ends successfully at the whitespace that follows it.
                foundBulletChar = True
                bulletCharIndex = i
            else:
                # Any other character before the bullet is confirmed means
                # this line is not a bullet point.
                return False
        if bulletCharIndex >= 0 and whitespaceFoundAfterBulletChar:
            token = Token()
            token.type = MTT.TokenBulletPointList
            token.position = 0
            token.length = len(text)
            # Opening markup runs through the bullet character and the
            # whitespace character after it.
            token.openingMarkupLength = bulletCharIndex + 2
            self.addToken(token)
            self.setState(MS.MarkdownStateBulletPointList)
            return True
        return False
    def tokenizeHorizontalRule(self, text):
        """
        Tokenizes a horizontal rule line.  Returns True if the whole line
        matches hruleRegex and a token was added.
        """
        if not self.hruleRegex.exactMatch(text):
            return False
        rule = Token()
        rule.type = MTT.TokenHorizontalRule
        rule.position = 0
        rule.length = len(text)
        self.addToken(rule)
        self.setState(MS.MarkdownStateHorizontalRule)
        return True
    def tokenizeLineBreak(self, text):
        """
        Tokenizes a hard line break (a line matching lineBreakRegex) inside
        flowing text.  Requests a backtrack when the previous and/or next
        line is also flowing text.  Returns True if a line break token was
        added.
        """
        flowStates = (
            MS.MarkdownStateParagraph,
            MS.MarkdownStateBlockquote,
            MS.MarkdownStateNumberedList,
            MS.MarkdownStateBulletPointList,
        )
        if self.currentState not in flowStates:
            return False
        if self.previousState in flowStates:
            self.requestBacktrack()
        if self.nextState not in flowStates:
            return False
        self.requestBacktrack()
        if not self.lineBreakRegex.exactMatch(text):
            return False
        # The token covers the last character of the line.
        lineBreak = Token()
        lineBreak.type = MTT.TokenLineBreak
        lineBreak.position = len(text) - 1
        lineBreak.length = 1
        self.addToken(lineBreak)
        return True
    def tokenizeBlockquote(self, text):
        """
        Tokenizes a blockquote line.  A line is a blockquote when the
        previous line was one or when it matches blockquoteRegex.  Returns
        True if a blockquote token was added.
        """
        if self.previousState != MS.MarkdownStateBlockquote and \
           not self.blockquoteRegex.exactMatch(text):
            return False
        # Walk over the leading run of '>' and ' ' characters; the opening
        # markup ends just after the last '>' in that run.
        quoteMarkupEnd = 0
        for offset, char in enumerate(text):
            if char == ">":
                quoteMarkupEnd = offset + 1
            elif char != " ":
                break
        quote = Token()
        quote.type = MTT.TokenBlockquote
        quote.position = 0
        quote.length = len(text)
        # Stays at the Token default of 0 when the line has no '>'.
        quote.openingMarkupLength = quoteMarkupEnd
        self.addToken(quote)
        self.setState(MS.MarkdownStateBlockquote)
        return True
    def tokenizeCodeBlock(self, text):
        """
        Tokenizes code: the inside or end of a fenced block, an indented
        code block, or the opening line of a fenced block.  Which of these
        is tried depends on the previous line's state.  Returns True if a
        token was added.
        """
        previousState = self.previousState
        # Case 1: already inside a fenced block.
        if previousState in [
                MS.MarkdownStateInGithubCodeFence,
                MS.MarkdownStateInPandocCodeFence]:
            # Stay inside the fence unless the closing fence is found below.
            self.setState(previousState)
            if (previousState == MS.MarkdownStateInGithubCodeFence and \
               self.githubCodeFenceEndRegex.exactMatch(text)) or \
               (previousState == MS.MarkdownStateInPandocCodeFence and \
               self.pandocCodeFenceEndRegex.exactMatch(text)):
                token = Token()
                token.type = MTT.TokenCodeFenceEnd
                token.position = 0
                token.length = len(text)
                self.addToken(token)
                self.setState(MS.MarkdownStateCodeFenceEnd)
            else:
                token = Token()
                token.type = MTT.TokenCodeBlock
                token.position = 0
                token.length = len(text)
                self.addToken(token)
            return True
        # Case 2: a line indented by a tab or four spaces.
        elif previousState in [
                MS.MarkdownStateCodeBlock,
                MS.MarkdownStateParagraphBreak,
                MS.MarkdownStateUnknown,] and \
             (text[:1] == "\t" or text[:4] == "    "):
            token = Token()
            token.type = MTT.TokenCodeBlock
            token.position = 0
            token.length = len(text)
            # The opening markup is the line's leading whitespace.
            token.openingMarkupLength = len(text) - len(text.lstrip())
            self.addToken(token)
            self.setState(MS.MarkdownStateCodeBlock)
            return True
        # Case 3: the opening line of a fenced block.
        elif previousState in [
                MS.MarkdownStateParagraphBreak,
                MS.MarkdownStateParagraph,
                MS.MarkdownStateUnknown,
                MS.MarkdownStateListLineBreak,]:
            foundCodeFenceStart = False
            token = Token()
            if self.githubCodeFenceStartRegex.exactMatch(text):
                foundCodeFenceStart = True
                token.type = MTT.TokenGithubCodeFence
                self.setState(MS.MarkdownStateInGithubCodeFence)
            elif self.pandocCodeFenceStartRegex.exactMatch(text):
                foundCodeFenceStart = True
                token.type = MTT.TokenPandocCodeFence
                self.setState(MS.MarkdownStateInPandocCodeFence)
            if foundCodeFenceStart:
                token.position = 0
                token.length = len(text)
                self.addToken(token)
                return True
        return False
    def tokenizeMultilineComment(self, text):
        """
        Tokenizes a line that continues an HTML comment opened on an
        earlier line.  Returns True only when the whole line is still
        inside the comment.
        """
        if self.previousState != MS.MarkdownStateComment:
            return False
        # Find the end of the comment, if any.
        closeAt = text.find("-->")
        commentClosed = closeAt >= 0
        comment = Token()
        comment.type = MTT.TokenHtmlComment
        comment.position = 0
        comment.length = closeAt + 3 if commentClosed else len(text)
        self.addToken(comment)
        if commentClosed:
            # Report False so that the rest of the line, which is outside
            # the comment, can be highlighted as normal paragraph text.
            return False
        self.setState(MS.MarkdownStateComment)
        return True
    def tokenizeInline(self, text):
        """
        Tokenizes the inline markup of a line.  Each pass works on a copy
        of the text in which already-tokenized parts have been overwritten,
        so later passes do not match them again.  Always returns True.
        """
        working = self.dummyOutEscapeCharacters(text)
        # Check if the line is a reference definition.
        # NOTE(review): exactMatch() tests the whole line against a pattern
        # that ends at the colon -- confirm this is the intended check.
        if self.referenceDefinitionRegex.exactMatch(text):
            definition = Token()
            definition.type = MTT.TokenReferenceDefinition
            definition.position = 0
            definition.length = working.find(":") + 1
            self.addToken(definition)
            # Replace the first bracket so that the '[...]:' reference
            # definition start doesn't get highlighted as a reference link.
            bracketAt = working.find("[")
            if bracketAt >= 0:
                working = working[:bracketAt] + self.DUMMY_CHAR + working[bracketAt+1:]
        for tokenizeStep in (self.tokenizeVerbatim,
                             self.tokenizeHtmlComments,
                             self.tokenizeTableHeaderRow,
                             self.tokenizeTableRow):
            working = tokenizeStep(working)
        # (token type, regex, extra positional arguments for
        # tokenizeMatches), applied in this order.
        regexPasses = (
            (MTT.TokenImage, self.imageRegex, (0, 0, False, True)),
            (MTT.TokenInlineLink, self.inlineLinkRegex, (0, 0, False, True)),
            (MTT.TokenReferenceLink, self.referenceLinkRegex, (0, 0, False, True)),
            (MTT.TokenHtmlEntity, self.htmlEntityRegex, ()),
            (MTT.TokenAutomaticLink, self.automaticLinkRegex, (0, 0, False, True)),
            (MTT.TokenStrikethrough, self.strikethroughRegex, (2, 2, True)),
            (MTT.TokenStrong, self.strongRegex, (2, 2, True)),
            (MTT.TokenEmphasis, self.emphasisRegex, (1, 1, True)),
            (MTT.TokenSuperScript, self.superScriptRegex, (1, 1, True)),
            (MTT.TokenSubScript, self.subScriptRegex, (1, 1, True)),
            (MTT.TokenHtmlTag, self.htmlTagRegex, ()),
            (MTT.TokenMention, self.mentionRegex, (0, 0, False, True)),
        )
        for tokenType, regex, extraArgs in regexPasses:
            working = self.tokenizeMatches(tokenType, working, regex, *extraArgs)
        return True
    def tokenizeVerbatim(self, text):
        """
        Tokenizes inline code spans delimited by runs of back ticks.
        Returns the text with every tokenized span overwritten by the dummy
        character.
        """
        openAt = self.verbatimRegex.indexIn(text)
        while openAt >= 0:
            tickCount = self.verbatimRegex.matchedLength()
            # The matching end should have the same number of back ticks as
            # the start.
            closer = "`" * tickCount
            closeAt = text.find(closer, openAt + tickCount)
            if closeAt < 0:
                # No end found: start searching again at the very next
                # character.
                resumeAt = openAt + 1
            else:
                span = closeAt + tickCount - openAt
                verbatim = Token()
                verbatim.type = MTT.TokenVerbatim
                verbatim.position = openAt
                verbatim.length = span
                verbatim.openingMarkupLength = tickCount
                verbatim.closingMarkupLength = tickCount
                self.addToken(verbatim)
                # Fill out the token match in the string with the dummy
                # character so that searches for other Markdown elements
                # don't find anything within this token's range.
                text = text[:openAt] + self.DUMMY_CHAR * span + text[openAt + span:]
                resumeAt = openAt + span
            openAt = self.verbatimRegex.indexIn(text, resumeAt)
        return text
    def tokenizeHtmlComments(self, text):
        """
        Tokenizes HTML comments on a line: complete inline comments, and
        the start of a comment that is not closed on this line (which also
        sets the state to MarkdownStateComment).  Returns the text with all
        comment ranges overwritten by the dummy character.
        """
        previousState = self.previousState
        # Check for the end of a multiline comment so that it doesn't get further
        # tokenized. Don't bother formatting the comment itself, however, because
        # it should have already been tokenized in tokenizeMultilineComment().
        if previousState == MS.MarkdownStateComment:
            commentEnd = text.find("-->")
            # Only mask when the end marker is actually present in this
            # text.  With find() returning -1 the range would otherwise
            # cover the first two characters of the line.
            if commentEnd >= 0:
                maskLength = commentEnd + 3
                text = self.DUMMY_CHAR * maskLength + text[maskLength:]
        # Now check for inline comments (non-multiline).
        commentStart = self.htmlInlineCommentRegex.indexIn(text)
        while commentStart >= 0:
            commentLength = self.htmlInlineCommentRegex.matchedLength()
            token = Token()
            token.type = MTT.TokenHtmlComment
            token.position = commentStart
            token.length = commentLength
            self.addToken(token)
            # Replace comment segment with dummy characters so that it doesn't
            # get tokenized again.
            commentStop = commentStart + commentLength
            text = text[:commentStart] + \
                self.DUMMY_CHAR * commentLength + text[commentStop:]
            commentStart = self.htmlInlineCommentRegex.indexIn(text, commentStop)
        # Find multiline comment start, if any.
        commentStart = text.find("<!--")
        if commentStart >= 0:
            token = Token()
            token.type = MTT.TokenHtmlComment
            token.position = commentStart
            token.length = len(text) - commentStart
            self.addToken(token)
            self.setState(MS.MarkdownStateComment)
            # Replace comment segment with dummy characters so that it doesn't
            # get tokenized again.
            text = text[:commentStart] + \
                self.DUMMY_CHAR * (len(text) - commentStart)
        return text
    def tokenizeTableHeaderRow(self, text):
        """
        Tokenizes the header row of a pipe table.  The line is a header row
        when the next line is a table divider and the previous/current
        states allow a table to start.  Adds one token per header cell and
        one per pipe, and returns the text with the pipes blanked out.
        """
        previousState = self.previousState
        nextState = self.nextState
        if previousState in [
            MS.MarkdownStateParagraphBreak,
            MS.MarkdownStateListLineBreak,
            MS.MarkdownStateSetextHeading1Line2,
            MS.MarkdownStateSetextHeading2Line2,
            MS.MarkdownStateAtxHeading1,
            MS.MarkdownStateAtxHeading2,
            MS.MarkdownStateAtxHeading3,
            MS.MarkdownStateAtxHeading4,
            MS.MarkdownStateAtxHeading5,
            MS.MarkdownStateAtxHeading6,
            MS.MarkdownStateHorizontalRule,
            MS.MarkdownStateCodeFenceEnd,
            MS.MarkdownStateUnknown,] and \
           self.getState() in [
            MS.MarkdownStateParagraph,
            MS.MarkdownStateUnknown] and \
           nextState == MS.MarkdownStatePipeTableDivider:
            self.setState(MS.MarkdownStatePipeTableHeader)
            headerStart = 0
            for i in range(len(text)):
                if text[i] == "|":
                    # Replace pipe with space so that it doesn't get formatted
                    # again with, for example, strong or emphasis formatting.
                    # Note that we use a space rather than DUMMY_CHAR for this,
                    # to prevent formatting such as strong and emphasis from
                    # picking it up.
                    text = text[:i] + " " + text[i+1:]
                    if i > 0:
                        headerToken = Token()
                        headerToken.type = MTT.TokenTableHeader
                        headerToken.position = headerStart
                        headerToken.length = i - headerStart
                        self.addToken(headerToken)
                    # The pipe needs its own Token object: addToken() stores
                    # a reference, so reusing the header token here would
                    # overwrite the header cell that was just added.
                    pipeToken = Token()
                    pipeToken.type = MTT.TokenTablePipe
                    pipeToken.position = i
                    pipeToken.length = 1
                    self.addToken(pipeToken)
                    headerStart = i + 1
            # Text after the last pipe is a final header cell.
            if headerStart < len(text):
                token = Token()
                token.type = MTT.TokenTableHeader
                token.position = headerStart
                token.length = len(text) - headerStart
                self.addToken(token)
        return text
    def tokenizeTableDivider(self, text):
        """
        Tokenizes the divider line under a pipe table header.  Requests a
        backtrack when the previous line must be re-tokenized: a header
        whose divider no longer matches, or a paragraph that now turns out
        to be a header.  Returns True if a divider token was added.
        """
        prior = self.previousState
        afterHeader = prior == MS.MarkdownStatePipeTableHeader
        if not afterHeader and prior != MS.MarkdownStateParagraph:
            return False
        if not self.pipeTableDividerRegex.exactMatch(text):
            if afterHeader:
                # Restart tokenizing on the previous line.
                self.requestBacktrack()
            return False
        if not afterHeader:
            # Restart tokenizing on the previous line.
            self.requestBacktrack()
        self.setState(MS.MarkdownStatePipeTableDivider)
        divider = Token()
        divider.type = MTT.TokenTableDivider
        divider.position = 0
        divider.length = len(text)
        self.addToken(divider)
        return True
    def tokenizeTableRow(self, text):
        """
        Tokenizes a pipe table row, i.e. a line following a table divider
        or another table row.  Adds one token per pipe and returns the text
        with the pipes blanked out.
        """
        if self.previousState not in (MS.MarkdownStatePipeTableDivider,
                                      MS.MarkdownStatePipeTableRow):
            return text
        self.setState(MS.MarkdownStatePipeTableRow)
        for column, char in enumerate(text):
            if char != "|":
                continue
            pipe = Token()
            pipe.type = MTT.TokenTablePipe
            pipe.position = column
            pipe.length = 1
            self.addToken(pipe)
        # Replace pipes with spaces so that they don't get formatted again
        # with, for example, strong or emphasis formatting.  A space is used
        # rather than DUMMY_CHAR to prevent formatting such as strong and
        # emphasis from picking it up.
        return text.replace("|", " ")
    def tokenizeMatches(self, tokenType, text, regex,
                        markupStartCount=0, markupEndCount=0,
                        replaceMarkupChars=False, replaceAllChars=False):
        """
        Adds a token of type tokenType for every occurrence of regex in
        text, and returns the (possibly masked) text.

        markupStartCount and markupEndCount give the number of markup
        characters that precede and follow the main text of a match.  For
        example, for the match "**bold**" with both counts set to 2, the
        asterisks on either side of "bold" are recorded as the token's
        opening and closing markup.

        If replaceAllChars is true, the whole match is overwritten with the
        dummy character.  Otherwise, if replaceMarkupChars is true, only
        the markup characters are overwritten.  Either way the point is
        that later passes over the same line do not pick the characters up
        again.
        """
        dummy = self.DUMMY_CHAR
        matchStart = regex.indexIn(text)
        while matchStart >= 0:
            matchLength = regex.matchedLength()
            matchEnd = matchStart + matchLength
            found = Token()
            found.type = tokenType
            found.position = matchStart
            found.length = matchLength
            if markupStartCount > 0:
                found.openingMarkupLength = markupStartCount
            if markupEndCount > 0:
                found.closingMarkupLength = markupEndCount
            if replaceAllChars:
                text = text[:matchStart] + dummy * matchLength + text[matchEnd:]
            elif replaceMarkupChars:
                if markupStartCount > 0:
                    openEnd = matchStart + markupStartCount
                    text = text[:matchStart] + dummy * markupStartCount + text[openEnd:]
                if markupEndCount > 0:
                    closeStart = matchEnd - markupEndCount
                    text = text[:closeStart] + dummy * markupEndCount + text[matchEnd:]
            self.addToken(found)
            matchStart = regex.indexIn(text, matchEnd)
        return text
    def dummyOutEscapeCharacters(self, text):
        """
        Replaces escaped characters in text so they aren't picked up
        during parsing.  Returns a copy of the input text string in which
        the character following each backslash is replaced with "$" (the
        same character as DUMMY_CHAR).  The backslash itself is kept, so
        the length of the text does not change.
        """
        # Raw strings: the pattern is a backslash followed by any character
        # other than a newline; the replacement is a literal backslash
        # followed by "$".
        return re.sub(r"\\.", r"\\$", text)
--- a/manuskript/ui/views/textEditView.py
+++ b/manuskript/ui/views/textEditView.py
@ -11,8 +11,7 @@ from manuskript.enums import Outline
 from manuskript import functions as F
 from manuskript.models.outlineModel import outlineModel
 from manuskript.ui.editors.MDFunctions import MDFormatSelection
-from manuskript.ui.editors.MMDHighlighter import MMDHighlighter
+from manuskript.ui.highlighters import MMDHighlighter, BasicHighlighter
 from manuskript.ui.editors.basicHighlighter import basicHighlighter
 from manuskript.ui.editors.textFormat import textFormat
 from manuskript.ui import style as S
@ -84,7 +83,7 @@ class textEditView(QTextEdit):
            self.spellcheck = False
        if self._highlighting and not self.highlighter:
-            self.highlighter = basicHighlighter(self)
+            self.highlighter = BasicHighlighter(self)
            self.highlighter.setDefaultBlockFormat(self._defaultBlockFormat)
    def getDefaultLocale(self):
@ -191,7 +190,7 @@ class textEditView(QTextEdit):
            if self._column in [Outline.text.value, Outline.notes.value]:
                self.highlighter = MMDHighlighter(self)
            else:
-                self.highlighter = basicHighlighter(self)
+                self.highlighter = BasicHighlighter(self)
            self.highlighter.setDefaultBlockFormat(self._defaultBlockFormat)