Checkpoint: adding new markdown highlighter

2024-05-17 11:22:28 +12:00 · 2017-11-22 13:43:40 +01:00 · 2017-11-22 13:43:40 +01:00 · 9be2edeee7
parent 550f889251
commit 9be2edeee7
8 changed files with 1726 additions and 18 deletions
--- a/manuskript/exporter/manuskript/markdown.py
+++ b/manuskript/exporter/manuskript/markdown.py
@ -5,7 +5,7 @@ from PyQt5.QtWidgets import QPlainTextEdit, QGroupBox, qApp, QVBoxLayout, QCheck

 from manuskript.exporter.manuskript.plainText import plainText
 from manuskript.functions import mainWindow
-from manuskript.ui.editors.MMDHighlighter import MMDHighlighter
+from manuskript.ui.highlighters import MMDHighlighter
 from manuskript.ui.exporters.manuskript.plainTextSettings import exporterSettings


@ -72,4 +72,4 @@ class markdownSettings(exporterSettings):
        self.settings = exporterSettings.getSettings(self)
        self.settings["Preview"]["MarkdownHighlighter"] = self.chkMarkdownHighlighter.isChecked()

-        return self.settings
+        return self.settings
--- a/manuskript/ui/highlighters/MMDHighlighter.py
+++ b/manuskript/ui/highlighters/MMDHighlighter.py
@ -5,10 +5,10 @@ import re
 from PyQt5.QtCore import Qt
 from PyQt5.QtGui import QTextCharFormat, QFont, QTextCursor, QFontMetrics

-from manuskript.ui.editors.basicHighlighter import basicHighlighter
+from manuskript.ui.highlighters import BasicHighlighter


-class MMDHighlighter(basicHighlighter):
+class MMDHighlighter(BasicHighlighter):

    MARKDOWN_REGEX = {
        'Bold':             '(\*\*)(.+?)(\*\*)',
@ -27,7 +27,7 @@ class MMDHighlighter(basicHighlighter):
    }

    def __init__(self, editor, style="Default"):
-        basicHighlighter.__init__(self, editor)
+        BasicHighlighter.__init__(self, editor)

        self.editor = editor

@ -36,11 +36,11 @@ class MMDHighlighter(basicHighlighter):
            self.rules[key] = re.compile(self.MARKDOWN_REGEX[key])

    def highlightBlock(self, text):
-        basicHighlighter.highlightBlockBefore(self, text)
+        BasicHighlighter.highlightBlockBefore(self, text)

        self.doHighlightBlock(text)

-        basicHighlighter.highlightBlockAfter(self, text)
+        BasicHighlighter.highlightBlockAfter(self, text)

    def doHighlightBlock(self, text):
        """
--- a/manuskript/ui/highlighters/__init__.py
+++ b/manuskript/ui/highlighters/__init__.py
@ -0,0 +1,6 @@
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+
+from manuskript.ui.highlighters.basicHighlighter import BasicHighlighter
+from manuskript.ui.highlighters.MMDHighlighter import MMDHighlighter
+from manuskript.ui.highlighters.markdownHighlighter import MarkdownHighlighter
--- a/manuskript/ui/highlighters/basicHighlighter.py
+++ b/manuskript/ui/highlighters/basicHighlighter.py
@ -4,12 +4,13 @@
 import re

 from PyQt5.QtCore import Qt
-from PyQt5.QtGui import QBrush, QTextCursor, QColor, QFont, QSyntaxHighlighter, QTextBlockFormat, QTextCharFormat
+from PyQt5.QtGui import QBrush, QTextCursor, QColor, QFont, QSyntaxHighlighter
+from PyQt5.QtGui import QTextBlockFormat, QTextCharFormat

 import manuskript.models.references as Ref


-class basicHighlighter(QSyntaxHighlighter):
+class BasicHighlighter(QSyntaxHighlighter):
    def __init__(self, editor):
        QSyntaxHighlighter.__init__(self, editor.document())

@ -38,7 +39,7 @@ class basicHighlighter(QSyntaxHighlighter):
    def highlightBlockBefore(self, text):
        """Highlighting to do before anything else.

-        When subclassing basicHighlighter, you must call highlightBlockBefore
+        When subclassing BasicHighlighter, you must call highlightBlockBefore
        before you do any custom highlighting.
        """

@ -56,7 +57,7 @@ class basicHighlighter(QSyntaxHighlighter):
    def highlightBlockAfter(self, text):
        """Highlighting to do after everything else.

-        When subclassing basicHighlighter, you must call highlightBlockAfter
+        When subclassing BasicHighlighter, you must call highlightBlockAfter
        after your custom highlighting.
        """

@ -91,13 +92,16 @@ class basicHighlighter(QSyntaxHighlighter):
            textedText = text + " "

        # Based on http://john.nachtimwald.com/2009/08/22/qplaintextedit-with-in-line-spell-check/
-        WORDS = '(?iu)([\w\']+)[^\'\w]'  # (?iu) means case insensitive and unicode
+        WORDS = r'(?iu)([\w\']+)[^\'\w]'
+        #        (?iu) means case insensitive and unicode
        if hasattr(self.editor, "spellcheck") and self.editor.spellcheck:
            for word_object in re.finditer(WORDS, textedText):
-                if self.editor._dict and not self.editor._dict.check(word_object.group(1)):
+                if (self.editor._dict
+                        and not self.editor._dict.check(word_object.group(1))):
                    format = self.format(word_object.start(1))
                    format.setUnderlineColor(self._misspelledColor)
                    # SpellCheckUnderline fails with some fonts
                    format.setUnderlineStyle(QTextCharFormat.WaveUnderline)
                    self.setFormat(word_object.start(1),
-                                   word_object.end(1) - word_object.start(1), format)
+                                   word_object.end(1) - word_object.start(1),
+                                   format)
--- a/manuskript/ui/highlighters/markdownEnums.py
+++ b/manuskript/ui/highlighters/markdownEnums.py
@ -0,0 +1,94 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+        
+#==============================================================================
+#   MARKDOWN STATES
+#==============================================================================
+
+class MarkdownState:
+    # Integer constants naming the markdown state of a text block.
+    # MarkdownHighlighter.highlightBlock() compares the current block state
+    # against these values (e.g. the pipe-table states and the blockquote
+    # state). MarkdownStateUnknown is -1; the other states are numbered
+    # consecutively from 0.
+    MarkdownStateUnknown = -1
+    MarkdownStateParagraphBreak = 0
+    MarkdownStateListLineBreak = 1
+    MarkdownStateParagraph = 2
+    # ATX headings, levels 1 to 6
+    MarkdownStateAtxHeading1 = 3
+    MarkdownStateAtxHeading2 = 4
+    MarkdownStateAtxHeading3 = 5
+    MarkdownStateAtxHeading4 = 6
+    MarkdownStateAtxHeading5 = 7
+    MarkdownStateAtxHeading6 = 8
+    MarkdownStateBlockquote = 9
+    # Code blocks and code fences
+    MarkdownStateCodeBlock = 10
+    MarkdownStateInGithubCodeFence = 11
+    MarkdownStateInPandocCodeFence = 12
+    MarkdownStateCodeFenceEnd = 13
+    MarkdownStateComment = 14
+    MarkdownStateHorizontalRule = 15
+    # Lists
+    MarkdownStateNumberedList = 16
+    MarkdownStateBulletPointList = 17
+    # Setext headings: one state per heading level and per line
+    MarkdownStateSetextHeading1Line1 = 18
+    MarkdownStateSetextHeading1Line2 = 19
+    MarkdownStateSetextHeading2Line1 = 20
+    MarkdownStateSetextHeading2Line2 = 21
+    # Pipe tables
+    MarkdownStatePipeTableHeader = 22
+    MarkdownStatePipeTableDivider = 23
+    MarkdownStatePipeTableRow = 24
+
+#==============================================================================
+#   MARKDOWN TOKEN TYPE
+#==============================================================================
+        
+class MarkdownTokenType:
+    # Integer constants identifying the type of a token. The highlighter
+    # uses these values as keys of its theme dictionary and skips tokens
+    # whose type is TokenUnknown.
+    TokenUnknown  = -1
+    
+    # Titles
+    TokenAtxHeading1 = 0
+    TokenAtxHeading2 = 1
+    TokenAtxHeading3 = 2
+    TokenAtxHeading4 = 3
+    TokenAtxHeading5 = 4
+    TokenAtxHeading6 = 5
+    TokenSetextHeading1Line1 = 6
+    TokenSetextHeading1Line2 = 7
+    TokenSetextHeading2Line1 = 8
+    TokenSetextHeading2Line2 = 9
+    
+    # Inline and block-level tokens
+    TokenEmphasis = 10
+    TokenStrong = 11
+    TokenStrikethrough = 12
+    TokenVerbatim = 13
+    TokenHtmlTag = 14
+    TokenHtmlEntity = 15
+    TokenAutomaticLink = 16
+    TokenInlineLink = 17
+    TokenReferenceLink = 18
+    TokenReferenceDefinition = 19
+    TokenImage = 20
+    TokenHtmlComment = 21
+    TokenNumberedList = 22
+    TokenBulletPointList = 23
+    TokenHorizontalRule = 24
+    TokenLineBreak = 25
+    TokenBlockquote = 26
+    TokenCodeBlock = 27
+    TokenGithubCodeFence = 28
+    TokenPandocCodeFence = 29
+    TokenCodeFenceEnd = 30
+    TokenMention = 31
+    TokenTableHeader = 32
+    TokenTableDivider = 33
+    TokenTablePipe = 34
+    TokenSuperScript = 35
+    TokenSubScript = 36
+    # NOTE(review): presumably a sentinel marking the end of the token
+    # types (one past the highest value) -- confirm against the tokenizer.
+    TokenLast = 37
+    
+    # All heading token types (ATX levels 1-6 plus both lines of both
+    # Setext heading levels).
+    TITLES =  [TokenAtxHeading1, TokenAtxHeading2, TokenAtxHeading3,
+               TokenAtxHeading4, TokenAtxHeading5, TokenAtxHeading6,
+               TokenSetextHeading1Line1, TokenSetextHeading1Line2,
+               TokenSetextHeading2Line1, TokenSetextHeading2Line2]
+    
+    
+    
+class BlockquoteStyle:
+    # Integer constants selecting how blockquotes are rendered; the value is
+    # stored by MarkdownHighlighter.setBlockquoteStyle().
+    BlockquoteStylePlain = 0
+    BlockquoteStyleItalic = 1
+    BlockquoteStyleFancy = 2
--- a/manuskript/ui/highlighters/markdownHighlighter.py
+++ b/manuskript/ui/highlighters/markdownHighlighter.py
@ -0,0 +1,718 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+"""
+A QSyntaxHighlighter for markdown, using tokenizer. More accurate than simple
+regexp, but not yet perfect.
+"""
+
+import re
+from PyQt5.QtCore import Qt, pyqtSignal, qWarning, QRegExp
+from PyQt5.QtGui import (QSyntaxHighlighter, QTextBlock, QColor, QFont,
+                         QTextCharFormat, QBrush, QPalette)
+from PyQt5.QtWidgets import qApp, QStyle
+
+from manuskript.ui.highlighters.markdownTokenizer import MarkdownTokenizer
+from manuskript.ui.highlighters.markdownEnums import MarkdownState as MS
+from manuskript.ui.highlighters.markdownEnums import MarkdownTokenType as MTT
+from manuskript.ui.highlighters.markdownEnums import BlockquoteStyle as BS
+
+# Un longue ligne. Un longue ligne. Un longue ligne. Un longue ligne.asdasdasda
+
+GW_FADE_ALPHA = 140
+
+# Highlighter based on GhostWriter (http://wereturtle.github.io/ghostwriter/).
+# GPLV3+.
+
+# FIXME: Setext headings don't work anymore
+
+class MarkdownHighlighter(QSyntaxHighlighter):
+
+    highlightBlockAtPosition = pyqtSignal(int)
+    headingFound = pyqtSignal(int, str, QTextBlock)
+    headingRemoved = pyqtSignal(int)
+
+    def __init__(self, editor):
+        """
+        Create a highlighter working on the document of `editor`.
+
+        `editor` is kept as self.editor; its document(), font() and
+        textCursor() are used by the highlighter.
+        """
+        QSyntaxHighlighter.__init__(self, editor.document())
+
+        # Default values
+        self.editor = editor
+        self.tokenizer = MarkdownTokenizer()
+
+        self.spellCheckEnabled = False
+        # spellCheck() reads both attributes below, so they have to exist
+        # even when onTypingPaused()/onTypingResumed() or setDictionary()
+        # have never been called.
+        self.typingPaused = True
+        self.dictionary = None
+        self.inBlockquote = False
+        self.defaultTextColor = QColor(Qt.black)
+        self.backgroundColor = QColor(Qt.white)
+        self.markupColor = QColor(Qt.black)
+        self.linkColor = QColor(Qt.blue)
+        self.spellingErrorColor = QColor(Qt.red)
+        self.blockquoteStyle = BS.BlockquoteStyleFancy
+
+        # Settings
+        # (attribute name is misspelled, but setUseUnderlineForEmphasis()
+        # uses the same spelling)
+        self.useUndlerlineForEmphasis = False
+        self.highlightLineBreaks = True
+
+        # Queued, so that a block re-highlight requested from within
+        # highlightBlock() runs after the current highlighting pass.
+        self.highlightBlockAtPosition.connect(self.onHighlightBlockAtPosition,
+                                              Qt.QueuedConnection)
+
+        # font = QFont("Monospace", 12, QFont.Normal, False)
+        font = self.document().defaultFont()
+        font.setStyleStrategy(QFont.PreferAntialias)
+        self.defaultFormat = QTextCharFormat()
+        self.defaultFormat.setFont(font)
+        self.defaultFormat.setForeground(QBrush(self.defaultTextColor))
+
+        self.theme = self.defaultTheme()
+        self.setupHeadingFontSize(True)
+
+        # Extra highlighting: words/tags, and the current search expression
+        self.highlightedWords = []
+        self.highlightedTags = []
+        self.searchExpression = ""
+        self.searchExpressionRegExp = False
+        self.searchExpressionCase = False
+
+        # (regular expression, theme) pairs applied by highlightBlock().
+        # With groups, group 1 and group 2 are formatted as markup.
+        self.customRules = [
+            ("(°).*?(°)", {"background": Qt.yellow,
+                           "markupColor":Qt.lightGray}),
+            ]
+
+        #f = self.document().defaultFont()
+        #f.setFamily("monospace")
+        #self.document().setDefaultFont(f)
+
+    def highlightBlock(self, text):
+        """
+        Format one block of text (reimplements
+        QSyntaxHighlighter.highlightBlock).
+
+        The first block of the document is formatted as a title and nothing
+        else is applied to it. Every other block is tokenized and formatted
+        according to the theme, then spell check, hashtags, highlighted
+        words/tags, the searched expression and custom rules are applied on
+        top, in that order.
+
+        Note:  Never set the QTextBlockFormat for a QTextBlock from within
+        the highlighter. Depending on how the block format is modified,
+        a recursive call to the highlighter may be triggered, which will
+        cause the application to crash.
+
+        Likewise, don't try to set the QTextBlockFormat outside the highlighter
+        (i.e., from within the text editor).  While the application will not
+        crash, the format change will be added to the undo stack.  Attempting
+        to undo from that point on will cause the undo stack to be virtually
+        frozen, since undoing the format operation causes the text to be
+        considered changed, thus triggering the slot that changes the text
+        formatting to be triggered yet again.
+        """
+
+        if self.currentBlock().blockNumber() == 0:
+            # This is the title
+            bf = QTextCharFormat()
+            bf.setFontPointSize(self.editor.font().pointSize() * 2)
+            bf.setFontWeight(QFont.Bold)
+            bf.setForeground(Qt.lightGray)
+            self.setFormat(0, len(text), bf)
+            return
+
+        # State of this block before re-tokenizing; compared with the new
+        # state at the end to detect a heading that disappeared.
+        lastState = self.currentBlockState()
+        self.setFormat(0, len(text), self.defaultFormat)
+
+        if self.tokenizer is not None:
+            self.tokenizer.clear()
+            block = self.currentBlock()
+            nextState = MS.MarkdownStateUnknown
+            previousState = self.previousBlockState()
+
+            if block.next().isValid():
+                nextState = block.next().userState()
+
+            self.tokenizer.tokenize(text, lastState, previousState, nextState)
+            self.setCurrentBlockState(self.tokenizer.getState())
+
+            self.inBlockquote = self.tokenizer.getState() == MS.MarkdownStateBlockquote
+
+            # STATE FORMATTING
+            # FIXME: generic
+            if self.currentBlockState() in [
+                    MS.MarkdownStatePipeTableHeader,
+                    MS.MarkdownStatePipeTableDivider,
+                    MS.MarkdownStatePipeTableRow]:
+                fmt = QTextCharFormat()
+                f = fmt.font()
+                f.setFamily("Monospace")
+                fmt.setFont(f)
+                self.setFormat(0, len(text), fmt)
+
+            # Monospace the blank chars
+            i = 0
+            while i <= len(text)-1 and text[i] in [" ", "\t"]:
+                fmt = self.format(i)
+                fmt.setFontFamily("Monospace")
+                self.setFormat(i, 1, fmt)
+                i += 1
+
+            #if self.currentBlockState() == MS.MarkdownStateBlockquote:
+                #fmt = QTextCharFormat(self.defaultFormat)
+                #fmt.setForeground(Qt.lightGray)
+                #self.setFormat(0, len(text), fmt)
+
+            tokens = self.tokenizer.getTokens()
+
+            for token in tokens:
+                if token.type == MTT.TokenUnknown:
+                    qWarning("Highlighter found unknown token type in text block.")
+                    continue
+
+                if token.type in [
+                        MTT.TokenAtxHeading1,
+                        MTT.TokenAtxHeading2,
+                        MTT.TokenAtxHeading3,
+                        MTT.TokenAtxHeading4,
+                        MTT.TokenAtxHeading5,
+                        MTT.TokenAtxHeading6,
+                        MTT.TokenSetextHeading1Line1,
+                        MTT.TokenSetextHeading2Line1,
+                    ]:
+                    self.storeHeadingData(token, text)
+
+                self.applyFormattingForToken(token, text)
+
+            # The tokenizer asked for the previous block to be highlighted
+            # again; the signal is connected with a queued connection.
+            if self.tokenizer.backtrackRequested():
+                previous = self.currentBlock().previous()
+                self.highlightBlockAtPosition.emit(previous.position())
+
+        if self.spellCheckEnabled:
+            self.spellCheck(text)
+
+        # HASHTAGS AND HIGHLIGHTS
+
+        # Hashtags
+        # The pattern needs one non-'#' character before the hashtag; that
+        # character is excluded from the formatted range (hence the +1/-1).
+        s = 0
+        ht = QRegExp(r'([^#])(#[\w]+)')
+        while ht.indexIn(text, s) >= 0:
+            f = self.format(ht.pos()+1)
+            f.setForeground(QColor("#07c"))
+            f.setFontWeight(QFont.Bold)
+            self.setFormat(ht.pos()+1, ht.matchedLength()-1, f)
+            s = ht.pos() + 1
+
+        # Highlighted (always case-insensitive)
+        loweredText = text.lower()
+        for w in self.highlightedWords + self.highlightedTags:
+            loweredWord = w.lower()
+            pos = loweredText.find(loweredWord)
+            while pos >= 0:
+                # Character by character, to keep the existing format of
+                # each character and only change its background.
+                for i in range(pos, pos + len(w)):
+                    f = self.format(i)
+                    f.setBackground(QBrush(QColor("#fAf")))
+                    self.setFormat(i, 1, f)
+                pos = loweredText.find(loweredWord, pos+1)
+
+        # Searched
+        #FIXME: consider searchExpressionRegExp
+        if self.searchExpression:
+            s = self.searchExpression
+
+            if not self.searchExpressionRegExp:
+                # Fold case on both sides only for a case-insensitive
+                # search, and use the same pair for every occurrence so a
+                # case-sensitive search stays case-sensitive after the
+                # first hit.
+                if self.searchExpressionCase:
+                    haystack, needle = text, s
+                else:
+                    haystack, needle = loweredText, s.lower()
+                pos = haystack.find(needle)
+                while pos >= 0:
+                    for i in range(pos, pos + len(s)):
+                        f = self.format(i)
+                        f.setBackground(QBrush(QColor("#Aff")))
+                        self.setFormat(i, 1, f)
+                    pos = haystack.find(needle, pos+1)
+
+            else:
+                # Using QRegExp
+                rx = QRegExp(s)
+                if not self.searchExpressionCase:
+                    rx.setCaseSensitivity(Qt.CaseInsensitive)
+                p = rx.indexIn(text)
+                while p != -1:
+                    f = self.format(p)
+                    f.setBackground(QBrush(QColor("#Aff")))
+                    self.setFormat(p, rx.matchedLength(), f)
+                    p = rx.indexIn(text, p + 1)
+
+                # Using python re
+                #try:
+                    #for m in re.finditer(s, text):
+                        #f = self.format(m.start())
+                        #f.setBackground(QBrush(QColor("#0ff")))
+                        #self.setFormat(m.start(), len(m.group()), f)
+                #except:
+                    ## Probably malformed regExp
+                    #pass
+
+        # Custom rules
+        for rule, theme in self.customRules:
+            for m in re.finditer(rule, text):
+
+                if not m.groups():  # No groups, therefore no markup
+                    f = self.format(m.start())
+                    f, _ = self.formatsFromTheme(theme, f)
+                    self.setFormat(m.start(), len(m.group()), f)
+
+                else:
+                    # Group 1 is the opening markup, group 2 the closing
+                    # markup; what lies in between is the content.
+                    mf = self.format(m.start())
+                    f = self.format(m.start() + len(m.group(1)))
+                    f, mf = self.formatsFromTheme(theme, f, mf)
+                    self.setFormat(m.start(1), len(m.group(1)), mf)
+                    self.setFormat(m.start(2), len(m.group(2)), mf)
+                    self.setFormat(m.start(1) + len(m.group(1)),
+                                   len(m.group())
+                                   - len(m.group(1))
+                                   - len(m.group(2)), f)
+
+        # If the block has transitioned from previously being a heading to now
+        # being a non-heading, signal that the position in the document no
+        # longer contains a heading.
+
+        if self.isHeadingBlockState(lastState) and \
+           not self.isHeadingBlockState(self.currentBlockState()):
+            self.headingRemoved.emit(self.currentBlock().position())
+
+
+    ###########################################################################
+    # COLORS & FORMATTING
+    ###########################################################################
+
+    def defaultTheme(self):
+        """
+        Build and return the default theme.
+
+        The theme is a dict mapping "markup" to the default markup color,
+        and MarkdownTokenType values to a dict of formatting options (see
+        formatsFromTheme() for the options that are read).
+        """
+
+        palette = qApp.palette()
+
+        # Markup and dark colors come from the application palette, with a
+        # gray fallback when the palette gives plain black.
+        markup = palette.color(QPalette.Mid)
+        if markup == Qt.black:
+            markup = Qt.lightGray
+        dark = palette.color(QPalette.Dark)
+        if dark == Qt.black:
+            dark = QColor(Qt.gray)
+        darker = dark.darker(150)
+
+        # Text background, and two darker shades of it
+        background = palette.color(QPalette.Base)
+        lightBackground = background.darker(130)
+        veryLightBackground = background.darker(105)
+
+        theme = {"markup": markup}
+
+        # Example of the options a token theme can contain:
+            #"color": Qt.red,
+            #"deltaSize": 10,
+            #"background": Qt.yellow,
+            #"monospace": True,
+            #"bold": True,
+            #"italic": True,
+            #"underline": True,
+            #"overline": True,
+            #"strike": True,
+            #"formatMarkup": True,
+            #"markupBold": True,
+            #"markupColor": Qt.blue,
+            #"markupBackground": Qt.green,
+            #"markupMonospace": True,
+            #"super":True,
+            #"sub":True
+
+        # Titles: every heading token gets its own dict
+        for tokenType in MTT.TITLES:
+            theme[tokenType] = {
+                "formatMarkup": True,
+                "bold": True,
+                "monospace": True,
+            }
+
+        # ATX headings: dark blue for level 1, lighter for each next level
+        # (lighter(150) for level 2 up to lighter(350) for level 6).
+        headingColor = QColor(Qt.darkBlue)
+        theme[MTT.TokenAtxHeading1]["color"] = headingColor
+        lighterHeadings = (MTT.TokenAtxHeading2, MTT.TokenAtxHeading3,
+                           MTT.TokenAtxHeading4, MTT.TokenAtxHeading5,
+                           MTT.TokenAtxHeading6)
+        for step, tokenType in enumerate(lighterHeadings, start=1):
+            theme[tokenType]["color"] = headingColor.lighter(100 + 50 * step)
+
+        # Second line of Setext headings: replaces the title dict set above
+        for tokenType in (MTT.TokenSetextHeading1Line2,
+                          MTT.TokenSetextHeading2Line2):
+            theme[tokenType] = {"color": markup, "monospace": True}
+
+        # Beautifiers
+        theme[MTT.TokenEmphasis] = {"italic": True}
+        theme[MTT.TokenStrong] = {"bold": True}
+        theme[MTT.TokenStrikethrough] = {"strike": True}
+        theme[MTT.TokenVerbatim] = {
+            "monospace": True,
+            "background": veryLightBackground,
+            "formatMarkup": True,
+            "markupColor": markup,
+        }
+        theme[MTT.TokenSuperScript] = {"super": True, "formatMarkup": True}
+        theme[MTT.TokenSubScript] = {"sub": True, "formatMarkup": True}
+
+        # HTML
+        for tokenType in (MTT.TokenHtmlTag, MTT.TokenHtmlEntity):
+            theme[tokenType] = {"color": Qt.red}
+
+        # Links: each entry gets its own copy of the palette link color
+        for tokenType in (MTT.TokenAutomaticLink, MTT.TokenInlineLink,
+                          MTT.TokenReferenceLink,
+                          MTT.TokenReferenceDefinition):
+            theme[tokenType] = {"color": palette.color(QPalette.Link)}
+
+        theme[MTT.TokenImage] = {"color": Qt.green}
+        theme[MTT.TokenHtmlComment] = {"color": dark}
+
+        # Lists: only the markup is formatted
+        for tokenType in (MTT.TokenNumberedList, MTT.TokenBulletPointList):
+            theme[tokenType] = {
+                "markupColor": QColor(Qt.red).lighter(),
+                "markupBold": True,
+                "markupMonospace": True,
+            }
+
+        theme[MTT.TokenHorizontalRule] = {
+            "overline": True,
+            "underline": True,
+            "monospace": True,
+            "color": markup,
+        }
+        theme[MTT.TokenLineBreak] = {"background": markup}
+        theme[MTT.TokenBlockquote] = {
+            "color": darker,
+            "markupColor": lightBackground,
+            "markupBackground": lightBackground,
+        }
+
+        # Code
+        theme[MTT.TokenCodeBlock] = {
+            "color": darker,
+            "markupBackground": veryLightBackground,
+            "monospace": True,
+        }
+        for tokenType in (MTT.TokenGithubCodeFence,
+                          MTT.TokenPandocCodeFence,
+                          MTT.TokenCodeFenceEnd):
+            theme[tokenType] = {"color": markup}
+
+        theme[MTT.TokenMention] = {} # FIXME
+
+        # Tables
+        theme[MTT.TokenTableHeader] = {"color": darker, "monospace": True}
+        for tokenType in (MTT.TokenTableDivider, MTT.TokenTablePipe):
+            theme[tokenType] = {"color": markup, "monospace": True}
+
+        return theme
+
+    def setColorScheme(self, defaultTextColor, backgroundColor, markupColor,
+                       linkColor, spellingErrorColor):
+        """
+        Store the five colors of the color scheme, use the default text
+        color as foreground of the default format, then rehighlight.
+        """
+        (self.defaultTextColor, self.backgroundColor, self.markupColor,
+         self.linkColor, self.spellingErrorColor) = (
+            defaultTextColor, backgroundColor, markupColor,
+            linkColor, spellingErrorColor)
+
+        textBrush = QBrush(defaultTextColor)
+        self.defaultFormat.setForeground(textBrush)
+
+        # FIXME: generate a theme based on that
+        self.rehighlight()
+
+    ###########################################################################
+    # ACTUAL FORMATTING
+    ###########################################################################
+
+    def applyFormattingForToken(self, token, text):
+        """
+        Apply the theme to one token of the current block: the opening
+        markup, the text between the markups, and the closing markup (if
+        any). A token of unknown type only produces a warning.
+        """
+        if token.type == MTT.TokenUnknown:
+            qWarning("MarkdownHighlighter.applyFormattingForToken() was passed"
+                     " in a token of unknown type.")
+            return
+
+        contentStart = token.position + token.openingMarkupLength
+        contentLength = (token.length
+                         - token.openingMarkupLength
+                         - token.closingMarkupLength)
+
+        # Start from the formats currently applied at those positions
+        contentFormat = self.format(contentStart)
+        markupFormat = self.format(token.position)
+        defaultMarkupColor = self.theme.get("markup")
+        if defaultMarkupColor:
+            markupFormat.setForeground(defaultMarkupColor)
+
+        ## Debug
+        def debug():
+            # Prints the text, and below it a line showing the markups (^)
+            # and the token type.
+            print("{}\n{}{}{}{}   (state:{})".format(
+                text,
+                " "*token.position,
+                "^"*token.openingMarkupLength,
+                str(token.type).center(contentLength, "-"),
+                "^" * token.closingMarkupLength,
+                self.currentBlockState(),)
+                 )
+
+        #if token.type in range(6, 10):
+        #debug()
+
+        tokenTheme = self.theme.get(token.type)
+        if tokenTheme:
+            contentFormat, markupFormat = self.formatsFromTheme(
+                tokenTheme, contentFormat, markupFormat)
+
+        # Opening markup
+        self.setFormat(token.position, token.openingMarkupLength,
+                       markupFormat)
+
+        # Text
+        self.setFormat(contentStart, contentLength, contentFormat)
+
+        # Closing markup
+        if token.closingMarkupLength > 0:
+            closingStart = (token.position + token.length
+                            - token.closingMarkupLength)
+            self.setFormat(closingStart, token.closingMarkupLength,
+                           markupFormat)
+
+    def formatsFromTheme(self, theme, format=None, markupFormat=None):
+        """
+        Apply the options of `theme` (a dict) to a text format and a markup
+        format, and return them as a tuple (format, markupFormat).
+
+        `format` and `markupFormat` are modified in place when given; a new
+        QTextCharFormat is created for each one that is omitted. When the
+        theme sets "formatMarkup", the returned markup format is a copy of
+        the text format that keeps the markup's foreground.
+        """
+        # Fresh objects on every call: a QTextCharFormat() default argument
+        # would be created once and shared (and modified) by all calls.
+        if format is None:
+            format = QTextCharFormat()
+        if markupFormat is None:
+            markupFormat = QTextCharFormat()
+
+        # Token
+        if theme.get("color"):
+            format.setForeground(theme["color"])
+        if theme.get("deltaSize"):
+            format.setFontPointSize(format.fontPointSize() + theme["deltaSize"])
+        if theme.get("background"):
+            format.setBackground(theme["background"])
+        if theme.get("monospace"):
+            format.setFontFamily("Monospace")
+        if theme.get("bold"):
+            format.setFontWeight(QFont.Bold)
+        if theme.get("italic"):
+            format.setFontItalic(theme["italic"])
+        if theme.get("underline"):
+            format.setFontUnderline(theme["underline"])
+        if theme.get("overline"):
+            format.setFontOverline(theme["overline"])
+        if theme.get("strike"):
+            format.setFontStrikeOut(theme["strike"])
+        if theme.get("super"):
+            format.setVerticalAlignment(QTextCharFormat.AlignSuperScript)
+        if theme.get("sub"):
+            format.setVerticalAlignment(QTextCharFormat.AlignSubScript)
+
+        # Markup
+        if theme.get("formatMarkup"):
+            # Markup looks like the text, except for its foreground
+            c = markupFormat.foreground()
+            markupFormat = QTextCharFormat(format)
+            markupFormat.setForeground(c)
+        if theme.get("markupBold"):
+            markupFormat.setFontWeight(QFont.Bold)
+        if theme.get("markupColor"):
+            markupFormat.setForeground(theme["markupColor"])
+        if theme.get("markupBackground"):
+            markupFormat.setBackground(theme["markupBackground"])
+        if theme.get("markupMonospace"):
+            markupFormat.setFontFamily("Monospace")
+
+        return format, markupFormat
+
+    ###########################################################################
+    # SETTINGS
+    ###########################################################################
+
+    def setHighlighted(self, words, tags):
+        """
+        Set the words and tags to highlight; rehighlight only if one of
+        them differs from the current value.
+        """
+        wordsChanged = self.highlightedWords != words
+        changed = wordsChanged or self.highlightedTags != tags
+
+        self.highlightedWords, self.highlightedTags = words, tags
+
+        if not changed:
+            return
+        self.rehighlight()
+
+    def setSearched(self, expression, regExp=False, caseSensitivity=False):
+        """
+        Set the expression currently searched, so that it gets highlighted.
+
+        `regExp` tells whether `expression` is a regular expression, and
+        `caseSensitivity` whether the search is case-sensitive. The
+        document is rehighlighted only if one of the three values changed.
+        """
+        before = (self.searchExpression,
+                  self.searchExpressionRegExp,
+                  self.searchExpressionCase)
+        after = (expression, regExp, caseSensitivity)
+
+        (self.searchExpression,
+         self.searchExpressionRegExp,
+         self.searchExpressionCase) = after
+
+        if before != after:
+            self.rehighlight()
+
+    def setDictionary(self, dictionary):
+        """
+        Set the dictionary used by spellCheck(); rehighlight if spell
+        checking is enabled.
+        """
+        self.dictionary = dictionary
+        if not self.spellCheckEnabled:
+            return
+        self.rehighlight()
+
+    def increaseFontSize(self):
+        """Add one point to the default format's font size, then rehighlight."""
+        currentSize = self.defaultFormat.fontPointSize()
+        self.defaultFormat.setFontPointSize(currentSize + 1.0)
+        self.rehighlight()
+
+    def decreaseFontSize(self):
+        """Remove one point from the default format's font size, then rehighlight."""
+        currentSize = self.defaultFormat.fontPointSize()
+        self.defaultFormat.setFontPointSize(currentSize - 1.0)
+        self.rehighlight()
+
+    def setEnableLargeHeadingSizes(self, enable):
+        """
+        Enable or disable larger fonts for headings (see
+        setupHeadingFontSize()), then rehighlight.
+        """
+        self.setupHeadingFontSize(enable)
+        self.rehighlight()
+
+    def setupHeadingFontSize(self, useLargeHeadings):
+        """
+        Set the "deltaSize" option of every heading token in the theme:
+        a per-level value if `useLargeHeadings` is true, 0 otherwise.
+        Does not rehighlight.
+        """
+        if not useLargeHeadings:
+            for tokenType in MTT.TITLES:
+                self.theme[tokenType]["deltaSize"] = 0
+            return
+
+        # (token type, points added to the font size)
+        largeDeltas = (
+            (MTT.TokenSetextHeading1Line1, 7),
+            (MTT.TokenSetextHeading2Line1, 5),
+            (MTT.TokenSetextHeading1Line2, 7),
+            (MTT.TokenSetextHeading2Line2, 5),
+            (MTT.TokenAtxHeading1, 7),
+            (MTT.TokenAtxHeading2, 5),
+            (MTT.TokenAtxHeading3, 3),
+            (MTT.TokenAtxHeading4, 2),
+            (MTT.TokenAtxHeading5, 1),
+            (MTT.TokenAtxHeading6, 0),
+        )
+        for tokenType, delta in largeDeltas:
+            self.theme[tokenType]["deltaSize"] = delta
+
+    def setUseUnderlineForEmphasis(self, enable):
+        """Store the setting, then rehighlight."""
+        # The attribute name is misspelled ("Undlerline"), consistently with
+        # __init__().
+        self.useUndlerlineForEmphasis = enable
+        self.rehighlight()
+
+    def setFont(self, fontFamily, fontSize):
+        """
+        Use a normal-weight, non-italic font of the given family and point
+        size for the default format, then rehighlight.
+        """
+        defaultFont = QFont(fontFamily, fontSize, QFont.Normal, False)
+        self.defaultFormat.setFont(defaultFont)
+        self.rehighlight()
+
+    def setSpellCheckEnabled(self, enabled):
+        """
+        Enable or disable the spell check done by highlightBlock(), then
+        rehighlight.
+        """
+        self.spellCheckEnabled = enabled
+        self.rehighlight()
+
+    def setBlockquoteStyle(self, style):
+        """
+        Set the blockquote style (a BlockquoteStyle value), then
+        rehighlight.
+
+        With BlockquoteStyleItalic, the blockquote entry of the theme gets
+        the "italic" option, which formatsFromTheme() turns into italic
+        text; any other style switches it off.
+        """
+        self.blockquoteStyle = style
+
+        # The highlighter has no emphasizeToken table (it is never created);
+        # formatting options live in the theme, so italic goes there.
+        blockquoteTheme = self.theme.setdefault(MTT.TokenBlockquote, {})
+        blockquoteTheme["italic"] = (style == BS.BlockquoteStyleItalic)
+
+        self.rehighlight()
+
+    def setHighlightLineBreaks(self, enable):
+        """Store the setting, then rehighlight."""
+        self.highlightLineBreaks = enable
+        self.rehighlight()
+
+    ###########################################################################
+    # GHOSTWRITER SPECIFIC?
+    ###########################################################################
+
+    def onTypingResumed(self):
+        """Record that typing is in progress again (clears ``typingPaused``)."""
+        self.typingPaused = False
+
+    def onTypingPaused(self):
+        """Record that typing paused and rehighlight the cursor's block.
+
+        The block is looked up in the document from the editor's current
+        text cursor position.
+        """
+        self.typingPaused = True
+        cursorPosition = self.editor.textCursor().position()
+        cursorBlock = self.document().findBlock(cursorPosition)
+        self.rehighlightBlock(cursorBlock)
+
+    def onHighlightBlockAtPosition(self, position):
+        """Rehighlight the document block found at ``position``."""
+        targetBlock = self.document().findBlock(position)
+        self.rehighlightBlock(targetBlock)
+
+    def onTextBlockRemoved(self, block):
+        """Emit ``headingRemoved`` when a removed block was a heading.
+
+        ``block`` is the removed text block; the signal carries its
+        position.  Nothing is emitted for non-heading blocks.
+        """
+        # userState is a method on the block: it must be called, otherwise
+        # the bound method itself is compared against the heading states and
+        # the check can never succeed.
+        if self.isHeadingBlockState(block.userState()):
+            self.headingRemoved.emit(block.position())
+
+    ###########################################################################
+    # SPELLCHECK
+    ###########################################################################
+
+    def spellCheck(self, text):
+        """Underline the misspelled words that the dictionary finds in ``text``.
+
+        Words are obtained by repeatedly calling ``self.dictionary.check()``,
+        each time starting just after the previous hit.  A word is
+        underlined when typing is paused, or when the text cursor is not
+        located exactly at the end of that word.
+        """
+        cursorPosition = self.editor.textCursor().position()
+        cursorPosBlock = self.document().findBlock(cursorPosition)
+        cursorPosInBlock = -1
+
+        # The in-block cursor offset only matters when the cursor sits in
+        # the block currently being highlighted; otherwise it stays at -1.
+        if self.currentBlock() == cursorPosBlock:
+            cursorPosInBlock = cursorPosition - cursorPosBlock.position()
+
+        misspelledWord = self.dictionary.check(text, 0)
+
+        while not misspelledWord.isNull():
+            startIndex = misspelledWord.position()
+            length = misspelledWord.length()
+
+            if self.typingPaused or cursorPosInBlock != startIndex + length:
+                # Start from the format already applied at this position so
+                # only the underline attributes are changed.
+                spellingErrorFormat = self.format(startIndex)
+                spellingErrorFormat.setUnderlineColor(self.spellingErrorColor)
+                # Fixed typo: the accessor is qApp.style(), not qApp.stlye().
+                spellingErrorFormat.setUnderlineStyle(
+                    qApp.style().styleHint(QStyle.SH_SpellCheckUnderlineStyle))
+
+                self.setFormat(startIndex, length, spellingErrorFormat)
+
+            # Continue the search right after the word just handled.
+            startIndex += length
+            misspelledWord = self.dictionary.check(text, startIndex)
+
+    def storeHeadingData(self, token, text):
+        """Emit ``headingFound`` for a heading token found in ``text``.
+
+        For ATX heading tokens the level is derived from the token type and
+        the heading text is the token's content without its opening and
+        closing markup, stripped of surrounding whitespace.  For the first
+        line of a setext heading the level is 1 or 2 and the heading text is
+        the whole line.  Any other token type logs a warning and emits
+        nothing.
+        """
+        if token.type in [
+                MTT.TokenAtxHeading1,
+                MTT.TokenAtxHeading2,
+                MTT.TokenAtxHeading3,
+                MTT.TokenAtxHeading4,
+                MTT.TokenAtxHeading5,
+                MTT.TokenAtxHeading6]:
+            # The arithmetic relies on the ATX heading token types being
+            # consecutive values.
+            level = token.type - MTT.TokenAtxHeading1 + 1
+            start = token.position + token.openingMarkupLength
+            contentLength = (token.length
+                             - token.openingMarkupLength
+                             - token.closingMarkupLength)
+            headingText = text[start:start + contentLength].strip()
+
+        elif token.type == MTT.TokenSetextHeading1Line1:
+            level = 1
+            headingText = text
+
+        elif token.type == MTT.TokenSetextHeading2Line1:
+            level = 2
+            headingText = text
+
+        else:
+            # Token exposes its type as the ``type`` attribute; it has no
+            # getType() method, so the former call raised AttributeError.
+            qWarning("MarkdownHighlighter.storeHeadingData() encountered" +
+                     " unexpected token: {}".format(token.type))
+            return
+
+        # FIXME: TypeError: could not convert 'TextBlockData' to 'QTextBlockUserData'
+        # blockData = self.currentBlockUserData()
+        # if blockData is None:
+        #     blockData = TextBlockData(self.document(), self.currentBlock())
+        #
+        # self.setCurrentBlockUserData(blockData)
+        self.headingFound.emit(level, headingText, self.currentBlock())
+
+    def isHeadingBlockState(self, state):
+        """Return True when ``state`` is an ATX heading state (levels 1-6)
+        or the first-line state of a setext heading (levels 1-2)."""
+        headingStates = (
+            MS.MarkdownStateAtxHeading1,
+            MS.MarkdownStateAtxHeading2,
+            MS.MarkdownStateAtxHeading3,
+            MS.MarkdownStateAtxHeading4,
+            MS.MarkdownStateAtxHeading5,
+            MS.MarkdownStateAtxHeading6,
+            MS.MarkdownStateSetextHeading1Line1,
+            MS.MarkdownStateSetextHeading2Line1,
+        )
+        return state in headingStates
+
+
+def getLuminance(color):
+    """Return a weighted sum of the color's float RGB components.
+
+    The weights are 0.30 for red, 0.59 for green and 0.11 for blue.
+    """
+    weightedRed = 0.30 * color.redF()
+    weightedGreen = 0.59 * color.greenF()
+    weightedBlue = 0.11 * color.blueF()
+    return weightedRed + weightedGreen + weightedBlue
+
+
+def applyAlphaToChannel(foreground, background, alpha):
+    """Blend one channel: ``alpha`` parts foreground, the rest background.
+
+    ``alpha`` is used directly as the foreground weight and ``1.0 - alpha``
+    as the background weight.
+    """
+    backgroundWeight = 1.0 - alpha
+    return (foreground * alpha) + (background * backgroundWeight)
+
+
+def applyAlpha(foreground, background, alpha):
+    """Return ``foreground`` blended over ``background``.
+
+    ``alpha`` is divided by 255.0 before use, so it is expected on a 0-255
+    scale.  The red, green and blue channels are blended independently with
+    ``applyAlphaToChannel()`` and written into a new color.
+    """
+    blendedColor = QColor(0, 0, 0)
+    normalizedAlpha = alpha / 255.0
+    # applyAlphaToChannel() returns a float, while the QColor channel
+    # setters take an int; convert explicitly (truncating) instead of
+    # relying on implicit float-to-int coercion, which newer Python
+    # versions reject.
+    blendedColor.setRed(int(applyAlphaToChannel(
+        foreground.red(), background.red(), normalizedAlpha)))
+    blendedColor.setGreen(int(applyAlphaToChannel(
+        foreground.green(), background.green(), normalizedAlpha)))
+    blendedColor.setBlue(int(applyAlphaToChannel(
+        foreground.blue(), background.blue(), normalizedAlpha)))
+    return blendedColor
--- a/manuskript/ui/highlighters/markdownTokenizer.py
+++ b/manuskript/ui/highlighters/markdownTokenizer.py
@ -0,0 +1,887 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import re
+from PyQt5.QtCore import *
+from PyQt5.QtGui import *
+from PyQt5.QtWidgets import *
+
+from noteflow.ui.views.markdownEnums import MarkdownState as MS
+from noteflow.ui.views.markdownEnums import MarkdownTokenType as MTT
+
+# This file is simply a python translation of GhostWriter's Tokenizer.
+# http://wereturtle.github.io/ghostwriter/
+# GPLV3+.
+
+# ==============================================================================
+#   TOKEN
+# ==============================================================================
+
+class Token:
+    """One highlighted span of a line, as produced by a tokenizer."""
+
+    def __init__(self):
+        # -1 marks a token whose type has not been assigned yet.
+        self.type = -1
+        # Offset of the span within the line and its length in characters.
+        self.position = 0
+        self.length = 0
+        # Number of characters of markup at the start and end of the span.
+        self.openingMarkupLength = 0
+        self.closingMarkupLength = 0
+
+# ==============================================================================
+#   HIGHLIGHT TOKENIZER
+# ==============================================================================
+
+class HighlightTokenizer:
+    """Base class collecting the tokens, state and backtrack flag of a line.
+
+    Subclasses override ``tokenize()`` and report results through
+    ``addToken()``, ``setState()`` and ``requestBacktrack()``.
+    """
+
+    def __init__(self):
+        self.tokens = []
+        # Same defaults as clear(), so getState() and backtrackRequested()
+        # work on a freshly constructed tokenizer.
+        self.backtrack = False
+        self.state = -1
+
+    def tokenize(self, text, currentState, previousState, nextState):
+        """Tokenize ``text``; the base implementation does nothing."""
+        # Subclass me
+        return 0
+
+    def getTokens(self):
+        """Return the collected tokens ordered by position."""
+        self.tokens = sorted(self.tokens, key=lambda t: t.position)
+        return self.tokens
+
+    def getState(self):
+        """Return the state last set with setState() (-1 when unset)."""
+        return self.state
+
+    def backtrackRequested(self):
+        """Return True when requestBacktrack() was called since clear()."""
+        return self.backtrack
+
+    def clear(self):
+        """Drop all tokens and reset the state and backtrack flag."""
+        self.tokens = []
+        self.backtrack = False
+        self.state = -1
+
+    def addToken(self, token):
+        """Append ``token`` to the collected tokens."""
+        self.tokens.append(token)
+
+        # A type of -1 means the token was added without a type.
+        if token.type == -1:
+            print("Error here", token.position, token.length)
+
+    def setState(self, state):
+        """Store ``state`` as the tokenizer's current state."""
+        self.state = state
+
+    def requestBacktrack(self):
+        """Set the backtrack flag."""
+        self.backtrack = True
+
+    def tokenLessThan(self, t1, t2):
+        """Return True when ``t1`` starts before ``t2``."""
+        # Tokens expose ``position`` as an attribute, not via getPosition().
+        return t1.position < t2.position
+
+
+class MarkdownTokenizer(HighlightTokenizer):
+    
+    # Character used to blank out text that has already been tokenized.
+    DUMMY_CHAR = "$"
+    MAX_MARKDOWN_HEADING_LEVEL = 6
+
+    # Patterns matched against a whole line (block-level constructs).
+    paragraphBreakRegex = QRegExp("^\\s*$")
+    heading1SetextRegex = QRegExp("^===+\\s*$")
+    heading2SetextRegex = QRegExp("^---+\\s*$")
+    blockquoteRegex = QRegExp("^ {0,3}>.*$")
+    githubCodeFenceStartRegex = QRegExp("^```+.*$")
+    githubCodeFenceEndRegex = QRegExp("^```+\\s*$")
+    pandocCodeFenceStartRegex = QRegExp("^~~~+.*$")
+    pandocCodeFenceEndRegex = QRegExp("^~~~+\\s*$")
+    numberedListRegex = QRegExp("^ {0,3}[0-9a-z]+[.)]\\s+.*$")
+    numberedNestedListRegex = QRegExp("^\\s*[0-9a-z]+[.)]\\s+.*$")
+    hruleRegex = QRegExp("\\s*(\\*\\s*){3,}|(\\s*(_\\s*){3,})|((\\s*(-\\s*){3,}))")
+    lineBreakRegex = QRegExp(".*\\s{2,}$")
+
+    # Inline patterns; setMinimal(True) makes the quantifiers non-greedy.
+    emphasisRegex = QRegExp("(\\*(?![\\s*]).*[^\\s*]\\*)|_(?![\\s_]).*[^\\s_]_")
+    emphasisRegex.setMinimal(True)
+    strongRegex = QRegExp("\\*\\*(?=\\S).*\\S\\*\\*(?!\\*)|__(?=\\S).*\\S__(?!_)")
+    strongRegex.setMinimal(True)
+    strikethroughRegex = QRegExp("~~[^\\s]+.*[^\\s]+~~")
+    strikethroughRegex.setMinimal(True)
+    # The carets are written as "\\^" (same pattern as before): "\^" is an
+    # invalid escape sequence in a non-raw Python string literal.
+    superScriptRegex = QRegExp("\\^([^\\s]|(\\\\\\s))+\\^")  # Spaces must be escaped "\ "
+    superScriptRegex.setMinimal(True)
+    subScriptRegex = QRegExp("~([^\\s]|(\\\\\\s))+~")  # Spaces must be escaped "\ "
+    subScriptRegex.setMinimal(True)
+    verbatimRegex = QRegExp("`+")
+    htmlTagRegex = QRegExp("<[^<>]+>")
+    htmlTagRegex.setMinimal(True)
+    htmlEntityRegex = QRegExp("&[a-zA-Z]+;|&#x?[0-9]+;")
+    automaticLinkRegex = QRegExp("(<[a-zA-Z]+\\:.+>)|(<.+@.+>)")
+    automaticLinkRegex.setMinimal(True)
+    inlineLinkRegex = QRegExp("\\[.+\\]\\(.+\\)")
+    inlineLinkRegex.setMinimal(True)
+    referenceLinkRegex = QRegExp("\\[(.+)\\]")
+    referenceLinkRegex.setMinimal(True)
+    referenceDefinitionRegex = QRegExp("^\\s*\\[.+\\]:")
+    imageRegex = QRegExp("!\\[.*\\]\\(.+\\)")
+    imageRegex.setMinimal(True)
+    htmlInlineCommentRegex = QRegExp("<!--.*-->")
+    htmlInlineCommentRegex.setMinimal(True)
+    mentionRegex = QRegExp("\\B@\\w+(\\-\\w+)*(/\\w+(\\-\\w+)*)?")
+    pipeTableDividerRegex = QRegExp("^ {0,3}(\\|[ :]?)?-{3,}([ :]?\\|[ :]?-{3,}([ :]?\\|)?)+\\s*$")
+    
+    def __init__(self):
+        """Initialise the base tokenizer; no extra state is added here."""
+        super().__init__()
+    
+    def tokenize(self, text, currentState, previousState, nextState):
+        """Tokenize one line of text and set the resulting block state.
+
+        ``currentState``, ``previousState`` and ``nextState`` are the block
+        states of this line, the line before and the line after.  Results
+        are reported through ``addToken()``, ``setState()`` and
+        ``requestBacktrack()``; nothing is returned.
+        """
+        self.currentState = currentState
+        self.previousState = previousState
+        self.nextState = nextState
+
+        # States in which the previous line belongs to a list.
+        listStates = [MS.MarkdownStateListLineBreak,
+                      MS.MarkdownStateNumberedList,
+                      MS.MarkdownStateBulletPointList]
+
+        if previousState in [MS.MarkdownStateInGithubCodeFence,
+                             MS.MarkdownStateInPandocCodeFence] and \
+                self.tokenizeCodeBlock(text):
+            # No further tokenizing required
+            pass
+
+        elif previousState != MS.MarkdownStateComment \
+                and self.paragraphBreakRegex.exactMatch(text):
+            # Blank line outside of a multi-line comment.
+            if previousState in listStates:
+                self.setState(MS.MarkdownStateListLineBreak)
+            elif previousState != MS.MarkdownStateCodeBlock or \
+                    (text[:1] != "\t" and text[-4:] != "    "):
+                self.setState(MS.MarkdownStateParagraphBreak)
+
+        elif self.tokenizeSetextHeadingLine2(text) or \
+                self.tokenizeCodeBlock(text) or \
+                self.tokenizeMultilineComment(text) or \
+                self.tokenizeHorizontalRule(text) or \
+                self.tokenizeTableDivider(text):
+            # No further tokenizing required
+            pass
+
+        elif self.tokenizeSetextHeadingLine1(text) or \
+                self.tokenizeAtxHeading(text) or \
+                self.tokenizeBlockquote(text) or \
+                self.tokenizeNumberedList(text) or \
+                self.tokenizeBulletPointList(text):
+            self.tokenizeLineBreak(text)
+            self.tokenizeInline(text)
+
+        else:
+            # Bullet point lists are included here; the list used to name
+            # MarkdownStateNumberedList twice and left them out, so an
+            # indented continuation line after a bullet item fell back to a
+            # plain paragraph.
+            if previousState in listStates:
+                if not self.tokenizeNumberedList(text) and \
+                   not self.tokenizeBulletPointList(text) and \
+                   (text[:1] == "\t" or text[:4] == "    "):
+                    # Indented continuation line: keep the list state.
+                    self.setState(previousState)
+                else:
+                    self.setState(MS.MarkdownStateParagraph)
+            else:
+                self.setState(MS.MarkdownStateParagraph)
+            self.tokenizeLineBreak(text)
+            self.tokenizeInline(text)
+
+        # Make sure that if the second line of a setext heading is removed the
+        # first line is reprocessed.  Otherwise, it will still show up in the
+        # document as a heading.
+        if (previousState == MS.MarkdownStateSetextHeading1Line1 and
+                self.getState() != MS.MarkdownStateSetextHeading1Line2) or \
+           (previousState == MS.MarkdownStateSetextHeading2Line1 and
+                self.getState() != MS.MarkdownStateSetextHeading2Line2):
+            self.requestBacktrack()
+    
+    def tokenizeSetextHeadingLine1(self, text):
+        """Tokenize the first (text) line of a setext heading.
+
+        The decision is based on the next line's state only: when it is a
+        setext underline state, the whole of ``text`` becomes a heading
+        token of the matching level.  Returns True when a token was added.
+        """
+        upcomingState = self.nextState
+
+        if upcomingState == MS.MarkdownStateSetextHeading1Line2:
+            lineState = MS.MarkdownStateSetextHeading1Line1
+            tokenType = MTT.TokenSetextHeading1Line1
+        elif upcomingState == MS.MarkdownStateSetextHeading2Line2:
+            lineState = MS.MarkdownStateSetextHeading2Line1
+            tokenType = MTT.TokenSetextHeading2Line1
+        else:
+            return False
+
+        self.setState(lineState)
+
+        heading = Token()
+        heading.type = tokenType
+        heading.position = 0
+        heading.length = len(text)
+        self.addToken(heading)
+        return True
+    
+    def tokenizeSetextHeadingLine2(self, text):
+        """Tokenize the second (underline) line of a setext heading.
+
+        Returns True when ``text`` was tokenized as a setext underline.
+        A backtrack is requested when the previous line was a paragraph and
+        this line matches an underline pattern, and also when the previous
+        line was a setext heading first line and this line does not match.
+        """
+        level = 0
+        setextMatch = False
+        token = Token()
+        previousState = self.previousState
+
+        if previousState == MS.MarkdownStateSetextHeading1Line1:
+            level = 1
+            setextMatch = self.heading1SetextRegex.exactMatch(text)
+            # The state is set even when the line does not match; the caller
+            # then runs further tokenizers on this line.
+            self.setState(MS.MarkdownStateSetextHeading1Line2)
+            token.type = MTT.TokenSetextHeading1Line2
+
+        elif previousState == MS.MarkdownStateSetextHeading2Line1:
+            level = 2
+            setextMatch = self.heading2SetextRegex.exactMatch(text)
+            self.setState(MS.MarkdownStateSetextHeading2Line2)
+            token.type = MTT.TokenSetextHeading2Line2
+
+        elif previousState == MS.MarkdownStateParagraph:
+            h1Line2 = self.heading1SetextRegex.exactMatch(text)
+            h2Line2 = self.heading2SetextRegex.exactMatch(text)
+
+            if h1Line2 or h2Line2:
+                # Restart tokenizing on the previous line.
+                self.requestBacktrack()
+                token.length = len(text)
+                token.position = 0
+
+                if h1Line2:
+                    self.setState(MS.MarkdownStateSetextHeading1Line2)
+                    token.type = MTT.TokenSetextHeading1Line2
+
+                else:
+                    self.setState(MS.MarkdownStateSetextHeading2Line2)
+                    token.type = MTT.TokenSetextHeading2Line2
+
+                self.addToken(token)
+                return True
+
+        if level > 0:
+            if setextMatch:
+                token.length = len(text)
+                token.position = 0
+                self.addToken(token)
+                return True
+
+            # Restart tokenizing on the previous line.
+            self.requestBacktrack()
+            # This used to be a bare ``False`` expression with no effect.
+            return False
+
+        return False
+    
+    def tokenizeAtxHeading(self, text):
+        """Tokenize an ATX heading (a line starting with pound signs).
+
+        The heading level is the number of leading ``#`` characters, capped
+        at ``MAX_MARKDOWN_HEADING_LEVEL``.  Trailing ``#`` characters are
+        recorded as closing markup.  Returns True when a heading token was
+        added; a line without leading pounds, or made of nothing but the
+        leading pounds, is not a heading.
+        """
+        escapedText = self.dummyOutEscapeCharacters(text)
+        trailingPoundCount = 0
+        level = 0
+
+        # Count the number of pound signs at the front of the string,
+        # up to the maximum allowed, to determine the heading level.
+        # The bounds are checked before indexing so that an empty line
+        # does not raise IndexError.
+        while level < len(escapedText) and \
+                level < self.MAX_MARKDOWN_HEADING_LEVEL and \
+                escapedText[level] == "#":
+            level += 1
+
+        if level > 0 and level < len(text):
+            # Count how many pound signs are at the end of the text.  The
+            # scan stops before the opening markup, so a line consisting
+            # only of pound signs cannot run off the front of the string.
+            scanIndex = len(escapedText) - 1
+            while scanIndex > level and escapedText[scanIndex] == "#":
+                trailingPoundCount += 1
+                scanIndex -= 1
+
+            token = Token()
+            token.position = 0
+            token.length = len(text)
+            # Relies on the ATX heading token types and states being
+            # consecutive values starting at level 1.
+            token.type = MTT.TokenAtxHeading1 + level - 1
+            token.openingMarkupLength = level
+            token.closingMarkupLength = trailingPoundCount
+            self.addToken(token)
+            self.setState(MS.MarkdownStateAtxHeading1 + level - 1)
+            return True
+        return False
+    
+    def tokenizeNumberedList(self, text):
+        """Tokenize a numbered list item.
+
+        The line must match the top-level pattern when the previous line
+        could start a list, or the nested pattern when the previous line
+        already belongs to a list.  The opening markup runs up to and
+        including the character after the first ``.`` or ``)``.  Returns
+        True when a token was added.
+        """
+        previousState = self.previousState
+
+        canStartList = [MS.MarkdownStateParagraphBreak,
+                        MS.MarkdownStateUnknown,
+                        MS.MarkdownStateCodeBlock,
+                        MS.MarkdownStateCodeFenceEnd]
+        insideList = [MS.MarkdownStateListLineBreak,
+                      MS.MarkdownStateNumberedList,
+                      MS.MarkdownStateBulletPointList]
+
+        isListItem = (
+            (previousState in canStartList
+             and self.numberedListRegex.exactMatch(text))
+            or (previousState in insideList
+                and self.numberedNestedListRegex.exactMatch(text)))
+        if not isListItem:
+            return False
+
+        # Pick whichever delimiter comes first; -1 when neither is found.
+        found = [i for i in (text.find("."), text.find(")")) if i >= 0]
+        index = min(found) if found else -1
+
+        if index <= 0:
+            return False
+
+        item = Token()
+        item.type = MTT.TokenNumberedList
+        item.position = 0
+        item.length = len(text)
+        item.openingMarkupLength = index + 2
+        self.addToken(item)
+        self.setState(MS.MarkdownStateNumberedList)
+        return True
+    
+    def tokenizeBulletPointList(self, text):
+        """Tokenize a bullet point list item.
+
+        A bullet item is optional leading whitespace, one of ``+``, ``-``
+        or ``*``, and then a space or tab.  Returns True when a token was
+        added.
+        """
+        previousState = self.previousState
+
+        insideList = [
+            MS.MarkdownStateNumberedList,
+            MS.MarkdownStateBulletPointList,
+            MS.MarkdownStateListLineBreak,
+        ]
+        canStartList = [
+            MS.MarkdownStateParagraphBreak,
+            MS.MarkdownStateUnknown,
+            MS.MarkdownStateCodeBlock,
+            MS.MarkdownStateCodeFenceEnd,
+        ]
+
+        if previousState not in insideList + canStartList:
+            return False
+
+        bulletCharIndex = -1   # stays -1 until a bullet character is seen
+        leadingSpaces = 0
+        whitespaceAfterBullet = False
+
+        # Search for the bullet point character, which can
+        # be either a '+', '-', or '*'.
+        for offset, char in enumerate(text):
+            if char in ("+", "-", "*"):
+                bulletCharIndex = offset
+
+            elif char not in (" ", "\t"):
+                # Any other character means this is not a bullet item.
+                return False
+
+            elif bulletCharIndex >= 0:
+                # Whitespace right after the bullet character confirms the
+                # bullet point; stop scanning.
+                whitespaceAfterBullet = True
+                break
+
+            elif char == " ":
+                leadingSpaces += 1
+
+                # If this list item is the first in the list, more than
+                # three leading spaces would indicate a code block rather
+                # than a bullet point list.
+                if leadingSpaces > 3 and \
+                   previousState not in insideList and \
+                   previousState in canStartList:
+                    return False
+
+            elif previousState in [MS.MarkdownStateParagraphBreak,
+                                   MS.MarkdownStateUnknown]:
+                # A leading tab on the first item would indicate a code
+                # block rather than a bullet point list.
+                return False
+
+        if bulletCharIndex < 0 or not whitespaceAfterBullet:
+            return False
+
+        item = Token()
+        item.type = MTT.TokenBulletPointList
+        item.position = 0
+        item.length = len(text)
+        item.openingMarkupLength = bulletCharIndex + 2
+        self.addToken(item)
+        self.setState(MS.MarkdownStateBulletPointList)
+        return True
+    
+    def tokenizeHorizontalRule(self, text):
+        """Tokenize the whole line as a horizontal rule when it matches
+        ``hruleRegex``.  Returns True when a token was added."""
+        if not self.hruleRegex.exactMatch(text):
+            return False
+
+        rule = Token()
+        rule.type = MTT.TokenHorizontalRule
+        rule.position = 0
+        rule.length = len(text)
+        self.addToken(rule)
+        self.setState(MS.MarkdownStateHorizontalRule)
+        return True
+    
+    def tokenizeLineBreak(self, text):
+        """Tokenize a trailing line break (a line matching ``lineBreakRegex``).
+
+        Only applies when the current and the next line are both paragraph,
+        blockquote or list lines.  A backtrack is requested whenever the
+        previous or the next line is in one of those states.  Returns True
+        when a line break token (the last character of ``text``) was added.
+        """
+        flowStates = [
+            MS.MarkdownStateParagraph,
+            MS.MarkdownStateBlockquote,
+            MS.MarkdownStateNumberedList,
+            MS.MarkdownStateBulletPointList,
+        ]
+
+        if self.currentState not in flowStates:
+            return False
+
+        if self.previousState in flowStates:
+            self.requestBacktrack()
+
+        if self.nextState not in flowStates:
+            return False
+
+        self.requestBacktrack()
+        if not self.lineBreakRegex.exactMatch(text):
+            return False
+
+        lineBreak = Token()
+        lineBreak.type = MTT.TokenLineBreak
+        lineBreak.position = len(text) - 1
+        lineBreak.length = 1
+        self.addToken(lineBreak)
+        return True
+    
+    def tokenizeBlockquote(self, text):
+        """Tokenize a blockquote line.
+
+        The line is a blockquote when the previous line was one, or when it
+        matches ``blockquoteRegex``.  The opening markup covers everything
+        up to the last leading ``>``.  Returns True when a token was added.
+        """
+        isBlockquote = (self.previousState == MS.MarkdownStateBlockquote
+                        or self.blockquoteRegex.exactMatch(text))
+        if not isBlockquote:
+            return False
+
+        # Find any '>' characters at the front of the line; spaces between
+        # them are skipped and any other character ends the scan.
+        markupLength = 0
+        for offset, char in enumerate(text):
+            if char == ">":
+                markupLength = offset + 1
+            elif char != " ":
+                break
+
+        quote = Token()
+        quote.type = MTT.TokenBlockquote
+        quote.position = 0
+        quote.length = len(text)
+        if markupLength > 0:
+            quote.openingMarkupLength = markupLength
+
+        self.addToken(quote)
+        self.setState(MS.MarkdownStateBlockquote)
+        return True
+    
+    def tokenizeCodeBlock(self, text):
+        """Tokenize fenced and indented code block lines.
+
+        Handles, in order: a line inside an open code fence (either code or
+        the closing fence), an indented code block line, and an opening
+        GitHub or Pandoc code fence.  Returns True when a token was added.
+        """
+        previousState = self.previousState
+
+        # Inside a fence: the line is either the closing fence or code.
+        if previousState in [MS.MarkdownStateInGithubCodeFence,
+                             MS.MarkdownStateInPandocCodeFence]:
+            self.setState(previousState)
+
+            closesFence = (
+                (previousState == MS.MarkdownStateInGithubCodeFence
+                 and self.githubCodeFenceEndRegex.exactMatch(text))
+                or (previousState == MS.MarkdownStateInPandocCodeFence
+                    and self.pandocCodeFenceEndRegex.exactMatch(text)))
+
+            fenced = Token()
+            fenced.position = 0
+            fenced.length = len(text)
+            if closesFence:
+                fenced.type = MTT.TokenCodeFenceEnd
+                self.addToken(fenced)
+                self.setState(MS.MarkdownStateCodeFenceEnd)
+            else:
+                fenced.type = MTT.TokenCodeBlock
+                self.addToken(fenced)
+            return True
+
+        # Indented code block: a leading tab or four leading spaces.
+        if previousState in [MS.MarkdownStateCodeBlock,
+                             MS.MarkdownStateParagraphBreak,
+                             MS.MarkdownStateUnknown] and \
+                text.startswith(("\t", "    ")):
+            indented = Token()
+            indented.type = MTT.TokenCodeBlock
+            indented.position = 0
+            indented.length = len(text)
+            # The leading whitespace counts as opening markup.
+            indented.openingMarkupLength = len(text) - len(text.lstrip())
+            self.addToken(indented)
+            self.setState(MS.MarkdownStateCodeBlock)
+            return True
+
+        # Opening code fence.
+        if previousState in [MS.MarkdownStateParagraphBreak,
+                             MS.MarkdownStateParagraph,
+                             MS.MarkdownStateUnknown,
+                             MS.MarkdownStateListLineBreak]:
+            if self.githubCodeFenceStartRegex.exactMatch(text):
+                fenceType = MTT.TokenGithubCodeFence
+                fenceState = MS.MarkdownStateInGithubCodeFence
+            elif self.pandocCodeFenceStartRegex.exactMatch(text):
+                fenceType = MTT.TokenPandocCodeFence
+                fenceState = MS.MarkdownStateInPandocCodeFence
+            else:
+                return False
+
+            self.setState(fenceState)
+            opening = Token()
+            opening.type = fenceType
+            opening.position = 0
+            opening.length = len(text)
+            self.addToken(opening)
+            return True
+
+        return False
+    
+    def tokenizeMultilineComment(self, text):
+        """Tokenize a line that continues a multi-line HTML comment.
+
+        Only applies when the previous line ended inside a comment.  If the
+        comment does not end on this line, the whole line becomes a comment
+        token and True is returned.  If it ends here, only the part up to
+        and including ``-->`` is tokenized and False is returned.
+        """
+        if self.previousState != MS.MarkdownStateComment:
+            return False
+
+        # Find the end of the comment, if any.
+        closeIndex = text.find("-->")
+        comment = Token()
+        comment.type = MTT.TokenHtmlComment
+        comment.position = 0
+
+        if closeIndex < 0:
+            comment.length = len(text)
+            self.addToken(comment)
+            self.setState(MS.MarkdownStateComment)
+            return True
+
+        comment.length = closeIndex + 3
+        self.addToken(comment)
+
+        # Return false so that the rest of the line that isn't within
+        # the commented segment can be highlighted as normal paragraph
+        # text.
+        return False
+    
+    def tokenizeInline(self, text):
+        """Run every inline tokenizer over ``text``.
+
+        Each step receives the text returned by the previous step, so the
+        order of the steps matters.  Always returns True.
+        """
+        escapedText = self.dummyOutEscapeCharacters(text)
+
+        # Check if the line is a reference definition.
+        if self.referenceDefinitionRegex.exactMatch(text):
+            definition = Token()
+            definition.type = MTT.TokenReferenceDefinition
+            definition.position = 0
+            definition.length = escapedText.find(":") + 1
+            self.addToken(definition)
+
+            # Replace the first bracket so that the '[...]:' reference
+            # definition start doesn't get highlighted as a reference link.
+            bracketIndex = escapedText.find("[")
+            if bracketIndex >= 0:
+                escapedText = (escapedText[:bracketIndex]
+                               + self.DUMMY_CHAR
+                               + escapedText[bracketIndex + 1:])
+
+        for step in (self.tokenizeVerbatim,
+                     self.tokenizeHtmlComments,
+                     self.tokenizeTableHeaderRow,
+                     self.tokenizeTableRow):
+            escapedText = step(escapedText)
+
+        # (token type, regex, extra positional arguments for tokenizeMatches)
+        matchRules = (
+            (MTT.TokenImage, self.imageRegex, (0, 0, False, True)),
+            (MTT.TokenInlineLink, self.inlineLinkRegex, (0, 0, False, True)),
+            (MTT.TokenReferenceLink, self.referenceLinkRegex, (0, 0, False, True)),
+            (MTT.TokenHtmlEntity, self.htmlEntityRegex, ()),
+            (MTT.TokenAutomaticLink, self.automaticLinkRegex, (0, 0, False, True)),
+            (MTT.TokenStrikethrough, self.strikethroughRegex, (2, 2, True)),
+            (MTT.TokenStrong, self.strongRegex, (2, 2, True)),
+            (MTT.TokenEmphasis, self.emphasisRegex, (1, 1, True)),
+            (MTT.TokenSuperScript, self.superScriptRegex, (1, 1, True)),
+            (MTT.TokenSubScript, self.subScriptRegex, (1, 1, True)),
+            (MTT.TokenHtmlTag, self.htmlTagRegex, ()),
+            (MTT.TokenMention, self.mentionRegex, (0, 0, False, True)),
+        )
+        for tokenType, regex, extraArgs in matchRules:
+            escapedText = self.tokenizeMatches(
+                tokenType, escapedText, regex, *extraArgs)
+
+        return True
+    
+    def tokenizeVerbatim(self, text):
+        """Tokenize back-tick delimited verbatim spans.
+
+        Returns ``text`` with every tokenized span replaced by dummy
+        characters, so later searches find nothing inside those spans.
+        """
+        start = self.verbatimRegex.indexIn(text)
+
+        while start >= 0:
+            tickCount = self.verbatimRegex.matchedLength()
+
+            # The closing delimiter has the same number of back ticks as
+            # the opening one.
+            closing = "`" * tickCount
+            closingIndex = text.find(closing, start + tickCount)
+
+            if closingIndex < 0:
+                # No end found: start searching again at the very next
+                # character.
+                start += 1
+            else:
+                span = Token()
+                span.type = MTT.TokenVerbatim
+                span.position = start
+                span.length = closingIndex + tickCount - start
+                span.openingMarkupLength = tickCount
+                span.closingMarkupLength = tickCount
+                self.addToken(span)
+
+                # Blank out the span in one go.
+                text = (text[:start]
+                        + self.DUMMY_CHAR * span.length
+                        + text[start + span.length:])
+                start += span.length
+
+            start = self.verbatimRegex.indexIn(text, start)
+
+        return text
+        
+    def tokenizeHtmlComments(self, text):
+        """Tokenize HTML comments within a line.
+
+        Handles the tail of a multi-line comment carried over from the
+        previous line, complete inline comments, and the start of a new
+        multi-line comment.  Returns ``text`` with all comment segments
+        replaced by dummy characters so they are not tokenized again.
+        """
+        previousState = self.previousState
+
+        # Check for the end of a multiline comment so that it doesn't get further
+        # tokenized. Don't bother formatting the comment itself, however, because
+        # it should have already been tokenized in tokenizeMultilineComment().
+        if previousState == MS.MarkdownStateComment:
+            commentEnd = text.find("-->")
+            # Only blank out when the terminator is present.  With no
+            # terminator find() returns -1 and the first two characters
+            # used to be overwritten (or a shorter string grew).
+            if commentEnd >= 0:
+                blanked = commentEnd + 3
+                text = self.DUMMY_CHAR * blanked + text[blanked:]
+
+        # Now check for inline comments (non-multiline).
+        commentStart = self.htmlInlineCommentRegex.indexIn(text)
+
+        while commentStart >= 0:
+            commentLength = self.htmlInlineCommentRegex.matchedLength()
+            token = Token()
+            token.type = MTT.TokenHtmlComment
+            token.position = commentStart
+            token.length = commentLength
+            self.addToken(token)
+
+            # Replace comment segment with dummy characters so that it doesn't
+            # get tokenized again.
+            commentStop = commentStart + commentLength
+            text = (text[:commentStart]
+                    + self.DUMMY_CHAR * commentLength
+                    + text[commentStop:])
+
+            commentStart = self.htmlInlineCommentRegex.indexIn(text, commentStop)
+
+        # Find multiline comment start, if any.
+        commentStart = text.find("<!--")
+        if commentStart >= 0:
+            token = Token()
+            token.type = MTT.TokenHtmlComment
+            token.position = commentStart
+            token.length = len(text) - commentStart
+            self.addToken(token)
+            self.setState(MS.MarkdownStateComment)
+
+            # Replace comment segment with dummy characters so that it doesn't
+            # get tokenized again.
+            text = text[:commentStart] + self.DUMMY_CHAR * token.length
+        return text
+            
+    def tokenizeTableHeaderRow(self, text):
+        """
+        Tokenizes text as the header row of a pipe table, if it is one.
+
+        The line counts as a header row only when the previous block's
+        state is one of the states listed in the code below, the current
+        state is paragraph or unknown, and the next block's state is
+        MarkdownStatePipeTableDivider.  In that case the state is set to
+        MarkdownStatePipeTableHeader, every "|" gets a TokenTablePipe token
+        and the text before, between and after the pipes gets
+        TokenTableHeader tokens.
+
+        Returns text with every pipe replaced by a space, or text unchanged
+        when the line is not a header row.
+        """
+        statesBeforeTable = [
+            MS.MarkdownStateParagraphBreak,
+            MS.MarkdownStateListLineBreak,
+            MS.MarkdownStateSetextHeading1Line2,
+            MS.MarkdownStateSetextHeading2Line2,
+            MS.MarkdownStateAtxHeading1,
+            MS.MarkdownStateAtxHeading2,
+            MS.MarkdownStateAtxHeading3,
+            MS.MarkdownStateAtxHeading4,
+            MS.MarkdownStateAtxHeading5,
+            MS.MarkdownStateAtxHeading6,
+            MS.MarkdownStateHorizontalRule,
+            MS.MarkdownStateCodeFenceEnd,
+            MS.MarkdownStateUnknown,
+        ]
+        headerCandidateStates = [
+            MS.MarkdownStateParagraph,
+            MS.MarkdownStateUnknown,
+        ]
+
+        if not (self.previousState in statesBeforeTable
+                and self.getState() in headerCandidateStates
+                and self.nextState == MS.MarkdownStatePipeTableDivider):
+            return text
+
+        self.setState(MS.MarkdownStatePipeTableHeader)
+
+        headerStart = 0
+        for i, char in enumerate(text):
+            if char != "|":
+                continue
+
+            # A separate Token is created for every addToken() call: reusing
+            # one object for both the header cell and the pipe would turn
+            # the header token into a pipe token if addToken() keeps a
+            # reference to the object it is given.
+            if i > 0:
+                headerToken = Token()
+                headerToken.type = MTT.TokenTableHeader
+                headerToken.position = headerStart
+                headerToken.length = i - headerStart
+                self.addToken(headerToken)
+
+            pipeToken = Token()
+            pipeToken.type = MTT.TokenTablePipe
+            pipeToken.position = i
+            pipeToken.length = 1
+            self.addToken(pipeToken)
+
+            headerStart = i + 1
+
+        # Header text following the last pipe (or the whole line when it
+        # contains no pipe at all).
+        if headerStart < len(text):
+            headerToken = Token()
+            headerToken.type = MTT.TokenTableHeader
+            headerToken.position = headerStart
+            headerToken.length = len(text) - headerStart
+            self.addToken(headerToken)
+
+        # Replace pipes with spaces so that they don't get formatted again
+        # with, for example, strong or emphasis formatting.  Note that a
+        # space is used rather than DUMMY_CHAR for this.
+        return text.replace("|", " ")
+
+    
+    def tokenizeTableDivider(self, text):
+        """
+        Tokenizes text as a pipe table divider line, if it is one.
+
+        Only lines following a pipe table header or a paragraph are
+        considered.  When text matches pipeTableDividerRegex exactly, the
+        state becomes MarkdownStatePipeTableDivider and one
+        TokenTableDivider token spanning the whole line is added.
+
+        A backtrack is requested in two situations: the previous line was
+        tokenized as a table header but this line is not a divider, or the
+        previous line was tokenized as a paragraph and this line is a
+        divider.
+
+        Returns True if the line was tokenized as a divider, else False.
+        """
+        prior = self.previousState
+        afterHeader = prior == MS.MarkdownStatePipeTableHeader
+        afterParagraph = (not afterHeader) and prior == MS.MarkdownStateParagraph
+
+        if not (afterHeader or afterParagraph):
+            return False
+
+        if not self.pipeTableDividerRegex.exactMatch(text):
+            if afterHeader:
+                # Restart tokenizing on the previous line.
+                self.requestBacktrack()
+            return False
+
+        if afterParagraph:
+            # Restart tokenizing on the previous line.
+            self.requestBacktrack()
+
+        self.setState(MS.MarkdownStatePipeTableDivider)
+
+        divider = Token()
+        divider.type = MTT.TokenTableDivider
+        divider.position = 0
+        divider.length = len(text)
+        self.addToken(divider)
+
+        return True
+
+    
+    def tokenizeTableRow(self, text):
+        """
+        Tokenizes text as a body row of a pipe table, if it is one.
+
+        The line is a table row when the previous block was a pipe table
+        divider or another table row.  The state is then set to
+        MarkdownStatePipeTableRow and a TokenTablePipe token of length 1 is
+        added for every "|" in the line.
+
+        Returns text with every pipe replaced by a space, or text unchanged
+        when the line is not a table row.
+        """
+        rowPredecessors = [
+            MS.MarkdownStatePipeTableDivider,
+            MS.MarkdownStatePipeTableRow,
+        ]
+        if self.previousState not in rowPredecessors:
+            return text
+
+        self.setState(MS.MarkdownStatePipeTableRow)
+
+        for column, char in enumerate(text):
+            if char == "|":
+                pipe = Token()
+                pipe.type = MTT.TokenTablePipe
+                pipe.position = column
+                pipe.length = 1
+                self.addToken(pipe)
+
+        # Pipes become spaces so that they don't get formatted again with,
+        # for example, strong or emphasis formatting.  A space is used
+        # rather than DUMMY_CHAR for this.
+        return text.replace("|", " ")
+
+            
+    def tokenizeMatches(self, tokenType, text, regex,
+                        markupStartCount=0, markupEndCount=0,
+                        replaceMarkupChars=False, replaceAllChars=False):
+        """
+        Tokenizes a block of text, searching for all occurrences of regex.
+        Occurrences are set to the given token type and added to the list
+        of tokens.  The markupStartCount and markupEndCount values are used
+        to indicate how many markup special characters precede and follow
+        the main text, respectively.
+
+        For example, if the matched string is "**bold**", and
+        markupStartCount = 2 and markupEndCount = 2, then the asterisks
+        preceding and following the word "bold" will be set as opening and
+        closing markup in the token.
+
+        If replaceMarkupChars is true, then the markupStartCount and
+        markupEndCount characters will be replaced with a dummy character
+        in the text string so that subsequent parsings of the same line do
+        not pick up the original characters.
+
+        If replaceAllChars is true instead, then the entire matched text
+        will be replaced with dummy characters--again, for ease in parsing
+        the same line for other regular expression matches.
+
+        regex is used through indexIn() and matchedLength().  Returns the
+        text with the requested replacements applied; DUMMY_CHAR is
+        expected to be a single character so that positions are preserved.
+        """
+        index = regex.indexIn(text)
+
+        while index >= 0:
+            length = regex.matchedLength()
+            matchEnd = index + length
+
+            token = Token()
+            token.type = tokenType
+            token.position = index
+            token.length = length
+
+            if markupStartCount > 0:
+                token.openingMarkupLength = markupStartCount
+
+            if markupEndCount > 0:
+                token.closingMarkupLength = markupEndCount
+
+            if replaceAllChars:
+                text = (text[:index]
+                        + self.DUMMY_CHAR * length
+                        + text[matchEnd:])
+
+            elif replaceMarkupChars:
+                # Clamp both counts to the match so that masking can never
+                # touch characters outside of it.
+                openLength = min(max(markupStartCount, 0), length)
+                closeLength = min(max(markupEndCount, 0), length - openLength)
+                text = (text[:index]
+                        + self.DUMMY_CHAR * openLength
+                        + text[index + openLength:matchEnd - closeLength]
+                        + self.DUMMY_CHAR * closeLength
+                        + text[matchEnd:])
+
+            self.addToken(token)
+
+            # Always advance by at least one character; a zero-length match
+            # would otherwise be found again at the same index forever.
+            index = regex.indexIn(text, index + max(length, 1))
+
+        return text
+
+    
+    def dummyOutEscapeCharacters(self, text):
+        """
+        Replaces escaped characters in text so they aren't picked up
+        during parsing.  Returns a copy of the input text string in which
+        every backslash escape (a backslash followed by any character
+        other than a newline) is rewritten as a backslash followed by "$".
+        The result has the same length as the input.
+
+        NOTE(review): "$" is presumably the value of DUMMY_CHAR -- confirm.
+        """
+        # Raw strings are used for both the pattern and the replacement
+        # template: the previous "\$" literal was an invalid string escape.
+        # In the template, r"\\" yields one literal backslash.
+        return re.sub(r"\\.", r"\\$", text)
--- a/manuskript/ui/views/textEditView.py
+++ b/manuskript/ui/views/textEditView.py
@ -11,8 +11,7 @@ from manuskript.enums import Outline
 from manuskript import functions as F
 from manuskript.models.outlineModel import outlineModel
 from manuskript.ui.editors.MDFunctions import MDFormatSelection
-from manuskript.ui.editors.MMDHighlighter import MMDHighlighter
-from manuskript.ui.editors.basicHighlighter import basicHighlighter
+from manuskript.ui.highlighters import MMDHighlighter, BasicHighlighter
 from manuskript.ui.editors.textFormat import textFormat
 from manuskript.ui import style as S

@ -84,7 +83,7 @@ class textEditView(QTextEdit):
            self.spellcheck = False

        if self._highlighting and not self.highlighter:
-            self.highlighter = basicHighlighter(self)
+            self.highlighter = BasicHighlighter(self)
            self.highlighter.setDefaultBlockFormat(self._defaultBlockFormat)

    def getDefaultLocale(self):
@ -191,7 +190,7 @@ class textEditView(QTextEdit):
            if self._column in [Outline.text.value, Outline.notes.value]:
                self.highlighter = MMDHighlighter(self)
            else:
-                self.highlighter = basicHighlighter(self)
+                self.highlighter = BasicHighlighter(self)

            self.highlighter.setDefaultBlockFormat(self._defaultBlockFormat)