From da62b823c7ed8ec2e516d9a60667af9833b2d56e Mon Sep 17 00:00:00 2001 From: Olivier Keshavjee Date: Wed, 22 Nov 2017 21:03:47 +0100 Subject: [PATCH] Checkpoint: working highlighter. Optimization welcome. --- manuskript/ui/highlighters/MMDHighlighter.py | 7 - manuskript/ui/highlighters/__init__.py | 6 + .../ui/highlighters/basicHighlighter.py | 42 ++- .../ui/highlighters/markdownHighlighter.py | 215 ++++----------- .../ui/highlighters/markdownTokenizer.py | 246 +++++++++--------- manuskript/ui/style.py | 2 + manuskript/ui/views/textEditView.py | 20 +- 7 files changed, 237 insertions(+), 301 deletions(-) diff --git a/manuskript/ui/highlighters/MMDHighlighter.py b/manuskript/ui/highlighters/MMDHighlighter.py index d2d29ae..cea6255 100644 --- a/manuskript/ui/highlighters/MMDHighlighter.py +++ b/manuskript/ui/highlighters/MMDHighlighter.py @@ -35,13 +35,6 @@ class MMDHighlighter(BasicHighlighter): for key in self.MARKDOWN_REGEX: self.rules[key] = re.compile(self.MARKDOWN_REGEX[key]) - def highlightBlock(self, text): - BasicHighlighter.highlightBlockBefore(self, text) - - self.doHighlightBlock(text) - - BasicHighlighter.highlightBlockAfter(self, text) - def doHighlightBlock(self, text): """ A quick-n-dirty very basic highlighter, that fails in most non-trivial cases. And is ugly. diff --git a/manuskript/ui/highlighters/__init__.py b/manuskript/ui/highlighters/__init__.py index 43acd44..c3b1ed3 100644 --- a/manuskript/ui/highlighters/__init__.py +++ b/manuskript/ui/highlighters/__init__.py @@ -3,4 +3,10 @@ from manuskript.ui.highlighters.basicHighlighter import BasicHighlighter from manuskript.ui.highlighters.MMDHighlighter import MMDHighlighter + +# Markdown highlighter +from manuskript.ui.highlighters.markdownEnums import MarkdownState +from manuskript.ui.highlighters.markdownEnums import MarkdownTokenType +from manuskript.ui.highlighters.markdownEnums import BlockquoteStyle +from manuskript.ui.highlighters.markdownTokenizer import MarkdownTokenizer from manuskript.ui.highlighters.markdownHighlighter import MarkdownHighlighter diff --git a/manuskript/ui/highlighters/basicHighlighter.py b/manuskript/ui/highlighters/basicHighlighter.py index 5ed315e..c483041 100644 --- a/manuskript/ui/highlighters/basicHighlighter.py +++ b/manuskript/ui/highlighters/basicHighlighter.py @@ -8,6 +8,9 @@ from PyQt5.QtGui import QBrush, QTextCursor, QColor, QFont, QSyntaxHighlighter from PyQt5.QtGui import QTextBlockFormat, QTextCharFormat import manuskript.models.references as Ref +import manuskript.ui.style as S +from manuskript import settings +from manuskript import functions as F class BasicHighlighter(QSyntaxHighlighter): @@ -18,6 +21,11 @@ class BasicHighlighter(QSyntaxHighlighter): self._misspelledColor = Qt.red self._defaultBlockFormat = QTextBlockFormat() self._defaultCharFormat = QTextCharFormat() + self.defaultTextColor = QColor(S.text) + self.backgroundColor = QColor(S.base) + self.markupColor = QColor(S.textLight) + self.linkColor = QColor(S.link) + self.spellingErrorColor = QColor(Qt.red) def setDefaultBlockFormat(self, bf): self._defaultBlockFormat = bf @@ -30,17 +38,47 @@ class BasicHighlighter(QSyntaxHighlighter): def setMisspelledColor(self, color): self._misspelledColor = color + def updateColorScheme(self, rehighlight=True): + """ + Generates a base set of colors that will take account of user + preferences, and use system style. + """ + + # Reading user settings + opt = settings.textEditor + + self.defaultTextColor = QColor(opt["fontColor"]) + self.backgroundColor = (QColor(opt["background"]) + if not opt["backgroundTransparent"] + else QColor(S.window)) + self.markupColor = F.mixColors(self.defaultTextColor, + self.backgroundColor, + .3) + self.linkColor = QColor(S.link) + self.spellingErrorColor = QColor(opt["misspelled"]) + self._defaultCharFormat.setForeground(QBrush(self.defaultTextColor)) + + if rehighlight: + self.rehighlight() + def highlightBlock(self, text): """Apply syntax highlighting to the given block of text. """ self.highlightBlockBefore(text) + self.doHighlightBlock(text) self.highlightBlockAfter(text) + def doHighlightBlock(self, text): + """ + Virtual funtion to subclass. + """ + pass + def highlightBlockBefore(self, text): """Highlighting to do before anything else. When subclassing BasicHighlighter, you must call highlightBlockBefore - before you do any custom highlighting. + before you do any custom highlighting. Or implement doHighlightBlock. """ #print(">", self.currentBlock().document().availableUndoSteps()) @@ -58,7 +96,7 @@ class BasicHighlighter(QSyntaxHighlighter): """Highlighting to do after everything else. When subclassing BasicHighlighter, you must call highlightBlockAfter - after your custom highlighting. + after your custom highlighting. Or implement doHighlightBlock. """ # References diff --git a/manuskript/ui/highlighters/markdownHighlighter.py b/manuskript/ui/highlighters/markdownHighlighter.py index f0d0c0f..c1b5f2d 100644 --- a/manuskript/ui/highlighters/markdownHighlighter.py +++ b/manuskript/ui/highlighters/markdownHighlighter.py @@ -12,10 +12,14 @@ from PyQt5.QtGui import (QSyntaxHighlighter, QTextBlock, QColor, QFont, QTextCharFormat, QBrush, QPalette) from PyQt5.QtWidgets import qApp, QStyle -from manuskript.ui.highlighters.markdownTokenizer import MarkdownTokenizer -from manuskript.ui.highlighters.markdownEnums import MarkdownState as MS -from manuskript.ui.highlighters.markdownEnums import MarkdownTokenType as MTT -from manuskript.ui.highlighters.markdownEnums import BlockquoteStyle as BS +from manuskript.ui.highlighters import BasicHighlighter +from manuskript.ui.highlighters import MarkdownTokenizer +from manuskript.ui.highlighters import MarkdownState as MS +from manuskript.ui.highlighters import MarkdownTokenType as MTT +from manuskript.ui.highlighters import BlockquoteStyle as BS +from manuskript.ui import style as S +from manuskript import settings +from manuskript import functions as F # Un longue ligne. Un longue ligne. Un longue ligne. Un longue ligne.asdasdasda @@ -26,14 +30,14 @@ GW_FADE_ALPHA = 140 #FIXME: Setext heading don't work anymore -class MarkdownHighlighter(QSyntaxHighlighter): +class MarkdownHighlighter(BasicHighlighter): highlightBlockAtPosition = pyqtSignal(int) headingFound = pyqtSignal(int, str, QTextBlock) headingRemoved = pyqtSignal(int) def __init__(self, editor): - QSyntaxHighlighter.__init__(self, editor.document()) + BasicHighlighter.__init__(self, editor) #default values self.editor = editor @@ -42,11 +46,6 @@ class MarkdownHighlighter(QSyntaxHighlighter): self.spellCheckEnabled = False #self.typingPaused = True self.inBlockquote = False - self.defaultTextColor = QColor(Qt.black) - self.backgroundColor = QColor(Qt.white) - self.markupColor = QColor(Qt.black) - self.linkColor = QColor(Qt.blue) - self.spellingErrorColor = QColor(Qt.red) self.blockquoteStyle = BS.BlockquoteStyleFancy # Settings @@ -56,13 +55,6 @@ class MarkdownHighlighter(QSyntaxHighlighter): self.highlightBlockAtPosition.connect(self.onHighlightBlockAtPosition, Qt.QueuedConnection) - # font = QFont("Monospace", 12, QFont.Normal, False) - font = self.document().defaultFont() - font.setStyleStrategy(QFont.PreferAntialias) - self.defaultFormat = QTextCharFormat() - self.defaultFormat.setFont(font) - self.defaultFormat.setForeground(QBrush(self.defaultTextColor)) - self.theme = self.defaultTheme() self.setupHeadingFontSize(True) @@ -72,16 +64,11 @@ class MarkdownHighlighter(QSyntaxHighlighter): self.searchExpressionRegExp = False self.searchExpressionCase = False - self.customRules = [ - ("(°).*?(°)", {"background": Qt.yellow, - "markupColor":Qt.lightGray}), - ] - #f = self.document().defaultFont() #f.setFamily("monospace") #self.document().setDefaultFont(f) - def highlightBlock(self, text): + def doHighlightBlock(self, text): """ Note: Never set the QTextBlockFormat for a QTextBlock from within the highlighter. Depending on how the block format is modified, @@ -97,17 +84,8 @@ class MarkdownHighlighter(QSyntaxHighlighter): formatting to be triggered yet again. """ - if self.currentBlock().blockNumber() == 0: - # This is the title - bf = QTextCharFormat() - bf.setFontPointSize(self.editor.font().pointSize() * 2) - bf.setFontWeight(QFont.Bold) - bf.setForeground(Qt.lightGray) - self.setFormat(0, len(text), bf) - return - lastState = self.currentBlockState() - self.setFormat(0, len(text), self.defaultFormat) + self.setFormat(0, len(text), self._defaultCharFormat) if self.tokenizer != None: self.tokenizer.clear() @@ -144,7 +122,7 @@ class MarkdownHighlighter(QSyntaxHighlighter): i += 1 #if self.currentBlockState() == MS.MarkdownStateBlockquote: - #fmt = QTextCharFormat(self.defaultFormat) + #fmt = QTextCharFormat(self._defaultCharFormat) #fmt.setForeground(Qt.lightGray) #self.setFormat(0, len(text), fmt) @@ -176,87 +154,6 @@ class MarkdownHighlighter(QSyntaxHighlighter): if self.spellCheckEnabled: self.spellCheck(text) - # HASHTAGS AND HIGHLIGHTS - - # Hashtags - s = 0 - ht = QRegExp(r'([^#])(#[\w]+)') - while ht.indexIn(text, s) >= 0: - f = self.format(ht.pos()+1) - f.setForeground(QColor("#07c")) - f.setFontWeight(QFont.Bold) - self.setFormat(ht.pos()+1, ht.matchedLength()-1, f) - s = ht.pos() + 1 - - # Highlighted - for w in self.highlightedWords + self.highlightedTags: - pos = text.lower().find(w.lower()) - while pos >= 0: - for i in range(pos, pos + len(w)): - f = self.format(i) - f.setBackground(QBrush(QColor("#fAf"))) - self.setFormat(i, 1, f) - pos = text.lower().find(w.lower(), pos+1) - - # Searched - #FIXME: consider searchExpressionRegExp - if self.searchExpression: - s = self.searchExpression - - if not self.searchExpressionRegExp: - if self.searchExpressionCase: - pos = text.find(s) - else: - pos = text.lower().find(s.lower()) - while pos >= 0: - for i in range(pos, pos + len(s)): - f = self.format(i) - f.setBackground(QBrush(QColor("#Aff"))) - self.setFormat(i, 1, f) - pos = text.lower().find(s.lower(), pos+1) - - else: - # Using QRegExp - rx = QRegExp(s) - if not self.searchExpressionCase: - rx.setCaseSensitivity(Qt.CaseInsensitive) - p = rx.indexIn(text) - while p != -1: - f = self.format(p) - f.setBackground(QBrush(QColor("#Aff"))) - self.setFormat(p, rx.matchedLength(), f) - p = rx.indexIn(text, p + 1) - - # Using python re - #try: - #for m in re.finditer(s, text): - #f = self.format(m.start()) - #f.setBackground(QBrush(QColor("#0ff"))) - #self.setFormat(m.start(), len(m.group()), f) - #except: - ## Probably malformed regExp - #pass - - # Custom rules - for rule, theme in self.customRules: - for m in re.finditer(rule, text): - - if not m.groups(): # No groups, therefore no markup - f = self.format(m.start()) - f, garbage = self.formatsFromTheme(theme, f) - self.setFormat(m.start(), len(m.group()), f) - - else: - mf = self.format(m.start()) - f = self.format(m.start() + len(m.group(1))) - f, mf = self.formatsFromTheme(theme, f, mf) - self.setFormat(m.start(1), len(m.group(1)), mf) - self.setFormat(m.start(2), len(m.group(2)), mf) - self.setFormat(m.start(1) + len(m.group(1)), - len(m.group()) - - len(m.group(1)) - - len(m.group(2)), f) - # If the block has transitioned from previously being a heading to now # being a non-heading, signal that the position in the document no # longer contains a heading. @@ -270,20 +167,25 @@ class MarkdownHighlighter(QSyntaxHighlighter): # COLORS & FORMATTING ########################################################################### + def updateColorScheme(self, rehighlight=True): + BasicHighlighter.updateColorScheme(self, rehighlight) + self.theme = self.defaultTheme() + self.setEnableLargeHeadingSizes(True) + def defaultTheme(self): - markup = qApp.palette().color(QPalette.Mid) - if markup == Qt.black: - markup = Qt.lightGray + markup = self.markupColor dark = qApp.palette().color(QPalette.Dark) if dark == Qt.black: dark = QColor(Qt.gray) darker = dark.darker(150) # Text background - background = qApp.palette().color(QPalette.Base) - lightBackground = background.darker(130) - veryLightBackground = background.darker(105) + background = self.backgroundColor + text = self.defaultTextColor + lightBackground = F.mixColors(background, text, .4) + veryLightBackground = F.mixColors(background, text, .7) + link = self.linkColor theme = { "markup": markup} @@ -311,17 +213,15 @@ class MarkdownHighlighter(QSyntaxHighlighter): "formatMarkup":True, "bold": True, "monospace": True, - #"color": Qt.darkBlue if i % 2 == 1 else Qt.darkMagenta, } - b = 100 - d = 50 - color = QColor(Qt.darkBlue) + + color = QColor(S.highlightedTextDark) theme[MTT.TokenAtxHeading1]["color"] = color - theme[MTT.TokenAtxHeading2]["color"] = color.lighter(b + d) - theme[MTT.TokenAtxHeading3]["color"] = color.lighter(b + 2*d) - theme[MTT.TokenAtxHeading4]["color"] = color.lighter(b + 3*d) - theme[MTT.TokenAtxHeading5]["color"] = color.lighter(b + 4*d) - theme[MTT.TokenAtxHeading6]["color"] = color.lighter(b + 5*d) + theme[MTT.TokenAtxHeading2]["color"] = F.mixColors(color, background, .9) + theme[MTT.TokenAtxHeading3]["color"] = F.mixColors(color, background, .8) + theme[MTT.TokenAtxHeading4]["color"] = F.mixColors(color, background, .7) + theme[MTT.TokenAtxHeading5]["color"] = F.mixColors(color, background, .6) + theme[MTT.TokenAtxHeading6]["color"] = F.mixColors(color, background, .5) for i in [MTT.TokenSetextHeading1Line2, MTT.TokenSetextHeading2Line2]: theme[i] = { @@ -352,13 +252,13 @@ class MarkdownHighlighter(QSyntaxHighlighter): theme[MTT.TokenHtmlEntity] = { "color":Qt.red} theme[MTT.TokenAutomaticLink] = { - "color": qApp.palette().color(QPalette.Link)} + "color": link} theme[MTT.TokenInlineLink] = { - "color": qApp.palette().color(QPalette.Link)} + "color": link} theme[MTT.TokenReferenceLink] = { - "color": qApp.palette().color(QPalette.Link)} + "color": link} theme[MTT.TokenReferenceDefinition] = { - "color": qApp.palette().color(QPalette.Link)} + "color": link} theme[MTT.TokenImage] = { "color": Qt.green} theme[MTT.TokenHtmlComment] = { @@ -402,25 +302,13 @@ class MarkdownHighlighter(QSyntaxHighlighter): return theme - def setColorScheme(self, defaultTextColor, backgroundColor, markupColor, - linkColor, spellingErrorColor): - self.defaultTextColor = defaultTextColor - self.backgroundColor = backgroundColor - self.markupColor = markupColor - self.linkColor = linkColor - self.spellingErrorColor = spellingErrorColor - self.defaultFormat.setForeground(QBrush(defaultTextColor)) - - # FIXME: generate a theme based on that - self.rehighlight() - ########################################################################### # ACTUAL FORMATTING ########################################################################### def applyFormattingForToken(self, token, text): if token.type != MTT.TokenUnknown: - format = self.format(token.position + token.openingMarkupLength) + fmt = self.format(token.position + token.openingMarkupLength) markupFormat = self.format(token.position) if self.theme.get("markup"): markupFormat.setForeground(self.theme["markup"]) @@ -438,14 +326,14 @@ class MarkdownHighlighter(QSyntaxHighlighter): self.currentBlockState(),) ) - #if token.type in range(6, 10): - #debug() + # if token.type in range(6, 10): + # debug() theme = self.theme.get(token.type) if theme: - format, markupFormat = self.formatsFromTheme(theme, - format, - markupFormat) + fmt, markupFormat = self.formatsFromTheme(theme, + fmt, + markupFormat) # Format openning Markup self.setFormat(token.position, token.openingMarkupLength, @@ -455,7 +343,7 @@ class MarkdownHighlighter(QSyntaxHighlighter): self.setFormat( token.position + token.openingMarkupLength, token.length - token.openingMarkupLength - token.closingMarkupLength, - format) + fmt) # Format closing Markup if token.closingMarkupLength > 0: @@ -468,13 +356,15 @@ class MarkdownHighlighter(QSyntaxHighlighter): qWarning("MarkdownHighlighter.applyFormattingForToken() was passed" " in a token of unknown type.") - def formatsFromTheme(self, theme, format=QTextCharFormat(), + def formatsFromTheme(self, theme, format=None, markupFormat=QTextCharFormat()): # Token if theme.get("color"): format.setForeground(theme["color"]) if theme.get("deltaSize"): - format.setFontPointSize(format.fontPointSize() + theme["deltaSize"]) + f = format.font() + f.setPointSize(format.font().pointSize() + theme["deltaSize"]) + format.setFont(f) if theme.get("background"): format.setBackground(theme["background"]) if theme.get("monospace"): @@ -542,13 +432,13 @@ class MarkdownHighlighter(QSyntaxHighlighter): self.rehighlight() def increaseFontSize(self): - self.defaultFormat.setFontPointSize(self.defaultFormat.fontPointSize() - + 1.0) + self._defaultCharFormat.setFontPointSize( + self._defaultCharFormat.font().pointSize() + 1.0) self.rehighlight() def decreaseFontSize(self): - self.defaultFormat.setFontPointSize(self.defaultFormat.fontPointSize() - - 1.0) + self._defaultCharFormat.setFontPointSize( + self._defaultCharFormat.font().pointSize() - 1.0) self.rehighlight() def setEnableLargeHeadingSizes(self, enable): @@ -577,8 +467,9 @@ class MarkdownHighlighter(QSyntaxHighlighter): self.rehighlight() def setFont(self, fontFamily, fontSize): - font = QFont(family=fontFamily, pointSize=fontSize, weight=QFont.Normal, italic=False) - self.defaultFormat.setFont(font) + font = QFont(family=fontFamily, pointSize=fontSize, + weight=QFont.Normal, italic=False) + self._defaultCharFormat.setFont(font) self.rehighlight() def setSpellCheckEnabled(self, enabled): diff --git a/manuskript/ui/highlighters/markdownTokenizer.py b/manuskript/ui/highlighters/markdownTokenizer.py index be07952..fd9c047 100644 --- a/manuskript/ui/highlighters/markdownTokenizer.py +++ b/manuskript/ui/highlighters/markdownTokenizer.py @@ -6,8 +6,8 @@ from PyQt5.QtCore import * from PyQt5.QtGui import * from PyQt5.QtWidgets import * -from noteflow.ui.views.markdownEnums import MarkdownState as MS -from noteflow.ui.views.markdownEnums import MarkdownTokenType as MTT +from manuskript.ui.highlighters import MarkdownState as MS +from manuskript.ui.highlighters import MarkdownTokenType as MTT # This file is simply a python translation of GhostWriter's Tokenizer. # http://wereturtle.github.io/ghostwriter/ @@ -69,10 +69,10 @@ class HighlightTokenizer: class MarkdownTokenizer(HighlightTokenizer): - + DUMMY_CHAR = "$" MAX_MARKDOWN_HEADING_LEVEL = 6 - + paragraphBreakRegex = QRegExp("^\\s*$") heading1SetextRegex = QRegExp("^===+\\s*$") heading2SetextRegex = QRegExp("^---+\\s*$") @@ -112,21 +112,21 @@ class MarkdownTokenizer(HighlightTokenizer): htmlInlineCommentRegex.setMinimal(True) mentionRegex = QRegExp("\\B@\\w+(\\-\\w+)*(/\\w+(\\-\\w+)*)?") pipeTableDividerRegex = QRegExp("^ {0,3}(\\|[ :]?)?-{3,}([ :]?\\|[ :]?-{3,}([ :]?\\|)?)+\\s*$") - + def __init__(self): HighlightTokenizer.__init__(self) - + def tokenize(self, text, currentState, previousState, nextState): self.currentState = currentState self.previousState = previousState self.nextState = nextState - + if (self.previousState == MS.MarkdownStateInGithubCodeFence or \ self.previousState == MS.MarkdownStateInPandocCodeFence) and \ self.tokenizeCodeBlock(text): # No further tokenizing required pass - + elif self.previousState != MS.MarkdownStateComment \ and self.paragraphBreakRegex.exactMatch(text): @@ -137,7 +137,7 @@ class MarkdownTokenizer(HighlightTokenizer): elif previousState != MS.MarkdownStateCodeBlock or \ (text[:1] != "\t" and text[-4:] != " "): self.setState(MS.MarkdownStateParagraphBreak) - + elif self.tokenizeSetextHeadingLine2(text) or \ self.tokenizeCodeBlock(text) or \ self.tokenizeMultilineComment(text) or \ @@ -145,7 +145,7 @@ class MarkdownTokenizer(HighlightTokenizer): self.tokenizeTableDivider(text): # No further tokenizing required pass - + elif self.tokenizeSetextHeadingLine1(text) or \ self.tokenizeAtxHeading(text) or \ self.tokenizeBlockquote(text) or \ @@ -153,7 +153,7 @@ class MarkdownTokenizer(HighlightTokenizer): self.tokenizeBulletPointList(text): self.tokenizeLineBreak(text) self.tokenizeInline(text) - + else: if previousState in [MS.MarkdownStateListLineBreak, MS.MarkdownStateNumberedList, @@ -168,7 +168,7 @@ class MarkdownTokenizer(HighlightTokenizer): self.setState(MS.MarkdownStateParagraph) self.tokenizeLineBreak(text) self.tokenizeInline(text) - + # Make sure that if the second line of a setext heading is removed the # first line is reprocessed. Otherwise, it will still show up in the # document as a heading. @@ -177,7 +177,7 @@ class MarkdownTokenizer(HighlightTokenizer): (previousState == MS.MarkdownStateSetextHeading2Line1 and \ self.getState() != MS.MarkdownStateSetextHeading2Line2): self.requestBacktrack() - + def tokenizeSetextHeadingLine1(self, text): #Check the next line's state to see if this is a setext-style heading. level = 0 @@ -188,12 +188,12 @@ class MarkdownTokenizer(HighlightTokenizer): level = 1 self.setState(MS.MarkdownStateSetextHeading1Line1) token.type = MTT.TokenSetextHeading1Line1 - + elif MS.MarkdownStateSetextHeading2Line2 == nextState: level = 2 self.setState(MS.MarkdownStateSetextHeading2Line1) token.type = MTT.TokenSetextHeading2Line1 - + if level > 0: token.length = len(text) token.position = 0 @@ -201,7 +201,7 @@ class MarkdownTokenizer(HighlightTokenizer): return True return False - + def tokenizeSetextHeadingLine2(self, text): level = 0 setextMatch = False @@ -212,13 +212,13 @@ class MarkdownTokenizer(HighlightTokenizer): setextMatch = self.heading1SetextRegex.exactMatch(text) self.setState(MS.MarkdownStateSetextHeading1Line2) token.type = MTT.TokenSetextHeading1Line2 - + elif previousState == MS.MarkdownStateSetextHeading2Line1: level = 2 setextMatch = self.heading2SetextRegex.exactMatch(text) self.setState(MS.MarkdownStateSetextHeading2Line2) token.type = MTT.TokenSetextHeading2Line2 - + elif previousState == MS.MarkdownStateParagraph: h1Line2 = self.heading1SetextRegex.exactMatch(text) h2Line2 = self.heading2SetextRegex.exactMatch(text) @@ -232,7 +232,7 @@ class MarkdownTokenizer(HighlightTokenizer): if h1Line2: self.setState(MS.MarkdownStateSetextHeading1Line2) token.type = MTT.TokenSetextHeading1Line2 - + else: self.setState(MS.MarkdownStateSetextHeading2Line2) token.type = MTT.TokenSetextHeading2Line2 @@ -246,32 +246,32 @@ class MarkdownTokenizer(HighlightTokenizer): token.position = 0 self.addToken(token) return True - + else: # Restart tokenizing on the previous line. self.requestBacktrack() False - + return False - + def tokenizeAtxHeading(self, text): escapedText = self.dummyOutEscapeCharacters(text) trailingPoundCount = 0 level = 0 - + #Count the number of pound signs at the front of the string, #up to the maximum allowed, to determine the heading level. - + while escapedText[level] == "#": level += 1 if level >= len(escapedText) or level >= self.MAX_MARKDOWN_HEADING_LEVEL: break - + if level > 0 and level < len(text): # Count how many pound signs are at the end of the text. while escapedText[-trailingPoundCount -1] == "#": trailingPoundCount += 1 - + token = Token() token.position = 0 token.length = len(text) @@ -282,7 +282,7 @@ class MarkdownTokenizer(HighlightTokenizer): self.setState(MS.MarkdownStateAtxHeading1 + level -1) return True return False - + def tokenizeNumberedList(self, text): previousState = self.previousState if (previousState in [MS.MarkdownStateParagraphBreak, @@ -296,7 +296,7 @@ class MarkdownTokenizer(HighlightTokenizer): self.numberedNestedListRegex.exactMatch(text)): periodIndex = text.find(".") parenthIndex = text.find(")") - + if periodIndex < 0: index = parenthIndex elif parenthIndex < 0: @@ -305,7 +305,7 @@ class MarkdownTokenizer(HighlightTokenizer): index = periodIndex else: index = parenthIndex - + if index > 0: token = Token() token.type = MTT.TokenNumberedList @@ -315,11 +315,11 @@ class MarkdownTokenizer(HighlightTokenizer): self.addToken(token) self.setState(MS.MarkdownStateNumberedList) return True - + return False - + return False - + def tokenizeBulletPointList(self, text): foundBulletChar = False bulletCharIndex = -1 @@ -338,17 +338,17 @@ class MarkdownTokenizer(HighlightTokenizer): # Search for the bullet point character, which can # be either a '+', '-', or '*'. - + for i in range(len(text)): if text[i] == " ": if foundBulletChar: # We've confirmed it's a bullet point by the whitespace that # follows the bullet point character, and can now exit the # loop. - + whitespaceFoundAfterBulletChar = True break - + else: spaceCount += 1 @@ -356,7 +356,7 @@ class MarkdownTokenizer(HighlightTokenizer): # number of spaces preceeding the bullet point does not # exceed three, as that would indicate a code block rather # than a bullet point list. - + if spaceCount > 3 and previousState not in [ MS.MarkdownStateNumberedList, MS.MarkdownStateBulletPointList, @@ -367,30 +367,30 @@ class MarkdownTokenizer(HighlightTokenizer): MS.MarkdownStateCodeBlock, MS.MarkdownStateCodeFenceEnd,]: return False - + elif text[i] == "\t": if foundBulletChar: # We've confirmed it's a bullet point by the whitespace that # follows the bullet point character, and can now exit the # loop. - + whitespaceFoundAfterBulletChar = True break - + elif previousState in [ MS.MarkdownStateParagraphBreak, MS.MarkdownStateUnknown]: - + # If this list item is the first in the list, ensure that # no tab character preceedes the bullet point, as that would # indicate a code block rather than a bullet point list. - + return False - + elif text[i] in ["+", "-", "*"]: foundBulletChar = True bulletCharIndex = i - + else: return False @@ -403,9 +403,9 @@ class MarkdownTokenizer(HighlightTokenizer): self.addToken(token) self.setState(MS.MarkdownStateBulletPointList) return True - + return False - + def tokenizeHorizontalRule (self, text): if self.hruleRegex.exactMatch(text): token = Token() @@ -417,12 +417,12 @@ class MarkdownTokenizer(HighlightTokenizer): return True return False - + def tokenizeLineBreak(self, text): currentState = self.currentState previousState = self.previousState nextState = self.nextState - + if currentState in [ MS.MarkdownStateParagraph, MS.MarkdownStateBlockquote, @@ -434,7 +434,7 @@ class MarkdownTokenizer(HighlightTokenizer): MS.MarkdownStateNumberedList, MS.MarkdownStateBulletPointList,]: self.requestBacktrack() - + if nextState in [ MS.MarkdownStateParagraph, MS.MarkdownStateBlockquote, @@ -448,17 +448,17 @@ class MarkdownTokenizer(HighlightTokenizer): token.length = 1 self.addToken(token) return True - + return False - + def tokenizeBlockquote(self, text): previousState = self.previousState if previousState == MS.MarkdownStateBlockquote or \ self.blockquoteRegex.exactMatch(text): - + # Find any '>' characters at the front of the line. markupLength = 0 - + for i in range(len(text)): if text[i] == ">": markupLength = i + 1 @@ -466,27 +466,27 @@ class MarkdownTokenizer(HighlightTokenizer): # There are no more '>' characters at the front of the line, # so stop processing. break - + token = Token() token.type = MTT.TokenBlockquote token.position = 0 token.length = len(text) - + if markupLength > 0: token.openingMarkupLength = markupLength - + self.addToken(token) self.setState(MS.MarkdownStateBlockquote) return True return False - + def tokenizeCodeBlock(self, text): previousState = self.previousState if previousState in [ MS.MarkdownStateInGithubCodeFence, MS.MarkdownStateInPandocCodeFence]: self.setState(previousState) - + if (previousState == MS.MarkdownStateInGithubCodeFence and \ self.githubCodeFenceEndRegex.exactMatch(text)) or \ (previousState == MS.MarkdownStateInPandocCodeFence and \ @@ -497,16 +497,16 @@ class MarkdownTokenizer(HighlightTokenizer): token.length = len(text) self.addToken(token) self.setState(MS.MarkdownStateCodeFenceEnd) - + else: token = Token() token.type = MTT.TokenCodeBlock token.position = 0 token.length = len(text) self.addToken(token) - + return True - + elif previousState in [ MS.MarkdownStateCodeBlock, MS.MarkdownStateParagraphBreak, @@ -520,7 +520,7 @@ class MarkdownTokenizer(HighlightTokenizer): self.addToken(token) self.setState(MS.MarkdownStateCodeBlock) return True - + elif previousState in [ MS.MarkdownStateParagraphBreak, MS.MarkdownStateParagraph, @@ -532,49 +532,49 @@ class MarkdownTokenizer(HighlightTokenizer): foundCodeFenceStart = True token.type = MTT.TokenGithubCodeFence self.setState(MS.MarkdownStateInGithubCodeFence) - + elif self.pandocCodeFenceStartRegex.exactMatch(text): foundCodeFenceStart = True token.type = MTT.TokenPandocCodeFence self.setState(MS.MarkdownStateInPandocCodeFence) - + if foundCodeFenceStart: token.position = 0 token.length = len(text) self.addToken(token) return True - + return False - + def tokenizeMultilineComment(self, text): previousState = self.previousState - + if previousState == MS.MarkdownStateComment: # Find the end of the comment, if any. index = text.find("-->") token = Token() token.type = MTT.TokenHtmlComment token.position = 0 - + if index >= 0: token.length = index + 3 self.addToken(token) - + # Return false so that the rest of the line that isn't within # the commented segment can be highlighted as normal paragraph # text. - + else: token.length = len(text) self.addToken(token) self.setState(MS.MarkdownStateComment) return True - + return False - + def tokenizeInline(self, text): escapedText = self.dummyOutEscapeCharacters(text) - + # Check if the line is a reference definition. if self.referenceDefinitionRegex.exactMatch(text): colonIndex = escapedText.find(":") @@ -583,10 +583,10 @@ class MarkdownTokenizer(HighlightTokenizer): token.position = 0 token.length = colonIndex + 1 self.addToken(token) - + # Replace the first bracket so that the '[...]:' reference definition # start doesn't get highlighted as a reference link. - + firstBracketIndex = escapedText.find("[") if firstBracketIndex >= 0: i = firstBracketIndex @@ -610,21 +610,21 @@ class MarkdownTokenizer(HighlightTokenizer): escapedText = self.tokenizeMatches(MTT.TokenMention, escapedText, self.mentionRegex, 0, 0, False, True) return True - + def tokenizeVerbatim(self, text): index = self.verbatimRegex.indexIn(text) - + while index >= 0: end = "" count = self.verbatimRegex.matchedLength() - + # Search for the matching end, which should have the same number # of back ticks as the start. for i in range(count): end += '`' - + endIndex = text.find(end, index + count) - + # If the end was found, add the verbatim token. if endIndex >= 0: token = Token() @@ -634,26 +634,26 @@ class MarkdownTokenizer(HighlightTokenizer): token.openingMarkupLength = count token.closingMarkupLength = count self.addToken(token) - + # Fill out the token match in the string with the dummy # character so that searches for other Markdown elements # don't find anything within this token's range in the string. - + for i in range(index, index + token.length): text = text[:i] + self.DUMMY_CHAR + text[i+1:] - + index += token.length - + # Else start searching again at the very next character. else: index += 1 - + index = self.verbatimRegex.indexIn(text, index) return text - + def tokenizeHtmlComments(self, text): previousState = self.previousState - + # Check for the end of a multiline comment so that it doesn't get further # tokenized. Don't bother formatting the comment itself, however, because # it should have already been tokenized in tokenizeMultilineComment(). @@ -661,10 +661,10 @@ class MarkdownTokenizer(HighlightTokenizer): commentEnd = text.find("-->") for i in range(commentEnd + 3): text = text[:i] + self.DUMMY_CHAR + text[i+1:] - + # Now check for inline comments (non-multiline). commentStart = self.htmlInlineCommentRegex.indexIn(text) - + while commentStart >= 0: commentLength = self.htmlInlineCommentRegex.matchedLength() token = Token() @@ -672,15 +672,15 @@ class MarkdownTokenizer(HighlightTokenizer): token.position = commentStart token.length = commentLength self.addToken(token) - + # Replace comment segment with dummy characters so that it doesn't # get tokenized again. - + for i in range(commentStart, commentStart + commentLength): text = text[:i] + self.DUMMY_CHAR + text[i+1:] - + commentStart = self.htmlInlineCommentRegex.indexIn(text, commentStart + commentLength) - + # Find multiline comment start, if any. commentStart = text.find("