From 9be2edeee79f8671b724fa3bdc59c3cfdf505e45 Mon Sep 17 00:00:00 2001 From: Olivier Keshavjee Date: Wed, 22 Nov 2017 13:43:40 +0100 Subject: [PATCH 01/13] Checkpoint: adding new markdown highlighter --- manuskript/exporter/manuskript/markdown.py | 4 +- .../MMDHighlighter.py | 10 +- manuskript/ui/highlighters/__init__.py | 6 + .../basicHighlighter.py | 18 +- manuskript/ui/highlighters/markdownEnums.py | 94 ++ .../ui/highlighters/markdownHighlighter.py | 718 ++++++++++++++ .../ui/highlighters/markdownTokenizer.py | 887 ++++++++++++++++++ manuskript/ui/views/textEditView.py | 7 +- 8 files changed, 1726 insertions(+), 18 deletions(-) rename manuskript/ui/{editors => highlighters}/MMDHighlighter.py (94%) create mode 100644 manuskript/ui/highlighters/__init__.py rename manuskript/ui/{editors => highlighters}/basicHighlighter.py (87%) create mode 100644 manuskript/ui/highlighters/markdownEnums.py create mode 100644 manuskript/ui/highlighters/markdownHighlighter.py create mode 100644 manuskript/ui/highlighters/markdownTokenizer.py diff --git a/manuskript/exporter/manuskript/markdown.py b/manuskript/exporter/manuskript/markdown.py index c706650f..f338cb2d 100644 --- a/manuskript/exporter/manuskript/markdown.py +++ b/manuskript/exporter/manuskript/markdown.py @@ -5,7 +5,7 @@ from PyQt5.QtWidgets import QPlainTextEdit, QGroupBox, qApp, QVBoxLayout, QCheck from manuskript.exporter.manuskript.plainText import plainText from manuskript.functions import mainWindow -from manuskript.ui.editors.MMDHighlighter import MMDHighlighter +from manuskript.ui.highlighters import MMDHighlighter from manuskript.ui.exporters.manuskript.plainTextSettings import exporterSettings @@ -72,4 +72,4 @@ class markdownSettings(exporterSettings): self.settings = exporterSettings.getSettings(self) self.settings["Preview"]["MarkdownHighlighter"] = self.chkMarkdownHighlighter.isChecked() - return self.settings \ No newline at end of file + return self.settings diff --git 
a/manuskript/ui/editors/MMDHighlighter.py b/manuskript/ui/highlighters/MMDHighlighter.py similarity index 94% rename from manuskript/ui/editors/MMDHighlighter.py rename to manuskript/ui/highlighters/MMDHighlighter.py index a6740f3d..d2d29ae7 100644 --- a/manuskript/ui/editors/MMDHighlighter.py +++ b/manuskript/ui/highlighters/MMDHighlighter.py @@ -5,10 +5,10 @@ import re from PyQt5.QtCore import Qt from PyQt5.QtGui import QTextCharFormat, QFont, QTextCursor, QFontMetrics -from manuskript.ui.editors.basicHighlighter import basicHighlighter +from manuskript.ui.highlighters import BasicHighlighter -class MMDHighlighter(basicHighlighter): +class MMDHighlighter(BasicHighlighter): MARKDOWN_REGEX = { 'Bold': '(\*\*)(.+?)(\*\*)', @@ -27,7 +27,7 @@ class MMDHighlighter(basicHighlighter): } def __init__(self, editor, style="Default"): - basicHighlighter.__init__(self, editor) + BasicHighlighter.__init__(self, editor) self.editor = editor @@ -36,11 +36,11 @@ class MMDHighlighter(basicHighlighter): self.rules[key] = re.compile(self.MARKDOWN_REGEX[key]) def highlightBlock(self, text): - basicHighlighter.highlightBlockBefore(self, text) + BasicHighlighter.highlightBlockBefore(self, text) self.doHighlightBlock(text) - basicHighlighter.highlightBlockAfter(self, text) + BasicHighlighter.highlightBlockAfter(self, text) def doHighlightBlock(self, text): """ diff --git a/manuskript/ui/highlighters/__init__.py b/manuskript/ui/highlighters/__init__.py new file mode 100644 index 00000000..43acd447 --- /dev/null +++ b/manuskript/ui/highlighters/__init__.py @@ -0,0 +1,6 @@ +#!/usr/bin/python +# -*- coding: utf8 -*- + +from manuskript.ui.highlighters.basicHighlighter import BasicHighlighter +from manuskript.ui.highlighters.MMDHighlighter import MMDHighlighter +from manuskript.ui.highlighters.markdownHighlighter import MarkdownHighlighter diff --git a/manuskript/ui/editors/basicHighlighter.py b/manuskript/ui/highlighters/basicHighlighter.py similarity index 87% rename from 
manuskript/ui/editors/basicHighlighter.py rename to manuskript/ui/highlighters/basicHighlighter.py index a09b53a4..5ed315e7 100644 --- a/manuskript/ui/editors/basicHighlighter.py +++ b/manuskript/ui/highlighters/basicHighlighter.py @@ -4,12 +4,13 @@ import re from PyQt5.QtCore import Qt -from PyQt5.QtGui import QBrush, QTextCursor, QColor, QFont, QSyntaxHighlighter, QTextBlockFormat, QTextCharFormat +from PyQt5.QtGui import QBrush, QTextCursor, QColor, QFont, QSyntaxHighlighter +from PyQt5.QtGui import QTextBlockFormat, QTextCharFormat import manuskript.models.references as Ref -class basicHighlighter(QSyntaxHighlighter): +class BasicHighlighter(QSyntaxHighlighter): def __init__(self, editor): QSyntaxHighlighter.__init__(self, editor.document()) @@ -38,7 +39,7 @@ class basicHighlighter(QSyntaxHighlighter): def highlightBlockBefore(self, text): """Highlighting to do before anything else. - When subclassing basicHighlighter, you must call highlightBlockBefore + When subclassing BasicHighlighter, you must call highlightBlockBefore before you do any custom highlighting. """ @@ -56,7 +57,7 @@ class basicHighlighter(QSyntaxHighlighter): def highlightBlockAfter(self, text): """Highlighting to do after everything else. - When subclassing basicHighlighter, you must call highlightBlockAfter + When subclassing BasicHighlighter, you must call highlightBlockAfter after your custom highlighting. 
""" @@ -91,13 +92,16 @@ class basicHighlighter(QSyntaxHighlighter): textedText = text + " " # Based on http://john.nachtimwald.com/2009/08/22/qplaintextedit-with-in-line-spell-check/ - WORDS = '(?iu)([\w\']+)[^\'\w]' # (?iu) means case insensitive and unicode + WORDS = r'(?iu)([\w\']+)[^\'\w]' + # (?iu) means case insensitive and unicode if hasattr(self.editor, "spellcheck") and self.editor.spellcheck: for word_object in re.finditer(WORDS, textedText): - if self.editor._dict and not self.editor._dict.check(word_object.group(1)): + if (self.editor._dict + and not self.editor._dict.check(word_object.group(1))): format = self.format(word_object.start(1)) format.setUnderlineColor(self._misspelledColor) # SpellCheckUnderline fails with some fonts format.setUnderlineStyle(QTextCharFormat.WaveUnderline) self.setFormat(word_object.start(1), - word_object.end(1) - word_object.start(1), format) + word_object.end(1) - word_object.start(1), + format) diff --git a/manuskript/ui/highlighters/markdownEnums.py b/manuskript/ui/highlighters/markdownEnums.py new file mode 100644 index 00000000..fefd424a --- /dev/null +++ b/manuskript/ui/highlighters/markdownEnums.py @@ -0,0 +1,94 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +#============================================================================== +# MARKDOWN STATES +#============================================================================== + +class MarkdownState: + MarkdownStateUnknown = -1 + MarkdownStateParagraphBreak = 0 + MarkdownStateListLineBreak = 1 + MarkdownStateParagraph = 2 + MarkdownStateAtxHeading1 = 3 + MarkdownStateAtxHeading2 = 4 + MarkdownStateAtxHeading3 = 5 + MarkdownStateAtxHeading4 = 6 + MarkdownStateAtxHeading5 = 7 + MarkdownStateAtxHeading6 = 8 + MarkdownStateBlockquote = 9 + MarkdownStateCodeBlock = 10 + MarkdownStateInGithubCodeFence = 11 + MarkdownStateInPandocCodeFence = 12 + MarkdownStateCodeFenceEnd = 13 + MarkdownStateComment = 14 + MarkdownStateHorizontalRule = 15 + 
MarkdownStateNumberedList = 16 + MarkdownStateBulletPointList = 17 + MarkdownStateSetextHeading1Line1 = 18 + MarkdownStateSetextHeading1Line2 = 19 + MarkdownStateSetextHeading2Line1 = 20 + MarkdownStateSetextHeading2Line2 = 21 + MarkdownStatePipeTableHeader = 22 + MarkdownStatePipeTableDivider = 23 + MarkdownStatePipeTableRow = 24 + +#============================================================================== +# MARKDOWN TOKEN TYPE +#============================================================================== + +class MarkdownTokenType: + TokenUnknown = -1 + + # Titles + TokenAtxHeading1 = 0 + TokenAtxHeading2 = 1 + TokenAtxHeading3 = 2 + TokenAtxHeading4 = 3 + TokenAtxHeading5 = 4 + TokenAtxHeading6 = 5 + TokenSetextHeading1Line1 = 6 + TokenSetextHeading1Line2 = 7 + TokenSetextHeading2Line1 = 8 + TokenSetextHeading2Line2 = 9 + + TokenEmphasis = 10 + TokenStrong = 11 + TokenStrikethrough = 12 + TokenVerbatim = 13 + TokenHtmlTag = 14 + TokenHtmlEntity = 15 + TokenAutomaticLink = 16 + TokenInlineLink = 17 + TokenReferenceLink = 18 + TokenReferenceDefinition = 19 + TokenImage = 20 + TokenHtmlComment = 21 + TokenNumberedList = 22 + TokenBulletPointList = 23 + TokenHorizontalRule = 24 + TokenLineBreak = 25 + TokenBlockquote = 26 + TokenCodeBlock = 27 + TokenGithubCodeFence = 28 + TokenPandocCodeFence = 29 + TokenCodeFenceEnd = 30 + TokenMention = 31 + TokenTableHeader = 32 + TokenTableDivider = 33 + TokenTablePipe = 34 + TokenSuperScript = 35 + TokenSubScript = 36 + TokenLast = 37 + + TITLES = [TokenAtxHeading1, TokenAtxHeading2, TokenAtxHeading3, + TokenAtxHeading4, TokenAtxHeading5, TokenAtxHeading6, + TokenSetextHeading1Line1, TokenSetextHeading1Line2, + TokenSetextHeading2Line1, TokenSetextHeading2Line2] + + + +class BlockquoteStyle: + BlockquoteStylePlain = 0 + BlockquoteStyleItalic = 1 + BlockquoteStyleFancy = 2 diff --git a/manuskript/ui/highlighters/markdownHighlighter.py b/manuskript/ui/highlighters/markdownHighlighter.py new file mode 100644 index 
00000000..f0d0c0f1 --- /dev/null +++ b/manuskript/ui/highlighters/markdownHighlighter.py @@ -0,0 +1,718 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +""" +A QSyntaxHighlighter for markdown, using tokenizer. More accurate than simple +regexp, but not yet perfect. +""" + +import re +from PyQt5.QtCore import Qt, pyqtSignal, qWarning, QRegExp +from PyQt5.QtGui import (QSyntaxHighlighter, QTextBlock, QColor, QFont, + QTextCharFormat, QBrush, QPalette) +from PyQt5.QtWidgets import qApp, QStyle + +from manuskript.ui.highlighters.markdownTokenizer import MarkdownTokenizer +from manuskript.ui.highlighters.markdownEnums import MarkdownState as MS +from manuskript.ui.highlighters.markdownEnums import MarkdownTokenType as MTT +from manuskript.ui.highlighters.markdownEnums import BlockquoteStyle as BS + +# Un longue ligne. Un longue ligne. Un longue ligne. Un longue ligne.asdasdasda + +GW_FADE_ALPHA = 140 + +# Highlighter based on GhostWriter (http://wereturtle.github.io/ghostwriter/). +# GPLV3+. 
+ +#FIXME: Setext heading don't work anymore + +class MarkdownHighlighter(QSyntaxHighlighter): + + highlightBlockAtPosition = pyqtSignal(int) + headingFound = pyqtSignal(int, str, QTextBlock) + headingRemoved = pyqtSignal(int) + + def __init__(self, editor): + QSyntaxHighlighter.__init__(self, editor.document()) + + #default values + self.editor = editor + self.tokenizer = MarkdownTokenizer() + + self.spellCheckEnabled = False + self.typingPaused = True + self.inBlockquote = False + self.defaultTextColor = QColor(Qt.black) + self.backgroundColor = QColor(Qt.white) + self.markupColor = QColor(Qt.black) + self.linkColor = QColor(Qt.blue) + self.spellingErrorColor = QColor(Qt.red) + self.blockquoteStyle = BS.BlockquoteStyleFancy + + # Settings + self.useUndlerlineForEmphasis = False + self.highlightLineBreaks = True + + self.highlightBlockAtPosition.connect(self.onHighlightBlockAtPosition, + Qt.QueuedConnection) + + # font = QFont("Monospace", 12, QFont.Normal, False) + font = self.document().defaultFont() + font.setStyleStrategy(QFont.PreferAntialias) + self.defaultFormat = QTextCharFormat() + self.defaultFormat.setFont(font) + self.defaultFormat.setForeground(QBrush(self.defaultTextColor)) + + self.theme = self.defaultTheme() + self.setupHeadingFontSize(True) + + self.highlightedWords = [] + self.highlightedTags = [] + self.searchExpression = "" + self.searchExpressionRegExp = False + self.searchExpressionCase = False + + self.customRules = [ + ("(°).*?(°)", {"background": Qt.yellow, + "markupColor":Qt.lightGray}), + ] + + #f = self.document().defaultFont() + #f.setFamily("monospace") + #self.document().setDefaultFont(f) + + def highlightBlock(self, text): + """ + Note: Never set the QTextBlockFormat for a QTextBlock from within + the highlighter. Depending on how the block format is modified, + a recursive call to the highlighter may be triggered, which will + cause the application to crash.
+ + Likewise, don't try to set the QTextBlockFormat outside the highlighter + (i.e., from within the text editor). While the application will not + crash, the format change will be added to the undo stack. Attempting + to undo from that point on will cause the undo stack to be virtually + frozen, since undoing the format operation causes the text to be + considered changed, thus triggering the slot that changes the text + formatting to be triggered yet again. + """ + + if self.currentBlock().blockNumber() == 0: + # This is the title + bf = QTextCharFormat() + bf.setFontPointSize(self.editor.font().pointSize() * 2) + bf.setFontWeight(QFont.Bold) + bf.setForeground(Qt.lightGray) + self.setFormat(0, len(text), bf) + return + + lastState = self.currentBlockState() + self.setFormat(0, len(text), self.defaultFormat) + + if self.tokenizer != None: + self.tokenizer.clear() + block = self.currentBlock() + nextState = MS.MarkdownStateUnknown + previousState = self.previousBlockState() + + if block.next().isValid(): + nextState = block.next().userState() + + self.tokenizer.tokenize(text, lastState, previousState, nextState) + self.setCurrentBlockState(self.tokenizer.getState()) + + self.inBlockquote = self.tokenizer.getState() == MS.MarkdownStateBlockquote + + # STATE FORMATTING + # FIXME: generic + if self.currentBlockState() in [ + MS.MarkdownStatePipeTableHeader, + MS.MarkdownStatePipeTableDivider, + MS.MarkdownStatePipeTableRow]: + fmt = QTextCharFormat() + f = fmt.font() + f.setFamily("Monospace") + fmt.setFont(f) + self.setFormat(0, len(text), fmt) + + # Monospace the blank chars + i = 0 + while i <= len(text)-1 and text[i] in [" ", "\t"]: + fmt = self.format(i) + fmt.setFontFamily("Monospace") + self.setFormat(i, 1, fmt) + i += 1 + + #if self.currentBlockState() == MS.MarkdownStateBlockquote: + #fmt = QTextCharFormat(self.defaultFormat) + #fmt.setForeground(Qt.lightGray) + #self.setFormat(0, len(text), fmt) + + tokens = self.tokenizer.getTokens() + + for token in 
tokens: + if token.type == MTT.TokenUnknown: + qWarning("Highlighter found unknown token type in text block.") + continue + + if token.type in [ + MTT.TokenAtxHeading1, + MTT.TokenAtxHeading2, + MTT.TokenAtxHeading3, + MTT.TokenAtxHeading4, + MTT.TokenAtxHeading5, + MTT.TokenAtxHeading6, + MTT.TokenSetextHeading1Line1, + MTT.TokenSetextHeading2Line1, + ]: + self.storeHeadingData(token, text) + + self.applyFormattingForToken(token, text) + + if self.tokenizer.backtrackRequested(): + previous = self.currentBlock().previous() + self.highlightBlockAtPosition.emit(previous.position()) + + if self.spellCheckEnabled: + self.spellCheck(text) + + # HASHTAGS AND HIGHLIGHTS + + # Hashtags + s = 0 + ht = QRegExp(r'([^#])(#[\w]+)') + while ht.indexIn(text, s) >= 0: + f = self.format(ht.pos()+1) + f.setForeground(QColor("#07c")) + f.setFontWeight(QFont.Bold) + self.setFormat(ht.pos()+1, ht.matchedLength()-1, f) + s = ht.pos() + 1 + + # Highlighted + for w in self.highlightedWords + self.highlightedTags: + pos = text.lower().find(w.lower()) + while pos >= 0: + for i in range(pos, pos + len(w)): + f = self.format(i) + f.setBackground(QBrush(QColor("#fAf"))) + self.setFormat(i, 1, f) + pos = text.lower().find(w.lower(), pos+1) + + # Searched + #FIXME: consider searchExpressionRegExp + if self.searchExpression: + s = self.searchExpression + + if not self.searchExpressionRegExp: + if self.searchExpressionCase: + pos = text.find(s) + else: + pos = text.lower().find(s.lower()) + while pos >= 0: + for i in range(pos, pos + len(s)): + f = self.format(i) + f.setBackground(QBrush(QColor("#Aff"))) + self.setFormat(i, 1, f) + pos = (text.find(s, pos+1) if self.searchExpressionCase else text.lower().find(s.lower(), pos+1)) + + else: + # Using QRegExp + rx = QRegExp(s) + if not self.searchExpressionCase: + rx.setCaseSensitivity(Qt.CaseInsensitive) + p = rx.indexIn(text) + while p != -1: + f = self.format(p) + f.setBackground(QBrush(QColor("#Aff"))) + self.setFormat(p, rx.matchedLength(), f) + p = rx.indexIn(text, p + 1) + + # Using python re +
#try: + #for m in re.finditer(s, text): + #f = self.format(m.start()) + #f.setBackground(QBrush(QColor("#0ff"))) + #self.setFormat(m.start(), len(m.group()), f) + #except: + ## Probably malformed regExp + #pass + + # Custom rules + for rule, theme in self.customRules: + for m in re.finditer(rule, text): + + if not m.groups(): # No groups, therefore no markup + f = self.format(m.start()) + f, garbage = self.formatsFromTheme(theme, f) + self.setFormat(m.start(), len(m.group()), f) + + else: + mf = self.format(m.start()) + f = self.format(m.start() + len(m.group(1))) + f, mf = self.formatsFromTheme(theme, f, mf) + self.setFormat(m.start(1), len(m.group(1)), mf) + self.setFormat(m.start(2), len(m.group(2)), mf) + self.setFormat(m.start(1) + len(m.group(1)), + len(m.group()) + - len(m.group(1)) + - len(m.group(2)), f) + + # If the block has transitioned from previously being a heading to now + # being a non-heading, signal that the position in the document no + # longer contains a heading. + + if self.isHeadingBlockState(lastState) and \ + not self.isHeadingBlockState(self.currentBlockState()): + self.headingRemoved.emit(self.currentBlock().position()) + + + ########################################################################### + # COLORS & FORMATTING + ########################################################################### + + def defaultTheme(self): + + markup = qApp.palette().color(QPalette.Mid) + if markup == Qt.black: + markup = Qt.lightGray + dark = qApp.palette().color(QPalette.Dark) + if dark == Qt.black: + dark = QColor(Qt.gray) + darker = dark.darker(150) + + # Text background + background = qApp.palette().color(QPalette.Base) + lightBackground = background.darker(130) + veryLightBackground = background.darker(105) + + theme = { + "markup": markup} + + #Exemple: + #"color": Qt.red, + #"deltaSize": 10, + #"background": Qt.yellow, + #"monospace": True, + #"bold": True, + #"italic": True, + #"underline": True, + #"overline": True, + #"strike": True, + 
#"formatMarkup": True, + #"markupBold": True, + #"markupColor": Qt.blue, + #"markupBackground": Qt.green, + #"markupMonospace": True, + #"super":True, + #"sub":True + + for i in MTT.TITLES: + theme[i] = { + "formatMarkup":True, + "bold": True, + "monospace": True, + #"color": Qt.darkBlue if i % 2 == 1 else Qt.darkMagenta, + } + b = 100 + d = 50 + color = QColor(Qt.darkBlue) + theme[MTT.TokenAtxHeading1]["color"] = color + theme[MTT.TokenAtxHeading2]["color"] = color.lighter(b + d) + theme[MTT.TokenAtxHeading3]["color"] = color.lighter(b + 2*d) + theme[MTT.TokenAtxHeading4]["color"] = color.lighter(b + 3*d) + theme[MTT.TokenAtxHeading5]["color"] = color.lighter(b + 4*d) + theme[MTT.TokenAtxHeading6]["color"] = color.lighter(b + 5*d) + + for i in [MTT.TokenSetextHeading1Line2, MTT.TokenSetextHeading2Line2]: + theme[i] = { + "color": markup, + "monospace":True} + + # Beautifiers + theme[MTT.TokenEmphasis] = { + "italic":True,} + theme[MTT.TokenStrong] = { + "bold":True} + theme[MTT.TokenStrikethrough] = { + "strike":True} + theme[MTT.TokenVerbatim] = { + "monospace":True, + "background": veryLightBackground, + "formatMarkup": True, + "markupColor": markup} + theme[MTT.TokenSuperScript] = { + "super":True, + "formatMarkup":True} + theme[MTT.TokenSubScript] = { + "sub":True, + "formatMarkup":True} + + theme[MTT.TokenHtmlTag] = { + "color":Qt.red} + theme[MTT.TokenHtmlEntity] = { + "color":Qt.red} + theme[MTT.TokenAutomaticLink] = { + "color": qApp.palette().color(QPalette.Link)} + theme[MTT.TokenInlineLink] = { + "color": qApp.palette().color(QPalette.Link)} + theme[MTT.TokenReferenceLink] = { + "color": qApp.palette().color(QPalette.Link)} + theme[MTT.TokenReferenceDefinition] = { + "color": qApp.palette().color(QPalette.Link)} + theme[MTT.TokenImage] = { + "color": Qt.green} + theme[MTT.TokenHtmlComment] = { + "color": dark} + theme[MTT.TokenNumberedList] = { + "markupColor": QColor(Qt.red).lighter(), + "markupBold": True, + "markupMonospace": True,} + 
theme[MTT.TokenBulletPointList] = { + "markupColor": QColor(Qt.red).lighter(), + "markupBold": True, + "markupMonospace": True,} + theme[MTT.TokenHorizontalRule] = { + "overline": True, + "underline": True, + "monospace": True, + "color": markup} + theme[MTT.TokenLineBreak] = { + "background": markup} + theme[MTT.TokenBlockquote] = { + "color": darker, + "markupColor": lightBackground, + "markupBackground": lightBackground} + theme[MTT.TokenCodeBlock] = { + "color": darker, + "markupBackground": veryLightBackground, + "monospace":True} + theme[MTT.TokenGithubCodeFence] = { + "color": markup} + theme[MTT.TokenPandocCodeFence] = { + "color": markup} + theme[MTT.TokenCodeFenceEnd] = { + "color": markup} + theme[MTT.TokenMention] = {} # FIXME + theme[MTT.TokenTableHeader] = { + "color": darker, "monospace":True} + theme[MTT.TokenTableDivider] = { + "color": markup, "monospace":True} + theme[MTT.TokenTablePipe] = { + "color": markup, "monospace":True} + + return theme + + def setColorScheme(self, defaultTextColor, backgroundColor, markupColor, + linkColor, spellingErrorColor): + self.defaultTextColor = defaultTextColor + self.backgroundColor = backgroundColor + self.markupColor = markupColor + self.linkColor = linkColor + self.spellingErrorColor = spellingErrorColor + self.defaultFormat.setForeground(QBrush(defaultTextColor)) + + # FIXME: generate a theme based on that + self.rehighlight() + + ########################################################################### + # ACTUAL FORMATTING + ########################################################################### + + def applyFormattingForToken(self, token, text): + if token.type != MTT.TokenUnknown: + format = self.format(token.position + token.openingMarkupLength) + markupFormat = self.format(token.position) + if self.theme.get("markup"): + markupFormat.setForeground(self.theme["markup"]) + + ## Debug + def debug(): + print("{}\n{}{}{}{} (state:{})".format( + text, + " "*token.position, + 
"^"*token.openingMarkupLength, + str(token.type).center(token.length + - token.openingMarkupLength + - token.closingMarkupLength, "-"), + "^" * token.closingMarkupLength, + self.currentBlockState(),) + ) + + #if token.type in range(6, 10): + #debug() + + theme = self.theme.get(token.type) + if theme: + format, markupFormat = self.formatsFromTheme(theme, + format, + markupFormat) + + # Format openning Markup + self.setFormat(token.position, token.openingMarkupLength, + markupFormat) + + # Format Text + self.setFormat( + token.position + token.openingMarkupLength, + token.length - token.openingMarkupLength - token.closingMarkupLength, + format) + + # Format closing Markup + if token.closingMarkupLength > 0: + self.setFormat( + token.position + token.length - token.closingMarkupLength, + token.closingMarkupLength, + markupFormat) + + else: + qWarning("MarkdownHighlighter.applyFormattingForToken() was passed" + " in a token of unknown type.") + + def formatsFromTheme(self, theme, format=QTextCharFormat(), + markupFormat=QTextCharFormat()): + # Token + if theme.get("color"): + format.setForeground(theme["color"]) + if theme.get("deltaSize"): + format.setFontPointSize(format.fontPointSize() + theme["deltaSize"]) + if theme.get("background"): + format.setBackground(theme["background"]) + if theme.get("monospace"): + format.setFontFamily("Monospace") + if theme.get("bold"): + format.setFontWeight(QFont.Bold) + if theme.get("italic"): + format.setFontItalic(theme["italic"]) + if theme.get("underline"): + format.setFontUnderline(theme["underline"]) + if theme.get("overline"): + format.setFontOverline(theme["overline"]) + if theme.get("strike"): + format.setFontStrikeOut(theme["strike"]) + if theme.get("super"): + format.setVerticalAlignment(QTextCharFormat.AlignSuperScript) + if theme.get("sub"): + format.setVerticalAlignment(QTextCharFormat.AlignSubScript) + + # Markup + if theme.get("formatMarkup"): + c = markupFormat.foreground() + markupFormat = QTextCharFormat(format) + 
markupFormat.setForeground(c) + if theme.get("markupBold"): + markupFormat.setFontWeight(QFont.Bold) + if theme.get("markupColor"): + markupFormat.setForeground(theme["markupColor"]) + if theme.get("markupBackground"): + markupFormat.setBackground(theme["markupBackground"]) + if theme.get("markupMonospace"): + markupFormat.setFontFamily("Monospace") + + return format, markupFormat + + ########################################################################### + # SETTINGS + ########################################################################### + + def setHighlighted(self, words, tags): + rehighlight = (self.highlightedWords != words + or self.highlightedTags != tags) + self.highlightedWords = words + self.highlightedTags = tags + if rehighlight: + self.rehighlight() + + def setSearched(self, expression, regExp=False, caseSensitivity=False): + """ + Define an expression currently searched, to be highlighted. + Can be regExp. + """ + rehighlight = self.searchExpression != expression or \ + self.searchExpressionRegExp != regExp or \ + self.searchExpressionCase != caseSensitivity + self.searchExpression = expression + self.searchExpressionRegExp = regExp + self.searchExpressionCase = caseSensitivity + if rehighlight: + self.rehighlight() + + def setDictionary(self, dictionary): + self.dictionary = dictionary + if self.spellCheckEnabled: + self.rehighlight() + + def increaseFontSize(self): + self.defaultFormat.setFontPointSize(self.defaultFormat.fontPointSize() + + 1.0) + self.rehighlight() + + def decreaseFontSize(self): + self.defaultFormat.setFontPointSize(self.defaultFormat.fontPointSize() + - 1.0) + self.rehighlight() + + def setEnableLargeHeadingSizes(self, enable): + self.setupHeadingFontSize(enable) + self.rehighlight() + + def setupHeadingFontSize(self, useLargeHeadings): + if useLargeHeadings: + self.theme[MTT.TokenSetextHeading1Line1]["deltaSize"] = 7 + self.theme[MTT.TokenSetextHeading2Line1]["deltaSize"] = 5 + 
self.theme[MTT.TokenSetextHeading1Line2]["deltaSize"] = 7 + self.theme[MTT.TokenSetextHeading2Line2]["deltaSize"] = 5 + self.theme[MTT.TokenAtxHeading1]["deltaSize"] = 7 + self.theme[MTT.TokenAtxHeading2]["deltaSize"] = 5 + self.theme[MTT.TokenAtxHeading3]["deltaSize"] = 3 + self.theme[MTT.TokenAtxHeading4]["deltaSize"] = 2 + self.theme[MTT.TokenAtxHeading5]["deltaSize"] = 1 + self.theme[MTT.TokenAtxHeading6]["deltaSize"] = 0 + + else: + for i in MTT.TITLES: + self.theme[i]["deltaSize"] = 0 + + def setUseUnderlineForEmphasis(self, enable): + self.useUndlerlineForEmphasis = enable + self.rehighlight() + + def setFont(self, fontFamily, fontSize): + font = QFont(family=fontFamily, pointSize=fontSize, weight=QFont.Normal, italic=False) + self.defaultFormat.setFont(font) + self.rehighlight() + + def setSpellCheckEnabled(self, enabled): + self.spellCheckEnabled = enabled + self.rehighlight() + + def setBlockquoteStyle(self, style): + self.blockquoteStyle = style + + if style == BS.BlockquoteStyleItalic: + self.theme[MTT.TokenBlockquote]["italic"] = True # "italic" is applied by formatsFromTheme + else: + self.theme[MTT.TokenBlockquote]["italic"] = False + + self.rehighlight() + + def setHighlightLineBreaks(self, enable): + self.highlightLineBreaks = enable + self.rehighlight() + + ########################################################################### + # GHOSTWRITER SPECIFIC?
+ ########################################################################### + + def onTypingResumed(self): + self.typingPaused = False + + def onTypingPaused(self): + self.typingPaused = True + block = self.document().findBlock(self.editor.textCursor().position()) + self.rehighlightBlock(block) + + def onHighlightBlockAtPosition(self, position): + block = self.document().findBlock(position) + self.rehighlightBlock(block) + + def onTextBlockRemoved(self, block): + if self.isHeadingBlockState(block.userState()): + self.headingRemoved.emit(block.position()) + + ########################################################################### + # SPELLCHECK + ########################################################################### + + def spellCheck(self, text): + cursorPosition = self.editor.textCursor().position() + cursorPosBlock = self.document().findBlock(cursorPosition) + cursorPosInBlock = -1 + + if self.currentBlock() == cursorPosBlock: + cursorPosInBlock = cursorPosition - cursorPosBlock.position() + + misspelledWord = self.dictionary.check(text, 0) + + while not misspelledWord.isNull(): + startIndex = misspelledWord.position() + length = misspelledWord.length() + + if self.typingPaused or cursorPosInBlock != startIndex + length: + spellingErrorFormat = self.format(startIndex) + spellingErrorFormat.setUnderlineColor(self.spellingErrorColor) + spellingErrorFormat.setUnderlineStyle( + qApp.style().styleHint(QStyle.SH_SpellCheckUnderlineStyle)) + + self.setFormat(startIndex, length, spellingErrorFormat) + + startIndex += length + misspelledWord = self.dictionary.check(text, startIndex) + + def storeHeadingData(self, token, text): + if token.type in [ + MTT.TokenAtxHeading1, + MTT.TokenAtxHeading2, + MTT.TokenAtxHeading3, + MTT.TokenAtxHeading4, + MTT.TokenAtxHeading5, + MTT.TokenAtxHeading6]: + level = token.type - MTT.TokenAtxHeading1 + 1 + s = token.position + token.openingMarkupLength + l = (token.length + - token.openingMarkupLength + -
token.closingMarkupLength) + headingText = text[s:s+l].strip() + + elif token.type == MTT.TokenSetextHeading1Line1: + level = 1 + headingText = text + + elif token.type == MTT.TokenSetextHeading2Line1: + level = 2 + headingText = text + + else: + qWarning("MarkdownHighlighter.storeHeadingData() encountered" + + " unexpected token: {}".format(token.type)) + return + + # FIXME: TypeError: could not convert 'TextBlockData' to 'QTextBlockUserData' + # blockData = self.currentBlockUserData() + # if blockData is None: + # blockData = TextBlockData(self.document(), self.currentBlock()) + # + # self.setCurrentBlockUserData(blockData) + self.headingFound.emit(level, headingText, self.currentBlock()) + + def isHeadingBlockState(self, state): + return state in [ + MS.MarkdownStateAtxHeading1, + MS.MarkdownStateAtxHeading2, + MS.MarkdownStateAtxHeading3, + MS.MarkdownStateAtxHeading4, + MS.MarkdownStateAtxHeading5, + MS.MarkdownStateAtxHeading6, + MS.MarkdownStateSetextHeading1Line1, + MS.MarkdownStateSetextHeading2Line1,] + + +def getLuminance(color): + return (0.30 * color.redF()) + \ + (0.59 * color.greenF()) + \ + (0.11 * color.blueF()) + + +def applyAlphaToChannel(foreground, background, alpha): + return (foreground * alpha) + (background * (1.0 - alpha)) + + +def applyAlpha(foreground, background, alpha): + blendedColor = QColor(0, 0, 0) + normalizedAlpha = alpha / 255.0 + blendedColor.setRed(applyAlphaToChannel( + foreground.red(), background.red(), normalizedAlpha)) + blendedColor.setGreen(applyAlphaToChannel( + foreground.green(), background.green(), normalizedAlpha)) + blendedColor.setBlue(applyAlphaToChannel( + foreground.blue(), background.blue(), normalizedAlpha)) + return blendedColor diff --git a/manuskript/ui/highlighters/markdownTokenizer.py b/manuskript/ui/highlighters/markdownTokenizer.py new file mode 100644 index 00000000..be079522 --- /dev/null +++ b/manuskript/ui/highlighters/markdownTokenizer.py @@ -0,0 +1,887 @@ +#!/usr/bin/python +# -*- coding:
utf-8 -*- + +import re +from PyQt5.QtCore import * +from PyQt5.QtGui import * +from PyQt5.QtWidgets import * + +from manuskript.ui.highlighters.markdownEnums import MarkdownState as MS +from manuskript.ui.highlighters.markdownEnums import MarkdownTokenType as MTT + +# This file is simply a python translation of GhostWriter's Tokenizer. +# http://wereturtle.github.io/ghostwriter/ +# GPLV3+. + +# ============================================================================== +# TOKEN +# ============================================================================== + +class Token: + def __init__(self): + self.type = -1 + self.position = 0 + self.length = 0 + self.openingMarkupLength = 0 + self.closingMarkupLength = 0 + +# ============================================================================== +# HIGHLIGHT TOKENIZER +# ============================================================================== + +class HighlightTokenizer: + def __init__(self): + self.tokens = [] + + def tokenize(self, text, currentState, previousState, nextState): + # Subclass me + return 0 + + def getTokens(self): + self.tokens = sorted(self.tokens, key=lambda t: t.position) + return self.tokens + + def getState(self): + return self.state + + def backtrackRequested(self): + return self.backtrack + + def clear(self): + self.tokens = [] + self.backtrack = False + self.state = -1 + + def addToken(self, token): + self.tokens.append(token) + + if token.type == -1: + print("Error here", token.position, token.length) + + def setState(self, state): + self.state = state + + def requestBacktrack(self): + self.backtrack = True + + def tokenLessThan(self, t1, t2): + return t1.position < t2.position + + +class MarkdownTokenizer(HighlightTokenizer): + + DUMMY_CHAR = "$" + MAX_MARKDOWN_HEADING_LEVEL = 6 + + paragraphBreakRegex = QRegExp("^\\s*$") + heading1SetextRegex = QRegExp("^===+\\s*$") + heading2SetextRegex = QRegExp("^---+\\s*$") + blockquoteRegex = QRegExp("^ {0,3}>.*$") + githubCodeFenceStartRegex =
QRegExp("^```+.*$") + githubCodeFenceEndRegex = QRegExp("^```+\\s*$") + pandocCodeFenceStartRegex = QRegExp("^~~~+.*$") + pandocCodeFenceEndRegex = QRegExp("^~~~+\\s*$") + numberedListRegex = QRegExp("^ {0,3}[0-9a-z]+[.)]\\s+.*$") + numberedNestedListRegex = QRegExp("^\\s*[0-9a-z]+[.)]\\s+.*$") + hruleRegex = QRegExp("\\s*(\\*\\s*){3,}|(\\s*(_\\s*){3,})|((\\s*(-\\s*){3,}))") + lineBreakRegex = QRegExp(".*\\s{2,}$") + emphasisRegex = QRegExp("(\\*(?![\\s*]).*[^\\s*]\\*)|_(?![\\s_]).*[^\\s_]_") + emphasisRegex.setMinimal(True) + strongRegex = QRegExp("\\*\\*(?=\\S).*\\S\\*\\*(?!\\*)|__(?=\\S).*\\S__(?!_)") + strongRegex.setMinimal(True) + strikethroughRegex = QRegExp("~~[^\\s]+.*[^\\s]+~~") + strikethroughRegex.setMinimal(True) + superScriptRegex = QRegExp("\^([^\\s]|(\\\\\\s))+\^") # Spaces must be escaped "\ " + superScriptRegex.setMinimal(True) + subScriptRegex = QRegExp("~([^\\s]|(\\\\\\s))+~") # Spaces must be escaped "\ " + subScriptRegex.setMinimal(True) + verbatimRegex = QRegExp("`+") + htmlTagRegex = QRegExp("<[^<>]+>") + htmlTagRegex.setMinimal(True) + htmlEntityRegex = QRegExp("&[a-zA-Z]+;|&#x?[0-9]+;") + automaticLinkRegex = QRegExp("(<[a-zA-Z]+\\:.+>)|(<.+@.+>)") + automaticLinkRegex.setMinimal(True) + inlineLinkRegex = QRegExp("\\[.+\\]\\(.+\\)") + inlineLinkRegex.setMinimal(True) + referenceLinkRegex = QRegExp("\\[(.+)\\]") + referenceLinkRegex.setMinimal(True) + referenceDefinitionRegex = QRegExp("^\\s*\\[.+\\]:") + imageRegex = QRegExp("!\\[.*\\]\\(.+\\)") + imageRegex.setMinimal(True) + htmlInlineCommentRegex = QRegExp("") + htmlInlineCommentRegex.setMinimal(True) + mentionRegex = QRegExp("\\B@\\w+(\\-\\w+)*(/\\w+(\\-\\w+)*)?") + pipeTableDividerRegex = QRegExp("^ {0,3}(\\|[ :]?)?-{3,}([ :]?\\|[ :]?-{3,}([ :]?\\|)?)+\\s*$") + + def __init__(self): + HighlightTokenizer.__init__(self) + + def tokenize(self, text, currentState, previousState, nextState): + self.currentState = currentState + self.previousState = previousState + self.nextState = 
nextState + + if (self.previousState == MS.MarkdownStateInGithubCodeFence or \ + self.previousState == MS.MarkdownStateInPandocCodeFence) and \ + self.tokenizeCodeBlock(text): + # No further tokenizing required + pass + + elif self.previousState != MS.MarkdownStateComment \ + and self.paragraphBreakRegex.exactMatch(text): + + if previousState in [MS.MarkdownStateListLineBreak, + MS.MarkdownStateNumberedList, + MS.MarkdownStateBulletPointList]: + self.setState(MS.MarkdownStateListLineBreak) + elif previousState != MS.MarkdownStateCodeBlock or \ + (text[:1] != "\t" and text[-4:] != " "): + self.setState(MS.MarkdownStateParagraphBreak) + + elif self.tokenizeSetextHeadingLine2(text) or \ + self.tokenizeCodeBlock(text) or \ + self.tokenizeMultilineComment(text) or \ + self.tokenizeHorizontalRule(text) or \ + self.tokenizeTableDivider(text): + # No further tokenizing required + pass + + elif self.tokenizeSetextHeadingLine1(text) or \ + self.tokenizeAtxHeading(text) or \ + self.tokenizeBlockquote(text) or \ + self.tokenizeNumberedList(text) or \ + self.tokenizeBulletPointList(text): + self.tokenizeLineBreak(text) + self.tokenizeInline(text) + + else: + if previousState in [MS.MarkdownStateListLineBreak, + MS.MarkdownStateNumberedList, + MS.MarkdownStateNumberedList]: + if not self.tokenizeNumberedList(text) and \ + not self.tokenizeBulletPointList(text) and \ + (text[:1] == "\t" or text[:4] == " "): + self.setState(previousState) + else: + self.setState(MS.MarkdownStateParagraph) + else: + self.setState(MS.MarkdownStateParagraph) + self.tokenizeLineBreak(text) + self.tokenizeInline(text) + + # Make sure that if the second line of a setext heading is removed the + # first line is reprocessed. Otherwise, it will still show up in the + # document as a heading. 
+ if (previousState == MS.MarkdownStateSetextHeading1Line1 and \ + self.getState() != MS.MarkdownStateSetextHeading1Line2) or \ + (previousState == MS.MarkdownStateSetextHeading2Line1 and \ + self.getState() != MS.MarkdownStateSetextHeading2Line2): + self.requestBacktrack() + + def tokenizeSetextHeadingLine1(self, text): + #Check the next line's state to see if this is a setext-style heading. + level = 0 + token = Token() + nextState = self.nextState + + if MS.MarkdownStateSetextHeading1Line2 == nextState: + level = 1 + self.setState(MS.MarkdownStateSetextHeading1Line1) + token.type = MTT.TokenSetextHeading1Line1 + + elif MS.MarkdownStateSetextHeading2Line2 == nextState: + level = 2 + self.setState(MS.MarkdownStateSetextHeading2Line1) + token.type = MTT.TokenSetextHeading2Line1 + + if level > 0: + token.length = len(text) + token.position = 0 + self.addToken(token) + return True + + return False + + def tokenizeSetextHeadingLine2(self, text): + level = 0 + setextMatch = False + token = Token() + previousState = self.previousState + if previousState == MS.MarkdownStateSetextHeading1Line1: + level = 1 + setextMatch = self.heading1SetextRegex.exactMatch(text) + self.setState(MS.MarkdownStateSetextHeading1Line2) + token.type = MTT.TokenSetextHeading1Line2 + + elif previousState == MS.MarkdownStateSetextHeading2Line1: + level = 2 + setextMatch = self.heading2SetextRegex.exactMatch(text) + self.setState(MS.MarkdownStateSetextHeading2Line2) + token.type = MTT.TokenSetextHeading2Line2 + + elif previousState == MS.MarkdownStateParagraph: + h1Line2 = self.heading1SetextRegex.exactMatch(text) + h2Line2 = self.heading2SetextRegex.exactMatch(text) + + if h1Line2 or h2Line2: + # Restart tokenizing on the previous line. 
+ self.requestBacktrack() + token.length = len(text) + token.position = 0 + + if h1Line2: + self.setState(MS.MarkdownStateSetextHeading1Line2) + token.type = MTT.TokenSetextHeading1Line2 + + else: + self.setState(MS.MarkdownStateSetextHeading2Line2) + token.type = MTT.TokenSetextHeading2Line2 + + self.addToken(token) + return True + + if level > 0: + if setextMatch: + token.length = len(text) + token.position = 0 + self.addToken(token) + return True + + else: + # Restart tokenizing on the previous line. + self.requestBacktrack() + False + + return False + + def tokenizeAtxHeading(self, text): + escapedText = self.dummyOutEscapeCharacters(text) + trailingPoundCount = 0 + level = 0 + + #Count the number of pound signs at the front of the string, + #up to the maximum allowed, to determine the heading level. + + while escapedText[level] == "#": + level += 1 + if level >= len(escapedText) or level >= self.MAX_MARKDOWN_HEADING_LEVEL: + break + + if level > 0 and level < len(text): + # Count how many pound signs are at the end of the text. 
+ while escapedText[-trailingPoundCount -1] == "#": + trailingPoundCount += 1 + + token = Token() + token.position = 0 + token.length = len(text) + token.type = MTT.TokenAtxHeading1 + level -1 + token.openingMarkupLength = level + token.closingMarkupLength = trailingPoundCount + self.addToken(token) + self.setState(MS.MarkdownStateAtxHeading1 + level -1) + return True + return False + + def tokenizeNumberedList(self, text): + previousState = self.previousState + if (previousState in [MS.MarkdownStateParagraphBreak, + MS.MarkdownStateUnknown, + MS.MarkdownStateCodeBlock, + MS.MarkdownStateCodeFenceEnd,] and \ + self.numberedListRegex.exactMatch(text)) or \ + (previousState in [MS.MarkdownStateListLineBreak, + MS.MarkdownStateNumberedList, + MS.MarkdownStateBulletPointList,] and \ + self.numberedNestedListRegex.exactMatch(text)): + periodIndex = text.find(".") + parenthIndex = text.find(")") + + if periodIndex < 0: + index = parenthIndex + elif parenthIndex < 0: + index = periodIndex + elif parenthIndex > periodIndex: + index = periodIndex + else: + index = parenthIndex + + if index > 0: + token = Token() + token.type = MTT.TokenNumberedList + token.position = 0 + token.length = len(text) + token.openingMarkupLength = index + 2 + self.addToken(token) + self.setState(MS.MarkdownStateNumberedList) + return True + + return False + + return False + + def tokenizeBulletPointList(self, text): + foundBulletChar = False + bulletCharIndex = -1 + spaceCount = 0 + whitespaceFoundAfterBulletChar = False + previousState = self.previousState + + if previousState not in [MS.MarkdownStateUnknown, + MS.MarkdownStateParagraphBreak, + MS.MarkdownStateListLineBreak, + MS.MarkdownStateNumberedList, + MS.MarkdownStateBulletPointList, + MS.MarkdownStateCodeBlock, + MS.MarkdownStateCodeFenceEnd]: + return False + + # Search for the bullet point character, which can + # be either a '+', '-', or '*'. 
+ + for i in range(len(text)): + if text[i] == " ": + if foundBulletChar: + # We've confirmed it's a bullet point by the whitespace that + # follows the bullet point character, and can now exit the + # loop. + + whitespaceFoundAfterBulletChar = True + break + + else: + spaceCount += 1 + + # If this list item is the first in the list, ensure the + # number of spaces preceeding the bullet point does not + # exceed three, as that would indicate a code block rather + # than a bullet point list. + + if spaceCount > 3 and previousState not in [ + MS.MarkdownStateNumberedList, + MS.MarkdownStateBulletPointList, + MS.MarkdownStateListLineBreak,] and \ + previousState in [ + MS.MarkdownStateParagraphBreak, + MS.MarkdownStateUnknown, + MS.MarkdownStateCodeBlock, + MS.MarkdownStateCodeFenceEnd,]: + return False + + elif text[i] == "\t": + if foundBulletChar: + # We've confirmed it's a bullet point by the whitespace that + # follows the bullet point character, and can now exit the + # loop. + + whitespaceFoundAfterBulletChar = True + break + + elif previousState in [ + MS.MarkdownStateParagraphBreak, + MS.MarkdownStateUnknown]: + + # If this list item is the first in the list, ensure that + # no tab character preceedes the bullet point, as that would + # indicate a code block rather than a bullet point list. 
+ + return False + + elif text[i] in ["+", "-", "*"]: + foundBulletChar = True + bulletCharIndex = i + + else: + return False + + if bulletCharIndex >= 0 and whitespaceFoundAfterBulletChar: + token = Token() + token.type = MTT.TokenBulletPointList + token.position = 0 + token.length = len(text) + token.openingMarkupLength = bulletCharIndex + 2 + self.addToken(token) + self.setState(MS.MarkdownStateBulletPointList) + return True + + return False + + def tokenizeHorizontalRule (self, text): + if self.hruleRegex.exactMatch(text): + token = Token() + token.type = MTT.TokenHorizontalRule + token.position = 0 + token.length = len(text) + self.addToken(token) + self.setState(MS.MarkdownStateHorizontalRule) + return True + + return False + + def tokenizeLineBreak(self, text): + currentState = self.currentState + previousState = self.previousState + nextState = self.nextState + + if currentState in [ + MS.MarkdownStateParagraph, + MS.MarkdownStateBlockquote, + MS.MarkdownStateNumberedList, + MS.MarkdownStateBulletPointList,]: + if previousState in [ + MS.MarkdownStateParagraph, + MS.MarkdownStateBlockquote, + MS.MarkdownStateNumberedList, + MS.MarkdownStateBulletPointList,]: + self.requestBacktrack() + + if nextState in [ + MS.MarkdownStateParagraph, + MS.MarkdownStateBlockquote, + MS.MarkdownStateNumberedList, + MS.MarkdownStateBulletPointList,]: + self.requestBacktrack() + if self.lineBreakRegex.exactMatch(text): + token = Token() + token.type = MTT.TokenLineBreak + token.position = len(text) - 1 + token.length = 1 + self.addToken(token) + return True + + return False + + def tokenizeBlockquote(self, text): + previousState = self.previousState + if previousState == MS.MarkdownStateBlockquote or \ + self.blockquoteRegex.exactMatch(text): + + # Find any '>' characters at the front of the line. 
+ markupLength = 0 + + for i in range(len(text)): + if text[i] == ">": + markupLength = i + 1 + elif text[i] != " ": + # There are no more '>' characters at the front of the line, + # so stop processing. + break + + token = Token() + token.type = MTT.TokenBlockquote + token.position = 0 + token.length = len(text) + + if markupLength > 0: + token.openingMarkupLength = markupLength + + self.addToken(token) + self.setState(MS.MarkdownStateBlockquote) + return True + return False + + def tokenizeCodeBlock(self, text): + previousState = self.previousState + if previousState in [ + MS.MarkdownStateInGithubCodeFence, + MS.MarkdownStateInPandocCodeFence]: + self.setState(previousState) + + if (previousState == MS.MarkdownStateInGithubCodeFence and \ + self.githubCodeFenceEndRegex.exactMatch(text)) or \ + (previousState == MS.MarkdownStateInPandocCodeFence and \ + self.pandocCodeFenceEndRegex.exactMatch(text)): + token = Token() + token.type = MTT.TokenCodeFenceEnd + token.position = 0 + token.length = len(text) + self.addToken(token) + self.setState(MS.MarkdownStateCodeFenceEnd) + + else: + token = Token() + token.type = MTT.TokenCodeBlock + token.position = 0 + token.length = len(text) + self.addToken(token) + + return True + + elif previousState in [ + MS.MarkdownStateCodeBlock, + MS.MarkdownStateParagraphBreak, + MS.MarkdownStateUnknown,] and \ + (text[:1] == "\t" or text[:4] == " "): + token = Token() + token.type = MTT.TokenCodeBlock + token.position = 0 + token.length = len(text) + token.openingMarkupLength = len(text) - len(text.lstrip()) + self.addToken(token) + self.setState(MS.MarkdownStateCodeBlock) + return True + + elif previousState in [ + MS.MarkdownStateParagraphBreak, + MS.MarkdownStateParagraph, + MS.MarkdownStateUnknown, + MS.MarkdownStateListLineBreak,]: + foundCodeFenceStart = False + token = Token() + if self.githubCodeFenceStartRegex.exactMatch(text): + foundCodeFenceStart = True + token.type = MTT.TokenGithubCodeFence + 
self.setState(MS.MarkdownStateInGithubCodeFence) + + elif self.pandocCodeFenceStartRegex.exactMatch(text): + foundCodeFenceStart = True + token.type = MTT.TokenPandocCodeFence + self.setState(MS.MarkdownStateInPandocCodeFence) + + if foundCodeFenceStart: + token.position = 0 + token.length = len(text) + self.addToken(token) + return True + + return False + + def tokenizeMultilineComment(self, text): + previousState = self.previousState + + if previousState == MS.MarkdownStateComment: + # Find the end of the comment, if any. + index = text.find("-->") + token = Token() + token.type = MTT.TokenHtmlComment + token.position = 0 + + if index >= 0: + token.length = index + 3 + self.addToken(token) + + # Return false so that the rest of the line that isn't within + # the commented segment can be highlighted as normal paragraph + # text. + + else: + token.length = len(text) + self.addToken(token) + self.setState(MS.MarkdownStateComment) + return True + + return False + + def tokenizeInline(self, text): + escapedText = self.dummyOutEscapeCharacters(text) + + # Check if the line is a reference definition. + if self.referenceDefinitionRegex.exactMatch(text): + colonIndex = escapedText.find(":") + token = Token() + token.type = MTT.TokenReferenceDefinition + token.position = 0 + token.length = colonIndex + 1 + self.addToken(token) + + # Replace the first bracket so that the '[...]:' reference definition + # start doesn't get highlighted as a reference link. 
+ + firstBracketIndex = escapedText.find("[") + if firstBracketIndex >= 0: + i = firstBracketIndex + escapedText = escapedText[:i] + self.DUMMY_CHAR + escapedText[i+1:] + + escapedText = self.tokenizeVerbatim(escapedText) + escapedText = self.tokenizeHtmlComments(escapedText) + escapedText = self.tokenizeTableHeaderRow(escapedText) + escapedText = self.tokenizeTableRow(escapedText) + escapedText = self.tokenizeMatches(MTT.TokenImage, escapedText, self.imageRegex, 0, 0, False, True) + escapedText = self.tokenizeMatches(MTT.TokenInlineLink, escapedText, self.inlineLinkRegex, 0, 0, False, True) + escapedText = self.tokenizeMatches(MTT.TokenReferenceLink, escapedText, self.referenceLinkRegex, 0, 0, False, True) + escapedText = self.tokenizeMatches(MTT.TokenHtmlEntity, escapedText, self.htmlEntityRegex) + escapedText = self.tokenizeMatches(MTT.TokenAutomaticLink, escapedText, self.automaticLinkRegex, 0, 0, False, True) + escapedText = self.tokenizeMatches(MTT.TokenStrikethrough, escapedText, self.strikethroughRegex, 2, 2, True) + escapedText = self.tokenizeMatches(MTT.TokenStrong, escapedText, self.strongRegex, 2, 2, True) + escapedText = self.tokenizeMatches(MTT.TokenEmphasis, escapedText, self.emphasisRegex, 1, 1, True) + escapedText = self.tokenizeMatches(MTT.TokenSuperScript, escapedText, self.superScriptRegex, 1, 1, True) + escapedText = self.tokenizeMatches(MTT.TokenSubScript, escapedText, self.subScriptRegex, 1, 1, True) + escapedText = self.tokenizeMatches(MTT.TokenHtmlTag, escapedText, self.htmlTagRegex) + escapedText = self.tokenizeMatches(MTT.TokenMention, escapedText, self.mentionRegex, 0, 0, False, True) + + return True + + def tokenizeVerbatim(self, text): + index = self.verbatimRegex.indexIn(text) + + while index >= 0: + end = "" + count = self.verbatimRegex.matchedLength() + + # Search for the matching end, which should have the same number + # of back ticks as the start. 
+ for i in range(count): + end += '`' + + endIndex = text.find(end, index + count) + + # If the end was found, add the verbatim token. + if endIndex >= 0: + token = Token() + token.type = MTT.TokenVerbatim + token.position = index + token.length = endIndex + count - index + token.openingMarkupLength = count + token.closingMarkupLength = count + self.addToken(token) + + # Fill out the token match in the string with the dummy + # character so that searches for other Markdown elements + # don't find anything within this token's range in the string. + + for i in range(index, index + token.length): + text = text[:i] + self.DUMMY_CHAR + text[i+1:] + + index += token.length + + # Else start searching again at the very next character. + else: + index += 1 + + index = self.verbatimRegex.indexIn(text, index) + return text + + def tokenizeHtmlComments(self, text): + previousState = self.previousState + + # Check for the end of a multiline comment so that it doesn't get further + # tokenized. Don't bother formatting the comment itself, however, because + # it should have already been tokenized in tokenizeMultilineComment(). + if previousState == MS.MarkdownStateComment: + commentEnd = text.find("-->") + for i in range(commentEnd + 3): + text = text[:i] + self.DUMMY_CHAR + text[i+1:] + + # Now check for inline comments (non-multiline). + commentStart = self.htmlInlineCommentRegex.indexIn(text) + + while commentStart >= 0: + commentLength = self.htmlInlineCommentRegex.matchedLength() + token = Token() + token.type = MTT.TokenHtmlComment + token.position = commentStart + token.length = commentLength + self.addToken(token) + + # Replace comment segment with dummy characters so that it doesn't + # get tokenized again. + + for i in range(commentStart, commentStart + commentLength): + text = text[:i] + self.DUMMY_CHAR + text[i+1:] + + commentStart = self.htmlInlineCommentRegex.indexIn(text, commentStart + commentLength) + + # Find multiline comment start, if any. 
+ commentStart = text.find("") token = Token() token.type = MTT.TokenHtmlComment token.position = 0 - + if index >= 0: token.length = index + 3 self.addToken(token) - + # Return false so that the rest of the line that isn't within # the commented segment can be highlighted as normal paragraph # text. - + else: token.length = len(text) self.addToken(token) self.setState(MS.MarkdownStateComment) return True - + return False - + def tokenizeInline(self, text): escapedText = self.dummyOutEscapeCharacters(text) - + # Check if the line is a reference definition. if self.referenceDefinitionRegex.exactMatch(text): colonIndex = escapedText.find(":") @@ -583,10 +583,10 @@ class MarkdownTokenizer(HighlightTokenizer): token.position = 0 token.length = colonIndex + 1 self.addToken(token) - + # Replace the first bracket so that the '[...]:' reference definition # start doesn't get highlighted as a reference link. - + firstBracketIndex = escapedText.find("[") if firstBracketIndex >= 0: i = firstBracketIndex @@ -610,21 +610,21 @@ class MarkdownTokenizer(HighlightTokenizer): escapedText = self.tokenizeMatches(MTT.TokenMention, escapedText, self.mentionRegex, 0, 0, False, True) return True - + def tokenizeVerbatim(self, text): index = self.verbatimRegex.indexIn(text) - + while index >= 0: end = "" count = self.verbatimRegex.matchedLength() - + # Search for the matching end, which should have the same number # of back ticks as the start. for i in range(count): end += '`' - + endIndex = text.find(end, index + count) - + # If the end was found, add the verbatim token. if endIndex >= 0: token = Token() @@ -634,26 +634,26 @@ class MarkdownTokenizer(HighlightTokenizer): token.openingMarkupLength = count token.closingMarkupLength = count self.addToken(token) - + # Fill out the token match in the string with the dummy # character so that searches for other Markdown elements # don't find anything within this token's range in the string. 
- + for i in range(index, index + token.length): text = text[:i] + self.DUMMY_CHAR + text[i+1:] - + index += token.length - + # Else start searching again at the very next character. else: index += 1 - + index = self.verbatimRegex.indexIn(text, index) return text - + def tokenizeHtmlComments(self, text): previousState = self.previousState - + # Check for the end of a multiline comment so that it doesn't get further # tokenized. Don't bother formatting the comment itself, however, because # it should have already been tokenized in tokenizeMultilineComment(). @@ -661,10 +661,10 @@ class MarkdownTokenizer(HighlightTokenizer): commentEnd = text.find("-->") for i in range(commentEnd + 3): text = text[:i] + self.DUMMY_CHAR + text[i+1:] - + # Now check for inline comments (non-multiline). commentStart = self.htmlInlineCommentRegex.indexIn(text) - + while commentStart >= 0: commentLength = self.htmlInlineCommentRegex.matchedLength() token = Token() @@ -672,15 +672,15 @@ class MarkdownTokenizer(HighlightTokenizer): token.position = commentStart token.length = commentLength self.addToken(token) - + # Replace comment segment with dummy characters so that it doesn't # get tokenized again. - + for i in range(commentStart, commentStart + commentLength): text = text[:i] + self.DUMMY_CHAR + text[i+1:] - + commentStart = self.htmlInlineCommentRegex.indexIn(text, commentStart + commentLength) - + # Find multiline comment start, if any. 
commentStart = text.find("") + else: + cursor.insertText("") + cursor.movePosition(QTextCursor.PreviousCharacter, + QTextCursor.MoveAnchor, 4) + self.setTextCursor(cursor) + + def commentLine(self): + cursor = self.textCursor() + + start = cursor.selectionStart() + end = cursor.selectionEnd() + block = self.document().findBlock(start) + block2 = self.document().findBlock(end) + + if True: + # Method 1 + cursor.beginEditBlock() + while block.isValid(): + self.commentBlock(block) + if block == block2: break + block = block.next() + cursor.endEditBlock() + + else: + # Method 2 + cursor.beginEditBlock() + cursor.setPosition(block.position()) + cursor.insertText("") + cursor.endEditBlock() + + def commentBlock(self, block): + cursor = QTextCursor(block) + text = block.text() + if text[:5] == "": + text2 = text[5:-4] + else: + text2 = "" + self.selectBlock(cursor) + cursor.insertText(text2) + + def insertFormattingMarkup(self, markup): + cursor = self.textCursor() + + # Select begining and end of words + self.selectWord(cursor) + + if cursor.hasSelection(): + start = cursor.selectionStart() + end = cursor.selectionEnd() + len(markup) + cursor.beginEditBlock() + cursor.setPosition(start) + cursor.insertText(markup) + cursor.setPosition(end) + cursor.insertText(markup) + cursor.endEditBlock() + cursor.movePosition(QTextCursor.PreviousCharacter, + QTextCursor.KeepAnchor, len(markup)) + #self.setTextCursor(cursor) + + else: + # Insert markup twice (for opening and closing around the cursor), + # and then move the cursor to be between the pair. 
+ cursor.beginEditBlock() + cursor.insertText(markup) + cursor.insertText(markup) + cursor.movePosition(QTextCursor.PreviousCharacter, + QTextCursor.MoveAnchor, len(markup)) + cursor.endEditBlock() + self.setTextCursor(cursor) + + def clearFormat(self): + cursor = self.textCursor() + text = cursor.selectedText() + if not text: + self.selectBlock(cursor) + text = cursor.selectedText() + text = self.clearedFormat(text) + cursor.insertText(text) + + def clearedFormat(self, text): + # FIXME: clear also block formats + for reg, rep, flags in [ + ("\*\*(.*?)\*\*", "\\1", None), # bold + ("__(.*?)__", "\\1", None), # bold + ("\*(.*?)\*", "\\1", None), # emphasis + ("_(.*?)_", "\\1", None), # emphasis + ("`(.*?)`", "\\1", None), # verbatim + ("~~(.*?)~~", "\\1", None), # strike + ("\^(.*?)\^", "\\1", None), # superscript + ("~(.*?)~", "\\1", None), # subscript + ("", "\\1", re.S), # comments + + + # LINES OR BLOCKS + (r"^#*\s*(.+?)\s*", "\\1", re.M), # ATX + (r"^[=-]*$", "", re.M), # Setext + (r"^`*$", "", re.M), # Code block fenced + (r"^\s*[-+*]\s*(.*?)\s*$", "\\1", re.M), # Bullet List + (r"^\s*[0-9a-z](\.|\))\s*(.*?)\s*$", "\\2", re.M), # Bullet List + (r"\s*[>\s]*(.*?)\s*$", "\\1", re.M), # Code block and blockquote + + ]: + text = re.sub(reg, rep, text, flags if flags else 0) + return text + + def clearedFormatForStats(self, text): + # Remove stuff that musn't be counted + # FIXME: clear also block formats + for reg, rep, flags in [ + ("", "", re.S), # comments + ]: + text = re.sub(reg, rep, text, flags if flags else 0) + return text + + def titleSetext(self, level): + cursor = self.textCursor() + + cursor.beginEditBlock() + # Is it already a Setext header? 
+ if cursor.block().userState() in [ + MS.MarkdownStateSetextHeading1Line2, + MS.MarkdownStateSetextHeading2Line2]: + cursor.movePosition(QTextCursor.PreviousBlock) + + text = cursor.block().text() + + if cursor.block().userState() in [ + MS.MarkdownStateSetextHeading1Line1, + MS.MarkdownStateSetextHeading2Line1]: + # Need to remove line below + c = QTextCursor(cursor.block().next()) + self.selectBlock(c) + c.insertText("") + + char = "=" if level == 1 else "-" + text = re.sub("^#*\s*(.*)\s*#*", "\\1", text) # Removes # + sub = char * len(text) + text = text + "\n" + sub + + self.selectBlock(cursor) + cursor.insertText(text) + cursor.endEditBlock() + + def titleATX(self, level): + cursor = self.textCursor() + text = cursor.block().text() + + # Are we in a Setext Header? + if cursor.block().userState() in [ + MS.MarkdownStateSetextHeading1Line1, + MS.MarkdownStateSetextHeading2Line1]: + # Need to remove line below + cursor.beginEditBlock() + c = QTextCursor(cursor.block().next()) + self.selectBlock(c) + c.insertText("") + + self.selectBlock(cursor) + cursor.insertText(text) + cursor.endEditBlock() + return + + elif cursor.block().userState() in [ + MS.MarkdownStateSetextHeading1Line2, + MS.MarkdownStateSetextHeading2Line2]: + cursor.movePosition(QTextCursor.PreviousBlock) + self.setTextCursor(cursor) + self.titleATX(level) + return + + m = re.match("^(#+)(\s*)(.+)", text) + if m: + pre = m.group(1) + space = m.group(2) + txt = m.group(3) + + if len(pre) == level: + # Remove title + text = txt + else: + text = "#" * level + space + txt + + else: + text = "#" * level + " " + text + + self.selectBlock(cursor) + cursor.insertText(text) diff --git a/manuskript/ui/views/textEditView.py b/manuskript/ui/views/textEditView.py index 67a9d375..6090f2cc 100644 --- a/manuskript/ui/views/textEditView.py +++ b/manuskript/ui/views/textEditView.py @@ -10,10 +10,7 @@ from manuskript import settings from manuskript.enums import Outline, World, Character, Plot from manuskript import 
functions as F from manuskript.models.outlineModel import outlineModel -from manuskript.ui.editors.MDFunctions import MDFormatSelection from manuskript.ui.highlighters import BasicHighlighter -# from manuskript.ui.highlighters import MMDHighlighter -from manuskript.ui.editors.textFormat import textFormat from manuskript.ui import style as S try: @@ -544,32 +541,6 @@ class textEditView(QTextEdit): QTextEdit.focusOutEvent(self, event) self.submit() - def focusInEvent(self, event): - """Finds textFormatter and attach them to that view.""" - QTextEdit.focusInEvent(self, event) - - p = self.parent() - while p.parent(): - p = p.parent() - - if self._index: - for tF in p.findChildren(textFormat, QRegExp(".*"), Qt.FindChildrenRecursively): - tF.updateFromIndex(self._index) - tF.setTextEdit(self) - - def applyFormat(self, _format): - - if self._textFormat == "md": - - if _format == "Bold": - MDFormatSelection(self, 0) - elif _format == "Italic": - MDFormatSelection(self, 1) - elif _format == "Code": - MDFormatSelection(self, 2) - elif _format == "Clear": - MDFormatSelection(self) - ############################################################################### # KEYBOARD SHORTCUTS ############################################################################### From 9baea11791527eab4a92e3fb79c29f6f4410083a Mon Sep 17 00:00:00 2001 From: Olivier Keshavjee Date: Mon, 27 Nov 2017 21:13:15 +0100 Subject: [PATCH 13/13] Fixes FullScreenEditor with MDEditView --- manuskript/ui/highlighters/basicHighlighter.py | 5 ++++- manuskript/ui/views/MDEditView.py | 4 ++++ manuskript/ui/views/textEditView.py | 6 +++--- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/manuskript/ui/highlighters/basicHighlighter.py b/manuskript/ui/highlighters/basicHighlighter.py index 18c1dfc0..960b7201 100644 --- a/manuskript/ui/highlighters/basicHighlighter.py +++ b/manuskript/ui/highlighters/basicHighlighter.py @@ -64,7 +64,10 @@ class BasicHighlighter(QSyntaxHighlighter): else: opt = 
self.editor._themeData self.defaultTextColor = QColor(opt["Text/Color"]) - self.backgroundColor = QColor(opt["Background/Color"]) + self.backgroundColor = F.mixColors( + QColor(opt["Foreground/Color"]), + QColor(opt["Background/Color"]), + int(opt["Foreground/Opacity"])/100.) self.markupColor = F.mixColors(self.defaultTextColor, self.backgroundColor, .3) diff --git a/manuskript/ui/views/MDEditView.py b/manuskript/ui/views/MDEditView.py index 2d31f716..4521906b 100644 --- a/manuskript/ui/views/MDEditView.py +++ b/manuskript/ui/views/MDEditView.py @@ -24,6 +24,10 @@ class MDEditView(textEditView): self._textFormat = "md" self._highlighterClass = MarkdownHighlighter + if index: + # We have to setup things anew, for the highlighter notably + self.setCurrentModelIndex(index) + # def focusInEvent(self, event): # """Finds textFormatter and attach them to that view.""" # textEditView.focusInEvent(self, event) diff --git a/manuskript/ui/views/textEditView.py b/manuskript/ui/views/textEditView.py index 6090f2cc..07fbeb9b 100644 --- a/manuskript/ui/views/textEditView.py +++ b/manuskript/ui/views/textEditView.py @@ -83,9 +83,9 @@ class textEditView(QTextEdit): else: self.spellcheck = False - # if self._highlighting and not self.highlighter: - # self.highlighter = self._highlighterClass(self) - # self.highlighter.setDefaultBlockFormat(self._defaultBlockFormat) + if self._highlighting and not self.highlighter: + self.highlighter = self._highlighterClass(self) + self.highlighter.setDefaultBlockFormat(self._defaultBlockFormat) def getDefaultLocale(self): default_locale = enchant.get_default_language()