Checkpoint: adding new markdown highlighter

This commit is contained in:
Olivier Keshavjee 2017-11-22 13:43:40 +01:00
parent 550f889251
commit 9be2edeee7
8 changed files with 1726 additions and 18 deletions

View file

@ -5,7 +5,7 @@ from PyQt5.QtWidgets import QPlainTextEdit, QGroupBox, qApp, QVBoxLayout, QCheck
from manuskript.exporter.manuskript.plainText import plainText
from manuskript.functions import mainWindow
from manuskript.ui.editors.MMDHighlighter import MMDHighlighter
from manuskript.ui.highlighters import MMDHighlighter
from manuskript.ui.exporters.manuskript.plainTextSettings import exporterSettings
@ -72,4 +72,4 @@ class markdownSettings(exporterSettings):
self.settings = exporterSettings.getSettings(self)
self.settings["Preview"]["MarkdownHighlighter"] = self.chkMarkdownHighlighter.isChecked()
return self.settings
return self.settings

View file

@ -5,10 +5,10 @@ import re
from PyQt5.QtCore import Qt
from PyQt5.QtGui import QTextCharFormat, QFont, QTextCursor, QFontMetrics
from manuskript.ui.editors.basicHighlighter import basicHighlighter
from manuskript.ui.highlighters import BasicHighlighter
class MMDHighlighter(basicHighlighter):
class MMDHighlighter(BasicHighlighter):
MARKDOWN_REGEX = {
'Bold': '(\*\*)(.+?)(\*\*)',
@ -27,7 +27,7 @@ class MMDHighlighter(basicHighlighter):
}
def __init__(self, editor, style="Default"):
basicHighlighter.__init__(self, editor)
BasicHighlighter.__init__(self, editor)
self.editor = editor
@ -36,11 +36,11 @@ class MMDHighlighter(basicHighlighter):
self.rules[key] = re.compile(self.MARKDOWN_REGEX[key])
def highlightBlock(self, text):
basicHighlighter.highlightBlockBefore(self, text)
BasicHighlighter.highlightBlockBefore(self, text)
self.doHighlightBlock(text)
basicHighlighter.highlightBlockAfter(self, text)
BasicHighlighter.highlightBlockAfter(self, text)
def doHighlightBlock(self, text):
"""

View file

@ -0,0 +1,6 @@
#!/usr/bin/python
# -*- coding: utf8 -*-
from manuskript.ui.highlighters.basicHighlighter import BasicHighlighter
from manuskript.ui.highlighters.MMDHighlighter import MMDHighlighter
from manuskript.ui.highlighters.markdownHighlighter import MarkdownHighlighter

View file

@ -4,12 +4,13 @@
import re
from PyQt5.QtCore import Qt
from PyQt5.QtGui import QBrush, QTextCursor, QColor, QFont, QSyntaxHighlighter, QTextBlockFormat, QTextCharFormat
from PyQt5.QtGui import QBrush, QTextCursor, QColor, QFont, QSyntaxHighlighter
from PyQt5.QtGui import QTextBlockFormat, QTextCharFormat
import manuskript.models.references as Ref
class basicHighlighter(QSyntaxHighlighter):
class BasicHighlighter(QSyntaxHighlighter):
def __init__(self, editor):
QSyntaxHighlighter.__init__(self, editor.document())
@ -38,7 +39,7 @@ class basicHighlighter(QSyntaxHighlighter):
def highlightBlockBefore(self, text):
"""Highlighting to do before anything else.
When subclassing basicHighlighter, you must call highlightBlockBefore
When subclassing BasicHighlighter, you must call highlightBlockBefore
before you do any custom highlighting.
"""
@ -56,7 +57,7 @@ class basicHighlighter(QSyntaxHighlighter):
def highlightBlockAfter(self, text):
"""Highlighting to do after everything else.
When subclassing basicHighlighter, you must call highlightBlockAfter
When subclassing BasicHighlighter, you must call highlightBlockAfter
after your custom highlighting.
"""
@ -91,13 +92,16 @@ class basicHighlighter(QSyntaxHighlighter):
textedText = text + " "
# Based on http://john.nachtimwald.com/2009/08/22/qplaintextedit-with-in-line-spell-check/
WORDS = '(?iu)([\w\']+)[^\'\w]' # (?iu) means case insensitive and unicode
WORDS = r'(?iu)([\w\']+)[^\'\w]'
# (?iu) means case insensitive and unicode
if hasattr(self.editor, "spellcheck") and self.editor.spellcheck:
for word_object in re.finditer(WORDS, textedText):
if self.editor._dict and not self.editor._dict.check(word_object.group(1)):
if (self.editor._dict
and not self.editor._dict.check(word_object.group(1))):
format = self.format(word_object.start(1))
format.setUnderlineColor(self._misspelledColor)
# SpellCheckUnderline fails with some fonts
format.setUnderlineStyle(QTextCharFormat.WaveUnderline)
self.setFormat(word_object.start(1),
word_object.end(1) - word_object.start(1), format)
word_object.end(1) - word_object.start(1),
format)

View file

@ -0,0 +1,94 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#==============================================================================
# MARKDOWN STATES
#==============================================================================
class MarkdownState:
    """Integer block states produced by the markdown tokenizer.

    MarkdownHighlighter stores these values as the state of each text
    block and compares them against the states of neighbouring blocks.
    """

    MarkdownStateUnknown = -1

    # Paragraphs and breaks
    MarkdownStateParagraphBreak = 0
    MarkdownStateListLineBreak = 1
    MarkdownStateParagraph = 2

    # ATX headings ("# Title")
    MarkdownStateAtxHeading1 = 3
    MarkdownStateAtxHeading2 = 4
    MarkdownStateAtxHeading3 = 5
    MarkdownStateAtxHeading4 = 6
    MarkdownStateAtxHeading5 = 7
    MarkdownStateAtxHeading6 = 8

    # Quotes, code and comments
    MarkdownStateBlockquote = 9
    MarkdownStateCodeBlock = 10
    MarkdownStateInGithubCodeFence = 11
    MarkdownStateInPandocCodeFence = 12
    MarkdownStateCodeFenceEnd = 13
    MarkdownStateComment = 14

    # Rules and lists
    MarkdownStateHorizontalRule = 15
    MarkdownStateNumberedList = 16
    MarkdownStateBulletPointList = 17

    # Setext headings (title line followed by an underline line)
    MarkdownStateSetextHeading1Line1 = 18
    MarkdownStateSetextHeading1Line2 = 19
    MarkdownStateSetextHeading2Line1 = 20
    MarkdownStateSetextHeading2Line2 = 21

    # Pipe tables
    MarkdownStatePipeTableHeader = 22
    MarkdownStatePipeTableDivider = 23
    MarkdownStatePipeTableRow = 24
#==============================================================================
# MARKDOWN TOKEN TYPE
#==============================================================================
class MarkdownTokenType:
    """Integer identifiers for the tokens emitted by the tokenizer.

    MarkdownHighlighter uses these values as keys of its theme
    dictionary; ``TITLES`` groups every heading-related token type.
    """

    TokenUnknown = -1

    # Titles
    TokenAtxHeading1 = 0
    TokenAtxHeading2 = 1
    TokenAtxHeading3 = 2
    TokenAtxHeading4 = 3
    TokenAtxHeading5 = 4
    TokenAtxHeading6 = 5
    TokenSetextHeading1Line1 = 6
    TokenSetextHeading1Line2 = 7
    TokenSetextHeading2Line1 = 8
    TokenSetextHeading2Line2 = 9

    # Inline formatting
    TokenEmphasis = 10
    TokenStrong = 11
    TokenStrikethrough = 12
    TokenVerbatim = 13

    # HTML, links and images
    TokenHtmlTag = 14
    TokenHtmlEntity = 15
    TokenAutomaticLink = 16
    TokenInlineLink = 17
    TokenReferenceLink = 18
    TokenReferenceDefinition = 19
    TokenImage = 20
    TokenHtmlComment = 21

    # Block-level elements
    TokenNumberedList = 22
    TokenBulletPointList = 23
    TokenHorizontalRule = 24
    TokenLineBreak = 25
    TokenBlockquote = 26
    TokenCodeBlock = 27
    TokenGithubCodeFence = 28
    TokenPandocCodeFence = 29
    TokenCodeFenceEnd = 30

    # Extensions
    TokenMention = 31
    TokenTableHeader = 32
    TokenTableDivider = 33
    TokenTablePipe = 34
    TokenSuperScript = 35
    TokenSubScript = 36

    TokenLast = 37

    # Every token type that belongs to a heading (both lines of setext
    # headings included).
    TITLES = [
        TokenAtxHeading1, TokenAtxHeading2, TokenAtxHeading3,
        TokenAtxHeading4, TokenAtxHeading5, TokenAtxHeading6,
        TokenSetextHeading1Line1, TokenSetextHeading1Line2,
        TokenSetextHeading2Line1, TokenSetextHeading2Line2,
    ]
class BlockquoteStyle:
    """Integer identifiers for the available blockquote renderings."""

    BlockquoteStylePlain = 0
    BlockquoteStyleItalic = 1
    BlockquoteStyleFancy = 2

View file

@ -0,0 +1,718 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
A QSyntaxHighlighter for markdown, using tokenizer. More accurate than simple
regexp, but not yet perfect.
"""
import re
from PyQt5.QtCore import Qt, pyqtSignal, qWarning, QRegExp
from PyQt5.QtGui import (QSyntaxHighlighter, QTextBlock, QColor, QFont,
QTextCharFormat, QBrush, QPalette)
from PyQt5.QtWidgets import qApp, QStyle
from manuskript.ui.highlighters.markdownTokenizer import MarkdownTokenizer
from manuskript.ui.highlighters.markdownEnums import MarkdownState as MS
from manuskript.ui.highlighters.markdownEnums import MarkdownTokenType as MTT
from manuskript.ui.highlighters.markdownEnums import BlockquoteStyle as BS
# Un longue ligne. Un longue ligne. Un longue ligne. Un longue ligne.asdasdasda
GW_FADE_ALPHA = 140
# Highlighter based on GhostWriter (http://wereturtle.github.io/ghostwriter/).
# GPLV3+.
# FIXME: Setext headings don't work anymore
class MarkdownHighlighter(QSyntaxHighlighter):
highlightBlockAtPosition = pyqtSignal(int)
headingFound = pyqtSignal(int, str, QTextBlock)
headingRemoved = pyqtSignal(int)
def __init__(self, editor):
    """Create a highlighter attached to the document of ``editor``.

    Sets the default colours, the default character format (based on
    the document's default font), the default theme and the custom
    highlighting rules.

    :param editor: text editor widget; its ``document()`` is the
        document being highlighted and the widget itself is kept in
        ``self.editor``.
    """
    QSyntaxHighlighter.__init__(self, editor.document())

    # Default values
    self.editor = editor
    self.tokenizer = MarkdownTokenizer()
    self.spellCheckEnabled = False
    # spellCheck() reads both attributes below; they are defined here
    # so that enabling spell checking before onTypingPaused() or
    # setDictionary() was called cannot fail on a missing attribute.
    self.typingPaused = True
    self.dictionary = None
    self.inBlockquote = False
    self.defaultTextColor = QColor(Qt.black)
    self.backgroundColor = QColor(Qt.white)
    self.markupColor = QColor(Qt.black)
    self.linkColor = QColor(Qt.blue)
    self.spellingErrorColor = QColor(Qt.red)
    self.blockquoteStyle = BS.BlockquoteStyleFancy

    # Settings
    # NOTE: the attribute name is misspelled, but it is kept because
    # setUseUnderlineForEmphasis() writes to the same name.
    self.useUndlerlineForEmphasis = False
    self.highlightLineBreaks = True

    # Queued, so that a block re-highlight requested from inside
    # highlightBlock() is delivered after the current call returns.
    self.highlightBlockAtPosition.connect(self.onHighlightBlockAtPosition,
                                          Qt.QueuedConnection)

    font = self.document().defaultFont()
    font.setStyleStrategy(QFont.PreferAntialias)
    self.defaultFormat = QTextCharFormat()
    self.defaultFormat.setFont(font)
    self.defaultFormat.setForeground(QBrush(self.defaultTextColor))

    self.theme = self.defaultTheme()
    self.setupHeadingFontSize(True)

    self.highlightedWords = []
    self.highlightedTags = []
    self.searchExpression = ""
    self.searchExpressionRegExp = False
    self.searchExpressionCase = False

    # (regular expression, theme) pairs applied by highlightBlock().
    # When the expression has groups, groups 1 and 2 are formatted as
    # markup and the text between them as content.
    self.customRules = [
        ("(°).*?(°)", {"background": Qt.yellow,
                       "markupColor": Qt.lightGray}),
    ]
def highlightBlock(self, text):
    """
    Highlight one text block; called by QSyntaxHighlighter.

    The first block of the document is formatted as a title and
    nothing else is done for it. Any other block is tokenized, each
    token is formatted from the theme, and then hashtags, highlighted
    words/tags, the searched expression and the custom rules are
    applied on top.

    Note: Never set the QTextBlockFormat for a QTextBlock from within
    the highlighter. Depending on how the block format is modified,
    a recursive call to the highlighter may be triggered, which will
    cause the application to crash.

    Likewise, don't try to set the QTextBlockFormat outside the highlighter
    (i.e., from within the text editor). While the application will not
    crash, the format change will be added to the undo stack. Attempting
    to undo from that point on will cause the undo stack to be virtually
    frozen, since undoing the format operation causes the text to be
    considered changed, thus triggering the slot that changes the text
    formatting to be triggered yet again.

    :param text: content of the block being highlighted.
    """
    if self.currentBlock().blockNumber() == 0:
        # This is the title
        bf = QTextCharFormat()
        bf.setFontPointSize(self.editor.font().pointSize() * 2)
        bf.setFontWeight(QFont.Bold)
        bf.setForeground(Qt.lightGray)
        self.setFormat(0, len(text), bf)
        return

    lastState = self.currentBlockState()
    self.setFormat(0, len(text), self.defaultFormat)

    if self.tokenizer is not None:
        self.tokenizer.clear()
        block = self.currentBlock()
        nextState = MS.MarkdownStateUnknown
        previousState = self.previousBlockState()

        if block.next().isValid():
            nextState = block.next().userState()

        self.tokenizer.tokenize(text, lastState, previousState, nextState)
        self.setCurrentBlockState(self.tokenizer.getState())

        self.inBlockquote = (self.tokenizer.getState()
                             == MS.MarkdownStateBlockquote)

        # STATE FORMATTING
        # FIXME: generic
        if self.currentBlockState() in (
                MS.MarkdownStatePipeTableHeader,
                MS.MarkdownStatePipeTableDivider,
                MS.MarkdownStatePipeTableRow):
            fmt = QTextCharFormat()
            f = fmt.font()
            f.setFamily("Monospace")
            fmt.setFont(f)
            self.setFormat(0, len(text), fmt)

        # Monospace the blank chars
        i = 0
        while i <= len(text)-1 and text[i] in [" ", "\t"]:
            fmt = self.format(i)
            fmt.setFontFamily("Monospace")
            self.setFormat(i, 1, fmt)
            i += 1

        for token in self.tokenizer.getTokens():
            if token.type == MTT.TokenUnknown:
                qWarning("Highlighter found unknown token type in text block.")
                continue

            # The second line of a setext heading is deliberately not
            # listed: only the title line carries the heading text.
            if token.type in (
                    MTT.TokenAtxHeading1,
                    MTT.TokenAtxHeading2,
                    MTT.TokenAtxHeading3,
                    MTT.TokenAtxHeading4,
                    MTT.TokenAtxHeading5,
                    MTT.TokenAtxHeading6,
                    MTT.TokenSetextHeading1Line1,
                    MTT.TokenSetextHeading2Line1):
                self.storeHeadingData(token, text)

            self.applyFormattingForToken(token, text)

        if self.tokenizer.backtrackRequested():
            # Re-highlight the previous block (through the queued
            # signal) because its meaning depends on this one.
            previous = self.currentBlock().previous()
            self.highlightBlockAtPosition.emit(previous.position())

    if self.spellCheckEnabled:
        self.spellCheck(text)

    # HASHTAGS AND HIGHLIGHTS

    # Hashtags
    # The pattern requires one non-'#' character before the hashtag;
    # that character is excluded from the formatted range (+1 / -1).
    s = 0
    ht = QRegExp(r'([^#])(#[\w]+)')
    while ht.indexIn(text, s) >= 0:
        f = self.format(ht.pos()+1)
        f.setForeground(QColor("#07c"))
        f.setFontWeight(QFont.Bold)
        self.setFormat(ht.pos()+1, ht.matchedLength()-1, f)
        s = ht.pos() + 1

    # Highlighted
    loweredText = text.lower()
    for w in self.highlightedWords + self.highlightedTags:
        needle = w.lower()
        pos = loweredText.find(needle)
        while pos >= 0:
            # Formatted one character at a time so that the format
            # already set on each character is kept.
            for i in range(pos, pos + len(w)):
                f = self.format(i)
                f.setBackground(QBrush(QColor("#fAf")))
                self.setFormat(i, 1, f)
            pos = loweredText.find(needle, pos+1)

    # Searched
    if self.searchExpression:
        s = self.searchExpression

        if not self.searchExpressionRegExp:
            # Pick the haystack/needle pair once so that the first and
            # the following occurrences are searched the same way
            # (the case-sensitive search used to fall back to a
            # case-insensitive search after the first hit).
            if self.searchExpressionCase:
                haystack, needle = text, s
            else:
                haystack, needle = text.lower(), s.lower()

            pos = haystack.find(needle)
            while pos >= 0:
                for i in range(pos, pos + len(s)):
                    f = self.format(i)
                    f.setBackground(QBrush(QColor("#Aff")))
                    self.setFormat(i, 1, f)
                pos = haystack.find(needle, pos+1)

        else:
            # Using QRegExp
            rx = QRegExp(s)
            if not self.searchExpressionCase:
                rx.setCaseSensitivity(Qt.CaseInsensitive)
            p = rx.indexIn(text)
            while p != -1:
                f = self.format(p)
                f.setBackground(QBrush(QColor("#Aff")))
                self.setFormat(p, rx.matchedLength(), f)
                p = rx.indexIn(text, p + 1)

    # Custom rules
    for rule, theme in self.customRules:
        for m in re.finditer(rule, text):

            if not m.groups():  # No groups, therefore no markup
                f = self.format(m.start())
                f, _ = self.formatsFromTheme(theme, f)
                self.setFormat(m.start(), len(m.group()), f)

            else:
                # Group 1 is the opening markup, group 2 the closing
                # markup, everything in between is the content.
                mf = self.format(m.start())
                f = self.format(m.start() + len(m.group(1)))
                f, mf = self.formatsFromTheme(theme, f, mf)
                self.setFormat(m.start(1), len(m.group(1)), mf)
                self.setFormat(m.start(2), len(m.group(2)), mf)
                self.setFormat(m.start(1) + len(m.group(1)),
                               len(m.group())
                               - len(m.group(1))
                               - len(m.group(2)), f)

    # If the block has transitioned from previously being a heading to now
    # being a non-heading, signal that the position in the document no
    # longer contains a heading.
    if self.isHeadingBlockState(lastState) and \
       not self.isHeadingBlockState(self.currentBlockState()):
        self.headingRemoved.emit(self.currentBlock().position())
###########################################################################
# COLORS & FORMATTING
###########################################################################
def defaultTheme(self):
    """Build the default theme from the application palette.

    :return: dict whose ``"markup"`` entry is the default markup colour
        and whose other keys are MarkdownTokenType values mapped to a
        dict of formatting options read by formatsFromTheme().
    """
    markup = qApp.palette().color(QPalette.Mid)
    if markup == Qt.black:
        markup = Qt.lightGray
    dark = qApp.palette().color(QPalette.Dark)
    if dark == Qt.black:
        dark = QColor(Qt.gray)
    darker = dark.darker(150)

    # Text background and two darker shades of it
    background = qApp.palette().color(QPalette.Base)
    lightBackground = background.darker(130)
    veryLightBackground = background.darker(105)

    # Options a token entry may contain (see formatsFromTheme()):
    #   "color", "deltaSize", "background", "monospace", "bold",
    #   "italic", "underline", "overline", "strike", "super", "sub",
    #   "formatMarkup", "markupBold", "markupColor",
    #   "markupBackground", "markupMonospace"
    theme = {"markup": markup}

    # Headings: one independent dict per token type, because
    # setupHeadingFontSize() later edits them individually.
    for tokenType in MTT.TITLES:
        theme[tokenType] = {
            "formatMarkup": True,
            "bold": True,
            "monospace": True,
        }

    # ATX headings get lighter as the level increases.
    color = QColor(Qt.darkBlue)
    atxHeadings = (MTT.TokenAtxHeading1, MTT.TokenAtxHeading2,
                   MTT.TokenAtxHeading3, MTT.TokenAtxHeading4,
                   MTT.TokenAtxHeading5, MTT.TokenAtxHeading6)
    theme[atxHeadings[0]]["color"] = color
    for step, tokenType in enumerate(atxHeadings[1:], start=1):
        theme[tokenType]["color"] = color.lighter(100 + step * 50)

    # The underline line of setext headings replaces the generic
    # heading entry created above.
    for tokenType in (MTT.TokenSetextHeading1Line2,
                      MTT.TokenSetextHeading2Line2):
        theme[tokenType] = {"color": markup, "monospace": True}

    # Beautifiers
    theme[MTT.TokenEmphasis] = {"italic": True}
    theme[MTT.TokenStrong] = {"bold": True}
    theme[MTT.TokenStrikethrough] = {"strike": True}
    theme[MTT.TokenVerbatim] = {
        "monospace": True,
        "background": veryLightBackground,
        "formatMarkup": True,
        "markupColor": markup,
    }
    theme[MTT.TokenSuperScript] = {"super": True, "formatMarkup": True}
    theme[MTT.TokenSubScript] = {"sub": True, "formatMarkup": True}

    # HTML
    for tokenType in (MTT.TokenHtmlTag, MTT.TokenHtmlEntity):
        theme[tokenType] = {"color": Qt.red}

    # Links
    for tokenType in (MTT.TokenAutomaticLink, MTT.TokenInlineLink,
                      MTT.TokenReferenceLink,
                      MTT.TokenReferenceDefinition):
        theme[tokenType] = {"color": qApp.palette().color(QPalette.Link)}

    theme[MTT.TokenImage] = {"color": Qt.green}
    theme[MTT.TokenHtmlComment] = {"color": dark}

    # Lists
    for tokenType in (MTT.TokenNumberedList, MTT.TokenBulletPointList):
        theme[tokenType] = {
            "markupColor": QColor(Qt.red).lighter(),
            "markupBold": True,
            "markupMonospace": True,
        }

    theme[MTT.TokenHorizontalRule] = {
        "overline": True,
        "underline": True,
        "monospace": True,
        "color": markup,
    }
    theme[MTT.TokenLineBreak] = {"background": markup}
    theme[MTT.TokenBlockquote] = {
        "color": darker,
        "markupColor": lightBackground,
        "markupBackground": lightBackground,
    }
    theme[MTT.TokenCodeBlock] = {
        "color": darker,
        "markupBackground": veryLightBackground,
        "monospace": True,
    }

    # Code fences
    for tokenType in (MTT.TokenGithubCodeFence, MTT.TokenPandocCodeFence,
                      MTT.TokenCodeFenceEnd):
        theme[tokenType] = {"color": markup}

    theme[MTT.TokenMention] = {}  # FIXME

    # Tables
    theme[MTT.TokenTableHeader] = {"color": darker, "monospace": True}
    theme[MTT.TokenTableDivider] = {"color": markup, "monospace": True}
    theme[MTT.TokenTablePipe] = {"color": markup, "monospace": True}

    return theme
def setColorScheme(self, defaultTextColor, backgroundColor, markupColor,
                   linkColor, spellingErrorColor):
    """Store the given colours and re-highlight the document.

    Only ``defaultTextColor`` is applied to a format here (the default
    format's foreground); the other colours are only stored.
    """
    (self.defaultTextColor,
     self.backgroundColor,
     self.markupColor,
     self.linkColor,
     self.spellingErrorColor) = (defaultTextColor,
                                 backgroundColor,
                                 markupColor,
                                 linkColor,
                                 spellingErrorColor)
    self.defaultFormat.setForeground(QBrush(defaultTextColor))

    # FIXME: generate a theme based on that
    self.rehighlight()
###########################################################################
# ACTUAL FORMATTING
###########################################################################
def applyFormattingForToken(self, token, text):
    """Format the opening markup, content and closing markup of a token.

    The content format starts from the format currently set at the
    first content character, the markup format from the one at the
    token position; both are then adjusted by the theme entry of the
    token type, if there is one.

    :param token: token with ``type``, ``position``, ``length``,
        ``openingMarkupLength`` and ``closingMarkupLength``.
    :param text: text of the block; unused, kept for callers.
    """
    if token.type == MTT.TokenUnknown:
        qWarning("MarkdownHighlighter.applyFormattingForToken() was passed"
                 " in a token of unknown type.")
        return

    opening = token.openingMarkupLength
    closing = token.closingMarkupLength

    textFormat = self.format(token.position + opening)
    markupFormat = self.format(token.position)
    if self.theme.get("markup"):
        markupFormat.setForeground(self.theme["markup"])

    theme = self.theme.get(token.type)
    if theme:
        textFormat, markupFormat = self.formatsFromTheme(theme,
                                                         textFormat,
                                                         markupFormat)

    # Format opening markup
    self.setFormat(token.position, opening, markupFormat)

    # Format text
    self.setFormat(token.position + opening,
                   token.length - opening - closing,
                   textFormat)

    # Format closing markup
    if closing > 0:
        self.setFormat(token.position + token.length - closing,
                       closing,
                       markupFormat)
def formatsFromTheme(self, theme, format=None, markupFormat=None):
    """Apply the options of a theme entry to a pair of formats.

    :param theme: dict of formatting options (see defaultTheme()).
    :param format: format of the token content; modified in place.
        A new QTextCharFormat is created when omitted.
    :param markupFormat: format of the markup characters; modified in
        place, or replaced by a copy of ``format`` (keeping its own
        foreground) when the theme sets ``"formatMarkup"``. A new
        QTextCharFormat is created when omitted.
    :return: tuple ``(format, markupFormat)``.
    """
    # Fresh objects per call: default QTextCharFormat() instances in the
    # signature would be shared, and mutated, across every call.
    if format is None:
        format = QTextCharFormat()
    if markupFormat is None:
        markupFormat = QTextCharFormat()

    # Token
    if theme.get("color"):
        format.setForeground(theme["color"])
    if theme.get("deltaSize"):
        format.setFontPointSize(format.fontPointSize() + theme["deltaSize"])
    if theme.get("background"):
        format.setBackground(theme["background"])
    if theme.get("monospace"):
        format.setFontFamily("Monospace")
    if theme.get("bold"):
        format.setFontWeight(QFont.Bold)
    if theme.get("italic"):
        format.setFontItalic(theme["italic"])
    if theme.get("underline"):
        format.setFontUnderline(theme["underline"])
    if theme.get("overline"):
        format.setFontOverline(theme["overline"])
    if theme.get("strike"):
        format.setFontStrikeOut(theme["strike"])
    if theme.get("super"):
        format.setVerticalAlignment(QTextCharFormat.AlignSuperScript)
    if theme.get("sub"):
        format.setVerticalAlignment(QTextCharFormat.AlignSubScript)

    # Markup
    if theme.get("formatMarkup"):
        # The markup takes every attribute of the content format except
        # its foreground colour.
        c = markupFormat.foreground()
        markupFormat = QTextCharFormat(format)
        markupFormat.setForeground(c)
    if theme.get("markupBold"):
        markupFormat.setFontWeight(QFont.Bold)
    if theme.get("markupColor"):
        markupFormat.setForeground(theme["markupColor"])
    if theme.get("markupBackground"):
        markupFormat.setBackground(theme["markupBackground"])
    if theme.get("markupMonospace"):
        markupFormat.setFontFamily("Monospace")

    return format, markupFormat
###########################################################################
# SETTINGS
###########################################################################
def setHighlighted(self, words, tags):
    """Set the words and tags to highlight.

    The document is re-highlighted only when one of the two lists
    differs from the one currently stored.
    """
    wordsChanged = self.highlightedWords != words
    needsRehighlight = wordsChanged or self.highlightedTags != tags

    self.highlightedWords = words
    self.highlightedTags = tags

    if needsRehighlight:
        self.rehighlight()
def setSearched(self, expression, regExp=False, caseSensitivity=False):
    """
    Define the expression currently searched, to be highlighted.

    :param expression: text or regular expression to highlight.
    :param regExp: whether ``expression`` is a regular expression.
    :param caseSensitivity: whether the search is case sensitive.

    The document is re-highlighted only when one of the three values
    differs from the one currently stored.
    """
    needsRehighlight = (self.searchExpression != expression
                        or self.searchExpressionRegExp != regExp
                        or self.searchExpressionCase != caseSensitivity)

    self.searchExpression = expression
    self.searchExpressionRegExp = regExp
    self.searchExpressionCase = caseSensitivity

    if needsRehighlight:
        self.rehighlight()
def setDictionary(self, dictionary):
    """Store the spell-check dictionary.

    The document is re-highlighted only while spell checking is enabled.
    """
    self.dictionary = dictionary
    if not self.spellCheckEnabled:
        return
    self.rehighlight()
def increaseFontSize(self):
    """Grow the default format's font by one point and re-highlight."""
    currentSize = self.defaultFormat.fontPointSize()
    self.defaultFormat.setFontPointSize(currentSize + 1.0)
    self.rehighlight()
def decreaseFontSize(self):
    """Shrink the default format's font by one point and re-highlight."""
    currentSize = self.defaultFormat.fontPointSize()
    self.defaultFormat.setFontPointSize(currentSize - 1.0)
    self.rehighlight()
def setEnableLargeHeadingSizes(self, enable):
    """Turn larger heading fonts on or off, then re-highlight."""
    self.setupHeadingFontSize(enable)
    self.rehighlight()
def setupHeadingFontSize(self, useLargeHeadings):
    """Set the ``"deltaSize"`` option of every heading theme entry.

    :param useLargeHeadings: when true, each heading level gets its own
        size increase; otherwise every heading gets an increase of 0.
    """
    if not useLargeHeadings:
        for tokenType in MTT.TITLES:
            self.theme[tokenType]["deltaSize"] = 0
        return

    # Point-size increase applied on top of the current font size.
    largeDeltas = (
        (MTT.TokenSetextHeading1Line1, 7),
        (MTT.TokenSetextHeading2Line1, 5),
        (MTT.TokenSetextHeading1Line2, 7),
        (MTT.TokenSetextHeading2Line2, 5),
        (MTT.TokenAtxHeading1, 7),
        (MTT.TokenAtxHeading2, 5),
        (MTT.TokenAtxHeading3, 3),
        (MTT.TokenAtxHeading4, 2),
        (MTT.TokenAtxHeading5, 1),
        (MTT.TokenAtxHeading6, 0),
    )
    for tokenType, delta in largeDeltas:
        self.theme[tokenType]["deltaSize"] = delta
def setUseUnderlineForEmphasis(self, enable):
    """Store the underline-for-emphasis setting and re-highlight."""
    # The attribute name is misspelled; it is kept as-is because it is
    # the name the rest of the class uses.
    self.useUndlerlineForEmphasis = enable
    self.rehighlight()
def setFont(self, fontFamily, fontSize):
    """Use a normal-weight, non-italic font of the given family and
    point size as the default format's font, then re-highlight."""
    self.defaultFormat.setFont(
        QFont(family=fontFamily,
              pointSize=fontSize,
              weight=QFont.Normal,
              italic=False))
    self.rehighlight()
def setSpellCheckEnabled(self, enabled):
    """Enable or disable spell checking, then re-highlight."""
    self.spellCheckEnabled = enabled
    self.rehighlight()
def setBlockquoteStyle(self, style):
    """Select how blockquotes are rendered, then re-highlight.

    :param style: one of the BlockquoteStyle values. With
        ``BlockquoteStyleItalic`` the blockquote theme entry is set to
        italic; with any other value italic is switched off.
    """
    self.blockquoteStyle = style

    # The italic flag is stored in the theme, which is what
    # formatsFromTheme() reads. The previous code wrote it to
    # ``self.emphasizeToken``, an attribute that is never created, so
    # every call raised AttributeError.
    blockquoteTheme = self.theme.setdefault(MTT.TokenBlockquote, {})
    blockquoteTheme["italic"] = (style == BS.BlockquoteStyleItalic)

    self.rehighlight()
def setHighlightLineBreaks(self, enable):
    """Store the highlight-line-breaks setting and re-highlight."""
    self.highlightLineBreaks = enable
    self.rehighlight()
###########################################################################
# GHOSTWRITER SPECIFIC?
###########################################################################
def onTypingResumed(self):
    """Record that the user is typing again."""
    self.typingPaused = False
def onTypingPaused(self):
    """Record that typing paused and re-highlight the cursor's block."""
    self.typingPaused = True
    cursorPosition = self.editor.textCursor().position()
    self.rehighlightBlock(self.document().findBlock(cursorPosition))
def onHighlightBlockAtPosition(self, position):
    """Re-highlight the block containing the given document position."""
    self.rehighlightBlock(self.document().findBlock(position))
def onTextBlockRemoved(self, block):
    """Emit ``headingRemoved`` when the removed block was a heading.

    :param block: the removed block; its ``userState()`` is checked and
        its ``position()`` is sent with the signal.
    """
    # userState is a method: it has to be called. Passing the bound
    # method itself never matched any heading state, so the signal was
    # never emitted.
    if self.isHeadingBlockState(block.userState()):
        self.headingRemoved.emit(block.position())
###########################################################################
# SPELLCHECK
###########################################################################
def spellCheck(self, text):
    """Underline the misspelled words of the current block.

    A misspelled word that ends exactly at the cursor is left alone
    while the user is typing (``typingPaused`` false), so the word
    being typed is not flagged before it is finished.

    :param text: text of the block being highlighted.
    """
    dictionary = getattr(self, "dictionary", None)
    if dictionary is None:
        # No dictionary was provided through setDictionary().
        return
    # Treated as paused until onTypingResumed() says otherwise.
    typingPaused = getattr(self, "typingPaused", True)

    cursorPosition = self.editor.textCursor().position()
    cursorPosBlock = self.document().findBlock(cursorPosition)
    cursorPosInBlock = -1

    if self.currentBlock() == cursorPosBlock:
        cursorPosInBlock = cursorPosition - cursorPosBlock.position()

    # styleHint() returns a plain int; convert it to the enum type that
    # setUnderlineStyle() expects. (The call used to be spelled
    # ``qApp.stlye()``, which raised AttributeError.)
    underlineStyle = QTextCharFormat.UnderlineStyle(
        qApp.style().styleHint(QStyle.SH_SpellCheckUnderlineStyle))

    misspelledWord = dictionary.check(text, 0)

    while not misspelledWord.isNull():
        startIndex = misspelledWord.position()
        length = misspelledWord.length()

        if typingPaused or cursorPosInBlock != startIndex + length:
            spellingErrorFormat = self.format(startIndex)
            spellingErrorFormat.setUnderlineColor(self.spellingErrorColor)
            spellingErrorFormat.setUnderlineStyle(underlineStyle)
            self.setFormat(startIndex, length, spellingErrorFormat)

        # Continue the search right after the word just handled.
        startIndex += length
        misspelledWord = dictionary.check(text, startIndex)
def storeHeadingData(self, token, text):
    """Emit ``headingFound`` for a heading token.

    For ATX headings the level comes from the token type and the
    heading text is the stripped content between the opening and
    closing markup. For the first line of a setext heading the whole
    block text is the heading text.

    :param token: heading token found in the block.
    :param text: text of the block.
    """
    atxHeadings = (
        MTT.TokenAtxHeading1,
        MTT.TokenAtxHeading2,
        MTT.TokenAtxHeading3,
        MTT.TokenAtxHeading4,
        MTT.TokenAtxHeading5,
        MTT.TokenAtxHeading6,
    )

    if token.type in atxHeadings:
        level = token.type - MTT.TokenAtxHeading1 + 1
        start = token.position + token.openingMarkupLength
        contentLength = (token.length
                         - token.openingMarkupLength
                         - token.closingMarkupLength)
        headingText = text[start:start + contentLength].strip()

    elif token.type == MTT.TokenSetextHeading1Line1:
        level = 1
        headingText = text

    elif token.type == MTT.TokenSetextHeading2Line1:
        level = 2
        headingText = text

    else:
        # Token exposes its type as the ``type`` attribute; the former
        # ``token.getType()`` call raised AttributeError instead of
        # logging the warning.
        qWarning("MarkdownHighlighter.storeHeadingData() encountered" +
                 " unexpected token: {}".format(token.type))
        return

    # FIXME: TypeError: could not convert 'TextBlockData' to 'QTextBlockUserData'
    # blockData = self.currentBlockUserData()
    # if blockData is None:
    #     blockData = TextBlockData(self.document(), self.currentBlock())
    #
    # self.setCurrentBlockUserData(blockData)
    self.headingFound.emit(level, headingText, self.currentBlock())
def isHeadingBlockState(self, state):
    """Return True when ``state`` is an ATX heading state or the first
    line of a setext heading."""
    headingStates = (
        MS.MarkdownStateAtxHeading1,
        MS.MarkdownStateAtxHeading2,
        MS.MarkdownStateAtxHeading3,
        MS.MarkdownStateAtxHeading4,
        MS.MarkdownStateAtxHeading5,
        MS.MarkdownStateAtxHeading6,
        MS.MarkdownStateSetextHeading1Line1,
        MS.MarkdownStateSetextHeading2Line1,
    )
    return state in headingStates
def getLuminance(color):
    """Return the weighted sum of the colour's red, green and blue
    float components (weights 0.30, 0.59 and 0.11)."""
    red = 0.30 * color.redF()
    green = 0.59 * color.greenF()
    blue = 0.11 * color.blueF()
    return red + green + blue
def applyAlphaToChannel(foreground, background, alpha):
    """Blend one colour channel: ``alpha`` parts of ``foreground`` with
    ``1 - alpha`` parts of ``background``."""
    foregroundPart = foreground * alpha
    backgroundPart = background * (1.0 - alpha)
    return foregroundPart + backgroundPart
def applyAlpha(foreground, background, alpha):
    """Blend ``foreground`` over ``background`` and return the result.

    :param foreground: colour providing ``red()``, ``green()`` and
        ``blue()``.
    :param background: colour providing the same accessors.
    :param alpha: weight of the foreground, on a 0-255 scale.
    :return: a new QColor holding the blended channels.
    """
    normalizedAlpha = alpha / 255.0

    def blend(foregroundChannel, backgroundChannel):
        # The blend is a float, but QColor channels are ints; truncate
        # explicitly instead of handing a float to QColor.
        return int(applyAlphaToChannel(
            foregroundChannel, backgroundChannel, normalizedAlpha))

    blendedColor = QColor(0, 0, 0)
    blendedColor.setRed(blend(foreground.red(), background.red()))
    blendedColor.setGreen(blend(foreground.green(), background.green()))
    blendedColor.setBlue(blend(foreground.blue(), background.blue()))

    return blendedColor

View file

@ -0,0 +1,887 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
import re
from PyQt5.QtCore import *
from PyQt5.QtGui import *
from PyQt5.QtWidgets import *
from noteflow.ui.views.markdownEnums import MarkdownState as MS
from noteflow.ui.views.markdownEnums import MarkdownTokenType as MTT
# This file is simply a python translation of GhostWriter's Tokenizer.
# http://wereturtle.github.io/ghostwriter/
# GPLV3+.
# ==============================================================================
# TOKEN
# ==============================================================================
class Token:
    """A span of text recognised by the tokenizer.

    ``type`` starts at -1, ``position`` is the start of the span,
    ``length`` its total length, and the two markup lengths give the
    size of its opening and closing markup.
    """

    def __init__(self):
        self.type = -1
        self.position = 0
        self.length = 0
        self.openingMarkupLength = 0
        self.closingMarkupLength = 0
# ==============================================================================
# HIGHLIGHT TOKENIZER
# ==============================================================================
class HighlightTokenizer:
    """Base class for tokenizers used by a syntax highlighter.

    It holds the tokens found in a block, the state computed for the
    block, and a flag asking the highlighter to go back to the previous
    block. Subclasses implement tokenize().
    """

    def __init__(self):
        self.tokens = []
        # Same values as clear() sets, so that getState() and
        # backtrackRequested() are usable on a fresh instance.
        self.backtrack = False
        self.state = -1

    def tokenize(self, text, currentState, previousState, nextState):
        """Tokenize ``text``; meant to be overridden by subclasses."""
        # Subclass me
        return 0

    def getTokens(self):
        """Return the tokens sorted by position."""
        self.tokens = sorted(self.tokens, key=lambda t: t.position)
        return self.tokens

    def getState(self):
        """Return the state set for the block (-1 when none was set)."""
        return self.state

    def backtrackRequested(self):
        """Return True when a backtrack was requested since clear()."""
        return self.backtrack

    def clear(self):
        """Forget the tokens, the state and the backtrack request."""
        self.tokens = []
        self.backtrack = False
        self.state = -1

    def addToken(self, token):
        """Append a token; a token of type -1 is reported on stdout."""
        self.tokens.append(token)

        if token.type == -1:
            print("Error here", token.position, token.length)

    def setState(self, state):
        """Set the state of the block being tokenized."""
        self.state = state

    def requestBacktrack(self):
        """Ask for the previous block to be processed again."""
        self.backtrack = True

    def tokenLessThan(self, t1, t2):
        """Return True when ``t1`` starts before ``t2``."""
        # Token stores its position as an attribute; it has no
        # getPosition() accessor.
        return t1.position < t2.position
class MarkdownTokenizer(HighlightTokenizer):
DUMMY_CHAR = "$"
MAX_MARKDOWN_HEADING_LEVEL = 6
paragraphBreakRegex = QRegExp("^\\s*$")
heading1SetextRegex = QRegExp("^===+\\s*$")
heading2SetextRegex = QRegExp("^---+\\s*$")
blockquoteRegex = QRegExp("^ {0,3}>.*$")
githubCodeFenceStartRegex = QRegExp("^```+.*$")
githubCodeFenceEndRegex = QRegExp("^```+\\s*$")
pandocCodeFenceStartRegex = QRegExp("^~~~+.*$")
pandocCodeFenceEndRegex = QRegExp("^~~~+\\s*$")
numberedListRegex = QRegExp("^ {0,3}[0-9a-z]+[.)]\\s+.*$")
numberedNestedListRegex = QRegExp("^\\s*[0-9a-z]+[.)]\\s+.*$")
hruleRegex = QRegExp("\\s*(\\*\\s*){3,}|(\\s*(_\\s*){3,})|((\\s*(-\\s*){3,}))")
lineBreakRegex = QRegExp(".*\\s{2,}$")
emphasisRegex = QRegExp("(\\*(?![\\s*]).*[^\\s*]\\*)|_(?![\\s_]).*[^\\s_]_")
emphasisRegex.setMinimal(True)
strongRegex = QRegExp("\\*\\*(?=\\S).*\\S\\*\\*(?!\\*)|__(?=\\S).*\\S__(?!_)")
strongRegex.setMinimal(True)
strikethroughRegex = QRegExp("~~[^\\s]+.*[^\\s]+~~")
strikethroughRegex.setMinimal(True)
superScriptRegex = QRegExp("\^([^\\s]|(\\\\\\s))+\^") # Spaces must be escaped "\ "
superScriptRegex.setMinimal(True)
subScriptRegex = QRegExp("~([^\\s]|(\\\\\\s))+~") # Spaces must be escaped "\ "
subScriptRegex.setMinimal(True)
verbatimRegex = QRegExp("`+")
htmlTagRegex = QRegExp("<[^<>]+>")
htmlTagRegex.setMinimal(True)
htmlEntityRegex = QRegExp("&[a-zA-Z]+;|&#x?[0-9]+;")
automaticLinkRegex = QRegExp("(<[a-zA-Z]+\\:.+>)|(<.+@.+>)")
automaticLinkRegex.setMinimal(True)
inlineLinkRegex = QRegExp("\\[.+\\]\\(.+\\)")
inlineLinkRegex.setMinimal(True)
referenceLinkRegex = QRegExp("\\[(.+)\\]")
referenceLinkRegex.setMinimal(True)
referenceDefinitionRegex = QRegExp("^\\s*\\[.+\\]:")
imageRegex = QRegExp("!\\[.*\\]\\(.+\\)")
imageRegex.setMinimal(True)
htmlInlineCommentRegex = QRegExp("<!--.*-->")
htmlInlineCommentRegex.setMinimal(True)
mentionRegex = QRegExp("\\B@\\w+(\\-\\w+)*(/\\w+(\\-\\w+)*)?")
pipeTableDividerRegex = QRegExp("^ {0,3}(\\|[ :]?)?-{3,}([ :]?\\|[ :]?-{3,}([ :]?\\|)?)+\\s*$")
def __init__(self):
    """Initialise the base tokenizer; no extra state is added here."""
    HighlightTokenizer.__init__(self)
def tokenize(self, text, currentState, previousState, nextState):
    """Tokenize one block and compute its state.

    Block-level constructs are tried in a fixed order; the first one
    that matches decides the state. Inline tokens and line breaks are
    then looked for, except in constructs that need no further
    tokenizing.

    :param text: text of the block.
    :param currentState: state the block had before this call.
    :param previousState: state of the previous block.
    :param nextState: state of the next block.
    """
    self.currentState = currentState
    self.previousState = previousState
    self.nextState = nextState

    listStates = (MS.MarkdownStateListLineBreak,
                  MS.MarkdownStateNumberedList,
                  MS.MarkdownStateBulletPointList)

    if (self.previousState in (MS.MarkdownStateInGithubCodeFence,
                               MS.MarkdownStateInPandocCodeFence)
            and self.tokenizeCodeBlock(text)):
        # No further tokenizing required
        pass

    elif (self.previousState != MS.MarkdownStateComment
            and self.paragraphBreakRegex.exactMatch(text)):

        if previousState in listStates:
            self.setState(MS.MarkdownStateListLineBreak)
        # A blank line after a code block only ends it when it is not
        # itself indented. The indentation is four spaces: the literal
        # used to be a single space, which a 4-character slice can
        # never equal.
        elif (previousState != MS.MarkdownStateCodeBlock
                or (not text.startswith("\t")
                    and not text.endswith("    "))):
            self.setState(MS.MarkdownStateParagraphBreak)

    elif (self.tokenizeSetextHeadingLine2(text)
            or self.tokenizeCodeBlock(text)
            or self.tokenizeMultilineComment(text)
            or self.tokenizeHorizontalRule(text)
            or self.tokenizeTableDivider(text)):
        # No further tokenizing required
        pass

    elif (self.tokenizeSetextHeadingLine1(text)
            or self.tokenizeAtxHeading(text)
            or self.tokenizeBlockquote(text)
            or self.tokenizeNumberedList(text)
            or self.tokenizeBulletPointList(text)):
        self.tokenizeLineBreak(text)
        self.tokenizeInline(text)

    else:
        # Bullet point lists are part of the check (the numbered list
        # state used to be listed twice instead).
        if previousState in listStates:
            # An indented line following a list item continues it.
            if (not self.tokenizeNumberedList(text)
                    and not self.tokenizeBulletPointList(text)
                    and (text.startswith("\t")
                         or text.startswith("    "))):
                self.setState(previousState)
            else:
                self.setState(MS.MarkdownStateParagraph)
        else:
            self.setState(MS.MarkdownStateParagraph)

        self.tokenizeLineBreak(text)
        self.tokenizeInline(text)

    # Make sure that if the second line of a setext heading is removed the
    # first line is reprocessed. Otherwise, it will still show up in the
    # document as a heading.
    if ((previousState == MS.MarkdownStateSetextHeading1Line1
            and self.getState() != MS.MarkdownStateSetextHeading1Line2)
        or (previousState == MS.MarkdownStateSetextHeading2Line1
            and self.getState() != MS.MarkdownStateSetextHeading2Line2)):
        self.requestBacktrack()
def tokenizeSetextHeadingLine1(self, text):
    """
    Tokenizes the first (text) line of a setext-style heading.

    The decision is based on the state of the following line
    (self.nextState): if that line is already marked as the second line of
    a level 1 or level 2 setext heading, this line gets the matching
    "Line1" state and a token spanning the whole line.

    Returns True if a heading token was added, False otherwise.
    """
    headingToken = Token()
    followingState = self.nextState

    if followingState == MS.MarkdownStateSetextHeading1Line2:
        self.setState(MS.MarkdownStateSetextHeading1Line1)
        headingToken.type = MTT.TokenSetextHeading1Line1
    elif followingState == MS.MarkdownStateSetextHeading2Line2:
        self.setState(MS.MarkdownStateSetextHeading2Line1)
        headingToken.type = MTT.TokenSetextHeading2Line1
    else:
        # The next line is not a setext underline: not a heading.
        return False

    headingToken.length = len(text)
    headingToken.position = 0
    self.addToken(headingToken)
    return True
def tokenizeSetextHeadingLine2(self, text):
    """
    Tokenizes the second (underline) line of a setext-style heading.

    Three situations are handled, depending on self.previousState:

    - The previous line is already marked as the first line of a level 1
      or level 2 setext heading: this line is tokenized as the matching
      second line if the corresponding setext regex matches it entirely.
      Otherwise a backtrack is requested so that the previous line gets
      re-tokenized.
    - The previous line is a plain paragraph and this line matches one of
      the setext regexes: this line is tokenized as the second heading
      line and a backtrack is requested so that the previous line can be
      re-tokenized.
    - Anything else: nothing is tokenized.

    Returns True if a token was added for this line, False otherwise.
    """
    level = 0
    setextMatch = False
    token = Token()
    previousState = self.previousState

    if previousState == MS.MarkdownStateSetextHeading1Line1:
        level = 1
        setextMatch = self.heading1SetextRegex.exactMatch(text)
        self.setState(MS.MarkdownStateSetextHeading1Line2)
        token.type = MTT.TokenSetextHeading1Line2
    elif previousState == MS.MarkdownStateSetextHeading2Line1:
        level = 2
        setextMatch = self.heading2SetextRegex.exactMatch(text)
        self.setState(MS.MarkdownStateSetextHeading2Line2)
        token.type = MTT.TokenSetextHeading2Line2
    elif previousState == MS.MarkdownStateParagraph:
        h1Line2 = self.heading1SetextRegex.exactMatch(text)
        h2Line2 = self.heading2SetextRegex.exactMatch(text)

        if h1Line2 or h2Line2:
            # Restart tokenizing on the previous line.
            self.requestBacktrack()
            token.length = len(text)
            token.position = 0

            if h1Line2:
                self.setState(MS.MarkdownStateSetextHeading1Line2)
                token.type = MTT.TokenSetextHeading1Line2
            else:
                self.setState(MS.MarkdownStateSetextHeading2Line2)
                token.type = MTT.TokenSetextHeading2Line2

            self.addToken(token)
            return True

    if level > 0:
        if setextMatch:
            token.length = len(text)
            token.position = 0
            self.addToken(token)
            return True

        # The previous line was tagged as a heading but this line is no
        # longer a valid underline: restart tokenizing on the previous
        # line.
        self.requestBacktrack()

    return False
def tokenizeAtxHeading(self, text):
    """
    Tokenizes an ATX-style heading ("# Heading", "## Heading ##", ...).

    The heading level is the number of unescaped pound signs at the start
    of the line, capped at self.MAX_MARKDOWN_HEADING_LEVEL. Pound signs at
    the end of the line are recorded as closing markup.

    Returns True if a heading token was added, False otherwise (including
    for empty text and for lines made only of leading pound signs).
    """
    escapedText = self.dummyOutEscapeCharacters(text)
    textLength = len(escapedText)
    level = 0

    # Count the number of pound signs at the front of the string, up to
    # the maximum allowed, to determine the heading level. The length
    # check keeps empty text from raising an IndexError.
    while level < textLength and escapedText[level] == "#":
        level += 1
        if level >= self.MAX_MARKDOWN_HEADING_LEVEL:
            break

    if not 0 < level < len(text):
        return False

    # Count how many pound signs are at the end of the text, without ever
    # walking back into the opening markup (a line such as "#######" would
    # otherwise index past the start of the string).
    trailingPoundCount = 0
    maxTrailing = textLength - level
    while trailingPoundCount < maxTrailing and \
            escapedText[-trailingPoundCount - 1] == "#":
        trailingPoundCount += 1

    token = Token()
    token.position = 0
    token.length = len(text)
    # Heading token types and states are consecutive values starting at
    # level 1, hence the offset arithmetic.
    token.type = MTT.TokenAtxHeading1 + level - 1
    token.openingMarkupLength = level
    token.closingMarkupLength = trailingPoundCount
    self.addToken(token)
    self.setState(MS.MarkdownStateAtxHeading1 + level - 1)
    return True
def tokenizeNumberedList(self, text):
    """
    Tokenizes a numbered list item.

    A line qualifies either as the start of a list (the previous line is a
    paragraph break, unknown, a code block or the end of a code fence, and
    self.numberedListRegex matches) or as an item of an ongoing list (the
    previous line belongs to a list, and self.numberedNestedListRegex
    matches). The opening markup runs up to and including the character
    following the first "." or ")" found in the line.

    Returns True if a list token was added, False otherwise.
    """
    previousState = self.previousState

    startsList = previousState in (
        MS.MarkdownStateParagraphBreak,
        MS.MarkdownStateUnknown,
        MS.MarkdownStateCodeBlock,
        MS.MarkdownStateCodeFenceEnd,
    ) and self.numberedListRegex.exactMatch(text)

    if not startsList:
        continuesList = previousState in (
            MS.MarkdownStateListLineBreak,
            MS.MarkdownStateNumberedList,
            MS.MarkdownStateBulletPointList,
        ) and self.numberedNestedListRegex.exactMatch(text)

        if not continuesList:
            return False

    # The number is terminated by whichever of "." or ")" comes first.
    delimiters = [pos for pos in (text.find("."), text.find(")"))
                  if pos >= 0]
    index = min(delimiters) if delimiters else -1

    if index <= 0:
        return False

    item = Token()
    item.type = MTT.TokenNumberedList
    item.position = 0
    item.length = len(text)
    item.openingMarkupLength = index + 2
    self.addToken(item)
    self.setState(MS.MarkdownStateNumberedList)
    return True
def tokenizeBulletPointList(self, text):
    """
    Tokenizes a bullet point list item.

    The line must consist of optional leading whitespace, a bullet
    character ('+', '-' or '*') and at least one space or tab after it.
    When the item would be the first of a list, more than three leading
    spaces or a leading tab disqualify it, as that would indicate a code
    block rather than a list.

    Returns True if a list token was added, False otherwise.
    """
    previousState = self.previousState

    if previousState not in (
            MS.MarkdownStateUnknown,
            MS.MarkdownStateParagraphBreak,
            MS.MarkdownStateListLineBreak,
            MS.MarkdownStateNumberedList,
            MS.MarkdownStateBulletPointList,
            MS.MarkdownStateCodeBlock,
            MS.MarkdownStateCodeFenceEnd):
        return False

    # Whether leading spaces are limited to three: true when this item
    # would be the first one of a list.
    indentLimited = previousState not in (
        MS.MarkdownStateNumberedList,
        MS.MarkdownStateBulletPointList,
        MS.MarkdownStateListLineBreak,
    ) and previousState in (
        MS.MarkdownStateParagraphBreak,
        MS.MarkdownStateUnknown,
        MS.MarkdownStateCodeBlock,
        MS.MarkdownStateCodeFenceEnd,
    )
    # Whether a tab before the bullet character disqualifies the line.
    tabForbidden = previousState in (
        MS.MarkdownStateParagraphBreak,
        MS.MarkdownStateUnknown,
    )

    bulletAt = -1
    leadingSpaces = 0
    confirmed = False

    for pos, char in enumerate(text):
        if char == " ":
            if bulletAt >= 0:
                # Whitespace right after the bullet character confirms
                # the list item; no need to look any further.
                confirmed = True
                break
            leadingSpaces += 1
            if leadingSpaces > 3 and indentLimited:
                return False
        elif char == "\t":
            if bulletAt >= 0:
                confirmed = True
                break
            if tabForbidden:
                return False
        elif char in ("+", "-", "*"):
            bulletAt = pos
        else:
            # Any other character before the bullet is confirmed means
            # this is not a list item.
            return False

    if not confirmed:
        return False

    item = Token()
    item.type = MTT.TokenBulletPointList
    item.position = 0
    item.length = len(text)
    item.openingMarkupLength = bulletAt + 2
    self.addToken(item)
    self.setState(MS.MarkdownStateBulletPointList)
    return True
def tokenizeHorizontalRule(self, text):
    """
    Tokenizes a horizontal rule.

    When self.hruleRegex matches the entire line, a token covering the
    whole line is added and the state is set to
    MarkdownStateHorizontalRule.

    Returns True if the line is a horizontal rule, False otherwise.
    """
    if not self.hruleRegex.exactMatch(text):
        return False

    rule = Token()
    rule.type = MTT.TokenHorizontalRule
    rule.position = 0
    rule.length = len(text)
    self.addToken(rule)
    self.setState(MS.MarkdownStateHorizontalRule)
    return True
def tokenizeLineBreak(self, text):
# Adds a TokenLineBreak token when self.lineBreakRegex matches the whole
# line, and requests backtracking when this line and its neighbours are in
# paragraph-like states (paragraph, blockquote, numbered or bullet list).
# Returns True when a line break token was added, False otherwise.
#
# NOTE(review): the indentation of this method is not visible in this
# view, so it cannot be told whether the lineBreakRegex test below is
# nested inside the currentState check -- confirm against the real file.
currentState = self.currentState
previousState = self.previousState
nextState = self.nextState
if currentState in [
MS.MarkdownStateParagraph,
MS.MarkdownStateBlockquote,
MS.MarkdownStateNumberedList,
MS.MarkdownStateBulletPointList,]:
# A backtrack is requested when the previous line is in one of the same
# four states...
if previousState in [
MS.MarkdownStateParagraph,
MS.MarkdownStateBlockquote,
MS.MarkdownStateNumberedList,
MS.MarkdownStateBulletPointList,]:
self.requestBacktrack()
# ...and again when the next line is (both requests may be issued).
if nextState in [
MS.MarkdownStateParagraph,
MS.MarkdownStateBlockquote,
MS.MarkdownStateNumberedList,
MS.MarkdownStateBulletPointList,]:
self.requestBacktrack()
if self.lineBreakRegex.exactMatch(text):
token = Token()
token.type = MTT.TokenLineBreak
# The token covers only the last character of the line.
token.position = len(text) - 1
token.length = 1
self.addToken(token)
return True
return False
def tokenizeBlockquote(self, text):
    """
    Tokenizes a blockquote line.

    A line is part of a blockquote when the previous line was one, or
    when self.blockquoteRegex matches it entirely. The opening markup
    extends up to the last '>' found among the leading '>' and space
    characters.

    Returns True if a blockquote token was added, False otherwise.
    """
    continuesQuote = self.previousState == MS.MarkdownStateBlockquote
    if not continuesQuote and not self.blockquoteRegex.exactMatch(text):
        return False

    # Find any '>' characters at the front of the line, stopping at the
    # first character that is neither '>' nor a space.
    markupLength = 0
    for offset, char in enumerate(text):
        if char == ">":
            markupLength = offset + 1
        elif char != " ":
            break

    quote = Token()
    quote.type = MTT.TokenBlockquote
    quote.position = 0
    quote.length = len(text)
    if markupLength > 0:
        quote.openingMarkupLength = markupLength
    self.addToken(quote)
    self.setState(MS.MarkdownStateBlockquote)
    return True
def tokenizeCodeBlock(self, text):
    """
    Tokenizes a line that belongs to a fenced or indented code block.

    Depending on self.previousState:

    - Inside a GitHub or Pandoc code fence: the line is either the end of
      the fence (the matching end regex matches it) or one more line of
      code within the fence.
    - After a code block, a paragraph break or an unknown line: a line
      starting with a tab or with four spaces is an indented code block
      line.
    - After a paragraph break, a paragraph, an unknown line or a list
      line break: the line may open a GitHub or Pandoc code fence.

    Returns True if a code-related token was added, False otherwise.
    """
    previousState = self.previousState

    if previousState in [
            MS.MarkdownStateInGithubCodeFence,
            MS.MarkdownStateInPandocCodeFence]:
        # Stay inside the fence unless its end is found below.
        self.setState(previousState)

        if (previousState == MS.MarkdownStateInGithubCodeFence and
                self.githubCodeFenceEndRegex.exactMatch(text)) or \
           (previousState == MS.MarkdownStateInPandocCodeFence and
                self.pandocCodeFenceEndRegex.exactMatch(text)):
            token = Token()
            token.type = MTT.TokenCodeFenceEnd
            token.position = 0
            token.length = len(text)
            self.addToken(token)
            self.setState(MS.MarkdownStateCodeFenceEnd)
        else:
            token = Token()
            token.type = MTT.TokenCodeBlock
            token.position = 0
            token.length = len(text)
            self.addToken(token)

        return True

    elif previousState in [
            MS.MarkdownStateCodeBlock,
            MS.MarkdownStateParagraphBreak,
            MS.MarkdownStateUnknown] and \
            text.startswith(("\t", "    ")):
        # Indented code block: one leading tab or four leading spaces.
        token = Token()
        token.type = MTT.TokenCodeBlock
        token.position = 0
        token.length = len(text)
        # All of the leading whitespace counts as markup.
        token.openingMarkupLength = len(text) - len(text.lstrip())
        self.addToken(token)
        self.setState(MS.MarkdownStateCodeBlock)
        return True

    elif previousState in [
            MS.MarkdownStateParagraphBreak,
            MS.MarkdownStateParagraph,
            MS.MarkdownStateUnknown,
            MS.MarkdownStateListLineBreak]:
        foundCodeFenceStart = False
        token = Token()

        if self.githubCodeFenceStartRegex.exactMatch(text):
            foundCodeFenceStart = True
            token.type = MTT.TokenGithubCodeFence
            self.setState(MS.MarkdownStateInGithubCodeFence)
        elif self.pandocCodeFenceStartRegex.exactMatch(text):
            foundCodeFenceStart = True
            token.type = MTT.TokenPandocCodeFence
            self.setState(MS.MarkdownStateInPandocCodeFence)

        if foundCodeFenceStart:
            token.position = 0
            token.length = len(text)
            self.addToken(token)
            return True

    return False
def tokenizeMultilineComment(self, text):
    """
    Tokenizes a line that continues a multiline HTML comment.

    Only applies when the previous line was left in the comment state. If
    the comment end ("-->") is found, only the commented part is tokenized
    and False is returned so that the remainder of the line can be
    processed as normal text. Otherwise the whole line is a comment, the
    comment state is kept, and True is returned.
    """
    if self.previousState != MS.MarkdownStateComment:
        return False

    # Find the end of the comment, if any.
    closeIndex = text.find("-->")

    comment = Token()
    comment.type = MTT.TokenHtmlComment
    comment.position = 0

    if closeIndex >= 0:
        comment.length = closeIndex + 3
        self.addToken(comment)
        # Report "not handled" so that the rest of the line, which is
        # outside the comment, still gets highlighted as paragraph text.
        return False

    comment.length = len(text)
    self.addToken(comment)
    self.setState(MS.MarkdownStateComment)
    return True
def tokenizeInline(self, text):
    """
    Tokenizes the inline elements of a line.

    Works on a copy of the text in which escaped characters have been
    dummied out. Each step returns the text with the parts it consumed
    masked, so that later steps do not match them again. The order of the
    steps therefore matters.

    Always returns True.
    """
    escapedText = self.dummyOutEscapeCharacters(text)

    # Check if the line is a reference definition.
    if self.referenceDefinitionRegex.exactMatch(text):
        definition = Token()
        definition.type = MTT.TokenReferenceDefinition
        definition.position = 0
        definition.length = escapedText.find(":") + 1
        self.addToken(definition)

        # Mask the first bracket so that the '[...]:' start of the
        # definition does not get highlighted as a reference link.
        bracketAt = escapedText.find("[")
        if bracketAt >= 0:
            escapedText = (escapedText[:bracketAt] + self.DUMMY_CHAR
                           + escapedText[bracketAt + 1:])

    escapedText = self.tokenizeVerbatim(escapedText)
    escapedText = self.tokenizeHtmlComments(escapedText)
    escapedText = self.tokenizeTableHeaderRow(escapedText)
    escapedText = self.tokenizeTableRow(escapedText)

    # (token type, regex, extra positional arguments for tokenizeMatches:
    # markupStartCount, markupEndCount, replaceMarkupChars,
    # replaceAllChars), applied in this order.
    inlineRules = (
        (MTT.TokenImage, self.imageRegex, (0, 0, False, True)),
        (MTT.TokenInlineLink, self.inlineLinkRegex, (0, 0, False, True)),
        (MTT.TokenReferenceLink, self.referenceLinkRegex,
         (0, 0, False, True)),
        (MTT.TokenHtmlEntity, self.htmlEntityRegex, ()),
        (MTT.TokenAutomaticLink, self.automaticLinkRegex,
         (0, 0, False, True)),
        (MTT.TokenStrikethrough, self.strikethroughRegex, (2, 2, True)),
        (MTT.TokenStrong, self.strongRegex, (2, 2, True)),
        (MTT.TokenEmphasis, self.emphasisRegex, (1, 1, True)),
        (MTT.TokenSuperScript, self.superScriptRegex, (1, 1, True)),
        (MTT.TokenSubScript, self.subScriptRegex, (1, 1, True)),
        (MTT.TokenHtmlTag, self.htmlTagRegex, ()),
        (MTT.TokenMention, self.mentionRegex, (0, 0, False, True)),
    )
    for tokenType, regex, options in inlineRules:
        escapedText = self.tokenizeMatches(tokenType, escapedText, regex,
                                           *options)

    return True
def tokenizeVerbatim(self, text):
    """
    Tokenizes verbatim (inline code) spans delimited by back ticks.

    For every match of self.verbatimRegex, a closing delimiter made of the
    same number of back ticks is searched for. When found, a verbatim
    token covering both delimiters and the enclosed text is added, and
    that range is masked with self.DUMMY_CHAR so that other Markdown
    elements are not detected inside it.

    Returns the text with all verbatim spans masked.
    """
    index = self.verbatimRegex.indexIn(text)

    while index >= 0:
        count = self.verbatimRegex.matchedLength()

        # The matching end must have the same number of back ticks as the
        # start.
        end = "`" * count
        endIndex = text.find(end, index + count)

        if endIndex >= 0:
            # The end was found: add the verbatim token.
            length = endIndex + count - index
            token = Token()
            token.type = MTT.TokenVerbatim
            token.position = index
            token.length = length
            token.openingMarkupLength = count
            token.closingMarkupLength = count
            self.addToken(token)

            # Mask the whole span in one go so that searches for other
            # Markdown elements don't find anything within it.
            text = (text[:index] + self.DUMMY_CHAR * length
                    + text[index + length:])
            index += length
        else:
            # No end found: start searching again at the very next
            # character.
            index += 1

        index = self.verbatimRegex.indexIn(text, index)

    return text
def tokenizeHtmlComments(self, text):
    """
    Tokenizes HTML comments found within a line.

    Three steps are performed, in order:

    1. If the previous line was inside a multiline comment, everything up
       to and including the comment end ("-->") is masked. No token is
       added for it here.
    2. Every match of self.htmlInlineCommentRegex gets a comment token and
       is masked.
    3. If a comment start ("<!--") remains, the rest of the line gets a
       comment token, the state is set to MarkdownStateComment, and that
       part of the line is masked.

    Returns the text with all commented parts replaced by
    self.DUMMY_CHAR.
    """
    previousState = self.previousState

    # Check for the end of a multiline comment so that it doesn't get
    # further tokenized. Don't bother formatting the comment itself,
    # however, because it should have already been tokenized in
    # tokenizeMultilineComment().
    if previousState == MS.MarkdownStateComment:
        commentEnd = text.find("-->")
        # Only mask when the end marker is actually present in this text:
        # a failed search (-1) must not mask unrelated leading characters.
        if commentEnd >= 0:
            maskedLength = commentEnd + 3
            text = self.DUMMY_CHAR * maskedLength + text[maskedLength:]

    # Now check for inline comments (non-multiline).
    commentStart = self.htmlInlineCommentRegex.indexIn(text)

    while commentStart >= 0:
        commentLength = self.htmlInlineCommentRegex.matchedLength()
        token = Token()
        token.type = MTT.TokenHtmlComment
        token.position = commentStart
        token.length = commentLength
        self.addToken(token)

        # Mask the comment so that it doesn't get tokenized again.
        commentStop = commentStart + commentLength
        text = (text[:commentStart] + self.DUMMY_CHAR * commentLength
                + text[commentStop:])

        commentStart = self.htmlInlineCommentRegex.indexIn(text, commentStop)

    # Find multiline comment start, if any.
    commentStart = text.find("<!--")

    if commentStart >= 0:
        remaining = len(text) - commentStart
        token = Token()
        token.type = MTT.TokenHtmlComment
        token.position = commentStart
        token.length = remaining
        self.addToken(token)
        self.setState(MS.MarkdownStateComment)

        # Mask the comment so that it doesn't get tokenized again.
        text = text[:commentStart] + self.DUMMY_CHAR * remaining

    return text
def tokenizeTableHeaderRow(self, text):
    """
    Tokenizes the header row of a pipe table.

    A line is a table header when the previous line is in one of the
    states that may precede a table, the current state is paragraph or
    unknown, and the next line is a pipe table divider. In that case the
    state is set to MarkdownStatePipeTableHeader, every '|' gets a
    TokenTablePipe token and every stretch of text between pipes gets a
    TokenTableHeader token.

    Returns the text with pipes replaced by spaces, or the text unchanged
    when the line is not a table header.
    """
    previousState = self.previousState
    nextState = self.nextState

    if previousState in [
            MS.MarkdownStateParagraphBreak,
            MS.MarkdownStateListLineBreak,
            MS.MarkdownStateSetextHeading1Line2,
            MS.MarkdownStateSetextHeading2Line2,
            MS.MarkdownStateAtxHeading1,
            MS.MarkdownStateAtxHeading2,
            MS.MarkdownStateAtxHeading3,
            MS.MarkdownStateAtxHeading4,
            MS.MarkdownStateAtxHeading5,
            MS.MarkdownStateAtxHeading6,
            MS.MarkdownStateHorizontalRule,
            MS.MarkdownStateCodeFenceEnd,
            MS.MarkdownStateUnknown] and \
            self.getState() in [
                MS.MarkdownStateParagraph,
                MS.MarkdownStateUnknown] and \
            nextState == MS.MarkdownStatePipeTableDivider:
        self.setState(MS.MarkdownStatePipeTableHeader)

        headerStart = 0

        for i, char in enumerate(text):
            if char != "|":
                continue

            if i > 0:
                header = Token()
                header.type = MTT.TokenTableHeader
                header.position = headerStart
                header.length = i - headerStart
                self.addToken(header)

            # Use a separate token object for the pipe: reusing the header
            # token would overwrite the header that was just added.
            pipe = Token()
            pipe.type = MTT.TokenTablePipe
            pipe.position = i
            pipe.length = 1
            self.addToken(pipe)
            headerStart = i + 1

        if headerStart < len(text):
            header = Token()
            header.type = MTT.TokenTableHeader
            header.position = headerStart
            header.length = len(text) - headerStart
            self.addToken(header)

        # Replace pipes with spaces so that they don't get formatted
        # again with, for example, strong or emphasis formatting. A space
        # is used rather than DUMMY_CHAR to prevent formatting such as
        # strong and emphasis from picking it up.
        text = text.replace("|", " ")

    return text
def tokenizeTableDivider(self, text):
    """
    Tokenizes the divider line of a pipe table.

    - Previous line is a table header: the line is tokenized as a divider
      if self.pipeTableDividerRegex matches it entirely; otherwise a
      backtrack is requested so that the previous line is re-tokenized.
    - Previous line is a paragraph: if the regex matches, a backtrack is
      requested and the line is tokenized as a divider.

    Returns True if a divider token was added, False otherwise.
    """
    previousState = self.previousState

    if previousState == MS.MarkdownStatePipeTableHeader:
        followsHeader = True
    elif previousState == MS.MarkdownStateParagraph:
        followsHeader = False
    else:
        return False

    isDivider = self.pipeTableDividerRegex.exactMatch(text)

    if followsHeader:
        if not isDivider:
            # Restart tokenizing on the previous line.
            self.requestBacktrack()
            return False
    else:
        if not isDivider:
            return False
        # Restart tokenizing on the previous line.
        self.requestBacktrack()

    self.setState(MS.MarkdownStatePipeTableDivider)
    divider = Token()
    divider.type = MTT.TokenTableDivider
    divider.length = len(text)
    divider.position = 0
    self.addToken(divider)
    return True
def tokenizeTableRow(self, text):
    """
    Tokenizes a body row of a pipe table.

    Applies when the previous line is a table divider or another table
    row: the state is set to MarkdownStatePipeTableRow and each '|' gets
    a TokenTablePipe token.

    Returns the text with pipes replaced by spaces, or the text unchanged
    when the line is not a table row.
    """
    if self.previousState not in (
            MS.MarkdownStatePipeTableDivider,
            MS.MarkdownStatePipeTableRow):
        return text

    self.setState(MS.MarkdownStatePipeTableRow)

    for column, char in enumerate(text):
        if char != "|":
            continue
        pipe = Token()
        pipe.type = MTT.TokenTablePipe
        pipe.position = column
        pipe.length = 1
        self.addToken(pipe)

    # Pipes become spaces so that they don't get formatted again with,
    # for example, strong or emphasis formatting. A space is used rather
    # than DUMMY_CHAR to prevent such formatting from picking it up.
    return text.replace("|", " ")
def tokenizeMatches(self, tokenType, text, regex,
                    markupStartCount=0, markupEndCount=0,
                    replaceMarkupChars=False, replaceAllChars=False):
    """
    Tokenizes a block of text, searching for all occurrences of regex.
    Occurrences are set to the given token type and added to the list of
    tokens. The markupStartCount and markupEndCount values are used to
    indicate how many markup special characters precede and follow the
    main text, respectively.

    For example, if the matched string is "**bold**", and
    markupStartCount = 2 and markupEndCount = 2, then the asterisks
    preceding and following the word "bold" will be set as opening and
    closing markup in the token.

    If replaceMarkupChars is true, then the markupStartCount and
    markupEndCount characters will be replaced with a dummy character in
    the text so that subsequent parsings of the same line do not pick up
    the original characters.

    If replaceAllChars is true instead, then the entire matched text will
    be replaced with dummy characters--again, for ease in parsing the
    same line for other regular expression matches.

    Returns the text, with the requested characters replaced.
    """
    def mask(source, start, stop):
        # Replace source[start:stop] with dummy characters, keeping the
        # length of the string unchanged.
        if stop <= start:
            return source
        return (source[:start] + self.DUMMY_CHAR * (stop - start)
                + source[stop:])

    index = regex.indexIn(text)

    while index >= 0:
        length = regex.matchedLength()
        token = Token()
        token.type = tokenType
        token.position = index
        token.length = length

        if markupStartCount > 0:
            token.openingMarkupLength = markupStartCount

        if markupEndCount > 0:
            token.closingMarkupLength = markupEndCount

        if replaceAllChars:
            text = mask(text, index, index + length)
        elif replaceMarkupChars:
            text = mask(text, index, index + markupStartCount)
            text = mask(text, index + length - markupEndCount,
                        index + length)

        self.addToken(token)
        # Always move forward by at least one character so that an empty
        # match cannot make the search loop forever.
        index = regex.indexIn(text, index + max(length, 1))

    return text
def dummyOutEscapeCharacters(self, text):
    """
    Replaces escaped characters in text so they aren't picked up
    during parsing. Returns a copy of the input text string
    with the escaped characters replaced with a dummy character.

    Each backslash followed by a character is replaced with a backslash
    followed by '$', so the returned string has the same length as the
    input and character positions stay valid.
    """
    # Raw strings keep the backslashes literal: the pattern is "a
    # backslash followed by any character", the replacement is a
    # backslash followed by '$'.
    # NOTE(review): '$' is hard-coded here; presumably it mirrors
    # DUMMY_CHAR -- confirm.
    return re.sub(r"\\.", r"\$", text)

View file

@ -11,8 +11,7 @@ from manuskript.enums import Outline
from manuskript import functions as F
from manuskript.models.outlineModel import outlineModel
from manuskript.ui.editors.MDFunctions import MDFormatSelection
from manuskript.ui.editors.MMDHighlighter import MMDHighlighter
from manuskript.ui.editors.basicHighlighter import basicHighlighter
from manuskript.ui.highlighters import MMDHighlighter, BasicHighlighter
from manuskript.ui.editors.textFormat import textFormat
from manuskript.ui import style as S
@ -84,7 +83,7 @@ class textEditView(QTextEdit):
self.spellcheck = False
if self._highlighting and not self.highlighter:
self.highlighter = basicHighlighter(self)
self.highlighter = BasicHighlighter(self)
self.highlighter.setDefaultBlockFormat(self._defaultBlockFormat)
def getDefaultLocale(self):
@ -191,7 +190,7 @@ class textEditView(QTextEdit):
if self._column in [Outline.text.value, Outline.notes.value]:
self.highlighter = MMDHighlighter(self)
else:
self.highlighter = basicHighlighter(self)
self.highlighter = BasicHighlighter(self)
self.highlighter.setDefaultBlockFormat(self._defaultBlockFormat)