mirror of
https://github.com/olivierkes/manuskript.git
synced 2024-05-17 11:22:28 +12:00
Checkpoint: adding new markdown highlighter
This commit is contained in:
parent
550f889251
commit
9be2edeee7
|
@ -5,7 +5,7 @@ from PyQt5.QtWidgets import QPlainTextEdit, QGroupBox, qApp, QVBoxLayout, QCheck
|
|||
|
||||
from manuskript.exporter.manuskript.plainText import plainText
|
||||
from manuskript.functions import mainWindow
|
||||
from manuskript.ui.editors.MMDHighlighter import MMDHighlighter
|
||||
from manuskript.ui.highlighters import MMDHighlighter
|
||||
from manuskript.ui.exporters.manuskript.plainTextSettings import exporterSettings
|
||||
|
||||
|
||||
|
@ -72,4 +72,4 @@ class markdownSettings(exporterSettings):
|
|||
self.settings = exporterSettings.getSettings(self)
|
||||
self.settings["Preview"]["MarkdownHighlighter"] = self.chkMarkdownHighlighter.isChecked()
|
||||
|
||||
return self.settings
|
||||
return self.settings
|
||||
|
|
|
@ -5,10 +5,10 @@ import re
|
|||
from PyQt5.QtCore import Qt
|
||||
from PyQt5.QtGui import QTextCharFormat, QFont, QTextCursor, QFontMetrics
|
||||
|
||||
from manuskript.ui.editors.basicHighlighter import basicHighlighter
|
||||
from manuskript.ui.highlighters import BasicHighlighter
|
||||
|
||||
|
||||
class MMDHighlighter(basicHighlighter):
|
||||
class MMDHighlighter(BasicHighlighter):
|
||||
|
||||
MARKDOWN_REGEX = {
|
||||
'Bold': '(\*\*)(.+?)(\*\*)',
|
||||
|
@ -27,7 +27,7 @@ class MMDHighlighter(basicHighlighter):
|
|||
}
|
||||
|
||||
def __init__(self, editor, style="Default"):
|
||||
basicHighlighter.__init__(self, editor)
|
||||
BasicHighlighter.__init__(self, editor)
|
||||
|
||||
self.editor = editor
|
||||
|
||||
|
@ -36,11 +36,11 @@ class MMDHighlighter(basicHighlighter):
|
|||
self.rules[key] = re.compile(self.MARKDOWN_REGEX[key])
|
||||
|
||||
def highlightBlock(self, text):
|
||||
basicHighlighter.highlightBlockBefore(self, text)
|
||||
BasicHighlighter.highlightBlockBefore(self, text)
|
||||
|
||||
self.doHighlightBlock(text)
|
||||
|
||||
basicHighlighter.highlightBlockAfter(self, text)
|
||||
BasicHighlighter.highlightBlockAfter(self, text)
|
||||
|
||||
def doHighlightBlock(self, text):
|
||||
"""
|
6
manuskript/ui/highlighters/__init__.py
Normal file
6
manuskript/ui/highlighters/__init__.py
Normal file
|
@ -0,0 +1,6 @@
|
|||
#!/usr/bin/python
|
||||
# -*- coding: utf8 -*-
|
||||
|
||||
from manuskript.ui.highlighters.basicHighlighter import BasicHighlighter
|
||||
from manuskript.ui.highlighters.MMDHighlighter import MMDHighlighter
|
||||
from manuskript.ui.highlighters.markdownHighlighter import MarkdownHighlighter
|
|
@ -4,12 +4,13 @@
|
|||
import re
|
||||
|
||||
from PyQt5.QtCore import Qt
|
||||
from PyQt5.QtGui import QBrush, QTextCursor, QColor, QFont, QSyntaxHighlighter, QTextBlockFormat, QTextCharFormat
|
||||
from PyQt5.QtGui import QBrush, QTextCursor, QColor, QFont, QSyntaxHighlighter
|
||||
from PyQt5.QtGui import QTextBlockFormat, QTextCharFormat
|
||||
|
||||
import manuskript.models.references as Ref
|
||||
|
||||
|
||||
class basicHighlighter(QSyntaxHighlighter):
|
||||
class BasicHighlighter(QSyntaxHighlighter):
|
||||
def __init__(self, editor):
|
||||
QSyntaxHighlighter.__init__(self, editor.document())
|
||||
|
||||
|
@ -38,7 +39,7 @@ class basicHighlighter(QSyntaxHighlighter):
|
|||
def highlightBlockBefore(self, text):
|
||||
"""Highlighting to do before anything else.
|
||||
|
||||
When subclassing basicHighlighter, you must call highlightBlockBefore
|
||||
When subclassing BasicHighlighter, you must call highlightBlockBefore
|
||||
before you do any custom highlighting.
|
||||
"""
|
||||
|
||||
|
@ -56,7 +57,7 @@ class basicHighlighter(QSyntaxHighlighter):
|
|||
def highlightBlockAfter(self, text):
|
||||
"""Highlighting to do after everything else.
|
||||
|
||||
When subclassing basicHighlighter, you must call highlightBlockAfter
|
||||
When subclassing BasicHighlighter, you must call highlightBlockAfter
|
||||
after your custom highlighting.
|
||||
"""
|
||||
|
||||
|
@ -91,13 +92,16 @@ class basicHighlighter(QSyntaxHighlighter):
|
|||
textedText = text + " "
|
||||
|
||||
# Based on http://john.nachtimwald.com/2009/08/22/qplaintextedit-with-in-line-spell-check/
|
||||
WORDS = '(?iu)([\w\']+)[^\'\w]' # (?iu) means case insensitive and unicode
|
||||
WORDS = r'(?iu)([\w\']+)[^\'\w]'
|
||||
# (?iu) means case insensitive and unicode
|
||||
if hasattr(self.editor, "spellcheck") and self.editor.spellcheck:
|
||||
for word_object in re.finditer(WORDS, textedText):
|
||||
if self.editor._dict and not self.editor._dict.check(word_object.group(1)):
|
||||
if (self.editor._dict
|
||||
and not self.editor._dict.check(word_object.group(1))):
|
||||
format = self.format(word_object.start(1))
|
||||
format.setUnderlineColor(self._misspelledColor)
|
||||
# SpellCheckUnderline fails with some fonts
|
||||
format.setUnderlineStyle(QTextCharFormat.WaveUnderline)
|
||||
self.setFormat(word_object.start(1),
|
||||
word_object.end(1) - word_object.start(1), format)
|
||||
word_object.end(1) - word_object.start(1),
|
||||
format)
|
94
manuskript/ui/highlighters/markdownEnums.py
Normal file
94
manuskript/ui/highlighters/markdownEnums.py
Normal file
|
@ -0,0 +1,94 @@
|
|||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
#==============================================================================
|
||||
# MARKDOWN STATES
|
||||
#==============================================================================
|
||||
|
||||
class MarkdownState:
    """Integer block states shared by the markdown tokenizer and highlighter.

    MarkdownHighlighter.highlightBlock() stores the tokenizer's state on each
    text block with setCurrentBlockState() and compares block states against
    these constants, so the numeric values are spelled out explicitly.
    """

    # Used by the highlighter as the "next state" when no next block exists.
    MarkdownStateUnknown = -1
    MarkdownStateParagraphBreak = 0
    MarkdownStateListLineBreak = 1
    MarkdownStateParagraph = 2
    # ATX headings, levels 1 to 6 (consecutive values).
    MarkdownStateAtxHeading1 = 3
    MarkdownStateAtxHeading2 = 4
    MarkdownStateAtxHeading3 = 5
    MarkdownStateAtxHeading4 = 6
    MarkdownStateAtxHeading5 = 7
    MarkdownStateAtxHeading6 = 8
    MarkdownStateBlockquote = 9
    # Code blocks and fenced code.
    MarkdownStateCodeBlock = 10
    MarkdownStateInGithubCodeFence = 11
    MarkdownStateInPandocCodeFence = 12
    MarkdownStateCodeFenceEnd = 13
    MarkdownStateComment = 14
    MarkdownStateHorizontalRule = 15
    MarkdownStateNumberedList = 16
    MarkdownStateBulletPointList = 17
    # Setext headings: Line1 is presumably the heading text and Line2 the
    # underline row -- TODO confirm against the tokenizer.
    MarkdownStateSetextHeading1Line1 = 18
    MarkdownStateSetextHeading1Line2 = 19
    MarkdownStateSetextHeading2Line1 = 20
    MarkdownStateSetextHeading2Line2 = 21
    # Pipe tables; the highlighter renders blocks in these states monospaced.
    MarkdownStatePipeTableHeader = 22
    MarkdownStatePipeTableDivider = 23
    MarkdownStatePipeTableRow = 24
|
||||
|
||||
#==============================================================================
|
||||
# MARKDOWN TOKEN TYPE
|
||||
#==============================================================================
|
||||
|
||||
class MarkdownTokenType:
    """Integer identifiers for the kinds of token found in a markdown block.

    MarkdownHighlighter compares ``token.type`` against these constants and
    uses them as keys of its theme dictionary.
    """

    TokenUnknown = -1

    # Titles
    # The six ATX values must stay consecutive: the highlighter derives the
    # heading level as ``token.type - TokenAtxHeading1 + 1``.
    TokenAtxHeading1 = 0
    TokenAtxHeading2 = 1
    TokenAtxHeading3 = 2
    TokenAtxHeading4 = 3
    TokenAtxHeading5 = 4
    TokenAtxHeading6 = 5
    TokenSetextHeading1Line1 = 6
    TokenSetextHeading1Line2 = 7
    TokenSetextHeading2Line1 = 8
    TokenSetextHeading2Line2 = 9

    # Inline and block-level tokens
    TokenEmphasis = 10
    TokenStrong = 11
    TokenStrikethrough = 12
    TokenVerbatim = 13
    TokenHtmlTag = 14
    TokenHtmlEntity = 15
    TokenAutomaticLink = 16
    TokenInlineLink = 17
    TokenReferenceLink = 18
    TokenReferenceDefinition = 19
    TokenImage = 20
    TokenHtmlComment = 21
    TokenNumberedList = 22
    TokenBulletPointList = 23
    TokenHorizontalRule = 24
    TokenLineBreak = 25
    TokenBlockquote = 26
    TokenCodeBlock = 27
    TokenGithubCodeFence = 28
    TokenPandocCodeFence = 29
    TokenCodeFenceEnd = 30
    TokenMention = 31
    TokenTableHeader = 32
    TokenTableDivider = 33
    TokenTablePipe = 34
    TokenSuperScript = 35
    TokenSubScript = 36
    # Presumably a sentinel one past the last real token type -- TODO confirm.
    TokenLast = 37

    # Every heading token type (ATX and Setext); the highlighter iterates
    # this list to create and resize the heading theme entries.
    TITLES = [TokenAtxHeading1, TokenAtxHeading2, TokenAtxHeading3,
              TokenAtxHeading4, TokenAtxHeading5, TokenAtxHeading6,
              TokenSetextHeading1Line1, TokenSetextHeading1Line2,
              TokenSetextHeading2Line1, TokenSetextHeading2Line2]
|
||||
|
||||
|
||||
|
||||
class BlockquoteStyle:
    """Rendering styles for blockquotes.

    MarkdownHighlighter defaults to BlockquoteStyleFancy and accepts one of
    these values in setBlockquoteStyle().
    """

    BlockquoteStylePlain = 0
    BlockquoteStyleItalic = 1
    BlockquoteStyleFancy = 2
|
718
manuskript/ui/highlighters/markdownHighlighter.py
Normal file
718
manuskript/ui/highlighters/markdownHighlighter.py
Normal file
|
@ -0,0 +1,718 @@
|
|||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
A QSyntaxHighlighter for markdown, using tokenizer. More accurate than simple
|
||||
regexp, but not yet perfect.
|
||||
"""
|
||||
|
||||
import re
|
||||
from PyQt5.QtCore import Qt, pyqtSignal, qWarning, QRegExp
|
||||
from PyQt5.QtGui import (QSyntaxHighlighter, QTextBlock, QColor, QFont,
|
||||
QTextCharFormat, QBrush, QPalette)
|
||||
from PyQt5.QtWidgets import qApp, QStyle
|
||||
|
||||
from manuskript.ui.highlighters.markdownTokenizer import MarkdownTokenizer
|
||||
from manuskript.ui.highlighters.markdownEnums import MarkdownState as MS
|
||||
from manuskript.ui.highlighters.markdownEnums import MarkdownTokenType as MTT
|
||||
from manuskript.ui.highlighters.markdownEnums import BlockquoteStyle as BS
|
||||
|
||||
# NOTE(review): leftover scratch text ("a long line", repeated; apparently a 79-column ruler). Safe to delete.
|
||||
|
||||
GW_FADE_ALPHA = 140
|
||||
|
||||
# Highlighter based on GhostWriter (http://wereturtle.github.io/ghostwriter/).
|
||||
# GPLV3+.
|
||||
|
||||
# FIXME: Setext headings don't work anymore
|
||||
|
||||
class MarkdownHighlighter(QSyntaxHighlighter):
|
||||
|
||||
highlightBlockAtPosition = pyqtSignal(int)
|
||||
headingFound = pyqtSignal(int, str, QTextBlock)
|
||||
headingRemoved = pyqtSignal(int)
|
||||
|
||||
def __init__(self, editor):
    """Create a highlighter attached to ``editor``'s document.

    :param editor: the text edit widget being highlighted. Its
        ``document()`` is passed to QSyntaxHighlighter; other methods of
        this class also call ``editor.font()`` and ``editor.textCursor()``.
    """
    QSyntaxHighlighter.__init__(self, editor.document())

    # Default values
    self.editor = editor
    self.tokenizer = MarkdownTokenizer()

    self.spellCheckEnabled = False
    # Both attributes are read by spellCheck(); they are initialised here
    # so that enabling spell checking before setDictionary() or
    # onTypingPaused() has been called does not raise AttributeError.
    self.dictionary = None
    self.typingPaused = True
    self.inBlockquote = False
    self.defaultTextColor = QColor(Qt.black)
    self.backgroundColor = QColor(Qt.white)
    self.markupColor = QColor(Qt.black)
    self.linkColor = QColor(Qt.blue)
    self.spellingErrorColor = QColor(Qt.red)
    self.blockquoteStyle = BS.BlockquoteStyleFancy

    # Settings
    # NOTE: the attribute name is misspelt ("Undlerline"); it is kept
    # because setUseUnderlineForEmphasis() writes the same name.
    self.useUndlerlineForEmphasis = False
    self.highlightLineBreaks = True

    # Queued so that a backtrack request emitted from highlightBlock() is
    # handled after the current highlighting pass has returned.
    self.highlightBlockAtPosition.connect(self.onHighlightBlockAtPosition,
                                          Qt.QueuedConnection)

    font = self.document().defaultFont()
    font.setStyleStrategy(QFont.PreferAntialias)
    self.defaultFormat = QTextCharFormat()
    self.defaultFormat.setFont(font)
    self.defaultFormat.setForeground(QBrush(self.defaultTextColor))

    # The theme must exist before the heading sizes are written into it.
    self.theme = self.defaultTheme()
    self.setupHeadingFontSize(True)

    self.highlightedWords = []
    self.highlightedTags = []
    self.searchExpression = ""
    self.searchExpressionRegExp = False
    self.searchExpressionCase = False

    # (regular expression, theme) pairs applied on top of the markdown
    # formatting by highlightBlock(). A rule either has no group, or two
    # groups matching the opening and the closing markup.
    self.customRules = [
        ("(°).*?(°)", {"background": Qt.yellow,
                       "markupColor": Qt.lightGray}),
    ]
|
||||
|
||||
def highlightBlock(self, text):
    """
    Highlight one text block; ``text`` is the block's content.

    Steps, in order: title formatting for the first block (which then
    returns early), tokenizer-driven markdown formatting, optional spell
    check, hashtags, highlighted words/tags, the searched expression and
    finally the custom rules. ``headingRemoved`` is emitted when the block
    was a heading before this pass and no longer is.

    Note: Never set the QTextBlockFormat for a QTextBlock from within
    the highlighter. Depending on how the block format is modified,
    a recursive call to the highlighter may be triggered, which will
    cause the application to crash.

    Likewise, don't try to set the QTextBlockFormat outside the highlighter
    (i.e., from within the text editor). While the application will not
    crash, the format change will be added to the undo stack. Attempting
    to undo from that point on will cause the undo stack to be virtually
    frozen, since undoing the format operation causes the text to be
    considered changed, thus triggering the slot that changes the text
    formatting to be triggered yet again.
    """

    if self.currentBlock().blockNumber() == 0:
        # This is the title
        bf = QTextCharFormat()
        bf.setFontPointSize(self.editor.font().pointSize() * 2)
        bf.setFontWeight(QFont.Bold)
        bf.setForeground(Qt.lightGray)
        self.setFormat(0, len(text), bf)
        return

    # State of this block before re-tokenizing; used at the end to detect
    # a heading that disappeared.
    lastState = self.currentBlockState()
    self.setFormat(0, len(text), self.defaultFormat)

    if self.tokenizer is not None:
        self.tokenizer.clear()
        block = self.currentBlock()
        nextState = MS.MarkdownStateUnknown
        previousState = self.previousBlockState()

        if block.next().isValid():
            nextState = block.next().userState()

        self.tokenizer.tokenize(text, lastState, previousState, nextState)
        self.setCurrentBlockState(self.tokenizer.getState())

        self.inBlockquote = \
            self.tokenizer.getState() == MS.MarkdownStateBlockquote

        # STATE FORMATTING
        # FIXME: generic
        if self.currentBlockState() in [
                MS.MarkdownStatePipeTableHeader,
                MS.MarkdownStatePipeTableDivider,
                MS.MarkdownStatePipeTableRow]:
            fmt = QTextCharFormat()
            f = fmt.font()
            f.setFamily("Monospace")
            fmt.setFont(f)
            self.setFormat(0, len(text), fmt)

        # Monospace the blank chars (leading spaces and tabs only)
        i = 0
        while i < len(text) and text[i] in (" ", "\t"):
            fmt = self.format(i)
            fmt.setFontFamily("Monospace")
            self.setFormat(i, 1, fmt)
            i += 1

        for token in self.tokenizer.getTokens():
            if token.type == MTT.TokenUnknown:
                qWarning("Highlighter found unknown token type in text block.")
                continue

            if token.type in [
                    MTT.TokenAtxHeading1,
                    MTT.TokenAtxHeading2,
                    MTT.TokenAtxHeading3,
                    MTT.TokenAtxHeading4,
                    MTT.TokenAtxHeading5,
                    MTT.TokenAtxHeading6,
                    MTT.TokenSetextHeading1Line1,
                    MTT.TokenSetextHeading2Line1,
                    ]:
                self.storeHeadingData(token, text)

            self.applyFormattingForToken(token, text)

        if self.tokenizer.backtrackRequested():
            # The signal is connected with a queued connection, so the
            # previous block is re-highlighted after this call returns.
            previous = self.currentBlock().previous()
            self.highlightBlockAtPosition.emit(previous.position())

    if self.spellCheckEnabled:
        self.spellCheck(text)

    # HASHTAGS AND HIGHLIGHTS

    # Hashtags
    # The pattern needs one non-'#' character before the tag, so a tag at
    # the very start of the block is not matched. The +1 / -1 offsets skip
    # that leading character.
    start = 0
    ht = QRegExp(r'([^#])(#[\w]+)')
    while ht.indexIn(text, start) >= 0:
        f = self.format(ht.pos() + 1)
        f.setForeground(QColor("#07c"))
        f.setFontWeight(QFont.Bold)
        self.setFormat(ht.pos() + 1, ht.matchedLength() - 1, f)
        start = ht.pos() + 1

    # Highlighted
    # Formats are updated one character at a time so that each character
    # keeps the formatting it already received above.
    loweredText = text.lower()
    for w in self.highlightedWords + self.highlightedTags:
        loweredWord = w.lower()
        pos = loweredText.find(loweredWord)
        while pos >= 0:
            for i in range(pos, pos + len(w)):
                f = self.format(i)
                f.setBackground(QBrush(QColor("#fAf")))
                self.setFormat(i, 1, f)
            pos = loweredText.find(loweredWord, pos + 1)

    # Searched
    if self.searchExpression:
        needle = self.searchExpression

        if not self.searchExpressionRegExp:
            # Pick haystack and needle once so that EVERY occurrence is
            # searched with the requested case sensitivity. Previously only
            # the first match honoured it; later ones always ignored case.
            if self.searchExpressionCase:
                haystack, target = text, needle
            else:
                haystack, target = loweredText, needle.lower()

            pos = haystack.find(target)
            while pos >= 0:
                for i in range(pos, pos + len(needle)):
                    f = self.format(i)
                    f.setBackground(QBrush(QColor("#Aff")))
                    self.setFormat(i, 1, f)
                pos = haystack.find(target, pos + 1)

        else:
            # Using QRegExp
            rx = QRegExp(needle)
            if not self.searchExpressionCase:
                rx.setCaseSensitivity(Qt.CaseInsensitive)
            p = rx.indexIn(text)
            while p != -1:
                f = self.format(p)
                f.setBackground(QBrush(QColor("#Aff")))
                self.setFormat(p, rx.matchedLength(), f)
                p = rx.indexIn(text, p + 1)

    # Custom rules
    for rule, theme in self.customRules:
        for m in re.finditer(rule, text):

            if not m.groups():  # No groups, therefore no markup
                f = self.format(m.start())
                f, _ = self.formatsFromTheme(theme, f)
                self.setFormat(m.start(), len(m.group()), f)

            else:
                # Group 1 is the opening markup, group 2 the closing
                # markup; everything between them is the content.
                mf = self.format(m.start())
                f = self.format(m.start() + len(m.group(1)))
                f, mf = self.formatsFromTheme(theme, f, mf)
                self.setFormat(m.start(1), len(m.group(1)), mf)
                self.setFormat(m.start(2), len(m.group(2)), mf)
                self.setFormat(m.start(1) + len(m.group(1)),
                               len(m.group())
                               - len(m.group(1))
                               - len(m.group(2)), f)

    # If the block has transitioned from previously being a heading to now
    # being a non-heading, signal that the position in the document no
    # longer contains a heading.
    if self.isHeadingBlockState(lastState) and \
            not self.isHeadingBlockState(self.currentBlockState()):
        self.headingRemoved.emit(self.currentBlock().position())
|
||||
|
||||
|
||||
###########################################################################
|
||||
# COLORS & FORMATTING
|
||||
###########################################################################
|
||||
|
||||
def defaultTheme(self):
    """Build the default theme from the application palette.

    The returned dict has a "markup" entry (default colour of markup
    characters) plus one dict of options per token type. Options
    understood by formatsFromTheme() are:

        "color", "deltaSize", "background", "monospace", "bold",
        "italic", "underline", "overline", "strike", "super", "sub",
        "formatMarkup", "markupBold", "markupColor", "markupBackground",
        "markupMonospace"
    """
    palette = qApp.palette()

    # Markup and dimmed colours; fall back to greys when the palette only
    # offers black.
    markup = palette.color(QPalette.Mid)
    if markup == Qt.black:
        markup = Qt.lightGray
    dark = palette.color(QPalette.Dark)
    if dark == Qt.black:
        dark = QColor(Qt.gray)
    darker = dark.darker(150)

    # Text background
    background = palette.color(QPalette.Base)
    lightBackground = background.darker(130)
    veryLightBackground = background.darker(105)

    theme = {"markup": markup}

    # Every heading starts from the same options (one dict per token type).
    for tokenType in MTT.TITLES:
        theme[tokenType] = {
            "formatMarkup": True,
            "bold": True,
            "monospace": True,
        }

    # ATX headings get lighter as the level gets deeper.
    headingColor = QColor(Qt.darkBlue)
    theme[MTT.TokenAtxHeading1]["color"] = headingColor
    deeperHeadings = (
        MTT.TokenAtxHeading2,
        MTT.TokenAtxHeading3,
        MTT.TokenAtxHeading4,
        MTT.TokenAtxHeading5,
        MTT.TokenAtxHeading6,
    )
    for step, tokenType in enumerate(deeperHeadings, start=1):
        theme[tokenType]["color"] = headingColor.lighter(100 + 50 * step)

    # The second line of a Setext heading replaces the generic heading
    # options entirely.
    for tokenType in (MTT.TokenSetextHeading1Line2,
                      MTT.TokenSetextHeading2Line2):
        theme[tokenType] = {
            "color": markup,
            "monospace": True,
        }

    theme.update({
        # Beautifiers
        MTT.TokenEmphasis: {"italic": True},
        MTT.TokenStrong: {"bold": True},
        MTT.TokenStrikethrough: {"strike": True},
        MTT.TokenVerbatim: {
            "monospace": True,
            "background": veryLightBackground,
            "formatMarkup": True,
            "markupColor": markup,
        },
        MTT.TokenSuperScript: {"super": True, "formatMarkup": True},
        MTT.TokenSubScript: {"sub": True, "formatMarkup": True},

        MTT.TokenHtmlTag: {"color": Qt.red},
        MTT.TokenHtmlEntity: {"color": Qt.red},
        MTT.TokenAutomaticLink: {"color": palette.color(QPalette.Link)},
        MTT.TokenInlineLink: {"color": palette.color(QPalette.Link)},
        MTT.TokenReferenceLink: {"color": palette.color(QPalette.Link)},
        MTT.TokenReferenceDefinition: {
            "color": palette.color(QPalette.Link)},
        MTT.TokenImage: {"color": Qt.green},
        MTT.TokenHtmlComment: {"color": dark},
        MTT.TokenNumberedList: {
            "markupColor": QColor(Qt.red).lighter(),
            "markupBold": True,
            "markupMonospace": True,
        },
        MTT.TokenBulletPointList: {
            "markupColor": QColor(Qt.red).lighter(),
            "markupBold": True,
            "markupMonospace": True,
        },
        MTT.TokenHorizontalRule: {
            "overline": True,
            "underline": True,
            "monospace": True,
            "color": markup,
        },
        MTT.TokenLineBreak: {"background": markup},
        MTT.TokenBlockquote: {
            "color": darker,
            "markupColor": lightBackground,
            "markupBackground": lightBackground,
        },
        MTT.TokenCodeBlock: {
            "color": darker,
            "markupBackground": veryLightBackground,
            "monospace": True,
        },
        MTT.TokenGithubCodeFence: {"color": markup},
        MTT.TokenPandocCodeFence: {"color": markup},
        MTT.TokenCodeFenceEnd: {"color": markup},
        MTT.TokenMention: {},  # FIXME
        MTT.TokenTableHeader: {"color": darker, "monospace": True},
        MTT.TokenTableDivider: {"color": markup, "monospace": True},
        MTT.TokenTablePipe: {"color": markup, "monospace": True},
    })

    return theme
|
||||
|
||||
def setColorScheme(self, defaultTextColor, backgroundColor, markupColor,
                   linkColor, spellingErrorColor):
    """Store a new colour scheme and re-highlight the document.

    Only ``defaultTextColor`` is applied right away (as the foreground of
    the default format); the other colours are just stored.
    """
    (self.defaultTextColor,
     self.backgroundColor,
     self.markupColor,
     self.linkColor,
     self.spellingErrorColor) = (defaultTextColor,
                                 backgroundColor,
                                 markupColor,
                                 linkColor,
                                 spellingErrorColor)

    textBrush = QBrush(defaultTextColor)
    self.defaultFormat.setForeground(textBrush)

    # FIXME: generate a theme based on that
    self.rehighlight()
|
||||
|
||||
###########################################################################
|
||||
# ACTUAL FORMATTING
|
||||
###########################################################################
|
||||
|
||||
def applyFormattingForToken(self, token, text):
    """Apply the themed character formats to one token of the current block.

    The opening markup, the content and the closing markup (if any) are
    formatted separately.

    :param token: tokenizer token; ``type``, ``position``, ``length``,
        ``openingMarkupLength`` and ``closingMarkupLength`` are read.
    :param text: text of the current block. Unused here; kept so existing
        callers keep working.
    """
    if token.type == MTT.TokenUnknown:
        qWarning("MarkdownHighlighter.applyFormattingForToken() was passed"
                 " in a token of unknown type.")
        return

    contentStart = token.position + token.openingMarkupLength
    contentLength = (token.length
                     - token.openingMarkupLength
                     - token.closingMarkupLength)

    # Start from the formats already applied at these positions so that
    # earlier formatting is refined rather than discarded.
    contentFormat = self.format(contentStart)
    markupFormat = self.format(token.position)
    if self.theme.get("markup"):
        markupFormat.setForeground(self.theme["markup"])

    tokenTheme = self.theme.get(token.type)
    if tokenTheme:
        contentFormat, markupFormat = self.formatsFromTheme(tokenTheme,
                                                            contentFormat,
                                                            markupFormat)

    # Format opening markup
    self.setFormat(token.position, token.openingMarkupLength, markupFormat)

    # Format text
    self.setFormat(contentStart, contentLength, contentFormat)

    # Format closing markup
    if token.closingMarkupLength > 0:
        self.setFormat(
            token.position + token.length - token.closingMarkupLength,
            token.closingMarkupLength,
            markupFormat)
|
||||
|
||||
def formatsFromTheme(self, theme, format=None, markupFormat=None):
    """Apply the options of ``theme`` to a content and a markup format.

    :param theme: dict of options (see defaultTheme() for the keys). Only
        options with a truthy value are applied.
    :param format: QTextCharFormat for the token content. It is modified
        in place; a fresh one is created when omitted.
    :param markupFormat: QTextCharFormat for the markup characters,
        handled like ``format``. With the "formatMarkup" option it is
        replaced by a copy of ``format`` that keeps the markup foreground.
    :return: tuple ``(format, markupFormat)``.
    """
    # The defaults used to be QTextCharFormat() instances created once at
    # definition time; since both formats are mutated below, every call
    # relying on a default leaked its changes into the next such call.
    if format is None:
        format = QTextCharFormat()
    if markupFormat is None:
        markupFormat = QTextCharFormat()

    # Token
    if theme.get("color"):
        format.setForeground(theme["color"])
    if theme.get("deltaSize"):
        format.setFontPointSize(format.fontPointSize() + theme["deltaSize"])
    if theme.get("background"):
        format.setBackground(theme["background"])
    if theme.get("monospace"):
        format.setFontFamily("Monospace")
    if theme.get("bold"):
        format.setFontWeight(QFont.Bold)
    if theme.get("italic"):
        format.setFontItalic(theme["italic"])
    if theme.get("underline"):
        format.setFontUnderline(theme["underline"])
    if theme.get("overline"):
        format.setFontOverline(theme["overline"])
    if theme.get("strike"):
        format.setFontStrikeOut(theme["strike"])
    if theme.get("super"):
        format.setVerticalAlignment(QTextCharFormat.AlignSuperScript)
    if theme.get("sub"):
        format.setVerticalAlignment(QTextCharFormat.AlignSubScript)

    # Markup
    if theme.get("formatMarkup"):
        # The markup takes the content's formatting but keeps its own colour.
        markupForeground = markupFormat.foreground()
        markupFormat = QTextCharFormat(format)
        markupFormat.setForeground(markupForeground)
    if theme.get("markupBold"):
        markupFormat.setFontWeight(QFont.Bold)
    if theme.get("markupColor"):
        markupFormat.setForeground(theme["markupColor"])
    if theme.get("markupBackground"):
        markupFormat.setBackground(theme["markupBackground"])
    if theme.get("markupMonospace"):
        markupFormat.setFontFamily("Monospace")

    return format, markupFormat
|
||||
|
||||
###########################################################################
|
||||
# SETTINGS
|
||||
###########################################################################
|
||||
|
||||
def setHighlighted(self, words, tags):
    """Set the words and tags to highlight.

    The document is re-highlighted only when either list differs from the
    one currently stored.
    """
    unchanged = (self.highlightedWords == words
                 and self.highlightedTags == tags)
    self.highlightedWords = words
    self.highlightedTags = tags
    if not unchanged:
        self.rehighlight()
|
||||
|
||||
def setSearched(self, expression, regExp=False, caseSensitivity=False):
    """
    Define the expression currently searched, to be highlighted.

    ``expression`` is treated as a regular expression when ``regExp`` is
    true. The document is re-highlighted only if one of the three values
    changed.
    """
    previous = (self.searchExpression,
                self.searchExpressionRegExp,
                self.searchExpressionCase)
    requested = (expression, regExp, caseSensitivity)

    (self.searchExpression,
     self.searchExpressionRegExp,
     self.searchExpressionCase) = requested

    if previous != requested:
        self.rehighlight()
|
||||
|
||||
def setDictionary(self, dictionary):
    """Store the spell-check dictionary; re-highlight if spell check is on."""
    self.dictionary = dictionary
    if not self.spellCheckEnabled:
        return
    self.rehighlight()
|
||||
|
||||
def increaseFontSize(self):
    """Grow the default format's font by one point and re-highlight."""
    baseFormat = self.defaultFormat
    enlarged = baseFormat.fontPointSize() + 1.0
    baseFormat.setFontPointSize(enlarged)
    self.rehighlight()
|
||||
|
||||
def decreaseFontSize(self):
    """Shrink the default format's font by one point and re-highlight."""
    baseFormat = self.defaultFormat
    reduced = baseFormat.fontPointSize() - 1.0
    baseFormat.setFontPointSize(reduced)
    self.rehighlight()
|
||||
|
||||
def setEnableLargeHeadingSizes(self, enable):
    """Turn enlarged heading fonts on or off, then re-highlight."""
    useLargeHeadings = enable
    self.setupHeadingFontSize(useLargeHeadings)
    self.rehighlight()
|
||||
|
||||
def setupHeadingFontSize(self, useLargeHeadings):
    """Write the "deltaSize" option of every heading theme entry.

    With ``useLargeHeadings`` the headings get a size increase that shrinks
    with the heading level; otherwise every heading gets a delta of 0.
    Does not re-highlight.
    """
    if not useLargeHeadings:
        for tokenType in MTT.TITLES:
            self.theme[tokenType]["deltaSize"] = 0
        return

    # (token type, point-size increase)
    largeDeltas = (
        (MTT.TokenSetextHeading1Line1, 7),
        (MTT.TokenSetextHeading2Line1, 5),
        (MTT.TokenSetextHeading1Line2, 7),
        (MTT.TokenSetextHeading2Line2, 5),
        (MTT.TokenAtxHeading1, 7),
        (MTT.TokenAtxHeading2, 5),
        (MTT.TokenAtxHeading3, 3),
        (MTT.TokenAtxHeading4, 2),
        (MTT.TokenAtxHeading5, 1),
        (MTT.TokenAtxHeading6, 0),
    )
    for tokenType, delta in largeDeltas:
        self.theme[tokenType]["deltaSize"] = delta
|
||||
|
||||
def setUseUnderlineForEmphasis(self, enable):
    """Store the underline-for-emphasis setting and re-highlight.

    NOTE: the attribute name is misspelt ("Undlerline"); it matches the
    name initialised in __init__ and is therefore left as is.
    """
    setattr(self, "useUndlerlineForEmphasis", enable)
    self.rehighlight()
|
||||
|
||||
def setFont(self, fontFamily, fontSize):
    """Use a normal-weight, upright font for the default format.

    :param fontFamily: font family name.
    :param fontSize: font size in points.
    """
    # Arguments are passed positionally: PyQt5 only accepts keyword
    # arguments for optional parameters, and the family is a required one,
    # so QFont(family=...) is rejected.
    font = QFont(fontFamily, fontSize, QFont.Normal, False)
    self.defaultFormat.setFont(font)
    self.rehighlight()
|
||||
|
||||
def setSpellCheckEnabled(self, enabled):
    """Enable or disable spell checking, then re-highlight the document."""
    setattr(self, "spellCheckEnabled", enabled)
    self.rehighlight()
|
||||
|
||||
def setBlockquoteStyle(self, style):
    """Select how blockquotes are rendered and re-highlight.

    :param style: one of the BlockquoteStyle constants. With
        BlockquoteStyleItalic the blockquote text is set in italics.
    """
    self.blockquoteStyle = style

    # This used to write to self.emphasizeToken, an attribute that is never
    # created, so every call raised AttributeError. The theme's "italic"
    # option is what formatsFromTheme() reads, so the choice goes there.
    blockquoteTheme = self.theme.setdefault(MTT.TokenBlockquote, {})
    blockquoteTheme["italic"] = (style == BS.BlockquoteStyleItalic)

    self.rehighlight()
|
||||
|
||||
def setHighlightLineBreaks(self, enable):
    """Store the line-break highlighting setting and re-highlight."""
    setattr(self, "highlightLineBreaks", enable)
    self.rehighlight()
|
||||
|
||||
###########################################################################
|
||||
# GHOSTWRITER SPECIFIC?
|
||||
###########################################################################
|
||||
|
||||
def onTypingResumed(self):
    """Record that the user is typing again (read by spellCheck())."""
    setattr(self, "typingPaused", False)
|
||||
|
||||
def onTypingPaused(self):
    """Record that typing paused and re-highlight the cursor's block."""
    self.typingPaused = True
    cursorPosition = self.editor.textCursor().position()
    cursorBlock = self.document().findBlock(cursorPosition)
    self.rehighlightBlock(cursorBlock)
|
||||
|
||||
def onHighlightBlockAtPosition(self, position):
    """Re-highlight the block containing document ``position``.

    Slot connected to the highlightBlockAtPosition signal.
    """
    self.rehighlightBlock(self.document().findBlock(position))
|
||||
|
||||
def onTextBlockRemoved(self, block):
    """Emit headingRemoved when the removed ``block`` was a heading.

    :param block: the text block being removed; its ``userState()`` and
        ``position()`` are read.
    """
    # userState is a method. Passing the bound method itself (as before)
    # never compares equal to a heading state, so the signal never fired.
    if self.isHeadingBlockState(block.userState()):
        self.headingRemoved.emit(block.position())
|
||||
|
||||
###########################################################################
|
||||
# SPELLCHECK
|
||||
###########################################################################
|
||||
|
||||
def spellCheck(self, text):
    """Underline the misspelled words of the current block.

    The word that ends exactly at the cursor is left alone while the user
    is typing, so a word still being typed is not flagged.

    :param text: text of the current block.

    NOTE(review): assumes the dictionary exposes ``check(text, start)``
    returning an object with ``isNull()``, ``position()`` and ``length()``
    (the API used below) -- confirm against the dictionary class in use.
    """
    # Both attributes may be missing if spell checking is enabled before
    # setDictionary() / onTypingPaused() were ever called.
    dictionary = getattr(self, "dictionary", None)
    if dictionary is None:
        return
    typingPaused = getattr(self, "typingPaused", True)

    cursorPosition = self.editor.textCursor().position()
    cursorPosBlock = self.document().findBlock(cursorPosition)
    cursorPosInBlock = -1

    if self.currentBlock() == cursorPosBlock:
        cursorPosInBlock = cursorPosition - cursorPosBlock.position()

    # styleHint() returns a plain int, while setUnderlineStyle() expects
    # the UnderlineStyle enum; it is loop-invariant, so convert it once.
    # (This call was previously misspelt "qApp.stlye()" and raised.)
    underlineStyle = QTextCharFormat.UnderlineStyle(
        qApp.style().styleHint(QStyle.SH_SpellCheckUnderlineStyle))

    misspelledWord = dictionary.check(text, 0)

    while not misspelledWord.isNull():
        startIndex = misspelledWord.position()
        length = misspelledWord.length()

        if typingPaused or cursorPosInBlock != startIndex + length:
            spellingErrorFormat = self.format(startIndex)
            spellingErrorFormat.setUnderlineColor(self.spellingErrorColor)
            spellingErrorFormat.setUnderlineStyle(underlineStyle)

            self.setFormat(startIndex, length, spellingErrorFormat)

        # Continue right after the word that was just handled.
        startIndex += length
        misspelledWord = dictionary.check(text, startIndex)
|
||||
|
||||
def storeHeadingData(self, token, text):
    """Emit ``headingFound`` for a heading token found in the current block.

    For ATX headings the level is derived from the token type and the heading
    text is the part of ``text`` between the opening and closing markup,
    stripped of surrounding whitespace.  For the first line of a setext
    heading the whole line is the heading text.  Any other token type is
    reported with ``qWarning`` and ignored.
    """
    if token.type in [
            MTT.TokenAtxHeading1,
            MTT.TokenAtxHeading2,
            MTT.TokenAtxHeading3,
            MTT.TokenAtxHeading4,
            MTT.TokenAtxHeading5,
            MTT.TokenAtxHeading6]:
        # Relies on the six ATX token types being consecutive values.
        level = token.type - MTT.TokenAtxHeading1 + 1
        start = token.position + token.openingMarkupLength
        contentLength = (token.length
                         - token.openingMarkupLength
                         - token.closingMarkupLength)
        headingText = text[start:start + contentLength].strip()

    elif token.type == MTT.TokenSetextHeading1Line1:
        level = 1
        headingText = text

    elif token.type == MTT.TokenSetextHeading2Line1:
        level = 2
        headingText = text

    else:
        # Token exposes ``type`` as a plain attribute; there is no getType().
        qWarning("MarkdownHighlighter.storeHeadingData() encountered" +
                 " unexpected token: {}".format(token.type))
        return

    # FIXME: TypeError: could not convert 'TextBlockData' to 'QTextBlockUserData'
    # blockData = self.currentBlockUserData()
    # if blockData is None:
    #     blockData = TextBlockData(self.document(), self.currentBlock())
    #
    # self.setCurrentBlockUserData(blockData)
    self.headingFound.emit(level, headingText, self.currentBlock())
|
||||
|
||||
def isHeadingBlockState(self, state):
    """Return True when ``state`` is one of the heading block states.

    Heading states are the six ATX heading levels and the first line of
    either setext heading level.
    """
    headingStates = (
        MS.MarkdownStateAtxHeading1,
        MS.MarkdownStateAtxHeading2,
        MS.MarkdownStateAtxHeading3,
        MS.MarkdownStateAtxHeading4,
        MS.MarkdownStateAtxHeading5,
        MS.MarkdownStateAtxHeading6,
        MS.MarkdownStateSetextHeading1Line1,
        MS.MarkdownStateSetextHeading2Line1,
    )
    return state in headingStates
|
||||
|
||||
|
||||
def getLuminance(color):
    """Return a weighted sum of the colour's float RGB components.

    ``color`` must provide ``redF()``, ``greenF()`` and ``blueF()``; the
    weights are 0.30, 0.59 and 0.11 respectively.
    """
    redPart = 0.30 * color.redF()
    greenPart = 0.59 * color.greenF()
    bluePart = 0.11 * color.blueF()
    return redPart + greenPart + bluePart
|
||||
|
||||
|
||||
def applyAlphaToChannel(foreground, background, alpha):
    """Blend one colour channel of ``foreground`` over ``background``.

    ``alpha`` is the weight of the foreground value; the background value is
    weighted with ``1.0 - alpha``.
    """
    foregroundShare = foreground * alpha
    backgroundShare = background * (1.0 - alpha)
    return foregroundShare + backgroundShare
|
||||
|
||||
|
||||
def applyAlpha(foreground, background, alpha):
    """Return ``foreground`` blended over ``background`` as an opaque QColor.

    ``alpha`` is divided by 255.0 before blending, so it is presumably
    expected in the 0-255 range (confirm against callers).  Each of the red,
    green and blue channels is blended with ``applyAlphaToChannel``.
    """
    blendedColor = QColor(0, 0, 0)
    normalizedAlpha = alpha / 255.0

    # applyAlphaToChannel() returns a float, while the QColor setters take
    # integer channel values; truncate explicitly instead of relying on an
    # implicit float-to-int conversion.
    blendedColor.setRed(int(applyAlphaToChannel(
        foreground.red(), background.red(), normalizedAlpha)))
    blendedColor.setGreen(int(applyAlphaToChannel(
        foreground.green(), background.green(), normalizedAlpha)))
    blendedColor.setBlue(int(applyAlphaToChannel(
        foreground.blue(), background.blue(), normalizedAlpha)))

    return blendedColor
|
887
manuskript/ui/highlighters/markdownTokenizer.py
Normal file
887
manuskript/ui/highlighters/markdownTokenizer.py
Normal file
|
@ -0,0 +1,887 @@
|
|||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import re
|
||||
from PyQt5.QtCore import *
|
||||
from PyQt5.QtGui import *
|
||||
from PyQt5.QtWidgets import *
|
||||
|
||||
from noteflow.ui.views.markdownEnums import MarkdownState as MS
|
||||
from noteflow.ui.views.markdownEnums import MarkdownTokenType as MTT
|
||||
|
||||
# This file is simply a python translation of GhostWriter's Tokenizer.
|
||||
# http://wereturtle.github.io/ghostwriter/
|
||||
# GPLV3+.
|
||||
|
||||
# ==============================================================================
|
||||
# TOKEN
|
||||
# ==============================================================================
|
||||
|
||||
class Token:
    """A typed span inside one line of text, produced by a tokenizer."""

    def __init__(self):
        # -1 means "no type assigned yet"; HighlightTokenizer.addToken()
        # prints a message when it receives such a token.
        self.type = -1

        # Start offset and length of the span within the line.
        self.position = self.length = 0

        # Number of markup characters at the start and at the end of the span.
        self.openingMarkupLength = self.closingMarkupLength = 0
|
||||
|
||||
# ==============================================================================
|
||||
# HIGHLIGHT TOKENIZER
|
||||
# ==============================================================================
|
||||
|
||||
class HighlightTokenizer:
    """Base class for tokenizers that split one line of text into tokens.

    A subclass overrides ``tokenize()`` and reports its results through
    ``addToken()``, ``setState()`` and ``requestBacktrack()``.  The results
    are read back with ``getTokens()``, ``getState()`` and
    ``backtrackRequested()``, and reset with ``clear()``.
    """

    def __init__(self):
        self.tokens = []
        # Same initial values as clear(), so that getState() and
        # backtrackRequested() also work on a freshly created tokenizer.
        self.backtrack = False
        self.state = -1

    def tokenize(self, text, currentState, previousState, nextState):
        """Tokenize ``text``; the base implementation does nothing."""
        # Subclass me
        return 0

    def getTokens(self):
        """Return the collected tokens sorted by their position."""
        self.tokens = sorted(self.tokens, key=lambda t: t.position)
        return self.tokens

    def getState(self):
        """Return the state recorded by the last ``setState()`` call, or -1."""
        return self.state

    def backtrackRequested(self):
        """Return True when ``requestBacktrack()`` was called since ``clear()``."""
        return self.backtrack

    def clear(self):
        """Forget all tokens, the backtrack request and the state."""
        self.tokens = []
        self.backtrack = False
        self.state = -1

    def addToken(self, token):
        """Append ``token`` to the collected tokens."""
        self.tokens.append(token)

        # A type of -1 means the tokenizer forgot to set the token type.
        if token.type == -1:
            print("Error here", token.position, token.length)

    def setState(self, state):
        """Record ``state`` as the state of the tokenized line."""
        self.state = state

    def requestBacktrack(self):
        """Flag that a backtrack was requested for the tokenized line."""
        self.backtrack = True

    def tokenLessThan(self, t1, t2):
        """Return True when ``t1`` starts before ``t2``."""
        # Token stores its position as a plain attribute (no getPosition()).
        return t1.position < t2.position
|
||||
|
||||
|
||||
class MarkdownTokenizer(HighlightTokenizer):
|
||||
|
||||
# Character written over text that has already been tokenized, so that later
# searches on the same line do not match inside it.
DUMMY_CHAR = "$"
MAX_MARKDOWN_HEADING_LEVEL = 6

# Block-level patterns.
paragraphBreakRegex = QRegExp("^\\s*$")
heading1SetextRegex = QRegExp("^===+\\s*$")
heading2SetextRegex = QRegExp("^---+\\s*$")
blockquoteRegex = QRegExp("^ {0,3}>.*$")
githubCodeFenceStartRegex = QRegExp("^```+.*$")
githubCodeFenceEndRegex = QRegExp("^```+\\s*$")
pandocCodeFenceStartRegex = QRegExp("^~~~+.*$")
pandocCodeFenceEndRegex = QRegExp("^~~~+\\s*$")
numberedListRegex = QRegExp("^ {0,3}[0-9a-z]+[.)]\\s+.*$")
numberedNestedListRegex = QRegExp("^\\s*[0-9a-z]+[.)]\\s+.*$")
hruleRegex = QRegExp("\\s*(\\*\\s*){3,}|(\\s*(_\\s*){3,})|((\\s*(-\\s*){3,}))")
lineBreakRegex = QRegExp(".*\\s{2,}$")

# Inline patterns.  setMinimal(True) makes the quantifiers non-greedy.
emphasisRegex = QRegExp("(\\*(?![\\s*]).*[^\\s*]\\*)|_(?![\\s_]).*[^\\s_]_")
emphasisRegex.setMinimal(True)
strongRegex = QRegExp("\\*\\*(?=\\S).*\\S\\*\\*(?!\\*)|__(?=\\S).*\\S__(?!_)")
strongRegex.setMinimal(True)
strikethroughRegex = QRegExp("~~[^\\s]+.*[^\\s]+~~")
strikethroughRegex.setMinimal(True)
# "\\^" instead of "\^": same pattern, but without the invalid string escape.
superScriptRegex = QRegExp("\\^([^\\s]|(\\\\\\s))+\\^") # Spaces must be escaped "\ "
superScriptRegex.setMinimal(True)
subScriptRegex = QRegExp("~([^\\s]|(\\\\\\s))+~") # Spaces must be escaped "\ "
subScriptRegex.setMinimal(True)
verbatimRegex = QRegExp("`+")
htmlTagRegex = QRegExp("<[^<>]+>")
htmlTagRegex.setMinimal(True)
htmlEntityRegex = QRegExp("&[a-zA-Z]+;|&#x?[0-9]+;")
automaticLinkRegex = QRegExp("(<[a-zA-Z]+\\:.+>)|(<.+@.+>)")
automaticLinkRegex.setMinimal(True)
inlineLinkRegex = QRegExp("\\[.+\\]\\(.+\\)")
inlineLinkRegex.setMinimal(True)
referenceLinkRegex = QRegExp("\\[(.+)\\]")
referenceLinkRegex.setMinimal(True)
referenceDefinitionRegex = QRegExp("^\\s*\\[.+\\]:")
imageRegex = QRegExp("!\\[.*\\]\\(.+\\)")
imageRegex.setMinimal(True)
htmlInlineCommentRegex = QRegExp("<!--.*-->")
htmlInlineCommentRegex.setMinimal(True)
mentionRegex = QRegExp("\\B@\\w+(\\-\\w+)*(/\\w+(\\-\\w+)*)?")
pipeTableDividerRegex = QRegExp("^ {0,3}(\\|[ :]?)?-{3,}([ :]?\\|[ :]?-{3,}([ :]?\\|)?)+\\s*$")
|
||||
|
||||
def __init__(self):
    """Set up the tokenizer by running the base class initialiser."""
    super(MarkdownTokenizer, self).__init__()
|
||||
|
||||
def tokenize(self, text, currentState, previousState, nextState):
    """Tokenize one line of Markdown and record the line's new state.

    ``currentState``, ``previousState`` and ``nextState`` are the
    ``MarkdownState`` values of this line and of the lines before and after
    it; they are stored on the instance for the ``tokenize*`` helpers.
    Tokens are reported through ``addToken()``, the resulting state through
    ``setState()``, and a backtrack is requested when an earlier line has to
    be processed again.
    """
    self.currentState = currentState
    self.previousState = previousState
    self.nextState = nextState

    codeFenceStates = [MS.MarkdownStateInGithubCodeFence,
                       MS.MarkdownStateInPandocCodeFence]
    listStates = [MS.MarkdownStateListLineBreak,
                  MS.MarkdownStateNumberedList,
                  MS.MarkdownStateBulletPointList]

    if previousState in codeFenceStates and self.tokenizeCodeBlock(text):
        # No further tokenizing required
        pass

    elif previousState != MS.MarkdownStateComment \
            and self.paragraphBreakRegex.exactMatch(text):
        if previousState in listStates:
            self.setState(MS.MarkdownStateListLineBreak)
        elif previousState != MS.MarkdownStateCodeBlock or \
                not (text.startswith("\t") or text.endswith("    ")):
            # A whitespace-only line that starts with a tab or ends with four
            # spaces keeps an indented code block open; anything else is a
            # paragraph break.  (The four-space test used to compare a
            # 4-character slice with a single space and so never matched.)
            self.setState(MS.MarkdownStateParagraphBreak)

    elif self.tokenizeSetextHeadingLine2(text) or \
            self.tokenizeCodeBlock(text) or \
            self.tokenizeMultilineComment(text) or \
            self.tokenizeHorizontalRule(text) or \
            self.tokenizeTableDivider(text):
        # No further tokenizing required
        pass

    elif self.tokenizeSetextHeadingLine1(text) or \
            self.tokenizeAtxHeading(text) or \
            self.tokenizeBlockquote(text) or \
            self.tokenizeNumberedList(text) or \
            self.tokenizeBulletPointList(text):
        self.tokenizeLineBreak(text)
        self.tokenizeInline(text)

    else:
        # The duplicated MarkdownStateNumberedList entry in the former check
        # left out MarkdownStateBulletPointList, so indented continuation
        # lines after a bullet item were treated as plain paragraphs.
        if previousState in listStates:
            if not self.tokenizeNumberedList(text) and \
                    not self.tokenizeBulletPointList(text) and \
                    text.startswith(("\t", "    ")):
                # Indented continuation of the preceding list item.
                self.setState(previousState)
            else:
                self.setState(MS.MarkdownStateParagraph)
        else:
            self.setState(MS.MarkdownStateParagraph)

        self.tokenizeLineBreak(text)
        self.tokenizeInline(text)

    # Make sure that if the second line of a setext heading is removed the
    # first line is reprocessed. Otherwise, it will still show up in the
    # document as a heading.
    if (previousState == MS.MarkdownStateSetextHeading1Line1 and
            self.getState() != MS.MarkdownStateSetextHeading1Line2) or \
            (previousState == MS.MarkdownStateSetextHeading2Line1 and
             self.getState() != MS.MarkdownStateSetextHeading2Line2):
        self.requestBacktrack()
|
||||
|
||||
def tokenizeSetextHeadingLine1(self, text):
    """Tokenize ``text`` as the first line of a setext heading.

    The line is a heading only when the state of the next line says that it
    is the underline of a level 1 or level 2 setext heading.  Returns True
    when a token covering the whole line was added.
    """
    # Check the next line's state to see if this is a setext-style heading.
    nextState = self.nextState

    if nextState == MS.MarkdownStateSetextHeading1Line2:
        lineState = MS.MarkdownStateSetextHeading1Line1
        tokenType = MTT.TokenSetextHeading1Line1
    elif nextState == MS.MarkdownStateSetextHeading2Line2:
        lineState = MS.MarkdownStateSetextHeading2Line1
        tokenType = MTT.TokenSetextHeading2Line1
    else:
        return False

    self.setState(lineState)

    heading = Token()
    heading.type = tokenType
    heading.length = len(text)
    heading.position = 0
    self.addToken(heading)
    return True
|
||||
|
||||
def tokenizeSetextHeadingLine2(self, text):
    """Tokenize ``text`` as the underline of a setext heading.

    Three cases are handled, depending on the previous line's state:

    * first line of a level 1 or 2 setext heading: the state is set to the
      matching "Line2" state; if ``text`` is a valid underline a token is
      added, otherwise a backtrack is requested so that the previous line is
      processed again;
    * paragraph: if ``text`` is a valid underline, a token is added and a
      backtrack is requested so that the previous line becomes a heading;
    * anything else: nothing happens.

    Returns True when a token covering the whole line was added.
    """
    previousState = self.previousState
    token = Token()

    if previousState == MS.MarkdownStateParagraph:
        h1Line2 = self.heading1SetextRegex.exactMatch(text)
        h2Line2 = self.heading2SetextRegex.exactMatch(text)

        if not (h1Line2 or h2Line2):
            return False

        # Restart tokenizing on the previous line.
        self.requestBacktrack()
        token.length = len(text)
        token.position = 0

        if h1Line2:
            self.setState(MS.MarkdownStateSetextHeading1Line2)
            token.type = MTT.TokenSetextHeading1Line2
        else:
            self.setState(MS.MarkdownStateSetextHeading2Line2)
            token.type = MTT.TokenSetextHeading2Line2

        self.addToken(token)
        return True

    if previousState == MS.MarkdownStateSetextHeading1Line1:
        setextMatch = self.heading1SetextRegex.exactMatch(text)
        self.setState(MS.MarkdownStateSetextHeading1Line2)
        token.type = MTT.TokenSetextHeading1Line2
    elif previousState == MS.MarkdownStateSetextHeading2Line1:
        setextMatch = self.heading2SetextRegex.exactMatch(text)
        self.setState(MS.MarkdownStateSetextHeading2Line2)
        token.type = MTT.TokenSetextHeading2Line2
    else:
        return False

    if setextMatch:
        token.length = len(text)
        token.position = 0
        self.addToken(token)
        return True

    # Restart tokenizing on the previous line.  (A bare ``False`` expression
    # used to follow here where ``return False`` was meant.)
    self.requestBacktrack()
    return False
|
||||
|
||||
def tokenizeAtxHeading(self, text):
    """Tokenize ``text`` as an ATX heading ("# Title", "## Title ##", ...).

    The heading level is the number of leading pound signs, capped at
    ``MAX_MARKDOWN_HEADING_LEVEL``; trailing pound signs are recorded as
    closing markup.  Returns True when a heading token was added.

    NOTE(review): assumes dummyOutEscapeCharacters() returns a string of the
    same length as ``text`` - confirm.
    """
    escapedText = self.dummyOutEscapeCharacters(text)

    # Count the number of pound signs at the front of the string,
    # up to the maximum allowed, to determine the heading level.
    # Bounded by the text length so that an empty line is handled.
    maxLevel = min(len(escapedText), self.MAX_MARKDOWN_HEADING_LEVEL)
    level = 0
    while level < maxLevel and escapedText[level] == "#":
        level += 1

    if not 0 < level < len(text):
        return False

    # Count how many pound signs are at the end of the text, without running
    # into the opening markup (a line made only of "#" used to raise
    # IndexError here).
    trailingPoundCount = 0
    i = len(escapedText) - 1
    while i > level and escapedText[i] == "#":
        trailingPoundCount += 1
        i -= 1

    token = Token()
    token.position = 0
    token.length = len(text)
    # Relies on the six ATX token types / states being consecutive values.
    token.type = MTT.TokenAtxHeading1 + level - 1
    token.openingMarkupLength = level
    token.closingMarkupLength = trailingPoundCount
    self.addToken(token)
    self.setState(MS.MarkdownStateAtxHeading1 + level - 1)
    return True
|
||||
|
||||
def tokenizeNumberedList(self, text):
    """Tokenize ``text`` as a numbered list item.

    After a paragraph break, code block, code fence end or unknown state the
    line has to match ``numberedListRegex``; after list content it only has
    to match ``numberedNestedListRegex``.  Returns True when a token was
    added and the state was set to MarkdownStateNumberedList.
    """
    previousState = self.previousState

    startsList = previousState in (
        MS.MarkdownStateParagraphBreak,
        MS.MarkdownStateUnknown,
        MS.MarkdownStateCodeBlock,
        MS.MarkdownStateCodeFenceEnd,
    ) and self.numberedListRegex.exactMatch(text)

    if not startsList:
        continuesList = previousState in (
            MS.MarkdownStateListLineBreak,
            MS.MarkdownStateNumberedList,
            MS.MarkdownStateBulletPointList,
        ) and self.numberedNestedListRegex.exactMatch(text)

        if not continuesList:
            return False

    # The list marker ends at the first "." or ")" found in the line.
    markerEnds = [pos for pos in (text.find("."), text.find(")")) if pos >= 0]
    index = min(markerEnds) if markerEnds else -1

    if index <= 0:
        return False

    item = Token()
    item.type = MTT.TokenNumberedList
    item.position = 0
    item.length = len(text)
    item.openingMarkupLength = index + 2
    self.addToken(item)
    self.setState(MS.MarkdownStateNumberedList)
    return True
|
||||
|
||||
def tokenizeBulletPointList(self, text):
    """Tokenize ``text`` as a bullet point list item.

    The line must consist of optional leading spaces/tabs, one or more of the
    bullet characters '+', '-' or '*', and then a space or tab.  Returns True
    when a token was added and the state was set to
    MarkdownStateBulletPointList.
    """
    previousState = self.previousState

    listStates = (
        MS.MarkdownStateNumberedList,
        MS.MarkdownStateBulletPointList,
        MS.MarkdownStateListLineBreak,
    )
    listStartStates = (
        MS.MarkdownStateParagraphBreak,
        MS.MarkdownStateUnknown,
        MS.MarkdownStateCodeBlock,
        MS.MarkdownStateCodeFenceEnd,
    )

    if previousState not in (
            MS.MarkdownStateUnknown,
            MS.MarkdownStateParagraphBreak,
            MS.MarkdownStateListLineBreak,
            MS.MarkdownStateNumberedList,
            MS.MarkdownStateBulletPointList,
            MS.MarkdownStateCodeBlock,
            MS.MarkdownStateCodeFenceEnd):
        return False

    bulletCharIndex = -1
    leadingSpaces = 0
    whitespaceAfterBullet = False

    # Search for the bullet point character, which can
    # be either a '+', '-', or '*'.
    for i, char in enumerate(text):
        if char in ("+", "-", "*"):
            bulletCharIndex = i
            continue

        if char not in (" ", "\t"):
            return False

        if bulletCharIndex >= 0:
            # We've confirmed it's a bullet point by the whitespace that
            # follows the bullet point character, and can now exit the loop.
            whitespaceAfterBullet = True
            break

        if char == " ":
            leadingSpaces += 1

            # If this list item is the first in the list, ensure the
            # number of spaces preceding the bullet point does not
            # exceed three, as that would indicate a code block rather
            # than a bullet point list.
            if leadingSpaces > 3 \
                    and previousState not in listStates \
                    and previousState in listStartStates:
                return False

        elif previousState in (
                MS.MarkdownStateParagraphBreak,
                MS.MarkdownStateUnknown):
            # If this list item is the first in the list, ensure that
            # no tab character precedes the bullet point, as that would
            # indicate a code block rather than a bullet point list.
            return False

    if bulletCharIndex < 0 or not whitespaceAfterBullet:
        return False

    item = Token()
    item.type = MTT.TokenBulletPointList
    item.position = 0
    item.length = len(text)
    item.openingMarkupLength = bulletCharIndex + 2
    self.addToken(item)
    self.setState(MS.MarkdownStateBulletPointList)
    return True
|
||||
|
||||
def tokenizeHorizontalRule(self, text):
    """Tokenize ``text`` as a horizontal rule if ``hruleRegex`` matches it.

    Returns True when a token covering the whole line was added and the state
    was set to MarkdownStateHorizontalRule.
    """
    if not self.hruleRegex.exactMatch(text):
        return False

    rule = Token()
    rule.type = MTT.TokenHorizontalRule
    rule.position = 0
    rule.length = len(text)
    self.addToken(rule)
    self.setState(MS.MarkdownStateHorizontalRule)
    return True
|
||||
|
||||
def tokenizeLineBreak(self, text):
# Looks for a Markdown hard line break at the end of ``text``.
#
# Only acts when the line's current state is a paragraph, blockquote,
# numbered list or bullet point list.  A backtrack is requested when the
# previous state is one of those four states, and again when the next state
# is one of them.  When ``lineBreakRegex`` matches the whole line, a
# TokenLineBreak token of length 1 is added at the last character and True
# is returned; otherwise the method returns False.
#
# NOTE(review): indentation was lost in this copy of the file, so it cannot
# be determined here whether the ``lineBreakRegex`` test is nested inside the
# ``nextState`` test or sits next to it - confirm against the real source
# before restructuring this method.
|
||||
currentState = self.currentState
|
||||
previousState = self.previousState
|
||||
nextState = self.nextState
|
||||
|
||||
if currentState in [
|
||||
MS.MarkdownStateParagraph,
|
||||
MS.MarkdownStateBlockquote,
|
||||
MS.MarkdownStateNumberedList,
|
||||
MS.MarkdownStateBulletPointList,]:
|
||||
if previousState in [
|
||||
MS.MarkdownStateParagraph,
|
||||
MS.MarkdownStateBlockquote,
|
||||
MS.MarkdownStateNumberedList,
|
||||
MS.MarkdownStateBulletPointList,]:
|
||||
self.requestBacktrack()
|
||||
|
||||
if nextState in [
|
||||
MS.MarkdownStateParagraph,
|
||||
MS.MarkdownStateBlockquote,
|
||||
MS.MarkdownStateNumberedList,
|
||||
MS.MarkdownStateBulletPointList,]:
|
||||
self.requestBacktrack()
|
||||
if self.lineBreakRegex.exactMatch(text):
|
||||
# The token marks only the last character of the line.
token = Token()
|
||||
token.type = MTT.TokenLineBreak
|
||||
token.position = len(text) - 1
|
||||
token.length = 1
|
||||
self.addToken(token)
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def tokenizeBlockquote(self, text):
    """Tokenize ``text`` as a blockquote line.

    A line belongs to a blockquote when the previous line was a blockquote or
    when it matches ``blockquoteRegex``.  The opening markup extends up to
    and including the last '>' found in the leading run of '>' and space
    characters.  Returns True when a token was added.
    """
    if self.previousState != MS.MarkdownStateBlockquote \
            and not self.blockquoteRegex.exactMatch(text):
        return False

    # Find any '>' characters at the front of the line.
    markupLength = 0
    for i, char in enumerate(text):
        if char == ">":
            markupLength = i + 1
        elif char != " ":
            # There are no more '>' characters at the front of the line,
            # so stop processing.
            break

    quote = Token()
    quote.type = MTT.TokenBlockquote
    quote.position = 0
    quote.length = len(text)
    # A new Token already has an opening markup length of 0, so assigning 0
    # when no '>' was found changes nothing.
    quote.openingMarkupLength = markupLength
    self.addToken(quote)
    self.setState(MS.MarkdownStateBlockquote)
    return True
|
||||
|
||||
def tokenizeCodeBlock(self, text):
    """Tokenize ``text`` as part of a fenced or indented code block.

    * Inside a GitHub (```) or Pandoc (~~~) code fence the line is either the
      closing fence or one more line of code.
    * After a code block, paragraph break or unknown state, a line starting
      with a tab or four spaces is an indented code block line.
    * After a paragraph, paragraph break, list line break or unknown state,
      the line may open a new code fence.

    Returns True when a token covering the whole line was added.
    """
    previousState = self.previousState

    if previousState in [
            MS.MarkdownStateInGithubCodeFence,
            MS.MarkdownStateInPandocCodeFence]:
        self.setState(previousState)

        if previousState == MS.MarkdownStateInGithubCodeFence:
            fenceEndRegex = self.githubCodeFenceEndRegex
        else:
            fenceEndRegex = self.pandocCodeFenceEndRegex

        token = Token()
        token.position = 0
        token.length = len(text)

        if fenceEndRegex.exactMatch(text):
            token.type = MTT.TokenCodeFenceEnd
            self.addToken(token)
            self.setState(MS.MarkdownStateCodeFenceEnd)
        else:
            token.type = MTT.TokenCodeBlock
            self.addToken(token)

        return True

    # The indent test used to compare ``text[:4]`` with a single space, which
    # can never detect a block indented by four spaces.
    if previousState in [
            MS.MarkdownStateCodeBlock,
            MS.MarkdownStateParagraphBreak,
            MS.MarkdownStateUnknown] \
            and text.startswith(("\t", "    ")):
        token = Token()
        token.type = MTT.TokenCodeBlock
        token.position = 0
        token.length = len(text)
        # The leading whitespace counts as opening markup.
        token.openingMarkupLength = len(text) - len(text.lstrip())
        self.addToken(token)
        self.setState(MS.MarkdownStateCodeBlock)
        return True

    if previousState in [
            MS.MarkdownStateParagraphBreak,
            MS.MarkdownStateParagraph,
            MS.MarkdownStateUnknown,
            MS.MarkdownStateListLineBreak]:
        foundCodeFenceStart = False
        token = Token()

        if self.githubCodeFenceStartRegex.exactMatch(text):
            foundCodeFenceStart = True
            token.type = MTT.TokenGithubCodeFence
            self.setState(MS.MarkdownStateInGithubCodeFence)
        elif self.pandocCodeFenceStartRegex.exactMatch(text):
            foundCodeFenceStart = True
            token.type = MTT.TokenPandocCodeFence
            self.setState(MS.MarkdownStateInPandocCodeFence)

        if foundCodeFenceStart:
            token.position = 0
            token.length = len(text)
            self.addToken(token)
            return True

    return False
|
||||
|
||||
def tokenizeMultilineComment(self, text):
    """Tokenize ``text`` as the continuation of a multi-line HTML comment.

    Only acts when the previous line's state is MarkdownStateComment.  If the
    comment ends on this line ("-->"), a token covering the text up to and
    including the end marker is added and False is returned; otherwise the
    whole line is a comment, the state stays MarkdownStateComment and True is
    returned.
    """
    if self.previousState != MS.MarkdownStateComment:
        return False

    # Find the end of the comment, if any.
    commentEnd = text.find("-->")
    commentClosed = commentEnd >= 0

    comment = Token()
    comment.type = MTT.TokenHtmlComment
    comment.position = 0
    comment.length = commentEnd + 3 if commentClosed else len(text)
    self.addToken(comment)

    if commentClosed:
        # Return false so that the rest of the line that isn't within
        # the commented segment can be highlighted as normal paragraph
        # text.
        return False

    self.setState(MS.MarkdownStateComment)
    return True
|
||||
|
||||
def tokenizeInline(self, text):
    """Tokenize the inline Markdown elements found in ``text``.

    Works on a copy of the line produced by ``dummyOutEscapeCharacters()``.
    Every step returns the working copy with the text it consumed blanked
    out, so that later steps do not match inside it.  Always returns True.
    """
    escapedText = self.dummyOutEscapeCharacters(text)

    # Check if the line is a reference definition.
    if self.referenceDefinitionRegex.exactMatch(text):
        definition = Token()
        definition.type = MTT.TokenReferenceDefinition
        definition.position = 0
        definition.length = escapedText.find(":") + 1
        self.addToken(definition)

        # Replace the first bracket so that the '[...]:' reference definition
        # start doesn't get highlighted as a reference link.
        bracket = escapedText.find("[")
        if bracket >= 0:
            escapedText = (escapedText[:bracket] + self.DUMMY_CHAR
                           + escapedText[bracket + 1:])

    for step in (self.tokenizeVerbatim,
                 self.tokenizeHtmlComments,
                 self.tokenizeTableHeaderRow,
                 self.tokenizeTableRow):
        escapedText = step(escapedText)

    # (token type, regex, then the optional tokenizeMatches() arguments:
    # markupStartCount, markupEndCount, replaceMarkupChars, replaceAllChars)
    inlineRules = (
        (MTT.TokenImage, self.imageRegex, 0, 0, False, True),
        (MTT.TokenInlineLink, self.inlineLinkRegex, 0, 0, False, True),
        (MTT.TokenReferenceLink, self.referenceLinkRegex, 0, 0, False, True),
        (MTT.TokenHtmlEntity, self.htmlEntityRegex),
        (MTT.TokenAutomaticLink, self.automaticLinkRegex, 0, 0, False, True),
        (MTT.TokenStrikethrough, self.strikethroughRegex, 2, 2, True),
        (MTT.TokenStrong, self.strongRegex, 2, 2, True),
        (MTT.TokenEmphasis, self.emphasisRegex, 1, 1, True),
        (MTT.TokenSuperScript, self.superScriptRegex, 1, 1, True),
        (MTT.TokenSubScript, self.subScriptRegex, 1, 1, True),
        (MTT.TokenHtmlTag, self.htmlTagRegex),
        (MTT.TokenMention, self.mentionRegex, 0, 0, False, True),
    )
    for tokenType, regex, *options in inlineRules:
        escapedText = self.tokenizeMatches(tokenType, escapedText, regex,
                                           *options)

    return True
|
||||
|
||||
def tokenizeVerbatim(self, text):
    """Tokenize inline code spans delimited by runs of back ticks.

    Returns ``text`` with every recognised span (markup included) replaced by
    ``DUMMY_CHAR`` so that later searches do not match inside it.
    """
    index = self.verbatimRegex.indexIn(text)

    while index >= 0:
        count = self.verbatimRegex.matchedLength()

        # Search for the matching end, which should have the same number
        # of back ticks as the start.
        closingTicks = "`" * count
        endIndex = text.find(closingTicks, index + count)

        if endIndex < 0:
            # No end found: start searching again at the very next character.
            searchFrom = index + 1
        else:
            spanLength = endIndex + count - index

            span = Token()
            span.type = MTT.TokenVerbatim
            span.position = index
            span.length = spanLength
            span.openingMarkupLength = count
            span.closingMarkupLength = count
            self.addToken(span)

            # Fill out the token match in the string with the dummy
            # character so that searches for other Markdown elements
            # don't find anything within this token's range in the string.
            text = (text[:index] + self.DUMMY_CHAR * spanLength
                    + text[index + spanLength:])
            searchFrom = index + spanLength

        index = self.verbatimRegex.indexIn(text, searchFrom)

    return text
|
||||
|
||||
def tokenizeHtmlComments(self, text):
    """Tokenize HTML comments in ``text`` and blank them out.

    Handles, in this order: the tail of a multi-line comment continued from
    the previous line, complete inline comments ("<!-- ... -->"), and the
    start of a multi-line comment that is not closed on this line (which
    also sets the state to MarkdownStateComment).  Returns ``text`` with all
    comment segments replaced by ``DUMMY_CHAR``.
    """
    previousState = self.previousState

    # Check for the end of a multiline comment so that it doesn't get further
    # tokenized. Don't bother formatting the comment itself, however, because
    # it should have already been tokenized in tokenizeMultilineComment().
    if previousState == MS.MarkdownStateComment:
        commentEnd = text.find("-->")
        # Only blank out text when the end marker is really there; with a
        # result of -1 the first two characters used to be overwritten.
        if commentEnd >= 0:
            blanked = commentEnd + 3
            text = self.DUMMY_CHAR * blanked + text[blanked:]

    # Now check for inline comments (non-multiline).
    commentStart = self.htmlInlineCommentRegex.indexIn(text)

    while commentStart >= 0:
        commentLength = self.htmlInlineCommentRegex.matchedLength()
        token = Token()
        token.type = MTT.TokenHtmlComment
        token.position = commentStart
        token.length = commentLength
        self.addToken(token)

        # Replace comment segment with dummy characters so that it doesn't
        # get tokenized again.
        segmentEnd = commentStart + commentLength
        text = (text[:commentStart] + self.DUMMY_CHAR * commentLength
                + text[segmentEnd:])

        commentStart = self.htmlInlineCommentRegex.indexIn(text, segmentEnd)

    # Find multiline comment start, if any.
    commentStart = text.find("<!--")
    if commentStart >= 0:
        token = Token()
        token.type = MTT.TokenHtmlComment
        token.position = commentStart
        token.length = len(text) - commentStart
        self.addToken(token)
        self.setState(MS.MarkdownStateComment)

        # Replace comment segment with dummy characters so that it doesn't
        # get tokenized again.
        text = text[:commentStart] + self.DUMMY_CHAR * token.length

    return text
|
||||
|
||||
def tokenizeTableHeaderRow(self, text):
    """Tokenize ``text`` as the header row of a pipe table.

    The line is a header row when the next line's state is
    MarkdownStatePipeTableDivider, the line is so far a paragraph (or
    unknown), and the previous line's state allows a table to start.  In that
    case the state becomes MarkdownStatePipeTableHeader, every '|' yields a
    TokenTablePipe token and the text between the pipes yields
    TokenTableHeader tokens.  Returns ``text`` with the pipes replaced by
    spaces, or unchanged when the line is not a header row.
    """
    previousState = self.previousState
    nextState = self.nextState

    if previousState in [
            MS.MarkdownStateParagraphBreak,
            MS.MarkdownStateListLineBreak,
            MS.MarkdownStateSetextHeading1Line2,
            MS.MarkdownStateSetextHeading2Line2,
            MS.MarkdownStateAtxHeading1,
            MS.MarkdownStateAtxHeading2,
            MS.MarkdownStateAtxHeading3,
            MS.MarkdownStateAtxHeading4,
            MS.MarkdownStateAtxHeading5,
            MS.MarkdownStateAtxHeading6,
            MS.MarkdownStateHorizontalRule,
            MS.MarkdownStateCodeFenceEnd,
            MS.MarkdownStateUnknown] and \
            self.getState() in [
                MS.MarkdownStateParagraph,
                MS.MarkdownStateUnknown] and \
            nextState == MS.MarkdownStatePipeTableDivider:
        self.setState(MS.MarkdownStatePipeTableHeader)

        headerStart = 0
        for i, char in enumerate(text):
            if char != "|":
                continue

            if i > 0:
                # A separate Token object per entry: re-using one object for
                # the header cell and the pipe made the pipe values overwrite
                # the header cell that had already been added.
                header = Token()
                header.type = MTT.TokenTableHeader
                header.position = headerStart
                header.length = i - headerStart
                self.addToken(header)

            pipe = Token()
            pipe.type = MTT.TokenTablePipe
            pipe.position = i
            pipe.length = 1
            self.addToken(pipe)
            headerStart = i + 1

        # Replace pipes with spaces so that they don't get formatted
        # again with, for example, strong or emphasis formatting.
        # Note that we use a space rather than DUMMY_CHAR for this,
        # to prevent formatting such as strong and emphasis from
        # picking it up.
        text = text.replace("|", " ")

        # Text after the last pipe is one more header cell.
        if headerStart < len(text):
            header = Token()
            header.type = MTT.TokenTableHeader
            header.position = headerStart
            header.length = len(text) - headerStart
            self.addToken(header)

    return text
|
||||
|
||||
def tokenizeTableDivider(self, text):
    """Tokenize ``text`` as the divider row of a pipe table.

    Only acts when the previous line is a table header or a paragraph.  When
    ``text`` matches ``pipeTableDividerRegex`` a token covering the whole
    line is added and the state becomes MarkdownStatePipeTableDivider; after
    a paragraph a backtrack is requested as well.  When the line after a
    table header is not a divider, a backtrack is requested.  Returns True
    when a token was added.
    """
    previousState = self.previousState
    afterHeader = previousState == MS.MarkdownStatePipeTableHeader

    if not afterHeader and previousState != MS.MarkdownStateParagraph:
        return False

    if not self.pipeTableDividerRegex.exactMatch(text):
        if afterHeader:
            # Restart tokenizing on the previous line.
            self.requestBacktrack()
        return False

    if not afterHeader:
        # Restart tokenizing on the previous line.
        self.requestBacktrack()

    self.setState(MS.MarkdownStatePipeTableDivider)

    divider = Token()
    divider.type = MTT.TokenTableDivider
    divider.length = len(text)
    divider.position = 0
    self.addToken(divider)
    return True
|
||||
|
||||
def tokenizeTableRow(self, text):
    """Tokenize ``text`` as a body row of a pipe table.

    Only acts when the previous line was a table divider or another table
    row.  The state becomes MarkdownStatePipeTableRow and every '|' yields a
    TokenTablePipe token.  Returns ``text`` with the pipes replaced by
    spaces, or unchanged when the line is not a table row.
    """
    if self.previousState not in (
            MS.MarkdownStatePipeTableDivider,
            MS.MarkdownStatePipeTableRow):
        return text

    self.setState(MS.MarkdownStatePipeTableRow)

    for i, char in enumerate(text):
        if char == "|":
            pipe = Token()
            pipe.type = MTT.TokenTablePipe
            pipe.position = i
            pipe.length = 1
            self.addToken(pipe)

    # Replace pipes with spaces so that they don't get formatted
    # again with, for example, strong or emphasis formatting.
    # Note that we use a space rather than DUMMY_CHAR for this,
    # to prevent formatting such as strong and emphasis from
    # picking it up.
    return text.replace("|", " ")
|
||||
|
||||
def tokenizeMatches(self, tokenType, text, regex,
                    markupStartCount=0, markupEndCount=0,
                    replaceMarkupChars=False, replaceAllChars=False):
    """
    Tokenizes a block of text, searching for all occurrences of regex.

    Every occurrence becomes a token of the given tokenType, positioned
    at the match and as long as the match. regex is queried through
    indexIn(text[, offset]) and matchedLength().

    markupStartCount and markupEndCount indicate how many markup
    characters precede and follow the main text of a match. When
    positive, they are stored on the token as its opening and closing
    markup lengths. For example, with the match "**bold**" and both
    counts set to 2, the asterisks around the word "bold" are recorded
    as opening and closing markup.

    If replaceAllChars is true, the entire matched text is overwritten
    with the dummy character. Otherwise, if replaceMarkupChars is true,
    only the leading markupStartCount and trailing markupEndCount
    characters of the match are overwritten. Either way, the aim is that
    later parsing passes over the same line do not pick up the original
    characters.

    Returns the (possibly modified) text.
    """
    def blankOut(line, start, stop):
        # Overwrite the characters in [start, stop) one at a time.
        for pos in range(start, stop):
            line = line[:pos] + self.DUMMY_CHAR + line[pos+1:]
        return line

    matchStart = regex.indexIn(text)

    while matchStart >= 0:
        matchLength = regex.matchedLength()
        matchEnd = matchStart + matchLength

        found = Token()
        found.type = tokenType
        found.position = matchStart
        found.length = matchLength

        if markupStartCount > 0:
            found.openingMarkupLength = markupStartCount

        if markupEndCount > 0:
            found.closingMarkupLength = markupEndCount

        if replaceAllChars:
            text = blankOut(text, matchStart, matchEnd)
        elif replaceMarkupChars:
            text = blankOut(text, matchStart, matchStart + markupStartCount)
            text = blankOut(text, matchEnd - markupEndCount, matchEnd)

        self.addToken(found)

        # Continue searching right after the current match.
        matchStart = regex.indexIn(text, matchEnd)

    return text
|
||||
|
||||
def dummyOutEscapeCharacters(self, text):
    """
    Replaces escaped characters in text so they aren't picked up
    during parsing.

    Returns a copy of text in which the character following each
    backslash is replaced with "$". The backslash itself is kept, so the
    length of the text and the position of every other character stay
    the same. A backslash at the very end of the text is left untouched.
    """
    # Raw strings are used for both the pattern and the replacement:
    # "\$" in a regular string literal is an invalid escape sequence.
    # In the replacement template, "\\" yields one literal backslash.
    # NOTE(review): "$" is presumably the same character as
    # self.DUMMY_CHAR -- confirm against the class definition.
    return re.sub(r"\\.", r"\\$", text)
|
|
@ -11,8 +11,7 @@ from manuskript.enums import Outline
|
|||
from manuskript import functions as F
|
||||
from manuskript.models.outlineModel import outlineModel
|
||||
from manuskript.ui.editors.MDFunctions import MDFormatSelection
|
||||
from manuskript.ui.editors.MMDHighlighter import MMDHighlighter
|
||||
from manuskript.ui.editors.basicHighlighter import basicHighlighter
|
||||
from manuskript.ui.highlighters import MMDHighlighter, BasicHighlighter
|
||||
from manuskript.ui.editors.textFormat import textFormat
|
||||
from manuskript.ui import style as S
|
||||
|
||||
|
@ -84,7 +83,7 @@ class textEditView(QTextEdit):
|
|||
self.spellcheck = False
|
||||
|
||||
if self._highlighting and not self.highlighter:
|
||||
self.highlighter = basicHighlighter(self)
|
||||
self.highlighter = BasicHighlighter(self)
|
||||
self.highlighter.setDefaultBlockFormat(self._defaultBlockFormat)
|
||||
|
||||
def getDefaultLocale(self):
|
||||
|
@ -191,7 +190,7 @@ class textEditView(QTextEdit):
|
|||
if self._column in [Outline.text.value, Outline.notes.value]:
|
||||
self.highlighter = MMDHighlighter(self)
|
||||
else:
|
||||
self.highlighter = basicHighlighter(self)
|
||||
self.highlighter = BasicHighlighter(self)
|
||||
|
||||
self.highlighter.setDefaultBlockFormat(self._defaultBlockFormat)
|
||||
|
||||
|
|
Loading…
Reference in a new issue