Fix crash when an invalid character is inserted into the text.

If an invalid character is inserted into the text, such as a "^L" (form feed,
ASCII 0x0C) picked up when copy-pasting from a Google document that contains a
page break, the application crashes because that character cannot be stored in
XML. This patch strips such invalid characters from the text so that
revisions.xml can be saved.

Fixes #562
This commit is contained in:
Youness Alaoui 2019-05-09 19:46:21 -04:00 committed by Curtis Gedak
parent f1baab8b3a
commit a8ec6512c9
2 changed files with 9 additions and 2 deletions

View file

@ -9,6 +9,7 @@ from PyQt5.QtCore import Qt
from PyQt5.QtGui import QIcon, QFont
from PyQt5.QtWidgets import QTextEdit, qApp
from lxml import etree as ET
import re
from manuskript import enums
@ -21,6 +22,9 @@ class abstractItem():
# Used for XML export
name = "abstractItem"
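# Matches runs of characters that are NOT allowed in XML 1.0 (despite its name,
# the pattern selects the invalid characters so they can be stripped).
# Regexp from https://stackoverflow.com/questions/8733233/filtering-out-certain-bytes-in-python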
valid_xml_re = re.compile(u'[^\u0020-\uD7FF\u0009\u000A\u000D\uE000-\uFFFD\U00010000-\U0010FFFF]+')
def __init__(self, model=None, title="", _type="abstract", xml=None, parent=None, ID=None):
self._data = {}
@ -258,6 +262,9 @@ class abstractItem():
# We want to force some data even if they're empty
XMLForce = []
def cleanTextForXML(self, text):
    """
    Returns a copy of text in which every run of characters matched by
    self.valid_xml_re has been removed, so the result can be stored in
    an XML attribute.
    """
    # valid_xml_re matches the characters that must be dropped; replacing
    # each match with the empty string deletes them.
    cleaned = self.valid_xml_re.sub('', text)
    return cleaned
def toXML(self):
"""
Returns a string containing the item (and children) in XML.
@ -272,7 +279,7 @@ class abstractItem():
continue
val = self.data(attrib)
if val or attrib in self.XMLForce:
item.set(attrib.name, str(val))
item.set(attrib.name, self.cleanTextForXML(str(val)))
# Saving lastPath
item.set("lastPath", self._lastPath)

View file

@ -480,7 +480,7 @@ class outlineItem(abstractItem):
for r in rev:
revItem = ET.Element("revision")
revItem.set("timestamp", str(r[0]))
revItem.set("text", r[1])
revItem.set("text", self.cleanTextForXML(r[1]))
item.append(revItem)
return item