Fix crash if invalid character is inserted into the text.

If an invalid character is inserted into the text, such as "^L" (form feed,
ASCII 0x0C) when copy-pasting from a Google document that contains a page
break, a crash occurs because the character cannot be inserted into XML. This
patch removes those invalid characters from the text so that revisions.xml can
be saved.

Fixes #562
This commit is contained in:
Youness Alaoui 2019-05-09 19:46:21 -04:00 committed by Curtis Gedak
parent f1baab8b3a
commit a8ec6512c9
2 changed files with 9 additions and 2 deletions

View file

@ -9,6 +9,7 @@ from PyQt5.QtCore import Qt
from PyQt5.QtGui import QIcon, QFont from PyQt5.QtGui import QIcon, QFont
from PyQt5.QtWidgets import QTextEdit, qApp from PyQt5.QtWidgets import QTextEdit, qApp
from lxml import etree as ET from lxml import etree as ET
import re
from manuskript import enums from manuskript import enums
@ -21,6 +22,9 @@ class abstractItem():
# Used for XML export # Used for XML export
name = "abstractItem" name = "abstractItem"
# Matches runs of characters that are NOT allowed in XML (note the negated
# character class): anything other than tab (U+0009), LF (U+000A), CR (U+000D)
# and the ranges U+0020-U+D7FF, U+E000-U+FFFD and U+10000-U+10FFFF.
# Despite the name, a match means the text is invalid; cleanTextForXML()
# removes whatever this pattern matches.
# Regexp from https://stackoverflow.com/questions/8733233/filtering-out-certain-bytes-in-python
valid_xml_re = re.compile(u'[^\u0020-\uD7FF\u0009\u000A\u000D\uE000-\uFFFD\U00010000-\U0010FFFF]+')
def __init__(self, model=None, title="", _type="abstract", xml=None, parent=None, ID=None): def __init__(self, model=None, title="", _type="abstract", xml=None, parent=None, ID=None):
self._data = {} self._data = {}
@ -258,6 +262,9 @@ class abstractItem():
# We want to force some data even if they're empty # We want to force some data even if they're empty
XMLForce = [] XMLForce = []
def cleanTextForXML(self, text):
    """
    Returns a copy of text with every run of characters matched by
    self.valid_xml_re removed, so the result can be stored in XML.

    Characters the pattern does not match (tab, line feed, carriage return
    and the ordinary printable ranges) are left untouched.
    """
    # The pattern matches the *invalid* runs; replace each with nothing.
    invalid_runs = self.valid_xml_re
    cleaned = invalid_runs.sub('', text)
    return cleaned
def toXML(self): def toXML(self):
""" """
Returns a string containing the item (and children) in XML. Returns a string containing the item (and children) in XML.
@ -272,7 +279,7 @@ class abstractItem():
continue continue
val = self.data(attrib) val = self.data(attrib)
if val or attrib in self.XMLForce: if val or attrib in self.XMLForce:
item.set(attrib.name, str(val)) item.set(attrib.name, self.cleanTextForXML(str(val)))
# Saving lastPath # Saving lastPath
item.set("lastPath", self._lastPath) item.set("lastPath", self._lastPath)

View file

@ -480,7 +480,7 @@ class outlineItem(abstractItem):
for r in rev: for r in rev:
revItem = ET.Element("revision") revItem = ET.Element("revision")
revItem.set("timestamp", str(r[0])) revItem.set("timestamp", str(r[0]))
revItem.set("text", r[1]) revItem.set("text", self.cleanTextForXML(r[1]))
item.append(revItem) item.append(revItem)
return item return item