From a8ec6512c91d9dbce51ca30b5252a94fcf4b8d0f Mon Sep 17 00:00:00 2001 From: Youness Alaoui Date: Thu, 9 May 2019 19:46:21 -0400 Subject: [PATCH] Fix crash if invalid character is inserted into the text. If an invalid character is inserted into the text, such as a form feed ("^L", ASCII 0x0C) when copy-pasting from a Google Docs document that has a page break in it, a crash will happen because the character cannot be represented in XML. This patch removes those invalid characters from the text so that revisions.xml can be saved. Fixes #562 --- manuskript/models/abstractItem.py | 9 ++++++++- manuskript/models/outlineItem.py | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/manuskript/models/abstractItem.py b/manuskript/models/abstractItem.py index b405140a..7ce87057 100644 --- a/manuskript/models/abstractItem.py +++ b/manuskript/models/abstractItem.py @@ -9,6 +9,7 @@ from PyQt5.QtCore import Qt from PyQt5.QtGui import QIcon, QFont from PyQt5.QtWidgets import QTextEdit, qApp from lxml import etree as ET +import re from manuskript import enums @@ -21,6 +22,9 @@ class abstractItem(): # Used for XML export name = "abstractItem" + # Regexp from https://stackoverflow.com/questions/8733233/filtering-out-certain-bytes-in-python + valid_xml_re = re.compile(u'[^\u0020-\uD7FF\u0009\u000A\u000D\uE000-\uFFFD\U00010000-\U0010FFFF]+') + def __init__(self, model=None, title="", _type="abstract", xml=None, parent=None, ID=None): self._data = {} @@ -258,6 +262,9 @@ class abstractItem(): # We want to force some data even if they're empty XMLForce = [] + def cleanTextForXML(self, text): + return self.valid_xml_re.sub('', text) + def toXML(self): """ Returns a string containing the item (and children) in XML.
@@ -272,7 +279,7 @@ class abstractItem(): continue val = self.data(attrib) if val or attrib in self.XMLForce: - item.set(attrib.name, str(val)) + item.set(attrib.name, self.cleanTextForXML(str(val))) # Saving lastPath item.set("lastPath", self._lastPath) diff --git a/manuskript/models/outlineItem.py b/manuskript/models/outlineItem.py index 3d597a36..56b64649 100644 --- a/manuskript/models/outlineItem.py +++ b/manuskript/models/outlineItem.py @@ -480,7 +480,7 @@ class outlineItem(abstractItem): for r in rev: revItem = ET.Element("revision") revItem.set("timestamp", str(r[0])) - revItem.set("text", r[1]) + revItem.set("text", self.cleanTextForXML(r[1])) item.append(revItem) return item