diff --git a/manuskript/models/abstractItem.py b/manuskript/models/abstractItem.py
index b405140a..7ce87057 100644
--- a/manuskript/models/abstractItem.py
+++ b/manuskript/models/abstractItem.py
@@ -9,6 +9,7 @@ from PyQt5.QtCore import Qt
 from PyQt5.QtGui import QIcon, QFont
 from PyQt5.QtWidgets import QTextEdit, qApp
 from lxml import etree as ET
+import re
 
 from manuskript import enums
 
@@ -21,6 +22,9 @@ class abstractItem():
     # Used for XML export
     name = "abstractItem"
 
+    # Regexp from https://stackoverflow.com/questions/8733233/filtering-out-certain-bytes-in-python
+    valid_xml_re = re.compile(u'[^\u0020-\uD7FF\u0009\u000A\u000D\uE000-\uFFFD\U00010000-\U0010FFFF]+')
+
     def __init__(self, model=None, title="", _type="abstract", xml=None, parent=None, ID=None):
 
         self._data = {}
@@ -258,6 +262,9 @@ class abstractItem():
     # We want to force some data even if they're empty
     XMLForce = []
 
+    def cleanTextForXML(self, text):
+        return self.valid_xml_re.sub('', text)
+
     def toXML(self):
         """
         Returns a string containing the item (and children) in XML.
@@ -272,7 +279,7 @@ class abstractItem():
                 continue
             val = self.data(attrib)
             if val or attrib in self.XMLForce:
-                item.set(attrib.name, str(val))
+                item.set(attrib.name, self.cleanTextForXML(str(val)))
 
         # Saving lastPath
         item.set("lastPath", self._lastPath)
diff --git a/manuskript/models/outlineItem.py b/manuskript/models/outlineItem.py
index 3d597a36..56b64649 100644
--- a/manuskript/models/outlineItem.py
+++ b/manuskript/models/outlineItem.py
@@ -480,7 +480,7 @@ class outlineItem(abstractItem):
         for r in rev:
             revItem = ET.Element("revision")
             revItem.set("timestamp", str(r[0]))
-            revItem.set("text", r[1])
+            revItem.set("text", self.cleanTextForXML(r[1]))
             item.append(revItem)
 
         return item