manuskript/libs/odf/load.py
2015-07-10 12:29:25 +02:00

116 lines
3.9 KiB
Python

#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2007-2008 Søren Roug, European Environment Agency
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Contributor(s):
#
# This script is to be embedded in opendocument.py later
# The purpose is to read an ODT/ODP/ODS file and create the datastructure
# in memory. The user should then be able to make operations and then save
# the structure again.
from xml.sax import make_parser,handler
from xml.sax.xmlreader import InputSource
import xml.sax.saxutils
from element import Element
from namespaces import OFFICENS
try:
from cStringIO import StringIO
except ImportError:
from io import StringIO
#
# Parse the XML files
#
class LoadParser(handler.ContentHandler):
""" Extract headings from content.xml of an ODT file """
triggers = (
(OFFICENS, 'automatic-styles'), (OFFICENS, 'body'),
(OFFICENS, 'font-face-decls'), (OFFICENS, 'master-styles'),
(OFFICENS, 'meta'), (OFFICENS, 'scripts'),
(OFFICENS, 'settings'), (OFFICENS, 'styles') )
def __init__(self, document):
self.doc = document
self.data = []
self.level = 0
self.parse = False
def characters(self, data):
if self.parse == False:
return
self.data.append(data)
def startElementNS(self, tag, qname, attrs):
if tag in self.triggers:
self.parse = True
if self.doc._parsing != "styles.xml" and tag == (OFFICENS, 'font-face-decls'):
self.parse = False
if self.parse == False:
return
self.level = self.level + 1
# Add any accumulated text content
content = ''.join(self.data)
if len(content.strip()) > 0:
self.parent.addText(content, check_grammar=False)
self.data = []
# Create the element
attrdict = {}
for (att,value) in attrs.items():
attrdict[att] = value
try:
e = Element(qname = tag, qattributes=attrdict, check_grammar=False)
self.curr = e
except AttributeError as v:
print ("Error: %s" % v)
if tag == (OFFICENS, 'automatic-styles'):
e = self.doc.automaticstyles
elif tag == (OFFICENS, 'body'):
e = self.doc.body
elif tag == (OFFICENS, 'master-styles'):
e = self.doc.masterstyles
elif tag == (OFFICENS, 'meta'):
e = self.doc.meta
elif tag == (OFFICENS,'scripts'):
e = self.doc.scripts
elif tag == (OFFICENS,'settings'):
e = self.doc.settings
elif tag == (OFFICENS,'styles'):
e = self.doc.styles
elif self.doc._parsing == "styles.xml" and tag == (OFFICENS, 'font-face-decls'):
e = self.doc.fontfacedecls
elif hasattr(self,'parent'):
self.parent.addElement(e, check_grammar=False)
self.parent = e
def endElementNS(self, tag, qname):
if self.parse == False:
return
self.level = self.level - 1
str = ''.join(self.data)
if len(str.strip()) > 0:
self.curr.addText(str, check_grammar=False)
self.data = []
self.curr = self.curr.parentNode
self.parent = self.curr
if tag in self.triggers:
self.parse = False