manuskript/libs/odf/load.py

#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2007-2008 Søren Roug, European Environment Agency
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
#
# Contributor(s):
#

# This script is to be embedded in opendocument.py later
# The purpose is to read an ODT/ODP/ODS file and create the datastructure
# in memory. The user should then be able to make operations and then save
# the structure again.

from xml.sax import make_parser,handler
from xml.sax.xmlreader import InputSource
import xml.sax.saxutils
from element import Element
from namespaces import OFFICENS
try:
    from cStringIO import StringIO
except ImportError:
    from io import StringIO

#
# Parse the XML files
#
class LoadParser(handler.ContentHandler):
    """ Extract headings from content.xml of an ODT file """
    triggers = (
       (OFFICENS, 'automatic-styles'), (OFFICENS, 'body'),
       (OFFICENS, 'font-face-decls'), (OFFICENS, 'master-styles'),
       (OFFICENS, 'meta'), (OFFICENS, 'scripts'),
       (OFFICENS, 'settings'), (OFFICENS, 'styles') )

    def __init__(self, document):
        self.doc = document
        self.data = []
        self.level = 0
        self.parse = False

    def characters(self, data):
        if self.parse == False:
            return
        self.data.append(data)

    def startElementNS(self, tag, qname, attrs):
        if tag in self.triggers:
            self.parse = True
        if self.doc._parsing != "styles.xml" and tag == (OFFICENS, 'font-face-decls'):
            self.parse = False
        if self.parse == False:
            return

        self.level = self.level + 1
        # Add any accumulated text content
        content = ''.join(self.data)
        if len(content.strip()) > 0:
            self.parent.addText(content, check_grammar=False)
            self.data = []
        # Create the element
        attrdict = {}
        for (att,value) in attrs.items():
            attrdict[att] = value
        try:
            e = Element(qname = tag, qattributes=attrdict, check_grammar=False)
            self.curr = e
        except AttributeError as v:
            print ("Error: %s" % v)

        if tag == (OFFICENS, 'automatic-styles'):
            e = self.doc.automaticstyles
        elif tag == (OFFICENS, 'body'):
            e = self.doc.body
        elif tag == (OFFICENS, 'master-styles'):
            e = self.doc.masterstyles
        elif tag == (OFFICENS, 'meta'):
            e = self.doc.meta
        elif tag == (OFFICENS,'scripts'):
            e = self.doc.scripts
        elif tag == (OFFICENS,'settings'):
            e = self.doc.settings
        elif tag == (OFFICENS,'styles'):
            e = self.doc.styles
        elif self.doc._parsing == "styles.xml" and tag == (OFFICENS, 'font-face-decls'):
            e = self.doc.fontfacedecls
        elif hasattr(self,'parent'):
            self.parent.addElement(e, check_grammar=False)
        self.parent = e


    def endElementNS(self, tag, qname):
        if self.parse == False:
            return
        self.level = self.level - 1
        str = ''.join(self.data)
        if len(str.strip()) > 0:
            self.curr.addText(str, check_grammar=False)
        self.data = []
        self.curr = self.curr.parentNode
        self.parent = self.curr
        if tag in self.triggers:
            self.parse = False