| 1 | """\
|
|---|
| 2 | A library of useful helper classes to the SAX classes, for the
|
|---|
| 3 | convenience of application and driver writers.
|
|---|
| 4 | """
|
|---|
| 5 |
|
|---|
| 6 | import os, urlparse, urllib, types
|
|---|
| 7 | import io
|
|---|
| 8 | import sys
|
|---|
| 9 | import handler
|
|---|
| 10 | import xmlreader
|
|---|
| 11 |
|
|---|
| 12 | try:
|
|---|
| 13 | _StringTypes = [types.StringType, types.UnicodeType]
|
|---|
| 14 | except AttributeError:
|
|---|
| 15 | _StringTypes = [types.StringType]
|
|---|
| 16 |
|
|---|
def __dict_replace(s, d):
    """Return s with each key of the dictionary d replaced by its value.

    The replacements are applied one key after another, in the
    dictionary's iteration order, each working on the result of the
    previous replacement.
    """
    for old in d:
        s = s.replace(old, d[old])
    return s
|
|---|
| 22 |
|
|---|
def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    The additional replacements are applied after the three built-in
    ones.  The entities dictionary is only read, never modified.
    Returns the escaped string.
    """

    # The ampersand must be handled first: the other two replacements
    # introduce "&" characters of their own, which must not be escaped
    # a second time.
    data = data.replace("&", "&amp;")
    data = data.replace(">", "&gt;")
    data = data.replace("<", "&lt;")
    if entities:
        data = __dict_replace(data, entities)
    return data
|
|---|
| 38 |
|
|---|
def unescape(data, entities={}):
    """Unescape &amp;, &lt;, and &gt; in a string of data.

    You can unescape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    The entities dictionary is only read, never modified.  Returns the
    unescaped string.
    """
    data = data.replace("&lt;", "<")
    data = data.replace("&gt;", ">")
    if entities:
        data = __dict_replace(data, entities)
    # The ampersand must be handled last: doing it earlier would turn an
    # escaped reference such as "&amp;lt;" into "&lt;" and then into "<".
    return data.replace("&amp;", "&")
|
|---|
| 52 |
|
|---|
def quoteattr(data, entities={}):
    """Escape and quote an attribute value.

    Escape &, <, and > in a string of data, then quote it for use as
    an attribute value.  The \" character will be escaped as well, if
    necessary.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    Newline, carriage return and tab are always written as character
    references; these three mappings take precedence over entries for
    the same keys in entities.  The caller's dictionary is not modified.
    Returns the value including its surrounding quote characters.
    """
    # Work on a copy so neither the caller's dictionary nor the shared
    # default argument is ever mutated.
    entities = entities.copy()
    # Whitespace is emitted as character references so that it is still
    # present after the reader has parsed the attribute value.
    entities.update({'\n': '&#10;', '\r': '&#13;', '\t': '&#9;'})
    data = escape(data, entities)
    if '"' in data:
        if "'" in data:
            # Both quote styles occur: delimit with double quotes and
            # escape the embedded double quotes.
            data = '"%s"' % data.replace('"', "&quot;")
        else:
            # Only double quotes occur: single-quote delimiters need no
            # further escaping.
            data = "'%s'" % data
    else:
        data = '"%s"' % data
    return data
|
|---|
| 75 |
|
|---|
| 76 |
|
|---|
def _gettextwriter(out, encoding):
    """Return a text writer that encodes to `encoding` and writes to `out`.

    `out` may be None (sys.stdout is used), an io.RawIOBase instance, or
    any other object that has a write() method.  Characters that the
    encoding cannot represent are written as XML character references,
    and every write() on the returned object is flushed immediately.
    """
    # Text layer that pushes each write straight through to the binary
    # layer underneath it.
    class UnbufferedTextIOWrapper(io.TextIOWrapper):
        def write(self, s):
            super(UnbufferedTextIOWrapper, self).write(s)
            self.flush()

    if out is None:
        import sys
        out = sys.stdout

    if not isinstance(out, io.RawIOBase):
        # The object is not a raw stream; it is only required to have a
        # write method, so build a minimal binary layer around it.
        binary = io.BufferedIOBase()
        binary.writable = lambda: True
        binary.write = out.write
        try:
            # TextIOWrapper consults these two methods when deciding
            # whether a BOM (for UTF-16, etc) should be added.
            binary.seekable = out.seekable
            binary.tell = out.tell
        except AttributeError:
            pass
    else:
        binary = io.BufferedIOBase(out)
        # Closing is turned into a no-op so that the original file stays
        # open when the TextIOWrapper is destroyed.
        binary.close = lambda: None

    # Wrap the binary writer with the text layer.
    return UnbufferedTextIOWrapper(binary,
                                   encoding=encoding,
                                   errors='xmlcharrefreplace',
                                   newline='\n')
|
|---|
| 108 |
|
|---|
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the SAX events it receives as XML text.

    The text goes through the writer that _gettextwriter() builds for the
    `out` object and `encoding` passed to the constructor.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        handler.ContentHandler.__init__(self)
        writer = _gettextwriter(out, encoding)
        self._write = writer.write
        self._flush = writer.flush
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs still waiting to be written as xmlns
        # attributes on the next namespace-aware start tag.
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _qname(self, name):
        """Builds a qualified name from a (ns_url, localname) pair"""
        uri = name[0]
        if not uri:
            # No namespace: the name is used unqualified.
            return name[1]
        # Per http://www.w3.org/XML/1998/namespace, the 'xml' prefix is
        # bound by definition to http://www.w3.org/XML/1998/namespace.  It
        # does not need to be declared and will not usually be found in
        # self._current_context.
        if 'http://www.w3.org/XML/1998/namespace' == uri:
            return 'xml:' + name[1]
        # The name is in a non-empty namespace; look up its prefix.
        prefix = self._current_context[uri]
        if not prefix:
            # Default namespace: no prefix is written.
            return name[1]
        return prefix + ":" + name[1]

    # ContentHandler methods

    def startDocument(self):
        """Write the XML declaration, naming the configured encoding."""
        declaration = u'<?xml version="1.0" encoding="%s"?>\n' % self._encoding
        self._write(declaration)

    def endDocument(self):
        """Flush the underlying writer."""
        self._flush()

    def startPrefixMapping(self, prefix, uri):
        """Record a new uri -> prefix binding and queue its declaration."""
        # Push a snapshot of the bindings as they were before this one.
        snapshot = self._current_context.copy()
        self._ns_contexts.append(snapshot)
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Make the most recently pushed snapshot the current context."""
        self._current_context = self._ns_contexts.pop()

    def startElement(self, name, attrs):
        """Write a start tag with its attributes (no namespace handling)."""
        self._write(u'<' + name)
        for attr_name, attr_value in attrs.items():
            self._write(u' %s=%s' % (attr_name, quoteattr(attr_value)))
        self._write(u'>')

    def endElement(self, name):
        """Write the end tag for `name`."""
        self._write(u'</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        """Write a namespace-aware start tag.

        Any prefix mappings queued since the previous start tag are
        written as xmlns attributes first, then the element's own
        attributes follow.
        """
        self._write(u'<' + self._qname(name))

        for prefix, uri in self._undeclared_ns_maps:
            if not prefix:
                declaration = u' xmlns="%s"' % uri
            else:
                declaration = u' xmlns:%s="%s"' % (prefix, uri)
            self._write(declaration)
        self._undeclared_ns_maps = []

        for attr_name, attr_value in attrs.items():
            qualified = self._qname(attr_name)
            self._write(u' %s=%s' % (qualified, quoteattr(attr_value)))
        self._write(u'>')

    def endElementNS(self, name, qname):
        """Write the end tag for the (ns_url, localname) pair `name`."""
        self._write(u'</%s>' % self._qname(name))

    def characters(self, content):
        """Write character data with &, < and > escaped."""
        if isinstance(content, unicode):
            text = content
        else:
            # Other input is decoded using the output encoding.
            text = unicode(content, self._encoding)
        self._write(escape(text))

    def ignorableWhitespace(self, content):
        """Write whitespace content as is, without escaping."""
        if isinstance(content, unicode):
            text = content
        else:
            text = unicode(content, self._encoding)
        self._write(text)

    def processingInstruction(self, target, data):
        """Write a processing instruction."""
        self._write(u'<?%s %s?>' % (target, data))
|
|---|
| 194 |
|
|---|
| 195 |
|
|---|
class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through."""

    def __init__(self, parent=None):
        xmlreader.XMLReader.__init__(self)
        # The reader this filter forwards its requests to.
        self._parent = parent

    # ErrorHandler methods -- forwarded to the registered error handler

    def error(self, exception):
        handler_ = self._err_handler
        handler_.error(exception)

    def fatalError(self, exception):
        handler_ = self._err_handler
        handler_.fatalError(exception)

    def warning(self, exception):
        handler_ = self._err_handler
        handler_.warning(exception)

    # ContentHandler methods -- forwarded to the registered content handler

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods -- forwarded to the registered DTD handler

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods -- forwarded to the registered entity resolver

    def resolveEntity(self, publicId, systemId):
        resolved = self._ent_handler.resolveEntity(publicId, systemId)
        return resolved

    # XMLReader methods -- forwarded to the parent reader

    def parse(self, source):
        """Register this filter as all four handlers of the parent
        reader, then have the parent parse `source`."""
        upstream = self._parent
        upstream.setContentHandler(self)
        upstream.setErrorHandler(self)
        upstream.setEntityResolver(self)
        upstream.setDTDHandler(self)
        upstream.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        state = self._parent.getFeature(name)
        return state

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        value = self._parent.getProperty(name)
        return value

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        """Return the reader this filter forwards requests to."""
        return self._parent

    def setParent(self, parent):
        """Replace the reader this filter forwards requests to."""
        self._parent = parent
|
|---|
| 304 |
|
|---|
| 305 | # --- Utility functions
|
|---|
| 306 |
|
|---|
def prepare_input_source(source, base=""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    `source` may also be a string (used as the system identifier) or an
    object with a read() method (used as the byte stream; its `name`
    attribute, when present, becomes the system identifier).  When the
    resulting InputSource has no byte stream yet, the system identifier
    is opened: as a local file if it names one relative to the directory
    of `base`, otherwise as a URL joined onto `base`.
    """

    if type(source) in _StringTypes:
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        stream = source
        source = xmlreader.InputSource()
        source.setByteStream(stream)
        if hasattr(stream, "name"):
            source.setSystemId(stream.name)

    # Nothing left to resolve when a byte stream is already attached.
    if source.getByteStream() is not None:
        return source

    try:
        sysid = source.getSystemId()
        basehead = os.path.dirname(os.path.normpath(base))
        encoding = sys.getfilesystemencoding()
        sysid_is_unicode = isinstance(sysid, unicode)
        basehead_is_unicode = isinstance(basehead, unicode)
        # Bring both path components to the same string type before
        # joining them: prefer decoding the byte string, and fall back
        # to encoding the unicode one when decoding fails.
        if sysid_is_unicode and not basehead_is_unicode:
            try:
                basehead = basehead.decode(encoding)
            except UnicodeDecodeError:
                sysid = sysid.encode(encoding)
        elif basehead_is_unicode and not sysid_is_unicode:
            try:
                sysid = sysid.decode(encoding)
            except UnicodeDecodeError:
                basehead = basehead.encode(encoding)
        sysidfilename = os.path.join(basehead, sysid)
        isfile = os.path.isfile(sysidfilename)
    except UnicodeError:
        # The path could not be built; treat the identifier as a URL.
        isfile = False

    if isfile:
        source.setSystemId(sysidfilename)
        stream = open(sysidfilename, "rb")
    else:
        source.setSystemId(urlparse.urljoin(base, source.getSystemId()))
        stream = urllib.urlopen(source.getSystemId())

    source.setByteStream(stream)
    return source
|
|---|