source: python/trunk/Lib/xml/sax/saxutils.py

Last change on this file was 391, checked in by dmik, 11 years ago

python: Merge vendor 2.7.6 to trunk.

  • Property svn:eol-style set to native
File size: 11.3 KB
Line 
1"""\
2A library of useful helper classes to the SAX classes, for the
3convenience of application and driver writers.
4"""
5
6import os, urlparse, urllib, types
7import io
8import sys
9import handler
10import xmlreader
11
12try:
13 _StringTypes = [types.StringType, types.UnicodeType]
14except AttributeError:
15 _StringTypes = [types.StringType]
16
def __dict_replace(s, d):
    """Return *s* after replacing each key of *d* with its value.

    The replacements are applied one after another, in the order the
    dictionary yields its items, so a later replacement also sees the
    text produced by an earlier one.
    """
    for old, new in d.items():
        s = s.replace(old, new)
    return s
22
def escape(data, entities={}):
    """Return *data* with &, < and > replaced by XML entity references.

    Additional strings can be escaped by passing a dictionary as the
    optional *entities* parameter: every key found in the data is
    replaced with its corresponding value.  Keys and values must all be
    strings.
    """
    # The ampersand has to be handled first; otherwise the "&" that the
    # other replacements introduce would be escaped a second time.
    for char, reference in (("&", "&amp;"), (">", "&gt;"), ("<", "&lt;")):
        data = data.replace(char, reference)
    if not entities:
        return data
    return __dict_replace(data, entities)
38
def unescape(data, entities={}):
    """Return *data* with &amp;, &lt; and &gt; turned back into characters.

    Additional strings can be unescaped by passing a dictionary as the
    optional *entities* parameter: every key found in the data is
    replaced with its corresponding value.  Keys and values must all be
    strings.
    """
    text = data.replace("&lt;", "<").replace("&gt;", ">")
    if entities:
        text = __dict_replace(text, entities)
    # The ampersand is restored last so that text such as "&amp;lt;"
    # comes out as "&lt;" rather than being unescaped twice.
    return text.replace("&amp;", "&")
52
def quoteattr(data, entities={}):
    """Escape *data* and wrap it in quotes for use as an attribute value.

    &, < and > are escaped, newline, carriage return and tab become
    numeric character references, and the result is enclosed in double
    quotes.  If the value itself contains a double quote it is enclosed
    in single quotes instead; if it contains both kinds of quote, the
    double quotes are written as &quot;.

    Additional strings can be escaped by passing a dictionary as the
    optional *entities* parameter: every key found in the data is
    replaced with its corresponding value.  Keys and values must all be
    strings.
    """
    # Work on a copy so the caller's dictionary is left untouched.
    replacements = entities.copy()
    replacements['\n'] = '&#10;'
    replacements['\r'] = '&#13;'
    replacements['\t'] = '&#9;'
    escaped = escape(data, replacements)

    if '"' not in escaped:
        return '"%s"' % escaped
    if "'" not in escaped:
        return "'%s'" % escaped
    return '"%s"' % escaped.replace('"', "&quot;")
75
76
class _UnbufferedTextIOWrapper(io.TextIOWrapper):
    """TextIOWrapper that flushes itself after every write()."""

    def write(self, s):
        super(_UnbufferedTextIOWrapper, self).write(s)
        self.flush()


def _gettextwriter(out, encoding):
    """Return a text writer that encodes to *out* using *encoding*.

    out      -- a raw io stream, any other object with a write() method
                that accepts encoded bytes, or None for sys.stdout.
    encoding -- name of the encoding applied to the written text.

    Characters the encoding cannot represent are written as XML
    character references, newlines are written as '\\n', and every
    write is flushed through to *out* immediately.
    """
    if out is None:
        out = sys.stdout

    if isinstance(out, io.RawIOBase):
        # io.BufferedIOBase is an abstract base that does not wrap a raw
        # stream, so the previous io.BufferedIOBase(out) never forwarded
        # anything to `out`.  BufferedWriter is the concrete buffer that
        # actually writes to it.
        buffer = io.BufferedWriter(out)
        # Keep the original file open when the TextIOWrapper is
        # destroyed
        buffer.close = lambda: None
    else:
        # This is to handle passed objects that aren't in the
        # IOBase hierarchy, but just have a write method
        buffer = io.BufferedIOBase()
        buffer.writable = lambda: True
        buffer.write = out.write
        try:
            # TextIOWrapper uses these methods to determine
            # if BOM (for UTF-16, etc) should be added
            buffer.seekable = out.seekable
            buffer.tell = out.tell
        except AttributeError:
            pass
    # wrap a binary writer with TextIOWrapper
    return _UnbufferedTextIOWrapper(buffer, encoding=encoding,
                                    errors='xmlcharrefreplace',
                                    newline='\n')
108
class XMLGenerator(handler.ContentHandler):
    """ContentHandler that writes the events it receives out as XML text.

    out      -- destination passed to _gettextwriter(); None selects
                sys.stdout.
    encoding -- encoding of the output; it is also the name written into
                the XML declaration and the encoding used to decode
                non-unicode character data.
    """

    def __init__(self, out=None, encoding="iso-8859-1"):
        handler.ContentHandler.__init__(self)
        out = _gettextwriter(out, encoding)
        self._write = out.write
        self._flush = out.flush
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        # (prefix, uri) pairs announced by startPrefixMapping() that have
        # not yet been written as xmlns attributes
        self._undeclared_ns_maps = []
        self._encoding = encoding

    def _qname(self, name):
        """Builds a qualified name from a (ns_url, localname) pair"""
        if name[0]:
            # Per http://www.w3.org/XML/1998/namespace, The 'xml' prefix is
            # bound by definition to http://www.w3.org/XML/1998/namespace.  It
            # does not need to be declared and will not usually be found in
            # self._current_context.
            if 'http://www.w3.org/XML/1998/namespace' == name[0]:
                return 'xml:' + name[1]
            # The name is in a non-empty namespace
            prefix = self._current_context[name[0]]
            if prefix:
                # If it is not the default namespace, prepend the prefix
                return prefix + ":" + name[1]
        # Return the unqualified name
        return name[1]

    # ContentHandler methods

    def startDocument(self):
        """Write the XML declaration, naming the output encoding."""
        self._write(u'<?xml version="1.0" encoding="%s"?>\n' %
                        self._encoding)

    def endDocument(self):
        """Flush the output writer."""
        self._flush()

    def startPrefixMapping(self, prefix, uri):
        """Record a uri -> prefix binding to be declared on the next
        element written by startElementNS()."""
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        """Restore the namespace context saved by startPrefixMapping()."""
        self._current_context = self._ns_contexts.pop()

    def startElement(self, name, attrs):
        """Write the start tag of element *name* with its attributes."""
        self._write(u'<' + name)
        # A separate loop variable keeps the *name* parameter intact.
        for (attr_name, value) in attrs.items():
            self._write(u' %s=%s' % (attr_name, quoteattr(value)))
        self._write(u'>')

    def endElement(self, name):
        """Write the end tag of element *name*."""
        self._write(u'</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        """Write a namespace-aware start tag.

        *name* and the keys of *attrs* are (uri, localname) pairs; the
        *qname* argument is not used.  Namespace declarations recorded
        since the previous start tag are written as xmlns attributes.
        """
        self._write(u'<' + self._qname(name))

        for prefix, uri in self._undeclared_ns_maps:
            # A namespace URI is an attribute value like any other, so it
            # is escaped and quoted with quoteattr(); a URI containing
            # '&', '<' or '"' would otherwise produce malformed XML.  A
            # None uri is written as an empty value instead of the text
            # "None".
            if uri is None:
                uri = u''
            if prefix:
                self._write(u' xmlns:%s=%s' % (prefix, quoteattr(uri)))
            else:
                self._write(u' xmlns=%s' % quoteattr(uri))
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._write(u' %s=%s' % (self._qname(attr_name),
                                     quoteattr(value)))
        self._write(u'>')

    def endElementNS(self, name, qname):
        """Write the end tag for the (uri, localname) pair *name*."""
        self._write(u'</%s>' % self._qname(name))

    def characters(self, content):
        """Write character data with &, < and > escaped."""
        if not isinstance(content, unicode):
            content = unicode(content, self._encoding)
        self._write(escape(content))

    def ignorableWhitespace(self, content):
        """Write *content* as it is, without escaping."""
        if not isinstance(content, unicode):
            content = unicode(content, self._encoding)
        self._write(content)

    def processingInstruction(self, target, data):
        """Write a processing instruction."""
        self._write(u'<?%s %s?>' % (target, data))
194
195
class XMLFilterBase(xmlreader.XMLReader):
    """Pass-through filter that sits between an XMLReader and the
    client application's event handlers.

    As written, every configuration request is relayed to the parent
    reader and every event to the handler registered on this filter,
    unchanged.  Subclasses override individual methods to modify the
    event stream or the configuration requests on their way through.
    """

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        """Relay error() to the registered error handler."""
        target = self._err_handler
        target.error(exception)

    def fatalError(self, exception):
        """Relay fatalError() to the registered error handler."""
        target = self._err_handler
        target.fatalError(exception)

    def warning(self, exception):
        """Relay warning() to the registered error handler."""
        target = self._err_handler
        target.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        """Relay setDocumentLocator() to the content handler."""
        target = self._cont_handler
        target.setDocumentLocator(locator)

    def startDocument(self):
        """Relay startDocument() to the content handler."""
        target = self._cont_handler
        target.startDocument()

    def endDocument(self):
        """Relay endDocument() to the content handler."""
        target = self._cont_handler
        target.endDocument()

    def startPrefixMapping(self, prefix, uri):
        """Relay startPrefixMapping() to the content handler."""
        target = self._cont_handler
        target.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        """Relay endPrefixMapping() to the content handler."""
        target = self._cont_handler
        target.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        """Relay startElement() to the content handler."""
        target = self._cont_handler
        target.startElement(name, attrs)

    def endElement(self, name):
        """Relay endElement() to the content handler."""
        target = self._cont_handler
        target.endElement(name)

    def startElementNS(self, name, qname, attrs):
        """Relay startElementNS() to the content handler."""
        target = self._cont_handler
        target.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        """Relay endElementNS() to the content handler."""
        target = self._cont_handler
        target.endElementNS(name, qname)

    def characters(self, content):
        """Relay characters() to the content handler."""
        target = self._cont_handler
        target.characters(content)

    def ignorableWhitespace(self, chars):
        """Relay ignorableWhitespace() to the content handler."""
        target = self._cont_handler
        target.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        """Relay processingInstruction() to the content handler."""
        content_handler = self._cont_handler
        content_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        """Relay skippedEntity() to the content handler."""
        target = self._cont_handler
        target.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        """Relay notationDecl() to the DTD handler."""
        target = self._dtd_handler
        target.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        """Relay unparsedEntityDecl() to the DTD handler."""
        target = self._dtd_handler
        target.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        """Return whatever the registered entity resolver returns."""
        resolver = self._ent_handler
        return resolver.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        """Install this filter as all four handlers of the parent
        reader, then have the parent parse *source*."""
        reader = self._parent
        reader.setContentHandler(self)
        reader.setErrorHandler(self)
        reader.setEntityResolver(self)
        reader.setDTDHandler(self)
        reader.parse(source)

    def setLocale(self, locale):
        """Relay setLocale() to the parent reader."""
        reader = self._parent
        reader.setLocale(locale)

    def getFeature(self, name):
        """Return the parent reader's value for feature *name*."""
        reader = self._parent
        return reader.getFeature(name)

    def setFeature(self, name, state):
        """Relay setFeature() to the parent reader."""
        reader = self._parent
        reader.setFeature(name, state)

    def getProperty(self, name):
        """Return the parent reader's value for property *name*."""
        reader = self._parent
        return reader.getProperty(name)

    def setProperty(self, name, value):
        """Relay setProperty() to the parent reader."""
        reader = self._parent
        reader.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        """Return the parent reader."""
        return self._parent

    def setParent(self, parent):
        """Replace the parent reader."""
        self._parent = parent
304
305# --- Utility functions
306
def prepare_input_source(source, base=""):
    """Return an InputSource for *source* that has a byte stream set.

    source -- a string (wrapped in a new InputSource), an object with a
              read() method (used as the byte stream of a new
              InputSource), or an existing InputSource.
    base   -- optional base URL/path used to resolve the system
              identifier when the stream still has to be opened.

    If the InputSource has no byte stream yet, its system identifier is
    opened: as a local file when, joined onto the directory of *base*,
    it names an existing file; otherwise as a URL joined onto *base*.
    """
    string_types = tuple(_StringTypes)

    # isinstance() rather than an exact type match, so subclasses of the
    # string types are treated as strings too.
    if isinstance(source, string_types):
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        source.setByteStream(f)
        # Only a string name is usable as a system identifier; io
        # objects opened from a file descriptor report the integer
        # descriptor as their name.
        if hasattr(f, "name") and isinstance(f.name, string_types):
            source.setSystemId(f.name)

    if source.getByteStream() is None:
        try:
            sysid = source.getSystemId()
            basehead = os.path.dirname(os.path.normpath(base))
            encoding = sys.getfilesystemencoding()
            # os.path.join() needs both parts to be of the same string
            # kind.  Prefer promoting the byte string to unicode; if it
            # cannot be decoded, encode the unicode part instead.
            if isinstance(sysid, unicode):
                if not isinstance(basehead, unicode):
                    try:
                        basehead = basehead.decode(encoding)
                    except UnicodeDecodeError:
                        sysid = sysid.encode(encoding)
            else:
                if isinstance(basehead, unicode):
                    try:
                        sysid = sysid.decode(encoding)
                    except UnicodeDecodeError:
                        basehead = basehead.encode(encoding)
            sysidfilename = os.path.join(basehead, sysid)
            isfile = os.path.isfile(sysidfilename)
        except UnicodeError:
            # The two parts could not be brought to a common string
            # kind; fall back to treating the identifier as a URL.
            isfile = False
        if isfile:
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, source.getSystemId()))
            f = urllib.urlopen(source.getSystemId())

        # The opened stream is handed to the InputSource; it is not
        # closed here.
        source.setByteStream(f)

    return source
Note: See TracBrowser for help on using the repository browser.