1 | #
|
---|
2 | # ElementTree
|
---|
3 | # $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $
|
---|
4 | #
|
---|
5 | # light-weight XML support for Python 2.3 and later.
|
---|
6 | #
|
---|
7 | # history (since 1.2.6):
|
---|
8 | # 2005-11-12 fl added tostringlist/fromstringlist helpers
|
---|
9 | # 2006-07-05 fl merged in selected changes from the 1.3 sandbox
|
---|
10 | # 2006-07-05 fl removed support for 2.1 and earlier
|
---|
11 | # 2007-06-21 fl added deprecation/future warnings
|
---|
12 | # 2007-08-25 fl added doctype hook, added parser version attribute etc
|
---|
13 | # 2007-08-26 fl added new serializer code (better namespace handling, etc)
|
---|
14 | # 2007-08-27 fl warn for broken /tag searches on tree level
|
---|
15 | # 2007-09-02 fl added html/text methods to serializer (experimental)
|
---|
16 | # 2007-09-05 fl added method argument to tostring/tostringlist
|
---|
17 | # 2007-09-06 fl improved error handling
|
---|
18 | # 2007-09-13 fl added itertext, iterfind; assorted cleanups
|
---|
19 | # 2007-12-15 fl added C14N hooks, copy method (experimental)
|
---|
20 | #
|
---|
21 | # Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved.
|
---|
22 | #
|
---|
23 | # fredrik@pythonware.com
|
---|
24 | # http://www.pythonware.com
|
---|
25 | #
|
---|
26 | # --------------------------------------------------------------------
|
---|
27 | # The ElementTree toolkit is
|
---|
28 | #
|
---|
29 | # Copyright (c) 1999-2008 by Fredrik Lundh
|
---|
30 | #
|
---|
31 | # By obtaining, using, and/or copying this software and/or its
|
---|
32 | # associated documentation, you agree that you have read, understood,
|
---|
33 | # and will comply with the following terms and conditions:
|
---|
34 | #
|
---|
35 | # Permission to use, copy, modify, and distribute this software and
|
---|
36 | # its associated documentation for any purpose and without fee is
|
---|
37 | # hereby granted, provided that the above copyright notice appears in
|
---|
38 | # all copies, and that both that copyright notice and this permission
|
---|
39 | # notice appear in supporting documentation, and that the name of
|
---|
40 | # Secret Labs AB or the author not be used in advertising or publicity
|
---|
41 | # pertaining to distribution of the software without specific, written
|
---|
42 | # prior permission.
|
---|
43 | #
|
---|
44 | # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
|
---|
45 | # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
|
---|
46 | # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
|
---|
47 | # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
|
---|
48 | # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
---|
49 | # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
---|
50 | # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
---|
51 | # OF THIS SOFTWARE.
|
---|
52 | # --------------------------------------------------------------------
|
---|
53 |
|
---|
54 | # Licensed to PSF under a Contributor Agreement.
|
---|
55 | # See http://www.python.org/psf/license for licensing details.
|
---|
56 |
|
---|
57 | __all__ = [
|
---|
58 | # public symbols
|
---|
59 | "Comment",
|
---|
60 | "dump",
|
---|
61 | "Element", "ElementTree",
|
---|
62 | "fromstring", "fromstringlist",
|
---|
63 | "iselement", "iterparse",
|
---|
64 | "parse", "ParseError",
|
---|
65 | "PI", "ProcessingInstruction",
|
---|
66 | "QName",
|
---|
67 | "SubElement",
|
---|
68 | "tostring", "tostringlist",
|
---|
69 | "TreeBuilder",
|
---|
70 | "VERSION",
|
---|
71 | "XML",
|
---|
72 | "XMLParser", "XMLTreeBuilder",
|
---|
73 | ]
|
---|
74 |
|
---|
75 | VERSION = "1.3.0"
|
---|
76 |
|
---|
77 | ##
|
---|
78 | # The <b>Element</b> type is a flexible container object, designed to
|
---|
79 | # store hierarchical data structures in memory. The type can be
|
---|
80 | # described as a cross between a list and a dictionary.
|
---|
81 | # <p>
|
---|
82 | # Each element has a number of properties associated with it:
|
---|
83 | # <ul>
|
---|
84 | # <li>a <i>tag</i>. This is a string identifying what kind of data
|
---|
85 | # this element represents (the element type, in other words).</li>
|
---|
86 | # <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>
|
---|
87 | # <li>a <i>text</i> string.</li>
|
---|
88 | # <li>an optional <i>tail</i> string.</li>
|
---|
89 | # <li>a number of <i>child elements</i>, stored in a Python sequence</li>
|
---|
90 | # </ul>
|
---|
91 | #
|
---|
92 | # To create an element instance, use the {@link #Element} constructor
|
---|
93 | # or the {@link #SubElement} factory function.
|
---|
94 | # <p>
|
---|
95 | # The {@link #ElementTree} class can be used to wrap an element
|
---|
96 | # structure, and convert it from and to XML.
|
---|
97 | ##
|
---|
98 |
|
---|
99 | import sys
|
---|
100 | import re
|
---|
101 | import warnings
|
---|
102 |
|
---|
103 |
|
---|
class _SimpleElementPath(object):
    # Minimal stand-in used when the real ElementPath module cannot be
    # imported.  It emulates pre-1.2 find/findtext/findall behaviour:
    # only plain tag names and (for iterfind/findall) the ".//tag" form
    # are understood.  The namespaces argument is accepted so the
    # signatures match ElementPath, but it is not used.

    def find(self, element, tag, namespaces=None):
        # Return the first direct child whose tag equals the given tag,
        # or None if there is no such child.
        for elem in element:
            if elem.tag == tag:
                return elem
        return None

    def findtext(self, element, tag, default=None, namespaces=None):
        # Return the text of the first matching direct child; an empty
        # string if that child has no text; the default if no child
        # matches.
        elem = self.find(element, tag, namespaces)
        if elem is None:
            return default
        return elem.text or ""

    def iterfind(self, element, tag, namespaces=None):
        # Generate matching elements.  A ".//tag" path is delegated to
        # element.iter(); anything else is compared against the tags of
        # the direct children.
        if tag[:3] == ".//":
            for elem in element.iter(tag[3:]):
                yield elem
            # The two search modes are mutually exclusive.  Without this
            # return the generator went on to compare the complete
            # ".//tag" string against the direct children as well.
            return
        for elem in element:
            if elem.tag == tag:
                yield elem

    def findall(self, element, tag, namespaces=None):
        # Same as iterfind, but collected into a list.
        return list(self.iterfind(element, tag, namespaces))

# Prefer the full ElementPath implementation; fall back to the simple
# emulation above if it cannot be imported.
try:
    from . import ElementPath
except ImportError:
    ElementPath = _SimpleElementPath()
|
---|
130 |
|
---|
131 | ##
|
---|
132 | # Parser error. This is a subclass of <b>SyntaxError</b>.
|
---|
133 | # <p>
|
---|
134 | # In addition to the exception value, an exception instance contains a
|
---|
135 | # specific exception code in the <b>code</b> attribute, and the line and
|
---|
136 | # column of the error in the <b>position</b> attribute.
|
---|
137 |
|
---|
class ParseError(SyntaxError):
    """Exception type for parser errors; a plain SyntaxError subclass."""
|
---|
140 |
|
---|
141 | # --------------------------------------------------------------------
|
---|
142 |
|
---|
143 | ##
|
---|
144 | # Checks if an object appears to be a valid element object.
|
---|
145 | #
|
---|
146 | # @param element An element instance.
|
---|
147 | # @return A true value if this is an element object.
|
---|
148 | # @defreturn flag
|
---|
149 |
|
---|
def iselement(element):
    # FIXME: not sure about this; might be a better idea to look
    # for tag/attrib/text attributes
    # Real Element instances qualify immediately; any other object is
    # accepted as long as it exposes a "tag" attribute.
    if isinstance(element, Element):
        return True
    return hasattr(element, "tag")
|
---|
154 |
|
---|
155 | ##
|
---|
156 | # Element class. This class defines the Element interface, and
|
---|
157 | # provides a reference implementation of this interface.
|
---|
158 | # <p>
|
---|
159 | # The element name, attribute names, and attribute values can be
|
---|
160 | # either ASCII strings (ordinary Python strings containing only 7-bit
|
---|
161 | # ASCII characters) or Unicode strings.
|
---|
162 | #
|
---|
163 | # @param tag The element name.
|
---|
164 | # @param attrib An optional dictionary, containing element attributes.
|
---|
165 | # @param **extra Additional attributes, given as keyword arguments.
|
---|
166 | # @see Element
|
---|
167 | # @see SubElement
|
---|
168 | # @see Comment
|
---|
169 | # @see ProcessingInstruction
|
---|
170 |
|
---|
class Element(object):
    # Serialized layout: <tag attrib>text<child/>...</tag>tail

    ##
    # (Attribute) The element tag.

    tag = None

    ##
    # (Attribute) Dictionary holding the element attributes.  Where
    # possible, go through
    # {@link #Element.get},
    # {@link #Element.set},
    # {@link #Element.keys}, and
    # {@link #Element.items} rather than touching the dictionary
    # directly.

    attrib = None

    ##
    # (Attribute) Text found before the first subelement.  Either a
    # string or None.  When there is no text, the value may be None or
    # an empty string, depending on the parser.

    text = None

    ##
    # (Attribute) Text found after this element's end tag and before
    # the start tag of the next sibling.  Either a string or None.
    # When there is no text, the value may be None or an empty string,
    # depending on the parser.

    tail = None

    ##
    # Constructor.
    #
    # @param tag The element name.
    # @param attrib An optional dictionary, containing element attributes.
    # @param **extra Additional attributes, given as keyword arguments.

    def __init__(self, tag, attrib={}, **extra):
        # Merge into a private copy, so that neither the caller's
        # dictionary nor the shared default value is modified.
        merged = attrib.copy()
        merged.update(extra)
        self.tag = tag
        self.attrib = merged
        self._children = []

    def __repr__(self):
        return "<Element %r at 0x%x>" % (self.tag, id(self))

    ##
    # Creates a new element object of the same type as this element.
    #
    # @param tag Element tag.
    # @param attrib Element attributes, given as a dictionary.
    # @return A new element instance.

    def makeelement(self, tag, attrib):
        factory = self.__class__
        return factory(tag, attrib)

    ##
    # (Experimental) Copies the current element.  The copy is shallow:
    # the subelements are shared with the original tree.
    #
    # @return A new element instance.

    def copy(self):
        duplicate = self.makeelement(self.tag, self.attrib)
        duplicate.text, duplicate.tail = self.text, self.tail
        duplicate[:] = self
        return duplicate

    ##
    # Returns the number of subelements.  Only full elements are
    # counted; to find out whether an element has any content at all,
    # check both the length and the <b>text</b> attribute.
    #
    # @return The number of subelements.

    def __len__(self):
        return len(self._children)

    def __nonzero__(self):
        message = (
            "The behavior of this method will change in future versions. "
            "Use specific 'len(elem)' or 'elem is not None' test instead."
        )
        warnings.warn(message, FutureWarning, stacklevel=2)
        # emulate old behaviour, for now: true if there are subelements
        return bool(self._children)

    ##
    # Returns the given subelement, by index.
    #
    # @param index What subelement to return.
    # @return The given subelement.
    # @exception IndexError If the given element does not exist.

    def __getitem__(self, index):
        return self._children[index]

    ##
    # Replaces the given subelement, by index.  The new value is stored
    # as given; no type check is performed.
    #
    # @param index What subelement to replace.
    # @param element The new element value.
    # @exception IndexError If the given element does not exist.

    def __setitem__(self, index, element):
        self._children[index] = element

    ##
    # Deletes the given subelement, by index.
    #
    # @param index What subelement to delete.
    # @exception IndexError If the given element does not exist.

    def __delitem__(self, index):
        del self._children[index]

    ##
    # Adds a subelement to the end of this element.  In document order,
    # the new element appears after the last existing subelement (or
    # directly after the text, if it is the first subelement), but
    # before the end tag for this element.
    #
    # @param element The element to add.

    def append(self, element):
        self._children.append(element)

    ##
    # Appends subelements from a sequence.
    #
    # @param elements A sequence object with zero or more elements.
    # @since 1.3

    def extend(self, elements):
        self._children.extend(elements)

    ##
    # Inserts a subelement at the given position in this element.
    #
    # @param index Where to insert the new subelement.
    # @param element The element to insert.

    def insert(self, index, element):
        self._children.insert(index, element)

    ##
    # Removes a matching subelement.  Unlike the <b>find</b> methods,
    # this method compares elements based on identity, not on tag
    # value or contents.  To remove subelements by other means, the
    # easiest way is often to use a list comprehension to select what
    # elements to keep, and use slice assignment to update the parent
    # element.
    #
    # @param element What element to remove.
    # @exception ValueError If a matching element could not be found.

    def remove(self, element):
        self._children.remove(element)

    ##
    # (Deprecated) Returns all subelements, in document order.
    #
    # @return A list of subelements.
    # @defreturn list of Element instances

    def getchildren(self):
        message = (
            "This method will be removed in future versions. "
            "Use 'list(elem)' or iteration over elem instead."
        )
        warnings.warn(message, DeprecationWarning, stacklevel=2)
        return self._children

    ##
    # Finds the first matching subelement, by tag name or path.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The first matching element, or None if no element was found.
    # @defreturn Element or None

    def find(self, path, namespaces=None):
        return ElementPath.find(self, path, namespaces)

    ##
    # Finds text for the first matching subelement, by tag name or path.
    #
    # @param path What element to look for.
    # @param default What to return if the element was not found.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The text content of the first matching element, or the
    #     default value if no element was found.  Note that if the
    #     element is found, but has no text content, this method
    #     returns an empty string.
    # @defreturn string

    def findtext(self, path, default=None, namespaces=None):
        return ElementPath.findtext(self, path, default, namespaces)

    ##
    # Finds all matching subelements, by tag name or path.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return A list or other sequence containing all matching elements,
    #    in document order.
    # @defreturn list of Element instances

    def findall(self, path, namespaces=None):
        return ElementPath.findall(self, path, namespaces)

    ##
    # Finds all matching subelements, by tag name or path.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return An iterator or sequence containing all matching elements,
    #    in document order.
    # @defreturn a generated sequence of Element instances

    def iterfind(self, path, namespaces=None):
        return ElementPath.iterfind(self, path, namespaces)

    ##
    # Resets an element.  Removes all subelements, clears all
    # attributes, and sets the <b>text</b> and <b>tail</b> attributes
    # to None.

    def clear(self):
        self.attrib.clear()
        self._children = []
        self.text = self.tail = None

    ##
    # Gets an element attribute.  Equivalent to <b>attrib.get</b>, but
    # some implementations may handle this a bit more efficiently.
    #
    # @param key What attribute to look for.
    # @param default What to return if the attribute was not found.
    # @return The attribute value, or the default value, if the
    #     attribute was not found.
    # @defreturn string or None

    def get(self, key, default=None):
        return self.attrib.get(key, default)

    ##
    # Sets an element attribute.  Equivalent to <b>attrib[key] = value</b>,
    # but some implementations may handle this a bit more efficiently.
    #
    # @param key What attribute to set.
    # @param value The attribute value.

    def set(self, key, value):
        self.attrib[key] = value

    ##
    # Gets a list of attribute names.  The names are returned in an
    # arbitrary order (just like for an ordinary Python dictionary).
    # Equivalent to <b>attrib.keys()</b>.
    #
    # @return A list of element attribute names.
    # @defreturn list of strings

    def keys(self):
        return self.attrib.keys()

    ##
    # Gets element attributes, as a sequence.  The attributes are
    # returned in an arbitrary order.  Equivalent to <b>attrib.items()</b>.
    #
    # @return A list of (name, value) tuples for all attributes.
    # @defreturn list of (string, string) tuples

    def items(self):
        return self.attrib.items()

    ##
    # Creates a tree iterator.  The iterator loops over this element
    # and all subelements, in document order, and returns all elements
    # with a matching tag.
    # <p>
    # If the tree structure is modified during iteration, new or removed
    # elements may or may not be included.  To get a stable set, use the
    # list() function on the iterator, and loop over the resulting list.
    #
    # @param tag What tags to look for (default is to return all elements).
    # @return An iterator containing all the matching elements.
    # @defreturn iterator

    def iter(self, tag=None):
        # "*" is a wildcard and is treated exactly like "no filter".
        wanted = None if tag == "*" else tag
        if wanted is None or self.tag == wanted:
            yield self
        for child in self._children:
            for match in child.iter(wanted):
                yield match

    # compatibility
    def getiterator(self, tag=None):
        # Change for a DeprecationWarning in 1.4
        message = (
            "This method will be removed in future versions. "
            "Use 'elem.iter()' or 'list(elem.iter())' instead."
        )
        warnings.warn(message, PendingDeprecationWarning, stacklevel=2)
        return list(self.iter(tag))

    ##
    # Creates a text iterator.  The iterator loops over this element
    # and all subelements, in document order, and returns all inner
    # text.
    #
    # @return An iterator containing all inner text.
    # @defreturn iterator

    def itertext(self):
        tag = self.tag
        # Elements whose tag is neither a string nor None produce no
        # text at all.
        if not (isinstance(tag, basestring) or tag is None):
            return
        leading = self.text
        if leading:
            yield leading
        for child in self:
            for piece in child.itertext():
                yield piece
            trailing = child.tail
            if trailing:
                yield trailing

# compatibility
_Element = _ElementInterface = Element
|
---|
512 |
|
---|
513 | ##
|
---|
514 | # Subelement factory. This function creates an element instance, and
|
---|
515 | # appends it to an existing element.
|
---|
516 | # <p>
|
---|
517 | # The element name, attribute names, and attribute values can be
|
---|
518 | # either 8-bit ASCII strings or Unicode strings.
|
---|
519 | #
|
---|
520 | # @param parent The parent element.
|
---|
521 | # @param tag The subelement name.
|
---|
522 | # @param attrib An optional dictionary, containing element attributes.
|
---|
523 | # @param **extra Additional attributes, given as keyword arguments.
|
---|
524 | # @return An element instance.
|
---|
525 | # @defreturn Element
|
---|
526 |
|
---|
def SubElement(parent, tag, attrib={}, **extra):
    # Combine the dictionary and keyword attributes in a private copy,
    # so that the caller's dictionary (and the shared default) is left
    # untouched.
    merged = attrib.copy()
    merged.update(extra)
    # The parent acts as the factory for the new element, and then
    # receives it as its last subelement.
    child = parent.makeelement(tag, merged)
    parent.append(child)
    return child
|
---|
533 |
|
---|
534 | ##
|
---|
535 | # Comment element factory. This factory function creates a special
|
---|
536 | # element that will be serialized as an XML comment by the standard
|
---|
537 | # serializer.
|
---|
538 | # <p>
|
---|
539 | # The comment string can be either an 8-bit ASCII string or a Unicode
|
---|
540 | # string.
|
---|
541 | #
|
---|
542 | # @param text A string containing the comment string.
|
---|
543 | # @return An element instance, representing a comment.
|
---|
544 | # @defreturn Element
|
---|
545 |
|
---|
def Comment(text=None):
    # A comment is an ordinary Element whose tag is this factory
    # function itself; the comment string is stored as its text.
    comment = Element(Comment)
    comment.text = text
    return comment
|
---|
550 |
|
---|
551 | ##
|
---|
552 | # PI element factory. This factory function creates a special element
|
---|
553 | # that will be serialized as an XML processing instruction by the standard
|
---|
554 | # serializer.
|
---|
555 | #
|
---|
556 | # @param target A string containing the PI target.
|
---|
557 | # @param text A string containing the PI contents, if any.
|
---|
558 | # @return An element instance, representing a PI.
|
---|
559 | # @defreturn Element
|
---|
560 |
|
---|
def ProcessingInstruction(target, text=None):
    # A processing instruction is an ordinary Element whose tag is this
    # factory function itself.  Its text is the target, followed by a
    # space and the contents when contents are given.
    pi = Element(ProcessingInstruction)
    if text:
        pi.text = target + " " + text
    else:
        pi.text = target
    return pi

PI = ProcessingInstruction
|
---|
569 |
|
---|
570 | ##
|
---|
571 | # QName wrapper. This can be used to wrap a QName attribute value, in
|
---|
572 | # order to get proper namespace handling on output.
|
---|
573 | #
|
---|
574 | # @param text A string containing the QName value, in the form {uri}local,
|
---|
575 | # or, if the tag argument is given, the URI part of a QName.
|
---|
576 | # @param tag Optional tag. If given, the first argument is interpreted as
|
---|
577 | # an URI, and this argument is interpreted as a local name.
|
---|
578 | # @return An opaque object, representing the QName.
|
---|
579 |
|
---|
class QName(object):
    # Wraps a qualified name, stored as "{uri}local" in the text
    # attribute.  Instances hash and compare by that text, and can be
    # compared both with other QName instances and with plain strings.
    #
    # Comparisons are implemented as rich comparison methods rather
    # than __cmp__/cmp(), so equality stays consistent with __hash__
    # on interpreters that do not support __cmp__.

    def __init__(self, text_or_uri, tag=None):
        # With a tag, the first argument is the namespace URI and the
        # second the local name; otherwise the first argument is used
        # as the complete name.
        if tag:
            text_or_uri = "{%s}%s" % (text_or_uri, tag)
        self.text = text_or_uri

    def __str__(self):
        return self.text

    def __hash__(self):
        return hash(self.text)

    def __eq__(self, other):
        if isinstance(other, QName):
            return self.text == other.text
        return self.text == other

    def __ne__(self, other):
        if isinstance(other, QName):
            return self.text != other.text
        return self.text != other

    def __lt__(self, other):
        if isinstance(other, QName):
            return self.text < other.text
        return self.text < other

    def __le__(self, other):
        if isinstance(other, QName):
            return self.text <= other.text
        return self.text <= other

    def __gt__(self, other):
        if isinstance(other, QName):
            return self.text > other.text
        return self.text > other

    def __ge__(self, other):
        if isinstance(other, QName):
            return self.text >= other.text
        return self.text >= other
|
---|
593 |
|
---|
594 | # --------------------------------------------------------------------
|
---|
595 |
|
---|
596 | ##
|
---|
597 | # ElementTree wrapper class. This class represents an entire element
|
---|
598 | # hierarchy, and adds some extra support for serialization to and from
|
---|
599 | # standard XML.
|
---|
600 | #
|
---|
601 | # @param element Optional root element.
|
---|
602 | # @keyparam file Optional file handle or file name. If given, the
|
---|
603 | # tree is initialized with the contents of this XML file.
|
---|
604 |
|
---|
605 | class ElementTree(object):
|
---|
606 |
|
---|
def __init__(self, element=None, file=None):
    # The given element (possibly None) becomes the root; it is stored
    # as given, without any type check.
    self._root = element  # first node
    # A file argument, when true, is parsed right away; parse() then
    # replaces the root set above.
    if not file:
        return
    self.parse(file)
|
---|
612 |
|
---|
613 | ##
|
---|
614 | # Gets the root element for this tree.
|
---|
615 | #
|
---|
616 | # @return An element instance.
|
---|
617 | # @defreturn Element
|
---|
618 |
|
---|
def getroot(self):
    # The root is whatever was last stored by the constructor,
    # _setroot() or parse().
    root = self._root
    return root
|
---|
621 |
|
---|
622 | ##
|
---|
623 | # Replaces the root element for this tree. This discards the
|
---|
624 | # current contents of the tree, and replaces it with the given
|
---|
625 | # element. Use with care.
|
---|
626 | #
|
---|
627 | # @param element An element instance.
|
---|
628 |
|
---|
def _setroot(self, element):
    # Replace the root unconditionally; the element is stored as
    # given, without any type check.
    setattr(self, "_root", element)
|
---|
632 |
|
---|
633 | ##
|
---|
634 | # Loads an external XML document into this element tree.
|
---|
635 | #
|
---|
636 | # @param source A file name or file object. If a file object is
|
---|
637 | # given, it only has to implement a <b>read(n)</b> method.
|
---|
638 | # @keyparam parser An optional parser instance. If not given, the
|
---|
639 | # standard {@link XMLParser} parser is used.
|
---|
640 | # @return The document root element.
|
---|
641 | # @defreturn Element
|
---|
642 | # @exception ParseError If the parser fails to parse the document.
|
---|
643 |
|
---|
def parse(self, source, parser=None):
    # Anything without a read() method is taken to be a file name and
    # opened here, in binary mode.  Only a file opened here is closed
    # again; a caller-supplied file object is left open.
    opened_here = not hasattr(source, "read")
    if opened_here:
        source = open(source, "rb")
    try:
        if not parser:
            parser = XMLParser(target=TreeBuilder())
        # Feed the document to the parser in 64 KiB chunks until read()
        # returns an empty (false) value.
        chunk = source.read(65536)
        while chunk:
            parser.feed(chunk)
            chunk = source.read(65536)
        # Whatever the parser's close() returns becomes the new root.
        root = self._root = parser.close()
        return root
    finally:
        if opened_here:
            source.close()
|
---|
662 |
|
---|
663 | ##
|
---|
664 | # Creates a tree iterator for the root element. The iterator loops
|
---|
665 | # over all elements in this tree, in document order.
|
---|
666 | #
|
---|
667 | # @param tag What tags to look for (default is to return all elements)
|
---|
668 | # @return An iterator.
|
---|
669 | # @defreturn iterator
|
---|
670 |
|
---|
def iter(self, tag=None):
    # Delegate to the root element.  A root must have been set first;
    # with a root of None this raises AttributeError.
    root = self._root
    return root.iter(tag)
|
---|
674 |
|
---|
675 | # compatibility
|
---|
def getiterator(self, tag=None):
    # Change for a DeprecationWarning in 1.4
    message = (
        "This method will be removed in future versions. "
        "Use 'tree.iter()' or 'list(tree.iter())' instead."
    )
    # stacklevel=2 attributes the warning to the caller's line.
    warnings.warn(message, PendingDeprecationWarning, stacklevel=2)
    # Unlike iter(), the result is a fully built list.
    matches = list(self.iter(tag))
    return matches
|
---|
684 |
|
---|
685 | ##
|
---|
686 | # Same as getroot().find(path), starting at the root of the
|
---|
687 | # tree.
|
---|
688 | #
|
---|
689 | # @param path What element to look for.
|
---|
690 | # @keyparam namespaces Optional namespace prefix map.
|
---|
691 | # @return The first matching element, or None if no element was found.
|
---|
692 | # @defreturn Element or None
|
---|
693 |
|
---|
def find(self, path, namespaces=None):
    # A path with a leading "/" is rewritten to start at the root
    # element ("./..."), and a FutureWarning reports the rewritten
    # path to the caller.
    is_absolute = path[:1] == "/"
    if is_absolute:
        path = "." + path
        message = (
            "This search is broken in 1.3 and earlier, and will be "
            "fixed in a future version. If you rely on the current "
            "behaviour, change it to %r"
        ) % path
        warnings.warn(message, FutureWarning, stacklevel=2)
    root = self._root
    return root.find(path, namespaces)
|
---|
705 |
|
---|
706 | ##
|
---|
707 | # Same as getroot().findtext(path), starting at the root of the tree.
|
---|
708 | #
|
---|
709 | # @param path What element to look for.
|
---|
710 | # @param default What to return if the element was not found.
|
---|
711 | # @keyparam namespaces Optional namespace prefix map.
|
---|
712 | # @return The text content of the first matching element, or the
|
---|
713 | # default value if no element was found. Note that if the element
|
---|
714 | # is found, but has no text content, this method returns an
|
---|
715 | # empty string.
|
---|
716 | # @defreturn string
|
---|
717 |
|
---|
def findtext(self, path, default=None, namespaces=None):
    # A path with a leading "/" is rewritten to start at the root
    # element ("./..."), and a FutureWarning reports the rewritten
    # path to the caller.
    is_absolute = path[:1] == "/"
    if is_absolute:
        path = "." + path
        message = (
            "This search is broken in 1.3 and earlier, and will be "
            "fixed in a future version. If you rely on the current "
            "behaviour, change it to %r"
        ) % path
        warnings.warn(message, FutureWarning, stacklevel=2)
    root = self._root
    return root.findtext(path, default, namespaces)
|
---|
729 |
|
---|
730 | ##
|
---|
731 | # Same as getroot().findall(path), starting at the root of the tree.
|
---|
732 | #
|
---|
733 | # @param path What element to look for.
|
---|
734 | # @keyparam namespaces Optional namespace prefix map.
|
---|
735 | # @return A list or iterator containing all matching elements,
|
---|
736 | # in document order.
|
---|
737 | # @defreturn list of Element instances
|
---|
738 |
|
---|
def findall(self, path, namespaces=None):
    # A path with a leading "/" is rewritten to start at the root
    # element ("./..."), and a FutureWarning reports the rewritten
    # path to the caller.
    is_absolute = path[:1] == "/"
    if is_absolute:
        path = "." + path
        message = (
            "This search is broken in 1.3 and earlier, and will be "
            "fixed in a future version. If you rely on the current "
            "behaviour, change it to %r"
        ) % path
        warnings.warn(message, FutureWarning, stacklevel=2)
    root = self._root
    return root.findall(path, namespaces)
|
---|
750 |
|
---|
751 | ##
|
---|
752 | # Finds all matching subelements, by tag name or path.
|
---|
753 | # Same as getroot().iterfind(path).
|
---|
754 | #
|
---|
755 | # @param path What element to look for.
|
---|
756 | # @keyparam namespaces Optional namespace prefix map.
|
---|
757 | # @return An iterator or sequence containing all matching elements,
|
---|
758 | # in document order.
|
---|
759 | # @defreturn a generated sequence of Element instances
|
---|
760 |
|
---|
def iterfind(self, path, namespaces=None):
    """Return the result of ``iterfind`` on the root element.

    A *path* beginning with "/" is rewritten to begin with "./" and a
    FutureWarning is issued before the search is delegated to the root.

    @param path What element to look for.
    @keyparam namespaces Optional namespace prefix map.
    """
    # assert self._root is not None
    leading = path[:1]
    if leading == "/":
        path = "." + path
        message = (
            "This search is broken in 1.3 and earlier, and will be "
            "fixed in a future version. If you rely on the current "
            "behaviour, change it to %r" % path
        )
        # stacklevel=2 attributes the warning to our caller
        warnings.warn(message, FutureWarning, stacklevel=2)
    return self._root.iterfind(path, namespaces)
772 |
|
---|
773 | ##
|
---|
774 | # Writes the element tree to a file, as XML.
|
---|
775 | #
|
---|
776 | # @def write(file, **options)
|
---|
777 | # @param file A file name, or a file object opened for writing.
|
---|
778 | # @param **options Options, given as keyword arguments.
|
---|
779 | # @keyparam encoding Optional output encoding (default is US-ASCII).
|
---|
780 | # @keyparam xml_declaration Controls if an XML declaration should
|
---|
781 | # be added to the file. Use False for never, True for always,
|
---|
782 | # None for only if not US-ASCII or UTF-8. None is default.
|
---|
783 | # @keyparam default_namespace Sets the default XML namespace (for "xmlns").
|
---|
784 | # @keyparam method Optional output method ("xml", "html", "text" or
|
---|
785 | # "c14n"; default is "xml").
|
---|
786 |
|
---|
def write(self, file_or_filename,
          # keyword arguments
          encoding=None,
          xml_declaration=None,
          default_namespace=None,
          method=None):
    """Serialize the tree rooted at ``self._root`` to a file.

    @param file_or_filename An object with a ``write`` method, or a file
        name.  A file name is opened in "wb" mode and is always closed
        again before returning, even if serialization fails.
    @keyparam encoding Output encoding.  When omitted, "utf-8" is used
        for the "c14n" method and "us-ascii" for every other method.
    @keyparam xml_declaration Only consulted when an encoding was given
        and the method is "xml": a declaration is written if this is
        true, or if it is None and the encoding is neither "utf-8" nor
        "us-ascii".
    @keyparam default_namespace Passed on to the namespace collector.
    @keyparam method Key into the serializer table; "xml" when omitted.
    @exception ValueError If the method is not a known serializer.
    """
    # assert self._root is not None
    if not method:
        method = "xml"
    elif method not in _serialize:
        # FIXME: raise an ImportError for c14n if ElementC14N is missing?
        raise ValueError("unknown method %r" % method)
    if hasattr(file_or_filename, "write"):
        out = file_or_filename
    else:
        out = open(file_or_filename, "wb")
    try:
        write = out.write
        if not encoding:
            if method == "c14n":
                encoding = "utf-8"
            else:
                encoding = "us-ascii"
        elif xml_declaration or (xml_declaration is None and
                                 encoding not in ("utf-8", "us-ascii")):
            if method == "xml":
                write("<?xml version='1.0' encoding='%s'?>\n" % encoding)
        if method == "text":
            _serialize_text(write, self._root, encoding)
        else:
            qnames, namespaces = _namespaces(
                self._root, encoding, default_namespace
            )
            serialize = _serialize[method]
            serialize(write, self._root, encoding, qnames, namespaces)
    finally:
        # only close a file we opened ourselves; a caller-supplied
        # file object stays under the caller's control
        if file_or_filename is not out:
            out.close()
823 |
|
---|
def write_c14n(self, file):
    """Write the tree with the "c14n" output method.

    Kept for lxml.etree compatibility; new code should pass
    method="c14n" to write() instead.
    """
    c14n_method = "c14n"
    return self.write(file, method=c14n_method)
827 |
|
---|
828 | # --------------------------------------------------------------------
|
---|
829 | # serialization support
|
---|
830 |
|
---|
def _namespaces(elem, encoding, default_namespace=None):
    """Identify the qualified names and namespaces used in a tree.

    Walks *elem* and everything below it, looking at tags, attribute
    names, and QName-valued attribute values and text.

    Returns a (qnames, namespaces) pair: qnames maps each name to its
    encoded "prefix:local" form, namespaces maps each uri to the prefix
    chosen for it.  Raises ValueError if a non-qualified name is found
    while a default namespace is in use.
    """
    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def serialized_name(qname):
        # work out the encoded prefix:local form of a single name
        if qname[:1] != "{":
            if default_namespace:
                # FIXME: can this be handled in XML 1.0?
                raise ValueError(
                    "cannot use non-qualified names with "
                    "default_namespace option"
                )
            return qname.encode(encoding)
        uri, local = qname[1:].rsplit("}", 1)
        prefix = namespaces.get(uri)
        if prefix is None:
            prefix = _namespace_map.get(uri)
            if prefix is None:
                prefix = "ns%d" % len(namespaces)
            if prefix != "xml":
                namespaces[uri] = prefix
        if prefix:
            return ("%s:%s" % (prefix, local)).encode(encoding)
        return local.encode(encoding)  # default element

    def add_qname(qname):
        # calculate and record the serialized qname representation
        try:
            qnames[qname] = serialized_name(qname)
        except TypeError:
            _raise_serialization_error(qname)

    def note(name):
        # register a name the first time it is seen
        if name not in qnames:
            add_qname(name)

    # populate qname and namespaces table
    try:
        iterate = elem.iter
    except AttributeError:
        iterate = elem.getiterator  # cET compatibility
    for node in iterate():
        tag = node.tag
        if isinstance(tag, QName):
            note(tag.text)
        elif isinstance(tag, basestring):
            note(tag)
        elif not (tag is None or tag is Comment or tag is PI):
            _raise_serialization_error(tag)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            note(key)
            if isinstance(value, QName):
                note(value.text)
        text = node.text
        if isinstance(text, QName):
            note(text.text)
    return qnames, namespaces
898 |
|
---|
def _serialize_xml(write, elem, encoding, qnames, namespaces):
    """Write *elem* and its subtree as XML through the *write* callable.

    *qnames* maps tags and attribute names to their encoded serialized
    names.  *namespaces* maps uri:s to prefixes; the xmlns declarations
    are emitted on this element only, since children are serialized with
    namespaces set to None.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _encode(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _encode(text, encoding))
    else:
        name = qnames[tag]
        if name is None:
            # no tag to write: emit only the text and the children
            if text:
                write(_escape_cdata(text, encoding))
            for child in elem:
                _serialize_xml(write, child, encoding, qnames, None)
        else:
            write("<" + name)
            attributes = elem.items()
            if namespaces:
                declarations = sorted(namespaces.items(),
                                      key=lambda pair: pair[1])  # sort on prefix
                for uri, prefix in declarations:
                    if prefix:
                        prefix = ":" + prefix
                    write(" xmlns%s=\"%s\"" % (
                        prefix.encode(encoding),
                        _escape_attrib(uri, encoding)
                        ))
            for key, value in sorted(attributes):  # lexical order
                if isinstance(key, QName):
                    key = key.text
                if isinstance(value, QName):
                    value = qnames[value.text]
                else:
                    value = _escape_attrib(value, encoding)
                write(" %s=\"%s\"" % (qnames[key], value))
            if text or len(elem):
                write(">")
                if text:
                    write(_escape_cdata(text, encoding))
                for child in elem:
                    _serialize_xml(write, child, encoding, qnames, None)
                write("</" + name + ">")
            else:
                # nothing inside: use the short empty-element form
                write(" />")
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))
945 |
|
---|
# Lower-case tag names for which the HTML serializer in this module
# writes no closing tag.
HTML_EMPTY = (
    "area", "base", "basefont", "br", "col", "frame", "hr",
    "img", "input", "isindex", "link", "meta", "param",
)

# switch to a set where the builtin exists; otherwise keep the tuple
try:
    HTML_EMPTY = set(HTML_EMPTY)
except NameError:
    pass
953 |
|
---|
def _serialize_html(write, elem, encoding, qnames, namespaces):
    """Write *elem* and its subtree as HTML through the *write* callable.

    Differs from the XML serializer in three ways visible here: the
    start tag is always closed with ">", the text of script and style
    elements is encoded without escaping, and no end tag is written for
    tags listed in HTML_EMPTY.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text, encoding))
    else:
        name = qnames[tag]
        if name is None:
            # no tag to write: emit only the text and the children
            if text:
                write(_escape_cdata(text, encoding))
            for child in elem:
                _serialize_html(write, child, encoding, qnames, None)
        else:
            write("<" + name)
            attributes = elem.items()
            if namespaces:
                declarations = sorted(namespaces.items(),
                                      key=lambda pair: pair[1])  # sort on prefix
                for uri, prefix in declarations:
                    if prefix:
                        prefix = ":" + prefix
                    write(" xmlns%s=\"%s\"" % (
                        prefix.encode(encoding),
                        _escape_attrib(uri, encoding)
                        ))
            for key, value in sorted(attributes):  # lexical order
                if isinstance(key, QName):
                    key = key.text
                if isinstance(value, QName):
                    value = qnames[value.text]
                else:
                    value = _escape_attrib_html(value, encoding)
                # FIXME: handle boolean attributes
                write(" %s=\"%s\"" % (qnames[key], value))
            write(">")
            lowered = name.lower()
            if text:
                if lowered in ("script", "style"):
                    # raw text elements: encode only, do not escape
                    write(_encode(text, encoding))
                else:
                    write(_escape_cdata(text, encoding))
            for child in elem:
                _serialize_html(write, child, encoding, qnames, None)
            if lowered not in HTML_EMPTY:
                write("</" + name + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))
1003 |
|
---|
def _serialize_text(write, elem, encoding):
    """Write the encoded text content of *elem*, then its tail.

    Each part yielded by elem.itertext() is encoded and written
    separately; the tail is written only if it is non-empty.
    """
    for fragment in elem.itertext():
        write(fragment.encode(encoding))
    trailing = elem.tail
    if trailing:
        write(trailing.encode(encoding))
1009 |
|
---|
# Output method name -> serializer function.  The write method checks
# the requested method against this table and looks the serializer up
# in it.
_serialize = dict(
    xml=_serialize_xml,
    html=_serialize_html,
    text=_serialize_text,
    # this optional method is imported at the end of the module
    # "c14n": _serialize_c14n,
)
1017 |
|
---|
1018 | ##
|
---|
1019 | # Registers a namespace prefix. The registry is global, and any
|
---|
1020 | # existing mapping for either the given prefix or the namespace URI
|
---|
1021 | # will be removed.
|
---|
1022 | #
|
---|
1023 | # @param prefix Namespace prefix.
|
---|
1024 | # @param uri Namespace uri. Tags and attributes in this namespace
|
---|
1025 | # will be serialized with the given prefix, if at all possible.
|
---|
1026 | # @exception ValueError If the prefix is reserved, or is otherwise
|
---|
1027 | # invalid.
|
---|
1028 |
|
---|
def register_namespace(prefix, uri):
    """Register *prefix* as the serialization prefix for namespace *uri*.

    The registry is global.  Any existing entry that uses either the
    same uri or the same prefix is removed before the new one is added.

    @param prefix Namespace prefix.
    @param uri Namespace uri.
    @exception ValueError If the prefix has the form "ns" followed by
        digits, which is reserved for generated prefixes.
    """
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # iterate over a snapshot: entries are deleted inside the loop, and
    # deleting from a dict while iterating its live items is an error
    for k, v in list(_namespace_map.items()):
        if k == uri or v == prefix:
            del _namespace_map[k]
    _namespace_map[uri] = prefix

_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # dublin core
    "http://purl.org/dc/elements/1.1/": "dc",
}
1049 |
|
---|
def _raise_serialization_error(text):
    """Raise TypeError naming the value that cannot be serialized, and its type."""
    type_name = type(text).__name__
    raise TypeError("cannot serialize %r (type %s)" % (text, type_name))
1054 |
|
---|
def _encode(text, encoding):
    """Encode *text*, using character references for unencodable characters.

    A value that cannot be encoded this way (TypeError or AttributeError)
    is reported through _raise_serialization_error.
    """
    try:
        encoded = text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
    else:
        return encoded
1060 |
|
---|
def _escape_cdata(text, encoding):
    """Escape character data and encode it.

    Replaces "&", "<" and ">" with the corresponding entity references,
    then encodes the result, using character references for anything
    the encoding cannot represent.  A value that does not behave like a
    string is reported through _raise_serialization_error.
    """
    try:
        # it's worth avoiding do-nothing calls for strings that are
        # shorter than 500 character, or so.  assume that's, by far,
        # the most common case in most applications.
        # "&" must be handled first so that the entities introduced
        # below are not escaped a second time.
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
1076 |
|
---|
def _escape_attrib(text, encoding):
    """Escape an attribute value and encode it.

    Replaces "&", "<", ">" and the double quote with entity references
    and each newline with the character reference "&#10;", then encodes
    the result, using character references for anything the encoding
    cannot represent.  A value that does not behave like a string is
    reported through _raise_serialization_error.
    """
    try:
        # "&" must be handled first so that the references introduced
        # below are not escaped a second time.
        if "&" in text:
            text = text.replace("&", "&amp;")
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        if "\n" in text:
            text = text.replace("\n", "&#10;")
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
1093 |
|
---|
def _escape_attrib_html(text, encoding):
    """Escape an attribute value for HTML output and encode it.

    Replaces "&", ">" and the double quote with entity references ("<"
    is left as it is), then encodes the result, using character
    references for anything the encoding cannot represent.  A value that
    does not behave like a string is reported through
    _raise_serialization_error.
    """
    try:
        # "&" must be handled first so that the entities introduced
        # below are not escaped a second time.
        if "&" in text:
            text = text.replace("&", "&amp;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        return text.encode(encoding, "xmlcharrefreplace")
    except (TypeError, AttributeError):
        _raise_serialization_error(text)
1106 |
|
---|
1107 | # --------------------------------------------------------------------
|
---|
1108 |
|
---|
1109 | ##
|
---|
1110 | # Generates a string representation of an XML element, including all
|
---|
1111 | # subelements.
|
---|
1112 | #
|
---|
1113 | # @param element An Element instance.
|
---|
1114 | # @keyparam encoding Optional output encoding (default is US-ASCII).
|
---|
1115 | # @keyparam method Optional output method ("xml", "html", "text" or
|
---|
1116 | # "c14n"; default is "xml").
|
---|
1117 | # @return An encoded string containing the XML data.
|
---|
1118 | # @defreturn string
|
---|
1119 |
|
---|
def tostring(element, encoding=None, method=None):
    """Serialize *element* and return the output as a single string.

    The fragments produced by tostringlist() for the same arguments are
    joined together.
    """
    fragments = tostringlist(element, encoding, method)
    return "".join(fragments)
1128 |
|
---|
1129 | ##
|
---|
1130 | # Generates a string representation of an XML element, including all
|
---|
1131 | # subelements. The string is returned as a sequence of string fragments.
|
---|
1132 | #
|
---|
1133 | # @param element An Element instance.
|
---|
1134 | # @keyparam encoding Optional output encoding (default is US-ASCII).
|
---|
1135 | # @keyparam method Optional output method ("xml", "html", "text" or
|
---|
1136 | # "c14n"; default is "xml").
|
---|
1137 | # @return A sequence object containing the XML data.
|
---|
1138 | # @defreturn sequence
|
---|
1139 | # @since 1.3
|
---|
1140 |
|
---|
def tostringlist(element, encoding=None, method=None):
    """Serialize *element* and return the output as a list of fragments.

    The element is wrapped in an ElementTree and written to a stand-in
    file object whose write method appends to the returned list.
    """
    class dummy:
        pass
    fragments = []
    sink = dummy()
    sink.write = fragments.append
    tree = ElementTree(element)
    tree.write(sink, encoding, method=method)
    # FIXME: merge small fragments into larger parts
    return fragments
1150 |
|
---|
1151 | ##
|
---|
1152 | # Writes an element tree or element structure to sys.stdout. This
|
---|
1153 | # function should be used for debugging only.
|
---|
1154 | # <p>
|
---|
1155 | # The exact output format is implementation dependent. In this
|
---|
1156 | # version, it's written as an ordinary XML file.
|
---|
1157 | #
|
---|
1158 | # @param elem An element tree or an individual element.
|
---|
1159 |
|
---|
def dump(elem):
    """Write a tree or a single element to sys.stdout, for debugging.

    A newline is added unless the root's tail already ends with one.
    """
    # debugging
    tree = elem if isinstance(elem, ElementTree) else ElementTree(elem)
    tree.write(sys.stdout)
    tail = tree.getroot().tail
    ends_with_newline = tail and tail[-1] == "\n"
    if not ends_with_newline:
        sys.stdout.write("\n")
1168 |
|
---|
1169 | # --------------------------------------------------------------------
|
---|
1170 | # parsing
|
---|
1171 |
|
---|
1172 | ##
|
---|
1173 | # Parses an XML document into an element tree.
|
---|
1174 | #
|
---|
1175 | # @param source A filename or file object containing XML data.
|
---|
1176 | # @param parser An optional parser instance. If not given, the
|
---|
1177 | # standard {@link XMLParser} parser is used.
|
---|
1178 | # @return An ElementTree instance
|
---|
1179 |
|
---|
def parse(source, parser=None):
    """Create an ElementTree, load *source* into it, and return the tree.

    *source* and *parser* are handed unchanged to the tree's parse method.
    """
    document = ElementTree()
    document.parse(source, parser)
    return document
1184 |
|
---|
1185 | ##
|
---|
1186 | # Parses an XML document into an element tree incrementally, and reports
|
---|
1187 | # what's going on to the user.
|
---|
1188 | #
|
---|
1189 | # @param source A filename or file object containing XML data.
|
---|
1190 | # @param events A list of events to report back. If omitted, only "end"
|
---|
1191 | # events are reported.
|
---|
1192 | # @param parser An optional parser instance. If not given, the
|
---|
1193 | # standard {@link XMLParser} parser is used.
|
---|
1194 | # @return A (event, elem) iterator.
|
---|
1195 |
|
---|
def iterparse(source, events=None, parser=None):
    """Return an iterator that parses *source* incrementally.

    @param source An object with a ``read`` method, or a file name.  A
        file name is opened in "rb" mode here and the iterator is told
        to close it.
    @param events The events to report; passed on to the iterator.
    @param parser An optional parser instance.  If not given, an
        XMLParser with a TreeBuilder target is created.
    @return An _IterParseIterator instance.
    """
    close_source = False
    if not hasattr(source, "read"):
        source = open(source, "rb")
        close_source = True
    try:
        if not parser:
            parser = XMLParser(target=TreeBuilder())
        return _IterParseIterator(source, events, parser, close_source)
    except:
        # setup failed (for example on an unknown event name), so no
        # iterator exists to close the file we opened; close it here
        # and let the original exception propagate
        if close_source:
            source.close()
        raise
1204 |
|
---|
class _IterParseIterator(object):
    """Iterator over the (event, value) pairs produced while parsing.

    The source is read in blocks of 16384 and fed to the parser; handlers
    installed on the underlying parser object queue up events, which
    next() then hands out one at a time.  Once the input is exhausted the
    parser is closed and its result becomes available as the ``root``
    attribute when iteration stops.
    """

    def __init__(self, source, events, parser, close_source=False):
        self._file = source
        self._close_file = close_source
        self._events = []
        self._index = 0
        self._error = None
        self.root = self._root = None
        self._parser = parser
        # wire up the parser for event reporting
        lowlevel = self._parser._parser
        queue = self._events.append
        if events is None:
            events = ["end"]
        # the handlers bind event/queue/callback as default arguments so
        # that each one keeps the values of the iteration that made it
        for event in events:
            if event == "start":
                try:
                    lowlevel.ordered_attributes = 1
                    lowlevel.specified_attributes = 1
                    def on_start(tag, attrib_in, event=event, queue=queue,
                                 start=self._parser._start_list):
                        queue((event, start(tag, attrib_in)))
                    lowlevel.StartElementHandler = on_start
                except AttributeError:
                    def on_start(tag, attrib_in, event=event, queue=queue,
                                 start=self._parser._start):
                        queue((event, start(tag, attrib_in)))
                    lowlevel.StartElementHandler = on_start
            elif event == "end":
                def on_end(tag, event=event, queue=queue,
                           end=self._parser._end):
                    queue((event, end(tag)))
                lowlevel.EndElementHandler = on_end
            elif event == "start-ns":
                def on_start_ns(prefix, uri, event=event, queue=queue):
                    try:
                        uri = (uri or "").encode("ascii")
                    except UnicodeError:
                        pass
                    queue((event, (prefix or "", uri or "")))
                lowlevel.StartNamespaceDeclHandler = on_start_ns
            elif event == "end-ns":
                def on_end_ns(prefix, event=event, queue=queue):
                    queue((event, None))
                lowlevel.EndNamespaceDeclHandler = on_end_ns
            else:
                raise ValueError("unknown event %r" % event)

    def next(self):
        while True:
            # hand out buffered events first
            if self._index < len(self._events):
                item = self._events[self._index]
                self._index += 1
                return item
            # a parse error is reported only after the events queued
            # before it have been delivered
            if self._error:
                pending = self._error
                self._error = None
                raise pending
            if self._parser is None:
                # input exhausted and parser closed: publish the root
                self.root = self._root
                if self._close_file:
                    self._file.close()
                raise StopIteration
            # load event buffer (cleared in place: the handlers hold a
            # reference to this list's append method)
            del self._events[:]
            self._index = 0
            data = self._file.read(16384)
            if not data:
                self._root = self._parser.close()
                self._parser = None
            else:
                try:
                    self._parser.feed(data)
                except SyntaxError as exc:
                    self._error = exc

    def __iter__(self):
        return self
1286 |
|
---|
1287 | ##
|
---|
1288 | # Parses an XML document from a string constant. This function can
|
---|
1289 | # be used to embed "XML literals" in Python code.
|
---|
1290 | #
|
---|
1291 | # @param source A string containing XML data.
|
---|
1292 | # @param parser An optional parser instance. If not given, the
|
---|
1293 | # standard {@link XMLParser} parser is used.
|
---|
1294 | # @return An Element instance.
|
---|
1295 | # @defreturn Element
|
---|
1296 |
|
---|
def XML(text, parser=None):
    """Feed *text* to a parser and return whatever the parser's close() returns.

    If no parser is given, an XMLParser with a TreeBuilder target is used.
    """
    active = parser or XMLParser(target=TreeBuilder())
    active.feed(text)
    return active.close()
1302 |
|
---|
1303 | ##
|
---|
1304 | # Parses an XML document from a string constant, and also returns
|
---|
1305 | # a dictionary which maps from element id:s to elements.
|
---|
1306 | #
|
---|
1307 | # @param source A string containing XML data.
|
---|
1308 | # @param parser An optional parser instance. If not given, the
|
---|
1309 | # standard {@link XMLParser} parser is used.
|
---|
1310 | # @return A tuple containing an Element instance and a dictionary.
|
---|
1311 | # @defreturn (Element, dictionary)
|
---|
1312 |
|
---|
def XMLID(text, parser=None):
    """Parse *text* and also build a dictionary of elements keyed by id.

    Returns a (tree, ids) tuple, where ids maps each non-empty "id"
    attribute value found in the tree to its element.  If no parser is
    given, an XMLParser with a TreeBuilder target is used.
    """
    active = parser or XMLParser(target=TreeBuilder())
    active.feed(text)
    tree = active.close()
    ids = {}
    for node in tree.iter():
        ident = node.get("id")
        if ident:
            ids[ident] = node
    return tree, ids
1324 |
|
---|
1325 | ##
|
---|
1326 | # Parses an XML document from a string constant. Same as {@link #XML}.
|
---|
1327 | #
|
---|
1328 | # @def fromstring(text)
|
---|
1329 | # @param source A string containing XML data.
|
---|
1330 | # @return An Element instance.
|
---|
1331 | # @defreturn Element
|
---|
1332 |
|
---|
# fromstring is bound to the very same function object as XML
fromstring = XML
1334 |
|
---|
1335 | ##
|
---|
1336 | # Parses an XML document from a sequence of string fragments.
|
---|
1337 | #
|
---|
1338 | # @param sequence A list or other sequence containing XML data fragments.
|
---|
1339 | # @param parser An optional parser instance. If not given, the
|
---|
1340 | # standard {@link XMLParser} parser is used.
|
---|
1341 | # @return An Element instance.
|
---|
1342 | # @defreturn Element
|
---|
1343 | # @since 1.3
|
---|
1344 |
|
---|
def fromstringlist(sequence, parser=None):
    """Feed each fragment of *sequence* to a parser, in order.

    Returns whatever the parser's close() returns.  If no parser is
    given, an XMLParser with a TreeBuilder target is used.
    """
    active = parser or XMLParser(target=TreeBuilder())
    for fragment in sequence:
        active.feed(fragment)
    return active.close()
1351 |
|
---|
1352 | # --------------------------------------------------------------------
|
---|
1353 |
|
---|
1354 | ##
|
---|
1355 | # Generic element structure builder. This builder converts a sequence
|
---|
1356 | # of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link
|
---|
1357 | # #TreeBuilder.end} method calls to a well-formed element structure.
|
---|
1358 | # <p>
|
---|
1359 | # You can use this class to build an element structure using a custom XML
|
---|
1360 | # parser, or a parser for some other XML-like format.
|
---|
1361 | #
|
---|
1362 | # @param element_factory Optional element factory. This factory
|
---|
1363 | # is called to create new Element instances, as necessary.
|
---|
1364 |
|
---|
class TreeBuilder(object):

    def __init__(self, element_factory=None):
        if element_factory is None:
            element_factory = Element
        self._factory = element_factory
        self._data = []    # text fragments not yet attached to an element
        self._elem = []    # stack of elements that are still open
        self._last = None  # most recently opened or closed element
        self._tail = None  # true if we're after an end tag

    ##
    # Returns the toplevel document element.  All elements must have
    # been closed, and at least one element must have been seen.
    #
    # @return An Element instance.
    # @defreturn Element

    def close(self):
        assert len(self._elem) == 0, "missing end tags"
        assert self._last is not None, "missing toplevel element"
        return self._last

    # Attach the collected text to the last element: as its tail if we
    # are after an end tag, as its text otherwise.
    def _flush(self):
        if not self._data:
            return
        target = self._last
        if target is not None:
            text = "".join(self._data)
            if self._tail:
                assert target.tail is None, "internal error (tail)"
                target.tail = text
            else:
                assert target.text is None, "internal error (text)"
                target.text = text
        self._data = []

    ##
    # Adds text to the current element.  The text is only collected
    # here; it is attached when the next start or end call arrives.
    #
    # @param data A string.  This should be either an 8-bit string
    #    containing ASCII text, or a Unicode string.

    def data(self, data):
        self._data.append(data)

    ##
    # Opens a new element, and appends it to the element that is
    # currently open, if there is one.
    #
    # @param tag The element name.
    # @param attrs A dictionary containing element attributes.
    # @return The opened element.
    # @defreturn Element

    def start(self, tag, attrs):
        self._flush()
        opened = self._factory(tag, attrs)
        self._last = opened
        if self._elem:
            self._elem[-1].append(opened)
        self._elem.append(opened)
        self._tail = 0
        return opened

    ##
    # Closes the current element.  The given tag must match the tag of
    # the element being closed.
    #
    # @param tag The element name.
    # @return The closed element.
    # @defreturn Element

    def end(self, tag):
        self._flush()
        closed = self._elem.pop()
        self._last = closed
        assert closed.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   closed.tag, tag)
        self._tail = 1
        return closed
1441 |
|
---|
1442 | ##
|
---|
1443 | # Element structure builder for XML source data, based on the
|
---|
1444 | # <b>expat</b> parser.
|
---|
1445 | #
|
---|
1446 | # @keyparam target Target object. If omitted, the builder uses an
|
---|
1447 | # instance of the standard {@link #TreeBuilder} class.
|
---|
1448 | # @keyparam html Predefine HTML entities. This flag is not supported
|
---|
1449 | # by the current implementation.
|
---|
1450 | # @keyparam encoding Optional encoding. If given, the value overrides
|
---|
1451 | # the encoding specified in the XML file.
|
---|
1452 | # @see #ElementTree
|
---|
1453 | # @see #TreeBuilder
|
---|
1454 |
|
---|
1455 | class XMLParser(object):
|
---|
1456 |
|
---|
1457 | def __init__(self, html=0, target=None, encoding=None):
|
---|
1458 | try:
|
---|
1459 | from xml.parsers import expat
|
---|
1460 | except ImportError:
|
---|
1461 | try:
|
---|
1462 | import pyexpat as expat
|
---|
1463 | except ImportError:
|
---|
1464 | raise ImportError(
|
---|
1465 | "No module named expat; use SimpleXMLTreeBuilder instead"
|
---|
1466 | )
|
---|
1467 | parser = expat.ParserCreate(encoding, "}")
|
---|
1468 | if target is None:
|
---|
1469 | target = TreeBuilder()
|
---|
1470 | # underscored names are provided for compatibility only
|
---|
1471 | self.parser = self._parser = parser
|
---|
1472 | self.target = self._target = target
|
---|
1473 | self._error = expat.error
|
---|
1474 | self._names = {} # name memo cache
|
---|
1475 | # callbacks
|
---|
1476 | parser.DefaultHandlerExpand = self._default
|
---|
1477 | parser.StartElementHandler = self._start
|
---|
1478 | parser.EndElementHandler = self._end
|
---|
1479 | parser.CharacterDataHandler = self._data
|
---|
1480 | # optional callbacks
|
---|
1481 | parser.CommentHandler = self._comment
|
---|
1482 | parser.ProcessingInstructionHandler = self._pi
|
---|
1483 | # let expat do the buffering, if supported
|
---|
1484 | try:
|
---|
1485 | self._parser.buffer_text = 1
|
---|
1486 | except AttributeError:
|
---|
1487 | pass
|
---|
1488 | # use new-style attribute handling, if supported
|
---|
1489 | try:
|
---|
1490 | self._parser.ordered_attributes = 1
|
---|
1491 | self._parser.specified_attributes = 1
|
---|
1492 | parser.StartElementHandler = self._start_list
|
---|
1493 | except AttributeError:
|
---|
1494 | pass
|
---|
1495 | self._doctype = None
|
---|
1496 | self.entity = {}
|
---|
1497 | try:
|
---|
1498 | self.version = "Expat %d.%d.%d" % expat.version_info
|
---|
1499 | except AttributeError:
|
---|
1500 | pass # unknown
|
---|
1501 |
|
---|
1502 | def _raiseerror(self, value):
|
---|
1503 | err = ParseError(value)
|
---|
1504 | err.code = value.code
|
---|
1505 | err.position = value.lineno, value.offset
|
---|
1506 | raise err
|
---|
1507 |
|
---|
1508 | def _fixtext(self, text):
|
---|
1509 | # convert text string to ascii, if possible
|
---|
1510 | try:
|
---|
1511 | return text.encode("ascii")
|
---|
1512 | except UnicodeError:
|
---|
1513 | return text
|
---|
1514 |
|
---|
1515 | def _fixname(self, key):
|
---|
1516 | # expand qname, and convert name string to ascii, if possible
|
---|
1517 | try:
|
---|
1518 | name = self._names[key]
|
---|
1519 | except KeyError:
|
---|
1520 | name = key
|
---|
1521 | if "}" in name:
|
---|
1522 | name = "{" + name
|
---|
1523 | self._names[key] = name = self._fixtext(name)
|
---|
1524 | return name
|
---|
1525 |
|
---|
1526 | def _start(self, tag, attrib_in):
|
---|
1527 | fixname = self._fixname
|
---|
1528 | fixtext = self._fixtext
|
---|
1529 | tag = fixname(tag)
|
---|
1530 | attrib = {}
|
---|
1531 | for key, value in attrib_in.items():
|
---|
1532 | attrib[fixname(key)] = fixtext(value)
|
---|
1533 | return self.target.start(tag, attrib)
|
---|
1534 |
|
---|
1535 | def _start_list(self, tag, attrib_in):
|
---|
1536 | fixname = self._fixname
|
---|
1537 | fixtext = self._fixtext
|
---|
1538 | tag = fixname(tag)
|
---|
1539 | attrib = {}
|
---|
1540 | if attrib_in:
|
---|
1541 | for i in range(0, len(attrib_in), 2):
|
---|
1542 | attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1])
|
---|
1543 | return self.target.start(tag, attrib)
|
---|
1544 |
|
---|
1545 | def _data(self, text):
|
---|
1546 | return self.target.data(self._fixtext(text))
|
---|
1547 |
|
---|
1548 | def _end(self, tag):
|
---|
1549 | return self.target.end(self._fixname(tag))
|
---|
1550 |
|
---|
def _comment(self, data):
    """Forward a comment to the target, if it has a ``comment`` method.

    Returns the handler's result, or None when looking up
    ``self.target.comment`` raises AttributeError.
    """
    try:
        handler = self.target.comment
    except AttributeError:
        # target does not care about comments
        return None
    return handler(self._fixtext(data))
1558 |
|
---|
def _pi(self, target, data):
    """Forward a processing instruction, if the target has a ``pi`` method.

    Both the PI *target* and its *data* go through ``self._fixtext``.
    Returns the handler's result, or None when looking up
    ``self.target.pi`` raises AttributeError.
    """
    try:
        handler = self.target.pi
    except AttributeError:
        # target does not care about processing instructions
        return None
    pi_target = self._fixtext(target)
    pi_data = self._fixtext(data)
    return handler(pi_target, pi_data)
1566 |
|
---|
def _default(self, text):
    """Handle a raw chunk of markup: entity references and DOCTYPE parts.

    (Presumably installed as the expat default handler -- confirm against
    the constructor, which is outside this view.)

    * ``&name;``: the replacement text is looked up in ``self.entity`` and
      sent to ``self.target.data``.  A KeyError is reported as an expat
      error with code 11 and the parser's current line/column.
    * ``<!DOCTYPE``: starts collecting the declaration in ``self._doctype``.
    * While a declaration is being collected, each non-blank chunk is
      appended.  Once a complete ``PUBLIC`` (4 parts) or ``SYSTEM``
      (3 parts) declaration is available it is reported to
      ``self.target.doctype`` if the target has one, else to
      ``self.doctype`` when that method has been overridden.
    """
    prefix = text[:1]
    if prefix == "&":
        # deal with undefined entities
        try:
            self.target.data(self.entity[text[1:-1]])
        except KeyError:
            from xml.parsers import expat
            err = expat.error(
                "undefined entity %s: line %d, column %d" %
                (text, self._parser.ErrorLineNumber,
                 self._parser.ErrorColumnNumber)
                )
            err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
            err.lineno = self._parser.ErrorLineNumber
            err.offset = self._parser.ErrorColumnNumber
            raise err
    elif prefix == "<" and text[:9] == "<!DOCTYPE":
        self._doctype = [] # inside a doctype declaration
    elif self._doctype is not None:
        # parse doctype contents
        if prefix == ">":
            self._doctype = None
            return
        text = text.strip()
        if not text:
            return
        self._doctype.append(text)
        n = len(self._doctype)
        if n > 2:
            kind = self._doctype[1]
            if kind == "PUBLIC" and n == 4:
                name, kind, pubid, system = self._doctype
            elif kind == "SYSTEM" and n == 3:
                name, kind, system = self._doctype
                pubid = None
            else:
                # declaration not complete yet (or not recognised)
                return
            if pubid:
                pubid = pubid[1:-1] # strip the surrounding quotes
            if hasattr(self.target, "doctype"):
                self.target.doctype(name, pubid, system[1:-1])
            elif self.doctype != self._XMLParser__doctype:
                # Compare with != rather than "is not": every attribute
                # access builds a new bound-method object, so an identity
                # test is always true and would fire the deprecated hook
                # even when no subclass overrides doctype().
                # warn about deprecated call
                self._XMLParser__doctype(name, pubid, system[1:-1])
                self.doctype(name, pubid, system[1:-1])
            self._doctype = None
1614 |
|
---|
1615 | ##
|
---|
1616 | # (Deprecated) Handles a doctype declaration.
|
---|
1617 | #
|
---|
1618 | # @param name Doctype name.
|
---|
1619 | # @param pubid Public identifier.
|
---|
1620 | # @param system System identifier.
|
---|
1621 |
|
---|
def doctype(self, name, pubid, system):
    """This method of XMLParser is deprecated."""
    message = (
        "This method of XMLParser is deprecated. Define doctype() "
        "method on the TreeBuilder target."
        )
    warnings.warn(message, DeprecationWarning)

# Reference to the base implementation, kept under a private name so
# that a doctype() redefined in a subclass can be told apart from it.
__doctype = doctype
1632 |
|
---|
1633 | ##
|
---|
1634 | # Feeds data to the parser.
|
---|
1635 | #
|
---|
1636 | # @param data Encoded data.
|
---|
1637 |
|
---|
def feed(self, data):
    """Feed a chunk of encoded data to the parser.

    Exceptions of type ``self._error`` raised by ``self._parser.Parse``
    are handed to ``self._raiseerror``.
    """
    try:
        self._parser.Parse(data, 0)
    except self._error:
        # Fetch the exception via sys.exc_info() instead of naming it in
        # the except clause: "except X, v" is a syntax error on Python 3
        # and "except X as v" needs 2.6+, while this form works on every
        # version from 2.3 on.
        import sys
        self._raiseerror(sys.exc_info()[1])
1643 |
|
---|
1644 | ##
|
---|
1645 | # Finishes feeding data to the parser.
|
---|
1646 | #
|
---|
1647 | # @return An element structure.
|
---|
1648 | # @defreturn Element
|
---|
1649 |
|
---|
def close(self):
    """Finish feeding data to the parser and return the target's result.

    Signals end of data to ``self._parser``; exceptions of type
    ``self._error`` are handed to ``self._raiseerror``.  Afterwards the
    value of ``self.target.close()`` is returned, and the ``target`` and
    ``_parser`` attributes are deleted from this instance.
    """
    try:
        self._parser.Parse("", 1) # end of data
    except self._error:
        # Fetch the exception via sys.exc_info() instead of naming it in
        # the except clause: "except X, v" is a syntax error on Python 3
        # and "except X as v" needs 2.6+, while this form works on every
        # version from 2.3 on.
        import sys
        self._raiseerror(sys.exc_info()[1])
    tree = self.target.close()
    del self.target, self._parser # get rid of circular references
    return tree
1658 |
|
---|
# compatibility: the parser class is also reachable under this name
XMLTreeBuilder = XMLParser

# workaround circular import: register the C14N serializer only when
# ElementC14N can be imported at this point.
try:
    from ElementC14N import _serialize_c14n
except ImportError:
    pass
else:
    _serialize["c14n"] = _serialize_c14n