Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

ElementTree.py

Last change on this file was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 55.2 KB

Line
1	#
2	# ElementTree
3	# $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $
4	#
5	# light-weight XML support for Python 2.3 and later.
6	#
7	# history (since 1.2.6):
8	# 2005-11-12 fl added tostringlist/fromstringlist helpers
9	# 2006-07-05 fl merged in selected changes from the 1.3 sandbox
10	# 2006-07-05 fl removed support for 2.1 and earlier
11	# 2007-06-21 fl added deprecation/future warnings
12	# 2007-08-25 fl added doctype hook, added parser version attribute etc
13	# 2007-08-26 fl added new serializer code (better namespace handling, etc)
14	# 2007-08-27 fl warn for broken /tag searches on tree level
15	# 2007-09-02 fl added html/text methods to serializer (experimental)
16	# 2007-09-05 fl added method argument to tostring/tostringlist
17	# 2007-09-06 fl improved error handling
18	# 2007-09-13 fl added itertext, iterfind; assorted cleanups
19	# 2007-12-15 fl added C14N hooks, copy method (experimental)
20	#
21	# Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved.
22	#
23	# fredrik@pythonware.com
24	# http://www.pythonware.com
25	#
26	# --------------------------------------------------------------------
27	# The ElementTree toolkit is
28	#
29	# Copyright (c) 1999-2008 by Fredrik Lundh
30	#
31	# By obtaining, using, and/or copying this software and/or its
32	# associated documentation, you agree that you have read, understood,
33	# and will comply with the following terms and conditions:
34	#
35	# Permission to use, copy, modify, and distribute this software and
36	# its associated documentation for any purpose and without fee is
37	# hereby granted, provided that the above copyright notice appears in
38	# all copies, and that both that copyright notice and this permission
39	# notice appear in supporting documentation, and that the name of
40	# Secret Labs AB or the author not be used in advertising or publicity
41	# pertaining to distribution of the software without specific, written
42	# prior permission.
43	#
44	# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
45	# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
46	# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
47	# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
48	# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
49	# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
50	# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
51	# OF THIS SOFTWARE.
52	# --------------------------------------------------------------------
53
54	# Licensed to PSF under a Contributor Agreement.
55	# See http://www.python.org/psf/license for licensing details.
56
# Names exported by "from ... import *".  Every entry must be a public
# symbol defined in this module.
__all__ = [
    # public symbols
    "Comment",
    "dump",
    "Element", "ElementTree",
    "fromstring", "fromstringlist",
    "iselement", "iterparse",
    "parse", "ParseError",
    "PI", "ProcessingInstruction",
    "QName",
    # register_namespace is a public function of this module and was
    # missing from the export list
    "register_namespace",
    "SubElement",
    "tostring", "tostringlist",
    "TreeBuilder",
    "VERSION",
    "XML",
    "XMLParser", "XMLTreeBuilder",
    ]

# Version of the ElementTree toolkit implemented by this module.
VERSION = "1.3.0"
76
77	##
78	# The <b>Element</b> type is a flexible container object, designed to
79	# store hierarchical data structures in memory. The type can be
80	# described as a cross between a list and a dictionary.
81	# <p>
82	# Each element has a number of properties associated with it:
83	# <ul>
84	# <li>a <i>tag</i>. This is a string identifying what kind of data
85	# this element represents (the element type, in other words).</li>
86	# <li>a number of <i>attributes</i>, stored in a Python dictionary.</li>
87	# <li>a <i>text</i> string.</li>
88	# <li>an optional <i>tail</i> string.</li>
89	# <li>a number of <i>child elements</i>, stored in a Python sequence</li>
90	# </ul>
91	#
92	# To create an element instance, use the {@link #Element} constructor
93	# or the {@link #SubElement} factory function.
94	# <p>
95	# The {@link #ElementTree} class can be used to wrap an element
96	# structure, and convert it from and to XML.
97	##
98
99	import sys
100	import re
101	import warnings
102
103
class _SimpleElementPath(object):
    """Minimal path engine emulating pre-1.2 find/findtext/findall.

    Only plain tag names are matched against direct children; the one
    path form recognised is a leading ".//", which is delegated to the
    element's own iter() method.  The namespaces argument is accepted
    for interface compatibility and otherwise ignored.
    """

    def find(self, element, tag, namespaces=None):
        """Return the first direct child whose tag equals tag, else None."""
        for child in element:
            if child.tag == tag:
                return child
        return None

    def findtext(self, element, tag, default=None, namespaces=None):
        """Return the text of the first matching child.

        default is returned when no child matches; a matching child
        without text yields an empty string.
        """
        match = self.find(element, tag)
        if match is not None:
            return match.text or ""
        return default

    def iterfind(self, element, tag, namespaces=None):
        """Generate matching elements for tag.

        A ".//name" tag yields everything element.iter("name") produces.
        Afterwards, in every case, the direct children whose tag equals
        the complete tag string are yielded as well.
        """
        if tag[:3] == ".//":
            for descendant in element.iter(tag[3:]):
                yield descendant
        for child in element:
            if child.tag == tag:
                yield child

    def findall(self, element, tag, namespaces=None):
        """Return the results of iterfind() as a list."""
        return [match for match in self.iterfind(element, tag, namespaces)]
125
# Bind the module-level name ElementPath, which the Element find
# methods delegate to.  If the ElementPath module cannot be imported,
# fall back to an instance of the simple emulation class defined in
# this module.
try:
    from . import ElementPath
except ImportError:
    ElementPath = _SimpleElementPath()
130
131	##
132	# Parser error. This is a subclass of <b>SyntaxError</b>.
133	# <p>
134	# In addition to the exception value, an exception instance contains a
135	# specific exception code in the <b>code</b> attribute, and the line and
136	# column of the error in the <b>position</b> attribute.
137
class ParseError(SyntaxError):
    """Parser error; a subclass of SyntaxError.

    The class itself adds nothing to SyntaxError.  The comment preceding
    it in this module states that instances additionally carry a "code"
    attribute and a "position" attribute (line and column of the error).
    """
140
141	# --------------------------------------------------------------------
142
143	##
144	# Checks if an object appears to be a valid element object.
145	#
146	# @param element An element instance.
147	# @return A true value if this is an element object.
148	# @defreturn flag
149
def iselement(element):
    """Check whether an object appears to be a valid element object.

    The test is duck-typed: anything exposing a "tag" attribute is
    accepted.  Element defines "tag" at class level, so its instances
    always pass and a separate isinstance() check is unnecessary.

    @param element The object to examine.
    @return True if the object has a "tag" attribute, otherwise False.
    """
    return hasattr(element, "tag")
154
155	##
156	# Element class. This class defines the Element interface, and
157	# provides a reference implementation of this interface.
158	# <p>
159	# The element name, attribute names, and attribute values can be
160	# either ASCII strings (ordinary Python strings containing only 7-bit
161	# ASCII characters) or Unicode strings.
162	#
163	# @param tag The element name.
164	# @param attrib An optional dictionary, containing element attributes.
165	# @param **extra Additional attributes, given as keyword arguments.
166	# @see Element
167	# @see SubElement
168	# @see Comment
169	# @see ProcessingInstruction
170
class Element(object):
    # Serialized layout: <tag attrib>text<child/>...</tag>tail

    ##
    # (Attribute) The element tag.

    tag = None

    ##
    # (Attribute) Dictionary holding the element attributes.  Where
    # possible, go through
    # {@link #Element.get},
    # {@link #Element.set},
    # {@link #Element.keys}, and
    # {@link #Element.items} rather than using the dictionary directly.

    attrib = None

    ##
    # (Attribute) Text found before the first subelement.  The value is
    # a string or None; depending on the parser, missing text may show
    # up as either None or an empty string.

    text = None

    ##
    # (Attribute) Text found after this element's end tag and before
    # the start tag of the next sibling.  The value is a string or
    # None; depending on the parser, missing text may show up as either
    # None or an empty string.

    tail = None

    ##
    # Constructor.
    #
    # @param tag The element name.
    # @param attrib An optional dictionary of element attributes.  The
    #     dictionary is copied, never modified.
    # @param **extra Additional attributes, given as keyword arguments.

    def __init__(self, tag, attrib={}, **extra):
        # merge into a private copy, so that neither the caller's
        # dictionary nor the shared default is ever written to
        attributes = attrib.copy()
        attributes.update(extra)
        self.tag = tag
        self.attrib = attributes
        self._children = []

    def __repr__(self):
        shown_tag = repr(self.tag)
        return "<Element %s at 0x%x>" % (shown_tag, id(self))

    ##
    # Creates a new element object of the same type as this element.
    #
    # @param tag Element tag.
    # @param attrib Element attributes, given as a dictionary.
    # @return A new element instance.

    def makeelement(self, tag, attrib):
        factory = self.__class__
        return factory(tag, attrib)

    ##
    # (Experimental) Makes a shallow copy of this element.  The copy
    # gets the same tag, text and tail and a copied attribute
    # dictionary, but its subelements are the very same objects that
    # the original holds.
    #
    # @return A new element instance.

    def copy(self):
        duplicate = self.makeelement(self.tag, self.attrib)
        duplicate.text, duplicate.tail = self.text, self.tail
        duplicate[:] = self
        return duplicate

    ##
    # Returns the number of subelements.  Only full elements are
    # counted; an element with no subelements may still carry content
    # in its <b>text</b> attribute.
    #
    # @return The number of subelements.

    def __len__(self):
        return len(self._children)

    ##
    # Truth test.  Issues a FutureWarning, then reports whether the
    # element has any subelements (the old behaviour, kept for now).

    def __nonzero__(self):
        warnings.warn(
            "The behavior of this method will change in future versions. "
            "Use specific 'len(elem)' or 'elem is not None' test instead.",
            FutureWarning, stacklevel=2
            )
        return bool(self._children)

    ##
    # Returns the given subelement, by index.
    #
    # @param index What subelement to return.
    # @return The given subelement.
    # @exception IndexError If the given element does not exist.

    def __getitem__(self, index):
        children = self._children
        return children[index]

    ##
    # Replaces the given subelement, by index.  The new value is stored
    # as is; it is not checked to be an element.
    #
    # @param index What subelement to replace.
    # @param element The new element value.
    # @exception IndexError If the given element does not exist.

    def __setitem__(self, index, element):
        children = self._children
        children[index] = element

    ##
    # Deletes the given subelement, by index.
    #
    # @param index What subelement to delete.
    # @exception IndexError If the given element does not exist.

    def __delitem__(self, index):
        children = self._children
        del children[index]

    ##
    # Adds a subelement at the end of this element.  In document order
    # it follows the last existing subelement (or the text, if there
    # are no subelements yet) and precedes this element's end tag.
    #
    # @param element The element to add.

    def append(self, element):
        children = self._children
        children.append(element)

    ##
    # Appends subelements from a sequence.
    #
    # @param elements A sequence object with zero or more elements.
    # @since 1.3

    def extend(self, elements):
        children = self._children
        children.extend(elements)

    ##
    # Inserts a subelement at the given position in this element.
    #
    # @param index Where to insert the new subelement.
    # @param element The element to insert.

    def insert(self, index, element):
        children = self._children
        children.insert(index, element)

    ##
    # Removes a matching subelement.  Unlike the <b>find</b> methods,
    # matching is done on the element object itself, not on its tag or
    # contents.  To drop subelements by some other criterion, build a
    # list of the elements to keep and assign it to the parent with a
    # slice assignment.
    #
    # @param element What element to remove.
    # @exception ValueError If a matching element could not be found.

    def remove(self, element):
        children = self._children
        children.remove(element)

    ##
    # (Deprecated) Returns all subelements, in document order.  Issues
    # a DeprecationWarning.  The list returned is the element's own
    # internal child list, not a copy.
    #
    # @return A list of subelements.
    # @defreturn list of Element instances

    def getchildren(self):
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'list(elem)' or iteration over elem instead.",
            DeprecationWarning, stacklevel=2
            )
        return self._children

    ##
    # Finds the first matching subelement, by tag name or path.
    # Delegates to the module-level ElementPath object.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The first matching element, or None if no element was found.
    # @defreturn Element or None

    def find(self, path, namespaces=None):
        return ElementPath.find(self, path, namespaces)

    ##
    # Finds text for the first matching subelement, by tag name or path.
    # Delegates to the module-level ElementPath object.
    #
    # @param path What element to look for.
    # @param default What to return if the element was not found.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The text content of the first matching element, or the
    #     default value if no element was found.  Note that if the
    #     element is found, but has no text content, this method
    #     returns an empty string.
    # @defreturn string

    def findtext(self, path, default=None, namespaces=None):
        return ElementPath.findtext(self, path, default, namespaces)

    ##
    # Finds all matching subelements, by tag name or path.
    # Delegates to the module-level ElementPath object.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return A list or other sequence containing all matching elements,
    #     in document order.
    # @defreturn list of Element instances

    def findall(self, path, namespaces=None):
        return ElementPath.findall(self, path, namespaces)

    ##
    # Finds all matching subelements, by tag name or path.
    # Delegates to the module-level ElementPath object.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return An iterator or sequence containing all matching elements,
    #     in document order.
    # @defreturn a generated sequence of Element instances

    def iterfind(self, path, namespaces=None):
        return ElementPath.iterfind(self, path, namespaces)

    ##
    # Resets an element.  All attributes are cleared (the existing
    # attribute dictionary is emptied in place), all subelements are
    # dropped, and the <b>text</b> and <b>tail</b> attributes are set
    # to None.

    def clear(self):
        self.attrib.clear()
        self._children = []
        self.text = None
        self.tail = None

    ##
    # Gets an element attribute.  Equivalent to <b>attrib.get</b>.
    #
    # @param key What attribute to look for.
    # @param default What to return if the attribute was not found.
    # @return The attribute value, or the default value, if the
    #     attribute was not found.
    # @defreturn string or None

    def get(self, key, default=None):
        attributes = self.attrib
        return attributes.get(key, default)

    ##
    # Sets an element attribute.  Equivalent to
    # <b>attrib[key] = value</b>.
    #
    # @param key What attribute to set.
    # @param value The attribute value.

    def set(self, key, value):
        attributes = self.attrib
        attributes[key] = value

    ##
    # Gets the attribute names, in arbitrary order.  Equivalent to
    # <b>attrib.keys()</b>.
    #
    # @return A list of element attribute names.
    # @defreturn list of strings

    def keys(self):
        attributes = self.attrib
        return attributes.keys()

    ##
    # Gets the element attributes as (name, value) pairs, in arbitrary
    # order.  Equivalent to <b>attrib.items()</b>.
    #
    # @return A list of (name, value) tuples for all attributes.
    # @defreturn list of (string, string) tuples

    def items(self):
        attributes = self.attrib
        return attributes.items()

    ##
    # Creates a tree iterator.  The iterator visits this element first
    # and then every subelement, in document order, and yields those
    # whose tag matches.
    # <p>
    # If the tree structure is modified during iteration, new or removed
    # elements may or may not be included.  To get a stable set, use the
    # list() function on the iterator, and loop over the resulting list.
    #
    # @param tag What tags to look for.  None (the default) and "*"
    #     both select every element.
    # @return An iterator containing all the matching elements.
    # @defreturn iterator

    def iter(self, tag=None):
        if tag == "*":
            # "*" is just another spelling of "match everything"
            tag = None
        if tag is None or self.tag == tag:
            yield self
        for child in self._children:
            for match in child.iter(tag):
                yield match

    ##
    # (Compatibility) Same as iter(), except that a
    # PendingDeprecationWarning is issued and the matches are returned
    # as a list.

    def getiterator(self, tag=None):
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions.  "
            "Use 'elem.iter()' or 'list(elem.iter())' instead.",
            PendingDeprecationWarning, stacklevel=2
            )
        return list(self.iter(tag))

    ##
    # Creates a text iterator.  The iterator walks this element and all
    # subelements, in document order, and yields every non-empty piece
    # of inner text: the element's own text, then for each child its
    # inner text followed by the child's tail.  Nothing is yielded for
    # an element whose tag is neither a string nor None.
    #
    # @return An iterator containing all inner text.
    # @defreturn iterator

    def itertext(self):
        tag = self.tag
        if not (isinstance(tag, basestring) or tag is None):
            return
        if self.text:
            yield self.text
        for child in self:
            for fragment in child.itertext():
                yield fragment
            if child.tail:
                yield child.tail

# compatibility: older names for the Element class
_Element = Element
_ElementInterface = Element
512
513	##
514	# Subelement factory. This function creates an element instance, and
515	# appends it to an existing element.
516	# <p>
517	# The element name, attribute names, and attribute values can be
518	# either ASCII strings (ordinary Python strings containing only 7-bit ASCII characters) or Unicode strings.
519	#
520	# @param parent The parent element.
521	# @param tag The subelement name.
522	# @param attrib An optional dictionary, containing element attributes.
523	# @param **extra Additional attributes, given as keyword arguments.
524	# @return An element instance.
525	# @defreturn Element
526
def SubElement(parent, tag, attrib={}, **extra):
    """Create a subelement and append it to an existing element.

    The new element is built by the parent's own makeelement() method
    and then handed to the parent's append() method.

    @param parent The parent element.
    @param tag The subelement name.
    @param attrib An optional dictionary of attributes; it is copied,
        never modified.
    @param **extra Additional attributes, given as keyword arguments.
    @return The newly created element.
    """
    # merge into a copy so the caller's dictionary (and the shared
    # default) stay untouched
    attributes = attrib.copy()
    attributes.update(extra)
    child = parent.makeelement(tag, attributes)
    parent.append(child)
    return child
533
534	##
535	# Comment element factory. This factory function creates a special
536	# element that will be serialized as an XML comment by the standard
537	# serializer.
538	# <p>
539	# The comment string can be either an 8-bit ASCII string or a Unicode
540	# string.
541	#
542	# @param text A string containing the comment string.
543	# @return An element instance, representing a comment.
544	# @defreturn Element
545
def Comment(text=None):
    """Create a comment element.

    The result is an Element whose tag is the Comment function itself
    (the serializers in this module recognise that marker) and whose
    text is the comment string.

    @param text The comment string, or None.
    @return An element instance representing a comment.
    """
    node = Element(Comment)
    node.text = text
    return node
550
551	##
552	# PI element factory. This factory function creates a special element
553	# that will be serialized as an XML processing instruction by the standard
554	# serializer.
555	#
556	# @param target A string containing the PI target.
557	# @param text A string containing the PI contents, if any.
558	# @return An element instance, representing a PI.
559	# @defreturn Element
560
def ProcessingInstruction(target, text=None):
    """Create a processing-instruction element.

    The result is an Element whose tag is the ProcessingInstruction
    function itself and whose text is the target, followed by a single
    space and the contents when contents are given.

    @param target A string containing the PI target.
    @param text A string containing the PI contents, if any.
    @return An element instance representing a PI.
    """
    node = Element(ProcessingInstruction)
    content = target
    if text:
        content = content + " " + text
    node.text = content
    return node

# short alias for the factory above
PI = ProcessingInstruction
569
570	##
571	# QName wrapper. This can be used to wrap a QName attribute value, in
572	# order to get proper namespace handling on output.
573	#
574	# @param text A string containing the QName value, in the form {uri}local,
575	# or, if the tag argument is given, the URI part of a QName.
576	# @param tag Optional tag. If given, the first argument is interpreted as
577	# an URI, and this argument is interpreted as a local name.
578	# @return An opaque object, representing the QName.
579
class QName(object):
    """Wrapper around a qualified name, stored in the "text" attribute.

    With one argument the value is stored unchanged.  With a true
    second argument the first is taken as a URI and the second as a
    local name, and the two are combined into "{uri}local".

    Instances hash like their text and, through __cmp__, compare by
    text against other QName instances or against plain values.
    """

    def __init__(self, text_or_uri, tag=None):
        if tag:
            self.text = "{%s}%s" % (text_or_uri, tag)
        else:
            self.text = text_or_uri

    def __str__(self):
        return self.text

    def __hash__(self):
        return hash(self.text)

    def __cmp__(self, other):
        # unwrap another QName so that text is compared with text
        other_text = other.text if isinstance(other, QName) else other
        return cmp(self.text, other_text)
593
594	# --------------------------------------------------------------------
595
596	##
597	# ElementTree wrapper class. This class represents an entire element
598	# hierarchy, and adds some extra support for serialization to and from
599	# standard XML.
600	#
601	# @param element Optional root element.
602	# @keyparam file Optional file handle or file name. If given, the
603	# tree is initialized with the contents of this XML file.
604
class ElementTree(object):

    ##
    # Constructor.
    #
    # @param element Optional root element.
    # @keyparam file Optional file handle or file name.  If given (and
    #     true), the tree is initialized by parsing this XML file.

    def __init__(self, element=None, file=None):
        self._root = element # first node
        if file:
            self.parse(file)

    ##
    # Normalizes a search path for the tree-level find methods.  A path
    # starting with "/" is turned into the equivalent "./..." path, and
    # a FutureWarning is issued.
    #
    # @param path The path given by the caller.
    # @return The path to hand to the root element.

    @staticmethod
    def _relative_path(path):
        if path[:1] == "/":
            path = "." + path
            # stacklevel 3: skip this helper and the find method that
            # called it, so the warning points at the user's code
            warnings.warn(
                "This search is broken in 1.3 and earlier, and will be "
                "fixed in a future version. If you rely on the current "
                "behaviour, change it to %r" % path,
                FutureWarning, stacklevel=3
                )
        return path

    ##
    # Gets the root element for this tree.
    #
    # @return An element instance.
    # @defreturn Element

    def getroot(self):
        return self._root

    ##
    # Replaces the root element for this tree.  This discards the
    # current contents of the tree, and replaces it with the given
    # element.  Use with care.
    #
    # @param element An element instance.

    def _setroot(self, element):
        self._root = element

    ##
    # Loads an external XML document into this element tree.  The
    # source is read in chunks and fed to the parser; a file opened
    # here (from a file name) is always closed again.
    #
    # @param source A file name or file object.  If a file object is
    #     given, it only has to implement a <b>read(n)</b> method.
    # @keyparam parser An optional parser instance.  If not given, the
    #     standard {@link XMLParser} parser is used.
    # @return The document root element.
    # @defreturn Element
    # @exception ParseError If the parser fails to parse the document.

    def parse(self, source, parser=None):
        close_source = False
        if not hasattr(source, "read"):
            source = open(source, "rb")
            close_source = True
        try:
            if not parser:
                parser = XMLParser(target=TreeBuilder())
            while 1:
                data = source.read(65536)
                if not data:
                    break
                parser.feed(data)
            self._root = parser.close()
            return self._root
        finally:
            if close_source:
                source.close()

    ##
    # Creates a tree iterator for the root element.  The iterator loops
    # over all elements in this tree, in document order.
    #
    # @param tag What tags to look for (default is to return all elements)
    # @return An iterator.
    # @defreturn iterator

    def iter(self, tag=None):
        return self._root.iter(tag)

    ##
    # (Compatibility) Same as iter(), except that a
    # PendingDeprecationWarning is issued and the matches are returned
    # as a list.

    def getiterator(self, tag=None):
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions.  "
            "Use 'tree.iter()' or 'list(tree.iter())' instead.",
            PendingDeprecationWarning, stacklevel=2
            )
        return list(self.iter(tag))

    ##
    # Same as getroot().find(path), starting at the root of the
    # tree.  A path starting with "/" is searched as "./..." and
    # triggers a FutureWarning.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The first matching element, or None if no element was found.
    # @defreturn Element or None

    def find(self, path, namespaces=None):
        path = self._relative_path(path)
        return self._root.find(path, namespaces)

    ##
    # Same as getroot().findtext(path), starting at the root of the
    # tree.  A path starting with "/" is searched as "./..." and
    # triggers a FutureWarning.
    #
    # @param path What element to look for.
    # @param default What to return if the element was not found.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The text content of the first matching element, or the
    #     default value if no element was found.  Note that if the
    #     element is found, but has no text content, this method
    #     returns an empty string.
    # @defreturn string

    def findtext(self, path, default=None, namespaces=None):
        path = self._relative_path(path)
        return self._root.findtext(path, default, namespaces)

    ##
    # Same as getroot().findall(path), starting at the root of the
    # tree.  A path starting with "/" is searched as "./..." and
    # triggers a FutureWarning.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return A list or iterator containing all matching elements,
    #     in document order.
    # @defreturn list of Element instances

    def findall(self, path, namespaces=None):
        path = self._relative_path(path)
        return self._root.findall(path, namespaces)

    ##
    # Finds all matching subelements, by tag name or path.
    # Same as getroot().iterfind(path).  A path starting with "/" is
    # searched as "./..." and triggers a FutureWarning.
    #
    # @param path What element to look for.
    # @keyparam namespaces Optional namespace prefix map.
    # @return An iterator or sequence containing all matching elements,
    #     in document order.
    # @defreturn a generated sequence of Element instances

    def iterfind(self, path, namespaces=None):
        path = self._relative_path(path)
        return self._root.iterfind(path, namespaces)

    ##
    # Writes the element tree to a file, as XML.  A file opened here
    # (from a file name) is closed again even if serialization fails.
    #
    # @def write(file, **options)
    # @param file A file name, or a file object opened for writing.
    # @param **options Options, given as keyword arguments.
    # @keyparam encoding Optional output encoding (default is US-ASCII,
    #     or UTF-8 for the "c14n" method).
    # @keyparam xml_declaration Controls if an XML declaration should
    #     be added to the file.  Use False for never, True for always,
    #     None for only if not US-ASCII or UTF-8.  None is default.
    #     A declaration is only ever written for the "xml" method.
    # @keyparam default_namespace Sets the default XML namespace (for "xmlns").
    # @keyparam method Optional output method ("xml", "html", "text" or
    #     "c14n"; default is "xml").
    # @exception ValueError If the output method is unknown.

    def write(self, file_or_filename,
              # keyword arguments
              encoding=None,
              xml_declaration=None,
              default_namespace=None,
              method=None):
        if not method:
            method = "xml"
        elif method not in _serialize:
            # FIXME: raise an ImportError for c14n if ElementC14N is missing?
            raise ValueError("unknown method %r" % method)
        if hasattr(file_or_filename, "write"):
            stream = file_or_filename
        else:
            stream = open(file_or_filename, "wb")
        try:
            write = stream.write
            if not encoding:
                if method == "c14n":
                    encoding = "utf-8"
                else:
                    encoding = "us-ascii"
            # decide on the declaration only after the encoding default
            # has been filled in, so that xml_declaration=True is also
            # honoured when no encoding was passed
            if method == "xml" and (
                    xml_declaration or
                    (xml_declaration is None and
                     encoding not in ("utf-8", "us-ascii"))):
                write("<?xml version='1.0' encoding='%s'?>\n" % encoding)
            if method == "text":
                _serialize_text(write, self._root, encoding)
            else:
                qnames, namespaces = _namespaces(
                    self._root, encoding, default_namespace
                    )
                serialize = _serialize[method]
                serialize(write, self._root, encoding, qnames, namespaces)
        finally:
            # only close what was opened here; a caller-supplied file
            # object stays open
            if file_or_filename is not stream:
                stream.close()

    ##
    # Writes the tree using the "c14n" output method.  Provided for
    # lxml.etree compatibility; use write() with method="c14n" instead.
    #
    # @param file A file name, or a file object opened for writing.

    def write_c14n(self, file):
        return self.write(file, method="c14n")
827
828	# --------------------------------------------------------------------
829	# serialization support
830
def _namespaces(elem, encoding, default_namespace=None):
    """Identify the qualified names and namespaces used in a tree.

    Every element below (and including) elem is visited; its tag, its
    attribute names, and any QName found as an attribute value or as
    element text are registered.

    @param elem Root of the tree to scan.
    @param encoding Encoding applied to the serialized names.
    @param default_namespace Optional URI mapped to the empty prefix.
    @return A (qnames, namespaces) tuple.  qnames maps each name to its
        encoded "prefix:local" (or plain) form; namespaces maps
        namespace URIs to prefixes.
    @exception ValueError If default_namespace is given and a
        non-qualified name is found.
    """
    # qualified name -> encoded serialized name
    qnames = {None: None}

    # namespace URI -> prefix
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def add_qname(qname):
        # work out and store the serialized form of a single name
        try:
            if qname[:1] != "{":
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = qname.encode(encoding)
                return
            uri, local = qname[1:].rsplit("}", 1)
            prefix = namespaces.get(uri)
            if prefix is None:
                prefix = _namespace_map.get(uri)
                if prefix is None:
                    # invent a prefix, numbered by the table size
                    prefix = "ns%d" % len(namespaces)
                if prefix != "xml":
                    namespaces[uri] = prefix
            if prefix:
                serialized = "%s:%s" % (prefix, local)
            else:
                serialized = local # default element
            qnames[qname] = serialized.encode(encoding)
        except TypeError:
            _raise_serialization_error(qname)

    def remember(name):
        # register a name unless it is already known
        if name not in qnames:
            add_qname(name)

    # populate qname and namespaces table
    try:
        walk = elem.iter
    except AttributeError:
        walk = elem.getiterator # cET compatibility
    for node in walk():
        tag = node.tag
        if isinstance(tag, QName):
            remember(tag.text)
        elif isinstance(tag, basestring):
            remember(tag)
        elif not (tag is None or tag is Comment or tag is PI):
            _raise_serialization_error(tag)
        for key, value in node.items():
            if isinstance(key, QName):
                key = key.text
            remember(key)
            if isinstance(value, QName):
                remember(value.text)
        text = node.text
        if isinstance(text, QName):
            remember(text.text)
    return qnames, namespaces
898
def _serialize_xml(write, elem, encoding, qnames, namespaces):
    """Serialize an element, its subtree and its tail as XML.

    @param write Callable receiving each piece of output.
    @param elem The element to serialize.
    @param encoding Output encoding handed to the escape helpers.
    @param qnames Mapping from names to their serialized form.
    @param namespaces Mapping from namespace URI to prefix whose
        declarations are written on this element's start tag; None is
        passed for all subelements.
    """
    raw_tag = elem.tag
    content = elem.text
    if raw_tag is Comment:
        write("<!--%s-->" % _encode(content, encoding))
    elif raw_tag is ProcessingInstruction:
        write("<?%s?>" % _encode(content, encoding))
    else:
        name = qnames[raw_tag]
        if name is not None:
            write("<" + name)
            attributes = elem.items()
            if attributes or namespaces:
                if namespaces:
                    # namespace declarations, sorted on prefix
                    declarations = sorted(namespaces.items(),
                                          key=lambda pair: pair[1])
                    for uri, prefix in declarations:
                        if prefix:
                            prefix = ":" + prefix
                        write(" xmlns%s=\"%s\"" % (
                            prefix.encode(encoding),
                            _escape_attrib(uri, encoding)
                            ))
                for key, value in sorted(attributes): # lexical order
                    if isinstance(key, QName):
                        key = key.text
                    if isinstance(value, QName):
                        value = qnames[value.text]
                    else:
                        value = _escape_attrib(value, encoding)
                    write(" %s=\"%s\"" % (qnames[key], value))
            if content or len(elem):
                write(">")
                if content:
                    write(_escape_cdata(content, encoding))
                for child in elem:
                    _serialize_xml(write, child, encoding, qnames, None)
                write("</" + name + ">")
            else:
                # nothing inside: use the short empty-element form
                write(" />")
        else:
            # no serialized name: emit only the text and the children
            if content:
                write(_escape_cdata(content, encoding))
            for child in elem:
                _serialize_xml(write, child, encoding, qnames, None)
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))
945
# Lower-case names of the HTML elements for which the HTML serializer
# writes no end tag.
HTML_EMPTY = set([
    "area", "base", "basefont", "br", "col", "frame", "hr",
    "img", "input", "isindex", "link", "meta", "param",
    ])
953
def _serialize_html(write, elem, encoding, qnames, namespaces):
    """Serialize an element, its subtree and its tail as HTML.

    Unlike the XML serializer, a start tag is always closed with ">",
    the text of "script" and "style" elements is written without
    escaping, and no end tag is written for elements listed in
    HTML_EMPTY.

    @param write Callable receiving each piece of output.
    @param elem The element to serialize.
    @param encoding Output encoding handed to the escape helpers.
    @param qnames Mapping from names to their serialized form.
    @param namespaces Mapping from namespace URI to prefix whose
        declarations are written on this element's start tag; None is
        passed for all subelements.
    """
    raw_tag = elem.tag
    content = elem.text
    if raw_tag is Comment:
        write("<!--%s-->" % _escape_cdata(content, encoding))
    elif raw_tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(content, encoding))
    else:
        name = qnames[raw_tag]
        if name is not None:
            write("<" + name)
            attributes = elem.items()
            if attributes or namespaces:
                if namespaces:
                    # namespace declarations, sorted on prefix
                    declarations = sorted(namespaces.items(),
                                          key=lambda pair: pair[1])
                    for uri, prefix in declarations:
                        if prefix:
                            prefix = ":" + prefix
                        write(" xmlns%s=\"%s\"" % (
                            prefix.encode(encoding),
                            _escape_attrib(uri, encoding)
                            ))
                for key, value in sorted(attributes): # lexical order
                    if isinstance(key, QName):
                        key = key.text
                    if isinstance(value, QName):
                        value = qnames[value.text]
                    else:
                        value = _escape_attrib_html(value, encoding)
                    # FIXME: handle boolean attributes
                    write(" %s=\"%s\"" % (qnames[key], value))
            write(">")
            lowered = name.lower()
            if content:
                if lowered in ("script", "style"):
                    # written unescaped
                    write(_encode(content, encoding))
                else:
                    write(_escape_cdata(content, encoding))
            for child in elem:
                _serialize_html(write, child, encoding, qnames, None)
            if lowered not in HTML_EMPTY:
                write("</" + name + ">")
        else:
            # no serialized name: emit only the text and the children
            if content:
                write(_escape_cdata(content, encoding))
            for child in elem:
                _serialize_html(write, child, encoding, qnames, None)
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))
1003
1004	def _serialize_text(write, elem, encoding):
1005	for part in elem.itertext():
1006	write(part.encode(encoding))
1007	if elem.tail:
1008	write(elem.tail.encode(encoding))
1009
# Dispatch table: output method name -> serializer function.
_serialize = dict(
    xml=_serialize_xml,
    html=_serialize_html,
    text=_serialize_text,
    # this optional method is imported at the end of the module
    # c14n=_serialize_c14n,
)
1017
1018	##
1019	# Registers a namespace prefix. The registry is global, and any
1020	# existing mapping for either the given prefix or the namespace URI
1021	# will be removed.
1022	#
1023	# @param prefix Namespace prefix.
1024	# @param uri Namespace uri. Tags and attributes in this namespace
1025	# will be serialized with the given prefix, if at all possible.
1026	# @exception ValueError If the prefix is reserved, or is otherwise
1027	# invalid.
1028
def register_namespace(prefix, uri):
    # Registers prefix for uri in the module-wide _namespace_map, first
    # dropping every existing entry that uses either the same URI or the
    # same prefix.
    #
    # Raises ValueError when prefix looks like "ns<digits>", which is
    # reserved for internal use.
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # Iterate over a snapshot: entries are deleted inside the loop, and
    # deleting from a dict while iterating a live items() view raises
    # RuntimeError.
    for k, v in list(_namespace_map.items()):
        if k == uri or v == prefix:
            del _namespace_map[k]
    _namespace_map[uri] = prefix

# Global registry: namespace URI -> prefix.
_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # dublin core
    "http://purl.org/dc/elements/1.1/": "dc",
}
1049
1050	def _raise_serialization_error(text):
1051	raise TypeError(
1052	"cannot serialize %r (type %s)" % (text, type(text).__name__)
1053	)
1054
1055	def _encode(text, encoding):
1056	try:
1057	return text.encode(encoding, "xmlcharrefreplace")
1058	except (TypeError, AttributeError):
1059	_raise_serialization_error(text)
1060
1061	def _escape_cdata(text, encoding):
1062	# escape character data
1063	try:
1064	# it's worth avoiding do-nothing calls for strings that are
1065	# shorter than 500 character, or so. assume that's, by far,
1066	# the most common case in most applications.
1067	if "&" in text:
1068	text = text.replace("&", "&")
1069	if "<" in text:
1070	text = text.replace("<", "<")
1071	if ">" in text:
1072	text = text.replace(">", ">")
1073	return text.encode(encoding, "xmlcharrefreplace")
1074	except (TypeError, AttributeError):
1075	_raise_serialization_error(text)
1076
1077	def _escape_attrib(text, encoding):
1078	# escape attribute value
1079	try:
1080	if "&" in text:
1081	text = text.replace("&", "&")
1082	if "<" in text:
1083	text = text.replace("<", "<")
1084	if ">" in text:
1085	text = text.replace(">", ">")
1086	if "\"" in text:
1087	text = text.replace("\"", """)
1088	if "\n" in text:
1089	text = text.replace("\n", " ")
1090	return text.encode(encoding, "xmlcharrefreplace")
1091	except (TypeError, AttributeError):
1092	_raise_serialization_error(text)
1093
1094	def _escape_attrib_html(text, encoding):
1095	# escape attribute value
1096	try:
1097	if "&" in text:
1098	text = text.replace("&", "&")
1099	if ">" in text:
1100	text = text.replace(">", ">")
1101	if "\"" in text:
1102	text = text.replace("\"", """)
1103	return text.encode(encoding, "xmlcharrefreplace")
1104	except (TypeError, AttributeError):
1105	_raise_serialization_error(text)
1106
1107	# --------------------------------------------------------------------
1108
1109	##
1110	# Generates a string representation of an XML element, including all
1111	# subelements.
1112	#
1113	# @param element An Element instance.
1114	# @keyparam encoding Optional output encoding (default is US-ASCII).
1115	# @keyparam method Optional output method ("xml", "html", "text" or
1116	# "c14n"; default is "xml").
1117	# @return An encoded string containing the XML data.
1118	# @defreturn string
1119
def tostring(element, encoding=None, method=None):
    # Serializes element through ElementTree.write() into an in-memory
    # sink and returns the joined fragments as one string.
    fragments = []

    class _FragmentSink:
        # minimal file-like object: write() appends to the list above
        write = staticmethod(fragments.append)

    ElementTree(element).write(_FragmentSink(), encoding, method=method)
    return "".join(fragments)
1128
1129	##
1130	# Generates a string representation of an XML element, including all
1131	# subelements. The string is returned as a sequence of string fragments.
1132	#
1133	# @param element An Element instance.
1134	# @keyparam encoding Optional output encoding (default is US-ASCII).
1135	# @keyparam method Optional output method ("xml", "html", "text" or
1136	# "c14n"; default is "xml").
1137	# @return A sequence object containing the XML data.
1138	# @defreturn sequence
1139	# @since 1.3
1140
def tostringlist(element, encoding=None, method=None):
    # Serializes element through ElementTree.write() into an in-memory
    # sink and returns the list of fragments exactly as they were written.
    fragments = []

    class _FragmentSink:
        # minimal file-like object: write() appends to the list above
        write = staticmethod(fragments.append)

    ElementTree(element).write(_FragmentSink(), encoding, method=method)
    # FIXME: merge small fragments into larger parts
    return fragments
1150
1151	##
1152	# Writes an element tree or element structure to sys.stdout. This
1153	# function should be used for debugging only.
1154	# <p>
1155	# The exact output format is implementation dependent. In this
1156	# version, it's written as an ordinary XML file.
1157	#
1158	# @param elem An element tree or an individual element.
1159
def dump(elem):
    # debugging aid: write the tree to sys.stdout and make sure the
    # output ends with a newline
    tree = elem if isinstance(elem, ElementTree) else ElementTree(elem)
    tree.write(sys.stdout)
    tail = tree.getroot().tail
    if not (tail and tail[-1] == "\n"):
        sys.stdout.write("\n")
1168
1169	# --------------------------------------------------------------------
1170	# parsing
1171
1172	##
1173	# Parses an XML document into an element tree.
1174	#
1175	# @param source A filename or file object containing XML data.
1176	# @param parser An optional parser instance. If not given, the
1177	# standard {@link XMLParser} parser is used.
1178	# @return An ElementTree instance
1179
def parse(source, parser=None):
    # Creates an empty ElementTree, loads source into it (with the given
    # parser, if any) and returns the tree.
    document = ElementTree()
    document.parse(source, parser)
    return document
1184
1185	##
1186	# Parses an XML document into an element tree incrementally, and reports
1187	# what's going on to the user.
1188	#
1189	# @param source A filename or file object containing XML data.
1190	# @param events A list of events to report back. If omitted, only "end"
1191	# events are reported.
1192	# @param parser An optional parser instance. If not given, the
1193	# standard {@link XMLParser} parser is used.
1194	# @return A (event, elem) iterator.
1195
def iterparse(source, events=None, parser=None):
    # Anything without a read() method is treated as a filename and
    # opened here in binary mode; the iterator is told so through its
    # close_source argument.
    opened_here = not hasattr(source, "read")
    if opened_here:
        source = open(source, "rb")
    if not parser:
        parser = XMLParser(target=TreeBuilder())
    return _IterParseIterator(source, events, parser, opened_here)
1204
class _IterParseIterator(object):
    # Iterator returned by iterparse().  It reads the source in chunks,
    # feeds them to the parser, and yields the (event, value) pairs that
    # the installed expat handlers collect along the way.

    def __init__(self, source, events, parser, close_source=False):
        # source       -- object with a read() method
        # events       -- iterable of event names ("start", "end",
        #                 "start-ns", "end-ns"); None means ["end"]
        # parser       -- XMLParser-like object exposing _parser, _start,
        #                 _start_list and _end
        # close_source -- if true, next() closes source when the
        #                 iteration finishes normally
        # Raises ValueError for an unknown event name.
        self._file = source
        self._close_file = close_source
        self._events = []           # buffered (event, value) pairs
        self._index = 0             # next position to return from _events
        self._error = None          # parse error kept until the buffer drains
        self.root = self._root = None
        self._parser = parser
        # wire up the parser for event reporting
        parser = self._parser._parser
        append = self._events.append
        if events is None:
            events = ["end"]
        for event in events:
            # handlers bind event/append/start/end as default arguments so
            # each closure keeps the values of this loop iteration
            if event == "start":
                try:
                    parser.ordered_attributes = 1
                    parser.specified_attributes = 1
                    def handler(tag, attrib_in, event=event, append=append,
                                start=self._parser._start_list):
                        append((event, start(tag, attrib_in)))
                    parser.StartElementHandler = handler
                except AttributeError:
                    # fall back to the handler that takes the attributes
                    # in the form _start expects
                    def handler(tag, attrib_in, event=event, append=append,
                                start=self._parser._start):
                        append((event, start(tag, attrib_in)))
                    parser.StartElementHandler = handler
            elif event == "end":
                def handler(tag, event=event, append=append,
                            end=self._parser._end):
                    append((event, end(tag)))
                parser.EndElementHandler = handler
            elif event == "start-ns":
                def handler(prefix, uri, event=event, append=append):
                    # report the URI ASCII-encoded when possible
                    try:
                        uri = (uri or "").encode("ascii")
                    except UnicodeError:
                        pass
                    append((event, (prefix or "", uri or "")))
                parser.StartNamespaceDeclHandler = handler
            elif event == "end-ns":
                def handler(prefix, event=event, append=append):
                    append((event, None))
                parser.EndNamespaceDeclHandler = handler
            else:
                raise ValueError("unknown event %r" % event)

    def next(self):
        # Returns the next buffered (event, value) pair, refilling the
        # buffer from the source as needed.  A parse error is raised only
        # after the pairs buffered before it have been returned.  When the
        # input is exhausted, sets self.root and raises StopIteration.
        while 1:
            try:
                item = self._events[self._index]
                self._index += 1
                return item
            except IndexError:
                pass
            if self._error:
                e = self._error
                self._error = None
                raise e
            if self._parser is None:
                # parser already closed: publish the root and stop
                self.root = self._root
                if self._close_file:
                    self._file.close()
                raise StopIteration
            # load event buffer
            del self._events[:]
            self._index = 0
            data = self._file.read(16384)
            if data:
                try:
                    self._parser.feed(data)
                except SyntaxError as exc:
                    # defer: events collected before the error come first
                    self._error = exc
            else:
                # end of input: finish parsing and drop the parser
                self._root = self._parser.close()
                self._parser = None

    def __iter__(self):
        return self
1286
1287	##
1288	# Parses an XML document from a string constant. This function can
1289	# be used to embed "XML literals" in Python code.
1290	#
1291	# @param source A string containing XML data.
1292	# @param parser An optional parser instance. If not given, the
1293	# standard {@link XMLParser} parser is used.
1294	# @return An Element instance.
1295	# @defreturn Element
1296
def XML(text, parser=None):
    # Feeds text to the given parser (or a default XMLParser) in one call
    # and returns whatever the parser's close() returns.
    builder = parser or XMLParser(target=TreeBuilder())
    builder.feed(text)
    return builder.close()
1302
1303	##
1304	# Parses an XML document from a string constant, and also returns
1305	# a dictionary which maps from element id:s to elements.
1306	#
1307	# @param source A string containing XML data.
1308	# @param parser An optional parser instance. If not given, the
1309	# standard {@link XMLParser} parser is used.
1310	# @return A tuple containing an Element instance and a dictionary.
1311	# @defreturn (Element, dictionary)
1312
def XMLID(text, parser=None):
    # Parses text like XML() and additionally builds a dictionary that
    # maps every non-empty "id" attribute value to its element; a later
    # element with the same id replaces an earlier one.
    builder = parser or XMLParser(target=TreeBuilder())
    builder.feed(text)
    root = builder.close()
    by_id = {}
    for node in root.iter():
        key = node.get("id")
        if key:
            by_id[key] = node
    return root, by_id
1324
1325	##
1326	# Parses an XML document from a string constant. Same as {@link #XML}.
1327	#
1328	# @def fromstring(text)
1329	# @param source A string containing XML data.
1330	# @return An Element instance.
1331	# @defreturn Element
1332
# public alias: fromstring is bound to the same function object as XML
fromstring = XML
1334
1335	##
1336	# Parses an XML document from a sequence of string fragments.
1337	#
1338	# @param sequence A list or other sequence containing XML data fragments.
1339	# @param parser An optional parser instance. If not given, the
1340	# standard {@link XMLParser} parser is used.
1341	# @return An Element instance.
1342	# @defreturn Element
1343	# @since 1.3
1344
def fromstringlist(sequence, parser=None):
    # Feeds each fragment of sequence, in order, to the given parser (or
    # a default XMLParser) and returns whatever the parser's close()
    # returns.
    builder = parser or XMLParser(target=TreeBuilder())
    for fragment in sequence:
        builder.feed(fragment)
    return builder.close()
1351
1352	# --------------------------------------------------------------------
1353
1354	##
1355	# Generic element structure builder. This builder converts a sequence
1356	# of {@link #TreeBuilder.start}, {@link #TreeBuilder.data}, and {@link
1357	# #TreeBuilder.end} method calls to a well-formed element structure.
1358	# <p>
1359	# You can use this class to build an element structure using a custom XML
1360	# parser, or a parser for some other XML-like format.
1361	#
1362	# @param element_factory Optional element factory. This factory
1363	# is called to create new Element instances, as necessary.
1364
class TreeBuilder(object):

    def __init__(self, element_factory=None):
        if element_factory is None:
            element_factory = Element
        self._factory = element_factory
        self._data = []    # character data collected since the last flush
        self._elem = []    # stack of elements that are still open
        self._last = None  # element most recently opened or closed
        self._tail = None  # true once the last element has been closed

    ##
    # Returns the toplevel document element.  All elements must have
    # been closed, and at least one element must have been built.
    #
    # @return An Element instance.
    # @defreturn Element

    def close(self):
        assert not len(self._elem), "missing end tags"
        assert self._last is not None, "missing toplevel element"
        return self._last

    # Attaches the collected character data to the last element: as its
    # tail when that element is already closed, as its text otherwise.
    # Data collected before any element exists is discarded.
    def _flush(self):
        pending = self._data
        if not pending:
            return
        last = self._last
        if last is not None:
            text = "".join(pending)
            if self._tail:
                assert last.tail is None, "internal error (tail)"
                last.tail = text
            else:
                assert last.text is None, "internal error (text)"
                last.text = text
        self._data = []

    ##
    # Adds text to the current element.
    #
    # @param data A string.  This should be either an 8-bit string
    #    containing ASCII text, or a Unicode string.

    def data(self, data):
        self._data.append(data)

    ##
    # Opens a new element.
    #
    # @param tag The element name.
    # @param attrs A dictionary containing element attributes.
    # @return The opened element.
    # @defreturn Element

    def start(self, tag, attrs):
        self._flush()
        elem = self._factory(tag, attrs)
        self._last = elem
        open_elements = self._elem
        if open_elements:
            # nest the new element inside the innermost open one
            open_elements[-1].append(elem)
        open_elements.append(elem)
        self._tail = 0
        return elem

    ##
    # Closes the current element.
    #
    # @param tag The element name.
    # @return The closed element.
    # @defreturn Element

    def end(self, tag):
        self._flush()
        closed = self._elem.pop()
        self._last = closed
        assert closed.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   closed.tag, tag)
        self._tail = 1
        return closed
1441
1442	##
1443	# Element structure builder for XML source data, based on the
1444	# <b>expat</b> parser.
1445	#
1446	# @keyparam target Target object. If omitted, the builder uses an
1447	# instance of the standard {@link #TreeBuilder} class.
1448	# @keyparam html Predefine HTML entities. This flag is not supported
1449	# by the current implementation.
1450	# @keyparam encoding Optional encoding. If given, the value overrides
1451	# the encoding specified in the XML file.
1452	# @see #ElementTree
1453	# @see #TreeBuilder
1454
class XMLParser(object):

    # Creates the underlying expat parser and installs this object's
    # methods as its handlers.
    #
    # html     -- accepted for compatibility; not used by this method
    # target   -- object receiving start/end/data (and optionally
    #             comment/pi/doctype) calls; defaults to a TreeBuilder
    # encoding -- passed to expat.ParserCreate
    def __init__(self, html=0, target=None, encoding=None):
        try:
            from xml.parsers import expat
        except ImportError:
            try:
                import pyexpat as expat
            except ImportError:
                raise ImportError(
                    "No module named expat; use SimpleXMLTreeBuilder instead"
                    )
        # "}" separates namespace URI and local name in expat's names;
        # _fixname() turns that into "{uri}local"
        parser = expat.ParserCreate(encoding, "}")
        if target is None:
            target = TreeBuilder()
        # underscored names are provided for compatibility only
        self.parser = self._parser = parser
        self.target = self._target = target
        self._error = expat.error
        self._names = {}  # name memo cache
        # callbacks
        parser.DefaultHandlerExpand = self._default
        parser.StartElementHandler = self._start
        parser.EndElementHandler = self._end
        parser.CharacterDataHandler = self._data
        # optional callbacks
        parser.CommentHandler = self._comment
        parser.ProcessingInstructionHandler = self._pi
        # let expat do the buffering, if supported
        try:
            self._parser.buffer_text = 1
        except AttributeError:
            pass
        # use new-style attribute handling, if supported
        try:
            self._parser.ordered_attributes = 1
            self._parser.specified_attributes = 1
            parser.StartElementHandler = self._start_list
        except AttributeError:
            pass
        self._doctype = None  # list of doctype tokens while inside one
        self.entity = {}      # entity name -> replacement text
        try:
            self.version = "Expat %d.%d.%d" % expat.version_info
        except AttributeError:
            pass  # unknown

    # Re-raises an expat error as ParseError, carrying over the error
    # code and the (line, column) position.
    def _raiseerror(self, value):
        err = ParseError(value)
        err.code = value.code
        err.position = value.lineno, value.offset
        raise err

    def _fixtext(self, text):
        # convert text string to ascii, if possible
        try:
            return text.encode("ascii")
        except UnicodeError:
            return text

    def _fixname(self, key):
        # expand qname, and convert name string to ascii, if possible
        try:
            name = self._names[key]
        except KeyError:
            name = key
            if "}" in name:
                name = "{" + name
            self._names[key] = name = self._fixtext(name)
        return name

    # Start handler for attributes delivered as a mapping.
    def _start(self, tag, attrib_in):
        fixname = self._fixname
        fixtext = self._fixtext
        tag = fixname(tag)
        attrib = {}
        for key, value in attrib_in.items():
            attrib[fixname(key)] = fixtext(value)
        return self.target.start(tag, attrib)

    # Start handler for attributes delivered as a flat
    # [name, value, name, value, ...] sequence.
    def _start_list(self, tag, attrib_in):
        fixname = self._fixname
        fixtext = self._fixtext
        tag = fixname(tag)
        attrib = {}
        if attrib_in:
            for i in range(0, len(attrib_in), 2):
                attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1])
        return self.target.start(tag, attrib)

    def _data(self, text):
        return self.target.data(self._fixtext(text))

    def _end(self, tag):
        return self.target.end(self._fixname(tag))

    # Comments are forwarded only if the target defines comment().
    def _comment(self, data):
        try:
            comment = self.target.comment
        except AttributeError:
            pass
        else:
            return comment(self._fixtext(data))

    # Processing instructions are forwarded only if the target defines pi().
    def _pi(self, target, data):
        try:
            pi = self.target.pi
        except AttributeError:
            pass
        else:
            return pi(self._fixtext(target), self._fixtext(data))

    # Default handler: resolves entities through self.entity and collects
    # the tokens of a doctype declaration.
    def _default(self, text):
        prefix = text[:1]
        if prefix == "&":
            # deal with undefined entities
            try:
                self.target.data(self.entity[text[1:-1]])
            except KeyError:
                from xml.parsers import expat
                err = expat.error(
                    "undefined entity %s: line %d, column %d" %
                    (text, self._parser.ErrorLineNumber,
                    self._parser.ErrorColumnNumber)
                    )
                err.code = 11  # XML_ERROR_UNDEFINED_ENTITY
                err.lineno = self._parser.ErrorLineNumber
                err.offset = self._parser.ErrorColumnNumber
                raise err
        elif prefix == "<" and text[:9] == "<!DOCTYPE":
            self._doctype = []  # inside a doctype declaration
        elif self._doctype is not None:
            # parse doctype contents
            if prefix == ">":
                self._doctype = None
                return
            text = text.strip()
            if not text:
                return
            self._doctype.append(text)
            n = len(self._doctype)
            if n > 2:
                type = self._doctype[1]
                if type == "PUBLIC" and n == 4:
                    name, type, pubid, system = self._doctype
                elif type == "SYSTEM" and n == 3:
                    name, type, system = self._doctype
                    pubid = None
                else:
                    return
                if pubid:
                    pubid = pubid[1:-1]  # strip the surrounding quotes
                if hasattr(self.target, "doctype"):
                    self.target.doctype(name, pubid, system[1:-1])
                # Compare with != rather than identity: every attribute
                # access creates a new bound-method object, so an "is not"
                # test is true even when doctype() has not been overridden.
                elif self.doctype != self._XMLParser__doctype:
                    # warn about deprecated call
                    self._XMLParser__doctype(name, pubid, system[1:-1])
                    self.doctype(name, pubid, system[1:-1])
                self._doctype = None

    ##
    # (Deprecated) Handles a doctype declaration.
    #
    # @param name Doctype name.
    # @param pubid Public identifier.
    # @param system System identifier.

    def doctype(self, name, pubid, system):
        """This method of XMLParser is deprecated."""
        warnings.warn(
            "This method of XMLParser is deprecated.  Define doctype() "
            "method on the TreeBuilder target.",
            DeprecationWarning,
            )

    # sentinel, if doctype is redefined in a subclass
    __doctype = doctype

    ##
    # Feeds data to the parser.
    #
    # @param data Encoded data.

    def feed(self, data):
        try:
            self._parser.Parse(data, 0)
        except self._error as v:
            self._raiseerror(v)

    ##
    # Finishes feeding data to the parser.
    #
    # @return An element structure.
    # @defreturn Element

    def close(self):
        try:
            self._parser.Parse("", 1)  # end of data
        except self._error as v:
            self._raiseerror(v)
        tree = self.target.close()
        del self.target, self._parser  # get rid of circular references
        return tree
1658
# compatibility: XMLTreeBuilder is kept as an alias of XMLParser
XMLTreeBuilder = XMLParser

# workaround circular import: the optional "c14n" serializer is registered
# only now, and is silently left out when ElementC14N cannot be imported.
try:
    from ElementC14N import _serialize_c14n
    _serialize["c14n"] = _serialize_c14n
except ImportError:
    pass

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/xml/etree/ElementTree.py

Download in other formats: