1 | # xml.etree test. This file contains enough tests to make sure that
|
---|
2 | # all included components work as they should.
|
---|
3 | # Large parts are extracted from the upstream test suite.
|
---|
4 |
|
---|
5 | # IMPORTANT: the same doctests are run from "test_xml_etree_c" in
|
---|
6 | # order to ensure consistency between the C implementation and the
|
---|
7 | # Python implementation.
|
---|
8 | #
|
---|
9 | # For this purpose, the module-level "ET" symbol is temporarily
|
---|
10 | # monkey-patched when running the "test_xml_etree_c" test suite.
|
---|
11 | # Don't re-import "xml.etree.ElementTree" module in the docstring,
|
---|
12 | # except if the test is specific to the Python implementation.
|
---|
13 |
|
---|
14 | import sys
|
---|
15 | import cgi
|
---|
16 |
|
---|
17 | from test import test_support
|
---|
18 | from test.test_support import findfile
|
---|
19 |
|
---|
20 | from xml.etree import ElementTree as ET
|
---|
21 |
|
---|
# Paths of the XML fixture files, resolved by findfile() inside the
# "xmltestdata" subdirectory.
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")

# In-memory documents shared by the doctests below.
# NOTE(review): any leading whitespace inside these literals is not visible
# in this listing -- confirm against the canonical file if a test depends on
# whitespace-only text nodes.

# <body> with two direct <tag> children and a <section> holding a third.
SAMPLE_XML = """\
<body>
<tag class='a'>text</tag>
<tag class='b' />
<section>
<tag class='b' id='inner'>subtext</tag>
</section>
</body>
"""

# Richer <section>; find() assigns it to elem[2] of the parsed SAMPLE_XML.
SAMPLE_SECTION = """\
<section>
<tag class='b' id='inner'>subtext</tag>
<nexttag />
<nextsection>
<tag />
</nextsection>
</section>
"""

# Same element layout as SAMPLE_XML, without attributes, placed in the
# default namespace http://effbot.org/ns.  The literal starts with a newline
# (there is no backslash after the opening quotes).
SAMPLE_XML_NS = """
<body xmlns="http://effbot.org/ns">
<tag>text</tag>
<tag />
<section>
<tag>subtext</tag>
</section>
</body>
"""
54 |
|
---|
55 |
|
---|
def sanity():
    """
    Import sanity.

    Checks that ElementTree, ElementInclude and ElementPath can each be
    imported from xml.etree; none of the examples expects any output.

    >>> from xml.etree import ElementTree
    >>> from xml.etree import ElementInclude
    >>> from xml.etree import ElementPath
    """
64 |
|
---|
def check_method(method):
    """Print "<method> not callable" when *method* has no __call__ attribute.

    Prints nothing and returns None for callables, so a doctest calling it
    expects empty output on success.
    """
    if hasattr(method, '__call__'):
        return
    # Single-argument form: emits the same text as "print method, ...".
    print("%s not callable" % (method,))
68 |
|
---|
def serialize(elem, to_string=True, **options):
    """Serialize *elem* by writing an ElementTree into an in-memory file.

    Any extra keyword options are forwarded to ElementTree.write().

    Returns the written text when *to_string* is true; otherwise returns the
    StringIO object itself, rewound to position 0.
    """
    import StringIO
    buf = StringIO.StringIO()
    ET.ElementTree(elem).write(buf, **options)
    if not to_string:
        buf.seek(0)
        return buf
    return buf.getvalue()
79 |
|
---|
def summarize(elem):
    """Return "<Comment>" when elem.tag is ET.Comment, otherwise elem.tag."""
    return "<Comment>" if elem.tag == ET.Comment else elem.tag
84 |
|
---|
def summarize_list(seq):
    """Return a list with summarize(item) for every item of *seq*, in order."""
    labels = []
    for item in seq:
        labels.append(summarize(item))
    return labels
87 |
|
---|
def normalize_crlf(tree):
    """Replace "\\r\\n" with "\\n" in the text and tail of each element of *tree*.

    Elements are visited through tree.iter() and modified in place; empty or
    missing text/tail values are left untouched.  Returns None.
    """
    for node in tree.iter():
        # text first, then tail -- same order as two explicit if-blocks.
        for field in ("text", "tail"):
            value = getattr(node, field)
            if value:
                setattr(node, field, value.replace("\r\n", "\n"))
94 |
|
---|
def check_string(string):
    """Exercise the string-like operations expected of *string*.

    Calls len(), iterates the value (printing a message for any item whose
    length is not 1), concatenates it with "" and " ", and takes an empty
    slice.  An operation the object does not support raises from the
    operation itself; nothing is returned.
    """
    len(string)
    for piece in string:
        if len(piece) != 1:
            print("expected one-character string, got %r" % piece)
    # Results are discarded: only the ability to perform the operation matters.
    string + ""
    string + " "
    string[:0]
103 |
|
---|
def check_mapping(mapping):
    """Exercise the mapping operations expected of *mapping*.

    Calls len(), keys() and items(), looks up every key, then stores
    "value" under "key" and prints a message if reading it back gives
    something else.  Note that the mapping is modified: it ends up with
    a "key" entry.
    """
    len(mapping)
    names = mapping.keys()
    mapping.items()
    for name in names:
        mapping[name]
    mapping["key"] = "value"
    if mapping["key"] != "value":
        print("expected value string, got %r" % mapping["key"])
113 |
|
---|
def check_element(element):
    """Recursively check that *element* looks like an ElementTree element.

    Prints a message when ET.iselement() rejects the object or when one of
    the tag/attrib/text/tail members is missing, then runs check_string()
    on the tag (and on text and tail when they are not None),
    check_mapping() on attrib, and finally recurses into every child.
    """
    if not ET.iselement(element):
        print("not an element")
    for member in ("tag", "attrib", "text", "tail"):
        if not hasattr(element, member):
            print("no %s member" % member)

    check_string(element.tag)
    check_mapping(element.attrib)
    # text and tail are optional: only validate them when set.
    for member in ("text", "tail"):
        value = getattr(element, member)
        if value is not None:
            check_string(value)
    for child in element:
        check_element(child)
134 |
|
---|
135 | # --------------------------------------------------------------------
|
---|
136 | # element tree tests
|
---|
137 |
|
---|
def interface():
    r"""
    Test element tree interface.

    Runs check_element() on a fresh Element and on the root returned by
    ElementTree.getroot(), then uses check_method() to confirm that the
    listed methods exist and are callable.  Only the repr() example is
    expected to produce output.

    >>> element = ET.Element("tag")
    >>> check_element(element)
    >>> tree = ET.ElementTree(element)
    >>> check_element(tree.getroot())

    >>> element = ET.Element("t\xe4g", key="value")
    >>> tree = ET.ElementTree(element)
    >>> repr(element) # doctest: +ELLIPSIS
    "<Element 't\\xe4g' at 0x...>"
    >>> element = ET.Element("tag", key="value")

    Make sure all standard element methods exist.

    >>> check_method(element.append)
    >>> check_method(element.extend)
    >>> check_method(element.insert)
    >>> check_method(element.remove)
    >>> check_method(element.getchildren)
    >>> check_method(element.find)
    >>> check_method(element.iterfind)
    >>> check_method(element.findall)
    >>> check_method(element.findtext)
    >>> check_method(element.clear)
    >>> check_method(element.get)
    >>> check_method(element.set)
    >>> check_method(element.keys)
    >>> check_method(element.items)
    >>> check_method(element.iter)
    >>> check_method(element.itertext)
    >>> check_method(element.getiterator)

    These methods return an iterable. See bug 6472.

    >>> check_method(element.iter("tag").next)
    >>> check_method(element.iterfind("tag").next)
    >>> check_method(element.iterfind("*").next)
    >>> check_method(tree.iter("tag").next)
    >>> check_method(tree.iterfind("tag").next)
    >>> check_method(tree.iterfind("*").next)

    These aliases are provided:

    >>> assert ET.XML == ET.fromstring
    >>> assert ET.PI == ET.ProcessingInstruction
    >>> assert ET.XMLParser == ET.XMLTreeBuilder
    """
188 |
|
---|
def simpleops():
    """
    Basic method sanity checks.

    Exercises append/remove/insert/extend and slice assignment, deletion
    and comparison on elements, checking the serialized form after each
    step.  The numbered comments (# 1 .. # 6) label successive states of
    the same element.

    >>> elem = ET.XML("<body><tag/></body>")
    >>> serialize(elem)
    '<body><tag /></body>'
    >>> e = ET.Element("tag2")
    >>> elem.append(e)
    >>> serialize(elem)
    '<body><tag /><tag2 /></body>'
    >>> elem.remove(e)
    >>> serialize(elem)
    '<body><tag /></body>'
    >>> elem.insert(0, e)
    >>> serialize(elem)
    '<body><tag2 /><tag /></body>'
    >>> elem.remove(e)
    >>> elem.extend([e])
    >>> serialize(elem)
    '<body><tag /><tag2 /></body>'
    >>> elem.remove(e)

    >>> element = ET.Element("tag", key="value")
    >>> serialize(element) # 1
    '<tag key="value" />'
    >>> subelement = ET.Element("subtag")
    >>> element.append(subelement)
    >>> serialize(element) # 2
    '<tag key="value"><subtag /></tag>'
    >>> element.insert(0, subelement)
    >>> serialize(element) # 3
    '<tag key="value"><subtag /><subtag /></tag>'
    >>> element.remove(subelement)
    >>> serialize(element) # 4
    '<tag key="value"><subtag /></tag>'
    >>> element.remove(subelement)
    >>> serialize(element) # 5
    '<tag key="value" />'
    >>> element.remove(subelement)
    Traceback (most recent call last):
    ValueError: list.remove(x): x not in list
    >>> serialize(element) # 6
    '<tag key="value" />'
    >>> element[0:0] = [subelement, subelement, subelement]
    >>> serialize(element[1])
    '<subtag />'
    >>> element[1:9] == [element[1], element[2]]
    True
    >>> element[:9:2] == [element[0], element[2]]
    True
    >>> del element[1:2]
    >>> serialize(element)
    '<tag key="value"><subtag /><subtag /></tag>'
    """
244 |
|
---|
def cdata():
    """
    Test CDATA handling (etc).

    The same text is supplied three ways -- as plain character data, as
    numeric character references, and inside a CDATA section -- and must
    serialize to the same output each time.

    >>> serialize(ET.XML("<tag>hello</tag>"))
    '<tag>hello</tag>'
    >>> serialize(ET.XML("<tag>&#104;&#101;&#108;&#108;&#111;</tag>"))
    '<tag>hello</tag>'
    >>> serialize(ET.XML("<tag><![CDATA[hello]]></tag>"))
    '<tag>hello</tag>'
    """
256 |
|
---|
# Only with Python implementation
def simplefind():
    """
    Test find methods using the elementpath fallback.

    The examples replace ElementTree.ElementPath with an instance of
    ElementTree._SimpleElementPath and put the saved original back in the
    last example.  This docstring imports xml.etree.ElementTree itself
    rather than using the module-level ET name.

    >>> from xml.etree import ElementTree

    >>> CurrentElementPath = ElementTree.ElementPath
    >>> ElementTree.ElementPath = ElementTree._SimpleElementPath()
    >>> elem = ElementTree.XML(SAMPLE_XML)
    >>> elem.find("tag").tag
    'tag'
    >>> ElementTree.ElementTree(elem).find("tag").tag
    'tag'
    >>> elem.findtext("tag")
    'text'
    >>> elem.findtext("tog")
    >>> elem.findtext("tog", "default")
    'default'
    >>> ElementTree.ElementTree(elem).findtext("tag")
    'text'
    >>> summarize_list(elem.findall("tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag']

    Path syntax doesn't work in this case.

    >>> elem.find("section/tag")
    >>> elem.findtext("section/tag")
    >>> summarize_list(elem.findall("section/tag"))
    []

    >>> ElementTree.ElementPath = CurrentElementPath
    """
292 |
|
---|
def find():
    """
    Test find methods (including xpath syntax).

    Covers find(), findtext() and findall() on both Element and
    ElementTree objects.  Part-way through, elem[2] is replaced with the
    parsed SAMPLE_SECTION, so later examples see the richer <section>.
    The final examples repeat findall() on the namespaced SAMPLE_XML_NS.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> elem.find("tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("tag").tag
    'tag'
    >>> elem.find("section/tag").tag
    'tag'
    >>> elem.find("./tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("./tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("/tag").tag
    'tag'
    >>> elem[2] = ET.XML(SAMPLE_SECTION)
    >>> elem.find("section/nexttag").tag
    'nexttag'
    >>> ET.ElementTree(elem).find("section/tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("tog")
    >>> ET.ElementTree(elem).find("tog/foo")
    >>> elem.findtext("tag")
    'text'
    >>> elem.findtext("section/nexttag")
    ''
    >>> elem.findtext("section/nexttag", "default")
    ''
    >>> elem.findtext("tog")
    >>> elem.findtext("tog", "default")
    'default'
    >>> ET.ElementTree(elem).findtext("tag")
    'text'
    >>> ET.ElementTree(elem).findtext("tog/foo")
    >>> ET.ElementTree(elem).findtext("tog/foo", "default")
    'default'
    >>> ET.ElementTree(elem).findtext("./tag")
    'text'
    >>> ET.ElementTree(elem).findtext("/tag")
    'text'
    >>> elem.findtext("section/tag")
    'subtext'
    >>> ET.ElementTree(elem).findtext("section/tag")
    'subtext'
    >>> summarize_list(elem.findall("."))
    ['body']
    >>> summarize_list(elem.findall("tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall("tog"))
    []
    >>> summarize_list(elem.findall("tog/foo"))
    []
    >>> summarize_list(elem.findall("*"))
    ['tag', 'tag', 'section']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag', 'tag']
    >>> summarize_list(elem.findall("section/tag"))
    ['tag']
    >>> summarize_list(elem.findall("section//tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall("section/*"))
    ['tag', 'nexttag', 'nextsection']
    >>> summarize_list(elem.findall("section//*"))
    ['tag', 'nexttag', 'nextsection', 'tag']
    >>> summarize_list(elem.findall("section/.//*"))
    ['tag', 'nexttag', 'nextsection', 'tag']
    >>> summarize_list(elem.findall("*/*"))
    ['tag', 'nexttag', 'nextsection']
    >>> summarize_list(elem.findall("*//*"))
    ['tag', 'nexttag', 'nextsection', 'tag']
    >>> summarize_list(elem.findall("*/tag"))
    ['tag']
    >>> summarize_list(elem.findall("*/./tag"))
    ['tag']
    >>> summarize_list(elem.findall("./tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag', 'tag']
    >>> summarize_list(elem.findall("././tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag[@class]"))
    ['tag', 'tag', 'tag']
    >>> summarize_list(elem.findall(".//tag[@class='a']"))
    ['tag']
    >>> summarize_list(elem.findall(".//tag[@class='b']"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag[@id]"))
    ['tag']
    >>> summarize_list(elem.findall(".//section[tag]"))
    ['section']
    >>> summarize_list(elem.findall(".//section[element]"))
    []
    >>> summarize_list(elem.findall("../tag"))
    []
    >>> summarize_list(elem.findall("section/../tag"))
    ['tag', 'tag']
    >>> summarize_list(ET.ElementTree(elem).findall("./tag"))
    ['tag', 'tag']

    Following example is invalid in 1.2.
    A leading '*' is assumed in 1.3.

    >>> elem.findall("section//") == elem.findall("section//*")
    True

    ET's Path module handles this case incorrectly; this gives
    a warning in 1.3, and the behaviour will be modified in 1.4.

    >>> summarize_list(ET.ElementTree(elem).findall("/tag"))
    ['tag', 'tag']

    >>> elem = ET.XML(SAMPLE_XML_NS)
    >>> summarize_list(elem.findall("tag"))
    []
    >>> summarize_list(elem.findall("{http://effbot.org/ns}tag"))
    ['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag']
    >>> summarize_list(elem.findall(".//{http://effbot.org/ns}tag"))
    ['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag']
    """
414 |
|
---|
def file_init():
    """
    Test building an ElementTree through its file= argument.

    The argument is first a StringIO object wrapping SAMPLE_XML and then
    the SIMPLE_XMLFILE path.

    >>> import StringIO

    >>> stringfile = StringIO.StringIO(SAMPLE_XML)
    >>> tree = ET.ElementTree(file=stringfile)
    >>> tree.find("tag").tag
    'tag'
    >>> tree.find("section/tag").tag
    'tag'

    >>> tree = ET.ElementTree(file=SIMPLE_XMLFILE)
    >>> tree.find("element").tag
    'element'
    >>> tree.find("element/../empty-element").tag
    'empty-element'
    """
432 |
|
---|
def bad_find():
    """
    Check bad or unsupported path expressions.

    An absolute path ("/tag") passed to Element.findall() is expected to
    raise SyntaxError.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> elem.findall("/tag")
    Traceback (most recent call last):
    SyntaxError: cannot use absolute path on element
    """
442 |
|
---|
def path_cache():
    """
    Check that the path cache behaves sanely.

    Inspects len(ET.ElementPath._cache): repeating the same ten lookups
    must not change it, new paths must increase it, and after 600 distinct
    paths it must still be below 500 entries.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> for i in range(10): ET.ElementTree(elem).find('./'+str(i))
    >>> cache_len_10 = len(ET.ElementPath._cache)
    >>> for i in range(10): ET.ElementTree(elem).find('./'+str(i))
    >>> len(ET.ElementPath._cache) == cache_len_10
    True
    >>> for i in range(20): ET.ElementTree(elem).find('./'+str(i))
    >>> len(ET.ElementPath._cache) > cache_len_10
    True
    >>> for i in range(600): ET.ElementTree(elem).find('./'+str(i))
    >>> len(ET.ElementPath._cache) < 500
    True
    """
460 |
|
---|
def copy():
    """
    Test copy handling (etc).

    After the child of e1 is renamed, the copy.copy() result (e2) shows
    the new tag while the copy.deepcopy() result (e3) keeps the old one.

    >>> import copy
    >>> e1 = ET.XML("<tag>hello<foo/></tag>")
    >>> e2 = copy.copy(e1)
    >>> e3 = copy.deepcopy(e1)
    >>> e1.find("foo").tag = "bar"
    >>> serialize(e1)
    '<tag>hello<bar /></tag>'
    >>> serialize(e2)
    '<tag>hello<bar /></tag>'
    >>> serialize(e3)
    '<tag>hello<foo /></tag>'

    """
478 |
|
---|
def attrib():
    """
    Test attribute handling.

    The numbered comments group the examples by how the attributes were
    supplied: set() afterwards (1.x), keyword argument (2.x), dict
    argument (3.x), **dict (4.x), and dict plus keyword (5.x).  Groups
    3 and 4 clear the source dict afterwards to detect aliasing.

    >>> elem = ET.Element("tag")
    >>> elem.get("key") # 1.1
    >>> elem.get("key", "default") # 1.2
    'default'
    >>> elem.set("key", "value")
    >>> elem.get("key") # 1.3
    'value'

    >>> elem = ET.Element("tag", key="value")
    >>> elem.get("key") # 2.1
    'value'
    >>> elem.attrib # 2.2
    {'key': 'value'}

    >>> attrib = {"key": "value"}
    >>> elem = ET.Element("tag", attrib)
    >>> attrib.clear() # check for aliasing issues
    >>> elem.get("key") # 3.1
    'value'
    >>> elem.attrib # 3.2
    {'key': 'value'}

    >>> attrib = {"key": "value"}
    >>> elem = ET.Element("tag", **attrib)
    >>> attrib.clear() # check for aliasing issues
    >>> elem.get("key") # 4.1
    'value'
    >>> elem.attrib # 4.2
    {'key': 'value'}

    >>> elem = ET.Element("tag", {"key": "other"}, key="value")
    >>> elem.get("key") # 5.1
    'value'
    >>> elem.attrib # 5.2
    {'key': 'value'}

    >>> elem = ET.Element('test')
    >>> elem.text = "aa"
    >>> elem.set('testa', 'testval')
    >>> elem.set('testb', 'test2')
    >>> ET.tostring(elem)
    '<test testa="testval" testb="test2">aa</test>'
    >>> sorted(elem.keys())
    ['testa', 'testb']
    >>> sorted(elem.items())
    [('testa', 'testval'), ('testb', 'test2')]
    >>> elem.attrib['testb']
    'test2'
    >>> elem.attrib['testb'] = 'test1'
    >>> elem.attrib['testc'] = 'test2'
    >>> ET.tostring(elem)
    '<test testa="testval" testb="test1" testc="test2">aa</test>'
    """
536 |
|
---|
def makeelement():
    """
    Test makeelement handling.

    Also checks that the new element does not share the attrib dict it
    was given, and that clear(), extend() and full-slice assignment (from
    a list and from a tuple) serialize as expected.

    >>> elem = ET.Element("tag")
    >>> attrib = {"key": "value"}
    >>> subelem = elem.makeelement("subtag", attrib)
    >>> if subelem.attrib is attrib:
    ...     print "attrib aliasing"
    >>> elem.append(subelem)
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'

    >>> elem.clear()
    >>> serialize(elem)
    '<tag />'
    >>> elem.append(subelem)
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'
    >>> elem.extend([subelem, subelem])
    >>> serialize(elem)
    '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>'
    >>> elem[:] = [subelem]
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'
    >>> elem[:] = tuple([subelem])
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'

    """
567 |
|
---|
# NOTE(review): the indentation of the child elements in the expected
# output below was reconstructed (three spaces).  doctest compares
# whitespace exactly, so confirm it against xmltestdata/simple.xml and
# xmltestdata/simple-ns.xml.
def parsefile():
    """
    Test parsing from file.

    Parses both fixture files with ET.parse() and writes them to stdout,
    then feeds the contents of SIMPLE_XMLFILE through XMLParser,
    XMLTreeBuilder and an XMLParser with an explicit TreeBuilder target.

    >>> tree = ET.parse(SIMPLE_XMLFILE)
    >>> normalize_crlf(tree)
    >>> tree.write(sys.stdout)
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>
    >>> tree = ET.parse(SIMPLE_NS_XMLFILE)
    >>> normalize_crlf(tree)
    >>> tree.write(sys.stdout)
    <ns0:root xmlns:ns0="namespace">
       <ns0:element key="value">text</ns0:element>
       <ns0:element>text</ns0:element>tail
       <ns0:empty-element />
    </ns0:root>

    >>> with open(SIMPLE_XMLFILE) as f:
    ...     data = f.read()

    >>> parser = ET.XMLParser()
    >>> parser.version # doctest: +ELLIPSIS
    'Expat ...'
    >>> parser.feed(data)
    >>> print serialize(parser.close())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>

    >>> parser = ET.XMLTreeBuilder() # 1.2 compatibility
    >>> parser.feed(data)
    >>> print serialize(parser.close())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>

    >>> target = ET.TreeBuilder()
    >>> parser = ET.XMLParser(target=target)
    >>> parser.feed(data)
    >>> print serialize(parser.close())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>
    """
622 |
|
---|
def parseliteral():
    """
    Test parsing from string literals and serializing the result.

    Uses ET.XML(), ET.fromstring(), ET.fromstringlist() and ET.XMLID() as
    parsers, and ElementTree.write(), ET.tostring() and ET.tostringlist()
    as serializers.

    >>> element = ET.XML("<html><body>text</body></html>")
    >>> ET.ElementTree(element).write(sys.stdout)
    <html><body>text</body></html>
    >>> element = ET.fromstring("<html><body>text</body></html>")
    >>> ET.ElementTree(element).write(sys.stdout)
    <html><body>text</body></html>
    >>> sequence = ["<html><body>", "text</bo", "dy></html>"]
    >>> element = ET.fromstringlist(sequence)
    >>> print ET.tostring(element)
    <html><body>text</body></html>
    >>> print "".join(ET.tostringlist(element))
    <html><body>text</body></html>
    >>> ET.tostring(element, "ascii")
    "<?xml version='1.0' encoding='ascii'?>\\n<html><body>text</body></html>"
    >>> _, ids = ET.XMLID("<html><body>text</body></html>")
    >>> len(ids)
    0
    >>> _, ids = ET.XMLID("<html><body id='body'>text</body></html>")
    >>> len(ids)
    1
    >>> ids["body"].tag
    'body'
    """
648 |
|
---|
def iterparse():
    """
    Test iterparse interface.

    Covers the default events, an empty events tuple (passed positionally
    and by keyword), explicit start/end and namespace events, rejection of
    an unknown event name, non-ASCII namespace data in an iso-8859-1
    document, and a ParseError for junk after the document element.

    The non-ASCII namespace URI is written as the character reference
    &#233; so that this source stays pure ASCII and the parsed value is
    u'\\xe9' regardless of how the file itself is encoded.

    >>> iterparse = ET.iterparse

    >>> context = iterparse(SIMPLE_XMLFILE)
    >>> action, elem = next(context)
    >>> print action, elem.tag
    end element
    >>> for action, elem in context:
    ...     print action, elem.tag
    end element
    end empty-element
    end root
    >>> context.root.tag
    'root'

    >>> context = iterparse(SIMPLE_NS_XMLFILE)
    >>> for action, elem in context:
    ...     print action, elem.tag
    end {namespace}element
    end {namespace}element
    end {namespace}empty-element
    end {namespace}root

    >>> events = ()
    >>> context = iterparse(SIMPLE_XMLFILE, events)
    >>> for action, elem in context:
    ...     print action, elem.tag

    >>> events = ()
    >>> context = iterparse(SIMPLE_XMLFILE, events=events)
    >>> for action, elem in context:
    ...     print action, elem.tag

    >>> events = ("start", "end")
    >>> context = iterparse(SIMPLE_XMLFILE, events)
    >>> for action, elem in context:
    ...     print action, elem.tag
    start root
    start element
    end element
    start element
    end element
    start empty-element
    end empty-element
    end root

    >>> events = ("start", "end", "start-ns", "end-ns")
    >>> context = iterparse(SIMPLE_NS_XMLFILE, events)
    >>> for action, elem in context:
    ...     if action in ("start", "end"):
    ...         print action, elem.tag
    ...     else:
    ...         print action, elem
    start-ns ('', 'namespace')
    start {namespace}root
    start {namespace}element
    end {namespace}element
    start {namespace}element
    end {namespace}element
    start {namespace}empty-element
    end {namespace}empty-element
    end {namespace}root
    end-ns None

    >>> events = ("start", "end", "bogus")
    >>> with open(SIMPLE_XMLFILE, "rb") as f:
    ...     iterparse(f, events)
    Traceback (most recent call last):
    ValueError: unknown event 'bogus'

    >>> import StringIO

    >>> source = StringIO.StringIO(
    ...     "<?xml version='1.0' encoding='iso-8859-1'?>\\n"
    ...     "<body xmlns='http://&#233;ffbot.org/ns'\\n"
    ...     " xmlns:cl\\xe9='http://effbot.org/ns'>text</body>\\n")
    >>> events = ("start-ns",)
    >>> context = iterparse(source, events)
    >>> for action, elem in context:
    ...     print action, elem
    start-ns ('', u'http://\\xe9ffbot.org/ns')
    start-ns (u'cl\\xe9', 'http://effbot.org/ns')

    >>> source = StringIO.StringIO("<document />junk")
    >>> try:
    ...     for action, elem in iterparse(source):
    ...         print action, elem.tag
    ... except ET.ParseError, v:
    ...     print v
    end document
    junk after document element: line 1, column 12
    """
744 |
|
---|
def writefile():
    """
    Test serialization of text, subelements, comments and processing
    instructions, including an element whose tag is None.

    >>> elem = ET.Element("tag")
    >>> elem.text = "text"
    >>> serialize(elem)
    '<tag>text</tag>'
    >>> ET.SubElement(elem, "subtag").text = "subtext"
    >>> serialize(elem)
    '<tag>text<subtag>subtext</subtag></tag>'

    Test tag suppression
    >>> elem.tag = None
    >>> serialize(elem)
    'text<subtag>subtext</subtag>'
    >>> elem.insert(0, ET.Comment("comment"))
    >>> serialize(elem) # assumes 1.3
    'text<!--comment--><subtag>subtext</subtag>'
    >>> elem[0] = ET.PI("key", "value")
    >>> serialize(elem)
    'text<?key value?><subtag>subtext</subtag>'
    """
766 |
|
---|
def custom_builder():
    """
    Test parser w. custom builder.

    A target object passed to ET.XMLParser(target=...) receives start(),
    end() and data() calls; the second Builder additionally defines pi()
    and comment() and is fed the namespaced fixture.

    >>> with open(SIMPLE_XMLFILE) as f:
    ...     data = f.read()
    >>> class Builder:
    ...     def start(self, tag, attrib):
    ...         print "start", tag
    ...     def end(self, tag):
    ...         print "end", tag
    ...     def data(self, text):
    ...         pass
    >>> builder = Builder()
    >>> parser = ET.XMLParser(target=builder)
    >>> parser.feed(data)
    start root
    start element
    end element
    start element
    end element
    start empty-element
    end empty-element
    end root

    >>> with open(SIMPLE_NS_XMLFILE) as f:
    ...     data = f.read()
    >>> class Builder:
    ...     def start(self, tag, attrib):
    ...         print "start", tag
    ...     def end(self, tag):
    ...         print "end", tag
    ...     def data(self, text):
    ...         pass
    ...     def pi(self, target, data):
    ...         print "pi", target, repr(data)
    ...     def comment(self, data):
    ...         print "comment", repr(data)
    >>> builder = Builder()
    >>> parser = ET.XMLParser(target=builder)
    >>> parser.feed(data)
    pi pi 'data'
    comment ' comment '
    start {namespace}root
    start {namespace}element
    end {namespace}element
    start {namespace}element
    end {namespace}element
    start {namespace}empty-element
    end {namespace}empty-element
    end {namespace}root

    """
820 |
|
---|
def getchildren():
    """
    Test Element.getchildren()

    Walks the parsed fixture with both iter() and getiterator(), then
    checks that getchildren() stays consistent with indexing and slicing
    while children are deleted, reordered and cleared.

    >>> with open(SIMPLE_XMLFILE, "r") as f:
    ...     tree = ET.parse(f)
    >>> for elem in tree.getroot().iter():
    ...     summarize_list(elem.getchildren())
    ['element', 'element', 'empty-element']
    []
    []
    []
    >>> for elem in tree.getiterator():
    ...     summarize_list(elem.getchildren())
    ['element', 'element', 'empty-element']
    []
    []
    []

    >>> elem = ET.XML(SAMPLE_XML)
    >>> len(elem.getchildren())
    3
    >>> len(elem[2].getchildren())
    1
    >>> elem[:] == elem.getchildren()
    True
    >>> child1 = elem[0]
    >>> child2 = elem[2]
    >>> del elem[1:2]
    >>> len(elem.getchildren())
    2
    >>> child1 == elem[0]
    True
    >>> child2 == elem[1]
    True
    >>> elem[0:2] = [child2, child1]
    >>> child2 == elem[0]
    True
    >>> child1 == elem[1]
    True
    >>> child1 == elem[0]
    False
    >>> elem.clear()
    >>> elem.getchildren()
    []
    """
867 |
|
---|
868 | def writestring():
|
---|
869 | """
|
---|
870 | >>> elem = ET.XML("<html><body>text</body></html>")
|
---|
871 | >>> ET.tostring(elem)
|
---|
872 | '<html><body>text</body></html>'
|
---|
873 | >>> elem = ET.fromstring("<html><body>text</body></html>")
|
---|
874 | >>> ET.tostring(elem)
|
---|
875 | '<html><body>text</body></html>'
|
---|
876 | """
|
---|
877 |
|
---|
def check_encoding(encoding):
    """
    Parse a minimal document whose XML declaration names *encoding*.

    Nothing is returned; the call only succeeds or raises, which is what
    the examples below rely on.

    >>> check_encoding("ascii")
    >>> check_encoding("us-ascii")
    >>> check_encoding("iso-8859-1")
    >>> check_encoding("iso-8859-15")
    >>> check_encoding("cp437")
    >>> check_encoding("mac-roman")
    >>> check_encoding("gbk")
    Traceback (most recent call last):
    ValueError: multi-byte encodings are not supported
    >>> check_encoding("cp037")
    Traceback (most recent call last):
    ParseError: unknown encoding: line 1, column 30
    """
    # Build the declaration first, then append the (empty) root element.
    declaration = "<?xml version='1.0' encoding='%s'?>" % encoding
    ET.XML(declaration + "<xml />")
894 |
|
---|
895 | def encoding():
|
---|
896 | r"""
|
---|
897 | Test encoding issues.
|
---|
898 |
|
---|
899 | >>> elem = ET.Element("tag")
|
---|
900 | >>> elem.text = u"abc"
|
---|
901 | >>> serialize(elem)
|
---|
902 | '<tag>abc</tag>'
|
---|
903 | >>> serialize(elem, encoding="utf-8")
|
---|
904 | '<tag>abc</tag>'
|
---|
905 | >>> serialize(elem, encoding="us-ascii")
|
---|
906 | '<tag>abc</tag>'
|
---|
907 | >>> serialize(elem, encoding="iso-8859-1")
|
---|
908 | "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>abc</tag>"
|
---|
909 |
|
---|
910 | >>> elem.text = "<&\"\'>"
|
---|
911 | >>> serialize(elem)
|
---|
912 | '<tag><&"\'></tag>'
|
---|
913 | >>> serialize(elem, encoding="utf-8")
|
---|
914 | '<tag><&"\'></tag>'
|
---|
915 | >>> serialize(elem, encoding="us-ascii") # cdata characters
|
---|
916 | '<tag><&"\'></tag>'
|
---|
917 | >>> serialize(elem, encoding="iso-8859-1")
|
---|
918 | '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag><&"\'></tag>'
|
---|
919 |
|
---|
920 | >>> elem.attrib["key"] = "<&\"\'>"
|
---|
921 | >>> elem.text = None
|
---|
922 | >>> serialize(elem)
|
---|
923 | '<tag key="<&"\'>" />'
|
---|
924 | >>> serialize(elem, encoding="utf-8")
|
---|
925 | '<tag key="<&"\'>" />'
|
---|
926 | >>> serialize(elem, encoding="us-ascii")
|
---|
927 | '<tag key="<&"\'>" />'
|
---|
928 | >>> serialize(elem, encoding="iso-8859-1")
|
---|
929 | '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="<&"\'>" />'
|
---|
930 |
|
---|
931 | >>> elem.text = u'\xe5\xf6\xf6<>'
|
---|
932 | >>> elem.attrib.clear()
|
---|
933 | >>> serialize(elem)
|
---|
934 | '<tag>åöö<></tag>'
|
---|
935 | >>> serialize(elem, encoding="utf-8")
|
---|
936 | '<tag>\xc3\xa5\xc3\xb6\xc3\xb6<></tag>'
|
---|
937 | >>> serialize(elem, encoding="us-ascii")
|
---|
938 | '<tag>åöö<></tag>'
|
---|
939 | >>> serialize(elem, encoding="iso-8859-1")
|
---|
940 | "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>\xe5\xf6\xf6<></tag>"
|
---|
941 |
|
---|
942 | >>> elem.attrib["key"] = u'\xe5\xf6\xf6<>'
|
---|
943 | >>> elem.text = None
|
---|
944 | >>> serialize(elem)
|
---|
945 | '<tag key="åöö<>" />'
|
---|
946 | >>> serialize(elem, encoding="utf-8")
|
---|
947 | '<tag key="\xc3\xa5\xc3\xb6\xc3\xb6<>" />'
|
---|
948 | >>> serialize(elem, encoding="us-ascii")
|
---|
949 | '<tag key="åöö<>" />'
|
---|
950 | >>> serialize(elem, encoding="iso-8859-1")
|
---|
951 | '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="\xe5\xf6\xf6<>" />'
|
---|
952 | """
|
---|
953 |
|
---|
954 | def methods():
|
---|
955 | r"""
|
---|
956 | Test serialization methods.
|
---|
957 |
|
---|
958 | >>> e = ET.XML("<html><link/><script>1 < 2</script></html>")
|
---|
959 | >>> e.tail = "\n"
|
---|
960 | >>> serialize(e)
|
---|
961 | '<html><link /><script>1 < 2</script></html>\n'
|
---|
962 | >>> serialize(e, method=None)
|
---|
963 | '<html><link /><script>1 < 2</script></html>\n'
|
---|
964 | >>> serialize(e, method="xml")
|
---|
965 | '<html><link /><script>1 < 2</script></html>\n'
|
---|
966 | >>> serialize(e, method="html")
|
---|
967 | '<html><link><script>1 < 2</script></html>\n'
|
---|
968 | >>> serialize(e, method="text")
|
---|
969 | '1 < 2\n'
|
---|
970 | """
|
---|
971 |
|
---|
972 | def iterators():
|
---|
973 | """
|
---|
974 | Test iterators.
|
---|
975 |
|
---|
976 | >>> e = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
|
---|
977 | >>> summarize_list(e.iter())
|
---|
978 | ['html', 'body', 'i']
|
---|
979 | >>> summarize_list(e.find("body").iter())
|
---|
980 | ['body', 'i']
|
---|
981 | >>> summarize(next(e.iter()))
|
---|
982 | 'html'
|
---|
983 | >>> "".join(e.itertext())
|
---|
984 | 'this is a paragraph...'
|
---|
985 | >>> "".join(e.find("body").itertext())
|
---|
986 | 'this is a paragraph.'
|
---|
987 | >>> next(e.itertext())
|
---|
988 | 'this is a '
|
---|
989 |
|
---|
990 | Method iterparse should return an iterator. See bug 6472.
|
---|
991 |
|
---|
992 | >>> sourcefile = serialize(e, to_string=False)
|
---|
993 | >>> next(ET.iterparse(sourcefile)) # doctest: +ELLIPSIS
|
---|
994 | ('end', <Element 'i' at 0x...>)
|
---|
995 |
|
---|
996 | >>> tree = ET.ElementTree(None)
|
---|
997 | >>> tree.iter()
|
---|
998 | Traceback (most recent call last):
|
---|
999 | AttributeError: 'NoneType' object has no attribute 'iter'
|
---|
1000 | """
|
---|
1001 |
|
---|
1002 | ENTITY_XML = """\
|
---|
1003 | <!DOCTYPE points [
|
---|
1004 | <!ENTITY % user-entities SYSTEM 'user-entities.xml'>
|
---|
1005 | %user-entities;
|
---|
1006 | ]>
|
---|
1007 | <document>&entity;</document>
|
---|
1008 | """
|
---|
1009 |
|
---|
1010 | def entity():
|
---|
1011 | """
|
---|
1012 | Test entity handling.
|
---|
1013 |
|
---|
1014 | 1) good entities
|
---|
1015 |
|
---|
1016 | >>> e = ET.XML("<document title='舰'>test</document>")
|
---|
1017 | >>> serialize(e)
|
---|
1018 | '<document title="舰">test</document>'
|
---|
1019 |
|
---|
1020 | 2) bad entities
|
---|
1021 |
|
---|
1022 | >>> ET.XML("<document>&entity;</document>")
|
---|
1023 | Traceback (most recent call last):
|
---|
1024 | ParseError: undefined entity: line 1, column 10
|
---|
1025 |
|
---|
1026 | >>> ET.XML(ENTITY_XML)
|
---|
1027 | Traceback (most recent call last):
|
---|
1028 | ParseError: undefined entity &entity;: line 5, column 10
|
---|
1029 |
|
---|
1030 | 3) custom entity
|
---|
1031 |
|
---|
1032 | >>> parser = ET.XMLParser()
|
---|
1033 | >>> parser.entity["entity"] = "text"
|
---|
1034 | >>> parser.feed(ENTITY_XML)
|
---|
1035 | >>> root = parser.close()
|
---|
1036 | >>> serialize(root)
|
---|
1037 | '<document>text</document>'
|
---|
1038 | """
|
---|
1039 |
|
---|
def error(xml):
    """
    Parse *xml* and hand back the ParseError it triggers.

    Returns the ET.ParseError instance raised by ET.XML(xml), or None
    when the input parses without error.  Any other exception type is
    allowed to propagate.

    Test error handling.

    >>> issubclass(ET.ParseError, SyntaxError)
    True
    >>> error("foo").position
    (1, 0)
    >>> error("<tag>&foo;</tag>").position
    (1, 5)
    >>> error("foobar<").position
    (1, 6)

    """
    try:
        ET.XML(xml)
    except ET.ParseError as exc:
        # Bind the exception directly rather than reading the deprecated,
        # process-global sys.exc_value.
        return exc
    return None
1059 |
|
---|
1060 | def namespace():
|
---|
1061 | """
|
---|
1062 | Test namespace issues.
|
---|
1063 |
|
---|
1064 | 1) xml namespace
|
---|
1065 |
|
---|
1066 | >>> elem = ET.XML("<tag xml:lang='en' />")
|
---|
1067 | >>> serialize(elem) # 1.1
|
---|
1068 | '<tag xml:lang="en" />'
|
---|
1069 |
|
---|
1070 | 2) other "well-known" namespaces
|
---|
1071 |
|
---|
1072 | >>> elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />")
|
---|
1073 | >>> serialize(elem) # 2.1
|
---|
1074 | '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />'
|
---|
1075 |
|
---|
1076 | >>> elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />")
|
---|
1077 | >>> serialize(elem) # 2.2
|
---|
1078 | '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />'
|
---|
1079 |
|
---|
1080 | >>> elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />")
|
---|
1081 | >>> serialize(elem) # 2.3
|
---|
1082 | '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />'
|
---|
1083 |
|
---|
1084 | 3) unknown namespaces
|
---|
1085 | >>> elem = ET.XML(SAMPLE_XML_NS)
|
---|
1086 | >>> print serialize(elem)
|
---|
1087 | <ns0:body xmlns:ns0="http://effbot.org/ns">
|
---|
1088 | <ns0:tag>text</ns0:tag>
|
---|
1089 | <ns0:tag />
|
---|
1090 | <ns0:section>
|
---|
1091 | <ns0:tag>subtext</ns0:tag>
|
---|
1092 | </ns0:section>
|
---|
1093 | </ns0:body>
|
---|
1094 | """
|
---|
1095 |
|
---|
1096 | def qname():
|
---|
1097 | """
|
---|
1098 | Test QName handling.
|
---|
1099 |
|
---|
1100 | 1) decorated tags
|
---|
1101 |
|
---|
1102 | >>> elem = ET.Element("{uri}tag")
|
---|
1103 | >>> serialize(elem) # 1.1
|
---|
1104 | '<ns0:tag xmlns:ns0="uri" />'
|
---|
1105 | >>> elem = ET.Element(ET.QName("{uri}tag"))
|
---|
1106 | >>> serialize(elem) # 1.2
|
---|
1107 | '<ns0:tag xmlns:ns0="uri" />'
|
---|
1108 | >>> elem = ET.Element(ET.QName("uri", "tag"))
|
---|
1109 | >>> serialize(elem) # 1.3
|
---|
1110 | '<ns0:tag xmlns:ns0="uri" />'
|
---|
1111 | >>> elem = ET.Element(ET.QName("uri", "tag"))
|
---|
1112 | >>> subelem = ET.SubElement(elem, ET.QName("uri", "tag1"))
|
---|
1113 | >>> subelem = ET.SubElement(elem, ET.QName("uri", "tag2"))
|
---|
1114 | >>> serialize(elem) # 1.4
|
---|
1115 | '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>'
|
---|
1116 |
|
---|
1117 | 2) decorated attributes
|
---|
1118 |
|
---|
1119 | >>> elem.clear()
|
---|
1120 | >>> elem.attrib["{uri}key"] = "value"
|
---|
1121 | >>> serialize(elem) # 2.1
|
---|
1122 | '<ns0:tag xmlns:ns0="uri" ns0:key="value" />'
|
---|
1123 |
|
---|
1124 | >>> elem.clear()
|
---|
1125 | >>> elem.attrib[ET.QName("{uri}key")] = "value"
|
---|
1126 | >>> serialize(elem) # 2.2
|
---|
1127 | '<ns0:tag xmlns:ns0="uri" ns0:key="value" />'
|
---|
1128 |
|
---|
1129 | 3) decorated values are not converted by default, but the
|
---|
1130 | QName wrapper can be used for values
|
---|
1131 |
|
---|
1132 | >>> elem.clear()
|
---|
1133 | >>> elem.attrib["{uri}key"] = "{uri}value"
|
---|
1134 | >>> serialize(elem) # 3.1
|
---|
1135 | '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />'
|
---|
1136 |
|
---|
1137 | >>> elem.clear()
|
---|
1138 | >>> elem.attrib["{uri}key"] = ET.QName("{uri}value")
|
---|
1139 | >>> serialize(elem) # 3.2
|
---|
1140 | '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />'
|
---|
1141 |
|
---|
1142 | >>> elem.clear()
|
---|
1143 | >>> subelem = ET.Element("tag")
|
---|
1144 | >>> subelem.attrib["{uri1}key"] = ET.QName("{uri2}value")
|
---|
1145 | >>> elem.append(subelem)
|
---|
1146 | >>> elem.append(subelem)
|
---|
1147 | >>> serialize(elem) # 3.3
|
---|
1148 | '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2"><tag ns1:key="ns2:value" /><tag ns1:key="ns2:value" /></ns0:tag>'
|
---|
1149 |
|
---|
1150 | 4) Direct QName tests
|
---|
1151 |
|
---|
1152 | >>> str(ET.QName('ns', 'tag'))
|
---|
1153 | '{ns}tag'
|
---|
1154 | >>> str(ET.QName('{ns}tag'))
|
---|
1155 | '{ns}tag'
|
---|
1156 | >>> q1 = ET.QName('ns', 'tag')
|
---|
1157 | >>> q2 = ET.QName('ns', 'tag')
|
---|
1158 | >>> q1 == q2
|
---|
1159 | True
|
---|
1160 | >>> q2 = ET.QName('ns', 'other-tag')
|
---|
1161 | >>> q1 == q2
|
---|
1162 | False
|
---|
1163 | >>> q1 == 'ns:tag'
|
---|
1164 | False
|
---|
1165 | >>> q1 == '{ns}tag'
|
---|
1166 | True
|
---|
1167 | """
|
---|
1168 |
|
---|
1169 | def doctype_public():
|
---|
1170 | """
|
---|
1171 | Test PUBLIC doctype.
|
---|
1172 |
|
---|
1173 | >>> elem = ET.XML('<!DOCTYPE html PUBLIC'
|
---|
1174 | ... ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
|
---|
1175 | ... ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
|
---|
1176 | ... '<html>text</html>')
|
---|
1177 |
|
---|
1178 | """
|
---|
1179 |
|
---|
def xpath_tokenizer(p):
    """
    Test the XPath tokenizer.

    Runs ElementPath.xpath_tokenizer over the path expression *p* and
    flattens each (op, tag) pair to whichever member is non-empty.

    >>> # tests from the xml specification
    >>> xpath_tokenizer("*")
    ['*']
    >>> xpath_tokenizer("text()")
    ['text', '()']
    >>> xpath_tokenizer("@name")
    ['@', 'name']
    >>> xpath_tokenizer("@*")
    ['@', '*']
    >>> xpath_tokenizer("para[1]")
    ['para', '[', '1', ']']
    >>> xpath_tokenizer("para[last()]")
    ['para', '[', 'last', '()', ']']
    >>> xpath_tokenizer("*/para")
    ['*', '/', 'para']
    >>> xpath_tokenizer("/doc/chapter[5]/section[2]")
    ['/', 'doc', '/', 'chapter', '[', '5', ']', '/', 'section', '[', '2', ']']
    >>> xpath_tokenizer("chapter//para")
    ['chapter', '//', 'para']
    >>> xpath_tokenizer("//para")
    ['//', 'para']
    >>> xpath_tokenizer("//olist/item")
    ['//', 'olist', '/', 'item']
    >>> xpath_tokenizer(".")
    ['.']
    >>> xpath_tokenizer(".//para")
    ['.', '//', 'para']
    >>> xpath_tokenizer("..")
    ['..']
    >>> xpath_tokenizer("../@lang")
    ['..', '/', '@', 'lang']
    >>> xpath_tokenizer("chapter[title]")
    ['chapter', '[', 'title', ']']
    >>> xpath_tokenizer("employee[@secretary and @assistant]")
    ['employee', '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']']

    >>> # additional tests
    >>> xpath_tokenizer("{http://spam}egg")
    ['{http://spam}egg']
    >>> xpath_tokenizer("./spam.egg")
    ['.', '/', 'spam.egg']
    >>> xpath_tokenizer(".//{http://spam}egg")
    ['.', '//', '{http://spam}egg']
    """
    from xml.etree import ElementPath
    # Each token is an (op, tag) pair; keep the operator when present,
    # otherwise fall back to the tag text.
    return [op or tag for op, tag in ElementPath.xpath_tokenizer(p)]
1233 |
|
---|
1234 | def processinginstruction():
|
---|
1235 | """
|
---|
1236 | Test ProcessingInstruction directly
|
---|
1237 |
|
---|
1238 | >>> ET.tostring(ET.ProcessingInstruction('test', 'instruction'))
|
---|
1239 | '<?test instruction?>'
|
---|
1240 | >>> ET.tostring(ET.PI('test', 'instruction'))
|
---|
1241 | '<?test instruction?>'
|
---|
1242 |
|
---|
1243 | Issue #2746
|
---|
1244 |
|
---|
1245 | >>> ET.tostring(ET.PI('test', '<testing&>'))
|
---|
1246 | '<?test <testing&>?>'
|
---|
1247 | >>> ET.tostring(ET.PI('test', u'<testing&>\xe3'), 'latin1')
|
---|
1248 | "<?xml version='1.0' encoding='latin1'?>\\n<?test <testing&>\\xe3?>"
|
---|
1249 | """
|
---|
1250 |
|
---|
1251 | #
|
---|
1252 | # xinclude tests (samples from appendix C of the xinclude specification)
|
---|
1253 |
|
---|
1254 | XINCLUDE = {}
|
---|
1255 |
|
---|
1256 | XINCLUDE["C1.xml"] = """\
|
---|
1257 | <?xml version='1.0'?>
|
---|
1258 | <document xmlns:xi="http://www.w3.org/2001/XInclude">
|
---|
1259 | <p>120 Mz is adequate for an average home user.</p>
|
---|
1260 | <xi:include href="disclaimer.xml"/>
|
---|
1261 | </document>
|
---|
1262 | """
|
---|
1263 |
|
---|
1264 | XINCLUDE["disclaimer.xml"] = """\
|
---|
1265 | <?xml version='1.0'?>
|
---|
1266 | <disclaimer>
|
---|
1267 | <p>The opinions represented herein represent those of the individual
|
---|
1268 | and should not be interpreted as official policy endorsed by this
|
---|
1269 | organization.</p>
|
---|
1270 | </disclaimer>
|
---|
1271 | """
|
---|
1272 |
|
---|
1273 | XINCLUDE["C2.xml"] = """\
|
---|
1274 | <?xml version='1.0'?>
|
---|
1275 | <document xmlns:xi="http://www.w3.org/2001/XInclude">
|
---|
1276 | <p>This document has been accessed
|
---|
1277 | <xi:include href="count.txt" parse="text"/> times.</p>
|
---|
1278 | </document>
|
---|
1279 | """
|
---|
1280 |
|
---|
1281 | XINCLUDE["count.txt"] = "324387"
|
---|
1282 |
|
---|
1283 | XINCLUDE["C2b.xml"] = """\
|
---|
1284 | <?xml version='1.0'?>
|
---|
1285 | <document xmlns:xi="http://www.w3.org/2001/XInclude">
|
---|
1286 | <p>This document has been <em>accessed</em>
|
---|
1287 | <xi:include href="count.txt" parse="text"/> times.</p>
|
---|
1288 | </document>
|
---|
1289 | """
|
---|
1290 |
|
---|
1291 | XINCLUDE["C3.xml"] = """\
|
---|
1292 | <?xml version='1.0'?>
|
---|
1293 | <document xmlns:xi="http://www.w3.org/2001/XInclude">
|
---|
1294 | <p>The following is the source of the "data.xml" resource:</p>
|
---|
1295 | <example><xi:include href="data.xml" parse="text"/></example>
|
---|
1296 | </document>
|
---|
1297 | """
|
---|
1298 |
|
---|
1299 | XINCLUDE["data.xml"] = """\
|
---|
1300 | <?xml version='1.0'?>
|
---|
1301 | <data>
|
---|
1302 | <item><![CDATA[Brooks & Shields]]></item>
|
---|
1303 | </data>
|
---|
1304 | """
|
---|
1305 |
|
---|
1306 | XINCLUDE["C5.xml"] = """\
|
---|
1307 | <?xml version='1.0'?>
|
---|
1308 | <div xmlns:xi="http://www.w3.org/2001/XInclude">
|
---|
1309 | <xi:include href="example.txt" parse="text">
|
---|
1310 | <xi:fallback>
|
---|
1311 | <xi:include href="fallback-example.txt" parse="text">
|
---|
1312 | <xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback>
|
---|
1313 | </xi:include>
|
---|
1314 | </xi:fallback>
|
---|
1315 | </xi:include>
|
---|
1316 | </div>
|
---|
1317 | """
|
---|
1318 |
|
---|
1319 | XINCLUDE["default.xml"] = """\
|
---|
1320 | <?xml version='1.0'?>
|
---|
1321 | <document xmlns:xi="http://www.w3.org/2001/XInclude">
|
---|
1322 | <p>Example.</p>
|
---|
1323 | <xi:include href="{}"/>
|
---|
1324 | </document>
|
---|
1325 | """.format(cgi.escape(SIMPLE_XMLFILE, True))
|
---|
1326 |
|
---|
def xinclude_loader(href, parse="xml", encoding=None):
    """Serve XInclude resources from the in-memory XINCLUDE table.

    href     -- key into XINCLUDE; an unknown key raises IOError.
    parse    -- "xml" returns the resource parsed into an element; any
                other value returns the stored string unchanged.
    encoding -- accepted for loader-signature compatibility; not used here.
    """
    try:
        data = XINCLUDE[href]
    except KeyError:
        raise IOError("resource not found")
    if parse != "xml":
        # Non-XML resources are returned verbatim.
        return data
    # Imported locally, so parsing uses xml.etree.ElementTree.XML rather
    # than the module-level ET symbol.
    from xml.etree.ElementTree import XML
    return XML(data)
1336 |
|
---|
1337 | def xinclude():
|
---|
1338 | r"""
|
---|
1339 | Basic inclusion example (XInclude C.1)
|
---|
1340 |
|
---|
1341 | >>> from xml.etree import ElementTree as ET
|
---|
1342 | >>> from xml.etree import ElementInclude
|
---|
1343 |
|
---|
1344 | >>> document = xinclude_loader("C1.xml")
|
---|
1345 | >>> ElementInclude.include(document, xinclude_loader)
|
---|
1346 | >>> print serialize(document) # C1
|
---|
1347 | <document>
|
---|
1348 | <p>120 Mz is adequate for an average home user.</p>
|
---|
1349 | <disclaimer>
|
---|
1350 | <p>The opinions represented herein represent those of the individual
|
---|
1351 | and should not be interpreted as official policy endorsed by this
|
---|
1352 | organization.</p>
|
---|
1353 | </disclaimer>
|
---|
1354 | </document>
|
---|
1355 |
|
---|
1356 | Textual inclusion example (XInclude C.2)
|
---|
1357 |
|
---|
1358 | >>> document = xinclude_loader("C2.xml")
|
---|
1359 | >>> ElementInclude.include(document, xinclude_loader)
|
---|
1360 | >>> print serialize(document) # C2
|
---|
1361 | <document>
|
---|
1362 | <p>This document has been accessed
|
---|
1363 | 324387 times.</p>
|
---|
1364 | </document>
|
---|
1365 |
|
---|
1366 | Textual inclusion after sibling element (based on modified XInclude C.2)
|
---|
1367 |
|
---|
1368 | >>> document = xinclude_loader("C2b.xml")
|
---|
1369 | >>> ElementInclude.include(document, xinclude_loader)
|
---|
1370 | >>> print(serialize(document)) # C2b
|
---|
1371 | <document>
|
---|
1372 | <p>This document has been <em>accessed</em>
|
---|
1373 | 324387 times.</p>
|
---|
1374 | </document>
|
---|
1375 |
|
---|
1376 | Textual inclusion of XML example (XInclude C.3)
|
---|
1377 |
|
---|
1378 | >>> document = xinclude_loader("C3.xml")
|
---|
1379 | >>> ElementInclude.include(document, xinclude_loader)
|
---|
1380 | >>> print serialize(document) # C3
|
---|
1381 | <document>
|
---|
1382 | <p>The following is the source of the "data.xml" resource:</p>
|
---|
1383 | <example><?xml version='1.0'?>
|
---|
1384 | <data>
|
---|
1385 | <item><![CDATA[Brooks & Shields]]></item>
|
---|
1386 | </data>
|
---|
1387 | </example>
|
---|
1388 | </document>
|
---|
1389 |
|
---|
1390 | Fallback example (XInclude C.5)
|
---|
1391 | Note! Fallback support is not yet implemented
|
---|
1392 |
|
---|
1393 | >>> document = xinclude_loader("C5.xml")
|
---|
1394 | >>> ElementInclude.include(document, xinclude_loader)
|
---|
1395 | Traceback (most recent call last):
|
---|
1396 | IOError: resource not found
|
---|
1397 | >>> # print serialize(document) # C5
|
---|
1398 | """
|
---|
1399 |
|
---|
1400 | def xinclude_default():
|
---|
1401 | """
|
---|
1402 | >>> from xml.etree import ElementInclude
|
---|
1403 |
|
---|
1404 | >>> document = xinclude_loader("default.xml")
|
---|
1405 | >>> ElementInclude.include(document)
|
---|
1406 | >>> print serialize(document) # default
|
---|
1407 | <document>
|
---|
1408 | <p>Example.</p>
|
---|
1409 | <root>
|
---|
1410 | <element key="value">text</element>
|
---|
1411 | <element>text</element>tail
|
---|
1412 | <empty-element />
|
---|
1413 | </root>
|
---|
1414 | </document>
|
---|
1415 | """
|
---|
1416 |
|
---|
1417 | #
|
---|
1418 | # badly formatted xi:include tags
|
---|
1419 |
|
---|
1420 | XINCLUDE_BAD = {}
|
---|
1421 |
|
---|
1422 | XINCLUDE_BAD["B1.xml"] = """\
|
---|
1423 | <?xml version='1.0'?>
|
---|
1424 | <document xmlns:xi="http://www.w3.org/2001/XInclude">
|
---|
1425 | <p>120 Mz is adequate for an average home user.</p>
|
---|
1426 | <xi:include href="disclaimer.xml" parse="BAD_TYPE"/>
|
---|
1427 | </document>
|
---|
1428 | """
|
---|
1429 |
|
---|
1430 | XINCLUDE_BAD["B2.xml"] = """\
|
---|
1431 | <?xml version='1.0'?>
|
---|
1432 | <div xmlns:xi="http://www.w3.org/2001/XInclude">
|
---|
1433 | <xi:fallback></xi:fallback>
|
---|
1434 | </div>
|
---|
1435 | """
|
---|
1436 |
|
---|
1437 | def xinclude_failures():
|
---|
1438 | r"""
|
---|
1439 | Test failure to locate included XML file.
|
---|
1440 |
|
---|
1441 | >>> from xml.etree import ElementInclude
|
---|
1442 |
|
---|
1443 | >>> def none_loader(href, parser, encoding=None):
|
---|
1444 | ... return None
|
---|
1445 |
|
---|
1446 | >>> document = ET.XML(XINCLUDE["C1.xml"])
|
---|
1447 | >>> ElementInclude.include(document, loader=none_loader)
|
---|
1448 | Traceback (most recent call last):
|
---|
1449 | FatalIncludeError: cannot load 'disclaimer.xml' as 'xml'
|
---|
1450 |
|
---|
1451 | Test failure to locate included text file.
|
---|
1452 |
|
---|
1453 | >>> document = ET.XML(XINCLUDE["C2.xml"])
|
---|
1454 | >>> ElementInclude.include(document, loader=none_loader)
|
---|
1455 | Traceback (most recent call last):
|
---|
1456 | FatalIncludeError: cannot load 'count.txt' as 'text'
|
---|
1457 |
|
---|
1458 | Test bad parse type.
|
---|
1459 |
|
---|
1460 | >>> document = ET.XML(XINCLUDE_BAD["B1.xml"])
|
---|
1461 | >>> ElementInclude.include(document, loader=none_loader)
|
---|
1462 | Traceback (most recent call last):
|
---|
1463 | FatalIncludeError: unknown parse type in xi:include tag ('BAD_TYPE')
|
---|
1464 |
|
---|
1465 | Test xi:fallback outside xi:include.
|
---|
1466 |
|
---|
1467 | >>> document = ET.XML(XINCLUDE_BAD["B2.xml"])
|
---|
1468 | >>> ElementInclude.include(document, loader=none_loader)
|
---|
1469 | Traceback (most recent call last):
|
---|
1470 | FatalIncludeError: xi:fallback tag must be child of xi:include ('{http://www.w3.org/2001/XInclude}fallback')
|
---|
1471 | """
|
---|
1472 |
|
---|
1473 | # --------------------------------------------------------------------
|
---|
1474 | # reported bugs
|
---|
1475 |
|
---|
1476 | def bug_xmltoolkit21():
|
---|
1477 | """
|
---|
1478 |
|
---|
1479 | marshaller gives obscure errors for non-string values
|
---|
1480 |
|
---|
1481 | >>> elem = ET.Element(123)
|
---|
1482 | >>> serialize(elem) # tag
|
---|
1483 | Traceback (most recent call last):
|
---|
1484 | TypeError: cannot serialize 123 (type int)
|
---|
1485 | >>> elem = ET.Element("elem")
|
---|
1486 | >>> elem.text = 123
|
---|
1487 | >>> serialize(elem) # text
|
---|
1488 | Traceback (most recent call last):
|
---|
1489 | TypeError: cannot serialize 123 (type int)
|
---|
1490 | >>> elem = ET.Element("elem")
|
---|
1491 | >>> elem.tail = 123
|
---|
1492 | >>> serialize(elem) # tail
|
---|
1493 | Traceback (most recent call last):
|
---|
1494 | TypeError: cannot serialize 123 (type int)
|
---|
1495 | >>> elem = ET.Element("elem")
|
---|
1496 | >>> elem.set(123, "123")
|
---|
1497 | >>> serialize(elem) # attribute key
|
---|
1498 | Traceback (most recent call last):
|
---|
1499 | TypeError: cannot serialize 123 (type int)
|
---|
1500 | >>> elem = ET.Element("elem")
|
---|
1501 | >>> elem.set("123", 123)
|
---|
1502 | >>> serialize(elem) # attribute value
|
---|
1503 | Traceback (most recent call last):
|
---|
1504 | TypeError: cannot serialize 123 (type int)
|
---|
1505 |
|
---|
1506 | """
|
---|
1507 |
|
---|
1508 | def bug_xmltoolkit25():
|
---|
1509 | """
|
---|
1510 |
|
---|
1511 | typo in ElementTree.findtext
|
---|
1512 |
|
---|
1513 | >>> elem = ET.XML(SAMPLE_XML)
|
---|
1514 | >>> tree = ET.ElementTree(elem)
|
---|
1515 | >>> tree.findtext("tag")
|
---|
1516 | 'text'
|
---|
1517 | >>> tree.findtext("section/tag")
|
---|
1518 | 'subtext'
|
---|
1519 |
|
---|
1520 | """
|
---|
1521 |
|
---|
1522 | def bug_xmltoolkit28():
|
---|
1523 | """
|
---|
1524 |
|
---|
1525 | .//tag causes exceptions
|
---|
1526 |
|
---|
1527 | >>> tree = ET.XML("<doc><table><tbody/></table></doc>")
|
---|
1528 | >>> summarize_list(tree.findall(".//thead"))
|
---|
1529 | []
|
---|
1530 | >>> summarize_list(tree.findall(".//tbody"))
|
---|
1531 | ['tbody']
|
---|
1532 |
|
---|
1533 | """
|
---|
1534 |
|
---|
1535 | def bug_xmltoolkitX1():
|
---|
1536 | """
|
---|
1537 |
|
---|
1538 | dump() doesn't flush the output buffer
|
---|
1539 |
|
---|
1540 | >>> tree = ET.XML("<doc><table><tbody/></table></doc>")
|
---|
1541 | >>> ET.dump(tree); sys.stdout.write("tail")
|
---|
1542 | <doc><table><tbody /></table></doc>
|
---|
1543 | tail
|
---|
1544 |
|
---|
1545 | """
|
---|
1546 |
|
---|
1547 | def bug_xmltoolkit39():
|
---|
1548 | """
|
---|
1549 |
|
---|
1550 | non-ascii element and attribute names don't work
|
---|
1551 |
|
---|
1552 | >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />")
|
---|
1553 | >>> ET.tostring(tree, "utf-8")
|
---|
1554 | '<t\\xc3\\xa4g />'
|
---|
1555 |
|
---|
1556 | >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><tag \xe4ttr='välue' />")
|
---|
1557 | >>> tree.attrib
|
---|
1558 | {u'\\xe4ttr': u'v\\xe4lue'}
|
---|
1559 | >>> ET.tostring(tree, "utf-8")
|
---|
1560 | '<tag \\xc3\\xa4ttr="v\\xc3\\xa4lue" />'
|
---|
1561 |
|
---|
1562 | >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g>text</t\xe4g>")
|
---|
1563 | >>> ET.tostring(tree, "utf-8")
|
---|
1564 | '<t\\xc3\\xa4g>text</t\\xc3\\xa4g>'
|
---|
1565 |
|
---|
1566 | >>> tree = ET.Element(u"t\u00e4g")
|
---|
1567 | >>> ET.tostring(tree, "utf-8")
|
---|
1568 | '<t\\xc3\\xa4g />'
|
---|
1569 |
|
---|
1570 | >>> tree = ET.Element("tag")
|
---|
1571 | >>> tree.set(u"\u00e4ttr", u"v\u00e4lue")
|
---|
1572 | >>> ET.tostring(tree, "utf-8")
|
---|
1573 | '<tag \\xc3\\xa4ttr="v\\xc3\\xa4lue" />'
|
---|
1574 |
|
---|
1575 | """
|
---|
1576 |
|
---|
1577 | def bug_xmltoolkit54():
|
---|
1578 | """
|
---|
1579 |
|
---|
1580 | problems handling internally defined entities
|
---|
1581 |
|
---|
1582 | >>> e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '舰'>]><doc>&ldots;</doc>")
|
---|
1583 | >>> serialize(e)
|
---|
1584 | '<doc>舰</doc>'
|
---|
1585 |
|
---|
1586 | """
|
---|
1587 |
|
---|
1588 | def bug_xmltoolkit55():
|
---|
1589 | """
|
---|
1590 |
|
---|
1591 | make sure we're reporting the first error, not the last
|
---|
1592 |
|
---|
1593 | >>> e = ET.XML("<!DOCTYPE doc SYSTEM 'doc.dtd'><doc>&ldots;&ndots;&rdots;</doc>")
|
---|
1594 | Traceback (most recent call last):
|
---|
1595 | ParseError: undefined entity &ldots;: line 1, column 36
|
---|
1596 |
|
---|
1597 | """
|
---|
1598 |
|
---|
class ExceptionFile:
    """File-like stub whose read() always raises IOError."""

    def read(self, x):
        # The requested size is ignored; every read attempt fails.
        raise IOError()
1602 |
|
---|
1603 | def xmltoolkit60():
|
---|
1604 | """
|
---|
1605 |
|
---|
1606 | Handle crash in stream source.
|
---|
1607 | >>> tree = ET.parse(ExceptionFile())
|
---|
1608 | Traceback (most recent call last):
|
---|
1609 | IOError
|
---|
1610 |
|
---|
1611 | """
|
---|
1612 |
|
---|
1613 | XMLTOOLKIT62_DOC = """<?xml version="1.0" encoding="UTF-8"?>
|
---|
1614 | <!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []>
|
---|
1615 | <patent-application-publication>
|
---|
1616 | <subdoc-abstract>
|
---|
1617 | <paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named ‘BCT9801BEG’.</paragraph>
|
---|
1618 | </subdoc-abstract>
|
---|
1619 | </patent-application-publication>"""
|
---|
1620 |
|
---|
1621 |
|
---|
def xmltoolkit62():
    """

    Don't crash when using custom entities.

    Registers the lsquo/rsquo entities on the parser, feeds it
    XMLTOOLKIT62_DOC and returns the text of the first paragraph element.

    >>> xmltoolkit62()
    u'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.'

    """
    custom_entities = {u'rsquo': u'\u2019', u'lsquo': u'\u2018'}
    parser = ET.XMLTreeBuilder()
    # Entities must be registered before the document is fed.
    parser.entity.update(custom_entities)
    parser.feed(XMLTOOLKIT62_DOC)
    root = parser.close()
    paragraph = root.find('.//paragraph')
    return paragraph.text
1637 |
|
---|
def xmltoolkit63():
    """

    Check reference leak.

    Drives a TreeBuilder through one start/data/end cycle; the examples
    below compare the reference count of None before and after many calls.

    >>> xmltoolkit63()
    >>> count = sys.getrefcount(None)
    >>> for i in range(1000):
    ...     xmltoolkit63()
    >>> sys.getrefcount(None) - count
    0

    """
    builder = ET.TreeBuilder()
    builder.start("tag", {})
    builder.data("text")
    builder.end("tag")
1654 |
|
---|
1655 | # --------------------------------------------------------------------
|
---|
1656 |
|
---|
1657 |
|
---|
1658 | def bug_200708_newline():
|
---|
1659 | r"""
|
---|
1660 |
|
---|
1661 | Preserve newlines in attributes.
|
---|
1662 |
|
---|
1663 | >>> e = ET.Element('SomeTag', text="def _f():\n return 3\n")
|
---|
1664 | >>> ET.tostring(e)
|
---|
1665 | '<SomeTag text="def _f(): return 3 " />'
|
---|
1666 | >>> ET.XML(ET.tostring(e)).get("text")
|
---|
1667 | 'def _f():\n return 3\n'
|
---|
1668 | >>> ET.tostring(ET.XML(ET.tostring(e)))
|
---|
1669 | '<SomeTag text="def _f(): return 3 " />'
|
---|
1670 |
|
---|
1671 | """
|
---|
1672 |
|
---|
1673 | def bug_200708_close():
|
---|
1674 | """
|
---|
1675 |
|
---|
1676 | Test default builder.
|
---|
1677 | >>> parser = ET.XMLParser() # default
|
---|
1678 | >>> parser.feed("<element>some text</element>")
|
---|
1679 | >>> summarize(parser.close())
|
---|
1680 | 'element'
|
---|
1681 |
|
---|
1682 | Test custom builder.
|
---|
1683 | >>> class EchoTarget:
|
---|
1684 | ... def close(self):
|
---|
1685 | ... return ET.Element("element") # simulate root
|
---|
1686 | >>> parser = ET.XMLParser(EchoTarget())
|
---|
1687 | >>> parser.feed("<element>some text</element>")
|
---|
1688 | >>> summarize(parser.close())
|
---|
1689 | 'element'
|
---|
1690 |
|
---|
1691 | """
|
---|
1692 |
|
---|
def bug_200709_default_namespace():
    """Check serialization with the default_namespace option.

    Case 1: every element lives in the default namespace, so no prefix is
    written.  Case 2: an element from another namespace gets a generated
    prefix.  Case 3: an unqualified element name cannot be combined with a
    default namespace and raises ValueError.  (serialize() is a helper
    defined elsewhere in this file.)

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> serialize(e, default_namespace="default") # 1
    '<elem xmlns="default"><elem /></elem>'

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> s = ET.SubElement(e, "{not-default}elem")
    >>> serialize(e, default_namespace="default") # 2
    '<elem xmlns="default" xmlns:ns1="not-default"><elem /><ns1:elem /></elem>'

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> s = ET.SubElement(e, "elem") # unprefixed name
    >>> serialize(e, default_namespace="default") # 3
    Traceback (most recent call last):
    ValueError: cannot use non-qualified names with default_namespace option

    """
1715 |
|
---|
def bug_200709_register_namespace():
    """Check that register_namespace() controls the serialized prefix.

    An unregistered namespace URI is written with a generated ``ns0``
    prefix; after registration the chosen prefix is used instead.

    >>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title"))
    '<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />'
    >>> ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/")
    >>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title"))
    '<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />'

    And the Dublin Core namespace is in the default list:

    >>> ET.tostring(ET.Element("{http://purl.org/dc/elements/1.1/}title"))
    '<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />'

    """
1731 |
|
---|
def bug_200709_element_comment():
    """Check the tag of appended comment and processing-instruction nodes.

    Not sure if this can be fixed, really (since the serializer needs
    ET.Comment, not cET.comment).

    The tag of a node created by ET.Comment or ET.PI must compare equal to
    the factory that created it.

    >>> a = ET.Element('a')
    >>> a.append(ET.Comment('foo'))
    >>> a[0].tag == ET.Comment
    True

    >>> a = ET.Element('a')
    >>> a.append(ET.PI('foo'))
    >>> a[0].tag == ET.PI
    True

    """
1749 |
|
---|
def bug_200709_element_insert():
    """Check Element.insert() with a zero and a negative index.

    Index 0 puts the new child first; index -1 puts it before the last
    child, as the expected lists below show.  (summarize_list() is a helper
    defined elsewhere in this file.)

    >>> a = ET.Element('a')
    >>> b = ET.SubElement(a, 'b')
    >>> c = ET.SubElement(a, 'c')
    >>> d = ET.Element('d')
    >>> a.insert(0, d)
    >>> summarize_list(a)
    ['d', 'b', 'c']
    >>> a.insert(-1, d)
    >>> summarize_list(a)
    ['d', 'b', 'd', 'c']

    """
1765 |
|
---|
def bug_200709_iter_comment():
    """Check that iter() can select comment nodes nested in the tree.

    The comment is appended to a grandchild-level position (inside 'b') and
    must still be found when iterating from 'a' with ET.Comment as the tag.

    >>> a = ET.Element('a')
    >>> b = ET.SubElement(a, 'b')
    >>> comment_b = ET.Comment("TEST-b")
    >>> b.append(comment_b)
    >>> summarize_list(a.iter(ET.Comment))
    ['<Comment>']

    """
1777 |
|
---|
def bug_18347():
    """Check that the HTML serializer keeps the case of element names.

    A mixed-case tag must be written unchanged by both the default and the
    "html" output methods.

    >>> e = ET.XML('<html><CamelCase>text</CamelCase></html>')
    >>> serialize(e)
    '<html><CamelCase>text</CamelCase></html>'
    >>> serialize(e, method="html")
    '<html><CamelCase>text</CamelCase></html>'
    """
1787 |
|
---|
1788 | # --------------------------------------------------------------------
|
---|
1789 | # reported on bugs.python.org
|
---|
1790 |
|
---|
def bug_1534630():
    """Check TreeBuilder when data() is called before the first start().

    The early character data must not break the builder: the closed tree
    serializes as an empty 'tag' element.

    >>> bob = ET.TreeBuilder()
    >>> e = bob.data("data")
    >>> e = bob.start("tag", {})
    >>> e = bob.end("tag")
    >>> e = bob.close()
    >>> serialize(e)
    '<tag />'

    """
1803 |
|
---|
def check_issue6233():
    """Check ASCII serialization of non-ASCII text (issue 6233).

    The same character is parsed once from UTF-8 bytes and once from
    ISO-8859-1 bytes.  When the tree is written with the 'ascii' encoding
    the character cannot be emitted directly, so it has to appear as a
    numeric character reference in both cases.

    >>> e = ET.XML("<?xml version='1.0' encoding='utf-8'?><body>t\\xc3\\xa3g</body>")
    >>> ET.tostring(e, 'ascii')
    "<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"
    >>> e = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><body>t\\xe3g</body>")
    >>> ET.tostring(e, 'ascii')
    "<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"

    """
1815 |
|
---|
def check_issue3151():
    """Check namespace URIs that contain braces (issue 3151).

    A URI such as ``${stuff}`` must be kept intact in the qualified tag
    name and written back unchanged by the serializer.

    >>> e = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>')
    >>> e.tag
    '{${stuff}}localname'
    >>> t = ET.ElementTree(e)
    >>> ET.tostring(e)
    '<ns0:localname xmlns:ns0="${stuff}" />'

    """
1827 |
|
---|
def check_issue6565():
    """Check full-slice assignment of an element's children (issue 6565).

    Assigning ``newelem[:]`` to ``elem[:]`` must replace the single child
    of ``elem`` with the three children parsed from SAMPLE_XML.

    >>> elem = ET.XML("<body><tag/></body>")
    >>> summarize_list(elem)
    ['tag']
    >>> newelem = ET.XML(SAMPLE_XML)
    >>> elem[:] = newelem[:]
    >>> summarize_list(elem)
    ['tag', 'tag', 'section']

    """
1840 |
|
---|
def check_html_empty_elems_serialization(self):
    # issue 15970
    # from http://www.w3.org/TR/html401/index/elements.html
    # NOTE(review): ``self`` is unused; the function has no body besides the
    # docstring below and looks like it only exists to carry the doctest.
    """Check that HTML empty elements are written without an end tag.

    Every element in the list is serialized with method='html' in upper and
    lower case, first from the self-closing form and then from an explicit
    start/end tag pair; the output is a bare start tag in all four cases.

    >>> empty_elems = ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR',
    ...                'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM']
    >>> elems = ''.join('<%s />' % elem for elem in empty_elems)
    >>> serialize(ET.XML('<html>%s</html>' % elems), method='html')
    '<html><AREA><BASE><BASEFONT><BR><COL><FRAME><HR><IMG><INPUT><ISINDEX><LINK><META><PARAM></html>'
    >>> serialize(ET.XML('<html>%s</html>' % elems.lower()), method='html')
    '<html><area><base><basefont><br><col><frame><hr><img><input><isindex><link><meta><param></html>'
    >>> elems = ''.join('<%s></%s>' % (elem, elem) for elem in empty_elems)
    >>> serialize(ET.XML('<html>%s</html>' % elems), method='html')
    '<html><AREA><BASE><BASEFONT><BR><COL><FRAME><HR><IMG><INPUT><ISINDEX><LINK><META><PARAM></html>'
    >>> serialize(ET.XML('<html>%s</html>' % elems.lower()), method='html')
    '<html><area><base><basefont><br><col><frame><hr><img><input><isindex><link><meta><param></html>'

    """
1860 |
|
---|
1861 | # --------------------------------------------------------------------
|
---|
1862 |
|
---|
1863 |
|
---|
class CleanContext(object):
    """Provide default namespace mapping and path cache.

    While the context is active, ``ElementTree._namespace_map`` and
    ``ElementTree.ElementPath._cache`` are replaced by copies, so anything
    the tests register or cache is thrown away on exit.  The context also
    wraps ``test_support.check_warnings`` with the deprecation messages the
    doctests are expected to trigger.

    quiet -- forwarded to ``check_warnings``; forced to True under -OO.
    """
    checkwarnings = None

    def __init__(self, quiet=False):
        if sys.flags.optimize >= 2:
            # under -OO, doctests cannot be run and therefore not all warnings
            # will be emitted
            quiet = True
        # Each entry is (message regex, warning category).  Sentence breaks
        # are matched with " +" so that the filters do not depend on the
        # exact number of spaces used in the warning text.
        deprecations = (
            # Search behaviour is broken if search path starts with "/".
            ("This search is broken in 1.3 and earlier, and will be fixed "
             "in a future version. +If you rely on the current behaviour, "
             "change it to '.+'", FutureWarning),
            # Element.getchildren() and Element.getiterator() are deprecated.
            ("This method will be removed in future versions. +"
             "Use .+ instead.", DeprecationWarning),
            ("This method will be removed in future versions. +"
             "Use .+ instead.", PendingDeprecationWarning),
            # XMLParser.doctype() is deprecated.
            ("This method of XMLParser is deprecated. +Define doctype.. "
             "method on the TreeBuilder target.", DeprecationWarning))
        self.checkwarnings = test_support.check_warnings(*deprecations,
                                                         quiet=quiet)

    def _restore_state(self, ElementTree):
        # Put back the objects that were saved by __enter__.
        ElementTree._namespace_map = self._nsmap
        ElementTree.ElementPath._cache = self._path_cache

    def __enter__(self):
        from xml.etree import ElementTree
        self._nsmap = ElementTree._namespace_map
        self._path_cache = ElementTree.ElementPath._cache
        # Copy the default namespace mapping
        ElementTree._namespace_map = self._nsmap.copy()
        # Copy the path cache (should be empty)
        ElementTree.ElementPath._cache = self._path_cache.copy()
        try:
            # Called directly from this method (not from a helper) so the
            # warnings machinery is entered from the same frame as before.
            self.checkwarnings.__enter__()
        except BaseException:
            # __exit__ is not invoked when __enter__ fails, so undo the
            # swap here instead of leaking the copies into the module.
            self._restore_state(ElementTree)
            raise
        return self

    def __exit__(self, *args):
        from xml.etree import ElementTree
        # Restore mapping and path cache
        self._restore_state(ElementTree)
        self.checkwarnings.__exit__(*args)
1905 |
|
---|
1906 |
|
---|
def test_main(module_name='xml.etree.ElementTree'):
    """Run this module's doctests against the implementation *module_name*.

    The module-level ``ET`` symbol of ``test.test_xml_etree`` must already
    refer to the module named by *module_name*; this is asserted before and
    after the run.
    """
    from test import test_xml_etree as doctest_module

    # XXX the C module should give the same warnings as the Python module;
    # until it does, missing warnings are tolerated for anything that is
    # not the Python implementation.
    tolerate_missing = module_name != 'xml.etree.ElementTree'

    # The same doctests are used for both the Python and the C implementations
    assert doctest_module.ET.__name__ == module_name

    with CleanContext(quiet=tolerate_missing):
        test_support.run_doctest(doctest_module, verbosity=True)

    # The module should not be changed by the tests
    assert doctest_module.ET.__name__ == module_name
1921 |
|
---|
# When executed as a script, run the doctests with test_main()'s default
# module name (the Python implementation).
if __name__ == '__main__':
    test_main()