1 | # XXX TypeErrors on calling handlers, or on bad return values from a
|
---|
2 | # handler, are obscure and unhelpful.
|
---|
3 |
|
---|
4 | import StringIO, sys
|
---|
5 | import unittest
|
---|
6 |
|
---|
7 | from xml.parsers import expat
|
---|
8 |
|
---|
9 | from test import test_support
|
---|
10 | from test.test_support import sortdict, run_unittest
|
---|
11 |
|
---|
12 |
|
---|
13 | class SetAttributeTest(unittest.TestCase):
|
---|
14 | def setUp(self):
|
---|
15 | self.parser = expat.ParserCreate(namespace_separator='!')
|
---|
16 | self.set_get_pairs = [
|
---|
17 | [0, 0],
|
---|
18 | [1, 1],
|
---|
19 | [2, 1],
|
---|
20 | [0, 0],
|
---|
21 | ]
|
---|
22 |
|
---|
23 | def test_returns_unicode(self):
|
---|
24 | for x, y in self.set_get_pairs:
|
---|
25 | self.parser.returns_unicode = x
|
---|
26 | self.assertEqual(self.parser.returns_unicode, y)
|
---|
27 |
|
---|
28 | def test_ordered_attributes(self):
|
---|
29 | for x, y in self.set_get_pairs:
|
---|
30 | self.parser.ordered_attributes = x
|
---|
31 | self.assertEqual(self.parser.ordered_attributes, y)
|
---|
32 |
|
---|
33 | def test_specified_attributes(self):
|
---|
34 | for x, y in self.set_get_pairs:
|
---|
35 | self.parser.specified_attributes = x
|
---|
36 | self.assertEqual(self.parser.specified_attributes, y)
|
---|
37 |
|
---|
38 |
|
---|
# Sample document shared by the ParseTest cases.
#
# The two non-ASCII characters are spelled as numeric character references
# (&#226; and &#8000;) so that this source stays pure ASCII and the document
# remains valid under its declared iso-8859-1 encoding.  The tests expect the
# attr2 value to end in U+1F40 (decimal 8000).
data = '''\
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
     Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
</root>
'''
60 |
|
---|
61 |
|
---|
62 | # Produce UTF-8 output
|
---|
class ParseTest(unittest.TestCase):
    """Parse the sample document and compare the recorded handler events."""

    class Outputter:
        """Collect one formatted line per parser callback in ``self.out``."""

        def __init__(self):
            self.out = []

        def StartElementHandler(self, name, attrs):
            self.out.append('Start element: %r %s' % (name, sortdict(attrs)))

        def EndElementHandler(self, name):
            self.out.append('End element: %r' % (name,))

        def CharacterDataHandler(self, data):
            # Whitespace-only chunks are not recorded.
            stripped = data.strip()
            if not stripped:
                return
            self.out.append('Character data: %r' % (stripped,))

        def ProcessingInstructionHandler(self, target, data):
            self.out.append('PI: %r %r' % (target, data))

        def StartNamespaceDeclHandler(self, prefix, uri):
            self.out.append('NS decl: %r %r' % (prefix, uri))

        def EndNamespaceDeclHandler(self, prefix):
            self.out.append('End of NS decl: %r' % (prefix,))

        def StartCdataSectionHandler(self):
            self.out.append('Start of CDATA section')

        def EndCdataSectionHandler(self):
            self.out.append('End of CDATA section')

        def CommentHandler(self, text):
            self.out.append('Comment: %r' % (text,))

        def NotationDeclHandler(self, *args):
            # Unpacking doubles as a check that exactly four values arrive.
            name, base, sysid, pubid = args
            self.out.append('Notation declared: %s' % (args,))

        def UnparsedEntityDeclHandler(self, *args):
            # Unpacking doubles as a check that exactly five values arrive.
            entityName, base, systemId, publicId, notationName = args
            self.out.append('Unparsed entity decl: %s' % (args,))

        def NotStandaloneHandler(self, userData):
            self.out.append('Not standalone')
            return 1

        def ExternalEntityRefHandler(self, *args):
            context, base, sysId, pubId = args
            # The context value is left out of the recorded line.
            self.out.append('External entity ref: %s' % (args[1:],))
            return 1

        def DefaultHandler(self, userData):
            pass

        def DefaultHandlerExpand(self, userData):
            pass

    # Names of the Outputter methods installed on each parser.
    handler_names = [
        'StartElementHandler', 'EndElementHandler',
        'CharacterDataHandler', 'ProcessingInstructionHandler',
        'UnparsedEntityDeclHandler', 'NotationDeclHandler',
        'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler',
        'CommentHandler', 'StartCdataSectionHandler',
        'EndCdataSectionHandler',
        'DefaultHandler', 'DefaultHandlerExpand',
        #'NotStandaloneHandler',
        'ExternalEntityRefHandler'
        ]

    # Events expected, in order, when returns_unicode is 0.
    _UTF8_EVENTS = (
        'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'',
        "Comment: ' comment data '",
        "Notation declared: ('notation', None, 'notation.jpeg', None)",
        "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')",
        "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\\xe1\\xbd\\x80'}",
        "NS decl: 'myns' 'http://www.python.org/namespace'",
        "Start element: 'http://www.python.org/namespace!subelement' {}",
        "Character data: 'Contents of subelements'",
        "End element: 'http://www.python.org/namespace!subelement'",
        "End of NS decl: 'myns'",
        "Start element: 'sub2' {}",
        'Start of CDATA section',
        "Character data: 'contents of CDATA section'",
        'End of CDATA section',
        "End element: 'sub2'",
        "External entity ref: (None, 'entity.file', None)",
        "End element: 'root'",
    )

    # Events expected, in order, when returns_unicode is 1.
    _UNICODE_EVENTS = (
        'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'',
        "Comment: u' comment data '",
        "Notation declared: (u'notation', None, u'notation.jpeg', None)",
        "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')",
        "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}",
        "NS decl: u'myns' u'http://www.python.org/namespace'",
        "Start element: u'http://www.python.org/namespace!subelement' {}",
        "Character data: u'Contents of subelements'",
        "End element: u'http://www.python.org/namespace!subelement'",
        "End of NS decl: u'myns'",
        "Start element: u'sub2' {}",
        'Start of CDATA section',
        "Character data: u'contents of CDATA section'",
        'End of CDATA section',
        "End element: u'sub2'",
        "External entity ref: (None, u'entity.file', None)",
        "End element: u'root'",
    )

    def _make_parser(self, recorder, returns_unicode):
        """Return a '!'-separated namespace parser wired to *recorder*."""
        parser = expat.ParserCreate(namespace_separator='!')
        parser.returns_unicode = returns_unicode
        for handler_name in self.handler_names:
            setattr(parser, handler_name, getattr(recorder, handler_name))
        return parser

    def _assert_events(self, recorded, expected):
        """Compare *recorded* with *expected* position by position."""
        for position, event in enumerate(expected):
            self.assertEqual(recorded[position], event)

    def test_utf8(self):
        recorder = self.Outputter()
        parser = self._make_parser(recorder, 0)
        parser.Parse(data, 1)

        # Verify output
        self._assert_events(recorder.out, self._UTF8_EVENTS)

    def test_unicode(self):
        # Try the parse again, this time producing Unicode output
        recorder = self.Outputter()
        parser = self._make_parser(recorder, 1)
        parser.Parse(data, 1)

        self._assert_events(recorder.out, self._UNICODE_EVENTS)

    def test_parse_file(self):
        # Try parsing a file
        recorder = self.Outputter()
        parser = self._make_parser(recorder, 1)
        stream = StringIO.StringIO(data)

        parser.ParseFile(stream)

        self._assert_events(recorder.out, self._UNICODE_EVENTS)

        # Issue 4877: expat.ParseFile causes segfault on a closed file.
        fp = open(test_support.TESTFN, 'wb')
        try:
            fp.close()
            closed_file_parser = expat.ParserCreate()
            with self.assertRaises(ValueError):
                closed_file_parser.ParseFile(fp)
        finally:
            test_support.unlink(test_support.TESTFN)
230 |
|
---|
231 |
|
---|
232 | class NamespaceSeparatorTest(unittest.TestCase):
|
---|
233 | def test_legal(self):
|
---|
234 | # Tests that make sure we get errors when the namespace_separator value
|
---|
235 | # is illegal, and that we don't for good values:
|
---|
236 | expat.ParserCreate()
|
---|
237 | expat.ParserCreate(namespace_separator=None)
|
---|
238 | expat.ParserCreate(namespace_separator=' ')
|
---|
239 |
|
---|
240 | def test_illegal(self):
|
---|
241 | try:
|
---|
242 | expat.ParserCreate(namespace_separator=42)
|
---|
243 | self.fail()
|
---|
244 | except TypeError, e:
|
---|
245 | self.assertEqual(str(e),
|
---|
246 | 'ParserCreate() argument 2 must be string or None, not int')
|
---|
247 |
|
---|
248 | try:
|
---|
249 | expat.ParserCreate(namespace_separator='too long')
|
---|
250 | self.fail()
|
---|
251 | except ValueError, e:
|
---|
252 | self.assertEqual(str(e),
|
---|
253 | 'namespace_separator must be at most one character, omitted, or None')
|
---|
254 |
|
---|
255 | def test_zero_length(self):
|
---|
256 | # ParserCreate() needs to accept a namespace_separator of zero length
|
---|
257 | # to satisfy the requirements of RDF applications that are required
|
---|
258 | # to simply glue together the namespace URI and the localname. Though
|
---|
259 | # considered a wart of the RDF specifications, it needs to be supported.
|
---|
260 | #
|
---|
261 | # See XML-SIG mailing list thread starting with
|
---|
262 | # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
|
---|
263 | #
|
---|
264 | expat.ParserCreate(namespace_separator='') # too short
|
---|
265 |
|
---|
266 |
|
---|
class InterningTest(unittest.TestCase):
    """Element names reported by the parser should be one shared object."""

    def test(self):
        # Test the interning machinery.
        parser = expat.ParserCreate()
        names = []

        def collector(name, *args):
            names.append(name)

        parser.StartElementHandler = parser.EndElementHandler = collector
        parser.Parse("<e> <e/> <e></e> </e>", 1)
        first = names[0]
        # Three elements, each reported at start and at end.
        self.assertEqual(len(names), 6)
        for name in names:
            # Every entry should be the very same string object.
            self.assertTrue(first is name)
282 |
|
---|
283 |
|
---|
class BufferTextTest(unittest.TestCase):
    """Exercise the parser's buffer_text attribute.

    setUp() creates a parser with buffer_text enabled and a character data
    handler that appends every chunk to ``self.stuff``; individual tests
    install further handlers through setHandlers().
    """

    def setUp(self):
        self.stuff = []
        self.parser = expat.ParserCreate()
        self.parser.buffer_text = 1
        self.parser.CharacterDataHandler = self.CharacterDataHandler

    def check(self, expected, label):
        """Assert that the recorded events equal *expected*, tagging *label*."""
        self.assertEqual(self.stuff, expected,
                "%s\nstuff    = %r\nexpected = %r"
                % (label, self.stuff, map(unicode, expected)))

    def CharacterDataHandler(self, text):
        self.stuff.append(text)

    def StartElementHandler(self, name, attrs):
        self.stuff.append("<%s>" % name)
        # A buffer-text="yes"/"no" attribute toggles buffering mid-parse.
        bt = attrs.get("buffer-text")
        if bt == "yes":
            self.parser.buffer_text = 1
        elif bt == "no":
            self.parser.buffer_text = 0

    def EndElementHandler(self, name):
        self.stuff.append("</%s>" % name)

    def CommentHandler(self, data):
        self.stuff.append("<!--%s-->" % data)

    def setHandlers(self, handlers=()):
        """Install the same-named methods of this test case on the parser.

        *handlers* is an iterable of handler attribute names; the default is
        an immutable empty tuple rather than a shared mutable list.
        """
        for name in handlers:
            setattr(self.parser, name, getattr(self, name))

    def test_default_to_disabled(self):
        parser = expat.ParserCreate()
        self.assertFalse(parser.buffer_text)

    def test_buffering_enabled(self):
        # Make sure buffering is turned on
        self.assertTrue(self.parser.buffer_text)
        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ['123'],
                         "buffered text not properly collapsed")

    def test1(self):
        # XXX This test exposes more detail of Expat's text chunking than we
        # XXX like, but it tests what we need to concisely.
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse("<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
                         "buffering control not reacting as expected")

    def test2(self):
        # "<" and ">" must be escaped in the input: a literal "<2>" is not
        # well-formed XML, whereas the references yield the expected text.
        self.parser.Parse("<a>1<b/>&lt;2&gt;<c/> \n 3</a>", 1)
        self.assertEqual(self.stuff, ["1<2> \n 3"],
                         "buffered text not properly collapsed")

    def test3(self):
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"],
                         "buffered text not properly split")

    def test4(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        # With no character data handler only element events are recorded.
        self.parser.CharacterDataHandler = None
        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])

    def test5(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.Parse("<a>1<b></b>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])

    def test6(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse("<a>1<b/>2<c></c>345</a> ", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
            "buffered text not properly split")

    def test7(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse("<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
                          "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
                         "buffered text not properly split")
377 |
|
---|
378 |
|
---|
379 | # Test handling of exception from callback:
|
---|
class HandlerExceptionTest(unittest.TestCase):
    """An exception raised inside a handler must propagate out of Parse()."""

    def StartElementHandler(self, name, attrs):
        # Fail on the first element seen, carrying its name as the argument.
        raise RuntimeError(name)

    def test(self):
        parser = expat.ParserCreate()
        parser.StartElementHandler = self.StartElementHandler
        # Context-manager form, matching the rest of this file, in place of
        # the older "except RuntimeError, e" spelling.
        with self.assertRaises(RuntimeError) as caught:
            parser.Parse("<a><b><c/></b></a>", 1)
        element = caught.exception.args[0]
        # The error must come from the outermost element, the first start
        # event in the document.
        self.assertEqual(element, 'a',
                         "Expected RuntimeError for element 'a', but"
                         " found %r" % element)
394 |
|
---|
395 |
|
---|
396 | # Test Current* members:
|
---|
class PositionTest(unittest.TestCase):
    """Check CurrentByteIndex/LineNumber/ColumnNumber at each element event."""

    def StartElementHandler(self, name, attrs):
        self.check_pos('s')

    def EndElementHandler(self, name):
        self.check_pos('e')

    def check_pos(self, event):
        """Compare the parser's current position with the next expected one.

        *event* is 's' for a start tag or 'e' for an end tag; ``self.upto``
        indexes the next unconsumed entry of ``self.expected_list``.
        """
        pos = (event,
               self.parser.CurrentByteIndex,
               self.parser.CurrentLineNumber,
               self.parser.CurrentColumnNumber)
        self.assertTrue(self.upto < len(self.expected_list),
                        'too many parser events')
        expected = self.expected_list[self.upto]
        # The message names the expected value first, then the actual one.
        self.assertEqual(pos, expected,
                'Expected position %s, got position %s' % (expected, pos))
        self.upto += 1

    def test(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.StartElementHandler
        self.parser.EndElementHandler = self.EndElementHandler
        self.upto = 0
        # Entries are (event, byte index, line number, column number).
        self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
                              ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]

        xml = '<a>\n <b>\n  <c/>\n </b>\n</a>'
        self.parser.Parse(xml, 1)
        # Make sure no expected event went missing.
        self.assertEqual(self.upto, len(self.expected_list),
                         'too few parser events')
426 |
|
---|
427 |
|
---|
class sf1296433Test(unittest.TestCase):
    """Regression test for http://python.org/sf/1296433."""

    def test_parse_only_xml_data(self):
        # http://python.org/sf/1296433
        #
        class SpecificException(Exception):
            pass

        def handler(text):
            raise SpecificException

        payload = 'a' * 1025
        xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % payload
        # this one doesn't crash
        #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

        parser = expat.ParserCreate()
        parser.CharacterDataHandler = handler

        # The exception raised by the handler must surface from Parse().
        with self.assertRaises(Exception):
            parser.Parse(xml)
446 |
|
---|
447 | class ChardataBufferTest(unittest.TestCase):
|
---|
448 | """
|
---|
449 | test setting of chardata buffer size
|
---|
450 | """
|
---|
451 |
|
---|
452 | def test_1025_bytes(self):
|
---|
453 | self.assertEqual(self.small_buffer_test(1025), 2)
|
---|
454 |
|
---|
455 | def test_1000_bytes(self):
|
---|
456 | self.assertEqual(self.small_buffer_test(1000), 1)
|
---|
457 |
|
---|
458 | def test_wrong_size(self):
|
---|
459 | parser = expat.ParserCreate()
|
---|
460 | parser.buffer_text = 1
|
---|
461 | def f(size):
|
---|
462 | parser.buffer_size = size
|
---|
463 |
|
---|
464 | self.assertRaises(TypeError, f, sys.maxint+1)
|
---|
465 | self.assertRaises(ValueError, f, -1)
|
---|
466 | self.assertRaises(ValueError, f, 0)
|
---|
467 |
|
---|
468 | def test_unchanged_size(self):
|
---|
469 | xml1 = ("<?xml version='1.0' encoding='iso8859'?><s>%s" % ('a' * 512))
|
---|
470 | xml2 = 'a'*512 + '</s>'
|
---|
471 | parser = expat.ParserCreate()
|
---|
472 | parser.CharacterDataHandler = self.counting_handler
|
---|
473 | parser.buffer_size = 512
|
---|
474 | parser.buffer_text = 1
|
---|
475 |
|
---|
476 | # Feed 512 bytes of character data: the handler should be called
|
---|
477 | # once.
|
---|
478 | self.n = 0
|
---|
479 | parser.Parse(xml1)
|
---|
480 | self.assertEqual(self.n, 1)
|
---|
481 |
|
---|
482 | # Reassign to buffer_size, but assign the same size.
|
---|
483 | parser.buffer_size = parser.buffer_size
|
---|
484 | self.assertEqual(self.n, 1)
|
---|
485 |
|
---|
486 | # Try parsing rest of the document
|
---|
487 | parser.Parse(xml2)
|
---|
488 | self.assertEqual(self.n, 2)
|
---|
489 |
|
---|
490 |
|
---|
491 | def test_disabling_buffer(self):
|
---|
492 | xml1 = "<?xml version='1.0' encoding='iso8859'?><a>%s" % ('a' * 512)
|
---|
493 | xml2 = ('b' * 1024)
|
---|
494 | xml3 = "%s</a>" % ('c' * 1024)
|
---|
495 | parser = expat.ParserCreate()
|
---|
496 | parser.CharacterDataHandler = self.counting_handler
|
---|
497 | parser.buffer_text = 1
|
---|
498 | parser.buffer_size = 1024
|
---|
499 | self.assertEqual(parser.buffer_size, 1024)
|
---|
500 |
|
---|
501 | # Parse one chunk of XML
|
---|
502 | self.n = 0
|
---|
503 | parser.Parse(xml1, 0)
|
---|
504 | self.assertEqual(parser.buffer_size, 1024)
|
---|
505 | self.assertEqual(self.n, 1)
|
---|
506 |
|
---|
507 | # Turn off buffering and parse the next chunk.
|
---|
508 | parser.buffer_text = 0
|
---|
509 | self.assertFalse(parser.buffer_text)
|
---|
510 | self.assertEqual(parser.buffer_size, 1024)
|
---|
511 | for i in range(10):
|
---|
512 | parser.Parse(xml2, 0)
|
---|
513 | self.assertEqual(self.n, 11)
|
---|
514 |
|
---|
515 | parser.buffer_text = 1
|
---|
516 | self.assertTrue(parser.buffer_text)
|
---|
517 | self.assertEqual(parser.buffer_size, 1024)
|
---|
518 | parser.Parse(xml3, 1)
|
---|
519 | self.assertEqual(self.n, 12)
|
---|
520 |
|
---|
521 |
|
---|
522 |
|
---|
523 | def make_document(self, bytes):
|
---|
524 | return ("<?xml version='1.0'?><tag>" + bytes * 'a' + '</tag>')
|
---|
525 |
|
---|
526 | def counting_handler(self, text):
|
---|
527 | self.n += 1
|
---|
528 |
|
---|
529 | def small_buffer_test(self, buffer_len):
|
---|
530 | xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * buffer_len)
|
---|
531 | parser = expat.ParserCreate()
|
---|
532 | parser.CharacterDataHandler = self.counting_handler
|
---|
533 | parser.buffer_size = 1024
|
---|
534 | parser.buffer_text = 1
|
---|
535 |
|
---|
536 | self.n = 0
|
---|
537 | parser.Parse(xml)
|
---|
538 | return self.n
|
---|
539 |
|
---|
540 | def test_change_size_1(self):
|
---|
541 | xml1 = "<?xml version='1.0' encoding='iso8859'?><a><s>%s" % ('a' * 1024)
|
---|
542 | xml2 = "aaa</s><s>%s</s></a>" % ('a' * 1025)
|
---|
543 | parser = expat.ParserCreate()
|
---|
544 | parser.CharacterDataHandler = self.counting_handler
|
---|
545 | parser.buffer_text = 1
|
---|
546 | parser.buffer_size = 1024
|
---|
547 | self.assertEqual(parser.buffer_size, 1024)
|
---|
548 |
|
---|
549 | self.n = 0
|
---|
550 | parser.Parse(xml1, 0)
|
---|
551 | parser.buffer_size *= 2
|
---|
552 | self.assertEqual(parser.buffer_size, 2048)
|
---|
553 | parser.Parse(xml2, 1)
|
---|
554 | self.assertEqual(self.n, 2)
|
---|
555 |
|
---|
556 | def test_change_size_2(self):
|
---|
557 | xml1 = "<?xml version='1.0' encoding='iso8859'?><a>a<s>%s" % ('a' * 1023)
|
---|
558 | xml2 = "aaa</s><s>%s</s></a>" % ('a' * 1025)
|
---|
559 | parser = expat.ParserCreate()
|
---|
560 | parser.CharacterDataHandler = self.counting_handler
|
---|
561 | parser.buffer_text = 1
|
---|
562 | parser.buffer_size = 2048
|
---|
563 | self.assertEqual(parser.buffer_size, 2048)
|
---|
564 |
|
---|
565 | self.n=0
|
---|
566 | parser.Parse(xml1, 0)
|
---|
567 | parser.buffer_size //= 2
|
---|
568 | self.assertEqual(parser.buffer_size, 1024)
|
---|
569 | parser.Parse(xml2, 1)
|
---|
570 | self.assertEqual(self.n, 4)
|
---|
571 |
|
---|
572 | class MalformedInputText(unittest.TestCase):
|
---|
573 | def test1(self):
|
---|
574 | xml = "\0\r\n"
|
---|
575 | parser = expat.ParserCreate()
|
---|
576 | try:
|
---|
577 | parser.Parse(xml, True)
|
---|
578 | self.fail()
|
---|
579 | except expat.ExpatError as e:
|
---|
580 | self.assertEqual(str(e), 'unclosed token: line 2, column 0')
|
---|
581 |
|
---|
582 | def test2(self):
|
---|
583 | xml = "<?xml version\xc2\x85='1.0'?>\r\n"
|
---|
584 | parser = expat.ParserCreate()
|
---|
585 | try:
|
---|
586 | parser.Parse(xml, True)
|
---|
587 | self.fail()
|
---|
588 | except expat.ExpatError as e:
|
---|
589 | self.assertEqual(str(e), 'XML declaration not well-formed: line 1, column 14')
|
---|
590 |
|
---|
class ForeignDTDTests(unittest.TestCase):
    """
    Tests for the UseForeignDTD method of expat parser objects.
    """

    def _entity_refs(self, document, *use_foreign_dtd_args):
        """Parse *document* and return the (public_id, system_id) pairs
        passed to ExternalEntityRefHandler.

        *use_foreign_dtd_args* are forwarded unchanged to UseForeignDTD().
        """
        seen = []

        def resolve_entity(context, base, system_id, public_id):
            seen.append((public_id, system_id))
            return 1

        parser = expat.ParserCreate()
        parser.UseForeignDTD(*use_foreign_dtd_args)
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = resolve_entity
        parser.Parse(document)
        return seen

    def test_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document without an external
        entity reference is parsed, ExternalEntityRefHandler is first called
        with None for the public and system ids.
        """
        document = "<?xml version='1.0'?><element/>"
        self.assertEqual(self._entity_refs(document, True), [(None, None)])

        # test UseForeignDTD() is equal to UseForeignDTD(True)
        self.assertEqual(self._entity_refs(document), [(None, None)])

    def test_ignore_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document with an external
        entity reference is parsed, ExternalEntityRefHandler is called with
        the public and system ids from the document.
        """
        document = (
            "<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")
        self.assertEqual(self._entity_refs(document, True), [("bar", "baz")])
641 |
|
---|
642 |
|
---|
def test_main():
    """Hand every test case class of this module to run_unittest()."""
    test_classes = (
        SetAttributeTest,
        ParseTest,
        NamespaceSeparatorTest,
        InterningTest,
        BufferTextTest,
        HandlerExceptionTest,
        PositionTest,
        sf1296433Test,
        ChardataBufferTest,
        MalformedInputText,
        ForeignDTDTests,
    )
    run_unittest(*test_classes)


if __name__ == "__main__":
    test_main()