Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

test_htmlparser.py

Last change on this file was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 24.3 KB

Line
1	"""Tests for HTMLParser.py."""
2
3	import HTMLParser
4	import pprint
5	import unittest
6	from test import test_support
7
8
class EventCollector(HTMLParser.HTMLParser):
    """Parser that records every handler callback as an event tuple.

    Each ``handle_*`` callback appends a tuple to ``self.events`` whose
    first item names the kind of event ("starttag", "data", ...) and whose
    remaining items are the arguments the parser passed to the callback.
    """

    def __init__(self):
        self.events = []
        # Shortcut used by all handlers; bound to the list created above.
        self.append = self.events.append
        HTMLParser.HTMLParser.__init__(self)

    def get_events(self):
        """Return the recorded events with adjacent "data" events merged.

        Normalizing the list this way means buffer artefacts (the same
        text arriving in several ``handle_data`` calls) don't separate
        runs of contiguous characters.
        """
        merged = []
        prev_kind = None
        for event in self.events:
            kind = event[0]
            if kind == prev_kind == "data":
                merged[-1] = ("data", merged[-1][1] + event[1])
            else:
                merged.append(event)
            prev_kind = kind
        # Replace the contents in place instead of rebinding self.events:
        # self.append is bound to this very list object, so rebinding would
        # silently drop every event recorded after this call.
        self.events[:] = merged
        return self.events

    # structure markup

    def handle_starttag(self, tag, attrs):
        """Record a ("starttag", tag, attrs) event."""
        self.append(("starttag", tag, attrs))

    def handle_startendtag(self, tag, attrs):
        """Record a ("startendtag", tag, attrs) event."""
        self.append(("startendtag", tag, attrs))

    def handle_endtag(self, tag):
        """Record an ("endtag", tag) event."""
        self.append(("endtag", tag))

    # all other markup

    def handle_comment(self, data):
        """Record a ("comment", data) event."""
        self.append(("comment", data))

    def handle_charref(self, data):
        """Record a ("charref", data) event."""
        self.append(("charref", data))

    def handle_data(self, data):
        """Record a ("data", data) event."""
        self.append(("data", data))

    def handle_decl(self, data):
        """Record a ("decl", data) event."""
        self.append(("decl", data))

    def handle_entityref(self, data):
        """Record an ("entityref", data) event."""
        self.append(("entityref", data))

    def handle_pi(self, data):
        """Record a ("pi", data) event."""
        self.append(("pi", data))

    def unknown_decl(self, decl):
        """Record an ("unknown decl", decl) event."""
        self.append(("unknown decl", decl))
64
65
class EventCollectorExtra(EventCollector):
    """EventCollector that also records the raw text of each start tag."""

    def handle_starttag(self, tag, attrs):
        """Record the start tag, then a ("starttag_text", text) event.

        The text is whatever ``get_starttag_text()`` returns for the tag
        just handled.
        """
        EventCollector.handle_starttag(self, tag, attrs)
        self.append(("starttag_text", self.get_starttag_text()))
71
72
class TestCaseBase(unittest.TestCase):
    """Shared helpers for feeding input to a parser and checking events."""

    def _run_check(self, source, expected_events, collector=EventCollector):
        # Feed every item of `source` to a fresh `collector` instance and
        # fail unless the collected events equal `expected_events`.
        # `source` is iterated directly, so a plain string is fed one
        # character at a time and a list is fed one chunk at a time.
        parser = collector()
        for s in source:
            parser.feed(s)
        parser.close()
        events = parser.get_events()
        if events != expected_events:
            self.fail("received events did not match expected events\n"
                      "Expected:\n" + pprint.pformat(expected_events) +
                      "\nReceived:\n" + pprint.pformat(events))

    def _run_check_extra(self, source, events):
        # Same as _run_check, but also expects "starttag_text" events.
        self._run_check(source, events, EventCollectorExtra)

    def _parse_error(self, source):
        # Assert that parsing `source` in one feed() call raises
        # HTMLParser.HTMLParseError.
        def parse(source=source):
            parser = HTMLParser.HTMLParser()
            parser.feed(source)
            parser.close()
        self.assertRaises(HTMLParser.HTMLParseError, parse)
95
96
class HTMLParserTestCase(TestCaseBase):
    # Tests for the tokenizing behaviour of HTMLParser: tags, comments,
    # declarations, processing instructions, character/entity references
    # and CDATA-like element content.  Comments are used instead of
    # docstrings so that unittest's verbose output keeps the test names.

    def test_processing_instruction_only(self):
        self._run_check("<?processing instruction>", [
            ("pi", "processing instruction"),
            ])
        self._run_check("<?processing instruction ?>", [
            ("pi", "processing instruction ?"),
            ])

    def test_simple_html(self):
        # The document below must start at column 0: its line breaks are
        # part of the expected "data" events.
        self._run_check("""
<!DOCTYPE html PUBLIC 'foo'>
<HTML>&entity;&#32;
<!--comment1a
-></foo><bar>&lt;<?pi?></foo<bar
comment1b-->
<Img sRc='Bar' isMAP>sample
text
&#x201C;
<!--comment2a-- --comment2b-->
</Html>
""", [
    ("data", "\n"),
    ("decl", "DOCTYPE html PUBLIC 'foo'"),
    ("data", "\n"),
    ("starttag", "html", []),
    ("entityref", "entity"),
    ("charref", "32"),
    ("data", "\n"),
    ("comment", "comment1a\n-></foo><bar>&lt;<?pi?></foo<bar\ncomment1b"),
    ("data", "\n"),
    ("starttag", "img", [("src", "Bar"), ("ismap", None)]),
    ("data", "sample\ntext\n"),
    ("charref", "x201C"),
    ("data", "\n"),
    ("comment", "comment2a-- --comment2b"),
    ("data", "\n"),
    ("endtag", "html"),
    ("data", "\n"),
    ])

    def test_unclosed_entityref(self):
        self._run_check("&entityref foo", [
            ("entityref", "entityref"),
            ("data", " foo"),
            ])

    def test_bad_nesting(self):
        # Strangely, this *is* supposed to test that overlapping
        # elements are allowed.  HTMLParser is more geared toward
        # lexing the input than parsing the structure.
        self._run_check("<a><b></a></b>", [
            ("starttag", "a", []),
            ("starttag", "b", []),
            ("endtag", "a"),
            ("endtag", "b"),
            ])

    def test_bare_ampersands(self):
        self._run_check("this text & contains & ampersands &", [
            ("data", "this text & contains & ampersands &"),
            ])

    def test_bare_pointy_brackets(self):
        self._run_check("this < text > contains < bare>pointy< brackets", [
            ("data", "this < text > contains < bare>pointy< brackets"),
            ])

    def test_illegal_declarations(self):
        self._run_check('<!spacer type="block" height="25">',
                        [('comment', 'spacer type="block" height="25"')])

    def test_starttag_end_boundary(self):
        self._run_check("""<a b='<'>""", [("starttag", "a", [("b", "<")])])
        self._run_check("""<a b='>'>""", [("starttag", "a", [("b", ">")])])

    def test_buffer_artefacts(self):
        # Splitting the input at any point between feed() calls must not
        # change the resulting events.
        output = [("starttag", "a", [("b", "<")])]
        self._run_check(["<a b='<'>"], output)
        self._run_check(["<a ", "b='<'>"], output)
        self._run_check(["<a b", "='<'>"], output)
        self._run_check(["<a b=", "'<'>"], output)
        self._run_check(["<a b='<", "'>"], output)
        self._run_check(["<a b='<'", ">"], output)

        output = [("starttag", "a", [("b", ">")])]
        self._run_check(["<a b='>'>"], output)
        self._run_check(["<a ", "b='>'>"], output)
        self._run_check(["<a b", "='>'>"], output)
        self._run_check(["<a b=", "'>'>"], output)
        self._run_check(["<a b='>", "'>"], output)
        self._run_check(["<a b='>'", ">"], output)

        output = [("comment", "abc")]
        self._run_check(["", "<!--abc-->"], output)
        self._run_check(["<", "!--abc-->"], output)
        self._run_check(["<!", "--abc-->"], output)
        self._run_check(["<!-", "-abc-->"], output)
        self._run_check(["<!--", "abc-->"], output)
        self._run_check(["<!--a", "bc-->"], output)
        self._run_check(["<!--ab", "c-->"], output)
        self._run_check(["<!--abc", "-->"], output)
        self._run_check(["<!--abc-", "->"], output)
        self._run_check(["<!--abc--", ">"], output)
        self._run_check(["<!--abc-->", ""], output)

    def test_starttag_junk_chars(self):
        self._run_check("</>", [])
        self._run_check("</$>", [('comment', '$')])
        self._run_check("</", [('data', '</')])
        self._run_check("</a", [('data', '</a')])
        # XXX this might be wrong
        self._run_check("<a<a>", [('data', '<a'), ('starttag', 'a', [])])
        self._run_check("</a<a>", [('endtag', 'a<a')])
        self._run_check("<!", [('data', '<!')])
        self._run_check("<a", [('data', '<a')])
        self._run_check("<a foo='bar'", [('data', "<a foo='bar'")])
        self._run_check("<a foo='bar", [('data', "<a foo='bar")])
        self._run_check("<a foo='>'", [('data', "<a foo='>'")])
        self._run_check("<a foo='>", [('data', "<a foo='>")])

    def test_valid_doctypes(self):
        # from http://www.w3.org/QA/2002/04/valid-dtd-list.html
        dtds = ['HTML',  # HTML5 doctype
                ('HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '
                 '"http://www.w3.org/TR/html4/strict.dtd"'),
                ('HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" '
                 '"http://www.w3.org/TR/html4/loose.dtd"'),
                ('html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" '
                 '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"'),
                ('html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" '
                 '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"'),
                ('math PUBLIC "-//W3C//DTD MathML 2.0//EN" '
                 '"http://www.w3.org/Math/DTD/mathml2/mathml2.dtd"'),
                ('html PUBLIC "-//W3C//DTD '
                 'XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" '
                 '"http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd"'),
                ('svg PUBLIC "-//W3C//DTD SVG 1.1//EN" '
                 '"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"'),
                'html PUBLIC "-//IETF//DTD HTML 2.0//EN"',
                'html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"']
        for dtd in dtds:
            self._run_check("<!DOCTYPE %s>" % dtd,
                            [('decl', 'DOCTYPE ' + dtd)])

    def test_slashes_in_starttag(self):
        self._run_check('<a foo="var"/>', [('startendtag', 'a', [('foo', 'var')])])
        # The asterisks matter: the expected attribute names below are
        # '*what' and 'here*'.
        html = ('<img width=902 height=250px '
                'src="/sites/default/files/images/homepage/foo.jpg" '
                '/*what am I doing here*/ />')
        expected = [(
            'startendtag', 'img',
            [('width', '902'), ('height', '250px'),
             ('src', '/sites/default/files/images/homepage/foo.jpg'),
             ('*what', None), ('am', None), ('i', None),
             ('doing', None), ('here*', None)]
        )]
        self._run_check(html, expected)
        html = ('<a / /foo/ / /=/ / /bar/ / />'
                '<a / /foo/ / /=/ / /bar/ / >')
        expected = [
            ('startendtag', 'a', [('foo', None), ('=', None), ('bar', None)]),
            ('starttag', 'a', [('foo', None), ('=', None), ('bar', None)])
        ]
        self._run_check(html, expected)
        # see issue #14538
        html = ('<meta><meta / ><meta // ><meta / / >'
                '<meta/><meta /><meta //><meta//>')
        expected = [
            ('starttag', 'meta', []), ('starttag', 'meta', []),
            ('starttag', 'meta', []), ('starttag', 'meta', []),
            ('startendtag', 'meta', []), ('startendtag', 'meta', []),
            ('startendtag', 'meta', []), ('startendtag', 'meta', []),
        ]
        self._run_check(html, expected)

    def test_declaration_junk_chars(self):
        self._run_check("<!DOCTYPE foo $ >", [('decl', 'DOCTYPE foo $ ')])

    def test_startendtag(self):
        self._run_check("<p/>", [
            ("startendtag", "p", []),
            ])
        self._run_check("<p></p>", [
            ("starttag", "p", []),
            ("endtag", "p"),
            ])
        self._run_check("<p><img src='foo' /></p>", [
            ("starttag", "p", []),
            ("startendtag", "img", [("src", "foo")]),
            ("endtag", "p"),
            ])

    def test_invalid_end_tags(self):
        # A collection of broken end tags. <br> is used as separator.
        # see http://www.w3.org/TR/html5/tokenization.html#end-tag-open-state
        # and #13993
        html = ('<br></label</p><br></div end tmAd-leaderBoard><br></<h4><br>'
                '</li class="unit"><br></li\r\n\t\t\t\t\t\t</ul><br></><br>')
        expected = [('starttag', 'br', []),
                    # < is part of the name, / is discarded, p is an attribute
                    ('endtag', 'label<'),
                    ('starttag', 'br', []),
                    # text and attributes are discarded
                    ('endtag', 'div'),
                    ('starttag', 'br', []),
                    # comment because the first char after </ is not a-zA-Z
                    ('comment', '<h4'),
                    ('starttag', 'br', []),
                    # attributes are discarded
                    ('endtag', 'li'),
                    ('starttag', 'br', []),
                    # everything till ul (included) is discarded
                    ('endtag', 'li'),
                    ('starttag', 'br', []),
                    # </> is ignored
                    ('starttag', 'br', [])]
        self._run_check(html, expected)

    def test_broken_invalid_end_tag(self):
        # This is technically wrong (the "> shouldn't be included in the 'data')
        # but is probably not worth fixing it (in addition to all the cases of
        # the previous test, it would require a full attribute parsing).
        # see #13993
        html = '<b>This</b attr=">"> confuses the parser'
        expected = [('starttag', 'b', []),
                    ('data', 'This'),
                    ('endtag', 'b'),
                    ('data', '"> confuses the parser')]
        self._run_check(html, expected)

    def test_get_starttag_text(self):
        s = """<foo:bar \n one="1"\ttwo=2 >"""
        self._run_check_extra(s, [
            ("starttag", "foo:bar", [("one", "1"), ("two", "2")]),
            ("starttag_text", s)])

    def test_cdata_content(self):
        # Everything between <script>/<style> and the matching end tag
        # must come back as a single untouched "data" event.
        contents = [
            '<!-- not a comment --> &not-an-entity-ref;',
            "<not a='start tag'>",
            '<a href="" /> <p> <span></span>',
            'foo = "</scr" + "ipt>";',
            'foo = "</SCRIPT" + ">";',
            'foo = <\n/script> ',
            '<!-- document.write("</scr" + "ipt>"); -->',
            ('\n//<![CDATA[\n'
             'document.write(\'<s\'+\'cript type="text/javascript" '
             'src="http://www.example.org/r=\'+new '
             'Date().getTime()+\'"><\\/s\'+\'cript>\');\n//]]>'),
            '\n<!-- //\nvar foo = 3.14;\n// -->\n',
            'foo = "</sty" + "le>";',
            u'<!-- \u2603 -->',
            # these two should be invalid according to the HTML 5 spec,
            # section 8.1.2.2
            #'foo = </\nscript>',
            #'foo = </ script>',
        ]
        elements = ['script', 'style', 'SCRIPT', 'STYLE', 'Script', 'Style']
        for content in contents:
            for element in elements:
                element_lower = element.lower()
                s = u'<{element}>{content}</{element}>'.format(element=element,
                                                               content=content)
                self._run_check(s, [("starttag", element_lower, []),
                                    ("data", content),
                                    ("endtag", element_lower)])

    def test_cdata_with_closing_tags(self):
        # see issue #13358
        # make sure that HTMLParser calls handle_data only once for each CDATA.
        # The normal event collector normalizes the events in get_events,
        # so we override it to return the original list of events.
        class Collector(EventCollector):
            def get_events(self):
                return self.events

        content = """<!-- not a comment --> &not-an-entity-ref;
                  <a href="" /> </p><p> &amp; <span></span></style>
                  '</script' + '>' </html> </head> </scripter>!"""
        for element in [' script', 'script ', ' script ',
                        '\nscript', 'script\n', '\nscript\n']:
            s = u'<script>{content}</{element}>'.format(element=element,
                                                        content=content)
            self._run_check(s, [("starttag", "script", []),
                                ("data", content),
                                ("endtag", "script")],
                            collector=Collector)

    def test_malformatted_charref(self):
        self._run_check("<p>&#bad;</p>", [
            ("starttag", "p", []),
            ("data", "&#bad;"),
            ("endtag", "p"),
        ])

    def test_unescape_function(self):
        parser = HTMLParser.HTMLParser()
        # A malformed reference is returned unchanged; a valid numeric
        # reference (with leading zeros) is decoded.
        self.assertEqual(parser.unescape('&#bad;'), '&#bad;')
        self.assertEqual(parser.unescape('&#0038;'), '&')
398
399
400
class AttributesTestCase(TestCaseBase):
    # Tests for attribute parsing inside start tags, plus comment and
    # conditional-comment handling.  Comments are used instead of
    # docstrings so that unittest's verbose output keeps the test names.

    def test_attr_syntax(self):
        output = [
            ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", None)])
        ]
        self._run_check("""<a b='v' c="v" d=v e>""", output)
        self._run_check("""<a b = 'v' c = "v" d = v e>""", output)
        self._run_check("""<a\nb\n=\n'v'\nc\n=\n"v"\nd\n=\nv\ne>""", output)
        self._run_check("""<a\tb\t=\t'v'\tc\t=\t"v"\td\t=\tv\te>""", output)

    def test_attr_values(self):
        self._run_check("""<a b='xxx\n\txxx' c="yyy\t\nyyy" d='\txyz\n'>""",
                        [("starttag", "a", [("b", "xxx\n\txxx"),
                                            ("c", "yyy\t\nyyy"),
                                            ("d", "\txyz\n")])])
        self._run_check("""<a b='' c="">""",
                        [("starttag", "a", [("b", ""), ("c", "")])])
        # Regression test for SF patch #669683.
        self._run_check("<e a=rgb(1,2,3)>",
                        [("starttag", "e", [("a", "rgb(1,2,3)")])])
        # Regression test for SF bug #921657.
        self._run_check(
            "<a href=mailto:xyz@example.com>",
            [("starttag", "a", [("href", "mailto:xyz@example.com")])])

    def test_attr_nonascii(self):
        # see issue 7311
        self._run_check(
            u"<img src=/foo/bar.png alt=\u4e2d\u6587>",
            [("starttag", "img", [("src", "/foo/bar.png"),
                                  ("alt", u"\u4e2d\u6587")])])
        self._run_check(
            u"<a title='\u30c6\u30b9\u30c8' href='\u30c6\u30b9\u30c8.html'>",
            [("starttag", "a", [("title", u"\u30c6\u30b9\u30c8"),
                                ("href", u"\u30c6\u30b9\u30c8.html")])])
        self._run_check(
            u'<a title="\u30c6\u30b9\u30c8" href="\u30c6\u30b9\u30c8.html">',
            [("starttag", "a", [("title", u"\u30c6\u30b9\u30c8"),
                                ("href", u"\u30c6\u30b9\u30c8.html")])])

    def test_attr_entity_replacement(self):
        # The input must contain the entity references themselves; the
        # expected value holds the characters they stand for.
        self._run_check(
            "<a b='&amp;&gt;&lt;&quot;&apos;'>",
            [("starttag", "a", [("b", "&><\"'")])])

    def test_attr_funky_names(self):
        self._run_check(
            "<a a.b='v' c:d=v e-f=v>",
            [("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")])])
        self._run_check(
            "<a $><b $=%><c \\=/>",
            [("starttag", "a", [("$", None)]),
             ("starttag", "b", [("$", "%")]),
             ("starttag", "c", [("\\", "/")])])

    def test_entityrefs_in_attributes(self):
        self._run_check(
            "<html foo='&euro;&amp;aa&amp;unsupported;'>",
            [("starttag", "html", [("foo", u"\u20AC&aa&unsupported;")])])

    def test_entities_in_attribute_value(self):
        # see #1200313
        # Four spellings of an ampersand: bare, named, decimal and hex.
        for entity in ['&', '&amp;', '&#38;', '&#x26;']:
            self._run_check('<a href="%s">' % entity,
                            [("starttag", "a", [("href", "&")])])
            self._run_check("<a href='%s'>" % entity,
                            [("starttag", "a", [("href", "&")])])
            self._run_check("<a href=%s>" % entity,
                            [("starttag", "a", [("href", "&")])])

    def test_malformed_attributes(self):
        # see #13357
        html = (
            "<a href=test'style='color:red;bad1'>test - bad1</a>"
            "<a href=test'+style='color:red;ba2'>test - bad2</a>"
            "<a href=test'&nbsp;style='color:red;bad3'>test - bad3</a>"
            "<a href = test'&nbsp;style='color:red;bad4' >test - bad4</a>"
        )
        expected = [
            ('starttag', 'a', [('href', "test'style='color:red;bad1'")]),
            ('data', 'test - bad1'), ('endtag', 'a'),
            ('starttag', 'a', [('href', "test'+style='color:red;ba2'")]),
            ('data', 'test - bad2'), ('endtag', 'a'),
            ('starttag', 'a', [('href', u"test'\xa0style='color:red;bad3'")]),
            ('data', 'test - bad3'), ('endtag', 'a'),
            ('starttag', 'a', [('href', u"test'\xa0style='color:red;bad4'")]),
            ('data', 'test - bad4'), ('endtag', 'a')
        ]
        self._run_check(html, expected)

    def test_malformed_adjacent_attributes(self):
        # see #12629
        self._run_check('<x><y z=""o"" /></x>',
                        [('starttag', 'x', []),
                         ('startendtag', 'y', [('z', ''), ('o""', None)]),
                         ('endtag', 'x')])
        self._run_check('<x><y z="""" /></x>',
                        [('starttag', 'x', []),
                         ('startendtag', 'y', [('z', ''), ('""', None)]),
                         ('endtag', 'x')])

    # see #755670 for the following 3 tests
    def test_adjacent_attributes(self):
        self._run_check('<a width="100%"cellspacing=0>',
                        [("starttag", "a",
                          [("width", "100%"), ("cellspacing", "0")])])

        self._run_check('<a id="foo"class="bar">',
                        [("starttag", "a",
                          [("id", "foo"), ("class", "bar")])])

    def test_missing_attribute_value(self):
        self._run_check('<a v=>',
                        [("starttag", "a", [("v", "")])])

    def test_javascript_attribute_value(self):
        self._run_check("<a href=javascript:popup('/popup/help.html')>",
                        [("starttag", "a",
                          [("href", "javascript:popup('/popup/help.html')")])])

    def test_end_tag_in_attribute_value(self):
        # see #1745761
        self._run_check("<a href='http://www.example.org/\">;'>spam</a>",
                        [("starttag", "a",
                          [("href", "http://www.example.org/\">;")]),
                         ("data", "spam"), ("endtag", "a")])

    def test_comments(self):
        html = ("<!-- I'm a valid comment -->"
                '<!--me too!-->'
                '<!------>'
                '<!---->'
                '<!----I have many hyphens---->'
                '<!-- I have a > in the middle -->'
                '<!-- and I have -- in the middle! -->')
        expected = [('comment', " I'm a valid comment "),
                    ('comment', 'me too!'),
                    ('comment', '--'),
                    ('comment', ''),
                    ('comment', '--I have many hyphens--'),
                    ('comment', ' I have a > in the middle '),
                    ('comment', ' and I have -- in the middle! ')]
        self._run_check(html, expected)

    def test_broken_comments(self):
        html = ('<! not really a comment >'
                '<! not a comment either -->'
                '<! -- close enough -->'
                '<!><!<-- this was an empty comment>'
                '<!!! another bogus comment !!!>')
        expected = [
            ('comment', ' not really a comment '),
            ('comment', ' not a comment either --'),
            ('comment', ' -- close enough --'),
            ('comment', ''),
            ('comment', '<-- this was an empty comment'),
            ('comment', '!! another bogus comment !!!'),
        ]
        self._run_check(html, expected)

    def test_condcoms(self):
        html = ('<!--[if IE & !(lte IE 8)]>aren\'t<![endif]-->'
                '<!--[if IE 8]>condcoms<![endif]-->'
                '<!--[if lte IE 7]>pretty?<![endif]-->')
        expected = [('comment', "[if IE & !(lte IE 8)]>aren't<![endif]"),
                    ('comment', '[if IE 8]>condcoms<![endif]'),
                    ('comment', '[if lte IE 7]>pretty?<![endif]')]
        self._run_check(html, expected)

    def test_broken_condcoms(self):
        # these condcoms are missing the '--' after '<!' and before the '>'
        html = ('<![if !(IE)]>broken condcom<![endif]>'
                '<![if ! IE]><link href="favicon.tiff"/><![endif]>'
                '<![if !IE 6]><img src="firefox.png" /><![endif]>'
                '<![if !ie 6]><b>foo</b><![endif]>'
                '<![if (!IE)|(lt IE 9)]><img src="mammoth.bmp" /><![endif]>')
        # According to the HTML5 specs sections "8.2.4.44 Bogus comment state"
        # and "8.2.4.45 Markup declaration open state", comment tokens should
        # be emitted instead of 'unknown decl', but calling unknown_decl
        # provides more flexibility.
        # See also Lib/_markupbase.py:parse_declaration
        expected = [
            ('unknown decl', 'if !(IE)'),
            ('data', 'broken condcom'),
            ('unknown decl', 'endif'),
            ('unknown decl', 'if ! IE'),
            ('startendtag', 'link', [('href', 'favicon.tiff')]),
            ('unknown decl', 'endif'),
            ('unknown decl', 'if !IE 6'),
            ('startendtag', 'img', [('src', 'firefox.png')]),
            ('unknown decl', 'endif'),
            ('unknown decl', 'if !ie 6'),
            ('starttag', 'b', []),
            ('data', 'foo'),
            ('endtag', 'b'),
            ('unknown decl', 'endif'),
            ('unknown decl', 'if (!IE)|(lt IE 9)'),
            ('startendtag', 'img', [('src', 'mammoth.bmp')]),
            ('unknown decl', 'endif')
        ]
        self._run_check(html, expected)
603
604
def test_main():
    """Run both test case classes through test_support.run_unittest."""
    test_support.run_unittest(HTMLParserTestCase, AttributesTestCase)
607
608
# Allow running this file directly as a script.
if __name__ == "__main__":
    test_main()

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/test/test_htmlparser.py

Download in other formats: