Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

test_re.py

Last change on this file was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 47.0 KB

Line
1	from test.test_support import verbose, run_unittest, import_module
2	from test.test_support import precisionbigmemtest, _2G, cpython_only
3	import re
4	from re import Scanner
5	import sre_constants
6	import sys
7	import string
8	import traceback
9	from weakref import proxy
10
11
12	# Misc tests from Tim Peters' re.doc
13
14	# WARNING: Don't change details in these tests if you don't know
15	# what you're doing. Some of these tests were carefully modeled to
16	# cover most of the code.
17
18	import unittest
19
20	class ReTests(unittest.TestCase):
21
22	def test_weakref(self):
23	s = 'QabbbcR'
24	x = re.compile('ab+c')
25	y = proxy(x)
26	self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
27
28	def test_search_star_plus(self):
29	self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
30	self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
31	self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
32	self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
33	self.assertEqual(re.search('x', 'aaa'), None)
34	self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
35	self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
36	self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
37	self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
38	self.assertEqual(re.match('a+', 'xxx'), None)
39
def bump_num(self, matchobj):
    # Substitution callback: parse the whole match as a decimal integer and
    # return that value plus one, as a string.
    return str(1 + int(matchobj.group(0)))
43
def test_basic_re_sub(self):
    # Exercises re.sub(): inline flags, a callable replacement (with and
    # without a count), template escapes, and \g<...> group references.
    self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
                     '9.3 -3 24x100y')
    self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
                     '9.3 -3 23x99y')

    # The string returned by a callable is inserted as-is, whereas a
    # template string has its backslash escapes interpreted.
    self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
    self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

    s = r"\1\1"
    self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
    self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
    self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

    # Groups can be referenced by name or by number through \g<...>.
    self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
    self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')

    # Escapes in the template: some are translated to control characters,
    # the others are expected to come through with their backslash.
    self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
                     '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
    self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
                     (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))

    # An empty match at the start of the string is still replaced.
    self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
71
def test_bug_449964(self):
    # fails for group followed by other escape
    # (the template has \g<1> references directly followed by \b, which is
    # expected to come out as a backspace character).
    self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
                     'xx\bxx\b')
76
77	def test_bug_449000(self):
78	# Test for sub() on escaped characters
79	self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
80	'abc\ndef\n')
81	self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
82	'abc\ndef\n')
83	self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
84	'abc\ndef\n')
85	self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
86	'abc\ndef\n')
87
def test_bug_1140(self):
    # re.sub(x, y, u'') should return u'', not '', and
    # re.sub(x, y, '') should return '', not u''.
    # Also:
    # re.sub(x, y, unicode(x)) should return unicode(y), and
    # re.sub(x, y, str(x)) should return
    # str(y) if isinstance(y, str) else unicode(y).
    #
    # Every str/unicode combination of pattern x and replacement y is
    # checked by the nested loops below.
    for x in 'x', u'x':
        for y in 'y', u'y':
            z = re.sub(x, y, u'')
            self.assertEqual(z, u'')
            self.assertEqual(type(z), unicode)
            #
            z = re.sub(x, y, '')
            self.assertEqual(z, '')
            self.assertEqual(type(z), str)
            #
            z = re.sub(x, y, unicode(x))
            self.assertEqual(z, y)
            self.assertEqual(type(z), unicode)
            #
            z = re.sub(x, y, str(x))
            self.assertEqual(z, y)
            self.assertEqual(type(z), type(y))
112
113	def test_bug_1661(self):
114	# Verify that flags do not get silently ignored with compiled patterns
115	pattern = re.compile('.')
116	self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
117	self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
118	self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
119	self.assertRaises(ValueError, re.compile, pattern, re.I)
120
121	def test_bug_3629(self):
122	# A regex that triggered a bug in the sre-code validator
123	re.compile("(?P<quote>)(?(quote))")
124
def test_sub_template_numeric_escape(self):
    # bug 776311 and friends
    # Numeric escapes in a replacement template that start with 0 or have
    # three octal digits are expected to yield the corresponding character.
    self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
    self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
    self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
    self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
    self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
    self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
    self.assertEqual(re.sub('x', r'\117', 'x'), '\117')

    # Only the first three digits form the octal escape.
    self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
    self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')

    self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
    self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
    self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
    self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
    self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')

    # Octal values above 0377 are expected to keep only the low eight bits.
    self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
    self.assertEqual(re.sub('x', r'\777', 'x'), '\377')

    # The pattern 'x' has no groups, so anything read as a group
    # reference must raise re.error.
    self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
    self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
    self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
    self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'

    # in python2.3 (etc), these loop endlessly in sre_parser.py
    self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
    self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
                     'xz8')
    self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
                     'xza')
166
167	def test_qualified_re_sub(self):
168	self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
169	self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
170
171	def test_bug_114660(self):
172	self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
173	'hello there')
174
def test_bug_462270(self):
    # Test for empty sub() behaviour, see SF bug #462270
    # 'x*' can match the empty string while 'x+' cannot; the expected
    # strings record where the replacement is inserted in each case.
    self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
    self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
179
180	def test_symbolic_groups(self):
181	re.compile('(?P<a>x)(?P=a)(?(a)y)')
182	re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
183	self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
184	self.assertRaises(re.error, re.compile, '(?Px)')
185	self.assertRaises(re.error, re.compile, '(?P=)')
186	self.assertRaises(re.error, re.compile, '(?P=1)')
187	self.assertRaises(re.error, re.compile, '(?P=a)')
188	self.assertRaises(re.error, re.compile, '(?P=a1)')
189	self.assertRaises(re.error, re.compile, '(?P=a.)')
190	self.assertRaises(re.error, re.compile, '(?P<)')
191	self.assertRaises(re.error, re.compile, '(?P<>)')
192	self.assertRaises(re.error, re.compile, '(?P<1>)')
193	self.assertRaises(re.error, re.compile, '(?P<a.>)')
194	self.assertRaises(re.error, re.compile, '(?())')
195	self.assertRaises(re.error, re.compile, '(?(a))')
196	self.assertRaises(re.error, re.compile, '(?(1a))')
197	self.assertRaises(re.error, re.compile, '(?(a.))')
198
def test_symbolic_refs(self):
    # Malformed or unusable group references in a replacement template.
    self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
    # A well-formed but unknown group name is reported as IndexError.
    self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
    # Group 'b' exists but never takes part in a match of 'xx'.  The two
    # groups must be alternatives ('|'); an escaped pipe would make the
    # pattern require a literal '|' and never match at all.
    self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
    self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
210
211	def test_re_subn(self):
212	self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
213	self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
214	self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
215	self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
216	self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
217
def test_re_split(self):
    # split() with plain, optional, capturing and non-capturing separators.
    # Capturing groups in the separator are returned in the result list.
    self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
    self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
    self.assertEqual(re.split("(:*)", ":a:b::c"),
                     ['', ':', 'a', ':', 'b', '::', 'c'])
    self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
    self.assertEqual(re.split("(:)*", ":a:b::c"),
                     ['', ':', 'a', ':', 'b', ':', 'c'])
    self.assertEqual(re.split("([b:]+)", ":a:b::c"),
                     ['', ':', 'a', ':b::', 'c'])
    # The separators below are alternations ('|'); with an escaped pipe
    # they would only match a literal '|' and never split the subject.
    self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
                     ['', None, ':', 'a', None, ':', '', 'b', None, '',
                      None, '::', 'c'])
    self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
                     ['', 'a', '', '', 'c'])
233
def test_qualified_re_split(self):
    # split() with a maxsplit argument of 2: the remainder of the string
    # is returned unsplit as the last element.
    self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
    self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
    self.assertEqual(re.split("(:)", ":a:b::c", 2),
                     ['', ':', 'a', ':', 'b::c'])
    self.assertEqual(re.split("(:*)", ":a:b::c", 2),
                     ['', ':', 'a', ':', 'b::c'])
241
242	def test_re_findall(self):
243	self.assertEqual(re.findall(":+", "abc"), [])
244	self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
245	self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
246	self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
247	(":", ":"),
248	(":", "::")])
249
250	def test_bug_117612(self):
251	self.assertEqual(re.findall(r"(a\|(b))", "aba"),
252	[("a", ""),("b", "b"),("a", "")])
253
254	def test_re_match(self):
255	self.assertEqual(re.match('a', 'a').groups(), ())
256	self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
257	self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
258	self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
259	self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
260
261	pat = re.compile('((a)\|(b))(c)?')
262	self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
263	self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
264	self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
265	self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
266	self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
267
268	# A single group
269	m = re.match('(a)', 'a')
270	self.assertEqual(m.group(0), 'a')
271	self.assertEqual(m.group(0), 'a')
272	self.assertEqual(m.group(1), 'a')
273	self.assertEqual(m.group(1, 1), ('a', 'a'))
274
275	pat = re.compile('(?:(?P<a1>a)\|(?P<b2>b))(?P<c3>c)?')
276	self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
277	self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
278	(None, 'b', None))
279	self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
280
281	def test_re_groupref_exists(self):
282	self.assertEqual(re.match('^($)?([^()]+)(?(1)$)$', '(a)').groups(),
283	('(', 'a'))
284	self.assertEqual(re.match('^($)?([^()]+)(?(1)$)$', 'a').groups(),
285	(None, 'a'))
286	self.assertEqual(re.match('^($)?([^()]+)(?(1)$)$', 'a)'), None)
287	self.assertEqual(re.match('^($)?([^()]+)(?(1)$)$', '(a'), None)
288	self.assertEqual(re.match('^(?:(a)\|c)((?(1)b\|d))$', 'ab').groups(),
289	('a', 'b'))
290	self.assertEqual(re.match('^(?:(a)\|c)((?(1)b\|d))$', 'cd').groups(),
291	(None, 'd'))
292	self.assertEqual(re.match('^(?:(a)\|c)((?(1)\|d))$', 'cd').groups(),
293	(None, 'd'))
294	self.assertEqual(re.match('^(?:(a)\|c)((?(1)\|d))$', 'a').groups(),
295	('a', ''))
296
297	# Tests for bug #1177831: exercise groups other than the first group
298	p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c\|d))')
299	self.assertEqual(p.match('abc').groups(),
300	('a', 'b', 'c'))
301	self.assertEqual(p.match('ad').groups(),
302	('a', None, 'd'))
303	self.assertEqual(p.match('abd'), None)
304	self.assertEqual(p.match('ac'), None)
305
306
307	def test_re_groupref(self):
308	self.assertEqual(re.match(r'^(\\|)?([^()]+)\1$', '\|a\|').groups(),
309	('\|', 'a'))
310	self.assertEqual(re.match(r'^(\\|)?([^()]+)\1?$', 'a').groups(),
311	(None, 'a'))
312	self.assertEqual(re.match(r'^(\\|)?([^()]+)\1$', 'a\|'), None)
313	self.assertEqual(re.match(r'^(\\|)?([^()]+)\1$', '\|a'), None)
314	self.assertEqual(re.match(r'^(?:(a)\|c)(\1)$', 'aa').groups(),
315	('a', 'a'))
316	self.assertEqual(re.match(r'^(?:(a)\|c)(\1)?$', 'c').groups(),
317	(None, None))
318
319	def test_groupdict(self):
320	self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
321	'first second').groupdict(),
322	{'first':'first', 'second':'second'})
323
324	def test_expand(self):
325	self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
326	"first second")
327	.expand(r"\2 \1 \g<second> \g<first>"),
328	"second first second first")
329
330	def test_repeat_minmax(self):
331	self.assertEqual(re.match("^(\w){1}$", "abc"), None)
332	self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
333	self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
334	self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
335
336	self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
337	self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
338	self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
339	self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
340	self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
341	self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
342	self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
343	self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
344
345	self.assertEqual(re.match("^x{1}$", "xxx"), None)
346	self.assertEqual(re.match("^x{1}?$", "xxx"), None)
347	self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
348	self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
349
350	self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
351	self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
352	self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
353	self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
354	self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
355	self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
356	self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
357	self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
358
359	self.assertEqual(re.match("^x{}$", "xxx"), None)
360	self.assertNotEqual(re.match("^x{}$", "x{}"), None)
361
362	def test_getattr(self):
363	self.assertEqual(re.match("(a)", "a").pos, 0)
364	self.assertEqual(re.match("(a)", "a").endpos, 1)
365	self.assertEqual(re.match("(a)", "a").string, "a")
366	self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
367	self.assertNotEqual(re.match("(a)", "a").re, None)
368
def test_special_escapes(self):
    # \b, \B, \A, \Z and the character-class escapes, checked with no
    # flags, with re.LOCALE and with re.UNICODE, on str and unicode subjects.
    self.assertEqual(re.search(r"\b(b.)\b",
                               "abcd abc bcd bx").group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B",
                               "abc bcd bc abxd").group(1), "bx")
    self.assertEqual(re.search(r"\b(b.)\b",
                               "abcd abc bcd bx", re.LOCALE).group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B",
                               "abc bcd bc abxd", re.LOCALE).group(1), "bx")
    self.assertEqual(re.search(r"\b(b.)\b",
                               "abcd abc bcd bx", re.UNICODE).group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B",
                               "abc bcd bc abxd", re.UNICODE).group(1), "bx")
    # ^ and $ follow line boundaries under re.M, while \A and \Z stay tied
    # to the ends of the string.
    self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
    # The same checks with unicode subjects.
    self.assertEqual(re.search(r"\b(b.)\b",
                               u"abcd abc bcd bx").group(1), "bx")
    self.assertEqual(re.search(r"\B(b.)\B",
                               u"abc bcd bc abxd").group(1), "bx")
    self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
    self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
    self.assertEqual(re.search(r"\d\D\w\W\s\S",
                               "1aa! a").group(0), "1aa! a")
    self.assertEqual(re.search(r"\d\D\w\W\s\S",
                               "1aa! a", re.LOCALE).group(0), "1aa! a")
    self.assertEqual(re.search(r"\d\D\w\W\s\S",
                               "1aa! a", re.UNICODE).group(0), "1aa! a")
398
399	def test_string_boundaries(self):
400	# See http://bugs.python.org/issue10713
401	self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
402	"abc")
403	# There's a word boundary at the start of a string.
404	self.assertTrue(re.match(r"\b", "abc"))
405	# A non-empty string includes a non-boundary zero-length match.
406	self.assertTrue(re.search(r"\B", "abc"))
407	# There is no non-boundary match at the start of a string.
408	self.assertFalse(re.match(r"\B", "abc"))
409	# However, an empty string contains no word boundaries, and also no
410	# non-boundaries.
411	self.assertEqual(re.search(r"\B", ""), None)
412	# This one is questionable and different from the perlre behaviour,
413	# but describes current behavior.
414	self.assertEqual(re.search(r"\b", ""), None)
415	# A single word-character string has two boundaries, but no
416	# non-boundary gaps.
417	self.assertEqual(len(re.findall(r"\b", "a")), 2)
418	self.assertEqual(len(re.findall(r"\B", "a")), 0)
419	# If there are no words, there are no boundaries
420	self.assertEqual(len(re.findall(r"\b", " ")), 0)
421	self.assertEqual(len(re.findall(r"\b", " ")), 0)
422	# Can match around the whitespace.
423	self.assertEqual(len(re.findall(r"\B", " ")), 2)
424
def test_bigcharset(self):
    # Character sets containing code points above 255, first a small one
    # and then a large set built from every 255th code point in
    # range(256, 2**16).
    self.assertEqual(re.match(u"([\u2222\u2223])",
                              u"\u2222").group(1), u"\u2222")
    self.assertEqual(re.match(u"([\u2222\u2223])",
                              u"\u2222", re.UNICODE).group(1), u"\u2222")
    r = u'[%s]' % u''.join(map(unichr, range(256, 2**16, 255)))
    self.assertEqual(re.match(r, u"\uff01", re.UNICODE).group(), u"\uff01")
432
433	def test_big_codesize(self):
434	# Issue #1160
435	r = re.compile('\|'.join(('%d'%x for x in range(10000))))
436	self.assertIsNotNone(r.match('1000'))
437	self.assertIsNotNone(r.match('9999'))
438
439	def test_anyall(self):
440	self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
441	"a\nb")
442	self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
443	"a\n\nb")
444
445	def test_non_consuming(self):
446	self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
447	self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
448	self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
449	self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
450	self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
451	self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
452	self.assertEqual(re.match(r"(a)(?=\s(abc\|a))", "a a").group(1), "a")
453
454	self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
455	self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
456	self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
457	self.assertEqual(re.match(r"(a)(?!\s(abc\|a))", "a b").group(1), "a")
458
459	def test_ignore_case(self):
460	self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
461	self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
462	self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
463	self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
464	self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
465	self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
466	self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
467	self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
468	self.assertEqual(re.match(r"((a)\s(abc\|a))", "a a", re.I).group(1), "a a")
469	self.assertEqual(re.match(r"((a)\s(abc\|a)*)", "a aa", re.I).group(1), "a aa")
470
471	def test_category(self):
472	self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
473
def test_getlower(self):
    # _sre.getlower() is called with no flags, with re.LOCALE and with
    # re.UNICODE; 'A' is expected to map to 'a' each time.
    import _sre
    self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
    self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
    self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))

    # Case-insensitive matching on str and unicode subjects.
    self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
    self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
482
483	def test_not_literal(self):
484	self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
485	self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
486
487	def test_search_coverage(self):
488	self.assertEqual(re.search("\s(b)", " b").group(1), "b")
489	self.assertEqual(re.search("a\s", "a ").group(0), "a ")
490
def assertMatch(self, pattern, text, match=None, span=None,
                matcher=re.match):
    # Helper assertion: matcher(pattern, text) must succeed and the result's
    # group() and span() must equal `match` and `span`.  When neither is
    # given, the pattern is expected to match the whole of `text`.
    # Supplying only one of the two raises ValueError.
    if match is None or span is None:
        if not (match is None and span is None):
            raise ValueError('If match is not None, span should be specified '
                             '(and vice versa).')
        # the pattern matches the whole text
        match = text
        span = (0, len(text))
    found = matcher(pattern, text)
    self.assertTrue(found)
    self.assertEqual(found.group(), match)
    self.assertEqual(found.span(), span)
504
def test_re_escape(self):
    # re.escape() on each of the first 256 unicode characters: ASCII
    # letters and digits are returned unchanged, NUL becomes '\\000' and
    # every other character gets a backslash prefix.  Each escaped form
    # must also match the character it was built from.
    alnum_chars = string.ascii_letters + string.digits
    p = u''.join(unichr(i) for i in range(256))
    for c in p:
        if c in alnum_chars:
            self.assertEqual(re.escape(c), c)
        elif c == u'\x00':
            self.assertEqual(re.escape(c), u'\\000')
        else:
            self.assertEqual(re.escape(c), u'\\' + c)
        self.assertMatch(re.escape(c), c)
    # The escaped form of the whole string matches the whole string.
    self.assertMatch(re.escape(p), p)
517
def test_re_escape_byte(self):
    # Byte-string counterpart of the unicode escape test: the same three
    # expectations are checked for each of the 256 single-byte strings.
    alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
    p = ''.join(chr(i) for i in range(256))
    for b in p:
        if b in alnum_chars:
            self.assertEqual(re.escape(b), b)
        elif b == b'\x00':
            self.assertEqual(re.escape(b), b'\\000')
        else:
            self.assertEqual(re.escape(b), b'\\' + b)
        self.assertMatch(re.escape(b), b)
    # The escaped form of the whole string matches the whole string.
    self.assertMatch(re.escape(p), p)
530
def test_re_escape_non_ascii(self):
    # Non-ASCII characters are expected to be escaped with a backslash
    # prefix, and the escaped text must still match the original string.
    s = u'xxx\u2620\u2620\u2620xxx'
    s_escaped = re.escape(s)
    self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx')
    self.assertMatch(s_escaped, s)
    # An escaped character can be quantified: search for a run of \u2620
    # with one arbitrary character on either side.
    self.assertMatch(u'.%s+.' % re.escape(u'\u2620'), s,
                     u'x\u2620\u2620\u2620x', (2, 7), re.search)
538
def test_re_escape_non_ascii_bytes(self):
    # Escaping UTF-8 encoded bytes: every byte of the encoded character is
    # expected to get its own backslash.
    b = u'y\u2620y\u2620y'.encode('utf-8')
    b_escaped = re.escape(b)
    self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
    self.assertMatch(b_escaped, b)
    # The escaped encoded character occurs twice in the byte string.
    res = re.findall(re.escape(u'\u2620'.encode('utf-8')), b)
    self.assertEqual(len(res), 2)
546
def test_pickling(self):
    # Run the pickle round-trip helper with both pickle and cPickle.
    import pickle
    self.pickle_test(pickle)
    import cPickle
    self.pickle_test(cPickle)
    # old pickles expect the _compile() reconstructor in sre module
    import_module("sre", deprecated=True)
    from sre import _compile
555
def pickle_test(self, pickle):
    # Helper: round-trip a compiled pattern through the given pickle module
    # (any object providing dumps() and loads()) and check that the copy
    # compares equal to the original.  The pattern uses real alternation
    # ('|') between its branches.
    oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    s = pickle.dumps(oldpat)
    newpat = pickle.loads(s)
    self.assertEqual(oldpat, newpat)
561
562	def test_constants(self):
563	self.assertEqual(re.I, re.IGNORECASE)
564	self.assertEqual(re.L, re.LOCALE)
565	self.assertEqual(re.M, re.MULTILINE)
566	self.assertEqual(re.S, re.DOTALL)
567	self.assertEqual(re.X, re.VERBOSE)
568
def test_flags(self):
    # Compiling with each single flag must yield a pattern object.
    for flag in [re.I, re.M, re.X, re.S, re.L]:
        self.assertNotEqual(re.compile('^pattern$', flag), None)
572
def test_sre_character_literals(self):
    # Octal (\ooo) and hex (\xhh) escapes in a pattern, alone and followed
    # by a character that must not be absorbed into the escape.
    for i in [0, 8, 16, 32, 64, 127, 128, 255]:
        self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
        self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
        self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
        self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
        self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
        self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
    # \911 is expected to be rejected with re.error.
    self.assertRaises(re.error, re.match, "\911", "")
582
def test_sre_character_class_literals(self):
    # The same octal and hex escapes, this time inside a character class.
    for i in [0, 8, 16, 32, 64, 127, 128, 255]:
        self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
        self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
        self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
        self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
        self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
        self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
    # [\911] is expected to be rejected with re.error.
    self.assertRaises(re.error, re.match, "[\911]", "")
592
593	def test_bug_113254(self):
594	self.assertEqual(re.match(r'(a)\|(b)', 'b').start(1), -1)
595	self.assertEqual(re.match(r'(a)\|(b)', 'b').end(1), -1)
596	self.assertEqual(re.match(r'(a)\|(b)', 'b').span(1), (-1, -1))
597
598	def test_bug_527371(self):
599	# bug described in patches 527371/672491
600	self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
601	self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
602	self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
603	self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
604	self.assertEqual(re.match("((a))", "a").lastindex, 1)
605
606	def test_bug_545855(self):
607	# bug 545855 -- This pattern failed to cause a compile error as it
608	# should, instead provoking a TypeError.
609	self.assertRaises(re.error, re.compile, 'foo[a-')
610
611	def test_bug_418626(self):
612	# bugs 418626 at al. -- Testing Greg Chapman's addition of op code
613	# SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
614	# pattern '*?' on a long string.
615	self.assertEqual(re.match('.?c', 10000'ab'+'cd').end(0), 20001)
616	self.assertEqual(re.match('.?cd', 5000'ab'+'c'+5000*'ab'+'cde').end(0),
617	20003)
618	self.assertEqual(re.match('.?cd', 20000'abc'+'de').end(0), 60001)
619	# non-simple '*?' still used to hit the recursion limit, before the
620	# non-recursive scheme was implemented.
621	self.assertEqual(re.search('(a\|b)?c', 10000'ab'+'cd').end(0), 20001)
622
623	def test_bug_612074(self):
624	pat=u"["+re.escape(u"\u2039")+u"]"
625	self.assertEqual(re.compile(pat) and 1, 1)
626
627	def test_stack_overflow(self):
628	# nasty cases that used to overflow the straightforward recursive
629	# implementation of repeated groups.
630	self.assertEqual(re.match('(x)', 50000'x').group(1), 'x')
631	self.assertEqual(re.match('(x)y', 50000'x'+'y').group(1), 'x')
632	self.assertEqual(re.match('(x)?y', 50000'x'+'y').group(1), 'x')
633
634	def test_unlimited_zero_width_repeat(self):
635	# Issue #9669
636	self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
637	self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
638	self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
639	self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
640	self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
641	self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
642
643	def test_scanner(self):
644	def s_ident(scanner, token): return token
645	def s_operator(scanner, token): return "op%s" % token
646	def s_float(scanner, token): return float(token)
647	def s_int(scanner, token): return int(token)
648
649	scanner = Scanner([
650	(r"[a-zA-Z_]\w*", s_ident),
651	(r"\d+\.\d*", s_float),
652	(r"\d+", s_int),
653	(r"=\|\+\|-\|\*\|/", s_operator),
654	(r"\s+", None),
655	])
656
657	self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
658
659	self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
660	(['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
661	'op+', 'bar'], ''))
662
663	def test_bug_448951(self):
664	# bug 448951 (similar to 429357, but with single char match)
665	# (Also test greedy matches.)
666	for op in '','?','*':
667	self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
668	(None, None))
669	self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
670	('a:', 'a'))
671
672	def test_bug_725106(self):
673	# capturing groups in alternatives in repeats
674	self.assertEqual(re.match('^((a)\|b)*', 'abc').groups(),
675	('b', 'a'))
676	self.assertEqual(re.match('^(([ab])\|c)*', 'abc').groups(),
677	('c', 'b'))
678	self.assertEqual(re.match('^((d)\|[ab])*', 'abc').groups(),
679	('b', None))
680	self.assertEqual(re.match('^((a)c\|[ab])*', 'abc').groups(),
681	('b', None))
682	self.assertEqual(re.match('^((a)\|b)*?c', 'abc').groups(),
683	('b', 'a'))
684	self.assertEqual(re.match('^(([ab])\|c)*?d', 'abcd').groups(),
685	('c', 'b'))
686	self.assertEqual(re.match('^((d)\|[ab])*?c', 'abc').groups(),
687	('b', None))
688	self.assertEqual(re.match('^((a)c\|[ab])*?c', 'abc').groups(),
689	('b', None))
690
691	def test_bug_725149(self):
692	# mark_stack_base restoring before restoring marks
693	self.assertEqual(re.match('(a)(?:(?=(b))c)', 'abb').groups(),
694	('a', None))
695	self.assertEqual(re.match('(a)((?!(b)))', 'abb').groups(),
696	('a', None, None))
697
def test_bug_764548(self):
    # bug 764548, re.compile() barfs on str/unicode subclasses
    try:
        unicode
    except NameError:
        return # no problem if we have no unicode
    # Compile a pattern given as an instance of a unicode subclass.
    class my_unicode(unicode): pass
    pat = re.compile(my_unicode("abc"))
    self.assertEqual(pat.match("xyz"), None)
707
708	def test_finditer(self):
709	iter = re.finditer(r":+", "a:b::c:::d")
710	self.assertEqual([item.group(0) for item in iter],
711	[":", "::", ":::"])
712
def test_bug_926075(self):
    # Compiling the same pattern text as str and as unicode must give two
    # distinct pattern objects.
    try:
        unicode
    except NameError:
        return # no problem if we have no unicode
    self.assertTrue(re.compile('bug_926075') is not
                    re.compile(eval("u'bug_926075'")))
720
721	def test_bug_931848(self):
722	try:
723	unicode
724	except NameError:
725	pass
726	pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
727	self.assertEqual(re.compile(pattern).split("a.b.c"),
728	['a','b','c'])
729
730	def test_bug_581080(self):
731	iter = re.finditer(r"\s", "a b")
732	self.assertEqual(iter.next().span(), (1,2))
733	self.assertRaises(StopIteration, iter.next)
734
735	scanner = re.compile(r"\s").scanner("a b")
736	self.assertEqual(scanner.search().span(), (1, 2))
737	self.assertEqual(scanner.search(), None)
738
739	def test_bug_817234(self):
740	iter = re.finditer(r".*", "asdf")
741	self.assertEqual(iter.next().span(), (0, 4))
742	self.assertEqual(iter.next().span(), (4, 4))
743	self.assertRaises(StopIteration, iter.next)
744
745	def test_bug_6561(self):
746	# '\d' should match characters in Unicode category 'Nd'
747	# (Number, Decimal Digit), but not those in 'Nl' (Number,
748	# Letter) or 'No' (Number, Other).
749	decimal_digits = [
750	u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
751	u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
752	u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
753	]
754	for x in decimal_digits:
755	self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
756
757	not_decimal_digits = [
758	u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
759	u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
760	u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
761	u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
762	]
763	for x in not_decimal_digits:
764	self.assertIsNone(re.match('^\d$', x, re.UNICODE))
765
def test_empty_array(self):
    # SF bug 1647541: matching against an empty array of any typecode
    # must behave like matching against an empty string.
    import array
    literal_re = re.compile("bla")
    empty_re = re.compile("")
    for code in 'cbBuhHiIlLfd':
        empty = array.array(code)
        self.assertEqual(literal_re.match(empty), None)
        self.assertEqual(empty_re.match(empty).groups(), ())
773
774	def test_inline_flags(self):
775	# Bug #1700
776	upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
777	lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Bellow
778
779	p = re.compile(upper_char, re.I \| re.U)
780	q = p.match(lower_char)
781	self.assertNotEqual(q, None)
782
783	p = re.compile(lower_char, re.I \| re.U)
784	q = p.match(upper_char)
785	self.assertNotEqual(q, None)
786
787	p = re.compile('(?i)' + upper_char, re.U)
788	q = p.match(lower_char)
789	self.assertNotEqual(q, None)
790
791	p = re.compile('(?i)' + lower_char, re.U)
792	q = p.match(upper_char)
793	self.assertNotEqual(q, None)
794
795	p = re.compile('(?iu)' + upper_char)
796	q = p.match(lower_char)
797	self.assertNotEqual(q, None)
798
799	p = re.compile('(?iu)' + lower_char)
800	q = p.match(upper_char)
801	self.assertNotEqual(q, None)
802
803	def test_dollar_matches_twice(self):
804	"$ matches the end of string, and just before the terminating \n"
805	pattern = re.compile('$')
806	self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
807	self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
808	self.assertEqual(pattern.sub('#', '\n'), '#\n#')
809
810	pattern = re.compile('$', re.MULTILINE)
811	self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
812	self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
813	self.assertEqual(pattern.sub('#', '\n'), '#\n#')
814
def test_dealloc(self):
    # issue 3299: check for segfault in debug build
    import _sre
    # Where the code list overflows depends on the build (wide vs narrow)
    # and on how SRE_CODE is defined (see sre.h).  2**128 is large enough
    # to overflow on both; smaller values raise RuntimeError rather than
    # OverflowError.
    oversized_code = 2**128
    with self.assertRaises(TypeError):
        re.finditer("a", {})
    with self.assertRaises(OverflowError):
        _sre.compile("abc", 0, [oversized_code])
825
def test_compile(self):
    # Compiling a string gives a pattern object; compiling that pattern
    # object again hands back the very same object.
    compiled = re.compile('random pattern')
    self.assertIsInstance(compiled, re._pattern_type)
    recompiled = re.compile(compiled)
    self.assertIsInstance(recompiled, re._pattern_type)
    self.assertIs(recompiled, compiled)
    # Anything that is neither a string nor a pattern is rejected.
    with self.assertRaises(TypeError):
        re.compile(0)
835
def test_bug_13899(self):
    # Issue #13899: inside a character class, r"[\A]" should behave like
    # "A" but used to match nothing; the same goes for \B and \Z.
    found = re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ')
    self.assertEqual(found, ['A', 'B', '\b', 'C', 'Z'])
841
@precisionbigmemtest(size=_2G, memuse=1)
def test_large_search(self, size):
    # Issue #10182: indices were 32-bit-truncated, so the end-of-string
    # match on a huge subject must report offsets equal to its length.
    subject = 'a' * size
    end_match = re.search('$', subject)
    self.assertIsNotNone(end_match)
    self.assertEqual(end_match.start(), size)
    self.assertEqual(end_match.end(), size)
850
# memuse is this large because re.sub() builds its replacement result
# from a list followed by a join().
@precisionbigmemtest(size=_2G, memuse=16 + 2)
def test_large_subn(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    subject = 'a' * size
    replaced, count = re.subn('', '', subject)
    self.assertEqual(replaced, subject)
    # The empty pattern matches at every position, including the end.
    self.assertEqual(count, size + 1)
860
861
862	def test_repeat_minmax_overflow(self):
863	# Issue #13169
864	string = "x" * 100000
865	self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
866	self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
867	self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
868	self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
869	self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
870	self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
871	# 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
872	self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
873	self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
874	self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
875	self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2129, 2128))
876
@cpython_only
def test_repeat_minmax_overflow_maxrepeat(self):
    # Counts just below MAXREPEAT compile (and simply fail to match, or
    # match everything for an upper bound); MAXREPEAT itself overflows.
    try:
        from _sre import MAXREPEAT
    except ImportError:
        self.skipTest('requires _sre.MAXREPEAT constant')
    subject = "x" * 100000
    below_limit = MAXREPEAT - 1
    self.assertIsNone(re.match(r".{%d}" % below_limit, subject))
    upper_bounded = re.match(r".{,%d}" % below_limit, subject)
    self.assertEqual(upper_bounded.span(), (0, 100000))
    self.assertIsNone(re.match(r".{%d,}?" % below_limit, subject))
    for template in (r".{%d}", r".{,%d}", r".{%d,}?"):
        self.assertRaises(OverflowError, re.compile, template % MAXREPEAT)
891
892	def test_backref_group_name_in_exception(self):
893	# Issue 17341: Poor error message when compiling invalid regex
894	with self.assertRaisesRegexp(sre_constants.error, '<foo>'):
895	re.compile('(?P=<foo>)')
896
897	def test_group_name_in_exception(self):
898	# Issue 17341: Poor error message when compiling invalid regex
899	with self.assertRaisesRegexp(sre_constants.error, '\?foo'):
900	re.compile('(?P<?foo>)')
901
902	def test_issue17998(self):
903	for reps in '*', '+', '?', '{1}':
904	for mod in '', '?':
905	pattern = '.' + reps + mod + 'yz'
906	self.assertEqual(re.compile(pattern, re.S).findall('xyz'),
907	['xyz'], msg=pattern)
908	pattern = pattern.encode()
909	self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),
910	[b'xyz'], msg=pattern)
911
912
913	def test_bug_2537(self):
914	# issue 2537: empty submatches
915	for outer_op in ('{0,}', '*', '+', '{1,187}'):
916	for inner_op in ('{0,}', '*', '?'):
917	r = re.compile("^((x\|y)%s)%s" % (inner_op, outer_op))
918	m = r.match("xyyzy")
919	self.assertEqual(m.group(0), "xyy")
920	self.assertEqual(m.group(1), "")
921	self.assertEqual(m.group(2), "y")
922
def run_re_tests():
    """Run the table-driven cases from test.re_tests and print discrepancies.

    Each entry of ``tests`` is ``(pattern, s, outcome)`` or
    ``(pattern, s, outcome, repl, expected)``.  The pattern is compiled and
    searched against ``s``; for cases expected to SUCCEED the ``repl``
    expression is evaluated against the match and compared with
    ``expected``, and the search is repeated with a unicode subject, a
    unicode pattern, a range-limited search and the IGNORECASE, LOCALE and
    UNICODE flags.

    Nothing is asserted or returned: every problem is printed to stdout.

    Raises:
        ValueError: if a test tuple does not have 3 or 5 fields.
    """
    from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR

    def report(message, *details):
        # One space-separated line, formatted the way the statement
        # "print message, detail, ..." would format it.
        print(' '.join([message] + [str(detail) for detail in details]))

    if verbose:
        report('Running re_tests test suite')

    for t in tests:
        sys.stdout.flush()
        pattern = s = outcome = repl = expected = None
        if len(t) == 5:
            pattern, s, outcome, repl, expected = t
        elif len(t) == 3:
            pattern, s, outcome = t
        else:
            raise ValueError('Test tuples should have 3 or 5 fields', t)

        try:
            obj = re.compile(pattern)
        except re.error:
            # A syntax error is only a problem when it was not expected.
            if outcome != SYNTAX_ERROR:
                report('=== Syntax error:', t)
            continue
        except Exception:
            # KeyboardInterrupt and SystemExit are not Exception subclasses
            # and therefore propagate with their original traceback.
            report('*** Unexpected error ***', t)
            if verbose:
                traceback.print_exc(file=sys.stdout)
            continue

        try:
            result = obj.search(s)
        except re.error as msg:
            report('=== Unexpected exception', t, repr(msg))
            # There is no result for this case; carrying on would use an
            # unbound name or the previous case's stale match.
            continue

        if outcome == FAIL:
            if result is not None:
                report('=== Succeeded incorrectly', t)
            continue
        if outcome != SUCCEED:
            # SYNTAX_ERROR was expected but the pattern compiled; this
            # should have been a syntax error, so forget it.
            continue

        if result is None:
            report('=== Failed incorrectly', t)
        else:
            # Matched, as expected, so now we compute the result string
            # and compare it to our expected result.
            vardict = {'found': result.group(0),
                       'groups': result.group(),
                       'flags': result.re.flags}
            for i in range(1, 100):
                try:
                    gi = result.group(i)
                    # Special hack because else the string concat fails:
                    if gi is None:
                        gi = "None"
                except IndexError:
                    gi = "Error"
                vardict['g%d' % i] = gi
            for name in result.re.groupindex:
                try:
                    gi = result.group(name)
                    if gi is None:
                        gi = "None"
                except IndexError:
                    gi = "Error"
                vardict[name] = gi
            # NOTE: eval() is applied to expressions taken from the
            # test.re_tests table; never feed it external input.
            repl = eval(repl, vardict)
            if repl != expected:
                report('=== grouping error', t,
                       repr(repl) + ' should be ' + repr(expected))

        # Try the match on a unicode string, and check that it still
        # succeeds.
        try:
            result = obj.search(unicode(s, "latin-1"))
        except (NameError, TypeError):
            # NameError: no unicode type (originally noted as "1.5.2").
            # TypeError: originally noted as "unicode test case",
            # presumably a subject that is already unicode.
            continue
        if result is None:
            report('=== Fails on unicode match', t)

        # Try the match on a unicode pattern, and check that it still
        # succeeds.
        obj = re.compile(unicode(pattern, "latin-1"))
        result = obj.search(s)
        if result is None:
            report('=== Fails on unicode pattern match', t)

        # Try the match with the search area limited to the extent of the
        # match and see if it still succeeds.  \B will break (because it
        # won't match at the end or start of a string), so we'll ignore
        # patterns that feature it.
        if (pattern[:2] != '\\B' and pattern[-2:] != '\\B'
                and result is not None):
            obj = re.compile(pattern)
            result = obj.search(s, result.start(0), result.end(0) + 1)
            if result is None:
                report('=== Failed on range-limited match', t)

        # Try the match with IGNORECASE, LOCALE and UNICODE enabled in
        # turn, and check that it still succeeds each time.
        flag_checks = (
            (re.IGNORECASE, '=== Fails on case-insensitive match'),
            (re.LOCALE, '=== Fails on locale-sensitive match'),
            (re.UNICODE, '=== Fails on unicode-sensitive match'),
        )
        for flag, failure_message in flag_checks:
            obj = re.compile(pattern, flag)
            result = obj.search(s)
            if result is None:
                report(failure_message, t)
1046
def test_main():
    """Run the ReTests unittest cases, then the table-driven run_re_tests()."""
    run_unittest(ReTests)
    run_re_tests()
1050
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/test/test_re.py

Download in other formats: