source: python/trunk/Lib/test/test_re.py

Last change on this file was 391, checked in by dmik, 11 years ago

python: Merge vendor 2.7.6 to trunk.

  • Property svn:eol-style set to native
File size: 47.0 KB
Line 
1from test.test_support import verbose, run_unittest, import_module
2from test.test_support import precisionbigmemtest, _2G, cpython_only
3import re
4from re import Scanner
5import sre_constants
6import sys
7import string
8import traceback
9from weakref import proxy
10
11
12# Misc tests from Tim Peters' re.doc
13
14# WARNING: Don't change details in these tests if you don't know
15# what you're doing. Some of these tests were carefully modeled to
16# cover most of the code.
17
18import unittest
19
20class ReTests(unittest.TestCase):
21
22 def test_weakref(self):
23 s = 'QabbbcR'
24 x = re.compile('ab+c')
25 y = proxy(x)
26 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
27
28 def test_search_star_plus(self):
29 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
30 self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
31 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
32 self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
33 self.assertEqual(re.search('x', 'aaa'), None)
34 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
35 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
36 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
37 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
38 self.assertEqual(re.match('a+', 'xxx'), None)
39
def bump_num(self, matchobj):
    """Return the integer text matched by *matchobj*, incremented by one.

    The whole match (group 0) is parsed with int() and the result is
    returned as a string.
    """
    return str(int(matchobj.group(0)) + 1)
43
def test_basic_re_sub(self):
    # re.sub() with string, callable, numeric and named-group templates.
    self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
    numbers = '08.2 -2 23x99y'
    self.assertEqual(re.sub(r'\d+', self.bump_num, numbers),
                     '9.3 -3 24x100y')
    self.assertEqual(re.sub(r'\d+', self.bump_num, numbers, 3),
                     '9.3 -3 23x99y')

    # The value returned by a callable is inserted as is; a string
    # template has its escapes processed.
    self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
    self.assertEqual(re.sub('.', r"\n", 'x'), '\n')

    s = r"\1\1"
    self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
    self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
    self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)

    # Named and numbered \g<...> references give the same expansion.
    doubled = (
        ('(?P<a>x)', r'\g<a>\g<a>'),
        ('(?P<a>x)', r'\g<a>\g<1>'),
        ('(?P<unk>x)', r'\g<unk>\g<unk>'),
        ('(?P<unk>x)', r'\g<1>\g<1>'),
    )
    for pattern, template in doubled:
        self.assertEqual(re.sub(pattern, template, 'xx'), 'xxxx')

    self.assertEqual(
        re.sub('a', r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D', 'a'),
        '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
    control = '\t\n\v\r\f\a'
    self.assertEqual(re.sub('a', control, 'a'), control)
    self.assertEqual(re.sub('a', control, 'a'),
                     ''.join(map(chr, (9, 10, 11, 13, 12, 7))))

    self.assertEqual(re.sub(r'^\s*', 'X', 'test'), 'Xtest')
71
72 def test_bug_449964(self):
73 # fails for group followed by other escape
74 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
75 'xx\bxx\b')
76
77 def test_bug_449000(self):
78 # Test for sub() on escaped characters
79 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
80 'abc\ndef\n')
81 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
82 'abc\ndef\n')
83 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
84 'abc\ndef\n')
85 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
86 'abc\ndef\n')
87
88 def test_bug_1140(self):
89 # re.sub(x, y, u'') should return u'', not '', and
90 # re.sub(x, y, '') should return '', not u''.
91 # Also:
92 # re.sub(x, y, unicode(x)) should return unicode(y), and
93 # re.sub(x, y, str(x)) should return
94 # str(y) if isinstance(y, str) else unicode(y).
95 for x in 'x', u'x':
96 for y in 'y', u'y':
97 z = re.sub(x, y, u'')
98 self.assertEqual(z, u'')
99 self.assertEqual(type(z), unicode)
100 #
101 z = re.sub(x, y, '')
102 self.assertEqual(z, '')
103 self.assertEqual(type(z), str)
104 #
105 z = re.sub(x, y, unicode(x))
106 self.assertEqual(z, y)
107 self.assertEqual(type(z), unicode)
108 #
109 z = re.sub(x, y, str(x))
110 self.assertEqual(z, y)
111 self.assertEqual(type(z), type(y))
112
113 def test_bug_1661(self):
114 # Verify that flags do not get silently ignored with compiled patterns
115 pattern = re.compile('.')
116 self.assertRaises(ValueError, re.match, pattern, 'A', re.I)
117 self.assertRaises(ValueError, re.search, pattern, 'A', re.I)
118 self.assertRaises(ValueError, re.findall, pattern, 'A', re.I)
119 self.assertRaises(ValueError, re.compile, pattern, re.I)
120
121 def test_bug_3629(self):
122 # A regex that triggered a bug in the sre-code validator
123 re.compile("(?P<quote>)(?(quote))")
124
125 def test_sub_template_numeric_escape(self):
126 # bug 776311 and friends
127 self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
128 self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
129 self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
130 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
131 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
132 self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
133 self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
134
135 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
136 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
137
138 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
139 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
140 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
141 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
142 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
143
144 self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
145 self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
146
147 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
148 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
149 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
150 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
151 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
152 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
153 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
154 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
155 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
156 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
157 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
158 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
159
160 # in python2.3 (etc), these loop endlessly in sre_parser.py
161 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
162 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
163 'xz8')
164 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
165 'xza')
166
167 def test_qualified_re_sub(self):
168 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
169 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
170
171 def test_bug_114660(self):
172 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
173 'hello there')
174
175 def test_bug_462270(self):
176 # Test for empty sub() behaviour, see SF bug #462270
177 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
178 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
179
180 def test_symbolic_groups(self):
181 re.compile('(?P<a>x)(?P=a)(?(a)y)')
182 re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
183 self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
184 self.assertRaises(re.error, re.compile, '(?Px)')
185 self.assertRaises(re.error, re.compile, '(?P=)')
186 self.assertRaises(re.error, re.compile, '(?P=1)')
187 self.assertRaises(re.error, re.compile, '(?P=a)')
188 self.assertRaises(re.error, re.compile, '(?P=a1)')
189 self.assertRaises(re.error, re.compile, '(?P=a.)')
190 self.assertRaises(re.error, re.compile, '(?P<)')
191 self.assertRaises(re.error, re.compile, '(?P<>)')
192 self.assertRaises(re.error, re.compile, '(?P<1>)')
193 self.assertRaises(re.error, re.compile, '(?P<a.>)')
194 self.assertRaises(re.error, re.compile, '(?())')
195 self.assertRaises(re.error, re.compile, '(?(a))')
196 self.assertRaises(re.error, re.compile, '(?(1a))')
197 self.assertRaises(re.error, re.compile, '(?(a.))')
198
199 def test_symbolic_refs(self):
200 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
201 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
202 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
203 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
204 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
205 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
206 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
207 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
208 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
209 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
210
211 def test_re_subn(self):
212 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
213 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
214 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
215 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
216 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
217
218 def test_re_split(self):
219 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
220 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
221 self.assertEqual(re.split("(:*)", ":a:b::c"),
222 ['', ':', 'a', ':', 'b', '::', 'c'])
223 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
224 self.assertEqual(re.split("(:)*", ":a:b::c"),
225 ['', ':', 'a', ':', 'b', ':', 'c'])
226 self.assertEqual(re.split("([b:]+)", ":a:b::c"),
227 ['', ':', 'a', ':b::', 'c'])
228 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
229 ['', None, ':', 'a', None, ':', '', 'b', None, '',
230 None, '::', 'c'])
231 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
232 ['', 'a', '', '', 'c'])
233
234 def test_qualified_re_split(self):
235 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
236 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
237 self.assertEqual(re.split("(:)", ":a:b::c", 2),
238 ['', ':', 'a', ':', 'b::c'])
239 self.assertEqual(re.split("(:*)", ":a:b::c", 2),
240 ['', ':', 'a', ':', 'b::c'])
241
242 def test_re_findall(self):
243 self.assertEqual(re.findall(":+", "abc"), [])
244 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
245 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
246 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
247 (":", ":"),
248 (":", "::")])
249
250 def test_bug_117612(self):
251 self.assertEqual(re.findall(r"(a|(b))", "aba"),
252 [("a", ""),("b", "b"),("a", "")])
253
254 def test_re_match(self):
255 self.assertEqual(re.match('a', 'a').groups(), ())
256 self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
257 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
258 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
259 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
260
261 pat = re.compile('((a)|(b))(c)?')
262 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
263 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
264 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
265 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
266 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
267
268 # A single group
269 m = re.match('(a)', 'a')
270 self.assertEqual(m.group(0), 'a')
271 self.assertEqual(m.group(0), 'a')
272 self.assertEqual(m.group(1), 'a')
273 self.assertEqual(m.group(1, 1), ('a', 'a'))
274
275 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
276 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
277 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
278 (None, 'b', None))
279 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
280
281 def test_re_groupref_exists(self):
282 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
283 ('(', 'a'))
284 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
285 (None, 'a'))
286 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
287 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
288 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
289 ('a', 'b'))
290 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
291 (None, 'd'))
292 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
293 (None, 'd'))
294 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
295 ('a', ''))
296
297 # Tests for bug #1177831: exercise groups other than the first group
298 p = re.compile('(?P<g1>a)(?P<g2>b)?((?(g2)c|d))')
299 self.assertEqual(p.match('abc').groups(),
300 ('a', 'b', 'c'))
301 self.assertEqual(p.match('ad').groups(),
302 ('a', None, 'd'))
303 self.assertEqual(p.match('abd'), None)
304 self.assertEqual(p.match('ac'), None)
305
306
307 def test_re_groupref(self):
308 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
309 ('|', 'a'))
310 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
311 (None, 'a'))
312 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
313 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
314 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
315 ('a', 'a'))
316 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
317 (None, None))
318
319 def test_groupdict(self):
320 self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
321 'first second').groupdict(),
322 {'first':'first', 'second':'second'})
323
324 def test_expand(self):
325 self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
326 "first second")
327 .expand(r"\2 \1 \g<second> \g<first>"),
328 "second first second first")
329
330 def test_repeat_minmax(self):
331 self.assertEqual(re.match("^(\w){1}$", "abc"), None)
332 self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
333 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
334 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
335
336 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
337 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
338 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
339 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
340 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
341 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
342 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
343 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
344
345 self.assertEqual(re.match("^x{1}$", "xxx"), None)
346 self.assertEqual(re.match("^x{1}?$", "xxx"), None)
347 self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
348 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
349
350 self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
351 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
352 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
353 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
354 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
355 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
356 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
357 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
358
359 self.assertEqual(re.match("^x{}$", "xxx"), None)
360 self.assertNotEqual(re.match("^x{}$", "x{}"), None)
361
362 def test_getattr(self):
363 self.assertEqual(re.match("(a)", "a").pos, 0)
364 self.assertEqual(re.match("(a)", "a").endpos, 1)
365 self.assertEqual(re.match("(a)", "a").string, "a")
366 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
367 self.assertNotEqual(re.match("(a)", "a").re, None)
368
369 def test_special_escapes(self):
370 self.assertEqual(re.search(r"\b(b.)\b",
371 "abcd abc bcd bx").group(1), "bx")
372 self.assertEqual(re.search(r"\B(b.)\B",
373 "abc bcd bc abxd").group(1), "bx")
374 self.assertEqual(re.search(r"\b(b.)\b",
375 "abcd abc bcd bx", re.LOCALE).group(1), "bx")
376 self.assertEqual(re.search(r"\B(b.)\B",
377 "abc bcd bc abxd", re.LOCALE).group(1), "bx")
378 self.assertEqual(re.search(r"\b(b.)\b",
379 "abcd abc bcd bx", re.UNICODE).group(1), "bx")
380 self.assertEqual(re.search(r"\B(b.)\B",
381 "abc bcd bc abxd", re.UNICODE).group(1), "bx")
382 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
383 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
384 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
385 self.assertEqual(re.search(r"\b(b.)\b",
386 u"abcd abc bcd bx").group(1), "bx")
387 self.assertEqual(re.search(r"\B(b.)\B",
388 u"abc bcd bc abxd").group(1), "bx")
389 self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
390 self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
391 self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
392 self.assertEqual(re.search(r"\d\D\w\W\s\S",
393 "1aa! a").group(0), "1aa! a")
394 self.assertEqual(re.search(r"\d\D\w\W\s\S",
395 "1aa! a", re.LOCALE).group(0), "1aa! a")
396 self.assertEqual(re.search(r"\d\D\w\W\s\S",
397 "1aa! a", re.UNICODE).group(0), "1aa! a")
398
399 def test_string_boundaries(self):
400 # See http://bugs.python.org/issue10713
401 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
402 "abc")
403 # There's a word boundary at the start of a string.
404 self.assertTrue(re.match(r"\b", "abc"))
405 # A non-empty string includes a non-boundary zero-length match.
406 self.assertTrue(re.search(r"\B", "abc"))
407 # There is no non-boundary match at the start of a string.
408 self.assertFalse(re.match(r"\B", "abc"))
409 # However, an empty string contains no word boundaries, and also no
410 # non-boundaries.
411 self.assertEqual(re.search(r"\B", ""), None)
412 # This one is questionable and different from the perlre behaviour,
413 # but describes current behavior.
414 self.assertEqual(re.search(r"\b", ""), None)
415 # A single word-character string has two boundaries, but no
416 # non-boundary gaps.
417 self.assertEqual(len(re.findall(r"\b", "a")), 2)
418 self.assertEqual(len(re.findall(r"\B", "a")), 0)
419 # If there are no words, there are no boundaries
420 self.assertEqual(len(re.findall(r"\b", " ")), 0)
421 self.assertEqual(len(re.findall(r"\b", " ")), 0)
422 # Can match around the whitespace.
423 self.assertEqual(len(re.findall(r"\B", " ")), 2)
424
425 def test_bigcharset(self):
426 self.assertEqual(re.match(u"([\u2222\u2223])",
427 u"\u2222").group(1), u"\u2222")
428 self.assertEqual(re.match(u"([\u2222\u2223])",
429 u"\u2222", re.UNICODE).group(1), u"\u2222")
430 r = u'[%s]' % u''.join(map(unichr, range(256, 2**16, 255)))
431 self.assertEqual(re.match(r, u"\uff01", re.UNICODE).group(), u"\uff01")
432
433 def test_big_codesize(self):
434 # Issue #1160
435 r = re.compile('|'.join(('%d'%x for x in range(10000))))
436 self.assertIsNotNone(r.match('1000'))
437 self.assertIsNotNone(r.match('9999'))
438
439 def test_anyall(self):
440 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
441 "a\nb")
442 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
443 "a\n\nb")
444
445 def test_non_consuming(self):
446 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
447 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
448 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
449 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
450 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
451 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
452 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
453
454 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
455 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
456 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
457 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
458
459 def test_ignore_case(self):
460 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
461 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
462 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
463 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
464 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
465 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
466 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
467 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
468 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
469 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
470
471 def test_category(self):
472 self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
473
474 def test_getlower(self):
475 import _sre
476 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
477 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
478 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
479
480 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
481 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
482
483 def test_not_literal(self):
484 self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
485 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
486
487 def test_search_coverage(self):
488 self.assertEqual(re.search("\s(b)", " b").group(1), "b")
489 self.assertEqual(re.search("a\s", "a ").group(0), "a ")
490
def assertMatch(self, pattern, text, match=None, span=None,
                matcher=re.match):
    """Assert that *pattern* matches *text* with the expected text and span.

    *matcher* is called as matcher(pattern, text).  When neither *match*
    nor *span* is given, the match must cover the whole of *text*.
    Giving only one of the two raises ValueError.
    """
    if (match is None) != (span is None):
        raise ValueError('If match is not None, span should be specified '
                         '(and vice versa).')
    if match is None:
        # the pattern matches the whole text
        match, span = text, (0, len(text))
    m = matcher(pattern, text)
    self.assertTrue(m)
    self.assertEqual(m.group(), match)
    self.assertEqual(m.span(), span)
504
def test_re_escape(self):
    # re.escape() on each of the first 256 code points: ASCII letters
    # and digits are unchanged, NUL becomes \000, everything else gets
    # a backslash.  The escaped form must still match the original.
    alnum_chars = string.ascii_letters + string.digits
    p = u''.join(map(unichr, range(256)))
    for c in p:
        if c in alnum_chars:
            expected = c
        elif c == u'\x00':
            expected = u'\\000'
        else:
            expected = u'\\' + c
        escaped = re.escape(c)
        self.assertEqual(escaped, expected)
        self.assertMatch(escaped, c)
    self.assertMatch(re.escape(p), p)
517
def test_re_escape_byte(self):
    # re.escape() on each of the 256 byte values: ASCII letters and
    # digits are unchanged, NUL becomes \000, everything else gets a
    # backslash.  The escaped form must still match the original.
    alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
    p = ''.join(map(chr, range(256)))
    for b in p:
        if b in alnum_chars:
            expected = b
        elif b == b'\x00':
            expected = b'\\000'
        else:
            expected = b'\\' + b
        escaped = re.escape(b)
        self.assertEqual(escaped, expected)
        self.assertMatch(escaped, b)
    self.assertMatch(re.escape(p), p)
530
def test_re_escape_non_ascii(self):
    # Non-ASCII characters get a backslash and the result still matches.
    s = u'xxx\u2620\u2620\u2620xxx'
    s_escaped = re.escape(s)
    self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx')
    self.assertMatch(s_escaped, s)
    # An escaped character can be followed by a quantifier.
    quantified = u'.%s+.' % re.escape(u'\u2620')
    self.assertMatch(quantified, s,
                     match=u'x\u2620\u2620\u2620x', span=(2, 7),
                     matcher=re.search)
538
def test_re_escape_non_ascii_bytes(self):
    # In UTF-8 encoded input every non-ASCII byte is escaped on its own.
    b = u'y\u2620y\u2620y'.encode('utf-8')
    b_escaped = re.escape(b)
    self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
    self.assertMatch(b_escaped, b)
    # The escaped encoded character is found twice in the input.
    needle = re.escape(u'\u2620'.encode('utf-8'))
    res = re.findall(needle, b)
    self.assertEqual(len(res), 2)
546
def test_pickling(self):
    # Run pickle_test() with pickle, then with cPickle.
    for module_name in ('pickle', 'cPickle'):
        self.pickle_test(__import__(module_name))
    # old pickles expect the _compile() reconstructor in sre module
    import_module("sre", deprecated=True)
    from sre import _compile
555
def pickle_test(self, pickle):
    """Assert that a compiled pattern survives a dumps()/loads() round trip.

    *pickle* is the module to use; it must provide dumps() and loads().
    """
    oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
    newpat = pickle.loads(pickle.dumps(oldpat))
    self.assertEqual(oldpat, newpat)
561
562 def test_constants(self):
563 self.assertEqual(re.I, re.IGNORECASE)
564 self.assertEqual(re.L, re.LOCALE)
565 self.assertEqual(re.M, re.MULTILINE)
566 self.assertEqual(re.S, re.DOTALL)
567 self.assertEqual(re.X, re.VERBOSE)
568
569 def test_flags(self):
570 for flag in [re.I, re.M, re.X, re.S, re.L]:
571 self.assertNotEqual(re.compile('^pattern$', flag), None)
572
573 def test_sre_character_literals(self):
574 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
575 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
576 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
577 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
578 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
579 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
580 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
581 self.assertRaises(re.error, re.match, "\911", "")
582
583 def test_sre_character_class_literals(self):
584 for i in [0, 8, 16, 32, 64, 127, 128, 255]:
585 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
586 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
587 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
588 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
589 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
590 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
591 self.assertRaises(re.error, re.match, "[\911]", "")
592
593 def test_bug_113254(self):
594 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
595 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
596 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
597
598 def test_bug_527371(self):
599 # bug described in patches 527371/672491
600 self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
601 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
602 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
603 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
604 self.assertEqual(re.match("((a))", "a").lastindex, 1)
605
606 def test_bug_545855(self):
607 # bug 545855 -- This pattern failed to cause a compile error as it
608 # should, instead provoking a TypeError.
609 self.assertRaises(re.error, re.compile, 'foo[a-')
610
611 def test_bug_418626(self):
612 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code
613 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
614 # pattern '*?' on a long string.
615 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
616 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
617 20003)
618 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
619 # non-simple '*?' still used to hit the recursion limit, before the
620 # non-recursive scheme was implemented.
621 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
622
623 def test_bug_612074(self):
624 pat=u"["+re.escape(u"\u2039")+u"]"
625 self.assertEqual(re.compile(pat) and 1, 1)
626
627 def test_stack_overflow(self):
628 # nasty cases that used to overflow the straightforward recursive
629 # implementation of repeated groups.
630 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
631 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
632 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
633
634 def test_unlimited_zero_width_repeat(self):
635 # Issue #9669
636 self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
637 self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
638 self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
639 self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
640 self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
641 self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
642
643 def test_scanner(self):
644 def s_ident(scanner, token): return token
645 def s_operator(scanner, token): return "op%s" % token
646 def s_float(scanner, token): return float(token)
647 def s_int(scanner, token): return int(token)
648
649 scanner = Scanner([
650 (r"[a-zA-Z_]\w*", s_ident),
651 (r"\d+\.\d*", s_float),
652 (r"\d+", s_int),
653 (r"=|\+|-|\*|/", s_operator),
654 (r"\s+", None),
655 ])
656
657 self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
658
659 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
660 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
661 'op+', 'bar'], ''))
662
663 def test_bug_448951(self):
664 # bug 448951 (similar to 429357, but with single char match)
665 # (Also test greedy matches.)
666 for op in '','?','*':
667 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
668 (None, None))
669 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
670 ('a:', 'a'))
671
672 def test_bug_725106(self):
673 # capturing groups in alternatives in repeats
674 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
675 ('b', 'a'))
676 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
677 ('c', 'b'))
678 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
679 ('b', None))
680 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
681 ('b', None))
682 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
683 ('b', 'a'))
684 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
685 ('c', 'b'))
686 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
687 ('b', None))
688 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
689 ('b', None))
690
691 def test_bug_725149(self):
692 # mark_stack_base restoring before restoring marks
693 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
694 ('a', None))
695 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
696 ('a', None, None))
697
698 def test_bug_764548(self):
699 # bug 764548, re.compile() barfs on str/unicode subclasses
700 try:
701 unicode
702 except NameError:
703 return # no problem if we have no unicode
704 class my_unicode(unicode): pass
705 pat = re.compile(my_unicode("abc"))
706 self.assertEqual(pat.match("xyz"), None)
707
708 def test_finditer(self):
709 iter = re.finditer(r":+", "a:b::c:::d")
710 self.assertEqual([item.group(0) for item in iter],
711 [":", "::", ":::"])
712
713 def test_bug_926075(self):
714 try:
715 unicode
716 except NameError:
717 return # no problem if we have no unicode
718 self.assertTrue(re.compile('bug_926075') is not
719 re.compile(eval("u'bug_926075'")))
720
721 def test_bug_931848(self):
722 try:
723 unicode
724 except NameError:
725 pass
726 pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
727 self.assertEqual(re.compile(pattern).split("a.b.c"),
728 ['a','b','c'])
729
730 def test_bug_581080(self):
731 iter = re.finditer(r"\s", "a b")
732 self.assertEqual(iter.next().span(), (1,2))
733 self.assertRaises(StopIteration, iter.next)
734
735 scanner = re.compile(r"\s").scanner("a b")
736 self.assertEqual(scanner.search().span(), (1, 2))
737 self.assertEqual(scanner.search(), None)
738
739 def test_bug_817234(self):
740 iter = re.finditer(r".*", "asdf")
741 self.assertEqual(iter.next().span(), (0, 4))
742 self.assertEqual(iter.next().span(), (4, 4))
743 self.assertRaises(StopIteration, iter.next)
744
745 def test_bug_6561(self):
746 # '\d' should match characters in Unicode category 'Nd'
747 # (Number, Decimal Digit), but not those in 'Nl' (Number,
748 # Letter) or 'No' (Number, Other).
749 decimal_digits = [
750 u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
751 u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
752 u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
753 ]
754 for x in decimal_digits:
755 self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
756
757 not_decimal_digits = [
758 u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
759 u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
760 u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
761 u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
762 ]
763 for x in not_decimal_digits:
764 self.assertIsNone(re.match('^\d$', x, re.UNICODE))
765
def test_empty_array(self):
    # SF bug 1647541
    # Matching against an empty array of any typecode must not fail:
    # a literal pattern finds nothing, the empty pattern matches.
    import array
    literal_pattern = re.compile("bla")
    empty_pattern = re.compile("")
    for typecode in 'cbBuhHiIlLfd':
        empty_buffer = array.array(typecode)
        self.assertEqual(literal_pattern.match(empty_buffer), None)
        self.assertEqual(empty_pattern.match(empty_buffer).groups(), ())
773
def test_inline_flags(self):
    # Bug #1700
    upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Below
    lower_char = unichr(0x1ea1) # Latin Small Letter A with Dot Below

    # Three ways of asking for IGNORECASE + UNICODE: both as compile()
    # flags, 'i' inline with UNICODE as a flag, and both inline.
    flag_spellings = (
        ('', re.I | re.U),
        ('(?i)', re.U),
        ('(?iu)', 0),
    )
    # Each spelling is tried in both directions: upper-case pattern
    # against lower-case subject, then the reverse.
    directions = (
        (upper_char, lower_char),
        (lower_char, upper_char),
    )
    for prefix, flags in flag_spellings:
        for pattern_char, subject_char in directions:
            p = re.compile(prefix + pattern_char, flags)
            q = p.match(subject_char)
            self.assertNotEqual(q, None)
802
803 def test_dollar_matches_twice(self):
804 "$ matches the end of string, and just before the terminating \n"
805 pattern = re.compile('$')
806 self.assertEqual(pattern.sub('#', 'a\nb\n'), 'a\nb#\n#')
807 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a\nb\nc#')
808 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
809
810 pattern = re.compile('$', re.MULTILINE)
811 self.assertEqual(pattern.sub('#', 'a\nb\n' ), 'a#\nb#\n#' )
812 self.assertEqual(pattern.sub('#', 'a\nb\nc'), 'a#\nb#\nc#')
813 self.assertEqual(pattern.sub('#', '\n'), '#\n#')
814
def test_dealloc(self):
    # issue 3299: check for segfault in debug build
    import _sre
    self.assertRaises(TypeError, re.finditer, "a", {})
    # the overflow limit is different on wide and narrow builds and it
    # depends on the definition of SRE_CODE (see sre.h).
    # 2**128 should be big enough to overflow on both.  For smaller values
    # a RuntimeError is raised instead of OverflowError.
    too_big_for_code = 2**128
    bad_code = [too_big_for_code]
    self.assertRaises(OverflowError, _sre.compile, "abc", 0, bad_code)
825
def test_compile(self):
    # A string compiles to a pattern object ...
    compiled = re.compile('random pattern')
    self.assertIsInstance(compiled, re._pattern_type)
    # ... and compiling that pattern object again returns the very same
    # object.
    recompiled = re.compile(compiled)
    self.assertIsInstance(recompiled, re._pattern_type)
    self.assertIs(recompiled, compiled)
    # Anything that is neither a string nor a pattern is rejected.
    self.assertRaises(TypeError, re.compile, 0)
835
def test_bug_13899(self):
    # Issue #13899: inside a character class, r"[\A]" should work like
    # "A" (the bug was that it matched nothing).  Ditto B and Z.
    found = re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ')
    self.assertEqual(found, ['A', 'B', '\b', 'C', 'Z'])
841
@precisionbigmemtest(size=_2G, memuse=1)
def test_large_search(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    # '$' can only match at the very end, so both ends of the match
    # must equal the full length of the subject.
    haystack = 'a' * size
    end_match = re.search('$', haystack)
    self.assertIsNotNone(end_match)
    self.assertEqual(end_match.start(), size)
    self.assertEqual(end_match.end(), size)
850
# re.sub() builds the replacement result with a list and a join(),
# which is why memuse is so large.
@precisionbigmemtest(size=_2G, memuse=16 + 2)
def test_large_subn(self, size):
    # Issue #10182: indices were 32-bit-truncated.
    # Replacing the empty pattern with the empty string leaves the text
    # unchanged and is counted size + 1 times.
    subject = 'a' * size
    replaced, count = re.subn('', '', subject)
    self.assertEqual(replaced, subject)
    self.assertEqual(count, size + 1)
860
861
862 def test_repeat_minmax_overflow(self):
863 # Issue #13169
864 string = "x" * 100000
865 self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
866 self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
867 self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
868 self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
869 self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
870 self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
871 # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
872 self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
873 self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
874 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
875 self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
876
@cpython_only
def test_repeat_minmax_overflow_maxrepeat(self):
    try:
        from _sre import MAXREPEAT
    except ImportError:
        self.skipTest('requires _sre.MAXREPEAT constant')
    subject = "x" * 100000
    largest_allowed = MAXREPEAT - 1
    # One below MAXREPEAT still compiles; only the "at most" form can
    # match the 100000-character subject.
    self.assertIsNone(re.match(r".{%d}" % largest_allowed, subject))
    at_most = re.match(r".{,%d}" % largest_allowed, subject)
    self.assertEqual(at_most.span(), (0, 100000))
    self.assertIsNone(re.match(r".{%d,}?" % largest_allowed, subject))
    # MAXREPEAT itself is rejected at compile time.
    for template in (r".{%d}", r".{,%d}", r".{%d,}?"):
        self.assertRaises(OverflowError, re.compile, template % MAXREPEAT)
891
def test_backref_group_name_in_exception(self):
    # Issue 17341: Poor error message when compiling invalid regex
    # The error text must mention the offending group name.
    self.assertRaisesRegexp(sre_constants.error, '<foo>',
                            re.compile, '(?P=<foo>)')
896
def test_group_name_in_exception(self):
    # Issue 17341: Poor error message when compiling invalid regex
    # The error text must mention the offending group name.
    self.assertRaisesRegexp(sre_constants.error, '\?foo',
                            re.compile, '(?P<?foo>)')
901
902 def test_issue17998(self):
903 for reps in '*', '+', '?', '{1}':
904 for mod in '', '?':
905 pattern = '.' + reps + mod + 'yz'
906 self.assertEqual(re.compile(pattern, re.S).findall('xyz'),
907 ['xyz'], msg=pattern)
908 pattern = pattern.encode()
909 self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),
910 [b'xyz'], msg=pattern)
911
912
913 def test_bug_2537(self):
914 # issue 2537: empty submatches
915 for outer_op in ('{0,}', '*', '+', '{1,187}'):
916 for inner_op in ('{0,}', '*', '?'):
917 r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op))
918 m = r.match("xyyzy")
919 self.assertEqual(m.group(0), "xyy")
920 self.assertEqual(m.group(1), "")
921 self.assertEqual(m.group(2), "y")
922
923def run_re_tests():
924 from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
925 if verbose:
926 print 'Running re_tests test suite'
927 else:
928 # To save time, only run the first and last 10 tests
929 #tests = tests[:10] + tests[-10:]
930 pass
931
932 for t in tests:
933 sys.stdout.flush()
934 pattern = s = outcome = repl = expected = None
935 if len(t) == 5:
936 pattern, s, outcome, repl, expected = t
937 elif len(t) == 3:
938 pattern, s, outcome = t
939 else:
940 raise ValueError, ('Test tuples should have 3 or 5 fields', t)
941
942 try:
943 obj = re.compile(pattern)
944 except re.error:
945 if outcome == SYNTAX_ERROR: pass # Expected a syntax error
946 else:
947 print '=== Syntax error:', t
948 except KeyboardInterrupt: raise KeyboardInterrupt
949 except:
950 print '*** Unexpected error ***', t
951 if verbose:
952 traceback.print_exc(file=sys.stdout)
953 else:
954 try:
955 result = obj.search(s)
956 except re.error, msg:
957 print '=== Unexpected exception', t, repr(msg)
958 if outcome == SYNTAX_ERROR:
959 # This should have been a syntax error; forget it.
960 pass
961 elif outcome == FAIL:
962 if result is None: pass # No match, as expected
963 else: print '=== Succeeded incorrectly', t
964 elif outcome == SUCCEED:
965 if result is not None:
966 # Matched, as expected, so now we compute the
967 # result string and compare it to our expected result.
968 start, end = result.span(0)
969 vardict={'found': result.group(0),
970 'groups': result.group(),
971 'flags': result.re.flags}
972 for i in range(1, 100):
973 try:
974 gi = result.group(i)
975 # Special hack because else the string concat fails:
976 if gi is None:
977 gi = "None"
978 except IndexError:
979 gi = "Error"
980 vardict['g%d' % i] = gi
981 for i in result.re.groupindex.keys():
982 try:
983 gi = result.group(i)
984 if gi is None:
985 gi = "None"
986 except IndexError:
987 gi = "Error"
988 vardict[i] = gi
989 repl = eval(repl, vardict)
990 if repl != expected:
991 print '=== grouping error', t,
992 print repr(repl) + ' should be ' + repr(expected)
993 else:
994 print '=== Failed incorrectly', t
995
996 # Try the match on a unicode string, and check that it
997 # still succeeds.
998 try:
999 result = obj.search(unicode(s, "latin-1"))
1000 if result is None:
1001 print '=== Fails on unicode match', t
1002 except NameError:
1003 continue # 1.5.2
1004 except TypeError:
1005 continue # unicode test case
1006
1007 # Try the match on a unicode pattern, and check that it
1008 # still succeeds.
1009 obj=re.compile(unicode(pattern, "latin-1"))
1010 result = obj.search(s)
1011 if result is None:
1012 print '=== Fails on unicode pattern match', t
1013
1014 # Try the match with the search area limited to the extent
1015 # of the match and see if it still succeeds. \B will
1016 # break (because it won't match at the end or start of a
1017 # string), so we'll ignore patterns that feature it.
1018
1019 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
1020 and result is not None:
1021 obj = re.compile(pattern)
1022 result = obj.search(s, result.start(0), result.end(0) + 1)
1023 if result is None:
1024 print '=== Failed on range-limited match', t
1025
1026 # Try the match with IGNORECASE enabled, and check that it
1027 # still succeeds.
1028 obj = re.compile(pattern, re.IGNORECASE)
1029 result = obj.search(s)
1030 if result is None:
1031 print '=== Fails on case-insensitive match', t
1032
1033 # Try the match with LOCALE enabled, and check that it
1034 # still succeeds.
1035 obj = re.compile(pattern, re.LOCALE)
1036 result = obj.search(s)
1037 if result is None:
1038 print '=== Fails on locale-sensitive match', t
1039
1040 # Try the match with UNICODE locale enabled, and check
1041 # that it still succeeds.
1042 obj = re.compile(pattern, re.UNICODE)
1043 result = obj.search(s)
1044 if result is None:
1045 print '=== Fails on unicode-sensitive match', t
1046
def test_main():
    """Run the ReTests unittest cases, then the run_re_tests() checks."""
    run_unittest(ReTests)
    run_re_tests()
1050
# Run the whole suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()
Note: See TracBrowser for help on using the repository browser.