Changeset 391 for python/trunk/Lib/test/test_re.py
- Timestamp: Mar 19, 2014, 11:31:01 PM (11 years ago)
- Location: python/trunk
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
-
python/trunk
-
Property svn:mergeinfo
set to
/python/vendor/Python-2.7.6 merged eligible /python/vendor/current merged eligible
-
Property svn:mergeinfo
set to
-
python/trunk/Lib/test/test_re.py
r2 r391 1 import sys 2 sys.path = ['.'] + sys.path 3 4 from test.test_support import verbose, run_unittest 1 from test.test_support import verbose, run_unittest, import_module 2 from test.test_support import precisionbigmemtest, _2G, cpython_only 5 3 import re 6 4 from re import Scanner 7 import sys, os, traceback 5 import sre_constants 6 import sys 7 import string 8 import traceback 8 9 from weakref import proxy 9 10 11 10 12 # Misc tests from Tim Peters' re.doc 11 13 12 14 # WARNING: Don't change details in these tests if you don't know 13 # what you're doing. Some of these tests were careful y modeled to15 # what you're doing. Some of these tests were carefully modeled to 14 16 # cover most of the code. 15 17 … … 176 178 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d') 177 179 180 def test_symbolic_groups(self): 181 re.compile('(?P<a>x)(?P=a)(?(a)y)') 182 re.compile('(?P<a1>x)(?P=a1)(?(a1)y)') 183 self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)') 184 self.assertRaises(re.error, re.compile, '(?Px)') 185 self.assertRaises(re.error, re.compile, '(?P=)') 186 self.assertRaises(re.error, re.compile, '(?P=1)') 187 self.assertRaises(re.error, re.compile, '(?P=a)') 188 self.assertRaises(re.error, re.compile, '(?P=a1)') 189 self.assertRaises(re.error, re.compile, '(?P=a.)') 190 self.assertRaises(re.error, re.compile, '(?P<)') 191 self.assertRaises(re.error, re.compile, '(?P<>)') 192 self.assertRaises(re.error, re.compile, '(?P<1>)') 193 self.assertRaises(re.error, re.compile, '(?P<a.>)') 194 self.assertRaises(re.error, re.compile, '(?())') 195 self.assertRaises(re.error, re.compile, '(?(a))') 196 self.assertRaises(re.error, re.compile, '(?(1a))') 197 self.assertRaises(re.error, re.compile, '(?(a.))') 198 178 199 def test_symbolic_refs(self): 179 200 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx') … … 181 202 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx') 182 203 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx') 204 
self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx') 183 205 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx') 184 206 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx') … … 375 397 "1aa! a", re.UNICODE).group(0), "1aa! a") 376 398 399 def test_string_boundaries(self): 400 # See http://bugs.python.org/issue10713 401 self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1), 402 "abc") 403 # There's a word boundary at the start of a string. 404 self.assertTrue(re.match(r"\b", "abc")) 405 # A non-empty string includes a non-boundary zero-length match. 406 self.assertTrue(re.search(r"\B", "abc")) 407 # There is no non-boundary match at the start of a string. 408 self.assertFalse(re.match(r"\B", "abc")) 409 # However, an empty string contains no word boundaries, and also no 410 # non-boundaries. 411 self.assertEqual(re.search(r"\B", ""), None) 412 # This one is questionable and different from the perlre behaviour, 413 # but describes current behavior. 414 self.assertEqual(re.search(r"\b", ""), None) 415 # A single word-character string has two boundaries, but no 416 # non-boundary gaps. 417 self.assertEqual(len(re.findall(r"\b", "a")), 2) 418 self.assertEqual(len(re.findall(r"\B", "a")), 0) 419 # If there are no words, there are no boundaries 420 self.assertEqual(len(re.findall(r"\b", " ")), 0) 421 self.assertEqual(len(re.findall(r"\b", " ")), 0) 422 # Can match around the whitespace. 
423 self.assertEqual(len(re.findall(r"\B", " ")), 2) 424 377 425 def test_bigcharset(self): 378 426 self.assertEqual(re.match(u"([\u2222\u2223])", … … 380 428 self.assertEqual(re.match(u"([\u2222\u2223])", 381 429 u"\u2222", re.UNICODE).group(1), u"\u2222") 430 r = u'[%s]' % u''.join(map(unichr, range(256, 2**16, 255))) 431 self.assertEqual(re.match(r, u"\uff01", re.UNICODE).group(), u"\uff01") 432 433 def test_big_codesize(self): 434 # Issue #1160 435 r = re.compile('|'.join(('%d'%x for x in range(10000)))) 436 self.assertIsNotNone(r.match('1000')) 437 self.assertIsNotNone(r.match('9999')) 382 438 383 439 def test_anyall(self): … … 433 489 self.assertEqual(re.search("a\s", "a ").group(0), "a ") 434 490 491 def assertMatch(self, pattern, text, match=None, span=None, 492 matcher=re.match): 493 if match is None and span is None: 494 # the pattern matches the whole text 495 match = text 496 span = (0, len(text)) 497 elif match is None or span is None: 498 raise ValueError('If match is not None, span should be specified ' 499 '(and vice versa).') 500 m = matcher(pattern, text) 501 self.assertTrue(m) 502 self.assertEqual(m.group(), match) 503 self.assertEqual(m.span(), span) 504 435 505 def test_re_escape(self): 436 p="" 437 for i in range(0, 256): 438 p = p + chr(i) 439 self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None, 440 True) 441 self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1)) 442 443 pat=re.compile(re.escape(p)) 444 self.assertEqual(pat.match(p) is not None, True) 445 self.assertEqual(pat.match(p).span(), (0,256)) 506 alnum_chars = string.ascii_letters + string.digits 507 p = u''.join(unichr(i) for i in range(256)) 508 for c in p: 509 if c in alnum_chars: 510 self.assertEqual(re.escape(c), c) 511 elif c == u'\x00': 512 self.assertEqual(re.escape(c), u'\\000') 513 else: 514 self.assertEqual(re.escape(c), u'\\' + c) 515 self.assertMatch(re.escape(c), c) 516 self.assertMatch(re.escape(p), p) 517 518 def test_re_escape_byte(self): 
519 alnum_chars = (string.ascii_letters + string.digits).encode('ascii') 520 p = ''.join(chr(i) for i in range(256)) 521 for b in p: 522 if b in alnum_chars: 523 self.assertEqual(re.escape(b), b) 524 elif b == b'\x00': 525 self.assertEqual(re.escape(b), b'\\000') 526 else: 527 self.assertEqual(re.escape(b), b'\\' + b) 528 self.assertMatch(re.escape(b), b) 529 self.assertMatch(re.escape(p), p) 530 531 def test_re_escape_non_ascii(self): 532 s = u'xxx\u2620\u2620\u2620xxx' 533 s_escaped = re.escape(s) 534 self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx') 535 self.assertMatch(s_escaped, s) 536 self.assertMatch(u'.%s+.' % re.escape(u'\u2620'), s, 537 u'x\u2620\u2620\u2620x', (2, 7), re.search) 538 539 def test_re_escape_non_ascii_bytes(self): 540 b = u'y\u2620y\u2620y'.encode('utf-8') 541 b_escaped = re.escape(b) 542 self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y') 543 self.assertMatch(b_escaped, b) 544 res = re.findall(re.escape(u'\u2620'.encode('utf-8')), b) 545 self.assertEqual(len(res), 2) 446 546 447 547 def test_pickling(self): … … 451 551 self.pickle_test(cPickle) 452 552 # old pickles expect the _compile() reconstructor in sre module 453 import warnings 454 with warnings.catch_warnings(): 455 warnings.filterwarnings("ignore", "The sre module is deprecated", 456 DeprecationWarning) 457 from sre import _compile 553 import_module("sre", deprecated=True) 554 from sre import _compile 458 555 459 556 def pickle_test(self, pickle): … … 534 631 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x') 535 632 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x') 633 634 def test_unlimited_zero_width_repeat(self): 635 # Issue #9669 636 self.assertIsNone(re.match(r'(?:a?)*y', 'z')) 637 self.assertIsNone(re.match(r'(?:a?)+y', 'z')) 638 self.assertIsNone(re.match(r'(?:a?){2,}y', 'z')) 639 self.assertIsNone(re.match(r'(?:a?)*?y', 'z')) 640 self.assertIsNone(re.match(r'(?:a?)+?y', 'z')) 641 
self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z')) 536 642 537 643 def test_scanner(self): … … 610 716 except NameError: 611 717 return # no problem if we have no unicode 612 self.assert _(re.compile('bug_926075') is not718 self.assertTrue(re.compile('bug_926075') is not 613 719 re.compile(eval("u'bug_926075'"))) 614 720 … … 636 742 self.assertEqual(iter.next().span(), (4, 4)) 637 743 self.assertRaises(StopIteration, iter.next) 744 745 def test_bug_6561(self): 746 # '\d' should match characters in Unicode category 'Nd' 747 # (Number, Decimal Digit), but not those in 'Nl' (Number, 748 # Letter) or 'No' (Number, Other). 749 decimal_digits = [ 750 u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd' 751 u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd' 752 u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd' 753 ] 754 for x in decimal_digits: 755 self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x) 756 757 not_decimal_digits = [ 758 u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl' 759 u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl' 760 u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No' 761 u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No' 762 ] 763 for x in not_decimal_digits: 764 self.assertIsNone(re.match('^\d$', x, re.UNICODE)) 638 765 639 766 def test_empty_array(self): … … 697 824 self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow]) 698 825 826 def test_compile(self): 827 # Test return value when given string and pattern as parameter 828 pattern = re.compile('random pattern') 829 self.assertIsInstance(pattern, re._pattern_type) 830 same_pattern = re.compile(pattern) 831 self.assertIsInstance(same_pattern, re._pattern_type) 832 self.assertIs(same_pattern, pattern) 833 # Test behaviour when not given a string or pattern as parameter 834 self.assertRaises(TypeError, re.compile, 0) 835 836 def test_bug_13899(self): 837 # Issue #13899: re pattern r"[\A]" should work like "A" but matches 838 # nothing. Ditto B and Z. 
839 self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'), 840 ['A', 'B', '\b', 'C', 'Z']) 841 842 @precisionbigmemtest(size=_2G, memuse=1) 843 def test_large_search(self, size): 844 # Issue #10182: indices were 32-bit-truncated. 845 s = 'a' * size 846 m = re.search('$', s) 847 self.assertIsNotNone(m) 848 self.assertEqual(m.start(), size) 849 self.assertEqual(m.end(), size) 850 851 # The huge memuse is because of re.sub() using a list and a join() 852 # to create the replacement result. 853 @precisionbigmemtest(size=_2G, memuse=16 + 2) 854 def test_large_subn(self, size): 855 # Issue #10182: indices were 32-bit-truncated. 856 s = 'a' * size 857 r, n = re.subn('', '', s) 858 self.assertEqual(r, s) 859 self.assertEqual(n, size + 1) 860 861 862 def test_repeat_minmax_overflow(self): 863 # Issue #13169 864 string = "x" * 100000 865 self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535)) 866 self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535)) 867 self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535)) 868 self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536)) 869 self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536)) 870 self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536)) 871 # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t. 872 self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128) 873 self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128) 874 self.assertRaises(OverflowError, re.compile, r".{%d,}?" 
% 2**128) 875 self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128)) 876 877 @cpython_only 878 def test_repeat_minmax_overflow_maxrepeat(self): 879 try: 880 from _sre import MAXREPEAT 881 except ImportError: 882 self.skipTest('requires _sre.MAXREPEAT constant') 883 string = "x" * 100000 884 self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string)) 885 self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(), 886 (0, 100000)) 887 self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string)) 888 self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT) 889 self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT) 890 self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT) 891 892 def test_backref_group_name_in_exception(self): 893 # Issue 17341: Poor error message when compiling invalid regex 894 with self.assertRaisesRegexp(sre_constants.error, '<foo>'): 895 re.compile('(?P=<foo>)') 896 897 def test_group_name_in_exception(self): 898 # Issue 17341: Poor error message when compiling invalid regex 899 with self.assertRaisesRegexp(sre_constants.error, '\?foo'): 900 re.compile('(?P<?foo>)') 901 902 def test_issue17998(self): 903 for reps in '*', '+', '?', '{1}': 904 for mod in '', '?': 905 pattern = '.' 
+ reps + mod + 'yz' 906 self.assertEqual(re.compile(pattern, re.S).findall('xyz'), 907 ['xyz'], msg=pattern) 908 pattern = pattern.encode() 909 self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'), 910 [b'xyz'], msg=pattern) 911 912 913 def test_bug_2537(self): 914 # issue 2537: empty submatches 915 for outer_op in ('{0,}', '*', '+', '{1,187}'): 916 for inner_op in ('{0,}', '*', '?'): 917 r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op)) 918 m = r.match("xyyzy") 919 self.assertEqual(m.group(0), "xyy") 920 self.assertEqual(m.group(1), "") 921 self.assertEqual(m.group(2), "y") 922 699 923 def run_re_tests(): 700 from test.re_tests import benchmarks,tests, SUCCEED, FAIL, SYNTAX_ERROR924 from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR 701 925 if verbose: 702 926 print 'Running re_tests test suite'
Note: See TracChangeset for help on using the changeset viewer.