Context Navigation

← Previous Change
Next Change →

test_re.py

Timestamp:

Mar 19, 2014, 11:31:01 PM (11 years ago)

Author:

dmik

Message:

python: Merge vendor 2.7.6 to trunk.

Location:

python/trunk

Files:

: 2 edited

. (modified) (1 prop)
Lib/test/test_re.py (modified) (11 diffs)

Legend:

: Unmodified
: Added
: Removed

python/trunk
- Property svn:mergeinfo set to
  /python/vendor/Python-2.7.6 merged eligible
  /python/vendor/current merged eligible

python/trunk/Lib/test/test_re.py

-              r2
+              r391
-import sys
-sys.path = ['.'] + sys.path
-from test.test_support import verbose, run_unittest
+from test.test_support import verbose, run_unittest, import_module
+from test.test_support import precisionbigmemtest, _2G, cpython_only
 import re
 from re import Scanner
-import sys, os, traceback
+import sre_constants
+import sys
+import string
+import traceback
 from weakref import proxy
 # Misc tests from Tim Peters' re.doc
 # WARNING: Don't change details in these tests if you don't know
-# what you're doing. Some of these tests were carefuly modeled to
+# what you're doing. Some of these tests were carefully modeled to
 # cover most of the code.
 …
         self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
+    def test_symbolic_groups(self):
+        re.compile('(?P<a>x)(?P=a)(?(a)y)')
+        re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
+        self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
+        self.assertRaises(re.error, re.compile, '(?Px)')
+        self.assertRaises(re.error, re.compile, '(?P=)')
+        self.assertRaises(re.error, re.compile, '(?P=1)')
+        self.assertRaises(re.error, re.compile, '(?P=a)')
+        self.assertRaises(re.error, re.compile, '(?P=a1)')
+        self.assertRaises(re.error, re.compile, '(?P=a.)')
+        self.assertRaises(re.error, re.compile, '(?P<)')
+        self.assertRaises(re.error, re.compile, '(?P<>)')
+        self.assertRaises(re.error, re.compile, '(?P<1>)')
+        self.assertRaises(re.error, re.compile, '(?P<a.>)')
+        self.assertRaises(re.error, re.compile, '(?())')
+        self.assertRaises(re.error, re.compile, '(?(a))')
+        self.assertRaises(re.error, re.compile, '(?(1a))')
+        self.assertRaises(re.error, re.compile, '(?(a.))')
     def test_symbolic_refs(self):
         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
 …
         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
+        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
         self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
 …
                                    "1aa! a", re.UNICODE).group(0), "1aa! a")
+    def test_string_boundaries(self):
+        # See http://bugs.python.org/issue10713
+        self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
+                         "abc")
+        # There's a word boundary at the start of a string.
+        self.assertTrue(re.match(r"\b", "abc"))
+        # A non-empty string includes a non-boundary zero-length match.
+        self.assertTrue(re.search(r"\B", "abc"))
+        # There is no non-boundary match at the start of a string.
+        self.assertFalse(re.match(r"\B", "abc"))
+        # However, an empty string contains no word boundaries, and also no
+        # non-boundaries.
+        self.assertEqual(re.search(r"\B", ""), None)
+        # This one is questionable and different from the perlre behaviour,
+        # but describes current behavior.
+        self.assertEqual(re.search(r"\b", ""), None)
+        # A single word-character string has two boundaries, but no
+        # non-boundary gaps.
+        self.assertEqual(len(re.findall(r"\b", "a")), 2)
+        self.assertEqual(len(re.findall(r"\B", "a")), 0)
+        # If there are no words, there are no boundaries
+        self.assertEqual(len(re.findall(r"\b", " ")), 0)
+        self.assertEqual(len(re.findall(r"\b", "   ")), 0)
+        # Can match around the whitespace.
+        self.assertEqual(len(re.findall(r"\B", " ")), 2)
     def test_bigcharset(self):
         self.assertEqual(re.match(u"([\u2222\u2223])",
 …
         self.assertEqual(re.match(u"([\u2222\u2223])",
                                   u"\u2222", re.UNICODE).group(1), u"\u2222")
+        r = u'[%s]' % u''.join(map(unichr, range(256, 2**16, 255)))
+        self.assertEqual(re.match(r, u"\uff01", re.UNICODE).group(), u"\uff01")
+    def test_big_codesize(self):
+        # Issue #1160
+        r = re.compile('|'.join(('%d'%x for x in range(10000))))
+        self.assertIsNotNone(r.match('1000'))
+        self.assertIsNotNone(r.match('9999'))
     def test_anyall(self):
 …
         self.assertEqual(re.search("a\s", "a ").group(0), "a ")
+    def assertMatch(self, pattern, text, match=None, span=None,
+                    matcher=re.match):
+        if match is None and span is None:
+            # the pattern matches the whole text
+            match = text
+            span = (0, len(text))
+        elif match is None or span is None:
+            raise ValueError('If match is not None, span should be specified '
+                             '(and vice versa).')
+        m = matcher(pattern, text)
+        self.assertTrue(m)
+        self.assertEqual(m.group(), match)
+        self.assertEqual(m.span(), span)
     def test_re_escape(self):
-        p=""
-        for i in range(0, 256):
-            p = p + chr(i)
-            self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
-                             True)
-            self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
-        pat=re.compile(re.escape(p))
-        self.assertEqual(pat.match(p) is not None, True)
-        self.assertEqual(pat.match(p).span(), (0,256))
+        alnum_chars = string.ascii_letters + string.digits
+        p = u''.join(unichr(i) for i in range(256))
+        for c in p:
+            if c in alnum_chars:
+                self.assertEqual(re.escape(c), c)
+            elif c == u'\x00':
+                self.assertEqual(re.escape(c), u'\\000')
+            else:
+                self.assertEqual(re.escape(c), u'\\' + c)
+            self.assertMatch(re.escape(c), c)
+        self.assertMatch(re.escape(p), p)
+    def test_re_escape_byte(self):
+        alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
+        p = ''.join(chr(i) for i in range(256))
+        for b in p:
+            if b in alnum_chars:
+                self.assertEqual(re.escape(b), b)
+            elif b == b'\x00':
+                self.assertEqual(re.escape(b), b'\\000')
+            else:
+                self.assertEqual(re.escape(b), b'\\' + b)
+            self.assertMatch(re.escape(b), b)
+        self.assertMatch(re.escape(p), p)
+    def test_re_escape_non_ascii(self):
+        s = u'xxx\u2620\u2620\u2620xxx'
+        s_escaped = re.escape(s)
+        self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx')
+        self.assertMatch(s_escaped, s)
+        self.assertMatch(u'.%s+.' % re.escape(u'\u2620'), s,
+                         u'x\u2620\u2620\u2620x', (2, 7), re.search)
+    def test_re_escape_non_ascii_bytes(self):
+        b = u'y\u2620y\u2620y'.encode('utf-8')
+        b_escaped = re.escape(b)
+        self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
+        self.assertMatch(b_escaped, b)
+        res = re.findall(re.escape(u'\u2620'.encode('utf-8')), b)
+        self.assertEqual(len(res), 2)
     def test_pickling(self):
 …
         self.pickle_test(cPickle)
         # old pickles expect the _compile() reconstructor in sre module
-        import warnings
-        with warnings.catch_warnings():
-            warnings.filterwarnings("ignore", "The sre module is deprecated",
-                                    DeprecationWarning)
-            from sre import _compile
+        import_module("sre", deprecated=True)
+        from sre import _compile
     def pickle_test(self, pickle):
 …
         self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
         self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
+    def test_unlimited_zero_width_repeat(self):
+        # Issue #9669
+        self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
+        self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
+        self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
+        self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
+        self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
+        self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
     def test_scanner(self):
 …
         except NameError:
             return # no problem if we have no unicode
-        self.assert_(re.compile('bug_926075') is not
+        self.assertTrue(re.compile('bug_926075') is not
                      re.compile(eval("u'bug_926075'")))
 …
         self.assertEqual(iter.next().span(), (4, 4))
         self.assertRaises(StopIteration, iter.next)
+    def test_bug_6561(self):
+        # '\d' should match characters in Unicode category 'Nd'
+        # (Number, Decimal Digit), but not those in 'Nl' (Number,
+        # Letter) or 'No' (Number, Other).
+        decimal_digits = [
+            u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
+            u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
+            u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
+            ]
+        for x in decimal_digits:
+            self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
+        not_decimal_digits = [
+            u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
+            u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
+            u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
+            u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
+            ]
+        for x in not_decimal_digits:
+            self.assertIsNone(re.match('^\d$', x, re.UNICODE))
     def test_empty_array(self):
 …
         self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
+    def test_compile(self):
+        # Test return value when given string and pattern as parameter
+        pattern = re.compile('random pattern')
+        self.assertIsInstance(pattern, re._pattern_type)
+        same_pattern = re.compile(pattern)
+        self.assertIsInstance(same_pattern, re._pattern_type)
+        self.assertIs(same_pattern, pattern)
+        # Test behaviour when not given a string or pattern as parameter
+        self.assertRaises(TypeError, re.compile, 0)
+    def test_bug_13899(self):
+        # Issue #13899: re pattern r"[\A]" should work like "A" but matches
+        # nothing. Ditto B and Z.
+        self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
+                         ['A', 'B', '\b', 'C', 'Z'])
+    @precisionbigmemtest(size=_2G, memuse=1)
+    def test_large_search(self, size):
+        # Issue #10182: indices were 32-bit-truncated.
+        s = 'a' * size
+        m = re.search('$', s)
+        self.assertIsNotNone(m)
+        self.assertEqual(m.start(), size)
+        self.assertEqual(m.end(), size)
+    # The huge memuse is because of re.sub() using a list and a join()
+    # to create the replacement result.
+    @precisionbigmemtest(size=_2G, memuse=16 + 2)
+    def test_large_subn(self, size):
+        # Issue #10182: indices were 32-bit-truncated.
+        s = 'a' * size
+        r, n = re.subn('', '', s)
+        self.assertEqual(r, s)
+        self.assertEqual(n, size + 1)
+    def test_repeat_minmax_overflow(self):
+        # Issue #13169
+        string = "x" * 100000
+        self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
+        self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
+        self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
+        self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
+        self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
+        self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
+        # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
+        self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
+        self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
+        self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
+        self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
+    @cpython_only
+    def test_repeat_minmax_overflow_maxrepeat(self):
+        try:
+            from _sre import MAXREPEAT
+        except ImportError:
+            self.skipTest('requires _sre.MAXREPEAT constant')
+        string = "x" * 100000
+        self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
+        self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
+                         (0, 100000))
+        self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
+        self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT)
+        self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT)
+        self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT)
+    def test_backref_group_name_in_exception(self):
+        # Issue 17341: Poor error message when compiling invalid regex
+        with self.assertRaisesRegexp(sre_constants.error, '<foo>'):
+            re.compile('(?P=<foo>)')
+    def test_group_name_in_exception(self):
+        # Issue 17341: Poor error message when compiling invalid regex
+        with self.assertRaisesRegexp(sre_constants.error, '\?foo'):
+            re.compile('(?P<?foo>)')
+    def test_issue17998(self):
+        for reps in '*', '+', '?', '{1}':
+            for mod in '', '?':
+                pattern = '.' + reps + mod + 'yz'
+                self.assertEqual(re.compile(pattern, re.S).findall('xyz'),
+                                 ['xyz'], msg=pattern)
+                pattern = pattern.encode()
+                self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),
+                                 [b'xyz'], msg=pattern)
+    def test_bug_2537(self):
+        # issue 2537: empty submatches
+        for outer_op in ('{0,}', '*', '+', '{1,187}'):
+            for inner_op in ('{0,}', '*', '?'):
+                r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op))
+                m = r.match("xyyzy")
+                self.assertEqual(m.group(0), "xyy")
+                self.assertEqual(m.group(1), "")
+                self.assertEqual(m.group(2), "y")
 def run_re_tests():
-    from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
+    from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
     if verbose:
         print 'Running re_tests test suite'

Note: See TracChangeset for help on using the changeset viewer.

/python/vendor/Python-2.7.6	merged	eligible
/python/vendor/current	merged	eligible

Context Navigation

Changeset 391 for python/trunk/Lib/test/test_re.py

Legend:

python/trunk

python/trunk/Lib/test/test_re.py

Download in other formats: