Ignore:
Timestamp:
Mar 19, 2014, 11:31:01 PM (11 years ago)
Author:
dmik
Message:

python: Merge vendor 2.7.6 to trunk.

Location:
python/trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • python/trunk

  • python/trunk/Lib/test/test_re.py

    r2 r391  
    1 import sys
    2 sys.path = ['.'] + sys.path
    3 
    4 from test.test_support import verbose, run_unittest
     1from test.test_support import verbose, run_unittest, import_module
     2from test.test_support import precisionbigmemtest, _2G, cpython_only
    53import re
    64from re import Scanner
    7 import sys, os, traceback
     5import sre_constants
     6import sys
     7import string
     8import traceback
    89from weakref import proxy
    910
     11
    1012# Misc tests from Tim Peters' re.doc
    1113
    1214# WARNING: Don't change details in these tests if you don't know
    13 # what you're doing. Some of these tests were carefuly modeled to
     15# what you're doing. Some of these tests were carefully modeled to
    1416# cover most of the code.
    1517
     
    176178        self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
    177179
     180    def test_symbolic_groups(self):
     181        re.compile('(?P<a>x)(?P=a)(?(a)y)')
     182        re.compile('(?P<a1>x)(?P=a1)(?(a1)y)')
     183        self.assertRaises(re.error, re.compile, '(?P<a>)(?P<a>)')
     184        self.assertRaises(re.error, re.compile, '(?Px)')
     185        self.assertRaises(re.error, re.compile, '(?P=)')
     186        self.assertRaises(re.error, re.compile, '(?P=1)')
     187        self.assertRaises(re.error, re.compile, '(?P=a)')
     188        self.assertRaises(re.error, re.compile, '(?P=a1)')
     189        self.assertRaises(re.error, re.compile, '(?P=a.)')
     190        self.assertRaises(re.error, re.compile, '(?P<)')
     191        self.assertRaises(re.error, re.compile, '(?P<>)')
     192        self.assertRaises(re.error, re.compile, '(?P<1>)')
     193        self.assertRaises(re.error, re.compile, '(?P<a.>)')
     194        self.assertRaises(re.error, re.compile, '(?())')
     195        self.assertRaises(re.error, re.compile, '(?(a))')
     196        self.assertRaises(re.error, re.compile, '(?(1a))')
     197        self.assertRaises(re.error, re.compile, '(?(a.))')
     198
    178199    def test_symbolic_refs(self):
    179200        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
     
    181202        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
    182203        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
     204        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<>', 'xx')
    183205        self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
    184206        self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
     
    375397                                   "1aa! a", re.UNICODE).group(0), "1aa! a")
    376398
     399    def test_string_boundaries(self):
     400        # See http://bugs.python.org/issue10713
     401        self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
     402                         "abc")
     403        # There's a word boundary at the start of a string.
     404        self.assertTrue(re.match(r"\b", "abc"))
     405        # A non-empty string includes a non-boundary zero-length match.
     406        self.assertTrue(re.search(r"\B", "abc"))
     407        # There is no non-boundary match at the start of a string.
     408        self.assertFalse(re.match(r"\B", "abc"))
     409        # However, an empty string contains no word boundaries, and also no
     410        # non-boundaries.
     411        self.assertEqual(re.search(r"\B", ""), None)
     412        # This one is questionable and differs from Perl's behavior (see
     413        # perlre), but it describes the current behavior.
     414        self.assertEqual(re.search(r"\b", ""), None)
     415        # A single word-character string has two boundaries, but no
     416        # non-boundary gaps.
     417        self.assertEqual(len(re.findall(r"\b", "a")), 2)
     418        self.assertEqual(len(re.findall(r"\B", "a")), 0)
     419        # If there are no words, there are no boundaries.
     420        self.assertEqual(len(re.findall(r"\b", " ")), 0)
     421        self.assertEqual(len(re.findall(r"\b", "   ")), 0)
     422        # Can match around the whitespace.
     423        self.assertEqual(len(re.findall(r"\B", " ")), 2)
     424
    377425    def test_bigcharset(self):
    378426        self.assertEqual(re.match(u"([\u2222\u2223])",
     
    380428        self.assertEqual(re.match(u"([\u2222\u2223])",
    381429                                  u"\u2222", re.UNICODE).group(1), u"\u2222")
     430        r = u'[%s]' % u''.join(map(unichr, range(256, 2**16, 255)))
     431        self.assertEqual(re.match(r, u"\uff01", re.UNICODE).group(), u"\uff01")
     432
     433    def test_big_codesize(self):
     434        # Issue #1160
     435        r = re.compile('|'.join(('%d'%x for x in range(10000))))
     436        self.assertIsNotNone(r.match('1000'))
     437        self.assertIsNotNone(r.match('9999'))
    382438
    383439    def test_anyall(self):
     
    433489        self.assertEqual(re.search("a\s", "a ").group(0), "a ")
    434490
     491    def assertMatch(self, pattern, text, match=None, span=None,
     492                    matcher=re.match):
     493        if match is None and span is None:
     494            # the pattern matches the whole text
     495            match = text
     496            span = (0, len(text))
     497        elif match is None or span is None:
     498            raise ValueError('If match is not None, span should be specified '
     499                             '(and vice versa).')
     500        m = matcher(pattern, text)
     501        self.assertTrue(m)
     502        self.assertEqual(m.group(), match)
     503        self.assertEqual(m.span(), span)
     504
    435505    def test_re_escape(self):
    436         p=""
    437         for i in range(0, 256):
    438             p = p + chr(i)
    439             self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
    440                              True)
    441             self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
    442 
    443         pat=re.compile(re.escape(p))
    444         self.assertEqual(pat.match(p) is not None, True)
    445         self.assertEqual(pat.match(p).span(), (0,256))
     506        alnum_chars = string.ascii_letters + string.digits
     507        p = u''.join(unichr(i) for i in range(256))
     508        for c in p:
     509            if c in alnum_chars:
     510                self.assertEqual(re.escape(c), c)
     511            elif c == u'\x00':
     512                self.assertEqual(re.escape(c), u'\\000')
     513            else:
     514                self.assertEqual(re.escape(c), u'\\' + c)
     515            self.assertMatch(re.escape(c), c)
     516        self.assertMatch(re.escape(p), p)
     517
     518    def test_re_escape_byte(self):
     519        alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
     520        p = ''.join(chr(i) for i in range(256))
     521        for b in p:
     522            if b in alnum_chars:
     523                self.assertEqual(re.escape(b), b)
     524            elif b == b'\x00':
     525                self.assertEqual(re.escape(b), b'\\000')
     526            else:
     527                self.assertEqual(re.escape(b), b'\\' + b)
     528            self.assertMatch(re.escape(b), b)
     529        self.assertMatch(re.escape(p), p)
     530
     531    def test_re_escape_non_ascii(self):
     532        s = u'xxx\u2620\u2620\u2620xxx'
     533        s_escaped = re.escape(s)
     534        self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx')
     535        self.assertMatch(s_escaped, s)
     536        self.assertMatch(u'.%s+.' % re.escape(u'\u2620'), s,
     537                         u'x\u2620\u2620\u2620x', (2, 7), re.search)
     538
     539    def test_re_escape_non_ascii_bytes(self):
     540        b = u'y\u2620y\u2620y'.encode('utf-8')
     541        b_escaped = re.escape(b)
     542        self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
     543        self.assertMatch(b_escaped, b)
     544        res = re.findall(re.escape(u'\u2620'.encode('utf-8')), b)
     545        self.assertEqual(len(res), 2)
    446546
    447547    def test_pickling(self):
     
    451551        self.pickle_test(cPickle)
    452552        # old pickles expect the _compile() reconstructor in sre module
    453         import warnings
    454         with warnings.catch_warnings():
    455             warnings.filterwarnings("ignore", "The sre module is deprecated",
    456                                     DeprecationWarning)
    457             from sre import _compile
     553        import_module("sre", deprecated=True)
     554        from sre import _compile
    458555
    459556    def pickle_test(self, pickle):
     
    534631        self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
    535632        self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
     633
     634    def test_unlimited_zero_width_repeat(self):
     635        # Issue #9669
     636        self.assertIsNone(re.match(r'(?:a?)*y', 'z'))
     637        self.assertIsNone(re.match(r'(?:a?)+y', 'z'))
     638        self.assertIsNone(re.match(r'(?:a?){2,}y', 'z'))
     639        self.assertIsNone(re.match(r'(?:a?)*?y', 'z'))
     640        self.assertIsNone(re.match(r'(?:a?)+?y', 'z'))
     641        self.assertIsNone(re.match(r'(?:a?){2,}?y', 'z'))
    536642
    537643    def test_scanner(self):
     
    610716        except NameError:
    611717            return # no problem if we have no unicode
    612         self.assert_(re.compile('bug_926075') is not
     718        self.assertTrue(re.compile('bug_926075') is not
    613719                     re.compile(eval("u'bug_926075'")))
    614720
     
    636742        self.assertEqual(iter.next().span(), (4, 4))
    637743        self.assertRaises(StopIteration, iter.next)
     744
     745    def test_bug_6561(self):
     746        # '\d' should match characters in Unicode category 'Nd'
     747        # (Number, Decimal Digit), but not those in 'Nl' (Number,
     748        # Letter) or 'No' (Number, Other).
     749        decimal_digits = [
     750            u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
     751            u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
     752            u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
     753            ]
     754        for x in decimal_digits:
     755            self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
     756
     757        not_decimal_digits = [
     758            u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
     759            u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
     760            u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
     761            u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
     762            ]
     763        for x in not_decimal_digits:
     764            self.assertIsNone(re.match('^\d$', x, re.UNICODE))
    638765
    639766    def test_empty_array(self):
     
    697824        self.assertRaises(OverflowError, _sre.compile, "abc", 0, [long_overflow])
    698825
     826    def test_compile(self):
     827        # Test return value when given string and pattern as parameter
     828        pattern = re.compile('random pattern')
     829        self.assertIsInstance(pattern, re._pattern_type)
     830        same_pattern = re.compile(pattern)
     831        self.assertIsInstance(same_pattern, re._pattern_type)
     832        self.assertIs(same_pattern, pattern)
     833        # Test behaviour when not given a string or pattern as parameter
     834        self.assertRaises(TypeError, re.compile, 0)
     835
     836    def test_bug_13899(self):
     837        # Issue #13899: re pattern r"[\A]" should work like "A" but matches
     838        # nothing. Ditto B and Z.
     839        self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
     840                         ['A', 'B', '\b', 'C', 'Z'])
     841
     842    @precisionbigmemtest(size=_2G, memuse=1)
     843    def test_large_search(self, size):
     844        # Issue #10182: indices were 32-bit-truncated.
     845        s = 'a' * size
     846        m = re.search('$', s)
     847        self.assertIsNotNone(m)
     848        self.assertEqual(m.start(), size)
     849        self.assertEqual(m.end(), size)
     850
     851    # The huge memuse is because of re.sub() using a list and a join()
     852    # to create the replacement result.
     853    @precisionbigmemtest(size=_2G, memuse=16 + 2)
     854    def test_large_subn(self, size):
     855        # Issue #10182: indices were 32-bit-truncated.
     856        s = 'a' * size
     857        r, n = re.subn('', '', s)
     858        self.assertEqual(r, s)
     859        self.assertEqual(n, size + 1)
     860
     861
     862    def test_repeat_minmax_overflow(self):
     863        # Issue #13169
     864        string = "x" * 100000
     865        self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
     866        self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
     867        self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
     868        self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
     869        self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
     870        self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
     871        # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
     872        self.assertRaises(OverflowError, re.compile, r".{%d}" % 2**128)
     873        self.assertRaises(OverflowError, re.compile, r".{,%d}" % 2**128)
     874        self.assertRaises(OverflowError, re.compile, r".{%d,}?" % 2**128)
     875        self.assertRaises(OverflowError, re.compile, r".{%d,%d}" % (2**129, 2**128))
     876
     877    @cpython_only
     878    def test_repeat_minmax_overflow_maxrepeat(self):
     879        try:
     880            from _sre import MAXREPEAT
     881        except ImportError:
     882            self.skipTest('requires _sre.MAXREPEAT constant')
     883        string = "x" * 100000
     884        self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
     885        self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
     886                         (0, 100000))
     887        self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
     888        self.assertRaises(OverflowError, re.compile, r".{%d}" % MAXREPEAT)
     889        self.assertRaises(OverflowError, re.compile, r".{,%d}" % MAXREPEAT)
     890        self.assertRaises(OverflowError, re.compile, r".{%d,}?" % MAXREPEAT)
     891
     892    def test_backref_group_name_in_exception(self):
     893        # Issue 17341: Poor error message when compiling invalid regex
     894        with self.assertRaisesRegexp(sre_constants.error, '<foo>'):
     895            re.compile('(?P=<foo>)')
     896
     897    def test_group_name_in_exception(self):
     898        # Issue 17341: Poor error message when compiling invalid regex
     899        with self.assertRaisesRegexp(sre_constants.error, '\?foo'):
     900            re.compile('(?P<?foo>)')
     901
     902    def test_issue17998(self):
     903        for reps in '*', '+', '?', '{1}':
     904            for mod in '', '?':
     905                pattern = '.' + reps + mod + 'yz'
     906                self.assertEqual(re.compile(pattern, re.S).findall('xyz'),
     907                                 ['xyz'], msg=pattern)
     908                pattern = pattern.encode()
     909                self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),
     910                                 [b'xyz'], msg=pattern)
     911
     912
     913    def test_bug_2537(self):
     914        # issue 2537: empty submatches
     915        for outer_op in ('{0,}', '*', '+', '{1,187}'):
     916            for inner_op in ('{0,}', '*', '?'):
     917                r = re.compile("^((x|y)%s)%s" % (inner_op, outer_op))
     918                m = r.match("xyyzy")
     919                self.assertEqual(m.group(0), "xyy")
     920                self.assertEqual(m.group(1), "")
     921                self.assertEqual(m.group(2), "y")
     922
    699923def run_re_tests():
    700     from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
     924    from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
    701925    if verbose:
    702926        print 'Running re_tests test suite'
Note: See TracChangeset for help on using the changeset viewer.