Ignore:
Timestamp:
Mar 19, 2014, 11:31:01 PM (11 years ago)
Author:
dmik
Message:

python: Merge vendor 2.7.6 to trunk.

Location:
python/trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • python/trunk

  • python/trunk/Lib/test/test_unicode.py

    r2 r391  
    1 # -*- coding: iso-8859-1 -*-
    21""" Test script for the Unicode implementation.
    32
     
    76
    87"""#"
    9 import sys, struct, codecs
     8import sys
     9import struct
     10import codecs
     11import unittest
    1012from test import test_support, string_tests
     13
     14# decorator to skip tests on narrow builds
     15requires_wide_build = unittest.skipIf(sys.maxunicode == 65535,
     16                                      'requires wide build')
    1117
    1218# Error handling (bad decoder return)
     
    3541    type2test = unicode
    3642
     43    def assertEqual(self, first, second, msg=None):
     44        # strict assertEqual method: reject implicit bytes/unicode equality
     45        super(UnicodeTest, self).assertEqual(first, second, msg)
     46        if isinstance(first, unicode) or isinstance(second, unicode):
     47            self.assertIsInstance(first, unicode)
     48            self.assertIsInstance(second, unicode)
     49        elif isinstance(first, str) or isinstance(second, str):
     50            self.assertIsInstance(first, str)
     51            self.assertIsInstance(second, str)
     52
    3753    def checkequalnofix(self, result, object, methodname, *args):
    3854        method = getattr(object, methodname)
    3955        realresult = method(*args)
    4056        self.assertEqual(realresult, result)
    41         self.assert_(type(realresult) is type(result))
     57        self.assertTrue(type(realresult) is type(result))
    4258
    4359        # if the original is returned make sure that
     
    5167            realresult = method(*args)
    5268            self.assertEqual(realresult, result)
    53             self.assert_(object is not realresult)
     69            self.assertTrue(object is not realresult)
    5470
    5571    def test_literals(self):
     
    198214    def test_comparison(self):
    199215        # Comparisons:
    200         self.assertEqual(u'abc', 'abc')
    201         self.assertEqual('abc', u'abc')
    202         self.assertEqual(u'abc', u'abc')
    203         self.assert_(u'abcd' > 'abc')
    204         self.assert_('abcd' > u'abc')
    205         self.assert_(u'abcd' > u'abc')
    206         self.assert_(u'abc' < 'abcd')
    207         self.assert_('abc' < u'abcd')
    208         self.assert_(u'abc' < u'abcd')
     216        self.assertTrue(u'abc' == 'abc')
     217        self.assertTrue('abc' == u'abc')
     218        self.assertTrue(u'abc' == u'abc')
     219        self.assertTrue(u'abcd' > 'abc')
     220        self.assertTrue('abcd' > u'abc')
     221        self.assertTrue(u'abcd' > u'abc')
     222        self.assertTrue(u'abc' < 'abcd')
     223        self.assertTrue('abc' < u'abcd')
     224        self.assertTrue(u'abc' < u'abcd')
    209225
    210226        if 0:
     
    213229
    214230            # No surrogates, no fixup required.
    215             self.assert_(u'\u0061' < u'\u20ac')
     231            self.assertTrue(u'\u0061' < u'\u20ac')
    216232            # Non surrogate below surrogate value, no fixup required
    217             self.assert_(u'\u0061' < u'\ud800\udc02')
     233            self.assertTrue(u'\u0061' < u'\ud800\udc02')
    218234
    219235            # Non surrogate above surrogate value, fixup required
    220236            def test_lecmp(s, s2):
    221                 self.assert_(s < s2)
     237                self.assertTrue(s < s2)
    222238
    223239            def test_fixup(s):
     
    259275
    260276        # Surrogates on both sides, no fixup required
    261         self.assert_(u'\ud800\udc02' < u'\ud84d\udc56')
     277        self.assertTrue(u'\ud800\udc02' < u'\ud84d\udc56')
     278
     279    def test_capitalize(self):
     280        string_tests.CommonTest.test_capitalize(self)
     281        # check that titlecased chars are lowered correctly
     282        # \u1ffc is the titlecased char
     283        self.checkequal(u'\u1ffc\u1ff3\u1ff3\u1ff3',
     284                        u'\u1ff3\u1ff3\u1ffc\u1ffc', 'capitalize')
     285        # check with cased non-letter chars
     286        self.checkequal(u'\u24c5\u24e8\u24e3\u24d7\u24de\u24dd',
     287                        u'\u24c5\u24ce\u24c9\u24bd\u24c4\u24c3', 'capitalize')
     288        self.checkequal(u'\u24c5\u24e8\u24e3\u24d7\u24de\u24dd',
     289                        u'\u24df\u24e8\u24e3\u24d7\u24de\u24dd', 'capitalize')
     290        self.checkequal(u'\u2160\u2171\u2172',
     291                        u'\u2160\u2161\u2162', 'capitalize')
     292        self.checkequal(u'\u2160\u2171\u2172',
     293                        u'\u2170\u2171\u2172', 'capitalize')
     294        # check with Ll chars with no upper - nothing changes here
     295        self.checkequal(u'\u019b\u1d00\u1d86\u0221\u1fb7',
     296                        u'\u019b\u1d00\u1d86\u0221\u1fb7', 'capitalize')
    262297
    263298    def test_islower(self):
    264299        string_tests.MixinStrUnicodeUserStringTest.test_islower(self)
    265300        self.checkequalnofix(False, u'\u1FFc', 'islower')
     301
     302    @requires_wide_build
     303    def test_islower_non_bmp(self):
     304        # non-BMP, uppercase
     305        self.assertFalse(u'\U00010401'.islower())
     306        self.assertFalse(u'\U00010427'.islower())
     307        # non-BMP, lowercase
     308        self.assertTrue(u'\U00010429'.islower())
     309        self.assertTrue(u'\U0001044E'.islower())
     310        # non-BMP, non-cased
     311        self.assertFalse(u'\U0001F40D'.islower())
     312        self.assertFalse(u'\U0001F46F'.islower())
    266313
    267314    def test_isupper(self):
     
    270317            self.checkequalnofix(False, u'\u1FFc', 'isupper')
    271318
     319    @requires_wide_build
     320    def test_isupper_non_bmp(self):
     321        # non-BMP, uppercase
     322        self.assertTrue(u'\U00010401'.isupper())
     323        self.assertTrue(u'\U00010427'.isupper())
     324        # non-BMP, lowercase
     325        self.assertFalse(u'\U00010429'.isupper())
     326        self.assertFalse(u'\U0001044E'.isupper())
     327        # non-BMP, non-cased
     328        self.assertFalse(u'\U0001F40D'.isupper())
     329        self.assertFalse(u'\U0001F46F'.isupper())
     330
    272331    def test_istitle(self):
    273         string_tests.MixinStrUnicodeUserStringTest.test_title(self)
     332        string_tests.MixinStrUnicodeUserStringTest.test_istitle(self)
    274333        self.checkequalnofix(True, u'\u1FFc', 'istitle')
    275334        self.checkequalnofix(True, u'Greek \u1FFcitlecases ...', 'istitle')
     335
     336    @requires_wide_build
     337    def test_istitle_non_bmp(self):
     338        # non-BMP, uppercase + lowercase
     339        self.assertTrue(u'\U00010401\U00010429'.istitle())
     340        self.assertTrue(u'\U00010427\U0001044E'.istitle())
     341        # apparently there are no titlecased (Lt) non-BMP chars in Unicode 6
     342        for ch in [u'\U00010429', u'\U0001044E', u'\U0001F40D', u'\U0001F46F']:
     343            self.assertFalse(ch.istitle(), '{!r} is not title'.format(ch))
    276344
    277345    def test_isspace(self):
     
    281349        self.checkequalnofix(False, u'\u2014', 'isspace')
    282350
     351    @requires_wide_build
     352    def test_isspace_non_bmp(self):
     353        # apparently there are no non-BMP space chars in Unicode 6
     354        for ch in [u'\U00010401', u'\U00010427', u'\U00010429', u'\U0001044E',
     355                   u'\U0001F40D', u'\U0001F46F']:
     356            self.assertFalse(ch.isspace(), '{!r} is not space.'.format(ch))
     357
     358    @requires_wide_build
     359    def test_isalnum_non_bmp(self):
     360        for ch in [u'\U00010401', u'\U00010427', u'\U00010429', u'\U0001044E',
     361                   u'\U0001D7F6', u'\U000104A0', u'\U000104A0', u'\U0001F107']:
     362            self.assertTrue(ch.isalnum(), '{!r} is alnum.'.format(ch))
     363
    283364    def test_isalpha(self):
    284365        string_tests.MixinStrUnicodeUserStringTest.test_isalpha(self)
    285366        self.checkequalnofix(True, u'\u1FFc', 'isalpha')
     367
     368    @requires_wide_build
     369    def test_isalpha_non_bmp(self):
     370        # non-BMP, cased
     371        self.assertTrue(u'\U00010401'.isalpha())
     372        self.assertTrue(u'\U00010427'.isalpha())
     373        self.assertTrue(u'\U00010429'.isalpha())
     374        self.assertTrue(u'\U0001044E'.isalpha())
     375        # non-BMP, non-cased
     376        self.assertFalse(u'\U0001F40D'.isalpha())
     377        self.assertFalse(u'\U0001F46F'.isalpha())
    286378
    287379    def test_isdecimal(self):
     
    297389        self.checkraises(TypeError, 'abc', 'isdecimal', 42)
    298390
     391    @requires_wide_build
     392    def test_isdecimal_non_bmp(self):
     393        for ch in [u'\U00010401', u'\U00010427', u'\U00010429', u'\U0001044E',
     394                   u'\U0001F40D', u'\U0001F46F', u'\U00011065', u'\U0001F107']:
     395            self.assertFalse(ch.isdecimal(), '{!r} is not decimal.'.format(ch))
     396        for ch in [u'\U0001D7F6', u'\U000104A0', u'\U000104A0']:
     397            self.assertTrue(ch.isdecimal(), '{!r} is decimal.'.format(ch))
     398
    299399    def test_isdigit(self):
    300400        string_tests.MixinStrUnicodeUserStringTest.test_isdigit(self)
     
    302402        self.checkequalnofix(False, u'\xbc', 'isdigit')
    303403        self.checkequalnofix(True, u'\u0660', 'isdigit')
     404
     405    @requires_wide_build
     406    def test_isdigit_non_bmp(self):
     407        for ch in [u'\U00010401', u'\U00010427', u'\U00010429', u'\U0001044E',
     408                   u'\U0001F40D', u'\U0001F46F', u'\U00011065']:
     409            self.assertFalse(ch.isdigit(), '{!r} is not a digit.'.format(ch))
     410        for ch in [u'\U0001D7F6', u'\U000104A0', u'\U000104A0', u'\U0001F107']:
     411            self.assertTrue(ch.isdigit(), '{!r} is a digit.'.format(ch))
    304412
    305413    def test_isnumeric(self):
     
    315423        self.assertRaises(TypeError, u"abc".isnumeric, 42)
    316424
     425    @requires_wide_build
     426    def test_isnumeric_non_bmp(self):
     427        for ch in [u'\U00010401', u'\U00010427', u'\U00010429', u'\U0001044E',
     428                   u'\U0001F40D', u'\U0001F46F']:
     429            self.assertFalse(ch.isnumeric(), '{!r} is not numeric.'.format(ch))
     430        for ch in [u'\U00010107', u'\U0001D7F6', u'\U00023b1b',
     431                   u'\U000104A0', u'\U0001F107']:
     432            self.assertTrue(ch.isnumeric(), '{!r} is numeric.'.format(ch))
     433
     434    @requires_wide_build
     435    def test_surrogates(self):
     436        # this test actually passes on narrow too, but it's just by accident.
     437        # Surrogates are seen as non-cased chars, so u'X\uD800X' is as
     438        # uppercase as 'X X'
     439        for s in (u'a\uD800b\uDFFF', u'a\uDFFFb\uD800',
     440                  u'a\uD800b\uDFFFa', u'a\uDFFFb\uD800a'):
     441            self.assertTrue(s.islower())
     442            self.assertFalse(s.isupper())
     443            self.assertFalse(s.istitle())
     444        for s in (u'A\uD800B\uDFFF', u'A\uDFFFB\uD800',
     445                  u'A\uD800B\uDFFFA', u'A\uDFFFB\uD800A'):
     446            self.assertFalse(s.islower())
     447            self.assertTrue(s.isupper())
     448            self.assertTrue(s.istitle())
     449
     450        for meth_name in ('islower', 'isupper', 'istitle'):
     451            meth = getattr(unicode, meth_name)
     452            for s in (u'\uD800', u'\uDFFF', u'\uD800\uD800', u'\uDFFF\uDFFF'):
     453                self.assertFalse(meth(s), '%r.%s() is False' % (s, meth_name))
     454
     455        for meth_name in ('isalpha', 'isalnum', 'isdigit', 'isspace',
     456                          'isdecimal', 'isnumeric'):
     457            meth = getattr(unicode, meth_name)
     458            for s in (u'\uD800', u'\uDFFF', u'\uD800\uD800', u'\uDFFF\uDFFF',
     459                      u'a\uD800b\uDFFF', u'a\uDFFFb\uD800',
     460                      u'a\uD800b\uDFFFa', u'a\uDFFFb\uD800a'):
     461                self.assertFalse(meth(s), '%r.%s() is False' % (s, meth_name))
     462
     463
     464    @requires_wide_build
     465    def test_lower(self):
     466        string_tests.CommonTest.test_lower(self)
     467        self.assertEqual(u'\U00010427'.lower(), u'\U0001044F')
     468        self.assertEqual(u'\U00010427\U00010427'.lower(),
     469                         u'\U0001044F\U0001044F')
     470        self.assertEqual(u'\U00010427\U0001044F'.lower(),
     471                         u'\U0001044F\U0001044F')
     472        self.assertEqual(u'X\U00010427x\U0001044F'.lower(),
     473                         u'x\U0001044Fx\U0001044F')
     474
     475    @requires_wide_build
     476    def test_upper(self):
     477        string_tests.CommonTest.test_upper(self)
     478        self.assertEqual(u'\U0001044F'.upper(), u'\U00010427')
     479        self.assertEqual(u'\U0001044F\U0001044F'.upper(),
     480                         u'\U00010427\U00010427')
     481        self.assertEqual(u'\U00010427\U0001044F'.upper(),
     482                         u'\U00010427\U00010427')
     483        self.assertEqual(u'X\U00010427x\U0001044F'.upper(),
     484                         u'X\U00010427X\U00010427')
     485
     486    @requires_wide_build
     487    def test_capitalize(self):
     488        string_tests.CommonTest.test_capitalize(self)
     489        self.assertEqual(u'\U0001044F'.capitalize(), u'\U00010427')
     490        self.assertEqual(u'\U0001044F\U0001044F'.capitalize(),
     491                         u'\U00010427\U0001044F')
     492        self.assertEqual(u'\U00010427\U0001044F'.capitalize(),
     493                         u'\U00010427\U0001044F')
     494        self.assertEqual(u'\U0001044F\U00010427'.capitalize(),
     495                         u'\U00010427\U0001044F')
     496        self.assertEqual(u'X\U00010427x\U0001044F'.capitalize(),
     497                         u'X\U0001044Fx\U0001044F')
     498
     499    @requires_wide_build
     500    def test_title(self):
     501        string_tests.MixinStrUnicodeUserStringTest.test_title(self)
     502        self.assertEqual(u'\U0001044F'.title(), u'\U00010427')
     503        self.assertEqual(u'\U0001044F\U0001044F'.title(),
     504                         u'\U00010427\U0001044F')
     505        self.assertEqual(u'\U0001044F\U0001044F \U0001044F\U0001044F'.title(),
     506                         u'\U00010427\U0001044F \U00010427\U0001044F')
     507        self.assertEqual(u'\U00010427\U0001044F \U00010427\U0001044F'.title(),
     508                         u'\U00010427\U0001044F \U00010427\U0001044F')
     509        self.assertEqual(u'\U0001044F\U00010427 \U0001044F\U00010427'.title(),
     510                         u'\U00010427\U0001044F \U00010427\U0001044F')
     511        self.assertEqual(u'X\U00010427x\U0001044F X\U00010427x\U0001044F'.title(),
     512                         u'X\U0001044Fx\U0001044F X\U0001044Fx\U0001044F')
     513
     514    @requires_wide_build
     515    def test_swapcase(self):
     516        string_tests.CommonTest.test_swapcase(self)
     517        self.assertEqual(u'\U0001044F'.swapcase(), u'\U00010427')
     518        self.assertEqual(u'\U00010427'.swapcase(), u'\U0001044F')
     519        self.assertEqual(u'\U0001044F\U0001044F'.swapcase(),
     520                         u'\U00010427\U00010427')
     521        self.assertEqual(u'\U00010427\U0001044F'.swapcase(),
     522                         u'\U0001044F\U00010427')
     523        self.assertEqual(u'\U0001044F\U00010427'.swapcase(),
     524                         u'\U00010427\U0001044F')
     525        self.assertEqual(u'X\U00010427x\U0001044F'.swapcase(),
     526                         u'x\U0001044FX\U00010427')
     527
    317528    def test_contains(self):
    318529        # Testing Unicode contains method
    319         self.assert_('a' in u'abdb')
    320         self.assert_('a' in u'bdab')
    321         self.assert_('a' in u'bdaba')
    322         self.assert_('a' in u'bdba')
    323         self.assert_('a' in u'bdba')
    324         self.assert_(u'a' in u'bdba')
    325         self.assert_(u'a' not in u'bdb')
    326         self.assert_(u'a' not in 'bdb')
    327         self.assert_(u'a' in 'bdba')
    328         self.assert_(u'a' in ('a',1,None))
    329         self.assert_(u'a' in (1,None,'a'))
    330         self.assert_(u'a' in (1,None,u'a'))
    331         self.assert_('a' in ('a',1,None))
    332         self.assert_('a' in (1,None,'a'))
    333         self.assert_('a' in (1,None,u'a'))
    334         self.assert_('a' not in ('x',1,u'y'))
    335         self.assert_('a' not in ('x',1,None))
    336         self.assert_(u'abcd' not in u'abcxxxx')
    337         self.assert_(u'ab' in u'abcd')
    338         self.assert_('ab' in u'abc')
    339         self.assert_(u'ab' in 'abc')
    340         self.assert_(u'ab' in (1,None,u'ab'))
    341         self.assert_(u'' in u'abc')
    342         self.assert_('' in u'abc')
     530        self.assertIn('a', u'abdb')
     531        self.assertIn('a', u'bdab')
     532        self.assertIn('a', u'bdaba')
     533        self.assertIn('a', u'bdba')
     534        self.assertIn('a', u'bdba')
     535        self.assertIn(u'a', u'bdba')
     536        self.assertNotIn(u'a', u'bdb')
     537        self.assertNotIn(u'a', 'bdb')
     538        self.assertIn(u'a', 'bdba')
     539        self.assertIn(u'a', ('a',1,None))
     540        self.assertIn(u'a', (1,None,'a'))
     541        self.assertIn(u'a', (1,None,u'a'))
     542        self.assertIn('a', ('a',1,None))
     543        self.assertIn('a', (1,None,'a'))
     544        self.assertIn('a', (1,None,u'a'))
     545        self.assertNotIn('a', ('x',1,u'y'))
     546        self.assertNotIn('a', ('x',1,None))
     547        self.assertNotIn(u'abcd', u'abcxxxx')
     548        self.assertIn(u'ab', u'abcd')
     549        self.assertIn('ab', u'abc')
     550        self.assertIn(u'ab', 'abc')
     551        self.assertIn(u'ab', (1,None,u'ab'))
     552        self.assertIn(u'', u'abc')
     553        self.assertIn('', u'abc')
    343554
    344555        # If the following fails either
    345556        # the contains operator does not propagate UnicodeErrors or
    346557        # someone has changed the default encoding
    347         self.assertRaises(UnicodeError, 'g\xe2teau'.__contains__, u'\xe2')
    348 
    349         self.assert_(u'' in '')
    350         self.assert_('' in u'')
    351         self.assert_(u'' in u'')
    352         self.assert_(u'' in 'abc')
    353         self.assert_('' in u'abc')
    354         self.assert_(u'' in u'abc')
    355         self.assert_(u'\0' not in 'abc')
    356         self.assert_('\0' not in u'abc')
    357         self.assert_(u'\0' not in u'abc')
    358         self.assert_(u'\0' in '\0abc')
    359         self.assert_('\0' in u'\0abc')
    360         self.assert_(u'\0' in u'\0abc')
    361         self.assert_(u'\0' in 'abc\0')
    362         self.assert_('\0' in u'abc\0')
    363         self.assert_(u'\0' in u'abc\0')
    364         self.assert_(u'a' in '\0abc')
    365         self.assert_('a' in u'\0abc')
    366         self.assert_(u'a' in u'\0abc')
    367         self.assert_(u'asdf' in 'asdf')
    368         self.assert_('asdf' in u'asdf')
    369         self.assert_(u'asdf' in u'asdf')
    370         self.assert_(u'asdf' not in 'asd')
    371         self.assert_('asdf' not in u'asd')
    372         self.assert_(u'asdf' not in u'asd')
    373         self.assert_(u'asdf' not in '')
    374         self.assert_('asdf' not in u'')
    375         self.assert_(u'asdf' not in u'')
     558        self.assertRaises(UnicodeDecodeError, 'g\xe2teau'.__contains__, u'\xe2')
     559        self.assertRaises(UnicodeDecodeError, u'g\xe2teau'.__contains__, '\xe2')
     560
     561        self.assertIn(u'', '')
     562        self.assertIn('', u'')
     563        self.assertIn(u'', u'')
     564        self.assertIn(u'', 'abc')
     565        self.assertIn('', u'abc')
     566        self.assertIn(u'', u'abc')
     567        self.assertNotIn(u'\0', 'abc')
     568        self.assertNotIn('\0', u'abc')
     569        self.assertNotIn(u'\0', u'abc')
     570        self.assertIn(u'\0', '\0abc')
     571        self.assertIn('\0', u'\0abc')
     572        self.assertIn(u'\0', u'\0abc')
     573        self.assertIn(u'\0', 'abc\0')
     574        self.assertIn('\0', u'abc\0')
     575        self.assertIn(u'\0', u'abc\0')
     576        self.assertIn(u'a', '\0abc')
     577        self.assertIn('a', u'\0abc')
     578        self.assertIn(u'a', u'\0abc')
     579        self.assertIn(u'asdf', 'asdf')
     580        self.assertIn('asdf', u'asdf')
     581        self.assertIn(u'asdf', u'asdf')
     582        self.assertNotIn(u'asdf', 'asd')
     583        self.assertNotIn('asdf', u'asd')
     584        self.assertNotIn(u'asdf', u'asd')
     585        self.assertNotIn(u'asdf', '')
     586        self.assertNotIn('asdf', u'')
     587        self.assertNotIn(u'asdf', u'')
    376588
    377589        self.assertRaises(TypeError, u"abc".__contains__)
     590        self.assertRaises(TypeError, u"abc".__contains__, object())
    378591
    379592    def test_formatting(self):
     
    393606        self.assertEqual(u'%c' % 0x1234, u'\u1234')
    394607        self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))
     608        self.assertRaises(ValueError, u"%.1\u1032f".__mod__, (1.0/3))
    395609
    396610        for num in range(0x00,0x80):
    397611            char = chr(num)
    398             self.assertEqual(u"%c" % char, char)
    399             self.assertEqual(u"%c" % num, char)
     612            self.assertEqual(u"%c" % char, unicode(char))
     613            self.assertEqual(u"%c" % num, unicode(char))
     614            self.assertTrue(char == u"%c" % char)
     615            self.assertTrue(char == u"%c" % num)
    400616        # Issue 7649
    401617        for num in range(0x80,0x100):
     
    429645        self.assertEqual('%s' % Wrapper(), u'\u1234')
    430646
     647    @test_support.cpython_only
     648    def test_formatting_huge_precision(self):
     649        from _testcapi import INT_MAX
     650        format_string = u"%.{}f".format(INT_MAX + 1)
     651        with self.assertRaises(ValueError):
     652            result = format_string % 2.34
     653
     654    def test_formatting_huge_width(self):
     655        format_string = u"%{}f".format(sys.maxsize + 1)
     656        with self.assertRaises(ValueError):
     657            result = format_string % 2.34
     658
     659    def test_startswith_endswith_errors(self):
     660        for meth in (u'foo'.startswith, u'foo'.endswith):
     661            with self.assertRaises(UnicodeDecodeError):
     662                meth('\xff')
     663            with self.assertRaises(TypeError) as cm:
     664                meth(['f'])
     665            exc = str(cm.exception)
     666            self.assertIn('unicode', exc)
     667            self.assertIn('str', exc)
     668            self.assertIn('tuple', exc)
     669
    431670    @test_support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR')
    432671    def test_format_float(self):
     
    511750
    512751        if not sys.platform.startswith('java'):
     752            with test_support.check_py3k_warnings():
     753                buf = buffer('character buffers are decoded to unicode')
    513754            self.assertEqual(
    514755                unicode(
    515                     buffer('character buffers are decoded to unicode'),
     756                    buf,
    516757                    'utf-8',
    517758                    'strict'
     
    535776            (ur'\\?', '+AFwAXA?'),
    536777            (ur'\\\?', '+AFwAXABc?'),
    537             (ur'++--', '+-+---')
     778            (ur'++--', '+-+---'),
     779            (u'\U000abcde', '+2m/c3g-'),                  # surrogate pairs
     780            (u'/', '/'),
    538781        ]
    539782
     
    541784            self.assertEqual(x.encode('utf-7'), y)
    542785
    543         # surrogates not supported
    544         self.assertRaises(UnicodeError, unicode, '+3ADYAA-', 'utf-7')
    545 
    546         self.assertEqual(unicode('+3ADYAA-', 'utf-7', 'replace'), u'\ufffd')
    547 
    548         # Issue #2242: crash on some Windows/MSVC versions
    549         self.assertRaises(UnicodeDecodeError, '+\xc1'.decode, 'utf-7')
     786        # Unpaired surrogates are passed through
     787        self.assertEqual(u'\uD801'.encode('utf-7'), '+2AE-')
     788        self.assertEqual(u'\uD801x'.encode('utf-7'), '+2AE-x')
     789        self.assertEqual(u'\uDC01'.encode('utf-7'), '+3AE-')
     790        self.assertEqual(u'\uDC01x'.encode('utf-7'), '+3AE-x')
     791        self.assertEqual('+2AE-'.decode('utf-7'), u'\uD801')
     792        self.assertEqual('+2AE-x'.decode('utf-7'), u'\uD801x')
     793        self.assertEqual('+3AE-'.decode('utf-7'), u'\uDC01')
     794        self.assertEqual('+3AE-x'.decode('utf-7'), u'\uDC01x')
     795
     796        self.assertEqual(u'\uD801\U000abcde'.encode('utf-7'), '+2AHab9ze-')
     797        self.assertEqual('+2AHab9ze-'.decode('utf-7'), u'\uD801\U000abcde')
     798
     799        # Direct encoded characters
     800        set_d = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?"
     801        # Optional direct characters
     802        set_o = '!"#$%&*;<=>@[]^_`{|}'
     803        for c in set_d:
     804            self.assertEqual(c.encode('utf7'), c.encode('ascii'))
     805            self.assertEqual(c.encode('ascii').decode('utf7'), unicode(c))
     806            self.assertTrue(c == c.encode('ascii').decode('utf7'))
     807        for c in set_o:
     808            self.assertEqual(c.encode('ascii').decode('utf7'), unicode(c))
     809            self.assertTrue(c == c.encode('ascii').decode('utf7'))
    550810
    551811    def test_codecs_utf8(self):
     
    580840
    581841        # UTF-8 specific decoding tests
    582         self.assertEqual(unicode('\xf0\xa3\x91\x96', 'utf-8'), u'\U00023456' )
    583         self.assertEqual(unicode('\xf0\x90\x80\x82', 'utf-8'), u'\U00010002' )
    584         self.assertEqual(unicode('\xe2\x82\xac', 'utf-8'), u'\u20ac' )
     842        self.assertEqual(unicode('\xf0\xa3\x91\x96', 'utf-8'), u'\U00023456')
     843        self.assertEqual(unicode('\xf0\x90\x80\x82', 'utf-8'), u'\U00010002')
     844        self.assertEqual(unicode('\xe2\x82\xac', 'utf-8'), u'\u20ac')
    585845
    586846        # Other possible utf-8 test cases:
    587847        # * strict decoding testing for all of the
    588848        #   UTF8_ERROR cases in PyUnicode_DecodeUTF8
     849
     850    def test_utf8_decode_valid_sequences(self):
     851        sequences = [
     852            # single byte
     853            ('\x00', u'\x00'), ('a', u'a'), ('\x7f', u'\x7f'),
     854            # 2 bytes
     855            ('\xc2\x80', u'\x80'), ('\xdf\xbf', u'\u07ff'),
     856            # 3 bytes
     857            ('\xe0\xa0\x80', u'\u0800'), ('\xed\x9f\xbf', u'\ud7ff'),
     858            ('\xee\x80\x80', u'\uE000'), ('\xef\xbf\xbf', u'\uffff'),
     859            # 4 bytes
     860            ('\xF0\x90\x80\x80', u'\U00010000'),
     861            ('\xf4\x8f\xbf\xbf', u'\U0010FFFF')
     862        ]
     863        for seq, res in sequences:
     864            self.assertEqual(seq.decode('utf-8'), res)
     865
     866        for ch in map(unichr, range(0, sys.maxunicode)):
     867            self.assertEqual(ch, ch.encode('utf-8').decode('utf-8'))
     868
     869    def test_utf8_decode_invalid_sequences(self):
     870        # continuation bytes in a sequence of 2, 3, or 4 bytes
     871        continuation_bytes = map(chr, range(0x80, 0xC0))
     872        # start bytes of a 2-byte sequence equivalent to codepoints <= 0x7F
     873        invalid_2B_seq_start_bytes = map(chr, range(0xC0, 0xC2))
     874        # start bytes of a 4-byte sequence equivalent to codepoints > 0x10FFFF
     875        invalid_4B_seq_start_bytes = map(chr, range(0xF5, 0xF8))
     876        invalid_start_bytes = (
     877            continuation_bytes + invalid_2B_seq_start_bytes +
     878            invalid_4B_seq_start_bytes + map(chr, range(0xF7, 0x100))
     879        )
     880
     881        for byte in invalid_start_bytes:
     882            self.assertRaises(UnicodeDecodeError, byte.decode, 'utf-8')
     883
     884        for sb in invalid_2B_seq_start_bytes:
     885            for cb in continuation_bytes:
     886                self.assertRaises(UnicodeDecodeError, (sb+cb).decode, 'utf-8')
     887
     888        for sb in invalid_4B_seq_start_bytes:
     889            for cb1 in continuation_bytes[:3]:
     890                for cb3 in continuation_bytes[:3]:
     891                    self.assertRaises(UnicodeDecodeError,
     892                                      (sb+cb1+'\x80'+cb3).decode, 'utf-8')
     893
     894        for cb in map(chr, range(0x80, 0xA0)):
     895            self.assertRaises(UnicodeDecodeError,
     896                              ('\xE0'+cb+'\x80').decode, 'utf-8')
     897            self.assertRaises(UnicodeDecodeError,
     898                              ('\xE0'+cb+'\xBF').decode, 'utf-8')
     899        # XXX: surrogates shouldn't be valid UTF-8!
     900        # see http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf
     901        # (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt
     902        #for cb in map(chr, range(0xA0, 0xC0)):
     903            #self.assertRaises(UnicodeDecodeError,
     904                              #('\xED'+cb+'\x80').decode, 'utf-8')
     905            #self.assertRaises(UnicodeDecodeError,
     906                              #('\xED'+cb+'\xBF').decode, 'utf-8')
     907        # but since they are valid on Python 2 add a test for that:
     908        for cb, surrogate in zip(map(chr, range(0xA0, 0xC0)),
     909                                 map(unichr, range(0xd800, 0xe000, 64))):
     910            encoded = '\xED'+cb+'\x80'
     911            self.assertEqual(encoded.decode('utf-8'), surrogate)
     912            self.assertEqual(surrogate.encode('utf-8'), encoded)
     913
     914        for cb in map(chr, range(0x80, 0x90)):
     915            self.assertRaises(UnicodeDecodeError,
     916                              ('\xF0'+cb+'\x80\x80').decode, 'utf-8')
     917            self.assertRaises(UnicodeDecodeError,
     918                              ('\xF0'+cb+'\xBF\xBF').decode, 'utf-8')
     919        for cb in map(chr, range(0x90, 0xC0)):
     920            self.assertRaises(UnicodeDecodeError,
     921                              ('\xF4'+cb+'\x80\x80').decode, 'utf-8')
     922            self.assertRaises(UnicodeDecodeError,
     923                              ('\xF4'+cb+'\xBF\xBF').decode, 'utf-8')
     924
     925    def test_issue8271(self):
     926        # Issue #8271: during the decoding of an invalid UTF-8 byte sequence,
     927        # only the start byte and the continuation byte(s) are now considered
     928        # invalid, instead of the number of bytes specified by the start byte.
     929        # See http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf (page 95,
     930        # table 3-8, Row 2) for more information about the algorithm used.
     931        FFFD = u'\ufffd'
     932        sequences = [
     933            # invalid start bytes
     934            ('\x80', FFFD), # continuation byte
     935            ('\x80\x80', FFFD*2), # 2 continuation bytes
     936            ('\xc0', FFFD),
     937            ('\xc0\xc0', FFFD*2),
     938            ('\xc1', FFFD),
     939            ('\xc1\xc0', FFFD*2),
     940            ('\xc0\xc1', FFFD*2),
     941            # with start byte of a 2-byte sequence
     942            ('\xc2', FFFD), # only the start byte
     943            ('\xc2\xc2', FFFD*2), # 2 start bytes
     944            ('\xc2\xc2\xc2', FFFD*3), # 3 start bytes
     945            ('\xc2\x41', FFFD+'A'), # invalid continuation byte
     946            # with start byte of a 3-byte sequence
     947            ('\xe1', FFFD), # only the start byte
     948            ('\xe1\xe1', FFFD*2), # 2 start bytes
     949            ('\xe1\xe1\xe1', FFFD*3), # 3 start bytes
     950            ('\xe1\xe1\xe1\xe1', FFFD*4), # 4 start bytes
     951            ('\xe1\x80', FFFD), # only 1 continuation byte
     952            ('\xe1\x41', FFFD+'A'), # invalid continuation byte
     953            ('\xe1\x41\x80', FFFD+'A'+FFFD), # invalid cb followed by valid cb
     954            ('\xe1\x41\x41', FFFD+'AA'), # 2 invalid continuation bytes
     955            ('\xe1\x80\x41', FFFD+'A'), # only 1 valid continuation byte
     956            ('\xe1\x80\xe1\x41', FFFD*2+'A'), # 1 valid and the other invalid
     957            ('\xe1\x41\xe1\x80', FFFD+'A'+FFFD), # 1 invalid and the other valid
     958            # with start byte of a 4-byte sequence
     959            ('\xf1', FFFD), # only the start byte
     960            ('\xf1\xf1', FFFD*2), # 2 start bytes
     961            ('\xf1\xf1\xf1', FFFD*3), # 3 start bytes
     962            ('\xf1\xf1\xf1\xf1', FFFD*4), # 4 start bytes
     963            ('\xf1\xf1\xf1\xf1\xf1', FFFD*5), # 5 start bytes
     964            ('\xf1\x80', FFFD), # only 1 continuation byte
     965            ('\xf1\x80\x80', FFFD), # only 2 continuation bytes
     966            ('\xf1\x80\x41', FFFD+'A'), # 1 valid cb and 1 invalid
     967            ('\xf1\x80\x41\x41', FFFD+'AA'), # 1 valid cb and 2 invalid
     968            ('\xf1\x80\x80\x41', FFFD+'A'), # 2 valid cb and 1 invalid
     969            ('\xf1\x41\x80', FFFD+'A'+FFFD), # 1 invalid cb and 1 valid
     970            ('\xf1\x41\x80\x80', FFFD+'A'+FFFD*2), # 1 invalid cb and 2 valid
     971            ('\xf1\x41\x80\x41', FFFD+'A'+FFFD+'A'), # invalid, valid, then invalid cb
     972            ('\xf1\x41\x41\x80', FFFD+'AA'+FFFD), # 2 invalid cb and 1 valid
     973            ('\xf1\x41\xf1\x80', FFFD+'A'+FFFD),
     974            ('\xf1\x41\x80\xf1', FFFD+'A'+FFFD*2),
     975            ('\xf1\xf1\x80\x41', FFFD*2+'A'),
     976            ('\xf1\x41\xf1\xf1', FFFD+'A'+FFFD*2),
     977            # with invalid start byte of a 4-byte sequence (rfc2279)
     978            ('\xf5', FFFD), # only the start byte
     979            ('\xf5\xf5', FFFD*2), # 2 start bytes
     980            ('\xf5\x80', FFFD*2), # only 1 continuation byte
     981            ('\xf5\x80\x80', FFFD*3), # only 2 continuation bytes
     982            ('\xf5\x80\x80\x80', FFFD*4), # 3 continuation bytes
     983            ('\xf5\x80\x41', FFFD*2+'A'), #  1 valid cb and 1 invalid
     984            ('\xf5\x80\x41\xf5', FFFD*2+'A'+FFFD),
     985            ('\xf5\x41\x80\x80\x41', FFFD+'A'+FFFD*2+'A'),
     986            # with invalid start byte of a 5-byte sequence (rfc2279)
     987            ('\xf8', FFFD), # only the start byte
     988            ('\xf8\xf8', FFFD*2), # 2 start bytes
     989            ('\xf8\x80', FFFD*2), # only one continuation byte
     990            ('\xf8\x80\x41', FFFD*2 + 'A'), # 1 valid cb and 1 invalid
     991            ('\xf8\x80\x80\x80\x80', FFFD*5), # invalid 5 bytes seq with 5 bytes
     992            # with invalid start byte of a 6-byte sequence (rfc2279)
     993            ('\xfc', FFFD), # only the start byte
     994            ('\xfc\xfc', FFFD*2), # 2 start bytes
     995            ('\xfc\x80\x80', FFFD*3), # only 2 continuation bytes
     996            ('\xfc\x80\x80\x80\x80\x80', FFFD*6), # 6 continuation bytes
     997            # invalid start byte
     998            ('\xfe', FFFD),
     999            ('\xfe\x80\x80', FFFD*3),
     1000            # other sequences
     1001            ('\xf1\x80\x41\x42\x43', u'\ufffd\x41\x42\x43'),
     1002            ('\xf1\x80\xff\x42\x43', u'\ufffd\ufffd\x42\x43'),
     1003            ('\xf1\x80\xc2\x81\x43', u'\ufffd\x81\x43'),
     1004            ('\x61\xF1\x80\x80\xE1\x80\xC2\x62\x80\x63\x80\xBF\x64',
     1005             u'\x61\uFFFD\uFFFD\uFFFD\x62\uFFFD\x63\uFFFD\uFFFD\x64'),
     1006        ]
     1007        for n, (seq, res) in enumerate(sequences):
     1008            self.assertRaises(UnicodeDecodeError, seq.decode, 'utf-8', 'strict')
     1009            self.assertEqual(seq.decode('utf-8', 'replace'), res)
     1010            self.assertEqual((seq+'b').decode('utf-8', 'replace'), res+'b')
     1011            self.assertEqual(seq.decode('utf-8', 'ignore'),
     1012                             res.replace(u'\uFFFD', ''))
    5891013
    5901014    def test_codecs_idna(self):
     
    5981022        self.assertEqual(u'Andr\202 x'.encode('ascii','ignore'), "Andr x")
    5991023        self.assertEqual(u'Andr\202 x'.encode('ascii','replace'), "Andr? x")
     1024        self.assertEqual(u'Andr\202 x'.encode('ascii', 'replace'),
     1025                         u'Andr\202 x'.encode('ascii', errors='replace'))
     1026        self.assertEqual(u'Andr\202 x'.encode('ascii', 'ignore'),
     1027                         u'Andr\202 x'.encode(encoding='ascii', errors='ignore'))
    6001028
    6011029        # Error handling (decoding)
     
    6041032        self.assertEqual(unicode('Andr\202 x','ascii','ignore'), u"Andr x")
    6051033        self.assertEqual(unicode('Andr\202 x','ascii','replace'), u'Andr\uFFFD x')
     1034        self.assertEqual(u'abcde'.decode('ascii', 'ignore'),
     1035                         u'abcde'.decode('ascii', errors='ignore'))
     1036        self.assertEqual(u'abcde'.decode('ascii', 'replace'),
     1037                         u'abcde'.decode(encoding='ascii', errors='replace'))
    6061038
    6071039        # Error handling (unknown character names)
     
    6801112        for encoding in (
    6811113            'cp037', 'cp1026',
    682             'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
    683             'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
     1114            'cp437', 'cp500', 'cp720', 'cp737', 'cp775', 'cp850',
     1115            'cp852', 'cp855', 'cp858', 'cp860', 'cp861', 'cp862',
    6841116            'cp863', 'cp865', 'cp866',
    6851117            'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
     
    7081140        for encoding in (
    7091141            'cp037', 'cp1026',
    710             'cp437', 'cp500', 'cp737', 'cp775', 'cp850',
    711             'cp852', 'cp855', 'cp860', 'cp861', 'cp862',
     1142            'cp437', 'cp500', 'cp720', 'cp737', 'cp775', 'cp850',
     1143            'cp852', 'cp855', 'cp858', 'cp860', 'cp861', 'cp862',
    7121144            'cp863', 'cp865', 'cp866',
    7131145            'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15',
     
    10301462        self.assertEqual(u'{0:abc}'.format(C()), u'abc')
    10311463
    1032         # !r and !s coersions
     1464        # !r and !s coercions
    10331465        self.assertEqual(u'{0!s}'.format(u'Hello'), u'Hello')
    10341466        self.assertEqual(u'{0!s:}'.format(u'Hello'), u'Hello')
     
    10441476        self.assertEqual(u'{0}'.format([1]), u'[1]')
    10451477        self.assertEqual(u'{0}'.format(E(u'data')), u'E(data)')
    1046         self.assertEqual(u'{0:^10}'.format(E(u'data')), u' E(data)  ')
    1047         self.assertEqual(u'{0:^10s}'.format(E(u'data')), u' E(data)  ')
    10481478        self.assertEqual(u'{0:d}'.format(G(u'data')), u'G(data)')
    1049         self.assertEqual(u'{0:>15s}'.format(G(u'data')), u' string is data')
    10501479        self.assertEqual(u'{0!s}'.format(G(u'data')), u'string is data')
     1480
     1481        msg = 'object.__format__ with a non-empty format string is deprecated'
     1482        with test_support.check_warnings((msg, PendingDeprecationWarning)):
     1483            self.assertEqual(u'{0:^10}'.format(E(u'data')), u' E(data)  ')
     1484            self.assertEqual(u'{0:^10s}'.format(E(u'data')), u' E(data)  ')
     1485            self.assertEqual(u'{0:>15s}'.format(G(u'data')), u' string is data')
    10511486
    10521487        self.assertEqual(u"{0:date: %Y-%m-%d}".format(I(year=2007,
     
    11011536        self.assertRaises(ValueError, u"{0!rs}".format, 0)
    11021537        self.assertRaises(ValueError, u"{!}".format)
    1103         self.assertRaises(ValueError, u"{:}".format)
    1104         self.assertRaises(ValueError, u"{:s}".format)
    1105         self.assertRaises(ValueError, u"{}".format)
     1538        self.assertRaises(IndexError, u"{:}".format)
     1539        self.assertRaises(IndexError, u"{:s}".format)
     1540        self.assertRaises(IndexError, u"{}".format)
     1541        big = u"23098475029384702983476098230754973209482573"
     1542        self.assertRaises(ValueError, (u"{" + big + u"}").format)
     1543        self.assertRaises(ValueError, (u"{[" + big + u"]}").format, [0])
    11061544
    11071545        # issue 6089
     
    11301568        #  will fail
    11311569        self.assertRaises(UnicodeEncodeError, "foo{0}".format, u'\u1000bar')
     1570
     1571    def test_format_huge_precision(self):
     1572        format_string = u".{}f".format(sys.maxsize + 1)
     1573        with self.assertRaises(ValueError):
     1574            result = format(2.34, format_string)
     1575
     1576    def test_format_huge_width(self):
     1577        format_string = u"{}f".format(sys.maxsize + 1)
     1578        with self.assertRaises(ValueError):
     1579            result = format(2.34, format_string)
     1580
     1581    def test_format_huge_item_number(self):
     1582        format_string = u"{{{}:.6f}}".format(sys.maxsize + 1)
     1583        with self.assertRaises(ValueError):
     1584            result = format_string.format(2.34)
     1585
     1586    def test_format_auto_numbering(self):
     1587        class C:
     1588            def __init__(self, x=100):
     1589                self._x = x
     1590            def __format__(self, spec):
     1591                return spec
     1592
     1593        self.assertEqual(u'{}'.format(10), u'10')
     1594        self.assertEqual(u'{:5}'.format('s'), u's    ')
     1595        self.assertEqual(u'{!r}'.format('s'), u"'s'")
     1596        self.assertEqual(u'{._x}'.format(C(10)), u'10')
     1597        self.assertEqual(u'{[1]}'.format([1, 2]), u'2')
     1598        self.assertEqual(u'{[a]}'.format({'a':4, 'b':2}), u'4')
     1599        self.assertEqual(u'a{}b{}c'.format(0, 1), u'a0b1c')
     1600
     1601        self.assertEqual(u'a{:{}}b'.format('x', '^10'), u'a    x     b')
     1602        self.assertEqual(u'a{:{}x}b'.format(20, '#'), u'a0x14b')
     1603
     1604        # can't mix and match numbering and auto-numbering
     1605        self.assertRaises(ValueError, u'{}{1}'.format, 1, 2)
     1606        self.assertRaises(ValueError, u'{1}{}'.format, 1, 2)
     1607        self.assertRaises(ValueError, u'{:{1}}'.format, 1, 2)
     1608        self.assertRaises(ValueError, u'{0:{}}'.format, 1, 2)
     1609
     1610        # can mix and match auto-numbering and named
     1611        self.assertEqual(u'{f}{}'.format(4, f='test'), u'test4')
     1612        self.assertEqual(u'{}{f}'.format(4, f='test'), u'4test')
     1613        self.assertEqual(u'{:{f}}{g}{}'.format(1, 3, g='g', f=2), u' 1g3')
     1614        self.assertEqual(u'{f:{}}{}{g}'.format(2, 4, f=1, g='g'), u' 14g')
    11321615
    11331616    def test_raiseMemError(self):
     
    11431626        self.assertRaises(MemoryError, alloc)
    11441627
     1628    def test_format_subclass(self):
     1629        class U(unicode):
     1630            def __unicode__(self):
     1631                return u'__unicode__ overridden'
     1632        u = U(u'xxx')
     1633        self.assertEqual("%s" % u, u'__unicode__ overridden')
     1634        self.assertEqual("{}".format(u), '__unicode__ overridden')
     1635
     1636    def test_encode_decimal(self):
     1637        from _testcapi import unicode_encodedecimal
     1638        self.assertEqual(unicode_encodedecimal(u'123'),
     1639                         b'123')
     1640        self.assertEqual(unicode_encodedecimal(u'\u0663.\u0661\u0664'),
     1641                         b'3.14')
     1642        self.assertEqual(unicode_encodedecimal(u"\N{EM SPACE}3.14\N{EN SPACE}"),
     1643                         b' 3.14 ')
     1644        self.assertRaises(UnicodeEncodeError,
     1645                          unicode_encodedecimal, u"123\u20ac", "strict")
     1646        self.assertEqual(unicode_encodedecimal(u"123\u20ac", "replace"),
     1647                         b'123?')
     1648        self.assertEqual(unicode_encodedecimal(u"123\u20ac", "ignore"),
     1649                         b'123')
     1650        self.assertEqual(unicode_encodedecimal(u"123\u20ac", "xmlcharrefreplace"),
     1651                         b'123&#8364;')
     1652        self.assertEqual(unicode_encodedecimal(u"123\u20ac", "backslashreplace"),
     1653                         b'123\\u20ac')
     1654        self.assertEqual(unicode_encodedecimal(u"123\u20ac\N{EM SPACE}", "replace"),
     1655                         b'123? ')
     1656        self.assertEqual(unicode_encodedecimal(u"123\u20ac\u20ac", "replace"),
     1657                         b'123??')
     1658        self.assertEqual(unicode_encodedecimal(u"123\u20ac\u0660", "replace"),
     1659                         b'123?0')
     1660
     1661    def test_encode_decimal_with_surrogates(self):
     1662        from _testcapi import unicode_encodedecimal
     1663        tests = [(u'\U0001f49d', '&#128157;'),
     1664                 (u'\ud83d', '&#55357;'),
     1665                 (u'\udc9d', '&#56477;'),
     1666                ]
     1667        if u'\ud83d\udc9d' != u'\U0001f49d':
     1668            tests += [(u'\ud83d\udc9d', '&#55357;&#56477;')]
     1669        for s, exp in tests:
     1670            self.assertEqual(
     1671                    unicode_encodedecimal(u"123" + s, "xmlcharrefreplace"),
     1672                    '123' + exp)
     1673
    11451674def test_main():
    11461675    test_support.run_unittest(__name__)
Note: See TracChangeset for help on using the changeset viewer.