Ignore:
Timestamp:
Mar 19, 2014, 11:31:01 PM (11 years ago)
Author:
dmik
Message:

python: Merge vendor 2.7.6 to trunk.

Location:
python/trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • python/trunk

  • python/trunk/Lib/test/test_codeccallbacks.py

    r2 r391  
    6767        # For ascii, latin-1 and charmaps this is completely implemented
    6868        # in C and should be reasonably fast.
    69         s = u"\u30b9\u30d1\u30e2 \xe4nd eggs"
     69        s = u"\u30b9\u30d1\u30e2 \xe4nd egg\u0161"
    7070        self.assertEqual(
    7171            s.encode("ascii", "xmlcharrefreplace"),
    72             "&#12473;&#12497;&#12514; &#228;nd eggs"
     72            "&#12473;&#12497;&#12514; &#228;nd egg&#353;"
    7373        )
    7474        self.assertEqual(
    7575            s.encode("latin-1", "xmlcharrefreplace"),
    76             "&#12473;&#12497;&#12514; \xe4nd eggs"
    77         )
     76            "&#12473;&#12497;&#12514; \xe4nd egg&#353;"
     77        )
     78        self.assertEqual(
     79            s.encode("iso-8859-15", "xmlcharrefreplace"),
     80            "&#12473;&#12497;&#12514; \xe4nd egg\xa8"
     81        )
     82
     83    def test_xmlcharrefreplace_with_surrogates(self):
     84        tests = [(u'\U0001f49d', '&#128157;'),
     85                 (u'\ud83d', '&#55357;'),
     86                 (u'\udc9d', '&#56477;'),
     87                ]
     88        if u'\ud83d\udc9d' != u'\U0001f49d':
     89            tests += [(u'\ud83d\udc9d', '&#55357;&#56477;')]
     90        for encoding in ['ascii', 'latin1', 'iso-8859-15']:
     91            for s, exp in tests:
     92                self.assertEqual(s.encode(encoding, 'xmlcharrefreplace'),
     93                                 exp, msg='%r.encode(%r)' % (s, encoding))
     94                self.assertEqual((s+'X').encode(encoding, 'xmlcharrefreplace'),
     95                                 exp+'X',
     96                                 msg='%r.encode(%r)' % (s + 'X', encoding))
    7897
    7998    def test_xmlcharnamereplace(self):
     
    154173        self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
    155174
    156     def test_decoderelaxedutf8(self):
    157         # This is the test for a decoding callback handler,
    158         # that relaxes the UTF-8 minimal encoding restriction.
    159         # A null byte that is encoded as "\xc0\x80" will be
    160         # decoded as a null byte. All other illegal sequences
    161         # will be handled strictly.
     175    def test_decoding_callbacks(self):
     176        # This is a test for a decoding callback handler
     177        # that allows the decoding of the invalid sequence
     178        # "\xc0\x80" and returns "\x00" instead of raising an error.
     179        # All other illegal sequences will be handled strictly.
    162180        def relaxedutf8(exc):
    163181            if not isinstance(exc, UnicodeDecodeError):
    164182                raise TypeError("don't know how to handle %r" % exc)
    165             if exc.object[exc.start:exc.end].startswith("\xc0\x80"):
     183            if exc.object[exc.start:exc.start+2] == "\xc0\x80":
    166184                return (u"\x00", exc.start+2) # retry after two bytes
    167185            else:
    168186                raise exc
    169187
    170         codecs.register_error(
    171             "test.relaxedutf8", relaxedutf8)
    172 
     188        codecs.register_error("test.relaxedutf8", relaxedutf8)
     189
     190        # all the "\xc0\x80" will be decoded to "\x00"
    173191        sin = "a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80"
    174192        sout = u"a\x00b\x00c\xfc\x00\x00"
    175193        self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout)
     194
     195        # "\xc0\x81" is not valid and a UnicodeDecodeError will be raised
    176196        sin = "\xc0\x80\xc0\x81"
    177         self.assertRaises(UnicodeError, sin.decode, "utf-8", "test.relaxedutf8")
     197        self.assertRaises(UnicodeDecodeError, sin.decode,
     198                          "utf-8", "test.relaxedutf8")
    178199
    179200    def test_charmapencode(self):
     
    185206        sin = u"abc"
    186207        sout = "AABBCC"
    187         self.assertEquals(codecs.charmap_encode(sin, "strict", charmap)[0], sout)
     208        self.assertEqual(codecs.charmap_encode(sin, "strict", charmap)[0], sout)
    188209
    189210        sin = u"abcA"
     
    193214        sin = u"abcDEF"
    194215        sout = "AABBCCXYZXYZXYZ"
    195         self.assertEquals(codecs.charmap_encode(sin, "replace", charmap)[0], sout)
     216        self.assertEqual(codecs.charmap_encode(sin, "replace", charmap)[0], sout)
    196217
    197218        charmap[ord("?")] = u"XYZ"
     
    261282        self.assertEqual(
    262283            "\\u3042\u3xxx".decode("unicode-escape", "test.handler1"),
    263             u"\u3042[<92><117><51><120>]xx"
     284            u"\u3042[<92><117><51>]xxx"
    264285        )
    265286
    266287        self.assertEqual(
    267288            "\\u3042\u3xx".decode("unicode-escape", "test.handler1"),
    268             u"\u3042[<92><117><51><120><120>]"
     289            u"\u3042[<92><117><51>]xx"
    269290        )
    270291
     
    326347        # check with the correct number and type of arguments
    327348        exc = exctype(*args)
    328         self.assertEquals(str(exc), msg)
     349        self.assertEqual(str(exc), msg)
    329350
    330351    def test_unicodeencodeerror(self):
     
    436457        )
    437458        # If the correct exception is passed in, "ignore" returns an empty replacement
    438         self.assertEquals(
     459        self.assertEqual(
    439460            codecs.ignore_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
    440461            (u"", 1)
    441462        )
    442         self.assertEquals(
     463        self.assertEqual(
    443464            codecs.ignore_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
    444465            (u"", 1)
    445466        )
    446         self.assertEquals(
     467        self.assertEqual(
    447468            codecs.ignore_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
    448469            (u"", 1)
     
    473494        )
    474495        # With the correct exception, "replace" returns an "?" or u"\ufffd" replacement
    475         self.assertEquals(
     496        self.assertEqual(
    476497            codecs.replace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
    477498            (u"?", 1)
    478499        )
    479         self.assertEquals(
     500        self.assertEqual(
    480501            codecs.replace_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
    481502            (u"\ufffd", 1)
    482503        )
    483         self.assertEquals(
     504        self.assertEqual(
    484505            codecs.replace_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
    485506            (u"\ufffd", 1)
     
    513534        cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042)
    514535        s = "".join(unichr(c) for c in cs)
    515         self.assertEquals(
     536        self.assertEqual(
    516537            codecs.xmlcharrefreplace_errors(
    517538                UnicodeEncodeError("ascii", s, 0, len(s), "ouch")
     
    545566        )
    546567        # Use the correct exception
    547         self.assertEquals(
     568        self.assertEqual(
    548569            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
    549570            (u"\\u3042", 1)
    550571        )
    551         self.assertEquals(
     572        self.assertEqual(
    552573            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")),
    553574            (u"\\x00", 1)
    554575        )
    555         self.assertEquals(
     576        self.assertEqual(
    556577            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")),
    557578            (u"\\xff", 1)
    558579        )
    559         self.assertEquals(
     580        self.assertEqual(
    560581            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 1, "ouch")),
    561582            (u"\\u0100", 1)
    562583        )
    563         self.assertEquals(
     584        self.assertEqual(
    564585            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")),
    565586            (u"\\uffff", 1)
    566587        )
    567588        if sys.maxunicode>0xffff:
    568             self.assertEquals(
     589            self.assertEqual(
    569590                codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")),
    570591                (u"\\U00010000", 1)
    571592            )
    572             self.assertEquals(
     593            self.assertEqual(
    573594                codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")),
    574595                (u"\\U0010ffff", 1)
     
    580601
    581602        for res in results:
    582             codecs.register_error("test.badhandler", lambda: res)
     603            codecs.register_error("test.badhandler", lambda x: res)
    583604            for enc in encs:
    584605                self.assertRaises(
     
    602623
    603624    def test_lookup(self):
    604         self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
    605         self.assertEquals(codecs.ignore_errors, codecs.lookup_error("ignore"))
    606         self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
    607         self.assertEquals(
     625        self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
     626        self.assertEqual(codecs.ignore_errors, codecs.lookup_error("ignore"))
     627        self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
     628        self.assertEqual(
    608629            codecs.xmlcharrefreplace_errors,
    609630            codecs.lookup_error("xmlcharrefreplace")
    610631        )
    611         self.assertEquals(
     632        self.assertEqual(
    612633            codecs.backslashreplace_errors,
    613634            codecs.lookup_error("backslashreplace")
     
    685706        # Valid negative position
    686707        handler.pos = -1
    687         self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>0")
     708        self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?>0")
    688709
    689710        # Valid negative position
    690711        handler.pos = -2
    691         self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?><?>")
     712        self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?><?>")
    692713
    693714        # Negative position out of bounds
     
    697718        # Valid positive position
    698719        handler.pos = 1
    699         self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>0")
     720        self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?>0")
    700721
    701722        # Largest valid positive position (one beyond end of input)
    702723        handler.pos = 2
    703         self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>")
     724        self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?>")
    704725
    705726        # Invalid positive position
     
    709730        # Restart at the "0"
    710731        handler.pos = 6
    711         self.assertEquals("\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), u"<?>0")
     732        self.assertEqual("\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), u"<?>0")
    712733
    713734        class D(dict):
     
    716737        self.assertRaises(UnicodeError, codecs.charmap_decode, "\xff", "strict", {0xff: None})
    717738        self.assertRaises(ValueError, codecs.charmap_decode, "\xff", "strict", D())
    718         self.assertRaises(TypeError, codecs.charmap_decode, "\xff", "strict", {0xff: sys.maxunicode+1})
     739        self.assertRaises(TypeError, codecs.charmap_decode, "\xff", "strict", {0xff: 0x110000})
    719740
    720741    def test_encodehelper(self):
     
    739760        # Valid negative position
    740761        handler.pos = -1
    741         self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")
     762        self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")
    742763
    743764        # Valid negative position
    744765        handler.pos = -2
    745         self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?><?>")
     766        self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?><?>")
    746767
    747768        # Negative position out of bounds
     
    751772        # Valid positive position
    752773        handler.pos = 1
    753         self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")
     774        self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")
    754775
    755776        # Largest valid positive position (one beyond end of input
    756777        handler.pos = 2
    757         self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>")
     778        self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?>")
    758779
    759780        # Invalid positive position
Note: See TracChangeset for help on using the changeset viewer.