Changeset 391 for python/trunk/Lib/test/test_codeccallbacks.py
- Timestamp:
- Mar 19, 2014, 11:31:01 PM (11 years ago)
- Location:
- python/trunk
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
python/trunk
-
Property svn:mergeinfo
set to
/python/vendor/Python-2.7.6 merged eligible /python/vendor/current merged eligible
-
Property svn:mergeinfo
set to
-
python/trunk/Lib/test/test_codeccallbacks.py
r2 r391 67 67 # For ascii, latin-1 and charmaps this is completely implemented 68 68 # in C and should be reasonably fast. 69 s = u"\u30b9\u30d1\u30e2 \xe4nd egg s"69 s = u"\u30b9\u30d1\u30e2 \xe4nd egg\u0161" 70 70 self.assertEqual( 71 71 s.encode("ascii", "xmlcharrefreplace"), 72 "&#12473;&#12497;&#12514; &#228;nd egg s"72 "&#12473;&#12497;&#12514; &#228;nd egg&#353;" 73 73 ) 74 74 self.assertEqual( 75 75 s.encode("latin-1", "xmlcharrefreplace"), 76 "&#12473;&#12497;&#12514; \xe4nd eggs" 77 ) 76 "&#12473;&#12497;&#12514; \xe4nd egg&#353;" 77 ) 78 self.assertEqual( 79 s.encode("iso-8859-15", "xmlcharrefreplace"), 80 "&#12473;&#12497;&#12514; \xe4nd egg\xa8" 81 ) 82 83 def test_xmlcharrefreplace_with_surrogates(self): 84 tests = [(u'\U0001f49d', '&#128157;'), 85 (u'\ud83d', '&#55357;'), 86 (u'\udc9d', '&#56477;'), 87 ] 88 if u'\ud83d\udc9d' != u'\U0001f49d': 89 tests += [(u'\ud83d\udc9d', '&#55357;&#56477;')] 90 for encoding in ['ascii', 'latin1', 'iso-8859-15']: 91 for s, exp in tests: 92 self.assertEqual(s.encode(encoding, 'xmlcharrefreplace'), 93 exp, msg='%r.encode(%r)' % (s, encoding)) 94 self.assertEqual((s+'X').encode(encoding, 'xmlcharrefreplace'), 95 exp+'X', 96 msg='%r.encode(%r)' % (s + 'X', encoding)) 78 97 79 98 def test_xmlcharnamereplace(self): … … 154 173 self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout) 155 174 156 def test_decoderelaxedutf8(self): 157 # This is the test for a decoding callback handler, 158 # that relaxes the UTF-8 minimal encoding restriction. 159 # A null byte that is encoded as "\xc0\x80" will be 160 # decoded as a null byte. All other illegal sequences 161 # will be handled strictly. 175 def test_decoding_callbacks(self): 176 # This is a test for a decoding callback handler 177 # that allows the decoding of the invalid sequence 178 # "\xc0\x80" and returns "\x00" instead of raising an error. 179 # All other illegal sequences will be handled strictly. 162 180 def relaxedutf8(exc): 163 181 if not isinstance(exc, UnicodeDecodeError): 164 182 raise TypeError("don't know how to handle %r" % exc) 165 if exc.object[exc.start:exc. 
end].startswith("\xc0\x80"):183 if exc.object[exc.start:exc.start+2] == "\xc0\x80": 166 184 return (u"\x00", exc.start+2) # retry after two bytes 167 185 else: 168 186 raise exc 169 187 170 codecs.register_error( 171 "test.relaxedutf8", relaxedutf8) 172 188 codecs.register_error("test.relaxedutf8", relaxedutf8) 189 190 # all the "\xc0\x80" will be decoded to "\x00" 173 191 sin = "a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80" 174 192 sout = u"a\x00b\x00c\xfc\x00\x00" 175 193 self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout) 194 195 # "\xc0\x81" is not valid and a UnicodeDecodeError will be raised 176 196 sin = "\xc0\x80\xc0\x81" 177 self.assertRaises(UnicodeError, sin.decode, "utf-8", "test.relaxedutf8") 197 self.assertRaises(UnicodeDecodeError, sin.decode, 198 "utf-8", "test.relaxedutf8") 178 199 179 200 def test_charmapencode(self): … … 185 206 sin = u"abc" 186 207 sout = "AABBCC" 187 self.assertEqual s(codecs.charmap_encode(sin, "strict", charmap)[0], sout)208 self.assertEqual(codecs.charmap_encode(sin, "strict", charmap)[0], sout) 188 209 189 210 sin = u"abcA" … … 193 214 sin = u"abcDEF" 194 215 sout = "AABBCCXYZXYZXYZ" 195 self.assertEqual s(codecs.charmap_encode(sin, "replace", charmap)[0], sout)216 self.assertEqual(codecs.charmap_encode(sin, "replace", charmap)[0], sout) 196 217 197 218 charmap[ord("?")] = u"XYZ" … … 261 282 self.assertEqual( 262 283 "\\u3042\u3xxx".decode("unicode-escape", "test.handler1"), 263 u"\u3042[<92><117><51> <120>]xx"284 u"\u3042[<92><117><51>]xxx" 264 285 ) 265 286 266 287 self.assertEqual( 267 288 "\\u3042\u3xx".decode("unicode-escape", "test.handler1"), 268 u"\u3042[<92><117><51> <120><120>]"289 u"\u3042[<92><117><51>]xx" 269 290 ) 270 291 … … 326 347 # check with the correct number and type of arguments 327 348 exc = exctype(*args) 328 self.assertEqual s(str(exc), msg)349 self.assertEqual(str(exc), msg) 329 350 330 351 def test_unicodeencodeerror(self): … … 436 457 ) 437 458 # If the correct exception is passed in, 
"ignore" returns an empty replacement 438 self.assertEqual s(459 self.assertEqual( 439 460 codecs.ignore_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")), 440 461 (u"", 1) 441 462 ) 442 self.assertEqual s(463 self.assertEqual( 443 464 codecs.ignore_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")), 444 465 (u"", 1) 445 466 ) 446 self.assertEqual s(467 self.assertEqual( 447 468 codecs.ignore_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")), 448 469 (u"", 1) … … 473 494 ) 474 495 # With the correct exception, "replace" returns an "?" or u"\ufffd" replacement 475 self.assertEqual s(496 self.assertEqual( 476 497 codecs.replace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")), 477 498 (u"?", 1) 478 499 ) 479 self.assertEqual s(500 self.assertEqual( 480 501 codecs.replace_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")), 481 502 (u"\ufffd", 1) 482 503 ) 483 self.assertEqual s(504 self.assertEqual( 484 505 codecs.replace_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")), 485 506 (u"\ufffd", 1) … … 513 534 cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042) 514 535 s = "".join(unichr(c) for c in cs) 515 self.assertEqual s(536 self.assertEqual( 516 537 codecs.xmlcharrefreplace_errors( 517 538 UnicodeEncodeError("ascii", s, 0, len(s), "ouch") … … 545 566 ) 546 567 # Use the correct exception 547 self.assertEqual s(568 self.assertEqual( 548 569 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")), 549 570 (u"\\u3042", 1) 550 571 ) 551 self.assertEqual s(572 self.assertEqual( 552 573 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")), 553 574 (u"\\x00", 1) 554 575 ) 555 self.assertEqual s(576 self.assertEqual( 556 577 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")), 557 578 (u"\\xff", 1) 558 579 ) 559 self.assertEqual s(580 self.assertEqual( 560 581 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 
1, "ouch")), 561 582 (u"\\u0100", 1) 562 583 ) 563 self.assertEqual s(584 self.assertEqual( 564 585 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")), 565 586 (u"\\uffff", 1) 566 587 ) 567 588 if sys.maxunicode>0xffff: 568 self.assertEqual s(589 self.assertEqual( 569 590 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")), 570 591 (u"\\U00010000", 1) 571 592 ) 572 self.assertEqual s(593 self.assertEqual( 573 594 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")), 574 595 (u"\\U0010ffff", 1) … … 580 601 581 602 for res in results: 582 codecs.register_error("test.badhandler", lambda : res)603 codecs.register_error("test.badhandler", lambda x: res) 583 604 for enc in encs: 584 605 self.assertRaises( … … 602 623 603 624 def test_lookup(self): 604 self.assertEqual s(codecs.strict_errors, codecs.lookup_error("strict"))605 self.assertEqual s(codecs.ignore_errors, codecs.lookup_error("ignore"))606 self.assertEqual s(codecs.strict_errors, codecs.lookup_error("strict"))607 self.assertEqual s(625 self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict")) 626 self.assertEqual(codecs.ignore_errors, codecs.lookup_error("ignore")) 627 self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict")) 628 self.assertEqual( 608 629 codecs.xmlcharrefreplace_errors, 609 630 codecs.lookup_error("xmlcharrefreplace") 610 631 ) 611 self.assertEqual s(632 self.assertEqual( 612 633 codecs.backslashreplace_errors, 613 634 codecs.lookup_error("backslashreplace") … … 685 706 # Valid negative position 686 707 handler.pos = -1 687 self.assertEqual s("\xff0".decode("ascii", "test.posreturn"), u"<?>0")708 self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?>0") 688 709 689 710 # Valid negative position 690 711 handler.pos = -2 691 self.assertEqual s("\xff0".decode("ascii", "test.posreturn"), u"<?><?>")712 self.assertEqual("\xff0".decode("ascii", 
"test.posreturn"), u"<?><?>") 692 713 693 714 # Negative position out of bounds … … 697 718 # Valid positive position 698 719 handler.pos = 1 699 self.assertEqual s("\xff0".decode("ascii", "test.posreturn"), u"<?>0")720 self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?>0") 700 721 701 722 # Largest valid positive position (one beyond end of input) 702 723 handler.pos = 2 703 self.assertEqual s("\xff0".decode("ascii", "test.posreturn"), u"<?>")724 self.assertEqual("\xff0".decode("ascii", "test.posreturn"), u"<?>") 704 725 705 726 # Invalid positive position … … 709 730 # Restart at the "0" 710 731 handler.pos = 6 711 self.assertEqual s("\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), u"<?>0")732 self.assertEqual("\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), u"<?>0") 712 733 713 734 class D(dict): … … 716 737 self.assertRaises(UnicodeError, codecs.charmap_decode, "\xff", "strict", {0xff: None}) 717 738 self.assertRaises(ValueError, codecs.charmap_decode, "\xff", "strict", D()) 718 self.assertRaises(TypeError, codecs.charmap_decode, "\xff", "strict", {0xff: sys.maxunicode+1})739 self.assertRaises(TypeError, codecs.charmap_decode, "\xff", "strict", {0xff: 0x110000}) 719 740 720 741 def test_encodehelper(self): … … 739 760 # Valid negative position 740 761 handler.pos = -1 741 self.assertEqual s(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")762 self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?>0") 742 763 743 764 # Valid negative position 744 765 handler.pos = -2 745 self.assertEqual s(u"\xff0".encode("ascii", "test.posreturn"), "<?><?>")766 self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?><?>") 746 767 747 768 # Negative position out of bounds … … 751 772 # Valid positive position 752 773 handler.pos = 1 753 self.assertEqual s(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")774 self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?>0") 754 775 755 776 # Largest valid positive 
position (one beyond end of input 756 777 handler.pos = 2 757 self.assertEqual s(u"\xff0".encode("ascii", "test.posreturn"), "<?>")778 self.assertEqual(u"\xff0".encode("ascii", "test.posreturn"), "<?>") 758 779 759 780 # Invalid positive position
Note: See TracChangeset for help on using the changeset viewer.