Changeset 391 for python/trunk/Lib/test/test_codecs.py
- Timestamp: Mar 19, 2014, 11:31:01 PM (11 years ago)
- Location: python/trunk
- Files: 2 edited
python/trunk
- Property svn:mergeinfo set to:
  /python/vendor/Python-2.7.6: merged, eligible
  /python/vendor/current: merged, eligible
python/trunk/Lib/test/test_codecs.py
The diff against r2 touches nearly every test class. At module level, import locale is added and a new helper is introduced:

    def coding_checker(self, coder):
        def check(input, expect):
            self.assertEqual(coder(input), (expect, len(input)))
        return check

Throughout the file, deprecated assertion aliases are modernized: assert_ becomes assertTrue, assertEquals becomes assertEqual, and assert_(isinstance(...)) becomes assertIsInstance.

UTF32Test, UTF32LETest and UTF32BETest: test_partial now decodes u"\x00\xff\u0100\uffff\U00010000" byte by byte, adding a non-BMP character and the corresponding expected intermediate results. Each class also gains a test_issue8941 (Issue #8941: insufficient result allocation when decoding into surrogate pairs on UCS-2 builds) that decodes 1024 copies of U+10000 encoded as UTF-32 with a BOM, as UTF-32-LE and as UTF-32-BE, and compares the result against u'\U00010000' * 1024.
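As a standalone illustration (not part of the changeset itself), the following sketch exercises the same UTF-32 decoding behavior that the new test_issue8941 cases assert; it should run under Python 2.7:

    import codecs

    # 1024 copies of U+10000: little-endian UTF-32 with a BOM, then the explicit LE codec.
    encoded_le = '\xff\xfe\x00\x00' + '\x00\x00\x01\x00' * 1024
    decoded = codecs.utf_32_decode(encoded_le)[0]
    assert decoded == u'\U00010000' * 1024

    # Without a BOM, using the little-endian codec directly.
    decoded = codecs.utf_32_le_decode('\x00\x00\x01\x00' * 1024)[0]
    assert decoded == u'\U00010000' * 1024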
UTF16Test: the assert_ and assertEquals calls in test_only_one_bom are modernized, test_partial gains the same non-BMP character, and test_bug691291 replaces its try/finally cleanup with self.addCleanup(test_support.unlink, test_support.TESTFN) before writing and re-reading the test file through codecs.open in 'U' mode.

UTF16LETest and UTF16BETest: test_errors grows from a single assertRaises into a table of malformed inputs (truncated code units and lone surrogates such as b'\xff', b'\x00\xd8A\x00' for the LE codec and b'\xd8\x00\x00A' for the BE codec). For each entry the test asserts that strict decoding raises UnicodeDecodeError and that decoding with the 'replace' handler yields the expected u'\ufffd' substitutions.
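A minimal sketch (not from the diff, but using one of the UTF-16-LE entries from the new table) of the strict-versus-replace behavior those tables check:

    import codecs

    raw = '\x00\xd8A\x00'   # lone high surrogate followed by u'A' in UTF-16-LE

    # Strict decoding refuses the lone surrogate...
    try:
        codecs.utf_16_le_decode(raw, 'strict', True)
    except UnicodeDecodeError:
        pass
    else:
        raise AssertionError('expected UnicodeDecodeError')

    # ...while the 'replace' handler substitutes U+FFFD and keeps going.
    assert raw.decode('utf-16le', 'replace') == u'\ufffdA'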
UTF8Test and UTF8SigTest: test_partial is extended with the same non-BMP character, u"\x00\xff\u07ff\u0800\uffff\U00010000" (with a leading u"\ufeff" in the UTF-8-sig variant), together with the extra expected intermediate results.

UTF7Test: a new test_errors feeds malformed UTF-7 input such as 'a+IK', 'a+IKwgr,-b' and 'a+////,+IKw-b' to codecs.utf_7_decode, checking that strict decoding raises UnicodeDecodeError and that the 'replace' handler yields the expected u'\ufffd' substitutions. A new test_nonbmp verifies that u'\U000104A0' and the equivalent surrogate pair u'\ud801\udca0' both encode to '+2AHcoA-', and that '+2AHcoA-' decodes back to u'\U000104A0'.

EscapeDecodeTest: test_empty switches to assertEqual, and three tests are added for codecs.escape_decode: test_raw (every non-backslash byte passes through unchanged), test_escape (all recognized backslash escapes, octal and \x escapes, and unknown escapes such as br"[\8]" being left alone), and test_errors (truncated \x escapes raise ValueError under strict handling and are dropped or replaced with '?' under the 'ignore' and 'replace' handlers).
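For illustration only (mirroring cases from the new EscapeDecodeTest rather than adding new ones), a standalone Python 2.7 snippet; escape_decode returns a (decoded bytes, length consumed) pair:

    import codecs

    decode = codecs.escape_decode

    # Recognized escapes are translated; the second item counts input bytes consumed.
    assert decode(r"[\x41]") == ("[A]", 6)
    assert decode(r"[\101]") == ("[A]", 6)
    assert decode(r"[\n]") == ("[\x0a]", 4)

    # Unknown escapes are passed through with the backslash preserved.
    assert decode(r"[\8]") == (r"[\8]", 4)

    # Truncated \x escapes raise ValueError under the default 'strict' handling.
    try:
        decode(r"\x0")
    except ValueError:
        pass
    else:
        raise AssertionError('expected ValueError')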
(b"[?]?", 6)) 810 self.assertRaises(ValueError, decode, br"\x0") 811 self.assertRaises(ValueError, decode, br"[\x0]") 812 self.assertEqual(decode(br"[\x0]\x0", "ignore"), (b"[]", 8)) 813 self.assertEqual(decode(br"[\x0]\x0", "replace"), (b"[?]?", 8)) 652 814 653 815 class RecodingTest(unittest.TestCase): … … 776 938 # lower is also insufficient, since some of the input characters 777 939 # are upper case. 778 self.assertEqual s(uni.encode("punycode").lower(), puny.lower())940 self.assertEqual(uni.encode("punycode").lower(), puny.lower()) 779 941 780 942 def test_decode(self): 781 943 for uni, puny in punycode_testcases: 782 self.assertEqual s(uni, puny.decode("punycode"))944 self.assertEqual(uni, puny.decode("punycode")) 783 945 784 946 class UnicodeInternalTest(unittest.TestCase): … … 802 964 if sys.byteorder == "little": 803 965 internal = "".join(reversed(internal)) 804 self.assertEqual s(uni, internal.decode("unicode_internal"))966 self.assertEqual(uni, internal.decode("unicode_internal")) 805 967 for internal in not_ok: 806 968 if sys.byteorder == "little": … … 814 976 "\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal") 815 977 except UnicodeDecodeError, ex: 816 self.assertEqual s("unicode_internal", ex.encoding)817 self.assertEqual s("\x00\x00\x00\x00\x00\x11\x11\x00", ex.object)818 self.assertEqual s(4, ex.start)819 self.assertEqual s(8, ex.end)978 self.assertEqual("unicode_internal", ex.encoding) 979 self.assertEqual("\x00\x00\x00\x00\x00\x11\x11\x00", ex.object) 980 self.assertEqual(4, ex.start) 981 self.assertEqual(8, ex.end) 820 982 else: 821 983 self.fail() … … 828 990 ignored = decoder("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]), 829 991 "UnicodeInternalTest") 830 self.assertEquals((u"ab", 12), ignored) 992 self.assertEqual((u"ab", 12), ignored) 993 994 def test_encode_length(self): 995 # Issue 3739 996 encoder = codecs.getencoder("unicode_internal") 997 self.assertEqual(encoder(u"a")[1], 1) 998 self.assertEqual(encoder(u"\xe9\u0142")[1], 2) 999 1000 encoder = codecs.getencoder("string-escape") 1001 self.assertEqual(encoder(r'\x00')[1], 4) 831 1002 832 1003 # From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html … … 999 1170 prepped = unicode(prepped, "utf-8") 1000 1171 try: 1001 self.assertEqual s(nameprep(orig), prepped)1172 self.assertEqual(nameprep(orig), prepped) 1002 1173 except Exception,e: 1003 1174 raise test_support.TestFailed("Test 3.%d: %s" % (pos+1, str(e))) … … 1005 1176 class IDNACodecTest(unittest.TestCase): 1006 1177 def test_builtin_decode(self): 1007 self.assertEqual s(unicode("python.org", "idna"), u"python.org")1008 self.assertEqual s(unicode("python.org.", "idna"), u"python.org.")1009 self.assertEqual s(unicode("xn--pythn-mua.org", "idna"), u"pyth\xf6n.org")1010 self.assertEqual s(unicode("xn--pythn-mua.org.", "idna"), u"pyth\xf6n.org.")1178 self.assertEqual(unicode("python.org", "idna"), u"python.org") 1179 self.assertEqual(unicode("python.org.", "idna"), u"python.org.") 1180 self.assertEqual(unicode("xn--pythn-mua.org", "idna"), u"pyth\xf6n.org") 1181 self.assertEqual(unicode("xn--pythn-mua.org.", "idna"), u"pyth\xf6n.org.") 1011 1182 1012 1183 def test_builtin_encode(self): 1013 self.assertEqual s(u"python.org".encode("idna"), "python.org")1014 self.assertEqual s("python.org.".encode("idna"), "python.org.")1015 self.assertEqual s(u"pyth\xf6n.org".encode("idna"), "xn--pythn-mua.org")1016 self.assertEqual s(u"pyth\xf6n.org.".encode("idna"), "xn--pythn-mua.org.")1184 self.assertEqual(u"python.org".encode("idna"), 
"python.org") 1185 self.assertEqual("python.org.".encode("idna"), "python.org.") 1186 self.assertEqual(u"pyth\xf6n.org".encode("idna"), "xn--pythn-mua.org") 1187 self.assertEqual(u"pyth\xf6n.org.".encode("idna"), "xn--pythn-mua.org.") 1017 1188 1018 1189 def test_stream(self): … … 1020 1191 r = codecs.getreader("idna")(StringIO.StringIO("abc")) 1021 1192 r.read(3) 1022 self.assertEqual s(r.read(), u"")1193 self.assertEqual(r.read(), u"") 1023 1194 1024 1195 def test_incremental_decode(self): 1025 self.assertEqual s(1196 self.assertEqual( 1026 1197 "".join(codecs.iterdecode("python.org", "idna")), 1027 1198 u"python.org" 1028 1199 ) 1029 self.assertEqual s(1200 self.assertEqual( 1030 1201 "".join(codecs.iterdecode("python.org.", "idna")), 1031 1202 u"python.org." 1032 1203 ) 1033 self.assertEqual s(1204 self.assertEqual( 1034 1205 "".join(codecs.iterdecode("xn--pythn-mua.org.", "idna")), 1035 1206 u"pyth\xf6n.org." 1036 1207 ) 1037 self.assertEqual s(1208 self.assertEqual( 1038 1209 "".join(codecs.iterdecode("xn--pythn-mua.org.", "idna")), 1039 1210 u"pyth\xf6n.org." … … 1041 1212 1042 1213 decoder = codecs.getincrementaldecoder("idna")() 1043 self.assertEqual s(decoder.decode("xn--xam", ), u"")1044 self.assertEqual s(decoder.decode("ple-9ta.o", ), u"\xe4xample.")1045 self.assertEqual s(decoder.decode(u"rg"), u"")1046 self.assertEqual s(decoder.decode(u"", True), u"org")1214 self.assertEqual(decoder.decode("xn--xam", ), u"") 1215 self.assertEqual(decoder.decode("ple-9ta.o", ), u"\xe4xample.") 1216 self.assertEqual(decoder.decode(u"rg"), u"") 1217 self.assertEqual(decoder.decode(u"", True), u"org") 1047 1218 1048 1219 decoder.reset() 1049 self.assertEqual s(decoder.decode("xn--xam", ), u"")1050 self.assertEqual s(decoder.decode("ple-9ta.o", ), u"\xe4xample.")1051 self.assertEqual s(decoder.decode("rg."), u"org.")1052 self.assertEqual s(decoder.decode("", True), u"")1220 self.assertEqual(decoder.decode("xn--xam", ), u"") 1221 self.assertEqual(decoder.decode("ple-9ta.o", ), u"\xe4xample.") 1222 self.assertEqual(decoder.decode("rg."), u"org.") 1223 self.assertEqual(decoder.decode("", True), u"") 1053 1224 1054 1225 def test_incremental_encode(self): 1055 self.assertEqual s(1226 self.assertEqual( 1056 1227 "".join(codecs.iterencode(u"python.org", "idna")), 1057 1228 "python.org" 1058 1229 ) 1059 self.assertEqual s(1230 self.assertEqual( 1060 1231 "".join(codecs.iterencode(u"python.org.", "idna")), 1061 1232 "python.org." 1062 1233 ) 1063 self.assertEqual s(1234 self.assertEqual( 1064 1235 "".join(codecs.iterencode(u"pyth\xf6n.org.", "idna")), 1065 1236 "xn--pythn-mua.org." 1066 1237 ) 1067 self.assertEqual s(1238 self.assertEqual( 1068 1239 "".join(codecs.iterencode(u"pyth\xf6n.org.", "idna")), 1069 1240 "xn--pythn-mua.org." 
CodecsModuleTest: test_decode and test_encode move to assertEqual, and a new test_lookup_issue1813 (Issue #1813) sets the LC_CTYPE locale to 'tr_TR' (skipping the test if that locale is unavailable, and restoring the old locale with addCleanup) and verifies that codecs.lookup('ASCII') still succeeds and returns a codec named 'ascii', even though Turkish locales lowercase 'I' to a dotless i.

StreamReaderTest, EncodedFileTest and Str2StrTest: assertEquals becomes assertEqual, and the assert_(isinstance(sout, str)) checks become assertIsInstance(sout, str).
all_unicode_encodings: "cp720" and "cp858" are added to the list of encodings exercised by the round-trip tests, and the basic encode/decode check no longer special-cases unicode_internal when comparing the reported encode size against len(s).

CharmapTest.test_decode_with_string_map: the assertions move to assertEqual, and two strict-mode failure cases are added, a map that is too short (u"ab") and a map containing the undecodable u"\ufffe", both of which must raise UnicodeDecodeError; the existing 'replace' and 'ignore' cases are unchanged.
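A small standalone illustration (drawn from those cases) of the string-map behavior: the map string is indexed by byte value, and bytes with no mapping are handled by the error handler:

    import codecs

    # Each byte indexes into the map string: 0 -> 'a', 1 -> 'b', 2 -> 'c'.
    assert codecs.charmap_decode("\x00\x01\x02", "strict", u"abc") == (u"abc", 3)

    # With a two-character map, byte 0x02 has no mapping under 'strict'.
    try:
        codecs.charmap_decode("\x00\x01\x02", "strict", u"ab")
    except UnicodeDecodeError:
        pass
    else:
        raise AssertionError('expected UnicodeDecodeError')

    # 'replace' substitutes U+FFFD; 'ignore' drops the unmapped byte.
    assert codecs.charmap_decode("\x00\x01\x02", "replace", u"ab") == (u"ab\ufffd", 3)
    assert codecs.charmap_decode("\x00\x01\x02", "ignore", u"ab") == (u"ab", 3)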
(u"ab\ufffd", 3) 1721 ) 1722 1723 # Issue #14850 1724 self.assertEqual( 1725 codecs.charmap_decode("\x00\x01\x02", "replace", 1726 {0: u'a', 1: u'b', 2: u'\ufffe'}), 1727 (u"ab\ufffd", 3) 1728 ) 1729 1730 self.assertEqual( 1731 codecs.charmap_decode("\x00\x01\x02", "ignore", 1732 {0: u'a', 1: u'b'}), 1733 (u"ab", 3) 1734 ) 1735 1736 self.assertEqual( 1737 codecs.charmap_decode("\x00\x01\x02", "ignore", 1738 {0: u'a', 1: u'b', 2: None}), 1739 (u"ab", 3) 1740 ) 1741 1742 # Issue #14850 1743 self.assertEqual( 1744 codecs.charmap_decode("\x00\x01\x02", "ignore", 1745 {0: u'a', 1: u'b', 2: u'\ufffe'}), 1746 (u"ab", 3) 1747 ) 1748 1749 allbytes = "".join(chr(i) for i in xrange(256)) 1750 self.assertEqual( 1751 codecs.charmap_decode(allbytes, "ignore", {}), 1752 (u"", len(allbytes)) 1753 ) 1754 1755 def test_decode_with_int2int_map(self): 1756 a = ord(u'a') 1757 b = ord(u'b') 1758 c = ord(u'c') 1759 1760 self.assertEqual( 1761 codecs.charmap_decode("\x00\x01\x02", "strict", 1762 {0: a, 1: b, 2: c}), 1763 (u"abc", 3) 1764 ) 1765 1766 # Issue #15379 1767 self.assertEqual( 1768 codecs.charmap_decode("\x00\x01\x02", "strict", 1769 {0: 0x10FFFF, 1: b, 2: c}), 1770 (u"\U0010FFFFbc", 3) 1771 ) 1772 1773 self.assertRaises(TypeError, 1774 codecs.charmap_decode, "\x00\x01\x02", "strict", 1775 {0: 0x110000, 1: b, 2: c} 1776 ) 1777 1778 self.assertRaises(UnicodeDecodeError, 1779 codecs.charmap_decode, "\x00\x01\x02", "strict", 1780 {0: a, 1: b}, 1781 ) 1782 1783 self.assertRaises(UnicodeDecodeError, 1784 codecs.charmap_decode, "\x00\x01\x02", "strict", 1785 {0: a, 1: b, 2: 0xFFFE}, 1786 ) 1787 1788 self.assertEqual( 1789 codecs.charmap_decode("\x00\x01\x02", "replace", 1790 {0: a, 1: b}), 1791 (u"ab\ufffd", 3) 1792 ) 1793 1794 self.assertEqual( 1795 codecs.charmap_decode("\x00\x01\x02", "replace", 1796 {0: a, 1: b, 2: 0xFFFE}), 1797 (u"ab\ufffd", 3) 1798 ) 1799 1800 self.assertEqual( 1801 codecs.charmap_decode("\x00\x01\x02", "ignore", 1802 {0: a, 1: b}), 1803 (u"ab", 3) 1804 ) 1805 1806 self.assertEqual( 1807 codecs.charmap_decode("\x00\x01\x02", "ignore", 1808 {0: a, 1: b, 2: 0xFFFE}), 1809 (u"ab", 3) 1810 ) 1811 1476 1812 1477 1813 class WithStmtTest(unittest.TestCase): … … 1479 1815 f = StringIO.StringIO("\xc3\xbc") 1480 1816 with codecs.EncodedFile(f, "latin-1", "utf-8") as ef: 1481 self.assertEqual s(ef.read(), "\xfc")1817 self.assertEqual(ef.read(), "\xfc") 1482 1818 1483 1819 def test_streamreaderwriter(self): … … 1486 1822 with codecs.StreamReaderWriter(f, info.streamreader, 1487 1823 info.streamwriter, 'strict') as srw: 1488 self.assertEquals(srw.read(), u"\xfc") 1824 self.assertEqual(srw.read(), u"\xfc") 1825 1826 1827 class UnicodeEscapeTest(unittest.TestCase): 1828 def test_empty(self): 1829 self.assertEqual(codecs.unicode_escape_encode(u""), ("", 0)) 1830 self.assertEqual(codecs.unicode_escape_decode(""), (u"", 0)) 1831 1832 def test_raw_encode(self): 1833 encode = codecs.unicode_escape_encode 1834 for b in range(32, 127): 1835 if b != ord('\\'): 1836 self.assertEqual(encode(unichr(b)), (chr(b), 1)) 1837 1838 def test_raw_decode(self): 1839 decode = codecs.unicode_escape_decode 1840 for b in range(256): 1841 if b != ord('\\'): 1842 self.assertEqual(decode(chr(b) + '0'), (unichr(b) + u'0', 2)) 1843 1844 def test_escape_encode(self): 1845 encode = codecs.unicode_escape_encode 1846 check = coding_checker(self, encode) 1847 check(u'\t', r'\t') 1848 check(u'\n', r'\n') 1849 check(u'\r', r'\r') 1850 check(u'\\', r'\\') 1851 for b in range(32): 1852 if chr(b) not in '\t\n\r': 1853 check(unichr(b), 
UnicodeEscapeTest.test_escape_decode checks the full escape table for codecs.unicode_escape_decode (the same cases as escape_decode, plus \uXXXX and \UXXXXXXXX escapes and the rule that unknown escapes keep their backslash), and test_decode_errors checks that truncated \x, \u and \U escapes raise UnicodeDecodeError while the 'ignore' and 'replace' handlers drop them or substitute u'\ufffd'; \U00110000, which is outside the Unicode range, is rejected as well.

A parallel RawUnicodeEscapeTest class is added for codecs.raw_unicode_escape_encode and raw_unicode_escape_decode, where every byte outside \u and \U escapes passes through unchanged, u'\u20ac' and u'\U0001d120' round-trip as \u20ac and \U0001d120, and truncated or out-of-range \u / \U escapes are handled the same way as in UnicodeEscapeTest.
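For illustration (restating a few of the new unicode_escape cases as a standalone Python 2.7 snippet); both functions return a (result, length consumed) pair:

    import codecs

    encode = codecs.unicode_escape_encode
    decode = codecs.unicode_escape_decode

    # Encoding: the reported length is the number of input characters.
    assert encode(u'\u20ac') == (r'\u20ac', 1)
    assert encode(u'\n') == (r'\n', 1)

    # Decoding: \u escapes, \x escapes, and unknown escapes keeping their backslash.
    assert decode(r"\u20ac") == (u"\u20ac", 6)
    assert decode(r"[\x41]") == (u"[A]", 6)
    assert decode(r"[\8]") == (u"[\\8]", 4)

    # A code point beyond U+10FFFF is a decoding error under 'strict'.
    try:
        decode(r"\U00110000")
    except UnicodeDecodeError:
        pass
    else:
        raise AssertionError('expected UnicodeDecodeError')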
self.assertEqual(decode(r"\U00110000", "ignore"), (u"", 10)) 1952 self.assertEqual(decode(r"\U00110000", "replace"), (u"\ufffd", 10)) 1953 1954 1955 class BomTest(unittest.TestCase): 1956 def test_seek0(self): 1957 data = u"1234567890" 1958 tests = ("utf-16", 1959 "utf-16-le", 1960 "utf-16-be", 1961 "utf-32", 1962 "utf-32-le", 1963 "utf-32-be") 1964 self.addCleanup(test_support.unlink, test_support.TESTFN) 1965 for encoding in tests: 1966 # Check if the BOM is written only once 1967 with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f: 1968 f.write(data) 1969 f.write(data) 1970 f.seek(0) 1971 self.assertEqual(f.read(), data * 2) 1972 f.seek(0) 1973 self.assertEqual(f.read(), data * 2) 1974 1975 # Check that the BOM is written after a seek(0) 1976 with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f: 1977 f.write(data[0]) 1978 self.assertNotEqual(f.tell(), 0) 1979 f.seek(0) 1980 f.write(data) 1981 f.seek(0) 1982 self.assertEqual(f.read(), data) 1983 1984 # (StreamWriter) Check that the BOM is written after a seek(0) 1985 with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f: 1986 f.writer.write(data[0]) 1987 self.assertNotEqual(f.writer.tell(), 0) 1988 f.writer.seek(0) 1989 f.writer.write(data) 1990 f.seek(0) 1991 self.assertEqual(f.read(), data) 1992 1993 # Check that the BOM is not written after a seek() at a position 1994 # different than the start 1995 with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f: 1996 f.write(data) 1997 f.seek(f.tell()) 1998 f.write(data) 1999 f.seek(0) 2000 self.assertEqual(f.read(), data * 2) 2001 2002 # (StreamWriter) Check that the BOM is not written after a seek() 2003 # at a position different than the start 2004 with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f: 2005 f.writer.write(data) 2006 f.writer.seek(f.writer.tell()) 2007 f.writer.write(data) 2008 f.seek(0) 2009 self.assertEqual(f.read(), data * 2) 1489 2010 1490 2011 … … 1517 2038 CharmapTest, 1518 2039 WithStmtTest, 2040 UnicodeEscapeTest, 2041 RawUnicodeEscapeTest, 2042 BomTest, 1519 2043 ) 1520 2044