#!/usr/bin/env python
#
# test_codecencodings_cn.py
# Codec encoding tests for PRC encodings.
#
6 |
|
---|
7 | from test import test_support
|
---|
8 | from test import test_multibytecodec_support
|
---|
9 | import unittest
|
---|
10 |
|
---|
class Test_GB2312(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the 'gb2312' encoding.

    No test methods are defined here; the class only declares the data
    attributes below for its base classes.
    """

    encoding = 'gb2312'
    tstring = test_multibytecodec_support.load_teststring('gb2312')

    # Each entry is (input, error-handler name, expected result).
    # Byte-string inputs carry an explicit b prefix, matching the literals
    # used by Test_HZ in this file; on Python 2.6+ b"..." is the same
    # object type as "...".
    # NOTE(review): None as the expected result presumably means the call
    # must fail under that handler -- confirm against TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x81\x81\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x81\x81\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        (b"abc\x81\x81\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        (b"abc\x81\x81\xc1\xc4", "ignore", u"abc\u804a"),
        # 0xc1 followed by 0x64 rather than the 0xc4 used in the pairs above
        (b"\xc1\x64", "strict", None),
    )
23 |
|
---|
class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the 'gbk' encoding.

    No test methods are defined here; the class only declares the data
    attributes below for its base classes.
    """

    encoding = 'gbk'
    tstring = test_multibytecodec_support.load_teststring('gbk')

    # Each entry is (input, error-handler name, expected result).
    # Byte-string inputs carry an explicit b prefix, matching the literals
    # used by Test_HZ in this file; on Python 2.6+ b"..." is the same
    # object type as "...".  The one unicode input stays a u"..." literal.
    # NOTE(review): None as the expected result presumably means the call
    # must fail under that handler -- confirm against TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
        (b"\x83\x34\x83\x31", "strict", None),
        # unicode input (the only non-byte-string input in this table)
        (u"\u30fb", "strict", None),
    )
37 |
|
---|
class Test_GB18030(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the 'gb18030' encoding.

    No test methods are defined here; the class only declares the data
    attributes below for its base classes.
    """

    encoding = 'gb18030'
    tstring = test_multibytecodec_support.load_teststring('gb18030')

    # Each entry is (input, error-handler name, expected result).
    # Byte strings (inputs and the one byte-string expected result) carry
    # an explicit b prefix, matching the literals used by Test_HZ in this
    # file; on Python 2.6+ b"..." is the same object type as "...".
    # NOTE(review): None as the expected result presumably means the call
    # must fail under that handler -- confirm against TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
        (b"abc\x84\x39\x84\x39\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        # unicode input with a byte-string expected result:
        # the four bytes 0x81 0x39 0xa7 0x39
        (u"\u30fb", "strict", b"\x819\xa79"),
    )

    # NOTE(review): presumably a switch read by the shared test base class;
    # its meaning is defined outside this file -- confirm there.
    has_iso10646 = True
52 |
|
---|
class Test_HZ(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the 'hz' encoding.

    No test methods are defined here; the class only declares the data
    attributes below for its base classes.
    """

    encoding = 'hz'
    tstring = test_multibytecodec_support.load_teststring('hz')

    # Each entry is (byte-string input, error-handler name, expected text).
    codectests = (
        # test '~\n' (3 lines)
        # The expected text carries no trace of the '~' + newline pair.
        (
            b'This sentence is in ASCII.\n'
            b'The next sentence is in GB.~{<:Ky2;S{#,~}~\n'
            b'~{NpJ)l6HK!#~}Bye.\n',
            'strict',
            u'This sentence is in ASCII.\n'
            u'The next sentence is in GB.'
            u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
            u'Bye.\n',
        ),
        # test '~\n' (4 lines)
        # Same expected text as above; here two '~' + newline pairs vanish.
        (
            b'This sentence is in ASCII.\n'
            b'The next sentence is in GB.~\n'
            b'~{<:Ky2;S{#,NpJ)l6HK!#~}~\n'
            b'Bye.\n',
            'strict',
            u'This sentence is in ASCII.\n'
            u'The next sentence is in GB.'
            u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
            u'Bye.\n',
        ),
        # invalid bytes
        # '~c' collapses into a single U+FFFD
        (b'ab~cd', 'replace', u'ab\uFFFDd'),
        # a lone 0xff becomes U+FFFD and the following text is kept
        (b'ab\xffcd', 'replace', u'ab\uFFFDcd'),
        # inside ~{ ... ~}: each 0x81 becomes U+FFFD, 0x41 0x44 gives U+804A
        (b'ab~{\x81\x81\x41\x44~}cd', 'replace', u'ab\uFFFD\uFFFD\u804Acd'),
    )
81 |
|
---|
def test_main():
    """Pass this module's name to test_support.run_unittest."""
    module_name = __name__
    test_support.run_unittest(module_name)


# Allow the module to be executed directly as a script.
if __name__ == "__main__":
    test_main()