source: python/trunk/Lib/test/test_ucn.py

Last change on this file was 391, checked in by dmik, 11 years ago

python: Merge vendor 2.7.6 to trunk.

  • Property svn:eol-style set to native
File size: 6.0 KB
Line 
1""" Test script for the Unicode implementation.
2
3Written by Bill Tutt.
4Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
5
6(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
7
8"""#"
9
10import unittest
11import sys
12import _testcapi
13
14from test import test_support
15
16class UnicodeNamesTest(unittest.TestCase):
17
18 def checkletter(self, name, code):
19 # Helper that put all \N escapes inside eval'd raw strings,
20 # to make sure this script runs even if the compiler
21 # chokes on \N escapes
22 res = eval(ur'u"\N{%s}"' % name)
23 self.assertEqual(res, code)
24 return res
25
26 def test_general(self):
27 # General and case insensitivity test:
28 chars = [
29 "LATIN CAPITAL LETTER T",
30 "LATIN SMALL LETTER H",
31 "LATIN SMALL LETTER E",
32 "SPACE",
33 "LATIN SMALL LETTER R",
34 "LATIN CAPITAL LETTER E",
35 "LATIN SMALL LETTER D",
36 "SPACE",
37 "LATIN SMALL LETTER f",
38 "LATIN CAPITAL LeTtEr o",
39 "LATIN SMaLl LETTER x",
40 "SPACE",
41 "LATIN SMALL LETTER A",
42 "LATIN SMALL LETTER T",
43 "LATIN SMALL LETTER E",
44 "SPACE",
45 "LATIN SMALL LETTER T",
46 "LATIN SMALL LETTER H",
47 "LATIN SMALL LETTER E",
48 "SpAcE",
49 "LATIN SMALL LETTER S",
50 "LATIN SMALL LETTER H",
51 "LATIN small LETTER e",
52 "LATIN small LETTER e",
53 "LATIN SMALL LETTER P",
54 "FULL STOP"
55 ]
56 string = u"The rEd fOx ate the sheep."
57
58 self.assertEqual(
59 u"".join([self.checkletter(*args) for args in zip(chars, string)]),
60 string
61 )
62
63 def test_ascii_letters(self):
64 import unicodedata
65
66 for char in "".join(map(chr, xrange(ord("a"), ord("z")))):
67 name = "LATIN SMALL LETTER %s" % char.upper()
68 code = unicodedata.lookup(name)
69 self.assertEqual(unicodedata.name(code), name)
70
71 def test_hangul_syllables(self):
72 self.checkletter("HANGUL SYLLABLE GA", u"\uac00")
73 self.checkletter("HANGUL SYLLABLE GGWEOSS", u"\uafe8")
74 self.checkletter("HANGUL SYLLABLE DOLS", u"\ub3d0")
75 self.checkletter("HANGUL SYLLABLE RYAN", u"\ub7b8")
76 self.checkletter("HANGUL SYLLABLE MWIK", u"\ubba0")
77 self.checkletter("HANGUL SYLLABLE BBWAEM", u"\ubf88")
78 self.checkletter("HANGUL SYLLABLE SSEOL", u"\uc370")
79 self.checkletter("HANGUL SYLLABLE YI", u"\uc758")
80 self.checkletter("HANGUL SYLLABLE JJYOSS", u"\ucb40")
81 self.checkletter("HANGUL SYLLABLE KYEOLS", u"\ucf28")
82 self.checkletter("HANGUL SYLLABLE PAN", u"\ud310")
83 self.checkletter("HANGUL SYLLABLE HWEOK", u"\ud6f8")
84 self.checkletter("HANGUL SYLLABLE HIH", u"\ud7a3")
85
86 import unicodedata
87 self.assertRaises(ValueError, unicodedata.name, u"\ud7a4")
88
89 def test_cjk_unified_ideographs(self):
90 self.checkletter("CJK UNIFIED IDEOGRAPH-3400", u"\u3400")
91 self.checkletter("CJK UNIFIED IDEOGRAPH-4DB5", u"\u4db5")
92 self.checkletter("CJK UNIFIED IDEOGRAPH-4E00", u"\u4e00")
93 self.checkletter("CJK UNIFIED IDEOGRAPH-9FA5", u"\u9fa5")
94 self.checkletter("CJK UNIFIED IDEOGRAPH-20000", u"\U00020000")
95 self.checkletter("CJK UNIFIED IDEOGRAPH-2A6D6", u"\U0002a6d6")
96
97 def test_bmp_characters(self):
98 import unicodedata
99 count = 0
100 for code in xrange(0x10000):
101 char = unichr(code)
102 name = unicodedata.name(char, None)
103 if name is not None:
104 self.assertEqual(unicodedata.lookup(name), char)
105 count += 1
106
107 def test_misc_symbols(self):
108 self.checkletter("PILCROW SIGN", u"\u00b6")
109 self.checkletter("REPLACEMENT CHARACTER", u"\uFFFD")
110 self.checkletter("HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK", u"\uFF9F")
111 self.checkletter("FULLWIDTH LATIN SMALL LETTER A", u"\uFF41")
112
113 def test_errors(self):
114 import unicodedata
115 self.assertRaises(TypeError, unicodedata.name)
116 self.assertRaises(TypeError, unicodedata.name, u'xx')
117 self.assertRaises(TypeError, unicodedata.lookup)
118 self.assertRaises(KeyError, unicodedata.lookup, u'unknown')
119
120 def test_strict_eror_handling(self):
121 # bogus character name
122 self.assertRaises(
123 UnicodeError,
124 unicode, "\\N{blah}", 'unicode-escape', 'strict'
125 )
126 # long bogus character name
127 self.assertRaises(
128 UnicodeError,
129 unicode, "\\N{%s}" % ("x" * 100000), 'unicode-escape', 'strict'
130 )
131 # missing closing brace
132 self.assertRaises(
133 UnicodeError,
134 unicode, "\\N{SPACE", 'unicode-escape', 'strict'
135 )
136 # missing opening brace
137 self.assertRaises(
138 UnicodeError,
139 unicode, "\\NSPACE", 'unicode-escape', 'strict'
140 )
141
142 @unittest.skipUnless(_testcapi.INT_MAX < _testcapi.PY_SSIZE_T_MAX,
143 "needs UINT_MAX < SIZE_MAX")
144 @unittest.skipUnless(_testcapi.UINT_MAX < sys.maxint,
145 "needs UINT_MAX < sys.maxint")
146 @test_support.bigmemtest(minsize=_testcapi.UINT_MAX + 1,
147 memuse=2 + 4 // len(u'\U00010000'))
148 def test_issue16335(self, size):
149 func = self.test_issue16335
150 if size < func.minsize:
151 raise unittest.SkipTest("not enough memory: %.1fG minimum needed" %
152 (func.minsize * func.memuse / float(1024**3),))
153 # very very long bogus character name
154 x = b'\\N{SPACE' + b'x' * int(_testcapi.UINT_MAX + 1) + b'}'
155 self.assertEqual(len(x), len(b'\\N{SPACE}') +
156 (_testcapi.UINT_MAX + 1))
157 self.assertRaisesRegexp(UnicodeError,
158 'unknown Unicode character name',
159 x.decode, 'unicode-escape'
160 )
161
162
def test_main():
    """Run the UnicodeNamesTest suite through test_support.run_unittest()."""
    test_support.run_unittest(UnicodeNamesTest)
165
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()
Note: See TracBrowser for help on using the repository browser.