1 | from test.test_support import run_unittest, open_urlresource
|
---|
2 | import unittest
|
---|
3 |
|
---|
4 | from httplib import HTTPException
|
---|
5 | import sys
|
---|
6 | import os
|
---|
7 | from unicodedata import normalize, unidata_version
|
---|
8 |
|
---|
# Name of the conformance data file, as published in the Unicode Character
# Database directory.
TESTDATAFILE = "NormalizationTest.txt"

# Download location of that file for the Unicode version this interpreter's
# unicodedata module reports.
TESTDATAURL = "".join([
    "http://www.unicode.org/Public/",
    unidata_version,
    "/ucd/",
    TESTDATAFILE,
])
|
---|
11 |
|
---|
def check_version(testfile):
    """Tell whether the first line of ``testfile`` mentions unidata_version.

    Exactly one line is consumed from ``testfile``.  The result is True when
    the ``unidata_version`` string occurs anywhere in that line, else False.
    """
    first_line = testfile.readline()
    if unidata_version in first_line:
        return True
    return False
|
---|
15 |
|
---|
class RangeError(Exception):
    """Raised by unistr() for a code point greater than sys.maxunicode."""
|
---|
18 |
|
---|
def NFC(str):
    """Return ``str`` normalized to Unicode normal form NFC."""
    # The parameter name shadows the builtin; it is kept so that existing
    # callers (including keyword callers) keep working.
    form = "NFC"
    return normalize(form, str)
|
---|
21 |
|
---|
def NFKC(str):
    """Return ``str`` normalized to Unicode normal form NFKC."""
    # The parameter name shadows the builtin; it is kept so that existing
    # callers (including keyword callers) keep working.
    form = "NFKC"
    return normalize(form, str)
|
---|
24 |
|
---|
def NFD(str):
    """Return ``str`` normalized to Unicode normal form NFD."""
    # The parameter name shadows the builtin; it is kept so that existing
    # callers (including keyword callers) keep working.
    form = "NFD"
    return normalize(form, str)
|
---|
27 |
|
---|
def NFKD(str):
    """Return ``str`` normalized to Unicode normal form NFKD."""
    # The parameter name shadows the builtin; it is kept so that existing
    # callers (including keyword callers) keep working.
    form = "NFKD"
    return normalize(form, str)
|
---|
30 |
|
---|
def unistr(data):
    """Build a unicode string from space-separated hexadecimal code points.

    ``data`` is split on single spaces and every piece is parsed as a base-16
    integer.  If any of the resulting values is greater than sys.maxunicode,
    RangeError is raised before any character is created; otherwise the
    characters are concatenated and returned.
    """
    codepoints = [int(field, 16) for field in data.split(" ")]
    # Validate the whole sequence first so nothing is converted when a single
    # value cannot be represented on this build.
    if any(cp > sys.maxunicode for cp in codepoints):
        raise RangeError
    return u"".join(map(unichr, codepoints))
|
---|
37 |
|
---|
38 | class NormalizationTest(unittest.TestCase):
|
---|
39 | def test_main(self):
|
---|
40 | part = None
|
---|
41 | part1_data = {}
|
---|
42 | # Hit the exception early
|
---|
43 | try:
|
---|
44 | testdata = open_urlresource(TESTDATAURL, check_version)
|
---|
45 | except (IOError, HTTPException):
|
---|
46 | self.skipTest("Could not retrieve " + TESTDATAURL)
|
---|
47 | for line in testdata:
|
---|
48 | if '#' in line:
|
---|
49 | line = line.split('#')[0]
|
---|
50 | line = line.strip()
|
---|
51 | if not line:
|
---|
52 | continue
|
---|
53 | if line.startswith("@Part"):
|
---|
54 | part = line.split()[0]
|
---|
55 | continue
|
---|
56 | try:
|
---|
57 | c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]]
|
---|
58 | except RangeError:
|
---|
59 | # Skip unsupported characters;
|
---|
60 | # try at least adding c1 if we are in part1
|
---|
61 | if part == "@Part1":
|
---|
62 | try:
|
---|
63 | c1 = unistr(line.split(';')[0])
|
---|
64 | except RangeError:
|
---|
65 | pass
|
---|
66 | else:
|
---|
67 | part1_data[c1] = 1
|
---|
68 | continue
|
---|
69 |
|
---|
70 | # Perform tests
|
---|
71 | self.assertTrue(c2 == NFC(c1) == NFC(c2) == NFC(c3), line)
|
---|
72 | self.assertTrue(c4 == NFC(c4) == NFC(c5), line)
|
---|
73 | self.assertTrue(c3 == NFD(c1) == NFD(c2) == NFD(c3), line)
|
---|
74 | self.assertTrue(c5 == NFD(c4) == NFD(c5), line)
|
---|
75 | self.assertTrue(c4 == NFKC(c1) == NFKC(c2) == \
|
---|
76 | NFKC(c3) == NFKC(c4) == NFKC(c5),
|
---|
77 | line)
|
---|
78 | self.assertTrue(c5 == NFKD(c1) == NFKD(c2) == \
|
---|
79 | NFKD(c3) == NFKD(c4) == NFKD(c5),
|
---|
80 | line)
|
---|
81 |
|
---|
82 | # Record part 1 data
|
---|
83 | if part == "@Part1":
|
---|
84 | part1_data[c1] = 1
|
---|
85 |
|
---|
86 | # Perform tests for all other data
|
---|
87 | for c in range(sys.maxunicode+1):
|
---|
88 | X = unichr(c)
|
---|
89 | if X in part1_data:
|
---|
90 | continue
|
---|
91 | self.assertTrue(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)
|
---|
92 |
|
---|
93 | def test_bug_834676(self):
|
---|
94 | # Check for bug 834676
|
---|
95 | normalize('NFC', u'\ud55c\uae00')
|
---|
96 |
|
---|
97 |
|
---|
def test_main():
    """Run the NormalizationTest case through run_unittest()."""
    test_classes = (NormalizationTest,)
    run_unittest(*test_classes)


# Allow the file to be executed directly as a script.
if __name__ == "__main__":
    test_main()
|
---|