1 | """This script generates a Python codec module from a Windows Code Page.
|
---|
2 |
|
---|
3 | It uses the function MultiByteToWideChar to generate a decoding table.
|
---|
4 | """
|
---|
5 |
|
---|
6 | import ctypes
|
---|
7 | from ctypes import wintypes
|
---|
8 | from gencodec import codegen
|
---|
9 | import unicodedata
|
---|
10 |
|
---|
def genwinmap(codepage):
    """Build a byte -> Unicode decoding map for a Windows code page.

    Every byte value from 0 to 255 is decoded on its own with the Win32
    function MultiByteToWideChar (reached through ctypes.windll), so this
    function can only run on Windows.

    Args:
        codepage: Numeric Windows code page identifier, passed straight to
            MultiByteToWideChar.

    Returns:
        A dict mapping each byte value 0..255 to a ``(code_point, name)``
        tuple. ``name`` is the Unicode name of the decoded character; when
        the character has no name it is 'CONTROL CHARACTER' for bytes
        0..31 and 127, and '' for any other byte.

    Raises:
        AssertionError: if a byte does not decode to exactly one wide
            character (reported as "invalid code page").
    """
    MultiByteToWideChar = ctypes.windll.kernel32.MultiByteToWideChar
    MultiByteToWideChar.argtypes = [wintypes.UINT, wintypes.DWORD,
                                    wintypes.LPCSTR, ctypes.c_int,
                                    wintypes.LPWSTR, ctypes.c_int]
    MultiByteToWideChar.restype = ctypes.c_int

    enc2uni = {}

    # Pre-seed the control codes so that the name lookup below has a
    # fallback label for characters unicodedata cannot name.
    # list(...) keeps the concatenation valid where range() is not a list.
    for i in list(range(32)) + [127]:
        enc2uni[i] = (i, 'CONTROL CHARACTER')

    for i in range(256):
        # A fresh two-element buffer per byte: one slot for the decoded
        # character and one that must remain NUL.
        buf = ctypes.create_unicode_buffer(2)
        # LPCSTR needs a byte string; going through bytearray yields a
        # one-byte string on both Python 2 and Python 3 (chr() would give
        # text on Python 3).
        single_byte = bytes(bytearray((i,)))
        ret = MultiByteToWideChar(
            codepage, 0,
            single_byte, 1,
            buf, 2)
        assert ret == 1, "invalid code page"
        assert buf[1] == '\x00'
        try:
            name = unicodedata.name(buf[0])
        except ValueError:
            # Unnamed character: reuse the pre-seeded control label if this
            # byte has one, otherwise leave the name empty.
            try:
                name = enc2uni[i][1]
            except KeyError:
                name = ''

        enc2uni[i] = (ord(buf[0]), name)

    return enc2uni
|
---|
42 |
|
---|
def genwincodec(codepage):
    """Print the source of a codec module for a Windows code page.

    The decoding map from genwinmap() is handed to gencodec.codegen() under
    the encoding name 'cp<codepage>'. The leading part of the generated
    text, up to and including the first '\"\"\"#\"' marker, is then replaced
    by a docstring recording the Windows version and the command used, and
    the result is written to standard output.

    Args:
        codepage: Numeric Windows code page identifier.

    Returns:
        None. The generated module source is printed.

    Raises:
        IndexError: if the text returned by codegen() does not contain the
            '\"\"\"#\"' marker.
    """
    import platform

    # Named decoding_map rather than map to avoid shadowing the builtin.
    decoding_map = genwinmap(codepage)
    encodingname = 'cp%d' % codepage
    code = codegen("", decoding_map, encodingname)

    # Replace first lines with our own docstring
    header = '''\
"""Python Character Mapping Codec %s generated on Windows:
%s with the command:
python Tools/unicode/genwincodec.py %s
"""#"
''' % (encodingname, ' '.join(platform.win32_ver()), codepage)
    code = header + code.split('"""#"', 1)[1]

    # Call form of print: same output on Python 2, valid syntax on Python 3.
    print(code)
|
---|
58 |
|
---|
if __name__ == '__main__':
    # Script entry point: the first command-line argument is the numeric
    # Windows code page to generate a codec for.
    import sys
    if len(sys.argv) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit("usage: python genwincodec.py CODEPAGE")
    genwincodec(int(sys.argv[1]))
|
---|