source: python/trunk/Lib/encodings/aliases.py@ 1843

Last change on this file since 1843 was 1565, checked in by Silvan Scherrer, 7 years ago

add simplified chinese to the alias table. fixes #328

  • Property svn:eol-style set to native
File size: 14.6 KB
Line 
1""" Encoding Aliases Support
2
3 This module is used by the encodings package search function to
4 map encoding names to module names.
5
6 Note that the search function normalizes the encoding names before
7 doing the lookup, so the mapping will have to map normalized
8 encoding names to module names.
9
10 Contents:
11
12 The following aliases dictionary contains mappings of all IANA
13 character set names for which the Python core library provides
14 codecs. In addition to these, a few Python specific codec
15 aliases have also been added.
16
17"""
# Mapping of normalized encoding alias -> name of the codec module that
# implements it.
#
# The encodings package search function normalizes an encoding name
# before looking it up here, so every key has to be written in its
# normalized form. Codec lookup also hands the name to search functions
# in lower case, which means a key containing upper-case letters can
# never be matched by a lookup.
aliases = {

    # Please keep this list sorted alphabetically by value !

    # ascii codec
    '646' : 'ascii',
    'ansi_x3.4_1968' : 'ascii',
    'ansi_x3_4_1968' : 'ascii', # some email headers use this non-standard name
    'ansi_x3.4_1986' : 'ascii',
    'cp367' : 'ascii',
    'csascii' : 'ascii',
    'ibm367' : 'ascii',
    'iso646_us' : 'ascii',
    'iso_646.irv_1991' : 'ascii',
    'iso_ir_6' : 'ascii',
    'us' : 'ascii',
    'us_ascii' : 'ascii',

    # base64_codec codec
    'base64' : 'base64_codec',
    'base_64' : 'base64_codec',

    # big5 codec
    'big5_tw' : 'big5',
    'csbig5' : 'big5',

    # big5hkscs codec
    'big5_hkscs' : 'big5hkscs',
    'hkscs' : 'big5hkscs',

    # bz2_codec codec
    'bz2' : 'bz2_codec',

    # cp037 codec
    '037' : 'cp037',
    'csibm037' : 'cp037',
    'ebcdic_cp_ca' : 'cp037',
    'ebcdic_cp_nl' : 'cp037',
    'ebcdic_cp_us' : 'cp037',
    'ebcdic_cp_wt' : 'cp037',
    'ibm037' : 'cp037',
    'ibm039' : 'cp037',

    # cp1026 codec
    '1026' : 'cp1026',
    'csibm1026' : 'cp1026',
    'ibm1026' : 'cp1026',

    # cp1140 codec
    '1140' : 'cp1140',
    'ibm1140' : 'cp1140',

    # cp1250 codec
    '1250' : 'cp1250',
    'windows_1250' : 'cp1250',

    # cp1251 codec
    '1251' : 'cp1251',
    'windows_1251' : 'cp1251',

    # cp1252 codec
    '1252' : 'cp1252',
    'windows_1252' : 'cp1252',

    # cp1253 codec
    '1253' : 'cp1253',
    'windows_1253' : 'cp1253',

    # cp1254 codec
    '1254' : 'cp1254',
    'windows_1254' : 'cp1254',

    # cp1255 codec
    '1255' : 'cp1255',
    'windows_1255' : 'cp1255',

    # cp1256 codec
    '1256' : 'cp1256',
    'windows_1256' : 'cp1256',

    # cp1257 codec
    '1257' : 'cp1257',
    'windows_1257' : 'cp1257',

    # cp1258 codec
    '1258' : 'cp1258',
    'windows_1258' : 'cp1258',

    # cp424 codec
    '424' : 'cp424',
    'csibm424' : 'cp424',
    'ebcdic_cp_he' : 'cp424',
    'ibm424' : 'cp424',

    # cp437 codec
    '437' : 'cp437',
    'cspc8codepage437' : 'cp437',
    'ibm437' : 'cp437',

    # cp500 codec
    '500' : 'cp500',
    'csibm500' : 'cp500',
    'ebcdic_cp_be' : 'cp500',
    'ebcdic_cp_ch' : 'cp500',
    'ibm500' : 'cp500',

    # cp775 codec
    '775' : 'cp775',
    'cspc775baltic' : 'cp775',
    'ibm775' : 'cp775',

    # cp850 codec
    '850' : 'cp850',
    'cspc850multilingual' : 'cp850',
    'ibm850' : 'cp850',

    # cp852 codec
    '852' : 'cp852',
    'cspcp852' : 'cp852',
    'ibm852' : 'cp852',

    # cp855 codec
    '855' : 'cp855',
    'csibm855' : 'cp855',
    'ibm855' : 'cp855',

    # cp857 codec
    '857' : 'cp857',
    'csibm857' : 'cp857',
    'ibm857' : 'cp857',

    # cp858 codec
    '858' : 'cp858',
    'csibm858' : 'cp858',
    'ibm858' : 'cp858',

    # cp860 codec
    '860' : 'cp860',
    'csibm860' : 'cp860',
    'ibm860' : 'cp860',

    # cp861 codec
    '861' : 'cp861',
    'cp_is' : 'cp861',
    'csibm861' : 'cp861',
    'ibm861' : 'cp861',

    # cp862 codec
    '862' : 'cp862',
    'cspc862latinhebrew' : 'cp862',
    'ibm862' : 'cp862',

    # cp863 codec
    '863' : 'cp863',
    'csibm863' : 'cp863',
    'ibm863' : 'cp863',

    # cp864 codec
    '864' : 'cp864',
    'csibm864' : 'cp864',
    'ibm864' : 'cp864',

    # cp865 codec
    '865' : 'cp865',
    'csibm865' : 'cp865',
    'ibm865' : 'cp865',

    # cp866 codec
    '866' : 'cp866',
    'csibm866' : 'cp866',
    'ibm866' : 'cp866',

    # cp869 codec
    '869' : 'cp869',
    'cp_gr' : 'cp869',
    'csibm869' : 'cp869',
    'ibm869' : 'cp869',

    # cp932 codec
    '932' : 'cp932',
    'ms932' : 'cp932',
    'mskanji' : 'cp932',
    'ms_kanji' : 'cp932',

    # cp949 codec
    '949' : 'cp949',
    'ms949' : 'cp949',
    'uhc' : 'cp949',

    # cp950 codec
    '950' : 'cp950',
    'ms950' : 'cp950',

    # euc_jis_2004 codec
    'jisx0213' : 'euc_jis_2004',
    'eucjis2004' : 'euc_jis_2004',
    'euc_jis2004' : 'euc_jis_2004',

    # euc_jisx0213 codec
    'eucjisx0213' : 'euc_jisx0213',

    # euc_jp codec
    'eucjp' : 'euc_jp',
    'ujis' : 'euc_jp',
    'u_jis' : 'euc_jp',

    # euc_kr codec
    'euckr' : 'euc_kr',
    'korean' : 'euc_kr',
    'ksc5601' : 'euc_kr',
    'ks_c_5601' : 'euc_kr',
    'ks_c_5601_1987' : 'euc_kr',
    'ksx1001' : 'euc_kr',
    'ks_x_1001' : 'euc_kr',

    # gb18030 codec
    'gb18030_2000' : 'gb18030',

    # gb2312 codec
    '1381' : 'gb2312',
    'chinese' : 'gb2312',
    'cp1381' : 'gb2312',
    'csiso58gb231280' : 'gb2312',
    'euc_cn' : 'gb2312',
    'euccn' : 'gb2312',
    'eucgb2312_cn' : 'gb2312',
    'gb2312_1980' : 'gb2312',
    'gb2312_80' : 'gb2312',
    'iso_ir_58' : 'gb2312',

    # gbk codec
    '1386' : 'gbk',
    '936' : 'gbk',
    'cp1386' : 'gbk',
    'cp936' : 'gbk',
    'ms936' : 'gbk',

    # hex_codec codec
    'hex' : 'hex_codec',

    # hp_roman8 codec
    'roman8' : 'hp_roman8',
    'r8' : 'hp_roman8',
    # Legacy mixed-case spelling: kept so code reading this dict directly
    # still finds it, but a lower-cased lookup can never hit this key.
    'csHPRoman8' : 'hp_roman8',
    # Lower-cased form of the alias above, i.e. the one a lookup can match.
    'cshproman8' : 'hp_roman8',

    # hz codec
    'hzgb' : 'hz',
    'hz_gb' : 'hz',
    'hz_gb_2312' : 'hz',

    # iso2022_jp codec
    'csiso2022jp' : 'iso2022_jp',
    'iso2022jp' : 'iso2022_jp',
    'iso_2022_jp' : 'iso2022_jp',

    # iso2022_jp_1 codec
    'iso2022jp_1' : 'iso2022_jp_1',
    'iso_2022_jp_1' : 'iso2022_jp_1',

    # iso2022_jp_2 codec
    'iso2022jp_2' : 'iso2022_jp_2',
    'iso_2022_jp_2' : 'iso2022_jp_2',

    # iso2022_jp_2004 codec
    'iso_2022_jp_2004' : 'iso2022_jp_2004',
    'iso2022jp_2004' : 'iso2022_jp_2004',

    # iso2022_jp_3 codec
    'iso2022jp_3' : 'iso2022_jp_3',
    'iso_2022_jp_3' : 'iso2022_jp_3',

    # iso2022_jp_ext codec
    'iso2022jp_ext' : 'iso2022_jp_ext',
    'iso_2022_jp_ext' : 'iso2022_jp_ext',

    # iso2022_kr codec
    'csiso2022kr' : 'iso2022_kr',
    'iso2022kr' : 'iso2022_kr',
    'iso_2022_kr' : 'iso2022_kr',

    # iso8859_10 codec
    'csisolatin6' : 'iso8859_10',
    'iso_8859_10' : 'iso8859_10',
    'iso_8859_10_1992' : 'iso8859_10',
    'iso_ir_157' : 'iso8859_10',
    'l6' : 'iso8859_10',
    'latin6' : 'iso8859_10',

    # iso8859_11 codec
    'thai' : 'iso8859_11',
    'iso_8859_11' : 'iso8859_11',
    'iso_8859_11_2001' : 'iso8859_11',

    # iso8859_13 codec
    'iso_8859_13' : 'iso8859_13',
    'l7' : 'iso8859_13',
    'latin7' : 'iso8859_13',

    # iso8859_14 codec
    'iso_8859_14' : 'iso8859_14',
    'iso_8859_14_1998' : 'iso8859_14',
    'iso_celtic' : 'iso8859_14',
    'iso_ir_199' : 'iso8859_14',
    'l8' : 'iso8859_14',
    'latin8' : 'iso8859_14',

    # iso8859_15 codec
    'iso_8859_15' : 'iso8859_15',
    'l9' : 'iso8859_15',
    'latin9' : 'iso8859_15',

    # iso8859_16 codec
    'iso_8859_16' : 'iso8859_16',
    'iso_8859_16_2001' : 'iso8859_16',
    'iso_ir_226' : 'iso8859_16',
    'l10' : 'iso8859_16',
    'latin10' : 'iso8859_16',

    # iso8859_2 codec
    'csisolatin2' : 'iso8859_2',
    'iso_8859_2' : 'iso8859_2',
    'iso_8859_2_1987' : 'iso8859_2',
    'iso_ir_101' : 'iso8859_2',
    'l2' : 'iso8859_2',
    'latin2' : 'iso8859_2',

    # iso8859_3 codec
    'csisolatin3' : 'iso8859_3',
    'iso_8859_3' : 'iso8859_3',
    'iso_8859_3_1988' : 'iso8859_3',
    'iso_ir_109' : 'iso8859_3',
    'l3' : 'iso8859_3',
    'latin3' : 'iso8859_3',

    # iso8859_4 codec
    'csisolatin4' : 'iso8859_4',
    'iso_8859_4' : 'iso8859_4',
    'iso_8859_4_1988' : 'iso8859_4',
    'iso_ir_110' : 'iso8859_4',
    'l4' : 'iso8859_4',
    'latin4' : 'iso8859_4',

    # iso8859_5 codec
    'csisolatincyrillic' : 'iso8859_5',
    'cyrillic' : 'iso8859_5',
    'iso_8859_5' : 'iso8859_5',
    'iso_8859_5_1988' : 'iso8859_5',
    'iso_ir_144' : 'iso8859_5',

    # iso8859_6 codec
    'arabic' : 'iso8859_6',
    'asmo_708' : 'iso8859_6',
    'csisolatinarabic' : 'iso8859_6',
    'ecma_114' : 'iso8859_6',
    'iso_8859_6' : 'iso8859_6',
    'iso_8859_6_1987' : 'iso8859_6',
    'iso_ir_127' : 'iso8859_6',

    # iso8859_7 codec
    'csisolatingreek' : 'iso8859_7',
    'ecma_118' : 'iso8859_7',
    'elot_928' : 'iso8859_7',
    'greek' : 'iso8859_7',
    'greek8' : 'iso8859_7',
    'iso_8859_7' : 'iso8859_7',
    'iso_8859_7_1987' : 'iso8859_7',
    'iso_ir_126' : 'iso8859_7',

    # iso8859_8 codec
    'csisolatinhebrew' : 'iso8859_8',
    'hebrew' : 'iso8859_8',
    'iso_8859_8' : 'iso8859_8',
    'iso_8859_8_1988' : 'iso8859_8',
    'iso_ir_138' : 'iso8859_8',

    # iso8859_9 codec
    'csisolatin5' : 'iso8859_9',
    'iso_8859_9' : 'iso8859_9',
    'iso_8859_9_1989' : 'iso8859_9',
    'iso_ir_148' : 'iso8859_9',
    'l5' : 'iso8859_9',
    'latin5' : 'iso8859_9',

    # johab codec
    'cp1361' : 'johab',
    'ms1361' : 'johab',

    # koi8_r codec
    'cskoi8r' : 'koi8_r',

    # latin_1 codec
    #
    # Note that the latin_1 codec is implemented internally in C and a
    # lot faster than the charmap codec iso8859_1 which uses the same
    # encoding. This is why we discourage the use of the iso8859_1
    # codec and alias it to latin_1 instead.
    #
    '8859' : 'latin_1',
    'cp819' : 'latin_1',
    'csisolatin1' : 'latin_1',
    'ibm819' : 'latin_1',
    'iso8859' : 'latin_1',
    'iso8859_1' : 'latin_1',
    'iso_8859_1' : 'latin_1',
    'iso_8859_1_1987' : 'latin_1',
    'iso_ir_100' : 'latin_1',
    'l1' : 'latin_1',
    'latin' : 'latin_1',
    'latin1' : 'latin_1',

    # mac_cyrillic codec
    'maccyrillic' : 'mac_cyrillic',

    # mac_greek codec
    'macgreek' : 'mac_greek',

    # mac_iceland codec
    'maciceland' : 'mac_iceland',

    # mac_latin2 codec
    'maccentraleurope' : 'mac_latin2',
    'maclatin2' : 'mac_latin2',

    # mac_roman codec
    'macroman' : 'mac_roman',

    # mac_turkish codec
    'macturkish' : 'mac_turkish',

    # mbcs codec
    'dbcs' : 'mbcs',

    # ptcp154 codec
    'csptcp154' : 'ptcp154',
    'pt154' : 'ptcp154',
    'cp154' : 'ptcp154',
    'cyrillic_asian' : 'ptcp154',

    # quopri_codec codec
    'quopri' : 'quopri_codec',
    'quoted_printable' : 'quopri_codec',
    'quotedprintable' : 'quopri_codec',

    # rot_13 codec
    'rot13' : 'rot_13',

    # shift_jis codec
    'csshiftjis' : 'shift_jis',
    'shiftjis' : 'shift_jis',
    'sjis' : 'shift_jis',
    's_jis' : 'shift_jis',

    # shift_jis_2004 codec
    'shiftjis2004' : 'shift_jis_2004',
    'sjis_2004' : 'shift_jis_2004',
    's_jis_2004' : 'shift_jis_2004',

    # shift_jisx0213 codec
    'shiftjisx0213' : 'shift_jisx0213',
    'sjisx0213' : 'shift_jisx0213',
    's_jisx0213' : 'shift_jisx0213',

    # tactis codec
    # NOTE(review): confirm that a 'tactis' codec module actually ships in
    # this tree; if it does not, this alias can never resolve to a codec.
    'tis260' : 'tactis',

    # tis_620 codec
    'tis620' : 'tis_620',
    'tis_620_0' : 'tis_620',
    'tis_620_2529_0' : 'tis_620',
    'tis_620_2529_1' : 'tis_620',
    'iso_ir_166' : 'tis_620',

    # utf_16 codec
    'u16' : 'utf_16',
    'utf16' : 'utf_16',

    # utf_16_be codec
    'unicodebigunmarked' : 'utf_16_be',
    'utf_16be' : 'utf_16_be',

    # utf_16_le codec
    'unicodelittleunmarked' : 'utf_16_le',
    'utf_16le' : 'utf_16_le',

    # utf_32 codec
    'u32' : 'utf_32',
    'utf32' : 'utf_32',

    # utf_32_be codec
    'utf_32be' : 'utf_32_be',

    # utf_32_le codec
    'utf_32le' : 'utf_32_le',

    # utf_7 codec
    'u7' : 'utf_7',
    'utf7' : 'utf_7',
    'unicode_1_1_utf_7' : 'utf_7',

    # utf_8 codec
    'u8' : 'utf_8',
    'utf' : 'utf_8',
    'utf8' : 'utf_8',
    'utf8_ucs2' : 'utf_8',
    'utf8_ucs4' : 'utf_8',

    # uu_codec codec
    'uu' : 'uu_codec',

    # zlib_codec codec
    'zip' : 'zlib_codec',
    'zlib' : 'zlib_codec',

}
Note: See TracBrowser for help on using the repository browser.