1 | """ Python 'utf-16' Codec
|
---|
2 |
|
---|
3 |
|
---|
4 | Written by Marc-Andre Lemburg (mal@lemburg.com).
|
---|
5 |
|
---|
6 | (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
|
---|
7 |
|
---|
8 | """
|
---|
9 | import codecs, sys
|
---|
10 |
|
---|
11 | ### Codec APIs
|
---|
12 |
|
---|
# Module-level encode entry point: a direct alias for codecs.utf_16_encode.
encode = codecs.utf_16_encode
|
---|
14 |
|
---|
def decode(input, errors='strict'):
    """Decode a complete UTF-16 byte string in a single call.

    Delegates to ``codecs.utf_16_decode`` with its third argument fixed to
    ``True`` (the input is treated as final, not as a chunk of a stream) and
    returns that function's result unchanged.
    """
    is_final = True
    return codecs.utf_16_decode(input, errors, is_final)
|
---|
17 |
|
---|
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-16 encoder.

    The first chunk is encoded with ``codecs.utf_16_encode``; every later
    chunk is encoded with the byte-order-specific encoder that matches
    ``sys.byteorder``.  ``self.encoder`` is ``None`` until that first chunk
    has been produced.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        # No chunk encoded yet: the next encode() call is the "first" one.
        self.encoder = None

    def encode(self, input, final=False):
        """Encode one chunk and return the encoded bytes."""
        if self.encoder is not None:
            # Stream already started: continue in the platform byte order.
            return self.encoder(input, self.errors)[0]
        first_chunk = codecs.utf_16_encode(input, self.errors)[0]
        # Only switch encoders once the first chunk encoded without error.
        self.encoder = (codecs.utf_16_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_16_be_encode)
        return first_chunk

    def reset(self):
        """Return to the initial state, as if nothing had been encoded."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None

    def getstate(self):
        """Return the encoder state as an integer.

        0: stream is in natural order for this platform
        2: endianness hasn't been determined yet
        (this encoder never writes in unnatural order)
        """
        if self.encoder is None:
            return 2
        return 0

    def setstate(self, state):
        """Restore a state previously returned by getstate()."""
        if state:
            # Any non-zero state means "nothing written yet".
            self.encoder = None
            return
        self.encoder = (codecs.utf_16_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_16_be_encode)
|
---|
52 |
|
---|
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-16 decoder that picks its byte order from the BOM.

    Until a BOM has been seen, ``self.decoder`` is ``None`` and input is fed
    to ``codecs.utf_16_ex_decode``, which also reports the detected byte
    order.  Once the byte order is known, ``self.decoder`` holds the matching
    little- or big-endian decode function and is used for all later chunks.

    ``getstate()`` / ``setstate()`` include the detected byte order, so a
    decoder restored from a saved state keeps decoding in the right order
    instead of demanding a BOM again.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        # Byte-order-specific decode function; None until a BOM was seen.
        self.decoder = None

    def _buffer_decode(self, input, errors, final):
        """Decode as much of *input* as possible.

        Returns an ``(output, consumed)`` tuple.  Raises ``UnicodeError`` if
        at least two bytes were consumed and no byte order was detected.
        """
        if self.decoder is None:
            (output, consumed, byteorder) = \
                codecs.utf_16_ex_decode(input, errors, 0, final)
            if byteorder == -1:
                self.decoder = codecs.utf_16_le_decode
            elif byteorder == 1:
                self.decoder = codecs.utf_16_be_decode
            elif consumed >= 2:
                raise UnicodeError("UTF-16 stream does not start with BOM")
            return (output, consumed)
        return self.decoder(input, self.errors, final)

    def reset(self):
        """Drop buffered input and forget the detected byte order."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.decoder = None

    def getstate(self):
        """Return ``(buffered_bytes, byteorder_flag)``.

        byteorder_flag values:
        0: stream is in natural order for this platform
        1: stream is in unnatural order
        2: endianness hasn't been determined yet
        """
        # Only the buffered bytes are taken from the base class state; the
        # second slot is replaced by our own byte-order flag.
        pending = codecs.BufferedIncrementalDecoder.getstate(self)[0]
        if self.decoder is None:
            return (pending, 2)
        unnatural = ((sys.byteorder == "big") !=
                     (self.decoder is codecs.utf_16_be_decode))
        return (pending, int(unnatural))

    def setstate(self, state):
        """Restore a state previously returned by getstate()."""
        # The base class restores the byte buffer from state[0].
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        flag = state[1]
        if flag == 0:
            # Natural order for this platform.
            self.decoder = (codecs.utf_16_be_decode
                            if sys.byteorder == "big"
                            else codecs.utf_16_le_decode)
        elif flag == 1:
            # Unnatural (byte-swapped) order for this platform.
            self.decoder = (codecs.utf_16_le_decode
                            if sys.byteorder == "big"
                            else codecs.utf_16_be_decode)
        else:
            # Byte order not determined yet: wait for a BOM again.
            self.decoder = None
|
---|
74 |
|
---|
class StreamWriter(codecs.StreamWriter):
    """Stream writer for UTF-16.

    The first ``encode()`` call after construction or ``reset()`` goes
    through ``codecs.utf_16_encode``; later calls use the byte-order-specific
    encoder matching ``sys.byteorder``.
    """

    def __init__(self, stream, errors='strict'):
        codecs.StreamWriter.__init__(self, stream, errors)
        # None means the next encode() call is the first one for the stream.
        self.encoder = None

    def encode(self, input, errors='strict'):
        """Encode *input* and return the underlying encoder's result tuple."""
        if self.encoder is not None:
            return self.encoder(input, errors)
        encoded = codecs.utf_16_encode(input, errors)
        # Anything other than 'little' selects the big-endian encoder.
        by_order = {'little': codecs.utf_16_le_encode}
        self.encoder = by_order.get(sys.byteorder, codecs.utf_16_be_encode)
        return encoded

    def reset(self):
        """Reset the writer so the next encode() is treated as the first."""
        codecs.StreamWriter.reset(self)
        self.encoder = None
|
---|
94 |
|
---|
class StreamReader(codecs.StreamReader):
    """Stream reader for UTF-16 that picks its byte order from the BOM.

    The class-level ``decode()`` handles input until a byte order has been
    detected.  It then stores the matching byte-order-specific decode
    function as an *instance* attribute named ``decode``, which shadows the
    method for all later calls.  ``reset()`` removes that attribute so
    detection starts over.
    """

    def reset(self):
        """Reset the reader and forget any detected byte order."""
        codecs.StreamReader.reset(self)
        try:
            # Drop the instance-level override installed by decode().
            del self.decode
        except AttributeError:
            # No byte order had been detected yet; nothing to remove.
            pass

    def decode(self, input, errors='strict'):
        """Decode *input* while the byte order is still unknown.

        Returns an ``(output, consumed)`` tuple.  Raises ``UnicodeError`` if
        at least two bytes were consumed and no byte order was detected.
        """
        (output, consumed, byteorder) = \
            codecs.utf_16_ex_decode(input, errors, 0, False)
        if byteorder == -1:
            self.decode = codecs.utf_16_le_decode
        elif byteorder == 1:
            self.decode = codecs.utf_16_be_decode
        elif consumed >= 2:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise UnicodeError("UTF-16 stream does not start with BOM")
        return (output, consumed)
|
---|
114 |
|
---|
115 | ### encodings module API
|
---|
116 |
|
---|
def getregentry():
    """Return the ``codecs.CodecInfo`` entry describing this 'utf-16' codec.

    The entry bundles this module's stateless encode/decode functions with
    its incremental and stream classes.
    """
    entry = codecs.CodecInfo(
        encode=encode,
        decode=decode,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        name='utf-16',
    )
    return entry
|
---|