1 | """
|
---|
2 | Python 'utf-32' Codec
|
---|
3 | """
|
---|
4 | import codecs, sys
|
---|
5 |
|
---|
6 | ### Codec APIs
|
---|
7 |
|
---|
# Stateless encoder for this codec: a direct alias of codecs.utf_32_encode.
encode = codecs.utf_32_encode
|
---|
9 |
|
---|
def decode(input, errors='strict'):
    """Decode a complete UTF-32 byte string in a single call.

    This is the stateless decoder of the codec. *input* and *errors* are
    forwarded unchanged to ``codecs.utf_32_decode``; the third argument is
    always True, so the data is treated as the whole stream rather than
    as one chunk of a longer one.

    Returns the result of ``codecs.utf_32_decode`` unchanged.
    """
    whole_stream = True
    return codecs.utf_32_decode(input, errors, whole_stream)
|
---|
12 |
|
---|
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-32 encoder.

    The first chunk is encoded with ``codecs.utf_32_encode``; every later
    chunk is encoded with the little- or big-endian encoder selected from
    ``sys.byteorder``. ``self.encoder`` is None until the first chunk has
    been encoded.
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        # None: no chunk has been encoded yet.
        self.encoder = None

    def encode(self, input, final=False):
        """Encode *input* and return the encoded data.

        *final* is accepted for interface compatibility but not used.
        """
        if self.encoder is not None:
            return self.encoder(input, self.errors)[0]

        # First chunk: use the generic encoder, and only switch to the
        # fixed-endianness encoder once that call has succeeded.
        encoded = codecs.utf_32_encode(input, self.errors)[0]
        self.encoder = (codecs.utf_32_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_32_be_encode)
        return encoded

    def reset(self):
        """Return to the initial state (no chunk encoded yet)."""
        codecs.IncrementalEncoder.reset(self)
        self.encoder = None

    def getstate(self):
        """Return the encoder state as an integer.

        0: stream is in natural order for this platform
        2: endianness hasn't been determined yet
        (the encoder never writes in unnatural order)
        """
        if self.encoder is None:
            return 2
        return 0

    def setstate(self, state):
        """Restore a state previously returned by ``getstate()``.

        Any true value puts the encoder back into its initial state; a
        false value selects the encoder for the platform's byte order.
        """
        if state:
            self.encoder = None
            return
        self.encoder = (codecs.utf_32_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_32_be_encode)
|
---|
47 |
|
---|
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-32 decoder that picks its byte order from the BOM.

    ``self.decoder`` is None until the byte order has been determined;
    afterwards it is ``codecs.utf_32_le_decode`` or
    ``codecs.utf_32_be_decode``.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        # None: byte order not determined yet.
        self.decoder = None

    def _buffer_decode(self, input, errors, final):
        """Decode *input* and return ``(text, bytes_consumed)``.

        Raises UnicodeError when at least four bytes were consumed without
        a byte order having been detected.
        """
        if self.decoder is not None:
            return self.decoder(input, self.errors, final)

        # Byte order still unknown: let the "ex" decoder report it.
        text, used, order = codecs.utf_32_ex_decode(input, errors, 0, final)
        if order == -1:
            self.decoder = codecs.utf_32_le_decode
        elif order == 1:
            self.decoder = codecs.utf_32_be_decode
        elif used >= 4:
            raise UnicodeError("UTF-32 stream does not start with BOM")
        return (text, used)

    def reset(self):
        """Drop buffered input and forget the detected byte order."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.decoder = None

    def getstate(self):
        """Return ``(buffered_input, flag)`` describing the decoder state.

        Only the first item of the base class state is kept; its additional
        state info must be None here, as it isn't passed along to the
        caller. The flag this class adds means:

        0: stream is in natural order for this platform
        1: stream is in unnatural order
        2: endianness hasn't been determined yet
        """
        pending = codecs.BufferedIncrementalDecoder.getstate(self)[0]
        if self.decoder is None:
            return (pending, 2)
        platform_is_big = sys.byteorder == "big"
        stream_is_big = self.decoder is codecs.utf_32_be_decode
        return (pending, int(platform_is_big != stream_is_big))

    def setstate(self, state):
        """Restore a state previously returned by ``getstate()``."""
        # state[1] will be ignored by BufferedIncrementalDecoder.setstate()
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        flag = state[1]
        platform_is_big = sys.byteorder == "big"
        if flag == 0:
            # Natural order for this platform.
            if platform_is_big:
                self.decoder = codecs.utf_32_be_decode
            else:
                self.decoder = codecs.utf_32_le_decode
        elif flag == 1:
            # Unnatural (byte-swapped) order.
            if platform_is_big:
                self.decoder = codecs.utf_32_le_decode
            else:
                self.decoder = codecs.utf_32_be_decode
        else:
            self.decoder = None
|
---|
98 |
|
---|
class StreamWriter(codecs.StreamWriter):
    """Stream writer for UTF-32.

    The first call to ``encode`` uses ``codecs.utf_32_encode``; later calls
    use the little- or big-endian encoder selected from ``sys.byteorder``.
    ``self.encoder`` is None until the first call has succeeded.
    """

    def __init__(self, stream, errors='strict'):
        # None: nothing has been encoded yet.
        self.encoder = None
        codecs.StreamWriter.__init__(self, stream, errors)

    def reset(self):
        """Reset the writer to its initial state."""
        codecs.StreamWriter.reset(self)
        self.encoder = None

    def encode(self, input, errors='strict'):
        """Encode *input* and return the underlying encoder's result tuple."""
        if self.encoder is not None:
            return self.encoder(input, errors)

        # First call: use the generic encoder, then remember the
        # fixed-endianness encoder for all later calls.
        first_result = codecs.utf_32_encode(input, errors)
        self.encoder = (codecs.utf_32_le_encode
                        if sys.byteorder == 'little'
                        else codecs.utf_32_be_encode)
        return first_result
|
---|
118 |
|
---|
class StreamReader(codecs.StreamReader):
    """Stream reader for UTF-32 that picks its byte order from the BOM.

    Until the byte order is known, ``decode`` is the method defined below.
    Once a byte order has been detected, the method is shadowed on the
    instance by ``codecs.utf_32_le_decode`` or ``codecs.utf_32_be_decode``,
    so later calls go straight to the fixed-endianness decoder.
    """

    def reset(self):
        """Reset the reader and forget the detected byte order."""
        codecs.StreamReader.reset(self)
        # Remove the per-instance decoder installed by decode(), if any, so
        # the class-level method is used again. Nothing to remove when no
        # byte order has been detected yet.
        try:
            del self.decode
        except AttributeError:
            pass

    def decode(self, input, errors='strict'):
        """Decode *input* while the byte order is still undetermined.

        Returns ``(text, bytes_consumed)``.

        Raises UnicodeError when at least four bytes were consumed without
        a byte order having been detected.
        """
        (text, consumed, byteorder) = \
            codecs.utf_32_ex_decode(input, errors, 0, False)
        if byteorder == -1:
            self.decode = codecs.utf_32_le_decode
        elif byteorder == 1:
            self.decode = codecs.utf_32_be_decode
        elif consumed >= 4:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise UnicodeError("UTF-32 stream does not start with BOM")
        return (text, consumed)
|
---|
138 |
|
---|
139 | ### encodings module API
|
---|
140 |
|
---|
def getregentry():
    """Return the ``codecs.CodecInfo`` entry describing the 'utf-32' codec.

    The entry bundles this module's stateless ``encode``/``decode``
    functions with its incremental and stream classes.
    """
    registration = dict(
        name='utf-32',
        encode=encode,
        decode=decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
    return codecs.CodecInfo(**registration)
|
---|