Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

utf_8_sig.py

Last change on this file was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 3.6 KB

Line
1	""" Python 'utf-8-sig' Codec
2	This works similarly to UTF-8, with the following changes:
3
4	* On encoding/writing a UTF-8 encoded BOM will be prepended/written as the
5	first three bytes.
6
7	* On decoding/reading if the first three bytes are a UTF-8 encoded BOM, these
8	bytes will be skipped.
9	"""
10	import codecs
11
12	### Codec APIs
13
def encode(input, errors='strict'):
    """Encode *input* as UTF-8 with a leading UTF-8 BOM (stateless).

    Returns a ``(data, length)`` tuple: ``data`` is ``codecs.BOM_UTF8``
    followed by the UTF-8 encoding of *input*, and ``length`` is
    ``len(input)``.  *errors* is passed through to the UTF-8 encoder.
    """
    payload, _ = codecs.utf_8_encode(input, errors)
    return (codecs.BOM_UTF8 + payload, len(input))
16
def decode(input, errors='strict'):
    """Decode UTF-8 *input*, skipping one leading UTF-8 BOM (stateless).

    Returns a ``(text, consumed)`` tuple.  When *input* starts with
    ``codecs.BOM_UTF8`` the three signature bytes are not decoded but are
    still counted in ``consumed``.  The input is always decoded as final.
    """
    if input[:3] == codecs.BOM_UTF8:
        # Decode everything after the signature, then account for the
        # three skipped bytes in the consumed count.
        text, used = codecs.utf_8_decode(input[3:], errors, True)
        return (text, used + 3)
    text, used = codecs.utf_8_decode(input, errors, True)
    return (text, used)
24
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental UTF-8 encoder that emits a BOM before the first chunk.

    ``self.first`` is 1 until the first call to :meth:`encode` and 0
    afterwards; it is also the value exposed by getstate()/setstate().
    """

    def __init__(self, errors='strict'):
        codecs.IncrementalEncoder.__init__(self, errors)
        self.first = 1

    def encode(self, input, final=False):
        """Return the UTF-8 bytes for *input*, BOM-prefixed on first use."""
        if not self.first:
            return codecs.utf_8_encode(input, self.errors)[0]
        # The flag is cleared before encoding, so it stays cleared even if
        # the encoder raises on this chunk.
        self.first = 0
        body, _ = codecs.utf_8_encode(input, self.errors)
        return codecs.BOM_UTF8 + body

    def reset(self):
        """Return to the initial state: the next chunk gets a BOM again."""
        codecs.IncrementalEncoder.reset(self)
        self.first = 1

    def getstate(self):
        """Return the BOM-pending flag as the encoder state."""
        return self.first

    def setstate(self, state):
        """Restore the BOM-pending flag from *state*."""
        self.first = state
46
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental UTF-8 decoder that skips one leading UTF-8 BOM.

    ``self.first`` is 1 while the decoder has not yet decided whether the
    stream starts with a BOM, and 0 once that decision has been made.  The
    flag is an integer (like ``IncrementalEncoder.first``) so that it can
    be carried in the second slot of the getstate() tuple.
    """

    def __init__(self, errors='strict'):
        codecs.BufferedIncrementalDecoder.__init__(self, errors)
        self.first = 1

    def _buffer_decode(self, input, errors, final):
        """Decode *input*, returning ``(text, bytes_consumed)``.

        *input* is the previously buffered bytes plus the new chunk; any
        bytes not reported as consumed are kept by the base class and
        passed in again on the next call.
        """
        if self.first:
            if len(input) < 3:
                if codecs.BOM_UTF8.startswith(input):
                    # not enough data to decide if this really is a BOM
                    # => try again on the next call
                    return (u"", 0)
                else:
                    # Cannot be a BOM: decode as plain UTF-8 from now on.
                    self.first = 0
            else:
                self.first = 0
                if input[:3] == codecs.BOM_UTF8:
                    # Skip the signature but count it as consumed.
                    (output, consumed) = codecs.utf_8_decode(
                        input[3:], errors, final)
                    return (output, consumed + 3)
        return codecs.utf_8_decode(input, errors, final)

    def reset(self):
        """Return to the initial state: a leading BOM is skipped again."""
        codecs.BufferedIncrementalDecoder.reset(self)
        self.first = 1

    def getstate(self):
        """Return ``(buffered_bytes, first)``.

        The inherited implementation reports only the byte buffer; without
        the BOM flag a restored decoder would treat mid-stream data as the
        start of the stream and drop a U+FEFF found there.
        """
        state = codecs.BufferedIncrementalDecoder.getstate(self)
        # state[1] is the base class's flags slot; replace it with ours.
        return (state[0], self.first)

    def setstate(self, state):
        """Restore the byte buffer and the BOM flag saved by getstate()."""
        # The base class only looks at state[0] (the buffered bytes).
        codecs.BufferedIncrementalDecoder.setstate(self, state)
        self.first = state[1]
71
class StreamWriter(codecs.StreamWriter):
    """Stream writer that emits a UTF-8 BOM before the first write.

    The class-level :meth:`encode` handles the first chunk and then shadows
    itself with an instance attribute bound to the plain UTF-8 encoder;
    :meth:`reset` removes that attribute so the BOM is written again.
    """

    def reset(self):
        """Reset the writer so that the next write is BOM-prefixed again."""
        codecs.StreamWriter.reset(self)
        # Remove the per-instance override, if one was installed.
        vars(self).pop('encode', None)

    def encode(self, input, errors='strict'):
        """Encode the first chunk with a BOM; later chunks bypass this."""
        # Install the plain encoder first so it stays in place even if
        # encoding this chunk raises.
        self.encode = codecs.utf_8_encode
        payload, _ = codecs.utf_8_encode(input, errors)
        return (codecs.BOM_UTF8 + payload, len(input))
83
class StreamReader(codecs.StreamReader):
    """Stream reader that skips one leading UTF-8 BOM.

    The class-level :meth:`decode` runs until it can tell whether the
    stream starts with a BOM, then shadows itself with an instance
    attribute bound to the plain UTF-8 decoder; :meth:`reset` removes that
    attribute so BOM detection starts over.
    """

    def reset(self):
        """Reset the reader so that a leading BOM is looked for again."""
        codecs.StreamReader.reset(self)
        # Remove the per-instance override, if one was installed.
        vars(self).pop('decode', None)

    def decode(self, input, errors='strict'):
        """Decode *input*, returning ``(text, bytes_consumed)``."""
        if len(input) < 3:
            if codecs.BOM_UTF8.startswith(input):
                # not enough data to decide if this is a BOM
                # => try again on the next call
                return (u"", 0)
            skip = 0
        else:
            skip = 3 if input[:3] == codecs.BOM_UTF8 else 0
        # The BOM question is settled: later calls go straight to UTF-8.
        self.decode = codecs.utf_8_decode
        if skip:
            text, used = codecs.utf_8_decode(input[skip:], errors)
            return (text, used + skip)
        return codecs.utf_8_decode(input, errors)
105
106	### encodings module API
107
def getregentry():
    """Return the ``codecs.CodecInfo`` record describing 'utf-8-sig'."""
    entry_points = dict(
        name='utf-8-sig',
        encode=encode,
        decode=decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
    return codecs.CodecInfo(**entry_points)

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/encodings/utf_8_sig.py

Download in other formats: