Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

encoder.py@ 6

Last change on this file since 6 was 2, checked in by Yuri Dario, 15 years ago
Initial import for vendor code.
Property svn:eol-style set to `native`
File size: 13.1 KB

Line
1	"""Implementation of JSONEncoder
2	"""
3
4	import re
5	import math
6
7	try:
8	from _json import encode_basestring_ascii as c_encode_basestring_ascii
9	except ImportError:
10	c_encode_basestring_ascii = None
11
12	__all__ = ['JSONEncoder']
13
# Characters that must be escaped inside a JSON string literal: the control
# characters 0x00-0x1f, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Backslash, double quote, or anything outside the printable ASCII range
# (space through tilde).  The ``|`` must be a real alternation: an escaped
# ``\|`` would be a literal pipe and the pattern would never match a lone
# quote, backslash or non-ASCII character.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Any character in the range 0x80-0xff; py_encode_basestring_ascii uses it to
# decide whether a ``str`` needs to be decoded from UTF-8 first.
HAS_UTF8 = re.compile(r'[\x80-\xff]')
# Replacement text for every character ESCAPE can match.  The short escapes
# are listed explicitly; the loop below fills in the remaining control
# characters with the generic ``\uXXXX`` form.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
for i in range(0x20):
    # setdefault keeps the short escapes defined above.
    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))

# Function used by floatstr() to render ordinary (finite) floats.
FLOAT_REPR = repr
30
def floatstr(o, allow_nan=True):
    """Return the text used to represent the float ``o`` in JSON output.

    Finite values are rendered with ``FLOAT_REPR``.  NaN and the two
    infinities are rendered as ``'NaN'``, ``'Infinity'`` and ``'-Infinity'``
    when ``allow_nan`` is true.

    Raises ``ValueError`` when ``o`` is NaN or infinite and ``allow_nan`` is
    false.
    """
    # Check for specials.  Note that this type of test is processor- and/or
    # platform-specific, so do tests which don't depend on the internals.
    if math.isnan(o):
        text = 'NaN'
    elif math.isinf(o):
        # copysign transfers the sign of ``o`` onto 1.0, which tells the two
        # infinities apart.
        if math.copysign(1., o) == 1.:
            text = 'Infinity'
        else:
            text = '-Infinity'
    else:
        # Ordinary finite value: nothing special to check.
        return FLOAT_REPR(o)

    if not allow_nan:
        msg = "Out of range float values are not JSON compliant: " + repr(o)
        raise ValueError(msg)

    return text
50
51
def encode_basestring(s):
    """Return a JSON representation of a Python string.

    The result is ``s`` wrapped in double quotes, with every character
    matched by ``ESCAPE`` replaced by its entry in ``ESCAPE_DCT``.  Other
    characters, including non-ASCII ones, are passed through unchanged.
    """
    def replace(match):
        # ESCAPE only matches single characters that all have an entry in
        # ESCAPE_DCT, so a plain lookup is sufficient.
        return ESCAPE_DCT[match.group(0)]
    return '"' + ESCAPE.sub(replace, s) + '"'
59
60
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string.

    A ``str`` containing characters in the 0x80-0xff range is first decoded
    as UTF-8.  The result is wrapped in double quotes; every character
    matched by ``ESCAPE_ASCII`` is replaced by its short escape from
    ``ESCAPE_DCT`` or, failing that, by a ``\\uXXXX`` escape (a surrogate
    pair of two such escapes for code points of 0x10000 and above).
    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def replace(match):
        char = match.group(0)
        try:
            return ESCAPE_DCT[char]
        except KeyError:
            n = ord(char)
            if n < 0x10000:
                return '\\u{0:04x}'.format(n)
            else:
                # surrogate pair: split the 20 bits above 0x10000 into a
                # high (0xd800-based) and a low (0xdc00-based) half.
                n -= 0x10000
                s1 = 0xd800 | ((n >> 10) & 0x3ff)
                s2 = 0xdc00 | (n & 0x3ff)
                return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
79
80
# Use the C implementation imported from _json when it is available;
# otherwise fall back to the pure-Python version defined in this module.
if c_encode_basestring_ascii is not None:
    encode_basestring_ascii = c_encode_basestring_ascii
else:
    encode_basestring_ascii = py_encode_basestring_ascii
85
86
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).

    """
    __all__ = ['__init__', 'default', 'encode', 'iterencode']
    # Class-level defaults; __init__ overrides them per instance when a
    # ``separators`` tuple is supplied.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is False, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is True, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is True, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is True, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is True, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.

        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        # Nesting depth of the container currently being written; only
        # consulted when ``indent`` is not None.
        self.current_indent_level = 0
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadows the default() method on this instance only.
            self.default = default
        self.encoding = encoding

    def _newline_indent(self):
        """Return a newline followed by the indentation for the current level."""
        return '\n' + (' ' * (self.indent * self.current_indent_level))

    def _iterencode_list(self, lst, markers=None):
        """Yield the JSON text chunks for the list or tuple ``lst``.

        ``markers`` is the dict of ids of containers currently being encoded,
        or None when circular-reference checking is disabled.  Raises
        ``ValueError`` if ``lst`` is already present in ``markers``.
        """
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        yield '['
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            separator = self.item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                yield separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            # Step back out before emitting the closing bracket so it lines
            # up with the enclosing level.
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(self, dct, markers=None):
        """Yield the JSON text chunks for the dict ``dct``.

        Keys that are not strings are converted: floats through floatstr(),
        True/False/None to ``'true'``/``'false'``/``'null'``, and int/long
        through str().  Any other key type is skipped when ``skipkeys`` is
        set and raises ``TypeError`` otherwise.  ``markers`` is handled as in
        _iterencode_list(); a repeated container raises ``ValueError``.
        """
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        key_separator = self.key_separator
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            item_separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = self.item_separator
        first = True
        if self.ensure_ascii:
            encoder = encode_basestring_ascii
        else:
            encoder = encode_basestring
        allow_nan = self.allow_nan
        if self.sort_keys:
            keys = dct.keys()
            keys.sort()
            items = [(k, dct[k]) for k in keys]
        else:
            items = dct.iteritems()
        _encoding = self.encoding
        # UTF-8 byte strings are handed to the encoder undecoded; only other
        # encodings are decoded here.
        _do_decode = (_encoding is not None
                      and not (_encoding == 'utf-8'))
        for key, value in items:
            if isinstance(key, str):
                if _do_decode:
                    key = key.decode(_encoding)
            elif isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them. Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = floatstr(key, allow_nan)
            # The singleton checks must come before the int/long check:
            # bool is a subclass of int, so testing isinstance first would
            # turn True/False into 'True'/'False' and make these branches
            # unreachable.
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif self.skipkeys:
                continue
            else:
                raise TypeError("key {0!r} is not a string".format(key))
            if first:
                first = False
            else:
                yield item_separator
            yield encoder(key)
            yield key_separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(self, o, markers=None):
        """Yield the JSON text chunks for an arbitrary object ``o``.

        Dispatches on the type of ``o``.  Objects of an unrecognised type are
        passed through _iterencode_default() and, when ``markers`` is not
        None, tracked in it so that a repeated object raises ``ValueError``.
        """
        if isinstance(o, basestring):
            if self.ensure_ascii:
                encoder = encode_basestring_ascii
            else:
                encoder = encode_basestring
            _encoding = self.encoding
            if (_encoding is not None and isinstance(o, str)
                    and not (_encoding == 'utf-8')):
                o = o.decode(_encoding)
            yield encoder(o)
        elif o is None:
            yield 'null'
        # True/False are tested before int/long because bool is an int
        # subclass.
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield floatstr(o, self.allow_nan)
        elif isinstance(o, (list, tuple)):
            for chunk in self._iterencode_list(o, markers):
                yield chunk
        elif isinstance(o, dict):
            for chunk in self._iterencode_dict(o, markers):
                yield chunk
        else:
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            for chunk in self._iterencode_default(o, markers):
                yield chunk
            if markers is not None:
                del markers[markerid]

    def _iterencode_default(self, o, markers=None):
        """Convert ``o`` with default() and return an iterator encoding the result."""
        newobj = self.default(o)
        return self._iterencode(newobj, markers)

    def default(self, o):
        """Implement this method in a subclass such that it returns a serializable
        object for ``o``, or calls the base implementation (to raise a
        ``TypeError``).

        For example, to support arbitrary iterators, you could implement
        default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = list(self.iterencode(o))
        return ''.join(chunks)

    def iterencode(self, o):
        """Encode the given object and yield each string representation as
        available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        # A fresh markers dict per call enables circular-reference checks;
        # None disables them throughout the recursion.
        if self.check_circular:
            markers = {}
        else:
            markers = None
        return self._iterencode(o, markers)

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/json/encoder.py@ 6

Download in other formats: