Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

encoder.py

Last change on this file was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 16.0 KB

Line
1	"""Implementation of JSONEncoder
2	"""
3	import re
4
5	try:
6	from _json import encode_basestring_ascii as c_encode_basestring_ascii
7	except ImportError:
8	c_encode_basestring_ascii = None
9	try:
10	from _json import make_encoder as c_make_encoder
11	except ImportError:
12	c_make_encoder = None
13
# Characters that must be escaped in every JSON string: the control
# characters 0x00-0x1f, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Characters that must be escaped for ASCII-only output: backslash, double
# quote, or anything outside the printable ASCII range (space through '~').
# The two alternatives are joined with the regex alternation operator.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any character in the 0x80-0xff range, i.e. a byte string that is
# not pure ASCII.
HAS_UTF8 = re.compile(r'[\x80-\xff]')
# Short escapes for the characters that have one.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# Every remaining control character gets a generic \u00XX escape;
# setdefault() leaves the short escapes defined above untouched.
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))

INFINITY = float('inf')
FLOAT_REPR = repr
32
def encode_basestring(s):
    """Quote *s* as a JSON string literal.

    Every character matched by ``ESCAPE`` (control characters, backslash
    and double quote) is replaced by its entry in ``ESCAPE_DCT``; all other
    characters are copied through unchanged.

    """
    escaped = ESCAPE.sub(lambda found: ESCAPE_DCT[found.group(0)], s)
    return '"' + escaped + '"'
40
41
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string

    A byte string containing characters in the 0x80-0xff range is first
    decoded as UTF-8.  Characters that have an entry in ``ESCAPE_DCT`` use
    that escape; every other character matched by ``ESCAPE_ASCII`` is
    written as a ``\\uXXXX`` escape, and code points at or above 0x10000
    are written as a pair of surrogate escapes.

    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def replace(match):
        char = match.group(0)
        try:
            return ESCAPE_DCT[char]
        except KeyError:
            n = ord(char)
            if n < 0x10000:
                return '\\u{0:04x}'.format(n)
            # surrogate pair: split the 20 bits above 0x10000 into a
            # high half (0xd800 range) and a low half (0xdc00 range)
            n -= 0x10000
            s1 = 0xd800 | ((n >> 10) & 0x3ff)
            s2 = 0xdc00 | (n & 0x3ff)
            return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)

    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
65
66
# Use the C implementation when it was importable, otherwise fall back to
# the pure-Python one defined in this module.
encode_basestring_ascii = c_encode_basestring_ascii
if not encode_basestring_ascii:
    encode_basestring_ascii = py_encode_basestring_ascii
69
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).

    """
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true (the default), all non-ASCII
        characters in the output are escaped with \\uXXXX sequences,
        and the results are str instances consisting of ASCII
        characters only.  If ensure_ascii is False, a result may be a
        unicode instance.  This usually happens if the input contains
        unicode strings or the encoding parameter is used.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would otherwise exhaust the
        interpreter's recursion limit).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.  Since the default
        item separator is ', ',  the output might include trailing
        whitespace when indent is specified.  You can use
        separators=(',', ': ') to avoid this.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.

        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # An instance attribute shadows the default() method below.
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                # Let the base class default method raise the TypeError
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # Only wrap the string encoder when byte strings really have to be
        # transcoded.  encoding=None means "do not transcode" (encode()
        # treats it the same way); without the None check every byte string
        # would hit str.decode(None), which raises TypeError.
        if self.encoding is not None and self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # The C encoder is only used for one-shot encoding without
        # indentation or key sorting; everything else goes through the
        # pure-Python generator.
        if (_one_shot and c_make_encoder is not None
                and self.indent is None and not self.sort_keys):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
271
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        ValueError=ValueError,
        basestring=basestring,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        long=long,
        str=str,
        tuple=tuple,
    ):
    """Build and return the pure-Python ``_iterencode(o, indent_level)``
    generator function, closed over the given encoder settings.

    markers -- dict used to detect circular references (keyed by ``id()``
        of the containers/objects currently being encoded), or None to
        disable the check.
    _default -- callable invoked on objects of unsupported type; its result
        is encoded in place of the object.
    _encoder -- callable turning a string into its quoted JSON form.
    _indent -- number of spaces per nesting level, or None for no
        pretty-printing.
    _floatstr -- callable turning a float into its JSON text.
    _key_separator, _item_separator -- strings emitted between a key and
        its value, and between consecutive items.
    _sort_keys -- if true, dict items are emitted sorted by key.
    _skipkeys -- if true, dict keys of unsupported type are skipped instead
        of raising TypeError.
    _one_shot -- accepted for signature parity; not referenced in this
        function's body.

    The remaining keyword parameters only rebind builtins as locals and are
    not meant to be passed by callers.
    """

    def _iterencode_list(lst, _current_indent_level):
        """Yield the JSON text chunks for a list or tuple."""
        if not lst:
            yield '[]'
            return
        if markers is not None:
            # Register this container; seeing it again while it is still
            # registered means it (indirectly) contains itself.
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # The opening bracket is buffered so that it is emitted together
        # with the first element rather than as a separate chunk.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                # From the second element on, the prefix is the separator.
                buf = separator
            if isinstance(value, basestring):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, (int, long)):
                yield buf + str(value)
            elif isinstance(value, float):
                yield buf + _floatstr(value)
            else:
                # Nested container or unsupported object: emit the prefix
                # on its own, then stream the nested chunks.
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            # Closing bracket goes on its own line at the outer indent level.
            _current_indent_level -= 1
            yield '\n' + (' ' * (_indent * _current_indent_level))
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        """Yield the JSON text chunks for a dict."""
        if not dct:
            yield '{}'
            return
        if markers is not None:
            # Same circular-reference bookkeeping as for lists.
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            items = sorted(dct.items(), key=lambda kv: kv[0])
        else:
            items = dct.iteritems()
        for key, value in items:
            if isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = _floatstr(key)
            # True/False are tested before the int branch because bool
            # values are also int instances.
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError("key " + repr(key) + " is not a string")
            # The separator is only emitted once a key has been accepted, so
            # skipped keys never leave a dangling separator behind.
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, basestring):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, (int, long)):
                yield str(value)
            elif isinstance(value, float):
                yield _floatstr(value)
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            # Closing brace goes on its own line at the outer indent level.
            _current_indent_level -= 1
            yield '\n' + (' ' * (_indent * _current_indent_level))
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        """Yield the JSON text chunks for an arbitrary object ``o``."""
        if isinstance(o, basestring):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            for chunk in _iterencode_list(o, _current_indent_level):
                yield chunk
        elif isinstance(o, dict):
            for chunk in _iterencode_dict(o, _current_indent_level):
                yield chunk
        else:
            # Unsupported type: convert it with _default() and encode the
            # result.  The original object is registered in markers for the
            # duration so that a _default() chain leading back to it is
            # reported as a circular reference.
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            for chunk in _iterencode(o, _current_indent_level):
                yield chunk
            if markers is not None:
                del markers[markerid]

    return _iterencode

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/json/encoder.py

Download in other formats: