Context Navigation

← Previous Revision
Next Revision →
Normal
Revision Log

encoder.py

Last change on this file was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 16.0 KB

Rev	Line
[2]	1	"""Implementation of JSONEncoder
	2	"""
	3	import re
	4
	5	try:
	6	from _json import encode_basestring_ascii as c_encode_basestring_ascii
	7	except ImportError:
	8	c_encode_basestring_ascii = None
[391]	9	try:
	10	from _json import make_encoder as c_make_encoder
	11	except ImportError:
	12	c_make_encoder = None
[2]	13
	14	ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
	15	ESCAPE_ASCII = re.compile(r'([\\"]\|[^\ -~])')
	16	HAS_UTF8 = re.compile(r'[\x80-\xff]')
	17	ESCAPE_DCT = {
	18	'\\': '\\\\',
	19	'"': '\\"',
	20	'\b': '\\b',
	21	'\f': '\\f',
	22	'\n': '\\n',
	23	'\r': '\\r',
	24	'\t': '\\t',
	25	}
	26	for i in range(0x20):
	27	ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
[391]	28	#ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
[2]	29
[391]	30	INFINITY = float('inf')
[2]	31	FLOAT_REPR = repr
	32
	33	def encode_basestring(s):
	34	"""Return a JSON representation of a Python string
	35
	36	"""
	37	def replace(match):
	38	return ESCAPE_DCT[match.group(0)]
	39	return '"' + ESCAPE.sub(replace, s) + '"'
	40
	41
	42	def py_encode_basestring_ascii(s):
[391]	43	"""Return an ASCII-only JSON representation of a Python string
	44
	45	"""
[2]	46	if isinstance(s, str) and HAS_UTF8.search(s) is not None:
	47	s = s.decode('utf-8')
	48	def replace(match):
	49	s = match.group(0)
	50	try:
	51	return ESCAPE_DCT[s]
	52	except KeyError:
	53	n = ord(s)
	54	if n < 0x10000:
	55	return '\\u{0:04x}'.format(n)
[391]	56	#return '\\u%04x' % (n,)
[2]	57	else:
	58	# surrogate pair
	59	n -= 0x10000
	60	s1 = 0xd800 \| ((n >> 10) & 0x3ff)
	61	s2 = 0xdc00 \| (n & 0x3ff)
	62	return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
[391]	63	#return '\\u%04x\\u%04x' % (s1, s2)
[2]	64	return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
	65
	66
# Use the C implementation from _json when it could be imported; otherwise
# fall back to the pure-Python py_encode_basestring_ascii.
if c_encode_basestring_ascii:
    encode_basestring_ascii = c_encode_basestring_ascii
else:
    encode_basestring_ascii = py_encode_basestring_ascii
[2]	69
	70	class JSONEncoder(object):
	71	"""Extensible JSON <http://json.org> encoder for Python data structures.
	72
	73	Supports the following objects and types by default:
	74
	75	+-------------------+---------------+
	76	\| Python \| JSON \|
	77	+===================+===============+
	78	\| dict \| object \|
	79	+-------------------+---------------+
	80	\| list, tuple \| array \|
	81	+-------------------+---------------+
	82	\| str, unicode \| string \|
	83	+-------------------+---------------+
	84	\| int, long, float \| number \|
	85	+-------------------+---------------+
	86	\| True \| true \|
	87	+-------------------+---------------+
	88	\| False \| false \|
	89	+-------------------+---------------+
	90	\| None \| null \|
	91	+-------------------+---------------+
	92
	93	To extend this to recognize other objects, subclass and implement a
	94	``.default()`` method with another method that returns a serializable
	95	object for ``o`` if possible, otherwise it should call the superclass
	96	implementation (to raise ``TypeError``).
	97
	98	"""
	99	item_separator = ', '
	100	key_separator = ': '
	101	def __init__(self, skipkeys=False, ensure_ascii=True,
	102	check_circular=True, allow_nan=True, sort_keys=False,
	103	indent=None, separators=None, encoding='utf-8', default=None):
	104	"""Constructor for JSONEncoder, with sensible defaults.
	105
[391]	106	If skipkeys is false, then it is a TypeError to attempt
[2]	107	encoding of keys that are not str, int, long, float or None. If
	108	skipkeys is True, such items are simply skipped.
	109
[391]	110	If ensure_ascii is true (the default), all non-ASCII
	111	characters in the output are escaped with \uXXXX sequences,
	112	and the results are str instances consisting of ASCII
	113	characters only. If ensure_ascii is False, a result may be a
	114	unicode instance. This usually happens if the input contains
	115	unicode strings or the encoding parameter is used.
[2]	116
[391]	117	If check_circular is true, then lists, dicts, and custom encoded
[2]	118	objects will be checked for circular references during encoding to
	119	prevent an infinite recursion (which would cause an OverflowError).
	120	Otherwise, no such check takes place.
	121
[391]	122	If allow_nan is true, then NaN, Infinity, and -Infinity will be
[2]	123	encoded as such. This behavior is not JSON specification compliant,
	124	but is consistent with most JavaScript based encoders and decoders.
	125	Otherwise, it will be a ValueError to encode such floats.
	126
[391]	127	If sort_keys is true, then the output of dictionaries will be
[2]	128	sorted by key; this is useful for regression tests to ensure
	129	that JSON serializations can be compared on a day-to-day basis.
	130
	131	If indent is a non-negative integer, then JSON array
	132	elements and object members will be pretty-printed with that
	133	indent level. An indent level of 0 will only insert newlines.
[391]	134	None is the most compact representation. Since the default
	135	item separator is ', ', the output might include trailing
	136	whitespace when indent is specified. You can use
	137	separators=(',', ': ') to avoid this.
[2]	138
	139	If specified, separators should be a (item_separator, key_separator)
	140	tuple. The default is (', ', ': '). To get the most compact JSON
	141	representation you should specify (',', ':') to eliminate whitespace.
	142
	143	If specified, default is a function that gets called for objects
	144	that can't otherwise be serialized. It should return a JSON encodable
	145	version of the object or raise a ``TypeError``.
	146
	147	If encoding is not None, then all input strings will be
	148	transformed into unicode using that encoding prior to JSON-encoding.
	149	The default is UTF-8.
	150
	151	"""
[391]	152
[2]	153	self.skipkeys = skipkeys
	154	self.ensure_ascii = ensure_ascii
	155	self.check_circular = check_circular
	156	self.allow_nan = allow_nan
	157	self.sort_keys = sort_keys
	158	self.indent = indent
	159	if separators is not None:
	160	self.item_separator, self.key_separator = separators
	161	if default is not None:
	162	self.default = default
	163	self.encoding = encoding
	164
[391]	165	def default(self, o):
	166	"""Implement this method in a subclass such that it returns
	167	a serializable object for ``o``, or calls the base implementation
	168	(to raise a ``TypeError``).
[2]	169
[391]	170	For example, to support arbitrary iterators, you could
	171	implement default like this::
	172
	173	def default(self, o):
	174	try:
	175	iterable = iter(o)
	176	except TypeError:
	177	pass
	178	else:
	179	return list(iterable)
	180	# Let the base class default method raise the TypeError
	181	return JSONEncoder.default(self, o)
	182
	183	"""
	184	raise TypeError(repr(o) + " is not JSON serializable")
	185
	186	def encode(self, o):
	187	"""Return a JSON string representation of a Python data structure.
	188
	189	>>> JSONEncoder().encode({"foo": ["bar", "baz"]})
	190	'{"foo": ["bar", "baz"]}'
	191
	192	"""
	193	# This is for extremely simple cases and benchmarks.
	194	if isinstance(o, basestring):
	195	if isinstance(o, str):
	196	_encoding = self.encoding
	197	if (_encoding is not None
	198	and not (_encoding == 'utf-8')):
	199	o = o.decode(_encoding)
	200	if self.ensure_ascii:
	201	return encode_basestring_ascii(o)
	202	else:
	203	return encode_basestring(o)
	204	# This doesn't pass the iterator directly to ''.join() because the
	205	# exceptions aren't as detailed. The list call should be roughly
	206	# equivalent to the PySequence_Fast that ''.join() would do.
	207	chunks = self.iterencode(o, _one_shot=True)
	208	if not isinstance(chunks, (list, tuple)):
	209	chunks = list(chunks)
	210	return ''.join(chunks)
	211
	212	def iterencode(self, o, _one_shot=False):
	213	"""Encode the given object and yield each string
	214	representation as available.
	215
	216	For example::
	217
	218	for chunk in JSONEncoder().iterencode(bigobject):
	219	mysocket.write(chunk)
	220
	221	"""
	222	if self.check_circular:
	223	markers = {}
	224	else:
	225	markers = None
	226	if self.ensure_ascii:
	227	_encoder = encode_basestring_ascii
	228	else:
	229	_encoder = encode_basestring
	230	if self.encoding != 'utf-8':
	231	def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
	232	if isinstance(o, str):
	233	o = o.decode(_encoding)
	234	return _orig_encoder(o)
	235
	236	def floatstr(o, allow_nan=self.allow_nan,
	237	_repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
	238	# Check for specials. Note that this type of test is processor
	239	# and/or platform-specific, so do tests which don't depend on the
	240	# internals.
	241
	242	if o != o:
	243	text = 'NaN'
	244	elif o == _inf:
	245	text = 'Infinity'
	246	elif o == _neginf:
	247	text = '-Infinity'
	248	else:
	249	return _repr(o)
	250
	251	if not allow_nan:
	252	raise ValueError(
	253	"Out of range float values are not JSON compliant: " +
	254	repr(o))
	255
	256	return text
	257
	258
	259	if (_one_shot and c_make_encoder is not None
	260	and self.indent is None and not self.sort_keys):
	261	_iterencode = c_make_encoder(
	262	markers, self.default, _encoder, self.indent,
	263	self.key_separator, self.item_separator, self.sort_keys,
	264	self.skipkeys, self.allow_nan)
	265	else:
	266	_iterencode = _make_iterencode(
	267	markers, self.default, _encoder, self.indent, floatstr,
	268	self.key_separator, self.item_separator, self.sort_keys,
	269	self.skipkeys, _one_shot)
	270	return _iterencode(o, 0)
	271
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        ValueError=ValueError,
        basestring=basestring,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        long=long,
        str=str,
        tuple=tuple,
    ):
    """Build and return the pure-Python ``_iterencode`` generator function.

    The returned function takes ``(o, _current_indent_level)`` and yields
    the JSON text for ``o`` as a sequence of string chunks.  It is a
    closure over the arguments given here:

    markers -- dict used to detect circular references (keyed by ``id()``
        of the container currently being encoded), or None to skip the
        check.
    _default -- called with any object of an unsupported type; its return
        value is encoded in place of the object.
    _encoder -- converts a string to its quoted JSON form.
    _indent -- number of spaces per indent level, or None for no
        pretty-printing.
    _floatstr -- converts a float to its JSON text.
    _key_separator, _item_separator -- text placed between a key and its
        value, and between consecutive items.
    _sort_keys -- if true, dict items are emitted sorted by key.
    _skipkeys -- if true, dict items whose key has an unsupported type are
        skipped instead of raising TypeError.
    _one_shot -- accepted for signature compatibility; not referenced in
        this function's body.

    The remaining keyword parameters only rebind builtins as locals (see
    the HACK note in the signature) and are not meant to be passed.

    """

    def _iterencode_list(lst, _current_indent_level):
        # Empty sequences are a single chunk and are never put in markers.
        if not lst:
            yield '[]'
            return
        if markers is not None:
            # Register this container while it is being encoded; meeting the
            # same id() again before it is removed means it contains itself.
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # buf is the text that has to precede the next item: the opening
        # bracket (plus indentation) for the first one, the separator for
        # all following ones.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            # Scalars are emitted together with the pending prefix as one
            # chunk.  True/False are tested by identity before the int test
            # because bool is a subclass of int.
            if isinstance(value, basestring):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, (int, long)):
                yield buf + str(value)
            elif isinstance(value, float):
                yield buf + _floatstr(value)
            else:
                # Nested value: flush the prefix, then stream the chunks of
                # the matching sub-encoder.
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            # The closing bracket goes on its own line, one level back out.
            _current_indent_level -= 1
            yield '\n' + (' ' * (_indent * _current_indent_level))
        yield ']'
        if markers is not None:
            # Finished without error: this container is no longer "open".
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        # Empty dicts are a single chunk and are never put in markers.
        if not dct:
            yield '{}'
            return
        if markers is not None:
            # Same circular-reference bookkeeping as in _iterencode_list.
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        # With _sort_keys the items are materialized and sorted by key;
        # otherwise they are iterated lazily.
        if _sort_keys:
            items = sorted(dct.items(), key=lambda kv: kv[0])
        else:
            items = dct.iteritems()
        for key, value in items:
            # Keys must end up as strings: string keys are used as they are,
            # the other supported scalar types are converted to their JSON
            # text (which is then quoted by _encoder below).
            if isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif _skipkeys:
                # Unsupported key type: drop the whole item.
                continue
            else:
                raise TypeError("key " + repr(key) + " is not a string")
            # The separator is emitted only once an item is known to be
            # written, so skipped keys leave no stray separators behind.
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, basestring):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, (int, long)):
                yield str(value)
            elif isinstance(value, float):
                yield _floatstr(value)
            else:
                # Nested value: stream the chunks of the matching
                # sub-encoder.
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            # The closing brace goes on its own line, one level back out.
            _current_indent_level -= 1
            yield '\n' + (' ' * (_indent * _current_indent_level))
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        # Entry point: dispatch on the type of o.
        if isinstance(o, basestring):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            for chunk in _iterencode_list(o, _current_indent_level):
                yield chunk
        elif isinstance(o, dict):
            for chunk in _iterencode_dict(o, _current_indent_level):
                yield chunk
        else:
            # Unsupported type: encode whatever _default returns instead.
            # The original object is registered in markers first, so a
            # replacement that leads back to the same object is reported as
            # a circular reference.
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            for chunk in _iterencode(o, _current_indent_level):
                yield chunk
            if markers is not None:
                del markers[markerid]

    return _iterencode

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/json/encoder.py

Download in other formats: