Context Navigation

decoder.py@ 393

Last change on this file since 393 was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 13.6 KB

Line
1	"""Implementation of JSONDecoder
2	"""
3	import re
4	import sys
5	import struct
6
7	from json import scanner
8	try:
9	from _json import scanstring as c_scanstring
10	except ImportError:
11	c_scanstring = None
12
13	__all__ = ['JSONDecoder']
14
# Regex flags shared by every pattern compiled in this module.
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
16
17	def _floatconstants():
18	_BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
19	if sys.byteorder != 'big':
20	_BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
21	nan, inf = struct.unpack('dd', _BYTES)
22	return nan, inf, -inf
23
24	NaN, PosInf, NegInf = _floatconstants()
25
26
def linecol(doc, pos):
    """Translate the offset ``pos`` in ``doc`` into ``(lineno, colno)``.

    Both numbers are 1-based.  The column is the distance from the last
    newline before ``pos``; ``rfind`` yields -1 when there is none, which
    makes the column ``pos + 1`` on the first line.
    """
    newlines_before = doc.count('\n', 0, pos)
    last_newline = doc.rfind('\n', 0, pos)
    return newlines_before + 1, pos - last_newline
34
35
def errmsg(msg, doc, pos, end=None):
    """Format ``msg`` with the line/column location of ``pos`` in ``doc``.

    When ``end`` is given, the message describes the range from ``pos`` to
    ``end`` instead of a single position.
    """
    # Note that this function is called from _json
    lineno, colno = linecol(doc, pos)
    if end is not None:
        endlineno, endcolno = linecol(doc, end)
        range_fmt = ('{0}: line {1} column {2} - line {3} column {4} '
                     '(char {5} - {6})')
        return range_fmt.format(
            msg, lineno, colno, endlineno, endcolno, pos, end)
    point_fmt = '{0}: line {1} column {2} (char {3})'
    return point_fmt.format(msg, lineno, colno, pos)
49
50
# Literal names mapped to the float constants built above; used as the
# default ``parse_constant`` lookup of JSONDecoder.
_CONSTANTS = {
    'NaN': NaN,
    'Infinity': PosInf,
    '-Infinity': NegInf,
}

# Group 1: a (possibly empty) run of ordinary characters.
# Group 2: the character that ended the run -- a double quote, a backslash
# or a control character in the range 0x00-0x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Single-character escapes (the character following a backslash) and the
# character each one stands for.
BACKSLASH = {
    '"': u'"',
    '\\': u'\\',
    '/': u'/',
    'b': u'\b',
    'f': u'\f',
    'n': u'\n',
    'r': u'\r',
    't': u'\t',
}

# Encoding used by py_scanstring when the caller passes none.
DEFAULT_ENCODING = "utf-8"
64
def _decode_uXXXX(s, pos, _hexdigits='0123456789abcdefABCDEF'):
    """Return the integer value of the four hex digits following s[pos].

    ``pos`` is the index of the ``u`` of a ``\\uXXXX`` escape.  Exactly four
    hexadecimal digits are required; forms that ``int(x, 16)`` would
    otherwise tolerate (an ``0x`` prefix, a sign, surrounding whitespace)
    are rejected.

    Raises ValueError (formatted with errmsg at ``pos``) when the escape is
    truncated or contains a non-hex character.
    """
    esc = s[pos + 1:pos + 5]
    if len(esc) == 4 and all(c in _hexdigits for c in esc):
        return int(esc, 16)
    msg = "Invalid \\uXXXX escape"
    raise ValueError(errmsg(msg, s, pos))


def py_scanstring(s, end, encoding=None, strict=True,
                  _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    ``encoding`` (DEFAULT_ENCODING when None) is used to decode chunks of
    ``s`` that are not already ``unicode``.

    A ``\\uXXXX`` high surrogate immediately followed by a ``\\uXXXX`` low
    surrogate is combined into one code point on builds where
    ``sys.maxunicode > 65535``; any other surrogate is passed through
    unchanged as a single character.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used when reporting an unterminated string.
    begin = end - 1
    while True:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character {0!r} at".format(terminator)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            # The input ended right after the backslash.
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise ValueError(errmsg(msg, s, end))
            end += 1
        else:
            # Unicode escape sequence: 'u' plus four hex digits
            uni = _decode_uXXXX(s, end)
            end += 5
            # Check for surrogate pair on UCS-4 systems.  Only combine when
            # the second escape really is a low surrogate; otherwise leave
            # ``end`` alone so the next escape is decoded on its own.
            if (sys.maxunicode > 65535 and
                    0xd800 <= uni <= 0xdbff and s[end:end + 2] == '\\u'):
                uni2 = _decode_uXXXX(s, end + 1)
                if 0xdc00 <= uni2 <= 0xdfff:
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    end += 6
            char = unichr(uni)
        # Append the unescaped character
        _append(char)
    return u''.join(chunks), end
141
142
# c_scanstring is None when the _json import failed; in that case the
# pure-Python scanner is used.
scanstring = c_scanstring or py_scanstring

# The four whitespace characters skipped between tokens, as a plain string
# for cheap membership tests and as a compiled pattern for skipping runs.
WHITESPACE_STR = ' \t\n\r'
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
148
def JSONObject(s_and_end, encoding, strict, scan_once, object_hook,
               object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the members of a JSON object and return ``(result, end)``.

    ``s_and_end`` is a ``(s, end)`` pair: the document and the index at
    which to start scanning (presumably just past the opening ``{`` -- the
    caller is not visible in this file).  Keys are read with ``scanstring``
    using ``encoding`` and ``strict``; values are read with ``scan_once``.

    ``result`` is ``object_pairs_hook(pairs)`` when that hook is given
    (``pairs`` being the list of ``(key, value)`` tuples in document order),
    otherwise a ``dict`` of the pairs, passed through ``object_hook`` when
    that hook is given.  ``end`` is the index just after the closing ``}``.

    Raises ValueError when a property name, a ``:`` or ``,`` delimiter is
    missing, or when ``scan_once`` raises StopIteration for a value.
    """
    s, end = s_and_end
    pairs = []
    pairs_append = pairs.append
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise ValueError(errmsg(
                "Expecting property name enclosed in double quotes", s, end))
    # Step over the opening quote of the first key.
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)

        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise ValueError(errmsg("Expecting ':' delimiter", s, end))
        end += 1

        # Skip whitespace after the colon; running off the end of the
        # input is left for scan_once below to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs_append((key, value))

        # Fetch the character after the value, skipping whitespace; an
        # empty nextchar stands for end of input.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            # end was already advanced, so the offending character is at
            # end - 1.
            raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))

        # After a comma, skip whitespace and expect the opening quote of
        # the next key.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        end += 1
        if nextchar != '"':
            raise ValueError(errmsg(
                "Expecting property name enclosed in double quotes", s, end - 1))
    # object_pairs_hook takes priority over object_hook.
    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
236
def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the elements of a JSON array and return ``(values, end)``.

    ``s_and_end`` is a ``(s, end)`` pair: the document and the index at
    which to start scanning (presumably just past the opening ``[`` -- the
    caller is not visible in this file).  Each element is read with
    ``scan_once``.  ``values`` is the list of decoded elements and ``end``
    is the index just after the closing ``]``.

    Raises ValueError when ``scan_once`` raises StopIteration for an
    element or when an element is followed by neither ``,`` nor ``]``.
    """
    s, end = s_and_end
    values = []
    # Slices are used instead of indexing so that reaching the end of the
    # input yields '' rather than IndexError.
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            # end has already been advanced past the character that was
            # examined, so report end - 1 (as JSONObject does).
            raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))
        # Skip whitespace after the comma; running off the end of the
        # input is left for scan_once to report on the next iteration.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

    return values, end
272
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    By default JSON values are translated to Python values as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity`` and ``-Infinity`` are also accepted and mapped to
    the corresponding ``float`` values, although they are not part of the
    JSON spec.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
                 parse_int=None, parse_constant=None, strict=True,
                 object_pairs_hook=None):
        """Configure the decoder.

        ``encoding`` is the encoding used to interpret ``str`` input
        (utf-8 by default); it has no effect on ``unicode`` input.  Only
        encodings that are a superset of ASCII currently work; text in
        other encodings should be passed in as ``unicode``.

        ``object_hook``, if given, is called with the ``dict`` built for
        every decoded JSON object, and its return value is used in place
        of that ``dict`` (e.g. to support JSON-RPC class hinting).

        ``object_pairs_hook``, if given, is called with the ordered list
        of ``(key, value)`` pairs of every decoded JSON object, and its
        return value is used instead of the ``dict``.  This lets custom
        decoders depend on the order in which pairs were decoded (for
        example, collections.OrderedDict remembers insertion order).  When
        both hooks are given, ``object_pairs_hook`` takes priority.

        ``parse_float``, if given, is called with the string of every JSON
        float; the default is equivalent to float(num_str).  Use it to
        plug in another datatype or parser (e.g. decimal.Decimal).

        ``parse_int``, if given, is called with the string of every JSON
        int; the default is equivalent to int(num_str).  Use it to plug in
        another datatype or parser (e.g. float).

        ``parse_constant``, if given, is called with one of the strings
        -Infinity, Infinity, NaN.  Use it to raise an exception when such
        invalid JSON numbers are encountered.

        If ``strict`` is false (true is the default), control characters
        are allowed inside strings.  Control characters here are those
        with character codes in the 0-31 range, including ``'\\t'`` (tab),
        ``'\\n'``, ``'\\r'`` and ``'\\0'``.

        """
        # Plain configuration values.
        self.encoding = encoding
        self.strict = strict

        # Optional result hooks (None when not supplied).
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook

        # Number/constant parsers, falling back to the built-in behaviour.
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__

        # Module-level parsers for the compound and string types.
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring

        # Built last: make_scanner receives this instance after all of
        # the attributes above have been set.
        self.scan_once = scanner.make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` or ``unicode``
        instance containing a JSON document).

        Whitespace before and after the document is skipped.  Raises
        ValueError ("Extra data") if anything else follows the document.

        """
        start = _w(s, 0).end()
        obj, stop = self.raw_decode(s, idx=start)
        stop = _w(s, stop).end()
        total = len(s)
        if stop == total:
            return obj
        raise ValueError(errmsg("Extra data", s, stop, total))

    def raw_decode(self, s, idx=0):
        """Decode one JSON document from ``s`` (a ``str`` or ``unicode``),
        starting at index ``idx``, and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.

        Unlike ``decode``, trailing data after the document is not an
        error, so this can be used on input that has extraneous data at
        the end.  Raises ValueError when ``scan_once`` finds no document.

        """
        try:
            scanned = self.scan_once(s, idx)
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        obj, end = scanned
        return obj, end

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/json/decoder.py@ 393

Download in other formats: