1 | """Implementation of JSONDecoder
|
---|
2 | """
|
---|
3 |
|
---|
4 | import re
|
---|
5 | import sys
|
---|
6 |
|
---|
7 | from json.scanner import Scanner, pattern
|
---|
8 | try:
|
---|
9 | from _json import scanstring as c_scanstring
|
---|
10 | except ImportError:
|
---|
11 | c_scanstring = None
|
---|
12 |
|
---|
# Names exported by a star-import of this module.
__all__ = ['JSONDecoder']

# Flags applied to the regular expressions compiled in this module.
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL

# Special float values that the non-standard JSON literals NaN, Infinity and
# -Infinity decode to.
NaN = float('nan')
PosInf = float('inf')
NegInf = float('-inf')
18 |
|
---|
19 |
|
---|
def linecol(doc, pos):
    """Return the 1-based ``(line, column)`` of character index ``pos``.

    ``doc`` is the document text and ``pos`` a 0-based offset into it.
    Lines are separated by ``'\\n'``.  Both numbers are 1-based on every
    line, so the first character of the document is ``(1, 1)`` and the
    first character after a newline is ``(n, 1)``.
    """
    lineno = doc.count('\n', 0, pos) + 1
    if lineno == 1:
        # No newline precedes ``pos``: the offset is the 0-based column, so
        # shift it by one to agree with the 1-based columns of later lines.
        colno = pos + 1
    else:
        # Distance from the closest preceding newline is already 1-based.
        colno = pos - doc.rindex('\n', 0, pos)
    return lineno, colno
27 |
|
---|
28 |
|
---|
def errmsg(msg, doc, pos, end=None):
    """Format ``msg`` together with the location of a problem in ``doc``.

    ``pos`` is the character index where the problem starts.  When ``end``
    is given, the message describes the range from ``pos`` to ``end``;
    otherwise it describes the single position ``pos``.  Line and column
    numbers are computed by ``linecol``.
    """
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        template = ('{0}: line {1} column {2} - line {3} column {4} '
                    '(char {5} - {6})')
        return template.format(
            msg, start_line, start_col, stop_line, stop_col, pos, end)
    template = '{0}: line {1} column {2} (char {3})'
    return template.format(msg, start_line, start_col, pos)
37 |
|
---|
38 |
|
---|
# Python value for each literal that JSONConstant can match.
_CONSTANTS = {
    # Literals defined by JSON itself.
    'true': True,
    'false': False,
    'null': None,
    # Extensions outside the JSON specification.
    'NaN': NaN,
    'Infinity': PosInf,
    '-Infinity': NegInf,
}
47 |
|
---|
48 |
|
---|
def JSONConstant(match, context, c=_CONSTANTS):
    """Decode one of the constant literals matched by this handler.

    If ``context`` has a ``parse_constant`` attribute that is not None, it
    is called with the matched text and its result is used.  Otherwise the
    matched text is looked up in ``c``.

    Returns ``(value, None)``.
    """
    # NOTE(review): the trailing None presumably tells the scanner to resume
    # at the end of the match -- confirm against json.scanner.
    literal = match.group(0)
    hook = getattr(context, 'parse_constant', None)
    if hook is not None:
        return hook(literal), None
    return c[literal], None
# Attach the regular expression that selects this handler.
pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
58 |
|
---|
59 |
|
---|
def JSONNumber(match, context):
    """Decode a JSON number.

    The matched span is matched again with ``JSONNumber.regex`` to split it
    into integer, fraction and exponent parts.  A number with a fraction or
    an exponent is converted with ``context.parse_float`` (``float`` when
    that attribute is missing or falsy); any other number is converted with
    ``context.parse_int`` (``int`` when missing or falsy).

    Returns ``(value, None)``.
    """
    start, stop = match.span()
    parts = JSONNumber.regex.match(match.string, start, stop)
    integer, frac, exp = parts.groups()
    if not (frac or exp):
        to_int = getattr(context, 'parse_int', None) or int
        return to_int(integer), None
    to_float = getattr(context, 'parse_float', None) or float
    return to_float(integer + (frac or '') + (exp or '')), None
# Attach the regular expression that selects this handler.
# NOTE(review): ``JSONNumber.regex`` is presumably set by this call -- confirm
# against json.scanner.pattern.
pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
71 |
|
---|
72 |
|
---|
# Matches a run of ordinary string characters (group 1) followed by the
# character that ends the run (group 2): a double quote, a backslash or a
# control character in the range 0x00-0x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Character following a backslash -> the character the escape stands for.
BACKSLASH = {
    '"': u'"',
    '\\': u'\\',
    '/': u'/',
    'b': u'\b',
    'f': u'\f',
    'n': u'\n',
    'r': u'\r',
    't': u'\t',
}

# Encoding used for byte strings when the caller does not supply one.
DEFAULT_ENCODING = "utf-8"
80 |
|
---|
81 |
|
---|
def _decode_hex4(digits):
    """Return the integer value of exactly four hexadecimal digits.

    Raises ValueError for anything else, including forms that a bare
    ``int(digits, 16)`` would accept such as ``'0x1f'``, ``'+1ab'`` or
    ``' 12 '``.
    """
    if len(digits) != 4:
        raise ValueError
    for char in digits:
        if char not in '0123456789abcdefABCDEF':
            raise ValueError
    return int(digits, 16)


def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH,
        _m=STRINGCHUNK.match):
    """Scan the body of a JSON string literal and return its decoded value.

    ``s`` is the document and ``end`` the index of the first character
    after the opening quote (the quote itself is taken to be at ``end - 1``
    when reporting an unterminated string).  ``encoding`` (default
    ``DEFAULT_ENCODING``) is used to decode chunks that are not already
    ``unicode``.  When ``strict`` is true, a literal control character
    (0x00-0x1f) inside the string raises ValueError; otherwise it is kept.

    Returns ``(value, index)`` where ``value`` is the decoded ``unicode``
    string and ``index`` is the position just after the closing quote.

    Raises ValueError for an unterminated string, an unknown backslash
    escape, a ``\\uXXXX`` escape that is not four hexadecimal digits, or
    (on wide unicode builds) a high surrogate that is not followed by a
    valid low surrogate escape.
    """
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content is zero or more ordinary (unescaped) characters.
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the closing quote, a literal control character, or
        # a backslash introducing an escape sequence.
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character {0!r} at".format(terminator)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        if esc != 'u':
            # Simple escape: must be present in the lookup table.
            try:
                m = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: {0!r}".format(esc)
                raise ValueError(errmsg(msg, s, end))
            end += 1
        else:
            # Unicode escape: 'u' followed by four hexadecimal digits.
            esc = s[end + 1:end + 5]
            next_end = end + 5
            msg = "Invalid \\uXXXX escape"
            try:
                uni = _decode_hex4(esc)
                if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                    # High surrogate on a wide build: it must be followed
                    # by an escaped low surrogate to form one code point.
                    msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                    if not s[end + 5:end + 7] == '\\u':
                        raise ValueError
                    uni2 = _decode_hex4(s[end + 7:end + 11])
                    # Without this check an arbitrary second escape would
                    # be folded into a meaningless code point.
                    if not 0xdc00 <= uni2 <= 0xdfff:
                        raise ValueError
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    next_end += 6
                m = unichr(uni)
            except ValueError:
                raise ValueError(errmsg(msg, s, end))
            end = next_end
        _append(m)
    return u''.join(chunks), end
144 |
|
---|
145 |
|
---|
# Use the C speedup from _json when it could be imported; otherwise fall back
# to the pure-Python implementation.
scanstring = py_scanstring if c_scanstring is None else c_scanstring
151 |
|
---|
def JSONString(match, context):
    """Decode the JSON string whose opening quote was matched by ``match``.

    The ``encoding`` (default None) and ``strict`` (default True) settings
    are read from ``context`` and passed to ``scanstring`` together with
    the document and the index just after the opening quote.  Returns
    whatever ``scanstring`` returns.
    """
    text_encoding = getattr(context, 'encoding', None)
    strict_mode = getattr(context, 'strict', True)
    document, body_start = match.string, match.end()
    return scanstring(document, body_start, text_encoding, strict_mode)
# Attach the regular expression that selects this handler.
pattern(r'"')(JSONString)
157 |
|
---|
158 |
|
---|
# JSON allows only space, tab, line feed and carriage return as insignificant
# whitespace; ``\s`` would also accept form feed and vertical tab.  Whitespace
# inside a character class is kept literally even though FLAGS includes
# re.VERBOSE.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
160 |
|
---|
161 |
|
---|
def JSONObject(match, context, _w=WHITESPACE.match):
    """Decode the JSON object whose opening brace was matched by ``match``.

    Keys are scanned with ``scanstring`` using the ``encoding`` and
    ``strict`` settings read from ``context``; values are decoded with
    ``JSONScanner.iterscan``.  If ``context`` has an ``object_hook``
    attribute that is not None, it is called with the resulting dict
    (including the empty dict produced for ``{}``) and its return value is
    used instead.

    Returns ``(result, index)`` where ``index`` is the position just after
    the closing brace.  Raises ValueError when a property name, a ``:`` or
    ``,`` delimiter, or a value is missing.
    """
    pairs = {}
    s = match.string
    end = _w(s, match.end()).end()
    nextchar = s[end:end + 1]
    object_hook = getattr(context, 'object_hook', None)
    # Trivial empty object: still give object_hook the chance to replace it,
    # exactly as for non-empty objects.
    if nextchar == '}':
        if object_hook is not None:
            pairs = object_hook(pairs)
        return pairs, end + 1
    if nextchar != '"':
        raise ValueError(errmsg("Expecting property name", s, end))
    end += 1
    encoding = getattr(context, 'encoding', None)
    strict = getattr(context, 'strict', True)
    iterscan = JSONScanner.iterscan
    while True:
        # ``end`` points just past the opening quote of the key.
        key, end = scanstring(s, end, encoding, strict)
        end = _w(s, end).end()
        if s[end:end + 1] != ':':
            raise ValueError(errmsg("Expecting : delimiter", s, end))
        end = _w(s, end + 1).end()
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs[key] = value
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == '}':
            break
        if nextchar != ',':
            # ``end`` was already advanced past the offending character.
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end - 1))
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
# Attach the regular expression that selects this handler.
pattern(r'{')(JSONObject)
204 |
|
---|
205 |
|
---|
def JSONArray(match, context, _w=WHITESPACE.match):
    """Decode the JSON array whose opening bracket was matched by ``match``.

    Elements are decoded with ``JSONScanner.iterscan`` and collected in a
    list.

    Returns ``(values, index)`` where ``index`` is the position just after
    the closing bracket.  Raises ValueError when an element or a ``,``
    delimiter is missing; the reported position is that of the offending
    character.
    """
    values = []
    s = match.string
    end = _w(s, match.end()).end()
    # Look-ahead for trivial empty array
    nextchar = s[end:end + 1]
    if nextchar == ']':
        return values, end + 1
    iterscan = JSONScanner.iterscan
    while True:
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        values.append(value)
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        if nextchar != ',':
            # ``end`` was already advanced past the offending character, so
            # step back one to report where the delimiter was expected.
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
    return values, end
# Attach the regular expression that selects this handler.
pattern(r'\[')(JSONArray)
231 |
|
---|
232 |
|
---|
# Handlers for every kind of JSON value, in the order given to the scanner.
ANYTHING = [JSONObject, JSONArray, JSONString, JSONConstant, JSONNumber]

# Module-level scanner used by JSONObject and JSONArray for nested values.
JSONScanner = Scanner(ANYTHING)
242 |
|
---|
243 |
|
---|
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    By default, decoding maps JSON values to Python values as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity`` and ``-Infinity`` are also accepted and decoded to
    the corresponding ``float`` values, although they are not part of the
    JSON spec.
    """

    # Scanner shared by all instances; raw_decode drives it.
    _scanner = Scanner(ANYTHING)
    # Informational list of the public methods (a plain class attribute).
    __all__ = ['__init__', 'decode', 'raw_decode']

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True):
        """Store the decoding options on the instance.

        ``encoding`` is the encoding used to interpret ``str`` input
        (utf-8 when None).  It has no effect on ``unicode`` input.  Only
        encodings that are a superset of ASCII currently work; text in any
        other encoding should be passed in as ``unicode``.

        ``object_hook``, if given, is called with the dict produced for a
        decoded JSON object, and its return value is used in place of that
        dict.  This allows custom deserialization (e.g. JSON-RPC class
        hinting).

        ``parse_float``, if given, is called with the string of each JSON
        float to decode.  The default is equivalent to ``float(num_str)``.
        Use it to select another type or parser (e.g. decimal.Decimal).

        ``parse_int``, if given, is called with the string of each JSON
        int to decode.  The default is equivalent to ``int(num_str)``.
        Use it to select another type or parser (e.g. float).

        ``parse_constant``, if given, is called with one of the strings
        -Infinity, Infinity, NaN, null, true, false.  It can be used to
        raise an exception when invalid JSON numbers are encountered.

        ``strict`` is stored for the string scanner; ``py_scanstring``
        rejects literal control characters inside strings when it is true.
        """
        self.encoding = encoding
        self.strict = strict
        self.object_hook = object_hook
        self.parse_int = parse_int
        self.parse_float = parse_float
        self.parse_constant = parse_constant

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of the JSON document ``s``.

        ``s`` is a ``str`` or ``unicode`` instance.  Leading and trailing
        whitespace is skipped; anything else after the document raises
        ValueError ("Extra data").
        """
        start = _w(s, 0).end()
        obj, stop = self.raw_decode(s, idx=start)
        stop = _w(s, stop).end()
        total = len(s)
        if stop == total:
            return obj
        raise ValueError(errmsg("Extra data", s, stop, total))

    def raw_decode(self, s, **kw):
        """Decode the JSON document at the beginning of ``s``.

        ``s`` is a ``str`` or ``unicode`` instance.  Returns a 2-tuple of
        the Python representation and the index in ``s`` where the document
        ended, so it can be used on a string that has extraneous data after
        the document.  Keyword arguments are passed on to the scanner, with
        ``context`` defaulting to this decoder.

        Raises ValueError when no JSON value can be decoded.
        """
        kw.setdefault('context', self)
        try:
            first = self._scanner.iterscan(s, **kw).next()
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        obj, end = first
        return obj, end