Changeset 391 for python/trunk/Lib/json/decoder.py
- Timestamp:
- Mar 19, 2014, 11:31:01 PM (11 years ago)
- Location:
- python/trunk
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
python/trunk
-
Property svn:mergeinfo
set to
/python/vendor/Python-2.7.6 merged eligible /python/vendor/current merged eligible
-
Property svn:mergeinfo
set to
-
python/trunk/Lib/json/decoder.py
r2 r391 1 1 """Implementation of JSONDecoder 2 2 """ 3 4 3 import re 5 4 import sys 6 7 from json.scanner import Scanner, pattern 5 import struct 6 7 from json import scanner 8 8 try: 9 9 from _json import scanstring as c_scanstring … … 15 15 FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL 16 16 17 NaN, PosInf, NegInf = float('nan'), float('inf'), float('-inf') 17 def _floatconstants(): 18 _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') 19 if sys.byteorder != 'big': 20 _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] 21 nan, inf = struct.unpack('dd', _BYTES) 22 return nan, inf, -inf 23 24 NaN, PosInf, NegInf = _floatconstants() 18 25 19 26 … … 21 28 lineno = doc.count('\n', 0, pos) + 1 22 29 if lineno == 1: 23 colno = pos 30 colno = pos + 1 24 31 else: 25 32 colno = pos - doc.rindex('\n', 0, pos) … … 28 35 29 36 def errmsg(msg, doc, pos, end=None): 37 # Note that this function is called from _json 30 38 lineno, colno = linecol(doc, pos) 31 39 if end is None: 32 40 fmt = '{0}: line {1} column {2} (char {3})' 33 41 return fmt.format(msg, lineno, colno, pos) 42 #fmt = '%s: line %d column %d (char %d)' 43 #return fmt % (msg, lineno, colno, pos) 34 44 endlineno, endcolno = linecol(doc, end) 35 45 fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' 36 46 return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) 47 #fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' 48 #return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) 37 49 38 50 … … 41 53 'Infinity': PosInf, 42 54 'NaN': NaN, 43 'true': True,44 'false': False,45 'null': None,46 55 } 47 48 49 def JSONConstant(match, context, c=_CONSTANTS):50 s = match.group(0)51 fn = getattr(context, 'parse_constant', None)52 if fn is None:53 rval = c[s]54 else:55 rval = fn(s)56 return rval, None57 pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)58 59 60 def JSONNumber(match, context):61 match = JSONNumber.regex.match(match.string, *match.span())62 integer, frac, exp = match.groups()63 if frac or exp:64 fn = getattr(context, 'parse_float', None) or float65 res = fn(integer + (frac or '') + (exp or ''))66 else:67 fn = getattr(context, 'parse_int', None) or int68 res = fn(integer)69 return res, None70 pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)71 72 56 73 57 STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) … … 79 63 DEFAULT_ENCODING = "utf-8" 80 64 81 82 def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): 65 def py_scanstring(s, end, encoding=None, strict=True, 66 _b=BACKSLASH, _m=STRINGCHUNK.match): 67 """Scan the string s for a JSON string. End is the index of the 68 character in s after the quote that started the JSON string. 69 Unescapes all valid JSON string escape sequences and raises ValueError 70 on attempt to decode an invalid string. If strict is False then literal 71 control characters are allowed in the string. 72 73 Returns a tuple of the decoded string and the index of the character in s 74 after the end quote.""" 83 75 if encoding is None: 84 76 encoding = DEFAULT_ENCODING … … 93 85 end = chunk.end() 94 86 content, terminator = chunk.groups() 87 # Content is contains zero or more unescaped string characters 95 88 if content: 96 89 if not isinstance(content, unicode): 97 90 content = unicode(content, encoding) 98 91 _append(content) 92 # Terminator is the end of string, a literal control character, 93 # or a backslash denoting that an escape sequence follows 99 94 if terminator == '"': 100 95 break 101 96 elif terminator != '\\': 102 97 if strict: 98 #msg = "Invalid control character %r at" % (terminator,) 103 99 msg = "Invalid control character {0!r} at".format(terminator) 104 100 raise ValueError(errmsg(msg, s, end)) … … 111 107 raise ValueError( 112 108 errmsg("Unterminated string starting at", s, begin)) 109 # If not a unicode escape sequence, must be in the lookup table 113 110 if esc != 'u': 114 111 try: 115 m= _b[esc]112 char = _b[esc] 116 113 except KeyError: 117 msg = "Invalid \\escape: {0!r}".format(esc)114 msg = "Invalid \\escape: " + repr(esc) 118 115 raise ValueError(errmsg(msg, s, end)) 119 116 end += 1 120 117 else: 118 # Unicode escape sequence 121 119 esc = s[end + 1:end + 5] 122 120 next_end = end + 5 123 msg = "Invalid \\uXXXX escape" 124 try: 125 if len(esc) != 4: 126 raise ValueError 127 uni = int(esc, 16) 128 if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: 129 msg = "Invalid \\uXXXX\\uXXXX surrogate pair" 130 if not s[end + 5:end + 7] == '\\u': 131 raise ValueError 132 esc2 = s[end + 7:end + 11] 133 if len(esc2) != 4: 134 raise ValueError 135 uni2 = int(esc2, 16) 136 uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) 137 next_end += 6 138 m = unichr(uni) 139 except ValueError: 121 if len(esc) != 4: 122 msg = "Invalid \\uXXXX escape" 140 123 raise ValueError(errmsg(msg, s, end)) 124 uni = int(esc, 16) 125 # Check for surrogate pair on UCS-4 systems 126 if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: 127 msg = "Invalid \\uXXXX\\uXXXX surrogate pair" 128 if not s[end + 5:end + 7] == '\\u': 129 raise ValueError(errmsg(msg, s, end)) 130 esc2 = s[end + 7:end + 11] 131 if len(esc2) != 4: 132 raise ValueError(errmsg(msg, s, end)) 133 uni2 = int(esc2, 16) 134 uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) 135 next_end += 6 136 char = unichr(uni) 141 137 end = next_end 142 _append(m) 138 # Append the unescaped character 139 _append(char) 143 140 return u''.join(chunks), end 144 141 145 142 146 # Use speedup 147 if c_scanstring is not None: 148 scanstring = c_scanstring 149 else: 150 scanstring = py_scanstring 151 152 def JSONString(match, context): 153 encoding = getattr(context, 'encoding', None) 154 strict = getattr(context, 'strict', True) 155 return scanstring(match.string, match.end(), encoding, strict) 156 pattern(r'"')(JSONString) 157 158 159 WHITESPACE = re.compile(r'\s*', FLAGS) 160 161 162 def JSONObject(match, context, _w=WHITESPACE.match): 163 pairs = {} 164 s = match.string 165 end = _w(s, match.end()).end() 143 # Use speedup if available 144 scanstring = c_scanstring or py_scanstring 145 146 WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) 147 WHITESPACE_STR = ' \t\n\r' 148 149 def JSONObject(s_and_end, encoding, strict, scan_once, object_hook, 150 object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR): 151 s, end = s_and_end 152 pairs = [] 153 pairs_append = pairs.append 154 # Use a slice to prevent IndexError from being raised, the following 155 # check will raise a more specific ValueError if the string is empty 166 156 nextchar = s[end:end + 1] 167 # Trivial empty object 168 if nextchar == '}': 169 return pairs, end + 1 157 # Normally we expect nextchar == '"' 170 158 if nextchar != '"': 171 raise ValueError(errmsg("Expecting property name", s, end)) 159 if nextchar in _ws: 160 end = _w(s, end).end() 161 nextchar = s[end:end + 1] 162 # Trivial empty object 163 if nextchar == '}': 164 if object_pairs_hook is not None: 165 result = object_pairs_hook(pairs) 166 return result, end + 1 167 pairs = {} 168 if object_hook is not None: 169 pairs = object_hook(pairs) 170 return pairs, end + 1 171 elif nextchar != '"': 172 raise ValueError(errmsg( 173 "Expecting property name enclosed in double quotes", s, end)) 172 174 end += 1 173 encoding = getattr(context, 'encoding', None)174 strict = getattr(context, 'strict', True)175 iterscan = JSONScanner.iterscan176 175 while True: 177 176 key, end = scanstring(s, end, encoding, strict) 178 end = _w(s, end).end() 177 178 # To skip some function call overhead we optimize the fast paths where 179 # the JSON key separator is ": " or just ":". 179 180 if s[end:end + 1] != ':': 180 raise ValueError(errmsg("Expecting : delimiter", s, end)) 181 end = _w(s, end + 1).end() 182 try: 183 value, end = iterscan(s, idx=end, context=context).next() 181 end = _w(s, end).end() 182 if s[end:end + 1] != ':': 183 raise ValueError(errmsg("Expecting ':' delimiter", s, end)) 184 end += 1 185 186 try: 187 if s[end] in _ws: 188 end += 1 189 if s[end] in _ws: 190 end = _w(s, end + 1).end() 191 except IndexError: 192 pass 193 194 try: 195 value, end = scan_once(s, end) 184 196 except StopIteration: 185 197 raise ValueError(errmsg("Expecting object", s, end)) 186 pairs[key] = value 187 end = _w(s, end).end() 188 nextchar = s[end:end + 1] 198 pairs_append((key, value)) 199 200 try: 201 nextchar = s[end] 202 if nextchar in _ws: 203 end = _w(s, end + 1).end() 204 nextchar = s[end] 205 except IndexError: 206 nextchar = '' 189 207 end += 1 208 190 209 if nextchar == '}': 191 210 break 192 if nextchar != ',': 193 raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) 194 end = _w(s, end).end() 195 nextchar = s[end:end + 1] 211 elif nextchar != ',': 212 raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1)) 213 214 try: 215 nextchar = s[end] 216 if nextchar in _ws: 217 end += 1 218 nextchar = s[end] 219 if nextchar in _ws: 220 end = _w(s, end + 1).end() 221 nextchar = s[end] 222 except IndexError: 223 nextchar = '' 224 196 225 end += 1 197 226 if nextchar != '"': 198 raise ValueError(errmsg("Expecting property name", s, end - 1)) 199 object_hook = getattr(context, 'object_hook', None) 227 raise ValueError(errmsg( 228 "Expecting property name enclosed in double quotes", s, end - 1)) 229 if object_pairs_hook is not None: 230 result = object_pairs_hook(pairs) 231 return result, end 232 pairs = dict(pairs) 200 233 if object_hook is not None: 201 234 pairs = object_hook(pairs) 202 235 return pairs, end 203 pattern(r'{')(JSONObject) 204 205 206 def JSONArray(match, context, _w=WHITESPACE.match): 236 237 def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): 238 s, end = s_and_end 207 239 values = [] 208 s = match.string 209 end = _w(s, match.end()).end() 240 nextchar = s[end:end + 1] 241 if nextchar in _ws: 242 end = _w(s, end + 1).end() 243 nextchar = s[end:end + 1] 210 244 # Look-ahead for trivial empty array 211 nextchar = s[end:end + 1]212 245 if nextchar == ']': 213 246 return values, end + 1 214 iterscan = JSONScanner.iterscan247 _append = values.append 215 248 while True: 216 249 try: 217 value, end = iterscan(s, idx=end, context=context).next()250 value, end = scan_once(s, end) 218 251 except StopIteration: 219 252 raise ValueError(errmsg("Expecting object", s, end)) 220 values.append(value) 221 end = _w(s, end).end() 253 _append(value) 222 254 nextchar = s[end:end + 1] 255 if nextchar in _ws: 256 end = _w(s, end + 1).end() 257 nextchar = s[end:end + 1] 223 258 end += 1 224 259 if nextchar == ']': 225 260 break 226 if nextchar != ',': 227 raise ValueError(errmsg("Expecting , delimiter", s, end)) 228 end = _w(s, end).end() 261 elif nextchar != ',': 262 raise ValueError(errmsg("Expecting ',' delimiter", s, end)) 263 try: 264 if s[end] in _ws: 265 end += 1 266 if s[end] in _ws: 267 end = _w(s, end + 1).end() 268 except IndexError: 269 pass 270 229 271 return values, end 230 pattern(r'\[')(JSONArray)231 232 233 ANYTHING = [234 JSONObject,235 JSONArray,236 JSONString,237 JSONConstant,238 JSONNumber,239 ]240 241 JSONScanner = Scanner(ANYTHING)242 243 272 244 273 class JSONDecoder(object): … … 269 298 It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as 270 299 their corresponding ``float`` values, which is outside the JSON spec. 300 271 301 """ 272 302 273 _scanner = Scanner(ANYTHING)274 __all__ = ['__init__', 'decode', 'raw_decode']275 276 303 def __init__(self, encoding=None, object_hook=None, parse_float=None, 277 parse_int=None, parse_constant=None, strict=True): 304 parse_int=None, parse_constant=None, strict=True, 305 object_pairs_hook=None): 278 306 """``encoding`` determines the encoding used to interpret any ``str`` 279 307 objects decoded by this instance (utf-8 by default). It has no … … 283 311 strings of other encodings should be passed in as ``unicode``. 284 312 285 ``object_hook``, if specified, will be called with the result of286 every JSON object decoded and its return value will be used in313 ``object_hook``, if specified, will be called with the result 314 of every JSON object decoded and its return value will be used in 287 315 place of the given ``dict``. This can be used to provide custom 288 316 deserializations (e.g. to support JSON-RPC class hinting). 317 318 ``object_pairs_hook``, if specified will be called with the result of 319 every JSON object decoded with an ordered list of pairs. The return 320 value of ``object_pairs_hook`` will be used instead of the ``dict``. 321 This feature can be used to implement custom decoders that rely on the 322 order that the key and value pairs are decoded (for example, 323 collections.OrderedDict will remember the order of insertion). If 324 ``object_hook`` is also defined, the ``object_pairs_hook`` takes 325 priority. 289 326 290 327 ``parse_float``, if specified, will be called with the string … … 299 336 300 337 ``parse_constant``, if specified, will be called with one of the 301 following strings: -Infinity, Infinity, NaN , null, true, false.338 following strings: -Infinity, Infinity, NaN. 302 339 This can be used to raise an exception if invalid JSON numbers 303 340 are encountered. 341 342 If ``strict`` is false (true is the default), then control 343 characters will be allowed inside strings. Control characters in 344 this context are those with character codes in the 0-31 range, 345 including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``. 304 346 305 347 """ 306 348 self.encoding = encoding 307 349 self.object_hook = object_hook 308 self.parse_float = parse_float 309 self.parse_int = parse_int 310 self.parse_constant = parse_constant 350 self.object_pairs_hook = object_pairs_hook 351 self.parse_float = parse_float or float 352 self.parse_int = parse_int or int 353 self.parse_constant = parse_constant or _CONSTANTS.__getitem__ 311 354 self.strict = strict 355 self.parse_object = JSONObject 356 self.parse_array = JSONArray 357 self.parse_string = scanstring 358 self.scan_once = scanner.make_scanner(self) 312 359 313 360 def decode(self, s, _w=WHITESPACE.match): 314 """ 315 Return the Python representation of ``s`` (a ``str`` or ``unicode`` 361 """Return the Python representation of ``s`` (a ``str`` or ``unicode`` 316 362 instance containing a JSON document) 317 363 … … 323 369 return obj 324 370 325 def raw_decode(self, s, **kw):326 """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning327 with a JSON document) and return a 2-tuple of the Python371 def raw_decode(self, s, idx=0): 372 """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` 373 beginning with a JSON document) and return a 2-tuple of the Python 328 374 representation and the index in ``s`` where the document ended. 329 375 … … 332 378 333 379 """ 334 kw.setdefault('context', self) 335 try: 336 obj, end = self._scanner.iterscan(s, **kw).next() 380 try: 381 obj, end = self.scan_once(s, idx) 337 382 except StopIteration: 338 383 raise ValueError("No JSON object could be decoded")
Note:
See TracChangeset
for help on using the changeset viewer.