Changeset 391 for python/trunk/Lib/json/encoder.py
- Timestamp:
- Mar 19, 2014, 11:31:01 PM (11 years ago)
- Location:
- python/trunk
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
python/trunk
-
Property svn:mergeinfo
set to
/python/vendor/Python-2.7.6 merged eligible /python/vendor/current merged eligible
-
Property svn:mergeinfo
set to
-
python/trunk/Lib/json/encoder.py
r2 r391 1 1 """Implementation of JSONEncoder 2 2 """ 3 4 3 import re 5 import math6 4 7 5 try: … … 9 7 except ImportError: 10 8 c_encode_basestring_ascii = None 11 12 __all__ = ['JSONEncoder'] 9 try: 10 from _json import make_encoder as c_make_encoder 11 except ImportError: 12 c_make_encoder = None 13 13 14 14 ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') … … 26 26 for i in range(0x20): 27 27 ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) 28 28 #ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) 29 30 INFINITY = float('inf') 29 31 FLOAT_REPR = repr 30 31 def floatstr(o, allow_nan=True):32 # Check for specials. Note that this type of test is processor- and/or33 # platform-specific, so do tests which don't depend on the internals.34 35 if math.isnan(o):36 text = 'NaN'37 elif math.isinf(o):38 if math.copysign(1., o) == 1.:39 text = 'Infinity'40 else:41 text = '-Infinity'42 else:43 return FLOAT_REPR(o)44 45 if not allow_nan:46 msg = "Out of range float values are not JSON compliant: " + repr(o)47 raise ValueError(msg)48 49 return text50 51 32 52 33 def encode_basestring(s): … … 60 41 61 42 def py_encode_basestring_ascii(s): 43 """Return an ASCII-only JSON representation of a Python string 44 45 """ 62 46 if isinstance(s, str) and HAS_UTF8.search(s) is not None: 63 47 s = s.decode('utf-8') … … 70 54 if n < 0x10000: 71 55 return '\\u{0:04x}'.format(n) 56 #return '\\u%04x' % (n,) 72 57 else: 73 58 # surrogate pair … … 76 61 s2 = 0xdc00 | (n & 0x3ff) 77 62 return '\\u{0:04x}\\u{1:04x}'.format(s1, s2) 63 #return '\\u%04x\\u%04x' % (s1, s2) 78 64 return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' 79 65 80 66 81 if c_encode_basestring_ascii is not None: 82 encode_basestring_ascii = c_encode_basestring_ascii 83 else: 84 encode_basestring_ascii = py_encode_basestring_ascii 85 67 encode_basestring_ascii = ( 68 c_encode_basestring_ascii or py_encode_basestring_ascii) 86 69 87 70 class JSONEncoder(object): … … 114 97 115 98 """ 116 __all__ = ['__init__', 'default', 'encode', 'iterencode']117 99 item_separator = ', ' 118 100 key_separator = ': ' … … 122 104 """Constructor for JSONEncoder, with sensible defaults. 123 105 124 If skipkeys is False, then it is a TypeError to attempt106 If skipkeys is false, then it is a TypeError to attempt 125 107 encoding of keys that are not str, int, long, float or None. If 126 108 skipkeys is True, such items are simply skipped. 127 109 128 If ensure_ascii is True, the output is guaranteed to be str 129 objects with all incoming unicode characters escaped. If 130 ensure_ascii is false, the output will be unicode object. 131 132 If check_circular is True, then lists, dicts, and custom encoded 110 If *ensure_ascii* is true (the default), all non-ASCII 111 characters in the output are escaped with \uXXXX sequences, 112 and the results are str instances consisting of ASCII 113 characters only. If ensure_ascii is False, a result may be a 114 unicode instance. This usually happens if the input contains 115 unicode strings or the *encoding* parameter is used. 116 117 If check_circular is true, then lists, dicts, and custom encoded 133 118 objects will be checked for circular references during encoding to 134 119 prevent an infinite recursion (which would cause an OverflowError). 135 120 Otherwise, no such check takes place. 136 121 137 If allow_nan is True, then NaN, Infinity, and -Infinity will be122 If allow_nan is true, then NaN, Infinity, and -Infinity will be 138 123 encoded as such. This behavior is not JSON specification compliant, 139 124 but is consistent with most JavaScript based encoders and decoders. 140 125 Otherwise, it will be a ValueError to encode such floats. 141 126 142 If sort_keys is True, then the output of dictionaries will be127 If sort_keys is true, then the output of dictionaries will be 143 128 sorted by key; this is useful for regression tests to ensure 144 129 that JSON serializations can be compared on a day-to-day basis. … … 147 132 elements and object members will be pretty-printed with that 148 133 indent level. An indent level of 0 will only insert newlines. 149 None is the most compact representation. 134 None is the most compact representation. Since the default 135 item separator is ', ', the output might include trailing 136 whitespace when indent is specified. You can use 137 separators=(',', ': ') to avoid this. 150 138 151 139 If specified, separators should be a (item_separator, key_separator) … … 162 150 163 151 """ 152 164 153 self.skipkeys = skipkeys 165 154 self.ensure_ascii = ensure_ascii … … 168 157 self.sort_keys = sort_keys 169 158 self.indent = indent 170 self.current_indent_level = 0171 159 if separators is not None: 172 160 self.item_separator, self.key_separator = separators … … 175 163 self.encoding = encoding 176 164 177 def _newline_indent(self):178 return '\n' + (' ' * (self.indent * self.current_indent_level))179 180 def _iterencode_list(self, lst, markers=None):181 if not lst:182 yield '[]'183 return184 if markers is not None:185 markerid = id(lst)186 if markerid in markers:187 raise ValueError("Circular reference detected")188 markers[markerid] = lst189 yield '['190 if self.indent is not None:191 self.current_indent_level += 1192 newline_indent = self._newline_indent()193 separator = self.item_separator + newline_indent194 yield newline_indent195 else:196 newline_indent = None197 separator = self.item_separator198 first = True199 for value in lst:200 if first:201 first = False202 else:203 yield separator204 for chunk in self._iterencode(value, markers):205 yield chunk206 if newline_indent is not None:207 self.current_indent_level -= 1208 yield self._newline_indent()209 yield ']'210 if markers is not None:211 del markers[markerid]212 213 def _iterencode_dict(self, dct, markers=None):214 if not dct:215 yield '{}'216 return217 if markers is not None:218 markerid = id(dct)219 if markerid in markers:220 raise ValueError("Circular reference detected")221 markers[markerid] = dct222 yield '{'223 key_separator = self.key_separator224 if self.indent is not None:225 self.current_indent_level += 1226 newline_indent = self._newline_indent()227 item_separator = self.item_separator + newline_indent228 yield newline_indent229 else:230 newline_indent = None231 item_separator = self.item_separator232 first = True233 if self.ensure_ascii:234 encoder = encode_basestring_ascii235 else:236 encoder = encode_basestring237 allow_nan = self.allow_nan238 if self.sort_keys:239 keys = dct.keys()240 keys.sort()241 items = [(k, dct[k]) for k in keys]242 else:243 items = dct.iteritems()244 _encoding = self.encoding245 _do_decode = (_encoding is not None246 and not (_encoding == 'utf-8'))247 for key, value in items:248 if isinstance(key, str):249 if _do_decode:250 key = key.decode(_encoding)251 elif isinstance(key, basestring):252 pass253 # JavaScript is weakly typed for these, so it makes sense to254 # also allow them. Many encoders seem to do something like this.255 elif isinstance(key, float):256 key = floatstr(key, allow_nan)257 elif isinstance(key, (int, long)):258 key = str(key)259 elif key is True:260 key = 'true'261 elif key is False:262 key = 'false'263 elif key is None:264 key = 'null'265 elif self.skipkeys:266 continue267 else:268 raise TypeError("key {0!r} is not a string".format(key))269 if first:270 first = False271 else:272 yield item_separator273 yield encoder(key)274 yield key_separator275 for chunk in self._iterencode(value, markers):276 yield chunk277 if newline_indent is not None:278 self.current_indent_level -= 1279 yield self._newline_indent()280 yield '}'281 if markers is not None:282 del markers[markerid]283 284 def _iterencode(self, o, markers=None):285 if isinstance(o, basestring):286 if self.ensure_ascii:287 encoder = encode_basestring_ascii288 else:289 encoder = encode_basestring290 _encoding = self.encoding291 if (_encoding is not None and isinstance(o, str)292 and not (_encoding == 'utf-8')):293 o = o.decode(_encoding)294 yield encoder(o)295 elif o is None:296 yield 'null'297 elif o is True:298 yield 'true'299 elif o is False:300 yield 'false'301 elif isinstance(o, (int, long)):302 yield str(o)303 elif isinstance(o, float):304 yield floatstr(o, self.allow_nan)305 elif isinstance(o, (list, tuple)):306 for chunk in self._iterencode_list(o, markers):307 yield chunk308 elif isinstance(o, dict):309 for chunk in self._iterencode_dict(o, markers):310 yield chunk311 else:312 if markers is not None:313 markerid = id(o)314 if markerid in markers:315 raise ValueError("Circular reference detected")316 markers[markerid] = o317 for chunk in self._iterencode_default(o, markers):318 yield chunk319 if markers is not None:320 del markers[markerid]321 322 def _iterencode_default(self, o, markers=None):323 newobj = self.default(o)324 return self._iterencode(newobj, markers)325 326 165 def default(self, o): 327 """Implement this method in a subclass such that it returns a serializable328 object for ``o``, or calls the base implementation (to raise a329 ``TypeError``).330 331 For example, to support arbitrary iterators, you could implement332 default like this::166 """Implement this method in a subclass such that it returns 167 a serializable object for ``o``, or calls the base implementation 168 (to raise a ``TypeError``). 169 170 For example, to support arbitrary iterators, you could 171 implement default like this:: 333 172 334 173 def default(self, o): … … 339 178 else: 340 179 return list(iterable) 180 # Let the base class default method raise the TypeError 341 181 return JSONEncoder.default(self, o) 342 182 … … 365 205 # exceptions aren't as detailed. The list call should be roughly 366 206 # equivalent to the PySequence_Fast that ''.join() would do. 367 chunks = list(self.iterencode(o)) 207 chunks = self.iterencode(o, _one_shot=True) 208 if not isinstance(chunks, (list, tuple)): 209 chunks = list(chunks) 368 210 return ''.join(chunks) 369 211 370 def iterencode(self, o ):371 """Encode the given object and yield each string representation as372 available.212 def iterencode(self, o, _one_shot=False): 213 """Encode the given object and yield each string 214 representation as available. 373 215 374 216 For example:: … … 382 224 else: 383 225 markers = None 384 return self._iterencode(o, markers) 226 if self.ensure_ascii: 227 _encoder = encode_basestring_ascii 228 else: 229 _encoder = encode_basestring 230 if self.encoding != 'utf-8': 231 def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): 232 if isinstance(o, str): 233 o = o.decode(_encoding) 234 return _orig_encoder(o) 235 236 def floatstr(o, allow_nan=self.allow_nan, 237 _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY): 238 # Check for specials. Note that this type of test is processor 239 # and/or platform-specific, so do tests which don't depend on the 240 # internals. 241 242 if o != o: 243 text = 'NaN' 244 elif o == _inf: 245 text = 'Infinity' 246 elif o == _neginf: 247 text = '-Infinity' 248 else: 249 return _repr(o) 250 251 if not allow_nan: 252 raise ValueError( 253 "Out of range float values are not JSON compliant: " + 254 repr(o)) 255 256 return text 257 258 259 if (_one_shot and c_make_encoder is not None 260 and self.indent is None and not self.sort_keys): 261 _iterencode = c_make_encoder( 262 markers, self.default, _encoder, self.indent, 263 self.key_separator, self.item_separator, self.sort_keys, 264 self.skipkeys, self.allow_nan) 265 else: 266 _iterencode = _make_iterencode( 267 markers, self.default, _encoder, self.indent, floatstr, 268 self.key_separator, self.item_separator, self.sort_keys, 269 self.skipkeys, _one_shot) 270 return _iterencode(o, 0) 271 272 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, 273 _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, 274 ## HACK: hand-optimized bytecode; turn globals into locals 275 ValueError=ValueError, 276 basestring=basestring, 277 dict=dict, 278 float=float, 279 id=id, 280 int=int, 281 isinstance=isinstance, 282 list=list, 283 long=long, 284 str=str, 285 tuple=tuple, 286 ): 287 288 def _iterencode_list(lst, _current_indent_level): 289 if not lst: 290 yield '[]' 291 return 292 if markers is not None: 293 markerid = id(lst) 294 if markerid in markers: 295 raise ValueError("Circular reference detected") 296 markers[markerid] = lst 297 buf = '[' 298 if _indent is not None: 299 _current_indent_level += 1 300 newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) 301 separator = _item_separator + newline_indent 302 buf += newline_indent 303 else: 304 newline_indent = None 305 separator = _item_separator 306 first = True 307 for value in lst: 308 if first: 309 first = False 310 else: 311 buf = separator 312 if isinstance(value, basestring): 313 yield buf + _encoder(value) 314 elif value is None: 315 yield buf + 'null' 316 elif value is True: 317 yield buf + 'true' 318 elif value is False: 319 yield buf + 'false' 320 elif isinstance(value, (int, long)): 321 yield buf + str(value) 322 elif isinstance(value, float): 323 yield buf + _floatstr(value) 324 else: 325 yield buf 326 if isinstance(value, (list, tuple)): 327 chunks = _iterencode_list(value, _current_indent_level) 328 elif isinstance(value, dict): 329 chunks = _iterencode_dict(value, _current_indent_level) 330 else: 331 chunks = _iterencode(value, _current_indent_level) 332 for chunk in chunks: 333 yield chunk 334 if newline_indent is not None: 335 _current_indent_level -= 1 336 yield '\n' + (' ' * (_indent * _current_indent_level)) 337 yield ']' 338 if markers is not None: 339 del markers[markerid] 340 341 def _iterencode_dict(dct, _current_indent_level): 342 if not dct: 343 yield '{}' 344 return 345 if markers is not None: 346 markerid = id(dct) 347 if markerid in markers: 348 raise ValueError("Circular reference detected") 349 markers[markerid] = dct 350 yield '{' 351 if _indent is not None: 352 _current_indent_level += 1 353 newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) 354 item_separator = _item_separator + newline_indent 355 yield newline_indent 356 else: 357 newline_indent = None 358 item_separator = _item_separator 359 first = True 360 if _sort_keys: 361 items = sorted(dct.items(), key=lambda kv: kv[0]) 362 else: 363 items = dct.iteritems() 364 for key, value in items: 365 if isinstance(key, basestring): 366 pass 367 # JavaScript is weakly typed for these, so it makes sense to 368 # also allow them. Many encoders seem to do something like this. 369 elif isinstance(key, float): 370 key = _floatstr(key) 371 elif key is True: 372 key = 'true' 373 elif key is False: 374 key = 'false' 375 elif key is None: 376 key = 'null' 377 elif isinstance(key, (int, long)): 378 key = str(key) 379 elif _skipkeys: 380 continue 381 else: 382 raise TypeError("key " + repr(key) + " is not a string") 383 if first: 384 first = False 385 else: 386 yield item_separator 387 yield _encoder(key) 388 yield _key_separator 389 if isinstance(value, basestring): 390 yield _encoder(value) 391 elif value is None: 392 yield 'null' 393 elif value is True: 394 yield 'true' 395 elif value is False: 396 yield 'false' 397 elif isinstance(value, (int, long)): 398 yield str(value) 399 elif isinstance(value, float): 400 yield _floatstr(value) 401 else: 402 if isinstance(value, (list, tuple)): 403 chunks = _iterencode_list(value, _current_indent_level) 404 elif isinstance(value, dict): 405 chunks = _iterencode_dict(value, _current_indent_level) 406 else: 407 chunks = _iterencode(value, _current_indent_level) 408 for chunk in chunks: 409 yield chunk 410 if newline_indent is not None: 411 _current_indent_level -= 1 412 yield '\n' + (' ' * (_indent * _current_indent_level)) 413 yield '}' 414 if markers is not None: 415 del markers[markerid] 416 417 def _iterencode(o, _current_indent_level): 418 if isinstance(o, basestring): 419 yield _encoder(o) 420 elif o is None: 421 yield 'null' 422 elif o is True: 423 yield 'true' 424 elif o is False: 425 yield 'false' 426 elif isinstance(o, (int, long)): 427 yield str(o) 428 elif isinstance(o, float): 429 yield _floatstr(o) 430 elif isinstance(o, (list, tuple)): 431 for chunk in _iterencode_list(o, _current_indent_level): 432 yield chunk 433 elif isinstance(o, dict): 434 for chunk in _iterencode_dict(o, _current_indent_level): 435 yield chunk 436 else: 437 if markers is not None: 438 markerid = id(o) 439 if markerid in markers: 440 raise ValueError("Circular reference detected") 441 markers[markerid] = o 442 o = _default(o) 443 for chunk in _iterencode(o, _current_indent_level): 444 yield chunk 445 if markers is not None: 446 del markers[markerid] 447 448 return _iterencode
Note:
See TracChangeset
for help on using the changeset viewer.