Ignore:
Timestamp: Mar 19, 2014, 11:31:01 PM (11 years ago)
Author: dmik
Message: python: Merge vendor 2.7.6 to trunk.
Location: python/trunk
Files: 2 edited

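Legend:

Each diff row below starts with two line-number columns: the line number in the old revision (r2), then the line number in the new revision (r391).

Unmodified – both line numbers are present
Added – only the new (r391) line number is present
Removed – only the old (r2) line number is present

A row with no line numbers marks unchanged lines that the viewer has skipped.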
  • python/trunk

  • python/trunk/Lib/json/encoder.py

    r2 r391  
    11"""Implementation of JSONEncoder
    22"""
    3 
    43import re
    5 import math
    64
    75try:
     
    97except ImportError:
    108    c_encode_basestring_ascii = None
    11 
    12 __all__ = ['JSONEncoder']
     9try:
     10    from _json import make_encoder as c_make_encoder
     11except ImportError:
     12    c_make_encoder = None
    1313
    1414ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
     
    2626for i in range(0x20):
    2727    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
    28 
     28    #ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
     29
     30INFINITY = float('inf')
    2931FLOAT_REPR = repr
    30 
    31 def floatstr(o, allow_nan=True):
    32     # Check for specials.  Note that this type of test is processor- and/or
    33     # platform-specific, so do tests which don't depend on the internals.
    34 
    35     if math.isnan(o):
    36         text = 'NaN'
    37     elif math.isinf(o):
    38         if math.copysign(1., o) == 1.:
    39             text = 'Infinity'
    40         else:
    41             text = '-Infinity'
    42     else:
    43         return FLOAT_REPR(o)
    44 
    45     if not allow_nan:
    46         msg = "Out of range float values are not JSON compliant: " + repr(o)
    47         raise ValueError(msg)
    48 
    49     return text
    50 
    5132
    5233def encode_basestring(s):
     
    6041
    6142def py_encode_basestring_ascii(s):
     43    """Return an ASCII-only JSON representation of a Python string
     44
     45    """
    6246    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
    6347        s = s.decode('utf-8')
     
    7054            if n < 0x10000:
    7155                return '\\u{0:04x}'.format(n)
     56                #return '\\u%04x' % (n,)
    7257            else:
    7358                # surrogate pair
     
    7661                s2 = 0xdc00 | (n & 0x3ff)
    7762                return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
     63                #return '\\u%04x\\u%04x' % (s1, s2)
    7864    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
    7965
    8066
    81 if c_encode_basestring_ascii is not None:
    82     encode_basestring_ascii = c_encode_basestring_ascii
    83 else:
    84     encode_basestring_ascii = py_encode_basestring_ascii
    85 
     67encode_basestring_ascii = (
     68    c_encode_basestring_ascii or py_encode_basestring_ascii)
    8669
    8770class JSONEncoder(object):
     
    11497
    11598    """
    116     __all__ = ['__init__', 'default', 'encode', 'iterencode']
    11799    item_separator = ', '
    118100    key_separator = ': '
     
    122104        """Constructor for JSONEncoder, with sensible defaults.
    123105
    124         If skipkeys is False, then it is a TypeError to attempt
     106        If skipkeys is false, then it is a TypeError to attempt
    125107        encoding of keys that are not str, int, long, float or None.  If
    126108        skipkeys is True, such items are simply skipped.
    127109
    128         If ensure_ascii is True, the output is guaranteed to be str
    129         objects with all incoming unicode characters escaped.  If
    130         ensure_ascii is false, the output will be unicode object.
    131 
    132         If check_circular is True, then lists, dicts, and custom encoded
     110        If *ensure_ascii* is true (the default), all non-ASCII
     111        characters in the output are escaped with \uXXXX sequences,
     112        and the results are str instances consisting of ASCII
     113        characters only.  If ensure_ascii is False, a result may be a
     114        unicode instance.  This usually happens if the input contains
     115        unicode strings or the *encoding* parameter is used.
     116
     117        If check_circular is true, then lists, dicts, and custom encoded
    133118        objects will be checked for circular references during encoding to
    134119        prevent an infinite recursion (which would cause an OverflowError).
    135120        Otherwise, no such check takes place.
    136121
    137         If allow_nan is True, then NaN, Infinity, and -Infinity will be
     122        If allow_nan is true, then NaN, Infinity, and -Infinity will be
    138123        encoded as such.  This behavior is not JSON specification compliant,
    139124        but is consistent with most JavaScript based encoders and decoders.
    140125        Otherwise, it will be a ValueError to encode such floats.
    141126
    142         If sort_keys is True, then the output of dictionaries will be
     127        If sort_keys is true, then the output of dictionaries will be
    143128        sorted by key; this is useful for regression tests to ensure
    144129        that JSON serializations can be compared on a day-to-day basis.
     
    147132        elements and object members will be pretty-printed with that
    148133        indent level.  An indent level of 0 will only insert newlines.
    149         None is the most compact representation.
     134        None is the most compact representation.  Since the default
     135        item separator is ', ',  the output might include trailing
     136        whitespace when indent is specified.  You can use
     137        separators=(',', ': ') to avoid this.
    150138
    151139        If specified, separators should be a (item_separator, key_separator)
     
    162150
    163151        """
     152
    164153        self.skipkeys = skipkeys
    165154        self.ensure_ascii = ensure_ascii
     
    168157        self.sort_keys = sort_keys
    169158        self.indent = indent
    170         self.current_indent_level = 0
    171159        if separators is not None:
    172160            self.item_separator, self.key_separator = separators
     
    175163        self.encoding = encoding
    176164
    177     def _newline_indent(self):
    178         return '\n' + (' ' * (self.indent * self.current_indent_level))
    179 
    180     def _iterencode_list(self, lst, markers=None):
    181         if not lst:
    182             yield '[]'
    183             return
    184         if markers is not None:
    185             markerid = id(lst)
    186             if markerid in markers:
    187                 raise ValueError("Circular reference detected")
    188             markers[markerid] = lst
    189         yield '['
    190         if self.indent is not None:
    191             self.current_indent_level += 1
    192             newline_indent = self._newline_indent()
    193             separator = self.item_separator + newline_indent
    194             yield newline_indent
    195         else:
    196             newline_indent = None
    197             separator = self.item_separator
    198         first = True
    199         for value in lst:
    200             if first:
    201                 first = False
    202             else:
    203                 yield separator
    204             for chunk in self._iterencode(value, markers):
    205                 yield chunk
    206         if newline_indent is not None:
    207             self.current_indent_level -= 1
    208             yield self._newline_indent()
    209         yield ']'
    210         if markers is not None:
    211             del markers[markerid]
    212 
    213     def _iterencode_dict(self, dct, markers=None):
    214         if not dct:
    215             yield '{}'
    216             return
    217         if markers is not None:
    218             markerid = id(dct)
    219             if markerid in markers:
    220                 raise ValueError("Circular reference detected")
    221             markers[markerid] = dct
    222         yield '{'
    223         key_separator = self.key_separator
    224         if self.indent is not None:
    225             self.current_indent_level += 1
    226             newline_indent = self._newline_indent()
    227             item_separator = self.item_separator + newline_indent
    228             yield newline_indent
    229         else:
    230             newline_indent = None
    231             item_separator = self.item_separator
    232         first = True
    233         if self.ensure_ascii:
    234             encoder = encode_basestring_ascii
    235         else:
    236             encoder = encode_basestring
    237         allow_nan = self.allow_nan
    238         if self.sort_keys:
    239             keys = dct.keys()
    240             keys.sort()
    241             items = [(k, dct[k]) for k in keys]
    242         else:
    243             items = dct.iteritems()
    244         _encoding = self.encoding
    245         _do_decode = (_encoding is not None
    246             and not (_encoding == 'utf-8'))
    247         for key, value in items:
    248             if isinstance(key, str):
    249                 if _do_decode:
    250                     key = key.decode(_encoding)
    251             elif isinstance(key, basestring):
    252                 pass
    253             # JavaScript is weakly typed for these, so it makes sense to
    254             # also allow them.  Many encoders seem to do something like this.
    255             elif isinstance(key, float):
    256                 key = floatstr(key, allow_nan)
    257             elif isinstance(key, (int, long)):
    258                 key = str(key)
    259             elif key is True:
    260                 key = 'true'
    261             elif key is False:
    262                 key = 'false'
    263             elif key is None:
    264                 key = 'null'
    265             elif self.skipkeys:
    266                 continue
    267             else:
    268                 raise TypeError("key {0!r} is not a string".format(key))
    269             if first:
    270                 first = False
    271             else:
    272                 yield item_separator
    273             yield encoder(key)
    274             yield key_separator
    275             for chunk in self._iterencode(value, markers):
    276                 yield chunk
    277         if newline_indent is not None:
    278             self.current_indent_level -= 1
    279             yield self._newline_indent()
    280         yield '}'
    281         if markers is not None:
    282             del markers[markerid]
    283 
    284     def _iterencode(self, o, markers=None):
    285         if isinstance(o, basestring):
    286             if self.ensure_ascii:
    287                 encoder = encode_basestring_ascii
    288             else:
    289                 encoder = encode_basestring
    290             _encoding = self.encoding
    291             if (_encoding is not None and isinstance(o, str)
    292                     and not (_encoding == 'utf-8')):
    293                 o = o.decode(_encoding)
    294             yield encoder(o)
    295         elif o is None:
    296             yield 'null'
    297         elif o is True:
    298             yield 'true'
    299         elif o is False:
    300             yield 'false'
    301         elif isinstance(o, (int, long)):
    302             yield str(o)
    303         elif isinstance(o, float):
    304             yield floatstr(o, self.allow_nan)
    305         elif isinstance(o, (list, tuple)):
    306             for chunk in self._iterencode_list(o, markers):
    307                 yield chunk
    308         elif isinstance(o, dict):
    309             for chunk in self._iterencode_dict(o, markers):
    310                 yield chunk
    311         else:
    312             if markers is not None:
    313                 markerid = id(o)
    314                 if markerid in markers:
    315                     raise ValueError("Circular reference detected")
    316                 markers[markerid] = o
    317             for chunk in self._iterencode_default(o, markers):
    318                 yield chunk
    319             if markers is not None:
    320                 del markers[markerid]
    321 
    322     def _iterencode_default(self, o, markers=None):
    323         newobj = self.default(o)
    324         return self._iterencode(newobj, markers)
    325 
    326165    def default(self, o):
    327         """Implement this method in a subclass such that it returns a serializable
    328         object for ``o``, or calls the base implementation (to raise a
    329         ``TypeError``).
    330 
    331         For example, to support arbitrary iterators, you could implement
    332         default like this::
     166        """Implement this method in a subclass such that it returns
     167        a serializable object for ``o``, or calls the base implementation
     168        (to raise a ``TypeError``).
     169
     170        For example, to support arbitrary iterators, you could
     171        implement default like this::
    333172
    334173            def default(self, o):
     
    339178                else:
    340179                    return list(iterable)
     180                # Let the base class default method raise the TypeError
    341181                return JSONEncoder.default(self, o)
    342182
     
    365205        # exceptions aren't as detailed.  The list call should be roughly
    366206        # equivalent to the PySequence_Fast that ''.join() would do.
    367         chunks = list(self.iterencode(o))
     207        chunks = self.iterencode(o, _one_shot=True)
     208        if not isinstance(chunks, (list, tuple)):
     209            chunks = list(chunks)
    368210        return ''.join(chunks)
    369211
    370     def iterencode(self, o):
    371         """Encode the given object and yield each string representation as
    372         available.
     212    def iterencode(self, o, _one_shot=False):
     213        """Encode the given object and yield each string
     214        representation as available.
    373215
    374216        For example::
     
    382224        else:
    383225            markers = None
    384         return self._iterencode(o, markers)
     226        if self.ensure_ascii:
     227            _encoder = encode_basestring_ascii
     228        else:
     229            _encoder = encode_basestring
     230        if self.encoding != 'utf-8':
     231            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
     232                if isinstance(o, str):
     233                    o = o.decode(_encoding)
     234                return _orig_encoder(o)
     235
     236        def floatstr(o, allow_nan=self.allow_nan,
     237                _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
     238            # Check for specials.  Note that this type of test is processor
     239            # and/or platform-specific, so do tests which don't depend on the
     240            # internals.
     241
     242            if o != o:
     243                text = 'NaN'
     244            elif o == _inf:
     245                text = 'Infinity'
     246            elif o == _neginf:
     247                text = '-Infinity'
     248            else:
     249                return _repr(o)
     250
     251            if not allow_nan:
     252                raise ValueError(
     253                    "Out of range float values are not JSON compliant: " +
     254                    repr(o))
     255
     256            return text
     257
     258
     259        if (_one_shot and c_make_encoder is not None
     260                and self.indent is None and not self.sort_keys):
     261            _iterencode = c_make_encoder(
     262                markers, self.default, _encoder, self.indent,
     263                self.key_separator, self.item_separator, self.sort_keys,
     264                self.skipkeys, self.allow_nan)
     265        else:
     266            _iterencode = _make_iterencode(
     267                markers, self.default, _encoder, self.indent, floatstr,
     268                self.key_separator, self.item_separator, self.sort_keys,
     269                self.skipkeys, _one_shot)
     270        return _iterencode(o, 0)
     271
     272def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
     273        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
     274        ## HACK: hand-optimized bytecode; turn globals into locals
     275        ValueError=ValueError,
     276        basestring=basestring,
     277        dict=dict,
     278        float=float,
     279        id=id,
     280        int=int,
     281        isinstance=isinstance,
     282        list=list,
     283        long=long,
     284        str=str,
     285        tuple=tuple,
     286    ):
     287
     288    def _iterencode_list(lst, _current_indent_level):
     289        if not lst:
     290            yield '[]'
     291            return
     292        if markers is not None:
     293            markerid = id(lst)
     294            if markerid in markers:
     295                raise ValueError("Circular reference detected")
     296            markers[markerid] = lst
     297        buf = '['
     298        if _indent is not None:
     299            _current_indent_level += 1
     300            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
     301            separator = _item_separator + newline_indent
     302            buf += newline_indent
     303        else:
     304            newline_indent = None
     305            separator = _item_separator
     306        first = True
     307        for value in lst:
     308            if first:
     309                first = False
     310            else:
     311                buf = separator
     312            if isinstance(value, basestring):
     313                yield buf + _encoder(value)
     314            elif value is None:
     315                yield buf + 'null'
     316            elif value is True:
     317                yield buf + 'true'
     318            elif value is False:
     319                yield buf + 'false'
     320            elif isinstance(value, (int, long)):
     321                yield buf + str(value)
     322            elif isinstance(value, float):
     323                yield buf + _floatstr(value)
     324            else:
     325                yield buf
     326                if isinstance(value, (list, tuple)):
     327                    chunks = _iterencode_list(value, _current_indent_level)
     328                elif isinstance(value, dict):
     329                    chunks = _iterencode_dict(value, _current_indent_level)
     330                else:
     331                    chunks = _iterencode(value, _current_indent_level)
     332                for chunk in chunks:
     333                    yield chunk
     334        if newline_indent is not None:
     335            _current_indent_level -= 1
     336            yield '\n' + (' ' * (_indent * _current_indent_level))
     337        yield ']'
     338        if markers is not None:
     339            del markers[markerid]
     340
     341    def _iterencode_dict(dct, _current_indent_level):
     342        if not dct:
     343            yield '{}'
     344            return
     345        if markers is not None:
     346            markerid = id(dct)
     347            if markerid in markers:
     348                raise ValueError("Circular reference detected")
     349            markers[markerid] = dct
     350        yield '{'
     351        if _indent is not None:
     352            _current_indent_level += 1
     353            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
     354            item_separator = _item_separator + newline_indent
     355            yield newline_indent
     356        else:
     357            newline_indent = None
     358            item_separator = _item_separator
     359        first = True
     360        if _sort_keys:
     361            items = sorted(dct.items(), key=lambda kv: kv[0])
     362        else:
     363            items = dct.iteritems()
     364        for key, value in items:
     365            if isinstance(key, basestring):
     366                pass
     367            # JavaScript is weakly typed for these, so it makes sense to
     368            # also allow them.  Many encoders seem to do something like this.
     369            elif isinstance(key, float):
     370                key = _floatstr(key)
     371            elif key is True:
     372                key = 'true'
     373            elif key is False:
     374                key = 'false'
     375            elif key is None:
     376                key = 'null'
     377            elif isinstance(key, (int, long)):
     378                key = str(key)
     379            elif _skipkeys:
     380                continue
     381            else:
     382                raise TypeError("key " + repr(key) + " is not a string")
     383            if first:
     384                first = False
     385            else:
     386                yield item_separator
     387            yield _encoder(key)
     388            yield _key_separator
     389            if isinstance(value, basestring):
     390                yield _encoder(value)
     391            elif value is None:
     392                yield 'null'
     393            elif value is True:
     394                yield 'true'
     395            elif value is False:
     396                yield 'false'
     397            elif isinstance(value, (int, long)):
     398                yield str(value)
     399            elif isinstance(value, float):
     400                yield _floatstr(value)
     401            else:
     402                if isinstance(value, (list, tuple)):
     403                    chunks = _iterencode_list(value, _current_indent_level)
     404                elif isinstance(value, dict):
     405                    chunks = _iterencode_dict(value, _current_indent_level)
     406                else:
     407                    chunks = _iterencode(value, _current_indent_level)
     408                for chunk in chunks:
     409                    yield chunk
     410        if newline_indent is not None:
     411            _current_indent_level -= 1
     412            yield '\n' + (' ' * (_indent * _current_indent_level))
     413        yield '}'
     414        if markers is not None:
     415            del markers[markerid]
     416
     417    def _iterencode(o, _current_indent_level):
     418        if isinstance(o, basestring):
     419            yield _encoder(o)
     420        elif o is None:
     421            yield 'null'
     422        elif o is True:
     423            yield 'true'
     424        elif o is False:
     425            yield 'false'
     426        elif isinstance(o, (int, long)):
     427            yield str(o)
     428        elif isinstance(o, float):
     429            yield _floatstr(o)
     430        elif isinstance(o, (list, tuple)):
     431            for chunk in _iterencode_list(o, _current_indent_level):
     432                yield chunk
     433        elif isinstance(o, dict):
     434            for chunk in _iterencode_dict(o, _current_indent_level):
     435                yield chunk
     436        else:
     437            if markers is not None:
     438                markerid = id(o)
     439                if markerid in markers:
     440                    raise ValueError("Circular reference detected")
     441                markers[markerid] = o
     442            o = _default(o)
     443            for chunk in _iterencode(o, _current_indent_level):
     444                yield chunk
     445            if markers is not None:
     446                del markers[markerid]
     447
     448    return _iterencode
Note: See TracChangeset for help on using the changeset viewer.