Ignore:
Timestamp:
Mar 19, 2014, 11:31:01 PM (11 years ago)
Author:
dmik
Message:

python: Merge vendor 2.7.6 to trunk.

Location:
python/trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • python/trunk

  • python/trunk/Lib/json/decoder.py

    r2 r391  
    11"""Implementation of JSONDecoder
    22"""
    3 
    43import re
    54import sys
    6 
    7 from json.scanner import Scanner, pattern
     5import struct
     6
     7from json import scanner
    88try:
    99    from _json import scanstring as c_scanstring
     
    1515FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
    1616
    17 NaN, PosInf, NegInf = float('nan'), float('inf'), float('-inf')
     17def _floatconstants():
     18    _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
     19    if sys.byteorder != 'big':
     20        _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
     21    nan, inf = struct.unpack('dd', _BYTES)
     22    return nan, inf, -inf
     23
     24NaN, PosInf, NegInf = _floatconstants()
    1825
    1926
     
    2128    lineno = doc.count('\n', 0, pos) + 1
    2229    if lineno == 1:
    23         colno = pos
     30        colno = pos + 1
    2431    else:
    2532        colno = pos - doc.rindex('\n', 0, pos)
     
    2835
    2936def errmsg(msg, doc, pos, end=None):
     37    # Note that this function is called from _json
    3038    lineno, colno = linecol(doc, pos)
    3139    if end is None:
    3240        fmt = '{0}: line {1} column {2} (char {3})'
    3341        return fmt.format(msg, lineno, colno, pos)
     42        #fmt = '%s: line %d column %d (char %d)'
     43        #return fmt % (msg, lineno, colno, pos)
    3444    endlineno, endcolno = linecol(doc, end)
    3545    fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
    3646    return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
     47    #fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
     48    #return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
    3749
    3850
     
    4153    'Infinity': PosInf,
    4254    'NaN': NaN,
    43     'true': True,
    44     'false': False,
    45     'null': None,
    4655}
    47 
    48 
    49 def JSONConstant(match, context, c=_CONSTANTS):
    50     s = match.group(0)
    51     fn = getattr(context, 'parse_constant', None)
    52     if fn is None:
    53         rval = c[s]
    54     else:
    55         rval = fn(s)
    56     return rval, None
    57 pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
    58 
    59 
    60 def JSONNumber(match, context):
    61     match = JSONNumber.regex.match(match.string, *match.span())
    62     integer, frac, exp = match.groups()
    63     if frac or exp:
    64         fn = getattr(context, 'parse_float', None) or float
    65         res = fn(integer + (frac or '') + (exp or ''))
    66     else:
    67         fn = getattr(context, 'parse_int', None) or int
    68         res = fn(integer)
    69     return res, None
    70 pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
    71 
    7256
    7357STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
     
    7963DEFAULT_ENCODING = "utf-8"
    8064
    81 
    82 def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
     65def py_scanstring(s, end, encoding=None, strict=True,
     66        _b=BACKSLASH, _m=STRINGCHUNK.match):
     67    """Scan the string s for a JSON string. End is the index of the
     68    character in s after the quote that started the JSON string.
     69    Unescapes all valid JSON string escape sequences and raises ValueError
     70    on attempt to decode an invalid string. If strict is False then literal
     71    control characters are allowed in the string.
     72
     73    Returns a tuple of the decoded string and the index of the character in s
     74    after the end quote."""
    8375    if encoding is None:
    8476        encoding = DEFAULT_ENCODING
     
    9385        end = chunk.end()
    9486        content, terminator = chunk.groups()
     87        # Content contains zero or more unescaped string characters
    9588        if content:
    9689            if not isinstance(content, unicode):
    9790                content = unicode(content, encoding)
    9891            _append(content)
     92        # Terminator is the end of string, a literal control character,
     93        # or a backslash denoting that an escape sequence follows
    9994        if terminator == '"':
    10095            break
    10196        elif terminator != '\\':
    10297            if strict:
     98                #msg = "Invalid control character %r at" % (terminator,)
    10399                msg = "Invalid control character {0!r} at".format(terminator)
    104100                raise ValueError(errmsg(msg, s, end))
     
    111107            raise ValueError(
    112108                errmsg("Unterminated string starting at", s, begin))
     109        # If not a unicode escape sequence, must be in the lookup table
    113110        if esc != 'u':
    114111            try:
    115                 m = _b[esc]
     112                char = _b[esc]
    116113            except KeyError:
    117                 msg = "Invalid \\escape: {0!r}".format(esc)
     114                msg = "Invalid \\escape: " + repr(esc)
    118115                raise ValueError(errmsg(msg, s, end))
    119116            end += 1
    120117        else:
     118            # Unicode escape sequence
    121119            esc = s[end + 1:end + 5]
    122120            next_end = end + 5
    123             msg = "Invalid \\uXXXX escape"
    124             try:
    125                 if len(esc) != 4:
    126                     raise ValueError
    127                 uni = int(esc, 16)
    128                 if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
    129                     msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
    130                     if not s[end + 5:end + 7] == '\\u':
    131                         raise ValueError
    132                     esc2 = s[end + 7:end + 11]
    133                     if len(esc2) != 4:
    134                         raise ValueError
    135                     uni2 = int(esc2, 16)
    136                     uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
    137                     next_end += 6
    138                 m = unichr(uni)
    139             except ValueError:
     121            if len(esc) != 4:
     122                msg = "Invalid \\uXXXX escape"
    140123                raise ValueError(errmsg(msg, s, end))
     124            uni = int(esc, 16)
     125            # Check for surrogate pair on UCS-4 systems
     126            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
     127                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
     128                if not s[end + 5:end + 7] == '\\u':
     129                    raise ValueError(errmsg(msg, s, end))
     130                esc2 = s[end + 7:end + 11]
     131                if len(esc2) != 4:
     132                    raise ValueError(errmsg(msg, s, end))
     133                uni2 = int(esc2, 16)
     134                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
     135                next_end += 6
     136            char = unichr(uni)
    141137            end = next_end
    142         _append(m)
     138        # Append the unescaped character
     139        _append(char)
    143140    return u''.join(chunks), end
    144141
    145142
    146 # Use speedup
    147 if c_scanstring is not None:
    148     scanstring = c_scanstring
    149 else:
    150     scanstring = py_scanstring
    151 
    152 def JSONString(match, context):
    153     encoding = getattr(context, 'encoding', None)
    154     strict = getattr(context, 'strict', True)
    155     return scanstring(match.string, match.end(), encoding, strict)
    156 pattern(r'"')(JSONString)
    157 
    158 
    159 WHITESPACE = re.compile(r'\s*', FLAGS)
    160 
    161 
    162 def JSONObject(match, context, _w=WHITESPACE.match):
    163     pairs = {}
    164     s = match.string
    165     end = _w(s, match.end()).end()
     143# Use speedup if available
     144scanstring = c_scanstring or py_scanstring
     145
     146WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
     147WHITESPACE_STR = ' \t\n\r'
     148
     149def JSONObject(s_and_end, encoding, strict, scan_once, object_hook,
     150               object_pairs_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
     151    s, end = s_and_end
     152    pairs = []
     153    pairs_append = pairs.append
     154    # Use a slice to prevent IndexError from being raised, the following
     155    # check will raise a more specific ValueError if the string is empty
    166156    nextchar = s[end:end + 1]
    167     # Trivial empty object
    168     if nextchar == '}':
    169         return pairs, end + 1
     157    # Normally we expect nextchar == '"'
    170158    if nextchar != '"':
    171         raise ValueError(errmsg("Expecting property name", s, end))
     159        if nextchar in _ws:
     160            end = _w(s, end).end()
     161            nextchar = s[end:end + 1]
     162        # Trivial empty object
     163        if nextchar == '}':
     164            if object_pairs_hook is not None:
     165                result = object_pairs_hook(pairs)
     166                return result, end + 1
     167            pairs = {}
     168            if object_hook is not None:
     169                pairs = object_hook(pairs)
     170            return pairs, end + 1
     171        elif nextchar != '"':
     172            raise ValueError(errmsg(
     173                "Expecting property name enclosed in double quotes", s, end))
    172174    end += 1
    173     encoding = getattr(context, 'encoding', None)
    174     strict = getattr(context, 'strict', True)
    175     iterscan = JSONScanner.iterscan
    176175    while True:
    177176        key, end = scanstring(s, end, encoding, strict)
    178         end = _w(s, end).end()
     177
     178        # To skip some function call overhead we optimize the fast paths where
     179        # the JSON key separator is ": " or just ":".
    179180        if s[end:end + 1] != ':':
    180             raise ValueError(errmsg("Expecting : delimiter", s, end))
    181         end = _w(s, end + 1).end()
    182         try:
    183             value, end = iterscan(s, idx=end, context=context).next()
     181            end = _w(s, end).end()
     182            if s[end:end + 1] != ':':
     183                raise ValueError(errmsg("Expecting ':' delimiter", s, end))
     184        end += 1
     185
     186        try:
     187            if s[end] in _ws:
     188                end += 1
     189                if s[end] in _ws:
     190                    end = _w(s, end + 1).end()
     191        except IndexError:
     192            pass
     193
     194        try:
     195            value, end = scan_once(s, end)
    184196        except StopIteration:
    185197            raise ValueError(errmsg("Expecting object", s, end))
    186         pairs[key] = value
    187         end = _w(s, end).end()
    188         nextchar = s[end:end + 1]
     198        pairs_append((key, value))
     199
     200        try:
     201            nextchar = s[end]
     202            if nextchar in _ws:
     203                end = _w(s, end + 1).end()
     204                nextchar = s[end]
     205        except IndexError:
     206            nextchar = ''
    189207        end += 1
     208
    190209        if nextchar == '}':
    191210            break
    192         if nextchar != ',':
    193             raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
    194         end = _w(s, end).end()
    195         nextchar = s[end:end + 1]
     211        elif nextchar != ',':
     212            raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))
     213
     214        try:
     215            nextchar = s[end]
     216            if nextchar in _ws:
     217                end += 1
     218                nextchar = s[end]
     219                if nextchar in _ws:
     220                    end = _w(s, end + 1).end()
     221                    nextchar = s[end]
     222        except IndexError:
     223            nextchar = ''
     224
    196225        end += 1
    197226        if nextchar != '"':
    198             raise ValueError(errmsg("Expecting property name", s, end - 1))
    199     object_hook = getattr(context, 'object_hook', None)
     227            raise ValueError(errmsg(
     228                "Expecting property name enclosed in double quotes", s, end - 1))
     229    if object_pairs_hook is not None:
     230        result = object_pairs_hook(pairs)
     231        return result, end
     232    pairs = dict(pairs)
    200233    if object_hook is not None:
    201234        pairs = object_hook(pairs)
    202235    return pairs, end
    203 pattern(r'{')(JSONObject)
    204 
    205 
    206 def JSONArray(match, context, _w=WHITESPACE.match):
     236
     237def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
     238    s, end = s_and_end
    207239    values = []
    208     s = match.string
    209     end = _w(s, match.end()).end()
     240    nextchar = s[end:end + 1]
     241    if nextchar in _ws:
     242        end = _w(s, end + 1).end()
     243        nextchar = s[end:end + 1]
    210244    # Look-ahead for trivial empty array
    211     nextchar = s[end:end + 1]
    212245    if nextchar == ']':
    213246        return values, end + 1
    214     iterscan = JSONScanner.iterscan
     247    _append = values.append
    215248    while True:
    216249        try:
    217             value, end = iterscan(s, idx=end, context=context).next()
     250            value, end = scan_once(s, end)
    218251        except StopIteration:
    219252            raise ValueError(errmsg("Expecting object", s, end))
    220         values.append(value)
    221         end = _w(s, end).end()
     253        _append(value)
    222254        nextchar = s[end:end + 1]
     255        if nextchar in _ws:
     256            end = _w(s, end + 1).end()
     257            nextchar = s[end:end + 1]
    223258        end += 1
    224259        if nextchar == ']':
    225260            break
    226         if nextchar != ',':
    227             raise ValueError(errmsg("Expecting , delimiter", s, end))
    228         end = _w(s, end).end()
     261        elif nextchar != ',':
     262            raise ValueError(errmsg("Expecting ',' delimiter", s, end))
     263        try:
     264            if s[end] in _ws:
     265                end += 1
     266                if s[end] in _ws:
     267                    end = _w(s, end + 1).end()
     268        except IndexError:
     269            pass
     270
    229271    return values, end
    230 pattern(r'\[')(JSONArray)
    231 
    232 
    233 ANYTHING = [
    234     JSONObject,
    235     JSONArray,
    236     JSONString,
    237     JSONConstant,
    238     JSONNumber,
    239 ]
    240 
    241 JSONScanner = Scanner(ANYTHING)
    242 
    243272
    244273class JSONDecoder(object):
     
    269298    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
    270299    their corresponding ``float`` values, which is outside the JSON spec.
     300
    271301    """
    272302
    273     _scanner = Scanner(ANYTHING)
    274     __all__ = ['__init__', 'decode', 'raw_decode']
    275 
    276303    def __init__(self, encoding=None, object_hook=None, parse_float=None,
    277             parse_int=None, parse_constant=None, strict=True):
     304            parse_int=None, parse_constant=None, strict=True,
     305            object_pairs_hook=None):
    278306        """``encoding`` determines the encoding used to interpret any ``str``
    279307        objects decoded by this instance (utf-8 by default).  It has no
     
    283311        strings of other encodings should be passed in as ``unicode``.
    284312
    285         ``object_hook``, if specified, will be called with the result of
    286         every JSON object decoded and its return value will be used in
     313        ``object_hook``, if specified, will be called with the result
     314        of every JSON object decoded and its return value will be used in
    287315        place of the given ``dict``.  This can be used to provide custom
    288316        deserializations (e.g. to support JSON-RPC class hinting).
     317
     318        ``object_pairs_hook``, if specified, will be called with the result of
     319        every JSON object decoded with an ordered list of pairs.  The return
     320        value of ``object_pairs_hook`` will be used instead of the ``dict``.
     321        This feature can be used to implement custom decoders that rely on the
     322        order that the key and value pairs are decoded (for example,
     323        collections.OrderedDict will remember the order of insertion). If
     324        ``object_hook`` is also defined, the ``object_pairs_hook`` takes
     325        priority.
    289326
    290327        ``parse_float``, if specified, will be called with the string
     
    299336
    300337        ``parse_constant``, if specified, will be called with one of the
    301         following strings: -Infinity, Infinity, NaN, null, true, false.
     338        following strings: -Infinity, Infinity, NaN.
    302339        This can be used to raise an exception if invalid JSON numbers
    303340        are encountered.
     341
     342        If ``strict`` is false (true is the default), then control
     343        characters will be allowed inside strings.  Control characters in
     344        this context are those with character codes in the 0-31 range,
     345        including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.
    304346
    305347        """
    306348        self.encoding = encoding
    307349        self.object_hook = object_hook
    308         self.parse_float = parse_float
    309         self.parse_int = parse_int
    310         self.parse_constant = parse_constant
     350        self.object_pairs_hook = object_pairs_hook
     351        self.parse_float = parse_float or float
     352        self.parse_int = parse_int or int
     353        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
    311354        self.strict = strict
     355        self.parse_object = JSONObject
     356        self.parse_array = JSONArray
     357        self.parse_string = scanstring
     358        self.scan_once = scanner.make_scanner(self)
    312359
    313360    def decode(self, s, _w=WHITESPACE.match):
    314         """
    315         Return the Python representation of ``s`` (a ``str`` or ``unicode``
     361        """Return the Python representation of ``s`` (a ``str`` or ``unicode``
    316362        instance containing a JSON document)
    317363
     
    323369        return obj
    324370
    325     def raw_decode(self, s, **kw):
    326         """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning
    327         with a JSON document) and return a 2-tuple of the Python
     371    def raw_decode(self, s, idx=0):
     372        """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
     373        beginning with a JSON document) and return a 2-tuple of the Python
    328374        representation and the index in ``s`` where the document ended.
    329375
     
    332378
    333379        """
    334         kw.setdefault('context', self)
    335         try:
    336             obj, end = self._scanner.iterscan(s, **kw).next()
     380        try:
     381            obj, end = self.scan_once(s, idx)
    337382        except StopIteration:
    338383            raise ValueError("No JSON object could be decoded")
Note: See TracChangeset for help on using the changeset viewer.