Ignore:
Timestamp:
Mar 19, 2014, 11:31:01 PM (11 years ago)
Author:
dmik
Message:

python: Merge vendor 2.7.6 to trunk.

Location:
python/trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • python/trunk

  • python/trunk/Lib/lib2to3/pgen2/tokenize.py

    r2 r391  
    3838           "generate_tokens", "untokenize"]
    3939del token
     40
     41try:
     42    bytes
     43except NameError:
     44    # Support bytes type in Python <= 2.5, so 2to3 turns itself into
     45    # valid Python 3 code.
     46    bytes = str
    4047
    4148def group(*choices): return '(' + '|'.join(choices) + ')'
     
    230237            toks_append(tokval)
    231238
    232 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
     239cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)')
    233240
    234241def _get_normal_name(orig_enc):
     
    254261
    255262    It detects the encoding from the presence of a utf-8 bom or an encoding
    256     cookie as specified in pep-0263. If both a bom and a cookie are present,
    257     but disagree, a SyntaxError will be raised. If the encoding cookie is an
    258     invalid charset, raise a SyntaxError.
     263    cookie as specified in pep-0263. If both a bom and a cookie are present, but
     264    disagree, a SyntaxError will be raised. If the encoding cookie is an invalid
     265    charset, raise a SyntaxError.  Note that if a utf-8 bom is found,
     266    'utf-8-sig' is returned.
    259267
    260268    If no encoding is specified, then the default of 'utf-8' will be returned.
     
    262270    bom_found = False
    263271    encoding = None
     272    default = 'utf-8'
    264273    def read_or_stop():
    265274        try:
    266275            return readline()
    267276        except StopIteration:
    268             return b''
     277            return bytes()
    269278
    270279    def find_cookie(line):
     
    273282        except UnicodeDecodeError:
    274283            return None
    275 
    276         matches = cookie_re.findall(line_string)
    277         if not matches:
     284        match = cookie_re.match(line_string)
     285        if not match:
    278286            return None
    279         encoding = _get_normal_name(matches[0])
     287        encoding = _get_normal_name(match.group(1))
    280288        try:
    281289            codec = lookup(encoding)
     
    288296                # This behaviour mimics the Python interpreter
    289297                raise SyntaxError('encoding problem: utf-8')
    290             else:
    291                 # Allow it to be properly encoded and decoded.
    292                 encoding = 'utf-8-sig'
     298            encoding += '-sig'
    293299        return encoding
    294300
     
    297303        bom_found = True
    298304        first = first[3:]
     305        default = 'utf-8-sig'
    299306    if not first:
    300         return 'utf-8', []
     307        return default, []
    301308
    302309    encoding = find_cookie(first)
     
    306313    second = read_or_stop()
    307314    if not second:
    308         return 'utf-8', [first]
     315        return default, [first]
    309316
    310317    encoding = find_cookie(second)
     
    312319        return encoding, [first, second]
    313320
    314     return 'utf-8', [first, second]
     321    return default, [first, second]
    315322
    316323def untokenize(iterable):
Note: See TracChangeset for help on using the changeset viewer.