Ignore:
Timestamp:
Mar 19, 2014, 11:31:01 PM (11 years ago)
Author:
dmik
Message:

python: Merge vendor 2.7.6 to trunk.

Location:
python/trunk
Files:
5 edited

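Legend:

Unmodified (old r2 and new r391 line numbers both shown, run together: "5252" is line 52 in both, "114107" is old line 114 / new line 107)
Added (only the new r391 line number is shown)
Removed (only the old r2 line number is shown)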
  • python/trunk

  • python/trunk/Lib/lib2to3/pgen2/conv.py

    r2 r391  
    5252
    5353    def parse_graminit_h(self, filename):
    54         """Parse the .h file writen by pgen.  (Internal)
     54        """Parse the .h file written by pgen.  (Internal)
    5555
    5656        This file is a sequence of #define statements defining the
     
    8383
    8484    def parse_graminit_c(self, filename):
    85         """Parse the .c file writen by pgen.  (Internal)
     85        """Parse the .c file written by pgen.  (Internal)
    8686
    8787        The file looks as follows.  The first two lines are always this:
  • python/trunk/Lib/lib2to3/pgen2/driver.py

    r2 r391  
    2020import os
    2121import logging
     22import StringIO
    2223import sys
    2324
     
    102103    def parse_string(self, text, debug=False):
    103104        """Parse a string and return the syntax tree."""
    104         tokens = tokenize.generate_tokens(generate_lines(text).next)
     105        tokens = tokenize.generate_tokens(StringIO.StringIO(text).readline)
    105106        return self.parse_tokens(tokens, debug)
    106 
    107 
    108 def generate_lines(text):
    109     """Generator that behaves like readline without using StringIO."""
    110     for line in text.splitlines(True):
    111         yield line
    112     while True:
    113         yield ""
    114107
    115108
     
    146139        return True
    147140    return os.path.getmtime(a) >= os.path.getmtime(b)
     141
     142
     143def main(*args):
     144    """Main program, when run as a script: produce grammar pickle files.
     145
     146    Calls load_grammar for each argument, a path to a grammar text file.
     147    """
     148    if not args:
     149        args = sys.argv[1:]
     150    logging.basicConfig(level=logging.INFO, stream=sys.stdout,
     151                        format='%(message)s')
     152    for gt in args:
     153        load_grammar(gt, save=True, force=True)
     154    return True
     155
     156if __name__ == "__main__":
     157    sys.exit(int(not main()))
  • python/trunk/Lib/lib2to3/pgen2/grammar.py

    r2 r391  
    2121
    2222class Grammar(object):
    23     """Pgen parsing tables tables conversion class.
     23    """Pgen parsing tables conversion class.
    2424
    2525    Once initialized, this class supplies the grammar tables for the
     
    4646
    4747    states        -- a list of DFAs, where each DFA is a list of
    48                      states, each state is is a list of arcs, and each
     48                     states, each state is a list of arcs, and each
    4949                     arc is a (i, j) pair where i is a label and j is
    5050                     a state number.  The DFA number is the index into
  • python/trunk/Lib/lib2to3/pgen2/tokenize.py

    r2 r391  
    3838           "generate_tokens", "untokenize"]
    3939del token
     40
     41try:
     42    bytes
     43except NameError:
     44    # Support bytes type in Python <= 2.5, so 2to3 turns itself into
     45    # valid Python 3 code.
     46    bytes = str
    4047
    4148def group(*choices): return '(' + '|'.join(choices) + ')'
     
    230237            toks_append(tokval)
    231238
    232 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
     239cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)')
    233240
    234241def _get_normal_name(orig_enc):
     
    254261
    255262    It detects the encoding from the presence of a utf-8 bom or an encoding
    256     cookie as specified in pep-0263. If both a bom and a cookie are present,
    257     but disagree, a SyntaxError will be raised. If the encoding cookie is an
    258     invalid charset, raise a SyntaxError.
     263    cookie as specified in pep-0263. If both a bom and a cookie are present, but
     264    disagree, a SyntaxError will be raised. If the encoding cookie is an invalid
     265    charset, raise a SyntaxError.  Note that if a utf-8 bom is found,
     266    'utf-8-sig' is returned.
    259267
    260268    If no encoding is specified, then the default of 'utf-8' will be returned.
     
    262270    bom_found = False
    263271    encoding = None
     272    default = 'utf-8'
    264273    def read_or_stop():
    265274        try:
    266275            return readline()
    267276        except StopIteration:
    268             return b''
     277            return bytes()
    269278
    270279    def find_cookie(line):
     
    273282        except UnicodeDecodeError:
    274283            return None
    275 
    276         matches = cookie_re.findall(line_string)
    277         if not matches:
     284        match = cookie_re.match(line_string)
     285        if not match:
    278286            return None
    279         encoding = _get_normal_name(matches[0])
     287        encoding = _get_normal_name(match.group(1))
    280288        try:
    281289            codec = lookup(encoding)
     
    288296                # This behaviour mimics the Python interpreter
    289297                raise SyntaxError('encoding problem: utf-8')
    290             else:
    291                 # Allow it to be properly encoded and decoded.
    292                 encoding = 'utf-8-sig'
     298            encoding += '-sig'
    293299        return encoding
    294300
     
    297303        bom_found = True
    298304        first = first[3:]
     305        default = 'utf-8-sig'
    299306    if not first:
    300         return 'utf-8', []
     307        return default, []
    301308
    302309    encoding = find_cookie(first)
     
    306313    second = read_or_stop()
    307314    if not second:
    308         return 'utf-8', [first]
     315        return default, [first]
    309316
    310317    encoding = find_cookie(second)
     
    312319        return encoding, [first, second]
    313320
    314     return 'utf-8', [first, second]
     321    return default, [first, second]
    315322
    316323def untokenize(iterable):
Note: See TracChangeset for help on using the changeset viewer.