Changeset 391 for python/trunk/Lib/lib2to3/pgen2
- Timestamp:
- Mar 19, 2014, 11:31:01 PM (11 years ago)
- Location:
- python/trunk
- Files:
-
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
python/trunk
-
Property svn:mergeinfo
set to
/python/vendor/Python-2.7.6 merged eligible /python/vendor/current merged eligible
-
Property svn:mergeinfo
set to
-
python/trunk/Lib/lib2to3/pgen2/conv.py
r2 r391 52 52 53 53 def parse_graminit_h(self, filename): 54 """Parse the .h file writ en by pgen. (Internal)54 """Parse the .h file written by pgen. (Internal) 55 55 56 56 This file is a sequence of #define statements defining the … … 83 83 84 84 def parse_graminit_c(self, filename): 85 """Parse the .c file writ en by pgen. (Internal)85 """Parse the .c file written by pgen. (Internal) 86 86 87 87 The file looks as follows. The first two lines are always this: -
python/trunk/Lib/lib2to3/pgen2/driver.py
r2 r391 20 20 import os 21 21 import logging 22 import StringIO 22 23 import sys 23 24 … … 102 103 def parse_string(self, text, debug=False): 103 104 """Parse a string and return the syntax tree.""" 104 tokens = tokenize.generate_tokens( generate_lines(text).next)105 tokens = tokenize.generate_tokens(StringIO.StringIO(text).readline) 105 106 return self.parse_tokens(tokens, debug) 106 107 108 def generate_lines(text):109 """Generator that behaves like readline without using StringIO."""110 for line in text.splitlines(True):111 yield line112 while True:113 yield ""114 107 115 108 … … 146 139 return True 147 140 return os.path.getmtime(a) >= os.path.getmtime(b) 141 142 143 def main(*args): 144 """Main program, when run as a script: produce grammar pickle files. 145 146 Calls load_grammar for each argument, a path to a grammar text file. 147 """ 148 if not args: 149 args = sys.argv[1:] 150 logging.basicConfig(level=logging.INFO, stream=sys.stdout, 151 format='%(message)s') 152 for gt in args: 153 load_grammar(gt, save=True, force=True) 154 return True 155 156 if __name__ == "__main__": 157 sys.exit(int(not main())) -
python/trunk/Lib/lib2to3/pgen2/grammar.py
r2 r391 21 21 22 22 class Grammar(object): 23 """Pgen parsing tables tablesconversion class.23 """Pgen parsing tables conversion class. 24 24 25 25 Once initialized, this class supplies the grammar tables for the … … 46 46 47 47 states -- a list of DFAs, where each DFA is a list of 48 states, each state is isa list of arcs, and each48 states, each state is a list of arcs, and each 49 49 arc is a (i, j) pair where i is a label and j is 50 50 a state number. The DFA number is the index into -
python/trunk/Lib/lib2to3/pgen2/tokenize.py
r2 r391 38 38 "generate_tokens", "untokenize"] 39 39 del token 40 41 try: 42 bytes 43 except NameError: 44 # Support bytes type in Python <= 2.5, so 2to3 turns itself into 45 # valid Python 3 code. 46 bytes = str 40 47 41 48 def group(*choices): return '(' + '|'.join(choices) + ')' … … 230 237 toks_append(tokval) 231 238 232 cookie_re = re.compile( "coding[:=]\s*([-\w.]+)")239 cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)') 233 240 234 241 def _get_normal_name(orig_enc): … … 254 261 255 262 It detects the encoding from the presence of a utf-8 bom or an encoding 256 cookie as specified in pep-0263. If both a bom and a cookie are present, 257 but disagree, a SyntaxError will be raised. If the encoding cookie is an 258 invalid charset, raise a SyntaxError. 263 cookie as specified in pep-0263. If both a bom and a cookie are present, but 264 disagree, a SyntaxError will be raised. If the encoding cookie is an invalid 265 charset, raise a SyntaxError. Note that if a utf-8 bom is found, 266 'utf-8-sig' is returned. 259 267 260 268 If no encoding is specified, then the default of 'utf-8' will be returned. … … 262 270 bom_found = False 263 271 encoding = None 272 default = 'utf-8' 264 273 def read_or_stop(): 265 274 try: 266 275 return readline() 267 276 except StopIteration: 268 return b ''277 return bytes() 269 278 270 279 def find_cookie(line): … … 273 282 except UnicodeDecodeError: 274 283 return None 275 276 matches = cookie_re.findall(line_string) 277 if not matches: 284 match = cookie_re.match(line_string) 285 if not match: 278 286 return None 279 encoding = _get_normal_name(match es[0])287 encoding = _get_normal_name(match.group(1)) 280 288 try: 281 289 codec = lookup(encoding) … … 288 296 # This behaviour mimics the Python interpreter 289 297 raise SyntaxError('encoding problem: utf-8') 290 298 else: 291 # Allow it to be properly encoded and decoded. 292 encoding = 'utf-8-sig' 298 encoding += '-sig' 293 299 return encoding 294 300 … … 297 303 bom_found = True 298 304 first = first[3:] 305 default = 'utf-8-sig' 299 306 if not first: 300 return 'utf-8', []307 return default, [] 301 308 302 309 encoding = find_cookie(first) … … 306 313 second = read_or_stop() 307 314 if not second: 308 return 'utf-8', [first]315 return default, [first] 309 316 310 317 encoding = find_cookie(second) … … 312 319 return encoding, [first, second] 313 320 314 return 'utf-8', [first, second]321 return default, [first, second] 315 322 316 323 def untokenize(iterable):
Note:
See TracChangeset
for help on using the changeset viewer.