Changeset 391 for python/trunk/Lib/lib2to3/pgen2
- Timestamp:
- Mar 19, 2014, 11:31:01 PM (11 years ago)
- Location:
- python/trunk
- Files:
-
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
python/trunk
-
Property svn:mergeinfo
set to
/python/vendor/Python-2.7.6 merged eligible /python/vendor/current merged eligible
-
Property svn:mergeinfo
set to
-
python/trunk/Lib/lib2to3/pgen2/conv.py
r2 r391 52 52 53 53 def parse_graminit_h(self, filename): 54 """Parse the .h file writ en by pgen. (Internal)54 """Parse the .h file written by pgen. (Internal) 55 55 56 56 This file is a sequence of #define statements defining the … … 83 83 84 84 def parse_graminit_c(self, filename): 85 """Parse the .c file writ en by pgen. (Internal)85 """Parse the .c file written by pgen. (Internal) 86 86 87 87 The file looks as follows. The first two lines are always this: -
python/trunk/Lib/lib2to3/pgen2/driver.py
r2 r391 20 20 import os 21 21 import logging 22 import StringIO 22 23 import sys 23 24 … … 102 103 def parse_string(self, text, debug=False): 103 104 """Parse a string and return the syntax tree.""" 104 tokens = tokenize.generate_tokens( generate_lines(text).next)105 tokens = tokenize.generate_tokens(StringIO.StringIO(text).readline) 105 106 return self.parse_tokens(tokens, debug) 106 107 108 def generate_lines(text):109 """Generator that behaves like readline without using StringIO."""110 for line in text.splitlines(True):111 yield line112 while True:113 yield ""114 107 115 108 … … 146 139 return True 147 140 return os.path.getmtime(a) >= os.path.getmtime(b) 141 142 143 def main(*args): 144 """Main program, when run as a script: produce grammar pickle files. 145 146 Calls load_grammar for each argument, a path to a grammar text file. 147 """ 148 if not args: 149 args = sys.argv[1:] 150 logging.basicConfig(level=logging.INFO, stream=sys.stdout, 151 format='%(message)s') 152 for gt in args: 153 load_grammar(gt, save=True, force=True) 154 return True 155 156 if __name__ == "__main__": 157 sys.exit(int(not main())) -
python/trunk/Lib/lib2to3/pgen2/grammar.py
r2 r391 21 21 22 22 class Grammar(object): 23 """Pgen parsing tables tablesconversion class.23 """Pgen parsing tables conversion class. 24 24 25 25 Once initialized, this class supplies the grammar tables for the … … 46 46 47 47 states -- a list of DFAs, where each DFA is a list of 48 states, each state is isa list of arcs, and each48 states, each state is a list of arcs, and each 49 49 arc is a (i, j) pair where i is a label and j is 50 50 a state number. The DFA number is the index into -
python/trunk/Lib/lib2to3/pgen2/tokenize.py
r2 r391 38 38 "generate_tokens", "untokenize"] 39 39 del token 40 41 try: 42 bytes 43 except NameError: 44 # Support bytes type in Python <= 2.5, so 2to3 turns itself into 45 # valid Python 3 code. 46 bytes = str 40 47 41 48 def group(*choices): return '(' + '|'.join(choices) + ')' … … 230 237 toks_append(tokval) 231 238 232 cookie_re = re.compile( "coding[:=]\s*([-\w.]+)")239 cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)') 233 240 234 241 def _get_normal_name(orig_enc): … … 254 261 255 262 It detects the encoding from the presence of a utf-8 bom or an encoding 256 cookie as specified in pep-0263. If both a bom and a cookie are present, 257 but disagree, a SyntaxError will be raised. If the encoding cookie is an 258 invalid charset, raise a SyntaxError. 263 cookie as specified in pep-0263. If both a bom and a cookie are present, but 264 disagree, a SyntaxError will be raised. If the encoding cookie is an invalid 265 charset, raise a SyntaxError. Note that if a utf-8 bom is found, 266 'utf-8-sig' is returned. 259 267 260 268 If no encoding is specified, then the default of 'utf-8' will be returned. … … 262 270 bom_found = False 263 271 encoding = None 272 default = 'utf-8' 264 273 def read_or_stop(): 265 274 try: 266 275 return readline() 267 276 except StopIteration: 268 return b ''277 return bytes() 269 278 270 279 def find_cookie(line): … … 273 282 except UnicodeDecodeError: 274 283 return None 275 276 matches = cookie_re.findall(line_string) 277 if not matches: 284 match = cookie_re.match(line_string) 285 if not match: 278 286 return None 279 encoding = _get_normal_name(match es[0])287 encoding = _get_normal_name(match.group(1)) 280 288 try: 281 289 codec = lookup(encoding) … … 288 296 # This behaviour mimics the Python interpreter 289 297 raise SyntaxError('encoding problem: utf-8') 290 298 else: 291 # Allow it to be properly encoded and decoded. 292 encoding = 'utf-8-sig' 298 encoding += '-sig' 293 299 return encoding 294 300 … … 297 303 bom_found = True 298 304 first = first[3:] 305 default = 'utf-8-sig' 299 306 if not first: 300 return 'utf-8', []307 return default, [] 301 308 302 309 encoding = find_cookie(first) … … 306 313 second = read_or_stop() 307 314 if not second: 308 return 'utf-8', [first]315 return default, [first] 309 316 310 317 encoding = find_cookie(second) … … 312 319 return encoding, [first, second] 313 320 314 return 'utf-8', [first, second]321 return default, [first, second] 315 322 316 323 def untokenize(iterable):
Note:
See TracChangeset
for help on using the changeset viewer.