1 | # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
|
---|
2 | # Licensed to PSF under a Contributor Agreement.
|
---|
3 |
|
---|
4 | # Modifications:
|
---|
5 | # Copyright 2006 Google, Inc. All Rights Reserved.
|
---|
6 | # Licensed to PSF under a Contributor Agreement.
|
---|
7 |
|
---|
8 | """Parser driver.
|
---|
9 |
|
---|
10 | This provides a high-level interface to parse a file into a syntax tree.
|
---|
11 |
|
---|
12 | """
|
---|
13 |
|
---|
14 | __author__ = "Guido van Rossum <guido@python.org>"
|
---|
15 |
|
---|
16 | __all__ = ["Driver", "load_grammar"]
|
---|
17 |
|
---|
18 | # Python imports
|
---|
19 | import codecs
|
---|
20 | import os
|
---|
21 | import logging
|
---|
22 | import StringIO
|
---|
23 | import sys
|
---|
24 |
|
---|
25 | # Pgen imports
|
---|
26 | from . import grammar, parse, token, tokenize, pgen
|
---|
27 |
|
---|
28 |
|
---|
class Driver(object):
    """Combine a tokenizer and a parser into a file/string/stream front end.

    Holds a grammar, an optional conversion callback (forwarded to
    parse.Parser), and a logger used for debug output.
    """

    def __init__(self, grammar, convert=None, logger=None):
        # grammar: the grammar tables driving the parser.
        # convert: optional node-conversion callback forwarded to parse.Parser.
        # logger: logging.Logger for debug messages; root logger when None.
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger()
        self.logger = logger
        self.convert = convert

    def parse_tokens(self, tokens, debug=False):
        """Parse a series of tokens and return the syntax tree."""
        # XXX Move the prefix computation into a wrapper around tokenize.
        p = parse.Parser(self.grammar, self.convert)
        p.setup()
        # (lineno, column) tracks where the previous token ended; any gap
        # between that point and the next token's start is accumulated into
        # "prefix" (whitespace/comments attached to the next real token).
        lineno = 1
        column = 0
        # NOTE: "type" deliberately shadows the builtin inside this method.
        type = value = start = end = line_text = None
        prefix = u""
        for quintuple in tokens:
            type, value, start, end, line_text = quintuple
            if start != (lineno, column):
                # Tokens must never start before the current position.
                assert (lineno, column) <= start, ((lineno, column), start)
                s_lineno, s_column = start
                if lineno < s_lineno:
                    # Bridge any skipped lines with newlines in the prefix.
                    prefix += "\n" * (s_lineno - lineno)
                    lineno = s_lineno
                    column = 0
                if column < s_column:
                    # Capture the intra-line gap text (typically whitespace).
                    prefix += line_text[column:s_column]
                    column = s_column
            if type in (tokenize.COMMENT, tokenize.NL):
                # Comments and non-logical newlines are not fed to the
                # parser; they become part of the next token's prefix.
                prefix += value
                lineno, column = end
                if value.endswith("\n"):
                    lineno += 1
                    column = 0
                continue
            if type == token.OP:
                # Map the generic OP token to its specific grammar token id.
                type = grammar.opmap[value]
            if debug:
                self.logger.debug("%s %r (prefix=%r)",
                                  token.tok_name[type], value, prefix)
            if p.addtoken(type, value, (prefix, start)):
                # The parser signalled that the input is complete.
                if debug:
                    self.logger.debug("Stop.")
                break
            prefix = ""
            lineno, column = end
            if value.endswith("\n"):
                lineno += 1
                column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            raise parse.ParseError("incomplete input",
                                   type, value, (prefix, start))
        return p.rootnode

    def parse_stream_raw(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        tokens = tokenize.generate_tokens(stream.readline)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream, debug=False):
        """Parse a stream and return the syntax tree."""
        return self.parse_stream_raw(stream, debug)

    def parse_file(self, filename, encoding=None, debug=False):
        """Parse a file and return the syntax tree."""
        # codecs.open honours the optional encoding; ensure the stream is
        # closed even when parsing raises.
        stream = codecs.open(filename, "r", encoding)
        try:
            return self.parse_stream(stream, debug)
        finally:
            stream.close()

    def parse_string(self, text, debug=False):
        """Parse a string and return the syntax tree."""
        tokens = tokenize.generate_tokens(StringIO.StringIO(text).readline)
        return self.parse_tokens(tokens, debug)
|
---|
107 |
|
---|
108 |
|
---|
def load_grammar(gt="Grammar.txt", gp=None,
                 save=True, force=False, logger=None):
    """Load the grammar (maybe from a pickle).

    Args:
        gt: path to the grammar text file.
        gp: path of the pickled grammar tables; when None it is derived
            from gt plus the running interpreter's version.
        save: if true, write freshly generated tables back to gp.
        force: if true, regenerate even when the pickle looks up to date.
        logger: logging.Logger for progress messages; root logger when None.

    Returns:
        A grammar.Grammar instance (freshly generated or loaded from gp).
    """
    if logger is None:
        logger = logging.getLogger()
    if gp is None:
        # Derive the pickle name from the grammar file name, embedding the
        # interpreter version so pickles from different Pythons don't clash.
        head, tail = os.path.splitext(gt)
        if tail == ".txt":
            tail = ""
        gp = head + tail + ".".join(map(str, sys.version_info)) + ".pickle"
    if force or not _newer(gp, gt):
        logger.info("Generating grammar tables from %s", gt)
        g = pgen.generate_grammar(gt)
        if save:
            logger.info("Writing grammar tables to %s", gp)
            try:
                g.dump(gp)
            # Fix: use the `except ... as` form (available since Python 2.6,
            # required in Python 3) instead of the removed comma syntax.
            except IOError as e:
                # Best effort: failing to cache (e.g. read-only install
                # directory) must not be fatal.
                logger.info("Writing failed:"+str(e))
    else:
        g = grammar.Grammar()
        g.load(gp)
    return g
|
---|
132 |
|
---|
133 |
|
---|
134 | def _newer(a, b):
|
---|
135 | """Inquire whether file a was written since file b."""
|
---|
136 | if not os.path.exists(a):
|
---|
137 | return False
|
---|
138 | if not os.path.exists(b):
|
---|
139 | return True
|
---|
140 | return os.path.getmtime(a) >= os.path.getmtime(b)
|
---|
141 |
|
---|
142 |
|
---|
def main(*args):
    """Main program, when run as a script: produce grammar pickle files.

    Calls load_grammar for each argument, a path to a grammar text file.
    """
    grammar_paths = args or sys.argv[1:]
    # Emit bare INFO-level messages on stdout while generating.
    logging.basicConfig(level=logging.INFO, stream=sys.stdout,
                        format='%(message)s')
    for path in grammar_paths:
        load_grammar(path, save=True, force=True)
    return True
|
---|
155 |
|
---|
# Script entry point: main() returns True on success, so the inverted,
# int-converted value yields exit status 0 for success and 1 for failure.
if __name__ == "__main__":
    sys.exit(int(not main()))
|
---|