1 | # Copyright 2006 Google, Inc. All Rights Reserved.
|
---|
2 | # Licensed to PSF under a Contributor Agreement.
|
---|
3 |
|
---|
4 | """Pattern compiler.
|
---|
5 |
|
---|
6 | The grammer is taken from PatternGrammar.txt.
|
---|
7 |
|
---|
8 | The compiler compiles a pattern to a pytree.*Pattern instance.
|
---|
9 | """
|
---|
10 |
|
---|
11 | __author__ = "Guido van Rossum <guido@python.org>"
|
---|
12 |
|
---|
13 | # Python imports
|
---|
14 | import os
|
---|
15 |
|
---|
16 | # Fairly local imports
|
---|
17 | from .pgen2 import driver, literals, token, tokenize, parse, grammar
|
---|
18 |
|
---|
19 | # Really local imports
|
---|
20 | from . import pytree
|
---|
21 | from . import pygram
|
---|
22 |
|
---|
23 | # The pattern grammar file
|
---|
24 | _PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__),
|
---|
25 | "PatternGrammar.txt")
|
---|
26 |
|
---|
27 |
|
---|
28 | class PatternSyntaxError(Exception):
|
---|
29 | pass
|
---|
30 |
|
---|
31 |
|
---|
32 | def tokenize_wrapper(input):
|
---|
33 | """Tokenizes a string suppressing significant whitespace."""
|
---|
34 | skip = set((token.NEWLINE, token.INDENT, token.DEDENT))
|
---|
35 | tokens = tokenize.generate_tokens(driver.generate_lines(input).next)
|
---|
36 | for quintuple in tokens:
|
---|
37 | type, value, start, end, line_text = quintuple
|
---|
38 | if type not in skip:
|
---|
39 | yield quintuple
|
---|
40 |
|
---|
41 |
|
---|
42 | class PatternCompiler(object):
|
---|
43 |
|
---|
44 | def __init__(self, grammar_file=_PATTERN_GRAMMAR_FILE):
|
---|
45 | """Initializer.
|
---|
46 |
|
---|
47 | Takes an optional alternative filename for the pattern grammar.
|
---|
48 | """
|
---|
49 | self.grammar = driver.load_grammar(grammar_file)
|
---|
50 | self.syms = pygram.Symbols(self.grammar)
|
---|
51 | self.pygrammar = pygram.python_grammar
|
---|
52 | self.pysyms = pygram.python_symbols
|
---|
53 | self.driver = driver.Driver(self.grammar, convert=pattern_convert)
|
---|
54 |
|
---|
55 | def compile_pattern(self, input, debug=False):
|
---|
56 | """Compiles a pattern string to a nested pytree.*Pattern object."""
|
---|
57 | tokens = tokenize_wrapper(input)
|
---|
58 | try:
|
---|
59 | root = self.driver.parse_tokens(tokens, debug=debug)
|
---|
60 | except parse.ParseError as e:
|
---|
61 | raise PatternSyntaxError(str(e))
|
---|
62 | return self.compile_node(root)
|
---|
63 |
|
---|
64 | def compile_node(self, node):
|
---|
65 | """Compiles a node, recursively.
|
---|
66 |
|
---|
67 | This is one big switch on the node type.
|
---|
68 | """
|
---|
69 | # XXX Optimize certain Wildcard-containing-Wildcard patterns
|
---|
70 | # that can be merged
|
---|
71 | if node.type == self.syms.Matcher:
|
---|
72 | node = node.children[0] # Avoid unneeded recursion
|
---|
73 |
|
---|
74 | if node.type == self.syms.Alternatives:
|
---|
75 | # Skip the odd children since they are just '|' tokens
|
---|
76 | alts = [self.compile_node(ch) for ch in node.children[::2]]
|
---|
77 | if len(alts) == 1:
|
---|
78 | return alts[0]
|
---|
79 | p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
|
---|
80 | return p.optimize()
|
---|
81 |
|
---|
82 | if node.type == self.syms.Alternative:
|
---|
83 | units = [self.compile_node(ch) for ch in node.children]
|
---|
84 | if len(units) == 1:
|
---|
85 | return units[0]
|
---|
86 | p = pytree.WildcardPattern([units], min=1, max=1)
|
---|
87 | return p.optimize()
|
---|
88 |
|
---|
89 | if node.type == self.syms.NegatedUnit:
|
---|
90 | pattern = self.compile_basic(node.children[1:])
|
---|
91 | p = pytree.NegatedPattern(pattern)
|
---|
92 | return p.optimize()
|
---|
93 |
|
---|
94 | assert node.type == self.syms.Unit
|
---|
95 |
|
---|
96 | name = None
|
---|
97 | nodes = node.children
|
---|
98 | if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
|
---|
99 | name = nodes[0].value
|
---|
100 | nodes = nodes[2:]
|
---|
101 | repeat = None
|
---|
102 | if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
|
---|
103 | repeat = nodes[-1]
|
---|
104 | nodes = nodes[:-1]
|
---|
105 |
|
---|
106 | # Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
|
---|
107 | pattern = self.compile_basic(nodes, repeat)
|
---|
108 |
|
---|
109 | if repeat is not None:
|
---|
110 | assert repeat.type == self.syms.Repeater
|
---|
111 | children = repeat.children
|
---|
112 | child = children[0]
|
---|
113 | if child.type == token.STAR:
|
---|
114 | min = 0
|
---|
115 | max = pytree.HUGE
|
---|
116 | elif child.type == token.PLUS:
|
---|
117 | min = 1
|
---|
118 | max = pytree.HUGE
|
---|
119 | elif child.type == token.LBRACE:
|
---|
120 | assert children[-1].type == token.RBRACE
|
---|
121 | assert len(children) in (3, 5)
|
---|
122 | min = max = self.get_int(children[1])
|
---|
123 | if len(children) == 5:
|
---|
124 | max = self.get_int(children[3])
|
---|
125 | else:
|
---|
126 | assert False
|
---|
127 | if min != 1 or max != 1:
|
---|
128 | pattern = pattern.optimize()
|
---|
129 | pattern = pytree.WildcardPattern([[pattern]], min=min, max=max)
|
---|
130 |
|
---|
131 | if name is not None:
|
---|
132 | pattern.name = name
|
---|
133 | return pattern.optimize()
|
---|
134 |
|
---|
135 | def compile_basic(self, nodes, repeat=None):
|
---|
136 | # Compile STRING | NAME [Details] | (...) | [...]
|
---|
137 | assert len(nodes) >= 1
|
---|
138 | node = nodes[0]
|
---|
139 | if node.type == token.STRING:
|
---|
140 | value = unicode(literals.evalString(node.value))
|
---|
141 | return pytree.LeafPattern(_type_of_literal(value), value)
|
---|
142 | elif node.type == token.NAME:
|
---|
143 | value = node.value
|
---|
144 | if value.isupper():
|
---|
145 | if value not in TOKEN_MAP:
|
---|
146 | raise PatternSyntaxError("Invalid token: %r" % value)
|
---|
147 | if nodes[1:]:
|
---|
148 | raise PatternSyntaxError("Can't have details for token")
|
---|
149 | return pytree.LeafPattern(TOKEN_MAP[value])
|
---|
150 | else:
|
---|
151 | if value == "any":
|
---|
152 | type = None
|
---|
153 | elif not value.startswith("_"):
|
---|
154 | type = getattr(self.pysyms, value, None)
|
---|
155 | if type is None:
|
---|
156 | raise PatternSyntaxError("Invalid symbol: %r" % value)
|
---|
157 | if nodes[1:]: # Details present
|
---|
158 | content = [self.compile_node(nodes[1].children[1])]
|
---|
159 | else:
|
---|
160 | content = None
|
---|
161 | return pytree.NodePattern(type, content)
|
---|
162 | elif node.value == "(":
|
---|
163 | return self.compile_node(nodes[1])
|
---|
164 | elif node.value == "[":
|
---|
165 | assert repeat is None
|
---|
166 | subpattern = self.compile_node(nodes[1])
|
---|
167 | return pytree.WildcardPattern([[subpattern]], min=0, max=1)
|
---|
168 | assert False, node
|
---|
169 |
|
---|
170 | def get_int(self, node):
|
---|
171 | assert node.type == token.NUMBER
|
---|
172 | return int(node.value)
|
---|
173 |
|
---|
174 |
|
---|
175 | # Map named tokens to the type value for a LeafPattern
|
---|
176 | TOKEN_MAP = {"NAME": token.NAME,
|
---|
177 | "STRING": token.STRING,
|
---|
178 | "NUMBER": token.NUMBER,
|
---|
179 | "TOKEN": None}
|
---|
180 |
|
---|
181 |
|
---|
182 | def _type_of_literal(value):
|
---|
183 | if value[0].isalpha():
|
---|
184 | return token.NAME
|
---|
185 | elif value in grammar.opmap:
|
---|
186 | return grammar.opmap[value]
|
---|
187 | else:
|
---|
188 | return None
|
---|
189 |
|
---|
190 |
|
---|
191 | def pattern_convert(grammar, raw_node_info):
|
---|
192 | """Converts raw node information to a Node or Leaf instance."""
|
---|
193 | type, value, context, children = raw_node_info
|
---|
194 | if children or type in grammar.number2symbol:
|
---|
195 | return pytree.Node(type, children, context=context)
|
---|
196 | else:
|
---|
197 | return pytree.Leaf(type, value, context=context)
|
---|
198 |
|
---|
199 |
|
---|
200 | def compile_pattern(pattern):
|
---|
201 | return PatternCompiler().compile_pattern(pattern)
|
---|