[2] | 1 | # Copyright 2006 Google, Inc. All Rights Reserved.
|
---|
| 2 | # Licensed to PSF under a Contributor Agreement.
|
---|
| 3 |
|
---|
| 4 | """Pattern compiler.
|
---|
| 5 |
|
---|
| 6 | The grammer is taken from PatternGrammar.txt.
|
---|
| 7 |
|
---|
| 8 | The compiler compiles a pattern to a pytree.*Pattern instance.
|
---|
| 9 | """
|
---|
| 10 |
|
---|
| 11 | __author__ = "Guido van Rossum <guido@python.org>"
|
---|
| 12 |
|
---|
| 13 | # Python imports
|
---|
| 14 | import os
|
---|
[391] | 15 | import StringIO
|
---|
[2] | 16 |
|
---|
| 17 | # Fairly local imports
|
---|
| 18 | from .pgen2 import driver, literals, token, tokenize, parse, grammar
|
---|
| 19 |
|
---|
| 20 | # Really local imports
|
---|
| 21 | from . import pytree
|
---|
| 22 | from . import pygram
|
---|
| 23 |
|
---|
| 24 | # The pattern grammar file
|
---|
| 25 | _PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__),
|
---|
| 26 | "PatternGrammar.txt")
|
---|
| 27 |
|
---|
| 28 |
|
---|
| 29 | class PatternSyntaxError(Exception):
|
---|
| 30 | pass
|
---|
| 31 |
|
---|
| 32 |
|
---|
| 33 | def tokenize_wrapper(input):
|
---|
| 34 | """Tokenizes a string suppressing significant whitespace."""
|
---|
| 35 | skip = set((token.NEWLINE, token.INDENT, token.DEDENT))
|
---|
[391] | 36 | tokens = tokenize.generate_tokens(StringIO.StringIO(input).readline)
|
---|
[2] | 37 | for quintuple in tokens:
|
---|
| 38 | type, value, start, end, line_text = quintuple
|
---|
| 39 | if type not in skip:
|
---|
| 40 | yield quintuple
|
---|
| 41 |
|
---|
| 42 |
|
---|
| 43 | class PatternCompiler(object):
|
---|
| 44 |
|
---|
| 45 | def __init__(self, grammar_file=_PATTERN_GRAMMAR_FILE):
|
---|
| 46 | """Initializer.
|
---|
| 47 |
|
---|
| 48 | Takes an optional alternative filename for the pattern grammar.
|
---|
| 49 | """
|
---|
| 50 | self.grammar = driver.load_grammar(grammar_file)
|
---|
| 51 | self.syms = pygram.Symbols(self.grammar)
|
---|
| 52 | self.pygrammar = pygram.python_grammar
|
---|
| 53 | self.pysyms = pygram.python_symbols
|
---|
| 54 | self.driver = driver.Driver(self.grammar, convert=pattern_convert)
|
---|
| 55 |
|
---|
[391] | 56 | def compile_pattern(self, input, debug=False, with_tree=False):
|
---|
[2] | 57 | """Compiles a pattern string to a nested pytree.*Pattern object."""
|
---|
| 58 | tokens = tokenize_wrapper(input)
|
---|
| 59 | try:
|
---|
| 60 | root = self.driver.parse_tokens(tokens, debug=debug)
|
---|
| 61 | except parse.ParseError as e:
|
---|
| 62 | raise PatternSyntaxError(str(e))
|
---|
[391] | 63 | if with_tree:
|
---|
| 64 | return self.compile_node(root), root
|
---|
| 65 | else:
|
---|
| 66 | return self.compile_node(root)
|
---|
[2] | 67 |
|
---|
| 68 | def compile_node(self, node):
|
---|
| 69 | """Compiles a node, recursively.
|
---|
| 70 |
|
---|
| 71 | This is one big switch on the node type.
|
---|
| 72 | """
|
---|
| 73 | # XXX Optimize certain Wildcard-containing-Wildcard patterns
|
---|
| 74 | # that can be merged
|
---|
| 75 | if node.type == self.syms.Matcher:
|
---|
| 76 | node = node.children[0] # Avoid unneeded recursion
|
---|
| 77 |
|
---|
| 78 | if node.type == self.syms.Alternatives:
|
---|
| 79 | # Skip the odd children since they are just '|' tokens
|
---|
| 80 | alts = [self.compile_node(ch) for ch in node.children[::2]]
|
---|
| 81 | if len(alts) == 1:
|
---|
| 82 | return alts[0]
|
---|
| 83 | p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
|
---|
| 84 | return p.optimize()
|
---|
| 85 |
|
---|
| 86 | if node.type == self.syms.Alternative:
|
---|
| 87 | units = [self.compile_node(ch) for ch in node.children]
|
---|
| 88 | if len(units) == 1:
|
---|
| 89 | return units[0]
|
---|
| 90 | p = pytree.WildcardPattern([units], min=1, max=1)
|
---|
| 91 | return p.optimize()
|
---|
| 92 |
|
---|
| 93 | if node.type == self.syms.NegatedUnit:
|
---|
| 94 | pattern = self.compile_basic(node.children[1:])
|
---|
| 95 | p = pytree.NegatedPattern(pattern)
|
---|
| 96 | return p.optimize()
|
---|
| 97 |
|
---|
| 98 | assert node.type == self.syms.Unit
|
---|
| 99 |
|
---|
| 100 | name = None
|
---|
| 101 | nodes = node.children
|
---|
| 102 | if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
|
---|
| 103 | name = nodes[0].value
|
---|
| 104 | nodes = nodes[2:]
|
---|
| 105 | repeat = None
|
---|
| 106 | if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
|
---|
| 107 | repeat = nodes[-1]
|
---|
| 108 | nodes = nodes[:-1]
|
---|
| 109 |
|
---|
| 110 | # Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
|
---|
| 111 | pattern = self.compile_basic(nodes, repeat)
|
---|
| 112 |
|
---|
| 113 | if repeat is not None:
|
---|
| 114 | assert repeat.type == self.syms.Repeater
|
---|
| 115 | children = repeat.children
|
---|
| 116 | child = children[0]
|
---|
| 117 | if child.type == token.STAR:
|
---|
| 118 | min = 0
|
---|
| 119 | max = pytree.HUGE
|
---|
| 120 | elif child.type == token.PLUS:
|
---|
| 121 | min = 1
|
---|
| 122 | max = pytree.HUGE
|
---|
| 123 | elif child.type == token.LBRACE:
|
---|
| 124 | assert children[-1].type == token.RBRACE
|
---|
| 125 | assert len(children) in (3, 5)
|
---|
| 126 | min = max = self.get_int(children[1])
|
---|
| 127 | if len(children) == 5:
|
---|
| 128 | max = self.get_int(children[3])
|
---|
| 129 | else:
|
---|
| 130 | assert False
|
---|
| 131 | if min != 1 or max != 1:
|
---|
| 132 | pattern = pattern.optimize()
|
---|
| 133 | pattern = pytree.WildcardPattern([[pattern]], min=min, max=max)
|
---|
| 134 |
|
---|
| 135 | if name is not None:
|
---|
| 136 | pattern.name = name
|
---|
| 137 | return pattern.optimize()
|
---|
| 138 |
|
---|
| 139 | def compile_basic(self, nodes, repeat=None):
|
---|
| 140 | # Compile STRING | NAME [Details] | (...) | [...]
|
---|
| 141 | assert len(nodes) >= 1
|
---|
| 142 | node = nodes[0]
|
---|
| 143 | if node.type == token.STRING:
|
---|
| 144 | value = unicode(literals.evalString(node.value))
|
---|
| 145 | return pytree.LeafPattern(_type_of_literal(value), value)
|
---|
| 146 | elif node.type == token.NAME:
|
---|
| 147 | value = node.value
|
---|
| 148 | if value.isupper():
|
---|
| 149 | if value not in TOKEN_MAP:
|
---|
| 150 | raise PatternSyntaxError("Invalid token: %r" % value)
|
---|
| 151 | if nodes[1:]:
|
---|
| 152 | raise PatternSyntaxError("Can't have details for token")
|
---|
| 153 | return pytree.LeafPattern(TOKEN_MAP[value])
|
---|
| 154 | else:
|
---|
| 155 | if value == "any":
|
---|
| 156 | type = None
|
---|
| 157 | elif not value.startswith("_"):
|
---|
| 158 | type = getattr(self.pysyms, value, None)
|
---|
| 159 | if type is None:
|
---|
| 160 | raise PatternSyntaxError("Invalid symbol: %r" % value)
|
---|
| 161 | if nodes[1:]: # Details present
|
---|
| 162 | content = [self.compile_node(nodes[1].children[1])]
|
---|
| 163 | else:
|
---|
| 164 | content = None
|
---|
| 165 | return pytree.NodePattern(type, content)
|
---|
| 166 | elif node.value == "(":
|
---|
| 167 | return self.compile_node(nodes[1])
|
---|
| 168 | elif node.value == "[":
|
---|
| 169 | assert repeat is None
|
---|
| 170 | subpattern = self.compile_node(nodes[1])
|
---|
| 171 | return pytree.WildcardPattern([[subpattern]], min=0, max=1)
|
---|
| 172 | assert False, node
|
---|
| 173 |
|
---|
| 174 | def get_int(self, node):
|
---|
| 175 | assert node.type == token.NUMBER
|
---|
| 176 | return int(node.value)
|
---|
| 177 |
|
---|
| 178 |
|
---|
| 179 | # Map named tokens to the type value for a LeafPattern
|
---|
| 180 | TOKEN_MAP = {"NAME": token.NAME,
|
---|
| 181 | "STRING": token.STRING,
|
---|
| 182 | "NUMBER": token.NUMBER,
|
---|
| 183 | "TOKEN": None}
|
---|
| 184 |
|
---|
| 185 |
|
---|
| 186 | def _type_of_literal(value):
|
---|
| 187 | if value[0].isalpha():
|
---|
| 188 | return token.NAME
|
---|
| 189 | elif value in grammar.opmap:
|
---|
| 190 | return grammar.opmap[value]
|
---|
| 191 | else:
|
---|
| 192 | return None
|
---|
| 193 |
|
---|
| 194 |
|
---|
| 195 | def pattern_convert(grammar, raw_node_info):
|
---|
| 196 | """Converts raw node information to a Node or Leaf instance."""
|
---|
| 197 | type, value, context, children = raw_node_info
|
---|
| 198 | if children or type in grammar.number2symbol:
|
---|
| 199 | return pytree.Node(type, children, context=context)
|
---|
| 200 | else:
|
---|
| 201 | return pytree.Leaf(type, value, context=context)
|
---|
| 202 |
|
---|
| 203 |
|
---|
| 204 | def compile_pattern(pattern):
|
---|
| 205 | return PatternCompiler().compile_pattern(pattern)
|
---|