1 | """A bottom-up tree matching algorithm implementation meant to speed
|
---|
2 | up 2to3's matching process. After the tree patterns are reduced to
|
---|
3 | their rarest linear path, a linear Aho-Corasick automaton is
|
---|
4 | created. The linear automaton traverses the linear paths from the
|
---|
5 | leaves to the root of the AST and returns a set of nodes for further
|
---|
6 | matching. This reduces significantly the number of candidate nodes."""
|
---|
7 |
|
---|
8 | __author__ = "George Boutsioukis <gboutsioukis@gmail.com>"
|
---|
9 |
|
---|
10 | import logging
|
---|
11 | import itertools
|
---|
12 | from collections import defaultdict
|
---|
13 |
|
---|
14 | from . import pytree
|
---|
15 | from .btm_utils import reduce_tree
|
---|
16 |
|
---|
17 | class BMNode(object):
|
---|
18 | """Class for a node of the Aho-Corasick automaton used in matching"""
|
---|
19 | count = itertools.count()
|
---|
20 | def __init__(self):
|
---|
21 | self.transition_table = {}
|
---|
22 | self.fixers = []
|
---|
23 | self.id = next(BMNode.count)
|
---|
24 | self.content = ''
|
---|
25 |
|
---|
26 | class BottomMatcher(object):
|
---|
27 | """The main matcher class. After instantiating the patterns should
|
---|
28 | be added using the add_fixer method"""
|
---|
29 |
|
---|
30 | def __init__(self):
|
---|
31 | self.match = set()
|
---|
32 | self.root = BMNode()
|
---|
33 | self.nodes = [self.root]
|
---|
34 | self.fixers = []
|
---|
35 | self.logger = logging.getLogger("RefactoringTool")
|
---|
36 |
|
---|
37 | def add_fixer(self, fixer):
|
---|
38 | """Reduces a fixer's pattern tree to a linear path and adds it
|
---|
39 | to the matcher(a common Aho-Corasick automaton). The fixer is
|
---|
40 | appended on the matching states and called when they are
|
---|
41 | reached"""
|
---|
42 | self.fixers.append(fixer)
|
---|
43 | tree = reduce_tree(fixer.pattern_tree)
|
---|
44 | linear = tree.get_linear_subpattern()
|
---|
45 | match_nodes = self.add(linear, start=self.root)
|
---|
46 | for match_node in match_nodes:
|
---|
47 | match_node.fixers.append(fixer)
|
---|
48 |
|
---|
49 | def add(self, pattern, start):
|
---|
50 | "Recursively adds a linear pattern to the AC automaton"
|
---|
51 | #print("adding pattern", pattern, "to", start)
|
---|
52 | if not pattern:
|
---|
53 | #print("empty pattern")
|
---|
54 | return [start]
|
---|
55 | if isinstance(pattern[0], tuple):
|
---|
56 | #alternatives
|
---|
57 | #print("alternatives")
|
---|
58 | match_nodes = []
|
---|
59 | for alternative in pattern[0]:
|
---|
60 | #add all alternatives, and add the rest of the pattern
|
---|
61 | #to each end node
|
---|
62 | end_nodes = self.add(alternative, start=start)
|
---|
63 | for end in end_nodes:
|
---|
64 | match_nodes.extend(self.add(pattern[1:], end))
|
---|
65 | return match_nodes
|
---|
66 | else:
|
---|
67 | #single token
|
---|
68 | #not last
|
---|
69 | if pattern[0] not in start.transition_table:
|
---|
70 | #transition did not exist, create new
|
---|
71 | next_node = BMNode()
|
---|
72 | start.transition_table[pattern[0]] = next_node
|
---|
73 | else:
|
---|
74 | #transition exists already, follow
|
---|
75 | next_node = start.transition_table[pattern[0]]
|
---|
76 |
|
---|
77 | if pattern[1:]:
|
---|
78 | end_nodes = self.add(pattern[1:], start=next_node)
|
---|
79 | else:
|
---|
80 | end_nodes = [next_node]
|
---|
81 | return end_nodes
|
---|
82 |
|
---|
83 | def run(self, leaves):
|
---|
84 | """The main interface with the bottom matcher. The tree is
|
---|
85 | traversed from the bottom using the constructed
|
---|
86 | automaton. Nodes are only checked once as the tree is
|
---|
87 | retraversed. When the automaton fails, we give it one more
|
---|
88 | shot(in case the above tree matches as a whole with the
|
---|
89 | rejected leaf), then we break for the next leaf. There is the
|
---|
90 | special case of multiple arguments(see code comments) where we
|
---|
91 | recheck the nodes
|
---|
92 |
|
---|
93 | Args:
|
---|
94 | The leaves of the AST tree to be matched
|
---|
95 |
|
---|
96 | Returns:
|
---|
97 | A dictionary of node matches with fixers as the keys
|
---|
98 | """
|
---|
99 | current_ac_node = self.root
|
---|
100 | results = defaultdict(list)
|
---|
101 | for leaf in leaves:
|
---|
102 | current_ast_node = leaf
|
---|
103 | while current_ast_node:
|
---|
104 | current_ast_node.was_checked = True
|
---|
105 | for child in current_ast_node.children:
|
---|
106 | # multiple statements, recheck
|
---|
107 | if isinstance(child, pytree.Leaf) and child.value == u";":
|
---|
108 | current_ast_node.was_checked = False
|
---|
109 | break
|
---|
110 | if current_ast_node.type == 1:
|
---|
111 | #name
|
---|
112 | node_token = current_ast_node.value
|
---|
113 | else:
|
---|
114 | node_token = current_ast_node.type
|
---|
115 |
|
---|
116 | if node_token in current_ac_node.transition_table:
|
---|
117 | #token matches
|
---|
118 | current_ac_node = current_ac_node.transition_table[node_token]
|
---|
119 | for fixer in current_ac_node.fixers:
|
---|
120 | if not fixer in results:
|
---|
121 | results[fixer] = []
|
---|
122 | results[fixer].append(current_ast_node)
|
---|
123 |
|
---|
124 | else:
|
---|
125 | #matching failed, reset automaton
|
---|
126 | current_ac_node = self.root
|
---|
127 | if (current_ast_node.parent is not None
|
---|
128 | and current_ast_node.parent.was_checked):
|
---|
129 | #the rest of the tree upwards has been checked, next leaf
|
---|
130 | break
|
---|
131 |
|
---|
132 | #recheck the rejected node once from the root
|
---|
133 | if node_token in current_ac_node.transition_table:
|
---|
134 | #token matches
|
---|
135 | current_ac_node = current_ac_node.transition_table[node_token]
|
---|
136 | for fixer in current_ac_node.fixers:
|
---|
137 | if not fixer in results.keys():
|
---|
138 | results[fixer] = []
|
---|
139 | results[fixer].append(current_ast_node)
|
---|
140 |
|
---|
141 | current_ast_node = current_ast_node.parent
|
---|
142 | return results
|
---|
143 |
|
---|
144 | def print_ac(self):
|
---|
145 | "Prints a graphviz diagram of the BM automaton(for debugging)"
|
---|
146 | print("digraph g{")
|
---|
147 | def print_node(node):
|
---|
148 | for subnode_key in node.transition_table.keys():
|
---|
149 | subnode = node.transition_table[subnode_key]
|
---|
150 | print("%d -> %d [label=%s] //%s" %
|
---|
151 | (node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers)))
|
---|
152 | if subnode_key == 1:
|
---|
153 | print(subnode.content)
|
---|
154 | print_node(subnode)
|
---|
155 | print_node(self.root)
|
---|
156 | print("}")
|
---|
157 |
|
---|
158 | # taken from pytree.py for debugging; only used by print_ac
|
---|
159 | _type_reprs = {}
|
---|
160 | def type_repr(type_num):
|
---|
161 | global _type_reprs
|
---|
162 | if not _type_reprs:
|
---|
163 | from .pygram import python_symbols
|
---|
164 | # printing tokens is possible but not as useful
|
---|
165 | # from .pgen2 import token // token.__dict__.items():
|
---|
166 | for name, val in python_symbols.__dict__.items():
|
---|
167 | if type(val) == int: _type_reprs[val] = name
|
---|
168 | return _type_reprs.setdefault(type_num, type_num)
|
---|