source: python/trunk/Lib/lib2to3/pytree.py@ 1538

Last change on this file since 1538 was 391, checked in by dmik, 12 years ago

python: Merge vendor 2.7.6 to trunk.

  • Property svn:eol-style set to native
File size: 28.4 KB
Line 
1# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
4"""
5Python parse tree definitions.
6
7This is a very concrete parse tree; we need to keep every token and
8even the comments and whitespace between tokens.
9
10There's also a pattern matching implementation here.
11"""
12
13__author__ = "Guido van Rossum <guido@python.org>"
14
15import sys
16import warnings
17from StringIO import StringIO
18
19HUGE = 0x7FFFFFFF # maximum repeat count, default max
20
21_type_reprs = {}
22def type_repr(type_num):
23 global _type_reprs
24 if not _type_reprs:
25 from .pygram import python_symbols
26 # printing tokens is possible but not as useful
27 # from .pgen2 import token // token.__dict__.items():
28 for name, val in python_symbols.__dict__.items():
29 if type(val) == int: _type_reprs[val] = name
30 return _type_reprs.setdefault(type_num, type_num)
31
32class Base(object):
33
34 """
35 Abstract base class for Node and Leaf.
36
37 This provides some default functionality and boilerplate using the
38 template pattern.
39
40 A node may be a subnode of at most one parent.
41 """
42
43 # Default values for instance variables
44 type = None # int: token number (< 256) or symbol number (>= 256)
45 parent = None # Parent node pointer, or None
46 children = () # Tuple of subnodes
47 was_changed = False
48 was_checked = False
49
50 def __new__(cls, *args, **kwds):
51 """Constructor that prevents Base from being instantiated."""
52 assert cls is not Base, "Cannot instantiate Base"
53 return object.__new__(cls)
54
55 def __eq__(self, other):
56 """
57 Compare two nodes for equality.
58
59 This calls the method _eq().
60 """
61 if self.__class__ is not other.__class__:
62 return NotImplemented
63 return self._eq(other)
64
65 __hash__ = None # For Py3 compatibility.
66
67 def __ne__(self, other):
68 """
69 Compare two nodes for inequality.
70
71 This calls the method _eq().
72 """
73 if self.__class__ is not other.__class__:
74 return NotImplemented
75 return not self._eq(other)
76
77 def _eq(self, other):
78 """
79 Compare two nodes for equality.
80
81 This is called by __eq__ and __ne__. It is only called if the two nodes
82 have the same type. This must be implemented by the concrete subclass.
83 Nodes should be considered equal if they have the same structure,
84 ignoring the prefix string and other context information.
85 """
86 raise NotImplementedError
87
88 def clone(self):
89 """
90 Return a cloned (deep) copy of self.
91
92 This must be implemented by the concrete subclass.
93 """
94 raise NotImplementedError
95
96 def post_order(self):
97 """
98 Return a post-order iterator for the tree.
99
100 This must be implemented by the concrete subclass.
101 """
102 raise NotImplementedError
103
104 def pre_order(self):
105 """
106 Return a pre-order iterator for the tree.
107
108 This must be implemented by the concrete subclass.
109 """
110 raise NotImplementedError
111
112 def set_prefix(self, prefix):
113 """
114 Set the prefix for the node (see Leaf class).
115
116 DEPRECATED; use the prefix property directly.
117 """
118 warnings.warn("set_prefix() is deprecated; use the prefix property",
119 DeprecationWarning, stacklevel=2)
120 self.prefix = prefix
121
122 def get_prefix(self):
123 """
124 Return the prefix for the node (see Leaf class).
125
126 DEPRECATED; use the prefix property directly.
127 """
128 warnings.warn("get_prefix() is deprecated; use the prefix property",
129 DeprecationWarning, stacklevel=2)
130 return self.prefix
131
132 def replace(self, new):
133 """Replace this node with a new one in the parent."""
134 assert self.parent is not None, str(self)
135 assert new is not None
136 if not isinstance(new, list):
137 new = [new]
138 l_children = []
139 found = False
140 for ch in self.parent.children:
141 if ch is self:
142 assert not found, (self.parent.children, self, new)
143 if new is not None:
144 l_children.extend(new)
145 found = True
146 else:
147 l_children.append(ch)
148 assert found, (self.children, self, new)
149 self.parent.changed()
150 self.parent.children = l_children
151 for x in new:
152 x.parent = self.parent
153 self.parent = None
154
155 def get_lineno(self):
156 """Return the line number which generated the invocant node."""
157 node = self
158 while not isinstance(node, Leaf):
159 if not node.children:
160 return
161 node = node.children[0]
162 return node.lineno
163
164 def changed(self):
165 if self.parent:
166 self.parent.changed()
167 self.was_changed = True
168
169 def remove(self):
170 """
171 Remove the node from the tree. Returns the position of the node in its
172 parent's children before it was removed.
173 """
174 if self.parent:
175 for i, node in enumerate(self.parent.children):
176 if node is self:
177 self.parent.changed()
178 del self.parent.children[i]
179 self.parent = None
180 return i
181
182 @property
183 def next_sibling(self):
184 """
185 The node immediately following the invocant in their parent's children
186 list. If the invocant does not have a next sibling, it is None
187 """
188 if self.parent is None:
189 return None
190
191 # Can't use index(); we need to test by identity
192 for i, child in enumerate(self.parent.children):
193 if child is self:
194 try:
195 return self.parent.children[i+1]
196 except IndexError:
197 return None
198
199 @property
200 def prev_sibling(self):
201 """
202 The node immediately preceding the invocant in their parent's children
203 list. If the invocant does not have a previous sibling, it is None.
204 """
205 if self.parent is None:
206 return None
207
208 # Can't use index(); we need to test by identity
209 for i, child in enumerate(self.parent.children):
210 if child is self:
211 if i == 0:
212 return None
213 return self.parent.children[i-1]
214
215 def leaves(self):
216 for child in self.children:
217 for x in child.leaves():
218 yield x
219
220 def depth(self):
221 if self.parent is None:
222 return 0
223 return 1 + self.parent.depth()
224
225 def get_suffix(self):
226 """
227 Return the string immediately following the invocant node. This is
228 effectively equivalent to node.next_sibling.prefix
229 """
230 next_sib = self.next_sibling
231 if next_sib is None:
232 return u""
233 return next_sib.prefix
234
235 if sys.version_info < (3, 0):
236 def __str__(self):
237 return unicode(self).encode("ascii")
238
239class Node(Base):
240
241 """Concrete implementation for interior nodes."""
242
243 def __init__(self,type, children,
244 context=None,
245 prefix=None,
246 fixers_applied=None):
247 """
248 Initializer.
249
250 Takes a type constant (a symbol number >= 256), a sequence of
251 child nodes, and an optional context keyword argument.
252
253 As a side effect, the parent pointers of the children are updated.
254 """
255 assert type >= 256, type
256 self.type = type
257 self.children = list(children)
258 for ch in self.children:
259 assert ch.parent is None, repr(ch)
260 ch.parent = self
261 if prefix is not None:
262 self.prefix = prefix
263 if fixers_applied:
264 self.fixers_applied = fixers_applied[:]
265 else:
266 self.fixers_applied = None
267
268 def __repr__(self):
269 """Return a canonical string representation."""
270 return "%s(%s, %r)" % (self.__class__.__name__,
271 type_repr(self.type),
272 self.children)
273
274 def __unicode__(self):
275 """
276 Return a pretty string representation.
277
278 This reproduces the input source exactly.
279 """
280 return u"".join(map(unicode, self.children))
281
282 if sys.version_info > (3, 0):
283 __str__ = __unicode__
284
285 def _eq(self, other):
286 """Compare two nodes for equality."""
287 return (self.type, self.children) == (other.type, other.children)
288
289 def clone(self):
290 """Return a cloned (deep) copy of self."""
291 return Node(self.type, [ch.clone() for ch in self.children],
292 fixers_applied=self.fixers_applied)
293
294 def post_order(self):
295 """Return a post-order iterator for the tree."""
296 for child in self.children:
297 for node in child.post_order():
298 yield node
299 yield self
300
301 def pre_order(self):
302 """Return a pre-order iterator for the tree."""
303 yield self
304 for child in self.children:
305 for node in child.pre_order():
306 yield node
307
308 def _prefix_getter(self):
309 """
310 The whitespace and comments preceding this node in the input.
311 """
312 if not self.children:
313 return ""
314 return self.children[0].prefix
315
316 def _prefix_setter(self, prefix):
317 if self.children:
318 self.children[0].prefix = prefix
319
320 prefix = property(_prefix_getter, _prefix_setter)
321
322 def set_child(self, i, child):
323 """
324 Equivalent to 'node.children[i] = child'. This method also sets the
325 child's parent attribute appropriately.
326 """
327 child.parent = self
328 self.children[i].parent = None
329 self.children[i] = child
330 self.changed()
331
332 def insert_child(self, i, child):
333 """
334 Equivalent to 'node.children.insert(i, child)'. This method also sets
335 the child's parent attribute appropriately.
336 """
337 child.parent = self
338 self.children.insert(i, child)
339 self.changed()
340
341 def append_child(self, child):
342 """
343 Equivalent to 'node.children.append(child)'. This method also sets the
344 child's parent attribute appropriately.
345 """
346 child.parent = self
347 self.children.append(child)
348 self.changed()
349
350
351class Leaf(Base):
352
353 """Concrete implementation for leaf nodes."""
354
355 # Default values for instance variables
356 _prefix = "" # Whitespace and comments preceding this token in the input
357 lineno = 0 # Line where this token starts in the input
358 column = 0 # Column where this token tarts in the input
359
360 def __init__(self, type, value,
361 context=None,
362 prefix=None,
363 fixers_applied=[]):
364 """
365 Initializer.
366
367 Takes a type constant (a token number < 256), a string value, and an
368 optional context keyword argument.
369 """
370 assert 0 <= type < 256, type
371 if context is not None:
372 self._prefix, (self.lineno, self.column) = context
373 self.type = type
374 self.value = value
375 if prefix is not None:
376 self._prefix = prefix
377 self.fixers_applied = fixers_applied[:]
378
379 def __repr__(self):
380 """Return a canonical string representation."""
381 return "%s(%r, %r)" % (self.__class__.__name__,
382 self.type,
383 self.value)
384
385 def __unicode__(self):
386 """
387 Return a pretty string representation.
388
389 This reproduces the input source exactly.
390 """
391 return self.prefix + unicode(self.value)
392
393 if sys.version_info > (3, 0):
394 __str__ = __unicode__
395
396 def _eq(self, other):
397 """Compare two nodes for equality."""
398 return (self.type, self.value) == (other.type, other.value)
399
400 def clone(self):
401 """Return a cloned (deep) copy of self."""
402 return Leaf(self.type, self.value,
403 (self.prefix, (self.lineno, self.column)),
404 fixers_applied=self.fixers_applied)
405
406 def leaves(self):
407 yield self
408
409 def post_order(self):
410 """Return a post-order iterator for the tree."""
411 yield self
412
413 def pre_order(self):
414 """Return a pre-order iterator for the tree."""
415 yield self
416
417 def _prefix_getter(self):
418 """
419 The whitespace and comments preceding this token in the input.
420 """
421 return self._prefix
422
423 def _prefix_setter(self, prefix):
424 self.changed()
425 self._prefix = prefix
426
427 prefix = property(_prefix_getter, _prefix_setter)
428
429def convert(gr, raw_node):
430 """
431 Convert raw node information to a Node or Leaf instance.
432
433 This is passed to the parser driver which calls it whenever a reduction of a
434 grammar rule produces a new complete node, so that the tree is build
435 strictly bottom-up.
436 """
437 type, value, context, children = raw_node
438 if children or type in gr.number2symbol:
439 # If there's exactly one child, return that child instead of
440 # creating a new node.
441 if len(children) == 1:
442 return children[0]
443 return Node(type, children, context=context)
444 else:
445 return Leaf(type, value, context=context)
446
447
448class BasePattern(object):
449
450 """
451 A pattern is a tree matching pattern.
452
453 It looks for a specific node type (token or symbol), and
454 optionally for a specific content.
455
456 This is an abstract base class. There are three concrete
457 subclasses:
458
459 - LeafPattern matches a single leaf node;
460 - NodePattern matches a single node (usually non-leaf);
461 - WildcardPattern matches a sequence of nodes of variable length.
462 """
463
464 # Defaults for instance variables
465 type = None # Node type (token if < 256, symbol if >= 256)
466 content = None # Optional content matching pattern
467 name = None # Optional name used to store match in results dict
468
469 def __new__(cls, *args, **kwds):
470 """Constructor that prevents BasePattern from being instantiated."""
471 assert cls is not BasePattern, "Cannot instantiate BasePattern"
472 return object.__new__(cls)
473
474 def __repr__(self):
475 args = [type_repr(self.type), self.content, self.name]
476 while args and args[-1] is None:
477 del args[-1]
478 return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
479
480 def optimize(self):
481 """
482 A subclass can define this as a hook for optimizations.
483
484 Returns either self or another node with the same effect.
485 """
486 return self
487
488 def match(self, node, results=None):
489 """
490 Does this pattern exactly match a node?
491
492 Returns True if it matches, False if not.
493
494 If results is not None, it must be a dict which will be
495 updated with the nodes matching named subpatterns.
496
497 Default implementation for non-wildcard patterns.
498 """
499 if self.type is not None and node.type != self.type:
500 return False
501 if self.content is not None:
502 r = None
503 if results is not None:
504 r = {}
505 if not self._submatch(node, r):
506 return False
507 if r:
508 results.update(r)
509 if results is not None and self.name:
510 results[self.name] = node
511 return True
512
513 def match_seq(self, nodes, results=None):
514 """
515 Does this pattern exactly match a sequence of nodes?
516
517 Default implementation for non-wildcard patterns.
518 """
519 if len(nodes) != 1:
520 return False
521 return self.match(nodes[0], results)
522
523 def generate_matches(self, nodes):
524 """
525 Generator yielding all matches for this pattern.
526
527 Default implementation for non-wildcard patterns.
528 """
529 r = {}
530 if nodes and self.match(nodes[0], r):
531 yield 1, r
532
533
534class LeafPattern(BasePattern):
535
536 def __init__(self, type=None, content=None, name=None):
537 """
538 Initializer. Takes optional type, content, and name.
539
540 The type, if given must be a token type (< 256). If not given,
541 this matches any *leaf* node; the content may still be required.
542
543 The content, if given, must be a string.
544
545 If a name is given, the matching node is stored in the results
546 dict under that key.
547 """
548 if type is not None:
549 assert 0 <= type < 256, type
550 if content is not None:
551 assert isinstance(content, basestring), repr(content)
552 self.type = type
553 self.content = content
554 self.name = name
555
556 def match(self, node, results=None):
557 """Override match() to insist on a leaf node."""
558 if not isinstance(node, Leaf):
559 return False
560 return BasePattern.match(self, node, results)
561
562 def _submatch(self, node, results=None):
563 """
564 Match the pattern's content to the node's children.
565
566 This assumes the node type matches and self.content is not None.
567
568 Returns True if it matches, False if not.
569
570 If results is not None, it must be a dict which will be
571 updated with the nodes matching named subpatterns.
572
573 When returning False, the results dict may still be updated.
574 """
575 return self.content == node.value
576
577
578class NodePattern(BasePattern):
579
580 wildcards = False
581
582 def __init__(self, type=None, content=None, name=None):
583 """
584 Initializer. Takes optional type, content, and name.
585
586 The type, if given, must be a symbol type (>= 256). If the
587 type is None this matches *any* single node (leaf or not),
588 except if content is not None, in which it only matches
589 non-leaf nodes that also match the content pattern.
590
591 The content, if not None, must be a sequence of Patterns that
592 must match the node's children exactly. If the content is
593 given, the type must not be None.
594
595 If a name is given, the matching node is stored in the results
596 dict under that key.
597 """
598 if type is not None:
599 assert type >= 256, type
600 if content is not None:
601 assert not isinstance(content, basestring), repr(content)
602 content = list(content)
603 for i, item in enumerate(content):
604 assert isinstance(item, BasePattern), (i, item)
605 if isinstance(item, WildcardPattern):
606 self.wildcards = True
607 self.type = type
608 self.content = content
609 self.name = name
610
611 def _submatch(self, node, results=None):
612 """
613 Match the pattern's content to the node's children.
614
615 This assumes the node type matches and self.content is not None.
616
617 Returns True if it matches, False if not.
618
619 If results is not None, it must be a dict which will be
620 updated with the nodes matching named subpatterns.
621
622 When returning False, the results dict may still be updated.
623 """
624 if self.wildcards:
625 for c, r in generate_matches(self.content, node.children):
626 if c == len(node.children):
627 if results is not None:
628 results.update(r)
629 return True
630 return False
631 if len(self.content) != len(node.children):
632 return False
633 for subpattern, child in zip(self.content, node.children):
634 if not subpattern.match(child, results):
635 return False
636 return True
637
638
639class WildcardPattern(BasePattern):
640
641 """
642 A wildcard pattern can match zero or more nodes.
643
644 This has all the flexibility needed to implement patterns like:
645
646 .* .+ .? .{m,n}
647 (a b c | d e | f)
648 (...)* (...)+ (...)? (...){m,n}
649
650 except it always uses non-greedy matching.
651 """
652
653 def __init__(self, content=None, min=0, max=HUGE, name=None):
654 """
655 Initializer.
656
657 Args:
658 content: optional sequence of subsequences of patterns;
659 if absent, matches one node;
660 if present, each subsequence is an alternative [*]
661 min: optional minimum number of times to match, default 0
662 max: optional maximum number of times to match, default HUGE
663 name: optional name assigned to this match
664
665 [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
666 equivalent to (a b c | d e | f g h); if content is None,
667 this is equivalent to '.' in regular expression terms.
668 The min and max parameters work as follows:
669 min=0, max=maxint: .*
670 min=1, max=maxint: .+
671 min=0, max=1: .?
672 min=1, max=1: .
673 If content is not None, replace the dot with the parenthesized
674 list of alternatives, e.g. (a b c | d e | f g h)*
675 """
676 assert 0 <= min <= max <= HUGE, (min, max)
677 if content is not None:
678 content = tuple(map(tuple, content)) # Protect against alterations
679 # Check sanity of alternatives
680 assert len(content), repr(content) # Can't have zero alternatives
681 for alt in content:
682 assert len(alt), repr(alt) # Can have empty alternatives
683 self.content = content
684 self.min = min
685 self.max = max
686 self.name = name
687
688 def optimize(self):
689 """Optimize certain stacked wildcard patterns."""
690 subpattern = None
691 if (self.content is not None and
692 len(self.content) == 1 and len(self.content[0]) == 1):
693 subpattern = self.content[0][0]
694 if self.min == 1 and self.max == 1:
695 if self.content is None:
696 return NodePattern(name=self.name)
697 if subpattern is not None and self.name == subpattern.name:
698 return subpattern.optimize()
699 if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
700 subpattern.min <= 1 and self.name == subpattern.name):
701 return WildcardPattern(subpattern.content,
702 self.min*subpattern.min,
703 self.max*subpattern.max,
704 subpattern.name)
705 return self
706
707 def match(self, node, results=None):
708 """Does this pattern exactly match a node?"""
709 return self.match_seq([node], results)
710
711 def match_seq(self, nodes, results=None):
712 """Does this pattern exactly match a sequence of nodes?"""
713 for c, r in self.generate_matches(nodes):
714 if c == len(nodes):
715 if results is not None:
716 results.update(r)
717 if self.name:
718 results[self.name] = list(nodes)
719 return True
720 return False
721
722 def generate_matches(self, nodes):
723 """
724 Generator yielding matches for a sequence of nodes.
725
726 Args:
727 nodes: sequence of nodes
728
729 Yields:
730 (count, results) tuples where:
731 count: the match comprises nodes[:count];
732 results: dict containing named submatches.
733 """
734 if self.content is None:
735 # Shortcut for special case (see __init__.__doc__)
736 for count in xrange(self.min, 1 + min(len(nodes), self.max)):
737 r = {}
738 if self.name:
739 r[self.name] = nodes[:count]
740 yield count, r
741 elif self.name == "bare_name":
742 yield self._bare_name_matches(nodes)
743 else:
744 # The reason for this is that hitting the recursion limit usually
745 # results in some ugly messages about how RuntimeErrors are being
746 # ignored. We don't do this on non-CPython implementation because
747 # they don't have this problem.
748 if hasattr(sys, "getrefcount"):
749 save_stderr = sys.stderr
750 sys.stderr = StringIO()
751 try:
752 for count, r in self._recursive_matches(nodes, 0):
753 if self.name:
754 r[self.name] = nodes[:count]
755 yield count, r
756 except RuntimeError:
757 # We fall back to the iterative pattern matching scheme if the recursive
758 # scheme hits the recursion limit.
759 for count, r in self._iterative_matches(nodes):
760 if self.name:
761 r[self.name] = nodes[:count]
762 yield count, r
763 finally:
764 if hasattr(sys, "getrefcount"):
765 sys.stderr = save_stderr
766
767 def _iterative_matches(self, nodes):
768 """Helper to iteratively yield the matches."""
769 nodelen = len(nodes)
770 if 0 >= self.min:
771 yield 0, {}
772
773 results = []
774 # generate matches that use just one alt from self.content
775 for alt in self.content:
776 for c, r in generate_matches(alt, nodes):
777 yield c, r
778 results.append((c, r))
779
780 # for each match, iterate down the nodes
781 while results:
782 new_results = []
783 for c0, r0 in results:
784 # stop if the entire set of nodes has been matched
785 if c0 < nodelen and c0 <= self.max:
786 for alt in self.content:
787 for c1, r1 in generate_matches(alt, nodes[c0:]):
788 if c1 > 0:
789 r = {}
790 r.update(r0)
791 r.update(r1)
792 yield c0 + c1, r
793 new_results.append((c0 + c1, r))
794 results = new_results
795
796 def _bare_name_matches(self, nodes):
797 """Special optimized matcher for bare_name."""
798 count = 0
799 r = {}
800 done = False
801 max = len(nodes)
802 while not done and count < max:
803 done = True
804 for leaf in self.content:
805 if leaf[0].match(nodes[count], r):
806 count += 1
807 done = False
808 break
809 r[self.name] = nodes[:count]
810 return count, r
811
812 def _recursive_matches(self, nodes, count):
813 """Helper to recursively yield the matches."""
814 assert self.content is not None
815 if count >= self.min:
816 yield 0, {}
817 if count < self.max:
818 for alt in self.content:
819 for c0, r0 in generate_matches(alt, nodes):
820 for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
821 r = {}
822 r.update(r0)
823 r.update(r1)
824 yield c0 + c1, r
825
826
827class NegatedPattern(BasePattern):
828
829 def __init__(self, content=None):
830 """
831 Initializer.
832
833 The argument is either a pattern or None. If it is None, this
834 only matches an empty sequence (effectively '$' in regex
835 lingo). If it is not None, this matches whenever the argument
836 pattern doesn't have any matches.
837 """
838 if content is not None:
839 assert isinstance(content, BasePattern), repr(content)
840 self.content = content
841
842 def match(self, node):
843 # We never match a node in its entirety
844 return False
845
846 def match_seq(self, nodes):
847 # We only match an empty sequence of nodes in its entirety
848 return len(nodes) == 0
849
850 def generate_matches(self, nodes):
851 if self.content is None:
852 # Return a match if there is an empty sequence
853 if len(nodes) == 0:
854 yield 0, {}
855 else:
856 # Return a match if the argument pattern has no matches
857 for c, r in self.content.generate_matches(nodes):
858 return
859 yield 0, {}
860
861
862def generate_matches(patterns, nodes):
863 """
864 Generator yielding matches for a sequence of patterns and nodes.
865
866 Args:
867 patterns: a sequence of patterns
868 nodes: a sequence of nodes
869
870 Yields:
871 (count, results) tuples where:
872 count: the entire sequence of patterns matches nodes[:count];
873 results: dict containing named submatches.
874 """
875 if not patterns:
876 yield 0, {}
877 else:
878 p, rest = patterns[0], patterns[1:]
879 for c0, r0 in p.generate_matches(nodes):
880 if not rest:
881 yield c0, r0
882 else:
883 for c1, r1 in generate_matches(rest, nodes[c0:]):
884 r = {}
885 r.update(r0)
886 r.update(r1)
887 yield c0 + c1, r
Note: See TracBrowser for help on using the repository browser.