"""Iterator-based sre token scanner.

Defines ``Scanner``, which merges the regular expressions of several token
actions into one compiled alternation and yields the action results lazily,
and ``pattern``, a decorator that attaches a regular expression to an action.
"""

import re
import sre_parse
import sre_compile
import sre_constants

from re import VERBOSE, MULTILINE, DOTALL
from sre_constants import BRANCH, SUBPATTERN

# Names exported by ``from <module> import *``.
__all__ = ['Scanner', 'pattern']

# Default regex flags used by both Scanner and pattern().
FLAGS = VERBOSE | MULTILINE | DOTALL


class Scanner(object):
    """Token scanner that tries every lexicon pattern at once.

    Each lexicon entry is a callable carrying a ``pattern`` attribute (regex
    source text).  All patterns are wrapped in their own group and joined
    into a single alternation, so one match attempt decides which entry
    applies; the number of the group that matched selects the action.

    NOTE(review): each token pattern is parsed with its own parser state, so
    capturing groups written *inside* a token pattern are presumably not
    renumbered relative to the combined pattern -- prefer ``(?:...)`` there.
    """

    def __init__(self, lexicon, flags=FLAGS):
        """Compile ``lexicon`` into a single scanner pattern.

        Arguments:
            lexicon: iterable of callables.  Each must expose ``pattern``
                (regex source) and is later called as ``token(match,
                context)``, returning a ``(value, next_pos)`` pair.
            flags: regex flags applied to every token pattern.

        Raises whatever ``sre_parse.parse`` raises for an invalid pattern
        (the regex ``error`` exception); it is deliberately not intercepted.
        """
        import sys

        # actions[i] handles group i.  Group 0 is the whole match and has
        # no action of its own.
        self.actions = [None]

        # Flag enums (re.RegexFlag) are reduced to a plain int, as the
        # standard library does before handing flags to the sre internals.
        flags = int(flags)

        # The parser state class is called ``State`` on Python 3.8+ and
        # ``Pattern`` before that.
        state_cls = getattr(sre_parse, 'State', None)
        if state_cls is None:
            state_cls = sre_parse.Pattern
        state = state_cls()
        state.flags = flags

        # Python 3.6+ uses 4-field SUBPATTERN nodes and tracks groups via
        # opengroup()/closegroup(); older interpreters keep the original
        # 2-field construction with an explicit group count.
        modern = sys.version_info >= (3, 6)

        branches = []
        for idx, token in enumerate(lexicon):
            parsed = sre_parse.parse(token.pattern, flags)
            if modern:
                gid = state.opengroup()
                branch = sre_parse.SubPattern(
                    state, [(SUBPATTERN, (gid, 0, 0, parsed))])
                state.closegroup(gid, branch)
            else:
                branch = sre_parse.SubPattern(
                    state, [(SUBPATTERN, (idx + 1, parsed))])
            branches.append(branch)
            self.actions.append(token)

        if not modern:
            # NOTE(guido): Added to make SRE validation work
            state.groups = len(branches) + 1

        combined = sre_parse.SubPattern(state, [(BRANCH, (None, branches))])
        self.scanner = sre_compile.compile(combined)

    def iterscan(self, string, idx=0, context=None):
        """Yield ``(value, end_idx)`` for each token matched in ``string``.

        Scanning starts at ``idx`` and stops at the first position where no
        token matches, or when a match ends where the previous one ended
        (no progress).  ``context`` is passed through to every action
        untouched.  An action may return a ``next_pos`` other than the end
        of its match to make scanning resume from that position instead.
        """
        match = self.scanner.scanner(string, idx).match
        actions = self.actions
        lastend = idx
        while True:
            m = match()
            if m is None:
                break
            matchend = m.end()
            if lastend == matchend:
                # The scanner did not advance; bail out instead of looping.
                break
            action = actions[m.lastindex]
            if action is not None:
                rval, next_pos = action(m, context)
                if next_pos is not None and next_pos != matchend:
                    # "fast forward" the scanner
                    matchend = next_pos
                    match = self.scanner.scanner(string, matchend).match
                yield rval, matchend
            lastend = matchend


def pattern(pattern, flags=FLAGS):
    """Build a decorator that tags a callable with a regular expression.

    The decorated callable is returned as-is with two attributes added:
    ``pattern`` holds the regex source exactly as given, and ``regex`` holds
    the result of ``re.compile(pattern, flags)``.
    """
    def attach(func):
        # Source text is stored first, then the compiled form.
        func.pattern = pattern
        func.regex = re.compile(pattern, flags)
        return func

    return attach