1 | # Module 'parser'
|
---|
2 | #
|
---|
3 | # Parse S-expressions output by the Panel Editor
|
---|
4 | # (which is written in Scheme so it can't help writing S-expressions).
|
---|
5 | #
|
---|
6 | # See notes at end of file.
|
---|
# Emit a Py3k migration warning on import: this module was removed in
# Python 3.0 (PEP 3108) and only exists under Python 2.6/2.7.
from warnings import warnpy3k
# stacklevel=2 attributes the warning to the importer, not to this module.
warnpy3k("the panelparser module has been removed in Python 3.0", stacklevel=2)
# Drop the helper so it does not leak into this module's namespace.
del warnpy3k
10 |
|
---|
11 |
|
---|
# Character classes used by the tokenizer.
whitespace = ' \t\n'            # token separators (skipped)
operators = '()\''              # single-character tokens
separators = operators + whitespace + ';' + '"'   # anything that ends an atom


# Tokenize a string.
# Return a list of tokens (strings).
#
# Token kinds: '(' , ')' , "'" as single characters; double-quoted strings
# (backslash escapes the next character; an unterminated string token runs
# to the end of the input, without a closing quote); atoms (maximal runs of
# non-separator characters).  A semicolon starts a comment that discards
# the rest of the input.
#
def tokenize_string(s):
    tokens = []
    n = len(s)
    i = 0
    # Single index-based pass.  The original repeatedly re-sliced the
    # remaining string (s = s[1:] per character), which copies the tail
    # each time and is quadratic in len(s); scanning by index is linear
    # and produces identical tokens.
    while i < n:
        c = s[i]
        if c in whitespace:
            i = i + 1
        elif c == ';':
            # Comment: discard everything that follows.
            break
        elif c == '"':
            # Quoted string: scan to the matching unescaped quote.
            j = i + 1
            while j < n:
                c = s[j]
                j = j + 1
                if c == '"': break
                if c == '\\': j = j + 1     # skip the escaped character
            tokens.append(s[i:j])
            i = j
        elif c in operators:
            tokens.append(c)
            i = i + 1
        else:
            # Atom: maximal run of non-separator characters.
            j = i + 1
            while j < n:
                if s[j] in separators: break
                j = j + 1
            tokens.append(s[i:j])
            i = j
    return tokens
|
---|
50 |
|
---|
51 |
|
---|
52 | # Tokenize a whole file (given as file object, not as file name).
|
---|
53 | # Return a list of tokens (strings).
|
---|
54 | #
|
---|
# Tokenize a whole file (given as file object, not as file name).
# Return a list of tokens (strings).
#
# The file is read line by line with readline() so any object providing
# readline() works, not only real files.
#
def tokenize_file(fp):
    tokens = []
    while 1:
        line = fp.readline()
        if not line: break
        # extend() appends in place; the original `tokens = tokens + ...`
        # rebuilt the whole list for every line, which is quadratic in the
        # total number of tokens.
        tokens.extend(tokenize_string(line))
    return tokens
|
---|
62 |
|
---|
63 |
|
---|
# Exception raised by parse_expr.
#
# NOTE: this is a Python 2 "string exception" (raised and caught by
# identity/value of the string object).  Callers catch it with
# `except syntax_error:`; do not compare against the literal text.
#
syntax_error = 'syntax error'
|
---|
68 |
|
---|
69 | # Parse an S-expression.
|
---|
70 | # Input is a list of tokens as returned by tokenize_*().
|
---|
71 | # Return a pair (expr, tokens)
|
---|
72 | # where expr is a list representing the s-expression,
|
---|
73 | # and tokens contains the remaining tokens.
|
---|
74 | # May raise syntax_error.
|
---|
75 | #
|
---|
76 | def parse_expr(tokens):
|
---|
77 | if (not tokens) or tokens[0] != '(':
|
---|
78 | raise syntax_error, 'expected "("'
|
---|
79 | tokens = tokens[1:]
|
---|
80 | expr = []
|
---|
81 | while 1:
|
---|
82 | if not tokens:
|
---|
83 | raise syntax_error, 'missing ")"'
|
---|
84 | if tokens[0] == ')':
|
---|
85 | return expr, tokens[1:]
|
---|
86 | elif tokens[0] == '(':
|
---|
87 | subexpr, tokens = parse_expr(tokens)
|
---|
88 | expr.append(subexpr)
|
---|
89 | else:
|
---|
90 | expr.append(tokens[0])
|
---|
91 | tokens = tokens[1:]
|
---|
92 |
|
---|
93 |
|
---|
94 | # Parse a file (given as file object, not as file name).
|
---|
95 | # Return a list of parsed S-expressions found at the top level.
|
---|
96 | #
|
---|
# Parse a file (given as file object, not as file name).
# Return a list of the parsed top-level S-expressions, in file order.
# May raise syntax_error (propagated from parse_expr).
#
def parse_file(fp):
    expressions = []
    remaining = tokenize_file(fp)
    while remaining:
        expr, remaining = parse_expr(remaining)
        expressions.append(expr)
    return expressions
|
---|
104 |
|
---|
105 |
|
---|
106 | # EXAMPLE:
|
---|
107 | #
|
---|
108 | # The input
|
---|
109 | # '(hip (hop hur-ray))'
|
---|
110 | #
|
---|
111 | # passed to tokenize_string() returns the token list
|
---|
112 | # ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
|
---|
113 | #
|
---|
114 | # When this is passed to parse_expr() it returns the expression
|
---|
115 | # ['hip', ['hop', 'hur-ray']]
|
---|
# plus an empty token list (because there are no tokens left).
|
---|
117 | #
|
---|
118 | # When a file containing the example is passed to parse_file() it returns
|
---|
119 | # a list whose only element is the output of parse_expr() above:
|
---|
120 | # [['hip', ['hop', 'hur-ray']]]
|
---|
121 |
|
---|
122 |
|
---|
123 | # TOKENIZING:
|
---|
124 | #
|
---|
125 | # Comments start with semicolon (;) and continue till the end of the line.
|
---|
126 | #
|
---|
127 | # Tokens are separated by whitespace, except the following characters
|
---|
128 | # always form a separate token (outside strings):
|
---|
129 | # ( ) '
|
---|
130 | # Strings are enclosed in double quotes (") and backslash (\) is used
|
---|
131 | # as escape character in strings.
|
---|