source: vendor/python/2.5/Parser/parser.c

Last change on this file was 3225, checked in by bird, 18 years ago

Python 2.5

File size: 9.5 KB
Line 
1
2/* Parser implementation */
3
4/* For a description, see the comments at end of this file */
5
6/* XXX To do: error recovery */
7
8#include "Python.h"
9#include "pgenheaders.h"
10#include "token.h"
11#include "grammar.h"
12#include "node.h"
13#include "parser.h"
14#include "errcode.h"
15
16
17#ifdef Py_DEBUG
18extern int Py_DebugFlag;
19#define D(x) if (!Py_DebugFlag); else x
20#else
21#define D(x)
22#endif
23
24
25/* STACK DATA TYPE */
26
27static void s_reset(stack *);
28
29static void
30s_reset(stack *s)
31{
32 s->s_top = &s->s_base[MAXSTACK];
33}
34
35#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
36
37static int
38s_push(register stack *s, dfa *d, node *parent)
39{
40 register stackentry *top;
41 if (s->s_top == s->s_base) {
42 fprintf(stderr, "s_push: parser stack overflow\n");
43 return E_NOMEM;
44 }
45 top = --s->s_top;
46 top->s_dfa = d;
47 top->s_parent = parent;
48 top->s_state = 0;
49 return 0;
50}
51
52#ifdef Py_DEBUG
53
54static void
55s_pop(register stack *s)
56{
57 if (s_empty(s))
58 Py_FatalError("s_pop: parser stack underflow -- FATAL");
59 s->s_top++;
60}
61
62#else /* !Py_DEBUG */
63
64#define s_pop(s) (s)->s_top++
65
66#endif
67
68
69/* PARSER CREATION */
70
71parser_state *
72PyParser_New(grammar *g, int start)
73{
74 parser_state *ps;
75
76 if (!g->g_accel)
77 PyGrammar_AddAccelerators(g);
78 ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
79 if (ps == NULL)
80 return NULL;
81 ps->p_grammar = g;
82#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
83 ps->p_flags = 0;
84#endif
85 ps->p_tree = PyNode_New(start);
86 if (ps->p_tree == NULL) {
87 PyMem_FREE(ps);
88 return NULL;
89 }
90 s_reset(&ps->p_stack);
91 (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
92 return ps;
93}
94
95void
96PyParser_Delete(parser_state *ps)
97{
98 /* NB If you want to save the parse tree,
99 you must set p_tree to NULL before calling delparser! */
100 PyNode_Free(ps->p_tree);
101 PyMem_FREE(ps);
102}
103
104
105/* PARSER STACK OPERATIONS */
106
107static int
108shift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset)
109{
110 int err;
111 assert(!s_empty(s));
112 err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
113 if (err)
114 return err;
115 s->s_top->s_state = newstate;
116 return 0;
117}
118
119static int
120push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset)
121{
122 int err;
123 register node *n;
124 n = s->s_top->s_parent;
125 assert(!s_empty(s));
126 err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
127 if (err)
128 return err;
129 s->s_top->s_state = newstate;
130 return s_push(s, d, CHILD(n, NCH(n)-1));
131}
132
133
134/* PARSER PROPER */
135
136static int
137classify(parser_state *ps, int type, char *str)
138{
139 grammar *g = ps->p_grammar;
140 register int n = g->g_ll.ll_nlabels;
141
142 if (type == NAME) {
143 register char *s = str;
144 register label *l = g->g_ll.ll_label;
145 register int i;
146 for (i = n; i > 0; i--, l++) {
147 if (l->lb_type != NAME || l->lb_str == NULL ||
148 l->lb_str[0] != s[0] ||
149 strcmp(l->lb_str, s) != 0)
150 continue;
151#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
152 if (!(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) {
153 if (s[0] == 'w' && strcmp(s, "with") == 0)
154 break; /* not a keyword yet */
155 else if (s[0] == 'a' && strcmp(s, "as") == 0)
156 break; /* not a keyword yet */
157 }
158#endif
159 D(printf("It's a keyword\n"));
160 return n - i;
161 }
162 }
163
164 {
165 register label *l = g->g_ll.ll_label;
166 register int i;
167 for (i = n; i > 0; i--, l++) {
168 if (l->lb_type == type && l->lb_str == NULL) {
169 D(printf("It's a token we know\n"));
170 return n - i;
171 }
172 }
173 }
174
175 D(printf("Illegal token\n"));
176 return -1;
177}
178
179#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
180static void
181future_hack(parser_state *ps)
182{
183 node *n = ps->p_stack.s_top->s_parent;
184 node *ch;
185 int i;
186
187 /* from __future__ import ..., must have at least 4 children */
188 n = CHILD(n, 0);
189 if (NCH(n) < 4)
190 return;
191 ch = CHILD(n, 0);
192 if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
193 return;
194 ch = CHILD(n, 1);
195 if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
196 strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
197 return;
198 for (i = 3; i < NCH(n); i += 2) {
199 /* XXX: assume we don't have parentheses in import:
200 from __future__ import (x, y, z)
201 */
202 ch = CHILD(n, i);
203 if (NCH(ch) == 1)
204 ch = CHILD(ch, 0);
205 if (NCH(ch) >= 1 && TYPE(CHILD(ch, 0)) == NAME &&
206 strcmp(STR(CHILD(ch, 0)), "with_statement") == 0) {
207 ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
208 break;
209 }
210 }
211}
212#endif /* future keyword */
213
214int
215PyParser_AddToken(register parser_state *ps, register int type, char *str,
216 int lineno, int col_offset, int *expected_ret)
217{
218 register int ilabel;
219 int err;
220
221 D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
222
223 /* Find out which label this token is */
224 ilabel = classify(ps, type, str);
225 if (ilabel < 0)
226 return E_SYNTAX;
227
228 /* Loop until the token is shifted or an error occurred */
229 for (;;) {
230 /* Fetch the current dfa and state */
231 register dfa *d = ps->p_stack.s_top->s_dfa;
232 register state *s = &d->d_state[ps->p_stack.s_top->s_state];
233
234 D(printf(" DFA '%s', state %d:",
235 d->d_name, ps->p_stack.s_top->s_state));
236
237 /* Check accelerator */
238 if (s->s_lower <= ilabel && ilabel < s->s_upper) {
239 register int x = s->s_accel[ilabel - s->s_lower];
240 if (x != -1) {
241 if (x & (1<<7)) {
242 /* Push non-terminal */
243 int nt = (x >> 8) + NT_OFFSET;
244 int arrow = x & ((1<<7)-1);
245 dfa *d1 = PyGrammar_FindDFA(
246 ps->p_grammar, nt);
247 if ((err = push(&ps->p_stack, nt, d1,
248 arrow, lineno, col_offset)) > 0) {
249 D(printf(" MemError: push\n"));
250 return err;
251 }
252 D(printf(" Push ...\n"));
253 continue;
254 }
255
256 /* Shift the token */
257 if ((err = shift(&ps->p_stack, type, str,
258 x, lineno, col_offset)) > 0) {
259 D(printf(" MemError: shift.\n"));
260 return err;
261 }
262 D(printf(" Shift.\n"));
263 /* Pop while we are in an accept-only state */
264 while (s = &d->d_state
265 [ps->p_stack.s_top->s_state],
266 s->s_accept && s->s_narcs == 1) {
267 D(printf(" DFA '%s', state %d: "
268 "Direct pop.\n",
269 d->d_name,
270 ps->p_stack.s_top->s_state));
271#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
272 if (d->d_name[0] == 'i' &&
273 strcmp(d->d_name,
274 "import_stmt") == 0)
275 future_hack(ps);
276#endif
277 s_pop(&ps->p_stack);
278 if (s_empty(&ps->p_stack)) {
279 D(printf(" ACCEPT.\n"));
280 return E_DONE;
281 }
282 d = ps->p_stack.s_top->s_dfa;
283 }
284 return E_OK;
285 }
286 }
287
288 if (s->s_accept) {
289#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
290 if (d->d_name[0] == 'i' &&
291 strcmp(d->d_name, "import_stmt") == 0)
292 future_hack(ps);
293#endif
294 /* Pop this dfa and try again */
295 s_pop(&ps->p_stack);
296 D(printf(" Pop ...\n"));
297 if (s_empty(&ps->p_stack)) {
298 D(printf(" Error: bottom of stack.\n"));
299 return E_SYNTAX;
300 }
301 continue;
302 }
303
304 /* Stuck, report syntax error */
305 D(printf(" Error.\n"));
306 if (expected_ret) {
307 if (s->s_lower == s->s_upper - 1) {
308 /* Only one possible expected token */
309 *expected_ret = ps->p_grammar->
310 g_ll.ll_label[s->s_lower].lb_type;
311 }
312 else
313 *expected_ret = -1;
314 }
315 return E_SYNTAX;
316 }
317}
318
319
320#ifdef Py_DEBUG
321
322/* DEBUG OUTPUT */
323
324void
325dumptree(grammar *g, node *n)
326{
327 int i;
328
329 if (n == NULL)
330 printf("NIL");
331 else {
332 label l;
333 l.lb_type = TYPE(n);
334 l.lb_str = STR(n);
335 printf("%s", PyGrammar_LabelRepr(&l));
336 if (ISNONTERMINAL(TYPE(n))) {
337 printf("(");
338 for (i = 0; i < NCH(n); i++) {
339 if (i > 0)
340 printf(",");
341 dumptree(g, CHILD(n, i));
342 }
343 printf(")");
344 }
345 }
346}
347
348void
349showtree(grammar *g, node *n)
350{
351 int i;
352
353 if (n == NULL)
354 return;
355 if (ISNONTERMINAL(TYPE(n))) {
356 for (i = 0; i < NCH(n); i++)
357 showtree(g, CHILD(n, i));
358 }
359 else if (ISTERMINAL(TYPE(n))) {
360 printf("%s", _PyParser_TokenNames[TYPE(n)]);
361 if (TYPE(n) == NUMBER || TYPE(n) == NAME)
362 printf("(%s)", STR(n));
363 printf(" ");
364 }
365 else
366 printf("? ");
367}
368
369void
370printtree(parser_state *ps)
371{
372 if (Py_DebugFlag) {
373 printf("Parse tree:\n");
374 dumptree(ps->p_grammar, ps->p_tree);
375 printf("\n");
376 printf("Tokens:\n");
377 showtree(ps->p_grammar, ps->p_tree);
378 printf("\n");
379 }
380 printf("Listing:\n");
381 PyNode_ListTree(ps->p_tree);
382 printf("\n");
383}
384
385#endif /* Py_DEBUG */
386
387/*
388
389Description
390-----------
391
392The parser's interface is different than usual: the function addtoken()
393must be called for each token in the input. This makes it possible to
394turn it into an incremental parsing system later. The parsing system
395constructs a parse tree as it goes.
396
397A parsing rule is represented as a Deterministic Finite-state Automaton
398(DFA). A node in a DFA represents a state of the parser; an arc represents
399a transition. Transitions are either labeled with terminal symbols or
400with non-terminals. When the parser decides to follow an arc labeled
401with a non-terminal, it is invoked recursively with the DFA representing
402the parsing rule for that as its initial state; when that DFA accepts,
403the parser that invoked it continues. The parse tree constructed by the
404recursively called parser is inserted as a child in the current parse tree.
405
406The DFA's can be constructed automatically from a more conventional
407language description. An extended LL(1) grammar (ELL(1)) is suitable.
408Certain restrictions make the parser's life easier: rules that can produce
409the empty string should be outlawed (there are other ways to put loops
410or optional parts in the language). To avoid the need to construct
411FIRST sets, we can require that all but the last alternative of a rule
412(really: arc going out of a DFA's state) must begin with a terminal
413symbol.
414
415As an example, consider this grammar:
416
417expr: term (OP term)*
418term: CONSTANT | '(' expr ')'
419
420The DFA corresponding to the rule for expr is:
421
422------->.---term-->.------->
423 ^ |
424 | |
425 \----OP----/
426
427The parse tree generated for the input a+b is:
428
429(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
430
431*/
Note: See TracBrowser for help on using the repository browser.