Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

parser.c

Last change on this file was 388, checked in by dmik, 11 years ago
python: Update vendor to 2.7.6.
Property svn:eol-style set to `native`
File size: 11.5 KB

Line
1
2	/* Parser implementation */
3
4	/* For a description, see the comments at end of this file */
5
6	/* XXX To do: error recovery */
7
8	#include "Python.h"
9	#include "pgenheaders.h"
10	#include "token.h"
11	#include "grammar.h"
12	#include "node.h"
13	#include "parser.h"
14	#include "errcode.h"
15
16
/* Debug tracing helper.  D(stmt) expands to nothing unless Py_DEBUG is
   defined; in Py_DEBUG builds it runs stmt only while Py_DebugFlag is
   nonzero. */
#ifndef Py_DEBUG
#define D(x)
#else
extern int Py_DebugFlag;
/* Written as "if (...); else x" so that an "else" following D(...); in
   the caller cannot attach to the macro's own "if". */
#define D(x) if (!Py_DebugFlag); else x
#endif
23
24
25	/* STACK DATA TYPE */
26
27	static void s_reset(stack *);
28
29	static void
30	s_reset(stack *s)
31	{
32	s->s_top = &s->s_base[MAXSTACK];
33	}
34
/* Nonzero when the stack holds no entries, i.e. s_top is one past the
   last slot of s_base. */
#define s_empty(s) ((s)->s_base + MAXSTACK == (s)->s_top)
36
37	static int
38	s_push(register stack s, dfa d, node *parent)
39	{
40	register stackentry *top;
41	if (s->s_top == s->s_base) {
42	fprintf(stderr, "s_push: parser stack overflow\n");
43	return E_NOMEM;
44	}
45	top = --s->s_top;
46	top->s_dfa = d;
47	top->s_parent = parent;
48	top->s_state = 0;
49	return 0;
50	}
51
#ifndef Py_DEBUG

/* Release build: discard the top stack entry without any underflow check. */
#define s_pop(s) (s)->s_top++

#else /* Py_DEBUG */

/* Debug build: discard the top stack entry, calling Py_FatalError first
   if the stack is already empty. */
static void
s_pop(register stack *s)
{
    if (s_empty(s)) {
        Py_FatalError("s_pop: parser stack underflow -- FATAL");
    }
    s->s_top++;
}

#endif
67
68
69	/* PARSER CREATION */
70
71	parser_state *
72	PyParser_New(grammar *g, int start)
73	{
74	parser_state *ps;
75
76	if (!g->g_accel)
77	PyGrammar_AddAccelerators(g);
78	ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
79	if (ps == NULL)
80	return NULL;
81	ps->p_grammar = g;
82	#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
83	ps->p_flags = 0;
84	#endif
85	ps->p_tree = PyNode_New(start);
86	if (ps->p_tree == NULL) {
87	PyMem_FREE(ps);
88	return NULL;
89	}
90	s_reset(&ps->p_stack);
91	(void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
92	return ps;
93	}
94
95	void
96	PyParser_Delete(parser_state *ps)
97	{
98	/* NB If you want to save the parse tree,
99	you must set p_tree to NULL before calling delparser! */
100	PyNode_Free(ps->p_tree);
101	PyMem_FREE(ps);
102	}
103
104
105	/* PARSER STACK OPERATIONS */
106
107	static int
108	shift(register stack s, int type, char str, int newstate, int lineno, int col_offset)
109	{
110	int err;
111	assert(!s_empty(s));
112	err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
113	if (err)
114	return err;
115	s->s_top->s_state = newstate;
116	return 0;
117	}
118
119	static int
120	push(register stack s, int type, dfa d, int newstate, int lineno, int col_offset)
121	{
122	int err;
123	register node *n;
124	n = s->s_top->s_parent;
125	assert(!s_empty(s));
126	err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
127	if (err)
128	return err;
129	s->s_top->s_state = newstate;
130	return s_push(s, d, CHILD(n, NCH(n)-1));
131	}
132
133
134	/* PARSER PROPER */
135
136	static int
137	classify(parser_state ps, int type, char str)
138	{
139	grammar *g = ps->p_grammar;
140	register int n = g->g_ll.ll_nlabels;
141
142	if (type == NAME) {
143	register char *s = str;
144	register label *l = g->g_ll.ll_label;
145	register int i;
146	for (i = n; i > 0; i--, l++) {
147	if (l->lb_type != NAME \|\| l->lb_str == NULL \|\|
148	l->lb_str[0] != s[0] \|\|
149	strcmp(l->lb_str, s) != 0)
150	continue;
151	#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
152	if (ps->p_flags & CO_FUTURE_PRINT_FUNCTION &&
153	s[0] == 'p' && strcmp(s, "print") == 0) {
154	break; /* no longer a keyword */
155	}
156	#endif
157	D(printf("It's a keyword\n"));
158	return n - i;
159	}
160	}
161
162	{
163	register label *l = g->g_ll.ll_label;
164	register int i;
165	for (i = n; i > 0; i--, l++) {
166	if (l->lb_type == type && l->lb_str == NULL) {
167	D(printf("It's a token we know\n"));
168	return n - i;
169	}
170	}
171	}
172
173	D(printf("Illegal token\n"));
174	return -1;
175	}
176
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
/* Inspect the import statement under the node of the top stack entry
   and, if it looks like "from __future__ import ...", set the matching
   CO_FUTURE_* bits in ps->p_flags for the with_statement, print_function
   and unicode_literals features.  Other imported names are ignored, and
   nothing is done for "import *". */
static void
future_hack(parser_state *ps)
{
    node *n = ps->p_stack.s_top->s_parent;
    node *ch, *cch;
    int i;

    /* from __future__ import ..., must have at least 4 children */
    n = CHILD(n, 0);
    if (NCH(n) < 4)
        return;
    ch = CHILD(n, 0);
    if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
        return;
    ch = CHILD(n, 1);
    if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
        strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
        return;
    ch = CHILD(n, 3);
    /* ch can be a star, a parenthesis or import_as_names */
    if (TYPE(ch) == STAR)
        return;
    if (TYPE(ch) == LPAR)
        ch = CHILD(n, 4);

    /* Step by two to visit every other child of the name list. */
    for (i = 0; i < NCH(ch); i += 2) {
        cch = CHILD(ch, i);
        if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) {
            char *str_ch = STR(CHILD(cch, 0));
            if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {
                ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
            } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {
                ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
            } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {
                ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
            }
        }
    }
}
#endif /* future keyword */
218
219	int
220	PyParser_AddToken(register parser_state ps, register int type, char str,
221	int lineno, int col_offset, int *expected_ret)
222	{
223	register int ilabel;
224	int err;
225
226	D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
227
228	/* Find out which label this token is */
229	ilabel = classify(ps, type, str);
230	if (ilabel < 0)
231	return E_SYNTAX;
232
233	/* Loop until the token is shifted or an error occurred */
234	for (;;) {
235	/* Fetch the current dfa and state */
236	register dfa *d = ps->p_stack.s_top->s_dfa;
237	register state *s = &d->d_state[ps->p_stack.s_top->s_state];
238
239	D(printf(" DFA '%s', state %d:",
240	d->d_name, ps->p_stack.s_top->s_state));
241
242	/* Check accelerator */
243	if (s->s_lower <= ilabel && ilabel < s->s_upper) {
244	register int x = s->s_accel[ilabel - s->s_lower];
245	if (x != -1) {
246	if (x & (1<<7)) {
247	/* Push non-terminal */
248	int nt = (x >> 8) + NT_OFFSET;
249	int arrow = x & ((1<<7)-1);
250	dfa *d1 = PyGrammar_FindDFA(
251	ps->p_grammar, nt);
252	if ((err = push(&ps->p_stack, nt, d1,
253	arrow, lineno, col_offset)) > 0) {
254	D(printf(" MemError: push\n"));
255	return err;
256	}
257	D(printf(" Push ...\n"));
258	continue;
259	}
260
261	/* Shift the token */
262	if ((err = shift(&ps->p_stack, type, str,
263	x, lineno, col_offset)) > 0) {
264	D(printf(" MemError: shift.\n"));
265	return err;
266	}
267	D(printf(" Shift.\n"));
268	/* Pop while we are in an accept-only state */
269	while (s = &d->d_state
270	[ps->p_stack.s_top->s_state],
271	s->s_accept && s->s_narcs == 1) {
272	D(printf(" DFA '%s', state %d: "
273	"Direct pop.\n",
274	d->d_name,
275	ps->p_stack.s_top->s_state));
276	#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
277	if (d->d_name[0] == 'i' &&
278	strcmp(d->d_name,
279	"import_stmt") == 0)
280	future_hack(ps);
281	#endif
282	s_pop(&ps->p_stack);
283	if (s_empty(&ps->p_stack)) {
284	D(printf(" ACCEPT.\n"));
285	return E_DONE;
286	}
287	d = ps->p_stack.s_top->s_dfa;
288	}
289	return E_OK;
290	}
291	}
292
293	if (s->s_accept) {
294	#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
295	if (d->d_name[0] == 'i' &&
296	strcmp(d->d_name, "import_stmt") == 0)
297	future_hack(ps);
298	#endif
299	/* Pop this dfa and try again */
300	s_pop(&ps->p_stack);
301	D(printf(" Pop ...\n"));
302	if (s_empty(&ps->p_stack)) {
303	D(printf(" Error: bottom of stack.\n"));
304	return E_SYNTAX;
305	}
306	continue;
307	}
308
309	/* Stuck, report syntax error */
310	D(printf(" Error.\n"));
311	if (expected_ret) {
312	if (s->s_lower == s->s_upper - 1) {
313	/* Only one possible expected token */
314	*expected_ret = ps->p_grammar->
315	g_ll.ll_label[s->s_lower].lb_type;
316	}
317	else
318	*expected_ret = -1;
319	}
320	return E_SYNTAX;
321	}
322	}
323
324
325	#ifdef Py_DEBUG
326
327	/* DEBUG OUTPUT */
328
329	void
330	dumptree(grammar g, node n)
331	{
332	int i;
333
334	if (n == NULL)
335	printf("NIL");
336	else {
337	label l;
338	l.lb_type = TYPE(n);
339	l.lb_str = STR(n);
340	printf("%s", PyGrammar_LabelRepr(&l));
341	if (ISNONTERMINAL(TYPE(n))) {
342	printf("(");
343	for (i = 0; i < NCH(n); i++) {
344	if (i > 0)
345	printf(",");
346	dumptree(g, CHILD(n, i));
347	}
348	printf(")");
349	}
350	}
351	}
352
353	void
354	showtree(grammar g, node n)
355	{
356	int i;
357
358	if (n == NULL)
359	return;
360	if (ISNONTERMINAL(TYPE(n))) {
361	for (i = 0; i < NCH(n); i++)
362	showtree(g, CHILD(n, i));
363	}
364	else if (ISTERMINAL(TYPE(n))) {
365	printf("%s", _PyParser_TokenNames[TYPE(n)]);
366	if (TYPE(n) == NUMBER \|\| TYPE(n) == NAME)
367	printf("(%s)", STR(n));
368	printf(" ");
369	}
370	else
371	printf("? ");
372	}
373
374	void
375	printtree(parser_state *ps)
376	{
377	if (Py_DebugFlag) {
378	printf("Parse tree:\n");
379	dumptree(ps->p_grammar, ps->p_tree);
380	printf("\n");
381	printf("Tokens:\n");
382	showtree(ps->p_grammar, ps->p_tree);
383	printf("\n");
384	}
385	printf("Listing:\n");
386	PyNode_ListTree(ps->p_tree);
387	printf("\n");
388	}
389
390	#endif /* Py_DEBUG */
391
392	/*
393
394	Description
395	-----------
396
397	The parser's interface is different from usual: PyParser_AddToken()
398	must be called for each token in the input. This makes it possible to
399	turn it into an incremental parsing system later. The parsing system
400	constructs a parse tree as it goes.
401
402	A parsing rule is represented as a Deterministic Finite-state Automaton
403	(DFA). A node in a DFA represents a state of the parser; an arc represents
404	a transition. Transitions are either labeled with terminal symbols or
405	with non-terminals. When the parser decides to follow an arc labeled
406	with a non-terminal, it is invoked recursively with the DFA representing
407	the parsing rule for that as its initial state; when that DFA accepts,
408	the parser that invoked it continues. The parse tree constructed by the
409	recursively called parser is inserted as a child in the current parse tree.
410
411	The DFA's can be constructed automatically from a more conventional
412	language description. An extended LL(1) grammar (ELL(1)) is suitable.
413	Certain restrictions make the parser's life easier: rules that can produce
414	the empty string should be outlawed (there are other ways to put loops
415	or optional parts in the language). To avoid the need to construct
416	FIRST sets, we can require that all but the last alternative of a rule
417	(really: arc going out of a DFA's state) must begin with a terminal
418	symbol.
419
420	As an example, consider this grammar:
421
422	expr: term (OP term)*
423	term: CONSTANT | '(' expr ')'
424
425	The DFA corresponding to the rule for expr is:
426
427	------->.---term-->.------->
428	        ^          |
429	        |          |
430	        \----OP----/
431
432	The parse tree generated for the input a+b is:
433
434	(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
435
436	*/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/vendor/Python-2.7.6/Parser/parser.c

Download in other formats: