Context Navigation

← Previous Revision
Next Revision →
Normal
Revision Log

parser.c

Last change on this file was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 11.5 KB

Rev	Line
[2]	1
	2	/* Parser implementation */
	3
	4	/* For a description, see the comments at end of this file */
	5
	6	/* XXX To do: error recovery */
	7
	8	#include "Python.h"
	9	#include "pgenheaders.h"
	10	#include "token.h"
	11	#include "grammar.h"
	12	#include "node.h"
	13	#include "parser.h"
	14	#include "errcode.h"
	15
	16
	/* Debug tracing helper.
	   In a Py_DEBUG build, D(x) executes x only when Py_DebugFlag is nonzero.
	   The "if (!flag); else x" shape makes x the else-branch of a complete
	   if/else, so a D(...) placed inside a caller's own if/else cannot
	   capture the caller's else.
	   In a non-debug build D(x) expands to nothing, so x is never evaluated. */
	17	#ifdef Py_DEBUG
	18	extern int Py_DebugFlag;
	19	#define D(x) if (!Py_DebugFlag); else x
	20	#else
	21	#define D(x)
	22	#endif
	23
	24
	25	/* STACK DATA TYPE */
	26
	27	static void s_reset(stack *);
	28
	29	static void
	30	s_reset(stack *s)
	31	{
[391]	32	s->s_top = &s->s_base[MAXSTACK];
[2]	33	}
	34
	35	#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
	36
	37	static int
	38	s_push(register stack s, dfa d, node *parent)
	39	{
[391]	40	register stackentry *top;
	41	if (s->s_top == s->s_base) {
	42	fprintf(stderr, "s_push: parser stack overflow\n");
	43	return E_NOMEM;
	44	}
	45	top = --s->s_top;
	46	top->s_dfa = d;
	47	top->s_parent = parent;
	48	top->s_state = 0;
	49	return 0;
[2]	50	}
	51
	#ifndef Py_DEBUG

	/* Release build: drop the top entry with no underflow check. */
	#define s_pop(s) (s)->s_top++

	#else /* Py_DEBUG */

	/* Debug build: drop the top entry, aborting via Py_FatalError if the
	   stack is already empty. */
	static void
	s_pop(register stack *s)
	{
	    if (s_empty(s)) {
	        Py_FatalError("s_pop: parser stack underflow -- FATAL");
	    }
	    s->s_top++;
	}

	#endif
	67
	68
	69	/* PARSER CREATION */
	70
	71	parser_state *
	72	PyParser_New(grammar *g, int start)
	73	{
[391]	74	parser_state *ps;
	75
	76	if (!g->g_accel)
	77	PyGrammar_AddAccelerators(g);
	78	ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
	79	if (ps == NULL)
	80	return NULL;
	81	ps->p_grammar = g;
[2]	82	#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
[391]	83	ps->p_flags = 0;
[2]	84	#endif
[391]	85	ps->p_tree = PyNode_New(start);
	86	if (ps->p_tree == NULL) {
	87	PyMem_FREE(ps);
	88	return NULL;
	89	}
	90	s_reset(&ps->p_stack);
	91	(void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
	92	return ps;
[2]	93	}
	94
	95	void
	96	PyParser_Delete(parser_state *ps)
	97	{
[391]	98	/* NB If you want to save the parse tree,
	99	you must set p_tree to NULL before calling delparser! */
	100	PyNode_Free(ps->p_tree);
	101	PyMem_FREE(ps);
[2]	102	}
	103
	104
	105	/* PARSER STACK OPERATIONS */
	106
	107	static int
	108	shift(register stack s, int type, char str, int newstate, int lineno, int col_offset)
	109	{
[391]	110	int err;
	111	assert(!s_empty(s));
	112	err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
	113	if (err)
	114	return err;
	115	s->s_top->s_state = newstate;
	116	return 0;
[2]	117	}
	118
	119	static int
	120	push(register stack s, int type, dfa d, int newstate, int lineno, int col_offset)
	121	{
[391]	122	int err;
	123	register node *n;
	124	n = s->s_top->s_parent;
	125	assert(!s_empty(s));
	126	err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
	127	if (err)
	128	return err;
	129	s->s_top->s_state = newstate;
	130	return s_push(s, d, CHILD(n, NCH(n)-1));
[2]	131	}
	132
	133
	134	/* PARSER PROPER */
	135
	136	static int
	137	classify(parser_state ps, int type, char str)
	138	{
[391]	139	grammar *g = ps->p_grammar;
	140	register int n = g->g_ll.ll_nlabels;
	141
	142	if (type == NAME) {
	143	register char *s = str;
	144	register label *l = g->g_ll.ll_label;
	145	register int i;
	146	for (i = n; i > 0; i--, l++) {
	147	if (l->lb_type != NAME \|\| l->lb_str == NULL \|\|
	148	l->lb_str[0] != s[0] \|\|
	149	strcmp(l->lb_str, s) != 0)
	150	continue;
[2]	151	#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
[391]	152	if (ps->p_flags & CO_FUTURE_PRINT_FUNCTION &&
	153	s[0] == 'p' && strcmp(s, "print") == 0) {
	154	break; /* no longer a keyword */
	155	}
[2]	156	#endif
[391]	157	D(printf("It's a keyword\n"));
	158	return n - i;
	159	}
	160	}
	161
	162	{
	163	register label *l = g->g_ll.ll_label;
	164	register int i;
	165	for (i = n; i > 0; i--, l++) {
	166	if (l->lb_type == type && l->lb_str == NULL) {
	167	D(printf("It's a token we know\n"));
	168	return n - i;
	169	}
	170	}
	171	}
	172
	173	D(printf("Illegal token\n"));
	174	return -1;
[2]	175	}
	176
	#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
	/* Inspect the import statement held by the top stack entry and, if it is
	   a "from __future__ import ..." form, set the matching CO_FUTURE_* bits
	   in ps->p_flags for the with-statement, print-function and
	   unicode-literals features.

	   Returns silently, leaving p_flags untouched, when the statement does
	   not have the expected shape or is a star import. */
	static void
	future_hack(parser_state *ps)
	{
	    node *n = ps->p_stack.s_top->s_parent;
	    node *ch, *cch;
	    int i;

	    /* from __future__ import ..., must have at least 4 children */
	    n = CHILD(n, 0);
	    if (NCH(n) < 4)
	        return;
	    ch = CHILD(n, 0);
	    if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
	        return;
	    ch = CHILD(n, 1);
	    /* NOTE(review): this bails out only when the module node has exactly
	       one child with a string different from "__future__"; other shapes
	       fall through to the checks below. */
	    if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
	        strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
	        return;
	    ch = CHILD(n, 3);
	    /* ch can be a star, a parenthesis or import_as_names */
	    if (TYPE(ch) == STAR)
	        return;
	    if (TYPE(ch) == LPAR)
	        ch = CHILD(n, 4);

	    /* Visit every second child, i.e. skip the separators between the
	       imported names. */
	    for (i = 0; i < NCH(ch); i += 2) {
	        cch = CHILD(ch, i);
	        if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) {
	            char *str_ch = STR(CHILD(cch, 0));
	            if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {
	                ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
	            } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {
	                ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
	            } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {
	                ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
	            }
	        }
	    }
	}
	#endif /* future keyword */
	218
	219	int
	220	PyParser_AddToken(register parser_state ps, register int type, char str,
[391]	221	int lineno, int col_offset, int *expected_ret)
[2]	222	{
[391]	223	register int ilabel;
	224	int err;
	225
	226	D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
	227
	228	/* Find out which label this token is */
	229	ilabel = classify(ps, type, str);
	230	if (ilabel < 0)
	231	return E_SYNTAX;
	232
	233	/* Loop until the token is shifted or an error occurred */
	234	for (;;) {
	235	/* Fetch the current dfa and state */
	236	register dfa *d = ps->p_stack.s_top->s_dfa;
	237	register state *s = &d->d_state[ps->p_stack.s_top->s_state];
	238
	239	D(printf(" DFA '%s', state %d:",
	240	d->d_name, ps->p_stack.s_top->s_state));
	241
	242	/* Check accelerator */
	243	if (s->s_lower <= ilabel && ilabel < s->s_upper) {
	244	register int x = s->s_accel[ilabel - s->s_lower];
	245	if (x != -1) {
	246	if (x & (1<<7)) {
	247	/* Push non-terminal */
	248	int nt = (x >> 8) + NT_OFFSET;
	249	int arrow = x & ((1<<7)-1);
	250	dfa *d1 = PyGrammar_FindDFA(
	251	ps->p_grammar, nt);
	252	if ((err = push(&ps->p_stack, nt, d1,
	253	arrow, lineno, col_offset)) > 0) {
	254	D(printf(" MemError: push\n"));
	255	return err;
	256	}
	257	D(printf(" Push ...\n"));
	258	continue;
	259	}
	260
	261	/* Shift the token */
	262	if ((err = shift(&ps->p_stack, type, str,
	263	x, lineno, col_offset)) > 0) {
	264	D(printf(" MemError: shift.\n"));
	265	return err;
	266	}
	267	D(printf(" Shift.\n"));
	268	/* Pop while we are in an accept-only state */
	269	while (s = &d->d_state
	270	[ps->p_stack.s_top->s_state],
	271	s->s_accept && s->s_narcs == 1) {
	272	D(printf(" DFA '%s', state %d: "
	273	"Direct pop.\n",
	274	d->d_name,
	275	ps->p_stack.s_top->s_state));
[2]	276	#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
[391]	277	if (d->d_name[0] == 'i' &&
	278	strcmp(d->d_name,
	279	"import_stmt") == 0)
	280	future_hack(ps);
[2]	281	#endif
[391]	282	s_pop(&ps->p_stack);
	283	if (s_empty(&ps->p_stack)) {
	284	D(printf(" ACCEPT.\n"));
	285	return E_DONE;
	286	}
	287	d = ps->p_stack.s_top->s_dfa;
	288	}
	289	return E_OK;
	290	}
	291	}
	292
	293	if (s->s_accept) {
[2]	294	#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
[391]	295	if (d->d_name[0] == 'i' &&
	296	strcmp(d->d_name, "import_stmt") == 0)
	297	future_hack(ps);
[2]	298	#endif
[391]	299	/* Pop this dfa and try again */
	300	s_pop(&ps->p_stack);
	301	D(printf(" Pop ...\n"));
	302	if (s_empty(&ps->p_stack)) {
	303	D(printf(" Error: bottom of stack.\n"));
	304	return E_SYNTAX;
	305	}
	306	continue;
	307	}
	308
	309	/* Stuck, report syntax error */
	310	D(printf(" Error.\n"));
	311	if (expected_ret) {
	312	if (s->s_lower == s->s_upper - 1) {
	313	/* Only one possible expected token */
	314	*expected_ret = ps->p_grammar->
	315	g_ll.ll_label[s->s_lower].lb_type;
	316	}
	317	else
	318	*expected_ret = -1;
	319	}
	320	return E_SYNTAX;
	321	}
[2]	322	}
	323
	324
	325	#ifdef Py_DEBUG
	326
	327	/* DEBUG OUTPUT */
	328
	329	void
	330	dumptree(grammar g, node n)
	331	{
[391]	332	int i;
	333
	334	if (n == NULL)
	335	printf("NIL");
	336	else {
	337	label l;
	338	l.lb_type = TYPE(n);
	339	l.lb_str = STR(n);
	340	printf("%s", PyGrammar_LabelRepr(&l));
	341	if (ISNONTERMINAL(TYPE(n))) {
	342	printf("(");
	343	for (i = 0; i < NCH(n); i++) {
	344	if (i > 0)
	345	printf(",");
	346	dumptree(g, CHILD(n, i));
	347	}
	348	printf(")");
	349	}
	350	}
[2]	351	}
	352
	353	void
	354	showtree(grammar g, node n)
	355	{
[391]	356	int i;
	357
	358	if (n == NULL)
	359	return;
	360	if (ISNONTERMINAL(TYPE(n))) {
	361	for (i = 0; i < NCH(n); i++)
	362	showtree(g, CHILD(n, i));
	363	}
	364	else if (ISTERMINAL(TYPE(n))) {
	365	printf("%s", _PyParser_TokenNames[TYPE(n)]);
	366	if (TYPE(n) == NUMBER \|\| TYPE(n) == NAME)
	367	printf("(%s)", STR(n));
	368	printf(" ");
	369	}
	370	else
	371	printf("? ");
[2]	372	}
	373
	374	void
	375	printtree(parser_state *ps)
	376	{
[391]	377	if (Py_DebugFlag) {
	378	printf("Parse tree:\n");
	379	dumptree(ps->p_grammar, ps->p_tree);
	380	printf("\n");
	381	printf("Tokens:\n");
	382	showtree(ps->p_grammar, ps->p_tree);
	383	printf("\n");
	384	}
	385	printf("Listing:\n");
	386	PyNode_ListTree(ps->p_tree);
	387	printf("\n");
[2]	388	}
	389
	390	#endif /* Py_DEBUG */
	391
	392	/*
	393
	394	Description
	395	-----------
	396
	397	The parser's interface differs from the usual one: PyParser_AddToken()
	398	must be called for each token in the input. This makes it possible to
	399	turn it into an incremental parsing system later. The parsing system
	400	constructs a parse tree as it goes.
	401
	402	A parsing rule is represented as a Deterministic Finite-state Automaton
	403	(DFA). A node in a DFA represents a state of the parser; an arc represents
	404	a transition. Transitions are either labeled with terminal symbols or
	405	with non-terminals. When the parser decides to follow an arc labeled
	406	with a non-terminal, it is invoked recursively with the DFA representing
	407	the parsing rule for that as its initial state; when that DFA accepts,
	408	the parser that invoked it continues. The parse tree constructed by the
	409	recursively called parser is inserted as a child in the current parse tree.
	410
	411	The DFA's can be constructed automatically from a more conventional
	412	language description. An extended LL(1) grammar (ELL(1)) is suitable.
	413	Certain restrictions make the parser's life easier: rules that can produce
	414	the empty string should be outlawed (there are other ways to put loops
	415	or optional parts in the language). To avoid the need to construct
	416	FIRST sets, we can require that all but the last alternative of a rule
	417	(really: arc going out of a DFA's state) must begin with a terminal
	418	symbol.
	419
	420	As an example, consider this grammar:
	421
[391]	422	    expr:   term (OP term)*
	423	    term:   CONSTANT | '(' expr ')'
[2]	424
	425	The DFA corresponding to the rule for expr is:
	426
	427	    ------->.---term-->.------->
[391]	428	        ^          |
	429	        |          |
	430	        \----OP----/
[2]	431
	432	The parse tree generated for the input a+b is:
	433
	434	(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
	435
	436	*/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Parser/parser.c

Download in other formats: