Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

awkgram.y@ 3181

Visit:

Last change on this file since 3181 was 3076, checked in by bird, 18 years ago
gawk 3.1.5
File size: 78.2 KB

Line
1	/*
2	* awkgram.y --- yacc/bison parser
3	*/
4
5	/*
6	* Copyright (C) 1986, 1988, 1989, 1991-2005 the Free Software Foundation, Inc.
7	*
8	* This file is part of GAWK, the GNU implementation of the
9	* AWK Programming Language.
10	*
11	* GAWK is free software; you can redistribute it and/or modify
12	* it under the terms of the GNU General Public License as published by
13	* the Free Software Foundation; either version 2 of the License, or
14	* (at your option) any later version.
15	*
16	* GAWK is distributed in the hope that it will be useful,
17	* but WITHOUT ANY WARRANTY; without even the implied warranty of
18	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19	* GNU General Public License for more details.
20	*
21	* You should have received a copy of the GNU General Public License
22	* along with this program; if not, write to the Free Software
23	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24	*/
25
26	%{
27	#ifdef GAWKDEBUG
28	#define YYDEBUG 12
29	#endif
30
31	#include "awk.h"
32
33	#define CAN_FREE TRUE
34	#define DONT_FREE FALSE
35
36	#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
37	static void yyerror(const char *m, ...) ATTRIBUTE_PRINTF_1;
38	#else
39	static void yyerror(); /* va_alist */
40	#endif
41	static char *get_src_buf P((void));
42	static int yylex P((void));
43	static NODE *node_common P((NODETYPE op));
44	static NODE *snode P((NODE *subn, NODETYPE op, int sindex)); /* builds the node for a built-in call */
45	static NODE *make_for_loop P((NODE *init, NODE *cond, NODE *incr));
46	static NODE *append_right P((NODE *list, NODE *new));
47	static inline NODE *append_pattern P((NODE **list, NODE *patt)); /* list: address of a rule-list head, e.g. &begin_block */
48	static void func_install P((NODE *params, NODE *def));
49	static void pop_var P((NODE *np, int freeit));
50	static void pop_params P((NODE *params));
51	static NODE *make_param P((char *name)); /* name: a NAME / func_name token string */
52	static NODE *mk_rexp P((NODE *exp));
53	static int dup_parms P((NODE *func));
54	static void param_sanity P((NODE *arglist));
55	static int parms_shadow P((const char *fname, NODE *func));
56	static int isnoeffect P((NODETYPE t));
57	static int isassignable P((NODE *n));
58	static void dumpintlstr P((const char *str, size_t len));
59	static void dumpintlstr2 P((const char *str1, size_t len1, const char *str2, size_t len2));
60	static void count_args P((NODE *n));
61	static int isarray P((NODE *n));
62
63	enum defref { FUNC_DEFINE, FUNC_USE };
64	static void func_use P((const char *name, enum defref how));
65	static void check_funcs P((void));
66
67	static int want_regexp; /* lexical scanning kludge */
68	static int can_return; /* parsing kludge */
69	static int begin_or_end_rule = FALSE; /* parsing kludge */
70	static int parsing_end_rule = FALSE; /* for warnings */
71	static int in_print = FALSE; /* lexical scanning kludge for print */
72	static int in_parens = 0; /* lexical scanning kludge for print */
73	static char lexptr; / pointer to next char during parsing */
74	static char *lexend;
75	static char lexptr_begin; / keep track of where we were for error msgs */
76	static char lexeme; / beginning of lexeme for debugging */
77	static char *thisline = NULL;
78	#define YYDEBUG_LEXER_TEXT (lexeme)
79	static int param_counter;
80	static char *tokstart = NULL;
81	static char *tok = NULL;
82	static char *tokend;
83
84	static long func_count; /* total number of functions */
85
86	#define HASHSIZE 1021 /* this constant only used here */
87	NODE *variables[HASHSIZE];
88	static int var_count; /* total number of global variables */
89
90	extern char *source;
91	extern int sourceline;
92	extern struct src *srcfiles;
93	extern long numfiles;
94	extern int errcount;
95	extern NODE *begin_block;
96	extern NODE *end_block;
97
98	/*
99	* This string cannot occur as a real awk identifier.
100	* Use it as a special token to make function parsing
101	* uniform, but if it's seen, don't install the function.
102	* e.g.
103	* function split(x) { return x }
104	* function x(a) { return a }
105	* should only produce one error message, and not core dump.
106	*/
107	static char builtin_func[] = "@builtin";
108	%}
109
110	%union {
111	long lval; /* LEX_BUILTIN, LEX_LENGTH, INCREMENT, ... (see %token <lval>) */
112	AWKNUM fval;
113	NODE *nodeval; /* parse-tree nodes: nearly every nonterminal */
114	NODETYPE nodetypeval; /* operator and keyword node types */
115	char *sval; /* NAME, FUNC_CALL, REGEXP text */
116	NODE *(*ptrval) P((void));
117	}
118
119	%type <nodeval> function_prologue pattern action variable param_list
120	%type <nodeval> exp common_exp
121	%type <nodeval> simp_exp non_post_simp_exp
122	%type <nodeval> expression_list opt_expression_list print_expression_list
123	%type <nodeval> statements statement if_statement switch_body case_statements case_statement case_value opt_param_list
124	%type <nodeval> simple_stmt opt_simple_stmt
125	%type <nodeval> opt_exp opt_variable regexp
126	%type <nodeval> input_redir output_redir
127	%type <nodetypeval> print
128	%type <nodetypeval> assign_operator a_relop relop_or_less
129	%type <sval> func_name
130	%type <lval> lex_builtin
131
132	%token <sval> FUNC_CALL NAME REGEXP
133	%token <lval> ERROR
134	%token <nodeval> YNUMBER YSTRING
135	%token <nodetypeval> RELOP IO_OUT IO_IN
136	%token <nodetypeval> ASSIGNOP ASSIGN MATCHOP CONCAT_OP
137	%token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
138	%token <nodetypeval> LEX_SWITCH LEX_CASE LEX_DEFAULT LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
139	%token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
140	%token <nodetypeval> LEX_GETLINE LEX_NEXTFILE
141	%token <nodetypeval> LEX_IN
142	%token <lval> LEX_AND LEX_OR INCREMENT DECREMENT
143	%token <lval> LEX_BUILTIN LEX_LENGTH
144	%token NEWLINE
145
146	/* these are just yylval numbers */
147
148	/* Lowest to highest */
149	%right ASSIGNOP ASSIGN SLASH_BEFORE_EQUAL
150	%right '?' ':'
151	%left LEX_OR
152	%left LEX_AND
153	%left LEX_GETLINE
154	%nonassoc LEX_IN
155	%left FUNC_CALL LEX_BUILTIN LEX_LENGTH
156	%nonassoc ','
157	%nonassoc MATCHOP
158	%nonassoc RELOP '<' '>' IO_IN IO_OUT
159	%left CONCAT_OP
160	%left YSTRING YNUMBER
161	%left '+' '-'
162	%left '*' '/' '%'
163	%right '!' UNARY
164	%right '^'
165	%left INCREMENT DECREMENT
166	%left '$'
167	%left '(' ')'
168	%%
169
170	start
171	: opt_nls program opt_nls
172	{
173	check_funcs();
174	}
175	;
176
177	program
178	: /* empty */
179	\| program rule
180	{
181	begin_or_end_rule = parsing_end_rule = FALSE;
182	yyerrok;
183	}
184	\| program error
185	{
186	begin_or_end_rule = parsing_end_rule = FALSE;
187	/*
188	* If errors, give up, don't produce an infinite
189	* stream of syntax error messages.
190	*/
191	/* yyerrok; */
192	}
193	;
194
195	rule
196	: pattern action
197	{
198	$1->rnode = $2;
199	}
200	\| pattern statement_term
201	{
202	if ($1->lnode != NULL) {
203	/* pattern rule with non-empty pattern */
204	$1->rnode = node(NULL, Node_K_print_rec, NULL);
205	} else {
206	/* an error */
207	if (begin_or_end_rule)
208	msg(_("%s blocks must have an action part"),
209	(parsing_end_rule ? "END" : "BEGIN"));
210	else
211	msg(_("each rule must have a pattern or an action part"));
212	errcount++;
213	}
214	}
215	\| function_prologue action
216	{
217	can_return = FALSE;
218	if ($1)
219	func_install($1, $2);
220	yyerrok;
221	}
222	;
223
224	pattern
225	: /* empty */
226	{
227	$$ = append_pattern(&expression_value, (NODE *) NULL);
228	}
229	\| exp
230	{
231	$$ = append_pattern(&expression_value, $1);
232	}
233	\| exp ',' exp
234	{
235	NODE *r;
236
237	getnode(r);
238	r->type = Node_line_range;
239	r->condpair = node($1, Node_cond_pair, $3);
240	r->triggered = FALSE;
241	$$ = append_pattern(&expression_value, r);
242	}
243	\| LEX_BEGIN
244	{
245	begin_or_end_rule = TRUE;
246	$$ = append_pattern(&begin_block, (NODE *) NULL);
247	}
248	\| LEX_END
249	{
250	begin_or_end_rule = parsing_end_rule = TRUE;
251	$$ = append_pattern(&end_block, (NODE *) NULL);
252	}
253	;
254
255	action
256	: l_brace statements r_brace opt_semi opt_nls
257	{ $$ = $2; }
258	;
259
260	func_name
261	: NAME
262	{ $$ = $1; }
263	\| FUNC_CALL
264	{ $$ = $1; }
265	\| lex_builtin
266	{
267	yyerror(_("`%s' is a built-in function, it cannot be redefined"),
268	tokstart);
269	errcount++;
270	$$ = builtin_func;
271	/* yyerrok; */
272	}
273	;
274
275	lex_builtin
276	: LEX_BUILTIN
277	\| LEX_LENGTH
278	;
279
280	function_prologue
281	: LEX_FUNCTION
282	{
283	param_counter = 0;
284	}
285	func_name '(' opt_param_list r_paren opt_nls
286	{
287	NODE *t; /* head of the list: a param node carrying the function's own name */
288
289	t = make_param($3); /* $3 is func_name; the mid-rule action above is $2 */
290	t->flags |= FUNC;
291	$$ = append_right(t, $5); /* $5 is opt_param_list */
292	can_return = TRUE; /* `return' is legal until the rule action resets this */
293	/* check for duplicate parameter names */
294	if (dup_parms($$))
295	errcount++;
296	}
297	;
298
299	regexp
300	/*
301	* In this rule, want_regexp tells yylex that the next thing
302	* is a regexp so it should read up to the closing slash.
303	*/
304	: a_slash
305	{ ++want_regexp; }
306	REGEXP /* The terminating '/' is consumed by yylex(). */
307	{
308	NODE *n;
309	size_t len = strlen($3);
310
311	if (do_lint) {
312	if (len == 0)
313	lintwarn(_("regexp constant `//' looks like a C++ comment, but is not"));
314	else if (($3)[0] == '*' && ($3)[len-1] == '*')
315	/* possible C comment: the regexp text both begins and ends with '*' */
316	lintwarn(_("regexp constant `/%s/' looks like a C comment, but is not"), tokstart);
317	}
318	getnode(n);
319	n->type = Node_regex;
320	n->re_exp = make_string($3, len);
321	n->re_reg = make_regexp($3, len, FALSE, TRUE);
322	n->re_text = NULL;
323	n->re_flags = CONST;
324	n->re_cnt = 1;
325	$$ = n;
326	}
327	;
328
329	a_slash
330	: '/'
331	\| SLASH_BEFORE_EQUAL
332	;
333
334	statements
335	: /* empty */
336	{ $$ = NULL; }
337	\| statements statement
338	{
339	if ($2 == NULL)
340	$$ = $1;
341	else {
342	if (do_lint && isnoeffect($2->type))
343	lintwarn(_("statement may have no effect"));
344	if ($1 == NULL)
345	$$ = $2;
346	else
347	$$ = append_right(
348	($1->type == Node_statement_list ? $1
349	: node($1, Node_statement_list, (NODE *) NULL)),
350	($2->type == Node_statement_list ? $2
351	: node($2, Node_statement_list, (NODE *) NULL)));
352	}
353	yyerrok;
354	}
355	\| statements error
356	{ $$ = NULL; }
357	;
358
359	statement_term
360	: nls
361	\| semi opt_nls
362	;
363
364	statement
365	: semi opt_nls
366	{ $$ = NULL; }
367	\| l_brace statements r_brace
368	{ $$ = $2; }
369	\| if_statement
370	{ $$ = $1; }
371	\| LEX_SWITCH '(' exp r_paren opt_nls l_brace switch_body opt_nls r_brace
372	{ $$ = node($3, Node_K_switch, $7); }
373	\| LEX_WHILE '(' exp r_paren opt_nls statement
374	{ $$ = node($3, Node_K_while, $6); }
375	\| LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls
376	{ $$ = node($6, Node_K_do, $3); }
377	| LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement
378	{
379	/*
380	* Efficiency hack. Recognize the special case of
381	*
382	* for (iggy in foo)
383	* delete foo[iggy]
384	*
385	* and treat it as if it were
386	*
387	* delete foo
388	*
389	* Check that the body is a `delete a[i]' statement,
390	* and that both the loop var and array names match.
391	*/
392	if ($8 != NULL && $8->type == Node_K_delete && $8->rnode != NULL) {
393	NODE *arr, *sub;
394	/* only a single subscript qualifies: `delete a[i, j]' removes other elements and must stay a real loop */
395	assert($8->rnode->type == Node_expression_list);
396	arr = $8->lnode; /* array var */
397	sub = $8->rnode->lnode; /* index var */
398	if ( $8->rnode->rnode == NULL
399	&& (arr->type == Node_var_new
400	|| arr->type == Node_var_array
401	|| arr->type == Node_param_list)
402	&& (sub->type == Node_var_new
403	|| sub->type == Node_var
404	|| sub->type == Node_param_list)
405	&& strcmp($3, sub->vname) == 0
406	&& strcmp($5, arr->vname) == 0) {
407	$8->type = Node_K_delete_loop;
408	$$ = $8;
409	free($3); /* thanks to valgrind for pointing these out */
410	free($5);
411	}
412	else
413	goto regular_loop;
414	} else {
415	regular_loop:
416	$$ = node($8, Node_K_arrayfor,
417	make_for_loop(variable($3, CAN_FREE, Node_var),
418	(NODE *) NULL, variable($5, CAN_FREE, Node_var_array)));
419	}
420	}
421	| LEX_FOR '(' opt_simple_stmt semi opt_nls exp semi opt_nls opt_simple_stmt r_paren opt_nls statement
422	{
423	$$ = node($12, Node_K_for, (NODE *) make_for_loop($3, $6, $9));
424	}
425	| LEX_FOR '(' opt_simple_stmt semi opt_nls semi opt_nls opt_simple_stmt r_paren opt_nls statement
426	{
427	$$ = node($11, Node_K_for,
428	(NODE *) make_for_loop($3, (NODE *) NULL, $8)); /* no condition part */
429	}
430	| LEX_BREAK statement_term
431	/* for break, maybe we'll have to remember where to break to */
432	{ $$ = node((NODE *) NULL, Node_K_break, (NODE *) NULL); }
433	| LEX_CONTINUE statement_term
434	/* similarly */
435	{ $$ = node((NODE *) NULL, Node_K_continue, (NODE *) NULL); }
436	| LEX_NEXT statement_term
437	{ NODETYPE type;
438
439	if (begin_or_end_rule)
440	yyerror(_("`%s' used in %s action"), "next",
441	(parsing_end_rule ? "END" : "BEGIN"));
442	type = Node_K_next;
443	$$ = node((NODE *) NULL, type, (NODE *) NULL);
444	}
445	| LEX_NEXTFILE statement_term
446	{
447	if (do_traditional) {
448	/*
449	* can't use yyerror, since may have overshot
450	* the source line
451	*/
452	errcount++;
453	error(_("`nextfile' is a gawk extension"));
454	}
455	if (do_lint)
456	lintwarn(_("`nextfile' is a gawk extension"));
457	if (begin_or_end_rule) {
458	/* same thing */
459	errcount++;
460	error(_("`%s' used in %s action"), "nextfile",
461	(parsing_end_rule ? "END" : "BEGIN"));
462	}
463	$$ = node((NODE *) NULL, Node_K_nextfile, (NODE *) NULL);
464	}
465	\| LEX_EXIT opt_exp statement_term
466	{ $$ = node($2, Node_K_exit, (NODE *) NULL); }
467	\| LEX_RETURN
468	{
469	if (! can_return)
470	yyerror(_("`return' used outside function context"));
471	}
472	opt_exp statement_term
473	{
474	$$ = node($3 == NULL ? Nnull_string : $3,
475	Node_K_return, (NODE *) NULL);
476	}
477	\| simple_stmt statement_term
478	;
479
480	/*
481	* A simple_stmt exists to satisfy a constraint in the POSIX
482	* grammar allowing them to occur as the 1st and 3rd parts
483	* in a `for (...;...;...)' loop. This is a historical oddity
484	* inherited from Unix awk, not at all documented in the AK&W
485	* awk book. We support it, as this was reported as a bug.
486	* We don't bother to document it though. So there.
487	*/
488	simple_stmt
489	: print { in_print = TRUE; in_parens = 0; } print_expression_list output_redir
490	{
491	/*
492	* Optimization: plain `print' has no expression list, so $3 is null.
493	* If $3 is an expression list with one element (rnode == null)
494	* and lnode is a field spec for field 0, we have `print $0'.
495	* For both, use Node_K_print_rec, which is faster for these two cases.
496	*/
497	if ($1 == Node_K_print &&
498	($3 == NULL
499	\|\| ($3->type == Node_expression_list
500	&& $3->rnode == NULL
501	&& $3->lnode->type == Node_field_spec
502	&& $3->lnode->lnode->type == Node_val
503	&& $3->lnode->lnode->numbr == 0.0))
504	) {
505	static int warned = FALSE;
506
507	$$ = node(NULL, Node_K_print_rec, $4);
508
509	if (do_lint && $3 == NULL && begin_or_end_rule && ! warned) {
510	warned = TRUE;
511	lintwarn(
512	_("plain `print' in BEGIN or END rule should probably be `print \"\"'"));
513	}
514	} else {
515	$$ = node($3, $1, $4);
516	if ($$->type == Node_K_printf)
517	count_args($$);
518	}
519	}
520	\| LEX_DELETE NAME '[' expression_list ']'
521	{ $$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, $4); }
522	\| LEX_DELETE NAME
523	{
524	if (do_lint)
525	lintwarn(_("`delete array' is a gawk extension"));
526	if (do_traditional) {
527	/*
528	* can't use yyerror, since may have overshot
529	* the source line
530	*/
531	errcount++;
532	error(_("`delete array' is a gawk extension"));
533	}
534	$$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, (NODE *) NULL);
535	}
536	\| LEX_DELETE '(' NAME ')'
537	{
538	/* this is for tawk compatibility. maybe the warnings should always be done. */
539	if (do_lint)
540	lintwarn(_("`delete(array)' is a non-portable tawk extension"));
541	if (do_traditional) {
542	/*
543	* can't use yyerror, since may have overshot
544	* the source line
545	*/
546	errcount++;
547	error(_("`delete(array)' is a non-portable tawk extension"));
548	}
549	$$ = node(variable($3, CAN_FREE, Node_var_array), Node_K_delete, (NODE *) NULL);
550	}
551	\| exp
552	{ $$ = $1; }
553	;
554
555	opt_simple_stmt
556	: /* empty */
557	{ $$ = NULL; }
558	\| simple_stmt
559	{ $$ = $1; }
560	;
561
562	switch_body
563	: case_statements
564	{
565	if ($1 == NULL) {
566	$$ = NULL;
567	} else {
568	NODE *dflt = NULL; /* list element holding the `default' case, if any */
569	NODE *head = $1;
570	NODE *curr;
571
572	const char **case_values = NULL; /* string form of every case value seen so far */
573
574	int maxcount = 128; /* current capacity of case_values, grown in steps of 128 */
575	int case_count = 0;
576	int i;
577
578	emalloc(case_values, const char **, sizeof(char *) * maxcount, "switch_body"); /* array of pointers, not of chars */
579	for (curr = $1; curr != NULL; curr = curr->rnode) {
580	/* Assure that case statement values are unique. */
581	if (curr->lnode->type == Node_K_case) {
582	char *caseval;
583
584	if (curr->lnode->lnode->type == Node_regex)
585	caseval = curr->lnode->lnode->re_exp->stptr;
586	else
587	caseval = force_string(tree_eval(curr->lnode->lnode))->stptr;
588
589	for (i = 0; i < case_count; i++)
590	if (strcmp(caseval, case_values[i]) == 0)
591	yyerror(_("duplicate case values in switch body: %s"), caseval);
592
593	if (case_count >= maxcount) {
594	maxcount += 128;
595	erealloc(case_values, const char **, sizeof(char *) * maxcount, "switch_body");
596	}
597	case_values[case_count++] = caseval;
598	} else {
599	/* Otherwise save a pointer to the default node. */
600	if (dflt != NULL)
601	yyerror(_("Duplicate `default' detected in switch body"));
602	dflt = curr;
603	}
604	}
605
606	free(case_values); /* only the pointer array is ours; the strings belong to the nodes */
607
608	/* Create the switch body. */
609	$$ = node(head, Node_switch_body, dflt);
610	}
611	}
612	;
613
614	case_statements
615	: /* empty */
616	{ $$ = NULL; }
617	\| case_statements case_statement
618	{
619	if ($2 == NULL)
620	$$ = $1;
621	else {
622	if (do_lint && isnoeffect($2->type))
623	lintwarn(_("statement may have no effect"));
624	if ($1 == NULL)
625	$$ = node($2, Node_case_list, (NODE *) NULL);
626	else
627	$$ = append_right(
628	($1->type == Node_case_list ? $1 : node($1, Node_case_list, (NODE *) NULL)),
629	($2->type == Node_case_list ? $2 : node($2, Node_case_list, (NODE *) NULL))
630	);
631	}
632	yyerrok;
633	}
634	\| case_statements error
635	{ $$ = NULL; }
636	;
637
638	case_statement
639	: LEX_CASE case_value colon opt_nls statements
640	{ $$ = node($2, Node_K_case, $5); }
641	\| LEX_DEFAULT colon opt_nls statements
642	{ $$ = node((NODE *) NULL, Node_K_default, $4); }
643	;
644
645	case_value
646	: YNUMBER
647	{ $$ = $1; }
648	\| '-' YNUMBER %prec UNARY
649	{
650	$2->numbr = -(force_number($2));
651	$$ = $2;
652	}
653	\| '+' YNUMBER %prec UNARY
654	{ $$ = $2; }
655	\| YSTRING
656	{ $$ = $1; }
657	\| regexp
658	{ $$ = $1; }
659	;
660
661	print
662	: LEX_PRINT
663	\| LEX_PRINTF
664	;
665
666	/*
667	* Note: ``print(x)'' is already parsed by the first rule,
668	* so there is no good in covering it by the second one too.
669	*/
670	print_expression_list
671	: opt_expression_list
672	\| '(' exp comma expression_list r_paren
673	{ $$ = node($2, Node_expression_list, $4); }
674	;
675
676	output_redir
677	: /* empty */
678	{
679	in_print = FALSE;
680	in_parens = 0;
681	$$ = NULL;
682	}
683	\| IO_OUT { in_print = FALSE; in_parens = 0; } common_exp
684	{
685	$$ = node($3, $1, (NODE *) NULL);
686	if ($1 == Node_redirect_twoway
687	&& $3->type == Node_K_getline
688	&& $3->rnode != NULL
689	&& $3->rnode->type == Node_redirect_twoway)
690	yyerror(_("multistage two-way pipelines don't work"));
691	}
692	;
693
694	if_statement
695	: LEX_IF '(' exp r_paren opt_nls statement
696	{
697	$$ = node($3, Node_K_if,
698	node($6, Node_if_branches, (NODE *) NULL));
699	}
700	\| LEX_IF '(' exp r_paren opt_nls statement
701	LEX_ELSE opt_nls statement
702	{ $$ = node($3, Node_K_if,
703	node($6, Node_if_branches, $9)); }
704	;
705
706	nls
707	: NEWLINE
708	\| nls NEWLINE
709	;
710
711	opt_nls
712	: /* empty */
713	\| nls
714	;
715
716	input_redir
717	: /* empty */
718	{ $$ = NULL; }
719	\| '<' simp_exp
720	{ $$ = node($2, Node_redirect_input, (NODE *) NULL); }
721	;
722
723	opt_param_list
724	: /* empty */
725	{ $$ = NULL; }
726	\| param_list
727	{ $$ = $1; }
728	;
729
730	param_list
731	: NAME
732	{ $$ = make_param($1); }
733	\| param_list comma NAME
734	{ $$ = append_right($1, make_param($3)); yyerrok; }
735	\| error
736	{ $$ = NULL; }
737	\| param_list error
738	{ $$ = NULL; }
739	\| param_list comma error
740	{ $$ = NULL; }
741	;
742
743	/* optional expression, as in for loop */
744	opt_exp
745	: /* empty */
746	{ $$ = NULL; }
747	\| exp
748	{ $$ = $1; }
749	;
750
751	opt_expression_list
752	: /* empty */
753	{ $$ = NULL; }
754	\| expression_list
755	{ $$ = $1; }
756	;
757
758	expression_list
759	: exp
760	{ $$ = node($1, Node_expression_list, (NODE *) NULL); }
761	\| expression_list comma exp
762	{
763	$$ = append_right($1,
764	node($3, Node_expression_list, (NODE *) NULL));
765	yyerrok;
766	}
767	\| error
768	{ $$ = NULL; }
769	\| expression_list error
770	{ $$ = NULL; }
771	\| expression_list error exp
772	{ $$ = NULL; }
773	\| expression_list comma error
774	{ $$ = NULL; }
775	;
776
777	/* Expressions, not including the comma operator. */
778	exp : variable assign_operator exp %prec ASSIGNOP
779	{
780	if (do_lint && $3->type == Node_regex)
781	lintwarn(_("regular expression on right of assignment"));
782	/*
783	* Optimization of `x = x y'. Can save lots of time
784	* if done a lot.
785	*/
786	if (( $1->type == Node_var
787	\|\| $1->type == Node_var_new
788	\|\| $1->type == Node_param_list)
789	&& $2 == Node_assign
790	&& $3->type == Node_concat
791	&& $3->lnode == $1) {
792	$3->type = Node_assign_concat; /* Just change the type */
793	$$ = $3; /* And use it directly */
794	} else
795	$$ = node($1, $2, $3);
796	}
797	\| exp LEX_AND exp
798	{ $$ = node($1, Node_and, $3); }
799	\| exp LEX_OR exp
800	{ $$ = node($1, Node_or, $3); }
801	\| exp MATCHOP exp
802	{
803	if ($1->type == Node_regex)
804	warning(_("regular expression on left of `~' or `!~' operator"));
805	$$ = node($1, $2, mk_rexp($3));
806	}
807	\| exp LEX_IN NAME
808	{ $$ = node(variable($3, CAN_FREE, Node_var_array), Node_in_array, $1); }
809	\| exp a_relop exp %prec RELOP
810	{
811	if (do_lint && $3->type == Node_regex)
812	lintwarn(_("regular expression on right of comparison"));
813	$$ = node($1, $2, $3);
814	}
815	\| exp '?' exp ':' exp
816	{ $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));}
817	\| common_exp
818	{ $$ = $1; }
819	;
820
821	assign_operator
822	: ASSIGN
823	{ $$ = $1; }
824	\| ASSIGNOP
825	{ $$ = $1; }
826	\| SLASH_BEFORE_EQUAL ASSIGN /* `/=' */
827	{ $$ = Node_assign_quotient; }
828	;
829
830	relop_or_less
831	: RELOP
832	{ $$ = $1; }
833	\| '<'
834	{ $$ = Node_less; }
835	;
836	a_relop
837	: relop_or_less
838	\| '>'
839	{ $$ = Node_greater; }
840	;
841
842	common_exp
843	: regexp
844	{ $$ = $1; }
845	\| '!' regexp %prec UNARY
846	{
847	$$ = node(node(make_number(0.0),
848	Node_field_spec,
849	(NODE *) NULL),
850	Node_nomatch,
851	$2);
852	}
853	\| '(' expression_list r_paren LEX_IN NAME
854	{ $$ = node(variable($5, CAN_FREE, Node_var_array), Node_in_array, $2); }
855	\| simp_exp
856	{ $$ = $1; }
857	\| common_exp simp_exp %prec CONCAT_OP
858	{ $$ = node($1, Node_concat, $2); }
859	;
860
861	simp_exp
862	: non_post_simp_exp
863	/* Binary operators in order of decreasing precedence. */
864	\| simp_exp '^' simp_exp
865	{ $$ = node($1, Node_exp, $3); }
866	\| simp_exp '*' simp_exp
867	{ $$ = node($1, Node_times, $3); }
868	\| simp_exp '/' simp_exp
869	{ $$ = node($1, Node_quotient, $3); }
870	\| simp_exp '%' simp_exp
871	{ $$ = node($1, Node_mod, $3); }
872	\| simp_exp '+' simp_exp
873	{ $$ = node($1, Node_plus, $3); }
874	\| simp_exp '-' simp_exp
875	{ $$ = node($1, Node_minus, $3); }
876	\| LEX_GETLINE opt_variable input_redir
877	{
878	if (do_lint && parsing_end_rule && $3 == NULL)
879	lintwarn(_("non-redirected `getline' undefined inside END action"));
880	$$ = node($2, Node_K_getline, $3);
881	}
882	\| simp_exp IO_IN LEX_GETLINE opt_variable
883	{
884	$$ = node($4, Node_K_getline,
885	node($1, $2, (NODE *) NULL));
886	}
887	\| variable INCREMENT
888	{ $$ = node($1, Node_postincrement, (NODE *) NULL); }
889	\| variable DECREMENT
890	{ $$ = node($1, Node_postdecrement, (NODE *) NULL); }
891	;
892
893	non_post_simp_exp
894	: '!' simp_exp %prec UNARY
895	{ $$ = node($2, Node_not, (NODE *) NULL); }
896	\| '(' exp r_paren
897	{ $$ = $2; }
898	\| LEX_BUILTIN
899	'(' opt_expression_list r_paren
900	{ $$ = snode($3, Node_builtin, (int) $1); }
901	\| LEX_LENGTH '(' opt_expression_list r_paren
902	{ $$ = snode($3, Node_builtin, (int) $1); }
903	\| LEX_LENGTH
904	{
905	if (do_lint)
906	lintwarn(_("call of `length' without parentheses is not portable"));
907	$$ = snode((NODE *) NULL, Node_builtin, (int) $1);
908	if (do_posix)
909	warning(_("call of `length' without parentheses is deprecated by POSIX"));
910	}
911	\| FUNC_CALL '(' opt_expression_list r_paren
912	{
913	$$ = node($3, Node_func_call, make_string($1, strlen($1)));
914	$$->funcbody = NULL;
915	func_use($1, FUNC_USE);
916	param_sanity($3);
917	free($1);
918	}
919	\| variable
920	\| INCREMENT variable
921	{ $$ = node($2, Node_preincrement, (NODE *) NULL); }
922	\| DECREMENT variable
923	{ $$ = node($2, Node_predecrement, (NODE *) NULL); }
924	\| YNUMBER
925	{ $$ = $1; }
926	\| YSTRING
927	{ $$ = $1; }
928
929	\| '-' simp_exp %prec UNARY
930	{
931	if ($2->type == Node_val && ($2->flags & (STRCUR\|STRING)) == 0) {
932	$2->numbr = -(force_number($2));
933	$$ = $2;
934	} else
935	$$ = node($2, Node_unary_minus, (NODE *) NULL);
936	}
937	\| '+' simp_exp %prec UNARY
938	{
939	/*
940	* was: $$ = $2
941	* POSIX semantics: force a conversion to numeric type
942	*/
943	$$ = node (make_number(0.0), Node_plus, $2);
944	}
945	;
946
947	opt_variable
948	: /* empty */
949	{ $$ = NULL; }
950	\| variable
951	{ $$ = $1; }
952	;
953
954	variable
955	: NAME
956	{ $$ = variable($1, CAN_FREE, Node_var_new); }
957	| NAME '[' expression_list ']'
958	{
959	NODE *n = lookup($1);
960	if (n != NULL && ! isarray(n)) {
961	yyerror(_("use of non-array as array"));
962	$$ = node((NODE *) NULL, Node_illegal, (NODE *) NULL); /* placeholder: otherwise the default $$ = $1 exposes a char * as a NODE * */
963	} else if ($3 == NULL) {
964	fatal(_("invalid subscript expression"));
965	} else if ($3->rnode == NULL) {
966	$$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3->lnode); /* single subscript: drop the list wrapper */
967	freenode($3);
968	} else
969	$$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3);
970	}
971	| '$' non_post_simp_exp
972	{ $$ = node($2, Node_field_spec, (NODE *) NULL); }
973	/*
974	#if 0
975	| lex_builtin
976	{ fatal(_("can't use built-in function `%s' as a variable"), tokstart); }
977	#endif
978	*/
979	;
980
981	l_brace
982	: '{' opt_nls
983	;
984
985	r_brace
986	: '}' opt_nls { yyerrok; }
987	;
988
989	r_paren
990	: ')' { yyerrok; }
991	;
992
993	opt_semi
994	: /* empty */
995	\| semi
996	;
997
998	semi
999	: ';' { yyerrok; }
1000	;
1001
1002	colon
1003	: ':' { yyerrok; }
1004	;
1005
1006	comma : ',' opt_nls { yyerrok; }
1007	;
1008
1009	%%
1010
1011	struct token {
1012	const char *operator; /* text to match */
1013	NODETYPE value; /* node type */
1014	int class; /* lexical class */
1015	unsigned flags; /* # of args. allowed and compatibility */
1016	# define ARGS 0xFF /* mask of the A(n) bits: which argument counts are allowed (any combination) */
1017	# define A(n) (1<<(n))
1018	# define VERSION_MASK 0xFF00 /* old awk is zero */
1019	# define NOT_OLD 0x0100 /* feature not in old awk */
1020	# define NOT_POSIX 0x0200 /* feature not in POSIX */
1021	# define GAWKX 0x0400 /* gawk extension */
1022	# define RESX 0x0800 /* Bell Labs Research extension */
1023	NODE *(*ptr) P((NODE *)); /* function that implements this keyword */
1024	};
1025
1026	/* Tokentab is sorted ascii ascending order, so it can be binary searched. */
1027	/* Function pointers come from declarations in awk.h. */
1028
1029	static const struct token tokentab[] = {
1030	{"BEGIN", Node_illegal, LEX_BEGIN, 0, 0},
1031	{"END", Node_illegal, LEX_END, 0, 0},
1032	#ifdef ARRAYDEBUG
1033	{"adump", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_adump},
1034	#endif
1035	{"and", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_and},
1036	{"asort", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_asort},
1037	{"asorti", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_asorti},
1038	{"atan2", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2},
1039	{"bindtextdomain", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_bindtextdomain},
1040	{"break", Node_K_break, LEX_BREAK, 0, 0},
1041	#ifdef ALLOW_SWITCH
1042	{"case", Node_K_case, LEX_CASE, GAWKX, 0},
1043	#endif
1044	{"close", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1)|A(2), do_close},
1045	{"compl", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_compl},
1046	{"continue", Node_K_continue, LEX_CONTINUE, 0, 0},
1047	{"cos", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_cos},
1048	{"dcgettext", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_dcgettext},
1049	{"dcngettext", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3)|A(4)|A(5), do_dcngettext},
1050	#ifdef ALLOW_SWITCH
1051	{"default", Node_K_default, LEX_DEFAULT, GAWKX, 0},
1052	#endif
1053	{"delete", Node_K_delete, LEX_DELETE, NOT_OLD, 0},
1054	{"do", Node_K_do, LEX_DO, NOT_OLD, 0},
1055	{"else", Node_illegal, LEX_ELSE, 0, 0},
1056	{"exit", Node_K_exit, LEX_EXIT, 0, 0},
1057	{"exp", Node_builtin, LEX_BUILTIN, A(1), do_exp},
1058	{"extension", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_ext},
1059	{"fflush", Node_builtin, LEX_BUILTIN, RESX|A(0)|A(1), do_fflush},
1060	{"for", Node_K_for, LEX_FOR, 0, 0},
1061	{"func", Node_K_function, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0},
1062	{"function", Node_K_function, LEX_FUNCTION, NOT_OLD, 0},
1063	{"gensub", Node_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), do_gensub},
1064	{"getline", Node_K_getline, LEX_GETLINE, NOT_OLD, 0},
1065	{"gsub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub},
1066	{"if", Node_K_if, LEX_IF, 0, 0},
1067	{"in", Node_illegal, LEX_IN, 0, 0},
1068	{"index", Node_builtin, LEX_BUILTIN, A(2), do_index},
1069	{"int", Node_builtin, LEX_BUILTIN, A(1), do_int},
1070	{"length", Node_builtin, LEX_LENGTH, A(0)|A(1), do_length},
1071	{"log", Node_builtin, LEX_BUILTIN, A(1), do_log},
1072	{"lshift", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_lshift},
1073	{"match", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_match},
1074	{"mktime", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_mktime},
1075	{"next", Node_K_next, LEX_NEXT, 0, 0},
1076	{"nextfile", Node_K_nextfile, LEX_NEXTFILE, GAWKX, 0},
1077	{"or", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_or},
1078	{"print", Node_K_print, LEX_PRINT, 0, 0},
1079	{"printf", Node_K_printf, LEX_PRINTF, 0, 0},
1080	{"rand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0), do_rand},
1081	{"return", Node_K_return, LEX_RETURN, NOT_OLD, 0},
1082	{"rshift", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_rshift},
1083	{"sin", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_sin},
1084	{"split", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_split},
1085	{"sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf},
1086	{"sqrt", Node_builtin, LEX_BUILTIN, A(1), do_sqrt},
1087	{"srand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand},
1088	#if defined(GAWKDEBUG) || defined(ARRAYDEBUG) /* || ... */
1089	{"stopme", Node_builtin, LEX_BUILTIN, GAWKX|A(0), stopme},
1090	#endif
1091	{"strftime", Node_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2), do_strftime},
1092	{"strtonum", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum},
1093	{"sub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub},
1094	{"substr", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_substr},
1095	#ifdef ALLOW_SWITCH
1096	{"switch", Node_K_switch, LEX_SWITCH, GAWKX, 0},
1097	#endif
1098	{"system", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system},
1099	{"systime", Node_builtin, LEX_BUILTIN, GAWKX|A(0), do_systime},
1100	{"tolower", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower},
1101	{"toupper", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper},
1102	{"while", Node_K_while, LEX_WHILE, 0, 0},
1103	{"xor", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_xor},
1104	};
1105
1106	#ifdef MBS_SUPPORT
1107	/* Multibyte conversion (shift) state carried from one nextc() call to the next. */
1108	static mbstate_t cur_mbstate;
1109	/* Ring buffer of per-byte markers written by nextc(); an entry of 0 means "not examined yet". */
1110	#define MAX_CHAR_IN_RING_BUFFER 8
1111	#define RING_BUFFER_SIZE (MAX_CHAR_IN_RING_BUFFER * MB_LEN_MAX)
1112	static char cur_char_ring[RING_BUFFER_SIZE];
1113	/* Current position in cur_char_ring; advanced by nextc(), moved back by pushback(). */
1114	static int cur_ring_idx;
1115	/* Intended to be true when the last nextc() returned a single-byte character
1116	or the first byte of a multibyte character; it tests for a marker value of 1. */
1117	#define nextc_is_1stbyte (cur_char_ring[cur_ring_idx] == 1)
1118	#else /* MBS_SUPPORT */
1119	/* without multibyte support every byte counts as a first byte */
1120	#define nextc_is_1stbyte 1
1121	#endif /* MBS_SUPPORT */
1122
1123	/* getfname --- return name of a builtin function (for pretty printing) */
1124
1125	const char *
1126	getfname(register NODE (fptr)(NODE *))
1127	{
1128	register int i, j;
1129
1130	j = sizeof(tokentab) / sizeof(tokentab[0]);
1131	/* linear search, no other way to do it */
1132	for (i = 0; i < j; i++)
1133	if (tokentab[i].ptr == fptr)
1134	return tokentab[i].operator;
1135
1136	return NULL;
1137	}
1138
1139	/* yyerror --- print a syntax error message, show where */
1140
1141	/*
1142	* Function identifier purposely indented to avoid mangling
1143	* by ansi2knr. Sigh.
1144	*/
1145
1146	static void
1147	#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
1148	yyerror(const char *m, ...)
1149	#else
1150	/* VARARGS0 */
1151	yyerror(va_alist)
1152	va_dcl
1153	#endif
1154	{
1155	va_list args;
1156	const char *mesg = NULL;
1157	register char bp, cp;
1158	char *scan;
1159	char *buf;
1160	int count;
1161	static char end_of_file_line[] = "(END OF FILE)";
1162	char save;
1163
1164	errcount++;
1165	/* Find the current line in the input file */
1166	if (lexptr && lexeme) {
1167	if (thisline == NULL) {
1168	cp = lexeme;
1169	if (*cp == '\n') {
1170	cp--;
1171	mesg = _("unexpected newline or end of string");
1172	}
1173	for (; cp != lexptr_begin && *cp != '\n'; --cp)
1174	continue;
1175	if (*cp == '\n')
1176	cp++;
1177	thisline = cp;
1178	}
1179	/* NL isn't guaranteed */
1180	bp = lexeme;
1181	while (bp < lexend && bp && bp != '\n')
1182	bp++;
1183	} else {
1184	thisline = end_of_file_line;
1185	bp = thisline + strlen(thisline);
1186	}
1187
1188	/*
1189	* Saving and restoring *bp keeps valgrind happy,
1190	* since the guts of glibc uses strlen, even though
1191	* we're passing an explict precision. Sigh.
1192	*
1193	* 8/2003: We may not need this anymore.
1194	*/
1195	save = *bp;
1196	*bp = '\0';
1197
1198	msg("%.*s", (int) (bp - thisline), thisline);
1199
1200	*bp = save;
1201
1202	#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
1203	va_start(args, m);
1204	if (mesg == NULL)
1205	mesg = m;
1206	#else
1207	va_start(args);
1208	if (mesg == NULL)
1209	mesg = va_arg(args, char *);
1210	#endif
1211	count = (bp - thisline) + strlen(mesg) + 2 + 1;
1212	emalloc(buf, char *, count, "yyerror");
1213
1214	bp = buf;
1215
1216	if (lexptr != NULL) {
1217	scan = thisline;
1218	while (scan < lexeme)
1219	if (*scan++ == '\t')
1220	*bp++ = '\t';
1221	else
1222	*bp++ = ' ';
1223	*bp++ = '^';
1224	*bp++ = ' ';
1225	}
1226	strcpy(bp, mesg);
1227	err("", buf, args);
1228	va_end(args);
1229	free(buf);
1230	}
1231
1232	/* get_src_buf --- read the next buffer of source program */
1233
1234	static char *
1235	get_src_buf()
1236	{
1237	static int samefile = FALSE;
1238	static int nextfile = 0;
1239	static char *buf = NULL;
1240	static size_t buflen = 0;
1241	static int fd;
1242
1243	int n;
1244	register char *scan;
1245	int newfile;
1246	struct stat sbuf;
1247	int readcount = 0;
1248	int l;
1249	char *readloc;
1250
1251	again:
1252	newfile = FALSE;
1253	if (nextfile > numfiles)
1254	return NULL;
1255
1256	if (srcfiles[nextfile].stype == CMDLINE) {
1257	if ((l = strlen(srcfiles[nextfile].val)) == 0) {
1258	/*
1259	* Yet Another Special case:
1260	* gawk '' /path/name
1261	* Sigh.
1262	*/
1263	static int warned = FALSE;
1264
1265	if (do_lint && ! warned) {
1266	warned = TRUE;
1267	lintwarn(_("empty program text on command line"));
1268	}
1269	++nextfile;
1270	goto again;
1271	}
1272	if (srcfiles[nextfile].val[l-1] == '\n') {
1273	/* has terminating newline, can use it directly */
1274	sourceline = 1;
1275	lexptr = lexptr_begin = srcfiles[nextfile].val;
1276	/* fall through to pointer adjustment and return, below */
1277	} else {
1278	/* copy it into static buffer */
1279
1280	/* make sure buffer exists and has room */
1281	if (buflen == 0) {
1282	emalloc(buf, char *, l+2, "get_src_buf");
1283	buflen = l + 2;
1284	} else if (l+2 > buflen) {
1285	erealloc(buf, char *, l+2, "get_src_buf");
1286	buflen = l + 2;
1287	} /* else
1288	buffer has room, just use it */
1289
1290	/* copy in data */
1291	memcpy(buf, srcfiles[nextfile].val, l);
1292	buf[l] = '\n';
1293	buf[++l] = '\0';
1294
1295	/* set vars and return */
1296	lexptr = lexptr_begin = buf;
1297	}
1298	lexend = lexptr + l;
1299	nextfile++; /* for next entry to this routine */
1300	return lexptr;
1301	}
1302
1303	if (! samefile) {
1304	source = srcfiles[nextfile].val;
1305	if (source == NULL) { /* read all the source files, all done */
1306	if (buf != NULL) {
1307	free(buf);
1308	buf = NULL;
1309	}
1310	buflen = 0;
1311	return lexeme = lexptr = lexptr_begin = NULL;
1312	}
1313	fd = pathopen(source);
1314	if (fd <= INVALID_HANDLE) {
1315	char *in;
1316
1317	/* suppress file name and line no. in error mesg */
1318	in = source;
1319	source = NULL;
1320	fatal(_("can't open source file `%s' for reading (%s)"),
1321	in, strerror(errno));
1322	}
1323	l = optimal_bufsize(fd, & sbuf);
1324	/*
1325	* Make sure that something silly like
1326	* AWKBUFSIZE=8 make check
1327	* works ok.
1328	*/
1329	#define A_DECENT_BUFFER_SIZE 128
1330	if (l < A_DECENT_BUFFER_SIZE)
1331	l = A_DECENT_BUFFER_SIZE;
1332	#undef A_DECENT_BUFFER_SIZE
1333
1334	newfile = TRUE;
1335
1336	/* make sure buffer exists and has room */
1337	if (buflen == 0) {
1338	emalloc(buf, char *, l+2, "get_src_buf");
1339	buflen = l + 2;
1340	} else if (l+2 > buflen) {
1341	erealloc(buf, char *, l+2, "get_src_buf");
1342	buflen = l + 2;
1343	} /* else
1344	buffer has room, just use it */
1345
1346	readcount = l;
1347	readloc = lexeme = lexptr = lexptr_begin = buf;
1348	samefile = TRUE;
1349	sourceline = 1;
1350	} else {
1351	/*
1352	* In same file, ran off edge of buffer.
1353	* Shift current line down to front, adjust
1354	* pointers and fill in the rest of the buffer.
1355	*/
1356
1357	int lexeme_offset = lexeme - lexptr_begin;
1358	int lexptr_offset = lexptr - lexptr_begin;
1359	int lexend_offset = lexend - lexptr_begin;
1360
1361	/* find beginning of current line */
1362	for (scan = lexeme; scan >= lexptr_begin; scan--) {
1363	if (*scan == '\n') {
1364	scan++;
1365	break;
1366	}
1367	}
1368
1369	if (scan <= buf) {
1370	/* have to grow the buffer */
1371	buflen *= 2;
1372	erealloc(buf, char *, buflen, "get_src_buf");
1373	} else {
1374	/* shift things down */
1375	memmove(buf, scan, lexend - scan);
1376	/*
1377	* make offsets relative to start of line,
1378	* not start of buffer.
1379	*/
1380	lexend_offset = lexend - scan;
1381	lexeme_offset = lexeme - scan;
1382	lexptr_offset = lexptr - scan;
1383	}
1384
1385	/* adjust pointers */
1386	lexeme = buf + lexeme_offset;
1387	lexptr = buf + lexptr_offset;
1388	lexend = buf + lexend_offset;
1389	lexptr_begin = buf;
1390	readcount = buflen - (lexend - buf);
1391	readloc = lexend;
1392	}
1393
1394	/* add more data to buffer */
1395	n = read(fd, readloc, readcount);
1396	if (n == -1)
1397	fatal(_("can't read sourcefile `%s' (%s)"),
1398	source, strerror(errno));
1399	if (n == 0) {
1400	if (newfile) {
1401	static int warned = FALSE;
1402
1403	if (do_lint && ! warned) {
1404	warned = TRUE;
1405	lintwarn(_("source file `%s' is empty"), source);
1406	}
1407	}
1408	if (fd != fileno(stdin)) /* safety */
1409	close(fd);
1410	samefile = FALSE;
1411	nextfile++;
1412	goto again;
1413	}
1414	lexend = lexptr + n;
1415	return lexptr;
1416	}
1417
1418	/* tokadd --- add a character to the token buffer */
1419
1420	#define tokadd(x) (*tok++ = (x), tok == tokend ? tokexpand() : tok)
1421
1422	/* tokexpand --- grow the token buffer */
1423
1424	char *
1425	tokexpand()
1426	{
1427	static int toksize = 60;
1428	int tokoffset;
1429
1430	tokoffset = tok - tokstart;
1431	toksize *= 2;
1432	if (tokstart != NULL)
1433	erealloc(tokstart, char *, toksize, "tokexpand");
1434	else
1435	emalloc(tokstart, char *, toksize, "tokexpand");
1436	tokend = tokstart + toksize;
1437	tok = tokstart + tokoffset;
1438	return tok;
1439	}
1440
/* nextc --- get the next input character */

/*
 * Returns the next byte of program text as an unsigned char value, or
 * EOF once get_src_buf() has nothing more to offer.  With MBS_SUPPORT
 * and a multibyte locale (gawk_mb_cur_max > 1) it also maintains
 * cur_char_ring / cur_ring_idx, which nextc_is_1stbyte consults.
 */

#ifdef MBS_SUPPORT

static int
nextc(void)
{
	if (gawk_mb_cur_max <= 1) {
		/* single-byte locale: plain byte-at-a-time read */
		if ((lexptr && lexptr < lexend) || get_src_buf())
			return (int) (unsigned char) *lexptr++;
		return EOF;
	}

	if (!lexptr || lexptr >= lexend) {
		if (! get_src_buf())
			return EOF;
	}

	/* Update the buffer index. */
	cur_ring_idx = (cur_ring_idx + 1) % RING_BUFFER_SIZE;

	/* A marker of 0 means this position has not been examined yet. */
	if (cur_char_ring[cur_ring_idx] == 0) {
		int idx;
		int slot = cur_ring_idx;
		mbstate_t tmp_state;
		size_t mbclen;

		/* Offer mbrlen() one more byte each round until it decides. */
		for (idx = 0; lexptr + idx < lexend; idx++) {
			tmp_state = cur_mbstate;
			mbclen = mbrlen(lexptr, idx + 1, &tmp_state);

			if (mbclen != (size_t)-2) {
				if (mbclen == 1 || mbclen == (size_t)-1 || mbclen == 0) {
					/*
					 * Single-byte character, invalid sequence
					 * or NUL: treat it as a single-byte
					 * character.
					 */
					cur_char_ring[slot] = 1;
				} else {
					/* mbclen > 1 */
					cur_char_ring[slot] = mbclen;
				}
				break;
			}

			/* It is not a complete multibyte character yet. */
			cur_char_ring[slot] = idx + 1;
			slot = (slot + 1) % RING_BUFFER_SIZE;
		}
		cur_mbstate = tmp_state;

		/* Put a mark on the position on which we write next character. */
		slot = (slot + 1) % RING_BUFFER_SIZE;
		cur_char_ring[slot] = 0;
	}

	return (int) (unsigned char) *lexptr++;
}

#else /* MBS_SUPPORT */

#if GAWKDEBUG
int
nextc(void)
{
	if ((lexptr && lexptr < lexend) || get_src_buf())
		return (int) (unsigned char) *lexptr++;

	return EOF;
}
#else
#define	nextc()	(((lexptr && lexptr < lexend) || get_src_buf()) ? \
			((int) (unsigned char) *lexptr++) : EOF)
#endif

#endif /* MBS_SUPPORT */
1535
1536	/* pushback --- push a character back on the input */
1537
1538	static inline void
1539	pushback(void)
1540	{
1541	#ifdef MBS_SUPPORT
1542	if (gawk_mb_cur_max > 1)
1543	cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 :
1544	cur_ring_idx - 1;
1545	#endif
1546	(lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr);
1547	}
1548
1549
1550	/* allow_newline --- allow newline after &&, \|\|, ? and : */
1551
1552	static void
1553	allow_newline(void)
1554	{
1555	int c;
1556
1557	for (;;) {
1558	c = nextc();
1559	if (c == EOF)
1560	break;
1561	if (c == '#') {
1562	while ((c = nextc()) != '\n' && c != EOF)
1563	continue;
1564	if (c == EOF)
1565	break;
1566	}
1567	if (c == '\n')
1568	sourceline++;
1569	if (! ISSPACE(c)) {
1570	pushback();
1571	break;
1572	}
1573	}
1574	}
1575
1576	/* yylex --- Read the input and turn it into tokens. */
1577
1578	static int
1579	yylex(void)
1580	{
1581	register int c;
1582	int seen_e = FALSE; /* These are for numbers */
1583	int seen_point = FALSE;
1584	int esc_seen; /* for literal strings */
1585	int mid;
1586	static int did_newline = FALSE;
1587	char *tokkey;
1588	static int lasttok = 0, eof_warned = FALSE;
1589	int inhex = FALSE;
1590	int intlstr = FALSE;
1591
1592	if (nextc() == EOF) {
1593	if (lasttok != NEWLINE) {
1594	lasttok = NEWLINE;
1595	if (do_lint && ! eof_warned) {
1596	lintwarn(_("source file does not end in newline"));
1597	eof_warned = TRUE;
1598	}
1599	return NEWLINE; /* fake it */
1600	}
1601	return 0;
1602	}
1603	pushback();
1604	#if defined OS2 \|\| defined __EMX__
1605	/*
1606	* added for OS/2's extproc feature of cmd.exe
1607	* (like #! in BSD sh)
1608	*/
1609	if (strncasecmp(lexptr, "extproc ", 8) == 0) {
1610	while (lexptr && lexptr != '\n')
1611	lexptr++;
1612	}
1613	#endif
1614	lexeme = lexptr;
1615	thisline = NULL;
1616	if (want_regexp) {
1617	int in_brack = 0; /* count brackets, [[:alnum:]] allowed */
1618	/*
1619	* Counting brackets is non-trivial. [[] is ok,
1620	* and so is [\]], with a point being that /[/]/ as a regexp
1621	* constant has to work.
1622	*
1623	* Do not count [ or ] if either one is preceded by a \.
1624	* A `[' should be counted if
1625	* a) it is the first one so far (in_brack == 0)
1626	* b) it is the `[' in `[:'
1627	* A ']' should be counted if not preceded by a \, since
1628	* it is either closing `:]' or just a plain list.
1629	* According to POSIX, []] is how you put a ] into a set.
1630	* Try to handle that too.
1631	*
1632	* The code for \ handles \[ and \].
1633	*/
1634
1635	want_regexp = FALSE;
1636	tok = tokstart;
1637	for (;;) {
1638	c = nextc();
1639
1640	if (gawk_mb_cur_max == 1 \|\| nextc_is_1stbyte) switch (c) {
1641	case '[':
1642	/* one day check for `.' and `=' too */
1643	if (nextc() == ':' \|\| in_brack == 0)
1644	in_brack++;
1645	pushback();
1646	break;
1647	case ']':
1648	if (tokstart[0] == '['
1649	&& (tok == tokstart + 1
1650	\|\| (tok == tokstart + 2
1651	&& tokstart[1] == '^')))
1652	/* do nothing */;
1653	else
1654	in_brack--;
1655	break;
1656	case '\\':
1657	if ((c = nextc()) == EOF) {
1658	yyerror(_("unterminated regexp ends with `\\' at end of file"));
1659	goto end_regexp; /* kludge */
1660	} else if (c == '\n') {
1661	sourceline++;
1662	continue;
1663	} else {
1664	tokadd('\\');
1665	tokadd(c);
1666	continue;
1667	}
1668	break;
1669	case '/': /* end of the regexp */
1670	if (in_brack > 0)
1671	break;
1672	end_regexp:
1673	tokadd('\0');
1674	yylval.sval = tokstart;
1675	if (do_lint) {
1676	int peek = nextc();
1677
1678	pushback();
1679	if (peek == 'i' \|\| peek == 's') {
1680	if (source)
1681	lintwarn(
1682	_("%s: %d: tawk regex modifier `/.../%c' doesn't work in gawk"),
1683	source, sourceline, peek);
1684	else
1685	lintwarn(
1686	_("tawk regex modifier `/.../%c' doesn't work in gawk"),
1687	peek);
1688	}
1689	}
1690	return lasttok = REGEXP;
1691	case '\n':
1692	pushback();
1693	yyerror(_("unterminated regexp"));
1694	goto end_regexp; /* kludge */
1695	case EOF:
1696	yyerror(_("unterminated regexp at end of file"));
1697	goto end_regexp; /* kludge */
1698	}
1699	tokadd(c);
1700	}
1701	}
1702	retry:
1703
1704	/* skipping \r is a hack, but windows is just too pervasive. sigh. */
1705	while ((c = nextc()) == ' ' \|\| c == '\t' \|\| c == '\r')
1706	continue;
1707
1708	lexeme = lexptr ? lexptr - 1 : lexptr;
1709	thisline = NULL;
1710	tok = tokstart;
1711	yylval.nodetypeval = Node_illegal;
1712
1713	if (gawk_mb_cur_max == 1 \|\| nextc_is_1stbyte) switch (c) {
1714	case EOF:
1715	if (lasttok != NEWLINE) {
1716	lasttok = NEWLINE;
1717	if (do_lint && ! eof_warned) {
1718	lintwarn(_("source file does not end in newline"));
1719	eof_warned = TRUE;
1720	}
1721	return NEWLINE; /* fake it */
1722	}
1723	return 0;
1724
1725	case '\n':
1726	sourceline++;
1727	return lasttok = NEWLINE;
1728
1729	case '#': /* it's a comment */
1730	while ((c = nextc()) != '\n') {
1731	if (c == EOF) {
1732	if (lasttok != NEWLINE) {
1733	lasttok = NEWLINE;
1734	if (do_lint && ! eof_warned) {
1735	lintwarn(
1736	_("source file does not end in newline"));
1737	eof_warned = TRUE;
1738	}
1739	return NEWLINE; /* fake it */
1740	}
1741	return 0;
1742	}
1743	}
1744	sourceline++;
1745	return lasttok = NEWLINE;
1746
1747	case '\\':
1748	#ifdef RELAXED_CONTINUATION
1749	/*
1750	* This code puports to allow comments and/or whitespace
1751	* after the `\' at the end of a line used for continuation.
1752	* Use it at your own risk. We think it's a bad idea, which
1753	* is why it's not on by default.
1754	*/
1755	if (! do_traditional) {
1756	/* strip trailing white-space and/or comment */
1757	while ((c = nextc()) == ' ' \|\| c == '\t' \|\| c == '\r')
1758	continue;
1759	if (c == '#') {
1760	if (do_lint)
1761	lintwarn(
1762	_("use of `\\ #...' line continuation is not portable"));
1763	while ((c = nextc()) != '\n')
1764	if (c == EOF)
1765	break;
1766	}
1767	pushback();
1768	}
1769	#endif /* RELAXED_CONTINUATION */
1770	if (nextc() == '\n') {
1771	sourceline++;
1772	goto retry;
1773	} else {
1774	yyerror(_("backslash not last character on line"));
1775	exit(1);
1776	}
1777	break;
1778
1779	case ':':
1780	case '?':
1781	if (! do_posix)
1782	allow_newline();
1783	return lasttok = c;
1784
1785	/*
1786	* in_parens is undefined unless we are parsing a print
1787	* statement (in_print), but why bother with a check?
1788	*/
1789	case ')':
1790	in_parens--;
1791	return lasttok = c;
1792
1793	case '(':
1794	in_parens++;
1795	/* FALL THROUGH */
1796	case '$':
1797	case ';':
1798	case '{':
1799	case ',':
1800	case '[':
1801	case ']':
1802	return lasttok = c;
1803
1804	case '*':
1805	if ((c = nextc()) == '=') {
1806	yylval.nodetypeval = Node_assign_times;
1807	return lasttok = ASSIGNOP;
1808	} else if (do_posix) {
1809	pushback();
1810	return lasttok = '*';
1811	} else if (c == '*') {
1812	/* make and = aliases for ^ and ^= */
1813	static int did_warn_op = FALSE, did_warn_assgn = FALSE;
1814
1815	if (nextc() == '=') {
1816	if (! did_warn_assgn) {
1817	did_warn_assgn = TRUE;
1818	if (do_lint)
1819	lintwarn(_("POSIX does not allow operator `**='"));
1820	if (do_lint_old)
1821	warning(_("old awk does not support operator `**='"));
1822	}
1823	yylval.nodetypeval = Node_assign_exp;
1824	return ASSIGNOP;
1825	} else {
1826	pushback();
1827	if (! did_warn_op) {
1828	did_warn_op = TRUE;
1829	if (do_lint)
1830	lintwarn(_("POSIX does not allow operator `**'"));
1831	if (do_lint_old)
1832	warning(_("old awk does not support operator `**'"));
1833	}
1834	return lasttok = '^';
1835	}
1836	}
1837	pushback();
1838	return lasttok = '*';
1839
1840	case '/':
1841	if (nextc() == '=') {
1842	pushback();
1843	return lasttok = SLASH_BEFORE_EQUAL;
1844	}
1845	pushback();
1846	return lasttok = '/';
1847
1848	case '%':
1849	if (nextc() == '=') {
1850	yylval.nodetypeval = Node_assign_mod;
1851	return lasttok = ASSIGNOP;
1852	}
1853	pushback();
1854	return lasttok = '%';
1855
1856	case '^':
1857	{
1858	static int did_warn_op = FALSE, did_warn_assgn = FALSE;
1859
1860	if (nextc() == '=') {
1861	if (do_lint_old && ! did_warn_assgn) {
1862	did_warn_assgn = TRUE;
1863	warning(_("operator `^=' is not supported in old awk"));
1864	}
1865	yylval.nodetypeval = Node_assign_exp;
1866	return lasttok = ASSIGNOP;
1867	}
1868	pushback();
1869	if (do_lint_old && ! did_warn_op) {
1870	did_warn_op = TRUE;
1871	warning(_("operator `^' is not supported in old awk"));
1872	}
1873	return lasttok = '^';
1874	}
1875
1876	case '+':
1877	if ((c = nextc()) == '=') {
1878	yylval.nodetypeval = Node_assign_plus;
1879	return lasttok = ASSIGNOP;
1880	}
1881	if (c == '+')
1882	return lasttok = INCREMENT;
1883	pushback();
1884	return lasttok = '+';
1885
1886	case '!':
1887	if ((c = nextc()) == '=') {
1888	yylval.nodetypeval = Node_notequal;
1889	return lasttok = RELOP;
1890	}
1891	if (c == '~') {
1892	yylval.nodetypeval = Node_nomatch;
1893	return lasttok = MATCHOP;
1894	}
1895	pushback();
1896	return lasttok = '!';
1897
1898	case '<':
1899	if (nextc() == '=') {
1900	yylval.nodetypeval = Node_leq;
1901	return lasttok = RELOP;
1902	}
1903	yylval.nodetypeval = Node_less;
1904	pushback();
1905	return lasttok = '<';
1906
1907	case '=':
1908	if (nextc() == '=') {
1909	yylval.nodetypeval = Node_equal;
1910	return lasttok = RELOP;
1911	}
1912	yylval.nodetypeval = Node_assign;
1913	pushback();
1914	return lasttok = ASSIGN;
1915
1916	case '>':
1917	if ((c = nextc()) == '=') {
1918	yylval.nodetypeval = Node_geq;
1919	return lasttok = RELOP;
1920	} else if (c == '>') {
1921	yylval.nodetypeval = Node_redirect_append;
1922	return lasttok = IO_OUT;
1923	}
1924	pushback();
1925	if (in_print && in_parens == 0) {
1926	yylval.nodetypeval = Node_redirect_output;
1927	return lasttok = IO_OUT;
1928	}
1929	yylval.nodetypeval = Node_greater;
1930	return lasttok = '>';
1931
1932	case '~':
1933	yylval.nodetypeval = Node_match;
1934	return lasttok = MATCHOP;
1935
1936	case '}':
1937	/*
1938	* Added did newline stuff. Easier than
1939	* hacking the grammar.
1940	*/
1941	if (did_newline) {
1942	did_newline = FALSE;
1943	return lasttok = c;
1944	}
1945	did_newline++;
1946	--lexptr; /* pick up } next time */
1947	return lasttok = NEWLINE;
1948
1949	case '"':
1950	string:
1951	esc_seen = FALSE;
1952	while ((c = nextc()) != '"') {
1953	if (c == '\n') {
1954	pushback();
1955	yyerror(_("unterminated string"));
1956	exit(1);
1957	}
1958	if ((gawk_mb_cur_max == 1 \|\| nextc_is_1stbyte) &&
1959	c == '\\') {
1960	c = nextc();
1961	if (c == '\n') {
1962	sourceline++;
1963	continue;
1964	}
1965	esc_seen = TRUE;
1966	tokadd('\\');
1967	}
1968	if (c == EOF) {
1969	pushback();
1970	yyerror(_("unterminated string"));
1971	exit(1);
1972	}
1973	tokadd(c);
1974	}
1975	yylval.nodeval = make_str_node(tokstart,
1976	tok - tokstart, esc_seen ? SCAN : 0);
1977	yylval.nodeval->flags \|= PERM;
1978	if (intlstr) {
1979	yylval.nodeval->flags \|= INTLSTR;
1980	intlstr = FALSE;
1981	if (do_intl)
1982	dumpintlstr(yylval.nodeval->stptr,
1983	yylval.nodeval->stlen);
1984	}
1985	return lasttok = YSTRING;
1986
1987	case '-':
1988	if ((c = nextc()) == '=') {
1989	yylval.nodetypeval = Node_assign_minus;
1990	return lasttok = ASSIGNOP;
1991	}
1992	if (c == '-')
1993	return lasttok = DECREMENT;
1994	pushback();
1995	return lasttok = '-';
1996
1997	case '.':
1998	c = nextc();
1999	pushback();
2000	if (! ISDIGIT(c))
2001	return lasttok = '.';
2002	else
2003	c = '.';
2004	/* FALL THROUGH */
2005	case '0':
2006	case '1':
2007	case '2':
2008	case '3':
2009	case '4':
2010	case '5':
2011	case '6':
2012	case '7':
2013	case '8':
2014	case '9':
2015	/* It's a number */
2016	for (;;) {
2017	int gotnumber = FALSE;
2018
2019	tokadd(c);
2020	switch (c) {
2021	case 'x':
2022	case 'X':
2023	if (do_traditional)
2024	goto done;
2025	if (tok == tokstart + 2) {
2026	int peek = nextc();
2027
2028	if (ISXDIGIT(peek)) {
2029	inhex = TRUE;
2030	pushback(); /* following digit */
2031	} else {
2032	pushback(); /* x or X */
2033	goto done;
2034	}
2035	}
2036	break;
2037	case '.':
2038	/* period ends exponent part of floating point number */
2039	if (seen_point \|\| seen_e) {
2040	gotnumber = TRUE;
2041	break;
2042	}
2043	seen_point = TRUE;
2044	break;
2045	case 'e':
2046	case 'E':
2047	if (inhex)
2048	break;
2049	if (seen_e) {
2050	gotnumber = TRUE;
2051	break;
2052	}
2053	seen_e = TRUE;
2054	if ((c = nextc()) == '-' \|\| c == '+') {
2055	int c2 = nextc();
2056
2057	if (ISDIGIT(c2)) {
2058	tokadd(c);
2059	tokadd(c2);
2060	} else {
2061	pushback(); /* non-digit after + or - */
2062	pushback(); /* + or - */
2063	pushback(); /* e or E */
2064	}
2065	} else if (! ISDIGIT(c)) {
2066	pushback(); /* character after e or E */
2067	pushback(); /* e or E */
2068	} else {
2069	pushback(); /* digit */
2070	}
2071	break;
2072	case 'a':
2073	case 'A':
2074	case 'b':
2075	case 'B':
2076	case 'c':
2077	case 'C':
2078	case 'D':
2079	case 'd':
2080	case 'f':
2081	case 'F':
2082	if (do_traditional \|\| ! inhex)
2083	goto done;
2084	/* fall through */
2085	case '0':
2086	case '1':
2087	case '2':
2088	case '3':
2089	case '4':
2090	case '5':
2091	case '6':
2092	case '7':
2093	case '8':
2094	case '9':
2095	break;
2096	default:
2097	done:
2098	gotnumber = TRUE;
2099	}
2100	if (gotnumber)
2101	break;
2102	c = nextc();
2103	}
2104	if (c != EOF)
2105	pushback();
2106	else if (do_lint && ! eof_warned) {
2107	lintwarn(_("source file does not end in newline"));
2108	eof_warned = TRUE;
2109	}
2110	tokadd('\0');
2111	if (! do_traditional && isnondecimal(tokstart, FALSE)) {
2112	if (do_lint) {
2113	if (ISDIGIT(tokstart[1])) /* not an 'x' or 'X' */
2114	lintwarn("numeric constant `%.*s' treated as octal",
2115	(int) strlen(tokstart)-1, tokstart);
2116	else if (tokstart[1] == 'x' \|\| tokstart[1] == 'X')
2117	lintwarn("numeric constant `%.*s' treated as hexadecimal",
2118	(int) strlen(tokstart)-1, tokstart);
2119	}
2120	yylval.nodeval = make_number(nondec2awknum(tokstart, strlen(tokstart)));
2121	} else
2122	yylval.nodeval = make_number(atof(tokstart));
2123	yylval.nodeval->flags \|= PERM;
2124	return lasttok = YNUMBER;
2125
2126	case '&':
2127	if ((c = nextc()) == '&') {
2128	yylval.nodetypeval = Node_and;
2129	allow_newline();
2130	return lasttok = LEX_AND;
2131	}
2132	pushback();
2133	return lasttok = '&';
2134
2135	case '\|':
2136	if ((c = nextc()) == '\|') {
2137	yylval.nodetypeval = Node_or;
2138	allow_newline();
2139	return lasttok = LEX_OR;
2140	} else if (! do_traditional && c == '&') {
2141	yylval.nodetypeval = Node_redirect_twoway;
2142	return lasttok = (in_print && in_parens == 0 ? IO_OUT : IO_IN);
2143	}
2144	pushback();
2145	if (in_print && in_parens == 0) {
2146	yylval.nodetypeval = Node_redirect_pipe;
2147	return lasttok = IO_OUT;
2148	} else {
2149	yylval.nodetypeval = Node_redirect_pipein;
2150	return lasttok = IO_IN;
2151	}
2152	}
2153
2154	if (c != '_' && ! ISALPHA(c)) {
2155	yyerror(_("invalid char '%c' in expression"), c);
2156	exit(1);
2157	}
2158
2159	/*
2160	* Lots of fog here. Consider:
2161	*
2162	* print "xyzzy"$_"foo"
2163	*
2164	* Without the check for ` lasttok != '$' ', this is parsed as
2165	*
2166	* print "xxyzz" $(_"foo")
2167	*
2168	* With the check, it is "correctly" parsed as three
2169	* string concatenations. Sigh. This seems to be
2170	* "more correct", but this is definitely one of those
2171	* occasions where the interactions are funny.
2172	*/
2173	if (! do_traditional && c == '_' && lasttok != '$') {
2174	if ((c = nextc()) == '"') {
2175	intlstr = TRUE;
2176	goto string;
2177	}
2178	pushback();
2179	c = '_';
2180	}
2181
2182	/* it's some type of name-type-thing. Find its length. */
2183	tok = tokstart;
2184	while (is_identchar(c)) {
2185	tokadd(c);
2186	c = nextc();
2187	}
2188	tokadd('\0');
2189	emalloc(tokkey, char *, tok - tokstart, "yylex");
2190	memcpy(tokkey, tokstart, tok - tokstart);
2191	if (c != EOF)
2192	pushback();
2193	else if (do_lint && ! eof_warned) {
2194	lintwarn(_("source file does not end in newline"));
2195	eof_warned = TRUE;
2196	}
2197
2198	/* See if it is a special token. */
2199
2200	if ((mid = check_special(tokstart)) >= 0) {
2201	if (do_lint) {
2202	if (tokentab[mid].flags & GAWKX)
2203	lintwarn(_("`%s' is a gawk extension"),
2204	tokentab[mid].operator);
2205	if (tokentab[mid].flags & RESX)
2206	lintwarn(_("`%s' is a Bell Labs extension"),
2207	tokentab[mid].operator);
2208	if (tokentab[mid].flags & NOT_POSIX)
2209	lintwarn(_("POSIX does not allow `%s'"),
2210	tokentab[mid].operator);
2211	}
2212	if (do_lint_old && (tokentab[mid].flags & NOT_OLD))
2213	warning(_("`%s' is not supported in old awk"),
2214	tokentab[mid].operator);
2215	if ((do_traditional && (tokentab[mid].flags & GAWKX))
2216	\|\| (do_posix && (tokentab[mid].flags & NOT_POSIX)))
2217	;
2218	else {
2219	if (tokentab[mid].class == LEX_BUILTIN
2220	\|\| tokentab[mid].class == LEX_LENGTH)
2221	yylval.lval = mid;
2222	else
2223	yylval.nodetypeval = tokentab[mid].value;
2224	free(tokkey);
2225	return lasttok = tokentab[mid].class;
2226	}
2227	}
2228
2229	yylval.sval = tokkey;
2230	if (*lexptr == '(')
2231	return lasttok = FUNC_CALL;
2232	else {
2233	static short goto_warned = FALSE;
2234
2235	#define SMART_ALECK 1
2236	if (SMART_ALECK && do_lint
2237	&& ! goto_warned && strcasecmp(tokkey, "goto") == 0) {
2238	goto_warned = TRUE;
2239	lintwarn(_("`goto' considered harmful!\n"));
2240	}
2241	return lasttok = NAME;
2242	}
2243	}
2244
2245	/* node_common --- common code for allocating a new node */
2246
2247	static NODE *
2248	node_common(NODETYPE op)
2249	{
2250	register NODE *r;
2251
2252	getnode(r);
2253	r->type = op;
2254	r->flags = MALLOC;
2255	/* if lookahead is a NL, lineno is 1 too high */
2256	if (lexeme && lexeme >= lexptr_begin && *lexeme == '\n')
2257	r->source_line = sourceline - 1;
2258	else
2259	r->source_line = sourceline;
2260	r->source_file = source;
2261	return r;
2262	}
2263
2264	/* node --- allocates a node with defined lnode and rnode. */
2265
2266	NODE *
2267	node(NODE left, NODETYPE op, NODE right)
2268	{
2269	register NODE *r;
2270
2271	r = node_common(op);
2272	r->lnode = left;
2273	r->rnode = right;
2274	return r;
2275	}
2276
2277	/* snode --- allocate a node with defined subnode and builtin for builtin
2278	functions. Checks for arg. count and supplies defaults where
2279	possible. */
2280
2281	static NODE *
2282	snode(NODE *subn, NODETYPE op, int idx)
2283	{
2284	register NODE *r;
2285	register NODE *n;
2286	int nexp = 0;
2287	int args_allowed;
2288
2289	r = node_common(op);
2290
2291	/* traverse expression list to see how many args. given */
2292	for (n = subn; n != NULL; n = n->rnode) {
2293	nexp++;
2294	if (nexp > 5)
2295	break;
2296	}
2297
2298	/* check against how many args. are allowed for this builtin */
2299	args_allowed = tokentab[idx].flags & ARGS;
2300	if (args_allowed && (args_allowed & A(nexp)) == 0)
2301	fatal(_("%d is invalid as number of arguments for %s"),
2302	nexp, tokentab[idx].operator);
2303
2304	r->builtin = tokentab[idx].ptr;
2305
2306	/* special case processing for a few builtins */
2307	if (nexp == 0 && r->builtin == do_length) {
2308	subn = node(node(make_number(0.0), Node_field_spec, (NODE *) NULL),
2309	Node_expression_list,
2310	(NODE *) NULL);
2311	} else if (r->builtin == do_match) {
2312	static short warned = FALSE;
2313
2314	if (subn->rnode->lnode->type != Node_regex)
2315	subn->rnode->lnode = mk_rexp(subn->rnode->lnode);
2316
2317	if (subn->rnode->rnode != NULL) { /* 3rd argument there */
2318	if (do_lint && ! warned) {
2319	warned = TRUE;
2320	lintwarn(_("match: third argument is a gawk extension"));
2321	}
2322	if (do_traditional)
2323	fatal(_("match: third argument is a gawk extension"));
2324	}
2325	} else if (r->builtin == do_sub \|\| r->builtin == do_gsub) {
2326	if (subn->lnode->type != Node_regex)
2327	subn->lnode = mk_rexp(subn->lnode);
2328	if (nexp == 2)
2329	append_right(subn, node(node(make_number(0.0),
2330	Node_field_spec,
2331	(NODE *) NULL),
2332	Node_expression_list,
2333	(NODE *) NULL));
2334	else if (subn->rnode->rnode->lnode->type == Node_val) {
2335	if (do_lint)
2336	lintwarn(_("%s: string literal as last arg of substitute has no effect"),
2337	(r->builtin == do_sub) ? "sub" : "gsub");
2338	} else if (! isassignable(subn->rnode->rnode->lnode)) {
2339	yyerror(_("%s third parameter is not a changeable object"),
2340	(r->builtin == do_sub) ? "sub" : "gsub");
2341	}
2342	} else if (r->builtin == do_gensub) {
2343	if (subn->lnode->type != Node_regex)
2344	subn->lnode = mk_rexp(subn->lnode);
2345	if (nexp == 3)
2346	append_right(subn, node(node(make_number(0.0),
2347	Node_field_spec,
2348	(NODE *) NULL),
2349	Node_expression_list,
2350	(NODE *) NULL));
2351	} else if (r->builtin == do_split) {
2352	if (nexp == 2)
2353	append_right(subn,
2354	node(FS_node, Node_expression_list, (NODE *) NULL));
2355	n = subn->rnode->rnode->lnode;
2356	if (n->type != Node_regex)
2357	subn->rnode->rnode->lnode = mk_rexp(n);
2358	if (nexp == 2)
2359	subn->rnode->rnode->lnode->re_flags \|= FS_DFLT;
2360	} else if (r->builtin == do_close) {
2361	static short warned = FALSE;
2362
2363	if ( nexp == 2) {
2364	if (do_lint && nexp == 2 && ! warned) {
2365	warned = TRUE;
2366	lintwarn(_("close: second argument is a gawk extension"));
2367	}
2368	if (do_traditional)
2369	fatal(_("close: second argument is a gawk extension"));
2370	}
2371	} else if (do_intl /* --gen-po */
2372	&& r->builtin == do_dcgettext /* dcgettext(...) */
2373	&& subn->lnode->type == Node_val /* 1st arg is constant */
2374	&& (subn->lnode->flags & STRCUR) != 0) { /* it's a string constant */
2375	/* ala xgettext, dcgettext("some string" ...) dumps the string */
2376	NODE *str = subn->lnode;
2377
2378	if ((str->flags & INTLSTR) != 0)
2379	warning(_("use of dcgettext(_\"...\") is incorrect: remove leading underscore"));
2380	/* don't dump it, the lexer already did */
2381	else
2382	dumpintlstr(str->stptr, str->stlen);
2383	} else if (do_intl /* --gen-po */
2384	&& r->builtin == do_dcngettext /* dcngettext(...) */
2385	&& subn->lnode->type == Node_val /* 1st arg is constant */
2386	&& (subn->lnode->flags & STRCUR) != 0 /* it's a string constant */
2387	&& subn->rnode->lnode->type == Node_val /* 2nd arg is constant too */
2388	&& (subn->rnode->lnode->flags & STRCUR) != 0) { /* it's a string constant */
2389	/* ala xgettext, dcngettext("some string", "some plural" ...) dumps the string */
2390	NODE *str1 = subn->lnode;
2391	NODE *str2 = subn->rnode->lnode;
2392
2393	if (((str1->flags \| str2->flags) & INTLSTR) != 0)
2394	warning(_("use of dcngettext(_\"...\") is incorrect: remove leading underscore"));
2395	else
2396	dumpintlstr2(str1->stptr, str1->stlen, str2->stptr, str2->stlen);
2397	}
2398
2399	r->subnode = subn;
2400	if (r->builtin == do_sprintf) {
2401	count_args(r);
2402	r->lnode->printf_count = r->printf_count; /* hack */
2403	}
2404	return r;
2405	}
2406
2407	/* make_for_loop --- build a for loop */
2408
2409	static NODE *
2410	make_for_loop(NODE init, NODE cond, NODE *incr)
2411	{
2412	register FOR_LOOP_HEADER *r;
2413	NODE *n;
2414
2415	emalloc(r, FOR_LOOP_HEADER *, sizeof(FOR_LOOP_HEADER), "make_for_loop");
2416	getnode(n);
2417	n->type = Node_illegal;
2418	r->init = init;
2419	r->cond = cond;
2420	r->incr = incr;
2421	n->sub.nodep.r.hd = r;
2422	return n;
2423	}
2424
2425	/* dup_parms --- return TRUE if there are duplicate parameters */
2426
2427	static int
2428	dup_parms(NODE *func)
2429	{
2430	register NODE *np;
2431	const char fname, *names;
2432	int count, i, j, dups;
2433	NODE *params;
2434
2435	if (func == NULL) /* error earlier */
2436	return TRUE;
2437
2438	fname = func->param;
2439	count = func->param_cnt;
2440	params = func->rnode;
2441
2442	if (count == 0) /* no args, no problem */
2443	return FALSE;
2444
2445	if (params == NULL) /* error earlier */
2446	return TRUE;
2447
2448	emalloc(names, const char *, count sizeof(char *), "dup_parms");
2449
2450	i = 0;
2451	for (np = params; np != NULL; np = np->rnode) {
2452	if (np->param == NULL) { /* error earlier, give up, go home */
2453	free(names);
2454	return TRUE;
2455	}
2456	names[i++] = np->param;
2457	}
2458
2459	dups = 0;
2460	for (i = 1; i < count; i++) {
2461	for (j = 0; j < i; j++) {
2462	if (strcmp(names[i], names[j]) == 0) {
2463	dups++;
2464	error(
2465	_("function `%s': parameter #%d, `%s', duplicates parameter #%d"),
2466	fname, i+1, names[j], j+1);
2467	}
2468	}
2469	}
2470
2471	free(names);
2472	return (dups > 0 ? TRUE : FALSE);
2473	}
2474
2475	/* parms_shadow --- check if parameters shadow globals */
2476
2477	static int
2478	parms_shadow(const char fname, NODE func)
2479	{
2480	int count, i;
2481	int ret = FALSE;
2482
2483	if (fname == NULL \|\| func == NULL) /* error earlier */
2484	return FALSE;
2485
2486	count = func->lnode->param_cnt;
2487
2488	if (count == 0) /* no args, no problem */
2489	return FALSE;
2490
2491	/*
2492	* Use warning() and not lintwarn() so that can warn
2493	* about all shadowed parameters.
2494	*/
2495	for (i = 0; i < count; i++) {
2496	if (lookup(func->parmlist[i]) != NULL) {
2497	warning(
2498	_("function `%s': parameter `%s' shadows global variable"),
2499	fname, func->parmlist[i]);
2500	ret = TRUE;
2501	}
2502	}
2503
2504	return ret;
2505	}
2506
2507	/*
2508	* install:
2509	* Install a name in the symbol table, even if it is already there.
2510	* Caller must check against redefinition if that is desired.
2511	*/
2512
2513	NODE *
2514	install(char name, NODE value)
2515	{
2516	register NODE *hp;
2517	register size_t len;
2518	register int bucket;
2519
2520	var_count++;
2521	len = strlen(name);
2522	bucket = hash(name, len, (unsigned long) HASHSIZE);
2523	getnode(hp);
2524	hp->type = Node_hashnode;
2525	hp->hnext = variables[bucket];
2526	variables[bucket] = hp;
2527	hp->hlength = len;
2528	hp->hvalue = value;
2529	hp->hname = name;
2530	hp->hvalue->vname = name;
2531	return hp->hvalue;
2532	}
2533
2534	/* lookup --- find the most recent hash node for name installed by install */
2535
2536	NODE *
2537	lookup(const char *name)
2538	{
2539	register NODE *bucket;
2540	register size_t len;
2541
2542	len = strlen(name);
2543	for (bucket = variables[hash(name, len, (unsigned long) HASHSIZE)];
2544	bucket != NULL; bucket = bucket->hnext)
2545	if (bucket->hlength == len && STREQN(bucket->hname, name, len))
2546	return bucket->hvalue;
2547
2548	return NULL;
2549	}
2550
2551	/* var_comp --- compare two variable names */
2552
2553	static int
2554	var_comp(const void v1, const void v2)
2555	{
2556	const NODE const npp1, const npp2;
2557	const NODE n1, n2;
2558	int minlen;
2559
2560	npp1 = (const NODE const ) v1;
2561	npp2 = (const NODE const ) v2;
2562	n1 = *npp1;
2563	n2 = *npp2;
2564
2565	if (n1->hlength > n2->hlength)
2566	minlen = n1->hlength;
2567	else
2568	minlen = n2->hlength;
2569
2570	return strncmp(n1->hname, n2->hname, minlen);
2571	}
2572
2573	/* valinfo --- dump var info */
2574
2575	static void
2576	valinfo(NODE n, FILE fp)
2577	{
2578	if (n->flags & STRING) {
2579	fprintf(fp, "string (");
2580	pp_string_fp(fp, n->stptr, n->stlen, '"', FALSE);
2581	fprintf(fp, ")\n");
2582	} else if (n->flags & NUMBER)
2583	fprintf(fp, "number (%.17g)\n", n->numbr);
2584	else if (n->flags & STRCUR) {
2585	fprintf(fp, "string value (");
2586	pp_string_fp(fp, n->stptr, n->stlen, '"', FALSE);
2587	fprintf(fp, ")\n");
2588	} else if (n->flags & NUMCUR)
2589	fprintf(fp, "number value (%.17g)\n", n->numbr);
2590	else
2591	fprintf(fp, "?? flags %s\n", flags2str(n->flags));
2592	}
2593
2594
2595	/* dump_vars --- dump the symbol table */
2596
2597	void
2598	dump_vars(const char *fname)
2599	{
2600	int i, j;
2601	NODE **table;
2602	NODE *p;
2603	FILE *fp;
2604
2605	emalloc(table, NODE *, var_count sizeof(NODE *), "dump_vars");
2606
2607	if (fname == NULL)
2608	fp = stderr;
2609	else if ((fp = fopen(fname, "w")) == NULL) {
2610	warning(_("could not open `%s' for writing (%s)"), fname, strerror(errno));
2611	warning(_("sending profile to standard error"));
2612	fp = stderr;
2613	}
2614
2615	for (i = j = 0; i < HASHSIZE; i++)
2616	for (p = variables[i]; p != NULL; p = p->hnext)
2617	table[j++] = p;
2618
2619	assert(j == var_count);
2620
2621	/* Shazzam! */
2622	qsort(table, j, sizeof(NODE *), var_comp);
2623
2624	for (i = 0; i < j; i++) {
2625	p = table[i];
2626	if (p->hvalue->type == Node_func)
2627	continue;
2628	fprintf(fp, "%.*s: ", (int) p->hlength, p->hname);
2629	if (p->hvalue->type == Node_var_array)
2630	fprintf(fp, "array, %ld elements\n", p->hvalue->table_size);
2631	else if (p->hvalue->type == Node_var_new)
2632	fprintf(fp, "unused variable\n");
2633	else if (p->hvalue->type == Node_var)
2634	valinfo(p->hvalue->var_value, fp);
2635	else {
2636	NODE **lhs = get_lhs(p->hvalue, NULL, FALSE);
2637
2638	valinfo(*lhs, fp);
2639	}
2640	}
2641
2642	if (fp != stderr && fclose(fp) != 0)
2643	warning(_("%s: close failed (%s)"), fname, strerror(errno));
2644
2645	free(table);
2646	}
2647
2648	/* release_all_vars --- free all variable memory */
2649
2650	void
2651	release_all_vars()
2652	{
2653	int i;
2654	NODE p, next;
2655
2656	for (i = 0; i < HASHSIZE; i++)
2657	for (p = variables[i]; p != NULL; p = next) {
2658	next = p->hnext;
2659
2660	if (p->hvalue->type == Node_func)
2661	continue;
2662	else if (p->hvalue->type == Node_var_array)
2663	assoc_clear(p->hvalue);
2664	else if (p->hvalue->type != Node_var_new) {
2665	NODE **lhs = get_lhs(p->hvalue, NULL, FALSE);
2666
2667	unref(*lhs);
2668	}
2669	unref(p);
2670	}
2671	}
2672
2673	/* finfo --- for use in comparison and sorting of function names */
2674
2675	struct finfo {
2676	const char *name;
2677	size_t nlen;
2678	NODE *func;
2679	};
2680
2681	/* fcompare --- comparison function for qsort */
2682
2683	static int
2684	fcompare(const void p1, const void p2)
2685	{
2686	const struct finfo f1, f2;
2687	int minlen;
2688
2689	f1 = (const struct finfo *) p1;
2690	f2 = (const struct finfo *) p2;
2691
2692	if (f1->nlen > f2->nlen)
2693	minlen = f2->nlen;
2694	else
2695	minlen = f1->nlen;
2696
2697	return strncmp(f1->name, f2->name, minlen);
2698	}
2699
2700	/* dump_funcs --- print all functions */
2701
2702	void
2703	dump_funcs()
2704	{
2705	int i, j;
2706	NODE *p;
2707	struct finfo *tab = NULL;
2708
2709	/*
2710	* Walk through symbol table countng functions.
2711	* Could be more than func_count if there are
2712	* extension functions.
2713	*/
2714	for (i = j = 0; i < HASHSIZE; i++) {
2715	for (p = variables[i]; p != NULL; p = p->hnext) {
2716	if (p->hvalue->type == Node_func) {
2717	j++;
2718	}
2719	}
2720	}
2721
2722	if (j == 0)
2723	return;
2724
2725	emalloc(tab, struct finfo , j sizeof(struct finfo), "dump_funcs");
2726
2727	/* now walk again, copying info */
2728	for (i = j = 0; i < HASHSIZE; i++) {
2729	for (p = variables[i]; p != NULL; p = p->hnext) {
2730	if (p->hvalue->type == Node_func) {
2731	tab[j].name = p->hname;
2732	tab[j].nlen = p->hlength;
2733	tab[j].func = p->hvalue;
2734	j++;
2735	}
2736	}
2737	}
2738
2739
2740	/* Shazzam! */
2741	qsort(tab, j, sizeof(struct finfo), fcompare);
2742
2743	for (i = 0; i < j; i++)
2744	pp_func(tab[i].name, tab[i].nlen, tab[i].func);
2745
2746	free(tab);
2747	}
2748
2749	/* shadow_funcs --- check all functions for parameters that shadow globals */
2750
2751	void
2752	shadow_funcs()
2753	{
2754	int i, j;
2755	NODE *p;
2756	struct finfo *tab;
2757	static int calls = 0;
2758	int shadow = FALSE;
2759
2760	if (func_count == 0)
2761	return;
2762
2763	if (calls++ != 0)
2764	fatal(_("shadow_funcs() called twice!"));
2765
2766	emalloc(tab, struct finfo , func_count sizeof(struct finfo), "shadow_funcs");
2767
2768	for (i = j = 0; i < HASHSIZE; i++) {
2769	for (p = variables[i]; p != NULL; p = p->hnext) {
2770	if (p->hvalue->type == Node_func) {
2771	tab[j].name = p->hname;
2772	tab[j].nlen = p->hlength;
2773	tab[j].func = p->hvalue;
2774	j++;
2775	}
2776	}
2777	}
2778
2779	assert(j == func_count);
2780
2781	/* Shazzam! */
2782	qsort(tab, func_count, sizeof(struct finfo), fcompare);
2783
2784	for (i = 0; i < j; i++)
2785	shadow \|= parms_shadow(tab[i].name, tab[i].func);
2786
2787	free(tab);
2788
2789	/* End with fatal if the user requested it. */
2790	if (shadow && lintfunc != warning)
2791	lintwarn(_("there were shadowed variables."));
2792	}
2793
2794	/*
2795	* append_right:
2796	* Add new to the rightmost branch of LIST. This uses n^2 time, so we make
2797	* a simple attempt at optimizing it.
2798	*/
2799
2800	static NODE *
2801	append_right(NODE list, NODE new)
2802	{
2803	register NODE *oldlist;
2804	static NODE savefront = NULL, savetail = NULL;
2805
2806	if (list == NULL \|\| new == NULL)
2807	return list;
2808
2809	oldlist = list;
2810	if (savefront == oldlist)
2811	list = savetail; /* Be careful: maybe list->rnode != NULL */
2812	else
2813	savefront = oldlist;
2814
2815	while (list->rnode != NULL)
2816	list = list->rnode;
2817	savetail = list->rnode = new;
2818	return oldlist;
2819	}
2820
2821	/*
2822	* append_pattern:
2823	* A wrapper around append_right, used for rule lists.
2824	*/
2825	static inline NODE *
2826	append_pattern(NODE *list, NODE patt)
2827	{
2828	NODE n = node(patt, Node_rule_node, (NODE ) NULL);
2829
2830	if (*list == NULL)
2831	*list = n;
2832	else {
2833	NODE n1 = node(n, Node_rule_list, (NODE ) NULL);
2834	if ((*list)->type != Node_rule_list)
2835	list = node(list, Node_rule_list, n1);
2836	else
2837	(void) append_right(*list, n1);
2838	}
2839	return n;
2840	}
2841
2842	/*
2843	* func_install:
2844	* check if name is already installed; if so, it had better have Null value,
2845	* in which case def is added as the value. Otherwise, install name with def
2846	* as value.
2847	*
2848	* Extra work, build up and save a list of the parameter names in a table
2849	* and hang it off params->parmlist. This is used to set the `vname' field
2850	* of each function parameter during a function call. See eval.c.
2851	*/
2852
2853	static void
2854	func_install(NODE params, NODE def)
2855	{
2856	NODE r, n, *thisfunc;
2857	char *pnames, names, *sp;
2858	size_t pcount = 0, space = 0;
2859	int i;
2860
2861	/* check for function foo(foo) { ... }. bleah. */
2862	for (n = params->rnode; n != NULL; n = n->rnode) {
2863	if (strcmp(n->param, params->param) == 0)
2864	fatal(_("function `%s': can't use function name as parameter name"),
2865	params->param);
2866	}
2867
2868	thisfunc = NULL; /* turn off warnings */
2869
2870	/* symbol table managment */
2871	pop_var(params, FALSE);
2872	r = lookup(params->param);
2873	if (r != NULL) {
2874	fatal(_("function name `%s' previously defined"), params->param);
2875	} else if (params->param == builtin_func) /* not a valid function name */
2876	goto remove_params;
2877
2878	/* install the function */
2879	thisfunc = node(params, Node_func, def);
2880	(void) install(params->param, thisfunc);
2881
2882	/* figure out amount of space to allocate for variable names */
2883	for (n = params->rnode; n != NULL; n = n->rnode) {
2884	pcount++;
2885	space += strlen(n->param) + 1;
2886	}
2887
2888	/* allocate it and fill it in */
2889	if (pcount != 0) {
2890	emalloc(names, char *, space, "func_install");
2891	emalloc(pnames, char *, pcount sizeof(char *), "func_install");
2892	sp = names;
2893	for (i = 0, n = params->rnode; i < pcount; i++, n = n->rnode) {
2894	pnames[i] = sp;
2895	strcpy(sp, n->param);
2896	sp += strlen(n->param) + 1;
2897	}
2898	thisfunc->parmlist = pnames;
2899	} else {
2900	thisfunc->parmlist = NULL;
2901	}
2902
2903	/* update lint table info */
2904	func_use(params->param, FUNC_DEFINE);
2905
2906	func_count++; /* used by profiling / pretty printer */
2907
2908	remove_params:
2909	/* remove params from symbol table */
2910	pop_params(params->rnode);
2911	}
2912
2913	/* pop_var --- remove a variable from the symbol table */
2914
2915	static void
2916	pop_var(NODE *np, int freeit)
2917	{
2918	register NODE bucket, *save;
2919	register size_t len;
2920	char *name;
2921
2922	name = np->param;
2923	len = strlen(name);
2924	save = &(variables[hash(name, len, (unsigned long) HASHSIZE)]);
2925	for (bucket = *save; bucket != NULL; bucket = bucket->hnext) {
2926	if (len == bucket->hlength && STREQN(bucket->hname, name, len)) {
2927	var_count--;
2928	*save = bucket->hnext;
2929	freenode(bucket);
2930	if (freeit)
2931	free(np->param);
2932	return;
2933	}
2934	save = &(bucket->hnext);
2935	}
2936	}
2937
2938	/* pop_params --- remove list of function parameters from symbol table */
2939
2940	/*
2941	* pop parameters out of the symbol table. do this in reverse order to
2942	* avoid reading freed memory if there were duplicated parameters.
2943	*/
2944	static void
2945	pop_params(NODE *params)
2946	{
2947	if (params == NULL)
2948	return;
2949	pop_params(params->rnode);
2950	pop_var(params, TRUE);
2951	}
2952
2953	/* make_param --- make NAME into a function parameter */
2954
2955	static NODE *
2956	make_param(char *name)
2957	{
2958	NODE *r;
2959
2960	getnode(r);
2961	r->type = Node_param_list;
2962	r->rnode = NULL;
2963	r->param = name;
2964	r->param_cnt = param_counter++;
2965	return (install(name, r));
2966	}
2967
/* fdesc --- one record per function name seen by func_use() */

struct fdesc {
	char *name;		/* private copy of the function name */
	short used;		/* bumped by func_use() for anything but FUNC_DEFINE */
	short defined;		/* bumped by func_use() for FUNC_DEFINE */
	struct fdesc *next;	/* next record on the same hash chain */
};

/* hash table of function records, indexed by hash() of the name */
static struct fdesc *ftable[HASHSIZE];
2974
2975	/* func_use --- track uses and definitions of functions */
2976
2977	static void
2978	func_use(const char *name, enum defref how)
2979	{
2980	struct fdesc *fp;
2981	int len;
2982	int ind;
2983
2984	len = strlen(name);
2985	ind = hash(name, len, HASHSIZE);
2986
2987	for (fp = ftable[ind]; fp != NULL; fp = fp->next) {
2988	if (strcmp(fp->name, name) == 0) {
2989	if (how == FUNC_DEFINE)
2990	fp->defined++;
2991	else
2992	fp->used++;
2993	return;
2994	}
2995	}
2996
2997	/* not in the table, fall through to allocate a new one */
2998
2999	emalloc(fp, struct fdesc *, sizeof(struct fdesc), "func_use");
3000	memset(fp, '\0', sizeof(struct fdesc));
3001	emalloc(fp->name, char *, len + 1, "func_use");
3002	strcpy(fp->name, name);
3003	if (how == FUNC_DEFINE)
3004	fp->defined++;
3005	else
3006	fp->used++;
3007	fp->next = ftable[ind];
3008	ftable[ind] = fp;
3009	}
3010
3011	/* check_funcs --- verify functions that are called but not defined */
3012
3013	static void
3014	check_funcs()
3015	{
3016	struct fdesc fp, next;
3017	int i;
3018
3019	for (i = 0; i < HASHSIZE; i++) {
3020	for (fp = ftable[i]; fp != NULL; fp = fp->next) {
3021	#ifdef REALLYMEAN
3022	/* making this the default breaks old code. sigh. */
3023	if (fp->defined == 0) {
3024	error(
3025	_("function `%s' called but never defined"), fp->name);
3026	errcount++;
3027	}
3028	#else
3029	if (do_lint && fp->defined == 0)
3030	lintwarn(
3031	_("function `%s' called but never defined"), fp->name);
3032	#endif
3033	if (do_lint && fp->used == 0) {
3034	lintwarn(_("function `%s' defined but never called"),
3035	fp->name);
3036	}
3037	}
3038	}
3039
3040	/* now let's free all the memory */
3041	for (i = 0; i < HASHSIZE; i++) {
3042	for (fp = ftable[i]; fp != NULL; fp = next) {
3043	next = fp->next;
3044	free(fp->name);
3045	free(fp);
3046	}
3047	}
3048	}
3049
3050	/* param_sanity --- look for parameters that are regexp constants */
3051
3052	static void
3053	param_sanity(NODE *arglist)
3054	{
3055	NODE argp, arg;
3056	int i;
3057
3058	for (i = 1, argp = arglist; argp != NULL; argp = argp->rnode, i++) {
3059	arg = argp->lnode;
3060	if (arg->type == Node_regex)
3061	warning(_("regexp constant for parameter #%d yields boolean value"), i);
3062	}
3063	}
3064
3065	/* deferred varibles --- those that are only defined if needed. */
3066
3067	/*
3068	* Is there any reason to use a hash table for deferred variables? At the
3069	* moment, there are only 1 to 3 such variables, so it may not be worth
3070	* the overhead. If more modules start using this facility, it should
3071	* probably be converted into a hash table.
3072	*/
3073
3074	static struct deferred_variable {
3075	NODE (load_func)(void);
3076	struct deferred_variable *next;
3077	char name[1]; /* variable-length array */
3078	} *deferred_variables;
3079
3080	/* register_deferred_variable --- add a var name and loading function to the list */
3081
3082	void
3083	register_deferred_variable(const char name, NODE (*load_func)(void))
3084	{
3085	struct deferred_variable *dv;
3086	size_t sl = strlen(name);
3087
3088	emalloc(dv, struct deferred_variable , sizeof(dv)+sl,
3089	"register_deferred_variable");
3090	dv->load_func = load_func;
3091	dv->next = deferred_variables;
3092	memcpy(dv->name, name, sl+1);
3093	deferred_variables = dv;
3094	}
3095
3096	/* variable --- make sure NAME is in the symbol table */
3097
3098	NODE *
3099	variable(char *name, int can_free, NODETYPE type)
3100	{
3101	register NODE *r;
3102
3103	if ((r = lookup(name)) != NULL) {
3104	if (r->type == Node_func)
3105	fatal(_("function `%s' called with space between name and `(',\nor used as a variable or an array"),
3106	r->vname);
3107
3108	} else {
3109	/* not found */
3110	struct deferred_variable *dv;
3111
3112	for (dv = deferred_variables; TRUE; dv = dv->next) {
3113	if (dv == NULL) {
3114	/*
3115	* This is the only case in which we may not
3116	* free the string.
3117	*/
3118	NODE *n;
3119
3120	if (type == Node_var_array)
3121	n = node(NULL, type, NULL);
3122	else
3123	n = node(Nnull_string, type, NULL);
3124
3125	return install(name, n);
3126	}
3127	if (STREQ(name, dv->name)) {
3128	r = (*dv->load_func)();
3129	break;
3130	}
3131	}
3132	}
3133	if (can_free)
3134	free(name);
3135	return r;
3136	}
3137
3138	/* mk_rexp --- make a regular expression constant */
3139
3140	static NODE *
3141	mk_rexp(NODE *exp)
3142	{
3143	NODE *n;
3144
3145	if (exp->type == Node_regex)
3146	return exp;
3147
3148	getnode(n);
3149	n->type = Node_dynregex;
3150	n->re_exp = exp;
3151	n->re_text = NULL;
3152	n->re_reg = NULL;
3153	n->re_flags = 0;
3154	n->re_cnt = 1;
3155	return n;
3156	}
3157
3158	/* isnoeffect --- when used as a statement, has no side effects */
3159
3160	/*
3161	* To be completely general, we should recursively walk the parse
3162	* tree, to make sure that all the subexpressions also have no effect.
3163	* Instead, we just weaken the actual warning that's printed, up above
3164	* in the grammar.
3165	*/
3166
3167	static int
3168	isnoeffect(NODETYPE type)
3169	{
3170	switch (type) {
3171	case Node_times:
3172	case Node_quotient:
3173	case Node_mod:
3174	case Node_plus:
3175	case Node_minus:
3176	case Node_subscript:
3177	case Node_concat:
3178	case Node_exp:
3179	case Node_unary_minus:
3180	case Node_field_spec:
3181	case Node_and:
3182	case Node_or:
3183	case Node_equal:
3184	case Node_notequal:
3185	case Node_less:
3186	case Node_greater:
3187	case Node_leq:
3188	case Node_geq:
3189	case Node_match:
3190	case Node_nomatch:
3191	case Node_not:
3192	case Node_val:
3193	case Node_in_array:
3194	case Node_NF:
3195	case Node_NR:
3196	case Node_FNR:
3197	case Node_FS:
3198	case Node_RS:
3199	case Node_FIELDWIDTHS:
3200	case Node_IGNORECASE:
3201	case Node_OFS:
3202	case Node_ORS:
3203	case Node_OFMT:
3204	case Node_CONVFMT:
3205	case Node_BINMODE:
3206	case Node_LINT:
3207	case Node_SUBSEP:
3208	case Node_TEXTDOMAIN:
3209	return TRUE;
3210	default:
3211	break; /* keeps gcc -Wall happy */
3212	}
3213
3214	return FALSE;
3215	}
3216
3217	/* isassignable --- can this node be assigned to? */
3218
3219	static int
3220	isassignable(register NODE *n)
3221	{
3222	switch (n->type) {
3223	case Node_var_new:
3224	case Node_var:
3225	case Node_FIELDWIDTHS:
3226	case Node_RS:
3227	case Node_FS:
3228	case Node_FNR:
3229	case Node_NR:
3230	case Node_NF:
3231	case Node_IGNORECASE:
3232	case Node_OFMT:
3233	case Node_CONVFMT:
3234	case Node_ORS:
3235	case Node_OFS:
3236	case Node_LINT:
3237	case Node_BINMODE:
3238	case Node_SUBSEP:
3239	case Node_TEXTDOMAIN:
3240	case Node_field_spec:
3241	case Node_subscript:
3242	return TRUE;
3243	case Node_param_list:
3244	return ((n->flags & FUNC) == 0); /* ok if not func name */
3245	default:
3246	break; /* keeps gcc -Wall happy */
3247	}
3248	return FALSE;
3249	}
3250
3251	/* stopme --- for debugging */
3252
3253	NODE *
3254	stopme(NODE *tree ATTRIBUTE_UNUSED)
3255	{
3256	return (NODE *) 0;
3257	}
3258
3259	/* dumpintlstr --- write out an initial .po file entry for the string */
3260
3261	static void
3262	dumpintlstr(const char *str, size_t len)
3263	{
3264	char *cp;
3265
3266	/* See the GNU gettext distribution for details on the file format */
3267
3268	if (source != NULL) {
3269	/* ala the gettext sources, remove leading `./'s */
3270	for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
3271	continue;
3272	printf("#: %s:%d\n", cp, sourceline);
3273	}
3274
3275	printf("msgid ");
3276	pp_string_fp(stdout, str, len, '"', TRUE);
3277	putchar('\n');
3278	printf("msgstr \"\"\n\n");
3279	fflush(stdout);
3280	}
3281
3282	/* dumpintlstr2 --- write out an initial .po file entry for the string and its plural */
3283
3284	static void
3285	dumpintlstr2(const char str1, size_t len1, const char str2, size_t len2)
3286	{
3287	char *cp;
3288
3289	/* See the GNU gettext distribution for details on the file format */
3290
3291	if (source != NULL) {
3292	/* ala the gettext sources, remove leading `./'s */
3293	for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
3294	continue;
3295	printf("#: %s:%d\n", cp, sourceline);
3296	}
3297
3298	printf("msgid ");
3299	pp_string_fp(stdout, str1, len1, '"', TRUE);
3300	putchar('\n');
3301	printf("msgid_plural ");
3302	pp_string_fp(stdout, str2, len2, '"', TRUE);
3303	putchar('\n');
3304	printf("msgstr[0] \"\"\nmsgstr[1] \"\"\n\n");
3305	fflush(stdout);
3306	}
3307
3308	/* count_args --- count the number of printf arguments */
3309
3310	static void
3311	count_args(NODE *tree)
3312	{
3313	size_t count = 0;
3314	NODE *save_tree;
3315
3316	assert(tree->type == Node_K_printf
3317	\|\| (tree->type == Node_builtin && tree->builtin == do_sprintf));
3318	save_tree = tree;
3319
3320	tree = tree->lnode; /* printf format string */
3321
3322	for (count = 0; tree != NULL; tree = tree->rnode)
3323	count++;
3324
3325	save_tree->printf_count = count;
3326	}
3327
3328	/* isarray --- can this type be subscripted? */
3329
3330	static int
3331	isarray(NODE *n)
3332	{
3333	switch (n->type) {
3334	case Node_var_new:
3335	case Node_var_array:
3336	return TRUE;
3337	case Node_param_list:
3338	return (n->flags & FUNC) == 0;
3339	case Node_array_ref:
3340	cant_happen();
3341	break;
3342	default:
3343	break; /* keeps gcc -Wall happy */
3344	}
3345
3346	return FALSE;
3347	}
3348
3349	/* See if name is a special token. */
3350
3351	int
3352	check_special(const char *name)
3353	{
3354	int low, high, mid;
3355	int i;
3356
3357	low = 0;
3358	high = (sizeof(tokentab) / sizeof(tokentab[0])) - 1;
3359	while (low <= high) {
3360	mid = (low + high) / 2;
3361	i = *name - tokentab[mid].operator[0];
3362	if (i == 0)
3363	i = strcmp(name, tokentab[mid].operator);
3364
3365	if (i < 0) /* token < mid */
3366	high = mid - 1;
3367	else if (i > 0) /* token > mid */
3368	low = mid + 1;
3369	else
3370	return mid;
3371	}
3372	return -1;
3373	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format