Context Navigation

awkgram.y

Visit:

Last change on this file was 3076, checked in by bird, 18 years ago
gawk 3.1.5
File size: 78.2 KB

Rev	Line
[3076]	1	/*
	2	* awkgram.y --- yacc/bison parser
	3	*/
	4
	5	/*
	6	* Copyright (C) 1986, 1988, 1989, 1991-2005 the Free Software Foundation, Inc.
	7	*
	8	* This file is part of GAWK, the GNU implementation of the
	9	* AWK Programming Language.
	10	*
	11	* GAWK is free software; you can redistribute it and/or modify
	12	* it under the terms of the GNU General Public License as published by
	13	* the Free Software Foundation; either version 2 of the License, or
	14	* (at your option) any later version.
	15	*
	16	* GAWK is distributed in the hope that it will be useful,
	17	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	18	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	19	* GNU General Public License for more details.
	20	*
	21	* You should have received a copy of the GNU General Public License
	22	* along with this program; if not, write to the Free Software
	23	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
	24	*/
	25
	26	%{
	27	#ifdef GAWKDEBUG
	28	#define YYDEBUG 12
	29	#endif
	30
	31	#include "awk.h"
	32
	33	#define CAN_FREE TRUE
	34	#define DONT_FREE FALSE
	35
	36	#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
	37	static void yyerror(const char *m, ...) ATTRIBUTE_PRINTF_1;
	38	#else
	39	static void yyerror(); /* va_alist */
	40	#endif
	41	static char *get_src_buf P((void));
	42	static int yylex P((void));
	43	static NODE *node_common P((NODETYPE op));
	44	static NODE *snode P((NODE *subn, NODETYPE op, int sindex)); /* result is assigned to a <nodeval> $$ */
	45	static NODE *make_for_loop P((NODE *init, NODE *cond, NODE *incr));
	46	static NODE *append_right P((NODE *list, NODE *new));
	47	static inline NODE *append_pattern P((NODE **list, NODE *patt)); /* called with &begin_block / &end_block */
	48	static void func_install P((NODE *params, NODE *def));
	49	static void pop_var P((NODE *np, int freeit));
	50	static void pop_params P((NODE *params));
	51	static NODE *make_param P((char *name)); /* called with a <sval> (char *) name */
	52	static NODE *mk_rexp P((NODE *exp));
	53	static int dup_parms P((NODE *func));
	54	static void param_sanity P((NODE *arglist));
	55	static int parms_shadow P((const char *fname, NODE *func));
	56	static int isnoeffect P((NODETYPE t));
	57	static int isassignable P((NODE *n));
	58	static void dumpintlstr P((const char *str, size_t len));
	59	static void dumpintlstr2 P((const char *str1, size_t len1, const char *str2, size_t len2));
	60	static void count_args P((NODE *n));
	61	static int isarray P((NODE *n));
	62
	63	enum defref { FUNC_DEFINE, FUNC_USE };
	64	static void func_use P((const char *name, enum defref how));
	65	static void check_funcs P((void));
	66
	67	static int want_regexp; /* lexical scanning kludge */
	68	static int can_return; /* parsing kludge */
	69	static int begin_or_end_rule = FALSE; /* parsing kludge */
	70	static int parsing_end_rule = FALSE; /* for warnings */
	71	static int in_print = FALSE; /* lexical scanning kludge for print */
	72	static int in_parens = 0; /* lexical scanning kludge for print */
	73	static char *lexptr; /* pointer to next char during parsing */
	74	static char *lexend;
	75	static char *lexptr_begin; /* keep track of where we were for error msgs */
	76	static char *lexeme; /* beginning of lexeme for debugging */
	77	static char *thisline = NULL;
	78	#define YYDEBUG_LEXER_TEXT (lexeme)
	79	static int param_counter;
	80	static char *tokstart = NULL;
	81	static char *tok = NULL;
	82	static char *tokend;
	83
	84	static long func_count; /* total number of functions */
	85
	86	#define HASHSIZE 1021 /* this constant only used here */
	87	NODE *variables[HASHSIZE];
	88	static int var_count; /* total number of global variables */
	89
	90	extern char *source;
	91	extern int sourceline;
	92	extern struct src *srcfiles;
	93	extern long numfiles;
	94	extern int errcount;
	95	extern NODE *begin_block;
	96	extern NODE *end_block;
	97
	98	/*
	99	* This string cannot occur as a real awk identifier.
	100	* Use it as a special token to make function parsing
	101	* uniform, but if it's seen, don't install the function.
	102	* e.g.
	103	* function split(x) { return x }
	104	* function x(a) { return a }
	105	* should only produce one error message, and not core dump.
	106	*/
	107	static char builtin_func[] = "@builtin";
	108	%}
	109
	110	%union { /* semantic value types used by %type / %token below */
	111	long lval;
	112	AWKNUM fval;
	113	NODE *nodeval;
	114	NODETYPE nodetypeval;
	115	char *sval;
	116	NODE *(*ptrval) P((void)); /* pointer to function; a function-typed member is not valid C */
	117	}
	118
	119	%type <nodeval> function_prologue pattern action variable param_list
	120	%type <nodeval> exp common_exp
	121	%type <nodeval> simp_exp non_post_simp_exp
	122	%type <nodeval> expression_list opt_expression_list print_expression_list
	123	%type <nodeval> statements statement if_statement switch_body case_statements case_statement case_value opt_param_list
	124	%type <nodeval> simple_stmt opt_simple_stmt
	125	%type <nodeval> opt_exp opt_variable regexp
	126	%type <nodeval> input_redir output_redir
	127	%type <nodetypeval> print
	128	%type <nodetypeval> assign_operator a_relop relop_or_less
	129	%type <sval> func_name
	130	%type <lval> lex_builtin
	131
	132	%token <sval> FUNC_CALL NAME REGEXP
	133	%token <lval> ERROR
	134	%token <nodeval> YNUMBER YSTRING
	135	%token <nodetypeval> RELOP IO_OUT IO_IN
	136	%token <nodetypeval> ASSIGNOP ASSIGN MATCHOP CONCAT_OP
	137	%token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
	138	%token <nodetypeval> LEX_SWITCH LEX_CASE LEX_DEFAULT LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
	139	%token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
	140	%token <nodetypeval> LEX_GETLINE LEX_NEXTFILE
	141	%token <nodetypeval> LEX_IN
	142	%token <lval> LEX_AND LEX_OR INCREMENT DECREMENT
	143	%token <lval> LEX_BUILTIN LEX_LENGTH
	144	%token NEWLINE
	145
	146	/* these are just yylval numbers */
	147
	148	/* Lowest to highest */
	149	%right ASSIGNOP ASSIGN SLASH_BEFORE_EQUAL
	150	%right '?' ':'
	151	%left LEX_OR
	152	%left LEX_AND
	153	%left LEX_GETLINE
	154	%nonassoc LEX_IN
	155	%left FUNC_CALL LEX_BUILTIN LEX_LENGTH
	156	%nonassoc ','
	157	%nonassoc MATCHOP
	158	%nonassoc RELOP '<' '>' IO_IN IO_OUT
	159	%left CONCAT_OP
	160	%left YSTRING YNUMBER
	161	%left '+' '-'
	162	%left '*' '/' '%'
	163	%right '!' UNARY
	164	%right '^'
	165	%left INCREMENT DECREMENT
	166	%left '$'
	167	%left '(' ')'
	168	%%
	169
	170	start
	171	: opt_nls program opt_nls
	172	{
	173	check_funcs();
	174	}
	175	;
	176
	177	program
	178	: /* empty */
	179	\| program rule
	180	{
	181	begin_or_end_rule = parsing_end_rule = FALSE;
	182	yyerrok;
	183	}
	184	\| program error
	185	{
	186	begin_or_end_rule = parsing_end_rule = FALSE;
	187	/*
	188	* If errors, give up, don't produce an infinite
	189	* stream of syntax error messages.
	190	*/
	191	/* yyerrok; */
	192	}
	193	;
	194
	195	rule
	196	: pattern action
	197	{
	198	$1->rnode = $2;
	199	}
	200	\| pattern statement_term
	201	{
	202	if ($1->lnode != NULL) {
	203	/* pattern rule with non-empty pattern */
	204	$1->rnode = node(NULL, Node_K_print_rec, NULL);
	205	} else {
	206	/* an error */
	207	if (begin_or_end_rule)
	208	msg(_("%s blocks must have an action part"),
	209	(parsing_end_rule ? "END" : "BEGIN"));
	210	else
	211	msg(_("each rule must have a pattern or an action part"));
	212	errcount++;
	213	}
	214	}
	215	\| function_prologue action
	216	{
	217	can_return = FALSE;
	218	if ($1)
	219	func_install($1, $2);
	220	yyerrok;
	221	}
	222	;
	223
	224	pattern
	225	: /* empty */
	226	{
	227	$$ = append_pattern(&expression_value, (NODE *) NULL);
	228	}
	229	\| exp
	230	{
	231	$$ = append_pattern(&expression_value, $1);
	232	}
	233	\| exp ',' exp
	234	{
	235	NODE *r;
	236
	237	getnode(r);
	238	r->type = Node_line_range;
	239	r->condpair = node($1, Node_cond_pair, $3);
	240	r->triggered = FALSE;
	241	$$ = append_pattern(&expression_value, r);
	242	}
	243	\| LEX_BEGIN
	244	{
	245	begin_or_end_rule = TRUE;
	246	$$ = append_pattern(&begin_block, (NODE *) NULL);
	247	}
	248	\| LEX_END
	249	{
	250	begin_or_end_rule = parsing_end_rule = TRUE;
	251	$$ = append_pattern(&end_block, (NODE *) NULL);
	252	}
	253	;
	254
	255	action
	256	: l_brace statements r_brace opt_semi opt_nls
	257	{ $$ = $2; }
	258	;
	259
	260	func_name
	261	: NAME
	262	{ $$ = $1; }
	263	\| FUNC_CALL
	264	{ $$ = $1; }
	265	\| lex_builtin
	266	{
	267	yyerror(_("`%s' is a built-in function, it cannot be redefined"),
	268	tokstart);
	269	errcount++;
	270	$$ = builtin_func;
	271	/* yyerrok; */
	272	}
	273	;
	274
	275	lex_builtin
	276	: LEX_BUILTIN
	277	\| LEX_LENGTH
	278	;
	279
	280	function_prologue
	281	: LEX_FUNCTION
	282	{
	283	param_counter = 0; /* reset before the parameter list is parsed */
	284	}
	285	func_name '(' opt_param_list r_paren opt_nls
	286	{
	287	NODE *t;
	288
	289	t = make_param($3); /* $3 is func_name; the mid-rule action above is $2 */
	290	t->flags |= FUNC;
	291	$$ = append_right(t, $5); /* function-name node followed by the parameter list */
	292	can_return = TRUE;
	293	/* check for duplicate parameter names */
	294	if (dup_parms($$))
	295	errcount++;
	296	}
	297	;
	298
	299	regexp
	300	/*
	301	* In this rule, want_regexp tells yylex that the next thing
	302	* is a regexp so it should read up to the closing slash.
	303	*/
	304	: a_slash
	305	{ ++want_regexp; }
	306	REGEXP /* The terminating '/' is consumed by yylex(). */
	307	{
	308	NODE *n;
	309	size_t len = strlen($3);
	310
	311	if (do_lint) {
	312	if (len == 0)
	313	lintwarn(_("regexp constant `//' looks like a C++ comment, but is not"));
	314	else if (($3)[0] == '*' && ($3)[len-1] == '*') /* text starts and ends with `*', i.e. the constant reads as a C comment */
	315	/* possible C comment */
	316	lintwarn(_("regexp constant `/%s/' looks like a C comment, but is not"), tokstart);
	317	}
	318	getnode(n);
	319	n->type = Node_regex;
	320	n->re_exp = make_string($3, len);
	321	n->re_reg = make_regexp($3, len, FALSE, TRUE);
	322	n->re_text = NULL;
	323	n->re_flags = CONST;
	324	n->re_cnt = 1;
	325	$$ = n;
	326	}
	327	;
	328
	329	a_slash
	330	: '/'
	331	\| SLASH_BEFORE_EQUAL
	332	;
	333
	334	statements
	335	: /* empty */
	336	{ $$ = NULL; }
	337	\| statements statement
	338	{
	339	if ($2 == NULL)
	340	$$ = $1;
	341	else {
	342	if (do_lint && isnoeffect($2->type))
	343	lintwarn(_("statement may have no effect"));
	344	if ($1 == NULL)
	345	$$ = $2;
	346	else
	347	$$ = append_right(
	348	($1->type == Node_statement_list ? $1
	349	: node($1, Node_statement_list, (NODE *) NULL)),
	350	($2->type == Node_statement_list ? $2
	351	: node($2, Node_statement_list, (NODE *) NULL)));
	352	}
	353	yyerrok;
	354	}
	355	\| statements error
	356	{ $$ = NULL; }
	357	;
	358
	359	statement_term
	360	: nls
	361	\| semi opt_nls
	362	;
	363
	364	statement
	365	: semi opt_nls
	366	{ $$ = NULL; }
	367	| l_brace statements r_brace
	368	{ $$ = $2; }
	369	| if_statement
	370	{ $$ = $1; }
	371	| LEX_SWITCH '(' exp r_paren opt_nls l_brace switch_body opt_nls r_brace
	372	{ $$ = node($3, Node_K_switch, $7); }
	373	| LEX_WHILE '(' exp r_paren opt_nls statement
	374	{ $$ = node($3, Node_K_while, $6); }
	375	| LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls
	376	{ $$ = node($6, Node_K_do, $3); }
	377	| LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement
	378	{
	379	/*
	380	* Efficiency hack. Recognize the special case of
	381	*
	382	* for (iggy in foo)
	383	* delete foo[iggy]
	384	*
	385	* and treat it as if it were
	386	*
	387	* delete foo
	388	*
	389	* Check that the body is a `delete a[i]' statement,
	390	* and that both the loop var and array names match.
	391	*/
	392	if ($8 != NULL && $8->type == Node_K_delete && $8->rnode != NULL) {
	393	NODE *arr, *sub; /* pointers: assigned from $8->lnode and dereferenced below */
	394
	395	assert($8->rnode->type == Node_expression_list);
	396	arr = $8->lnode; /* array var */
	397	sub = $8->rnode->lnode; /* index var */
	398
	399	if ( (arr->type == Node_var_new
	400	|| arr->type == Node_var_array
	401	|| arr->type == Node_param_list)
	402	&& (sub->type == Node_var_new
	403	|| sub->type == Node_var
	404	|| sub->type == Node_param_list)
	405	&& strcmp($3, sub->vname) == 0
	406	&& strcmp($5, arr->vname) == 0) {
	407	$8->type = Node_K_delete_loop;
	408	$$ = $8;
	409	free($3); /* thanks to valgrind for pointing these out */
	410	free($5);
	411	}
	412	else
	413	goto regular_loop;
	414	} else {
	415	regular_loop:
	416	$$ = node($8, Node_K_arrayfor,
	417	make_for_loop(variable($3, CAN_FREE, Node_var),
	418	(NODE *) NULL, variable($5, CAN_FREE, Node_var_array)));
	419	}
	420	}
	421	| LEX_FOR '(' opt_simple_stmt semi opt_nls exp semi opt_nls opt_simple_stmt r_paren opt_nls statement
	422	{
	423	$$ = node($12, Node_K_for, (NODE *) make_for_loop($3, $6, $9));
	424	}
	425	| LEX_FOR '(' opt_simple_stmt semi opt_nls semi opt_nls opt_simple_stmt r_paren opt_nls statement
	426	{
	427	$$ = node($11, Node_K_for,
	428	(NODE *) make_for_loop($3, (NODE *) NULL, $8)); /* no condition expression in this form */
	429	}
	430	| LEX_BREAK statement_term
	431	/* for break, maybe we'll have to remember where to break to */
	432	{ $$ = node((NODE *) NULL, Node_K_break, (NODE *) NULL); }
	433	| LEX_CONTINUE statement_term
	434	/* similarly */
	435	{ $$ = node((NODE *) NULL, Node_K_continue, (NODE *) NULL); }
	436	| LEX_NEXT statement_term
	437	{ NODETYPE type;
	438
	439	if (begin_or_end_rule)
	440	yyerror(_("`%s' used in %s action"), "next",
	441	(parsing_end_rule ? "END" : "BEGIN"));
	442	type = Node_K_next;
	443	$$ = node((NODE *) NULL, type, (NODE *) NULL);
	444	}
	445	| LEX_NEXTFILE statement_term
	446	{
	447	if (do_traditional) {
	448	/*
	449	* can't use yyerror, since may have overshot
	450	* the source line
	451	*/
	452	errcount++;
	453	error(_("`nextfile' is a gawk extension"));
	454	}
	455	if (do_lint)
	456	lintwarn(_("`nextfile' is a gawk extension"));
	457	if (begin_or_end_rule) {
	458	/* same thing */
	459	errcount++;
	460	error(_("`%s' used in %s action"), "nextfile",
	461	(parsing_end_rule ? "END" : "BEGIN"));
	462	}
	463	$$ = node((NODE *) NULL, Node_K_nextfile, (NODE *) NULL);
	464	}
	465	| LEX_EXIT opt_exp statement_term
	466	{ $$ = node($2, Node_K_exit, (NODE *) NULL); }
	467	| LEX_RETURN
	468	{
	469	if (! can_return)
	470	yyerror(_("`return' used outside function context"));
	471	}
	472	opt_exp statement_term
	473	{
	474	$$ = node($3 == NULL ? Nnull_string : $3,
	475	Node_K_return, (NODE *) NULL);
	476	}
	477	| simple_stmt statement_term
	478	;
	479
	480	/*
	481	* A simple_stmt exists to satisfy a constraint in the POSIX
	482	* grammar allowing them to occur as the 1st and 3rd parts
	483	* in a `for (...;...;...)' loop. This is a historical oddity
	484	* inherited from Unix awk, not at all documented in the AK&W
	485	* awk book. We support it, as this was reported as a bug.
	486	* We don't bother to document it though. So there.
	487	*/
	488	simple_stmt
	489	: print { in_print = TRUE; in_parens = 0; } print_expression_list output_redir
	490	{
	491	/*
	492	* Optimization: plain `print' has no expression list, so $3 is null.
	493	* If $3 is an expression list with one element (rnode == null)
	494	* and lnode is a field spec for field 0, we have `print $0'.
	495	* For both, use Node_K_print_rec, which is faster for these two cases.
	496	*/
	497	if ($1 == Node_K_print &&
	498	($3 == NULL
	499	\|\| ($3->type == Node_expression_list
	500	&& $3->rnode == NULL
	501	&& $3->lnode->type == Node_field_spec
	502	&& $3->lnode->lnode->type == Node_val
	503	&& $3->lnode->lnode->numbr == 0.0))
	504	) {
	505	static int warned = FALSE;
	506
	507	$$ = node(NULL, Node_K_print_rec, $4);
	508
	509	if (do_lint && $3 == NULL && begin_or_end_rule && ! warned) {
	510	warned = TRUE;
	511	lintwarn(
	512	_("plain `print' in BEGIN or END rule should probably be `print \"\"'"));
	513	}
	514	} else {
	515	$$ = node($3, $1, $4);
	516	if ($$->type == Node_K_printf)
	517	count_args($$);
	518	}
	519	}
	520	\| LEX_DELETE NAME '[' expression_list ']'
	521	{ $$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, $4); }
	522	\| LEX_DELETE NAME
	523	{
	524	if (do_lint)
	525	lintwarn(_("`delete array' is a gawk extension"));
	526	if (do_traditional) {
	527	/*
	528	* can't use yyerror, since may have overshot
	529	* the source line
	530	*/
	531	errcount++;
	532	error(_("`delete array' is a gawk extension"));
	533	}
	534	$$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, (NODE *) NULL);
	535	}
	536	\| LEX_DELETE '(' NAME ')'
	537	{
	538	/* this is for tawk compatibility. maybe the warnings should always be done. */
	539	if (do_lint)
	540	lintwarn(_("`delete(array)' is a non-portable tawk extension"));
	541	if (do_traditional) {
	542	/*
	543	* can't use yyerror, since may have overshot
	544	* the source line
	545	*/
	546	errcount++;
	547	error(_("`delete(array)' is a non-portable tawk extension"));
	548	}
	549	$$ = node(variable($3, CAN_FREE, Node_var_array), Node_K_delete, (NODE *) NULL);
	550	}
	551	\| exp
	552	{ $$ = $1; }
	553	;
	554
	555	opt_simple_stmt
	556	: /* empty */
	557	{ $$ = NULL; }
	558	\| simple_stmt
	559	{ $$ = $1; }
	560	;
	561
	562	switch_body
	563	: case_statements
	564	{
	565	if ($1 == NULL) {
	566	$$ = NULL;
	567	} else {
	568	NODE *dflt = NULL;
	569	NODE *head = $1;
	570	NODE *curr;
	571
	572	const char **case_values = NULL; /* strings of the case values seen so far */
	573
	574	int maxcount = 128;
	575	int case_count = 0;
	576	int i;
	577
	578	emalloc(case_values, const char **, sizeof(char *) * maxcount, "switch_body"); /* room for maxcount pointers, not maxcount bytes */
	579	for (curr = $1; curr != NULL; curr = curr->rnode) {
	580	/* Assure that case statement values are unique. */
	581	if (curr->lnode->type == Node_K_case) {
	582	char *caseval;
	583
	584	if (curr->lnode->lnode->type == Node_regex)
	585	caseval = curr->lnode->lnode->re_exp->stptr;
	586	else
	587	caseval = force_string(tree_eval(curr->lnode->lnode))->stptr;
	588
	589	for (i = 0; i < case_count; i++)
	590	if (strcmp(caseval, case_values[i]) == 0)
	591	yyerror(_("duplicate case values in switch body: %s"), caseval);
	592
	593	if (case_count >= maxcount) { /* table full: grow by another 128 slots */
	594	maxcount += 128;
	595	erealloc(case_values, const char **, sizeof(char *) * maxcount, "switch_body");
	596	}
	597	case_values[case_count++] = caseval;
	598	} else {
	599	/* Otherwise save a pointer to the default node. */
	600	if (dflt != NULL)
	601	yyerror(_("Duplicate `default' detected in switch body"));
	602	dflt = curr;
	603	}
	604	}
	605
	606	free(case_values);
	607
	608	/* Create the switch body. */
	609	$$ = node(head, Node_switch_body, dflt);
	610	}
	611	}
	612	;
	613
	614	case_statements
	615	: /* empty */
	616	{ $$ = NULL; }
	617	\| case_statements case_statement
	618	{
	619	if ($2 == NULL)
	620	$$ = $1;
	621	else {
	622	if (do_lint && isnoeffect($2->type))
	623	lintwarn(_("statement may have no effect"));
	624	if ($1 == NULL)
	625	$$ = node($2, Node_case_list, (NODE *) NULL);
	626	else
	627	$$ = append_right(
	628	($1->type == Node_case_list ? $1 : node($1, Node_case_list, (NODE *) NULL)),
	629	($2->type == Node_case_list ? $2 : node($2, Node_case_list, (NODE *) NULL))
	630	);
	631	}
	632	yyerrok;
	633	}
	634	\| case_statements error
	635	{ $$ = NULL; }
	636	;
	637
	638	case_statement
	639	: LEX_CASE case_value colon opt_nls statements
	640	{ $$ = node($2, Node_K_case, $5); }
	641	\| LEX_DEFAULT colon opt_nls statements
	642	{ $$ = node((NODE *) NULL, Node_K_default, $4); }
	643	;
	644
	645	case_value
	646	: YNUMBER
	647	{ $$ = $1; }
	648	\| '-' YNUMBER %prec UNARY
	649	{
	650	$2->numbr = -(force_number($2));
	651	$$ = $2;
	652	}
	653	\| '+' YNUMBER %prec UNARY
	654	{ $$ = $2; }
	655	\| YSTRING
	656	{ $$ = $1; }
	657	\| regexp
	658	{ $$ = $1; }
	659	;
	660
	661	print
	662	: LEX_PRINT
	663	\| LEX_PRINTF
	664	;
	665
	666	/*
	667	* Note: ``print(x)'' is already parsed by the first rule,
	668	* so there is no good in covering it by the second one too.
	669	*/
	670	print_expression_list
	671	: opt_expression_list
	672	\| '(' exp comma expression_list r_paren
	673	{ $$ = node($2, Node_expression_list, $4); }
	674	;
	675
	676	output_redir
	677	: /* empty */
	678	{
	679	in_print = FALSE;
	680	in_parens = 0;
	681	$$ = NULL;
	682	}
	683	\| IO_OUT { in_print = FALSE; in_parens = 0; } common_exp
	684	{
	685	$$ = node($3, $1, (NODE *) NULL);
	686	if ($1 == Node_redirect_twoway
	687	&& $3->type == Node_K_getline
	688	&& $3->rnode != NULL
	689	&& $3->rnode->type == Node_redirect_twoway)
	690	yyerror(_("multistage two-way pipelines don't work"));
	691	}
	692	;
	693
	694	if_statement
	695	: LEX_IF '(' exp r_paren opt_nls statement
	696	{
	697	$$ = node($3, Node_K_if,
	698	node($6, Node_if_branches, (NODE *) NULL));
	699	}
	700	\| LEX_IF '(' exp r_paren opt_nls statement
	701	LEX_ELSE opt_nls statement
	702	{ $$ = node($3, Node_K_if,
	703	node($6, Node_if_branches, $9)); }
	704	;
	705
	706	nls
	707	: NEWLINE
	708	\| nls NEWLINE
	709	;
	710
	711	opt_nls
	712	: /* empty */
	713	\| nls
	714	;
	715
	716	input_redir
	717	: /* empty */
	718	{ $$ = NULL; }
	719	\| '<' simp_exp
	720	{ $$ = node($2, Node_redirect_input, (NODE *) NULL); }
	721	;
	722
	723	opt_param_list
	724	: /* empty */
	725	{ $$ = NULL; }
	726	\| param_list
	727	{ $$ = $1; }
	728	;
	729
	730	param_list
	731	: NAME
	732	{ $$ = make_param($1); }
	733	\| param_list comma NAME
	734	{ $$ = append_right($1, make_param($3)); yyerrok; }
	735	\| error
	736	{ $$ = NULL; }
	737	\| param_list error
	738	{ $$ = NULL; }
	739	\| param_list comma error
	740	{ $$ = NULL; }
	741	;
	742
	743	/* optional expression, as in for loop */
	744	opt_exp
	745	: /* empty */
	746	{ $$ = NULL; }
	747	\| exp
	748	{ $$ = $1; }
	749	;
	750
	751	opt_expression_list
	752	: /* empty */
	753	{ $$ = NULL; }
	754	\| expression_list
	755	{ $$ = $1; }
	756	;
	757
	758	expression_list
	759	: exp
	760	{ $$ = node($1, Node_expression_list, (NODE *) NULL); }
	761	\| expression_list comma exp
	762	{
	763	$$ = append_right($1,
	764	node($3, Node_expression_list, (NODE *) NULL));
	765	yyerrok;
	766	}
	767	\| error
	768	{ $$ = NULL; }
	769	\| expression_list error
	770	{ $$ = NULL; }
	771	\| expression_list error exp
	772	{ $$ = NULL; }
	773	\| expression_list comma error
	774	{ $$ = NULL; }
	775	;
	776
	777	/* Expressions, not including the comma operator. */
	778	exp : variable assign_operator exp %prec ASSIGNOP
	779	{
	780	if (do_lint && $3->type == Node_regex)
	781	lintwarn(_("regular expression on right of assignment"));
	782	/*
	783	* Optimization of `x = x y'. Can save lots of time
	784	* if done a lot.
	785	*/
	786	if (( $1->type == Node_var
	787	\|\| $1->type == Node_var_new
	788	\|\| $1->type == Node_param_list)
	789	&& $2 == Node_assign
	790	&& $3->type == Node_concat
	791	&& $3->lnode == $1) {
	792	$3->type = Node_assign_concat; /* Just change the type */
	793	$$ = $3; /* And use it directly */
	794	} else
	795	$$ = node($1, $2, $3);
	796	}
	797	\| exp LEX_AND exp
	798	{ $$ = node($1, Node_and, $3); }
	799	\| exp LEX_OR exp
	800	{ $$ = node($1, Node_or, $3); }
	801	\| exp MATCHOP exp
	802	{
	803	if ($1->type == Node_regex)
	804	warning(_("regular expression on left of `~' or `!~' operator"));
	805	$$ = node($1, $2, mk_rexp($3));
	806	}
	807	\| exp LEX_IN NAME
	808	{ $$ = node(variable($3, CAN_FREE, Node_var_array), Node_in_array, $1); }
	809	\| exp a_relop exp %prec RELOP
	810	{
	811	if (do_lint && $3->type == Node_regex)
	812	lintwarn(_("regular expression on right of comparison"));
	813	$$ = node($1, $2, $3);
	814	}
	815	\| exp '?' exp ':' exp
	816	{ $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));}
	817	\| common_exp
	818	{ $$ = $1; }
	819	;
	820
	821	assign_operator
	822	: ASSIGN
	823	{ $$ = $1; }
	824	\| ASSIGNOP
	825	{ $$ = $1; }
	826	\| SLASH_BEFORE_EQUAL ASSIGN /* `/=' */
	827	{ $$ = Node_assign_quotient; }
	828	;
	829
	830	relop_or_less
	831	: RELOP
	832	{ $$ = $1; }
	833	\| '<'
	834	{ $$ = Node_less; }
	835	;
	836	a_relop
	837	: relop_or_less
	838	\| '>'
	839	{ $$ = Node_greater; }
	840	;
	841
	842	common_exp
	843	: regexp
	844	{ $$ = $1; }
	845	\| '!' regexp %prec UNARY
	846	{
	847	$$ = node(node(make_number(0.0),
	848	Node_field_spec,
	849	(NODE *) NULL),
	850	Node_nomatch,
	851	$2);
	852	}
	853	\| '(' expression_list r_paren LEX_IN NAME
	854	{ $$ = node(variable($5, CAN_FREE, Node_var_array), Node_in_array, $2); }
	855	\| simp_exp
	856	{ $$ = $1; }
	857	\| common_exp simp_exp %prec CONCAT_OP
	858	{ $$ = node($1, Node_concat, $2); }
	859	;
	860
	861	simp_exp
	862	: non_post_simp_exp
	863	/* Binary operators in order of decreasing precedence. */
	864	\| simp_exp '^' simp_exp
	865	{ $$ = node($1, Node_exp, $3); }
	866	\| simp_exp '*' simp_exp
	867	{ $$ = node($1, Node_times, $3); }
	868	\| simp_exp '/' simp_exp
	869	{ $$ = node($1, Node_quotient, $3); }
	870	\| simp_exp '%' simp_exp
	871	{ $$ = node($1, Node_mod, $3); }
	872	\| simp_exp '+' simp_exp
	873	{ $$ = node($1, Node_plus, $3); }
	874	\| simp_exp '-' simp_exp
	875	{ $$ = node($1, Node_minus, $3); }
	876	\| LEX_GETLINE opt_variable input_redir
	877	{
	878	if (do_lint && parsing_end_rule && $3 == NULL)
	879	lintwarn(_("non-redirected `getline' undefined inside END action"));
	880	$$ = node($2, Node_K_getline, $3);
	881	}
	882	\| simp_exp IO_IN LEX_GETLINE opt_variable
	883	{
	884	$$ = node($4, Node_K_getline,
	885	node($1, $2, (NODE *) NULL));
	886	}
	887	\| variable INCREMENT
	888	{ $$ = node($1, Node_postincrement, (NODE *) NULL); }
	889	\| variable DECREMENT
	890	{ $$ = node($1, Node_postdecrement, (NODE *) NULL); }
	891	;
	892
	893	non_post_simp_exp
	894	: '!' simp_exp %prec UNARY
	895	{ $$ = node($2, Node_not, (NODE *) NULL); }
	896	\| '(' exp r_paren
	897	{ $$ = $2; }
	898	\| LEX_BUILTIN
	899	'(' opt_expression_list r_paren
	900	{ $$ = snode($3, Node_builtin, (int) $1); }
	901	\| LEX_LENGTH '(' opt_expression_list r_paren
	902	{ $$ = snode($3, Node_builtin, (int) $1); }
	903	\| LEX_LENGTH
	904	{
	905	if (do_lint)
	906	lintwarn(_("call of `length' without parentheses is not portable"));
	907	$$ = snode((NODE *) NULL, Node_builtin, (int) $1);
	908	if (do_posix)
	909	warning(_("call of `length' without parentheses is deprecated by POSIX"));
	910	}
	911	\| FUNC_CALL '(' opt_expression_list r_paren
	912	{
	913	$$ = node($3, Node_func_call, make_string($1, strlen($1)));
	914	$$->funcbody = NULL;
	915	func_use($1, FUNC_USE);
	916	param_sanity($3);
	917	free($1);
	918	}
	919	\| variable
	920	\| INCREMENT variable
	921	{ $$ = node($2, Node_preincrement, (NODE *) NULL); }
	922	\| DECREMENT variable
	923	{ $$ = node($2, Node_predecrement, (NODE *) NULL); }
	924	\| YNUMBER
	925	{ $$ = $1; }
	926	\| YSTRING
	927	{ $$ = $1; }
	928
	929	\| '-' simp_exp %prec UNARY
	930	{
	931	if ($2->type == Node_val && ($2->flags & (STRCUR\|STRING)) == 0) {
	932	$2->numbr = -(force_number($2));
	933	$$ = $2;
	934	} else
	935	$$ = node($2, Node_unary_minus, (NODE *) NULL);
	936	}
	937	\| '+' simp_exp %prec UNARY
	938	{
	939	/*
	940	* was: $$ = $2
	941	* POSIX semantics: force a conversion to numeric type
	942	*/
	943	$$ = node (make_number(0.0), Node_plus, $2);
	944	}
	945	;
	946
	947	opt_variable
	948	: /* empty */
	949	{ $$ = NULL; }
	950	\| variable
	951	{ $$ = $1; }
	952	;
	953
	954	variable
	955	: NAME
	956	{ $$ = variable($1, CAN_FREE, Node_var_new); }
	957	\| NAME '[' expression_list ']'
	958	{
	959	NODE *n;
	960
	961	if ((n = lookup($1)) != NULL && ! isarray(n))
	962	yyerror(_("use of non-array as array"));
	963	else if ($3 == NULL) {
	964	fatal(_("invalid subscript expression"));
	965	} else if ($3->rnode == NULL) {
	966	$$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3->lnode);
	967	freenode($3);
	968	} else
	969	$$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3);
	970	}
	971	\| '$' non_post_simp_exp
	972	{ $$ = node($2, Node_field_spec, (NODE *) NULL); }
	973	/*
	974	#if 0
	975	\| lex_builtin
	976	{ fatal(_("can't use built-in function `%s' as a variable"), tokstart); }
	977	#endif
	978	*/
	979	;
	980
	981	l_brace
	982	: '{' opt_nls
	983	;
	984
	985	r_brace
	986	: '}' opt_nls { yyerrok; }
	987	;
	988
	989	r_paren
	990	: ')' { yyerrok; }
	991	;
	992
	993	opt_semi
	994	: /* empty */
	995	\| semi
	996	;
	997
	998	semi
	999	: ';' { yyerrok; }
	1000	;
	1001
	1002	colon
	1003	: ':' { yyerrok; }
	1004	;
	1005
	1006	comma : ',' opt_nls { yyerrok; }
	1007	;
	1008
	1009	%%
	1010
	1011	struct token {
	1012	const char *operator; /* text to match */
	1013	NODETYPE value; /* node type */
	1014	int class; /* lexical class */
	1015	unsigned flags; /* # of args. allowed and compatibility */
	1016	# define ARGS 0xFF /* 0, 1, 2, 3 args allowed (any combination) */
	1017	# define A(n) (1<<(n))
	1018	# define VERSION_MASK 0xFF00 /* old awk is zero */
	1019	# define NOT_OLD 0x0100 /* feature not in old awk */
	1020	# define NOT_POSIX 0x0200 /* feature not in POSIX */
	1021	# define GAWKX 0x0400 /* gawk extension */
	1022	# define RESX 0x0800 /* Bell Labs Research extension */
	1023	NODE *(*ptr) P((NODE *)); /* function that implements this keyword */
	1024	};
	1025
	1026	/* Tokentab is sorted ascii ascending order, so it can be binary searched. */
	1027	/* Function pointers come from declarations in awk.h. */
	1028
	1029	static const struct token tokentab[] = { /* fields: text, node type, lexical class, flags, implementing function */
	1030	{"BEGIN", Node_illegal, LEX_BEGIN, 0, 0},
	1031	{"END", Node_illegal, LEX_END, 0, 0},
	1032	#ifdef ARRAYDEBUG
	1033	{"adump", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_adump},
	1034	#endif
	1035	{"and", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_and},
	1036	{"asort", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_asort},
	1037	{"asorti", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_asorti},
	1038	{"atan2", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2},
	1039	{"bindtextdomain", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_bindtextdomain},
	1040	{"break", Node_K_break, LEX_BREAK, 0, 0},
	1041	#ifdef ALLOW_SWITCH
	1042	{"case", Node_K_case, LEX_CASE, GAWKX, 0},
	1043	#endif
	1044	{"close", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1)|A(2), do_close},
	1045	{"compl", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_compl},
	1046	{"continue", Node_K_continue, LEX_CONTINUE, 0, 0},
	1047	{"cos", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_cos},
	1048	{"dcgettext", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_dcgettext},
	1049	{"dcngettext", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3)|A(4)|A(5), do_dcngettext},
	1050	#ifdef ALLOW_SWITCH
	1051	{"default", Node_K_default, LEX_DEFAULT, GAWKX, 0},
	1052	#endif
	1053	{"delete", Node_K_delete, LEX_DELETE, NOT_OLD, 0},
	1054	{"do", Node_K_do, LEX_DO, NOT_OLD, 0},
	1055	{"else", Node_illegal, LEX_ELSE, 0, 0},
	1056	{"exit", Node_K_exit, LEX_EXIT, 0, 0},
	1057	{"exp", Node_builtin, LEX_BUILTIN, A(1), do_exp},
	1058	{"extension", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_ext},
	1059	{"fflush", Node_builtin, LEX_BUILTIN, RESX|A(0)|A(1), do_fflush},
	1060	{"for", Node_K_for, LEX_FOR, 0, 0},
	1061	{"func", Node_K_function, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0},
	1062	{"function", Node_K_function, LEX_FUNCTION, NOT_OLD, 0},
	1063	{"gensub", Node_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), do_gensub},
	1064	{"getline", Node_K_getline, LEX_GETLINE, NOT_OLD, 0},
	1065	{"gsub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub},
	1066	{"if", Node_K_if, LEX_IF, 0, 0},
	1067	{"in", Node_illegal, LEX_IN, 0, 0},
	1068	{"index", Node_builtin, LEX_BUILTIN, A(2), do_index},
	1069	{"int", Node_builtin, LEX_BUILTIN, A(1), do_int},
	1070	{"length", Node_builtin, LEX_LENGTH, A(0)|A(1), do_length},
	1071	{"log", Node_builtin, LEX_BUILTIN, A(1), do_log},
	1072	{"lshift", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_lshift},
	1073	{"match", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_match},
	1074	{"mktime", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_mktime},
	1075	{"next", Node_K_next, LEX_NEXT, 0, 0},
	1076	{"nextfile", Node_K_nextfile, LEX_NEXTFILE, GAWKX, 0},
	1077	{"or", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_or},
	1078	{"print", Node_K_print, LEX_PRINT, 0, 0},
	1079	{"printf", Node_K_printf, LEX_PRINTF, 0, 0},
	1080	{"rand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0), do_rand},
	1081	{"return", Node_K_return, LEX_RETURN, NOT_OLD, 0},
	1082	{"rshift", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_rshift},
	1083	{"sin", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_sin},
	1084	{"split", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_split},
	1085	{"sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf},
	1086	{"sqrt", Node_builtin, LEX_BUILTIN, A(1), do_sqrt},
	1087	{"srand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand},
	1088	#if defined(GAWKDEBUG) || defined(ARRAYDEBUG) /* || ... */
	1089	{"stopme", Node_builtin, LEX_BUILTIN, GAWKX|A(0), stopme},
	1090	#endif
	1091	{"strftime", Node_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2), do_strftime},
	1092	{"strtonum", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum},
	1093	{"sub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub},
	1094	{"substr", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_substr},
	1095	#ifdef ALLOW_SWITCH
	1096	{"switch", Node_K_switch, LEX_SWITCH, GAWKX, 0},
	1097	#endif
	1098	{"system", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system},
	1099	{"systime", Node_builtin, LEX_BUILTIN, GAWKX|A(0), do_systime},
	1100	{"tolower", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower},
	1101	{"toupper", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper},
	1102	{"while", Node_K_while, LEX_WHILE, 0, 0},
	1103	{"xor", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_xor},
	1104	};
	1105
	1106	#ifdef MBS_SUPPORT
	1107	/* Variable containing the current shift state. */
	1108	static mbstate_t cur_mbstate;
	1109	/* Ring buffer containing current characters. */
	1110	#define MAX_CHAR_IN_RING_BUFFER 8
	1111	#define RING_BUFFER_SIZE (MAX_CHAR_IN_RING_BUFFER * MB_LEN_MAX)
	1112	static char cur_char_ring[RING_BUFFER_SIZE];
	1113	/* Index for ring buffers. */
	1114	static int cur_ring_idx;
	1115	/* This macro means that last nextc() return a singlebyte character
	1116	or 1st byte of a multibyte character. */
	1117	#define nextc_is_1stbyte (cur_char_ring[cur_ring_idx] == 1)
	1118	#else /* MBS_SUPPORT */
	1119	/* a dummy */
	1120	#define nextc_is_1stbyte 1
	1121	#endif /* MBS_SUPPORT */
	1122
	1123	/* getfname --- return name of a builtin function (for pretty printing) */
	1124
	1125	const char *
	1126	getfname(register NODE (fptr)(NODE *))
	1127	{
	1128	register int i, j;
	1129
	1130	j = sizeof(tokentab) / sizeof(tokentab[0]);
	1131	/* linear search, no other way to do it */
	1132	for (i = 0; i < j; i++)
	1133	if (tokentab[i].ptr == fptr)
	1134	return tokentab[i].operator;
	1135
	1136	return NULL;
	1137	}
	1138
	1139	/* yyerror --- print a syntax error message, show where */
	1140
	1141	/*
	1142	* Function identifier purposely indented to avoid mangling
	1143	* by ansi2knr. Sigh.
	1144	*/
	1145
	1146	static void
	1147	#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
	1148	yyerror(const char *m, ...)
	1149	#else
	1150	/* VARARGS0 */
	1151	yyerror(va_alist)
	1152	va_dcl
	1153	#endif
	1154	{
	1155	va_list args;
	1156	const char *mesg = NULL;
	1157	register char bp, cp;
	1158	char *scan;
	1159	char *buf;
	1160	int count;
	1161	static char end_of_file_line[] = "(END OF FILE)";
	1162	char save;
	1163
	1164	errcount++;
	1165	/* Find the current line in the input file */
	1166	if (lexptr && lexeme) {
	1167	if (thisline == NULL) {
	1168	cp = lexeme;
	1169	if (*cp == '\n') {
	1170	cp--;
	1171	mesg = _("unexpected newline or end of string");
	1172	}
	1173	for (; cp != lexptr_begin && *cp != '\n'; --cp)
	1174	continue;
	1175	if (*cp == '\n')
	1176	cp++;
	1177	thisline = cp;
	1178	}
	1179	/* NL isn't guaranteed */
	1180	bp = lexeme;
	1181	while (bp < lexend && bp && bp != '\n')
	1182	bp++;
	1183	} else {
	1184	thisline = end_of_file_line;
	1185	bp = thisline + strlen(thisline);
	1186	}
	1187
	1188	/*
	1189	* Saving and restoring *bp keeps valgrind happy,
	1190	* since the guts of glibc uses strlen, even though
	1191	* we're passing an explict precision. Sigh.
	1192	*
	1193	* 8/2003: We may not need this anymore.
	1194	*/
	1195	save = *bp;
	1196	*bp = '\0';
	1197
	1198	msg("%.*s", (int) (bp - thisline), thisline);
	1199
	1200	*bp = save;
	1201
	1202	#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
	1203	va_start(args, m);
	1204	if (mesg == NULL)
	1205	mesg = m;
	1206	#else
	1207	va_start(args);
	1208	if (mesg == NULL)
	1209	mesg = va_arg(args, char *);
	1210	#endif
	1211	count = (bp - thisline) + strlen(mesg) + 2 + 1;
	1212	emalloc(buf, char *, count, "yyerror");
	1213
	1214	bp = buf;
	1215
	1216	if (lexptr != NULL) {
	1217	scan = thisline;
	1218	while (scan < lexeme)
	1219	if (*scan++ == '\t')
	1220	*bp++ = '\t';
	1221	else
	1222	*bp++ = ' ';
	1223	*bp++ = '^';
	1224	*bp++ = ' ';
	1225	}
	1226	strcpy(bp, mesg);
	1227	err("", buf, args);
	1228	va_end(args);
	1229	free(buf);
	1230	}
	1231
	1232	/* get_src_buf --- read the next buffer of source program */
	1233
	1234	static char *
	1235	get_src_buf()
	1236	{
	1237	static int samefile = FALSE;
	1238	static int nextfile = 0;
	1239	static char *buf = NULL;
	1240	static size_t buflen = 0;
	1241	static int fd;
	1242
	1243	int n;
	1244	register char *scan;
	1245	int newfile;
	1246	struct stat sbuf;
	1247	int readcount = 0;
	1248	int l;
	1249	char *readloc;
	1250
	1251	again:
	1252	newfile = FALSE;
	1253	if (nextfile > numfiles)
	1254	return NULL;
	1255
	1256	if (srcfiles[nextfile].stype == CMDLINE) {
	1257	if ((l = strlen(srcfiles[nextfile].val)) == 0) {
	1258	/*
	1259	* Yet Another Special case:
	1260	* gawk '' /path/name
	1261	* Sigh.
	1262	*/
	1263	static int warned = FALSE;
	1264
	1265	if (do_lint && ! warned) {
	1266	warned = TRUE;
	1267	lintwarn(_("empty program text on command line"));
	1268	}
	1269	++nextfile;
	1270	goto again;
	1271	}
	1272	if (srcfiles[nextfile].val[l-1] == '\n') {
	1273	/* has terminating newline, can use it directly */
	1274	sourceline = 1;
	1275	lexptr = lexptr_begin = srcfiles[nextfile].val;
	1276	/* fall through to pointer adjustment and return, below */
	1277	} else {
	1278	/* copy it into static buffer */
	1279
	1280	/* make sure buffer exists and has room */
	1281	if (buflen == 0) {
	1282	emalloc(buf, char *, l+2, "get_src_buf");
	1283	buflen = l + 2;
	1284	} else if (l+2 > buflen) {
	1285	erealloc(buf, char *, l+2, "get_src_buf");
	1286	buflen = l + 2;
	1287	} /* else
	1288	buffer has room, just use it */
	1289
	1290	/* copy in data */
	1291	memcpy(buf, srcfiles[nextfile].val, l);
	1292	buf[l] = '\n';
	1293	buf[++l] = '\0';
	1294
	1295	/* set vars and return */
	1296	lexptr = lexptr_begin = buf;
	1297	}
	1298	lexend = lexptr + l;
	1299	nextfile++; /* for next entry to this routine */
	1300	return lexptr;
	1301	}
	1302
	1303	if (! samefile) {
	1304	source = srcfiles[nextfile].val;
	1305	if (source == NULL) { /* read all the source files, all done */
	1306	if (buf != NULL) {
	1307	free(buf);
	1308	buf = NULL;
	1309	}
	1310	buflen = 0;
	1311	return lexeme = lexptr = lexptr_begin = NULL;
	1312	}
	1313	fd = pathopen(source);
	1314	if (fd <= INVALID_HANDLE) {
	1315	char *in;
	1316
	1317	/* suppress file name and line no. in error mesg */
	1318	in = source;
	1319	source = NULL;
	1320	fatal(_("can't open source file `%s' for reading (%s)"),
	1321	in, strerror(errno));
	1322	}
	1323	l = optimal_bufsize(fd, & sbuf);
	1324	/*
	1325	* Make sure that something silly like
	1326	* AWKBUFSIZE=8 make check
	1327	* works ok.
	1328	*/
	1329	#define A_DECENT_BUFFER_SIZE 128
	1330	if (l < A_DECENT_BUFFER_SIZE)
	1331	l = A_DECENT_BUFFER_SIZE;
	1332	#undef A_DECENT_BUFFER_SIZE
	1333
	1334	newfile = TRUE;
	1335
	1336	/* make sure buffer exists and has room */
	1337	if (buflen == 0) {
	1338	emalloc(buf, char *, l+2, "get_src_buf");
	1339	buflen = l + 2;
	1340	} else if (l+2 > buflen) {
	1341	erealloc(buf, char *, l+2, "get_src_buf");
	1342	buflen = l + 2;
	1343	} /* else
	1344	buffer has room, just use it */
	1345
	1346	readcount = l;
	1347	readloc = lexeme = lexptr = lexptr_begin = buf;
	1348	samefile = TRUE;
	1349	sourceline = 1;
	1350	} else {
	1351	/*
	1352	* In same file, ran off edge of buffer.
	1353	* Shift current line down to front, adjust
	1354	* pointers and fill in the rest of the buffer.
	1355	*/
	1356
	1357	int lexeme_offset = lexeme - lexptr_begin;
	1358	int lexptr_offset = lexptr - lexptr_begin;
	1359	int lexend_offset = lexend - lexptr_begin;
	1360
	1361	/* find beginning of current line */
	1362	for (scan = lexeme; scan >= lexptr_begin; scan--) {
	1363	if (*scan == '\n') {
	1364	scan++;
	1365	break;
	1366	}
	1367	}
	1368
	1369	if (scan <= buf) {
	1370	/* have to grow the buffer */
	1371	buflen *= 2;
	1372	erealloc(buf, char *, buflen, "get_src_buf");
	1373	} else {
	1374	/* shift things down */
	1375	memmove(buf, scan, lexend - scan);
	1376	/*
	1377	* make offsets relative to start of line,
	1378	* not start of buffer.
	1379	*/
	1380	lexend_offset = lexend - scan;
	1381	lexeme_offset = lexeme - scan;
	1382	lexptr_offset = lexptr - scan;
	1383	}
	1384
	1385	/* adjust pointers */
	1386	lexeme = buf + lexeme_offset;
	1387	lexptr = buf + lexptr_offset;
	1388	lexend = buf + lexend_offset;
	1389	lexptr_begin = buf;
	1390	readcount = buflen - (lexend - buf);
	1391	readloc = lexend;
	1392	}
	1393
	1394	/* add more data to buffer */
	1395	n = read(fd, readloc, readcount);
	1396	if (n == -1)
	1397	fatal(_("can't read sourcefile `%s' (%s)"),
	1398	source, strerror(errno));
	1399	if (n == 0) {
	1400	if (newfile) {
	1401	static int warned = FALSE;
	1402
	1403	if (do_lint && ! warned) {
	1404	warned = TRUE;
	1405	lintwarn(_("source file `%s' is empty"), source);
	1406	}
	1407	}
	1408	if (fd != fileno(stdin)) /* safety */
	1409	close(fd);
	1410	samefile = FALSE;
	1411	nextfile++;
	1412	goto again;
	1413	}
	1414	lexend = lexptr + n;
	1415	return lexptr;
	1416	}
	1417
	1418	/* tokadd --- add a character to the token buffer */
	1419
	1420	#define tokadd(x) (*tok++ = (x), tok == tokend ? tokexpand() : tok)
	1421
	1422	/* tokexpand --- grow the token buffer */
	1423
	1424	char *
	1425	tokexpand()
	1426	{
	1427	static int toksize = 60;
	1428	int tokoffset;
	1429
	1430	tokoffset = tok - tokstart;
	1431	toksize *= 2;
	1432	if (tokstart != NULL)
	1433	erealloc(tokstart, char *, toksize, "tokexpand");
	1434	else
	1435	emalloc(tokstart, char *, toksize, "tokexpand");
	1436	tokend = tokstart + toksize;
	1437	tok = tokstart + tokoffset;
	1438	return tok;
	1439	}
	1440
	1441	/* nextc --- get the next input character */
	1442
	1443	#ifdef MBS_SUPPORT
	1444
	1445	static int
	1446	nextc(void)
	1447	{
	1448	if (gawk_mb_cur_max > 1) {
	1449	if (!lexptr \|\| lexptr >= lexend) {
	1450	if (! get_src_buf())
	1451	return EOF;
	1452	}
	1453
	1454	/* Update the buffer index. */
	1455	cur_ring_idx = (cur_ring_idx == RING_BUFFER_SIZE - 1)? 0 :
	1456	cur_ring_idx + 1;
	1457
	1458	/* Did we already check the current character? */
	1459	if (cur_char_ring[cur_ring_idx] == 0) {
	1460	/* No, we need to check the next character on the buffer. */
	1461	int idx, work_ring_idx = cur_ring_idx;
	1462	mbstate_t tmp_state;
	1463	size_t mbclen;
	1464
	1465	for (idx = 0 ; lexptr + idx < lexend ; idx++) {
	1466	tmp_state = cur_mbstate;
	1467	mbclen = mbrlen(lexptr, idx + 1, &tmp_state);
	1468
	1469	if (mbclen == 1 \|\| mbclen == (size_t)-1 \|\| mbclen == 0) {
	1470	/* It is a singlebyte character, non-complete multibyte
	1471	character or EOF. We treat it as a singlebyte
	1472	character. */
	1473	cur_char_ring[work_ring_idx] = 1;
	1474	break;
	1475	} else if (mbclen == (size_t)-2) {
	1476	/* It is not a complete multibyte character. */
	1477	cur_char_ring[work_ring_idx] = idx + 1;
	1478	} else {
	1479	/* mbclen > 1 */
	1480	cur_char_ring[work_ring_idx] = mbclen;
	1481	break;
	1482	}
	1483	work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)?
	1484	0 : work_ring_idx + 1;
	1485	}
	1486	cur_mbstate = tmp_state;
	1487
	1488	/* Put a mark on the position on which we write next character. */
	1489	work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)?
	1490	0 : work_ring_idx + 1;
	1491	cur_char_ring[work_ring_idx] = 0;
	1492	}
	1493
	1494	return (int) (unsigned char) *lexptr++;
	1495	}
	1496	else {
	1497	int c;
	1498
	1499	if (lexptr && lexptr < lexend)
	1500	c = (int) (unsigned char) *lexptr++;
	1501	else if (get_src_buf())
	1502	c = (int) (unsigned char) *lexptr++;
	1503	else
	1504	c = EOF;
	1505
	1506	return c;
	1507	}
	1508	}
	1509
	1510	#else /* MBS_SUPPORT */
	1511
	1512	#if GAWKDEBUG
	1513	int
	1514	nextc(void)
	1515	{
	1516	int c;
	1517
	1518	if (lexptr && lexptr < lexend)
	1519	c = (int) (unsigned char) *lexptr++;
	1520	else if (get_src_buf())
	1521	c = (int) (unsigned char) *lexptr++;
	1522	else
	1523	c = EOF;
	1524
	1525	return c;
	1526	}
	1527	#else
	1528	#define nextc() ((lexptr && lexptr < lexend) ? \
	1529	((int) (unsigned char) *lexptr++) : \
	1530	(get_src_buf() ? ((int) (unsigned char) *lexptr++) : EOF) \
	1531	)
	1532	#endif
	1533
	1534	#endif /* MBS_SUPPORT */
	1535
	1536	/* pushback --- push a character back on the input */
	1537
	1538	static inline void
	1539	pushback(void)
	1540	{
	1541	#ifdef MBS_SUPPORT
	1542	if (gawk_mb_cur_max > 1)
	1543	cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 :
	1544	cur_ring_idx - 1;
	1545	#endif
	1546	(lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr);
	1547	}
	1548
	1549
	1550	/* allow_newline --- allow newline after &&, \|\|, ? and : */
	1551
	1552	static void
	1553	allow_newline(void)
	1554	{
	1555	int c;
	1556
	1557	for (;;) {
	1558	c = nextc();
	1559	if (c == EOF)
	1560	break;
	1561	if (c == '#') {
	1562	while ((c = nextc()) != '\n' && c != EOF)
	1563	continue;
	1564	if (c == EOF)
	1565	break;
	1566	}
	1567	if (c == '\n')
	1568	sourceline++;
	1569	if (! ISSPACE(c)) {
	1570	pushback();
	1571	break;
	1572	}
	1573	}
	1574	}
	1575
	1576	/* yylex --- Read the input and turn it into tokens. */
	1577
	1578	static int
	1579	yylex(void)
	1580	{
	1581	register int c;
	1582	int seen_e = FALSE; /* These are for numbers */
	1583	int seen_point = FALSE;
	1584	int esc_seen; /* for literal strings */
	1585	int mid;
	1586	static int did_newline = FALSE;
	1587	char *tokkey;
	1588	static int lasttok = 0, eof_warned = FALSE;
	1589	int inhex = FALSE;
	1590	int intlstr = FALSE;
	1591
	1592	if (nextc() == EOF) {
	1593	if (lasttok != NEWLINE) {
	1594	lasttok = NEWLINE;
	1595	if (do_lint && ! eof_warned) {
	1596	lintwarn(_("source file does not end in newline"));
	1597	eof_warned = TRUE;
	1598	}
	1599	return NEWLINE; /* fake it */
	1600	}
	1601	return 0;
	1602	}
	1603	pushback();
	1604	#if defined OS2 \|\| defined __EMX__
	1605	/*
	1606	* added for OS/2's extproc feature of cmd.exe
	1607	* (like #! in BSD sh)
	1608	*/
	1609	if (strncasecmp(lexptr, "extproc ", 8) == 0) {
	1610	while (lexptr && lexptr != '\n')
	1611	lexptr++;
	1612	}
	1613	#endif
	1614	lexeme = lexptr;
	1615	thisline = NULL;
	1616	if (want_regexp) {
	1617	int in_brack = 0; /* count brackets, [[:alnum:]] allowed */
	1618	/*
	1619	* Counting brackets is non-trivial. [[] is ok,
	1620	* and so is [\]], with a point being that /[/]/ as a regexp
	1621	* constant has to work.
	1622	*
	1623	* Do not count [ or ] if either one is preceded by a \.
	1624	* A `[' should be counted if
	1625	* a) it is the first one so far (in_brack == 0)
	1626	* b) it is the `[' in `[:'
	1627	* A ']' should be counted if not preceded by a \, since
	1628	* it is either closing `:]' or just a plain list.
	1629	* According to POSIX, []] is how you put a ] into a set.
	1630	* Try to handle that too.
	1631	*
	1632	* The code for \ handles \[ and \].
	1633	*/
	1634
	1635	want_regexp = FALSE;
	1636	tok = tokstart;
	1637	for (;;) {
	1638	c = nextc();
	1639
	1640	if (gawk_mb_cur_max == 1 \|\| nextc_is_1stbyte) switch (c) {
	1641	case '[':
	1642	/* one day check for `.' and `=' too */
	1643	if (nextc() == ':' \|\| in_brack == 0)
	1644	in_brack++;
	1645	pushback();
	1646	break;
	1647	case ']':
	1648	if (tokstart[0] == '['
	1649	&& (tok == tokstart + 1
	1650	\|\| (tok == tokstart + 2
	1651	&& tokstart[1] == '^')))
	1652	/* do nothing */;
	1653	else
	1654	in_brack--;
	1655	break;
	1656	case '\\':
	1657	if ((c = nextc()) == EOF) {
	1658	yyerror(_("unterminated regexp ends with `\\' at end of file"));
	1659	goto end_regexp; /* kludge */
	1660	} else if (c == '\n') {
	1661	sourceline++;
	1662	continue;
	1663	} else {
	1664	tokadd('\\');
	1665	tokadd(c);
	1666	continue;
	1667	}
	1668	break;
	1669	case '/': /* end of the regexp */
	1670	if (in_brack > 0)
	1671	break;
	1672	end_regexp:
	1673	tokadd('\0');
	1674	yylval.sval = tokstart;
	1675	if (do_lint) {
	1676	int peek = nextc();
	1677
	1678	pushback();
	1679	if (peek == 'i' \|\| peek == 's') {
	1680	if (source)
	1681	lintwarn(
	1682	_("%s: %d: tawk regex modifier `/.../%c' doesn't work in gawk"),
	1683	source, sourceline, peek);
	1684	else
	1685	lintwarn(
	1686	_("tawk regex modifier `/.../%c' doesn't work in gawk"),
	1687	peek);
	1688	}
	1689	}
	1690	return lasttok = REGEXP;
	1691	case '\n':
	1692	pushback();
	1693	yyerror(_("unterminated regexp"));
	1694	goto end_regexp; /* kludge */
	1695	case EOF:
	1696	yyerror(_("unterminated regexp at end of file"));
	1697	goto end_regexp; /* kludge */
	1698	}
	1699	tokadd(c);
	1700	}
	1701	}
	1702	retry:
	1703
	1704	/* skipping \r is a hack, but windows is just too pervasive. sigh. */
	1705	while ((c = nextc()) == ' ' \|\| c == '\t' \|\| c == '\r')
	1706	continue;
	1707
	1708	lexeme = lexptr ? lexptr - 1 : lexptr;
	1709	thisline = NULL;
	1710	tok = tokstart;
	1711	yylval.nodetypeval = Node_illegal;
	1712
	1713	if (gawk_mb_cur_max == 1 \|\| nextc_is_1stbyte) switch (c) {
	1714	case EOF:
	1715	if (lasttok != NEWLINE) {
	1716	lasttok = NEWLINE;
	1717	if (do_lint && ! eof_warned) {
	1718	lintwarn(_("source file does not end in newline"));
	1719	eof_warned = TRUE;
	1720	}
	1721	return NEWLINE; /* fake it */
	1722	}
	1723	return 0;
	1724
	1725	case '\n':
	1726	sourceline++;
	1727	return lasttok = NEWLINE;
	1728
	1729	case '#': /* it's a comment */
	1730	while ((c = nextc()) != '\n') {
	1731	if (c == EOF) {
	1732	if (lasttok != NEWLINE) {
	1733	lasttok = NEWLINE;
	1734	if (do_lint && ! eof_warned) {
	1735	lintwarn(
	1736	_("source file does not end in newline"));
	1737	eof_warned = TRUE;
	1738	}
	1739	return NEWLINE; /* fake it */
	1740	}
	1741	return 0;
	1742	}
	1743	}
	1744	sourceline++;
	1745	return lasttok = NEWLINE;
	1746
	1747	case '\\':
	1748	#ifdef RELAXED_CONTINUATION
	1749	/*
	1750	* This code puports to allow comments and/or whitespace
	1751	* after the `\' at the end of a line used for continuation.
	1752	* Use it at your own risk. We think it's a bad idea, which
	1753	* is why it's not on by default.
	1754	*/
	1755	if (! do_traditional) {
	1756	/* strip trailing white-space and/or comment */
	1757	while ((c = nextc()) == ' ' \|\| c == '\t' \|\| c == '\r')
	1758	continue;
	1759	if (c == '#') {
	1760	if (do_lint)
	1761	lintwarn(
	1762	_("use of `\\ #...' line continuation is not portable"));
	1763	while ((c = nextc()) != '\n')
	1764	if (c == EOF)
	1765	break;
	1766	}
	1767	pushback();
	1768	}
	1769	#endif /* RELAXED_CONTINUATION */
	1770	if (nextc() == '\n') {
	1771	sourceline++;
	1772	goto retry;
	1773	} else {
	1774	yyerror(_("backslash not last character on line"));
	1775	exit(1);
	1776	}
	1777	break;
	1778
	1779	case ':':
	1780	case '?':
	1781	if (! do_posix)
	1782	allow_newline();
	1783	return lasttok = c;
	1784
	1785	/*
	1786	* in_parens is undefined unless we are parsing a print
	1787	* statement (in_print), but why bother with a check?
	1788	*/
	1789	case ')':
	1790	in_parens--;
	1791	return lasttok = c;
	1792
	1793	case '(':
	1794	in_parens++;
	1795	/* FALL THROUGH */
	1796	case '$':
	1797	case ';':
	1798	case '{':
	1799	case ',':
	1800	case '[':
	1801	case ']':
	1802	return lasttok = c;
	1803
	1804	case '*':
	1805	if ((c = nextc()) == '=') {
	1806	yylval.nodetypeval = Node_assign_times;
	1807	return lasttok = ASSIGNOP;
	1808	} else if (do_posix) {
	1809	pushback();
	1810	return lasttok = '*';
	1811	} else if (c == '*') {
	1812	/* make and = aliases for ^ and ^= */
	1813	static int did_warn_op = FALSE, did_warn_assgn = FALSE;
	1814
	1815	if (nextc() == '=') {
	1816	if (! did_warn_assgn) {
	1817	did_warn_assgn = TRUE;
	1818	if (do_lint)
	1819	lintwarn(_("POSIX does not allow operator `**='"));
	1820	if (do_lint_old)
	1821	warning(_("old awk does not support operator `**='"));
	1822	}
	1823	yylval.nodetypeval = Node_assign_exp;
	1824	return ASSIGNOP;
	1825	} else {
	1826	pushback();
	1827	if (! did_warn_op) {
	1828	did_warn_op = TRUE;
	1829	if (do_lint)
	1830	lintwarn(_("POSIX does not allow operator `**'"));
	1831	if (do_lint_old)
	1832	warning(_("old awk does not support operator `**'"));
	1833	}
	1834	return lasttok = '^';
	1835	}
	1836	}
	1837	pushback();
	1838	return lasttok = '*';
	1839
	1840	case '/':
	1841	if (nextc() == '=') {
	1842	pushback();
	1843	return lasttok = SLASH_BEFORE_EQUAL;
	1844	}
	1845	pushback();
	1846	return lasttok = '/';
	1847
	1848	case '%':
	1849	if (nextc() == '=') {
	1850	yylval.nodetypeval = Node_assign_mod;
	1851	return lasttok = ASSIGNOP;
	1852	}
	1853	pushback();
	1854	return lasttok = '%';
	1855
	1856	case '^':
	1857	{
	1858	static int did_warn_op = FALSE, did_warn_assgn = FALSE;
	1859
	1860	if (nextc() == '=') {
	1861	if (do_lint_old && ! did_warn_assgn) {
	1862	did_warn_assgn = TRUE;
	1863	warning(_("operator `^=' is not supported in old awk"));
	1864	}
	1865	yylval.nodetypeval = Node_assign_exp;
	1866	return lasttok = ASSIGNOP;
	1867	}
	1868	pushback();
	1869	if (do_lint_old && ! did_warn_op) {
	1870	did_warn_op = TRUE;
	1871	warning(_("operator `^' is not supported in old awk"));
	1872	}
	1873	return lasttok = '^';
	1874	}
	1875
	1876	case '+':
	1877	if ((c = nextc()) == '=') {
	1878	yylval.nodetypeval = Node_assign_plus;
	1879	return lasttok = ASSIGNOP;
	1880	}
	1881	if (c == '+')
	1882	return lasttok = INCREMENT;
	1883	pushback();
	1884	return lasttok = '+';
	1885
	1886	case '!':
	1887	if ((c = nextc()) == '=') {
	1888	yylval.nodetypeval = Node_notequal;
	1889	return lasttok = RELOP;
	1890	}
	1891	if (c == '~') {
	1892	yylval.nodetypeval = Node_nomatch;
	1893	return lasttok = MATCHOP;
	1894	}
	1895	pushback();
	1896	return lasttok = '!';
	1897
	1898	case '<':
	1899	if (nextc() == '=') {
	1900	yylval.nodetypeval = Node_leq;
	1901	return lasttok = RELOP;
	1902	}
	1903	yylval.nodetypeval = Node_less;
	1904	pushback();
	1905	return lasttok = '<';
	1906
	1907	case '=':
	1908	if (nextc() == '=') {
	1909	yylval.nodetypeval = Node_equal;
	1910	return lasttok = RELOP;
	1911	}
	1912	yylval.nodetypeval = Node_assign;
	1913	pushback();
	1914	return lasttok = ASSIGN;
	1915
	1916	case '>':
	1917	if ((c = nextc()) == '=') {
	1918	yylval.nodetypeval = Node_geq;
	1919	return lasttok = RELOP;
	1920	} else if (c == '>') {
	1921	yylval.nodetypeval = Node_redirect_append;
	1922	return lasttok = IO_OUT;
	1923	}
	1924	pushback();
	1925	if (in_print && in_parens == 0) {
	1926	yylval.nodetypeval = Node_redirect_output;
	1927	return lasttok = IO_OUT;
	1928	}
	1929	yylval.nodetypeval = Node_greater;
	1930	return lasttok = '>';
	1931
	1932	case '~':
	1933	yylval.nodetypeval = Node_match;
	1934	return lasttok = MATCHOP;
	1935
	1936	case '}':
	1937	/*
	1938	* Added did newline stuff. Easier than
	1939	* hacking the grammar.
	1940	*/
	1941	if (did_newline) {
	1942	did_newline = FALSE;
	1943	return lasttok = c;
	1944	}
	1945	did_newline++;
	1946	--lexptr; /* pick up } next time */
	1947	return lasttok = NEWLINE;
	1948
	1949	case '"':
	1950	string:
	1951	esc_seen = FALSE;
	1952	while ((c = nextc()) != '"') {
	1953	if (c == '\n') {
	1954	pushback();
	1955	yyerror(_("unterminated string"));
	1956	exit(1);
	1957	}
	1958	if ((gawk_mb_cur_max == 1 \|\| nextc_is_1stbyte) &&
	1959	c == '\\') {
	1960	c = nextc();
	1961	if (c == '\n') {
	1962	sourceline++;
	1963	continue;
	1964	}
	1965	esc_seen = TRUE;
	1966	tokadd('\\');
	1967	}
	1968	if (c == EOF) {
	1969	pushback();
	1970	yyerror(_("unterminated string"));
	1971	exit(1);
	1972	}
	1973	tokadd(c);
	1974	}
	1975	yylval.nodeval = make_str_node(tokstart,
	1976	tok - tokstart, esc_seen ? SCAN : 0);
	1977	yylval.nodeval->flags \|= PERM;
	1978	if (intlstr) {
	1979	yylval.nodeval->flags \|= INTLSTR;
	1980	intlstr = FALSE;
	1981	if (do_intl)
	1982	dumpintlstr(yylval.nodeval->stptr,
	1983	yylval.nodeval->stlen);
	1984	}
	1985	return lasttok = YSTRING;
	1986
	1987	case '-':
	1988	if ((c = nextc()) == '=') {
	1989	yylval.nodetypeval = Node_assign_minus;
	1990	return lasttok = ASSIGNOP;
	1991	}
	1992	if (c == '-')
	1993	return lasttok = DECREMENT;
	1994	pushback();
	1995	return lasttok = '-';
	1996
	1997	case '.':
	1998	c = nextc();
	1999	pushback();
	2000	if (! ISDIGIT(c))
	2001	return lasttok = '.';
	2002	else
	2003	c = '.';
	2004	/* FALL THROUGH */
	2005	case '0':
	2006	case '1':
	2007	case '2':
	2008	case '3':
	2009	case '4':
	2010	case '5':
	2011	case '6':
	2012	case '7':
	2013	case '8':
	2014	case '9':
	2015	/* It's a number */
	2016	for (;;) {
	2017	int gotnumber = FALSE;
	2018
	2019	tokadd(c);
	2020	switch (c) {
	2021	case 'x':
	2022	case 'X':
	2023	if (do_traditional)
	2024	goto done;
	2025	if (tok == tokstart + 2) {
	2026	int peek = nextc();
	2027
	2028	if (ISXDIGIT(peek)) {
	2029	inhex = TRUE;
	2030	pushback(); /* following digit */
	2031	} else {
	2032	pushback(); /* x or X */
	2033	goto done;
	2034	}
	2035	}
	2036	break;
	2037	case '.':
	2038	/* period ends exponent part of floating point number */
	2039	if (seen_point \|\| seen_e) {
	2040	gotnumber = TRUE;
	2041	break;
	2042	}
	2043	seen_point = TRUE;
	2044	break;
	2045	case 'e':
	2046	case 'E':
	2047	if (inhex)
	2048	break;
	2049	if (seen_e) {
	2050	gotnumber = TRUE;
	2051	break;
	2052	}
	2053	seen_e = TRUE;
	2054	if ((c = nextc()) == '-' \|\| c == '+') {
	2055	int c2 = nextc();
	2056
	2057	if (ISDIGIT(c2)) {
	2058	tokadd(c);
	2059	tokadd(c2);
	2060	} else {
	2061	pushback(); /* non-digit after + or - */
	2062	pushback(); /* + or - */
	2063	pushback(); /* e or E */
	2064	}
	2065	} else if (! ISDIGIT(c)) {
	2066	pushback(); /* character after e or E */
	2067	pushback(); /* e or E */
	2068	} else {
	2069	pushback(); /* digit */
	2070	}
	2071	break;
	2072	case 'a':
	2073	case 'A':
	2074	case 'b':
	2075	case 'B':
	2076	case 'c':
	2077	case 'C':
	2078	case 'D':
	2079	case 'd':
	2080	case 'f':
	2081	case 'F':
	2082	if (do_traditional \|\| ! inhex)
	2083	goto done;
	2084	/* fall through */
	2085	case '0':
	2086	case '1':
	2087	case '2':
	2088	case '3':
	2089	case '4':
	2090	case '5':
	2091	case '6':
	2092	case '7':
	2093	case '8':
	2094	case '9':
	2095	break;
	2096	default:
	2097	done:
	2098	gotnumber = TRUE;
	2099	}
	2100	if (gotnumber)
	2101	break;
	2102	c = nextc();
	2103	}
	2104	if (c != EOF)
	2105	pushback();
	2106	else if (do_lint && ! eof_warned) {
	2107	lintwarn(_("source file does not end in newline"));
	2108	eof_warned = TRUE;
	2109	}
	2110	tokadd('\0');
	2111	if (! do_traditional && isnondecimal(tokstart, FALSE)) {
	2112	if (do_lint) {
	2113	if (ISDIGIT(tokstart[1])) /* not an 'x' or 'X' */
	2114	lintwarn("numeric constant `%.*s' treated as octal",
	2115	(int) strlen(tokstart)-1, tokstart);
	2116	else if (tokstart[1] == 'x' \|\| tokstart[1] == 'X')
	2117	lintwarn("numeric constant `%.*s' treated as hexadecimal",
	2118	(int) strlen(tokstart)-1, tokstart);
	2119	}
	2120	yylval.nodeval = make_number(nondec2awknum(tokstart, strlen(tokstart)));
	2121	} else
	2122	yylval.nodeval = make_number(atof(tokstart));
	2123	yylval.nodeval->flags \|= PERM;
	2124	return lasttok = YNUMBER;
	2125
	2126	case '&':
	2127	if ((c = nextc()) == '&') {
	2128	yylval.nodetypeval = Node_and;
	2129	allow_newline();
	2130	return lasttok = LEX_AND;
	2131	}
	2132	pushback();
	2133	return lasttok = '&';
	2134
	2135	case '\|':
	2136	if ((c = nextc()) == '\|') {
	2137	yylval.nodetypeval = Node_or;
	2138	allow_newline();
	2139	return lasttok = LEX_OR;
	2140	} else if (! do_traditional && c == '&') {
	2141	yylval.nodetypeval = Node_redirect_twoway;
	2142	return lasttok = (in_print && in_parens == 0 ? IO_OUT : IO_IN);
	2143	}
	2144	pushback();
	2145	if (in_print && in_parens == 0) {
	2146	yylval.nodetypeval = Node_redirect_pipe;
	2147	return lasttok = IO_OUT;
	2148	} else {
	2149	yylval.nodetypeval = Node_redirect_pipein;
	2150	return lasttok = IO_IN;
	2151	}
	2152	}
	2153
	2154	if (c != '_' && ! ISALPHA(c)) {
	2155	yyerror(_("invalid char '%c' in expression"), c);
	2156	exit(1);
	2157	}
	2158
	2159	/*
	2160	* Lots of fog here. Consider:
	2161	*
	2162	* print "xyzzy"$_"foo"
	2163	*
	2164	* Without the check for ` lasttok != '$' ', this is parsed as
	2165	*
	2166	* print "xxyzz" $(_"foo")
	2167	*
	2168	* With the check, it is "correctly" parsed as three
	2169	* string concatenations. Sigh. This seems to be
	2170	* "more correct", but this is definitely one of those
	2171	* occasions where the interactions are funny.
	2172	*/
	2173	if (! do_traditional && c == '_' && lasttok != '$') {
	2174	if ((c = nextc()) == '"') {
	2175	intlstr = TRUE;
	2176	goto string;
	2177	}
	2178	pushback();
	2179	c = '_';
	2180	}
	2181
	2182	/* it's some type of name-type-thing. Find its length. */
	2183	tok = tokstart;
	2184	while (is_identchar(c)) {
	2185	tokadd(c);
	2186	c = nextc();
	2187	}
	2188	tokadd('\0');
	2189	emalloc(tokkey, char *, tok - tokstart, "yylex");
	2190	memcpy(tokkey, tokstart, tok - tokstart);
	2191	if (c != EOF)
	2192	pushback();
	2193	else if (do_lint && ! eof_warned) {
	2194	lintwarn(_("source file does not end in newline"));
	2195	eof_warned = TRUE;
	2196	}
	2197
	2198	/* See if it is a special token. */
	2199
	2200	if ((mid = check_special(tokstart)) >= 0) {
	2201	if (do_lint) {
	2202	if (tokentab[mid].flags & GAWKX)
	2203	lintwarn(_("`%s' is a gawk extension"),
	2204	tokentab[mid].operator);
	2205	if (tokentab[mid].flags & RESX)
	2206	lintwarn(_("`%s' is a Bell Labs extension"),
	2207	tokentab[mid].operator);
	2208	if (tokentab[mid].flags & NOT_POSIX)
	2209	lintwarn(_("POSIX does not allow `%s'"),
	2210	tokentab[mid].operator);
	2211	}
	2212	if (do_lint_old && (tokentab[mid].flags & NOT_OLD))
	2213	warning(_("`%s' is not supported in old awk"),
	2214	tokentab[mid].operator);
	2215	if ((do_traditional && (tokentab[mid].flags & GAWKX))
	2216	\|\| (do_posix && (tokentab[mid].flags & NOT_POSIX)))
	2217	;
	2218	else {
	2219	if (tokentab[mid].class == LEX_BUILTIN
	2220	\|\| tokentab[mid].class == LEX_LENGTH)
	2221	yylval.lval = mid;
	2222	else
	2223	yylval.nodetypeval = tokentab[mid].value;
	2224	free(tokkey);
	2225	return lasttok = tokentab[mid].class;
	2226	}
	2227	}
	2228
	2229	yylval.sval = tokkey;
	2230	if (*lexptr == '(')
	2231	return lasttok = FUNC_CALL;
	2232	else {
	2233	static short goto_warned = FALSE;
	2234
	2235	#define SMART_ALECK 1
	2236	if (SMART_ALECK && do_lint
	2237	&& ! goto_warned && strcasecmp(tokkey, "goto") == 0) {
	2238	goto_warned = TRUE;
	2239	lintwarn(_("`goto' considered harmful!\n"));
	2240	}
	2241	return lasttok = NAME;
	2242	}
	2243	}
	2244
	2245	/* node_common --- common code for allocating a new node */
	2246
	2247	static NODE *
	2248	node_common(NODETYPE op)
	2249	{
	2250	register NODE *r;
	2251
	2252	getnode(r);
	2253	r->type = op;
	2254	r->flags = MALLOC;
	2255	/* if lookahead is a NL, lineno is 1 too high */
	2256	if (lexeme && lexeme >= lexptr_begin && *lexeme == '\n')
	2257	r->source_line = sourceline - 1;
	2258	else
	2259	r->source_line = sourceline;
	2260	r->source_file = source;
	2261	return r;
	2262	}
	2263
	2264	/* node --- allocates a node with defined lnode and rnode. */
	2265
	2266	NODE *
	2267	node(NODE left, NODETYPE op, NODE right)
	2268	{
	2269	register NODE *r;
	2270
	2271	r = node_common(op);
	2272	r->lnode = left;
	2273	r->rnode = right;
	2274	return r;
	2275	}
	2276
	2277	/* snode --- allocate a node with defined subnode and builtin for builtin
	2278	functions. Checks for arg. count and supplies defaults where
	2279	possible. */
	2280
	2281	static NODE *
	2282	snode(NODE *subn, NODETYPE op, int idx)
	2283	{
	2284	register NODE *r;
	2285	register NODE *n;
	2286	int nexp = 0;
	2287	int args_allowed;
	2288
	2289	r = node_common(op);
	2290
	2291	/* traverse expression list to see how many args. given */
	2292	for (n = subn; n != NULL; n = n->rnode) {
	2293	nexp++;
	2294	if (nexp > 5)
	2295	break;
	2296	}
	2297
	2298	/* check against how many args. are allowed for this builtin */
	2299	args_allowed = tokentab[idx].flags & ARGS;
	2300	if (args_allowed && (args_allowed & A(nexp)) == 0)
	2301	fatal(_("%d is invalid as number of arguments for %s"),
	2302	nexp, tokentab[idx].operator);
	2303
	2304	r->builtin = tokentab[idx].ptr;
	2305
	2306	/* special case processing for a few builtins */
	2307	if (nexp == 0 && r->builtin == do_length) {
	2308	subn = node(node(make_number(0.0), Node_field_spec, (NODE *) NULL),
	2309	Node_expression_list,
	2310	(NODE *) NULL);
	2311	} else if (r->builtin == do_match) {
	2312	static short warned = FALSE;
	2313
	2314	if (subn->rnode->lnode->type != Node_regex)
	2315	subn->rnode->lnode = mk_rexp(subn->rnode->lnode);
	2316
	2317	if (subn->rnode->rnode != NULL) { /* 3rd argument there */
	2318	if (do_lint && ! warned) {
	2319	warned = TRUE;
	2320	lintwarn(_("match: third argument is a gawk extension"));
	2321	}
	2322	if (do_traditional)
	2323	fatal(_("match: third argument is a gawk extension"));
	2324	}
	2325	} else if (r->builtin == do_sub \|\| r->builtin == do_gsub) {
	2326	if (subn->lnode->type != Node_regex)
	2327	subn->lnode = mk_rexp(subn->lnode);
	2328	if (nexp == 2)
	2329	append_right(subn, node(node(make_number(0.0),
	2330	Node_field_spec,
	2331	(NODE *) NULL),
	2332	Node_expression_list,
	2333	(NODE *) NULL));
	2334	else if (subn->rnode->rnode->lnode->type == Node_val) {
	2335	if (do_lint)
	2336	lintwarn(_("%s: string literal as last arg of substitute has no effect"),
	2337	(r->builtin == do_sub) ? "sub" : "gsub");
	2338	} else if (! isassignable(subn->rnode->rnode->lnode)) {
	2339	yyerror(_("%s third parameter is not a changeable object"),
	2340	(r->builtin == do_sub) ? "sub" : "gsub");
	2341	}
	2342	} else if (r->builtin == do_gensub) {
	2343	if (subn->lnode->type != Node_regex)
	2344	subn->lnode = mk_rexp(subn->lnode);
	2345	if (nexp == 3)
	2346	append_right(subn, node(node(make_number(0.0),
	2347	Node_field_spec,
	2348	(NODE *) NULL),
	2349	Node_expression_list,
	2350	(NODE *) NULL));
	2351	} else if (r->builtin == do_split) {
	2352	if (nexp == 2)
	2353	append_right(subn,
	2354	node(FS_node, Node_expression_list, (NODE *) NULL));
	2355	n = subn->rnode->rnode->lnode;
	2356	if (n->type != Node_regex)
	2357	subn->rnode->rnode->lnode = mk_rexp(n);
	2358	if (nexp == 2)
	2359	subn->rnode->rnode->lnode->re_flags \|= FS_DFLT;
	2360	} else if (r->builtin == do_close) {
	2361	static short warned = FALSE;
	2362
	2363	if ( nexp == 2) {
	2364	if (do_lint && nexp == 2 && ! warned) {
	2365	warned = TRUE;
	2366	lintwarn(_("close: second argument is a gawk extension"));
	2367	}
	2368	if (do_traditional)
	2369	fatal(_("close: second argument is a gawk extension"));
	2370	}
	2371	} else if (do_intl /* --gen-po */
	2372	&& r->builtin == do_dcgettext /* dcgettext(...) */
	2373	&& subn->lnode->type == Node_val /* 1st arg is constant */
	2374	&& (subn->lnode->flags & STRCUR) != 0) { /* it's a string constant */
	2375	/* ala xgettext, dcgettext("some string" ...) dumps the string */
	2376	NODE *str = subn->lnode;
	2377
	2378	if ((str->flags & INTLSTR) != 0)
	2379	warning(_("use of dcgettext(_\"...\") is incorrect: remove leading underscore"));
	2380	/* don't dump it, the lexer already did */
	2381	else
	2382	dumpintlstr(str->stptr, str->stlen);
	2383	} else if (do_intl /* --gen-po */
	2384	&& r->builtin == do_dcngettext /* dcngettext(...) */
	2385	&& subn->lnode->type == Node_val /* 1st arg is constant */
	2386	&& (subn->lnode->flags & STRCUR) != 0 /* it's a string constant */
	2387	&& subn->rnode->lnode->type == Node_val /* 2nd arg is constant too */
	2388	&& (subn->rnode->lnode->flags & STRCUR) != 0) { /* it's a string constant */
	2389	/* ala xgettext, dcngettext("some string", "some plural" ...) dumps the string */
	2390	NODE *str1 = subn->lnode;
	2391	NODE *str2 = subn->rnode->lnode;
	2392
	2393	if (((str1->flags \| str2->flags) & INTLSTR) != 0)
	2394	warning(_("use of dcngettext(_\"...\") is incorrect: remove leading underscore"));
	2395	else
	2396	dumpintlstr2(str1->stptr, str1->stlen, str2->stptr, str2->stlen);
	2397	}
	2398
	2399	r->subnode = subn;
	2400	if (r->builtin == do_sprintf) {
	2401	count_args(r);
	2402	r->lnode->printf_count = r->printf_count; /* hack */
	2403	}
	2404	return r;
	2405	}
	2406
	2407	/* make_for_loop --- build a for loop */
	2408
	2409	static NODE *
	2410	make_for_loop(NODE init, NODE cond, NODE *incr)
	2411	{
	2412	register FOR_LOOP_HEADER *r;
	2413	NODE *n;
	2414
	2415	emalloc(r, FOR_LOOP_HEADER *, sizeof(FOR_LOOP_HEADER), "make_for_loop");
	2416	getnode(n);
	2417	n->type = Node_illegal;
	2418	r->init = init;
	2419	r->cond = cond;
	2420	r->incr = incr;
	2421	n->sub.nodep.r.hd = r;
	2422	return n;
	2423	}
	2424
	2425	/* dup_parms --- return TRUE if there are duplicate parameters */
	2426
	2427	static int
	2428	dup_parms(NODE *func)
	2429	{
	2430	register NODE *np;
	2431	const char fname, *names;
	2432	int count, i, j, dups;
	2433	NODE *params;
	2434
	2435	if (func == NULL) /* error earlier */
	2436	return TRUE;
	2437
	2438	fname = func->param;
	2439	count = func->param_cnt;
	2440	params = func->rnode;
	2441
	2442	if (count == 0) /* no args, no problem */
	2443	return FALSE;
	2444
	2445	if (params == NULL) /* error earlier */
	2446	return TRUE;
	2447
	2448	emalloc(names, const char *, count sizeof(char *), "dup_parms");
	2449
	2450	i = 0;
	2451	for (np = params; np != NULL; np = np->rnode) {
	2452	if (np->param == NULL) { /* error earlier, give up, go home */
	2453	free(names);
	2454	return TRUE;
	2455	}
	2456	names[i++] = np->param;
	2457	}
	2458
	2459	dups = 0;
	2460	for (i = 1; i < count; i++) {
	2461	for (j = 0; j < i; j++) {
	2462	if (strcmp(names[i], names[j]) == 0) {
	2463	dups++;
	2464	error(
	2465	_("function `%s': parameter #%d, `%s', duplicates parameter #%d"),
	2466	fname, i+1, names[j], j+1);
	2467	}
	2468	}
	2469	}
	2470
	2471	free(names);
	2472	return (dups > 0 ? TRUE : FALSE);
	2473	}
	2474
	2475	/* parms_shadow --- check if parameters shadow globals */
	2476
	2477	static int
	2478	parms_shadow(const char fname, NODE func)
	2479	{
	2480	int count, i;
	2481	int ret = FALSE;
	2482
	2483	if (fname == NULL \|\| func == NULL) /* error earlier */
	2484	return FALSE;
	2485
	2486	count = func->lnode->param_cnt;
	2487
	2488	if (count == 0) /* no args, no problem */
	2489	return FALSE;
	2490
	2491	/*
	2492	* Use warning() and not lintwarn() so that can warn
	2493	* about all shadowed parameters.
	2494	*/
	2495	for (i = 0; i < count; i++) {
	2496	if (lookup(func->parmlist[i]) != NULL) {
	2497	warning(
	2498	_("function `%s': parameter `%s' shadows global variable"),
	2499	fname, func->parmlist[i]);
	2500	ret = TRUE;
	2501	}
	2502	}
	2503
	2504	return ret;
	2505	}
	2506
	2507	/*
	2508	* install:
	2509	* Install a name in the symbol table, even if it is already there.
	2510	* Caller must check against redefinition if that is desired.
	2511	*/
	2512
	2513	NODE *
	2514	install(char name, NODE value)
	2515	{
	2516	register NODE *hp;
	2517	register size_t len;
	2518	register int bucket;
	2519
	2520	var_count++;
	2521	len = strlen(name);
	2522	bucket = hash(name, len, (unsigned long) HASHSIZE);
	2523	getnode(hp);
	2524	hp->type = Node_hashnode;
	2525	hp->hnext = variables[bucket];
	2526	variables[bucket] = hp;
	2527	hp->hlength = len;
	2528	hp->hvalue = value;
	2529	hp->hname = name;
	2530	hp->hvalue->vname = name;
	2531	return hp->hvalue;
	2532	}
	2533
	2534	/* lookup --- find the most recent hash node for name installed by install */
	2535
	2536	NODE *
	2537	lookup(const char *name)
	2538	{
	2539	register NODE *bucket;
	2540	register size_t len;
	2541
	2542	len = strlen(name);
	2543	for (bucket = variables[hash(name, len, (unsigned long) HASHSIZE)];
	2544	bucket != NULL; bucket = bucket->hnext)
	2545	if (bucket->hlength == len && STREQN(bucket->hname, name, len))
	2546	return bucket->hvalue;
	2547
	2548	return NULL;
	2549	}
	2550
	2551	/* var_comp --- compare two variable names */
	2552
	2553	static int
	2554	var_comp(const void v1, const void v2)
	2555	{
	2556	const NODE const npp1, const npp2;
	2557	const NODE n1, n2;
	2558	int minlen;
	2559
	2560	npp1 = (const NODE const ) v1;
	2561	npp2 = (const NODE const ) v2;
	2562	n1 = *npp1;
	2563	n2 = *npp2;
	2564
	2565	if (n1->hlength > n2->hlength)
	2566	minlen = n1->hlength;
	2567	else
	2568	minlen = n2->hlength;
	2569
	2570	return strncmp(n1->hname, n2->hname, minlen);
	2571	}
	2572
	2573	/* valinfo --- dump var info */
	2574
	2575	static void
	2576	valinfo(NODE n, FILE fp)
	2577	{
	2578	if (n->flags & STRING) {
	2579	fprintf(fp, "string (");
	2580	pp_string_fp(fp, n->stptr, n->stlen, '"', FALSE);
	2581	fprintf(fp, ")\n");
	2582	} else if (n->flags & NUMBER)
	2583	fprintf(fp, "number (%.17g)\n", n->numbr);
	2584	else if (n->flags & STRCUR) {
	2585	fprintf(fp, "string value (");
	2586	pp_string_fp(fp, n->stptr, n->stlen, '"', FALSE);
	2587	fprintf(fp, ")\n");
	2588	} else if (n->flags & NUMCUR)
	2589	fprintf(fp, "number value (%.17g)\n", n->numbr);
	2590	else
	2591	fprintf(fp, "?? flags %s\n", flags2str(n->flags));
	2592	}
	2593
	2594
	2595	/* dump_vars --- dump the symbol table */
	2596
	2597	void
	2598	dump_vars(const char *fname)
	2599	{
	2600	int i, j;
	2601	NODE **table;
	2602	NODE *p;
	2603	FILE *fp;
	2604
	2605	emalloc(table, NODE *, var_count sizeof(NODE *), "dump_vars");
	2606
	2607	if (fname == NULL)
	2608	fp = stderr;
	2609	else if ((fp = fopen(fname, "w")) == NULL) {
	2610	warning(_("could not open `%s' for writing (%s)"), fname, strerror(errno));
	2611	warning(_("sending profile to standard error"));
	2612	fp = stderr;
	2613	}
	2614
	2615	for (i = j = 0; i < HASHSIZE; i++)
	2616	for (p = variables[i]; p != NULL; p = p->hnext)
	2617	table[j++] = p;
	2618
	2619	assert(j == var_count);
	2620
	2621	/* Shazzam! */
	2622	qsort(table, j, sizeof(NODE *), var_comp);
	2623
	2624	for (i = 0; i < j; i++) {
	2625	p = table[i];
	2626	if (p->hvalue->type == Node_func)
	2627	continue;
	2628	fprintf(fp, "%.*s: ", (int) p->hlength, p->hname);
	2629	if (p->hvalue->type == Node_var_array)
	2630	fprintf(fp, "array, %ld elements\n", p->hvalue->table_size);
	2631	else if (p->hvalue->type == Node_var_new)
	2632	fprintf(fp, "unused variable\n");
	2633	else if (p->hvalue->type == Node_var)
	2634	valinfo(p->hvalue->var_value, fp);
	2635	else {
	2636	NODE **lhs = get_lhs(p->hvalue, NULL, FALSE);
	2637
	2638	valinfo(*lhs, fp);
	2639	}
	2640	}
	2641
	2642	if (fp != stderr && fclose(fp) != 0)
	2643	warning(_("%s: close failed (%s)"), fname, strerror(errno));
	2644
	2645	free(table);
	2646	}
	2647
	2648	/* release_all_vars --- free all variable memory */
	2649
	2650	void
	2651	release_all_vars()
	2652	{
	2653	int i;
	2654	NODE p, next;
	2655
	2656	for (i = 0; i < HASHSIZE; i++)
	2657	for (p = variables[i]; p != NULL; p = next) {
	2658	next = p->hnext;
	2659
	2660	if (p->hvalue->type == Node_func)
	2661	continue;
	2662	else if (p->hvalue->type == Node_var_array)
	2663	assoc_clear(p->hvalue);
	2664	else if (p->hvalue->type != Node_var_new) {
	2665	NODE **lhs = get_lhs(p->hvalue, NULL, FALSE);
	2666
	2667	unref(*lhs);
	2668	}
	2669	unref(p);
	2670	}
	2671	}
	2672
	2673	/* finfo --- for use in comparison and sorting of function names */
	2674
	2675	struct finfo {
	2676	const char *name;
	2677	size_t nlen;
	2678	NODE *func;
	2679	};
	2680
	2681	/* fcompare --- comparison function for qsort */
	2682
	2683	static int
	2684	fcompare(const void p1, const void p2)
	2685	{
	2686	const struct finfo f1, f2;
	2687	int minlen;
	2688
	2689	f1 = (const struct finfo *) p1;
	2690	f2 = (const struct finfo *) p2;
	2691
	2692	if (f1->nlen > f2->nlen)
	2693	minlen = f2->nlen;
	2694	else
	2695	minlen = f1->nlen;
	2696
	2697	return strncmp(f1->name, f2->name, minlen);
	2698	}
	2699
	2700	/* dump_funcs --- print all functions */
	2701
	2702	void
	2703	dump_funcs()
	2704	{
	2705	int i, j;
	2706	NODE *p;
	2707	struct finfo *tab = NULL;
	2708
	2709	/*
	2710	* Walk through symbol table countng functions.
	2711	* Could be more than func_count if there are
	2712	* extension functions.
	2713	*/
	2714	for (i = j = 0; i < HASHSIZE; i++) {
	2715	for (p = variables[i]; p != NULL; p = p->hnext) {
	2716	if (p->hvalue->type == Node_func) {
	2717	j++;
	2718	}
	2719	}
	2720	}
	2721
	2722	if (j == 0)
	2723	return;
	2724
	2725	emalloc(tab, struct finfo , j sizeof(struct finfo), "dump_funcs");
	2726
	2727	/* now walk again, copying info */
	2728	for (i = j = 0; i < HASHSIZE; i++) {
	2729	for (p = variables[i]; p != NULL; p = p->hnext) {
	2730	if (p->hvalue->type == Node_func) {
	2731	tab[j].name = p->hname;
	2732	tab[j].nlen = p->hlength;
	2733	tab[j].func = p->hvalue;
	2734	j++;
	2735	}
	2736	}
	2737	}
	2738
	2739
	2740	/* Shazzam! */
	2741	qsort(tab, j, sizeof(struct finfo), fcompare);
	2742
	2743	for (i = 0; i < j; i++)
	2744	pp_func(tab[i].name, tab[i].nlen, tab[i].func);
	2745
	2746	free(tab);
	2747	}
	2748
	2749	/* shadow_funcs --- check all functions for parameters that shadow globals */
	2750
	2751	void
	2752	shadow_funcs()
	2753	{
	2754	int i, j;
	2755	NODE *p;
	2756	struct finfo *tab;
	2757	static int calls = 0;
	2758	int shadow = FALSE;
	2759
	2760	if (func_count == 0)
	2761	return;
	2762
	2763	if (calls++ != 0)
	2764	fatal(_("shadow_funcs() called twice!"));
	2765
	2766	emalloc(tab, struct finfo , func_count sizeof(struct finfo), "shadow_funcs");
	2767
	2768	for (i = j = 0; i < HASHSIZE; i++) {
	2769	for (p = variables[i]; p != NULL; p = p->hnext) {
	2770	if (p->hvalue->type == Node_func) {
	2771	tab[j].name = p->hname;
	2772	tab[j].nlen = p->hlength;
	2773	tab[j].func = p->hvalue;
	2774	j++;
	2775	}
	2776	}
	2777	}
	2778
	2779	assert(j == func_count);
	2780
	2781	/* Shazzam! */
	2782	qsort(tab, func_count, sizeof(struct finfo), fcompare);
	2783
	2784	for (i = 0; i < j; i++)
	2785	shadow \|= parms_shadow(tab[i].name, tab[i].func);
	2786
	2787	free(tab);
	2788
	2789	/* End with fatal if the user requested it. */
	2790	if (shadow && lintfunc != warning)
	2791	lintwarn(_("there were shadowed variables."));
	2792	}
	2793
	2794	/*
	2795	* append_right:
	2796	* Add new to the rightmost branch of LIST. This uses n^2 time, so we make
	2797	* a simple attempt at optimizing it.
	2798	*/
	2799
	2800	static NODE *
	2801	append_right(NODE list, NODE new)
	2802	{
	2803	register NODE *oldlist;
	2804	static NODE savefront = NULL, savetail = NULL;
	2805
	2806	if (list == NULL \|\| new == NULL)
	2807	return list;
	2808
	2809	oldlist = list;
	2810	if (savefront == oldlist)
	2811	list = savetail; /* Be careful: maybe list->rnode != NULL */
	2812	else
	2813	savefront = oldlist;
	2814
	2815	while (list->rnode != NULL)
	2816	list = list->rnode;
	2817	savetail = list->rnode = new;
	2818	return oldlist;
	2819	}
	2820
	2821	/*
	2822	* append_pattern:
	2823	* A wrapper around append_right, used for rule lists.
	2824	*/
	2825	static inline NODE *
	2826	append_pattern(NODE *list, NODE patt)
	2827	{
	2828	NODE n = node(patt, Node_rule_node, (NODE ) NULL);
	2829
	2830	if (*list == NULL)
	2831	*list = n;
	2832	else {
	2833	NODE n1 = node(n, Node_rule_list, (NODE ) NULL);
	2834	if ((*list)->type != Node_rule_list)
	2835	list = node(list, Node_rule_list, n1);
	2836	else
	2837	(void) append_right(*list, n1);
	2838	}
	2839	return n;
	2840	}
	2841
	2842	/*
	2843	* func_install:
	2844	* check if name is already installed; if so, it had better have Null value,
	2845	* in which case def is added as the value. Otherwise, install name with def
	2846	* as value.
	2847	*
	2848	* Extra work, build up and save a list of the parameter names in a table
	2849	* and hang it off params->parmlist. This is used to set the `vname' field
	2850	* of each function parameter during a function call. See eval.c.
	2851	*/
	2852
	2853	static void
	2854	func_install(NODE params, NODE def)
	2855	{
	2856	NODE r, n, *thisfunc;
	2857	char *pnames, names, *sp;
	2858	size_t pcount = 0, space = 0;
	2859	int i;
	2860
	2861	/* check for function foo(foo) { ... }. bleah. */
	2862	for (n = params->rnode; n != NULL; n = n->rnode) {
	2863	if (strcmp(n->param, params->param) == 0)
	2864	fatal(_("function `%s': can't use function name as parameter name"),
	2865	params->param);
	2866	}
	2867
	2868	thisfunc = NULL; /* turn off warnings */
	2869
	2870	/* symbol table managment */
	2871	pop_var(params, FALSE);
	2872	r = lookup(params->param);
	2873	if (r != NULL) {
	2874	fatal(_("function name `%s' previously defined"), params->param);
	2875	} else if (params->param == builtin_func) /* not a valid function name */
	2876	goto remove_params;
	2877
	2878	/* install the function */
	2879	thisfunc = node(params, Node_func, def);
	2880	(void) install(params->param, thisfunc);
	2881
	2882	/* figure out amount of space to allocate for variable names */
	2883	for (n = params->rnode; n != NULL; n = n->rnode) {
	2884	pcount++;
	2885	space += strlen(n->param) + 1;
	2886	}
	2887
	2888	/* allocate it and fill it in */
	2889	if (pcount != 0) {
	2890	emalloc(names, char *, space, "func_install");
	2891	emalloc(pnames, char *, pcount sizeof(char *), "func_install");
	2892	sp = names;
	2893	for (i = 0, n = params->rnode; i < pcount; i++, n = n->rnode) {
	2894	pnames[i] = sp;
	2895	strcpy(sp, n->param);
	2896	sp += strlen(n->param) + 1;
	2897	}
	2898	thisfunc->parmlist = pnames;
	2899	} else {
	2900	thisfunc->parmlist = NULL;
	2901	}
	2902
	2903	/* update lint table info */
	2904	func_use(params->param, FUNC_DEFINE);
	2905
	2906	func_count++; /* used by profiling / pretty printer */
	2907
	2908	remove_params:
	2909	/* remove params from symbol table */
	2910	pop_params(params->rnode);
	2911	}
	2912
	2913	/* pop_var --- remove a variable from the symbol table */
	2914
	2915	static void
	2916	pop_var(NODE *np, int freeit)
	2917	{
	2918	register NODE bucket, *save;
	2919	register size_t len;
	2920	char *name;
	2921
	2922	name = np->param;
	2923	len = strlen(name);
	2924	save = &(variables[hash(name, len, (unsigned long) HASHSIZE)]);
	2925	for (bucket = *save; bucket != NULL; bucket = bucket->hnext) {
	2926	if (len == bucket->hlength && STREQN(bucket->hname, name, len)) {
	2927	var_count--;
	2928	*save = bucket->hnext;
	2929	freenode(bucket);
	2930	if (freeit)
	2931	free(np->param);
	2932	return;
	2933	}
	2934	save = &(bucket->hnext);
	2935	}
	2936	}
	2937
	2938	/* pop_params --- remove list of function parameters from symbol table */
	2939
	2940	/*
	2941	* pop parameters out of the symbol table. do this in reverse order to
	2942	* avoid reading freed memory if there were duplicated parameters.
	2943	*/
	2944	static void
	2945	pop_params(NODE *params)
	2946	{
	2947	if (params == NULL)
	2948	return;
	2949	pop_params(params->rnode);
	2950	pop_var(params, TRUE);
	2951	}
	2952
	2953	/* make_param --- make NAME into a function parameter */
	2954
	2955	static NODE *
	2956	make_param(char *name)
	2957	{
	2958	NODE *r;
	2959
	2960	getnode(r);
	2961	r->type = Node_param_list;
	2962	r->rnode = NULL;
	2963	r->param = name;
	2964	r->param_cnt = param_counter++;
	2965	return (install(name, r));
	2966	}
	2967
	2968	static struct fdesc {
	2969	char *name;
	2970	short used;
	2971	short defined;
	2972	struct fdesc *next;
	2973	} *ftable[HASHSIZE];
	2974
	2975	/* func_use --- track uses and definitions of functions */
	2976
	2977	static void
	2978	func_use(const char *name, enum defref how)
	2979	{
	2980	struct fdesc *fp;
	2981	int len;
	2982	int ind;
	2983
	2984	len = strlen(name);
	2985	ind = hash(name, len, HASHSIZE);
	2986
	2987	for (fp = ftable[ind]; fp != NULL; fp = fp->next) {
	2988	if (strcmp(fp->name, name) == 0) {
	2989	if (how == FUNC_DEFINE)
	2990	fp->defined++;
	2991	else
	2992	fp->used++;
	2993	return;
	2994	}
	2995	}
	2996
	2997	/* not in the table, fall through to allocate a new one */
	2998
	2999	emalloc(fp, struct fdesc *, sizeof(struct fdesc), "func_use");
	3000	memset(fp, '\0', sizeof(struct fdesc));
	3001	emalloc(fp->name, char *, len + 1, "func_use");
	3002	strcpy(fp->name, name);
	3003	if (how == FUNC_DEFINE)
	3004	fp->defined++;
	3005	else
	3006	fp->used++;
	3007	fp->next = ftable[ind];
	3008	ftable[ind] = fp;
	3009	}
	3010
	3011	/* check_funcs --- verify functions that are called but not defined */
	3012
	3013	static void
	3014	check_funcs()
	3015	{
	3016	struct fdesc fp, next;
	3017	int i;
	3018
	3019	for (i = 0; i < HASHSIZE; i++) {
	3020	for (fp = ftable[i]; fp != NULL; fp = fp->next) {
	3021	#ifdef REALLYMEAN
	3022	/* making this the default breaks old code. sigh. */
	3023	if (fp->defined == 0) {
	3024	error(
	3025	_("function `%s' called but never defined"), fp->name);
	3026	errcount++;
	3027	}
	3028	#else
	3029	if (do_lint && fp->defined == 0)
	3030	lintwarn(
	3031	_("function `%s' called but never defined"), fp->name);
	3032	#endif
	3033	if (do_lint && fp->used == 0) {
	3034	lintwarn(_("function `%s' defined but never called"),
	3035	fp->name);
	3036	}
	3037	}
	3038	}
	3039
	3040	/* now let's free all the memory */
	3041	for (i = 0; i < HASHSIZE; i++) {
	3042	for (fp = ftable[i]; fp != NULL; fp = next) {
	3043	next = fp->next;
	3044	free(fp->name);
	3045	free(fp);
	3046	}
	3047	}
	3048	}
	3049
	3050	/* param_sanity --- look for parameters that are regexp constants */
	3051
	3052	static void
	3053	param_sanity(NODE *arglist)
	3054	{
	3055	NODE argp, arg;
	3056	int i;
	3057
	3058	for (i = 1, argp = arglist; argp != NULL; argp = argp->rnode, i++) {
	3059	arg = argp->lnode;
	3060	if (arg->type == Node_regex)
	3061	warning(_("regexp constant for parameter #%d yields boolean value"), i);
	3062	}
	3063	}
	3064
	3065	/* deferred varibles --- those that are only defined if needed. */
	3066
	3067	/*
	3068	* Is there any reason to use a hash table for deferred variables? At the
	3069	* moment, there are only 1 to 3 such variables, so it may not be worth
	3070	* the overhead. If more modules start using this facility, it should
	3071	* probably be converted into a hash table.
	3072	*/
	3073
	3074	static struct deferred_variable {
	3075	NODE (load_func)(void);
	3076	struct deferred_variable *next;
	3077	char name[1]; /* variable-length array */
	3078	} *deferred_variables;
	3079
	3080	/* register_deferred_variable --- add a var name and loading function to the list */
	3081
	3082	void
	3083	register_deferred_variable(const char name, NODE (*load_func)(void))
	3084	{
	3085	struct deferred_variable *dv;
	3086	size_t sl = strlen(name);
	3087
	3088	emalloc(dv, struct deferred_variable , sizeof(dv)+sl,
	3089	"register_deferred_variable");
	3090	dv->load_func = load_func;
	3091	dv->next = deferred_variables;
	3092	memcpy(dv->name, name, sl+1);
	3093	deferred_variables = dv;
	3094	}
	3095
	3096	/* variable --- make sure NAME is in the symbol table */
	3097
	3098	NODE *
	3099	variable(char *name, int can_free, NODETYPE type)
	3100	{
	3101	register NODE *r;
	3102
	3103	if ((r = lookup(name)) != NULL) {
	3104	if (r->type == Node_func)
	3105	fatal(_("function `%s' called with space between name and `(',\nor used as a variable or an array"),
	3106	r->vname);
	3107
	3108	} else {
	3109	/* not found */
	3110	struct deferred_variable *dv;
	3111
	3112	for (dv = deferred_variables; TRUE; dv = dv->next) {
	3113	if (dv == NULL) {
	3114	/*
	3115	* This is the only case in which we may not
	3116	* free the string.
	3117	*/
	3118	NODE *n;
	3119
	3120	if (type == Node_var_array)
	3121	n = node(NULL, type, NULL);
	3122	else
	3123	n = node(Nnull_string, type, NULL);
	3124
	3125	return install(name, n);
	3126	}
	3127	if (STREQ(name, dv->name)) {
	3128	r = (*dv->load_func)();
	3129	break;
	3130	}
	3131	}
	3132	}
	3133	if (can_free)
	3134	free(name);
	3135	return r;
	3136	}
	3137
	3138	/* mk_rexp --- make a regular expression constant */
	3139
	3140	static NODE *
	3141	mk_rexp(NODE *exp)
	3142	{
	3143	NODE *n;
	3144
	3145	if (exp->type == Node_regex)
	3146	return exp;
	3147
	3148	getnode(n);
	3149	n->type = Node_dynregex;
	3150	n->re_exp = exp;
	3151	n->re_text = NULL;
	3152	n->re_reg = NULL;
	3153	n->re_flags = 0;
	3154	n->re_cnt = 1;
	3155	return n;
	3156	}
	3157
	3158	/* isnoeffect --- when used as a statement, has no side effects */
	3159
	3160	/*
	3161	* To be completely general, we should recursively walk the parse
	3162	* tree, to make sure that all the subexpressions also have no effect.
	3163	* Instead, we just weaken the actual warning that's printed, up above
	3164	* in the grammar.
	3165	*/
	3166
	3167	static int
	3168	isnoeffect(NODETYPE type)
	3169	{
	3170	switch (type) {
	3171	case Node_times:
	3172	case Node_quotient:
	3173	case Node_mod:
	3174	case Node_plus:
	3175	case Node_minus:
	3176	case Node_subscript:
	3177	case Node_concat:
	3178	case Node_exp:
	3179	case Node_unary_minus:
	3180	case Node_field_spec:
	3181	case Node_and:
	3182	case Node_or:
	3183	case Node_equal:
	3184	case Node_notequal:
	3185	case Node_less:
	3186	case Node_greater:
	3187	case Node_leq:
	3188	case Node_geq:
	3189	case Node_match:
	3190	case Node_nomatch:
	3191	case Node_not:
	3192	case Node_val:
	3193	case Node_in_array:
	3194	case Node_NF:
	3195	case Node_NR:
	3196	case Node_FNR:
	3197	case Node_FS:
	3198	case Node_RS:
	3199	case Node_FIELDWIDTHS:
	3200	case Node_IGNORECASE:
	3201	case Node_OFS:
	3202	case Node_ORS:
	3203	case Node_OFMT:
	3204	case Node_CONVFMT:
	3205	case Node_BINMODE:
	3206	case Node_LINT:
	3207	case Node_SUBSEP:
	3208	case Node_TEXTDOMAIN:
	3209	return TRUE;
	3210	default:
	3211	break; /* keeps gcc -Wall happy */
	3212	}
	3213
	3214	return FALSE;
	3215	}
	3216
	3217	/* isassignable --- can this node be assigned to? */
	3218
	3219	static int
	3220	isassignable(register NODE *n)
	3221	{
	3222	switch (n->type) {
	3223	case Node_var_new:
	3224	case Node_var:
	3225	case Node_FIELDWIDTHS:
	3226	case Node_RS:
	3227	case Node_FS:
	3228	case Node_FNR:
	3229	case Node_NR:
	3230	case Node_NF:
	3231	case Node_IGNORECASE:
	3232	case Node_OFMT:
	3233	case Node_CONVFMT:
	3234	case Node_ORS:
	3235	case Node_OFS:
	3236	case Node_LINT:
	3237	case Node_BINMODE:
	3238	case Node_SUBSEP:
	3239	case Node_TEXTDOMAIN:
	3240	case Node_field_spec:
	3241	case Node_subscript:
	3242	return TRUE;
	3243	case Node_param_list:
	3244	return ((n->flags & FUNC) == 0); /* ok if not func name */
	3245	default:
	3246	break; /* keeps gcc -Wall happy */
	3247	}
	3248	return FALSE;
	3249	}
	3250
	3251	/* stopme --- for debugging */
	3252
	3253	NODE *
	3254	stopme(NODE *tree ATTRIBUTE_UNUSED)
	3255	{
	3256	return (NODE *) 0;
	3257	}
	3258
	3259	/* dumpintlstr --- write out an initial .po file entry for the string */
	3260
	3261	static void
	3262	dumpintlstr(const char *str, size_t len)
	3263	{
	3264	char *cp;
	3265
	3266	/* See the GNU gettext distribution for details on the file format */
	3267
	3268	if (source != NULL) {
	3269	/* ala the gettext sources, remove leading `./'s */
	3270	for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
	3271	continue;
	3272	printf("#: %s:%d\n", cp, sourceline);
	3273	}
	3274
	3275	printf("msgid ");
	3276	pp_string_fp(stdout, str, len, '"', TRUE);
	3277	putchar('\n');
	3278	printf("msgstr \"\"\n\n");
	3279	fflush(stdout);
	3280	}
	3281
	3282	/* dumpintlstr2 --- write out an initial .po file entry for the string and its plural */
	3283
	3284	static void
	3285	dumpintlstr2(const char str1, size_t len1, const char str2, size_t len2)
	3286	{
	3287	char *cp;
	3288
	3289	/* See the GNU gettext distribution for details on the file format */
	3290
	3291	if (source != NULL) {
	3292	/* ala the gettext sources, remove leading `./'s */
	3293	for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
	3294	continue;
	3295	printf("#: %s:%d\n", cp, sourceline);
	3296	}
	3297
	3298	printf("msgid ");
	3299	pp_string_fp(stdout, str1, len1, '"', TRUE);
	3300	putchar('\n');
	3301	printf("msgid_plural ");
	3302	pp_string_fp(stdout, str2, len2, '"', TRUE);
	3303	putchar('\n');
	3304	printf("msgstr[0] \"\"\nmsgstr[1] \"\"\n\n");
	3305	fflush(stdout);
	3306	}
	3307
	3308	/* count_args --- count the number of printf arguments */
	3309
	3310	static void
	3311	count_args(NODE *tree)
	3312	{
	3313	size_t count = 0;
	3314	NODE *save_tree;
	3315
	3316	assert(tree->type == Node_K_printf
	3317	\|\| (tree->type == Node_builtin && tree->builtin == do_sprintf));
	3318	save_tree = tree;
	3319
	3320	tree = tree->lnode; /* printf format string */
	3321
	3322	for (count = 0; tree != NULL; tree = tree->rnode)
	3323	count++;
	3324
	3325	save_tree->printf_count = count;
	3326	}
	3327
	3328	/* isarray --- can this type be subscripted? */
	3329
	3330	static int
	3331	isarray(NODE *n)
	3332	{
	3333	switch (n->type) {
	3334	case Node_var_new:
	3335	case Node_var_array:
	3336	return TRUE;
	3337	case Node_param_list:
	3338	return (n->flags & FUNC) == 0;
	3339	case Node_array_ref:
	3340	cant_happen();
	3341	break;
	3342	default:
	3343	break; /* keeps gcc -Wall happy */
	3344	}
	3345
	3346	return FALSE;
	3347	}
	3348
	3349	/* See if name is a special token. */
	3350
	3351	int
	3352	check_special(const char *name)
	3353	{
	3354	int low, high, mid;
	3355	int i;
	3356
	3357	low = 0;
	3358	high = (sizeof(tokentab) / sizeof(tokentab[0])) - 1;
	3359	while (low <= high) {
	3360	mid = (low + high) / 2;
	3361	i = *name - tokentab[mid].operator[0];
	3362	if (i == 0)
	3363	i = strcmp(name, tokentab[mid].operator);
	3364
	3365	if (i < 0) /* token < mid */
	3366	high = mid - 1;
	3367	else if (i > 0) /* token > mid */
	3368	low = mid + 1;
	3369	else
	3370	return mid;
	3371	}
	3372	return -1;
	3373	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

Original Format