source: vendor/gawk/3.1.5/awkgram.y@ 3181

Last change on this file since 3181 was 3076, checked in by bird, 18 years ago

gawk 3.1.5

File size: 78.2 KB
Line 
1/*
2 * awkgram.y --- yacc/bison parser
3 */
4
5/*
6 * Copyright (C) 1986, 1988, 1989, 1991-2005 the Free Software Foundation, Inc.
7 *
8 * This file is part of GAWK, the GNU implementation of the
9 * AWK Programming Language.
10 *
11 * GAWK is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * GAWK is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 */
25
%{
#ifdef GAWKDEBUG
#define YYDEBUG 12
#endif

#include "awk.h"

/* Symbolic values for the second argument passed to variable(). */
#define CAN_FREE	TRUE
#define DONT_FREE	FALSE

/*
 * yyerror() takes a printf-style argument list.  Without <stdarg.h>
 * and a standard C compiler, only an old-style declaration is possible.
 */
#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
static void yyerror(const char *m, ...) ATTRIBUTE_PRINTF_1;
#else
static void yyerror(); /* va_alist */
#endif

/* Forward declarations of the file-local helpers. */
static char *get_src_buf P((void));
static int yylex P((void));
static NODE *node_common P((NODETYPE op));
static NODE *snode P((NODE *subn, NODETYPE op, int sindex));
static NODE *make_for_loop P((NODE *init, NODE *cond, NODE *incr));
static NODE *append_right P((NODE *list, NODE *new));
static inline NODE *append_pattern P((NODE **list, NODE *patt));
static void func_install P((NODE *params, NODE *def));
static void pop_var P((NODE *np, int freeit));
static void pop_params P((NODE *params));
static NODE *make_param P((char *name));
static NODE *mk_rexp P((NODE *exp));
static int dup_parms P((NODE *func));
static void param_sanity P((NODE *arglist));
static int parms_shadow P((const char *fname, NODE *func));
static int isnoeffect P((NODETYPE t));
static int isassignable P((NODE *n));
static void dumpintlstr P((const char *str, size_t len));
static void dumpintlstr2 P((const char *str1, size_t len1, const char *str2, size_t len2));
static void count_args P((NODE *n));
static int isarray P((NODE *n));

enum defref { FUNC_DEFINE, FUNC_USE };
static void func_use P((const char *name, enum defref how));
static void check_funcs P((void));

/* State shared between the grammar actions and the lexer. */
static int want_regexp;			/* lexical scanning kludge */
static int can_return;			/* parsing kludge */
static int begin_or_end_rule = FALSE;	/* parsing kludge */
static int parsing_end_rule = FALSE;	/* for warnings */
static int in_print = FALSE;		/* lexical scanning kludge for print */
static int in_parens = 0;		/* lexical scanning kludge for print */

/* Positions inside the source text being scanned. */
static char *lexptr;		/* pointer to next char during parsing */
static char *lexend;
static char *lexptr_begin;	/* keep track of where we were for error msgs */
static char *lexeme;		/* beginning of lexeme for debugging */
static char *thisline = NULL;
#define YYDEBUG_LEXER_TEXT (lexeme)
static int param_counter;
static char *tokstart = NULL;
static char *tok = NULL;
static char *tokend;

static long func_count;		/* total number of functions */

#define HASHSIZE	1021	/* this constant only used here */
NODE *variables[HASHSIZE];
static int var_count;		/* total number of global variables */

/* Objects defined in other source files. */
extern char *source;
extern int sourceline;
extern struct src *srcfiles;
extern long numfiles;
extern int errcount;
extern NODE *begin_block;
extern NODE *end_block;

/*
 * This string cannot occur as a real awk identifier.
 * Use it as a special token to make function parsing
 * uniform, but if it's seen, don't install the function.
 * e.g.
 *	function split(x) { return x }
 *	function x(a) { return a }
 * should only produce one error message, and not core dump.
 */
static char builtin_func[] = "@builtin";
%}
109
/* Semantic value carried by every grammar symbol. */
%union {
	long lval;
	AWKNUM fval;
	NODE *nodeval;
	NODETYPE nodetypeval;
	char *sval;
	NODE *(*ptrval) P((void));
}

/* Nonterminals whose value is a parse tree node. */
%type <nodeval> function_prologue pattern action variable param_list
%type <nodeval> exp common_exp
%type <nodeval> simp_exp non_post_simp_exp
%type <nodeval> expression_list opt_expression_list print_expression_list
%type <nodeval> statements statement if_statement switch_body case_statements case_statement case_value opt_param_list
%type <nodeval> simple_stmt opt_simple_stmt
%type <nodeval> opt_exp opt_variable regexp
%type <nodeval> input_redir output_redir
/* Nonterminals whose value is a node type, a string or a number. */
%type <nodetypeval> print
%type <nodetypeval> assign_operator a_relop relop_or_less
%type <sval> func_name
%type <lval> lex_builtin

/* Terminals delivered by yylex(). */
%token <sval> FUNC_CALL NAME REGEXP
%token <lval> ERROR
%token <nodeval> YNUMBER YSTRING
%token <nodetypeval> RELOP IO_OUT IO_IN
%token <nodetypeval> ASSIGNOP ASSIGN MATCHOP CONCAT_OP
%token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
%token <nodetypeval> LEX_SWITCH LEX_CASE LEX_DEFAULT LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
%token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
%token <nodetypeval> LEX_GETLINE LEX_NEXTFILE
%token <nodetypeval> LEX_IN
%token <lval> LEX_AND LEX_OR INCREMENT DECREMENT
%token <lval> LEX_BUILTIN LEX_LENGTH
%token NEWLINE

/* these are just yylval numbers */

/*
 * Operator precedence, lowest to highest.
 * SLASH_BEFORE_EQUAL and UNARY are introduced by these lines only;
 * they have no %token declaration above.
 */
%right ASSIGNOP ASSIGN SLASH_BEFORE_EQUAL
%right '?' ':'
%left LEX_OR
%left LEX_AND
%left LEX_GETLINE
%nonassoc LEX_IN
%left FUNC_CALL LEX_BUILTIN LEX_LENGTH
%nonassoc ','
%nonassoc MATCHOP
%nonassoc RELOP '<' '>' IO_IN IO_OUT
%left CONCAT_OP
%left YSTRING YNUMBER
%left '+' '-'
%left '*' '/' '%'
%right '!' UNARY
%right '^'
%left INCREMENT DECREMENT
%left '$'
%left '(' ')'
%%
169
/* start --- the whole program, optionally surrounded by newlines */
start
	: opt_nls program opt_nls
	  {
		/* runs once, after every rule has been reduced */
		check_funcs();
	  }
	;

/* program --- a possibly empty sequence of rules */
program
	: /* empty */
	| program rule
	  {
		/* a rule is complete: forget any BEGIN/END context */
		parsing_end_rule = FALSE;
		begin_or_end_rule = FALSE;
		yyerrok;
	  }
	| program error
	  {
		parsing_end_rule = FALSE;
		begin_or_end_rule = FALSE;
		/*
		 * If errors, give up, don't produce an infinite
		 * stream of syntax error messages.
		 */
		/* yyerrok; */
	  }
	;
194
/* rule --- a pattern/action pair, a bare pattern, or a function definition */
rule
	: pattern action
	  {
		$1->rnode = $2;
	  }
	| pattern statement_term
	  {
		if ($1->lnode == NULL) {
			/* no pattern and no action: an error */
			if (begin_or_end_rule)
				msg(_("%s blocks must have an action part"),
					(parsing_end_rule ? "END" : "BEGIN"));
			else
				msg(_("each rule must have a pattern or an action part"));
			errcount++;
		} else {
			/* pattern rule with non-empty pattern: action is a record print */
			$1->rnode = node(NULL, Node_K_print_rec, NULL);
		}
	  }
	| function_prologue action
	  {
		can_return = FALSE;
		/* function_prologue may yield a null list; install only otherwise */
		if ($1 != NULL)
			func_install($1, $2);
		yyerrok;
	  }
	;
223
/*
 * pattern --- the part of a rule before the action.
 * Ordinary patterns are appended to expression_value,
 * BEGIN and END patterns to begin_block and end_block.
 */
pattern
	: /* empty */
	  {
		$$ = append_pattern(&expression_value, (NODE *) NULL);
	  }
	| exp
	  {
		$$ = append_pattern(&expression_value, $1);
	  }
	| exp ',' exp
	  {
		NODE *range;

		/* range pattern: both expressions hang off one Node_line_range */
		getnode(range);
		range->type = Node_line_range;
		range->condpair = node($1, Node_cond_pair, $3);
		range->triggered = FALSE;
		$$ = append_pattern(&expression_value, range);
	  }
	| LEX_BEGIN
	  {
		begin_or_end_rule = TRUE;
		$$ = append_pattern(&begin_block, (NODE *) NULL);
	  }
	| LEX_END
	  {
		parsing_end_rule = TRUE;
		begin_or_end_rule = TRUE;
		$$ = append_pattern(&end_block, (NODE *) NULL);
	  }
	;
254
/* action --- a braced statement list; its value is the statement list */
action
	: l_brace statements r_brace opt_semi opt_nls
		{ $$ = $2; }
	;

/*
 * func_name --- the name in a function definition.
 * A built-in name is reported as an error and replaced by the
 * builtin_func placeholder string.
 */
func_name
	: NAME
		{ $$ = $1; }
	| FUNC_CALL
		{ $$ = $1; }
	| lex_builtin
	  {
		yyerror(_("`%s' is a built-in function, it cannot be redefined"),
			tokstart);
		errcount++;
		$$ = builtin_func;
		/* yyerrok; */
	  }
	;

/* lex_builtin --- either token class used for built-in function names */
lex_builtin
	: LEX_BUILTIN
	| LEX_LENGTH
	;
279
/*
 * function_prologue --- `function name(params)' up to the body.
 * The value is a list whose first element is a parameter node made
 * from the function name (flagged FUNC), followed by the parameters.
 */
function_prologue
	: LEX_FUNCTION
	  {
		/* mid-rule action: reset before the parameter list is read */
		param_counter = 0;
	  }
	  func_name '(' opt_param_list r_paren opt_nls
	  {
		NODE *name_node = make_param($3);

		name_node->flags |= FUNC;
		$$ = append_right(name_node, $5);
		can_return = TRUE;
		/* check for duplicate parameter names */
		if (dup_parms($$))
			errcount++;
	  }
	;
298
/* regexp --- a regular expression constant, built into a Node_regex */
regexp
	/*
	 * In this rule, want_regexp tells yylex that the next thing
	 * is a regexp so it should read up to the closing slash.
	 */
	: a_slash
		{ ++want_regexp; }
	  REGEXP	/* The terminating '/' is consumed by yylex(). */
	  {
		NODE *re;
		size_t len = strlen($3);

		if (do_lint && len == 0)
			lintwarn(_("regexp constant `//' looks like a C++ comment, but is not"));
		else if (do_lint && ($3)[0] == '*' && ($3)[len-1] == '*')
			/* possible C comment */
			lintwarn(_("regexp constant `/%s/' looks like a C comment, but is not"), tokstart);

		getnode(re);
		re->type = Node_regex;
		re->re_exp = make_string($3, len);
		re->re_reg = make_regexp($3, len, FALSE, TRUE);
		re->re_text = NULL;
		re->re_flags = CONST;
		re->re_cnt = 1;
		$$ = re;
	  }
	;

/* a_slash --- the two tokens that may open a regexp constant */
a_slash
	: '/'
	| SLASH_BEFORE_EQUAL
	;
333
/*
 * statements --- zero or more statements.
 * Null statements are dropped; once there are two or more, they are
 * chained together as Node_statement_list nodes.
 */
statements
	: /* empty */
		{ $$ = NULL; }
	| statements statement
	  {
		if ($2 != NULL) {
			if (do_lint && isnoeffect($2->type))
				lintwarn(_("statement may have no effect"));
			if ($1 != NULL) {
				NODE *head, *tail;

				/* wrap either side in a list node unless it already is one */
				head = ($1->type == Node_statement_list ? $1
					: node($1, Node_statement_list, (NODE *) NULL));
				tail = ($2->type == Node_statement_list ? $2
					: node($2, Node_statement_list, (NODE *) NULL));
				$$ = append_right(head, tail);
			} else
				$$ = $2;
		} else
			$$ = $1;
		yyerrok;
	  }
	| statements error
		{ $$ = NULL; }
	;

/* statement_term --- what ends a simple statement */
statement_term
	: nls
	| semi opt_nls
	;
363
/* statement --- one awk statement; the value is its parse tree, or NULL */
statement
	: semi opt_nls
		{ $$ = NULL; }
	| l_brace statements r_brace
		{ $$ = $2; }
	| if_statement
		{ $$ = $1; }
	| LEX_SWITCH '(' exp r_paren opt_nls l_brace switch_body opt_nls r_brace
		{ $$ = node($3, Node_K_switch, $7); }
	| LEX_WHILE '(' exp r_paren opt_nls statement
		{ $$ = node($3, Node_K_while, $6); }
	| LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls
		{ $$ = node($6, Node_K_do, $3); }
	| LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement
	  {
		/*
		 * Efficiency hack.  Recognize the special case of
		 *
		 * 	for (iggy in foo)
		 * 		delete foo[iggy]
		 *
		 * and treat it as if it were
		 *
		 * 	delete foo
		 *
		 * Check that the body is a `delete a[i]' statement with
		 * exactly one subscript, and that both the loop var and
		 * array names match.
		 */
		int whole_array_delete = FALSE;

		if ($8 != NULL && $8->type == Node_K_delete && $8->rnode != NULL) {
			NODE *arr, *sub;

			assert($8->rnode->type == Node_expression_list);
			arr = $8->lnode;	/* array var */
			sub = $8->rnode->lnode;	/* index var */

			/*
			 * `delete foo[iggy, x]' names a different element
			 * than foo[iggy], so a subscript list with more than
			 * one entry must not be turned into `delete foo'.
			 */
			if (   $8->rnode->rnode == NULL
			    && (arr->type == Node_var_new
				|| arr->type == Node_var_array
				|| arr->type == Node_param_list)
			    && (sub->type == Node_var_new
				|| sub->type == Node_var
				|| sub->type == Node_param_list)
			    && strcmp($3, sub->vname) == 0
			    && strcmp($5, arr->vname) == 0)
				whole_array_delete = TRUE;
		}

		if (whole_array_delete) {
			$8->type = Node_K_delete_loop;
			$$ = $8;
			free($3);	/* thanks to valgrind for pointing these out */
			free($5);
		} else {
			/* the ordinary `for (var in array)' loop */
			$$ = node($8, Node_K_arrayfor,
				make_for_loop(variable($3, CAN_FREE, Node_var),
				(NODE *) NULL, variable($5, CAN_FREE, Node_var_array)));
		}
	  }
	| LEX_FOR '(' opt_simple_stmt semi opt_nls exp semi opt_nls opt_simple_stmt r_paren opt_nls statement
	  {
		$$ = node($12, Node_K_for, (NODE *) make_for_loop($3, $6, $9));
	  }
	| LEX_FOR '(' opt_simple_stmt semi opt_nls semi opt_nls opt_simple_stmt r_paren opt_nls statement
	  {
		/* no loop condition given */
		$$ = node($11, Node_K_for,
			(NODE *) make_for_loop($3, (NODE *) NULL, $8));
	  }
	| LEX_BREAK statement_term
	   /* for break, maybe we'll have to remember where to break to */
		{ $$ = node((NODE *) NULL, Node_K_break, (NODE *) NULL); }
	| LEX_CONTINUE statement_term
	   /* similarly */
		{ $$ = node((NODE *) NULL, Node_K_continue, (NODE *) NULL); }
	| LEX_NEXT statement_term
	  {
		if (begin_or_end_rule)
			yyerror(_("`%s' used in %s action"), "next",
				(parsing_end_rule ? "END" : "BEGIN"));
		$$ = node((NODE *) NULL, Node_K_next, (NODE *) NULL);
	  }
	| LEX_NEXTFILE statement_term
	  {
		if (do_traditional) {
			/*
			 * can't use yyerror, since may have overshot
			 * the source line
			 */
			errcount++;
			error(_("`nextfile' is a gawk extension"));
		}
		if (do_lint)
			lintwarn(_("`nextfile' is a gawk extension"));
		if (begin_or_end_rule) {
			/* same thing */
			errcount++;
			error(_("`%s' used in %s action"), "nextfile",
				(parsing_end_rule ? "END" : "BEGIN"));
		}
		$$ = node((NODE *) NULL, Node_K_nextfile, (NODE *) NULL);
	  }
	| LEX_EXIT opt_exp statement_term
		{ $$ = node($2, Node_K_exit, (NODE *) NULL); }
	| LEX_RETURN
	  {
		/* mid-rule action: complain before the expression is parsed */
		if (! can_return)
			yyerror(_("`return' used outside function context"));
	  }
	  opt_exp statement_term
	  {
		/* $3 is opt_exp; a bare `return' returns the null string */
		$$ = node($3 == NULL ? Nnull_string : $3,
			Node_K_return, (NODE *) NULL);
	  }
	| simple_stmt statement_term
	;
479
480 /*
481 * A simple_stmt exists to satisfy a constraint in the POSIX
482 * grammar allowing them to occur as the 1st and 3rd parts
483 * in a `for (...;...;...)' loop. This is a historical oddity
484 * inherited from Unix awk, not at all documented in the AK&W
485 * awk book. We support it, as this was reported as a bug.
486 * We don't bother to document it though. So there.
487 */
488simple_stmt
489 : print { in_print = TRUE; in_parens = 0; } print_expression_list output_redir
490 {
491 /*
492 * Optimization: plain `print' has no expression list, so $3 is null.
493 * If $3 is an expression list with one element (rnode == null)
494 * and lnode is a field spec for field 0, we have `print $0'.
495 * For both, use Node_K_print_rec, which is faster for these two cases.
496 */
497 if ($1 == Node_K_print &&
498 ($3 == NULL
499 || ($3->type == Node_expression_list
500 && $3->rnode == NULL
501 && $3->lnode->type == Node_field_spec
502 && $3->lnode->lnode->type == Node_val
503 && $3->lnode->lnode->numbr == 0.0))
504 ) {
505 static int warned = FALSE;
506
507 $$ = node(NULL, Node_K_print_rec, $4);
508
509 if (do_lint && $3 == NULL && begin_or_end_rule && ! warned) {
510 warned = TRUE;
511 lintwarn(
512 _("plain `print' in BEGIN or END rule should probably be `print \"\"'"));
513 }
514 } else {
515 $$ = node($3, $1, $4);
516 if ($$->type == Node_K_printf)
517 count_args($$);
518 }
519 }
520 | LEX_DELETE NAME '[' expression_list ']'
521 { $$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, $4); }
522 | LEX_DELETE NAME
523 {
524 if (do_lint)
525 lintwarn(_("`delete array' is a gawk extension"));
526 if (do_traditional) {
527 /*
528 * can't use yyerror, since may have overshot
529 * the source line
530 */
531 errcount++;
532 error(_("`delete array' is a gawk extension"));
533 }
534 $$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, (NODE *) NULL);
535 }
536 | LEX_DELETE '(' NAME ')'
537 {
538 /* this is for tawk compatibility. maybe the warnings should always be done. */
539 if (do_lint)
540 lintwarn(_("`delete(array)' is a non-portable tawk extension"));
541 if (do_traditional) {
542 /*
543 * can't use yyerror, since may have overshot
544 * the source line
545 */
546 errcount++;
547 error(_("`delete(array)' is a non-portable tawk extension"));
548 }
549 $$ = node(variable($3, CAN_FREE, Node_var_array), Node_K_delete, (NODE *) NULL);
550 }
551 | exp
552 { $$ = $1; }
553 ;
554
555opt_simple_stmt
556 : /* empty */
557 { $$ = NULL; }
558 | simple_stmt
559 { $$ = $1; }
560 ;
561
/*
 * switch_body --- the case list of a switch statement.
 * Walks the list once to report duplicate case values and duplicate
 * `default' labels, then wraps the list and the default entry (if any)
 * in a Node_switch_body.
 */
switch_body
	: case_statements
	  {
		if ($1 != NULL) {
			NODE *default_case = NULL;
			NODE *item;
			const char **seen = NULL;	/* string form of each case value so far */
			int capacity = 128;
			int nseen = 0;
			int k;

			emalloc(seen, const char **, sizeof(char*) * capacity, "switch_body");
			for (item = $1; item != NULL; item = item->rnode) {
				char *caseval;

				if (item->lnode->type != Node_K_case) {
					/* Otherwise save a pointer to the default node. */
					if (default_case != NULL)
						yyerror(_("Duplicate `default' detected in switch body"));
					default_case = item;
					continue;
				}

				/* Assure that case statement values are unique. */
				if (item->lnode->lnode->type == Node_regex)
					caseval = item->lnode->lnode->re_exp->stptr;
				else
					caseval = force_string(tree_eval(item->lnode->lnode))->stptr;

				for (k = 0; k < nseen; k++)
					if (strcmp(caseval, seen[k]) == 0)
						yyerror(_("duplicate case values in switch body: %s"), caseval);

				/* grow the table in steps of 128 entries */
				if (nseen >= capacity) {
					capacity += 128;
					erealloc(seen, const char **, sizeof(char*) * capacity, "switch_body");
				}
				seen[nseen++] = caseval;
			}

			free(seen);

			/* Create the switch body. */
			$$ = node($1, Node_switch_body, default_case);
		} else
			$$ = NULL;
	  }
	;
613
/*
 * case_statements --- zero or more case/default clauses,
 * collected into a chain of Node_case_list nodes.
 */
case_statements
	: /* empty */
		{ $$ = NULL; }
	| case_statements case_statement
	  {
		if ($2 != NULL) {
			if (do_lint && isnoeffect($2->type))
				lintwarn(_("statement may have no effect"));
			if ($1 != NULL) {
				NODE *head, *tail;

				/* wrap either side in a list node unless it already is one */
				head = ($1->type == Node_case_list ? $1 : node($1, Node_case_list, (NODE *) NULL));
				tail = ($2->type == Node_case_list ? $2 : node($2, Node_case_list, (NODE *) NULL));
				$$ = append_right(head, tail);
			} else
				$$ = node($2, Node_case_list, (NODE *) NULL);
		} else
			$$ = $1;
		yyerrok;
	  }
	| case_statements error
		{ $$ = NULL; }
	;
637
/* case_statement --- one `case value:' or `default:' clause with its statements */
case_statement
	: LEX_CASE case_value colon opt_nls statements
		{ $$ = node($2, Node_K_case, $5); }
	| LEX_DEFAULT colon opt_nls statements
		{ $$ = node((NODE *) NULL, Node_K_default, $4); }
	;

/* case_value --- a numeric, string or regexp constant usable as a case label */
case_value
	: YNUMBER
		{ $$ = $1; }
	| '-' YNUMBER    %prec UNARY
	  {
		/* negate the constant in place */
		$2->numbr = -(force_number($2));
		$$ = $2;
	  }
	| '+' YNUMBER    %prec UNARY
		{ $$ = $2; }
	| YSTRING
		{ $$ = $1; }
	| regexp
		{ $$ = $1; }
	;
660
/* print --- either output keyword; the value is the token's node type */
print
	: LEX_PRINT
	| LEX_PRINTF
	;

	/*
	 * Note: ``print(x)'' is already parsed by the first rule,
	 * so there is no good in covering it by the second one too.
	 */
print_expression_list
	: opt_expression_list
	| '(' exp comma expression_list r_paren
		{ $$ = node($2, Node_expression_list, $4); }
	;
675
/*
 * output_redir --- optional redirection after print/printf.
 * Both alternatives clear in_print and in_parens, which the
 * print rule set for the lexer.
 */
output_redir
	: /* empty */
	  {
		in_parens = 0;
		in_print = FALSE;
		$$ = NULL;
	  }
	| IO_OUT { in_print = FALSE; in_parens = 0; } common_exp
	  {
		int getline_over_twoway;

		$$ = node($3, $1, (NODE *) NULL);

		/* is the target itself a getline fed by a two-way pipe? */
		getline_over_twoway = ($3->type == Node_K_getline
					&& $3->rnode != NULL
					&& $3->rnode->type == Node_redirect_twoway);
		if ($1 == Node_redirect_twoway && getline_over_twoway)
			yyerror(_("multistage two-way pipelines don't work"));
	  }
	;
693
/*
 * if_statement --- `if' with or without `else'.
 * The branches are held in a Node_if_branches node:
 * lnode is the `then' part, rnode the `else' part or NULL.
 */
if_statement
	: LEX_IF '(' exp r_paren opt_nls statement
	  {
		NODE *branches = node($6, Node_if_branches, (NODE *) NULL);

		$$ = node($3, Node_K_if, branches);
	  }
	| LEX_IF '(' exp r_paren opt_nls statement
	     LEX_ELSE opt_nls statement
	  {
		NODE *branches = node($6, Node_if_branches, $9);

		$$ = node($3, Node_K_if, branches);
	  }
	;
705
/* nls --- one or more newlines */
nls
	: NEWLINE
	| nls NEWLINE
	;

/* opt_nls --- zero or more newlines */
opt_nls
	: /* empty */
	| nls
	;

/* input_redir --- optional `< expression', NULL when absent */
input_redir
	: /* empty */
		{ $$ = NULL; }
	| '<' simp_exp
		{ $$ = node($2, Node_redirect_input, (NODE *) NULL); }
	;
722
/* opt_param_list --- optional formal parameter list, NULL when absent */
opt_param_list
	: /* empty */
		{ $$ = NULL; }
	| param_list
		{ $$ = $1; }
	;

/*
 * param_list --- comma separated parameter names.
 * Every error alternative yields NULL.
 */
param_list
	: NAME
		{ $$ = make_param($1); }
	| param_list comma NAME
	  {
		$$ = append_right($1, make_param($3));
		yyerrok;
	  }
	| error
		{ $$ = NULL; }
	| param_list error
		{ $$ = NULL; }
	| param_list comma error
		{ $$ = NULL; }
	;
742
/* optional expression, as in for loop */
opt_exp
	: /* empty */
		{ $$ = NULL; }
	| exp
		{ $$ = $1; }
	;

/* optional expression list, NULL when absent */
opt_expression_list
	: /* empty */
		{ $$ = NULL; }
	| expression_list
		{ $$ = $1; }
	;

/*
 * expression_list --- comma separated expressions, one
 * Node_expression_list node per element.  Every error
 * alternative yields NULL.
 */
expression_list
	: exp
		{ $$ = node($1, Node_expression_list, (NODE *) NULL); }
	| expression_list comma exp
	  {
		NODE *element = node($3, Node_expression_list, (NODE *) NULL);

		$$ = append_right($1, element);
		yyerrok;
	  }
	| error
		{ $$ = NULL; }
	| expression_list error
		{ $$ = NULL; }
	| expression_list error exp
		{ $$ = NULL; }
	| expression_list comma error
		{ $$ = NULL; }
	;
776
/* Expressions, not including the comma operator. */
exp	: variable assign_operator exp %prec ASSIGNOP
	  {
		int plain_lhs;

		if (do_lint && $3->type == Node_regex)
			lintwarn(_("regular expression on right of assignment"));

		/*
		 * Optimization of `x = x y'.  Can save lots of time
		 * if done a lot.
		 */
		plain_lhs = ($1->type == Node_var
				|| $1->type == Node_var_new
				|| $1->type == Node_param_list);
		if (plain_lhs
		    && $2 == Node_assign
		    && $3->type == Node_concat
		    && $3->lnode == $1) {
			/* reuse the concat node: only its type changes */
			$3->type = Node_assign_concat;
			$$ = $3;
		} else
			$$ = node($1, $2, $3);
	  }
	| exp LEX_AND exp
		{ $$ = node($1, Node_and, $3); }
	| exp LEX_OR exp
		{ $$ = node($1, Node_or, $3); }
	| exp MATCHOP exp
	  {
		/* this warning is not conditional on do_lint */
		if ($1->type == Node_regex)
			warning(_("regular expression on left of `~' or `!~' operator"));
		$$ = node($1, $2, mk_rexp($3));
	  }
	| exp LEX_IN NAME
		{ $$ = node(variable($3, CAN_FREE, Node_var_array), Node_in_array, $1); }
	| exp a_relop exp %prec RELOP
	  {
		if (do_lint && $3->type == Node_regex)
			lintwarn(_("regular expression on right of comparison"));
		$$ = node($1, $2, $3);
	  }
	| exp '?' exp ':' exp
	  {
		NODE *branches = node($3, Node_if_branches, $5);

		$$ = node($1, Node_cond_exp, branches);
	  }
	| common_exp
		{ $$ = $1; }
	;
820
/* assign_operator --- the value is the node type of the assignment */
assign_operator
	: ASSIGN
		{ $$ = $1; }
	| ASSIGNOP
		{ $$ = $1; }
	| SLASH_BEFORE_EQUAL ASSIGN   /* `/=' */
		{ $$ = Node_assign_quotient; }
	;

/* relop_or_less --- comparison operators other than `>' */
relop_or_less
	: RELOP
		{ $$ = $1; }
	| '<'
		{ $$ = Node_less; }
	;

/* a_relop --- any comparison operator, `>' included */
a_relop
	: relop_or_less
	| '>'
		{ $$ = Node_greater; }
	;
841
/*
 * common_exp --- regexp constants, parenthesized `in' tests,
 * simple expressions, and concatenation of those.
 */
common_exp
	: regexp
		{ $$ = $1; }
	| '!' regexp %prec UNARY
	  {
		/* a negated regexp constant becomes a no-match test on field 0 */
		NODE *field0 = node(make_number(0.0),
					Node_field_spec,
					(NODE *) NULL);

		$$ = node(field0, Node_nomatch, $2);
	  }
	| '(' expression_list r_paren LEX_IN NAME
		{ $$ = node(variable($5, CAN_FREE, Node_var_array), Node_in_array, $2); }
	| simp_exp
		{ $$ = $1; }
	| common_exp simp_exp %prec CONCAT_OP
		{ $$ = node($1, Node_concat, $2); }
	;
860
/* simp_exp --- arithmetic, getline and post-increment/decrement */
simp_exp
	: non_post_simp_exp
	/* Binary operators in order of decreasing precedence.  */
	| simp_exp '^' simp_exp
		{ $$ = node($1, Node_exp, $3); }
	| simp_exp '*' simp_exp
		{ $$ = node($1, Node_times, $3); }
	| simp_exp '/' simp_exp
		{ $$ = node($1, Node_quotient, $3); }
	| simp_exp '%' simp_exp
		{ $$ = node($1, Node_mod, $3); }
	| simp_exp '+' simp_exp
		{ $$ = node($1, Node_plus, $3); }
	| simp_exp '-' simp_exp
		{ $$ = node($1, Node_minus, $3); }
	| LEX_GETLINE opt_variable input_redir
	  {
		/* $3 is NULL when no `< file' was given */
		if (do_lint && parsing_end_rule && $3 == NULL)
			lintwarn(_("non-redirected `getline' undefined inside END action"));
		$$ = node($2, Node_K_getline, $3);
	  }
	| simp_exp IO_IN LEX_GETLINE opt_variable
	  {
		/* the input side becomes a redirection node of type $2 */
		NODE *redir = node($1, $2, (NODE *) NULL);

		$$ = node($4, Node_K_getline, redir);
	  }
	| variable INCREMENT
		{ $$ = node($1, Node_postincrement, (NODE *) NULL); }
	| variable DECREMENT
		{ $$ = node($1, Node_postdecrement, (NODE *) NULL); }
	;
892
/*
 * non_post_simp_exp --- primaries and prefix operators: everything
 * in simp_exp except binary operators, getline and postfix ++/--.
 */
non_post_simp_exp
	: '!' simp_exp %prec UNARY
		{ $$ = node($2, Node_not, (NODE *) NULL); }
	| '(' exp r_paren
		{ $$ = $2; }
	| LEX_BUILTIN
	  '(' opt_expression_list r_paren
		{ $$ = snode($3, Node_builtin, (int) $1); }
	| LEX_LENGTH '(' opt_expression_list r_paren
		{ $$ = snode($3, Node_builtin, (int) $1); }
	| LEX_LENGTH
	  {
		/* bare `length', with no argument list at all */
		if (do_lint)
			lintwarn(_("call of `length' without parentheses is not portable"));
		$$ = snode((NODE *) NULL, Node_builtin, (int) $1);
		if (do_posix)
			warning(_("call of `length' without parentheses is deprecated by POSIX"));
	  }
	| FUNC_CALL '(' opt_expression_list r_paren
	  {
		/* the name is copied into a string node, so $1 can be freed below */
		$$ = node($3, Node_func_call, make_string($1, strlen($1)));
		$$->funcbody = NULL;
		func_use($1, FUNC_USE);
		param_sanity($3);
		free($1);
	  }
	| variable
	| INCREMENT variable
		{ $$ = node($2, Node_preincrement, (NODE *) NULL); }
	| DECREMENT variable
		{ $$ = node($2, Node_predecrement, (NODE *) NULL); }
	| YNUMBER
		{ $$ = $1; }
	| YSTRING
		{ $$ = $1; }

	| '-' simp_exp    %prec UNARY
	  {
		int pure_number = ($2->type == Node_val
					&& ($2->flags & (STRCUR|STRING)) == 0);

		if (pure_number) {
			/* negate the numeric value in place */
			$2->numbr = -(force_number($2));
			$$ = $2;
		} else
			$$ = node($2, Node_unary_minus, (NODE *) NULL);
	  }
	| '+' simp_exp    %prec UNARY
	  {
		/*
		 * was: $$ = $2
		 * POSIX semantics: force a conversion to numeric type
		 */
		$$ = node (make_number(0.0), Node_plus, $2);
	  }
	;
946
/* opt_variable --- an optional variable, NULL when absent */
opt_variable
	: /* empty */
		{ $$ = NULL; }
	| variable
		{ $$ = $1; }
	;

/* variable --- a plain name, a subscripted array element, or a field */
variable
	: NAME
		{ $$ = variable($1, CAN_FREE, Node_var_new); }
	| NAME '[' expression_list ']'
	  {
		NODE *n;

		if ((n = lookup($1)) != NULL && ! isarray(n)) {
			yyerror(_("use of non-array as array"));
			/*
			 * Build a node anyway.  Otherwise $$ keeps the default
			 * value copied from $1, which is a char *, and later
			 * rules dereference it as a NODE *.
			 */
			$$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3);
		} else if ($3 == NULL) {
			fatal(_("invalid subscript expression"));
		} else if ($3->rnode == NULL) {
			/* single subscript: use the expression itself, drop the list node */
			$$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3->lnode);
			freenode($3);
		} else
			$$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3);
	  }
	| '$' non_post_simp_exp
		{ $$ = node($2, Node_field_spec, (NODE *) NULL); }
/*
#if 0
	| lex_builtin
		{ fatal(_("can't use built-in function `%s' as a variable"), tokstart); }
#endif
*/
	;
980
/*
 * Punctuation helpers.  Each of them except l_brace and opt_semi
 * invokes yyerrok once its token has been seen.
 */

l_brace
	: '{' opt_nls
	;

r_brace
	: '}' opt_nls
		{ yyerrok; }
	;

r_paren
	: ')'
		{ yyerrok; }
	;

opt_semi
	: /* empty */
	| semi
	;

semi
	: ';'
		{ yyerrok; }
	;

colon
	: ':'
		{ yyerrok; }
	;

comma
	: ',' opt_nls
		{ yyerrok; }
	;
1008
1009%%
1010
1011struct token {
1012 const char *operator; /* text to match */
1013 NODETYPE value; /* node type */
1014 int class; /* lexical class */
1015 unsigned flags; /* # of args. allowed and compatability */
1016# define ARGS 0xFF /* 0, 1, 2, 3 args allowed (any combination */
1017# define A(n) (1<<(n))
1018# define VERSION_MASK 0xFF00 /* old awk is zero */
1019# define NOT_OLD 0x0100 /* feature not in old awk */
1020# define NOT_POSIX 0x0200 /* feature not in POSIX */
1021# define GAWKX 0x0400 /* gawk extension */
1022# define RESX 0x0800 /* Bell Labs Research extension */
1023 NODE *(*ptr) P((NODE *)); /* function that implements this keyword */
1024};
1025
1026/* Tokentab is sorted ascii ascending order, so it can be binary searched. */
1027/* Function pointers come from declarations in awk.h. */
1028
1029static const struct token tokentab[] = {
1030{"BEGIN", Node_illegal, LEX_BEGIN, 0, 0},
1031{"END", Node_illegal, LEX_END, 0, 0},
1032#ifdef ARRAYDEBUG
1033{"adump", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_adump},
1034#endif
1035{"and", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_and},
1036{"asort", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_asort},
1037{"asorti", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_asorti},
1038{"atan2", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2},
1039{"bindtextdomain", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_bindtextdomain},
1040{"break", Node_K_break, LEX_BREAK, 0, 0},
1041#ifdef ALLOW_SWITCH
1042{"case", Node_K_case, LEX_CASE, GAWKX, 0},
1043#endif
1044{"close", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1)|A(2), do_close},
1045{"compl", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_compl},
1046{"continue", Node_K_continue, LEX_CONTINUE, 0, 0},
1047{"cos", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_cos},
1048{"dcgettext", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_dcgettext},
1049{"dcngettext", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3)|A(4)|A(5), do_dcngettext},
1050#ifdef ALLOW_SWITCH
1051{"default", Node_K_default, LEX_DEFAULT, GAWKX, 0},
1052#endif
1053{"delete", Node_K_delete, LEX_DELETE, NOT_OLD, 0},
1054{"do", Node_K_do, LEX_DO, NOT_OLD, 0},
1055{"else", Node_illegal, LEX_ELSE, 0, 0},
1056{"exit", Node_K_exit, LEX_EXIT, 0, 0},
1057{"exp", Node_builtin, LEX_BUILTIN, A(1), do_exp},
1058{"extension", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_ext},
1059{"fflush", Node_builtin, LEX_BUILTIN, RESX|A(0)|A(1), do_fflush},
1060{"for", Node_K_for, LEX_FOR, 0, 0},
1061{"func", Node_K_function, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0},
1062{"function", Node_K_function, LEX_FUNCTION, NOT_OLD, 0},
1063{"gensub", Node_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), do_gensub},
1064{"getline", Node_K_getline, LEX_GETLINE, NOT_OLD, 0},
1065{"gsub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub},
1066{"if", Node_K_if, LEX_IF, 0, 0},
1067{"in", Node_illegal, LEX_IN, 0, 0},
1068{"index", Node_builtin, LEX_BUILTIN, A(2), do_index},
1069{"int", Node_builtin, LEX_BUILTIN, A(1), do_int},
1070{"length", Node_builtin, LEX_LENGTH, A(0)|A(1), do_length},
1071{"log", Node_builtin, LEX_BUILTIN, A(1), do_log},
1072{"lshift", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_lshift},
1073{"match", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_match},
1074{"mktime", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_mktime},
1075{"next", Node_K_next, LEX_NEXT, 0, 0},
1076{"nextfile", Node_K_nextfile, LEX_NEXTFILE, GAWKX, 0},
1077{"or", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_or},
1078{"print", Node_K_print, LEX_PRINT, 0, 0},
1079{"printf", Node_K_printf, LEX_PRINTF, 0, 0},
1080{"rand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0), do_rand},
1081{"return", Node_K_return, LEX_RETURN, NOT_OLD, 0},
1082{"rshift", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_rshift},
1083{"sin", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_sin},
1084{"split", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_split},
1085{"sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf},
1086{"sqrt", Node_builtin, LEX_BUILTIN, A(1), do_sqrt},
1087{"srand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand},
1088#if defined(GAWKDEBUG) || defined(ARRAYDEBUG) /* || ... */
1089{"stopme", Node_builtin, LEX_BUILTIN, GAWKX|A(0), stopme},
1090#endif
1091{"strftime", Node_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2), do_strftime},
1092{"strtonum", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum},
1093{"sub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub},
1094{"substr", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_substr},
1095#ifdef ALLOW_SWITCH
1096{"switch", Node_K_switch, LEX_SWITCH, GAWKX, 0},
1097#endif
1098{"system", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system},
1099{"systime", Node_builtin, LEX_BUILTIN, GAWKX|A(0), do_systime},
1100{"tolower", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower},
1101{"toupper", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper},
1102{"while", Node_K_while, LEX_WHILE, 0, 0},
1103{"xor", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_xor},
1104};
1105
#ifdef MBS_SUPPORT
/* The current shift state. */
static mbstate_t cur_mbstate;

/* Ring buffer holding an entry for each of the most recent characters. */
#define MAX_CHAR_IN_RING_BUFFER	8
#define RING_BUFFER_SIZE	(MAX_CHAR_IN_RING_BUFFER * MB_LEN_MAX)
static char cur_char_ring[RING_BUFFER_SIZE];

/* Current index into the ring buffer. */
static int cur_ring_idx;

/*
 * True when the last nextc() returned a single-byte character
 * or the first byte of a multibyte character.
 */
#define nextc_is_1stbyte (cur_char_ring[cur_ring_idx] == 1)
#else /* ! MBS_SUPPORT */
/* a dummy: without multibyte support this is always true */
#define nextc_is_1stbyte 1
#endif /* MBS_SUPPORT */
1122
1123/* getfname --- return name of a builtin function (for pretty printing) */
1124
1125const char *
1126getfname(register NODE *(*fptr)(NODE *))
1127{
1128 register int i, j;
1129
1130 j = sizeof(tokentab) / sizeof(tokentab[0]);
1131 /* linear search, no other way to do it */
1132 for (i = 0; i < j; i++)
1133 if (tokentab[i].ptr == fptr)
1134 return tokentab[i].operator;
1135
1136 return NULL;
1137}
1138
1139/* yyerror --- print a syntax error message, show where */
1140
1141/*
1142 * Function identifier purposely indented to avoid mangling
1143 * by ansi2knr. Sigh.
1144 */
1145
1146static void
1147#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
1148 yyerror(const char *m, ...)
1149#else
1150/* VARARGS0 */
1151 yyerror(va_alist)
1152 va_dcl
1153#endif
1154{
1155 va_list args;
1156 const char *mesg = NULL;
1157 register char *bp, *cp;
1158 char *scan;
1159 char *buf;
1160 int count;
1161 static char end_of_file_line[] = "(END OF FILE)";
1162 char save;
1163
1164 errcount++;
1165 /* Find the current line in the input file */
1166 if (lexptr && lexeme) {
1167 if (thisline == NULL) {
1168 cp = lexeme;
1169 if (*cp == '\n') {
1170 cp--;
1171 mesg = _("unexpected newline or end of string");
1172 }
1173 for (; cp != lexptr_begin && *cp != '\n'; --cp)
1174 continue;
1175 if (*cp == '\n')
1176 cp++;
1177 thisline = cp;
1178 }
1179 /* NL isn't guaranteed */
1180 bp = lexeme;
1181 while (bp < lexend && *bp && *bp != '\n')
1182 bp++;
1183 } else {
1184 thisline = end_of_file_line;
1185 bp = thisline + strlen(thisline);
1186 }
1187
1188 /*
1189 * Saving and restoring *bp keeps valgrind happy,
1190 * since the guts of glibc uses strlen, even though
1191 * we're passing an explict precision. Sigh.
1192 *
1193 * 8/2003: We may not need this anymore.
1194 */
1195 save = *bp;
1196 *bp = '\0';
1197
1198 msg("%.*s", (int) (bp - thisline), thisline);
1199
1200 *bp = save;
1201
1202#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
1203 va_start(args, m);
1204 if (mesg == NULL)
1205 mesg = m;
1206#else
1207 va_start(args);
1208 if (mesg == NULL)
1209 mesg = va_arg(args, char *);
1210#endif
1211 count = (bp - thisline) + strlen(mesg) + 2 + 1;
1212 emalloc(buf, char *, count, "yyerror");
1213
1214 bp = buf;
1215
1216 if (lexptr != NULL) {
1217 scan = thisline;
1218 while (scan < lexeme)
1219 if (*scan++ == '\t')
1220 *bp++ = '\t';
1221 else
1222 *bp++ = ' ';
1223 *bp++ = '^';
1224 *bp++ = ' ';
1225 }
1226 strcpy(bp, mesg);
1227 err("", buf, args);
1228 va_end(args);
1229 free(buf);
1230}
1231
1232/* get_src_buf --- read the next buffer of source program */
1233
1234static char *
1235get_src_buf()
1236{
1237 static int samefile = FALSE;
1238 static int nextfile = 0;
1239 static char *buf = NULL;
1240 static size_t buflen = 0;
1241 static int fd;
1242
1243 int n;
1244 register char *scan;
1245 int newfile;
1246 struct stat sbuf;
1247 int readcount = 0;
1248 int l;
1249 char *readloc;
1250
1251again:
1252 newfile = FALSE;
1253 if (nextfile > numfiles)
1254 return NULL;
1255
1256 if (srcfiles[nextfile].stype == CMDLINE) {
1257 if ((l = strlen(srcfiles[nextfile].val)) == 0) {
1258 /*
1259 * Yet Another Special case:
1260 * gawk '' /path/name
1261 * Sigh.
1262 */
1263 static int warned = FALSE;
1264
1265 if (do_lint && ! warned) {
1266 warned = TRUE;
1267 lintwarn(_("empty program text on command line"));
1268 }
1269 ++nextfile;
1270 goto again;
1271 }
1272 if (srcfiles[nextfile].val[l-1] == '\n') {
1273 /* has terminating newline, can use it directly */
1274 sourceline = 1;
1275 lexptr = lexptr_begin = srcfiles[nextfile].val;
1276 /* fall through to pointer adjustment and return, below */
1277 } else {
1278 /* copy it into static buffer */
1279
1280 /* make sure buffer exists and has room */
1281 if (buflen == 0) {
1282 emalloc(buf, char *, l+2, "get_src_buf");
1283 buflen = l + 2;
1284 } else if (l+2 > buflen) {
1285 erealloc(buf, char *, l+2, "get_src_buf");
1286 buflen = l + 2;
1287 } /* else
1288 buffer has room, just use it */
1289
1290 /* copy in data */
1291 memcpy(buf, srcfiles[nextfile].val, l);
1292 buf[l] = '\n';
1293 buf[++l] = '\0';
1294
1295 /* set vars and return */
1296 lexptr = lexptr_begin = buf;
1297 }
1298 lexend = lexptr + l;
1299 nextfile++; /* for next entry to this routine */
1300 return lexptr;
1301 }
1302
1303 if (! samefile) {
1304 source = srcfiles[nextfile].val;
1305 if (source == NULL) { /* read all the source files, all done */
1306 if (buf != NULL) {
1307 free(buf);
1308 buf = NULL;
1309 }
1310 buflen = 0;
1311 return lexeme = lexptr = lexptr_begin = NULL;
1312 }
1313 fd = pathopen(source);
1314 if (fd <= INVALID_HANDLE) {
1315 char *in;
1316
1317 /* suppress file name and line no. in error mesg */
1318 in = source;
1319 source = NULL;
1320 fatal(_("can't open source file `%s' for reading (%s)"),
1321 in, strerror(errno));
1322 }
1323 l = optimal_bufsize(fd, & sbuf);
1324 /*
1325 * Make sure that something silly like
1326 * AWKBUFSIZE=8 make check
1327 * works ok.
1328 */
1329#define A_DECENT_BUFFER_SIZE 128
1330 if (l < A_DECENT_BUFFER_SIZE)
1331 l = A_DECENT_BUFFER_SIZE;
1332#undef A_DECENT_BUFFER_SIZE
1333
1334 newfile = TRUE;
1335
1336 /* make sure buffer exists and has room */
1337 if (buflen == 0) {
1338 emalloc(buf, char *, l+2, "get_src_buf");
1339 buflen = l + 2;
1340 } else if (l+2 > buflen) {
1341 erealloc(buf, char *, l+2, "get_src_buf");
1342 buflen = l + 2;
1343 } /* else
1344 buffer has room, just use it */
1345
1346 readcount = l;
1347 readloc = lexeme = lexptr = lexptr_begin = buf;
1348 samefile = TRUE;
1349 sourceline = 1;
1350 } else {
1351 /*
1352 * In same file, ran off edge of buffer.
1353 * Shift current line down to front, adjust
1354 * pointers and fill in the rest of the buffer.
1355 */
1356
1357 int lexeme_offset = lexeme - lexptr_begin;
1358 int lexptr_offset = lexptr - lexptr_begin;
1359 int lexend_offset = lexend - lexptr_begin;
1360
1361 /* find beginning of current line */
1362 for (scan = lexeme; scan >= lexptr_begin; scan--) {
1363 if (*scan == '\n') {
1364 scan++;
1365 break;
1366 }
1367 }
1368
1369 if (scan <= buf) {
1370 /* have to grow the buffer */
1371 buflen *= 2;
1372 erealloc(buf, char *, buflen, "get_src_buf");
1373 } else {
1374 /* shift things down */
1375 memmove(buf, scan, lexend - scan);
1376 /*
1377 * make offsets relative to start of line,
1378 * not start of buffer.
1379 */
1380 lexend_offset = lexend - scan;
1381 lexeme_offset = lexeme - scan;
1382 lexptr_offset = lexptr - scan;
1383 }
1384
1385 /* adjust pointers */
1386 lexeme = buf + lexeme_offset;
1387 lexptr = buf + lexptr_offset;
1388 lexend = buf + lexend_offset;
1389 lexptr_begin = buf;
1390 readcount = buflen - (lexend - buf);
1391 readloc = lexend;
1392 }
1393
1394 /* add more data to buffer */
1395 n = read(fd, readloc, readcount);
1396 if (n == -1)
1397 fatal(_("can't read sourcefile `%s' (%s)"),
1398 source, strerror(errno));
1399 if (n == 0) {
1400 if (newfile) {
1401 static int warned = FALSE;
1402
1403 if (do_lint && ! warned) {
1404 warned = TRUE;
1405 lintwarn(_("source file `%s' is empty"), source);
1406 }
1407 }
1408 if (fd != fileno(stdin)) /* safety */
1409 close(fd);
1410 samefile = FALSE;
1411 nextfile++;
1412 goto again;
1413 }
1414 lexend = lexptr + n;
1415 return lexptr;
1416}
1417
1418/* tokadd --- add a character to the token buffer */
1419
1420#define tokadd(x) (*tok++ = (x), tok == tokend ? tokexpand() : tok)
1421
1422/* tokexpand --- grow the token buffer */
1423
1424char *
1425tokexpand()
1426{
1427 static int toksize = 60;
1428 int tokoffset;
1429
1430 tokoffset = tok - tokstart;
1431 toksize *= 2;
1432 if (tokstart != NULL)
1433 erealloc(tokstart, char *, toksize, "tokexpand");
1434 else
1435 emalloc(tokstart, char *, toksize, "tokexpand");
1436 tokend = tokstart + toksize;
1437 tok = tokstart + tokoffset;
1438 return tok;
1439}
1440
/*
 * nextc --- get the next input character
 *
 * Returns the next source byte as an unsigned char value, refilling
 * through get_src_buf() when the buffer is exhausted, or EOF when no
 * source is left.
 */

#ifdef MBS_SUPPORT

static int
nextc(void)
{
	/* out of buffered text?  try to get more */
	if ((lexptr == NULL || lexptr >= lexend) && ! get_src_buf())
		return EOF;

	if (gawk_mb_cur_max > 1) {
		/* Step to the ring slot that describes the byte about to be returned. */
		cur_ring_idx = (cur_ring_idx + 1) % RING_BUFFER_SIZE;

		/* A zero slot means this byte has not been classified yet. */
		if (cur_char_ring[cur_ring_idx] == 0) {
			int nbytes;
			int probe = cur_ring_idx;
			mbstate_t tmp_state;
			size_t mbclen;

			/* Offer mbrlen() one more byte each round until it decides. */
			for (nbytes = 1; lexptr + nbytes <= lexend; nbytes++) {
				tmp_state = cur_mbstate;
				mbclen = mbrlen(lexptr, nbytes, &tmp_state);

				if (mbclen == (size_t) -2) {
					/* Still incomplete: record this byte's position in the character. */
					cur_char_ring[probe] = nbytes;
				} else {
					/*
					 * Decided.  A one-byte character, an invalid
					 * sequence and a NUL are all treated as a
					 * singlebyte character; otherwise record the
					 * multibyte length.
					 */
					cur_char_ring[probe] =
						(mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0)
							? 1 : mbclen;
					break;
				}
				probe = (probe + 1) % RING_BUFFER_SIZE;
			}
			cur_mbstate = tmp_state;

			/* Mark the following slot as not yet examined. */
			probe = (probe + 1) % RING_BUFFER_SIZE;
			cur_char_ring[probe] = 0;
		}
	}

	return (int) (unsigned char) *lexptr++;
}

#else /* MBS_SUPPORT */

#if GAWKDEBUG
int
nextc(void)
{
	if ((lexptr == NULL || lexptr >= lexend) && ! get_src_buf())
		return EOF;

	return (int) (unsigned char) *lexptr++;
}
#else
#define	nextc()	(((lexptr == NULL || lexptr >= lexend) && ! get_src_buf()) \
			? EOF \
			: ((int) (unsigned char) *lexptr++) \
		)
#endif

#endif /* MBS_SUPPORT */
1535
1536/* pushback --- push a character back on the input */
1537
1538static inline void
1539pushback(void)
1540{
1541#ifdef MBS_SUPPORT
1542 if (gawk_mb_cur_max > 1)
1543 cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 :
1544 cur_ring_idx - 1;
1545#endif
1546 (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr);
1547}
1548
1549
1550/* allow_newline --- allow newline after &&, ||, ? and : */
1551
1552static void
1553allow_newline(void)
1554{
1555 int c;
1556
1557 for (;;) {
1558 c = nextc();
1559 if (c == EOF)
1560 break;
1561 if (c == '#') {
1562 while ((c = nextc()) != '\n' && c != EOF)
1563 continue;
1564 if (c == EOF)
1565 break;
1566 }
1567 if (c == '\n')
1568 sourceline++;
1569 if (! ISSPACE(c)) {
1570 pushback();
1571 break;
1572 }
1573 }
1574}
1575
1576/* yylex --- Read the input and turn it into tokens. */
1577
1578static int
1579yylex(void)
1580{
1581 register int c;
1582 int seen_e = FALSE; /* These are for numbers */
1583 int seen_point = FALSE;
1584 int esc_seen; /* for literal strings */
1585 int mid;
1586 static int did_newline = FALSE;
1587 char *tokkey;
1588 static int lasttok = 0, eof_warned = FALSE;
1589 int inhex = FALSE;
1590 int intlstr = FALSE;
1591
1592 if (nextc() == EOF) {
1593 if (lasttok != NEWLINE) {
1594 lasttok = NEWLINE;
1595 if (do_lint && ! eof_warned) {
1596 lintwarn(_("source file does not end in newline"));
1597 eof_warned = TRUE;
1598 }
1599 return NEWLINE; /* fake it */
1600 }
1601 return 0;
1602 }
1603 pushback();
1604#if defined OS2 || defined __EMX__
1605 /*
1606 * added for OS/2's extproc feature of cmd.exe
1607 * (like #! in BSD sh)
1608 */
1609 if (strncasecmp(lexptr, "extproc ", 8) == 0) {
1610 while (*lexptr && *lexptr != '\n')
1611 lexptr++;
1612 }
1613#endif
1614 lexeme = lexptr;
1615 thisline = NULL;
1616 if (want_regexp) {
1617 int in_brack = 0; /* count brackets, [[:alnum:]] allowed */
1618 /*
1619 * Counting brackets is non-trivial. [[] is ok,
1620 * and so is [\]], with a point being that /[/]/ as a regexp
1621 * constant has to work.
1622 *
1623 * Do not count [ or ] if either one is preceded by a \.
1624 * A `[' should be counted if
1625 * a) it is the first one so far (in_brack == 0)
1626 * b) it is the `[' in `[:'
1627 * A ']' should be counted if not preceded by a \, since
1628 * it is either closing `:]' or just a plain list.
1629 * According to POSIX, []] is how you put a ] into a set.
1630 * Try to handle that too.
1631 *
1632 * The code for \ handles \[ and \].
1633 */
1634
1635 want_regexp = FALSE;
1636 tok = tokstart;
1637 for (;;) {
1638 c = nextc();
1639
1640 if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) {
1641 case '[':
1642 /* one day check for `.' and `=' too */
1643 if (nextc() == ':' || in_brack == 0)
1644 in_brack++;
1645 pushback();
1646 break;
1647 case ']':
1648 if (tokstart[0] == '['
1649 && (tok == tokstart + 1
1650 || (tok == tokstart + 2
1651 && tokstart[1] == '^')))
1652 /* do nothing */;
1653 else
1654 in_brack--;
1655 break;
1656 case '\\':
1657 if ((c = nextc()) == EOF) {
1658 yyerror(_("unterminated regexp ends with `\\' at end of file"));
1659 goto end_regexp; /* kludge */
1660 } else if (c == '\n') {
1661 sourceline++;
1662 continue;
1663 } else {
1664 tokadd('\\');
1665 tokadd(c);
1666 continue;
1667 }
1668 break;
1669 case '/': /* end of the regexp */
1670 if (in_brack > 0)
1671 break;
1672end_regexp:
1673 tokadd('\0');
1674 yylval.sval = tokstart;
1675 if (do_lint) {
1676 int peek = nextc();
1677
1678 pushback();
1679 if (peek == 'i' || peek == 's') {
1680 if (source)
1681 lintwarn(
1682 _("%s: %d: tawk regex modifier `/.../%c' doesn't work in gawk"),
1683 source, sourceline, peek);
1684 else
1685 lintwarn(
1686 _("tawk regex modifier `/.../%c' doesn't work in gawk"),
1687 peek);
1688 }
1689 }
1690 return lasttok = REGEXP;
1691 case '\n':
1692 pushback();
1693 yyerror(_("unterminated regexp"));
1694 goto end_regexp; /* kludge */
1695 case EOF:
1696 yyerror(_("unterminated regexp at end of file"));
1697 goto end_regexp; /* kludge */
1698 }
1699 tokadd(c);
1700 }
1701 }
1702retry:
1703
1704 /* skipping \r is a hack, but windows is just too pervasive. sigh. */
1705 while ((c = nextc()) == ' ' || c == '\t' || c == '\r')
1706 continue;
1707
1708 lexeme = lexptr ? lexptr - 1 : lexptr;
1709 thisline = NULL;
1710 tok = tokstart;
1711 yylval.nodetypeval = Node_illegal;
1712
1713 if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) {
1714 case EOF:
1715 if (lasttok != NEWLINE) {
1716 lasttok = NEWLINE;
1717 if (do_lint && ! eof_warned) {
1718 lintwarn(_("source file does not end in newline"));
1719 eof_warned = TRUE;
1720 }
1721 return NEWLINE; /* fake it */
1722 }
1723 return 0;
1724
1725 case '\n':
1726 sourceline++;
1727 return lasttok = NEWLINE;
1728
1729 case '#': /* it's a comment */
1730 while ((c = nextc()) != '\n') {
1731 if (c == EOF) {
1732 if (lasttok != NEWLINE) {
1733 lasttok = NEWLINE;
1734 if (do_lint && ! eof_warned) {
1735 lintwarn(
1736 _("source file does not end in newline"));
1737 eof_warned = TRUE;
1738 }
1739 return NEWLINE; /* fake it */
1740 }
1741 return 0;
1742 }
1743 }
1744 sourceline++;
1745 return lasttok = NEWLINE;
1746
1747 case '\\':
1748#ifdef RELAXED_CONTINUATION
1749 /*
1750 * This code puports to allow comments and/or whitespace
1751 * after the `\' at the end of a line used for continuation.
1752 * Use it at your own risk. We think it's a bad idea, which
1753 * is why it's not on by default.
1754 */
1755 if (! do_traditional) {
1756 /* strip trailing white-space and/or comment */
1757 while ((c = nextc()) == ' ' || c == '\t' || c == '\r')
1758 continue;
1759 if (c == '#') {
1760 if (do_lint)
1761 lintwarn(
1762 _("use of `\\ #...' line continuation is not portable"));
1763 while ((c = nextc()) != '\n')
1764 if (c == EOF)
1765 break;
1766 }
1767 pushback();
1768 }
1769#endif /* RELAXED_CONTINUATION */
1770 if (nextc() == '\n') {
1771 sourceline++;
1772 goto retry;
1773 } else {
1774 yyerror(_("backslash not last character on line"));
1775 exit(1);
1776 }
1777 break;
1778
1779 case ':':
1780 case '?':
1781 if (! do_posix)
1782 allow_newline();
1783 return lasttok = c;
1784
1785 /*
1786 * in_parens is undefined unless we are parsing a print
1787 * statement (in_print), but why bother with a check?
1788 */
1789 case ')':
1790 in_parens--;
1791 return lasttok = c;
1792
1793 case '(':
1794 in_parens++;
1795 /* FALL THROUGH */
1796 case '$':
1797 case ';':
1798 case '{':
1799 case ',':
1800 case '[':
1801 case ']':
1802 return lasttok = c;
1803
1804 case '*':
1805 if ((c = nextc()) == '=') {
1806 yylval.nodetypeval = Node_assign_times;
1807 return lasttok = ASSIGNOP;
1808 } else if (do_posix) {
1809 pushback();
1810 return lasttok = '*';
1811 } else if (c == '*') {
1812 /* make ** and **= aliases for ^ and ^= */
1813 static int did_warn_op = FALSE, did_warn_assgn = FALSE;
1814
1815 if (nextc() == '=') {
1816 if (! did_warn_assgn) {
1817 did_warn_assgn = TRUE;
1818 if (do_lint)
1819 lintwarn(_("POSIX does not allow operator `**='"));
1820 if (do_lint_old)
1821 warning(_("old awk does not support operator `**='"));
1822 }
1823 yylval.nodetypeval = Node_assign_exp;
1824 return ASSIGNOP;
1825 } else {
1826 pushback();
1827 if (! did_warn_op) {
1828 did_warn_op = TRUE;
1829 if (do_lint)
1830 lintwarn(_("POSIX does not allow operator `**'"));
1831 if (do_lint_old)
1832 warning(_("old awk does not support operator `**'"));
1833 }
1834 return lasttok = '^';
1835 }
1836 }
1837 pushback();
1838 return lasttok = '*';
1839
1840 case '/':
1841 if (nextc() == '=') {
1842 pushback();
1843 return lasttok = SLASH_BEFORE_EQUAL;
1844 }
1845 pushback();
1846 return lasttok = '/';
1847
1848 case '%':
1849 if (nextc() == '=') {
1850 yylval.nodetypeval = Node_assign_mod;
1851 return lasttok = ASSIGNOP;
1852 }
1853 pushback();
1854 return lasttok = '%';
1855
1856 case '^':
1857 {
1858 static int did_warn_op = FALSE, did_warn_assgn = FALSE;
1859
1860 if (nextc() == '=') {
1861 if (do_lint_old && ! did_warn_assgn) {
1862 did_warn_assgn = TRUE;
1863 warning(_("operator `^=' is not supported in old awk"));
1864 }
1865 yylval.nodetypeval = Node_assign_exp;
1866 return lasttok = ASSIGNOP;
1867 }
1868 pushback();
1869 if (do_lint_old && ! did_warn_op) {
1870 did_warn_op = TRUE;
1871 warning(_("operator `^' is not supported in old awk"));
1872 }
1873 return lasttok = '^';
1874 }
1875
1876 case '+':
1877 if ((c = nextc()) == '=') {
1878 yylval.nodetypeval = Node_assign_plus;
1879 return lasttok = ASSIGNOP;
1880 }
1881 if (c == '+')
1882 return lasttok = INCREMENT;
1883 pushback();
1884 return lasttok = '+';
1885
1886 case '!':
1887 if ((c = nextc()) == '=') {
1888 yylval.nodetypeval = Node_notequal;
1889 return lasttok = RELOP;
1890 }
1891 if (c == '~') {
1892 yylval.nodetypeval = Node_nomatch;
1893 return lasttok = MATCHOP;
1894 }
1895 pushback();
1896 return lasttok = '!';
1897
1898 case '<':
1899 if (nextc() == '=') {
1900 yylval.nodetypeval = Node_leq;
1901 return lasttok = RELOP;
1902 }
1903 yylval.nodetypeval = Node_less;
1904 pushback();
1905 return lasttok = '<';
1906
1907 case '=':
1908 if (nextc() == '=') {
1909 yylval.nodetypeval = Node_equal;
1910 return lasttok = RELOP;
1911 }
1912 yylval.nodetypeval = Node_assign;
1913 pushback();
1914 return lasttok = ASSIGN;
1915
1916 case '>':
1917 if ((c = nextc()) == '=') {
1918 yylval.nodetypeval = Node_geq;
1919 return lasttok = RELOP;
1920 } else if (c == '>') {
1921 yylval.nodetypeval = Node_redirect_append;
1922 return lasttok = IO_OUT;
1923 }
1924 pushback();
1925 if (in_print && in_parens == 0) {
1926 yylval.nodetypeval = Node_redirect_output;
1927 return lasttok = IO_OUT;
1928 }
1929 yylval.nodetypeval = Node_greater;
1930 return lasttok = '>';
1931
1932 case '~':
1933 yylval.nodetypeval = Node_match;
1934 return lasttok = MATCHOP;
1935
1936 case '}':
1937 /*
1938 * Added did newline stuff. Easier than
1939 * hacking the grammar.
1940 */
1941 if (did_newline) {
1942 did_newline = FALSE;
1943 return lasttok = c;
1944 }
1945 did_newline++;
1946 --lexptr; /* pick up } next time */
1947 return lasttok = NEWLINE;
1948
1949 case '"':
1950 string:
1951 esc_seen = FALSE;
1952 while ((c = nextc()) != '"') {
1953 if (c == '\n') {
1954 pushback();
1955 yyerror(_("unterminated string"));
1956 exit(1);
1957 }
1958 if ((gawk_mb_cur_max == 1 || nextc_is_1stbyte) &&
1959 c == '\\') {
1960 c = nextc();
1961 if (c == '\n') {
1962 sourceline++;
1963 continue;
1964 }
1965 esc_seen = TRUE;
1966 tokadd('\\');
1967 }
1968 if (c == EOF) {
1969 pushback();
1970 yyerror(_("unterminated string"));
1971 exit(1);
1972 }
1973 tokadd(c);
1974 }
1975 yylval.nodeval = make_str_node(tokstart,
1976 tok - tokstart, esc_seen ? SCAN : 0);
1977 yylval.nodeval->flags |= PERM;
1978 if (intlstr) {
1979 yylval.nodeval->flags |= INTLSTR;
1980 intlstr = FALSE;
1981 if (do_intl)
1982 dumpintlstr(yylval.nodeval->stptr,
1983 yylval.nodeval->stlen);
1984 }
1985 return lasttok = YSTRING;
1986
1987 case '-':
1988 if ((c = nextc()) == '=') {
1989 yylval.nodetypeval = Node_assign_minus;
1990 return lasttok = ASSIGNOP;
1991 }
1992 if (c == '-')
1993 return lasttok = DECREMENT;
1994 pushback();
1995 return lasttok = '-';
1996
1997 case '.':
1998 c = nextc();
1999 pushback();
2000 if (! ISDIGIT(c))
2001 return lasttok = '.';
2002 else
2003 c = '.';
2004 /* FALL THROUGH */
2005 case '0':
2006 case '1':
2007 case '2':
2008 case '3':
2009 case '4':
2010 case '5':
2011 case '6':
2012 case '7':
2013 case '8':
2014 case '9':
2015 /* It's a number */
2016 for (;;) {
2017 int gotnumber = FALSE;
2018
2019 tokadd(c);
2020 switch (c) {
2021 case 'x':
2022 case 'X':
2023 if (do_traditional)
2024 goto done;
2025 if (tok == tokstart + 2) {
2026 int peek = nextc();
2027
2028 if (ISXDIGIT(peek)) {
2029 inhex = TRUE;
2030 pushback(); /* following digit */
2031 } else {
2032 pushback(); /* x or X */
2033 goto done;
2034 }
2035 }
2036 break;
2037 case '.':
2038 /* period ends exponent part of floating point number */
2039 if (seen_point || seen_e) {
2040 gotnumber = TRUE;
2041 break;
2042 }
2043 seen_point = TRUE;
2044 break;
2045 case 'e':
2046 case 'E':
2047 if (inhex)
2048 break;
2049 if (seen_e) {
2050 gotnumber = TRUE;
2051 break;
2052 }
2053 seen_e = TRUE;
2054 if ((c = nextc()) == '-' || c == '+') {
2055 int c2 = nextc();
2056
2057 if (ISDIGIT(c2)) {
2058 tokadd(c);
2059 tokadd(c2);
2060 } else {
2061 pushback(); /* non-digit after + or - */
2062 pushback(); /* + or - */
2063 pushback(); /* e or E */
2064 }
2065 } else if (! ISDIGIT(c)) {
2066 pushback(); /* character after e or E */
2067 pushback(); /* e or E */
2068 } else {
2069 pushback(); /* digit */
2070 }
2071 break;
2072 case 'a':
2073 case 'A':
2074 case 'b':
2075 case 'B':
2076 case 'c':
2077 case 'C':
2078 case 'D':
2079 case 'd':
2080 case 'f':
2081 case 'F':
2082 if (do_traditional || ! inhex)
2083 goto done;
2084 /* fall through */
2085 case '0':
2086 case '1':
2087 case '2':
2088 case '3':
2089 case '4':
2090 case '5':
2091 case '6':
2092 case '7':
2093 case '8':
2094 case '9':
2095 break;
2096 default:
2097 done:
2098 gotnumber = TRUE;
2099 }
2100 if (gotnumber)
2101 break;
2102 c = nextc();
2103 }
2104 if (c != EOF)
2105 pushback();
2106 else if (do_lint && ! eof_warned) {
2107 lintwarn(_("source file does not end in newline"));
2108 eof_warned = TRUE;
2109 }
2110 tokadd('\0');
2111 if (! do_traditional && isnondecimal(tokstart, FALSE)) {
2112 if (do_lint) {
2113 if (ISDIGIT(tokstart[1])) /* not an 'x' or 'X' */
2114 lintwarn("numeric constant `%.*s' treated as octal",
2115 (int) strlen(tokstart)-1, tokstart);
2116 else if (tokstart[1] == 'x' || tokstart[1] == 'X')
2117 lintwarn("numeric constant `%.*s' treated as hexadecimal",
2118 (int) strlen(tokstart)-1, tokstart);
2119 }
2120 yylval.nodeval = make_number(nondec2awknum(tokstart, strlen(tokstart)));
2121 } else
2122 yylval.nodeval = make_number(atof(tokstart));
2123 yylval.nodeval->flags |= PERM;
2124 return lasttok = YNUMBER;
2125
2126 case '&':
2127 if ((c = nextc()) == '&') {
2128 yylval.nodetypeval = Node_and;
2129 allow_newline();
2130 return lasttok = LEX_AND;
2131 }
2132 pushback();
2133 return lasttok = '&';
2134
2135 case '|':
2136 if ((c = nextc()) == '|') {
2137 yylval.nodetypeval = Node_or;
2138 allow_newline();
2139 return lasttok = LEX_OR;
2140 } else if (! do_traditional && c == '&') {
2141 yylval.nodetypeval = Node_redirect_twoway;
2142 return lasttok = (in_print && in_parens == 0 ? IO_OUT : IO_IN);
2143 }
2144 pushback();
2145 if (in_print && in_parens == 0) {
2146 yylval.nodetypeval = Node_redirect_pipe;
2147 return lasttok = IO_OUT;
2148 } else {
2149 yylval.nodetypeval = Node_redirect_pipein;
2150 return lasttok = IO_IN;
2151 }
2152 }
2153
2154 if (c != '_' && ! ISALPHA(c)) {
2155 yyerror(_("invalid char '%c' in expression"), c);
2156 exit(1);
2157 }
2158
2159 /*
2160 * Lots of fog here. Consider:
2161 *
2162 * print "xyzzy"$_"foo"
2163 *
2164 * Without the check for ` lasttok != '$' ', this is parsed as
2165 *
2166 * print "xxyzz" $(_"foo")
2167 *
2168 * With the check, it is "correctly" parsed as three
2169 * string concatenations. Sigh. This seems to be
2170 * "more correct", but this is definitely one of those
2171 * occasions where the interactions are funny.
2172 */
2173 if (! do_traditional && c == '_' && lasttok != '$') {
2174 if ((c = nextc()) == '"') {
2175 intlstr = TRUE;
2176 goto string;
2177 }
2178 pushback();
2179 c = '_';
2180 }
2181
2182 /* it's some type of name-type-thing. Find its length. */
2183 tok = tokstart;
2184 while (is_identchar(c)) {
2185 tokadd(c);
2186 c = nextc();
2187 }
2188 tokadd('\0');
2189 emalloc(tokkey, char *, tok - tokstart, "yylex");
2190 memcpy(tokkey, tokstart, tok - tokstart);
2191 if (c != EOF)
2192 pushback();
2193 else if (do_lint && ! eof_warned) {
2194 lintwarn(_("source file does not end in newline"));
2195 eof_warned = TRUE;
2196 }
2197
2198 /* See if it is a special token. */
2199
2200 if ((mid = check_special(tokstart)) >= 0) {
2201 if (do_lint) {
2202 if (tokentab[mid].flags & GAWKX)
2203 lintwarn(_("`%s' is a gawk extension"),
2204 tokentab[mid].operator);
2205 if (tokentab[mid].flags & RESX)
2206 lintwarn(_("`%s' is a Bell Labs extension"),
2207 tokentab[mid].operator);
2208 if (tokentab[mid].flags & NOT_POSIX)
2209 lintwarn(_("POSIX does not allow `%s'"),
2210 tokentab[mid].operator);
2211 }
2212 if (do_lint_old && (tokentab[mid].flags & NOT_OLD))
2213 warning(_("`%s' is not supported in old awk"),
2214 tokentab[mid].operator);
2215 if ((do_traditional && (tokentab[mid].flags & GAWKX))
2216 || (do_posix && (tokentab[mid].flags & NOT_POSIX)))
2217 ;
2218 else {
2219 if (tokentab[mid].class == LEX_BUILTIN
2220 || tokentab[mid].class == LEX_LENGTH)
2221 yylval.lval = mid;
2222 else
2223 yylval.nodetypeval = tokentab[mid].value;
2224 free(tokkey);
2225 return lasttok = tokentab[mid].class;
2226 }
2227 }
2228
2229 yylval.sval = tokkey;
2230 if (*lexptr == '(')
2231 return lasttok = FUNC_CALL;
2232 else {
2233 static short goto_warned = FALSE;
2234
2235#define SMART_ALECK 1
2236 if (SMART_ALECK && do_lint
2237 && ! goto_warned && strcasecmp(tokkey, "goto") == 0) {
2238 goto_warned = TRUE;
2239 lintwarn(_("`goto' considered harmful!\n"));
2240 }
2241 return lasttok = NAME;
2242 }
2243}
2244
2245/* node_common --- common code for allocating a new node */
2246
2247static NODE *
2248node_common(NODETYPE op)
2249{
2250 register NODE *r;
2251
2252 getnode(r);
2253 r->type = op;
2254 r->flags = MALLOC;
2255 /* if lookahead is a NL, lineno is 1 too high */
2256 if (lexeme && lexeme >= lexptr_begin && *lexeme == '\n')
2257 r->source_line = sourceline - 1;
2258 else
2259 r->source_line = sourceline;
2260 r->source_file = source;
2261 return r;
2262}
2263
2264/* node --- allocates a node with defined lnode and rnode. */
2265
2266NODE *
2267node(NODE *left, NODETYPE op, NODE *right)
2268{
2269 register NODE *r;
2270
2271 r = node_common(op);
2272 r->lnode = left;
2273 r->rnode = right;
2274 return r;
2275}
2276
2277/* snode --- allocate a node with defined subnode and builtin for builtin
2278 functions. Checks for arg. count and supplies defaults where
2279 possible. */
2280
2281static NODE *
2282snode(NODE *subn, NODETYPE op, int idx)
2283{
2284 register NODE *r;
2285 register NODE *n;
2286 int nexp = 0;
2287 int args_allowed;
2288
2289 r = node_common(op);
2290
2291 /* traverse expression list to see how many args. given */
2292 for (n = subn; n != NULL; n = n->rnode) {
2293 nexp++;
2294 if (nexp > 5)
2295 break;
2296 }
2297
2298 /* check against how many args. are allowed for this builtin */
2299 args_allowed = tokentab[idx].flags & ARGS;
2300 if (args_allowed && (args_allowed & A(nexp)) == 0)
2301 fatal(_("%d is invalid as number of arguments for %s"),
2302 nexp, tokentab[idx].operator);
2303
2304 r->builtin = tokentab[idx].ptr;
2305
2306 /* special case processing for a few builtins */
2307 if (nexp == 0 && r->builtin == do_length) {
2308 subn = node(node(make_number(0.0), Node_field_spec, (NODE *) NULL),
2309 Node_expression_list,
2310 (NODE *) NULL);
2311 } else if (r->builtin == do_match) {
2312 static short warned = FALSE;
2313
2314 if (subn->rnode->lnode->type != Node_regex)
2315 subn->rnode->lnode = mk_rexp(subn->rnode->lnode);
2316
2317 if (subn->rnode->rnode != NULL) { /* 3rd argument there */
2318 if (do_lint && ! warned) {
2319 warned = TRUE;
2320 lintwarn(_("match: third argument is a gawk extension"));
2321 }
2322 if (do_traditional)
2323 fatal(_("match: third argument is a gawk extension"));
2324 }
2325 } else if (r->builtin == do_sub || r->builtin == do_gsub) {
2326 if (subn->lnode->type != Node_regex)
2327 subn->lnode = mk_rexp(subn->lnode);
2328 if (nexp == 2)
2329 append_right(subn, node(node(make_number(0.0),
2330 Node_field_spec,
2331 (NODE *) NULL),
2332 Node_expression_list,
2333 (NODE *) NULL));
2334 else if (subn->rnode->rnode->lnode->type == Node_val) {
2335 if (do_lint)
2336 lintwarn(_("%s: string literal as last arg of substitute has no effect"),
2337 (r->builtin == do_sub) ? "sub" : "gsub");
2338 } else if (! isassignable(subn->rnode->rnode->lnode)) {
2339 yyerror(_("%s third parameter is not a changeable object"),
2340 (r->builtin == do_sub) ? "sub" : "gsub");
2341 }
2342 } else if (r->builtin == do_gensub) {
2343 if (subn->lnode->type != Node_regex)
2344 subn->lnode = mk_rexp(subn->lnode);
2345 if (nexp == 3)
2346 append_right(subn, node(node(make_number(0.0),
2347 Node_field_spec,
2348 (NODE *) NULL),
2349 Node_expression_list,
2350 (NODE *) NULL));
2351 } else if (r->builtin == do_split) {
2352 if (nexp == 2)
2353 append_right(subn,
2354 node(FS_node, Node_expression_list, (NODE *) NULL));
2355 n = subn->rnode->rnode->lnode;
2356 if (n->type != Node_regex)
2357 subn->rnode->rnode->lnode = mk_rexp(n);
2358 if (nexp == 2)
2359 subn->rnode->rnode->lnode->re_flags |= FS_DFLT;
2360 } else if (r->builtin == do_close) {
2361 static short warned = FALSE;
2362
2363 if ( nexp == 2) {
2364 if (do_lint && nexp == 2 && ! warned) {
2365 warned = TRUE;
2366 lintwarn(_("close: second argument is a gawk extension"));
2367 }
2368 if (do_traditional)
2369 fatal(_("close: second argument is a gawk extension"));
2370 }
2371 } else if (do_intl /* --gen-po */
2372 && r->builtin == do_dcgettext /* dcgettext(...) */
2373 && subn->lnode->type == Node_val /* 1st arg is constant */
2374 && (subn->lnode->flags & STRCUR) != 0) { /* it's a string constant */
2375 /* ala xgettext, dcgettext("some string" ...) dumps the string */
2376 NODE *str = subn->lnode;
2377
2378 if ((str->flags & INTLSTR) != 0)
2379 warning(_("use of dcgettext(_\"...\") is incorrect: remove leading underscore"));
2380 /* don't dump it, the lexer already did */
2381 else
2382 dumpintlstr(str->stptr, str->stlen);
2383 } else if (do_intl /* --gen-po */
2384 && r->builtin == do_dcngettext /* dcngettext(...) */
2385 && subn->lnode->type == Node_val /* 1st arg is constant */
2386 && (subn->lnode->flags & STRCUR) != 0 /* it's a string constant */
2387 && subn->rnode->lnode->type == Node_val /* 2nd arg is constant too */
2388 && (subn->rnode->lnode->flags & STRCUR) != 0) { /* it's a string constant */
2389 /* ala xgettext, dcngettext("some string", "some plural" ...) dumps the string */
2390 NODE *str1 = subn->lnode;
2391 NODE *str2 = subn->rnode->lnode;
2392
2393 if (((str1->flags | str2->flags) & INTLSTR) != 0)
2394 warning(_("use of dcngettext(_\"...\") is incorrect: remove leading underscore"));
2395 else
2396 dumpintlstr2(str1->stptr, str1->stlen, str2->stptr, str2->stlen);
2397 }
2398
2399 r->subnode = subn;
2400 if (r->builtin == do_sprintf) {
2401 count_args(r);
2402 r->lnode->printf_count = r->printf_count; /* hack */
2403 }
2404 return r;
2405}
2406
2407/* make_for_loop --- build a for loop */
2408
2409static NODE *
2410make_for_loop(NODE *init, NODE *cond, NODE *incr)
2411{
2412 register FOR_LOOP_HEADER *r;
2413 NODE *n;
2414
2415 emalloc(r, FOR_LOOP_HEADER *, sizeof(FOR_LOOP_HEADER), "make_for_loop");
2416 getnode(n);
2417 n->type = Node_illegal;
2418 r->init = init;
2419 r->cond = cond;
2420 r->incr = incr;
2421 n->sub.nodep.r.hd = r;
2422 return n;
2423}
2424
2425/* dup_parms --- return TRUE if there are duplicate parameters */
2426
2427static int
2428dup_parms(NODE *func)
2429{
2430 register NODE *np;
2431 const char *fname, **names;
2432 int count, i, j, dups;
2433 NODE *params;
2434
2435 if (func == NULL) /* error earlier */
2436 return TRUE;
2437
2438 fname = func->param;
2439 count = func->param_cnt;
2440 params = func->rnode;
2441
2442 if (count == 0) /* no args, no problem */
2443 return FALSE;
2444
2445 if (params == NULL) /* error earlier */
2446 return TRUE;
2447
2448 emalloc(names, const char **, count * sizeof(char *), "dup_parms");
2449
2450 i = 0;
2451 for (np = params; np != NULL; np = np->rnode) {
2452 if (np->param == NULL) { /* error earlier, give up, go home */
2453 free(names);
2454 return TRUE;
2455 }
2456 names[i++] = np->param;
2457 }
2458
2459 dups = 0;
2460 for (i = 1; i < count; i++) {
2461 for (j = 0; j < i; j++) {
2462 if (strcmp(names[i], names[j]) == 0) {
2463 dups++;
2464 error(
2465 _("function `%s': parameter #%d, `%s', duplicates parameter #%d"),
2466 fname, i+1, names[j], j+1);
2467 }
2468 }
2469 }
2470
2471 free(names);
2472 return (dups > 0 ? TRUE : FALSE);
2473}
2474
2475/* parms_shadow --- check if parameters shadow globals */
2476
2477static int
2478parms_shadow(const char *fname, NODE *func)
2479{
2480 int count, i;
2481 int ret = FALSE;
2482
2483 if (fname == NULL || func == NULL) /* error earlier */
2484 return FALSE;
2485
2486 count = func->lnode->param_cnt;
2487
2488 if (count == 0) /* no args, no problem */
2489 return FALSE;
2490
2491 /*
2492 * Use warning() and not lintwarn() so that can warn
2493 * about all shadowed parameters.
2494 */
2495 for (i = 0; i < count; i++) {
2496 if (lookup(func->parmlist[i]) != NULL) {
2497 warning(
2498 _("function `%s': parameter `%s' shadows global variable"),
2499 fname, func->parmlist[i]);
2500 ret = TRUE;
2501 }
2502 }
2503
2504 return ret;
2505}
2506
2507/*
2508 * install:
2509 * Install a name in the symbol table, even if it is already there.
2510 * Caller must check against redefinition if that is desired.
2511 */
2512
2513NODE *
2514install(char *name, NODE *value)
2515{
2516 register NODE *hp;
2517 register size_t len;
2518 register int bucket;
2519
2520 var_count++;
2521 len = strlen(name);
2522 bucket = hash(name, len, (unsigned long) HASHSIZE);
2523 getnode(hp);
2524 hp->type = Node_hashnode;
2525 hp->hnext = variables[bucket];
2526 variables[bucket] = hp;
2527 hp->hlength = len;
2528 hp->hvalue = value;
2529 hp->hname = name;
2530 hp->hvalue->vname = name;
2531 return hp->hvalue;
2532}
2533
2534/* lookup --- find the most recent hash node for name installed by install */
2535
2536NODE *
2537lookup(const char *name)
2538{
2539 register NODE *bucket;
2540 register size_t len;
2541
2542 len = strlen(name);
2543 for (bucket = variables[hash(name, len, (unsigned long) HASHSIZE)];
2544 bucket != NULL; bucket = bucket->hnext)
2545 if (bucket->hlength == len && STREQN(bucket->hname, name, len))
2546 return bucket->hvalue;
2547
2548 return NULL;
2549}
2550
2551/* var_comp --- compare two variable names */
2552
2553static int
2554var_comp(const void *v1, const void *v2)
2555{
2556 const NODE *const *npp1, *const *npp2;
2557 const NODE *n1, *n2;
2558 int minlen;
2559
2560 npp1 = (const NODE *const *) v1;
2561 npp2 = (const NODE *const *) v2;
2562 n1 = *npp1;
2563 n2 = *npp2;
2564
2565 if (n1->hlength > n2->hlength)
2566 minlen = n1->hlength;
2567 else
2568 minlen = n2->hlength;
2569
2570 return strncmp(n1->hname, n2->hname, minlen);
2571}
2572
2573/* valinfo --- dump var info */
2574
2575static void
2576valinfo(NODE *n, FILE *fp)
2577{
2578 if (n->flags & STRING) {
2579 fprintf(fp, "string (");
2580 pp_string_fp(fp, n->stptr, n->stlen, '"', FALSE);
2581 fprintf(fp, ")\n");
2582 } else if (n->flags & NUMBER)
2583 fprintf(fp, "number (%.17g)\n", n->numbr);
2584 else if (n->flags & STRCUR) {
2585 fprintf(fp, "string value (");
2586 pp_string_fp(fp, n->stptr, n->stlen, '"', FALSE);
2587 fprintf(fp, ")\n");
2588 } else if (n->flags & NUMCUR)
2589 fprintf(fp, "number value (%.17g)\n", n->numbr);
2590 else
2591 fprintf(fp, "?? flags %s\n", flags2str(n->flags));
2592}
2593
2594
2595/* dump_vars --- dump the symbol table */
2596
2597void
2598dump_vars(const char *fname)
2599{
2600 int i, j;
2601 NODE **table;
2602 NODE *p;
2603 FILE *fp;
2604
2605 emalloc(table, NODE **, var_count * sizeof(NODE *), "dump_vars");
2606
2607 if (fname == NULL)
2608 fp = stderr;
2609 else if ((fp = fopen(fname, "w")) == NULL) {
2610 warning(_("could not open `%s' for writing (%s)"), fname, strerror(errno));
2611 warning(_("sending profile to standard error"));
2612 fp = stderr;
2613 }
2614
2615 for (i = j = 0; i < HASHSIZE; i++)
2616 for (p = variables[i]; p != NULL; p = p->hnext)
2617 table[j++] = p;
2618
2619 assert(j == var_count);
2620
2621 /* Shazzam! */
2622 qsort(table, j, sizeof(NODE *), var_comp);
2623
2624 for (i = 0; i < j; i++) {
2625 p = table[i];
2626 if (p->hvalue->type == Node_func)
2627 continue;
2628 fprintf(fp, "%.*s: ", (int) p->hlength, p->hname);
2629 if (p->hvalue->type == Node_var_array)
2630 fprintf(fp, "array, %ld elements\n", p->hvalue->table_size);
2631 else if (p->hvalue->type == Node_var_new)
2632 fprintf(fp, "unused variable\n");
2633 else if (p->hvalue->type == Node_var)
2634 valinfo(p->hvalue->var_value, fp);
2635 else {
2636 NODE **lhs = get_lhs(p->hvalue, NULL, FALSE);
2637
2638 valinfo(*lhs, fp);
2639 }
2640 }
2641
2642 if (fp != stderr && fclose(fp) != 0)
2643 warning(_("%s: close failed (%s)"), fname, strerror(errno));
2644
2645 free(table);
2646}
2647
2648/* release_all_vars --- free all variable memory */
2649
2650void
2651release_all_vars()
2652{
2653 int i;
2654 NODE *p, *next;
2655
2656 for (i = 0; i < HASHSIZE; i++)
2657 for (p = variables[i]; p != NULL; p = next) {
2658 next = p->hnext;
2659
2660 if (p->hvalue->type == Node_func)
2661 continue;
2662 else if (p->hvalue->type == Node_var_array)
2663 assoc_clear(p->hvalue);
2664 else if (p->hvalue->type != Node_var_new) {
2665 NODE **lhs = get_lhs(p->hvalue, NULL, FALSE);
2666
2667 unref(*lhs);
2668 }
2669 unref(p);
2670 }
2671}
2672
/*
 * finfo --- for use in comparison and sorting of function names
 *
 * dump_funcs() and shadow_funcs() fill an array of these from the symbol
 * table and sort it with fcompare().
 */

struct finfo {
	const char *name;	/* function name, taken from the hash node's hname */
	size_t nlen;		/* length of name, taken from the hash node's hlength */
	NODE *func;		/* the hash node's hvalue (a Node_func node) */
};
2680
2681/* fcompare --- comparison function for qsort */
2682
2683static int
2684fcompare(const void *p1, const void *p2)
2685{
2686 const struct finfo *f1, *f2;
2687 int minlen;
2688
2689 f1 = (const struct finfo *) p1;
2690 f2 = (const struct finfo *) p2;
2691
2692 if (f1->nlen > f2->nlen)
2693 minlen = f2->nlen;
2694 else
2695 minlen = f1->nlen;
2696
2697 return strncmp(f1->name, f2->name, minlen);
2698}
2699
2700/* dump_funcs --- print all functions */
2701
2702void
2703dump_funcs()
2704{
2705 int i, j;
2706 NODE *p;
2707 struct finfo *tab = NULL;
2708
2709 /*
2710 * Walk through symbol table countng functions.
2711 * Could be more than func_count if there are
2712 * extension functions.
2713 */
2714 for (i = j = 0; i < HASHSIZE; i++) {
2715 for (p = variables[i]; p != NULL; p = p->hnext) {
2716 if (p->hvalue->type == Node_func) {
2717 j++;
2718 }
2719 }
2720 }
2721
2722 if (j == 0)
2723 return;
2724
2725 emalloc(tab, struct finfo *, j * sizeof(struct finfo), "dump_funcs");
2726
2727 /* now walk again, copying info */
2728 for (i = j = 0; i < HASHSIZE; i++) {
2729 for (p = variables[i]; p != NULL; p = p->hnext) {
2730 if (p->hvalue->type == Node_func) {
2731 tab[j].name = p->hname;
2732 tab[j].nlen = p->hlength;
2733 tab[j].func = p->hvalue;
2734 j++;
2735 }
2736 }
2737 }
2738
2739
2740 /* Shazzam! */
2741 qsort(tab, j, sizeof(struct finfo), fcompare);
2742
2743 for (i = 0; i < j; i++)
2744 pp_func(tab[i].name, tab[i].nlen, tab[i].func);
2745
2746 free(tab);
2747}
2748
2749/* shadow_funcs --- check all functions for parameters that shadow globals */
2750
2751void
2752shadow_funcs()
2753{
2754 int i, j;
2755 NODE *p;
2756 struct finfo *tab;
2757 static int calls = 0;
2758 int shadow = FALSE;
2759
2760 if (func_count == 0)
2761 return;
2762
2763 if (calls++ != 0)
2764 fatal(_("shadow_funcs() called twice!"));
2765
2766 emalloc(tab, struct finfo *, func_count * sizeof(struct finfo), "shadow_funcs");
2767
2768 for (i = j = 0; i < HASHSIZE; i++) {
2769 for (p = variables[i]; p != NULL; p = p->hnext) {
2770 if (p->hvalue->type == Node_func) {
2771 tab[j].name = p->hname;
2772 tab[j].nlen = p->hlength;
2773 tab[j].func = p->hvalue;
2774 j++;
2775 }
2776 }
2777 }
2778
2779 assert(j == func_count);
2780
2781 /* Shazzam! */
2782 qsort(tab, func_count, sizeof(struct finfo), fcompare);
2783
2784 for (i = 0; i < j; i++)
2785 shadow |= parms_shadow(tab[i].name, tab[i].func);
2786
2787 free(tab);
2788
2789 /* End with fatal if the user requested it. */
2790 if (shadow && lintfunc != warning)
2791 lintwarn(_("there were shadowed variables."));
2792}
2793
2794/*
2795 * append_right:
2796 * Add new to the rightmost branch of LIST. This uses n^2 time, so we make
2797 * a simple attempt at optimizing it.
2798 */
2799
2800static NODE *
2801append_right(NODE *list, NODE *new)
2802{
2803 register NODE *oldlist;
2804 static NODE *savefront = NULL, *savetail = NULL;
2805
2806 if (list == NULL || new == NULL)
2807 return list;
2808
2809 oldlist = list;
2810 if (savefront == oldlist)
2811 list = savetail; /* Be careful: maybe list->rnode != NULL */
2812 else
2813 savefront = oldlist;
2814
2815 while (list->rnode != NULL)
2816 list = list->rnode;
2817 savetail = list->rnode = new;
2818 return oldlist;
2819}
2820
2821/*
2822 * append_pattern:
2823 * A wrapper around append_right, used for rule lists.
2824 */
2825static inline NODE *
2826append_pattern(NODE **list, NODE *patt)
2827{
2828 NODE *n = node(patt, Node_rule_node, (NODE *) NULL);
2829
2830 if (*list == NULL)
2831 *list = n;
2832 else {
2833 NODE *n1 = node(n, Node_rule_list, (NODE *) NULL);
2834 if ((*list)->type != Node_rule_list)
2835 *list = node(*list, Node_rule_list, n1);
2836 else
2837 (void) append_right(*list, n1);
2838 }
2839 return n;
2840}
2841
2842/*
2843 * func_install:
2844 * check if name is already installed; if so, it had better have Null value,
2845 * in which case def is added as the value. Otherwise, install name with def
2846 * as value.
2847 *
2848 * Extra work, build up and save a list of the parameter names in a table
2849 * and hang it off params->parmlist. This is used to set the `vname' field
2850 * of each function parameter during a function call. See eval.c.
2851 */
2852
2853static void
2854func_install(NODE *params, NODE *def)
2855{
2856 NODE *r, *n, *thisfunc;
2857 char **pnames, *names, *sp;
2858 size_t pcount = 0, space = 0;
2859 int i;
2860
2861 /* check for function foo(foo) { ... }. bleah. */
2862 for (n = params->rnode; n != NULL; n = n->rnode) {
2863 if (strcmp(n->param, params->param) == 0)
2864 fatal(_("function `%s': can't use function name as parameter name"),
2865 params->param);
2866 }
2867
2868 thisfunc = NULL; /* turn off warnings */
2869
2870 /* symbol table managment */
2871 pop_var(params, FALSE);
2872 r = lookup(params->param);
2873 if (r != NULL) {
2874 fatal(_("function name `%s' previously defined"), params->param);
2875 } else if (params->param == builtin_func) /* not a valid function name */
2876 goto remove_params;
2877
2878 /* install the function */
2879 thisfunc = node(params, Node_func, def);
2880 (void) install(params->param, thisfunc);
2881
2882 /* figure out amount of space to allocate for variable names */
2883 for (n = params->rnode; n != NULL; n = n->rnode) {
2884 pcount++;
2885 space += strlen(n->param) + 1;
2886 }
2887
2888 /* allocate it and fill it in */
2889 if (pcount != 0) {
2890 emalloc(names, char *, space, "func_install");
2891 emalloc(pnames, char **, pcount * sizeof(char *), "func_install");
2892 sp = names;
2893 for (i = 0, n = params->rnode; i < pcount; i++, n = n->rnode) {
2894 pnames[i] = sp;
2895 strcpy(sp, n->param);
2896 sp += strlen(n->param) + 1;
2897 }
2898 thisfunc->parmlist = pnames;
2899 } else {
2900 thisfunc->parmlist = NULL;
2901 }
2902
2903 /* update lint table info */
2904 func_use(params->param, FUNC_DEFINE);
2905
2906 func_count++; /* used by profiling / pretty printer */
2907
2908remove_params:
2909 /* remove params from symbol table */
2910 pop_params(params->rnode);
2911}
2912
2913/* pop_var --- remove a variable from the symbol table */
2914
2915static void
2916pop_var(NODE *np, int freeit)
2917{
2918 register NODE *bucket, **save;
2919 register size_t len;
2920 char *name;
2921
2922 name = np->param;
2923 len = strlen(name);
2924 save = &(variables[hash(name, len, (unsigned long) HASHSIZE)]);
2925 for (bucket = *save; bucket != NULL; bucket = bucket->hnext) {
2926 if (len == bucket->hlength && STREQN(bucket->hname, name, len)) {
2927 var_count--;
2928 *save = bucket->hnext;
2929 freenode(bucket);
2930 if (freeit)
2931 free(np->param);
2932 return;
2933 }
2934 save = &(bucket->hnext);
2935 }
2936}
2937
2938/* pop_params --- remove list of function parameters from symbol table */
2939
2940/*
2941 * pop parameters out of the symbol table. do this in reverse order to
2942 * avoid reading freed memory if there were duplicated parameters.
2943 */
2944static void
2945pop_params(NODE *params)
2946{
2947 if (params == NULL)
2948 return;
2949 pop_params(params->rnode);
2950 pop_var(params, TRUE);
2951}
2952
2953/* make_param --- make NAME into a function parameter */
2954
2955static NODE *
2956make_param(char *name)
2957{
2958 NODE *r;
2959
2960 getnode(r);
2961 r->type = Node_param_list;
2962 r->rnode = NULL;
2963 r->param = name;
2964 r->param_cnt = param_counter++;
2965 return (install(name, r));
2966}
2967
/*
 * fdesc --- bookkeeping for the function lint checks.
 * func_use() creates and updates the entries, check_funcs() reports on
 * them and frees them.  ftable[] is indexed by hash() of the name.
 */
static struct fdesc {
	char *name;		/* private copy of the function name */
	short used;		/* bumped by func_use() unless how == FUNC_DEFINE */
	short defined;		/* bumped by func_use() when how == FUNC_DEFINE */
	struct fdesc *next;	/* next entry in the same hash chain */
} *ftable[HASHSIZE];
2974
2975/* func_use --- track uses and definitions of functions */
2976
2977static void
2978func_use(const char *name, enum defref how)
2979{
2980 struct fdesc *fp;
2981 int len;
2982 int ind;
2983
2984 len = strlen(name);
2985 ind = hash(name, len, HASHSIZE);
2986
2987 for (fp = ftable[ind]; fp != NULL; fp = fp->next) {
2988 if (strcmp(fp->name, name) == 0) {
2989 if (how == FUNC_DEFINE)
2990 fp->defined++;
2991 else
2992 fp->used++;
2993 return;
2994 }
2995 }
2996
2997 /* not in the table, fall through to allocate a new one */
2998
2999 emalloc(fp, struct fdesc *, sizeof(struct fdesc), "func_use");
3000 memset(fp, '\0', sizeof(struct fdesc));
3001 emalloc(fp->name, char *, len + 1, "func_use");
3002 strcpy(fp->name, name);
3003 if (how == FUNC_DEFINE)
3004 fp->defined++;
3005 else
3006 fp->used++;
3007 fp->next = ftable[ind];
3008 ftable[ind] = fp;
3009}
3010
3011/* check_funcs --- verify functions that are called but not defined */
3012
3013static void
3014check_funcs()
3015{
3016 struct fdesc *fp, *next;
3017 int i;
3018
3019 for (i = 0; i < HASHSIZE; i++) {
3020 for (fp = ftable[i]; fp != NULL; fp = fp->next) {
3021#ifdef REALLYMEAN
3022 /* making this the default breaks old code. sigh. */
3023 if (fp->defined == 0) {
3024 error(
3025 _("function `%s' called but never defined"), fp->name);
3026 errcount++;
3027 }
3028#else
3029 if (do_lint && fp->defined == 0)
3030 lintwarn(
3031 _("function `%s' called but never defined"), fp->name);
3032#endif
3033 if (do_lint && fp->used == 0) {
3034 lintwarn(_("function `%s' defined but never called"),
3035 fp->name);
3036 }
3037 }
3038 }
3039
3040 /* now let's free all the memory */
3041 for (i = 0; i < HASHSIZE; i++) {
3042 for (fp = ftable[i]; fp != NULL; fp = next) {
3043 next = fp->next;
3044 free(fp->name);
3045 free(fp);
3046 }
3047 }
3048}
3049
3050/* param_sanity --- look for parameters that are regexp constants */
3051
3052static void
3053param_sanity(NODE *arglist)
3054{
3055 NODE *argp, *arg;
3056 int i;
3057
3058 for (i = 1, argp = arglist; argp != NULL; argp = argp->rnode, i++) {
3059 arg = argp->lnode;
3060 if (arg->type == Node_regex)
3061 warning(_("regexp constant for parameter #%d yields boolean value"), i);
3062 }
3063}
3064
/* deferred variables --- those that are only defined if needed. */

/*
 * Is there any reason to use a hash table for deferred variables?  At the
 * moment, there are only 1 to 3 such variables, so it may not be worth
 * the overhead.  If more modules start using this facility, it should
 * probably be converted into a hash table.
 */

static struct deferred_variable {
	NODE *(*load_func)(void);	/* called by variable() when the name is first needed */
	struct deferred_variable *next;	/* next entry; deferred_variables is the list head */
	/*
	 * register_deferred_variable() allocates sizeof(struct) + strlen(name)
	 * bytes, so the one declared byte ends up holding the trailing '\0'.
	 */
	char name[1];	/* variable-length array */
} *deferred_variables;
3079
3080/* register_deferred_variable --- add a var name and loading function to the list */
3081
3082void
3083register_deferred_variable(const char *name, NODE *(*load_func)(void))
3084{
3085 struct deferred_variable *dv;
3086 size_t sl = strlen(name);
3087
3088 emalloc(dv, struct deferred_variable *, sizeof(*dv)+sl,
3089 "register_deferred_variable");
3090 dv->load_func = load_func;
3091 dv->next = deferred_variables;
3092 memcpy(dv->name, name, sl+1);
3093 deferred_variables = dv;
3094}
3095
3096/* variable --- make sure NAME is in the symbol table */
3097
3098NODE *
3099variable(char *name, int can_free, NODETYPE type)
3100{
3101 register NODE *r;
3102
3103 if ((r = lookup(name)) != NULL) {
3104 if (r->type == Node_func)
3105 fatal(_("function `%s' called with space between name and `(',\nor used as a variable or an array"),
3106 r->vname);
3107
3108 } else {
3109 /* not found */
3110 struct deferred_variable *dv;
3111
3112 for (dv = deferred_variables; TRUE; dv = dv->next) {
3113 if (dv == NULL) {
3114 /*
3115 * This is the only case in which we may not
3116 * free the string.
3117 */
3118 NODE *n;
3119
3120 if (type == Node_var_array)
3121 n = node(NULL, type, NULL);
3122 else
3123 n = node(Nnull_string, type, NULL);
3124
3125 return install(name, n);
3126 }
3127 if (STREQ(name, dv->name)) {
3128 r = (*dv->load_func)();
3129 break;
3130 }
3131 }
3132 }
3133 if (can_free)
3134 free(name);
3135 return r;
3136}
3137
3138/* mk_rexp --- make a regular expression constant */
3139
3140static NODE *
3141mk_rexp(NODE *exp)
3142{
3143 NODE *n;
3144
3145 if (exp->type == Node_regex)
3146 return exp;
3147
3148 getnode(n);
3149 n->type = Node_dynregex;
3150 n->re_exp = exp;
3151 n->re_text = NULL;
3152 n->re_reg = NULL;
3153 n->re_flags = 0;
3154 n->re_cnt = 1;
3155 return n;
3156}
3157
3158/* isnoeffect --- when used as a statement, has no side effects */
3159
3160/*
3161 * To be completely general, we should recursively walk the parse
3162 * tree, to make sure that all the subexpressions also have no effect.
3163 * Instead, we just weaken the actual warning that's printed, up above
3164 * in the grammar.
3165 */
3166
3167static int
3168isnoeffect(NODETYPE type)
3169{
3170 switch (type) {
3171 case Node_times:
3172 case Node_quotient:
3173 case Node_mod:
3174 case Node_plus:
3175 case Node_minus:
3176 case Node_subscript:
3177 case Node_concat:
3178 case Node_exp:
3179 case Node_unary_minus:
3180 case Node_field_spec:
3181 case Node_and:
3182 case Node_or:
3183 case Node_equal:
3184 case Node_notequal:
3185 case Node_less:
3186 case Node_greater:
3187 case Node_leq:
3188 case Node_geq:
3189 case Node_match:
3190 case Node_nomatch:
3191 case Node_not:
3192 case Node_val:
3193 case Node_in_array:
3194 case Node_NF:
3195 case Node_NR:
3196 case Node_FNR:
3197 case Node_FS:
3198 case Node_RS:
3199 case Node_FIELDWIDTHS:
3200 case Node_IGNORECASE:
3201 case Node_OFS:
3202 case Node_ORS:
3203 case Node_OFMT:
3204 case Node_CONVFMT:
3205 case Node_BINMODE:
3206 case Node_LINT:
3207 case Node_SUBSEP:
3208 case Node_TEXTDOMAIN:
3209 return TRUE;
3210 default:
3211 break; /* keeps gcc -Wall happy */
3212 }
3213
3214 return FALSE;
3215}
3216
3217/* isassignable --- can this node be assigned to? */
3218
3219static int
3220isassignable(register NODE *n)
3221{
3222 switch (n->type) {
3223 case Node_var_new:
3224 case Node_var:
3225 case Node_FIELDWIDTHS:
3226 case Node_RS:
3227 case Node_FS:
3228 case Node_FNR:
3229 case Node_NR:
3230 case Node_NF:
3231 case Node_IGNORECASE:
3232 case Node_OFMT:
3233 case Node_CONVFMT:
3234 case Node_ORS:
3235 case Node_OFS:
3236 case Node_LINT:
3237 case Node_BINMODE:
3238 case Node_SUBSEP:
3239 case Node_TEXTDOMAIN:
3240 case Node_field_spec:
3241 case Node_subscript:
3242 return TRUE;
3243 case Node_param_list:
3244 return ((n->flags & FUNC) == 0); /* ok if not func name */
3245 default:
3246 break; /* keeps gcc -Wall happy */
3247 }
3248 return FALSE;
3249}
3250
3251/* stopme --- for debugging */
3252
3253NODE *
3254stopme(NODE *tree ATTRIBUTE_UNUSED)
3255{
3256 return (NODE *) 0;
3257}
3258
3259/* dumpintlstr --- write out an initial .po file entry for the string */
3260
3261static void
3262dumpintlstr(const char *str, size_t len)
3263{
3264 char *cp;
3265
3266 /* See the GNU gettext distribution for details on the file format */
3267
3268 if (source != NULL) {
3269 /* ala the gettext sources, remove leading `./'s */
3270 for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
3271 continue;
3272 printf("#: %s:%d\n", cp, sourceline);
3273 }
3274
3275 printf("msgid ");
3276 pp_string_fp(stdout, str, len, '"', TRUE);
3277 putchar('\n');
3278 printf("msgstr \"\"\n\n");
3279 fflush(stdout);
3280}
3281
3282/* dumpintlstr2 --- write out an initial .po file entry for the string and its plural */
3283
3284static void
3285dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2)
3286{
3287 char *cp;
3288
3289 /* See the GNU gettext distribution for details on the file format */
3290
3291 if (source != NULL) {
3292 /* ala the gettext sources, remove leading `./'s */
3293 for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
3294 continue;
3295 printf("#: %s:%d\n", cp, sourceline);
3296 }
3297
3298 printf("msgid ");
3299 pp_string_fp(stdout, str1, len1, '"', TRUE);
3300 putchar('\n');
3301 printf("msgid_plural ");
3302 pp_string_fp(stdout, str2, len2, '"', TRUE);
3303 putchar('\n');
3304 printf("msgstr[0] \"\"\nmsgstr[1] \"\"\n\n");
3305 fflush(stdout);
3306}
3307
3308/* count_args --- count the number of printf arguments */
3309
3310static void
3311count_args(NODE *tree)
3312{
3313 size_t count = 0;
3314 NODE *save_tree;
3315
3316 assert(tree->type == Node_K_printf
3317 || (tree->type == Node_builtin && tree->builtin == do_sprintf));
3318 save_tree = tree;
3319
3320 tree = tree->lnode; /* printf format string */
3321
3322 for (count = 0; tree != NULL; tree = tree->rnode)
3323 count++;
3324
3325 save_tree->printf_count = count;
3326}
3327
3328/* isarray --- can this type be subscripted? */
3329
3330static int
3331isarray(NODE *n)
3332{
3333 switch (n->type) {
3334 case Node_var_new:
3335 case Node_var_array:
3336 return TRUE;
3337 case Node_param_list:
3338 return (n->flags & FUNC) == 0;
3339 case Node_array_ref:
3340 cant_happen();
3341 break;
3342 default:
3343 break; /* keeps gcc -Wall happy */
3344 }
3345
3346 return FALSE;
3347}
3348
3349/* See if name is a special token. */
3350
3351int
3352check_special(const char *name)
3353{
3354 int low, high, mid;
3355 int i;
3356
3357 low = 0;
3358 high = (sizeof(tokentab) / sizeof(tokentab[0])) - 1;
3359 while (low <= high) {
3360 mid = (low + high) / 2;
3361 i = *name - tokentab[mid].operator[0];
3362 if (i == 0)
3363 i = strcmp(name, tokentab[mid].operator);
3364
3365 if (i < 0) /* token < mid */
3366 high = mid - 1;
3367 else if (i > 0) /* token > mid */
3368 low = mid + 1;
3369 else
3370 return mid;
3371 }
3372 return -1;
3373}
Note: See TracBrowser for help on using the repository browser.