1 | /*
|
---|
2 | * awkgram.y --- yacc/bison parser
|
---|
3 | */
|
---|
4 |
|
---|
5 | /*
|
---|
6 | * Copyright (C) 1986, 1988, 1989, 1991-2005 the Free Software Foundation, Inc.
|
---|
7 | *
|
---|
8 | * This file is part of GAWK, the GNU implementation of the
|
---|
9 | * AWK Programming Language.
|
---|
10 | *
|
---|
11 | * GAWK is free software; you can redistribute it and/or modify
|
---|
12 | * it under the terms of the GNU General Public License as published by
|
---|
13 | * the Free Software Foundation; either version 2 of the License, or
|
---|
14 | * (at your option) any later version.
|
---|
15 | *
|
---|
16 | * GAWK is distributed in the hope that it will be useful,
|
---|
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
19 | * GNU General Public License for more details.
|
---|
20 | *
|
---|
21 | * You should have received a copy of the GNU General Public License
|
---|
22 | * along with this program; if not, write to the Free Software
|
---|
23 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
---|
24 | */
|
---|
25 |
|
---|
26 | %{
|
---|
/* Turn on the generated parser's debugging support in GAWKDEBUG builds. */
#ifdef GAWKDEBUG
#define YYDEBUG 12
#endif

#include "awk.h"

/* Readable names for the flag passed as the second argument of variable(). */
#define CAN_FREE	TRUE
#define DONT_FREE	FALSE

/*
 * yyerror() is variadic; it gets a real prototype only when <stdarg.h>
 * and an ISO C compiler are available, otherwise an old-style declaration.
 */
#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
static void yyerror(const char *m, ...) ATTRIBUTE_PRINTF_1;
#else
static void yyerror(); /* va_alist */
#endif

/* Forward declarations of the file-local helpers used by the grammar. */
static char *get_src_buf P((void));
static int yylex P((void));
static NODE *node_common P((NODETYPE op));
static NODE *snode P((NODE *subn, NODETYPE op, int sindex));
static NODE *make_for_loop P((NODE *init, NODE *cond, NODE *incr));
static NODE *append_right P((NODE *list, NODE *new));
static inline NODE *append_pattern P((NODE **list, NODE *patt));
static void func_install P((NODE *params, NODE *def));
static void pop_var P((NODE *np, int freeit));
static void pop_params P((NODE *params));
static NODE *make_param P((char *name));
static NODE *mk_rexp P((NODE *exp));
static int dup_parms P((NODE *func));
static void param_sanity P((NODE *arglist));
static int parms_shadow P((const char *fname, NODE *func));
static int isnoeffect P((NODETYPE t));
static int isassignable P((NODE *n));
static void dumpintlstr P((const char *str, size_t len));
static void dumpintlstr2 P((const char *str1, size_t len1, const char *str2, size_t len2));
static void count_args P((NODE *n));
static int isarray P((NODE *n));

/* Tells func_use() whether a function name is being defined or called. */
enum defref { FUNC_DEFINE, FUNC_USE };
static void func_use P((const char *name, enum defref how));
static void check_funcs P((void));
|
---|
66 |
|
---|
/* incremented by the `regexp' rule just before a REGEXP token is expected */
static int want_regexp;		/* lexical scanning kludge */
/* TRUE between a function prologue and the end of that function's rule */
static int can_return;		/* parsing kludge */
/* set by the BEGIN and END patterns, cleared after every rule */
static int begin_or_end_rule = FALSE;	/* parsing kludge */
/* set by the END pattern only; selects "END" vs. "BEGIN" in messages */
static int parsing_end_rule = FALSE; /* for warnings */
/* TRUE from the `print'/`printf' keyword until output_redir is reduced */
static int in_print = FALSE;	/* lexical scanning kludge for print */
static int in_parens = 0;	/* lexical scanning kludge for print */
static char *lexptr;		/* pointer to next char during parsing */
static char *lexend;		/* NOTE(review): presumably end of the buffer lexptr walks -- confirm in yylex() */
static char *lexptr_begin;	/* keep track of where we were for error msgs */
static char *lexeme;		/* beginning of lexeme for debugging */
static char *thisline = NULL;	/* NOTE(review): not used in the grammar rules; confirm purpose in the lexer */
#define YYDEBUG_LEXER_TEXT (lexeme)
static int param_counter;	/* reset to 0 at the start of each function definition */
static char *tokstart = NULL;	/* text of the current token, as printed in diagnostics */
static char *tok = NULL;	/* NOTE(review): presumably write position in the token buffer -- confirm */
static char *tokend;		/* NOTE(review): presumably end of the token buffer -- confirm */

static long func_count;		/* total number of functions */

#define HASHSIZE	1021	/* this constant only used here */
/* NOTE(review): presumably the hash table of global variables -- confirm */
NODE *variables[HASHSIZE];
static int var_count;		/* total number of global variables */

/* Objects defined outside this file. */
extern char *source;
extern int sourceline;
extern struct src *srcfiles;
extern long numfiles;
extern int errcount;
extern NODE *begin_block;
extern NODE *end_block;

/*
 * This string cannot occur as a real awk identifier.
 * Use it as a special token to make function parsing
 * uniform, but if it's seen, don't install the function.
 * e.g.
 * 	function split(x) { return x }
 * 	function x(a) { return a }
 * should only produce one error message, and not core dump.
 *
 * The `func_name' rule returns it when a built-in name is used
 * as the name of a user-defined function.
 */
static char builtin_func[] = "@builtin";
|
---|
108 | %}
|
---|
109 |
|
---|
/* Semantic value types; the %type and %token lines below select a member. */
%union {
	long lval;		/* ERROR, LEX_AND/LEX_OR, INCREMENT/DECREMENT, builtin tokens */
	AWKNUM fval;		/* not referenced by the declarations visible here */
	NODE *nodeval;		/* parse tree nodes */
	NODETYPE nodetypeval;	/* node type carried by operator and keyword tokens */
	char *sval;		/* names and regexp text */
	NODE *(*ptrval) P((void));	/* not referenced by the declarations visible here */
}
|
---|
118 |
|
---|
/* Nonterminals, grouped by the %union member that holds their value. */
%type <nodeval> function_prologue pattern action variable param_list
%type <nodeval> exp common_exp
%type <nodeval> simp_exp non_post_simp_exp
%type <nodeval> expression_list opt_expression_list print_expression_list
%type <nodeval> statements statement if_statement switch_body case_statements case_statement case_value opt_param_list
%type <nodeval> simple_stmt opt_simple_stmt
%type <nodeval> opt_exp opt_variable regexp
%type <nodeval> input_redir output_redir
%type <nodetypeval> print
%type <nodetypeval> assign_operator a_relop relop_or_less
%type <sval> func_name
%type <lval> lex_builtin

/* Tokens, grouped the same way; NEWLINE carries no value. */
%token <sval> FUNC_CALL NAME REGEXP
%token <lval> ERROR
%token <nodeval> YNUMBER YSTRING
%token <nodetypeval> RELOP IO_OUT IO_IN
%token <nodetypeval> ASSIGNOP ASSIGN MATCHOP CONCAT_OP
%token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
%token <nodetypeval> LEX_SWITCH LEX_CASE LEX_DEFAULT LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
%token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
%token <nodetypeval> LEX_GETLINE LEX_NEXTFILE
%token <nodetypeval> LEX_IN
%token <lval> LEX_AND LEX_OR INCREMENT DECREMENT
%token <lval> LEX_BUILTIN LEX_LENGTH
%token NEWLINE
|
---|
145 |
|
---|
/* these are just yylval numbers */

/*
 * Operator precedence and associativity.
 * Lowest to highest: each later line binds more tightly than the ones
 * before it.  In the rules visible below, UNARY appears only in %prec
 * clauses, and SLASH_BEFORE_EQUAL has no %token line of its own.
 */
%right ASSIGNOP ASSIGN SLASH_BEFORE_EQUAL
%right '?' ':'
%left LEX_OR
%left LEX_AND
%left LEX_GETLINE
%nonassoc LEX_IN
%left FUNC_CALL LEX_BUILTIN LEX_LENGTH
%nonassoc ','
%nonassoc MATCHOP
%nonassoc RELOP '<' '>' IO_IN IO_OUT
%left CONCAT_OP
%left YSTRING YNUMBER
%left '+' '-'
%left '*' '/' '%'
%right '!' UNARY
%right '^'
%left INCREMENT DECREMENT
%left '$'
%left '(' ')'
|
---|
168 | %%
|
---|
169 |
|
---|
/*
 * start: the whole program, optionally surrounded by newlines.
 * check_funcs() is called once the entire program has been reduced.
 */
start
	: opt_nls program opt_nls
	  {
		check_funcs();
	  }
	;
|
---|
176 |
|
---|
/*
 * program: a possibly empty sequence of rules.  The BEGIN/END
 * bookkeeping flags are cleared after every rule, good or bad.
 */
program
	: /* empty */
	| program rule
	  {
		parsing_end_rule = FALSE;
		begin_or_end_rule = FALSE;
		yyerrok;
	  }
	| program error
	  {
		/*
		 * No yyerrok here on purpose: after a syntax error we
		 * give up rather than produce an infinite stream of
		 * syntax error messages.
		 */
		parsing_end_rule = FALSE;
		begin_or_end_rule = FALSE;
	  }
	;
|
---|
194 |
|
---|
/*
 * rule: a pattern with an action, a pattern alone, or a function
 * definition.
 */
rule
	: pattern action
	  {
		$1->rnode = $2;
	  }
	| pattern statement_term
	  {
		if ($1->lnode == NULL) {
			/* no pattern and no action: that is an error */
			if (! begin_or_end_rule)
				msg(_("each rule must have a pattern or an action part"));
			else
				msg(_("%s blocks must have an action part"),
					(parsing_end_rule ? "END" : "BEGIN"));
			errcount++;
		} else {
			/* non-empty pattern without an action: use Node_K_print_rec */
			$1->rnode = node(NULL, Node_K_print_rec, NULL);
		}
	  }
	| function_prologue action
	  {
		can_return = FALSE;
		/* a NULL prologue means there is nothing to install */
		if ($1 != NULL)
			func_install($1, $2);
		yyerrok;
	  }
	;
|
---|
223 |
|
---|
/*
 * pattern: empty, an expression, a range `exp, exp', BEGIN or END.
 * Every alternative hands its pattern node (or NULL) to append_pattern()
 * together with the list it belongs on.
 */
pattern
	: /* empty */
	  {
		$$ = append_pattern(&expression_value, (NODE *) NULL);
	  }
	| exp
	  {
		$$ = append_pattern(&expression_value, $1);
	  }
	| exp ',' exp
	  {
		NODE *range_node;

		/* range pattern: starts out not triggered */
		getnode(range_node);
		range_node->type = Node_line_range;
		range_node->condpair = node($1, Node_cond_pair, $3);
		range_node->triggered = FALSE;
		$$ = append_pattern(&expression_value, range_node);
	  }
	| LEX_BEGIN
	  {
		begin_or_end_rule = TRUE;
		$$ = append_pattern(&begin_block, (NODE *) NULL);
	  }
	| LEX_END
	  {
		parsing_end_rule = TRUE;
		begin_or_end_rule = TRUE;
		$$ = append_pattern(&end_block, (NODE *) NULL);
	  }
	;
|
---|
254 |
|
---|
/*
 * action: a brace-enclosed statement list, optionally followed by a
 * semicolon and newlines.  Its value is the statement list itself.
 */
action
	: l_brace statements r_brace opt_semi opt_nls
		{ $$ = $2; }
	;
|
---|
259 |
|
---|
/*
 * func_name: the name following `function'.  A built-in name is
 * reported as an error and replaced by the builtin_func placeholder
 * so that the rest of the definition still parses.
 */
func_name
	: NAME
		{ $$ = $1; }
	| FUNC_CALL
		{ $$ = $1; }
	| lex_builtin
	  {
		yyerror(_("`%s' is a built-in function, it cannot be redefined"),
			tokstart);
		errcount++;
		$$ = builtin_func;
		/* yyerrok; */
	  }
	;
|
---|
274 |
|
---|
/* lex_builtin: either kind of built-in function token; value is the token's. */
lex_builtin
	: LEX_BUILTIN
	| LEX_LENGTH
	;
|
---|
279 |
|
---|
/*
 * function_prologue: `function name(params)'.  The value is a list
 * headed by a node for the function name (flagged FUNC) and followed
 * by the parameter nodes.
 */
function_prologue
	: LEX_FUNCTION
	  {
		param_counter = 0;
	  }
	  func_name '(' opt_param_list r_paren opt_nls
	  {
		/* $3 is func_name and $5 the parameters; the mid-rule action is $2 */
		NODE *name_node = make_param($3);

		name_node->flags |= FUNC;
		$$ = append_right(name_node, $5);
		can_return = TRUE;
		/* duplicate parameter names count as an error */
		if (dup_parms($$))
			errcount++;
	  }
	;
|
---|
298 |
|
---|
regexp
	/*
	 * In this rule, want_regexp tells yylex that the next thing
	 * is a regexp so it should read up to the closing slash.
	 */
	: a_slash
		{ ++want_regexp; }
	  REGEXP	/* The terminating '/' is consumed by yylex(). */
	  {
		NODE *re;
		size_t relen = strlen($3);

		/* lint: warn about regexps that look like comments */
		if (do_lint && relen == 0)
			lintwarn(_("regexp constant `//' looks like a C++ comment, but is not"));
		else if (do_lint && ($3)[0] == '*' && ($3)[relen-1] == '*')
			/* possible C comment */
			lintwarn(_("regexp constant `/%s/' looks like a C comment, but is not"), tokstart);

		/* build the constant regexp node */
		getnode(re);
		re->type = Node_regex;
		re->re_exp = make_string($3, relen);
		re->re_reg = make_regexp($3, relen, FALSE, TRUE);
		re->re_text = NULL;
		re->re_flags = CONST;
		re->re_cnt = 1;
		$$ = re;
	  }
	;
|
---|
328 |
|
---|
/*
 * a_slash: the slash that opens a regexp constant; SLASH_BEFORE_EQUAL
 * is the token that also begins the `/=' operator (see assign_operator).
 */
a_slash
	: '/'
	| SLASH_BEFORE_EQUAL
	;
|
---|
333 |
|
---|
/*
 * statements: a possibly empty list of statements.  NULL statements
 * are dropped, a single statement stands for itself, and anything
 * longer is chained into Node_statement_list cells.
 */
statements
	: /* empty */
		{ $$ = NULL; }
	| statements statement
	  {
		NODE *head = $1;
		NODE *stmt = $2;

		if (stmt == NULL)
			$$ = head;
		else {
			if (do_lint && isnoeffect(stmt->type))
				lintwarn(_("statement may have no effect"));
			if (head == NULL)
				$$ = stmt;
			else {
				/* wrap whichever side is not yet a list cell */
				if (head->type != Node_statement_list)
					head = node(head, Node_statement_list, (NODE *) NULL);
				if (stmt->type != Node_statement_list)
					stmt = node(stmt, Node_statement_list, (NODE *) NULL);
				$$ = append_right(head, stmt);
			}
		}
		yyerrok;
	  }
	| statements error
		{ $$ = NULL; }
	;
|
---|
358 |
|
---|
/* statement_term: what ends a simple statement -- newlines, or `;' plus optional newlines. */
statement_term
	: nls
	| semi opt_nls
	;
|
---|
363 |
|
---|
/*
 * statement: one complete statement.  The value is the statement's
 * parse tree, or NULL for an empty statement.
 */
statement
	: semi opt_nls
		{ $$ = NULL; }
	| l_brace statements r_brace
		{ $$ = $2; }
	| if_statement
		{ $$ = $1; }
	| LEX_SWITCH '(' exp r_paren opt_nls l_brace switch_body opt_nls r_brace
		{ $$ = node($3, Node_K_switch, $7); }
	| LEX_WHILE '(' exp r_paren opt_nls statement
		{ $$ = node($3, Node_K_while, $6); }
	| LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls
		{ $$ = node($6, Node_K_do, $3); }
	| LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement
	  {
		/*
		 * Efficiency hack.  Recognize the special case of
		 *
		 * 	for (iggy in foo)
		 * 		delete foo[iggy]
		 *
		 * and treat it as if it were
		 *
		 * 	delete foo
		 *
		 * Check that the body is a `delete a[i]' statement,
		 * and that both the loop var and array names match.
		 *
		 * The subscript must be the loop variable alone: something
		 * like `delete foo[iggy, x]' removes different elements,
		 * so it must not be turned into `delete foo'.
		 */
		if ($8 != NULL && $8->type == Node_K_delete && $8->rnode != NULL) {
			NODE *arr, *sub;

			assert($8->rnode->type == Node_expression_list);
			arr = $8->lnode;	/* array var */
			sub = $8->rnode->lnode;	/* index var */

			if (   $8->rnode->rnode == NULL	/* exactly one subscript */
			    && (arr->type == Node_var_new
				|| arr->type == Node_var_array
				|| arr->type == Node_param_list)
			    && (sub->type == Node_var_new
				|| sub->type == Node_var
				|| sub->type == Node_param_list)
			    && strcmp($3, sub->vname) == 0
			    && strcmp($5, arr->vname) == 0) {
				$8->type = Node_K_delete_loop;
				$$ = $8;
				free($3);	/* thanks to valgrind for pointing these out */
				free($5);
			}
			else
				goto regular_loop;
		} else {
	regular_loop:
			$$ = node($8, Node_K_arrayfor,
				make_for_loop(variable($3, CAN_FREE, Node_var),
				(NODE *) NULL, variable($5, CAN_FREE, Node_var_array)));
		}
	  }
	| LEX_FOR '(' opt_simple_stmt semi opt_nls exp semi opt_nls opt_simple_stmt r_paren opt_nls statement
	  {
		$$ = node($12, Node_K_for, (NODE *) make_for_loop($3, $6, $9));
	  }
	| LEX_FOR '(' opt_simple_stmt semi opt_nls semi opt_nls opt_simple_stmt r_paren opt_nls statement
	  {
		/* no condition given */
		$$ = node($11, Node_K_for,
			(NODE *) make_for_loop($3, (NODE *) NULL, $8));
	  }
	| LEX_BREAK statement_term
	   /* for break, maybe we'll have to remember where to break to */
		{ $$ = node((NODE *) NULL, Node_K_break, (NODE *) NULL); }
	| LEX_CONTINUE statement_term
	   /* similarly */
		{ $$ = node((NODE *) NULL, Node_K_continue, (NODE *) NULL); }
	| LEX_NEXT statement_term
	  {
		/* `next' is not allowed in BEGIN or END actions */
		if (begin_or_end_rule)
			yyerror(_("`%s' used in %s action"), "next",
				(parsing_end_rule ? "END" : "BEGIN"));
		$$ = node((NODE *) NULL, Node_K_next, (NODE *) NULL);
	  }
	| LEX_NEXTFILE statement_term
	  {
		if (do_traditional) {
			/*
			 * can't use yyerror, since may have overshot
			 * the source line
			 */
			errcount++;
			error(_("`nextfile' is a gawk extension"));
		}
		if (do_lint)
			lintwarn(_("`nextfile' is a gawk extension"));
		if (begin_or_end_rule) {
			/* same thing */
			errcount++;
			error(_("`%s' used in %s action"), "nextfile",
				(parsing_end_rule ? "END" : "BEGIN"));
		}
		$$ = node((NODE *) NULL, Node_K_nextfile, (NODE *) NULL);
	  }
	| LEX_EXIT opt_exp statement_term
		{ $$ = node($2, Node_K_exit, (NODE *) NULL); }
	| LEX_RETURN
	  {
		if (! can_return)
			yyerror(_("`return' used outside function context"));
	  }
	  opt_exp statement_term
	  {
		/* $3 is opt_exp; a bare `return' returns Nnull_string */
		$$ = node($3 == NULL ? Nnull_string : $3,
			Node_K_return, (NODE *) NULL);
	  }
	| simple_stmt statement_term
	;
|
---|
479 |
|
---|
/*
 * A simple_stmt exists to satisfy a constraint in the POSIX
 * grammar allowing them to occur as the 1st and 3rd parts
 * in a `for (...;...;...)' loop.  This is a historical oddity
 * inherited from Unix awk, not at all documented in the AK&W
 * awk book.  We support it, as this was reported as a bug.
 * We don't bother to document it though. So there.
 */
simple_stmt
	: print { in_print = TRUE; in_parens = 0; } print_expression_list output_redir
	  {
		static int warned = FALSE;
		NODE *args = $3;	/* the mid-rule action above is $2 */
		int whole_record = FALSE;

		/*
		 * Optimization: plain `print' has no expression list, so
		 * args is null.  If args is an expression list with one
		 * element (rnode == null) and lnode is a field spec for
		 * field 0, we have `print $0'.  Both cases are built as
		 * Node_K_print_rec, which is faster for them.
		 */
		if ($1 == Node_K_print) {
			if (args == NULL)
				whole_record = TRUE;
			else if (args->type == Node_expression_list
				 && args->rnode == NULL
				 && args->lnode->type == Node_field_spec
				 && args->lnode->lnode->type == Node_val
				 && args->lnode->lnode->numbr == 0.0)
				whole_record = TRUE;
		}

		if (! whole_record) {
			$$ = node(args, $1, $4);
			if ($$->type == Node_K_printf)
				count_args($$);
		} else {
			$$ = node(NULL, Node_K_print_rec, $4);

			/* warn only once per run */
			if (do_lint && args == NULL && begin_or_end_rule && ! warned) {
				warned = TRUE;
				lintwarn(
	_("plain `print' in BEGIN or END rule should probably be `print \"\"'"));
			}
		}
	  }
	| LEX_DELETE NAME '[' expression_list ']'
	  {
		NODE *array = variable($2, CAN_FREE, Node_var_array);

		$$ = node(array, Node_K_delete, $4);
	  }
	| LEX_DELETE NAME
	  {
		NODE *array;

		if (do_lint)
			lintwarn(_("`delete array' is a gawk extension"));
		if (do_traditional) {
			/*
			 * yyerror is unsuitable here: the lexer may
			 * already be past the offending source line
			 */
			errcount++;
			error(_("`delete array' is a gawk extension"));
		}
		array = variable($2, CAN_FREE, Node_var_array);
		$$ = node(array, Node_K_delete, (NODE *) NULL);
	  }
	| LEX_DELETE '(' NAME ')'
	  {
		NODE *array;

		/* this is for tawk compatibility. maybe the warnings should always be done. */
		if (do_lint)
			lintwarn(_("`delete(array)' is a non-portable tawk extension"));
		if (do_traditional) {
			/*
			 * yyerror is unsuitable here: the lexer may
			 * already be past the offending source line
			 */
			errcount++;
			error(_("`delete(array)' is a non-portable tawk extension"));
		}
		array = variable($3, CAN_FREE, Node_var_array);
		$$ = node(array, Node_K_delete, (NODE *) NULL);
	  }
	| exp
		{ $$ = $1; }
	;
|
---|
554 |
|
---|
/* opt_simple_stmt: an optional simple_stmt (for-loop init/increment); NULL when absent. */
opt_simple_stmt
	: /* empty */
		{ $$ = NULL; }
	| simple_stmt
		{ $$ = $1; }
	;
|
---|
561 |
|
---|
/*
 * switch_body: the list of case/default clauses.  The list is walked
 * once to reject duplicate case values and repeated `default' clauses;
 * the result is a Node_switch_body holding the clause list and the
 * default clause (NULL if none).  An empty body yields NULL.
 */
switch_body
	: case_statements
	  {
		if ($1 != NULL) {
			NODE *default_node = NULL;
			NODE *item;
			const char **seen;	/* string values of the cases so far */
			int capacity = 128;
			int nseen = 0;
			int k;

			emalloc(seen, const char **, sizeof(char*) * capacity, "switch_body");
			for (item = $1; item != NULL; item = item->rnode) {
				char *caseval;

				if (item->lnode->type != Node_K_case) {
					/* Not a case: save a pointer to the default node. */
					if (default_node != NULL)
						yyerror(_("Duplicate `default' detected in switch body"));
					default_node = item;
					continue;
				}

				/* Assure that case statement values are unique. */
				if (item->lnode->lnode->type == Node_regex)
					caseval = item->lnode->lnode->re_exp->stptr;
				else
					caseval = force_string(tree_eval(item->lnode->lnode))->stptr;

				for (k = 0; k < nseen; k++)
					if (strcmp(caseval, seen[k]) == 0)
						yyerror(_("duplicate case values in switch body: %s"), caseval);

				/* grow the table in steps of 128 entries */
				if (nseen >= capacity) {
					capacity += 128;
					erealloc(seen, const char **, sizeof(char*) * capacity, "switch_body");
				}
				seen[nseen++] = caseval;
			}

			free(seen);

			/* Create the switch body. */
			$$ = node($1, Node_switch_body, default_node);
		} else
			$$ = NULL;
	  }
	;
|
---|
613 |
|
---|
/*
 * case_statements: a possibly empty list of case/default clauses,
 * chained through Node_case_list cells.
 */
case_statements
	: /* empty */
		{ $$ = NULL; }
	| case_statements case_statement
	  {
		NODE *list = $1;
		NODE *item = $2;

		if (item == NULL)
			$$ = list;
		else {
			if (do_lint && isnoeffect(item->type))
				lintwarn(_("statement may have no effect"));
			if (list == NULL)
				/* first clause: always gets its own list cell */
				$$ = node(item, Node_case_list, (NODE *) NULL);
			else {
				if (list->type != Node_case_list)
					list = node(list, Node_case_list, (NODE *) NULL);
				if (item->type != Node_case_list)
					item = node(item, Node_case_list, (NODE *) NULL);
				$$ = append_right(list, item);
			}
		}
		yyerrok;
	  }
	| case_statements error
		{ $$ = NULL; }
	;
|
---|
637 |
|
---|
/*
 * case_statement: one `case value:' or `default:' clause with its
 * statements.  A default clause has no value (NULL left child).
 */
case_statement
	: LEX_CASE case_value colon opt_nls statements
		{ $$ = node($2, Node_K_case, $5); }
	| LEX_DEFAULT colon opt_nls statements
		{ $$ = node((NODE *) NULL, Node_K_default, $4); }
	;
|
---|
644 |
|
---|
/*
 * case_value: the constant after `case' -- a number with optional
 * sign, a string, or a regexp constant.
 */
case_value
	: YNUMBER
		{ $$ = $1; }
	| '-' YNUMBER    %prec UNARY
	  {
		/* negate the constant in place */
		$2->numbr = -(force_number($2));
		$$ = $2;
	  }
	| '+' YNUMBER    %prec UNARY
		{ $$ = $2; }
	| YSTRING
		{ $$ = $1; }
	| regexp
		{ $$ = $1; }
	;
|
---|
660 |
|
---|
/* print: either output keyword; the value is the token's node type. */
print
	: LEX_PRINT
	| LEX_PRINTF
	;
|
---|
665 |
|
---|
/*
 * Note: ``print(x)'' is already parsed by the first rule,
 * so there is no good in covering it by the second one too.
 * The second rule therefore needs at least two expressions
 * inside the parentheses.
 */
print_expression_list
	: opt_expression_list
	| '(' exp comma expression_list r_paren
		{ $$ = node($2, Node_expression_list, $4); }
	;
|
---|
675 |
|
---|
/*
 * output_redir: optional output redirection after print/printf.
 * Both alternatives switch the `print' lexer kludges off again.
 */
output_redir
	: /* empty */
	  {
		in_parens = 0;
		in_print = FALSE;
		$$ = NULL;
	  }
	| IO_OUT { in_print = FALSE; in_parens = 0; } common_exp
	  {
		NODE *target = $3;	/* the mid-rule action above is $2 */

		$$ = node(target, $1, (NODE *) NULL);
		/* a two-way pipe cannot feed another two-way getline */
		if ($1 == Node_redirect_twoway && target->type == Node_K_getline) {
			if (target->rnode != NULL
			    && target->rnode->type == Node_redirect_twoway)
				yyerror(_("multistage two-way pipelines don't work"));
		}
	  }
	;
|
---|
693 |
|
---|
/*
 * if_statement: `if' with or without `else'.  The condition is the
 * left child; a Node_if_branches node holds the then-part on the
 * left and the else-part (or NULL) on the right.
 */
if_statement
	: LEX_IF '(' exp r_paren opt_nls statement
	  {
		NODE *branches = node($6, Node_if_branches, (NODE *) NULL);

		$$ = node($3, Node_K_if, branches);
	  }
	| LEX_IF '(' exp r_paren opt_nls statement
	     LEX_ELSE opt_nls statement
	  {
		NODE *branches = node($6, Node_if_branches, $9);

		$$ = node($3, Node_K_if, branches);
	  }
	;
|
---|
705 |
|
---|
/* nls: one or more NEWLINE tokens. */
nls
	: NEWLINE
	| nls NEWLINE
	;
|
---|
710 |
|
---|
/* opt_nls: zero or more NEWLINE tokens. */
opt_nls
	: /* empty */
	| nls
	;
|
---|
715 |
|
---|
/* input_redir: optional `< expression' after getline; NULL when absent. */
input_redir
	: /* empty */
		{ $$ = NULL; }
	| '<' simp_exp
		{ $$ = node($2, Node_redirect_input, (NODE *) NULL); }
	;
|
---|
722 |
|
---|
/* opt_param_list: the parameters of a function definition; NULL when there are none. */
opt_param_list
	: /* empty */
		{ $$ = NULL; }
	| param_list
		{ $$ = $1; }
	;
|
---|
729 |
|
---|
/*
 * param_list: comma-separated parameter names, one make_param() node
 * each, chained with append_right().  Any syntax error discards the
 * list built so far (value NULL).
 */
param_list
	: NAME
		{ $$ = make_param($1); }
	| param_list comma NAME
		{ $$ = append_right($1, make_param($3)); yyerrok; }
	| error
		{ $$ = NULL; }
	| param_list error
		{ $$ = NULL; }
	| param_list comma error
		{ $$ = NULL; }
	;
|
---|
742 |
|
---|
/* optional expression, as in for loop; also used by `exit' and `return'.  NULL when absent. */
opt_exp
	: /* empty */
		{ $$ = NULL; }
	| exp
		{ $$ = $1; }
	;
|
---|
750 |
|
---|
/* opt_expression_list: an optional expression_list; NULL when absent. */
opt_expression_list
	: /* empty */
		{ $$ = NULL; }
	| expression_list
		{ $$ = $1; }
	;
|
---|
757 |
|
---|
/*
 * expression_list: comma-separated expressions.  Each expression gets
 * its own Node_expression_list cell and the cells are chained with
 * append_right().  Any syntax error discards the list (value NULL).
 */
expression_list
	: exp
		{ $$ = node($1, Node_expression_list, (NODE *) NULL); }
	| expression_list comma exp
	  {
		NODE *cell = node($3, Node_expression_list, (NODE *) NULL);

		$$ = append_right($1, cell);
		yyerrok;
	  }
	| error
		{ $$ = NULL; }
	| expression_list error
		{ $$ = NULL; }
	| expression_list error exp
		{ $$ = NULL; }
	| expression_list comma error
		{ $$ = NULL; }
	;
|
---|
776 |
|
---|
/* Expressions, not including the comma operator. */
exp	: variable assign_operator exp %prec ASSIGNOP
	  {
		NODE *lhs = $1;
		NODE *rhs = $3;
		int self_concat;

		if (do_lint && rhs->type == Node_regex)
			lintwarn(_("regular expression on right of assignment"));

		/*
		 * Optimization of `x = x y'.  Can save lots of time
		 * if done a lot.
		 */
		self_concat = (   lhs->type == Node_var
			       || lhs->type == Node_var_new
			       || lhs->type == Node_param_list)
			      && $2 == Node_assign
			      && rhs->type == Node_concat
			      && rhs->lnode == lhs;

		if (! self_concat)
			$$ = node(lhs, $2, rhs);
		else {
			/* reuse the concat node, just retyped */
			rhs->type = Node_assign_concat;
			$$ = rhs;
		}
	  }
	| exp LEX_AND exp
		{ $$ = node($1, Node_and, $3); }
	| exp LEX_OR exp
		{ $$ = node($1, Node_or, $3); }
	| exp MATCHOP exp
	  {
		NODE *re = mk_rexp($3);

		if ($1->type == Node_regex)
			warning(_("regular expression on left of `~' or `!~' operator"));
		$$ = node($1, $2, re);
	  }
	| exp LEX_IN NAME
	  {
		NODE *array = variable($3, CAN_FREE, Node_var_array);

		/* array on the left, subscript expression on the right */
		$$ = node(array, Node_in_array, $1);
	  }
	| exp a_relop exp %prec RELOP
	  {
		if (do_lint && $3->type == Node_regex)
			lintwarn(_("regular expression on right of comparison"));
		$$ = node($1, $2, $3);
	  }
	| exp '?' exp ':' exp
	  {
		NODE *branches = node($3, Node_if_branches, $5);

		$$ = node($1, Node_cond_exp, branches);
	  }
	| common_exp
		{ $$ = $1; }
	;
|
---|
820 |
|
---|
/*
 * assign_operator: `=', a compound assignment operator, or `/='.
 * `/=' arrives as the two tokens SLASH_BEFORE_EQUAL and ASSIGN.
 */
assign_operator
	: ASSIGN
		{ $$ = $1; }
	| ASSIGNOP
		{ $$ = $1; }
	| SLASH_BEFORE_EQUAL ASSIGN   /* `/=' */
		{ $$ = Node_assign_quotient; }
	;
|
---|
829 |
|
---|
/* relop_or_less: a RELOP token or `<'; the value is the comparison's node type. */
relop_or_less
	: RELOP
		{ $$ = $1; }
	| '<'
		{ $$ = Node_less; }
	;
|
---|
/* a_relop: any relational operator, i.e. relop_or_less plus `>'. */
a_relop
	: relop_or_less
	| '>'
		{ $$ = Node_greater; }
	;
|
---|
841 |
|
---|
/*
 * common_exp: regexp constants, `(list) in array', simple
 * expressions, and concatenation by juxtaposition.
 */
common_exp
	: regexp
		{ $$ = $1; }
	| '!' regexp %prec UNARY
	  {
		/* build `$0 !~ regexp' */
		NODE *record = node(make_number(0.0),
				    Node_field_spec,
				    (NODE *) NULL);

		$$ = node(record, Node_nomatch, $2);
	  }
	| '(' expression_list r_paren LEX_IN NAME
	  {
		NODE *array = variable($5, CAN_FREE, Node_var_array);

		$$ = node(array, Node_in_array, $2);
	  }
	| simp_exp
		{ $$ = $1; }
	| common_exp simp_exp %prec CONCAT_OP
		{ $$ = node($1, Node_concat, $2); }
	;
|
---|
860 |
|
---|
/*
 * simp_exp: arithmetic on simple expressions, the getline forms,
 * and post-increment/decrement.
 */
simp_exp
	: non_post_simp_exp
	/* Binary operators in order of decreasing precedence.  */
	| simp_exp '^' simp_exp
		{ $$ = node($1, Node_exp, $3); }
	| simp_exp '*' simp_exp
		{ $$ = node($1, Node_times, $3); }
	| simp_exp '/' simp_exp
		{ $$ = node($1, Node_quotient, $3); }
	| simp_exp '%' simp_exp
		{ $$ = node($1, Node_mod, $3); }
	| simp_exp '+' simp_exp
		{ $$ = node($1, Node_plus, $3); }
	| simp_exp '-' simp_exp
		{ $$ = node($1, Node_minus, $3); }
	| LEX_GETLINE opt_variable input_redir
	  {
		/* getline, optionally into a variable and/or from a file */
		if (do_lint && parsing_end_rule && $3 == NULL)
			lintwarn(_("non-redirected `getline' undefined inside END action"));
		$$ = node($2, Node_K_getline, $3);
	  }
	| simp_exp IO_IN LEX_GETLINE opt_variable
	  {
		/* input for getline comes from $1 through redirection $2 */
		$$ = node($4, Node_K_getline,
			 node($1, $2, (NODE *) NULL));
	  }
	| variable INCREMENT
		{ $$ = node($1, Node_postincrement, (NODE *) NULL); }
	| variable DECREMENT
		{ $$ = node($1, Node_postdecrement, (NODE *) NULL); }
	;
|
---|
892 |
|
---|
/*
 * non_post_simp_exp: simple expressions other than post-increment and
 * post-decrement -- negation, grouping, calls, variables, constants,
 * pre-increment/decrement and unary sign.
 */
non_post_simp_exp
	: '!' simp_exp %prec UNARY
		{ $$ = node($2, Node_not, (NODE *) NULL); }
	| '(' exp r_paren
		{ $$ = $2; }
	| LEX_BUILTIN
	  '(' opt_expression_list r_paren
		{ $$ = snode($3, Node_builtin, (int) $1); }
	| LEX_LENGTH '(' opt_expression_list r_paren
		{ $$ = snode($3, Node_builtin, (int) $1); }
	| LEX_LENGTH
	  {
		/* bare `length', i.e. no argument list at all */
		if (do_lint)
			lintwarn(_("call of `length' without parentheses is not portable"));
		$$ = snode((NODE *) NULL, Node_builtin, (int) $1);
		if (do_posix)
			warning(_("call of `length' without parentheses is deprecated by POSIX"));
	  }
	| FUNC_CALL '(' opt_expression_list r_paren
	  {
		NODE *call;

		/* the call node keeps its own copy of the name */
		call = node($3, Node_func_call, make_string($1, strlen($1)));
		call->funcbody = NULL;
		$$ = call;
		func_use($1, FUNC_USE);
		param_sanity($3);
		free($1);
	  }
	| variable
	| INCREMENT variable
		{ $$ = node($2, Node_preincrement, (NODE *) NULL); }
	| DECREMENT variable
		{ $$ = node($2, Node_predecrement, (NODE *) NULL); }
	| YNUMBER
		{ $$ = $1; }
	| YSTRING
		{ $$ = $1; }

	| '-' simp_exp    %prec UNARY
	  {
		if ($2->type != Node_val || ($2->flags & (STRCUR|STRING)) != 0)
			$$ = node($2, Node_unary_minus, (NODE *) NULL);
		else {
			/* non-string constant: negate it in place */
			$2->numbr = -(force_number($2));
			$$ = $2;
		}
	  }
	| '+' simp_exp    %prec UNARY
	  {
		/*
		 * was: $$ = $2
		 * POSIX semantics: force a conversion to numeric type,
		 * done here by building `0 + operand'.
		 */
		$$ = node (make_number(0.0), Node_plus, $2);
	  }
	;
|
---|
946 |
|
---|
947 | opt_variable
|
---|
948 | : /* empty */
|
---|
949 | { $$ = NULL; }
|
---|
950 | | variable
|
---|
951 | { $$ = $1; }
|
---|
952 | ;
|
---|
953 |
|
---|
954 | variable
|
---|
955 | : NAME
|
---|
956 | { $$ = variable($1, CAN_FREE, Node_var_new); }
|
---|
957 | | NAME '[' expression_list ']'
|
---|
958 | {
|
---|
959 | NODE *n;
|
---|
960 |
|
---|
961 | if ((n = lookup($1)) != NULL && ! isarray(n))
|
---|
962 | yyerror(_("use of non-array as array"));
|
---|
963 | else if ($3 == NULL) {
|
---|
964 | fatal(_("invalid subscript expression"));
|
---|
965 | } else if ($3->rnode == NULL) {
|
---|
966 | $$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3->lnode);
|
---|
967 | freenode($3);
|
---|
968 | } else
|
---|
969 | $$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3);
|
---|
970 | }
|
---|
971 | | '$' non_post_simp_exp
|
---|
972 | { $$ = node($2, Node_field_spec, (NODE *) NULL); }
|
---|
973 | /*
|
---|
974 | #if 0
|
---|
975 | | lex_builtin
|
---|
976 | { fatal(_("can't use built-in function `%s' as a variable"), tokstart); }
|
---|
977 | #endif
|
---|
978 | */
|
---|
979 | ;
|
---|
980 |
|
---|
981 | l_brace
|
---|
982 | : '{' opt_nls
|
---|
983 | ;
|
---|
984 |
|
---|
985 | r_brace
|
---|
986 | : '}' opt_nls { yyerrok; }
|
---|
987 | ;
|
---|
988 |
|
---|
989 | r_paren
|
---|
990 | : ')' { yyerrok; }
|
---|
991 | ;
|
---|
992 |
|
---|
993 | opt_semi
|
---|
994 | : /* empty */
|
---|
995 | | semi
|
---|
996 | ;
|
---|
997 |
|
---|
998 | semi
|
---|
999 | : ';' { yyerrok; }
|
---|
1000 | ;
|
---|
1001 |
|
---|
1002 | colon
|
---|
1003 | : ':' { yyerrok; }
|
---|
1004 | ;
|
---|
1005 |
|
---|
1006 | comma : ',' opt_nls { yyerrok; }
|
---|
1007 | ;
|
---|
1008 |
|
---|
1009 | %%
|
---|
1010 |
|
---|
1011 | struct token {
|
---|
1012 | const char *operator; /* text to match */
|
---|
1013 | NODETYPE value; /* node type */
|
---|
1014 | int class; /* lexical class */
|
---|
1015 | unsigned flags; /* # of args. allowed and compatability */
|
---|
1016 | # define ARGS 0xFF /* 0, 1, 2, 3 args allowed (any combination */
|
---|
1017 | # define A(n) (1<<(n))
|
---|
1018 | # define VERSION_MASK 0xFF00 /* old awk is zero */
|
---|
1019 | # define NOT_OLD 0x0100 /* feature not in old awk */
|
---|
1020 | # define NOT_POSIX 0x0200 /* feature not in POSIX */
|
---|
1021 | # define GAWKX 0x0400 /* gawk extension */
|
---|
1022 | # define RESX 0x0800 /* Bell Labs Research extension */
|
---|
1023 | NODE *(*ptr) P((NODE *)); /* function that implements this keyword */
|
---|
1024 | };
|
---|
1025 |
|
---|
1026 | /* Tokentab is sorted ascii ascending order, so it can be binary searched. */
|
---|
1027 | /* Function pointers come from declarations in awk.h. */
|
---|
1028 |
|
---|
1029 | static const struct token tokentab[] = {
|
---|
1030 | {"BEGIN", Node_illegal, LEX_BEGIN, 0, 0},
|
---|
1031 | {"END", Node_illegal, LEX_END, 0, 0},
|
---|
1032 | #ifdef ARRAYDEBUG
|
---|
1033 | {"adump", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_adump},
|
---|
1034 | #endif
|
---|
1035 | {"and", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_and},
|
---|
1036 | {"asort", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_asort},
|
---|
1037 | {"asorti", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_asorti},
|
---|
1038 | {"atan2", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2},
|
---|
1039 | {"bindtextdomain", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_bindtextdomain},
|
---|
1040 | {"break", Node_K_break, LEX_BREAK, 0, 0},
|
---|
1041 | #ifdef ALLOW_SWITCH
|
---|
1042 | {"case", Node_K_case, LEX_CASE, GAWKX, 0},
|
---|
1043 | #endif
|
---|
1044 | {"close", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1)|A(2), do_close},
|
---|
1045 | {"compl", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_compl},
|
---|
1046 | {"continue", Node_K_continue, LEX_CONTINUE, 0, 0},
|
---|
1047 | {"cos", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_cos},
|
---|
1048 | {"dcgettext", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_dcgettext},
|
---|
1049 | {"dcngettext", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3)|A(4)|A(5), do_dcngettext},
|
---|
1050 | #ifdef ALLOW_SWITCH
|
---|
1051 | {"default", Node_K_default, LEX_DEFAULT, GAWKX, 0},
|
---|
1052 | #endif
|
---|
1053 | {"delete", Node_K_delete, LEX_DELETE, NOT_OLD, 0},
|
---|
1054 | {"do", Node_K_do, LEX_DO, NOT_OLD, 0},
|
---|
1055 | {"else", Node_illegal, LEX_ELSE, 0, 0},
|
---|
1056 | {"exit", Node_K_exit, LEX_EXIT, 0, 0},
|
---|
1057 | {"exp", Node_builtin, LEX_BUILTIN, A(1), do_exp},
|
---|
1058 | {"extension", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_ext},
|
---|
1059 | {"fflush", Node_builtin, LEX_BUILTIN, RESX|A(0)|A(1), do_fflush},
|
---|
1060 | {"for", Node_K_for, LEX_FOR, 0, 0},
|
---|
1061 | {"func", Node_K_function, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0},
|
---|
1062 | {"function", Node_K_function, LEX_FUNCTION, NOT_OLD, 0},
|
---|
1063 | {"gensub", Node_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), do_gensub},
|
---|
1064 | {"getline", Node_K_getline, LEX_GETLINE, NOT_OLD, 0},
|
---|
1065 | {"gsub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub},
|
---|
1066 | {"if", Node_K_if, LEX_IF, 0, 0},
|
---|
1067 | {"in", Node_illegal, LEX_IN, 0, 0},
|
---|
1068 | {"index", Node_builtin, LEX_BUILTIN, A(2), do_index},
|
---|
1069 | {"int", Node_builtin, LEX_BUILTIN, A(1), do_int},
|
---|
1070 | {"length", Node_builtin, LEX_LENGTH, A(0)|A(1), do_length},
|
---|
1071 | {"log", Node_builtin, LEX_BUILTIN, A(1), do_log},
|
---|
1072 | {"lshift", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_lshift},
|
---|
1073 | {"match", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_match},
|
---|
1074 | {"mktime", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_mktime},
|
---|
1075 | {"next", Node_K_next, LEX_NEXT, 0, 0},
|
---|
1076 | {"nextfile", Node_K_nextfile, LEX_NEXTFILE, GAWKX, 0},
|
---|
1077 | {"or", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_or},
|
---|
1078 | {"print", Node_K_print, LEX_PRINT, 0, 0},
|
---|
1079 | {"printf", Node_K_printf, LEX_PRINTF, 0, 0},
|
---|
1080 | {"rand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0), do_rand},
|
---|
1081 | {"return", Node_K_return, LEX_RETURN, NOT_OLD, 0},
|
---|
1082 | {"rshift", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_rshift},
|
---|
1083 | {"sin", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_sin},
|
---|
1084 | {"split", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_split},
|
---|
1085 | {"sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf},
|
---|
1086 | {"sqrt", Node_builtin, LEX_BUILTIN, A(1), do_sqrt},
|
---|
1087 | {"srand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand},
|
---|
1088 | #if defined(GAWKDEBUG) || defined(ARRAYDEBUG) /* || ... */
|
---|
1089 | {"stopme", Node_builtin, LEX_BUILTIN, GAWKX|A(0), stopme},
|
---|
1090 | #endif
|
---|
1091 | {"strftime", Node_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2), do_strftime},
|
---|
1092 | {"strtonum", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum},
|
---|
1093 | {"sub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub},
|
---|
1094 | {"substr", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_substr},
|
---|
1095 | #ifdef ALLOW_SWITCH
|
---|
1096 | {"switch", Node_K_switch, LEX_SWITCH, GAWKX, 0},
|
---|
1097 | #endif
|
---|
1098 | {"system", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system},
|
---|
1099 | {"systime", Node_builtin, LEX_BUILTIN, GAWKX|A(0), do_systime},
|
---|
1100 | {"tolower", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower},
|
---|
1101 | {"toupper", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper},
|
---|
1102 | {"while", Node_K_while, LEX_WHILE, 0, 0},
|
---|
1103 | {"xor", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_xor},
|
---|
1104 | };
|
---|
1105 |
|
---|
#ifdef MBS_SUPPORT

/* Size of the ring buffer used by nextc() and pushback(). */
#define MAX_CHAR_IN_RING_BUFFER 8
#define RING_BUFFER_SIZE (MAX_CHAR_IN_RING_BUFFER * MB_LEN_MAX)

/*
 * True when the last nextc() returned a single-byte character
 * or the first byte of a multibyte character.
 */
#define nextc_is_1stbyte (cur_char_ring[cur_ring_idx] == 1)

/* The current shift state. */
static mbstate_t cur_mbstate;

/* Ring buffer of per-byte marks, filled in by nextc(). */
static char cur_char_ring[RING_BUFFER_SIZE];

/* Current position in the ring buffer. */
static int cur_ring_idx;

#else /* MBS_SUPPORT */

/* a dummy: without multibyte support every byte is a first byte */
#define nextc_is_1stbyte 1

#endif /* MBS_SUPPORT */
|
---|
1122 |
|
---|
1123 | /* getfname --- return name of a builtin function (for pretty printing) */
|
---|
1124 |
|
---|
1125 | const char *
|
---|
1126 | getfname(register NODE *(*fptr)(NODE *))
|
---|
1127 | {
|
---|
1128 | register int i, j;
|
---|
1129 |
|
---|
1130 | j = sizeof(tokentab) / sizeof(tokentab[0]);
|
---|
1131 | /* linear search, no other way to do it */
|
---|
1132 | for (i = 0; i < j; i++)
|
---|
1133 | if (tokentab[i].ptr == fptr)
|
---|
1134 | return tokentab[i].operator;
|
---|
1135 |
|
---|
1136 | return NULL;
|
---|
1137 | }
|
---|
1138 |
|
---|
1139 | /* yyerror --- print a syntax error message, show where */
|
---|
1140 |
|
---|
1141 | /*
|
---|
1142 | * Function identifier purposely indented to avoid mangling
|
---|
1143 | * by ansi2knr. Sigh.
|
---|
1144 | */
|
---|
1145 |
|
---|
1146 | static void
|
---|
1147 | #if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
|
---|
1148 | yyerror(const char *m, ...)
|
---|
1149 | #else
|
---|
1150 | /* VARARGS0 */
|
---|
1151 | yyerror(va_alist)
|
---|
1152 | va_dcl
|
---|
1153 | #endif
|
---|
1154 | {
|
---|
1155 | va_list args;
|
---|
1156 | const char *mesg = NULL;
|
---|
1157 | register char *bp, *cp;
|
---|
1158 | char *scan;
|
---|
1159 | char *buf;
|
---|
1160 | int count;
|
---|
1161 | static char end_of_file_line[] = "(END OF FILE)";
|
---|
1162 | char save;
|
---|
1163 |
|
---|
1164 | errcount++;
|
---|
1165 | /* Find the current line in the input file */
|
---|
1166 | if (lexptr && lexeme) {
|
---|
1167 | if (thisline == NULL) {
|
---|
1168 | cp = lexeme;
|
---|
1169 | if (*cp == '\n') {
|
---|
1170 | cp--;
|
---|
1171 | mesg = _("unexpected newline or end of string");
|
---|
1172 | }
|
---|
1173 | for (; cp != lexptr_begin && *cp != '\n'; --cp)
|
---|
1174 | continue;
|
---|
1175 | if (*cp == '\n')
|
---|
1176 | cp++;
|
---|
1177 | thisline = cp;
|
---|
1178 | }
|
---|
1179 | /* NL isn't guaranteed */
|
---|
1180 | bp = lexeme;
|
---|
1181 | while (bp < lexend && *bp && *bp != '\n')
|
---|
1182 | bp++;
|
---|
1183 | } else {
|
---|
1184 | thisline = end_of_file_line;
|
---|
1185 | bp = thisline + strlen(thisline);
|
---|
1186 | }
|
---|
1187 |
|
---|
1188 | /*
|
---|
1189 | * Saving and restoring *bp keeps valgrind happy,
|
---|
1190 | * since the guts of glibc uses strlen, even though
|
---|
1191 | * we're passing an explict precision. Sigh.
|
---|
1192 | *
|
---|
1193 | * 8/2003: We may not need this anymore.
|
---|
1194 | */
|
---|
1195 | save = *bp;
|
---|
1196 | *bp = '\0';
|
---|
1197 |
|
---|
1198 | msg("%.*s", (int) (bp - thisline), thisline);
|
---|
1199 |
|
---|
1200 | *bp = save;
|
---|
1201 |
|
---|
1202 | #if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__
|
---|
1203 | va_start(args, m);
|
---|
1204 | if (mesg == NULL)
|
---|
1205 | mesg = m;
|
---|
1206 | #else
|
---|
1207 | va_start(args);
|
---|
1208 | if (mesg == NULL)
|
---|
1209 | mesg = va_arg(args, char *);
|
---|
1210 | #endif
|
---|
1211 | count = (bp - thisline) + strlen(mesg) + 2 + 1;
|
---|
1212 | emalloc(buf, char *, count, "yyerror");
|
---|
1213 |
|
---|
1214 | bp = buf;
|
---|
1215 |
|
---|
1216 | if (lexptr != NULL) {
|
---|
1217 | scan = thisline;
|
---|
1218 | while (scan < lexeme)
|
---|
1219 | if (*scan++ == '\t')
|
---|
1220 | *bp++ = '\t';
|
---|
1221 | else
|
---|
1222 | *bp++ = ' ';
|
---|
1223 | *bp++ = '^';
|
---|
1224 | *bp++ = ' ';
|
---|
1225 | }
|
---|
1226 | strcpy(bp, mesg);
|
---|
1227 | err("", buf, args);
|
---|
1228 | va_end(args);
|
---|
1229 | free(buf);
|
---|
1230 | }
|
---|
1231 |
|
---|
1232 | /* get_src_buf --- read the next buffer of source program */
|
---|
1233 |
|
---|
1234 | static char *
|
---|
1235 | get_src_buf()
|
---|
1236 | {
|
---|
1237 | static int samefile = FALSE;
|
---|
1238 | static int nextfile = 0;
|
---|
1239 | static char *buf = NULL;
|
---|
1240 | static size_t buflen = 0;
|
---|
1241 | static int fd;
|
---|
1242 |
|
---|
1243 | int n;
|
---|
1244 | register char *scan;
|
---|
1245 | int newfile;
|
---|
1246 | struct stat sbuf;
|
---|
1247 | int readcount = 0;
|
---|
1248 | int l;
|
---|
1249 | char *readloc;
|
---|
1250 |
|
---|
1251 | again:
|
---|
1252 | newfile = FALSE;
|
---|
1253 | if (nextfile > numfiles)
|
---|
1254 | return NULL;
|
---|
1255 |
|
---|
1256 | if (srcfiles[nextfile].stype == CMDLINE) {
|
---|
1257 | if ((l = strlen(srcfiles[nextfile].val)) == 0) {
|
---|
1258 | /*
|
---|
1259 | * Yet Another Special case:
|
---|
1260 | * gawk '' /path/name
|
---|
1261 | * Sigh.
|
---|
1262 | */
|
---|
1263 | static int warned = FALSE;
|
---|
1264 |
|
---|
1265 | if (do_lint && ! warned) {
|
---|
1266 | warned = TRUE;
|
---|
1267 | lintwarn(_("empty program text on command line"));
|
---|
1268 | }
|
---|
1269 | ++nextfile;
|
---|
1270 | goto again;
|
---|
1271 | }
|
---|
1272 | if (srcfiles[nextfile].val[l-1] == '\n') {
|
---|
1273 | /* has terminating newline, can use it directly */
|
---|
1274 | sourceline = 1;
|
---|
1275 | lexptr = lexptr_begin = srcfiles[nextfile].val;
|
---|
1276 | /* fall through to pointer adjustment and return, below */
|
---|
1277 | } else {
|
---|
1278 | /* copy it into static buffer */
|
---|
1279 |
|
---|
1280 | /* make sure buffer exists and has room */
|
---|
1281 | if (buflen == 0) {
|
---|
1282 | emalloc(buf, char *, l+2, "get_src_buf");
|
---|
1283 | buflen = l + 2;
|
---|
1284 | } else if (l+2 > buflen) {
|
---|
1285 | erealloc(buf, char *, l+2, "get_src_buf");
|
---|
1286 | buflen = l + 2;
|
---|
1287 | } /* else
|
---|
1288 | buffer has room, just use it */
|
---|
1289 |
|
---|
1290 | /* copy in data */
|
---|
1291 | memcpy(buf, srcfiles[nextfile].val, l);
|
---|
1292 | buf[l] = '\n';
|
---|
1293 | buf[++l] = '\0';
|
---|
1294 |
|
---|
1295 | /* set vars and return */
|
---|
1296 | lexptr = lexptr_begin = buf;
|
---|
1297 | }
|
---|
1298 | lexend = lexptr + l;
|
---|
1299 | nextfile++; /* for next entry to this routine */
|
---|
1300 | return lexptr;
|
---|
1301 | }
|
---|
1302 |
|
---|
1303 | if (! samefile) {
|
---|
1304 | source = srcfiles[nextfile].val;
|
---|
1305 | if (source == NULL) { /* read all the source files, all done */
|
---|
1306 | if (buf != NULL) {
|
---|
1307 | free(buf);
|
---|
1308 | buf = NULL;
|
---|
1309 | }
|
---|
1310 | buflen = 0;
|
---|
1311 | return lexeme = lexptr = lexptr_begin = NULL;
|
---|
1312 | }
|
---|
1313 | fd = pathopen(source);
|
---|
1314 | if (fd <= INVALID_HANDLE) {
|
---|
1315 | char *in;
|
---|
1316 |
|
---|
1317 | /* suppress file name and line no. in error mesg */
|
---|
1318 | in = source;
|
---|
1319 | source = NULL;
|
---|
1320 | fatal(_("can't open source file `%s' for reading (%s)"),
|
---|
1321 | in, strerror(errno));
|
---|
1322 | }
|
---|
1323 | l = optimal_bufsize(fd, & sbuf);
|
---|
1324 | /*
|
---|
1325 | * Make sure that something silly like
|
---|
1326 | * AWKBUFSIZE=8 make check
|
---|
1327 | * works ok.
|
---|
1328 | */
|
---|
1329 | #define A_DECENT_BUFFER_SIZE 128
|
---|
1330 | if (l < A_DECENT_BUFFER_SIZE)
|
---|
1331 | l = A_DECENT_BUFFER_SIZE;
|
---|
1332 | #undef A_DECENT_BUFFER_SIZE
|
---|
1333 |
|
---|
1334 | newfile = TRUE;
|
---|
1335 |
|
---|
1336 | /* make sure buffer exists and has room */
|
---|
1337 | if (buflen == 0) {
|
---|
1338 | emalloc(buf, char *, l+2, "get_src_buf");
|
---|
1339 | buflen = l + 2;
|
---|
1340 | } else if (l+2 > buflen) {
|
---|
1341 | erealloc(buf, char *, l+2, "get_src_buf");
|
---|
1342 | buflen = l + 2;
|
---|
1343 | } /* else
|
---|
1344 | buffer has room, just use it */
|
---|
1345 |
|
---|
1346 | readcount = l;
|
---|
1347 | readloc = lexeme = lexptr = lexptr_begin = buf;
|
---|
1348 | samefile = TRUE;
|
---|
1349 | sourceline = 1;
|
---|
1350 | } else {
|
---|
1351 | /*
|
---|
1352 | * In same file, ran off edge of buffer.
|
---|
1353 | * Shift current line down to front, adjust
|
---|
1354 | * pointers and fill in the rest of the buffer.
|
---|
1355 | */
|
---|
1356 |
|
---|
1357 | int lexeme_offset = lexeme - lexptr_begin;
|
---|
1358 | int lexptr_offset = lexptr - lexptr_begin;
|
---|
1359 | int lexend_offset = lexend - lexptr_begin;
|
---|
1360 |
|
---|
1361 | /* find beginning of current line */
|
---|
1362 | for (scan = lexeme; scan >= lexptr_begin; scan--) {
|
---|
1363 | if (*scan == '\n') {
|
---|
1364 | scan++;
|
---|
1365 | break;
|
---|
1366 | }
|
---|
1367 | }
|
---|
1368 |
|
---|
1369 | if (scan <= buf) {
|
---|
1370 | /* have to grow the buffer */
|
---|
1371 | buflen *= 2;
|
---|
1372 | erealloc(buf, char *, buflen, "get_src_buf");
|
---|
1373 | } else {
|
---|
1374 | /* shift things down */
|
---|
1375 | memmove(buf, scan, lexend - scan);
|
---|
1376 | /*
|
---|
1377 | * make offsets relative to start of line,
|
---|
1378 | * not start of buffer.
|
---|
1379 | */
|
---|
1380 | lexend_offset = lexend - scan;
|
---|
1381 | lexeme_offset = lexeme - scan;
|
---|
1382 | lexptr_offset = lexptr - scan;
|
---|
1383 | }
|
---|
1384 |
|
---|
1385 | /* adjust pointers */
|
---|
1386 | lexeme = buf + lexeme_offset;
|
---|
1387 | lexptr = buf + lexptr_offset;
|
---|
1388 | lexend = buf + lexend_offset;
|
---|
1389 | lexptr_begin = buf;
|
---|
1390 | readcount = buflen - (lexend - buf);
|
---|
1391 | readloc = lexend;
|
---|
1392 | }
|
---|
1393 |
|
---|
1394 | /* add more data to buffer */
|
---|
1395 | n = read(fd, readloc, readcount);
|
---|
1396 | if (n == -1)
|
---|
1397 | fatal(_("can't read sourcefile `%s' (%s)"),
|
---|
1398 | source, strerror(errno));
|
---|
1399 | if (n == 0) {
|
---|
1400 | if (newfile) {
|
---|
1401 | static int warned = FALSE;
|
---|
1402 |
|
---|
1403 | if (do_lint && ! warned) {
|
---|
1404 | warned = TRUE;
|
---|
1405 | lintwarn(_("source file `%s' is empty"), source);
|
---|
1406 | }
|
---|
1407 | }
|
---|
1408 | if (fd != fileno(stdin)) /* safety */
|
---|
1409 | close(fd);
|
---|
1410 | samefile = FALSE;
|
---|
1411 | nextfile++;
|
---|
1412 | goto again;
|
---|
1413 | }
|
---|
1414 | lexend = lexptr + n;
|
---|
1415 | return lexptr;
|
---|
1416 | }
|
---|
1417 |
|
---|
1418 | /* tokadd --- add a character to the token buffer */
|
---|
1419 |
|
---|
1420 | #define tokadd(x) (*tok++ = (x), tok == tokend ? tokexpand() : tok)
|
---|
1421 |
|
---|
1422 | /* tokexpand --- grow the token buffer */
|
---|
1423 |
|
---|
1424 | char *
|
---|
1425 | tokexpand()
|
---|
1426 | {
|
---|
1427 | static int toksize = 60;
|
---|
1428 | int tokoffset;
|
---|
1429 |
|
---|
1430 | tokoffset = tok - tokstart;
|
---|
1431 | toksize *= 2;
|
---|
1432 | if (tokstart != NULL)
|
---|
1433 | erealloc(tokstart, char *, toksize, "tokexpand");
|
---|
1434 | else
|
---|
1435 | emalloc(tokstart, char *, toksize, "tokexpand");
|
---|
1436 | tokend = tokstart + toksize;
|
---|
1437 | tok = tokstart + tokoffset;
|
---|
1438 | return tok;
|
---|
1439 | }
|
---|
1440 |
|
---|
1441 | /* nextc --- get the next input character */
|
---|
1442 |
|
---|
#ifdef MBS_SUPPORT

/*
 * Multibyte-aware version.  Returns the next input byte as an
 * unsigned char value, or EOF when get_src_buf() has nothing more.
 * When gawk_mb_cur_max > 1 it also records marks in cur_char_ring,
 * which is what nextc_is_1stbyte inspects.
 */
static int
nextc(void)
{
	int w;			/* ring position being marked */
	int idx;
	mbstate_t tmp_state;
	size_t mbclen;

	if (gawk_mb_cur_max <= 1) {
		/* single-byte locale: just hand out bytes */
		if (lexptr && lexptr < lexend)
			return (int) (unsigned char) *lexptr++;
		if (get_src_buf())
			return (int) (unsigned char) *lexptr++;
		return EOF;
	}

	if ((! lexptr || lexptr >= lexend) && ! get_src_buf())
		return EOF;

	/* Update the buffer index. */
	cur_ring_idx = (cur_ring_idx + 1) % RING_BUFFER_SIZE;

	/* A zero mark means this position has not been examined yet. */
	if (cur_char_ring[cur_ring_idx] == 0) {
		w = cur_ring_idx;

		/* Try successively longer prefixes of the remaining input. */
		for (idx = 0; lexptr + idx < lexend; idx++) {
			tmp_state = cur_mbstate;
			mbclen = mbrlen(lexptr, idx + 1, &tmp_state);

			if (mbclen == (size_t) -2) {
				/* It is not a complete multibyte character. */
				cur_char_ring[w] = idx + 1;
				w = (w + 1) % RING_BUFFER_SIZE;
				continue;
			}

			/*
			 * A singlebyte character, an invalid sequence or
			 * a NUL is treated as a singlebyte character;
			 * otherwise mbclen > 1 and is recorded as is.
			 */
			cur_char_ring[w] = (mbclen == 1
					|| mbclen == (size_t) -1
					|| mbclen == 0) ? 1 : mbclen;
			break;
		}
		cur_mbstate = tmp_state;

		/* Put a mark on the position on which we write next character. */
		w = (w + 1) % RING_BUFFER_SIZE;
		cur_char_ring[w] = 0;
	}

	return (int) (unsigned char) *lexptr++;
}

#else /* MBS_SUPPORT */

#if GAWKDEBUG
/* Function version, convenient under a debugger. */
int
nextc(void)
{
	if (lexptr && lexptr < lexend)
		return (int) (unsigned char) *lexptr++;
	if (get_src_buf())
		return (int) (unsigned char) *lexptr++;
	return EOF;
}
#else
/* Macro version: next byte from the buffer, refilling it when empty. */
#define	nextc()	((lexptr && lexptr < lexend) ? \
		    ((int) (unsigned char) *lexptr++) : \
		    (get_src_buf() ? ((int) (unsigned char) *lexptr++) : EOF) \
		)
#endif

#endif /* MBS_SUPPORT */
|
---|
1535 |
|
---|
1536 | /* pushback --- push a character back on the input */
|
---|
1537 |
|
---|
1538 | static inline void
|
---|
1539 | pushback(void)
|
---|
1540 | {
|
---|
1541 | #ifdef MBS_SUPPORT
|
---|
1542 | if (gawk_mb_cur_max > 1)
|
---|
1543 | cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 :
|
---|
1544 | cur_ring_idx - 1;
|
---|
1545 | #endif
|
---|
1546 | (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr);
|
---|
1547 | }
|
---|
1548 |
|
---|
1549 |
|
---|
1550 | /* allow_newline --- allow newline after &&, ||, ? and : */
|
---|
1551 |
|
---|
1552 | static void
|
---|
1553 | allow_newline(void)
|
---|
1554 | {
|
---|
1555 | int c;
|
---|
1556 |
|
---|
1557 | for (;;) {
|
---|
1558 | c = nextc();
|
---|
1559 | if (c == EOF)
|
---|
1560 | break;
|
---|
1561 | if (c == '#') {
|
---|
1562 | while ((c = nextc()) != '\n' && c != EOF)
|
---|
1563 | continue;
|
---|
1564 | if (c == EOF)
|
---|
1565 | break;
|
---|
1566 | }
|
---|
1567 | if (c == '\n')
|
---|
1568 | sourceline++;
|
---|
1569 | if (! ISSPACE(c)) {
|
---|
1570 | pushback();
|
---|
1571 | break;
|
---|
1572 | }
|
---|
1573 | }
|
---|
1574 | }
|
---|
1575 |
|
---|
1576 | /* yylex --- Read the input and turn it into tokens. */
|
---|
1577 |
|
---|
1578 | static int
|
---|
1579 | yylex(void)
|
---|
1580 | {
|
---|
1581 | register int c;
|
---|
1582 | int seen_e = FALSE; /* These are for numbers */
|
---|
1583 | int seen_point = FALSE;
|
---|
1584 | int esc_seen; /* for literal strings */
|
---|
1585 | int mid;
|
---|
1586 | static int did_newline = FALSE;
|
---|
1587 | char *tokkey;
|
---|
1588 | static int lasttok = 0, eof_warned = FALSE;
|
---|
1589 | int inhex = FALSE;
|
---|
1590 | int intlstr = FALSE;
|
---|
1591 |
|
---|
1592 | if (nextc() == EOF) {
|
---|
1593 | if (lasttok != NEWLINE) {
|
---|
1594 | lasttok = NEWLINE;
|
---|
1595 | if (do_lint && ! eof_warned) {
|
---|
1596 | lintwarn(_("source file does not end in newline"));
|
---|
1597 | eof_warned = TRUE;
|
---|
1598 | }
|
---|
1599 | return NEWLINE; /* fake it */
|
---|
1600 | }
|
---|
1601 | return 0;
|
---|
1602 | }
|
---|
1603 | pushback();
|
---|
1604 | #if defined OS2 || defined __EMX__
|
---|
1605 | /*
|
---|
1606 | * added for OS/2's extproc feature of cmd.exe
|
---|
1607 | * (like #! in BSD sh)
|
---|
1608 | */
|
---|
1609 | if (strncasecmp(lexptr, "extproc ", 8) == 0) {
|
---|
1610 | while (*lexptr && *lexptr != '\n')
|
---|
1611 | lexptr++;
|
---|
1612 | }
|
---|
1613 | #endif
|
---|
1614 | lexeme = lexptr;
|
---|
1615 | thisline = NULL;
|
---|
1616 | if (want_regexp) {
|
---|
1617 | int in_brack = 0; /* count brackets, [[:alnum:]] allowed */
|
---|
1618 | /*
|
---|
1619 | * Counting brackets is non-trivial. [[] is ok,
|
---|
1620 | * and so is [\]], with a point being that /[/]/ as a regexp
|
---|
1621 | * constant has to work.
|
---|
1622 | *
|
---|
1623 | * Do not count [ or ] if either one is preceded by a \.
|
---|
1624 | * A `[' should be counted if
|
---|
1625 | * a) it is the first one so far (in_brack == 0)
|
---|
1626 | * b) it is the `[' in `[:'
|
---|
1627 | * A ']' should be counted if not preceded by a \, since
|
---|
1628 | * it is either closing `:]' or just a plain list.
|
---|
1629 | * According to POSIX, []] is how you put a ] into a set.
|
---|
1630 | * Try to handle that too.
|
---|
1631 | *
|
---|
1632 | * The code for \ handles \[ and \].
|
---|
1633 | */
|
---|
1634 |
|
---|
1635 | want_regexp = FALSE;
|
---|
1636 | tok = tokstart;
|
---|
1637 | for (;;) {
|
---|
1638 | c = nextc();
|
---|
1639 |
|
---|
1640 | if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) {
|
---|
1641 | case '[':
|
---|
1642 | /* one day check for `.' and `=' too */
|
---|
1643 | if (nextc() == ':' || in_brack == 0)
|
---|
1644 | in_brack++;
|
---|
1645 | pushback();
|
---|
1646 | break;
|
---|
1647 | case ']':
|
---|
1648 | if (tokstart[0] == '['
|
---|
1649 | && (tok == tokstart + 1
|
---|
1650 | || (tok == tokstart + 2
|
---|
1651 | && tokstart[1] == '^')))
|
---|
1652 | /* do nothing */;
|
---|
1653 | else
|
---|
1654 | in_brack--;
|
---|
1655 | break;
|
---|
1656 | case '\\':
|
---|
1657 | if ((c = nextc()) == EOF) {
|
---|
1658 | yyerror(_("unterminated regexp ends with `\\' at end of file"));
|
---|
1659 | goto end_regexp; /* kludge */
|
---|
1660 | } else if (c == '\n') {
|
---|
1661 | sourceline++;
|
---|
1662 | continue;
|
---|
1663 | } else {
|
---|
1664 | tokadd('\\');
|
---|
1665 | tokadd(c);
|
---|
1666 | continue;
|
---|
1667 | }
|
---|
1668 | break;
|
---|
1669 | case '/': /* end of the regexp */
|
---|
1670 | if (in_brack > 0)
|
---|
1671 | break;
|
---|
1672 | end_regexp:
|
---|
1673 | tokadd('\0');
|
---|
1674 | yylval.sval = tokstart;
|
---|
1675 | if (do_lint) {
|
---|
1676 | int peek = nextc();
|
---|
1677 |
|
---|
1678 | pushback();
|
---|
1679 | if (peek == 'i' || peek == 's') {
|
---|
1680 | if (source)
|
---|
1681 | lintwarn(
|
---|
1682 | _("%s: %d: tawk regex modifier `/.../%c' doesn't work in gawk"),
|
---|
1683 | source, sourceline, peek);
|
---|
1684 | else
|
---|
1685 | lintwarn(
|
---|
1686 | _("tawk regex modifier `/.../%c' doesn't work in gawk"),
|
---|
1687 | peek);
|
---|
1688 | }
|
---|
1689 | }
|
---|
1690 | return lasttok = REGEXP;
|
---|
1691 | case '\n':
|
---|
1692 | pushback();
|
---|
1693 | yyerror(_("unterminated regexp"));
|
---|
1694 | goto end_regexp; /* kludge */
|
---|
1695 | case EOF:
|
---|
1696 | yyerror(_("unterminated regexp at end of file"));
|
---|
1697 | goto end_regexp; /* kludge */
|
---|
1698 | }
|
---|
1699 | tokadd(c);
|
---|
1700 | }
|
---|
1701 | }
|
---|
1702 | retry:
|
---|
1703 |
|
---|
1704 | /* skipping \r is a hack, but windows is just too pervasive. sigh. */
|
---|
1705 | while ((c = nextc()) == ' ' || c == '\t' || c == '\r')
|
---|
1706 | continue;
|
---|
1707 |
|
---|
1708 | lexeme = lexptr ? lexptr - 1 : lexptr;
|
---|
1709 | thisline = NULL;
|
---|
1710 | tok = tokstart;
|
---|
1711 | yylval.nodetypeval = Node_illegal;
|
---|
1712 |
|
---|
1713 | if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) {
|
---|
1714 | case EOF:
|
---|
1715 | if (lasttok != NEWLINE) {
|
---|
1716 | lasttok = NEWLINE;
|
---|
1717 | if (do_lint && ! eof_warned) {
|
---|
1718 | lintwarn(_("source file does not end in newline"));
|
---|
1719 | eof_warned = TRUE;
|
---|
1720 | }
|
---|
1721 | return NEWLINE; /* fake it */
|
---|
1722 | }
|
---|
1723 | return 0;
|
---|
1724 |
|
---|
1725 | case '\n':
|
---|
1726 | sourceline++;
|
---|
1727 | return lasttok = NEWLINE;
|
---|
1728 |
|
---|
1729 | case '#': /* it's a comment */
|
---|
1730 | while ((c = nextc()) != '\n') {
|
---|
1731 | if (c == EOF) {
|
---|
1732 | if (lasttok != NEWLINE) {
|
---|
1733 | lasttok = NEWLINE;
|
---|
1734 | if (do_lint && ! eof_warned) {
|
---|
1735 | lintwarn(
|
---|
1736 | _("source file does not end in newline"));
|
---|
1737 | eof_warned = TRUE;
|
---|
1738 | }
|
---|
1739 | return NEWLINE; /* fake it */
|
---|
1740 | }
|
---|
1741 | return 0;
|
---|
1742 | }
|
---|
1743 | }
|
---|
1744 | sourceline++;
|
---|
1745 | return lasttok = NEWLINE;
|
---|
1746 |
|
---|
1747 | case '\\':
|
---|
1748 | #ifdef RELAXED_CONTINUATION
|
---|
1749 | /*
|
---|
1750 | * This code purports to allow comments and/or whitespace
|
---|
1751 | * after the `\' at the end of a line used for continuation.
|
---|
1752 | * Use it at your own risk. We think it's a bad idea, which
|
---|
1753 | * is why it's not on by default.
|
---|
1754 | */
|
---|
1755 | if (! do_traditional) {
|
---|
1756 | /* strip trailing white-space and/or comment */
|
---|
1757 | while ((c = nextc()) == ' ' || c == '\t' || c == '\r')
|
---|
1758 | continue;
|
---|
1759 | if (c == '#') {
|
---|
1760 | if (do_lint)
|
---|
1761 | lintwarn(
|
---|
1762 | _("use of `\\ #...' line continuation is not portable"));
|
---|
1763 | while ((c = nextc()) != '\n')
|
---|
1764 | if (c == EOF)
|
---|
1765 | break;
|
---|
1766 | }
|
---|
1767 | pushback();
|
---|
1768 | }
|
---|
1769 | #endif /* RELAXED_CONTINUATION */
|
---|
1770 | if (nextc() == '\n') {
|
---|
1771 | sourceline++;
|
---|
1772 | goto retry;
|
---|
1773 | } else {
|
---|
1774 | yyerror(_("backslash not last character on line"));
|
---|
1775 | exit(1);
|
---|
1776 | }
|
---|
1777 | break;
|
---|
1778 |
|
---|
1779 | case ':':
|
---|
1780 | case '?':
|
---|
1781 | if (! do_posix)
|
---|
1782 | allow_newline();
|
---|
1783 | return lasttok = c;
|
---|
1784 |
|
---|
1785 | /*
|
---|
1786 | * in_parens is undefined unless we are parsing a print
|
---|
1787 | * statement (in_print), but why bother with a check?
|
---|
1788 | */
|
---|
1789 | case ')':
|
---|
1790 | in_parens--;
|
---|
1791 | return lasttok = c;
|
---|
1792 |
|
---|
1793 | case '(':
|
---|
1794 | in_parens++;
|
---|
1795 | /* FALL THROUGH */
|
---|
1796 | case '$':
|
---|
1797 | case ';':
|
---|
1798 | case '{':
|
---|
1799 | case ',':
|
---|
1800 | case '[':
|
---|
1801 | case ']':
|
---|
1802 | return lasttok = c;
|
---|
1803 |
|
---|
1804 | case '*':
|
---|
1805 | if ((c = nextc()) == '=') {
|
---|
1806 | yylval.nodetypeval = Node_assign_times;
|
---|
1807 | return lasttok = ASSIGNOP;
|
---|
1808 | } else if (do_posix) {
|
---|
1809 | pushback();
|
---|
1810 | return lasttok = '*';
|
---|
1811 | } else if (c == '*') {
|
---|
1812 | /* make ** and **= aliases for ^ and ^= */
|
---|
1813 | static int did_warn_op = FALSE, did_warn_assgn = FALSE;
|
---|
1814 |
|
---|
1815 | if (nextc() == '=') {
|
---|
1816 | if (! did_warn_assgn) {
|
---|
1817 | did_warn_assgn = TRUE;
|
---|
1818 | if (do_lint)
|
---|
1819 | lintwarn(_("POSIX does not allow operator `**='"));
|
---|
1820 | if (do_lint_old)
|
---|
1821 | warning(_("old awk does not support operator `**='"));
|
---|
1822 | }
|
---|
1823 | yylval.nodetypeval = Node_assign_exp;
|
---|
1824 | return ASSIGNOP;
|
---|
1825 | } else {
|
---|
1826 | pushback();
|
---|
1827 | if (! did_warn_op) {
|
---|
1828 | did_warn_op = TRUE;
|
---|
1829 | if (do_lint)
|
---|
1830 | lintwarn(_("POSIX does not allow operator `**'"));
|
---|
1831 | if (do_lint_old)
|
---|
1832 | warning(_("old awk does not support operator `**'"));
|
---|
1833 | }
|
---|
1834 | return lasttok = '^';
|
---|
1835 | }
|
---|
1836 | }
|
---|
1837 | pushback();
|
---|
1838 | return lasttok = '*';
|
---|
1839 |
|
---|
1840 | case '/':
|
---|
1841 | if (nextc() == '=') {
|
---|
1842 | pushback();
|
---|
1843 | return lasttok = SLASH_BEFORE_EQUAL;
|
---|
1844 | }
|
---|
1845 | pushback();
|
---|
1846 | return lasttok = '/';
|
---|
1847 |
|
---|
1848 | case '%':
|
---|
1849 | if (nextc() == '=') {
|
---|
1850 | yylval.nodetypeval = Node_assign_mod;
|
---|
1851 | return lasttok = ASSIGNOP;
|
---|
1852 | }
|
---|
1853 | pushback();
|
---|
1854 | return lasttok = '%';
|
---|
1855 |
|
---|
1856 | case '^':
|
---|
1857 | {
|
---|
1858 | static int did_warn_op = FALSE, did_warn_assgn = FALSE;
|
---|
1859 |
|
---|
1860 | if (nextc() == '=') {
|
---|
1861 | if (do_lint_old && ! did_warn_assgn) {
|
---|
1862 | did_warn_assgn = TRUE;
|
---|
1863 | warning(_("operator `^=' is not supported in old awk"));
|
---|
1864 | }
|
---|
1865 | yylval.nodetypeval = Node_assign_exp;
|
---|
1866 | return lasttok = ASSIGNOP;
|
---|
1867 | }
|
---|
1868 | pushback();
|
---|
1869 | if (do_lint_old && ! did_warn_op) {
|
---|
1870 | did_warn_op = TRUE;
|
---|
1871 | warning(_("operator `^' is not supported in old awk"));
|
---|
1872 | }
|
---|
1873 | return lasttok = '^';
|
---|
1874 | }
|
---|
1875 |
|
---|
1876 | case '+':
|
---|
1877 | if ((c = nextc()) == '=') {
|
---|
1878 | yylval.nodetypeval = Node_assign_plus;
|
---|
1879 | return lasttok = ASSIGNOP;
|
---|
1880 | }
|
---|
1881 | if (c == '+')
|
---|
1882 | return lasttok = INCREMENT;
|
---|
1883 | pushback();
|
---|
1884 | return lasttok = '+';
|
---|
1885 |
|
---|
1886 | case '!':
|
---|
1887 | if ((c = nextc()) == '=') {
|
---|
1888 | yylval.nodetypeval = Node_notequal;
|
---|
1889 | return lasttok = RELOP;
|
---|
1890 | }
|
---|
1891 | if (c == '~') {
|
---|
1892 | yylval.nodetypeval = Node_nomatch;
|
---|
1893 | return lasttok = MATCHOP;
|
---|
1894 | }
|
---|
1895 | pushback();
|
---|
1896 | return lasttok = '!';
|
---|
1897 |
|
---|
1898 | case '<':
|
---|
1899 | if (nextc() == '=') {
|
---|
1900 | yylval.nodetypeval = Node_leq;
|
---|
1901 | return lasttok = RELOP;
|
---|
1902 | }
|
---|
1903 | yylval.nodetypeval = Node_less;
|
---|
1904 | pushback();
|
---|
1905 | return lasttok = '<';
|
---|
1906 |
|
---|
1907 | case '=':
|
---|
1908 | if (nextc() == '=') {
|
---|
1909 | yylval.nodetypeval = Node_equal;
|
---|
1910 | return lasttok = RELOP;
|
---|
1911 | }
|
---|
1912 | yylval.nodetypeval = Node_assign;
|
---|
1913 | pushback();
|
---|
1914 | return lasttok = ASSIGN;
|
---|
1915 |
|
---|
1916 | case '>':
|
---|
1917 | if ((c = nextc()) == '=') {
|
---|
1918 | yylval.nodetypeval = Node_geq;
|
---|
1919 | return lasttok = RELOP;
|
---|
1920 | } else if (c == '>') {
|
---|
1921 | yylval.nodetypeval = Node_redirect_append;
|
---|
1922 | return lasttok = IO_OUT;
|
---|
1923 | }
|
---|
1924 | pushback();
|
---|
1925 | if (in_print && in_parens == 0) {
|
---|
1926 | yylval.nodetypeval = Node_redirect_output;
|
---|
1927 | return lasttok = IO_OUT;
|
---|
1928 | }
|
---|
1929 | yylval.nodetypeval = Node_greater;
|
---|
1930 | return lasttok = '>';
|
---|
1931 |
|
---|
1932 | case '~':
|
---|
1933 | yylval.nodetypeval = Node_match;
|
---|
1934 | return lasttok = MATCHOP;
|
---|
1935 |
|
---|
1936 | case '}':
|
---|
1937 | /*
|
---|
1938 | * Added did newline stuff. Easier than
|
---|
1939 | * hacking the grammar.
|
---|
1940 | */
|
---|
1941 | if (did_newline) {
|
---|
1942 | did_newline = FALSE;
|
---|
1943 | return lasttok = c;
|
---|
1944 | }
|
---|
1945 | did_newline++;
|
---|
1946 | --lexptr; /* pick up } next time */
|
---|
1947 | return lasttok = NEWLINE;
|
---|
1948 |
|
---|
1949 | case '"':
|
---|
1950 | string:
|
---|
1951 | esc_seen = FALSE;
|
---|
1952 | while ((c = nextc()) != '"') {
|
---|
1953 | if (c == '\n') {
|
---|
1954 | pushback();
|
---|
1955 | yyerror(_("unterminated string"));
|
---|
1956 | exit(1);
|
---|
1957 | }
|
---|
1958 | if ((gawk_mb_cur_max == 1 || nextc_is_1stbyte) &&
|
---|
1959 | c == '\\') {
|
---|
1960 | c = nextc();
|
---|
1961 | if (c == '\n') {
|
---|
1962 | sourceline++;
|
---|
1963 | continue;
|
---|
1964 | }
|
---|
1965 | esc_seen = TRUE;
|
---|
1966 | tokadd('\\');
|
---|
1967 | }
|
---|
1968 | if (c == EOF) {
|
---|
1969 | pushback();
|
---|
1970 | yyerror(_("unterminated string"));
|
---|
1971 | exit(1);
|
---|
1972 | }
|
---|
1973 | tokadd(c);
|
---|
1974 | }
|
---|
1975 | yylval.nodeval = make_str_node(tokstart,
|
---|
1976 | tok - tokstart, esc_seen ? SCAN : 0);
|
---|
1977 | yylval.nodeval->flags |= PERM;
|
---|
1978 | if (intlstr) {
|
---|
1979 | yylval.nodeval->flags |= INTLSTR;
|
---|
1980 | intlstr = FALSE;
|
---|
1981 | if (do_intl)
|
---|
1982 | dumpintlstr(yylval.nodeval->stptr,
|
---|
1983 | yylval.nodeval->stlen);
|
---|
1984 | }
|
---|
1985 | return lasttok = YSTRING;
|
---|
1986 |
|
---|
1987 | case '-':
|
---|
1988 | if ((c = nextc()) == '=') {
|
---|
1989 | yylval.nodetypeval = Node_assign_minus;
|
---|
1990 | return lasttok = ASSIGNOP;
|
---|
1991 | }
|
---|
1992 | if (c == '-')
|
---|
1993 | return lasttok = DECREMENT;
|
---|
1994 | pushback();
|
---|
1995 | return lasttok = '-';
|
---|
1996 |
|
---|
1997 | case '.':
|
---|
1998 | c = nextc();
|
---|
1999 | pushback();
|
---|
2000 | if (! ISDIGIT(c))
|
---|
2001 | return lasttok = '.';
|
---|
2002 | else
|
---|
2003 | c = '.';
|
---|
2004 | /* FALL THROUGH */
|
---|
2005 | case '0':
|
---|
2006 | case '1':
|
---|
2007 | case '2':
|
---|
2008 | case '3':
|
---|
2009 | case '4':
|
---|
2010 | case '5':
|
---|
2011 | case '6':
|
---|
2012 | case '7':
|
---|
2013 | case '8':
|
---|
2014 | case '9':
|
---|
2015 | /* It's a number */
|
---|
2016 | for (;;) {
|
---|
2017 | int gotnumber = FALSE;
|
---|
2018 |
|
---|
2019 | tokadd(c);
|
---|
2020 | switch (c) {
|
---|
2021 | case 'x':
|
---|
2022 | case 'X':
|
---|
2023 | if (do_traditional)
|
---|
2024 | goto done;
|
---|
2025 | if (tok == tokstart + 2) {
|
---|
2026 | int peek = nextc();
|
---|
2027 |
|
---|
2028 | if (ISXDIGIT(peek)) {
|
---|
2029 | inhex = TRUE;
|
---|
2030 | pushback(); /* following digit */
|
---|
2031 | } else {
|
---|
2032 | pushback(); /* x or X */
|
---|
2033 | goto done;
|
---|
2034 | }
|
---|
2035 | }
|
---|
2036 | break;
|
---|
2037 | case '.':
|
---|
2038 | /* period ends exponent part of floating point number */
|
---|
2039 | if (seen_point || seen_e) {
|
---|
2040 | gotnumber = TRUE;
|
---|
2041 | break;
|
---|
2042 | }
|
---|
2043 | seen_point = TRUE;
|
---|
2044 | break;
|
---|
2045 | case 'e':
|
---|
2046 | case 'E':
|
---|
2047 | if (inhex)
|
---|
2048 | break;
|
---|
2049 | if (seen_e) {
|
---|
2050 | gotnumber = TRUE;
|
---|
2051 | break;
|
---|
2052 | }
|
---|
2053 | seen_e = TRUE;
|
---|
2054 | if ((c = nextc()) == '-' || c == '+') {
|
---|
2055 | int c2 = nextc();
|
---|
2056 |
|
---|
2057 | if (ISDIGIT(c2)) {
|
---|
2058 | tokadd(c);
|
---|
2059 | tokadd(c2);
|
---|
2060 | } else {
|
---|
2061 | pushback(); /* non-digit after + or - */
|
---|
2062 | pushback(); /* + or - */
|
---|
2063 | pushback(); /* e or E */
|
---|
2064 | }
|
---|
2065 | } else if (! ISDIGIT(c)) {
|
---|
2066 | pushback(); /* character after e or E */
|
---|
2067 | pushback(); /* e or E */
|
---|
2068 | } else {
|
---|
2069 | pushback(); /* digit */
|
---|
2070 | }
|
---|
2071 | break;
|
---|
2072 | case 'a':
|
---|
2073 | case 'A':
|
---|
2074 | case 'b':
|
---|
2075 | case 'B':
|
---|
2076 | case 'c':
|
---|
2077 | case 'C':
|
---|
2078 | case 'D':
|
---|
2079 | case 'd':
|
---|
2080 | case 'f':
|
---|
2081 | case 'F':
|
---|
2082 | if (do_traditional || ! inhex)
|
---|
2083 | goto done;
|
---|
2084 | /* fall through */
|
---|
2085 | case '0':
|
---|
2086 | case '1':
|
---|
2087 | case '2':
|
---|
2088 | case '3':
|
---|
2089 | case '4':
|
---|
2090 | case '5':
|
---|
2091 | case '6':
|
---|
2092 | case '7':
|
---|
2093 | case '8':
|
---|
2094 | case '9':
|
---|
2095 | break;
|
---|
2096 | default:
|
---|
2097 | done:
|
---|
2098 | gotnumber = TRUE;
|
---|
2099 | }
|
---|
2100 | if (gotnumber)
|
---|
2101 | break;
|
---|
2102 | c = nextc();
|
---|
2103 | }
|
---|
2104 | if (c != EOF)
|
---|
2105 | pushback();
|
---|
2106 | else if (do_lint && ! eof_warned) {
|
---|
2107 | lintwarn(_("source file does not end in newline"));
|
---|
2108 | eof_warned = TRUE;
|
---|
2109 | }
|
---|
2110 | tokadd('\0');
|
---|
2111 | if (! do_traditional && isnondecimal(tokstart, FALSE)) {
|
---|
2112 | if (do_lint) {
|
---|
2113 | if (ISDIGIT(tokstart[1])) /* not an 'x' or 'X' */
|
---|
2114 | lintwarn("numeric constant `%.*s' treated as octal",
|
---|
2115 | (int) strlen(tokstart)-1, tokstart);
|
---|
2116 | else if (tokstart[1] == 'x' || tokstart[1] == 'X')
|
---|
2117 | lintwarn("numeric constant `%.*s' treated as hexadecimal",
|
---|
2118 | (int) strlen(tokstart)-1, tokstart);
|
---|
2119 | }
|
---|
2120 | yylval.nodeval = make_number(nondec2awknum(tokstart, strlen(tokstart)));
|
---|
2121 | } else
|
---|
2122 | yylval.nodeval = make_number(atof(tokstart));
|
---|
2123 | yylval.nodeval->flags |= PERM;
|
---|
2124 | return lasttok = YNUMBER;
|
---|
2125 |
|
---|
2126 | case '&':
|
---|
2127 | if ((c = nextc()) == '&') {
|
---|
2128 | yylval.nodetypeval = Node_and;
|
---|
2129 | allow_newline();
|
---|
2130 | return lasttok = LEX_AND;
|
---|
2131 | }
|
---|
2132 | pushback();
|
---|
2133 | return lasttok = '&';
|
---|
2134 |
|
---|
2135 | case '|':
|
---|
2136 | if ((c = nextc()) == '|') {
|
---|
2137 | yylval.nodetypeval = Node_or;
|
---|
2138 | allow_newline();
|
---|
2139 | return lasttok = LEX_OR;
|
---|
2140 | } else if (! do_traditional && c == '&') {
|
---|
2141 | yylval.nodetypeval = Node_redirect_twoway;
|
---|
2142 | return lasttok = (in_print && in_parens == 0 ? IO_OUT : IO_IN);
|
---|
2143 | }
|
---|
2144 | pushback();
|
---|
2145 | if (in_print && in_parens == 0) {
|
---|
2146 | yylval.nodetypeval = Node_redirect_pipe;
|
---|
2147 | return lasttok = IO_OUT;
|
---|
2148 | } else {
|
---|
2149 | yylval.nodetypeval = Node_redirect_pipein;
|
---|
2150 | return lasttok = IO_IN;
|
---|
2151 | }
|
---|
2152 | }
|
---|
2153 |
|
---|
2154 | if (c != '_' && ! ISALPHA(c)) {
|
---|
2155 | yyerror(_("invalid char '%c' in expression"), c);
|
---|
2156 | exit(1);
|
---|
2157 | }
|
---|
2158 |
|
---|
2159 | /*
|
---|
2160 | * Lots of fog here. Consider:
|
---|
2161 | *
|
---|
2162 | * print "xyzzy"$_"foo"
|
---|
2163 | *
|
---|
2164 | * Without the check for ` lasttok != '$' ', this is parsed as
|
---|
2165 | *
|
---|
2166 | * print "xxyzz" $(_"foo")
|
---|
2167 | *
|
---|
2168 | * With the check, it is "correctly" parsed as three
|
---|
2169 | * string concatenations. Sigh. This seems to be
|
---|
2170 | * "more correct", but this is definitely one of those
|
---|
2171 | * occasions where the interactions are funny.
|
---|
2172 | */
|
---|
2173 | if (! do_traditional && c == '_' && lasttok != '$') {
|
---|
2174 | if ((c = nextc()) == '"') {
|
---|
2175 | intlstr = TRUE;
|
---|
2176 | goto string;
|
---|
2177 | }
|
---|
2178 | pushback();
|
---|
2179 | c = '_';
|
---|
2180 | }
|
---|
2181 |
|
---|
2182 | /* it's some type of name-type-thing. Find its length. */
|
---|
2183 | tok = tokstart;
|
---|
2184 | while (is_identchar(c)) {
|
---|
2185 | tokadd(c);
|
---|
2186 | c = nextc();
|
---|
2187 | }
|
---|
2188 | tokadd('\0');
|
---|
2189 | emalloc(tokkey, char *, tok - tokstart, "yylex");
|
---|
2190 | memcpy(tokkey, tokstart, tok - tokstart);
|
---|
2191 | if (c != EOF)
|
---|
2192 | pushback();
|
---|
2193 | else if (do_lint && ! eof_warned) {
|
---|
2194 | lintwarn(_("source file does not end in newline"));
|
---|
2195 | eof_warned = TRUE;
|
---|
2196 | }
|
---|
2197 |
|
---|
2198 | /* See if it is a special token. */
|
---|
2199 |
|
---|
2200 | if ((mid = check_special(tokstart)) >= 0) {
|
---|
2201 | if (do_lint) {
|
---|
2202 | if (tokentab[mid].flags & GAWKX)
|
---|
2203 | lintwarn(_("`%s' is a gawk extension"),
|
---|
2204 | tokentab[mid].operator);
|
---|
2205 | if (tokentab[mid].flags & RESX)
|
---|
2206 | lintwarn(_("`%s' is a Bell Labs extension"),
|
---|
2207 | tokentab[mid].operator);
|
---|
2208 | if (tokentab[mid].flags & NOT_POSIX)
|
---|
2209 | lintwarn(_("POSIX does not allow `%s'"),
|
---|
2210 | tokentab[mid].operator);
|
---|
2211 | }
|
---|
2212 | if (do_lint_old && (tokentab[mid].flags & NOT_OLD))
|
---|
2213 | warning(_("`%s' is not supported in old awk"),
|
---|
2214 | tokentab[mid].operator);
|
---|
2215 | if ((do_traditional && (tokentab[mid].flags & GAWKX))
|
---|
2216 | || (do_posix && (tokentab[mid].flags & NOT_POSIX)))
|
---|
2217 | ;
|
---|
2218 | else {
|
---|
2219 | if (tokentab[mid].class == LEX_BUILTIN
|
---|
2220 | || tokentab[mid].class == LEX_LENGTH)
|
---|
2221 | yylval.lval = mid;
|
---|
2222 | else
|
---|
2223 | yylval.nodetypeval = tokentab[mid].value;
|
---|
2224 | free(tokkey);
|
---|
2225 | return lasttok = tokentab[mid].class;
|
---|
2226 | }
|
---|
2227 | }
|
---|
2228 |
|
---|
2229 | yylval.sval = tokkey;
|
---|
2230 | if (*lexptr == '(')
|
---|
2231 | return lasttok = FUNC_CALL;
|
---|
2232 | else {
|
---|
2233 | static short goto_warned = FALSE;
|
---|
2234 |
|
---|
2235 | #define SMART_ALECK 1
|
---|
2236 | if (SMART_ALECK && do_lint
|
---|
2237 | && ! goto_warned && strcasecmp(tokkey, "goto") == 0) {
|
---|
2238 | goto_warned = TRUE;
|
---|
2239 | lintwarn(_("`goto' considered harmful!\n"));
|
---|
2240 | }
|
---|
2241 | return lasttok = NAME;
|
---|
2242 | }
|
---|
2243 | }
|
---|
2244 |
|
---|
2245 | /* node_common --- common code for allocating a new node */
|
---|
2246 |
|
---|
2247 | static NODE *
|
---|
2248 | node_common(NODETYPE op)
|
---|
2249 | {
|
---|
2250 | register NODE *r;
|
---|
2251 |
|
---|
2252 | getnode(r);
|
---|
2253 | r->type = op;
|
---|
2254 | r->flags = MALLOC;
|
---|
2255 | /* if lookahead is a NL, lineno is 1 too high */
|
---|
2256 | if (lexeme && lexeme >= lexptr_begin && *lexeme == '\n')
|
---|
2257 | r->source_line = sourceline - 1;
|
---|
2258 | else
|
---|
2259 | r->source_line = sourceline;
|
---|
2260 | r->source_file = source;
|
---|
2261 | return r;
|
---|
2262 | }
|
---|
2263 |
|
---|
2264 | /* node --- allocates a node with defined lnode and rnode. */
|
---|
2265 |
|
---|
2266 | NODE *
|
---|
2267 | node(NODE *left, NODETYPE op, NODE *right)
|
---|
2268 | {
|
---|
2269 | register NODE *r;
|
---|
2270 |
|
---|
2271 | r = node_common(op);
|
---|
2272 | r->lnode = left;
|
---|
2273 | r->rnode = right;
|
---|
2274 | return r;
|
---|
2275 | }
|
---|
2276 |
|
---|
2277 | /* snode --- allocate a node with defined subnode and builtin for builtin
|
---|
2278 | functions. Checks for arg. count and supplies defaults where
|
---|
2279 | possible. */
|
---|
2280 |
|
---|
2281 | static NODE *
|
---|
2282 | snode(NODE *subn, NODETYPE op, int idx)
|
---|
2283 | {
|
---|
2284 | register NODE *r;
|
---|
2285 | register NODE *n;
|
---|
2286 | int nexp = 0;
|
---|
2287 | int args_allowed;
|
---|
2288 |
|
---|
2289 | r = node_common(op);
|
---|
2290 |
|
---|
2291 | /* traverse expression list to see how many args. given */
|
---|
2292 | for (n = subn; n != NULL; n = n->rnode) {
|
---|
2293 | nexp++;
|
---|
2294 | if (nexp > 5)
|
---|
2295 | break;
|
---|
2296 | }
|
---|
2297 |
|
---|
2298 | /* check against how many args. are allowed for this builtin */
|
---|
2299 | args_allowed = tokentab[idx].flags & ARGS;
|
---|
2300 | if (args_allowed && (args_allowed & A(nexp)) == 0)
|
---|
2301 | fatal(_("%d is invalid as number of arguments for %s"),
|
---|
2302 | nexp, tokentab[idx].operator);
|
---|
2303 |
|
---|
2304 | r->builtin = tokentab[idx].ptr;
|
---|
2305 |
|
---|
2306 | /* special case processing for a few builtins */
|
---|
2307 | if (nexp == 0 && r->builtin == do_length) {
|
---|
2308 | subn = node(node(make_number(0.0), Node_field_spec, (NODE *) NULL),
|
---|
2309 | Node_expression_list,
|
---|
2310 | (NODE *) NULL);
|
---|
2311 | } else if (r->builtin == do_match) {
|
---|
2312 | static short warned = FALSE;
|
---|
2313 |
|
---|
2314 | if (subn->rnode->lnode->type != Node_regex)
|
---|
2315 | subn->rnode->lnode = mk_rexp(subn->rnode->lnode);
|
---|
2316 |
|
---|
2317 | if (subn->rnode->rnode != NULL) { /* 3rd argument there */
|
---|
2318 | if (do_lint && ! warned) {
|
---|
2319 | warned = TRUE;
|
---|
2320 | lintwarn(_("match: third argument is a gawk extension"));
|
---|
2321 | }
|
---|
2322 | if (do_traditional)
|
---|
2323 | fatal(_("match: third argument is a gawk extension"));
|
---|
2324 | }
|
---|
2325 | } else if (r->builtin == do_sub || r->builtin == do_gsub) {
|
---|
2326 | if (subn->lnode->type != Node_regex)
|
---|
2327 | subn->lnode = mk_rexp(subn->lnode);
|
---|
2328 | if (nexp == 2)
|
---|
2329 | append_right(subn, node(node(make_number(0.0),
|
---|
2330 | Node_field_spec,
|
---|
2331 | (NODE *) NULL),
|
---|
2332 | Node_expression_list,
|
---|
2333 | (NODE *) NULL));
|
---|
2334 | else if (subn->rnode->rnode->lnode->type == Node_val) {
|
---|
2335 | if (do_lint)
|
---|
2336 | lintwarn(_("%s: string literal as last arg of substitute has no effect"),
|
---|
2337 | (r->builtin == do_sub) ? "sub" : "gsub");
|
---|
2338 | } else if (! isassignable(subn->rnode->rnode->lnode)) {
|
---|
2339 | yyerror(_("%s third parameter is not a changeable object"),
|
---|
2340 | (r->builtin == do_sub) ? "sub" : "gsub");
|
---|
2341 | }
|
---|
2342 | } else if (r->builtin == do_gensub) {
|
---|
2343 | if (subn->lnode->type != Node_regex)
|
---|
2344 | subn->lnode = mk_rexp(subn->lnode);
|
---|
2345 | if (nexp == 3)
|
---|
2346 | append_right(subn, node(node(make_number(0.0),
|
---|
2347 | Node_field_spec,
|
---|
2348 | (NODE *) NULL),
|
---|
2349 | Node_expression_list,
|
---|
2350 | (NODE *) NULL));
|
---|
2351 | } else if (r->builtin == do_split) {
|
---|
2352 | if (nexp == 2)
|
---|
2353 | append_right(subn,
|
---|
2354 | node(FS_node, Node_expression_list, (NODE *) NULL));
|
---|
2355 | n = subn->rnode->rnode->lnode;
|
---|
2356 | if (n->type != Node_regex)
|
---|
2357 | subn->rnode->rnode->lnode = mk_rexp(n);
|
---|
2358 | if (nexp == 2)
|
---|
2359 | subn->rnode->rnode->lnode->re_flags |= FS_DFLT;
|
---|
2360 | } else if (r->builtin == do_close) {
|
---|
2361 | static short warned = FALSE;
|
---|
2362 |
|
---|
2363 | if ( nexp == 2) {
|
---|
2364 | if (do_lint && nexp == 2 && ! warned) {
|
---|
2365 | warned = TRUE;
|
---|
2366 | lintwarn(_("close: second argument is a gawk extension"));
|
---|
2367 | }
|
---|
2368 | if (do_traditional)
|
---|
2369 | fatal(_("close: second argument is a gawk extension"));
|
---|
2370 | }
|
---|
2371 | } else if (do_intl /* --gen-po */
|
---|
2372 | && r->builtin == do_dcgettext /* dcgettext(...) */
|
---|
2373 | && subn->lnode->type == Node_val /* 1st arg is constant */
|
---|
2374 | && (subn->lnode->flags & STRCUR) != 0) { /* it's a string constant */
|
---|
2375 | /* ala xgettext, dcgettext("some string" ...) dumps the string */
|
---|
2376 | NODE *str = subn->lnode;
|
---|
2377 |
|
---|
2378 | if ((str->flags & INTLSTR) != 0)
|
---|
2379 | warning(_("use of dcgettext(_\"...\") is incorrect: remove leading underscore"));
|
---|
2380 | /* don't dump it, the lexer already did */
|
---|
2381 | else
|
---|
2382 | dumpintlstr(str->stptr, str->stlen);
|
---|
2383 | } else if (do_intl /* --gen-po */
|
---|
2384 | && r->builtin == do_dcngettext /* dcngettext(...) */
|
---|
2385 | && subn->lnode->type == Node_val /* 1st arg is constant */
|
---|
2386 | && (subn->lnode->flags & STRCUR) != 0 /* it's a string constant */
|
---|
2387 | && subn->rnode->lnode->type == Node_val /* 2nd arg is constant too */
|
---|
2388 | && (subn->rnode->lnode->flags & STRCUR) != 0) { /* it's a string constant */
|
---|
2389 | /* ala xgettext, dcngettext("some string", "some plural" ...) dumps the string */
|
---|
2390 | NODE *str1 = subn->lnode;
|
---|
2391 | NODE *str2 = subn->rnode->lnode;
|
---|
2392 |
|
---|
2393 | if (((str1->flags | str2->flags) & INTLSTR) != 0)
|
---|
2394 | warning(_("use of dcngettext(_\"...\") is incorrect: remove leading underscore"));
|
---|
2395 | else
|
---|
2396 | dumpintlstr2(str1->stptr, str1->stlen, str2->stptr, str2->stlen);
|
---|
2397 | }
|
---|
2398 |
|
---|
2399 | r->subnode = subn;
|
---|
2400 | if (r->builtin == do_sprintf) {
|
---|
2401 | count_args(r);
|
---|
2402 | r->lnode->printf_count = r->printf_count; /* hack */
|
---|
2403 | }
|
---|
2404 | return r;
|
---|
2405 | }
|
---|
2406 |
|
---|
2407 | /* make_for_loop --- build a for loop */
|
---|
2408 |
|
---|
2409 | static NODE *
|
---|
2410 | make_for_loop(NODE *init, NODE *cond, NODE *incr)
|
---|
2411 | {
|
---|
2412 | register FOR_LOOP_HEADER *r;
|
---|
2413 | NODE *n;
|
---|
2414 |
|
---|
2415 | emalloc(r, FOR_LOOP_HEADER *, sizeof(FOR_LOOP_HEADER), "make_for_loop");
|
---|
2416 | getnode(n);
|
---|
2417 | n->type = Node_illegal;
|
---|
2418 | r->init = init;
|
---|
2419 | r->cond = cond;
|
---|
2420 | r->incr = incr;
|
---|
2421 | n->sub.nodep.r.hd = r;
|
---|
2422 | return n;
|
---|
2423 | }
|
---|
2424 |
|
---|
2425 | /* dup_parms --- return TRUE if there are duplicate parameters */
|
---|
2426 |
|
---|
2427 | static int
|
---|
2428 | dup_parms(NODE *func)
|
---|
2429 | {
|
---|
2430 | register NODE *np;
|
---|
2431 | const char *fname, **names;
|
---|
2432 | int count, i, j, dups;
|
---|
2433 | NODE *params;
|
---|
2434 |
|
---|
2435 | if (func == NULL) /* error earlier */
|
---|
2436 | return TRUE;
|
---|
2437 |
|
---|
2438 | fname = func->param;
|
---|
2439 | count = func->param_cnt;
|
---|
2440 | params = func->rnode;
|
---|
2441 |
|
---|
2442 | if (count == 0) /* no args, no problem */
|
---|
2443 | return FALSE;
|
---|
2444 |
|
---|
2445 | if (params == NULL) /* error earlier */
|
---|
2446 | return TRUE;
|
---|
2447 |
|
---|
2448 | emalloc(names, const char **, count * sizeof(char *), "dup_parms");
|
---|
2449 |
|
---|
2450 | i = 0;
|
---|
2451 | for (np = params; np != NULL; np = np->rnode) {
|
---|
2452 | if (np->param == NULL) { /* error earlier, give up, go home */
|
---|
2453 | free(names);
|
---|
2454 | return TRUE;
|
---|
2455 | }
|
---|
2456 | names[i++] = np->param;
|
---|
2457 | }
|
---|
2458 |
|
---|
2459 | dups = 0;
|
---|
2460 | for (i = 1; i < count; i++) {
|
---|
2461 | for (j = 0; j < i; j++) {
|
---|
2462 | if (strcmp(names[i], names[j]) == 0) {
|
---|
2463 | dups++;
|
---|
2464 | error(
|
---|
2465 | _("function `%s': parameter #%d, `%s', duplicates parameter #%d"),
|
---|
2466 | fname, i+1, names[j], j+1);
|
---|
2467 | }
|
---|
2468 | }
|
---|
2469 | }
|
---|
2470 |
|
---|
2471 | free(names);
|
---|
2472 | return (dups > 0 ? TRUE : FALSE);
|
---|
2473 | }
|
---|
2474 |
|
---|
2475 | /* parms_shadow --- check if parameters shadow globals */
|
---|
2476 |
|
---|
2477 | static int
|
---|
2478 | parms_shadow(const char *fname, NODE *func)
|
---|
2479 | {
|
---|
2480 | int count, i;
|
---|
2481 | int ret = FALSE;
|
---|
2482 |
|
---|
2483 | if (fname == NULL || func == NULL) /* error earlier */
|
---|
2484 | return FALSE;
|
---|
2485 |
|
---|
2486 | count = func->lnode->param_cnt;
|
---|
2487 |
|
---|
2488 | if (count == 0) /* no args, no problem */
|
---|
2489 | return FALSE;
|
---|
2490 |
|
---|
2491 | /*
|
---|
2492 | * Use warning() and not lintwarn() so that can warn
|
---|
2493 | * about all shadowed parameters.
|
---|
2494 | */
|
---|
2495 | for (i = 0; i < count; i++) {
|
---|
2496 | if (lookup(func->parmlist[i]) != NULL) {
|
---|
2497 | warning(
|
---|
2498 | _("function `%s': parameter `%s' shadows global variable"),
|
---|
2499 | fname, func->parmlist[i]);
|
---|
2500 | ret = TRUE;
|
---|
2501 | }
|
---|
2502 | }
|
---|
2503 |
|
---|
2504 | return ret;
|
---|
2505 | }
|
---|
2506 |
|
---|
2507 | /*
|
---|
2508 | * install:
|
---|
2509 | * Install a name in the symbol table, even if it is already there.
|
---|
2510 | * Caller must check against redefinition if that is desired.
|
---|
2511 | */
|
---|
2512 |
|
---|
2513 | NODE *
|
---|
2514 | install(char *name, NODE *value)
|
---|
2515 | {
|
---|
2516 | register NODE *hp;
|
---|
2517 | register size_t len;
|
---|
2518 | register int bucket;
|
---|
2519 |
|
---|
2520 | var_count++;
|
---|
2521 | len = strlen(name);
|
---|
2522 | bucket = hash(name, len, (unsigned long) HASHSIZE);
|
---|
2523 | getnode(hp);
|
---|
2524 | hp->type = Node_hashnode;
|
---|
2525 | hp->hnext = variables[bucket];
|
---|
2526 | variables[bucket] = hp;
|
---|
2527 | hp->hlength = len;
|
---|
2528 | hp->hvalue = value;
|
---|
2529 | hp->hname = name;
|
---|
2530 | hp->hvalue->vname = name;
|
---|
2531 | return hp->hvalue;
|
---|
2532 | }
|
---|
2533 |
|
---|
2534 | /* lookup --- find the most recent hash node for name installed by install */
|
---|
2535 |
|
---|
2536 | NODE *
|
---|
2537 | lookup(const char *name)
|
---|
2538 | {
|
---|
2539 | register NODE *bucket;
|
---|
2540 | register size_t len;
|
---|
2541 |
|
---|
2542 | len = strlen(name);
|
---|
2543 | for (bucket = variables[hash(name, len, (unsigned long) HASHSIZE)];
|
---|
2544 | bucket != NULL; bucket = bucket->hnext)
|
---|
2545 | if (bucket->hlength == len && STREQN(bucket->hname, name, len))
|
---|
2546 | return bucket->hvalue;
|
---|
2547 |
|
---|
2548 | return NULL;
|
---|
2549 | }
|
---|
2550 |
|
---|
2551 | /* var_comp --- compare two variable names */
|
---|
2552 |
|
---|
2553 | static int
|
---|
2554 | var_comp(const void *v1, const void *v2)
|
---|
2555 | {
|
---|
2556 | const NODE *const *npp1, *const *npp2;
|
---|
2557 | const NODE *n1, *n2;
|
---|
2558 | int minlen;
|
---|
2559 |
|
---|
2560 | npp1 = (const NODE *const *) v1;
|
---|
2561 | npp2 = (const NODE *const *) v2;
|
---|
2562 | n1 = *npp1;
|
---|
2563 | n2 = *npp2;
|
---|
2564 |
|
---|
2565 | if (n1->hlength > n2->hlength)
|
---|
2566 | minlen = n1->hlength;
|
---|
2567 | else
|
---|
2568 | minlen = n2->hlength;
|
---|
2569 |
|
---|
2570 | return strncmp(n1->hname, n2->hname, minlen);
|
---|
2571 | }
|
---|
2572 |
|
---|
2573 | /* valinfo --- dump var info */
|
---|
2574 |
|
---|
2575 | static void
|
---|
2576 | valinfo(NODE *n, FILE *fp)
|
---|
2577 | {
|
---|
2578 | if (n->flags & STRING) {
|
---|
2579 | fprintf(fp, "string (");
|
---|
2580 | pp_string_fp(fp, n->stptr, n->stlen, '"', FALSE);
|
---|
2581 | fprintf(fp, ")\n");
|
---|
2582 | } else if (n->flags & NUMBER)
|
---|
2583 | fprintf(fp, "number (%.17g)\n", n->numbr);
|
---|
2584 | else if (n->flags & STRCUR) {
|
---|
2585 | fprintf(fp, "string value (");
|
---|
2586 | pp_string_fp(fp, n->stptr, n->stlen, '"', FALSE);
|
---|
2587 | fprintf(fp, ")\n");
|
---|
2588 | } else if (n->flags & NUMCUR)
|
---|
2589 | fprintf(fp, "number value (%.17g)\n", n->numbr);
|
---|
2590 | else
|
---|
2591 | fprintf(fp, "?? flags %s\n", flags2str(n->flags));
|
---|
2592 | }
|
---|
2593 |
|
---|
2594 |
|
---|
2595 | /* dump_vars --- dump the symbol table */
|
---|
2596 |
|
---|
2597 | void
|
---|
2598 | dump_vars(const char *fname)
|
---|
2599 | {
|
---|
2600 | int i, j;
|
---|
2601 | NODE **table;
|
---|
2602 | NODE *p;
|
---|
2603 | FILE *fp;
|
---|
2604 |
|
---|
2605 | emalloc(table, NODE **, var_count * sizeof(NODE *), "dump_vars");
|
---|
2606 |
|
---|
2607 | if (fname == NULL)
|
---|
2608 | fp = stderr;
|
---|
2609 | else if ((fp = fopen(fname, "w")) == NULL) {
|
---|
2610 | warning(_("could not open `%s' for writing (%s)"), fname, strerror(errno));
|
---|
2611 | warning(_("sending profile to standard error"));
|
---|
2612 | fp = stderr;
|
---|
2613 | }
|
---|
2614 |
|
---|
2615 | for (i = j = 0; i < HASHSIZE; i++)
|
---|
2616 | for (p = variables[i]; p != NULL; p = p->hnext)
|
---|
2617 | table[j++] = p;
|
---|
2618 |
|
---|
2619 | assert(j == var_count);
|
---|
2620 |
|
---|
2621 | /* Shazzam! */
|
---|
2622 | qsort(table, j, sizeof(NODE *), var_comp);
|
---|
2623 |
|
---|
2624 | for (i = 0; i < j; i++) {
|
---|
2625 | p = table[i];
|
---|
2626 | if (p->hvalue->type == Node_func)
|
---|
2627 | continue;
|
---|
2628 | fprintf(fp, "%.*s: ", (int) p->hlength, p->hname);
|
---|
2629 | if (p->hvalue->type == Node_var_array)
|
---|
2630 | fprintf(fp, "array, %ld elements\n", p->hvalue->table_size);
|
---|
2631 | else if (p->hvalue->type == Node_var_new)
|
---|
2632 | fprintf(fp, "unused variable\n");
|
---|
2633 | else if (p->hvalue->type == Node_var)
|
---|
2634 | valinfo(p->hvalue->var_value, fp);
|
---|
2635 | else {
|
---|
2636 | NODE **lhs = get_lhs(p->hvalue, NULL, FALSE);
|
---|
2637 |
|
---|
2638 | valinfo(*lhs, fp);
|
---|
2639 | }
|
---|
2640 | }
|
---|
2641 |
|
---|
2642 | if (fp != stderr && fclose(fp) != 0)
|
---|
2643 | warning(_("%s: close failed (%s)"), fname, strerror(errno));
|
---|
2644 |
|
---|
2645 | free(table);
|
---|
2646 | }
|
---|
2647 |
|
---|
2648 | /* release_all_vars --- free all variable memory */
|
---|
2649 |
|
---|
2650 | void
|
---|
2651 | release_all_vars()
|
---|
2652 | {
|
---|
2653 | int i;
|
---|
2654 | NODE *p, *next;
|
---|
2655 |
|
---|
2656 | for (i = 0; i < HASHSIZE; i++)
|
---|
2657 | for (p = variables[i]; p != NULL; p = next) {
|
---|
2658 | next = p->hnext;
|
---|
2659 |
|
---|
2660 | if (p->hvalue->type == Node_func)
|
---|
2661 | continue;
|
---|
2662 | else if (p->hvalue->type == Node_var_array)
|
---|
2663 | assoc_clear(p->hvalue);
|
---|
2664 | else if (p->hvalue->type != Node_var_new) {
|
---|
2665 | NODE **lhs = get_lhs(p->hvalue, NULL, FALSE);
|
---|
2666 |
|
---|
2667 | unref(*lhs);
|
---|
2668 | }
|
---|
2669 | unref(p);
|
---|
2670 | }
|
---|
2671 | }
|
---|
2672 |
|
---|
2673 | /* finfo --- for use in comparison and sorting of function names */
|
---|
2674 |
|
---|
2675 | struct finfo {
|
---|
2676 | const char *name;
|
---|
2677 | size_t nlen;
|
---|
2678 | NODE *func;
|
---|
2679 | };
|
---|
2680 |
|
---|
2681 | /* fcompare --- comparison function for qsort */
|
---|
2682 |
|
---|
2683 | static int
|
---|
2684 | fcompare(const void *p1, const void *p2)
|
---|
2685 | {
|
---|
2686 | const struct finfo *f1, *f2;
|
---|
2687 | int minlen;
|
---|
2688 |
|
---|
2689 | f1 = (const struct finfo *) p1;
|
---|
2690 | f2 = (const struct finfo *) p2;
|
---|
2691 |
|
---|
2692 | if (f1->nlen > f2->nlen)
|
---|
2693 | minlen = f2->nlen;
|
---|
2694 | else
|
---|
2695 | minlen = f1->nlen;
|
---|
2696 |
|
---|
2697 | return strncmp(f1->name, f2->name, minlen);
|
---|
2698 | }
|
---|
2699 |
|
---|
2700 | /* dump_funcs --- print all functions */
|
---|
2701 |
|
---|
2702 | void
|
---|
2703 | dump_funcs()
|
---|
2704 | {
|
---|
2705 | int i, j;
|
---|
2706 | NODE *p;
|
---|
2707 | struct finfo *tab = NULL;
|
---|
2708 |
|
---|
2709 | /*
|
---|
2710 | * Walk through symbol table countng functions.
|
---|
2711 | * Could be more than func_count if there are
|
---|
2712 | * extension functions.
|
---|
2713 | */
|
---|
2714 | for (i = j = 0; i < HASHSIZE; i++) {
|
---|
2715 | for (p = variables[i]; p != NULL; p = p->hnext) {
|
---|
2716 | if (p->hvalue->type == Node_func) {
|
---|
2717 | j++;
|
---|
2718 | }
|
---|
2719 | }
|
---|
2720 | }
|
---|
2721 |
|
---|
2722 | if (j == 0)
|
---|
2723 | return;
|
---|
2724 |
|
---|
2725 | emalloc(tab, struct finfo *, j * sizeof(struct finfo), "dump_funcs");
|
---|
2726 |
|
---|
2727 | /* now walk again, copying info */
|
---|
2728 | for (i = j = 0; i < HASHSIZE; i++) {
|
---|
2729 | for (p = variables[i]; p != NULL; p = p->hnext) {
|
---|
2730 | if (p->hvalue->type == Node_func) {
|
---|
2731 | tab[j].name = p->hname;
|
---|
2732 | tab[j].nlen = p->hlength;
|
---|
2733 | tab[j].func = p->hvalue;
|
---|
2734 | j++;
|
---|
2735 | }
|
---|
2736 | }
|
---|
2737 | }
|
---|
2738 |
|
---|
2739 |
|
---|
2740 | /* Shazzam! */
|
---|
2741 | qsort(tab, j, sizeof(struct finfo), fcompare);
|
---|
2742 |
|
---|
2743 | for (i = 0; i < j; i++)
|
---|
2744 | pp_func(tab[i].name, tab[i].nlen, tab[i].func);
|
---|
2745 |
|
---|
2746 | free(tab);
|
---|
2747 | }
|
---|
2748 |
|
---|
2749 | /* shadow_funcs --- check all functions for parameters that shadow globals */
|
---|
2750 |
|
---|
2751 | void
|
---|
2752 | shadow_funcs()
|
---|
2753 | {
|
---|
2754 | int i, j;
|
---|
2755 | NODE *p;
|
---|
2756 | struct finfo *tab;
|
---|
2757 | static int calls = 0;
|
---|
2758 | int shadow = FALSE;
|
---|
2759 |
|
---|
2760 | if (func_count == 0)
|
---|
2761 | return;
|
---|
2762 |
|
---|
2763 | if (calls++ != 0)
|
---|
2764 | fatal(_("shadow_funcs() called twice!"));
|
---|
2765 |
|
---|
2766 | emalloc(tab, struct finfo *, func_count * sizeof(struct finfo), "shadow_funcs");
|
---|
2767 |
|
---|
2768 | for (i = j = 0; i < HASHSIZE; i++) {
|
---|
2769 | for (p = variables[i]; p != NULL; p = p->hnext) {
|
---|
2770 | if (p->hvalue->type == Node_func) {
|
---|
2771 | tab[j].name = p->hname;
|
---|
2772 | tab[j].nlen = p->hlength;
|
---|
2773 | tab[j].func = p->hvalue;
|
---|
2774 | j++;
|
---|
2775 | }
|
---|
2776 | }
|
---|
2777 | }
|
---|
2778 |
|
---|
2779 | assert(j == func_count);
|
---|
2780 |
|
---|
2781 | /* Shazzam! */
|
---|
2782 | qsort(tab, func_count, sizeof(struct finfo), fcompare);
|
---|
2783 |
|
---|
2784 | for (i = 0; i < j; i++)
|
---|
2785 | shadow |= parms_shadow(tab[i].name, tab[i].func);
|
---|
2786 |
|
---|
2787 | free(tab);
|
---|
2788 |
|
---|
2789 | /* End with fatal if the user requested it. */
|
---|
2790 | if (shadow && lintfunc != warning)
|
---|
2791 | lintwarn(_("there were shadowed variables."));
|
---|
2792 | }
|
---|
2793 |
|
---|
2794 | /*
|
---|
2795 | * append_right:
|
---|
2796 | * Add new to the rightmost branch of LIST. This uses n^2 time, so we make
|
---|
2797 | * a simple attempt at optimizing it.
|
---|
2798 | */
|
---|
2799 |
|
---|
2800 | static NODE *
|
---|
2801 | append_right(NODE *list, NODE *new)
|
---|
2802 | {
|
---|
2803 | register NODE *oldlist;
|
---|
2804 | static NODE *savefront = NULL, *savetail = NULL;
|
---|
2805 |
|
---|
2806 | if (list == NULL || new == NULL)
|
---|
2807 | return list;
|
---|
2808 |
|
---|
2809 | oldlist = list;
|
---|
2810 | if (savefront == oldlist)
|
---|
2811 | list = savetail; /* Be careful: maybe list->rnode != NULL */
|
---|
2812 | else
|
---|
2813 | savefront = oldlist;
|
---|
2814 |
|
---|
2815 | while (list->rnode != NULL)
|
---|
2816 | list = list->rnode;
|
---|
2817 | savetail = list->rnode = new;
|
---|
2818 | return oldlist;
|
---|
2819 | }
|
---|
2820 |
|
---|
2821 | /*
|
---|
2822 | * append_pattern:
|
---|
2823 | * A wrapper around append_right, used for rule lists.
|
---|
2824 | */
|
---|
2825 | static inline NODE *
|
---|
2826 | append_pattern(NODE **list, NODE *patt)
|
---|
2827 | {
|
---|
2828 | NODE *n = node(patt, Node_rule_node, (NODE *) NULL);
|
---|
2829 |
|
---|
2830 | if (*list == NULL)
|
---|
2831 | *list = n;
|
---|
2832 | else {
|
---|
2833 | NODE *n1 = node(n, Node_rule_list, (NODE *) NULL);
|
---|
2834 | if ((*list)->type != Node_rule_list)
|
---|
2835 | *list = node(*list, Node_rule_list, n1);
|
---|
2836 | else
|
---|
2837 | (void) append_right(*list, n1);
|
---|
2838 | }
|
---|
2839 | return n;
|
---|
2840 | }
|
---|
2841 |
|
---|
2842 | /*
|
---|
2843 | * func_install:
|
---|
2844 | * check if name is already installed; if so, it had better have Null value,
|
---|
2845 | * in which case def is added as the value. Otherwise, install name with def
|
---|
2846 | * as value.
|
---|
2847 | *
|
---|
2848 | * Extra work, build up and save a list of the parameter names in a table
|
---|
2849 | * and hang it off params->parmlist. This is used to set the `vname' field
|
---|
2850 | * of each function parameter during a function call. See eval.c.
|
---|
2851 | */
|
---|
2852 |
|
---|
2853 | static void
|
---|
2854 | func_install(NODE *params, NODE *def)
|
---|
2855 | {
|
---|
2856 | NODE *r, *n, *thisfunc;
|
---|
2857 | char **pnames, *names, *sp;
|
---|
2858 | size_t pcount = 0, space = 0;
|
---|
2859 | int i;
|
---|
2860 |
|
---|
2861 | /* check for function foo(foo) { ... }. bleah. */
|
---|
2862 | for (n = params->rnode; n != NULL; n = n->rnode) {
|
---|
2863 | if (strcmp(n->param, params->param) == 0)
|
---|
2864 | fatal(_("function `%s': can't use function name as parameter name"),
|
---|
2865 | params->param);
|
---|
2866 | }
|
---|
2867 |
|
---|
2868 | thisfunc = NULL; /* turn off warnings */
|
---|
2869 |
|
---|
2870 | /* symbol table managment */
|
---|
2871 | pop_var(params, FALSE);
|
---|
2872 | r = lookup(params->param);
|
---|
2873 | if (r != NULL) {
|
---|
2874 | fatal(_("function name `%s' previously defined"), params->param);
|
---|
2875 | } else if (params->param == builtin_func) /* not a valid function name */
|
---|
2876 | goto remove_params;
|
---|
2877 |
|
---|
2878 | /* install the function */
|
---|
2879 | thisfunc = node(params, Node_func, def);
|
---|
2880 | (void) install(params->param, thisfunc);
|
---|
2881 |
|
---|
2882 | /* figure out amount of space to allocate for variable names */
|
---|
2883 | for (n = params->rnode; n != NULL; n = n->rnode) {
|
---|
2884 | pcount++;
|
---|
2885 | space += strlen(n->param) + 1;
|
---|
2886 | }
|
---|
2887 |
|
---|
2888 | /* allocate it and fill it in */
|
---|
2889 | if (pcount != 0) {
|
---|
2890 | emalloc(names, char *, space, "func_install");
|
---|
2891 | emalloc(pnames, char **, pcount * sizeof(char *), "func_install");
|
---|
2892 | sp = names;
|
---|
2893 | for (i = 0, n = params->rnode; i < pcount; i++, n = n->rnode) {
|
---|
2894 | pnames[i] = sp;
|
---|
2895 | strcpy(sp, n->param);
|
---|
2896 | sp += strlen(n->param) + 1;
|
---|
2897 | }
|
---|
2898 | thisfunc->parmlist = pnames;
|
---|
2899 | } else {
|
---|
2900 | thisfunc->parmlist = NULL;
|
---|
2901 | }
|
---|
2902 |
|
---|
2903 | /* update lint table info */
|
---|
2904 | func_use(params->param, FUNC_DEFINE);
|
---|
2905 |
|
---|
2906 | func_count++; /* used by profiling / pretty printer */
|
---|
2907 |
|
---|
2908 | remove_params:
|
---|
2909 | /* remove params from symbol table */
|
---|
2910 | pop_params(params->rnode);
|
---|
2911 | }
|
---|
2912 |
|
---|
2913 | /* pop_var --- remove a variable from the symbol table */
|
---|
2914 |
|
---|
2915 | static void
|
---|
2916 | pop_var(NODE *np, int freeit)
|
---|
2917 | {
|
---|
2918 | register NODE *bucket, **save;
|
---|
2919 | register size_t len;
|
---|
2920 | char *name;
|
---|
2921 |
|
---|
2922 | name = np->param;
|
---|
2923 | len = strlen(name);
|
---|
2924 | save = &(variables[hash(name, len, (unsigned long) HASHSIZE)]);
|
---|
2925 | for (bucket = *save; bucket != NULL; bucket = bucket->hnext) {
|
---|
2926 | if (len == bucket->hlength && STREQN(bucket->hname, name, len)) {
|
---|
2927 | var_count--;
|
---|
2928 | *save = bucket->hnext;
|
---|
2929 | freenode(bucket);
|
---|
2930 | if (freeit)
|
---|
2931 | free(np->param);
|
---|
2932 | return;
|
---|
2933 | }
|
---|
2934 | save = &(bucket->hnext);
|
---|
2935 | }
|
---|
2936 | }
|
---|
2937 |
|
---|
2938 | /* pop_params --- remove list of function parameters from symbol table */
|
---|
2939 |
|
---|
2940 | /*
|
---|
2941 | * pop parameters out of the symbol table. do this in reverse order to
|
---|
2942 | * avoid reading freed memory if there were duplicated parameters.
|
---|
2943 | */
|
---|
2944 | static void
|
---|
2945 | pop_params(NODE *params)
|
---|
2946 | {
|
---|
2947 | if (params == NULL)
|
---|
2948 | return;
|
---|
2949 | pop_params(params->rnode);
|
---|
2950 | pop_var(params, TRUE);
|
---|
2951 | }
|
---|
2952 |
|
---|
2953 | /* make_param --- make NAME into a function parameter */
|
---|
2954 |
|
---|
2955 | static NODE *
|
---|
2956 | make_param(char *name)
|
---|
2957 | {
|
---|
2958 | NODE *r;
|
---|
2959 |
|
---|
2960 | getnode(r);
|
---|
2961 | r->type = Node_param_list;
|
---|
2962 | r->rnode = NULL;
|
---|
2963 | r->param = name;
|
---|
2964 | r->param_cnt = param_counter++;
|
---|
2965 | return (install(name, r));
|
---|
2966 | }
|
---|
2967 |
|
---|
/* fdesc --- record of how often a function name is defined and used */

struct fdesc {
	char *name;		/* private copy of the function name */
	short used;		/* bumped by func_use() for each non-definition */
	short defined;		/* bumped by func_use() for each FUNC_DEFINE */
	struct fdesc *next;	/* next record in the same hash chain */
};

/* hash table of fdesc chains, indexed by hash() of the name */
static struct fdesc *ftable[HASHSIZE];
|
---|
2974 |
|
---|
2975 | /* func_use --- track uses and definitions of functions */
|
---|
2976 |
|
---|
2977 | static void
|
---|
2978 | func_use(const char *name, enum defref how)
|
---|
2979 | {
|
---|
2980 | struct fdesc *fp;
|
---|
2981 | int len;
|
---|
2982 | int ind;
|
---|
2983 |
|
---|
2984 | len = strlen(name);
|
---|
2985 | ind = hash(name, len, HASHSIZE);
|
---|
2986 |
|
---|
2987 | for (fp = ftable[ind]; fp != NULL; fp = fp->next) {
|
---|
2988 | if (strcmp(fp->name, name) == 0) {
|
---|
2989 | if (how == FUNC_DEFINE)
|
---|
2990 | fp->defined++;
|
---|
2991 | else
|
---|
2992 | fp->used++;
|
---|
2993 | return;
|
---|
2994 | }
|
---|
2995 | }
|
---|
2996 |
|
---|
2997 | /* not in the table, fall through to allocate a new one */
|
---|
2998 |
|
---|
2999 | emalloc(fp, struct fdesc *, sizeof(struct fdesc), "func_use");
|
---|
3000 | memset(fp, '\0', sizeof(struct fdesc));
|
---|
3001 | emalloc(fp->name, char *, len + 1, "func_use");
|
---|
3002 | strcpy(fp->name, name);
|
---|
3003 | if (how == FUNC_DEFINE)
|
---|
3004 | fp->defined++;
|
---|
3005 | else
|
---|
3006 | fp->used++;
|
---|
3007 | fp->next = ftable[ind];
|
---|
3008 | ftable[ind] = fp;
|
---|
3009 | }
|
---|
3010 |
|
---|
3011 | /* check_funcs --- verify functions that are called but not defined */
|
---|
3012 |
|
---|
3013 | static void
|
---|
3014 | check_funcs()
|
---|
3015 | {
|
---|
3016 | struct fdesc *fp, *next;
|
---|
3017 | int i;
|
---|
3018 |
|
---|
3019 | for (i = 0; i < HASHSIZE; i++) {
|
---|
3020 | for (fp = ftable[i]; fp != NULL; fp = fp->next) {
|
---|
3021 | #ifdef REALLYMEAN
|
---|
3022 | /* making this the default breaks old code. sigh. */
|
---|
3023 | if (fp->defined == 0) {
|
---|
3024 | error(
|
---|
3025 | _("function `%s' called but never defined"), fp->name);
|
---|
3026 | errcount++;
|
---|
3027 | }
|
---|
3028 | #else
|
---|
3029 | if (do_lint && fp->defined == 0)
|
---|
3030 | lintwarn(
|
---|
3031 | _("function `%s' called but never defined"), fp->name);
|
---|
3032 | #endif
|
---|
3033 | if (do_lint && fp->used == 0) {
|
---|
3034 | lintwarn(_("function `%s' defined but never called"),
|
---|
3035 | fp->name);
|
---|
3036 | }
|
---|
3037 | }
|
---|
3038 | }
|
---|
3039 |
|
---|
3040 | /* now let's free all the memory */
|
---|
3041 | for (i = 0; i < HASHSIZE; i++) {
|
---|
3042 | for (fp = ftable[i]; fp != NULL; fp = next) {
|
---|
3043 | next = fp->next;
|
---|
3044 | free(fp->name);
|
---|
3045 | free(fp);
|
---|
3046 | }
|
---|
3047 | }
|
---|
3048 | }
|
---|
3049 |
|
---|
3050 | /* param_sanity --- look for parameters that are regexp constants */
|
---|
3051 |
|
---|
3052 | static void
|
---|
3053 | param_sanity(NODE *arglist)
|
---|
3054 | {
|
---|
3055 | NODE *argp, *arg;
|
---|
3056 | int i;
|
---|
3057 |
|
---|
3058 | for (i = 1, argp = arglist; argp != NULL; argp = argp->rnode, i++) {
|
---|
3059 | arg = argp->lnode;
|
---|
3060 | if (arg->type == Node_regex)
|
---|
3061 | warning(_("regexp constant for parameter #%d yields boolean value"), i);
|
---|
3062 | }
|
---|
3063 | }
|
---|
3064 |
|
---|
3065 | /* deferred varibles --- those that are only defined if needed. */
|
---|
3066 |
|
---|
3067 | /*
|
---|
3068 | * Is there any reason to use a hash table for deferred variables? At the
|
---|
3069 | * moment, there are only 1 to 3 such variables, so it may not be worth
|
---|
3070 | * the overhead. If more modules start using this facility, it should
|
---|
3071 | * probably be converted into a hash table.
|
---|
3072 | */
|
---|
3073 |
|
---|
3074 | static struct deferred_variable {
|
---|
3075 | NODE *(*load_func)(void);
|
---|
3076 | struct deferred_variable *next;
|
---|
3077 | char name[1]; /* variable-length array */
|
---|
3078 | } *deferred_variables;
|
---|
3079 |
|
---|
3080 | /* register_deferred_variable --- add a var name and loading function to the list */
|
---|
3081 |
|
---|
3082 | void
|
---|
3083 | register_deferred_variable(const char *name, NODE *(*load_func)(void))
|
---|
3084 | {
|
---|
3085 | struct deferred_variable *dv;
|
---|
3086 | size_t sl = strlen(name);
|
---|
3087 |
|
---|
3088 | emalloc(dv, struct deferred_variable *, sizeof(*dv)+sl,
|
---|
3089 | "register_deferred_variable");
|
---|
3090 | dv->load_func = load_func;
|
---|
3091 | dv->next = deferred_variables;
|
---|
3092 | memcpy(dv->name, name, sl+1);
|
---|
3093 | deferred_variables = dv;
|
---|
3094 | }
|
---|
3095 |
|
---|
3096 | /* variable --- make sure NAME is in the symbol table */
|
---|
3097 |
|
---|
3098 | NODE *
|
---|
3099 | variable(char *name, int can_free, NODETYPE type)
|
---|
3100 | {
|
---|
3101 | register NODE *r;
|
---|
3102 |
|
---|
3103 | if ((r = lookup(name)) != NULL) {
|
---|
3104 | if (r->type == Node_func)
|
---|
3105 | fatal(_("function `%s' called with space between name and `(',\nor used as a variable or an array"),
|
---|
3106 | r->vname);
|
---|
3107 |
|
---|
3108 | } else {
|
---|
3109 | /* not found */
|
---|
3110 | struct deferred_variable *dv;
|
---|
3111 |
|
---|
3112 | for (dv = deferred_variables; TRUE; dv = dv->next) {
|
---|
3113 | if (dv == NULL) {
|
---|
3114 | /*
|
---|
3115 | * This is the only case in which we may not
|
---|
3116 | * free the string.
|
---|
3117 | */
|
---|
3118 | NODE *n;
|
---|
3119 |
|
---|
3120 | if (type == Node_var_array)
|
---|
3121 | n = node(NULL, type, NULL);
|
---|
3122 | else
|
---|
3123 | n = node(Nnull_string, type, NULL);
|
---|
3124 |
|
---|
3125 | return install(name, n);
|
---|
3126 | }
|
---|
3127 | if (STREQ(name, dv->name)) {
|
---|
3128 | r = (*dv->load_func)();
|
---|
3129 | break;
|
---|
3130 | }
|
---|
3131 | }
|
---|
3132 | }
|
---|
3133 | if (can_free)
|
---|
3134 | free(name);
|
---|
3135 | return r;
|
---|
3136 | }
|
---|
3137 |
|
---|
3138 | /* mk_rexp --- make a regular expression constant */
|
---|
3139 |
|
---|
3140 | static NODE *
|
---|
3141 | mk_rexp(NODE *exp)
|
---|
3142 | {
|
---|
3143 | NODE *n;
|
---|
3144 |
|
---|
3145 | if (exp->type == Node_regex)
|
---|
3146 | return exp;
|
---|
3147 |
|
---|
3148 | getnode(n);
|
---|
3149 | n->type = Node_dynregex;
|
---|
3150 | n->re_exp = exp;
|
---|
3151 | n->re_text = NULL;
|
---|
3152 | n->re_reg = NULL;
|
---|
3153 | n->re_flags = 0;
|
---|
3154 | n->re_cnt = 1;
|
---|
3155 | return n;
|
---|
3156 | }
|
---|
3157 |
|
---|
3158 | /* isnoeffect --- when used as a statement, has no side effects */
|
---|
3159 |
|
---|
3160 | /*
|
---|
3161 | * To be completely general, we should recursively walk the parse
|
---|
3162 | * tree, to make sure that all the subexpressions also have no effect.
|
---|
3163 | * Instead, we just weaken the actual warning that's printed, up above
|
---|
3164 | * in the grammar.
|
---|
3165 | */
|
---|
3166 |
|
---|
3167 | static int
|
---|
3168 | isnoeffect(NODETYPE type)
|
---|
3169 | {
|
---|
3170 | switch (type) {
|
---|
3171 | case Node_times:
|
---|
3172 | case Node_quotient:
|
---|
3173 | case Node_mod:
|
---|
3174 | case Node_plus:
|
---|
3175 | case Node_minus:
|
---|
3176 | case Node_subscript:
|
---|
3177 | case Node_concat:
|
---|
3178 | case Node_exp:
|
---|
3179 | case Node_unary_minus:
|
---|
3180 | case Node_field_spec:
|
---|
3181 | case Node_and:
|
---|
3182 | case Node_or:
|
---|
3183 | case Node_equal:
|
---|
3184 | case Node_notequal:
|
---|
3185 | case Node_less:
|
---|
3186 | case Node_greater:
|
---|
3187 | case Node_leq:
|
---|
3188 | case Node_geq:
|
---|
3189 | case Node_match:
|
---|
3190 | case Node_nomatch:
|
---|
3191 | case Node_not:
|
---|
3192 | case Node_val:
|
---|
3193 | case Node_in_array:
|
---|
3194 | case Node_NF:
|
---|
3195 | case Node_NR:
|
---|
3196 | case Node_FNR:
|
---|
3197 | case Node_FS:
|
---|
3198 | case Node_RS:
|
---|
3199 | case Node_FIELDWIDTHS:
|
---|
3200 | case Node_IGNORECASE:
|
---|
3201 | case Node_OFS:
|
---|
3202 | case Node_ORS:
|
---|
3203 | case Node_OFMT:
|
---|
3204 | case Node_CONVFMT:
|
---|
3205 | case Node_BINMODE:
|
---|
3206 | case Node_LINT:
|
---|
3207 | case Node_SUBSEP:
|
---|
3208 | case Node_TEXTDOMAIN:
|
---|
3209 | return TRUE;
|
---|
3210 | default:
|
---|
3211 | break; /* keeps gcc -Wall happy */
|
---|
3212 | }
|
---|
3213 |
|
---|
3214 | return FALSE;
|
---|
3215 | }
|
---|
3216 |
|
---|
3217 | /* isassignable --- can this node be assigned to? */
|
---|
3218 |
|
---|
3219 | static int
|
---|
3220 | isassignable(register NODE *n)
|
---|
3221 | {
|
---|
3222 | switch (n->type) {
|
---|
3223 | case Node_var_new:
|
---|
3224 | case Node_var:
|
---|
3225 | case Node_FIELDWIDTHS:
|
---|
3226 | case Node_RS:
|
---|
3227 | case Node_FS:
|
---|
3228 | case Node_FNR:
|
---|
3229 | case Node_NR:
|
---|
3230 | case Node_NF:
|
---|
3231 | case Node_IGNORECASE:
|
---|
3232 | case Node_OFMT:
|
---|
3233 | case Node_CONVFMT:
|
---|
3234 | case Node_ORS:
|
---|
3235 | case Node_OFS:
|
---|
3236 | case Node_LINT:
|
---|
3237 | case Node_BINMODE:
|
---|
3238 | case Node_SUBSEP:
|
---|
3239 | case Node_TEXTDOMAIN:
|
---|
3240 | case Node_field_spec:
|
---|
3241 | case Node_subscript:
|
---|
3242 | return TRUE;
|
---|
3243 | case Node_param_list:
|
---|
3244 | return ((n->flags & FUNC) == 0); /* ok if not func name */
|
---|
3245 | default:
|
---|
3246 | break; /* keeps gcc -Wall happy */
|
---|
3247 | }
|
---|
3248 | return FALSE;
|
---|
3249 | }
|
---|
3250 |
|
---|
3251 | /* stopme --- for debugging */
|
---|
3252 |
|
---|
3253 | NODE *
|
---|
3254 | stopme(NODE *tree ATTRIBUTE_UNUSED)
|
---|
3255 | {
|
---|
3256 | return (NODE *) 0;
|
---|
3257 | }
|
---|
3258 |
|
---|
3259 | /* dumpintlstr --- write out an initial .po file entry for the string */
|
---|
3260 |
|
---|
3261 | static void
|
---|
3262 | dumpintlstr(const char *str, size_t len)
|
---|
3263 | {
|
---|
3264 | char *cp;
|
---|
3265 |
|
---|
3266 | /* See the GNU gettext distribution for details on the file format */
|
---|
3267 |
|
---|
3268 | if (source != NULL) {
|
---|
3269 | /* ala the gettext sources, remove leading `./'s */
|
---|
3270 | for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
|
---|
3271 | continue;
|
---|
3272 | printf("#: %s:%d\n", cp, sourceline);
|
---|
3273 | }
|
---|
3274 |
|
---|
3275 | printf("msgid ");
|
---|
3276 | pp_string_fp(stdout, str, len, '"', TRUE);
|
---|
3277 | putchar('\n');
|
---|
3278 | printf("msgstr \"\"\n\n");
|
---|
3279 | fflush(stdout);
|
---|
3280 | }
|
---|
3281 |
|
---|
3282 | /* dumpintlstr2 --- write out an initial .po file entry for the string and its plural */
|
---|
3283 |
|
---|
3284 | static void
|
---|
3285 | dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2)
|
---|
3286 | {
|
---|
3287 | char *cp;
|
---|
3288 |
|
---|
3289 | /* See the GNU gettext distribution for details on the file format */
|
---|
3290 |
|
---|
3291 | if (source != NULL) {
|
---|
3292 | /* ala the gettext sources, remove leading `./'s */
|
---|
3293 | for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2)
|
---|
3294 | continue;
|
---|
3295 | printf("#: %s:%d\n", cp, sourceline);
|
---|
3296 | }
|
---|
3297 |
|
---|
3298 | printf("msgid ");
|
---|
3299 | pp_string_fp(stdout, str1, len1, '"', TRUE);
|
---|
3300 | putchar('\n');
|
---|
3301 | printf("msgid_plural ");
|
---|
3302 | pp_string_fp(stdout, str2, len2, '"', TRUE);
|
---|
3303 | putchar('\n');
|
---|
3304 | printf("msgstr[0] \"\"\nmsgstr[1] \"\"\n\n");
|
---|
3305 | fflush(stdout);
|
---|
3306 | }
|
---|
3307 |
|
---|
3308 | /* count_args --- count the number of printf arguments */
|
---|
3309 |
|
---|
3310 | static void
|
---|
3311 | count_args(NODE *tree)
|
---|
3312 | {
|
---|
3313 | size_t count = 0;
|
---|
3314 | NODE *save_tree;
|
---|
3315 |
|
---|
3316 | assert(tree->type == Node_K_printf
|
---|
3317 | || (tree->type == Node_builtin && tree->builtin == do_sprintf));
|
---|
3318 | save_tree = tree;
|
---|
3319 |
|
---|
3320 | tree = tree->lnode; /* printf format string */
|
---|
3321 |
|
---|
3322 | for (count = 0; tree != NULL; tree = tree->rnode)
|
---|
3323 | count++;
|
---|
3324 |
|
---|
3325 | save_tree->printf_count = count;
|
---|
3326 | }
|
---|
3327 |
|
---|
3328 | /* isarray --- can this type be subscripted? */
|
---|
3329 |
|
---|
3330 | static int
|
---|
3331 | isarray(NODE *n)
|
---|
3332 | {
|
---|
3333 | switch (n->type) {
|
---|
3334 | case Node_var_new:
|
---|
3335 | case Node_var_array:
|
---|
3336 | return TRUE;
|
---|
3337 | case Node_param_list:
|
---|
3338 | return (n->flags & FUNC) == 0;
|
---|
3339 | case Node_array_ref:
|
---|
3340 | cant_happen();
|
---|
3341 | break;
|
---|
3342 | default:
|
---|
3343 | break; /* keeps gcc -Wall happy */
|
---|
3344 | }
|
---|
3345 |
|
---|
3346 | return FALSE;
|
---|
3347 | }
|
---|
3348 |
|
---|
3349 | /* See if name is a special token. */
|
---|
3350 |
|
---|
3351 | int
|
---|
3352 | check_special(const char *name)
|
---|
3353 | {
|
---|
3354 | int low, high, mid;
|
---|
3355 | int i;
|
---|
3356 |
|
---|
3357 | low = 0;
|
---|
3358 | high = (sizeof(tokentab) / sizeof(tokentab[0])) - 1;
|
---|
3359 | while (low <= high) {
|
---|
3360 | mid = (low + high) / 2;
|
---|
3361 | i = *name - tokentab[mid].operator[0];
|
---|
3362 | if (i == 0)
|
---|
3363 | i = strcmp(name, tokentab[mid].operator);
|
---|
3364 |
|
---|
3365 | if (i < 0) /* token < mid */
|
---|
3366 | high = mid - 1;
|
---|
3367 | else if (i > 0) /* token > mid */
|
---|
3368 | low = mid + 1;
|
---|
3369 | else
|
---|
3370 | return mid;
|
---|
3371 | }
|
---|
3372 | return -1;
|
---|
3373 | }
|
---|