| 1 | /* | 
|---|
| 2 | * awkgram.y --- yacc/bison parser | 
|---|
| 3 | */ | 
|---|
| 4 |  | 
|---|
| 5 | /* | 
|---|
| 6 | * Copyright (C) 1986, 1988, 1989, 1991-2005 the Free Software Foundation, Inc. | 
|---|
| 7 | * | 
|---|
| 8 | * This file is part of GAWK, the GNU implementation of the | 
|---|
| 9 | * AWK Programming Language. | 
|---|
| 10 | * | 
|---|
| 11 | * GAWK is free software; you can redistribute it and/or modify | 
|---|
| 12 | * it under the terms of the GNU General Public License as published by | 
|---|
| 13 | * the Free Software Foundation; either version 2 of the License, or | 
|---|
| 14 | * (at your option) any later version. | 
|---|
| 15 | * | 
|---|
| 16 | * GAWK is distributed in the hope that it will be useful, | 
|---|
| 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|---|
| 19 | * GNU General Public License for more details. | 
|---|
| 20 | * | 
|---|
| 21 | * You should have received a copy of the GNU General Public License | 
|---|
| 22 | * along with this program; if not, write to the Free Software | 
|---|
| 23 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA | 
|---|
| 24 | */ | 
|---|
| 25 |  | 
|---|
| 26 | %{ | 
|---|
| 27 | #ifdef GAWKDEBUG | 
|---|
| 28 | #define YYDEBUG 12 | 
|---|
| 29 | #endif | 
|---|
| 30 |  | 
|---|
| 31 | #include "awk.h" | 
|---|
| 32 |  | 
|---|
| 33 | #define CAN_FREE        TRUE | 
|---|
| 34 | #define DONT_FREE       FALSE | 
|---|
| 35 |  | 
|---|
| 36 | #if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ | 
|---|
| 37 | static void yyerror(const char *m, ...) ATTRIBUTE_PRINTF_1; | 
|---|
| 38 | #else | 
|---|
| 39 | static void yyerror(); /* va_alist */ | 
|---|
| 40 | #endif | 
|---|
| 41 | static char *get_src_buf P((void)); | 
|---|
| 42 | static int yylex P((void)); | 
|---|
| 43 | static NODE *node_common P((NODETYPE op)); | 
|---|
| 44 | static NODE *snode P((NODE *subn, NODETYPE op, int sindex)); | 
|---|
| 45 | static NODE *make_for_loop P((NODE *init, NODE *cond, NODE *incr)); | 
|---|
| 46 | static NODE *append_right P((NODE *list, NODE *new)); | 
|---|
| 47 | static inline NODE *append_pattern P((NODE **list, NODE *patt)); | 
|---|
| 48 | static void func_install P((NODE *params, NODE *def)); | 
|---|
| 49 | static void pop_var P((NODE *np, int freeit)); | 
|---|
| 50 | static void pop_params P((NODE *params)); | 
|---|
| 51 | static NODE *make_param P((char *name)); | 
|---|
| 52 | static NODE *mk_rexp P((NODE *exp)); | 
|---|
| 53 | static int dup_parms P((NODE *func)); | 
|---|
| 54 | static void param_sanity P((NODE *arglist)); | 
|---|
| 55 | static int parms_shadow P((const char *fname, NODE *func)); | 
|---|
| 56 | static int isnoeffect P((NODETYPE t)); | 
|---|
| 57 | static int isassignable P((NODE *n)); | 
|---|
| 58 | static void dumpintlstr P((const char *str, size_t len)); | 
|---|
| 59 | static void dumpintlstr2 P((const char *str1, size_t len1, const char *str2, size_t len2)); | 
|---|
| 60 | static void count_args P((NODE *n)); | 
|---|
| 61 | static int isarray P((NODE *n)); | 
|---|
| 62 |  | 
|---|
| 63 | enum defref { FUNC_DEFINE, FUNC_USE }; | 
|---|
| 64 | static void func_use P((const char *name, enum defref how)); | 
|---|
| 65 | static void check_funcs P((void)); | 
|---|
| 66 |  | 
|---|
| 67 | static int want_regexp;         /* lexical scanning kludge */ | 
|---|
| 68 | static int can_return;          /* parsing kludge */ | 
|---|
| 69 | static int begin_or_end_rule = FALSE;   /* parsing kludge */ | 
|---|
| 70 | static int parsing_end_rule = FALSE; /* for warnings */ | 
|---|
| 71 | static int in_print = FALSE;    /* lexical scanning kludge for print */ | 
|---|
| 72 | static int in_parens = 0;       /* lexical scanning kludge for print */ | 
|---|
| 73 | static char *lexptr;            /* pointer to next char during parsing */ | 
|---|
| 74 | static char *lexend; | 
|---|
| 75 | static char *lexptr_begin;      /* keep track of where we were for error msgs */ | 
|---|
| 76 | static char *lexeme;            /* beginning of lexeme for debugging */ | 
|---|
| 77 | static char *thisline = NULL; | 
|---|
| 78 | #define YYDEBUG_LEXER_TEXT (lexeme) | 
|---|
| 79 | static int param_counter; | 
|---|
| 80 | static char *tokstart = NULL; | 
|---|
| 81 | static char *tok = NULL; | 
|---|
| 82 | static char *tokend; | 
|---|
| 83 |  | 
|---|
| 84 | static long func_count;         /* total number of functions */ | 
|---|
| 85 |  | 
|---|
| 86 | #define HASHSIZE        1021    /* this constant only used here */ | 
|---|
| 87 | NODE *variables[HASHSIZE]; | 
|---|
| 88 | static int var_count;           /* total number of global variables */ | 
|---|
| 89 |  | 
|---|
| 90 | extern char *source; | 
|---|
| 91 | extern int sourceline; | 
|---|
| 92 | extern struct src *srcfiles; | 
|---|
| 93 | extern long numfiles; | 
|---|
| 94 | extern int errcount; | 
|---|
| 95 | extern NODE *begin_block; | 
|---|
| 96 | extern NODE *end_block; | 
|---|
| 97 |  | 
|---|
| 98 | /* | 
|---|
| 99 | * This string cannot occur as a real awk identifier. | 
|---|
| 100 | * Use it as a special token to make function parsing | 
|---|
| 101 | * uniform, but if it's seen, don't install the function. | 
|---|
| 102 | * e.g. | 
|---|
| 103 | *      function split(x) { return x } | 
|---|
| 104 | *      function x(a) { return a } | 
|---|
| 105 | * should only produce one error message, and not core dump. | 
|---|
| 106 | */ | 
|---|
| 107 | static char builtin_func[] = "@builtin"; | 
|---|
| 108 | %} | 
|---|
| 109 |  | 
|---|
| 110 | %union { | 
|---|
| 111 | long lval; | 
|---|
| 112 | AWKNUM fval; | 
|---|
| 113 | NODE *nodeval; | 
|---|
| 114 | NODETYPE nodetypeval; | 
|---|
| 115 | char *sval; | 
|---|
| 116 | NODE *(*ptrval) P((void)); | 
|---|
| 117 | } | 
|---|
| 118 |  | 
|---|
| 119 | %type <nodeval> function_prologue pattern action variable param_list | 
|---|
| 120 | %type <nodeval> exp common_exp | 
|---|
| 121 | %type <nodeval> simp_exp non_post_simp_exp | 
|---|
| 122 | %type <nodeval> expression_list opt_expression_list print_expression_list | 
|---|
| 123 | %type <nodeval> statements statement if_statement switch_body case_statements case_statement case_value opt_param_list | 
|---|
| 124 | %type <nodeval> simple_stmt opt_simple_stmt | 
|---|
| 125 | %type <nodeval> opt_exp opt_variable regexp | 
|---|
| 126 | %type <nodeval> input_redir output_redir | 
|---|
| 127 | %type <nodetypeval> print | 
|---|
| 128 | %type <nodetypeval> assign_operator a_relop relop_or_less | 
|---|
| 129 | %type <sval> func_name | 
|---|
| 130 | %type <lval> lex_builtin | 
|---|
| 131 |  | 
|---|
| 132 | %token <sval> FUNC_CALL NAME REGEXP | 
|---|
| 133 | %token <lval> ERROR | 
|---|
| 134 | %token <nodeval> YNUMBER YSTRING | 
|---|
| 135 | %token <nodetypeval> RELOP IO_OUT IO_IN | 
|---|
| 136 | %token <nodetypeval> ASSIGNOP ASSIGN MATCHOP CONCAT_OP | 
|---|
| 137 | %token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE | 
|---|
| 138 | %token <nodetypeval> LEX_SWITCH LEX_CASE LEX_DEFAULT LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE | 
|---|
| 139 | %token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION | 
|---|
| 140 | %token <nodetypeval> LEX_GETLINE LEX_NEXTFILE | 
|---|
| 141 | %token <nodetypeval> LEX_IN | 
|---|
| 142 | %token <lval> LEX_AND LEX_OR INCREMENT DECREMENT | 
|---|
| 143 | %token <lval> LEX_BUILTIN LEX_LENGTH | 
|---|
| 144 | %token NEWLINE | 
|---|
| 145 |  | 
|---|
| 146 | /* these are just yylval numbers */ | 
|---|
| 147 |  | 
|---|
| 148 | /* Lowest to highest */ | 
|---|
| 149 | %right ASSIGNOP ASSIGN SLASH_BEFORE_EQUAL | 
|---|
| 150 | %right '?' ':' | 
|---|
| 151 | %left LEX_OR | 
|---|
| 152 | %left LEX_AND | 
|---|
| 153 | %left LEX_GETLINE | 
|---|
| 154 | %nonassoc LEX_IN | 
|---|
| 155 | %left FUNC_CALL LEX_BUILTIN LEX_LENGTH | 
|---|
| 156 | %nonassoc ',' | 
|---|
| 157 | %nonassoc MATCHOP | 
|---|
| 158 | %nonassoc RELOP '<' '>' IO_IN IO_OUT | 
|---|
| 159 | %left CONCAT_OP | 
|---|
| 160 | %left YSTRING YNUMBER | 
|---|
| 161 | %left '+' '-' | 
|---|
| 162 | %left '*' '/' '%' | 
|---|
| 163 | %right '!' UNARY | 
|---|
| 164 | %right '^' | 
|---|
| 165 | %left INCREMENT DECREMENT | 
|---|
| 166 | %left '$' | 
|---|
| 167 | %left '(' ')' | 
|---|
| 168 | %% | 
|---|
| 169 |  | 
|---|
/*
 * start: the whole program, optionally surrounded by newlines.
 * Once the complete program has been reduced, check_funcs() is called.
 */
start
	: opt_nls program opt_nls
		{
			check_funcs();
		}
	;
|---|
| 176 |  | 
|---|
/*
 * program: a possibly empty sequence of rules.  The BEGIN/END
 * bookkeeping flags are cleared after every rule and after every error.
 */
program
	: /* empty */
	| program rule
		{
			begin_or_end_rule = parsing_end_rule = FALSE;
			yyerrok;
		}
	| program error
		{
			begin_or_end_rule = parsing_end_rule = FALSE;
			/*
			 * If errors, give up, don't produce an infinite
			 * stream of syntax error messages.
			 */
			/* yyerrok; */
		}
	;
|---|
| 194 |  | 
|---|
/*
 * rule: one top-level item of the program -- a pattern with an action,
 * a pattern alone, or a function definition.
 */
rule
	: pattern action
		{
			/* hang the action off the pattern node */
			$1->rnode = $2;
		}
	| pattern statement_term
		{
			if ($1->lnode == NULL) {
				/* neither a pattern nor an action: an error */
				if (! begin_or_end_rule)
					msg(_("each rule must have a pattern or an action part"));
				else
					msg(_("%s blocks must have an action part"),
						(parsing_end_rule ? "END" : "BEGIN"));
				errcount++;
			} else {
				/* non-empty pattern without action: supply Node_K_print_rec */
				$1->rnode = node(NULL, Node_K_print_rec, NULL);
			}
		}
	| function_prologue action
		{
			can_return = FALSE;
			/* the prologue value may be null; install only a real one */
			if ($1 != NULL)
				func_install($1, $2);
			yyerrok;
		}
	;
|---|
| 223 |  | 
|---|
/*
 * pattern: the pattern part of a rule.  Each alternative appends an
 * entry to one of the lists expression_value, begin_block or end_block
 * through append_pattern() and yields what append_pattern() returns.
 */
pattern
	: /* empty */
		{
			$$ = append_pattern(&expression_value, (NODE *) NULL);
		}
	| exp
		{
			$$ = append_pattern(&expression_value, $1);
		}
	| exp ',' exp
		{
			/* range pattern: both expressions are kept in a cond pair */
			NODE *range;

			getnode(range);
			range->type = Node_line_range;
			range->condpair = node($1, Node_cond_pair, $3);
			range->triggered = FALSE;
			$$ = append_pattern(&expression_value, range);
		}
	| LEX_BEGIN
		{
			begin_or_end_rule = TRUE;
			$$ = append_pattern(&begin_block, (NODE *) NULL);
		}
	| LEX_END
		{
			parsing_end_rule = TRUE;
			begin_or_end_rule = TRUE;
			$$ = append_pattern(&end_block, (NODE *) NULL);
		}
	;
|---|
| 254 |  | 
|---|
/*
 * action: a brace-enclosed statement list, optionally followed by a
 * semicolon and newlines.  Its value is the statement list itself.
 */
action
	: l_brace statements r_brace opt_semi opt_nls
		{ $$ = $2; }
	;
|---|
| 259 |  | 
|---|
/*
 * func_name: the name in a function definition.  A built-in name is
 * reported as an error and replaced by the builtin_func placeholder
 * string, so that the rest of the definition still parses uniformly.
 */
func_name
	: NAME
		{ $$ = $1; }
	| FUNC_CALL
		{ $$ = $1; }
	| lex_builtin
		{
			yyerror(_("`%s' is a built-in function, it cannot be redefined"),
				tokstart);
			errcount++;
			$$ = builtin_func;
			/* yyerrok; */
		}
	;
|---|
| 274 |  | 
|---|
/* lex_builtin: either of the two built-in function tokens. */
lex_builtin
	: LEX_BUILTIN
	| LEX_LENGTH
	;
|---|
| 279 |  | 
|---|
/*
 * function_prologue: `function name(params)' up to, but not including,
 * the body.  The value is a list whose first element is a parameter
 * node made from the function name (flagged FUNC), followed by the
 * declared parameters.
 */
function_prologue
	: LEX_FUNCTION
		{
			/* start counting parameters afresh for this function */
			param_counter = 0;
		}
	  func_name '(' opt_param_list r_paren opt_nls
		{
			NODE *name_node = make_param($3);

			name_node->flags |= FUNC;
			$$ = append_right(name_node, $5);
			can_return = TRUE;
			/* a duplicated parameter name counts as one error */
			if (dup_parms($$))
				errcount++;
		}
	;
|---|
| 298 |  | 
|---|
regexp
	/*
	 * The mid-rule action bumps want_regexp, which tells yylex that
	 * the next thing is a regexp, so that it reads up to the closing
	 * slash.
	 */
	: a_slash
		{ ++want_regexp; }
	  REGEXP	/* The terminating '/' is consumed by yylex(). */
		{
			NODE *re_node;
			char *text = $3;
			size_t len = strlen(text);

			if (do_lint) {
				if (len == 0)
					lintwarn(_("regexp constant `//' looks like a C++ comment, but is not"));
				else if (text[0] == '*' && text[len-1] == '*')
					/* possible C comment */
					lintwarn(_("regexp constant `/%s/' looks like a C comment, but is not"), tokstart);
			}

			/* build the constant-regexp node */
			getnode(re_node);
			re_node->type = Node_regex;
			re_node->re_exp = make_string(text, len);
			re_node->re_reg = make_regexp(text, len, FALSE, TRUE);
			re_node->re_text = NULL;
			re_node->re_flags = CONST;
			re_node->re_cnt = 1;
			$$ = re_node;
		}
	;
|---|
| 328 |  | 
|---|
/* a_slash: either token that can open a regexp constant. */
a_slash
	: '/'
	| SLASH_BEFORE_EQUAL
	;
|---|
| 333 |  | 
|---|
/*
 * statements: a possibly empty sequence of statements.  Null statements
 * are dropped, a single statement is passed through unchanged, and two
 * or more are chained as Node_statement_list nodes.
 */
statements
	: /* empty */
		{ $$ = NULL; }
	| statements statement
		{
			if ($2 != NULL) {
				if (do_lint && isnoeffect($2->type))
					lintwarn(_("statement may have no effect"));
				if ($1 != NULL)
					/* wrap either side in a list node unless it already is one */
					$$ = append_right(
						($1->type != Node_statement_list
						  ? node($1, Node_statement_list, (NODE *) NULL)
						  : $1),
						($2->type != Node_statement_list
						  ? node($2, Node_statement_list, (NODE *) NULL)
						  : $2));
				else
					$$ = $2;
			} else
				$$ = $1;
			yyerrok;
		}
	| statements error
		{ $$ = NULL; }
	;
|---|
| 358 |  | 
|---|
/* statement_term: newlines, or a semicolon with optional newlines. */
statement_term
	: nls
	| semi opt_nls
	;
|---|
| 363 |  | 
|---|
/*
 * statement: a single statement.  Each alternative yields the parse
 * tree for the statement; a lone semicolon yields NULL.
 */
statement
	: semi opt_nls
		{ $$ = NULL; }
	| l_brace statements r_brace
		{ $$ = $2; }
	| if_statement
		{ $$ = $1; }
	| LEX_SWITCH '(' exp r_paren opt_nls l_brace switch_body opt_nls r_brace
		{ $$ = node($3, Node_K_switch, $7); }
	| LEX_WHILE '(' exp r_paren opt_nls statement
		{ $$ = node($3, Node_K_while, $6); }
	| LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls
		{ $$ = node($6, Node_K_do, $3); }
	| LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement
	  {
		/*
		 * Efficiency hack.  Recognize the special case of
		 *
		 *	for (iggy in foo)
		 *		delete foo[iggy]
		 *
		 * and treat it as if it were
		 *
		 *	delete foo
		 *
		 * Check that the body is a `delete a[i]' statement,
		 * and that both the loop var and array names match.
		 *
		 * The subscript list must hold exactly one expression:
		 * `delete foo[iggy, x]' removes a single element per
		 * iteration and must stay a regular loop.
		 */
		if ($8 != NULL && $8->type == Node_K_delete && $8->rnode != NULL) {
			NODE *arr, *sub;

			assert($8->rnode->type == Node_expression_list);
			arr = $8->lnode;	/* array var */
			sub = $8->rnode->lnode;	/* index var */

			if (   $8->rnode->rnode == NULL	/* only one subscript */
			    && (arr->type == Node_var_new
				|| arr->type == Node_var_array
				|| arr->type == Node_param_list)
			    && (sub->type == Node_var_new
				|| sub->type == Node_var
				|| sub->type == Node_param_list)
			    && strcmp($3, sub->vname) == 0
			    && strcmp($5, arr->vname) == 0) {
				$8->type = Node_K_delete_loop;
				$$ = $8;
				free($3);	/* thanks to valgrind for pointing these out */
				free($5);
			}
			else
				goto regular_loop;
		} else {
	regular_loop:
			$$ = node($8, Node_K_arrayfor,
				make_for_loop(variable($3, CAN_FREE, Node_var),
				(NODE *) NULL, variable($5, CAN_FREE, Node_var_array)));
		}
	  }
	| LEX_FOR '(' opt_simple_stmt semi opt_nls exp semi opt_nls opt_simple_stmt r_paren opt_nls statement
	  {
		$$ = node($12, Node_K_for, (NODE *) make_for_loop($3, $6, $9));
	  }
	| LEX_FOR '(' opt_simple_stmt semi opt_nls semi opt_nls opt_simple_stmt r_paren opt_nls statement
	  {
		/* no loop condition given: pass a null condition */
		$$ = node($11, Node_K_for,
			(NODE *) make_for_loop($3, (NODE *) NULL, $8));
	  }
	| LEX_BREAK statement_term
	   /* for break, maybe we'll have to remember where to break to */
		{ $$ = node((NODE *) NULL, Node_K_break, (NODE *) NULL); }
	| LEX_CONTINUE statement_term
	   /* similarly */
		{ $$ = node((NODE *) NULL, Node_K_continue, (NODE *) NULL); }
	| LEX_NEXT statement_term
		{
		  if (begin_or_end_rule)
			yyerror(_("`%s' used in %s action"), "next",
				(parsing_end_rule ? "END" : "BEGIN"));
		  $$ = node((NODE *) NULL, Node_K_next, (NODE *) NULL);
		}
	| LEX_NEXTFILE statement_term
		{
		  if (do_traditional) {
			/*
			 * can't use yyerror, since may have overshot
			 * the source line
			 */
			errcount++;
			error(_("`nextfile' is a gawk extension"));
		  }
		  if (do_lint)
			lintwarn(_("`nextfile' is a gawk extension"));
		  if (begin_or_end_rule) {
			/* same thing */
			errcount++;
			error(_("`%s' used in %s action"), "nextfile",
				(parsing_end_rule ? "END" : "BEGIN"));
		  }
		  $$ = node((NODE *) NULL, Node_K_nextfile, (NODE *) NULL);
		}
	| LEX_EXIT opt_exp statement_term
		{ $$ = node($2, Node_K_exit, (NODE *) NULL); }
	| LEX_RETURN
		{
		  /* can_return is set only while a function body is being parsed */
		  if (! can_return)
			yyerror(_("`return' used outside function context"));
		}
	  opt_exp statement_term
		{
		  /* a bare `return' returns Nnull_string */
		  $$ = node($3 == NULL ? Nnull_string : $3,
			Node_K_return, (NODE *) NULL);
		}
	| simple_stmt statement_term
	;
|---|
| 479 |  | 
|---|
| 480 | /* | 
|---|
| 481 | * A simple_stmt exists to satisfy a constraint in the POSIX | 
|---|
| 482 | * grammar allowing them to occur as the 1st and 3rd parts | 
|---|
| 483 | * in a `for (...;...;...)' loop.  This is a historical oddity | 
|---|
| 484 | * inherited from Unix awk, not at all documented in the AK&W | 
|---|
| 485 | * awk book.  We support it, as this was reported as a bug. | 
|---|
| 486 | * We don't bother to document it though. So there. | 
|---|
| 487 | */ | 
|---|
simple_stmt
	: print { in_print = TRUE; in_parens = 0; } print_expression_list output_redir
		{
			/*
			 * Optimization: a plain `print' (no expression list at
			 * all) and `print $0' (a one-element list whose element
			 * is a field spec for the constant 0) are both built as
			 * Node_K_print_rec, which is faster for these two cases.
			 */
			int whole_record = FALSE;

			if ($1 == Node_K_print) {
				if ($3 == NULL)
					whole_record = TRUE;
				else if ($3->type == Node_expression_list
					 && $3->rnode == NULL
					 && $3->lnode->type == Node_field_spec
					 && $3->lnode->lnode->type == Node_val
					 && $3->lnode->lnode->numbr == 0.0)
					whole_record = TRUE;
			}

			if (! whole_record) {
				$$ = node($3, $1, $4);
				if ($$->type == Node_K_printf)
					count_args($$);
			} else {
				/* the lint warning below is issued at most once */
				static int warned = FALSE;

				$$ = node(NULL, Node_K_print_rec, $4);

				if (do_lint && $3 == NULL && begin_or_end_rule && ! warned) {
					warned = TRUE;
					lintwarn(
	_("plain `print' in BEGIN or END rule should probably be `print \"\"'"));
				}
			}
		}
	| LEX_DELETE NAME '[' expression_list ']'
		{
			/* delete of subscripted element(s) */
			$$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, $4);
		}
	| LEX_DELETE NAME
		{
			/* delete without subscript: no subscript list in the node */
			if (do_lint)
				lintwarn(_("`delete array' is a gawk extension"));
			if (do_traditional) {
				/*
				 * can't use yyerror, since may have overshot
				 * the source line
				 */
				errcount++;
				error(_("`delete array' is a gawk extension"));
			}
			$$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, (NODE *) NULL);
		}
	| LEX_DELETE '(' NAME ')'
		{
			/* this is for tawk compatibility. maybe the warnings should always be done. */
			if (do_lint)
				lintwarn(_("`delete(array)' is a non-portable tawk extension"));
			if (do_traditional) {
				/*
				 * can't use yyerror, since may have overshot
				 * the source line
				 */
				errcount++;
				error(_("`delete(array)' is a non-portable tawk extension"));
			}
			$$ = node(variable($3, CAN_FREE, Node_var_array), Node_K_delete, (NODE *) NULL);
		}
	| exp
		{ $$ = $1; }
	;
|---|
| 554 |  | 
|---|
/* opt_simple_stmt: an optional simple_stmt; NULL when absent. */
opt_simple_stmt
	: /* empty */
		{ $$ = NULL; }
	| simple_stmt
		{ $$ = $1; }
	;
|---|
| 561 |  | 
|---|
/*
 * switch_body: the list of case/default statements inside a switch.
 * The list is scanned once to report duplicate case values and
 * duplicate `default' labels; the value is a Node_switch_body node
 * holding the list and the default entry (if any).
 */
switch_body
	: case_statements
		{
			if ($1 != NULL) {
				NODE *default_entry = NULL;
				NODE *entry;
				const char **seen = NULL;	/* case values met so far */
				int capacity = 128;
				int nseen = 0;
				int k;

				emalloc(seen, const char **, sizeof(char*) * capacity, "switch_body");
				for (entry = $1; entry != NULL; entry = entry->rnode) {
					NODE *stmt = entry->lnode;
					char *caseval;

					if (stmt->type != Node_K_case) {
						/* Not a case: remember it as the default entry. */
						if (default_entry != NULL)
							yyerror(_("Duplicate `default' detected in switch body"));
						default_entry = entry;
						continue;
					}

					/* Assure that case statement values are unique. */
					if (stmt->lnode->type == Node_regex)
						caseval = stmt->lnode->re_exp->stptr;
					else
						caseval = force_string(tree_eval(stmt->lnode))->stptr;

					for (k = 0; k < nseen; k++)
						if (strcmp(caseval, seen[k]) == 0)
							yyerror(_("duplicate case values in switch body: %s"), caseval);

					/* grow the table in steps of 128 entries */
					if (nseen >= capacity) {
						capacity += 128;
						erealloc(seen, const char **, sizeof(char*) * capacity, "switch_body");
					}
					seen[nseen++] = caseval;
				}

				free(seen);

				/* Create the switch body. */
				$$ = node($1, Node_switch_body, default_entry);
			} else
				$$ = NULL;
		}
	;
|---|
| 613 |  | 
|---|
/*
 * case_statements: a possibly empty sequence of case/default
 * statements, chained as Node_case_list nodes.
 */
case_statements
	: /* empty */
		{ $$ = NULL; }
	| case_statements case_statement
		{
			if ($2 != NULL) {
				if (do_lint && isnoeffect($2->type))
					lintwarn(_("statement may have no effect"));
				if ($1 != NULL)
					/* wrap either side in a list node unless it already is one */
					$$ = append_right(
						($1->type != Node_case_list
						  ? node($1, Node_case_list, (NODE *) NULL)
						  : $1),
						($2->type != Node_case_list
						  ? node($2, Node_case_list, (NODE *) NULL)
						  : $2)
					);
				else
					/* first entry: start the list */
					$$ = node($2, Node_case_list, (NODE *) NULL);
			} else
				$$ = $1;
			yyerrok;
		}
	| case_statements error
		{ $$ = NULL; }
	;
|---|
| 637 |  | 
|---|
/*
 * case_statement: one `case value:' or `default:' label together with
 * the statements that follow it.  A default node carries no value.
 */
case_statement
	: LEX_CASE case_value colon opt_nls statements
		{ $$ = node($2, Node_K_case, $5); }
	| LEX_DEFAULT colon opt_nls statements
		{ $$ = node((NODE *) NULL, Node_K_default, $4); }
	;
|---|
| 644 |  | 
|---|
/*
 * case_value: the constant of a case label -- a number with an
 * optional sign, a string, or a regexp constant.
 */
case_value
	: YNUMBER
		{ $$ = $1; }
	| '-' YNUMBER    %prec UNARY
		{
			/* negate the number node in place */
			$2->numbr = -(force_number($2));
			$$ = $2;
		}
	| '+' YNUMBER    %prec UNARY
		{ $$ = $2; }
	| YSTRING
		{ $$ = $1; }
	| regexp
		{ $$ = $1; }
	;
|---|
| 660 |  | 
|---|
/* print: either output keyword; the value is the token's node type. */
print
	: LEX_PRINT
	| LEX_PRINTF
	;
|---|
| 665 |  | 
|---|
/*
 * Note: ``print(x)'' is already parsed by the first rule,
 * so there is no good in covering it by the second one too.
 * The second rule therefore requires at least two expressions
 * inside the parentheses.
 */
print_expression_list
	: opt_expression_list
	| '(' exp comma expression_list r_paren
		{ $$ = node($2, Node_expression_list, $4); }
	;
|---|
| 675 |  | 
|---|
/*
 * output_redir: optional output redirection after print/printf.
 * Both alternatives switch off the print-specific lexer state
 * (in_print, in_parens) set up by simple_stmt.
 */
output_redir
	: /* empty */
		{
			in_parens = 0;
			in_print = FALSE;
			$$ = NULL;
		}
	| IO_OUT { in_print = FALSE; in_parens = 0; } common_exp
		{
			$$ = node($3, $1, (NODE *) NULL);

			/* reject a two-way redirect whose target is itself a two-way getline */
			if ($1 == Node_redirect_twoway && $3->type == Node_K_getline) {
				if ($3->rnode != NULL
				    && $3->rnode->type == Node_redirect_twoway)
					yyerror(_("multistage two-way pipelines don't work"));
			}
		}
	;
|---|
| 693 |  | 
|---|
/*
 * if_statement: `if (cond) stmt' with an optional `else stmt'.
 * The branches are kept in a Node_if_branches node whose right side
 * is NULL when there is no else part.
 */
if_statement
	: LEX_IF '(' exp r_paren opt_nls statement
		{
			NODE *branches = node($6, Node_if_branches, (NODE *) NULL);

			$$ = node($3, Node_K_if, branches);
		}
	| LEX_IF '(' exp r_paren opt_nls statement
	     LEX_ELSE opt_nls statement
		{
			NODE *branches = node($6, Node_if_branches, $9);

			$$ = node($3, Node_K_if, branches);
		}
	;
|---|
| 705 |  | 
|---|
/* nls: one or more NEWLINE tokens. */
nls
	: NEWLINE
	| nls NEWLINE
	;
|---|
| 710 |  | 
|---|
/* opt_nls: zero or more NEWLINE tokens. */
opt_nls
	: /* empty */
	| nls
	;
|---|
| 715 |  | 
|---|
/* input_redir: optional `< expression' input redirection; NULL when absent. */
input_redir
	: /* empty */
		{ $$ = NULL; }
	| '<' simp_exp
		{ $$ = node($2, Node_redirect_input, (NODE *) NULL); }
	;
|---|
| 722 |  | 
|---|
/* opt_param_list: an optional parameter list; NULL when absent. */
opt_param_list
	: /* empty */
		{ $$ = NULL; }
	| param_list
		{ $$ = $1; }
	;
|---|
| 729 |  | 
|---|
/*
 * param_list: comma-separated parameter names.  Each name becomes a
 * node made by make_param(); any syntax error makes the list NULL.
 */
param_list
	: NAME
		{ $$ = make_param($1); }
	| param_list comma NAME
		{ $$ = append_right($1, make_param($3)); yyerrok; }
	| error
		{ $$ = NULL; }
	| param_list error
		{ $$ = NULL; }
	| param_list comma error
		{ $$ = NULL; }
	;
|---|
| 742 |  | 
|---|
/* optional expression, as in for loop; NULL when absent */
opt_exp
	: /* empty */
		{ $$ = NULL; }
	| exp
		{ $$ = $1; }
	;
|---|
| 750 |  | 
|---|
/* opt_expression_list: an optional expression list; NULL when absent. */
opt_expression_list
	: /* empty */
		{ $$ = NULL; }
	| expression_list
		{ $$ = $1; }
	;
|---|
| 757 |  | 
|---|
/*
 * expression_list: comma-separated expressions.  Every expression gets
 * its own Node_expression_list node and the nodes are chained with
 * append_right().  Any syntax error makes the whole list NULL.
 */
expression_list
	: exp
		{ $$ = node($1, Node_expression_list, (NODE *) NULL); }
	| expression_list comma exp
		{
			NODE *element = node($3, Node_expression_list, (NODE *) NULL);

			$$ = append_right($1, element);
			yyerrok;
		}
	| error
		{ $$ = NULL; }
	| expression_list error
		{ $$ = NULL; }
	| expression_list error exp
		{ $$ = NULL; }
	| expression_list comma error
		{ $$ = NULL; }
	;
|---|
| 776 |  | 
|---|
/* Expressions, not including the comma operator.  */
exp	: variable assign_operator exp %prec ASSIGNOP
		{
			int lhs_type_ok;

			if (do_lint && $3->type == Node_regex)
				lintwarn(_("regular expression on right of assignment"));

			/*
			 * Optimization of `x = x y'.  Can save lots of time
			 * if done a lot.  It applies when the right-hand side is
			 * a concatenation whose left operand is the very node
			 * being assigned to.
			 */
			lhs_type_ok = ($1->type == Node_var
					|| $1->type == Node_var_new
					|| $1->type == Node_param_list);

			if (lhs_type_ok
			    && $2 == Node_assign
			    && $3->type == Node_concat
			    && $3->lnode == $1) {
				/* reuse the concat node, only its type changes */
				$3->type = Node_assign_concat;
				$$ = $3;
			} else {
				$$ = node($1, $2, $3);
			}
		}
	| exp LEX_AND exp
		{ $$ = node($1, Node_and, $3); }
	| exp LEX_OR exp
		{ $$ = node($1, Node_or, $3); }
	| exp MATCHOP exp
		{
			/* this warning is not conditional on do_lint */
			if ($1->type == Node_regex)
				warning(_("regular expression on left of `~' or `!~' operator"));
			$$ = node($1, $2, mk_rexp($3));
		}
	| exp LEX_IN NAME
		{
			/* the array goes on the left, the subscript on the right */
			$$ = node(variable($3, CAN_FREE, Node_var_array), Node_in_array, $1);
		}
	| exp a_relop exp %prec RELOP
		{
			if (do_lint && $3->type == Node_regex)
				lintwarn(_("regular expression on right of comparison"));
			$$ = node($1, $2, $3);
		}
	| exp '?' exp ':' exp
		{
			NODE *branches = node($3, Node_if_branches, $5);

			$$ = node($1, Node_cond_exp, branches);
		}
	| common_exp
		{ $$ = $1; }
	;
|---|
| 820 |  | 
|---|
| 821 | assign_operator /* yields the node type of the assignment; exp uses it as the type of the node it builds */ | 
|---|
| 822 | : ASSIGN | 
|---|
| 823 | { $$ = $1; } | 
|---|
| 824 | | ASSIGNOP | 
|---|
| 825 | { $$ = $1; } | 
|---|
| 826 | | SLASH_BEFORE_EQUAL ASSIGN   /* `/=' arrives as two tokens */ | 
|---|
| 827 | { $$ = Node_assign_quotient; } | 
|---|
| 828 | ; | 
|---|
| 829 |  | 
|---|
| 830 | relop_or_less /* a RELOP token, or a literal '<' which has no token value and is mapped to Node_less here */ | 
|---|
| 831 | : RELOP | 
|---|
| 832 | { $$ = $1; } | 
|---|
| 833 | | '<' | 
|---|
| 834 | { $$ = Node_less; } | 
|---|
| 835 | ; | 
|---|
| 836 | a_relop /* any relational operator: relop_or_less plus '>' */ | 
|---|
| 837 | : relop_or_less /* no action: the default $$ = $1 passes the node type through */ | 
|---|
| 838 | | '>' | 
|---|
| 839 | { $$ = Node_greater; } | 
|---|
| 840 | ; | 
|---|
| 841 |  | 
|---|
| 842 | common_exp /* regexps, parenthesized `in' tests, simple expressions and their concatenation */ | 
|---|
| 843 | : regexp | 
|---|
| 844 | { $$ = $1; } | 
|---|
| 845 | | '!' regexp %prec UNARY | 
|---|
| 846 | { /* built as a Node_nomatch whose left side is a field spec for the constant 0 */ | 
|---|
| 847 | $$ = node(node(make_number(0.0), | 
|---|
| 848 | Node_field_spec, | 
|---|
| 849 | (NODE *) NULL), | 
|---|
| 850 | Node_nomatch, | 
|---|
| 851 | $2); | 
|---|
| 852 | } | 
|---|
| 853 | | '(' expression_list r_paren LEX_IN NAME /* here the whole expression list is the right operand of Node_in_array */ | 
|---|
| 854 | { $$ = node(variable($5, CAN_FREE, Node_var_array), Node_in_array, $2); } | 
|---|
| 855 | | simp_exp | 
|---|
| 856 | { $$ = $1; } | 
|---|
| 857 | | common_exp simp_exp %prec CONCAT_OP /* juxtaposition is concatenation */ | 
|---|
| 858 | { $$ = node($1, Node_concat, $2); } | 
|---|
| 859 | ; | 
|---|
| 860 |  | 
|---|
| 861 | simp_exp /* arithmetic, getline forms and post-increment/decrement */ | 
|---|
| 862 | : non_post_simp_exp | 
|---|
| 863 | /* Binary operators in order of decreasing precedence.  */ | 
|---|
| 864 | | simp_exp '^' simp_exp | 
|---|
| 865 | { $$ = node($1, Node_exp, $3); } | 
|---|
| 866 | | simp_exp '*' simp_exp | 
|---|
| 867 | { $$ = node($1, Node_times, $3); } | 
|---|
| 868 | | simp_exp '/' simp_exp | 
|---|
| 869 | { $$ = node($1, Node_quotient, $3); } | 
|---|
| 870 | | simp_exp '%' simp_exp | 
|---|
| 871 | { $$ = node($1, Node_mod, $3); } | 
|---|
| 872 | | simp_exp '+' simp_exp | 
|---|
| 873 | { $$ = node($1, Node_plus, $3); } | 
|---|
| 874 | | simp_exp '-' simp_exp | 
|---|
| 875 | { $$ = node($1, Node_minus, $3); } | 
|---|
| 876 | | LEX_GETLINE opt_variable input_redir | 
|---|
| 877 | { /* target variable (may be NULL) on the left, redirection (may be NULL) on the right */ | 
|---|
| 878 | if (do_lint && parsing_end_rule && $3 == NULL) | 
|---|
| 879 | lintwarn(_("non-redirected `getline' undefined inside END action")); | 
|---|
| 880 | $$ = node($2, Node_K_getline, $3); | 
|---|
| 881 | } | 
|---|
| 882 | | simp_exp IO_IN LEX_GETLINE opt_variable | 
|---|
| 883 | { /* the source expression is wrapped in a node whose type is the IO_IN token's value */ | 
|---|
| 884 | $$ = node($4, Node_K_getline, | 
|---|
| 885 | node($1, $2, (NODE *) NULL)); | 
|---|
| 886 | } | 
|---|
| 887 | | variable INCREMENT | 
|---|
| 888 | { $$ = node($1, Node_postincrement, (NODE *) NULL); } | 
|---|
| 889 | | variable DECREMENT | 
|---|
| 890 | { $$ = node($1, Node_postdecrement, (NODE *) NULL); } | 
|---|
| 891 | ; | 
|---|
| 892 |  | 
|---|
| 893 | non_post_simp_exp /* primaries and prefix operators: everything in simp_exp that is not a binary or postfix form */ | 
|---|
| 894 | : '!' simp_exp %prec UNARY | 
|---|
| 895 | { $$ = node($2, Node_not, (NODE *) NULL); } | 
|---|
| 896 | | '(' exp r_paren | 
|---|
| 897 | { $$ = $2; } | 
|---|
| 898 | | LEX_BUILTIN | 
|---|
| 899 | '(' opt_expression_list r_paren | 
|---|
| 900 | { $$ = snode($3, Node_builtin, (int) $1); } /* the token's value becomes snode()'s sindex argument */ | 
|---|
| 901 | | LEX_LENGTH '(' opt_expression_list r_paren | 
|---|
| 902 | { $$ = snode($3, Node_builtin, (int) $1); } | 
|---|
| 903 | | LEX_LENGTH | 
|---|
| 904 | { /* bare `length': built like a call with no argument list */ | 
|---|
| 905 | if (do_lint) | 
|---|
| 906 | lintwarn(_("call of `length' without parentheses is not portable")); | 
|---|
| 907 | $$ = snode((NODE *) NULL, Node_builtin, (int) $1); | 
|---|
| 908 | if (do_posix) | 
|---|
| 909 | warning(_("call of `length' without parentheses is deprecated by POSIX")); | 
|---|
| 910 | } | 
|---|
| 911 | | FUNC_CALL '(' opt_expression_list r_paren | 
|---|
| 912 | { /* arguments on the left, the function name as a string node on the right */ | 
|---|
| 913 | $$ = node($3, Node_func_call, make_string($1, strlen($1))); | 
|---|
| 914 | $$->funcbody = NULL; | 
|---|
| 915 | func_use($1, FUNC_USE); | 
|---|
| 916 | param_sanity($3); | 
|---|
| 917 | free($1); /* the name string itself is released once the node has been built */ | 
|---|
| 918 | } | 
|---|
| 919 | | variable | 
|---|
| 920 | | INCREMENT variable | 
|---|
| 921 | { $$ = node($2, Node_preincrement, (NODE *) NULL); } | 
|---|
| 922 | | DECREMENT variable | 
|---|
| 923 | { $$ = node($2, Node_predecrement, (NODE *) NULL); } | 
|---|
| 924 | | YNUMBER | 
|---|
| 925 | { $$ = $1; } | 
|---|
| 926 | | YSTRING | 
|---|
| 927 | { $$ = $1; } | 
|---|
| 928 |  | 
|---|
| 929 | | '-' simp_exp    %prec UNARY | 
|---|
| 930 | { | 
|---|
| 931 | if ($2->type == Node_val && ($2->flags & (STRCUR|STRING)) == 0) { /* a constant with neither string flag set: negate it in place */ | 
|---|
| 932 | $2->numbr = -(force_number($2)); | 
|---|
| 933 | $$ = $2; | 
|---|
| 934 | } else | 
|---|
| 935 | $$ = node($2, Node_unary_minus, (NODE *) NULL); | 
|---|
| 936 | } | 
|---|
| 937 | | '+' simp_exp    %prec UNARY | 
|---|
| 938 | { | 
|---|
| 939 | /* | 
|---|
| 940 | * was: $$ = $2 | 
|---|
| 941 | * POSIX semantics: force a conversion to numeric type | 
|---|
| 942 | */ | 
|---|
| 943 | $$ = node (make_number(0.0), Node_plus, $2); /* i.e. 0 + operand */ | 
|---|
| 944 | } | 
|---|
| 945 | ; | 
|---|
| 946 |  | 
|---|
| 947 | opt_variable /* optional variable, used by the getline forms in simp_exp; NULL when omitted */ | 
|---|
| 948 | : /* empty */ | 
|---|
| 949 | { $$ = NULL; } | 
|---|
| 950 | | variable | 
|---|
| 951 | { $$ = $1; } | 
|---|
| 952 | ; | 
|---|
| 953 |  | 
|---|
/*
 * variable --- a plain name, a subscripted array element, or a field
 * reference (`$' expression).
 *
 * Every alternative must leave a real NODE in $$: rules that use
 * `variable' (for example the assignment form of `exp') dereference
 * the result right away.
 */
variable
	: NAME
		{ $$ = variable($1, CAN_FREE, Node_var_new); }
	| NAME '[' expression_list ']'
		{
		NODE *n;

		if ((n = lookup($1)) != NULL && ! isarray(n)) {
			yyerror(_("use of non-array as array"));
			/*
			 * yyerror() returns and parsing goes on.  Without an
			 * explicit assignment the default action ($$ = $1)
			 * would leave the NAME string in $$, to be treated
			 * as a NODE by whichever rule reduces next.  Build
			 * the subscript node anyway so that $$ is valid.
			 */
			$$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3);
		} else if ($3 == NULL) {
			/* expression_list yields NULL after a syntax error */
			fatal(_("invalid subscript expression"));
		} else if ($3->rnode == NULL) {
			/* single subscript: use the expression itself, drop the list node */
			$$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3->lnode);
			freenode($3);
		} else
			/* several subscripts: keep the whole list */
			$$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3);
		}
	| '$' non_post_simp_exp
		{ $$ = node($2, Node_field_spec, (NODE *) NULL); }
/*
#if 0
	| lex_builtin
		{ fatal(_("can't use built-in function `%s' as a variable"), tokstart); }
#endif
*/
	;
| 980 |  | 
|---|
| 981 | l_brace /* opening brace followed by opt_nls (presumably optional newlines; see that rule) */ | 
|---|
| 982 | : '{' opt_nls | 
|---|
| 983 | ; | 
|---|
| 984 |  | 
|---|
| 985 | r_brace /* closing brace followed by opt_nls; yyerrok tells the parser that error recovery is complete */ | 
|---|
| 986 | : '}' opt_nls   { yyerrok; } | 
|---|
| 987 | ; | 
|---|
| 988 |  | 
|---|
| 989 | r_paren /* closing parenthesis; also ends any error recovery in progress */ | 
|---|
| 990 | : ')' { yyerrok; } | 
|---|
| 991 | ; | 
|---|
| 992 |  | 
|---|
| 993 | opt_semi /* an optional semicolon; no semantic value is produced */ | 
|---|
| 994 | : /* empty */ | 
|---|
| 995 | | semi | 
|---|
| 996 | ; | 
|---|
| 997 |  | 
|---|
| 998 | semi /* semicolon; also ends any error recovery in progress */ | 
|---|
| 999 | : ';'   { yyerrok; } | 
|---|
| 1000 | ; | 
|---|
| 1001 |  | 
|---|
| 1002 | colon /* colon; also ends any error recovery in progress */ | 
|---|
| 1003 | : ':'   { yyerrok; } | 
|---|
| 1004 | ; | 
|---|
| 1005 |  | 
|---|
| 1006 | comma   : ',' opt_nls   { yyerrok; } /* comma followed by opt_nls; also ends any error recovery in progress */ | 
|---|
| 1007 | ; | 
|---|
| 1008 |  | 
|---|
| 1009 | %% | 
|---|
| 1010 |  | 
|---|
| 1011 | struct token { | 
|---|
| 1012 | const char *operator;           /* text to match */ | 
|---|
| 1013 | NODETYPE value;         /* node type */ | 
|---|
| 1014 | int class;              /* lexical class */ | 
|---|
| 1015 | unsigned flags;         /* # of args. allowed and compatability */ | 
|---|
| 1016 | #       define  ARGS    0xFF    /* 0, 1, 2, 3 args allowed (any combination */ | 
|---|
| 1017 | #       define  A(n)    (1<<(n)) | 
|---|
| 1018 | #       define  VERSION_MASK    0xFF00  /* old awk is zero */ | 
|---|
| 1019 | #       define  NOT_OLD         0x0100  /* feature not in old awk */ | 
|---|
| 1020 | #       define  NOT_POSIX       0x0200  /* feature not in POSIX */ | 
|---|
| 1021 | #       define  GAWKX           0x0400  /* gawk extension */ | 
|---|
| 1022 | #       define  RESX            0x0800  /* Bell Labs Research extension */ | 
|---|
| 1023 | NODE *(*ptr) P((NODE *));       /* function that implements this keyword */ | 
|---|
| 1024 | }; | 
|---|
| 1025 |  | 
|---|
| 1026 | /* Tokentab is sorted ascii ascending order, so it can be binary searched. */ | 
|---|
| 1027 | /* Function pointers come from declarations in awk.h. */ | 
|---|
| 1028 |  | 
|---|
| 1029 | static const struct token tokentab[] = { | 
|---|
| 1030 | {"BEGIN",       Node_illegal,    LEX_BEGIN,     0,              0}, | 
|---|
| 1031 | {"END",         Node_illegal,    LEX_END,       0,              0}, | 
|---|
| 1032 | #ifdef ARRAYDEBUG | 
|---|
| 1033 | {"adump",       Node_builtin,    LEX_BUILTIN,   GAWKX|A(1),     do_adump}, | 
|---|
| 1034 | #endif | 
|---|
| 1035 | {"and",         Node_builtin,    LEX_BUILTIN,   GAWKX|A(2),     do_and}, | 
|---|
| 1036 | {"asort",       Node_builtin,    LEX_BUILTIN,   GAWKX|A(1)|A(2),        do_asort}, | 
|---|
| 1037 | {"asorti",      Node_builtin,    LEX_BUILTIN,   GAWKX|A(1)|A(2),        do_asorti}, | 
|---|
| 1038 | {"atan2",       Node_builtin,    LEX_BUILTIN,   NOT_OLD|A(2),   do_atan2}, | 
|---|
| 1039 | {"bindtextdomain",      Node_builtin,    LEX_BUILTIN,   GAWKX|A(1)|A(2),        do_bindtextdomain}, | 
|---|
| 1040 | {"break",       Node_K_break,    LEX_BREAK,     0,              0}, | 
|---|
| 1041 | #ifdef ALLOW_SWITCH | 
|---|
| 1042 | {"case",        Node_K_case,     LEX_CASE,      GAWKX,          0}, | 
|---|
| 1043 | #endif | 
|---|
| 1044 | {"close",       Node_builtin,    LEX_BUILTIN,   NOT_OLD|A(1)|A(2),      do_close}, | 
|---|
| 1045 | {"compl",       Node_builtin,    LEX_BUILTIN,   GAWKX|A(1),     do_compl}, | 
|---|
| 1046 | {"continue",    Node_K_continue, LEX_CONTINUE,  0,              0}, | 
|---|
| 1047 | {"cos",         Node_builtin,    LEX_BUILTIN,   NOT_OLD|A(1),   do_cos}, | 
|---|
| 1048 | {"dcgettext",   Node_builtin,    LEX_BUILTIN,   GAWKX|A(1)|A(2)|A(3),   do_dcgettext}, | 
|---|
| 1049 | {"dcngettext",  Node_builtin,    LEX_BUILTIN,   GAWKX|A(1)|A(2)|A(3)|A(4)|A(5), do_dcngettext}, | 
|---|
| 1050 | #ifdef ALLOW_SWITCH | 
|---|
| 1051 | {"default",     Node_K_default,  LEX_DEFAULT,   GAWKX,          0}, | 
|---|
| 1052 | #endif | 
|---|
| 1053 | {"delete",      Node_K_delete,   LEX_DELETE,    NOT_OLD,        0}, | 
|---|
| 1054 | {"do",          Node_K_do,       LEX_DO,        NOT_OLD,        0}, | 
|---|
| 1055 | {"else",        Node_illegal,    LEX_ELSE,      0,              0}, | 
|---|
| 1056 | {"exit",        Node_K_exit,     LEX_EXIT,      0,              0}, | 
|---|
| 1057 | {"exp",         Node_builtin,    LEX_BUILTIN,   A(1),           do_exp}, | 
|---|
| 1058 | {"extension",   Node_builtin,    LEX_BUILTIN,   GAWKX|A(2),     do_ext}, | 
|---|
| 1059 | {"fflush",      Node_builtin,    LEX_BUILTIN,   RESX|A(0)|A(1), do_fflush}, | 
|---|
| 1060 | {"for",         Node_K_for,      LEX_FOR,       0,              0}, | 
|---|
| 1061 | {"func",        Node_K_function, LEX_FUNCTION,  NOT_POSIX|NOT_OLD,      0}, | 
|---|
| 1062 | {"function",    Node_K_function, LEX_FUNCTION,  NOT_OLD,        0}, | 
|---|
| 1063 | {"gensub",      Node_builtin,    LEX_BUILTIN,   GAWKX|A(3)|A(4), do_gensub}, | 
|---|
| 1064 | {"getline",     Node_K_getline,  LEX_GETLINE,   NOT_OLD,        0}, | 
|---|
| 1065 | {"gsub",        Node_builtin,    LEX_BUILTIN,   NOT_OLD|A(2)|A(3), do_gsub}, | 
|---|
| 1066 | {"if",          Node_K_if,       LEX_IF,        0,              0}, | 
|---|
| 1067 | {"in",          Node_illegal,    LEX_IN,        0,              0}, | 
|---|
| 1068 | {"index",       Node_builtin,    LEX_BUILTIN,   A(2),           do_index}, | 
|---|
| 1069 | {"int",         Node_builtin,    LEX_BUILTIN,   A(1),           do_int}, | 
|---|
| 1070 | {"length",      Node_builtin,    LEX_LENGTH,    A(0)|A(1),      do_length}, | 
|---|
| 1071 | {"log",         Node_builtin,    LEX_BUILTIN,   A(1),           do_log}, | 
|---|
| 1072 | {"lshift",      Node_builtin,    LEX_BUILTIN,   GAWKX|A(2),     do_lshift}, | 
|---|
| 1073 | {"match",       Node_builtin,    LEX_BUILTIN,   NOT_OLD|A(2)|A(3), do_match}, | 
|---|
| 1074 | {"mktime",      Node_builtin,    LEX_BUILTIN,   GAWKX|A(1),     do_mktime}, | 
|---|
| 1075 | {"next",        Node_K_next,     LEX_NEXT,      0,              0}, | 
|---|
| 1076 | {"nextfile",    Node_K_nextfile, LEX_NEXTFILE,  GAWKX,          0}, | 
|---|
| 1077 | {"or",          Node_builtin,    LEX_BUILTIN,   GAWKX|A(2),     do_or}, | 
|---|
| 1078 | {"print",       Node_K_print,    LEX_PRINT,     0,              0}, | 
|---|
| 1079 | {"printf",      Node_K_printf,   LEX_PRINTF,    0,              0}, | 
|---|
| 1080 | {"rand",        Node_builtin,    LEX_BUILTIN,   NOT_OLD|A(0),   do_rand}, | 
|---|
| 1081 | {"return",      Node_K_return,   LEX_RETURN,    NOT_OLD,        0}, | 
|---|
| 1082 | {"rshift",      Node_builtin,    LEX_BUILTIN,   GAWKX|A(2),     do_rshift}, | 
|---|
| 1083 | {"sin",         Node_builtin,    LEX_BUILTIN,   NOT_OLD|A(1),   do_sin}, | 
|---|
| 1084 | {"split",       Node_builtin,    LEX_BUILTIN,   A(2)|A(3),      do_split}, | 
|---|
| 1085 | {"sprintf",     Node_builtin,    LEX_BUILTIN,   0,              do_sprintf}, | 
|---|
| 1086 | {"sqrt",        Node_builtin,    LEX_BUILTIN,   A(1),           do_sqrt}, | 
|---|
| 1087 | {"srand",       Node_builtin,    LEX_BUILTIN,   NOT_OLD|A(0)|A(1), do_srand}, | 
|---|
| 1088 | #if defined(GAWKDEBUG) || defined(ARRAYDEBUG) /* || ... */ | 
|---|
| 1089 | {"stopme",      Node_builtin,    LEX_BUILTIN,   GAWKX|A(0),     stopme}, | 
|---|
| 1090 | #endif | 
|---|
| 1091 | {"strftime",    Node_builtin,    LEX_BUILTIN,   GAWKX|A(0)|A(1)|A(2), do_strftime}, | 
|---|
| 1092 | {"strtonum",    Node_builtin,    LEX_BUILTIN,   GAWKX|A(1),     do_strtonum}, | 
|---|
| 1093 | {"sub",         Node_builtin,    LEX_BUILTIN,   NOT_OLD|A(2)|A(3), do_sub}, | 
|---|
| 1094 | {"substr",      Node_builtin,    LEX_BUILTIN,   A(2)|A(3),      do_substr}, | 
|---|
| 1095 | #ifdef ALLOW_SWITCH | 
|---|
| 1096 | {"switch",      Node_K_switch,   LEX_SWITCH,    GAWKX,          0}, | 
|---|
| 1097 | #endif | 
|---|
| 1098 | {"system",      Node_builtin,    LEX_BUILTIN,   NOT_OLD|A(1),   do_system}, | 
|---|
| 1099 | {"systime",     Node_builtin,    LEX_BUILTIN,   GAWKX|A(0),     do_systime}, | 
|---|
| 1100 | {"tolower",     Node_builtin,    LEX_BUILTIN,   NOT_OLD|A(1),   do_tolower}, | 
|---|
| 1101 | {"toupper",     Node_builtin,    LEX_BUILTIN,   NOT_OLD|A(1),   do_toupper}, | 
|---|
| 1102 | {"while",       Node_K_while,    LEX_WHILE,     0,              0}, | 
|---|
| 1103 | {"xor",         Node_builtin,    LEX_BUILTIN,   GAWKX|A(2),     do_xor}, | 
|---|
| 1104 | }; | 
|---|
| 1105 |  | 
|---|
| 1106 | #ifdef MBS_SUPPORT | 
|---|
| 1107 | /* Shift state carried from one nextc() call to the next; nextc() gives mbrlen() a copy and stores the result back.  */ | 
|---|
| 1108 | static mbstate_t cur_mbstate; | 
|---|
| 1109 | /* Ring buffer of per-byte marks written by nextc(): 1 for a single-byte character or the first byte of a multibyte one, larger values for the bytes that follow, 0 for a slot not examined yet.  */ | 
|---|
| 1110 | #define MAX_CHAR_IN_RING_BUFFER 8 | 
|---|
| 1111 | #define RING_BUFFER_SIZE (MAX_CHAR_IN_RING_BUFFER * MB_LEN_MAX) | 
|---|
| 1112 | static char cur_char_ring[RING_BUFFER_SIZE]; | 
|---|
| 1113 | /* Ring slot of the byte most recently returned by nextc(); pushback() steps it back by one.  */ | 
|---|
| 1114 | static int cur_ring_idx; | 
|---|
| 1115 | /* True when the last nextc() returned a single-byte character | 
|---|
| 1116 | or the 1st byte of a multibyte character.  */ | 
|---|
| 1117 | #define nextc_is_1stbyte (cur_char_ring[cur_ring_idx] == 1) | 
|---|
| 1118 | #else /* MBS_SUPPORT */ | 
|---|
| 1119 | /* without multibyte support every byte is a character of its own, so this is always true */ | 
|---|
| 1120 | #define nextc_is_1stbyte 1 | 
|---|
| 1121 | #endif /* MBS_SUPPORT */ | 
|---|
| 1122 |  | 
|---|
| 1123 | /* getfname --- return name of a builtin function (for pretty printing) */ | 
|---|
| 1124 |  | 
|---|
| 1125 | const char * | 
|---|
| 1126 | getfname(register NODE *(*fptr)(NODE *)) | 
|---|
| 1127 | { | 
|---|
| 1128 | register int i, j; | 
|---|
| 1129 |  | 
|---|
| 1130 | j = sizeof(tokentab) / sizeof(tokentab[0]); | 
|---|
| 1131 | /* linear search, no other way to do it */ | 
|---|
| 1132 | for (i = 0; i < j; i++) | 
|---|
| 1133 | if (tokentab[i].ptr == fptr) | 
|---|
| 1134 | return tokentab[i].operator; | 
|---|
| 1135 |  | 
|---|
| 1136 | return NULL; | 
|---|
| 1137 | } | 
|---|
| 1138 |  | 
|---|
| 1139 | /* yyerror --- print a syntax error message, show where */ | 
|---|
| 1140 |  | 
|---|
| 1141 | /* | 
|---|
| 1142 | * Function identifier purposely indented to avoid mangling | 
|---|
| 1143 | * by ansi2knr.  Sigh. | 
|---|
| 1144 | */ | 
|---|
| 1145 |  | 
|---|
| 1146 | static void | 
|---|
| 1147 | #if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ | 
|---|
| 1148 | yyerror(const char *m, ...) | 
|---|
| 1149 | #else | 
|---|
| 1150 | /* VARARGS0 */ | 
|---|
| 1151 | yyerror(va_alist) | 
|---|
| 1152 | va_dcl | 
|---|
| 1153 | #endif | 
|---|
| 1154 | { | 
|---|
| 1155 | va_list args; | 
|---|
| 1156 | const char *mesg = NULL; | 
|---|
| 1157 | register char *bp, *cp; | 
|---|
| 1158 | char *scan; | 
|---|
| 1159 | char *buf; | 
|---|
| 1160 | int count; | 
|---|
| 1161 | static char end_of_file_line[] = "(END OF FILE)"; | 
|---|
| 1162 | char save; | 
|---|
| 1163 |  | 
|---|
| 1164 | errcount++; | 
|---|
| 1165 | /* Find the current line in the input file */ | 
|---|
| 1166 | if (lexptr && lexeme) { | 
|---|
| 1167 | if (thisline == NULL) { | 
|---|
| 1168 | cp = lexeme; | 
|---|
| 1169 | if (*cp == '\n') { | 
|---|
| 1170 | cp--; | 
|---|
| 1171 | mesg = _("unexpected newline or end of string"); | 
|---|
| 1172 | } | 
|---|
| 1173 | for (; cp != lexptr_begin && *cp != '\n'; --cp) | 
|---|
| 1174 | continue; | 
|---|
| 1175 | if (*cp == '\n') | 
|---|
| 1176 | cp++; | 
|---|
| 1177 | thisline = cp; | 
|---|
| 1178 | } | 
|---|
| 1179 | /* NL isn't guaranteed */ | 
|---|
| 1180 | bp = lexeme; | 
|---|
| 1181 | while (bp < lexend && *bp && *bp != '\n') | 
|---|
| 1182 | bp++; | 
|---|
| 1183 | } else { | 
|---|
| 1184 | thisline = end_of_file_line; | 
|---|
| 1185 | bp = thisline + strlen(thisline); | 
|---|
| 1186 | } | 
|---|
| 1187 |  | 
|---|
| 1188 | /* | 
|---|
| 1189 | * Saving and restoring *bp keeps valgrind happy, | 
|---|
| 1190 | * since the guts of glibc uses strlen, even though | 
|---|
| 1191 | * we're passing an explict precision. Sigh. | 
|---|
| 1192 | * | 
|---|
| 1193 | * 8/2003: We may not need this anymore. | 
|---|
| 1194 | */ | 
|---|
| 1195 | save = *bp; | 
|---|
| 1196 | *bp = '\0'; | 
|---|
| 1197 |  | 
|---|
| 1198 | msg("%.*s", (int) (bp - thisline), thisline); | 
|---|
| 1199 |  | 
|---|
| 1200 | *bp = save; | 
|---|
| 1201 |  | 
|---|
| 1202 | #if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ | 
|---|
| 1203 | va_start(args, m); | 
|---|
| 1204 | if (mesg == NULL) | 
|---|
| 1205 | mesg = m; | 
|---|
| 1206 | #else | 
|---|
| 1207 | va_start(args); | 
|---|
| 1208 | if (mesg == NULL) | 
|---|
| 1209 | mesg = va_arg(args, char *); | 
|---|
| 1210 | #endif | 
|---|
| 1211 | count = (bp - thisline) + strlen(mesg) + 2 + 1; | 
|---|
| 1212 | emalloc(buf, char *, count, "yyerror"); | 
|---|
| 1213 |  | 
|---|
| 1214 | bp = buf; | 
|---|
| 1215 |  | 
|---|
| 1216 | if (lexptr != NULL) { | 
|---|
| 1217 | scan = thisline; | 
|---|
| 1218 | while (scan < lexeme) | 
|---|
| 1219 | if (*scan++ == '\t') | 
|---|
| 1220 | *bp++ = '\t'; | 
|---|
| 1221 | else | 
|---|
| 1222 | *bp++ = ' '; | 
|---|
| 1223 | *bp++ = '^'; | 
|---|
| 1224 | *bp++ = ' '; | 
|---|
| 1225 | } | 
|---|
| 1226 | strcpy(bp, mesg); | 
|---|
| 1227 | err("", buf, args); | 
|---|
| 1228 | va_end(args); | 
|---|
| 1229 | free(buf); | 
|---|
| 1230 | } | 
|---|
| 1231 |  | 
|---|
| 1232 | /* get_src_buf --- read the next buffer of source program; returns the new lexptr, or NULL when there is no more source text */ | 
|---|
| 1233 |  | 
|---|
| 1234 | static char * | 
|---|
| 1235 | get_src_buf() | 
|---|
| 1236 | { | 
|---|
| 1237 | static int samefile = FALSE; /* TRUE from the moment a source file is opened until read() reports its end */ | 
|---|
| 1238 | static int nextfile = 0; /* index into srcfiles[] of the source to use next */ | 
|---|
| 1239 | static char *buf = NULL; /* buffer reused for copied command-line text and for file data */ | 
|---|
| 1240 | static size_t buflen = 0; /* allocated size of buf; 0 means not allocated */ | 
|---|
| 1241 | static int fd; /* descriptor of the source file being read */ | 
|---|
| 1242 |  | 
|---|
| 1243 | int n; | 
|---|
| 1244 | register char *scan; | 
|---|
| 1245 | int newfile; | 
|---|
| 1246 | struct stat sbuf; | 
|---|
| 1247 | int readcount = 0; | 
|---|
| 1248 | int l; | 
|---|
| 1249 | char *readloc; | 
|---|
| 1250 |  | 
|---|
| 1251 | again: | 
|---|
| 1252 | newfile = FALSE; | 
|---|
| 1253 | if (nextfile > numfiles) | 
|---|
| 1254 | return NULL; | 
|---|
| 1255 |  | 
|---|
| 1256 | if (srcfiles[nextfile].stype == CMDLINE) { /* program text given on the command line: no file to read */ | 
|---|
| 1257 | if ((l = strlen(srcfiles[nextfile].val)) == 0) { | 
|---|
| 1258 | /* | 
|---|
| 1259 | * Yet Another Special case: | 
|---|
| 1260 | *      gawk '' /path/name | 
|---|
| 1261 | * Sigh. | 
|---|
| 1262 | */ | 
|---|
| 1263 | static int warned = FALSE; | 
|---|
| 1264 |  | 
|---|
| 1265 | if (do_lint && ! warned) { | 
|---|
| 1266 | warned = TRUE; | 
|---|
| 1267 | lintwarn(_("empty program text on command line")); | 
|---|
| 1268 | } | 
|---|
| 1269 | ++nextfile; | 
|---|
| 1270 | goto again; | 
|---|
| 1271 | } | 
|---|
| 1272 | if (srcfiles[nextfile].val[l-1] == '\n') { | 
|---|
| 1273 | /* has terminating newline, can use it directly */ | 
|---|
| 1274 | sourceline = 1; | 
|---|
| 1275 | lexptr = lexptr_begin = srcfiles[nextfile].val; | 
|---|
| 1276 | /* fall through to pointer adjustment and return, below */ | 
|---|
| 1277 | } else { | 
|---|
| 1278 | /* copy it into static buffer */ | 
|---|
| 1279 |  | 
|---|
| 1280 | /* make sure buffer exists and has room */ | 
|---|
| 1281 | if (buflen == 0) { | 
|---|
| 1282 | emalloc(buf, char *, l+2, "get_src_buf"); | 
|---|
| 1283 | buflen = l + 2; | 
|---|
| 1284 | } else if (l+2 > buflen) { | 
|---|
| 1285 | erealloc(buf, char *, l+2, "get_src_buf"); | 
|---|
| 1286 | buflen = l + 2; | 
|---|
| 1287 | } /* else | 
|---|
| 1288 | buffer has room, just use it */ | 
|---|
| 1289 |  | 
|---|
| 1290 | /* copy in data */ | 
|---|
| 1291 | memcpy(buf, srcfiles[nextfile].val, l); | 
|---|
| 1292 | buf[l] = '\n'; /* supply the missing newline; the two extra bytes allocated above hold it and the NUL */ | 
|---|
| 1293 | buf[++l] = '\0'; | 
|---|
| 1294 |  | 
|---|
| 1295 | /* set vars and return */ | 
|---|
| 1296 | lexptr = lexptr_begin = buf; /* NOTE(review): unlike the branch above, sourceline is not reset to 1 here; confirm that this is intended */ | 
|---|
| 1297 | } | 
|---|
| 1298 | lexend = lexptr + l; | 
|---|
| 1299 | nextfile++;     /* for next entry to this routine */ | 
|---|
| 1300 | return lexptr; | 
|---|
| 1301 | } | 
|---|
| 1302 |  | 
|---|
| 1303 | if (! samefile) { /* start on the next source file */ | 
|---|
| 1304 | source = srcfiles[nextfile].val; | 
|---|
| 1305 | if (source == NULL) {   /* read all the source files, all done */ | 
|---|
| 1306 | if (buf != NULL) { | 
|---|
| 1307 | free(buf); | 
|---|
| 1308 | buf = NULL; | 
|---|
| 1309 | } | 
|---|
| 1310 | buflen = 0; | 
|---|
| 1311 | return lexeme = lexptr = lexptr_begin = NULL; | 
|---|
| 1312 | } | 
|---|
| 1313 | fd = pathopen(source); | 
|---|
| 1314 | if (fd <= INVALID_HANDLE) { | 
|---|
| 1315 | char *in; | 
|---|
| 1316 |  | 
|---|
| 1317 | /* suppress file name and line no. in error mesg */ | 
|---|
| 1318 | in = source; | 
|---|
| 1319 | source = NULL; | 
|---|
| 1320 | fatal(_("can't open source file `%s' for reading (%s)"), | 
|---|
| 1321 | in, strerror(errno)); | 
|---|
| 1322 | } | 
|---|
| 1323 | l = optimal_bufsize(fd, & sbuf); | 
|---|
| 1324 | /* | 
|---|
| 1325 | * Make sure that something silly like | 
|---|
| 1326 | *      AWKBUFSIZE=8 make check | 
|---|
| 1327 | * works ok. | 
|---|
| 1328 | */ | 
|---|
| 1329 | #define A_DECENT_BUFFER_SIZE    128 | 
|---|
| 1330 | if (l < A_DECENT_BUFFER_SIZE) | 
|---|
| 1331 | l = A_DECENT_BUFFER_SIZE; | 
|---|
| 1332 | #undef A_DECENT_BUFFER_SIZE | 
|---|
| 1333 |  | 
|---|
| 1334 | newfile = TRUE; | 
|---|
| 1335 |  | 
|---|
| 1336 | /* make sure buffer exists and has room */ | 
|---|
| 1337 | if (buflen == 0) { | 
|---|
| 1338 | emalloc(buf, char *, l+2, "get_src_buf"); | 
|---|
| 1339 | buflen = l + 2; | 
|---|
| 1340 | } else if (l+2 > buflen) { | 
|---|
| 1341 | erealloc(buf, char *, l+2, "get_src_buf"); | 
|---|
| 1342 | buflen = l + 2; | 
|---|
| 1343 | } /* else | 
|---|
| 1344 | buffer has room, just use it */ | 
|---|
| 1345 |  | 
|---|
| 1346 | readcount = l; /* the first read asks for l bytes, two fewer than were allocated */ | 
|---|
| 1347 | readloc = lexeme = lexptr = lexptr_begin = buf; | 
|---|
| 1348 | samefile = TRUE; | 
|---|
| 1349 | sourceline = 1; | 
|---|
| 1350 | } else { | 
|---|
| 1351 | /* | 
|---|
| 1352 | * In same file, ran off edge of buffer. | 
|---|
| 1353 | * Shift current line down to front, adjust | 
|---|
| 1354 | * pointers and fill in the rest of the buffer. | 
|---|
| 1355 | */ | 
|---|
| 1356 |  | 
|---|
| 1357 | int lexeme_offset = lexeme - lexptr_begin; /* taken before buf can move, so they stay valid across erealloc */ | 
|---|
| 1358 | int lexptr_offset = lexptr - lexptr_begin; | 
|---|
| 1359 | int lexend_offset = lexend - lexptr_begin; | 
|---|
| 1360 |  | 
|---|
| 1361 | /* find beginning of current line */ | 
|---|
| 1362 | for (scan = lexeme; scan >= lexptr_begin; scan--) { | 
|---|
| 1363 | if (*scan == '\n') { | 
|---|
| 1364 | scan++; | 
|---|
| 1365 | break; | 
|---|
| 1366 | } | 
|---|
| 1367 | } | 
|---|
| 1368 |  | 
|---|
| 1369 | if (scan <= buf) { /* no newline at or before lexeme: nothing can be discarded in front of the current line */ | 
|---|
| 1370 | /* have to grow the buffer */ | 
|---|
| 1371 | buflen *= 2; | 
|---|
| 1372 | erealloc(buf, char *, buflen, "get_src_buf"); | 
|---|
| 1373 | } else { | 
|---|
| 1374 | /* shift things down */ | 
|---|
| 1375 | memmove(buf, scan, lexend - scan); | 
|---|
| 1376 | /* | 
|---|
| 1377 | * make offsets relative to start of line, | 
|---|
| 1378 | * not start of buffer. | 
|---|
| 1379 | */ | 
|---|
| 1380 | lexend_offset = lexend - scan; | 
|---|
| 1381 | lexeme_offset = lexeme - scan; | 
|---|
| 1382 | lexptr_offset = lexptr - scan; | 
|---|
| 1383 | } | 
|---|
| 1384 |  | 
|---|
| 1385 | /* adjust pointers */ | 
|---|
| 1386 | lexeme = buf + lexeme_offset; | 
|---|
| 1387 | lexptr = buf + lexptr_offset; | 
|---|
| 1388 | lexend = buf + lexend_offset; | 
|---|
| 1389 | lexptr_begin = buf; | 
|---|
| 1390 | readcount = buflen - (lexend - buf); /* whatever space is left after the data that was kept */ | 
|---|
| 1391 | readloc = lexend; | 
|---|
| 1392 | } | 
|---|
| 1393 |  | 
|---|
| 1394 | /* add more data to buffer */ | 
|---|
| 1395 | n = read(fd, readloc, readcount); | 
|---|
| 1396 | if (n == -1) | 
|---|
| 1397 | fatal(_("can't read sourcefile `%s' (%s)"), | 
|---|
| 1398 | source, strerror(errno)); | 
|---|
| 1399 | if (n == 0) { /* end of this file: close it and move on to the next source */ | 
|---|
| 1400 | if (newfile) { | 
|---|
| 1401 | static int warned = FALSE; | 
|---|
| 1402 |  | 
|---|
| 1403 | if (do_lint && ! warned) { | 
|---|
| 1404 | warned = TRUE; | 
|---|
| 1405 | lintwarn(_("source file `%s' is empty"), source); | 
|---|
| 1406 | } | 
|---|
| 1407 | } | 
|---|
| 1408 | if (fd != fileno(stdin)) /* safety */ | 
|---|
| 1409 | close(fd); | 
|---|
| 1410 | samefile = FALSE; | 
|---|
| 1411 | nextfile++; | 
|---|
| 1412 | goto again; | 
|---|
| 1413 | } | 
|---|
| 1414 | lexend = lexptr + n; /* NOTE(review): this matches readloc + n only when lexptr == readloc; nextc() refills only once lexptr has reached lexend, but confirm for any other caller */ | 
|---|
| 1415 | return lexptr; | 
|---|
| 1416 | } | 
|---|
| 1417 |  | 
|---|
/*
 * tokadd --- add a character to the token buffer
 *
 * Stores `x' at tok and advances tok; when that reaches tokend the
 * buffer is grown with tokexpand().  Evaluates to the new tok.
 */

#define tokadd(x) (*tok++ = (x), (tok != tokend) ? tok : tokexpand())
| 1421 |  | 
|---|
| 1422 | /* tokexpand --- grow the token buffer */ | 
|---|
| 1423 |  | 
|---|
| 1424 | char * | 
|---|
| 1425 | tokexpand() | 
|---|
| 1426 | { | 
|---|
| 1427 | static int toksize = 60; | 
|---|
| 1428 | int tokoffset; | 
|---|
| 1429 |  | 
|---|
| 1430 | tokoffset = tok - tokstart; | 
|---|
| 1431 | toksize *= 2; | 
|---|
| 1432 | if (tokstart != NULL) | 
|---|
| 1433 | erealloc(tokstart, char *, toksize, "tokexpand"); | 
|---|
| 1434 | else | 
|---|
| 1435 | emalloc(tokstart, char *, toksize, "tokexpand"); | 
|---|
| 1436 | tokend = tokstart + toksize; | 
|---|
| 1437 | tok = tokstart + tokoffset; | 
|---|
| 1438 | return tok; | 
|---|
| 1439 | } | 
|---|
| 1440 |  | 
|---|
| 1441 | /* nextc --- get the next input character */ | 
|---|
| 1442 |  | 
|---|
| 1443 | #ifdef MBS_SUPPORT | 
|---|
| 1444 |  | 
|---|
| 1445 | static int | 
|---|
| 1446 | nextc(void) | 
|---|
| 1447 | { | 
|---|
| 1448 | if (gawk_mb_cur_max > 1) { | 
|---|
| 1449 | if (!lexptr || lexptr >= lexend) { | 
|---|
| 1450 | if (! get_src_buf()) | 
|---|
| 1451 | return EOF; | 
|---|
| 1452 | } | 
|---|
| 1453 |  | 
|---|
| 1454 | /* Update the buffer index.  */ | 
|---|
| 1455 | cur_ring_idx = (cur_ring_idx == RING_BUFFER_SIZE - 1)? 0 : | 
|---|
| 1456 | cur_ring_idx + 1; | 
|---|
| 1457 |  | 
|---|
| 1458 | /* Did we already check the current character?  */ | 
|---|
| 1459 | if (cur_char_ring[cur_ring_idx] == 0) { | 
|---|
| 1460 | /* No, we need to check the next character on the buffer.  */ | 
|---|
| 1461 | int idx, work_ring_idx = cur_ring_idx; | 
|---|
| 1462 | mbstate_t tmp_state; | 
|---|
| 1463 | size_t mbclen; | 
|---|
| 1464 |  | 
|---|
| 1465 | for (idx = 0 ; lexptr + idx < lexend ; idx++) { | 
|---|
| 1466 | tmp_state = cur_mbstate; | 
|---|
| 1467 | mbclen = mbrlen(lexptr, idx + 1, &tmp_state); | 
|---|
| 1468 |  | 
|---|
| 1469 | if (mbclen == 1 || mbclen == (size_t)-1 || mbclen == 0) { | 
|---|
| 1470 | /* It is a singlebyte character, non-complete multibyte | 
|---|
| 1471 | character or EOF.  We treat it as a singlebyte | 
|---|
| 1472 | character.  */ | 
|---|
| 1473 | cur_char_ring[work_ring_idx] = 1; | 
|---|
| 1474 | break; | 
|---|
| 1475 | } else if (mbclen == (size_t)-2) { | 
|---|
| 1476 | /* It is not a complete multibyte character.  */ | 
|---|
| 1477 | cur_char_ring[work_ring_idx] = idx + 1; | 
|---|
| 1478 | } else { | 
|---|
| 1479 | /* mbclen > 1 */ | 
|---|
| 1480 | cur_char_ring[work_ring_idx] = mbclen; | 
|---|
| 1481 | break; | 
|---|
| 1482 | } | 
|---|
| 1483 | work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)? | 
|---|
| 1484 | 0 : work_ring_idx + 1; | 
|---|
| 1485 | } | 
|---|
| 1486 | cur_mbstate = tmp_state; | 
|---|
| 1487 |  | 
|---|
| 1488 | /* Put a mark on the position on which we write next character.  */ | 
|---|
| 1489 | work_ring_idx = (work_ring_idx == RING_BUFFER_SIZE - 1)? | 
|---|
| 1490 | 0 : work_ring_idx + 1; | 
|---|
| 1491 | cur_char_ring[work_ring_idx] = 0; | 
|---|
| 1492 | } | 
|---|
| 1493 |  | 
|---|
| 1494 | return (int) (unsigned char) *lexptr++; | 
|---|
| 1495 | } | 
|---|
| 1496 | else { | 
|---|
| 1497 | int c; | 
|---|
| 1498 |  | 
|---|
| 1499 | if (lexptr && lexptr < lexend) | 
|---|
| 1500 | c = (int) (unsigned char) *lexptr++; | 
|---|
| 1501 | else if (get_src_buf()) | 
|---|
| 1502 | c = (int) (unsigned char) *lexptr++; | 
|---|
| 1503 | else | 
|---|
| 1504 | c = EOF; | 
|---|
| 1505 |  | 
|---|
| 1506 | return c; | 
|---|
| 1507 | } | 
|---|
| 1508 | } | 
|---|
| 1509 |  | 
|---|
| 1510 | #else /* MBS_SUPPORT */ | 
|---|
| 1511 |  | 
|---|
/*
 * Byte-at-a-time nextc() for builds without multibyte support: a real
 * function under GAWKDEBUG, a macro otherwise.  Both return the next
 * input byte as an unsigned char value, refilling through
 * get_src_buf() when the buffer is missing or used up, and EOF when
 * no more input is available.
 */
#if GAWKDEBUG
int
nextc(void)
{
	if ((lexptr != NULL && lexptr < lexend) || get_src_buf() != NULL)
		return (int) (unsigned char) *lexptr++;

	return EOF;
}
#else
#define nextc()	(((lexptr != NULL && lexptr < lexend) || get_src_buf() != NULL) \
			? ((int) (unsigned char) *lexptr++) \
			: EOF)
#endif
| 1533 |  | 
|---|
| 1534 | #endif /* MBS_SUPPORT */ | 
|---|
| 1535 |  | 
|---|
| 1536 | /* pushback --- push a character back on the input */ | 
|---|
| 1537 |  | 
|---|
| 1538 | static inline void | 
|---|
| 1539 | pushback(void) | 
|---|
| 1540 | { | 
|---|
| 1541 | #ifdef MBS_SUPPORT | 
|---|
| 1542 | if (gawk_mb_cur_max > 1) | 
|---|
| 1543 | cur_ring_idx = (cur_ring_idx == 0)? RING_BUFFER_SIZE - 1 : | 
|---|
| 1544 | cur_ring_idx - 1; | 
|---|
| 1545 | #endif | 
|---|
| 1546 | (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr); | 
|---|
| 1547 | } | 
|---|
| 1548 |  | 
|---|
| 1549 |  | 
|---|
| 1550 | /* allow_newline --- allow newline after &&, ||, ? and : */ | 
|---|
| 1551 |  | 
|---|
| 1552 | static void | 
|---|
| 1553 | allow_newline(void) | 
|---|
| 1554 | { | 
|---|
| 1555 | int c; | 
|---|
| 1556 |  | 
|---|
| 1557 | for (;;) { | 
|---|
| 1558 | c = nextc(); | 
|---|
| 1559 | if (c == EOF) | 
|---|
| 1560 | break; | 
|---|
| 1561 | if (c == '#') { | 
|---|
| 1562 | while ((c = nextc()) != '\n' && c != EOF) | 
|---|
| 1563 | continue; | 
|---|
| 1564 | if (c == EOF) | 
|---|
| 1565 | break; | 
|---|
| 1566 | } | 
|---|
| 1567 | if (c == '\n') | 
|---|
| 1568 | sourceline++; | 
|---|
| 1569 | if (! ISSPACE(c)) { | 
|---|
| 1570 | pushback(); | 
|---|
| 1571 | break; | 
|---|
| 1572 | } | 
|---|
| 1573 | } | 
|---|
| 1574 | } | 
|---|
| 1575 |  | 
|---|
| 1576 | /* yylex --- Read the input and turn it into tokens. */ | 
|---|
| 1577 |  | 
|---|
| 1578 | static int | 
|---|
| 1579 | yylex(void) | 
|---|
| 1580 | { | 
|---|
| 1581 | register int c; | 
|---|
| 1582 | int seen_e = FALSE;             /* These are for numbers */ | 
|---|
| 1583 | int seen_point = FALSE; | 
|---|
| 1584 | int esc_seen;           /* for literal strings */ | 
|---|
| 1585 | int mid; | 
|---|
| 1586 | static int did_newline = FALSE; | 
|---|
| 1587 | char *tokkey; | 
|---|
| 1588 | static int lasttok = 0, eof_warned = FALSE; | 
|---|
| 1589 | int inhex = FALSE; | 
|---|
| 1590 | int intlstr = FALSE; | 
|---|
| 1591 |  | 
|---|
| 1592 | if (nextc() == EOF) { | 
|---|
| 1593 | if (lasttok != NEWLINE) { | 
|---|
| 1594 | lasttok = NEWLINE; | 
|---|
| 1595 | if (do_lint && ! eof_warned) { | 
|---|
| 1596 | lintwarn(_("source file does not end in newline")); | 
|---|
| 1597 | eof_warned = TRUE; | 
|---|
| 1598 | } | 
|---|
| 1599 | return NEWLINE; /* fake it */ | 
|---|
| 1600 | } | 
|---|
| 1601 | return 0; | 
|---|
| 1602 | } | 
|---|
| 1603 | pushback(); | 
|---|
| 1604 | #if defined OS2 || defined __EMX__ | 
|---|
| 1605 | /* | 
|---|
| 1606 | * added for OS/2's extproc feature of cmd.exe | 
|---|
| 1607 | * (like #! in BSD sh) | 
|---|
| 1608 | */ | 
|---|
| 1609 | if (strncasecmp(lexptr, "extproc ", 8) == 0) { | 
|---|
| 1610 | while (*lexptr && *lexptr != '\n') | 
|---|
| 1611 | lexptr++; | 
|---|
| 1612 | } | 
|---|
| 1613 | #endif | 
|---|
| 1614 | lexeme = lexptr; | 
|---|
| 1615 | thisline = NULL; | 
|---|
| 1616 | if (want_regexp) { | 
|---|
| 1617 | int in_brack = 0;       /* count brackets, [[:alnum:]] allowed */ | 
|---|
| 1618 | /* | 
|---|
| 1619 | * Counting brackets is non-trivial. [[] is ok, | 
|---|
| 1620 | * and so is [\]], with a point being that /[/]/ as a regexp | 
|---|
| 1621 | * constant has to work. | 
|---|
| 1622 | * | 
|---|
| 1623 | * Do not count [ or ] if either one is preceded by a \. | 
|---|
| 1624 | * A `[' should be counted if | 
|---|
| 1625 | *  a) it is the first one so far (in_brack == 0) | 
|---|
| 1626 | *  b) it is the `[' in `[:' | 
|---|
| 1627 | * A ']' should be counted if not preceded by a \, since | 
|---|
| 1628 | * it is either closing `:]' or just a plain list. | 
|---|
| 1629 | * According to POSIX, []] is how you put a ] into a set. | 
|---|
| 1630 | * Try to handle that too. | 
|---|
| 1631 | * | 
|---|
| 1632 | * The code for \ handles \[ and \]. | 
|---|
| 1633 | */ | 
|---|
| 1634 |  | 
|---|
| 1635 | want_regexp = FALSE; | 
|---|
| 1636 | tok = tokstart; | 
|---|
| 1637 | for (;;) { | 
|---|
| 1638 | c = nextc(); | 
|---|
| 1639 |  | 
|---|
| 1640 | if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) { | 
|---|
| 1641 | case '[': | 
|---|
| 1642 | /* one day check for `.' and `=' too */ | 
|---|
| 1643 | if (nextc() == ':' || in_brack == 0) | 
|---|
| 1644 | in_brack++; | 
|---|
| 1645 | pushback(); | 
|---|
| 1646 | break; | 
|---|
| 1647 | case ']': | 
|---|
| 1648 | if (tokstart[0] == '[' | 
|---|
| 1649 | && (tok == tokstart + 1 | 
|---|
| 1650 | || (tok == tokstart + 2 | 
|---|
| 1651 | && tokstart[1] == '^'))) | 
|---|
| 1652 | /* do nothing */; | 
|---|
| 1653 | else | 
|---|
| 1654 | in_brack--; | 
|---|
| 1655 | break; | 
|---|
| 1656 | case '\\': | 
|---|
| 1657 | if ((c = nextc()) == EOF) { | 
|---|
| 1658 | yyerror(_("unterminated regexp ends with `\\' at end of file")); | 
|---|
| 1659 | goto end_regexp; /* kludge */ | 
|---|
| 1660 | } else if (c == '\n') { | 
|---|
| 1661 | sourceline++; | 
|---|
| 1662 | continue; | 
|---|
| 1663 | } else { | 
|---|
| 1664 | tokadd('\\'); | 
|---|
| 1665 | tokadd(c); | 
|---|
| 1666 | continue; | 
|---|
| 1667 | } | 
|---|
| 1668 | break; | 
|---|
| 1669 | case '/':       /* end of the regexp */ | 
|---|
| 1670 | if (in_brack > 0) | 
|---|
| 1671 | break; | 
|---|
| 1672 | end_regexp: | 
|---|
| 1673 | tokadd('\0'); | 
|---|
| 1674 | yylval.sval = tokstart; | 
|---|
| 1675 | if (do_lint) { | 
|---|
| 1676 | int peek = nextc(); | 
|---|
| 1677 |  | 
|---|
| 1678 | pushback(); | 
|---|
| 1679 | if (peek == 'i' || peek == 's') { | 
|---|
| 1680 | if (source) | 
|---|
| 1681 | lintwarn( | 
|---|
| 1682 | _("%s: %d: tawk regex modifier `/.../%c' doesn't work in gawk"), | 
|---|
| 1683 | source, sourceline, peek); | 
|---|
| 1684 | else | 
|---|
| 1685 | lintwarn( | 
|---|
| 1686 | _("tawk regex modifier `/.../%c' doesn't work in gawk"), | 
|---|
| 1687 | peek); | 
|---|
| 1688 | } | 
|---|
| 1689 | } | 
|---|
| 1690 | return lasttok = REGEXP; | 
|---|
| 1691 | case '\n': | 
|---|
| 1692 | pushback(); | 
|---|
| 1693 | yyerror(_("unterminated regexp")); | 
|---|
| 1694 | goto end_regexp;        /* kludge */ | 
|---|
| 1695 | case EOF: | 
|---|
| 1696 | yyerror(_("unterminated regexp at end of file")); | 
|---|
| 1697 | goto end_regexp;        /* kludge */ | 
|---|
| 1698 | } | 
|---|
| 1699 | tokadd(c); | 
|---|
| 1700 | } | 
|---|
| 1701 | } | 
|---|
| 1702 | retry: | 
|---|
| 1703 |  | 
|---|
| 1704 | /* skipping \r is a hack, but windows is just too pervasive. sigh. */ | 
|---|
| 1705 | while ((c = nextc()) == ' ' || c == '\t' || c == '\r') | 
|---|
| 1706 | continue; | 
|---|
| 1707 |  | 
|---|
| 1708 | lexeme = lexptr ? lexptr - 1 : lexptr; | 
|---|
| 1709 | thisline = NULL; | 
|---|
| 1710 | tok = tokstart; | 
|---|
| 1711 | yylval.nodetypeval = Node_illegal; | 
|---|
| 1712 |  | 
|---|
| 1713 | if (gawk_mb_cur_max == 1 || nextc_is_1stbyte) switch (c) { | 
|---|
| 1714 | case EOF: | 
|---|
| 1715 | if (lasttok != NEWLINE) { | 
|---|
| 1716 | lasttok = NEWLINE; | 
|---|
| 1717 | if (do_lint && ! eof_warned) { | 
|---|
| 1718 | lintwarn(_("source file does not end in newline")); | 
|---|
| 1719 | eof_warned = TRUE; | 
|---|
| 1720 | } | 
|---|
| 1721 | return NEWLINE; /* fake it */ | 
|---|
| 1722 | } | 
|---|
| 1723 | return 0; | 
|---|
| 1724 |  | 
|---|
| 1725 | case '\n': | 
|---|
| 1726 | sourceline++; | 
|---|
| 1727 | return lasttok = NEWLINE; | 
|---|
| 1728 |  | 
|---|
| 1729 | case '#':               /* it's a comment */ | 
|---|
| 1730 | while ((c = nextc()) != '\n') { | 
|---|
| 1731 | if (c == EOF) { | 
|---|
| 1732 | if (lasttok != NEWLINE) { | 
|---|
| 1733 | lasttok = NEWLINE; | 
|---|
| 1734 | if (do_lint && ! eof_warned) { | 
|---|
| 1735 | lintwarn( | 
|---|
| 1736 | _("source file does not end in newline")); | 
|---|
| 1737 | eof_warned = TRUE; | 
|---|
| 1738 | } | 
|---|
| 1739 | return NEWLINE; /* fake it */ | 
|---|
| 1740 | } | 
|---|
| 1741 | return 0; | 
|---|
| 1742 | } | 
|---|
| 1743 | } | 
|---|
| 1744 | sourceline++; | 
|---|
| 1745 | return lasttok = NEWLINE; | 
|---|
| 1746 |  | 
|---|
| 1747 | case '\\': | 
|---|
| 1748 | #ifdef RELAXED_CONTINUATION | 
|---|
| 1749 | /* | 
|---|
| 1750 | * This code puports to allow comments and/or whitespace | 
|---|
| 1751 | * after the `\' at the end of a line used for continuation. | 
|---|
| 1752 | * Use it at your own risk. We think it's a bad idea, which | 
|---|
| 1753 | * is why it's not on by default. | 
|---|
| 1754 | */ | 
|---|
| 1755 | if (! do_traditional) { | 
|---|
| 1756 | /* strip trailing white-space and/or comment */ | 
|---|
| 1757 | while ((c = nextc()) == ' ' || c == '\t' || c == '\r') | 
|---|
| 1758 | continue; | 
|---|
| 1759 | if (c == '#') { | 
|---|
| 1760 | if (do_lint) | 
|---|
| 1761 | lintwarn( | 
|---|
| 1762 | _("use of `\\ #...' line continuation is not portable")); | 
|---|
| 1763 | while ((c = nextc()) != '\n') | 
|---|
| 1764 | if (c == EOF) | 
|---|
| 1765 | break; | 
|---|
| 1766 | } | 
|---|
| 1767 | pushback(); | 
|---|
| 1768 | } | 
|---|
| 1769 | #endif /* RELAXED_CONTINUATION */ | 
|---|
| 1770 | if (nextc() == '\n') { | 
|---|
| 1771 | sourceline++; | 
|---|
| 1772 | goto retry; | 
|---|
| 1773 | } else { | 
|---|
| 1774 | yyerror(_("backslash not last character on line")); | 
|---|
| 1775 | exit(1); | 
|---|
| 1776 | } | 
|---|
| 1777 | break; | 
|---|
| 1778 |  | 
|---|
| 1779 | case ':': | 
|---|
| 1780 | case '?': | 
|---|
| 1781 | if (! do_posix) | 
|---|
| 1782 | allow_newline(); | 
|---|
| 1783 | return lasttok = c; | 
|---|
| 1784 |  | 
|---|
| 1785 | /* | 
|---|
| 1786 | * in_parens is undefined unless we are parsing a print | 
|---|
| 1787 | * statement (in_print), but why bother with a check? | 
|---|
| 1788 | */ | 
|---|
| 1789 | case ')': | 
|---|
| 1790 | in_parens--; | 
|---|
| 1791 | return lasttok = c; | 
|---|
| 1792 |  | 
|---|
| 1793 | case '(': | 
|---|
| 1794 | in_parens++; | 
|---|
| 1795 | /* FALL THROUGH */ | 
|---|
| 1796 | case '$': | 
|---|
| 1797 | case ';': | 
|---|
| 1798 | case '{': | 
|---|
| 1799 | case ',': | 
|---|
| 1800 | case '[': | 
|---|
| 1801 | case ']': | 
|---|
| 1802 | return lasttok = c; | 
|---|
| 1803 |  | 
|---|
| 1804 | case '*': | 
|---|
| 1805 | if ((c = nextc()) == '=') { | 
|---|
| 1806 | yylval.nodetypeval = Node_assign_times; | 
|---|
| 1807 | return lasttok = ASSIGNOP; | 
|---|
| 1808 | } else if (do_posix) { | 
|---|
| 1809 | pushback(); | 
|---|
| 1810 | return lasttok = '*'; | 
|---|
| 1811 | } else if (c == '*') { | 
|---|
| 1812 | /* make ** and **= aliases for ^ and ^= */ | 
|---|
| 1813 | static int did_warn_op = FALSE, did_warn_assgn = FALSE; | 
|---|
| 1814 |  | 
|---|
| 1815 | if (nextc() == '=') { | 
|---|
| 1816 | if (! did_warn_assgn) { | 
|---|
| 1817 | did_warn_assgn = TRUE; | 
|---|
| 1818 | if (do_lint) | 
|---|
| 1819 | lintwarn(_("POSIX does not allow operator `**='")); | 
|---|
| 1820 | if (do_lint_old) | 
|---|
| 1821 | warning(_("old awk does not support operator `**='")); | 
|---|
| 1822 | } | 
|---|
| 1823 | yylval.nodetypeval = Node_assign_exp; | 
|---|
| 1824 | return ASSIGNOP; | 
|---|
| 1825 | } else { | 
|---|
| 1826 | pushback(); | 
|---|
| 1827 | if (! did_warn_op) { | 
|---|
| 1828 | did_warn_op = TRUE; | 
|---|
| 1829 | if (do_lint) | 
|---|
| 1830 | lintwarn(_("POSIX does not allow operator `**'")); | 
|---|
| 1831 | if (do_lint_old) | 
|---|
| 1832 | warning(_("old awk does not support operator `**'")); | 
|---|
| 1833 | } | 
|---|
| 1834 | return lasttok = '^'; | 
|---|
| 1835 | } | 
|---|
| 1836 | } | 
|---|
| 1837 | pushback(); | 
|---|
| 1838 | return lasttok = '*'; | 
|---|
| 1839 |  | 
|---|
| 1840 | case '/': | 
|---|
| 1841 | if (nextc() == '=') { | 
|---|
| 1842 | pushback(); | 
|---|
| 1843 | return lasttok = SLASH_BEFORE_EQUAL; | 
|---|
| 1844 | } | 
|---|
| 1845 | pushback(); | 
|---|
| 1846 | return lasttok = '/'; | 
|---|
| 1847 |  | 
|---|
| 1848 | case '%': | 
|---|
| 1849 | if (nextc() == '=') { | 
|---|
| 1850 | yylval.nodetypeval = Node_assign_mod; | 
|---|
| 1851 | return lasttok = ASSIGNOP; | 
|---|
| 1852 | } | 
|---|
| 1853 | pushback(); | 
|---|
| 1854 | return lasttok = '%'; | 
|---|
| 1855 |  | 
|---|
| 1856 | case '^': | 
|---|
| 1857 | { | 
|---|
| 1858 | static int did_warn_op = FALSE, did_warn_assgn = FALSE; | 
|---|
| 1859 |  | 
|---|
| 1860 | if (nextc() == '=') { | 
|---|
| 1861 | if (do_lint_old && ! did_warn_assgn) { | 
|---|
| 1862 | did_warn_assgn = TRUE; | 
|---|
| 1863 | warning(_("operator `^=' is not supported in old awk")); | 
|---|
| 1864 | } | 
|---|
| 1865 | yylval.nodetypeval = Node_assign_exp; | 
|---|
| 1866 | return lasttok = ASSIGNOP; | 
|---|
| 1867 | } | 
|---|
| 1868 | pushback(); | 
|---|
| 1869 | if (do_lint_old && ! did_warn_op) { | 
|---|
| 1870 | did_warn_op = TRUE; | 
|---|
| 1871 | warning(_("operator `^' is not supported in old awk")); | 
|---|
| 1872 | } | 
|---|
| 1873 | return lasttok = '^'; | 
|---|
| 1874 | } | 
|---|
| 1875 |  | 
|---|
| 1876 | case '+': | 
|---|
| 1877 | if ((c = nextc()) == '=') { | 
|---|
| 1878 | yylval.nodetypeval = Node_assign_plus; | 
|---|
| 1879 | return lasttok = ASSIGNOP; | 
|---|
| 1880 | } | 
|---|
| 1881 | if (c == '+') | 
|---|
| 1882 | return lasttok = INCREMENT; | 
|---|
| 1883 | pushback(); | 
|---|
| 1884 | return lasttok = '+'; | 
|---|
| 1885 |  | 
|---|
| 1886 | case '!': | 
|---|
| 1887 | if ((c = nextc()) == '=') { | 
|---|
| 1888 | yylval.nodetypeval = Node_notequal; | 
|---|
| 1889 | return lasttok = RELOP; | 
|---|
| 1890 | } | 
|---|
| 1891 | if (c == '~') { | 
|---|
| 1892 | yylval.nodetypeval = Node_nomatch; | 
|---|
| 1893 | return lasttok = MATCHOP; | 
|---|
| 1894 | } | 
|---|
| 1895 | pushback(); | 
|---|
| 1896 | return lasttok = '!'; | 
|---|
| 1897 |  | 
|---|
| 1898 | case '<': | 
|---|
| 1899 | if (nextc() == '=') { | 
|---|
| 1900 | yylval.nodetypeval = Node_leq; | 
|---|
| 1901 | return lasttok = RELOP; | 
|---|
| 1902 | } | 
|---|
| 1903 | yylval.nodetypeval = Node_less; | 
|---|
| 1904 | pushback(); | 
|---|
| 1905 | return lasttok = '<'; | 
|---|
| 1906 |  | 
|---|
| 1907 | case '=': | 
|---|
| 1908 | if (nextc() == '=') { | 
|---|
| 1909 | yylval.nodetypeval = Node_equal; | 
|---|
| 1910 | return lasttok = RELOP; | 
|---|
| 1911 | } | 
|---|
| 1912 | yylval.nodetypeval = Node_assign; | 
|---|
| 1913 | pushback(); | 
|---|
| 1914 | return lasttok = ASSIGN; | 
|---|
| 1915 |  | 
|---|
| 1916 | case '>': | 
|---|
| 1917 | if ((c = nextc()) == '=') { | 
|---|
| 1918 | yylval.nodetypeval = Node_geq; | 
|---|
| 1919 | return lasttok = RELOP; | 
|---|
| 1920 | } else if (c == '>') { | 
|---|
| 1921 | yylval.nodetypeval = Node_redirect_append; | 
|---|
| 1922 | return lasttok = IO_OUT; | 
|---|
| 1923 | } | 
|---|
| 1924 | pushback(); | 
|---|
| 1925 | if (in_print && in_parens == 0) { | 
|---|
| 1926 | yylval.nodetypeval = Node_redirect_output; | 
|---|
| 1927 | return lasttok = IO_OUT; | 
|---|
| 1928 | } | 
|---|
| 1929 | yylval.nodetypeval = Node_greater; | 
|---|
| 1930 | return lasttok = '>'; | 
|---|
| 1931 |  | 
|---|
| 1932 | case '~': | 
|---|
| 1933 | yylval.nodetypeval = Node_match; | 
|---|
| 1934 | return lasttok = MATCHOP; | 
|---|
| 1935 |  | 
|---|
| 1936 | case '}': | 
|---|
| 1937 | /* | 
|---|
| 1938 | * Added did newline stuff.  Easier than | 
|---|
| 1939 | * hacking the grammar. | 
|---|
| 1940 | */ | 
|---|
| 1941 | if (did_newline) { | 
|---|
| 1942 | did_newline = FALSE; | 
|---|
| 1943 | return lasttok = c; | 
|---|
| 1944 | } | 
|---|
| 1945 | did_newline++; | 
|---|
| 1946 | --lexptr;       /* pick up } next time */ | 
|---|
| 1947 | return lasttok = NEWLINE; | 
|---|
| 1948 |  | 
|---|
| 1949 | case '"': | 
|---|
| 1950 | string: | 
|---|
| 1951 | esc_seen = FALSE; | 
|---|
| 1952 | while ((c = nextc()) != '"') { | 
|---|
| 1953 | if (c == '\n') { | 
|---|
| 1954 | pushback(); | 
|---|
| 1955 | yyerror(_("unterminated string")); | 
|---|
| 1956 | exit(1); | 
|---|
| 1957 | } | 
|---|
| 1958 | if ((gawk_mb_cur_max == 1 || nextc_is_1stbyte) && | 
|---|
| 1959 | c == '\\') { | 
|---|
| 1960 | c = nextc(); | 
|---|
| 1961 | if (c == '\n') { | 
|---|
| 1962 | sourceline++; | 
|---|
| 1963 | continue; | 
|---|
| 1964 | } | 
|---|
| 1965 | esc_seen = TRUE; | 
|---|
| 1966 | tokadd('\\'); | 
|---|
| 1967 | } | 
|---|
| 1968 | if (c == EOF) { | 
|---|
| 1969 | pushback(); | 
|---|
| 1970 | yyerror(_("unterminated string")); | 
|---|
| 1971 | exit(1); | 
|---|
| 1972 | } | 
|---|
| 1973 | tokadd(c); | 
|---|
| 1974 | } | 
|---|
| 1975 | yylval.nodeval = make_str_node(tokstart, | 
|---|
| 1976 | tok - tokstart, esc_seen ? SCAN : 0); | 
|---|
| 1977 | yylval.nodeval->flags |= PERM; | 
|---|
| 1978 | if (intlstr) { | 
|---|
| 1979 | yylval.nodeval->flags |= INTLSTR; | 
|---|
| 1980 | intlstr = FALSE; | 
|---|
| 1981 | if (do_intl) | 
|---|
| 1982 | dumpintlstr(yylval.nodeval->stptr, | 
|---|
| 1983 | yylval.nodeval->stlen); | 
|---|
| 1984 | } | 
|---|
| 1985 | return lasttok = YSTRING; | 
|---|
| 1986 |  | 
|---|
| 1987 | case '-': | 
|---|
| 1988 | if ((c = nextc()) == '=') { | 
|---|
| 1989 | yylval.nodetypeval = Node_assign_minus; | 
|---|
| 1990 | return lasttok = ASSIGNOP; | 
|---|
| 1991 | } | 
|---|
| 1992 | if (c == '-') | 
|---|
| 1993 | return lasttok = DECREMENT; | 
|---|
| 1994 | pushback(); | 
|---|
| 1995 | return lasttok = '-'; | 
|---|
| 1996 |  | 
|---|
| 1997 | case '.': | 
|---|
| 1998 | c = nextc(); | 
|---|
| 1999 | pushback(); | 
|---|
| 2000 | if (! ISDIGIT(c)) | 
|---|
| 2001 | return lasttok = '.'; | 
|---|
| 2002 | else | 
|---|
| 2003 | c = '.'; | 
|---|
| 2004 | /* FALL THROUGH */ | 
|---|
| 2005 | case '0': | 
|---|
| 2006 | case '1': | 
|---|
| 2007 | case '2': | 
|---|
| 2008 | case '3': | 
|---|
| 2009 | case '4': | 
|---|
| 2010 | case '5': | 
|---|
| 2011 | case '6': | 
|---|
| 2012 | case '7': | 
|---|
| 2013 | case '8': | 
|---|
| 2014 | case '9': | 
|---|
| 2015 | /* It's a number */ | 
|---|
| 2016 | for (;;) { | 
|---|
| 2017 | int gotnumber = FALSE; | 
|---|
| 2018 |  | 
|---|
| 2019 | tokadd(c); | 
|---|
| 2020 | switch (c) { | 
|---|
| 2021 | case 'x': | 
|---|
| 2022 | case 'X': | 
|---|
| 2023 | if (do_traditional) | 
|---|
| 2024 | goto done; | 
|---|
| 2025 | if (tok == tokstart + 2) { | 
|---|
| 2026 | int peek = nextc(); | 
|---|
| 2027 |  | 
|---|
| 2028 | if (ISXDIGIT(peek)) { | 
|---|
| 2029 | inhex = TRUE; | 
|---|
| 2030 | pushback();     /* following digit */ | 
|---|
| 2031 | } else { | 
|---|
| 2032 | pushback();     /* x or X */ | 
|---|
| 2033 | goto done; | 
|---|
| 2034 | } | 
|---|
| 2035 | } | 
|---|
| 2036 | break; | 
|---|
| 2037 | case '.': | 
|---|
| 2038 | /* period ends exponent part of floating point number */ | 
|---|
| 2039 | if (seen_point || seen_e) { | 
|---|
| 2040 | gotnumber = TRUE; | 
|---|
| 2041 | break; | 
|---|
| 2042 | } | 
|---|
| 2043 | seen_point = TRUE; | 
|---|
| 2044 | break; | 
|---|
| 2045 | case 'e': | 
|---|
| 2046 | case 'E': | 
|---|
| 2047 | if (inhex) | 
|---|
| 2048 | break; | 
|---|
| 2049 | if (seen_e) { | 
|---|
| 2050 | gotnumber = TRUE; | 
|---|
| 2051 | break; | 
|---|
| 2052 | } | 
|---|
| 2053 | seen_e = TRUE; | 
|---|
| 2054 | if ((c = nextc()) == '-' || c == '+') { | 
|---|
| 2055 | int c2 = nextc(); | 
|---|
| 2056 |  | 
|---|
| 2057 | if (ISDIGIT(c2)) { | 
|---|
| 2058 | tokadd(c); | 
|---|
| 2059 | tokadd(c2); | 
|---|
| 2060 | } else { | 
|---|
| 2061 | pushback();     /* non-digit after + or - */ | 
|---|
| 2062 | pushback();     /* + or - */ | 
|---|
| 2063 | pushback();     /* e or E */ | 
|---|
| 2064 | } | 
|---|
| 2065 | } else if (! ISDIGIT(c)) { | 
|---|
| 2066 | pushback();     /* character after e or E */ | 
|---|
| 2067 | pushback();     /* e or E */ | 
|---|
| 2068 | } else { | 
|---|
| 2069 | pushback();     /* digit */ | 
|---|
| 2070 | } | 
|---|
| 2071 | break; | 
|---|
| 2072 | case 'a': | 
|---|
| 2073 | case 'A': | 
|---|
| 2074 | case 'b': | 
|---|
| 2075 | case 'B': | 
|---|
| 2076 | case 'c': | 
|---|
| 2077 | case 'C': | 
|---|
| 2078 | case 'D': | 
|---|
| 2079 | case 'd': | 
|---|
| 2080 | case 'f': | 
|---|
| 2081 | case 'F': | 
|---|
| 2082 | if (do_traditional || ! inhex) | 
|---|
| 2083 | goto done; | 
|---|
| 2084 | /* fall through */ | 
|---|
| 2085 | case '0': | 
|---|
| 2086 | case '1': | 
|---|
| 2087 | case '2': | 
|---|
| 2088 | case '3': | 
|---|
| 2089 | case '4': | 
|---|
| 2090 | case '5': | 
|---|
| 2091 | case '6': | 
|---|
| 2092 | case '7': | 
|---|
| 2093 | case '8': | 
|---|
| 2094 | case '9': | 
|---|
| 2095 | break; | 
|---|
| 2096 | default: | 
|---|
| 2097 | done: | 
|---|
| 2098 | gotnumber = TRUE; | 
|---|
| 2099 | } | 
|---|
| 2100 | if (gotnumber) | 
|---|
| 2101 | break; | 
|---|
| 2102 | c = nextc(); | 
|---|
| 2103 | } | 
|---|
| 2104 | if (c != EOF) | 
|---|
| 2105 | pushback(); | 
|---|
| 2106 | else if (do_lint && ! eof_warned) { | 
|---|
| 2107 | lintwarn(_("source file does not end in newline")); | 
|---|
| 2108 | eof_warned = TRUE; | 
|---|
| 2109 | } | 
|---|
| 2110 | tokadd('\0'); | 
|---|
| 2111 | if (! do_traditional && isnondecimal(tokstart, FALSE)) { | 
|---|
| 2112 | if (do_lint) { | 
|---|
| 2113 | if (ISDIGIT(tokstart[1]))       /* not an 'x' or 'X' */ | 
|---|
| 2114 | lintwarn("numeric constant `%.*s' treated as octal", | 
|---|
| 2115 | (int) strlen(tokstart)-1, tokstart); | 
|---|
| 2116 | else if (tokstart[1] == 'x' || tokstart[1] == 'X') | 
|---|
| 2117 | lintwarn("numeric constant `%.*s' treated as hexadecimal", | 
|---|
| 2118 | (int) strlen(tokstart)-1, tokstart); | 
|---|
| 2119 | } | 
|---|
| 2120 | yylval.nodeval = make_number(nondec2awknum(tokstart, strlen(tokstart))); | 
|---|
| 2121 | } else | 
|---|
| 2122 | yylval.nodeval = make_number(atof(tokstart)); | 
|---|
| 2123 | yylval.nodeval->flags |= PERM; | 
|---|
| 2124 | return lasttok = YNUMBER; | 
|---|
| 2125 |  | 
|---|
| 2126 | case '&': | 
|---|
| 2127 | if ((c = nextc()) == '&') { | 
|---|
| 2128 | yylval.nodetypeval = Node_and; | 
|---|
| 2129 | allow_newline(); | 
|---|
| 2130 | return lasttok = LEX_AND; | 
|---|
| 2131 | } | 
|---|
| 2132 | pushback(); | 
|---|
| 2133 | return lasttok = '&'; | 
|---|
| 2134 |  | 
|---|
| 2135 | case '|': | 
|---|
| 2136 | if ((c = nextc()) == '|') { | 
|---|
| 2137 | yylval.nodetypeval = Node_or; | 
|---|
| 2138 | allow_newline(); | 
|---|
| 2139 | return lasttok = LEX_OR; | 
|---|
| 2140 | } else if (! do_traditional && c == '&') { | 
|---|
| 2141 | yylval.nodetypeval = Node_redirect_twoway; | 
|---|
| 2142 | return lasttok = (in_print && in_parens == 0 ? IO_OUT : IO_IN); | 
|---|
| 2143 | } | 
|---|
| 2144 | pushback(); | 
|---|
| 2145 | if (in_print && in_parens == 0) { | 
|---|
| 2146 | yylval.nodetypeval = Node_redirect_pipe; | 
|---|
| 2147 | return lasttok = IO_OUT; | 
|---|
| 2148 | } else { | 
|---|
| 2149 | yylval.nodetypeval = Node_redirect_pipein; | 
|---|
| 2150 | return lasttok = IO_IN; | 
|---|
| 2151 | } | 
|---|
| 2152 | } | 
|---|
| 2153 |  | 
|---|
| 2154 | if (c != '_' && ! ISALPHA(c)) { | 
|---|
| 2155 | yyerror(_("invalid char '%c' in expression"), c); | 
|---|
| 2156 | exit(1); | 
|---|
| 2157 | } | 
|---|
| 2158 |  | 
|---|
| 2159 | /* | 
|---|
| 2160 | * Lots of fog here.  Consider: | 
|---|
| 2161 | * | 
|---|
| 2162 | * print "xyzzy"$_"foo" | 
|---|
| 2163 | * | 
|---|
| 2164 | * Without the check for ` lasttok != '$' ', this is parsed as | 
|---|
| 2165 | * | 
|---|
| 2166 | * print "xxyzz" $(_"foo") | 
|---|
| 2167 | * | 
|---|
| 2168 | * With the check, it is "correctly" parsed as three | 
|---|
| 2169 | * string concatenations.  Sigh.  This seems to be | 
|---|
| 2170 | * "more correct", but this is definitely one of those | 
|---|
| 2171 | * occasions where the interactions are funny. | 
|---|
| 2172 | */ | 
|---|
| 2173 | if (! do_traditional && c == '_' && lasttok != '$') { | 
|---|
| 2174 | if ((c = nextc()) == '"') { | 
|---|
| 2175 | intlstr = TRUE; | 
|---|
| 2176 | goto string; | 
|---|
| 2177 | } | 
|---|
| 2178 | pushback(); | 
|---|
| 2179 | c = '_'; | 
|---|
| 2180 | } | 
|---|
| 2181 |  | 
|---|
| 2182 | /* it's some type of name-type-thing.  Find its length. */ | 
|---|
| 2183 | tok = tokstart; | 
|---|
| 2184 | while (is_identchar(c)) { | 
|---|
| 2185 | tokadd(c); | 
|---|
| 2186 | c = nextc(); | 
|---|
| 2187 | } | 
|---|
| 2188 | tokadd('\0'); | 
|---|
| 2189 | emalloc(tokkey, char *, tok - tokstart, "yylex"); | 
|---|
| 2190 | memcpy(tokkey, tokstart, tok - tokstart); | 
|---|
| 2191 | if (c != EOF) | 
|---|
| 2192 | pushback(); | 
|---|
| 2193 | else if (do_lint && ! eof_warned) { | 
|---|
| 2194 | lintwarn(_("source file does not end in newline")); | 
|---|
| 2195 | eof_warned = TRUE; | 
|---|
| 2196 | } | 
|---|
| 2197 |  | 
|---|
| 2198 | /* See if it is a special token. */ | 
|---|
| 2199 |  | 
|---|
| 2200 | if ((mid = check_special(tokstart)) >= 0) { | 
|---|
| 2201 | if (do_lint) { | 
|---|
| 2202 | if (tokentab[mid].flags & GAWKX) | 
|---|
| 2203 | lintwarn(_("`%s' is a gawk extension"), | 
|---|
| 2204 | tokentab[mid].operator); | 
|---|
| 2205 | if (tokentab[mid].flags & RESX) | 
|---|
| 2206 | lintwarn(_("`%s' is a Bell Labs extension"), | 
|---|
| 2207 | tokentab[mid].operator); | 
|---|
| 2208 | if (tokentab[mid].flags & NOT_POSIX) | 
|---|
| 2209 | lintwarn(_("POSIX does not allow `%s'"), | 
|---|
| 2210 | tokentab[mid].operator); | 
|---|
| 2211 | } | 
|---|
| 2212 | if (do_lint_old && (tokentab[mid].flags & NOT_OLD)) | 
|---|
| 2213 | warning(_("`%s' is not supported in old awk"), | 
|---|
| 2214 | tokentab[mid].operator); | 
|---|
| 2215 | if ((do_traditional && (tokentab[mid].flags & GAWKX)) | 
|---|
| 2216 | || (do_posix && (tokentab[mid].flags & NOT_POSIX))) | 
|---|
| 2217 | ; | 
|---|
| 2218 | else { | 
|---|
| 2219 | if (tokentab[mid].class == LEX_BUILTIN | 
|---|
| 2220 | || tokentab[mid].class == LEX_LENGTH) | 
|---|
| 2221 | yylval.lval = mid; | 
|---|
| 2222 | else | 
|---|
| 2223 | yylval.nodetypeval = tokentab[mid].value; | 
|---|
| 2224 | free(tokkey); | 
|---|
| 2225 | return lasttok = tokentab[mid].class; | 
|---|
| 2226 | } | 
|---|
| 2227 | } | 
|---|
| 2228 |  | 
|---|
| 2229 | yylval.sval = tokkey; | 
|---|
| 2230 | if (*lexptr == '(') | 
|---|
| 2231 | return lasttok = FUNC_CALL; | 
|---|
| 2232 | else { | 
|---|
| 2233 | static short goto_warned = FALSE; | 
|---|
| 2234 |  | 
|---|
| 2235 | #define SMART_ALECK     1 | 
|---|
| 2236 | if (SMART_ALECK && do_lint | 
|---|
| 2237 | && ! goto_warned && strcasecmp(tokkey, "goto") == 0) { | 
|---|
| 2238 | goto_warned = TRUE; | 
|---|
| 2239 | lintwarn(_("`goto' considered harmful!\n")); | 
|---|
| 2240 | } | 
|---|
| 2241 | return lasttok = NAME; | 
|---|
| 2242 | } | 
|---|
| 2243 | } | 
|---|
| 2244 |  | 
|---|
| 2245 | /* node_common --- common code for allocating a new node */ | 
|---|
| 2246 |  | 
|---|
| 2247 | static NODE * | 
|---|
| 2248 | node_common(NODETYPE op) | 
|---|
| 2249 | { | 
|---|
| 2250 | register NODE *r; | 
|---|
| 2251 |  | 
|---|
| 2252 | getnode(r); | 
|---|
| 2253 | r->type = op; | 
|---|
| 2254 | r->flags = MALLOC; | 
|---|
| 2255 | /* if lookahead is a NL, lineno is 1 too high */ | 
|---|
| 2256 | if (lexeme && lexeme >= lexptr_begin && *lexeme == '\n') | 
|---|
| 2257 | r->source_line = sourceline - 1; | 
|---|
| 2258 | else | 
|---|
| 2259 | r->source_line = sourceline; | 
|---|
| 2260 | r->source_file = source; | 
|---|
| 2261 | return r; | 
|---|
| 2262 | } | 
|---|
| 2263 |  | 
|---|
| 2264 | /* node --- allocates a node with defined lnode and rnode. */ | 
|---|
| 2265 |  | 
|---|
| 2266 | NODE * | 
|---|
| 2267 | node(NODE *left, NODETYPE op, NODE *right) | 
|---|
| 2268 | { | 
|---|
| 2269 | register NODE *r; | 
|---|
| 2270 |  | 
|---|
| 2271 | r = node_common(op); | 
|---|
| 2272 | r->lnode = left; | 
|---|
| 2273 | r->rnode = right; | 
|---|
| 2274 | return r; | 
|---|
| 2275 | } | 
|---|
| 2276 |  | 
|---|
| 2277 | /* snode ---    allocate a node with defined subnode and builtin for builtin | 
|---|
| 2278 | functions. Checks for arg. count and supplies defaults where | 
|---|
| 2279 | possible. */ | 
|---|
| 2280 |  | 
|---|
| 2281 | static NODE * | 
|---|
| 2282 | snode(NODE *subn, NODETYPE op, int idx) | 
|---|
| 2283 | { | 
|---|
| 2284 | register NODE *r; | 
|---|
| 2285 | register NODE *n; | 
|---|
| 2286 | int nexp = 0; | 
|---|
| 2287 | int args_allowed; | 
|---|
| 2288 |  | 
|---|
| 2289 | r = node_common(op); | 
|---|
| 2290 |  | 
|---|
| 2291 | /* traverse expression list to see how many args. given */ | 
|---|
| 2292 | for (n = subn; n != NULL; n = n->rnode) { | 
|---|
| 2293 | nexp++; | 
|---|
| 2294 | if (nexp > 5) | 
|---|
| 2295 | break; | 
|---|
| 2296 | } | 
|---|
| 2297 |  | 
|---|
| 2298 | /* check against how many args. are allowed for this builtin */ | 
|---|
| 2299 | args_allowed = tokentab[idx].flags & ARGS; | 
|---|
| 2300 | if (args_allowed && (args_allowed & A(nexp)) == 0) | 
|---|
| 2301 | fatal(_("%d is invalid as number of arguments for %s"), | 
|---|
| 2302 | nexp, tokentab[idx].operator); | 
|---|
| 2303 |  | 
|---|
| 2304 | r->builtin = tokentab[idx].ptr; | 
|---|
| 2305 |  | 
|---|
| 2306 | /* special case processing for a few builtins */ | 
|---|
| 2307 | if (nexp == 0 && r->builtin == do_length) { | 
|---|
| 2308 | subn = node(node(make_number(0.0), Node_field_spec, (NODE *) NULL), | 
|---|
| 2309 | Node_expression_list, | 
|---|
| 2310 | (NODE *) NULL); | 
|---|
| 2311 | } else if (r->builtin == do_match) { | 
|---|
| 2312 | static short warned = FALSE; | 
|---|
| 2313 |  | 
|---|
| 2314 | if (subn->rnode->lnode->type != Node_regex) | 
|---|
| 2315 | subn->rnode->lnode = mk_rexp(subn->rnode->lnode); | 
|---|
| 2316 |  | 
|---|
| 2317 | if (subn->rnode->rnode != NULL) {       /* 3rd argument there */ | 
|---|
| 2318 | if (do_lint && ! warned) { | 
|---|
| 2319 | warned = TRUE; | 
|---|
| 2320 | lintwarn(_("match: third argument is a gawk extension")); | 
|---|
| 2321 | } | 
|---|
| 2322 | if (do_traditional) | 
|---|
| 2323 | fatal(_("match: third argument is a gawk extension")); | 
|---|
| 2324 | } | 
|---|
| 2325 | } else if (r->builtin == do_sub || r->builtin == do_gsub) { | 
|---|
| 2326 | if (subn->lnode->type != Node_regex) | 
|---|
| 2327 | subn->lnode = mk_rexp(subn->lnode); | 
|---|
| 2328 | if (nexp == 2) | 
|---|
| 2329 | append_right(subn, node(node(make_number(0.0), | 
|---|
| 2330 | Node_field_spec, | 
|---|
| 2331 | (NODE *) NULL), | 
|---|
| 2332 | Node_expression_list, | 
|---|
| 2333 | (NODE *) NULL)); | 
|---|
| 2334 | else if (subn->rnode->rnode->lnode->type == Node_val) { | 
|---|
| 2335 | if (do_lint) | 
|---|
| 2336 | lintwarn(_("%s: string literal as last arg of substitute has no effect"), | 
|---|
| 2337 | (r->builtin == do_sub) ? "sub" : "gsub"); | 
|---|
| 2338 | } else if (! isassignable(subn->rnode->rnode->lnode)) { | 
|---|
| 2339 | yyerror(_("%s third parameter is not a changeable object"), | 
|---|
| 2340 | (r->builtin == do_sub) ? "sub" : "gsub"); | 
|---|
| 2341 | } | 
|---|
| 2342 | } else if (r->builtin == do_gensub) { | 
|---|
| 2343 | if (subn->lnode->type != Node_regex) | 
|---|
| 2344 | subn->lnode = mk_rexp(subn->lnode); | 
|---|
| 2345 | if (nexp == 3) | 
|---|
| 2346 | append_right(subn, node(node(make_number(0.0), | 
|---|
| 2347 | Node_field_spec, | 
|---|
| 2348 | (NODE *) NULL), | 
|---|
| 2349 | Node_expression_list, | 
|---|
| 2350 | (NODE *) NULL)); | 
|---|
| 2351 | } else if (r->builtin == do_split) { | 
|---|
| 2352 | if (nexp == 2) | 
|---|
| 2353 | append_right(subn, | 
|---|
| 2354 | node(FS_node, Node_expression_list, (NODE *) NULL)); | 
|---|
| 2355 | n = subn->rnode->rnode->lnode; | 
|---|
| 2356 | if (n->type != Node_regex) | 
|---|
| 2357 | subn->rnode->rnode->lnode = mk_rexp(n); | 
|---|
| 2358 | if (nexp == 2) | 
|---|
| 2359 | subn->rnode->rnode->lnode->re_flags |= FS_DFLT; | 
|---|
| 2360 | } else if (r->builtin == do_close) { | 
|---|
| 2361 | static short warned = FALSE; | 
|---|
| 2362 |  | 
|---|
| 2363 | if ( nexp == 2) { | 
|---|
| 2364 | if (do_lint && nexp == 2 && ! warned) { | 
|---|
| 2365 | warned = TRUE; | 
|---|
| 2366 | lintwarn(_("close: second argument is a gawk extension")); | 
|---|
| 2367 | } | 
|---|
| 2368 | if (do_traditional) | 
|---|
| 2369 | fatal(_("close: second argument is a gawk extension")); | 
|---|
| 2370 | } | 
|---|
| 2371 | } else if (do_intl                                      /* --gen-po */ | 
|---|
| 2372 | && r->builtin == do_dcgettext           /* dcgettext(...) */ | 
|---|
| 2373 | && subn->lnode->type == Node_val        /* 1st arg is constant */ | 
|---|
| 2374 | && (subn->lnode->flags & STRCUR) != 0) {        /* it's a string constant */ | 
|---|
| 2375 | /* ala xgettext, dcgettext("some string" ...) dumps the string */ | 
|---|
| 2376 | NODE *str = subn->lnode; | 
|---|
| 2377 |  | 
|---|
| 2378 | if ((str->flags & INTLSTR) != 0) | 
|---|
| 2379 | warning(_("use of dcgettext(_\"...\") is incorrect: remove leading underscore")); | 
|---|
| 2380 | /* don't dump it, the lexer already did */ | 
|---|
| 2381 | else | 
|---|
| 2382 | dumpintlstr(str->stptr, str->stlen); | 
|---|
| 2383 | } else if (do_intl                                      /* --gen-po */ | 
|---|
| 2384 | && r->builtin == do_dcngettext          /* dcngettext(...) */ | 
|---|
| 2385 | && subn->lnode->type == Node_val        /* 1st arg is constant */ | 
|---|
| 2386 | && (subn->lnode->flags & STRCUR) != 0   /* it's a string constant */ | 
|---|
| 2387 | && subn->rnode->lnode->type == Node_val /* 2nd arg is constant too */ | 
|---|
| 2388 | && (subn->rnode->lnode->flags & STRCUR) != 0) { /* it's a string constant */ | 
|---|
| 2389 | /* ala xgettext, dcngettext("some string", "some plural" ...) dumps the string */ | 
|---|
| 2390 | NODE *str1 = subn->lnode; | 
|---|
| 2391 | NODE *str2 = subn->rnode->lnode; | 
|---|
| 2392 |  | 
|---|
| 2393 | if (((str1->flags | str2->flags) & INTLSTR) != 0) | 
|---|
| 2394 | warning(_("use of dcngettext(_\"...\") is incorrect: remove leading underscore")); | 
|---|
| 2395 | else | 
|---|
| 2396 | dumpintlstr2(str1->stptr, str1->stlen, str2->stptr, str2->stlen); | 
|---|
| 2397 | } | 
|---|
| 2398 |  | 
|---|
| 2399 | r->subnode = subn; | 
|---|
| 2400 | if (r->builtin == do_sprintf) { | 
|---|
| 2401 | count_args(r); | 
|---|
| 2402 | r->lnode->printf_count = r->printf_count; /* hack */ | 
|---|
| 2403 | } | 
|---|
| 2404 | return r; | 
|---|
| 2405 | } | 
|---|
| 2406 |  | 
|---|
| 2407 | /* make_for_loop --- build a for loop */ | 
|---|
| 2408 |  | 
|---|
| 2409 | static NODE * | 
|---|
| 2410 | make_for_loop(NODE *init, NODE *cond, NODE *incr) | 
|---|
| 2411 | { | 
|---|
| 2412 | register FOR_LOOP_HEADER *r; | 
|---|
| 2413 | NODE *n; | 
|---|
| 2414 |  | 
|---|
| 2415 | emalloc(r, FOR_LOOP_HEADER *, sizeof(FOR_LOOP_HEADER), "make_for_loop"); | 
|---|
| 2416 | getnode(n); | 
|---|
| 2417 | n->type = Node_illegal; | 
|---|
| 2418 | r->init = init; | 
|---|
| 2419 | r->cond = cond; | 
|---|
| 2420 | r->incr = incr; | 
|---|
| 2421 | n->sub.nodep.r.hd = r; | 
|---|
| 2422 | return n; | 
|---|
| 2423 | } | 
|---|
| 2424 |  | 
|---|
| 2425 | /* dup_parms --- return TRUE if there are duplicate parameters */ | 
|---|
| 2426 |  | 
|---|
| 2427 | static int | 
|---|
| 2428 | dup_parms(NODE *func) | 
|---|
| 2429 | { | 
|---|
| 2430 | register NODE *np; | 
|---|
| 2431 | const char *fname, **names; | 
|---|
| 2432 | int count, i, j, dups; | 
|---|
| 2433 | NODE *params; | 
|---|
| 2434 |  | 
|---|
| 2435 | if (func == NULL)       /* error earlier */ | 
|---|
| 2436 | return TRUE; | 
|---|
| 2437 |  | 
|---|
| 2438 | fname = func->param; | 
|---|
| 2439 | count = func->param_cnt; | 
|---|
| 2440 | params = func->rnode; | 
|---|
| 2441 |  | 
|---|
| 2442 | if (count == 0)         /* no args, no problem */ | 
|---|
| 2443 | return FALSE; | 
|---|
| 2444 |  | 
|---|
| 2445 | if (params == NULL)     /* error earlier */ | 
|---|
| 2446 | return TRUE; | 
|---|
| 2447 |  | 
|---|
| 2448 | emalloc(names, const char **, count * sizeof(char *), "dup_parms"); | 
|---|
| 2449 |  | 
|---|
| 2450 | i = 0; | 
|---|
| 2451 | for (np = params; np != NULL; np = np->rnode) { | 
|---|
| 2452 | if (np->param == NULL) { /* error earlier, give up, go home */ | 
|---|
| 2453 | free(names); | 
|---|
| 2454 | return TRUE; | 
|---|
| 2455 | } | 
|---|
| 2456 | names[i++] = np->param; | 
|---|
| 2457 | } | 
|---|
| 2458 |  | 
|---|
| 2459 | dups = 0; | 
|---|
| 2460 | for (i = 1; i < count; i++) { | 
|---|
| 2461 | for (j = 0; j < i; j++) { | 
|---|
| 2462 | if (strcmp(names[i], names[j]) == 0) { | 
|---|
| 2463 | dups++; | 
|---|
| 2464 | error( | 
|---|
| 2465 | _("function `%s': parameter #%d, `%s', duplicates parameter #%d"), | 
|---|
| 2466 | fname, i+1, names[j], j+1); | 
|---|
| 2467 | } | 
|---|
| 2468 | } | 
|---|
| 2469 | } | 
|---|
| 2470 |  | 
|---|
| 2471 | free(names); | 
|---|
| 2472 | return (dups > 0 ? TRUE : FALSE); | 
|---|
| 2473 | } | 
|---|
| 2474 |  | 
|---|
| 2475 | /* parms_shadow --- check if parameters shadow globals */ | 
|---|
| 2476 |  | 
|---|
| 2477 | static int | 
|---|
| 2478 | parms_shadow(const char *fname, NODE *func) | 
|---|
| 2479 | { | 
|---|
| 2480 | int count, i; | 
|---|
| 2481 | int ret = FALSE; | 
|---|
| 2482 |  | 
|---|
| 2483 | if (fname == NULL || func == NULL)      /* error earlier */ | 
|---|
| 2484 | return FALSE; | 
|---|
| 2485 |  | 
|---|
| 2486 | count = func->lnode->param_cnt; | 
|---|
| 2487 |  | 
|---|
| 2488 | if (count == 0)         /* no args, no problem */ | 
|---|
| 2489 | return FALSE; | 
|---|
| 2490 |  | 
|---|
| 2491 | /* | 
|---|
| 2492 | * Use warning() and not lintwarn() so that can warn | 
|---|
| 2493 | * about all shadowed parameters. | 
|---|
| 2494 | */ | 
|---|
| 2495 | for (i = 0; i < count; i++) { | 
|---|
| 2496 | if (lookup(func->parmlist[i]) != NULL) { | 
|---|
| 2497 | warning( | 
|---|
| 2498 | _("function `%s': parameter `%s' shadows global variable"), | 
|---|
| 2499 | fname, func->parmlist[i]); | 
|---|
| 2500 | ret = TRUE; | 
|---|
| 2501 | } | 
|---|
| 2502 | } | 
|---|
| 2503 |  | 
|---|
| 2504 | return ret; | 
|---|
| 2505 | } | 
|---|
| 2506 |  | 
|---|
| 2507 | /* | 
|---|
| 2508 | * install: | 
|---|
| 2509 | * Install a name in the symbol table, even if it is already there. | 
|---|
| 2510 | * Caller must check against redefinition if that is desired. | 
|---|
| 2511 | */ | 
|---|
| 2512 |  | 
|---|
| 2513 | NODE * | 
|---|
| 2514 | install(char *name, NODE *value) | 
|---|
| 2515 | { | 
|---|
| 2516 | register NODE *hp; | 
|---|
| 2517 | register size_t len; | 
|---|
| 2518 | register int bucket; | 
|---|
| 2519 |  | 
|---|
| 2520 | var_count++; | 
|---|
| 2521 | len = strlen(name); | 
|---|
| 2522 | bucket = hash(name, len, (unsigned long) HASHSIZE); | 
|---|
| 2523 | getnode(hp); | 
|---|
| 2524 | hp->type = Node_hashnode; | 
|---|
| 2525 | hp->hnext = variables[bucket]; | 
|---|
| 2526 | variables[bucket] = hp; | 
|---|
| 2527 | hp->hlength = len; | 
|---|
| 2528 | hp->hvalue = value; | 
|---|
| 2529 | hp->hname = name; | 
|---|
| 2530 | hp->hvalue->vname = name; | 
|---|
| 2531 | return hp->hvalue; | 
|---|
| 2532 | } | 
|---|
| 2533 |  | 
|---|
| 2534 | /* lookup --- find the most recent hash node for name installed by install */ | 
|---|
| 2535 |  | 
|---|
| 2536 | NODE * | 
|---|
| 2537 | lookup(const char *name) | 
|---|
| 2538 | { | 
|---|
| 2539 | register NODE *bucket; | 
|---|
| 2540 | register size_t len; | 
|---|
| 2541 |  | 
|---|
| 2542 | len = strlen(name); | 
|---|
| 2543 | for (bucket = variables[hash(name, len, (unsigned long) HASHSIZE)]; | 
|---|
| 2544 | bucket != NULL; bucket = bucket->hnext) | 
|---|
| 2545 | if (bucket->hlength == len && STREQN(bucket->hname, name, len)) | 
|---|
| 2546 | return bucket->hvalue; | 
|---|
| 2547 |  | 
|---|
| 2548 | return NULL; | 
|---|
| 2549 | } | 
|---|
| 2550 |  | 
|---|
| 2551 | /* var_comp --- compare two variable names */ | 
|---|
| 2552 |  | 
|---|
| 2553 | static int | 
|---|
| 2554 | var_comp(const void *v1, const void *v2) | 
|---|
| 2555 | { | 
|---|
| 2556 | const NODE *const *npp1, *const *npp2; | 
|---|
| 2557 | const NODE *n1, *n2; | 
|---|
| 2558 | int minlen; | 
|---|
| 2559 |  | 
|---|
| 2560 | npp1 = (const NODE *const *) v1; | 
|---|
| 2561 | npp2 = (const NODE *const *) v2; | 
|---|
| 2562 | n1 = *npp1; | 
|---|
| 2563 | n2 = *npp2; | 
|---|
| 2564 |  | 
|---|
| 2565 | if (n1->hlength > n2->hlength) | 
|---|
| 2566 | minlen = n1->hlength; | 
|---|
| 2567 | else | 
|---|
| 2568 | minlen = n2->hlength; | 
|---|
| 2569 |  | 
|---|
| 2570 | return strncmp(n1->hname, n2->hname, minlen); | 
|---|
| 2571 | } | 
|---|
| 2572 |  | 
|---|
| 2573 | /* valinfo --- dump var info */ | 
|---|
| 2574 |  | 
|---|
| 2575 | static void | 
|---|
| 2576 | valinfo(NODE *n, FILE *fp) | 
|---|
| 2577 | { | 
|---|
| 2578 | if (n->flags & STRING) { | 
|---|
| 2579 | fprintf(fp, "string ("); | 
|---|
| 2580 | pp_string_fp(fp, n->stptr, n->stlen, '"', FALSE); | 
|---|
| 2581 | fprintf(fp, ")\n"); | 
|---|
| 2582 | } else if (n->flags & NUMBER) | 
|---|
| 2583 | fprintf(fp, "number (%.17g)\n", n->numbr); | 
|---|
| 2584 | else if (n->flags & STRCUR) { | 
|---|
| 2585 | fprintf(fp, "string value ("); | 
|---|
| 2586 | pp_string_fp(fp, n->stptr, n->stlen, '"', FALSE); | 
|---|
| 2587 | fprintf(fp, ")\n"); | 
|---|
| 2588 | } else if (n->flags & NUMCUR) | 
|---|
| 2589 | fprintf(fp, "number value (%.17g)\n", n->numbr); | 
|---|
| 2590 | else | 
|---|
| 2591 | fprintf(fp, "?? flags %s\n", flags2str(n->flags)); | 
|---|
| 2592 | } | 
|---|
| 2593 |  | 
|---|
| 2594 |  | 
|---|
| 2595 | /* dump_vars --- dump the symbol table */ | 
|---|
| 2596 |  | 
|---|
| 2597 | void | 
|---|
| 2598 | dump_vars(const char *fname) | 
|---|
| 2599 | { | 
|---|
| 2600 | int i, j; | 
|---|
| 2601 | NODE **table; | 
|---|
| 2602 | NODE *p; | 
|---|
| 2603 | FILE *fp; | 
|---|
| 2604 |  | 
|---|
| 2605 | emalloc(table, NODE **, var_count * sizeof(NODE *), "dump_vars"); | 
|---|
| 2606 |  | 
|---|
| 2607 | if (fname == NULL) | 
|---|
| 2608 | fp = stderr; | 
|---|
| 2609 | else if ((fp = fopen(fname, "w")) == NULL) { | 
|---|
| 2610 | warning(_("could not open `%s' for writing (%s)"), fname, strerror(errno)); | 
|---|
| 2611 | warning(_("sending profile to standard error")); | 
|---|
| 2612 | fp = stderr; | 
|---|
| 2613 | } | 
|---|
| 2614 |  | 
|---|
| 2615 | for (i = j = 0; i < HASHSIZE; i++) | 
|---|
| 2616 | for (p = variables[i]; p != NULL; p = p->hnext) | 
|---|
| 2617 | table[j++] = p; | 
|---|
| 2618 |  | 
|---|
| 2619 | assert(j == var_count); | 
|---|
| 2620 |  | 
|---|
| 2621 | /* Shazzam! */ | 
|---|
| 2622 | qsort(table, j, sizeof(NODE *), var_comp); | 
|---|
| 2623 |  | 
|---|
| 2624 | for (i = 0; i < j; i++) { | 
|---|
| 2625 | p = table[i]; | 
|---|
| 2626 | if (p->hvalue->type == Node_func) | 
|---|
| 2627 | continue; | 
|---|
| 2628 | fprintf(fp, "%.*s: ", (int) p->hlength, p->hname); | 
|---|
| 2629 | if (p->hvalue->type == Node_var_array) | 
|---|
| 2630 | fprintf(fp, "array, %ld elements\n", p->hvalue->table_size); | 
|---|
| 2631 | else if (p->hvalue->type == Node_var_new) | 
|---|
| 2632 | fprintf(fp, "unused variable\n"); | 
|---|
| 2633 | else if (p->hvalue->type == Node_var) | 
|---|
| 2634 | valinfo(p->hvalue->var_value, fp); | 
|---|
| 2635 | else { | 
|---|
| 2636 | NODE **lhs = get_lhs(p->hvalue, NULL, FALSE); | 
|---|
| 2637 |  | 
|---|
| 2638 | valinfo(*lhs, fp); | 
|---|
| 2639 | } | 
|---|
| 2640 | } | 
|---|
| 2641 |  | 
|---|
| 2642 | if (fp != stderr && fclose(fp) != 0) | 
|---|
| 2643 | warning(_("%s: close failed (%s)"), fname, strerror(errno)); | 
|---|
| 2644 |  | 
|---|
| 2645 | free(table); | 
|---|
| 2646 | } | 
|---|
| 2647 |  | 
|---|
| 2648 | /* release_all_vars --- free all variable memory */ | 
|---|
| 2649 |  | 
|---|
| 2650 | void | 
|---|
| 2651 | release_all_vars() | 
|---|
| 2652 | { | 
|---|
| 2653 | int i; | 
|---|
| 2654 | NODE *p, *next; | 
|---|
| 2655 |  | 
|---|
| 2656 | for (i = 0; i < HASHSIZE; i++) | 
|---|
| 2657 | for (p = variables[i]; p != NULL; p = next) { | 
|---|
| 2658 | next = p->hnext; | 
|---|
| 2659 |  | 
|---|
| 2660 | if (p->hvalue->type == Node_func) | 
|---|
| 2661 | continue; | 
|---|
| 2662 | else if (p->hvalue->type == Node_var_array) | 
|---|
| 2663 | assoc_clear(p->hvalue); | 
|---|
| 2664 | else if (p->hvalue->type != Node_var_new) { | 
|---|
| 2665 | NODE **lhs = get_lhs(p->hvalue, NULL, FALSE); | 
|---|
| 2666 |  | 
|---|
| 2667 | unref(*lhs); | 
|---|
| 2668 | } | 
|---|
| 2669 | unref(p); | 
|---|
| 2670 | } | 
|---|
| 2671 | } | 
|---|
| 2672 |  | 
|---|
/* finfo --- for use in comparison and sorting of function names */

/*
 * One record per function found in the symbol table; arrays of these
 * are filled in by dump_funcs() and shadow_funcs() and sorted with
 * fcompare().
 */

struct finfo {
	const char *name;	/* function name, taken from the hash node's hname */
	size_t nlen;		/* length of name, taken from the hash node's hlength */
	NODE *func;		/* the Node_func node, taken from the hash node's hvalue */
};
|---|
| 2680 |  | 
|---|
| 2681 | /* fcompare --- comparison function for qsort */ | 
|---|
| 2682 |  | 
|---|
| 2683 | static int | 
|---|
| 2684 | fcompare(const void *p1, const void *p2) | 
|---|
| 2685 | { | 
|---|
| 2686 | const struct finfo *f1, *f2; | 
|---|
| 2687 | int minlen; | 
|---|
| 2688 |  | 
|---|
| 2689 | f1 = (const struct finfo *) p1; | 
|---|
| 2690 | f2 = (const struct finfo *) p2; | 
|---|
| 2691 |  | 
|---|
| 2692 | if (f1->nlen > f2->nlen) | 
|---|
| 2693 | minlen = f2->nlen; | 
|---|
| 2694 | else | 
|---|
| 2695 | minlen = f1->nlen; | 
|---|
| 2696 |  | 
|---|
| 2697 | return strncmp(f1->name, f2->name, minlen); | 
|---|
| 2698 | } | 
|---|
| 2699 |  | 
|---|
| 2700 | /* dump_funcs --- print all functions */ | 
|---|
| 2701 |  | 
|---|
| 2702 | void | 
|---|
| 2703 | dump_funcs() | 
|---|
| 2704 | { | 
|---|
| 2705 | int i, j; | 
|---|
| 2706 | NODE *p; | 
|---|
| 2707 | struct finfo *tab = NULL; | 
|---|
| 2708 |  | 
|---|
| 2709 | /* | 
|---|
| 2710 | * Walk through symbol table countng functions. | 
|---|
| 2711 | * Could be more than func_count if there are | 
|---|
| 2712 | * extension functions. | 
|---|
| 2713 | */ | 
|---|
| 2714 | for (i = j = 0; i < HASHSIZE; i++) { | 
|---|
| 2715 | for (p = variables[i]; p != NULL; p = p->hnext) { | 
|---|
| 2716 | if (p->hvalue->type == Node_func) { | 
|---|
| 2717 | j++; | 
|---|
| 2718 | } | 
|---|
| 2719 | } | 
|---|
| 2720 | } | 
|---|
| 2721 |  | 
|---|
| 2722 | if (j == 0) | 
|---|
| 2723 | return; | 
|---|
| 2724 |  | 
|---|
| 2725 | emalloc(tab, struct finfo *, j * sizeof(struct finfo), "dump_funcs"); | 
|---|
| 2726 |  | 
|---|
| 2727 | /* now walk again, copying info */ | 
|---|
| 2728 | for (i = j = 0; i < HASHSIZE; i++) { | 
|---|
| 2729 | for (p = variables[i]; p != NULL; p = p->hnext) { | 
|---|
| 2730 | if (p->hvalue->type == Node_func) { | 
|---|
| 2731 | tab[j].name = p->hname; | 
|---|
| 2732 | tab[j].nlen = p->hlength; | 
|---|
| 2733 | tab[j].func = p->hvalue; | 
|---|
| 2734 | j++; | 
|---|
| 2735 | } | 
|---|
| 2736 | } | 
|---|
| 2737 | } | 
|---|
| 2738 |  | 
|---|
| 2739 |  | 
|---|
| 2740 | /* Shazzam! */ | 
|---|
| 2741 | qsort(tab, j, sizeof(struct finfo), fcompare); | 
|---|
| 2742 |  | 
|---|
| 2743 | for (i = 0; i < j; i++) | 
|---|
| 2744 | pp_func(tab[i].name, tab[i].nlen, tab[i].func); | 
|---|
| 2745 |  | 
|---|
| 2746 | free(tab); | 
|---|
| 2747 | } | 
|---|
| 2748 |  | 
|---|
| 2749 | /* shadow_funcs --- check all functions for parameters that shadow globals */ | 
|---|
| 2750 |  | 
|---|
| 2751 | void | 
|---|
| 2752 | shadow_funcs() | 
|---|
| 2753 | { | 
|---|
| 2754 | int i, j; | 
|---|
| 2755 | NODE *p; | 
|---|
| 2756 | struct finfo *tab; | 
|---|
| 2757 | static int calls = 0; | 
|---|
| 2758 | int shadow = FALSE; | 
|---|
| 2759 |  | 
|---|
| 2760 | if (func_count == 0) | 
|---|
| 2761 | return; | 
|---|
| 2762 |  | 
|---|
| 2763 | if (calls++ != 0) | 
|---|
| 2764 | fatal(_("shadow_funcs() called twice!")); | 
|---|
| 2765 |  | 
|---|
| 2766 | emalloc(tab, struct finfo *, func_count * sizeof(struct finfo), "shadow_funcs"); | 
|---|
| 2767 |  | 
|---|
| 2768 | for (i = j = 0; i < HASHSIZE; i++) { | 
|---|
| 2769 | for (p = variables[i]; p != NULL; p = p->hnext) { | 
|---|
| 2770 | if (p->hvalue->type == Node_func) { | 
|---|
| 2771 | tab[j].name = p->hname; | 
|---|
| 2772 | tab[j].nlen = p->hlength; | 
|---|
| 2773 | tab[j].func = p->hvalue; | 
|---|
| 2774 | j++; | 
|---|
| 2775 | } | 
|---|
| 2776 | } | 
|---|
| 2777 | } | 
|---|
| 2778 |  | 
|---|
| 2779 | assert(j == func_count); | 
|---|
| 2780 |  | 
|---|
| 2781 | /* Shazzam! */ | 
|---|
| 2782 | qsort(tab, func_count, sizeof(struct finfo), fcompare); | 
|---|
| 2783 |  | 
|---|
| 2784 | for (i = 0; i < j; i++) | 
|---|
| 2785 | shadow |= parms_shadow(tab[i].name, tab[i].func); | 
|---|
| 2786 |  | 
|---|
| 2787 | free(tab); | 
|---|
| 2788 |  | 
|---|
| 2789 | /* End with fatal if the user requested it.  */ | 
|---|
| 2790 | if (shadow && lintfunc != warning) | 
|---|
| 2791 | lintwarn(_("there were shadowed variables.")); | 
|---|
| 2792 | } | 
|---|
| 2793 |  | 
|---|
| 2794 | /* | 
|---|
| 2795 | * append_right: | 
|---|
| 2796 | * Add new to the rightmost branch of LIST.  This uses n^2 time, so we make | 
|---|
| 2797 | * a simple attempt at optimizing it. | 
|---|
| 2798 | */ | 
|---|
| 2799 |  | 
|---|
| 2800 | static NODE * | 
|---|
| 2801 | append_right(NODE *list, NODE *new) | 
|---|
| 2802 | { | 
|---|
| 2803 | register NODE *oldlist; | 
|---|
| 2804 | static NODE *savefront = NULL, *savetail = NULL; | 
|---|
| 2805 |  | 
|---|
| 2806 | if (list == NULL || new == NULL) | 
|---|
| 2807 | return list; | 
|---|
| 2808 |  | 
|---|
| 2809 | oldlist = list; | 
|---|
| 2810 | if (savefront == oldlist) | 
|---|
| 2811 | list = savetail; /* Be careful: maybe list->rnode != NULL */ | 
|---|
| 2812 | else | 
|---|
| 2813 | savefront = oldlist; | 
|---|
| 2814 |  | 
|---|
| 2815 | while (list->rnode != NULL) | 
|---|
| 2816 | list = list->rnode; | 
|---|
| 2817 | savetail = list->rnode = new; | 
|---|
| 2818 | return oldlist; | 
|---|
| 2819 | } | 
|---|
| 2820 |  | 
|---|
| 2821 | /* | 
|---|
| 2822 | * append_pattern: | 
|---|
| 2823 | * A wrapper around append_right, used for rule lists. | 
|---|
| 2824 | */ | 
|---|
| 2825 | static inline NODE * | 
|---|
| 2826 | append_pattern(NODE **list, NODE *patt) | 
|---|
| 2827 | { | 
|---|
| 2828 | NODE *n = node(patt, Node_rule_node, (NODE *) NULL); | 
|---|
| 2829 |  | 
|---|
| 2830 | if (*list == NULL) | 
|---|
| 2831 | *list = n; | 
|---|
| 2832 | else { | 
|---|
| 2833 | NODE *n1 = node(n, Node_rule_list, (NODE *) NULL); | 
|---|
| 2834 | if ((*list)->type != Node_rule_list) | 
|---|
| 2835 | *list = node(*list, Node_rule_list, n1); | 
|---|
| 2836 | else | 
|---|
| 2837 | (void) append_right(*list, n1); | 
|---|
| 2838 | } | 
|---|
| 2839 | return n; | 
|---|
| 2840 | } | 
|---|
| 2841 |  | 
|---|
| 2842 | /* | 
|---|
| 2843 | * func_install: | 
|---|
| 2844 | * check if name is already installed;  if so, it had better have Null value, | 
|---|
| 2845 | * in which case def is added as the value. Otherwise, install name with def | 
|---|
| 2846 | * as value. | 
|---|
| 2847 | * | 
|---|
| 2848 | * Extra work, build up and save a list of the parameter names in a table | 
|---|
| 2849 | * and hang it off params->parmlist. This is used to set the `vname' field | 
|---|
| 2850 | * of each function parameter during a function call. See eval.c. | 
|---|
| 2851 | */ | 
|---|
| 2852 |  | 
|---|
| 2853 | static void | 
|---|
| 2854 | func_install(NODE *params, NODE *def) | 
|---|
| 2855 | { | 
|---|
| 2856 | NODE *r, *n, *thisfunc; | 
|---|
| 2857 | char **pnames, *names, *sp; | 
|---|
| 2858 | size_t pcount = 0, space = 0; | 
|---|
| 2859 | int i; | 
|---|
| 2860 |  | 
|---|
| 2861 | /* check for function foo(foo) { ... }.  bleah. */ | 
|---|
| 2862 | for (n = params->rnode; n != NULL; n = n->rnode) { | 
|---|
| 2863 | if (strcmp(n->param, params->param) == 0) | 
|---|
| 2864 | fatal(_("function `%s': can't use function name as parameter name"), | 
|---|
| 2865 | params->param); | 
|---|
| 2866 | } | 
|---|
| 2867 |  | 
|---|
| 2868 | thisfunc = NULL;        /* turn off warnings */ | 
|---|
| 2869 |  | 
|---|
| 2870 | /* symbol table managment */ | 
|---|
| 2871 | pop_var(params, FALSE); | 
|---|
| 2872 | r = lookup(params->param); | 
|---|
| 2873 | if (r != NULL) { | 
|---|
| 2874 | fatal(_("function name `%s' previously defined"), params->param); | 
|---|
| 2875 | } else if (params->param == builtin_func)       /* not a valid function name */ | 
|---|
| 2876 | goto remove_params; | 
|---|
| 2877 |  | 
|---|
| 2878 | /* install the function */ | 
|---|
| 2879 | thisfunc = node(params, Node_func, def); | 
|---|
| 2880 | (void) install(params->param, thisfunc); | 
|---|
| 2881 |  | 
|---|
| 2882 | /* figure out amount of space to allocate for variable names */ | 
|---|
| 2883 | for (n = params->rnode; n != NULL; n = n->rnode) { | 
|---|
| 2884 | pcount++; | 
|---|
| 2885 | space += strlen(n->param) + 1; | 
|---|
| 2886 | } | 
|---|
| 2887 |  | 
|---|
| 2888 | /* allocate it and fill it in */ | 
|---|
| 2889 | if (pcount != 0) { | 
|---|
| 2890 | emalloc(names, char *, space, "func_install"); | 
|---|
| 2891 | emalloc(pnames, char **, pcount * sizeof(char *), "func_install"); | 
|---|
| 2892 | sp = names; | 
|---|
| 2893 | for (i = 0, n = params->rnode; i < pcount; i++, n = n->rnode) { | 
|---|
| 2894 | pnames[i] = sp; | 
|---|
| 2895 | strcpy(sp, n->param); | 
|---|
| 2896 | sp += strlen(n->param) + 1; | 
|---|
| 2897 | } | 
|---|
| 2898 | thisfunc->parmlist = pnames; | 
|---|
| 2899 | } else { | 
|---|
| 2900 | thisfunc->parmlist = NULL; | 
|---|
| 2901 | } | 
|---|
| 2902 |  | 
|---|
| 2903 | /* update lint table info */ | 
|---|
| 2904 | func_use(params->param, FUNC_DEFINE); | 
|---|
| 2905 |  | 
|---|
| 2906 | func_count++;   /* used by profiling / pretty printer */ | 
|---|
| 2907 |  | 
|---|
| 2908 | remove_params: | 
|---|
| 2909 | /* remove params from symbol table */ | 
|---|
| 2910 | pop_params(params->rnode); | 
|---|
| 2911 | } | 
|---|
| 2912 |  | 
|---|
| 2913 | /* pop_var --- remove a variable from the symbol table */ | 
|---|
| 2914 |  | 
|---|
| 2915 | static void | 
|---|
| 2916 | pop_var(NODE *np, int freeit) | 
|---|
| 2917 | { | 
|---|
| 2918 | register NODE *bucket, **save; | 
|---|
| 2919 | register size_t len; | 
|---|
| 2920 | char *name; | 
|---|
| 2921 |  | 
|---|
| 2922 | name = np->param; | 
|---|
| 2923 | len = strlen(name); | 
|---|
| 2924 | save = &(variables[hash(name, len, (unsigned long) HASHSIZE)]); | 
|---|
| 2925 | for (bucket = *save; bucket != NULL; bucket = bucket->hnext) { | 
|---|
| 2926 | if (len == bucket->hlength && STREQN(bucket->hname, name, len)) { | 
|---|
| 2927 | var_count--; | 
|---|
| 2928 | *save = bucket->hnext; | 
|---|
| 2929 | freenode(bucket); | 
|---|
| 2930 | if (freeit) | 
|---|
| 2931 | free(np->param); | 
|---|
| 2932 | return; | 
|---|
| 2933 | } | 
|---|
| 2934 | save = &(bucket->hnext); | 
|---|
| 2935 | } | 
|---|
| 2936 | } | 
|---|
| 2937 |  | 
|---|
| 2938 | /* pop_params --- remove list of function parameters from symbol table */ | 
|---|
| 2939 |  | 
|---|
| 2940 | /* | 
|---|
| 2941 | * pop parameters out of the symbol table. do this in reverse order to | 
|---|
| 2942 | * avoid reading freed memory if there were duplicated parameters. | 
|---|
| 2943 | */ | 
|---|
| 2944 | static void | 
|---|
| 2945 | pop_params(NODE *params) | 
|---|
| 2946 | { | 
|---|
| 2947 | if (params == NULL) | 
|---|
| 2948 | return; | 
|---|
| 2949 | pop_params(params->rnode); | 
|---|
| 2950 | pop_var(params, TRUE); | 
|---|
| 2951 | } | 
|---|
| 2952 |  | 
|---|
| 2953 | /* make_param --- make NAME into a function parameter */ | 
|---|
| 2954 |  | 
|---|
| 2955 | static NODE * | 
|---|
| 2956 | make_param(char *name) | 
|---|
| 2957 | { | 
|---|
| 2958 | NODE *r; | 
|---|
| 2959 |  | 
|---|
| 2960 | getnode(r); | 
|---|
| 2961 | r->type = Node_param_list; | 
|---|
| 2962 | r->rnode = NULL; | 
|---|
| 2963 | r->param = name; | 
|---|
| 2964 | r->param_cnt = param_counter++; | 
|---|
| 2965 | return (install(name, r)); | 
|---|
| 2966 | } | 
|---|
| 2967 |  | 
|---|
/* fdesc --- record of how often a function name was defined and used */

struct fdesc {
	char *name;		/* private copy of the function name */
	short used;		/* bumped by func_use() for anything but FUNC_DEFINE */
	short defined;		/* bumped by func_use() for FUNC_DEFINE */
	struct fdesc *next;	/* next record on the same hash chain */
};

/* hash table of the records, indexed by hash() of the name */
static struct fdesc *ftable[HASHSIZE];
|---|
| 2974 |  | 
|---|
| 2975 | /* func_use --- track uses and definitions of functions */ | 
|---|
| 2976 |  | 
|---|
| 2977 | static void | 
|---|
| 2978 | func_use(const char *name, enum defref how) | 
|---|
| 2979 | { | 
|---|
| 2980 | struct fdesc *fp; | 
|---|
| 2981 | int len; | 
|---|
| 2982 | int ind; | 
|---|
| 2983 |  | 
|---|
| 2984 | len = strlen(name); | 
|---|
| 2985 | ind = hash(name, len, HASHSIZE); | 
|---|
| 2986 |  | 
|---|
| 2987 | for (fp = ftable[ind]; fp != NULL; fp = fp->next) { | 
|---|
| 2988 | if (strcmp(fp->name, name) == 0) { | 
|---|
| 2989 | if (how == FUNC_DEFINE) | 
|---|
| 2990 | fp->defined++; | 
|---|
| 2991 | else | 
|---|
| 2992 | fp->used++; | 
|---|
| 2993 | return; | 
|---|
| 2994 | } | 
|---|
| 2995 | } | 
|---|
| 2996 |  | 
|---|
| 2997 | /* not in the table, fall through to allocate a new one */ | 
|---|
| 2998 |  | 
|---|
| 2999 | emalloc(fp, struct fdesc *, sizeof(struct fdesc), "func_use"); | 
|---|
| 3000 | memset(fp, '\0', sizeof(struct fdesc)); | 
|---|
| 3001 | emalloc(fp->name, char *, len + 1, "func_use"); | 
|---|
| 3002 | strcpy(fp->name, name); | 
|---|
| 3003 | if (how == FUNC_DEFINE) | 
|---|
| 3004 | fp->defined++; | 
|---|
| 3005 | else | 
|---|
| 3006 | fp->used++; | 
|---|
| 3007 | fp->next = ftable[ind]; | 
|---|
| 3008 | ftable[ind] = fp; | 
|---|
| 3009 | } | 
|---|
| 3010 |  | 
|---|
| 3011 | /* check_funcs --- verify functions that are called but not defined */ | 
|---|
| 3012 |  | 
|---|
| 3013 | static void | 
|---|
| 3014 | check_funcs() | 
|---|
| 3015 | { | 
|---|
| 3016 | struct fdesc *fp, *next; | 
|---|
| 3017 | int i; | 
|---|
| 3018 |  | 
|---|
| 3019 | for (i = 0; i < HASHSIZE; i++) { | 
|---|
| 3020 | for (fp = ftable[i]; fp != NULL; fp = fp->next) { | 
|---|
| 3021 | #ifdef REALLYMEAN | 
|---|
| 3022 | /* making this the default breaks old code. sigh. */ | 
|---|
| 3023 | if (fp->defined == 0) { | 
|---|
| 3024 | error( | 
|---|
| 3025 | _("function `%s' called but never defined"), fp->name); | 
|---|
| 3026 | errcount++; | 
|---|
| 3027 | } | 
|---|
| 3028 | #else | 
|---|
| 3029 | if (do_lint && fp->defined == 0) | 
|---|
| 3030 | lintwarn( | 
|---|
| 3031 | _("function `%s' called but never defined"), fp->name); | 
|---|
| 3032 | #endif | 
|---|
| 3033 | if (do_lint && fp->used == 0) { | 
|---|
| 3034 | lintwarn(_("function `%s' defined but never called"), | 
|---|
| 3035 | fp->name); | 
|---|
| 3036 | } | 
|---|
| 3037 | } | 
|---|
| 3038 | } | 
|---|
| 3039 |  | 
|---|
| 3040 | /* now let's free all the memory */ | 
|---|
| 3041 | for (i = 0; i < HASHSIZE; i++) { | 
|---|
| 3042 | for (fp = ftable[i]; fp != NULL; fp = next) { | 
|---|
| 3043 | next = fp->next; | 
|---|
| 3044 | free(fp->name); | 
|---|
| 3045 | free(fp); | 
|---|
| 3046 | } | 
|---|
| 3047 | } | 
|---|
| 3048 | } | 
|---|
| 3049 |  | 
|---|
| 3050 | /* param_sanity --- look for parameters that are regexp constants */ | 
|---|
| 3051 |  | 
|---|
| 3052 | static void | 
|---|
| 3053 | param_sanity(NODE *arglist) | 
|---|
| 3054 | { | 
|---|
| 3055 | NODE *argp, *arg; | 
|---|
| 3056 | int i; | 
|---|
| 3057 |  | 
|---|
| 3058 | for (i = 1, argp = arglist; argp != NULL; argp = argp->rnode, i++) { | 
|---|
| 3059 | arg = argp->lnode; | 
|---|
| 3060 | if (arg->type == Node_regex) | 
|---|
| 3061 | warning(_("regexp constant for parameter #%d yields boolean value"), i); | 
|---|
| 3062 | } | 
|---|
| 3063 | } | 
|---|
| 3064 |  | 
|---|
| 3065 | /* deferred variables --- those that are only defined if needed. */ | 
|---|
| 3066 |  | 
|---|
| 3067 | /* | 
|---|
| 3068 | * Is there any reason to use a hash table for deferred variables?  At the | 
|---|
| 3069 | * moment, there are only 1 to 3 such variables, so it may not be worth | 
|---|
| 3070 | * the overhead.  If more modules start using this facility, it should | 
|---|
| 3071 | * probably be converted into a hash table. | 
|---|
| 3072 | */ | 
|---|
| 3073 |  | 
|---|
| 3074 | static struct deferred_variable { | 
|---|
| 3075 | NODE *(*load_func)(void); | 
|---|
| 3076 | struct deferred_variable *next; | 
|---|
| 3077 | char name[1];   /* variable-length array */ | 
|---|
| 3078 | } *deferred_variables; | 
|---|
| 3079 |  | 
|---|
| 3080 | /* register_deferred_variable --- add a var name and loading function to the list */ | 
|---|
| 3081 |  | 
|---|
| 3082 | void | 
|---|
| 3083 | register_deferred_variable(const char *name, NODE *(*load_func)(void)) | 
|---|
| 3084 | { | 
|---|
| 3085 | struct deferred_variable *dv; | 
|---|
| 3086 | size_t sl = strlen(name); | 
|---|
| 3087 |  | 
|---|
| 3088 | emalloc(dv, struct deferred_variable *, sizeof(*dv)+sl, | 
|---|
| 3089 | "register_deferred_variable"); | 
|---|
| 3090 | dv->load_func = load_func; | 
|---|
| 3091 | dv->next = deferred_variables; | 
|---|
| 3092 | memcpy(dv->name, name, sl+1); | 
|---|
| 3093 | deferred_variables = dv; | 
|---|
| 3094 | } | 
|---|
| 3095 |  | 
|---|
| 3096 | /* variable --- make sure NAME is in the symbol table */ | 
|---|
| 3097 |  | 
|---|
| 3098 | NODE * | 
|---|
| 3099 | variable(char *name, int can_free, NODETYPE type) | 
|---|
| 3100 | { | 
|---|
| 3101 | register NODE *r; | 
|---|
| 3102 |  | 
|---|
| 3103 | if ((r = lookup(name)) != NULL) { | 
|---|
| 3104 | if (r->type == Node_func) | 
|---|
| 3105 | fatal(_("function `%s' called with space between name and `(',\nor used as a variable or an array"), | 
|---|
| 3106 | r->vname); | 
|---|
| 3107 |  | 
|---|
| 3108 | } else { | 
|---|
| 3109 | /* not found */ | 
|---|
| 3110 | struct deferred_variable *dv; | 
|---|
| 3111 |  | 
|---|
| 3112 | for (dv = deferred_variables; TRUE; dv = dv->next) { | 
|---|
| 3113 | if (dv == NULL) { | 
|---|
| 3114 | /* | 
|---|
| 3115 | * This is the only case in which we may not | 
|---|
| 3116 | * free the string. | 
|---|
| 3117 | */ | 
|---|
| 3118 | NODE *n; | 
|---|
| 3119 |  | 
|---|
| 3120 | if (type == Node_var_array) | 
|---|
| 3121 | n = node(NULL, type, NULL); | 
|---|
| 3122 | else | 
|---|
| 3123 | n = node(Nnull_string, type, NULL); | 
|---|
| 3124 |  | 
|---|
| 3125 | return install(name, n); | 
|---|
| 3126 | } | 
|---|
| 3127 | if (STREQ(name, dv->name)) { | 
|---|
| 3128 | r = (*dv->load_func)(); | 
|---|
| 3129 | break; | 
|---|
| 3130 | } | 
|---|
| 3131 | } | 
|---|
| 3132 | } | 
|---|
| 3133 | if (can_free) | 
|---|
| 3134 | free(name); | 
|---|
| 3135 | return r; | 
|---|
| 3136 | } | 
|---|
| 3137 |  | 
|---|
| 3138 | /* mk_rexp --- make a regular expression constant */ | 
|---|
| 3139 |  | 
|---|
| 3140 | static NODE * | 
|---|
| 3141 | mk_rexp(NODE *exp) | 
|---|
| 3142 | { | 
|---|
| 3143 | NODE *n; | 
|---|
| 3144 |  | 
|---|
| 3145 | if (exp->type == Node_regex) | 
|---|
| 3146 | return exp; | 
|---|
| 3147 |  | 
|---|
| 3148 | getnode(n); | 
|---|
| 3149 | n->type = Node_dynregex; | 
|---|
| 3150 | n->re_exp = exp; | 
|---|
| 3151 | n->re_text = NULL; | 
|---|
| 3152 | n->re_reg = NULL; | 
|---|
| 3153 | n->re_flags = 0; | 
|---|
| 3154 | n->re_cnt = 1; | 
|---|
| 3155 | return n; | 
|---|
| 3156 | } | 
|---|
| 3157 |  | 
|---|
| 3158 | /* isnoeffect --- when used as a statement, has no side effects */ | 
|---|
| 3159 |  | 
|---|
| 3160 | /* | 
|---|
| 3161 | * To be completely general, we should recursively walk the parse | 
|---|
| 3162 | * tree, to make sure that all the subexpressions also have no effect. | 
|---|
| 3163 | * Instead, we just weaken the actual warning that's printed, up above | 
|---|
| 3164 | * in the grammar. | 
|---|
| 3165 | */ | 
|---|
| 3166 |  | 
|---|
| 3167 | static int | 
|---|
| 3168 | isnoeffect(NODETYPE type) | 
|---|
| 3169 | { | 
|---|
| 3170 | switch (type) { | 
|---|
| 3171 | case Node_times: | 
|---|
| 3172 | case Node_quotient: | 
|---|
| 3173 | case Node_mod: | 
|---|
| 3174 | case Node_plus: | 
|---|
| 3175 | case Node_minus: | 
|---|
| 3176 | case Node_subscript: | 
|---|
| 3177 | case Node_concat: | 
|---|
| 3178 | case Node_exp: | 
|---|
| 3179 | case Node_unary_minus: | 
|---|
| 3180 | case Node_field_spec: | 
|---|
| 3181 | case Node_and: | 
|---|
| 3182 | case Node_or: | 
|---|
| 3183 | case Node_equal: | 
|---|
| 3184 | case Node_notequal: | 
|---|
| 3185 | case Node_less: | 
|---|
| 3186 | case Node_greater: | 
|---|
| 3187 | case Node_leq: | 
|---|
| 3188 | case Node_geq: | 
|---|
| 3189 | case Node_match: | 
|---|
| 3190 | case Node_nomatch: | 
|---|
| 3191 | case Node_not: | 
|---|
| 3192 | case Node_val: | 
|---|
| 3193 | case Node_in_array: | 
|---|
| 3194 | case Node_NF: | 
|---|
| 3195 | case Node_NR: | 
|---|
| 3196 | case Node_FNR: | 
|---|
| 3197 | case Node_FS: | 
|---|
| 3198 | case Node_RS: | 
|---|
| 3199 | case Node_FIELDWIDTHS: | 
|---|
| 3200 | case Node_IGNORECASE: | 
|---|
| 3201 | case Node_OFS: | 
|---|
| 3202 | case Node_ORS: | 
|---|
| 3203 | case Node_OFMT: | 
|---|
| 3204 | case Node_CONVFMT: | 
|---|
| 3205 | case Node_BINMODE: | 
|---|
| 3206 | case Node_LINT: | 
|---|
| 3207 | case Node_SUBSEP: | 
|---|
| 3208 | case Node_TEXTDOMAIN: | 
|---|
| 3209 | return TRUE; | 
|---|
| 3210 | default: | 
|---|
| 3211 | break;  /* keeps gcc -Wall happy */ | 
|---|
| 3212 | } | 
|---|
| 3213 |  | 
|---|
| 3214 | return FALSE; | 
|---|
| 3215 | } | 
|---|
| 3216 |  | 
|---|
| 3217 | /* isassignable --- can this node be assigned to? */ | 
|---|
| 3218 |  | 
|---|
| 3219 | static int | 
|---|
| 3220 | isassignable(register NODE *n) | 
|---|
| 3221 | { | 
|---|
| 3222 | switch (n->type) { | 
|---|
| 3223 | case Node_var_new: | 
|---|
| 3224 | case Node_var: | 
|---|
| 3225 | case Node_FIELDWIDTHS: | 
|---|
| 3226 | case Node_RS: | 
|---|
| 3227 | case Node_FS: | 
|---|
| 3228 | case Node_FNR: | 
|---|
| 3229 | case Node_NR: | 
|---|
| 3230 | case Node_NF: | 
|---|
| 3231 | case Node_IGNORECASE: | 
|---|
| 3232 | case Node_OFMT: | 
|---|
| 3233 | case Node_CONVFMT: | 
|---|
| 3234 | case Node_ORS: | 
|---|
| 3235 | case Node_OFS: | 
|---|
| 3236 | case Node_LINT: | 
|---|
| 3237 | case Node_BINMODE: | 
|---|
| 3238 | case Node_SUBSEP: | 
|---|
| 3239 | case Node_TEXTDOMAIN: | 
|---|
| 3240 | case Node_field_spec: | 
|---|
| 3241 | case Node_subscript: | 
|---|
| 3242 | return TRUE; | 
|---|
| 3243 | case Node_param_list: | 
|---|
| 3244 | return ((n->flags & FUNC) == 0);  /* ok if not func name */ | 
|---|
| 3245 | default: | 
|---|
| 3246 | break;  /* keeps gcc -Wall happy */ | 
|---|
| 3247 | } | 
|---|
| 3248 | return FALSE; | 
|---|
| 3249 | } | 
|---|
| 3250 |  | 
|---|
| 3251 | /* stopme --- for debugging */ | 
|---|
| 3252 |  | 
|---|
| 3253 | NODE * | 
|---|
| 3254 | stopme(NODE *tree ATTRIBUTE_UNUSED) | 
|---|
| 3255 | { | 
|---|
| 3256 | return (NODE *) 0; | 
|---|
| 3257 | } | 
|---|
| 3258 |  | 
|---|
| 3259 | /* dumpintlstr --- write out an initial .po file entry for the string */ | 
|---|
| 3260 |  | 
|---|
| 3261 | static void | 
|---|
| 3262 | dumpintlstr(const char *str, size_t len) | 
|---|
| 3263 | { | 
|---|
| 3264 | char *cp; | 
|---|
| 3265 |  | 
|---|
| 3266 | /* See the GNU gettext distribution for details on the file format */ | 
|---|
| 3267 |  | 
|---|
| 3268 | if (source != NULL) { | 
|---|
| 3269 | /* ala the gettext sources, remove leading `./'s */ | 
|---|
| 3270 | for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2) | 
|---|
| 3271 | continue; | 
|---|
| 3272 | printf("#: %s:%d\n", cp, sourceline); | 
|---|
| 3273 | } | 
|---|
| 3274 |  | 
|---|
| 3275 | printf("msgid "); | 
|---|
| 3276 | pp_string_fp(stdout, str, len, '"', TRUE); | 
|---|
| 3277 | putchar('\n'); | 
|---|
| 3278 | printf("msgstr \"\"\n\n"); | 
|---|
| 3279 | fflush(stdout); | 
|---|
| 3280 | } | 
|---|
| 3281 |  | 
|---|
| 3282 | /* dumpintlstr2 --- write out an initial .po file entry for the string and its plural */ | 
|---|
| 3283 |  | 
|---|
| 3284 | static void | 
|---|
| 3285 | dumpintlstr2(const char *str1, size_t len1, const char *str2, size_t len2) | 
|---|
| 3286 | { | 
|---|
| 3287 | char *cp; | 
|---|
| 3288 |  | 
|---|
| 3289 | /* See the GNU gettext distribution for details on the file format */ | 
|---|
| 3290 |  | 
|---|
| 3291 | if (source != NULL) { | 
|---|
| 3292 | /* ala the gettext sources, remove leading `./'s */ | 
|---|
| 3293 | for (cp = source; cp[0] == '.' && cp[1] == '/'; cp += 2) | 
|---|
| 3294 | continue; | 
|---|
| 3295 | printf("#: %s:%d\n", cp, sourceline); | 
|---|
| 3296 | } | 
|---|
| 3297 |  | 
|---|
| 3298 | printf("msgid "); | 
|---|
| 3299 | pp_string_fp(stdout, str1, len1, '"', TRUE); | 
|---|
| 3300 | putchar('\n'); | 
|---|
| 3301 | printf("msgid_plural "); | 
|---|
| 3302 | pp_string_fp(stdout, str2, len2, '"', TRUE); | 
|---|
| 3303 | putchar('\n'); | 
|---|
| 3304 | printf("msgstr[0] \"\"\nmsgstr[1] \"\"\n\n"); | 
|---|
| 3305 | fflush(stdout); | 
|---|
| 3306 | } | 
|---|
| 3307 |  | 
|---|
| 3308 | /* count_args --- count the number of printf arguments */ | 
|---|
| 3309 |  | 
|---|
| 3310 | static void | 
|---|
| 3311 | count_args(NODE *tree) | 
|---|
| 3312 | { | 
|---|
| 3313 | size_t count = 0; | 
|---|
| 3314 | NODE *save_tree; | 
|---|
| 3315 |  | 
|---|
| 3316 | assert(tree->type == Node_K_printf | 
|---|
| 3317 | || (tree->type == Node_builtin && tree->builtin == do_sprintf)); | 
|---|
| 3318 | save_tree = tree; | 
|---|
| 3319 |  | 
|---|
| 3320 | tree = tree->lnode;     /* printf format string */ | 
|---|
| 3321 |  | 
|---|
| 3322 | for (count = 0; tree != NULL; tree = tree->rnode) | 
|---|
| 3323 | count++; | 
|---|
| 3324 |  | 
|---|
| 3325 | save_tree->printf_count = count; | 
|---|
| 3326 | } | 
|---|
| 3327 |  | 
|---|
| 3328 | /* isarray --- can this type be subscripted? */ | 
|---|
| 3329 |  | 
|---|
| 3330 | static int | 
|---|
| 3331 | isarray(NODE *n) | 
|---|
| 3332 | { | 
|---|
| 3333 | switch (n->type) { | 
|---|
| 3334 | case Node_var_new: | 
|---|
| 3335 | case Node_var_array: | 
|---|
| 3336 | return TRUE; | 
|---|
| 3337 | case Node_param_list: | 
|---|
| 3338 | return (n->flags & FUNC) == 0; | 
|---|
| 3339 | case Node_array_ref: | 
|---|
| 3340 | cant_happen(); | 
|---|
| 3341 | break; | 
|---|
| 3342 | default: | 
|---|
| 3343 | break;  /* keeps gcc -Wall happy */ | 
|---|
| 3344 | } | 
|---|
| 3345 |  | 
|---|
| 3346 | return FALSE; | 
|---|
| 3347 | } | 
|---|
| 3348 |  | 
|---|
| 3349 | /* See if name is a special token. */ | 
|---|
| 3350 |  | 
|---|
| 3351 | int | 
|---|
| 3352 | check_special(const char *name) | 
|---|
| 3353 | { | 
|---|
| 3354 | int low, high, mid; | 
|---|
| 3355 | int i; | 
|---|
| 3356 |  | 
|---|
| 3357 | low = 0; | 
|---|
| 3358 | high = (sizeof(tokentab) / sizeof(tokentab[0])) - 1; | 
|---|
| 3359 | while (low <= high) { | 
|---|
| 3360 | mid = (low + high) / 2; | 
|---|
| 3361 | i = *name - tokentab[mid].operator[0]; | 
|---|
| 3362 | if (i == 0) | 
|---|
| 3363 | i = strcmp(name, tokentab[mid].operator); | 
|---|
| 3364 |  | 
|---|
| 3365 | if (i < 0)              /* token < mid */ | 
|---|
| 3366 | high = mid - 1; | 
|---|
| 3367 | else if (i > 0)         /* token > mid */ | 
|---|
| 3368 | low = mid + 1; | 
|---|
| 3369 | else | 
|---|
| 3370 | return mid; | 
|---|
| 3371 | } | 
|---|
| 3372 | return -1; | 
|---|
| 3373 | } | 
|---|