| 1 | /* | 
|---|
| 2 | * node.c -- routines for node management | 
|---|
| 3 | */ | 
|---|
| 4 |  | 
|---|
| 5 | /* | 
|---|
| 6 | * Copyright (C) 1986, 1988, 1989, 1991-2001, 2003-2005 the Free Software Foundation, Inc. | 
|---|
| 7 | * | 
|---|
| 8 | * This file is part of GAWK, the GNU implementation of the | 
|---|
| 9 | * AWK Programming Language. | 
|---|
| 10 | * | 
|---|
| 11 | * GAWK is free software; you can redistribute it and/or modify | 
|---|
| 12 | * it under the terms of the GNU General Public License as published by | 
|---|
| 13 | * the Free Software Foundation; either version 2 of the License, or | 
|---|
| 14 | * (at your option) any later version. | 
|---|
| 15 | * | 
|---|
| 16 | * GAWK is distributed in the hope that it will be useful, | 
|---|
| 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|---|
| 19 | * GNU General Public License for more details. | 
|---|
| 20 | * | 
|---|
| 21 | * You should have received a copy of the GNU General Public License | 
|---|
| 22 | * along with this program; if not, write to the Free Software | 
|---|
| 23 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA | 
|---|
| 24 | */ | 
|---|
| 25 |  | 
|---|
| 26 | #include "awk.h" | 
|---|
| 27 |  | 
|---|
| 28 | /* r_force_number --- force a value to be numeric */ | 
|---|
| 29 |  | 
|---|
| 30 | AWKNUM | 
|---|
| 31 | r_force_number(register NODE *n) | 
|---|
| 32 | { | 
|---|
| 33 | register char *cp; | 
|---|
| 34 | register char *cpend; | 
|---|
| 35 | char save; | 
|---|
| 36 | char *ptr; | 
|---|
| 37 | unsigned int newflags; | 
|---|
| 38 | extern double strtod(); | 
|---|
| 39 |  | 
|---|
| 40 | #ifdef GAWKDEBUG | 
|---|
| 41 | if (n == NULL) | 
|---|
| 42 | cant_happen(); | 
|---|
| 43 | if (n->type != Node_val) | 
|---|
| 44 | cant_happen(); | 
|---|
| 45 | if (n->flags == 0) | 
|---|
| 46 | cant_happen(); | 
|---|
| 47 | if (n->flags & NUMCUR) | 
|---|
| 48 | return n->numbr; | 
|---|
| 49 | #endif | 
|---|
| 50 |  | 
|---|
| 51 | /* all the conditionals are an attempt to avoid the expensive strtod */ | 
|---|
| 52 |  | 
|---|
| 53 | /* Note: only set NUMCUR if we actually convert some digits */ | 
|---|
| 54 |  | 
|---|
| 55 | n->numbr = 0.0; | 
|---|
| 56 |  | 
|---|
| 57 | if (n->stlen == 0) { | 
|---|
| 58 | if (0 && do_lint) | 
|---|
| 59 | lintwarn(_("can't convert string to float")); | 
|---|
| 60 | return 0.0; | 
|---|
| 61 | } | 
|---|
| 62 |  | 
|---|
| 63 | cp = n->stptr; | 
|---|
| 64 | if (ISALPHA(*cp)) { | 
|---|
| 65 | if (0 && do_lint) | 
|---|
| 66 | lintwarn(_("can't convert string to float")); | 
|---|
| 67 | return 0.0; | 
|---|
| 68 | } | 
|---|
| 69 |  | 
|---|
| 70 | cpend = cp + n->stlen; | 
|---|
| 71 | while (cp < cpend && ISSPACE(*cp)) | 
|---|
| 72 | cp++; | 
|---|
| 73 | if (cp == cpend || ISALPHA(*cp)) { | 
|---|
| 74 | if (0 && do_lint) | 
|---|
| 75 | lintwarn(_("can't convert string to float")); | 
|---|
| 76 | return 0.0; | 
|---|
| 77 | } | 
|---|
| 78 |  | 
|---|
| 79 | if (n->flags & MAYBE_NUM) { | 
|---|
| 80 | newflags = NUMBER; | 
|---|
| 81 | n->flags &= ~MAYBE_NUM; | 
|---|
| 82 | } else | 
|---|
| 83 | newflags = 0; | 
|---|
| 84 | if (cpend - cp == 1) { | 
|---|
| 85 | if (ISDIGIT(*cp)) { | 
|---|
| 86 | n->numbr = (AWKNUM)(*cp - '0'); | 
|---|
| 87 | n->flags |= newflags; | 
|---|
| 88 | n->flags |= NUMCUR; | 
|---|
| 89 | } else if (0 && do_lint) | 
|---|
| 90 | lintwarn(_("can't convert string to float")); | 
|---|
| 91 | return n->numbr; | 
|---|
| 92 | } | 
|---|
| 93 |  | 
|---|
| 94 | if (do_non_decimal_data) { | 
|---|
| 95 | errno = 0; | 
|---|
| 96 | if (! do_traditional && isnondecimal(cp, TRUE)) { | 
|---|
| 97 | n->numbr = nondec2awknum(cp, cpend - cp); | 
|---|
| 98 | n->flags |= NUMCUR; | 
|---|
| 99 | goto finish; | 
|---|
| 100 | } | 
|---|
| 101 | } | 
|---|
| 102 |  | 
|---|
| 103 | errno = 0; | 
|---|
| 104 | save = *cpend; | 
|---|
| 105 | *cpend = '\0'; | 
|---|
| 106 | n->numbr = (AWKNUM) strtod((const char *) cp, &ptr); | 
|---|
| 107 |  | 
|---|
| 108 | /* POSIX says trailing space is OK for NUMBER */ | 
|---|
| 109 | while (ISSPACE(*ptr)) | 
|---|
| 110 | ptr++; | 
|---|
| 111 | *cpend = save; | 
|---|
| 112 | finish: | 
|---|
| 113 | /* the >= should be ==, but for SunOS 3.5 strtod() */ | 
|---|
| 114 | if (errno == 0 && ptr >= cpend) { | 
|---|
| 115 | n->flags |= newflags; | 
|---|
| 116 | n->flags |= NUMCUR; | 
|---|
| 117 | } else { | 
|---|
| 118 | if (0 && do_lint && ptr < cpend) | 
|---|
| 119 | lintwarn(_("can't convert string to float")); | 
|---|
| 120 | errno = 0; | 
|---|
| 121 | } | 
|---|
| 122 |  | 
|---|
| 123 | return n->numbr; | 
|---|
| 124 | } | 
|---|
| 125 |  | 
|---|
| 126 | /* | 
|---|
| 127 | * the following lookup table is used as an optimization in force_string | 
|---|
| 128 | * (more complicated) variations on this theme didn't seem to pay off, but | 
|---|
| 129 | * systematic testing might be in order at some point | 
|---|
| 130 | */ | 
|---|
| 131 | static const char *const values[] = { | 
|---|
| 132 | "0", | 
|---|
| 133 | "1", | 
|---|
| 134 | "2", | 
|---|
| 135 | "3", | 
|---|
| 136 | "4", | 
|---|
| 137 | "5", | 
|---|
| 138 | "6", | 
|---|
| 139 | "7", | 
|---|
| 140 | "8", | 
|---|
| 141 | "9", | 
|---|
| 142 | }; | 
|---|
| 143 | #define NVAL    (sizeof(values)/sizeof(values[0])) | 
|---|
| 144 |  | 
|---|
| 145 | /* format_val --- format a numeric value based on format */ | 
|---|
| 146 |  | 
|---|
| 147 | NODE * | 
|---|
| 148 | format_val(const char *format, int index, register NODE *s) | 
|---|
| 149 | { | 
|---|
| 150 | char buf[BUFSIZ]; | 
|---|
| 151 | register char *sp = buf; | 
|---|
| 152 | double val; | 
|---|
| 153 | char *orig, *trans, save; | 
|---|
| 154 |  | 
|---|
| 155 | if (! do_traditional && (s->flags & INTLSTR) != 0) { | 
|---|
| 156 | save = s->stptr[s->stlen]; | 
|---|
| 157 | s->stptr[s->stlen] = '\0'; | 
|---|
| 158 |  | 
|---|
| 159 | orig = s->stptr; | 
|---|
| 160 | trans = dgettext(TEXTDOMAIN, orig); | 
|---|
| 161 |  | 
|---|
| 162 | s->stptr[s->stlen] = save; | 
|---|
| 163 | return tmp_string(trans, strlen(trans)); | 
|---|
| 164 | } | 
|---|
| 165 |  | 
|---|
| 166 | /* not an integral value, or out of range */ | 
|---|
| 167 | if ((val = double_to_int(s->numbr)) != s->numbr | 
|---|
| 168 | || val < LONG_MIN || val > LONG_MAX) { | 
|---|
| 169 | /* | 
|---|
| 170 | * Once upon a time, if GFMT_WORKAROUND wasn't defined, | 
|---|
| 171 | * we just blindly did this: | 
|---|
| 172 | *      sprintf(sp, format, s->numbr); | 
|---|
| 173 | *      s->stlen = strlen(sp); | 
|---|
| 174 | *      s->stfmt = (char) index; | 
|---|
| 175 | * but that's no good if, e.g., OFMT is %s. So we punt, | 
|---|
| 176 | * and just always format the value ourselves. | 
|---|
| 177 | */ | 
|---|
| 178 |  | 
|---|
| 179 | NODE *dummy, *r; | 
|---|
| 180 | unsigned short oflags; | 
|---|
| 181 | extern NODE **fmt_list;          /* declared in eval.c */ | 
|---|
| 182 |  | 
|---|
| 183 | /* create dummy node for a sole use of format_tree */ | 
|---|
| 184 | getnode(dummy); | 
|---|
| 185 | dummy->type = Node_expression_list; | 
|---|
| 186 | dummy->lnode = s; | 
|---|
| 187 | dummy->rnode = NULL; | 
|---|
| 188 | oflags = s->flags; | 
|---|
| 189 | s->flags |= PERM; /* prevent from freeing by format_tree() */ | 
|---|
| 190 | r = format_tree(format, fmt_list[index]->stlen, dummy, 2); | 
|---|
| 191 | s->flags = oflags; | 
|---|
| 192 | s->stfmt = (char) index; | 
|---|
| 193 | s->stlen = r->stlen; | 
|---|
| 194 | if ((s->flags & STRCUR) != 0) | 
|---|
| 195 | free(s->stptr); | 
|---|
| 196 | s->stptr = r->stptr; | 
|---|
| 197 | freenode(r);            /* Do not free_temp(r)!  We want */ | 
|---|
| 198 | freenode(dummy);        /* to keep s->stptr == r->stpr.  */ | 
|---|
| 199 |  | 
|---|
| 200 | goto no_malloc; | 
|---|
| 201 | } else { | 
|---|
| 202 | /* integral value */ | 
|---|
| 203 | /* force conversion to long only once */ | 
|---|
| 204 | register long num = (long) val; | 
|---|
| 205 | if (num < NVAL && num >= 0) { | 
|---|
| 206 | sp = (char *) values[num]; | 
|---|
| 207 | s->stlen = 1; | 
|---|
| 208 | } else { | 
|---|
| 209 | (void) sprintf(sp, "%ld", num); | 
|---|
| 210 | s->stlen = strlen(sp); | 
|---|
| 211 | } | 
|---|
| 212 | s->stfmt = -1; | 
|---|
| 213 | } | 
|---|
| 214 | emalloc(s->stptr, char *, s->stlen + 2, "format_val"); | 
|---|
| 215 | memcpy(s->stptr, sp, s->stlen+1); | 
|---|
| 216 | no_malloc: | 
|---|
| 217 | s->stref = 1; | 
|---|
| 218 | s->flags |= STRCUR; | 
|---|
| 219 | #if defined MBS_SUPPORT | 
|---|
| 220 | if ((s->flags & WSTRCUR) != 0) { | 
|---|
| 221 | assert(s->wstptr != NULL); | 
|---|
| 222 | free(s->wstptr); | 
|---|
| 223 | s->wstptr = NULL; | 
|---|
| 224 | s->wstlen = 0; | 
|---|
| 225 | s->flags &= ~WSTRCUR; | 
|---|
| 226 | } | 
|---|
| 227 | #endif | 
|---|
| 228 | return s; | 
|---|
| 229 | } | 
|---|
| 230 |  | 
|---|
| 231 | /* r_force_string --- force a value to be a string */ | 
|---|
| 232 |  | 
|---|
| 233 | NODE * | 
|---|
| 234 | r_force_string(register NODE *s) | 
|---|
| 235 | { | 
|---|
| 236 | NODE *ret; | 
|---|
| 237 | #ifdef GAWKDEBUG | 
|---|
| 238 | if (s == NULL) | 
|---|
| 239 | cant_happen(); | 
|---|
| 240 | if (s->type != Node_val) | 
|---|
| 241 | cant_happen(); | 
|---|
| 242 | if (s->stref <= 0) | 
|---|
| 243 | cant_happen(); | 
|---|
| 244 | if ((s->flags & STRCUR) != 0 | 
|---|
| 245 | && (s->stfmt == -1 || s->stfmt == CONVFMTidx)) | 
|---|
| 246 | return s; | 
|---|
| 247 | #endif | 
|---|
| 248 |  | 
|---|
| 249 | ret = format_val(CONVFMT, CONVFMTidx, s); | 
|---|
| 250 | return ret; | 
|---|
| 251 | } | 
|---|
| 252 |  | 
|---|
| 253 | /* | 
|---|
| 254 | * dupnode: | 
|---|
| 255 | * Duplicate a node.  (For strings, "duplicate" means crank up the | 
|---|
| 256 | * reference count.) | 
|---|
| 257 | */ | 
|---|
| 258 |  | 
|---|
| 259 | NODE * | 
|---|
| 260 | r_dupnode(NODE *n) | 
|---|
| 261 | { | 
|---|
| 262 | register NODE *r; | 
|---|
| 263 |  | 
|---|
| 264 | #ifndef DUPNODE_MACRO | 
|---|
| 265 | if ((n->flags & TEMP) != 0) { | 
|---|
| 266 | n->flags &= ~TEMP; | 
|---|
| 267 | n->flags |= MALLOC; | 
|---|
| 268 | return n; | 
|---|
| 269 | } | 
|---|
| 270 | if ((n->flags & PERM) != 0) | 
|---|
| 271 | return n; | 
|---|
| 272 | #endif | 
|---|
| 273 | if ((n->flags & (MALLOC|STRCUR)) == (MALLOC|STRCUR)) { | 
|---|
| 274 | if (n->stref < LONG_MAX) | 
|---|
| 275 | n->stref++; | 
|---|
| 276 | else | 
|---|
| 277 | n->flags |= PERM; | 
|---|
| 278 | return n; | 
|---|
| 279 | } else if ((n->flags & MALLOC) != 0 && n->type == Node_ahash) { | 
|---|
| 280 | if (n->ahname_ref < LONG_MAX) | 
|---|
| 281 | n->ahname_ref++; | 
|---|
| 282 | else | 
|---|
| 283 | n->flags |= PERM; | 
|---|
| 284 | return n; | 
|---|
| 285 | } | 
|---|
| 286 | getnode(r); | 
|---|
| 287 | *r = *n; | 
|---|
| 288 | r->flags &= ~(PERM|TEMP|FIELD); | 
|---|
| 289 | r->flags |= MALLOC; | 
|---|
| 290 | #if defined MBS_SUPPORT | 
|---|
| 291 | r->wstptr = NULL; | 
|---|
| 292 | #endif /* defined MBS_SUPPORT */ | 
|---|
| 293 | if (n->type == Node_val && (n->flags & STRCUR) != 0) { | 
|---|
| 294 | r->stref = 1; | 
|---|
| 295 | emalloc(r->stptr, char *, r->stlen + 2, "dupnode"); | 
|---|
| 296 | memcpy(r->stptr, n->stptr, r->stlen); | 
|---|
| 297 | r->stptr[r->stlen] = '\0'; | 
|---|
| 298 | #if defined MBS_SUPPORT | 
|---|
| 299 | if ((n->flags & WSTRCUR) != 0) { | 
|---|
| 300 | r->wstlen = n->wstlen; | 
|---|
| 301 | emalloc(r->wstptr, wchar_t *, sizeof(wchar_t) * (r->wstlen + 2), "dupnode"); | 
|---|
| 302 | memcpy(r->wstptr, n->wstptr, r->wstlen * sizeof(wchar_t)); | 
|---|
| 303 | r->wstptr[r->wstlen] = L'\0'; | 
|---|
| 304 | r->flags |= WSTRCUR; | 
|---|
| 305 | } | 
|---|
| 306 | #endif /* defined MBS_SUPPORT */ | 
|---|
| 307 | } else if (n->type == Node_ahash && (n->flags & MALLOC) != 0) { | 
|---|
| 308 | r->ahname_ref = 1; | 
|---|
| 309 | emalloc(r->ahname_str, char *, r->ahname_len + 2, "dupnode"); | 
|---|
| 310 | memcpy(r->ahname_str, n->ahname_str, r->ahname_len); | 
|---|
| 311 | r->ahname_str[r->ahname_len] = '\0'; | 
|---|
| 312 | } | 
|---|
| 313 | return r; | 
|---|
| 314 | } | 
|---|
| 315 |  | 
|---|
| 316 | /* copy_node --- force a brand new copy of a node to be allocated */ | 
|---|
| 317 |  | 
|---|
| 318 | NODE * | 
|---|
| 319 | copynode(NODE *old) | 
|---|
| 320 | { | 
|---|
| 321 | NODE *new; | 
|---|
| 322 | int saveflags; | 
|---|
| 323 |  | 
|---|
| 324 | assert(old != NULL); | 
|---|
| 325 | saveflags = old->flags; | 
|---|
| 326 | old->flags &= ~(MALLOC|PERM); | 
|---|
| 327 | new = dupnode(old); | 
|---|
| 328 | old->flags = saveflags; | 
|---|
| 329 | return new; | 
|---|
| 330 | } | 
|---|
| 331 |  | 
|---|
| 332 | /* mk_number --- allocate a node with defined number */ | 
|---|
| 333 |  | 
|---|
| 334 | NODE * | 
|---|
| 335 | mk_number(AWKNUM x, unsigned int flags) | 
|---|
| 336 | { | 
|---|
| 337 | register NODE *r; | 
|---|
| 338 |  | 
|---|
| 339 | getnode(r); | 
|---|
| 340 | r->type = Node_val; | 
|---|
| 341 | r->numbr = x; | 
|---|
| 342 | r->flags = flags; | 
|---|
| 343 | #ifdef GAWKDEBUG | 
|---|
| 344 | r->stref = 1; | 
|---|
| 345 | r->stptr = NULL; | 
|---|
| 346 | r->stlen = 0; | 
|---|
| 347 | #if defined MBS_SUPPORT | 
|---|
| 348 | r->wstptr = NULL; | 
|---|
| 349 | r->wstlen = 0; | 
|---|
| 350 | r->flags &= ~WSTRCUR; | 
|---|
| 351 | #endif /* MBS_SUPPORT */ | 
|---|
| 352 | #endif /* GAWKDEBUG */ | 
|---|
| 353 | return r; | 
|---|
| 354 | } | 
|---|
| 355 |  | 
|---|
| 356 | /* make_str_node --- make a string node */ | 
|---|
| 357 |  | 
|---|
| 358 | NODE * | 
|---|
| 359 | make_str_node(char *s, unsigned long len, int flags) | 
|---|
| 360 | { | 
|---|
| 361 | register NODE *r; | 
|---|
| 362 |  | 
|---|
| 363 | getnode(r); | 
|---|
| 364 | r->type = Node_val; | 
|---|
| 365 | r->flags = (STRING|STRCUR|MALLOC); | 
|---|
| 366 | #if defined MBS_SUPPORT | 
|---|
| 367 | r->wstptr = NULL; | 
|---|
| 368 | r->wstlen = 0; | 
|---|
| 369 | #endif | 
|---|
| 370 | if (flags & ALREADY_MALLOCED) | 
|---|
| 371 | r->stptr = s; | 
|---|
| 372 | else { | 
|---|
| 373 | emalloc(r->stptr, char *, len + 2, s); | 
|---|
| 374 | memcpy(r->stptr, s, len); | 
|---|
| 375 | } | 
|---|
| 376 | r->stptr[len] = '\0'; | 
|---|
| 377 |  | 
|---|
| 378 | if ((flags & SCAN) != 0) {      /* scan for escape sequences */ | 
|---|
| 379 | const char *pf; | 
|---|
| 380 | register char *ptm; | 
|---|
| 381 | register int c; | 
|---|
| 382 | register const char *end; | 
|---|
| 383 | #ifdef MBS_SUPPORT | 
|---|
| 384 | mbstate_t cur_state; | 
|---|
| 385 |  | 
|---|
| 386 | memset(& cur_state, 0, sizeof(cur_state)); | 
|---|
| 387 | #endif | 
|---|
| 388 |  | 
|---|
| 389 | end = &(r->stptr[len]); | 
|---|
| 390 | for (pf = ptm = r->stptr; pf < end;) { | 
|---|
| 391 | #ifdef MBS_SUPPORT | 
|---|
| 392 | /* | 
|---|
| 393 | * Keep multibyte characters together. This avoids | 
|---|
| 394 | * problems if a subsequent byte of a multibyte | 
|---|
| 395 | * character happens to be a backslash. | 
|---|
| 396 | */ | 
|---|
| 397 | if (gawk_mb_cur_max > 1) { | 
|---|
| 398 | int mblen = mbrlen(pf, end-pf, &cur_state); | 
|---|
| 399 |  | 
|---|
| 400 | if (mblen > 1) { | 
|---|
| 401 | int i; | 
|---|
| 402 |  | 
|---|
| 403 | for (i = 0; i < mblen; i++) | 
|---|
| 404 | *ptm++ = *pf++; | 
|---|
| 405 | continue; | 
|---|
| 406 | } | 
|---|
| 407 | } | 
|---|
| 408 | #endif | 
|---|
| 409 | c = *pf++; | 
|---|
| 410 | if (c == '\\') { | 
|---|
| 411 | c = parse_escape(&pf); | 
|---|
| 412 | if (c < 0) { | 
|---|
| 413 | if (do_lint) | 
|---|
| 414 | lintwarn(_("backslash at end of string")); | 
|---|
| 415 | c = '\\'; | 
|---|
| 416 | } | 
|---|
| 417 | *ptm++ = c; | 
|---|
| 418 | } else | 
|---|
| 419 | *ptm++ = c; | 
|---|
| 420 | } | 
|---|
| 421 | len = ptm - r->stptr; | 
|---|
| 422 | erealloc(r->stptr, char *, len + 1, "make_str_node"); | 
|---|
| 423 | r->stptr[len] = '\0'; | 
|---|
| 424 | r->flags |= PERM; | 
|---|
| 425 | } | 
|---|
| 426 | r->stlen = len; | 
|---|
| 427 | r->stref = 1; | 
|---|
| 428 | r->stfmt = -1; | 
|---|
| 429 |  | 
|---|
| 430 | return r; | 
|---|
| 431 | } | 
|---|
| 432 |  | 
|---|
| 433 | /* tmp_string --- allocate a temporary string */ | 
|---|
| 434 |  | 
|---|
| 435 | NODE * | 
|---|
| 436 | tmp_string(char *s, size_t len) | 
|---|
| 437 | { | 
|---|
| 438 | register NODE *r; | 
|---|
| 439 |  | 
|---|
| 440 | r = make_string(s, len); | 
|---|
| 441 | r->flags |= TEMP; | 
|---|
| 442 | return r; | 
|---|
| 443 | } | 
|---|
| 444 |  | 
|---|
| 445 | /* more_nodes --- allocate more nodes */ | 
|---|
| 446 |  | 
|---|
| 447 | #define NODECHUNK       100 | 
|---|
| 448 |  | 
|---|
| 449 | NODE *nextfree = NULL; | 
|---|
| 450 |  | 
|---|
| 451 | NODE * | 
|---|
| 452 | more_nodes() | 
|---|
| 453 | { | 
|---|
| 454 | register NODE *np; | 
|---|
| 455 |  | 
|---|
| 456 | /* get more nodes and initialize list */ | 
|---|
| 457 | emalloc(nextfree, NODE *, NODECHUNK * sizeof(NODE), "more_nodes"); | 
|---|
| 458 | memset(nextfree, 0, NODECHUNK * sizeof(NODE)); | 
|---|
| 459 | for (np = nextfree; np <= &nextfree[NODECHUNK - 1]; np++) { | 
|---|
| 460 | np->nextp = np + 1; | 
|---|
| 461 | } | 
|---|
| 462 | --np; | 
|---|
| 463 | np->nextp = NULL; | 
|---|
| 464 | np = nextfree; | 
|---|
| 465 | nextfree = nextfree->nextp; | 
|---|
| 466 | return np; | 
|---|
| 467 | } | 
|---|
| 468 |  | 
|---|
#ifdef MEMDEBUG
#undef freenode
/*
 * freenode --- release a node back to the pool
 *
 * Function version, compiled only when MEMDEBUG is defined.  With MPROF
 * the node is handed straight to free(); otherwise it is pushed onto
 * the head of the nextfree list.
 */

void
freenode(NODE *it)
{
#ifndef MPROF
#ifndef NO_PROFILING
	it->exec_count = 0;
#endif
	/* add it to head of freelist */
	it->nextp = nextfree;
	nextfree = it;
#else	/* MPROF */
	it->stref = 0;
	free((char *) it);
#endif	/* MPROF */
}
#endif	/* MEMDEBUG */
|---|
| 489 |  | 
|---|
| 490 | /* unref --- remove reference to a particular node */ | 
|---|
| 491 |  | 
|---|
| 492 | void | 
|---|
| 493 | unref(register NODE *tmp) | 
|---|
| 494 | { | 
|---|
| 495 | if (tmp == NULL) | 
|---|
| 496 | return; | 
|---|
| 497 | if ((tmp->flags & PERM) != 0) | 
|---|
| 498 | return; | 
|---|
| 499 | tmp->flags &= ~TEMP; | 
|---|
| 500 | if ((tmp->flags & MALLOC) != 0) { | 
|---|
| 501 | if (tmp->type == Node_ahash) { | 
|---|
| 502 | if (tmp->ahname_ref > 1) { | 
|---|
| 503 | tmp->ahname_ref--; | 
|---|
| 504 | return; | 
|---|
| 505 | } | 
|---|
| 506 | free(tmp->ahname_str); | 
|---|
| 507 | } else if ((tmp->flags & STRCUR) != 0) { | 
|---|
| 508 | if (tmp->stref > 1) { | 
|---|
| 509 | tmp->stref--; | 
|---|
| 510 | return; | 
|---|
| 511 | } | 
|---|
| 512 | free(tmp->stptr); | 
|---|
| 513 | #if defined MBS_SUPPORT | 
|---|
| 514 | if (tmp->wstptr != NULL) { | 
|---|
| 515 | assert((tmp->flags & WSTRCUR) != 0); | 
|---|
| 516 | free(tmp->wstptr); | 
|---|
| 517 | } | 
|---|
| 518 | tmp->flags &= ~WSTRCUR; | 
|---|
| 519 | tmp->wstptr = NULL; | 
|---|
| 520 | tmp->wstlen = 0; | 
|---|
| 521 | #endif | 
|---|
| 522 | } | 
|---|
| 523 | freenode(tmp); | 
|---|
| 524 | return; | 
|---|
| 525 | } | 
|---|
| 526 | if ((tmp->flags & FIELD) != 0) { | 
|---|
| 527 | freenode(tmp); | 
|---|
| 528 | return; | 
|---|
| 529 | } | 
|---|
| 530 | } | 
|---|
| 531 |  | 
|---|
| 532 | /* | 
|---|
| 533 | * parse_escape: | 
|---|
| 534 | * | 
|---|
| 535 | * Parse a C escape sequence.  STRING_PTR points to a variable containing a | 
|---|
| 536 | * pointer to the string to parse.  That pointer is updated past the | 
|---|
| 537 | * characters we use.  The value of the escape sequence is returned. | 
|---|
| 538 | * | 
|---|
| 539 | * A negative value means the sequence \ newline was seen, which is supposed to | 
|---|
| 540 | * be equivalent to nothing at all. | 
|---|
| 541 | * | 
|---|
| 542 | * If \ is followed by a null character, we return a negative value and leave | 
|---|
| 543 | * the string pointer pointing at the null character. | 
|---|
| 544 | * | 
|---|
| 545 | * If \ is followed by 000, we return 0 and leave the string pointer after the | 
|---|
| 546 | * zeros.  A value of 0 does not mean end of string. | 
|---|
| 547 | * | 
|---|
| 548 | * Posix doesn't allow \x. | 
|---|
| 549 | */ | 
|---|
| 550 |  | 
|---|
| 551 | int | 
|---|
| 552 | parse_escape(const char **string_ptr) | 
|---|
| 553 | { | 
|---|
| 554 | register int c = *(*string_ptr)++; | 
|---|
| 555 | register int i; | 
|---|
| 556 | register int count; | 
|---|
| 557 |  | 
|---|
| 558 | switch (c) { | 
|---|
| 559 | case 'a': | 
|---|
| 560 | return BELL; | 
|---|
| 561 | case 'b': | 
|---|
| 562 | return '\b'; | 
|---|
| 563 | case 'f': | 
|---|
| 564 | return '\f'; | 
|---|
| 565 | case 'n': | 
|---|
| 566 | return '\n'; | 
|---|
| 567 | case 'r': | 
|---|
| 568 | return '\r'; | 
|---|
| 569 | case 't': | 
|---|
| 570 | return '\t'; | 
|---|
| 571 | case 'v': | 
|---|
| 572 | return '\v'; | 
|---|
| 573 | case '\n': | 
|---|
| 574 | return -2; | 
|---|
| 575 | case 0: | 
|---|
| 576 | (*string_ptr)--; | 
|---|
| 577 | return -1; | 
|---|
| 578 | case '0': | 
|---|
| 579 | case '1': | 
|---|
| 580 | case '2': | 
|---|
| 581 | case '3': | 
|---|
| 582 | case '4': | 
|---|
| 583 | case '5': | 
|---|
| 584 | case '6': | 
|---|
| 585 | case '7': | 
|---|
| 586 | i = c - '0'; | 
|---|
| 587 | count = 0; | 
|---|
| 588 | while (++count < 3) { | 
|---|
| 589 | if ((c = *(*string_ptr)++) >= '0' && c <= '7') { | 
|---|
| 590 | i *= 8; | 
|---|
| 591 | i += c - '0'; | 
|---|
| 592 | } else { | 
|---|
| 593 | (*string_ptr)--; | 
|---|
| 594 | break; | 
|---|
| 595 | } | 
|---|
| 596 | } | 
|---|
| 597 | return i; | 
|---|
| 598 | case 'x': | 
|---|
| 599 | if (do_lint) { | 
|---|
| 600 | static int didwarn = FALSE; | 
|---|
| 601 |  | 
|---|
| 602 | if (! didwarn) { | 
|---|
| 603 | didwarn = TRUE; | 
|---|
| 604 | lintwarn(_("POSIX does not allow `\\x' escapes")); | 
|---|
| 605 | } | 
|---|
| 606 | } | 
|---|
| 607 | if (do_posix) | 
|---|
| 608 | return ('x'); | 
|---|
| 609 | if (! ISXDIGIT((*string_ptr)[0])) { | 
|---|
| 610 | warning(_("no hex digits in `\\x' escape sequence")); | 
|---|
| 611 | return ('x'); | 
|---|
| 612 | } | 
|---|
| 613 | i = 0; | 
|---|
| 614 | for (;;) { | 
|---|
| 615 | /* do outside test to avoid multiple side effects */ | 
|---|
| 616 | c = *(*string_ptr)++; | 
|---|
| 617 | if (ISXDIGIT(c)) { | 
|---|
| 618 | i *= 16; | 
|---|
| 619 | if (ISDIGIT(c)) | 
|---|
| 620 | i += c - '0'; | 
|---|
| 621 | else if (ISUPPER(c)) | 
|---|
| 622 | i += c - 'A' + 10; | 
|---|
| 623 | else | 
|---|
| 624 | i += c - 'a' + 10; | 
|---|
| 625 | } else { | 
|---|
| 626 | (*string_ptr)--; | 
|---|
| 627 | break; | 
|---|
| 628 | } | 
|---|
| 629 | } | 
|---|
| 630 | return i; | 
|---|
| 631 | case '\\': | 
|---|
| 632 | case '"': | 
|---|
| 633 | return c; | 
|---|
| 634 | default: | 
|---|
| 635 | { | 
|---|
| 636 | static short warned[256]; | 
|---|
| 637 | unsigned char uc = (unsigned char) c; | 
|---|
| 638 |  | 
|---|
| 639 | /* N.B.: use unsigned char here to avoid Latin-1 problems */ | 
|---|
| 640 |  | 
|---|
| 641 | if (! warned[uc]) { | 
|---|
| 642 | warned[uc] = TRUE; | 
|---|
| 643 |  | 
|---|
| 644 | warning(_("escape sequence `\\%c' treated as plain `%c'"), uc, uc); | 
|---|
| 645 | } | 
|---|
| 646 | } | 
|---|
| 647 | return c; | 
|---|
| 648 | } | 
|---|
| 649 | } | 
|---|
| 650 |  | 
|---|
| 651 | /* isnondecimal --- return true if number is not a decimal number */ | 
|---|
| 652 |  | 
|---|
| 653 | int | 
|---|
| 654 | isnondecimal(const char *str, int use_locale) | 
|---|
| 655 | { | 
|---|
| 656 | int dec_point = '.'; | 
|---|
| 657 | #if defined(HAVE_LOCALE_H) | 
|---|
| 658 | /* | 
|---|
| 659 | * loc.decimal_point may not have been initialized yet, | 
|---|
| 660 | * so double check it before using it. | 
|---|
| 661 | */ | 
|---|
| 662 | if (use_locale && loc.decimal_point != NULL && loc.decimal_point[0] != '\0') | 
|---|
| 663 | dec_point = loc.decimal_point[0];       /* XXX --- assumes one char */ | 
|---|
| 664 | #endif | 
|---|
| 665 |  | 
|---|
| 666 | if (str[0] != '0') | 
|---|
| 667 | return FALSE; | 
|---|
| 668 |  | 
|---|
| 669 | /* leading 0x or 0X */ | 
|---|
| 670 | if (str[1] == 'x' || str[1] == 'X') | 
|---|
| 671 | return TRUE; | 
|---|
| 672 |  | 
|---|
| 673 | /* | 
|---|
| 674 | * Numbers with '.', 'e', or 'E' are decimal. | 
|---|
| 675 | * Have to check so that things like 00.34 are handled right. | 
|---|
| 676 | * | 
|---|
| 677 | * These beasts can have trailing whitespace. Deal with that too. | 
|---|
| 678 | */ | 
|---|
| 679 | for (; *str != '\0'; str++) { | 
|---|
| 680 | if (*str == 'e' || *str == 'E' || *str == dec_point) | 
|---|
| 681 | return FALSE; | 
|---|
| 682 | else if (! ISDIGIT(*str)) | 
|---|
| 683 | break; | 
|---|
| 684 | } | 
|---|
| 685 |  | 
|---|
| 686 | return TRUE; | 
|---|
| 687 | } | 
|---|
| 688 |  | 
|---|
| 689 | #if defined MBS_SUPPORT | 
|---|
| 690 | /* str2wstr --- convert a multibyte string to a wide string */ | 
|---|
| 691 |  | 
|---|
| 692 | NODE * | 
|---|
| 693 | str2wstr(NODE *n, size_t **ptr) | 
|---|
| 694 | { | 
|---|
| 695 | size_t i, count, src_count; | 
|---|
| 696 | char *sp; | 
|---|
| 697 | mbstate_t mbs; | 
|---|
| 698 | wchar_t wc, *wsp; | 
|---|
| 699 |  | 
|---|
| 700 | assert((n->flags & (STRING|STRCUR)) != 0); | 
|---|
| 701 |  | 
|---|
| 702 | if ((n->flags & WSTRCUR) != 0) { | 
|---|
| 703 | if (ptr == NULL) | 
|---|
| 704 | return n; | 
|---|
| 705 | /* otherwise | 
|---|
| 706 | fall through and recompute to fill in the array */ | 
|---|
| 707 | } | 
|---|
| 708 |  | 
|---|
| 709 | if (n->wstptr != NULL) { | 
|---|
| 710 | free(n->wstptr); | 
|---|
| 711 | n->wstptr = NULL; | 
|---|
| 712 | n->wstlen = 0; | 
|---|
| 713 | } | 
|---|
| 714 |  | 
|---|
| 715 | /* | 
|---|
| 716 | * After consideration and consultation, this | 
|---|
| 717 | * code trades space for time. We allocate | 
|---|
| 718 | * an array of wchar_t that is n->stlen long. | 
|---|
| 719 | * This is needed in the worst case anyway, where | 
|---|
| 720 | * each input bytes maps to one wchar_t.  The | 
|---|
| 721 | * advantage is that we only have to convert the string | 
|---|
| 722 | * once, instead of twice, once to find out how many | 
|---|
| 723 | * wide characters, and then again to actually fill | 
|---|
| 724 | * the info in.  If there's a lot left over, we can | 
|---|
| 725 | * realloc the wide string down in size. | 
|---|
| 726 | */ | 
|---|
| 727 |  | 
|---|
| 728 | emalloc(n->wstptr, wchar_t *, sizeof(wchar_t) * (n->stlen + 2), "str2wstr"); | 
|---|
| 729 | wsp = n->wstptr; | 
|---|
| 730 |  | 
|---|
| 731 | /* | 
|---|
| 732 | * For use by do_match, create and fill in an array. | 
|---|
| 733 | * For each byte `i' in n->stptr (the original string), | 
|---|
| 734 | * a[i] is equal to `j', where `j' is the corresponding wchar_t | 
|---|
| 735 | * in the converted wide string. | 
|---|
| 736 | * | 
|---|
| 737 | * Create the array. | 
|---|
| 738 | */ | 
|---|
| 739 | if (ptr != NULL) { | 
|---|
| 740 | emalloc(*ptr, size_t *, sizeof(size_t) * n->stlen, "str2wstr"); | 
|---|
| 741 | memset(*ptr, 0, sizeof(size_t) * n->stlen); | 
|---|
| 742 | } | 
|---|
| 743 |  | 
|---|
| 744 | sp = n->stptr; | 
|---|
| 745 | src_count = n->stlen; | 
|---|
| 746 | memset(& mbs, 0, sizeof(mbs)); | 
|---|
| 747 | for (i = 0; src_count > 0; i++) { | 
|---|
| 748 | count = mbrtowc(& wc, sp, src_count, & mbs); | 
|---|
| 749 | switch (count) { | 
|---|
| 750 | case (size_t) -2: | 
|---|
| 751 | case (size_t) -1: | 
|---|
| 752 | case 0: | 
|---|
| 753 | goto done; | 
|---|
| 754 |  | 
|---|
| 755 | default: | 
|---|
| 756 | *wsp++ = wc; | 
|---|
| 757 | src_count -= count; | 
|---|
| 758 | while (count--)  { | 
|---|
| 759 | if (ptr != NULL) | 
|---|
| 760 | (*ptr)[sp - n->stptr] = i; | 
|---|
| 761 | sp++; | 
|---|
| 762 | } | 
|---|
| 763 | break; | 
|---|
| 764 | } | 
|---|
| 765 | } | 
|---|
| 766 |  | 
|---|
| 767 | done: | 
|---|
| 768 | *wsp = L'\0'; | 
|---|
| 769 | n->wstlen = i; | 
|---|
| 770 | n->flags |= WSTRCUR; | 
|---|
| 771 | #define ARBITRARY_AMOUNT_TO_GIVE_BACK 100 | 
|---|
| 772 | if (n->stlen - n->wstlen > ARBITRARY_AMOUNT_TO_GIVE_BACK) | 
|---|
| 773 | erealloc(n->wstptr, wchar_t *, sizeof(wchar_t) * (n->wstlen + 2), "str2wstr"); | 
|---|
| 774 |  | 
|---|
| 775 | return n; | 
|---|
| 776 | } | 
|---|
| 777 |  | 
|---|
#if 0
/* dump_wstr --- write len wide characters of str to fp, one putc each */

static void
dump_wstr(FILE *fp, const wchar_t *str, size_t len)
{
	size_t k;

	if (str == NULL || len == 0)
		return;

	for (k = 0; k < len; k++)
		putc((int) str[k], fp);
}
#endif
|---|
| 789 |  | 
|---|
/*
 * wstrstr --- walk haystack, looking for needle, wide char version
 *
 * haystack, hs_len	the wide string to search and its length
 * needle, needle_len	the wide string to look for and its length
 *
 * Both strings are length-counted, so they may contain L'\0'.
 * Returns a pointer to the first occurrence of needle in haystack, or
 * NULL if there is none, if either pointer is NULL, or if needle is
 * longer than haystack.  An empty needle matches at the start of
 * haystack, as with wcsstr().
 */

const wchar_t *
wstrstr(const wchar_t *haystack, size_t hs_len,
	const wchar_t *needle, size_t needle_len)
{
	size_t i, last_start;

	if (haystack == NULL || needle == NULL || needle_len > hs_len)
		return NULL;

	/* must come first: needle[needle_len-1] below needs needle_len > 0 */
	if (needle_len == 0)
		return haystack;

	/* no match can start beyond this offset */
	last_start = hs_len - needle_len;

	for (i = 0; i <= last_start; i++) {
		/* cheap first & last char test before comparing it all */
		if (haystack[i] == needle[0]
		    && haystack[i+needle_len-1] == needle[needle_len-1]
		    && memcmp(haystack+i, needle, sizeof(wchar_t) * needle_len) == 0)
			return haystack + i;
	}

	return NULL;
}
|---|
| 814 |  | 
|---|
/*
 * wcasestrstr --- walk haystack, nocase look for needle, wide char version
 *
 * Same contract as wstrstr(), except that characters are compared after
 * mapping both sides through towlower().  An empty needle matches at
 * the start of haystack.
 */

const wchar_t *
wcasestrstr(const wchar_t *haystack, size_t hs_len,
	const wchar_t *needle, size_t needle_len)
{
	size_t i, j, last_start;

	if (haystack == NULL || needle == NULL || needle_len > hs_len)
		return NULL;

	/* must come first: needle[needle_len-1] below needs needle_len > 0 */
	if (needle_len == 0)
		return haystack;

	/* no match can start beyond this offset */
	last_start = hs_len - needle_len;

	for (i = 0; i <= last_start; i++) {
		/* cheap first & last char test before comparing it all */
		if (towlower(haystack[i]) != towlower(needle[0])
		    || towlower(haystack[i+needle_len-1]) != towlower(needle[needle_len-1]))
			continue;

		for (j = 0; j < needle_len; j++) {
			if (towlower(haystack[i+j]) != towlower(needle[j]))
				break;
		}
		if (j == needle_len)
			return haystack + i;
	}

	return NULL;
}
|---|
| 849 | #endif /* defined MBS_SUPPORT */ | 
|---|