source: trunk/essentials/sys-apps/gawk/builtin.c@ 3879

Last change on this file since 3879 was 3076, checked in by bird, 19 years ago

gawk 3.1.5

File size: 76.8 KB
Line 
1/*
2 * builtin.c - Builtin functions and various utility procedures
3 */
4
5/*
6 * Copyright (C) 1986, 1988, 1989, 1991-2005 the Free Software Foundation, Inc.
7 *
8 * This file is part of GAWK, the GNU implementation of the
9 * AWK Programming Language.
10 *
11 * GAWK is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * GAWK is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 */
25
26
27#include "awk.h"
28#if defined(HAVE_FCNTL_H)
29#include <fcntl.h>
30#endif
31#undef CHARBITS
32#undef INTBITS
33#if HAVE_INTTYPES_H
34# include <inttypes.h>
35#endif
36#if HAVE_STDINT_H
37# include <stdint.h>
38#endif
39#include <math.h>
40#include "random.h"
41
42#ifndef CHAR_BIT
43# define CHAR_BIT 8
44#endif
45
46/* The extra casts work around common compiler bugs. */
47#define TYPE_SIGNED(t) (! ((t) 0 < (t) -1))
48/* The outer cast is needed to work around a bug in Cray C 5.0.3.0.
49 It is necessary at least when t == time_t. */
50#define TYPE_MINIMUM(t) ((t) (TYPE_SIGNED (t) \
51 ? ~ (t) 0 << (sizeof (t) * CHAR_BIT - 1) : (t) 0))
52#define TYPE_MAXIMUM(t) ((t) (~ (t) 0 - TYPE_MINIMUM (t)))
53
54#ifndef INTMAX_MIN
55# define INTMAX_MIN TYPE_MINIMUM (intmax_t)
56#endif
57#ifndef UINTMAX_MAX
58# define UINTMAX_MAX TYPE_MAXIMUM (uintmax_t)
59#endif
60
61#ifndef SIZE_MAX /* C99 constant, can't rely on it everywhere */
62#define SIZE_MAX ((size_t) -1)
63#endif
64
65/* can declare these, since we always use the random shipped with gawk */
66extern char *initstate P((unsigned long seed, char *state, long n));
67extern char *setstate P((char *state));
68extern long random P((void));
69extern void srandom P((unsigned long seed));
70
71extern NODE **fields_arr;
72extern int output_is_tty;
73
74static NODE *sub_common P((NODE *tree, long how_many, int backdigs));
75
76/* Assume IEEE-754 arithmetic on pre-C89 hosts. */
77#ifndef FLT_RADIX
78#define FLT_RADIX 2
79#endif
80#ifndef FLT_MANT_DIG
81#define FLT_MANT_DIG 24
82#endif
83#ifndef DBL_MANT_DIG
84#define DBL_MANT_DIG 53
85#endif
86
87#ifdef _CRAY
88/* Work around a problem in conversion of doubles to exact integers. */
89#define Floor(n) floor((n) * (1.0 + DBL_EPSILON))
90#define Ceil(n) ceil((n) * (1.0 + DBL_EPSILON))
91
92/* Force the standard C compiler to use the library math functions. */
93extern double exp(double);
94double (*Exp)() = exp;
95#define exp(x) (*Exp)(x)
96extern double log(double);
97double (*Log)() = log;
98#define log(x) (*Log)(x)
99#else
100#define Floor(n) floor(n)
101#define Ceil(n) ceil(n)
102#endif
103
104#define DEFAULT_G_PRECISION 6
105
106#ifdef GFMT_WORKAROUND
107/* semi-temporary hack, mostly to gracefully handle VMS */
108static void sgfmt P((char *buf, const char *format, int alt,
109 int fwidth, int precision, double value));
110#endif /* GFMT_WORKAROUND */
111
112/*
113 * Since we supply the version of random(), we know what
114 * value to use here.
115 */
116#define GAWK_RANDOM_MAX 0x7fffffffL
117
118static void efwrite P((const void *ptr, size_t size, size_t count, FILE *fp,
119 const char *from, struct redirect *rp, int flush));
120
121/* efwrite --- like fwrite, but with error checking */
122
123static void
124efwrite(const void *ptr,
125 size_t size,
126 size_t count,
127 FILE *fp,
128 const char *from,
129 struct redirect *rp,
130 int flush)
131{
132 errno = 0;
133 if (fwrite(ptr, size, count, fp) != count)
134 goto wrerror;
135 if (flush
136 && ((fp == stdout && output_is_tty)
137 || (rp != NULL && (rp->flag & RED_NOBUF)))) {
138 fflush(fp);
139 if (ferror(fp))
140 goto wrerror;
141 }
142 return;
143
144wrerror:
145 fatal(_("%s to \"%s\" failed (%s)"), from,
146 rp ? rp->value : _("standard output"),
147 errno ? strerror(errno) : _("reason unknown"));
148}
149
150/* do_exp --- exponential function */
151
152NODE *
153do_exp(NODE *tree)
154{
155 NODE *tmp;
156 double d, res;
157
158 tmp = tree_eval(tree->lnode);
159 if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0)
160 lintwarn(_("exp: received non-numeric argument"));
161 d = force_number(tmp);
162 free_temp(tmp);
163 errno = 0;
164 res = exp(d);
165 if (errno == ERANGE)
166 warning(_("exp: argument %g is out of range"), d);
167 return tmp_number((AWKNUM) res);
168}
169
170/* stdfile --- return fp for a standard file */
171
172/*
173 * This function allows `fflush("/dev/stdout")' to work.
174 * The other files will be available via getredirect().
175 * /dev/stdin is not included, since fflush is only for output.
176 */
177
178static FILE *
179stdfile(const char *name, size_t len)
180{
181 if (len == 11) {
182 if (STREQN(name, "/dev/stderr", 11))
183 return stderr;
184 else if (STREQN(name, "/dev/stdout", 11))
185 return stdout;
186 }
187
188 return NULL;
189}
190
191/* do_fflush --- flush output, either named file or pipe or everything */
192
193NODE *
194do_fflush(NODE *tree)
195{
196 struct redirect *rp;
197 NODE *tmp;
198 FILE *fp;
199 int status = 0;
200 const char *file;
201
202 /* fflush() --- flush stdout */
203 if (tree == NULL) {
204 status = fflush(stdout);
205 return tmp_number((AWKNUM) status);
206 }
207
208 tmp = tree_eval(tree->lnode);
209 tmp = force_string(tmp);
210 file = tmp->stptr;
211
212 /* fflush("") --- flush all */
213 if (tmp->stlen == 0) {
214 status = flush_io();
215 free_temp(tmp);
216 return tmp_number((AWKNUM) status);
217 }
218
219 rp = getredirect(tmp->stptr, tmp->stlen);
220 status = -1;
221 if (rp != NULL) {
222 if ((rp->flag & (RED_WRITE|RED_APPEND)) == 0) {
223 if (rp->flag & RED_PIPE)
224 warning(_("fflush: cannot flush: pipe `%s' opened for reading, not writing"),
225 file);
226 else
227 warning(_("fflush: cannot flush: file `%s' opened for reading, not writing"),
228 file);
229 free_temp(tmp);
230 return tmp_number((AWKNUM) status);
231 }
232 fp = rp->fp;
233 if (fp != NULL)
234 status = fflush(fp);
235 } else if ((fp = stdfile(tmp->stptr, tmp->stlen)) != NULL) {
236 status = fflush(fp);
237 } else {
238 status = -1;
239 warning(_("fflush: `%s' is not an open file, pipe or co-process"), file);
240 }
241 free_temp(tmp);
242 return tmp_number((AWKNUM) status);
243}
244
245#ifdef MBS_SUPPORT
246/* strncasecmpmbs --- like strncasecmp(multibyte string version) */
247int
248strncasecmpmbs(const char *s1, mbstate_t mbs1, const char *s2,
249 mbstate_t mbs2, size_t n)
250{
251 int i1, i2, mbclen1, mbclen2, gap;
252 wchar_t wc1, wc2;
253
254 for (i1 = i2 = 0 ; i1 < n && i2 < n ;i1 += mbclen1, i2 += mbclen2) {
255 mbclen1 = mbrtowc(&wc1, s1 + i1, n - i1, &mbs1);
256 if (mbclen1 == (size_t) -1 || mbclen1 == (size_t) -2 || mbclen1 == 0) {
257 /* We treat it as a singlebyte character. */
258 mbclen1 = 1;
259 wc1 = s1[i1];
260 }
261 mbclen2 = mbrtowc(&wc2, s2 + i2, n - i2, &mbs2);
262 if (mbclen2 == (size_t) -1 || mbclen2 == (size_t) -2 || mbclen2 == 0) {
263 /* We treat it as a singlebyte character. */
264 mbclen2 = 1;
265 wc2 = s2[i2];
266 }
267 if ((gap = towlower(wc1) - towlower(wc2)) != 0)
268 /* s1 and s2 are not equivalent. */
269 return gap;
270 }
271 /* s1 and s2 are equivalent. */
272 return 0;
273}
274
275/* Inspect the buffer `src' and write the index of each byte to `dest'.
276 Caller must allocate `dest'.
277 e.g. str = <mb1(1)>, <mb1(2)>, a, b, <mb2(1)>, <mb2(2)>, <mb2(3)>, c
278 where mb(i) means the `i'-th byte of a multibyte character.
279 dest = 1, 2, 1, 1, 1, 2, 3. 1
280*/
281static void
282index_multibyte_buffer(char* src, char* dest, int len)
283{
284 int idx, prev_idx;
285 mbstate_t mbs, prevs;
286 memset(&prevs, 0, sizeof(mbstate_t));
287
288 for (idx = prev_idx = 0 ; idx < len ; idx++) {
289 size_t mbclen;
290 mbs = prevs;
291 mbclen = mbrlen(src + prev_idx, idx - prev_idx + 1, &mbs);
292 if (mbclen == (size_t) -1 || mbclen == 1 || mbclen == 0) {
293 /* singlebyte character. */
294 mbclen = 1;
295 prev_idx = idx + 1;
296 } else if (mbclen == (size_t) -2) {
297 /* a part of a multibyte character. */
298 mbclen = idx - prev_idx + 1;
299 } else if (mbclen > 1) {
300 /* the end of a multibyte character. */
301 prev_idx = idx + 1;
302 prevs = mbs;
303 } else {
304 /* Can't reach. */
305 }
306 dest[idx] = mbclen;
307 }
308}
309#else
310/* a dummy function */
311static void
312index_multibyte_buffer(char* src ATTRIBUTE_UNUSED, char* dest ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED)
313{
314 cant_happen();
315}
316#endif
317
318/* do_index --- find index of a string */
319
320NODE *
321do_index(NODE *tree)
322{
323 NODE *s1, *s2;
324 register const char *p1, *p2;
325 register size_t l1, l2;
326 long ret;
327
328 s1 = tree_eval(tree->lnode);
329 s2 = tree_eval(tree->rnode->lnode);
330 if (do_lint) {
331 if ((s1->flags & (STRING|STRCUR)) == 0)
332 lintwarn(_("index: received non-string first argument"));
333 if ((s2->flags & (STRING|STRCUR)) == 0)
334 lintwarn(_("index: received non-string second argument"));
335 }
336 force_string(s1);
337 force_string(s2);
338 p1 = s1->stptr;
339 p2 = s2->stptr;
340 l1 = s1->stlen;
341 l2 = s2->stlen;
342 ret = 0;
343
344 /*
345 * Icky special case, index(foo, "") should return 1,
346 * since both bwk awk and mawk do, and since match("foo", "")
347 * returns 1. This makes index("", "") work, too, fwiw.
348 */
349 if (l2 == 0) {
350 ret = 1;
351 goto out;
352 }
353
354 /* IGNORECASE will already be false if posix */
355 if (IGNORECASE) {
356 while (l1 > 0) {
357 if (l2 > l1)
358 break;
359#ifdef MBS_SUPPORT
360 if (gawk_mb_cur_max > 1) {
361 const wchar_t *pos;
362
363 s1 = force_wstring(s1);
364 s2 = force_wstring(s2);
365
366 pos = wcasestrstr(s1->wstptr, s1->wstlen, s2->wstptr, s2->wstlen);
367 if (pos == NULL)
368 ret = 0;
369 else
370 ret = pos - s1->wstptr + 1; /* 1-based */
371 goto out;
372 } else {
373#endif
374 /*
375 * Could use tolower(*p1) == tolower(*p2) here. See discussion
376 * in eval.c as to why not.
377 */
378 if (casetable[(unsigned char)*p1] == casetable[(unsigned char)*p2]
379 && (l2 == 1 || strncasecmp(p1, p2, l2) == 0)) {
380 ret = 1 + s1->stlen - l1;
381 break;
382 }
383 l1--;
384 p1++;
385#ifdef MBS_SUPPORT
386 }
387#endif
388 }
389 } else {
390 while (l1 > 0) {
391 if (l2 > l1)
392 break;
393 if (*p1 == *p2
394 && (l2 == 1 || (l2 > 0 && memcmp(p1, p2, l2) == 0))) {
395 ret = 1 + s1->stlen - l1;
396 break;
397 }
398#ifdef MBS_SUPPORT
399 if (gawk_mb_cur_max > 1) {
400 const wchar_t *pos;
401
402 s1 = force_wstring(s1);
403 s2 = force_wstring(s2);
404
405 pos = wstrstr(s1->wstptr, s1->wstlen, s2->wstptr, s2->wstlen);
406 if (pos == NULL)
407 ret = 0;
408 else
409 ret = pos - s1->wstptr + 1; /* 1-based */
410 goto out;
411 } else {
412 l1--;
413 p1++;
414 }
415#else
416 l1--;
417 p1++;
418#endif
419 }
420 }
421out:
422 free_temp(s1);
423 free_temp(s2);
424 return tmp_number((AWKNUM) ret);
425}
426
427/* double_to_int --- convert double to int, used several places */
428
429double
430double_to_int(double d)
431{
432 if (d >= 0)
433 d = Floor(d);
434 else
435 d = Ceil(d);
436 return d;
437}
438
439/* do_int --- convert double to int for awk */
440
441NODE *
442do_int(NODE *tree)
443{
444 NODE *tmp;
445 double d;
446
447 tmp = tree_eval(tree->lnode);
448 if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0)
449 lintwarn(_("int: received non-numeric argument"));
450 d = force_number(tmp);
451 d = double_to_int(d);
452 free_temp(tmp);
453 return tmp_number((AWKNUM) d);
454}
455
456/* do_length --- length of a string or $0 */
457
458NODE *
459do_length(NODE *tree)
460{
461 NODE *tmp;
462 size_t len;
463
464 if (tree->lnode->type == Node_var_array
465 || tree->lnode->type == Node_array_ref) {
466 NODE *array_var = tree->lnode;
467
468 if (array_var->type == Node_array_ref)
469 array_var = array_var->orig_array;
470
471 if (do_lint)
472 lintwarn(_("`length(array)' is a gawk extension"));
473 if (do_posix)
474 goto normal; /* will die as fatal error */
475
476 return tmp_number((AWKNUM) array_var->table_size);
477 } else {
478normal:
479 tmp = tree_eval(tree->lnode);
480 if (do_lint && (tmp->flags & (STRING|STRCUR)) == 0)
481 lintwarn(_("length: received non-string argument"));
482 tmp = force_string(tmp);
483#ifdef MBS_SUPPORT
484 if (gawk_mb_cur_max > 1) {
485 tmp = force_wstring(tmp);
486 len = tmp->wstlen;
487 } else
488#endif
489 len = tmp->stlen;
490
491 free_temp(tmp);
492 return tmp_number((AWKNUM) len);
493 }
494}
495
496/* do_log --- the log function */
497
498NODE *
499do_log(NODE *tree)
500{
501 NODE *tmp;
502 double d, arg;
503
504 tmp = tree_eval(tree->lnode);
505 if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0)
506 lintwarn(_("log: received non-numeric argument"));
507 arg = (double) force_number(tmp);
508 if (arg < 0.0)
509 warning(_("log: received negative argument %g"), arg);
510 d = log(arg);
511 free_temp(tmp);
512 return tmp_number((AWKNUM) d);
513}
514
515/*
516 * format_tree() formats nodes of a tree, starting with a left node,
517 * and accordingly to a fmt_string providing a format like in
518 * printf family from C library. Returns a string node which value
519 * is a formatted string. Called by sprintf function.
520 *
521 * It is one of the uglier parts of gawk. Thanks to Michal Jaegermann
522 * for taming this beast and making it compatible with ANSI C.
523 */
524
525NODE *
526format_tree(
527 const char *fmt_string,
528 size_t n0,
529 register NODE *carg,
530 long num_args)
531{
532/* copy 'l' bytes from 's' to 'obufout' checking for space in the process */
533/* difference of pointers should be of ptrdiff_t type, but let us be kind */
534#define bchunk(s, l) if (l) { \
535 while ((l) > ofre) { \
536 size_t olen = obufout - obuf; \
537 erealloc(obuf, char *, osiz * 2, "format_tree"); \
538 ofre += osiz; \
539 osiz *= 2; \
540 obufout = obuf + olen; \
541 } \
542 memcpy(obufout, s, (size_t) (l)); \
543 obufout += (l); \
544 ofre -= (l); \
545}
546
547/* copy one byte from 's' to 'obufout' checking for space in the process */
548#define bchunk_one(s) { \
549 if (ofre < 1) { \
550 size_t olen = obufout - obuf; \
551 erealloc(obuf, char *, osiz * 2, "format_tree"); \
552 ofre += osiz; \
553 osiz *= 2; \
554 obufout = obuf + olen; \
555 } \
556 *obufout++ = *s; \
557 --ofre; \
558}
559
560/* Is there space for something L big in the buffer? */
561#define chksize(l) if ((l) > ofre) { \
562 size_t olen = obufout - obuf; \
563 erealloc(obuf, char *, osiz * 2, "format_tree"); \
564 obufout = obuf + olen; \
565 ofre += osiz; \
566 osiz *= 2; \
567}
568
569 static NODE **the_args = 0;
570 static size_t args_size = 0;
571 size_t cur_arg = 0;
572
573 auto NODE **save_args = 0;
574 auto size_t save_args_size = 0;
575 static int call_level = 0;
576
577 NODE *r;
578 int i;
579 int toofew = FALSE;
580 char *obuf, *obufout;
581 size_t osiz, ofre;
582 const char *chbuf;
583 const char *s0, *s1;
584 int cs1;
585 NODE *arg;
586 long fw, prec, argnum;
587 int used_dollar;
588 int lj, alt, big, bigbig, small, have_prec, need_format;
589 long *cur = NULL;
590#ifdef sun386 /* Can't cast unsigned (int/long) from ptr->value */
591 long tmp_uval; /* on 386i 4.0.1 C compiler -- it just hangs */
592#endif
593 uintmax_t uval;
594 int sgn;
595 int base = 0;
596 char cpbuf[30]; /* if we have numbers bigger than 30 */
597 char *cend = &cpbuf[30];/* chars, we lose, but seems unlikely */
598 char *cp;
599 const char *fill;
600 AWKNUM tmpval;
601 char signchar = FALSE;
602 size_t len;
603 int zero_flag = FALSE;
604 int quote_flag = FALSE;
605 int ii, jj;
606 static const char sp[] = " ";
607 static const char zero_string[] = "0";
608 static const char lchbuf[] = "0123456789abcdef";
609 static const char Uchbuf[] = "0123456789ABCDEF";
610
611#define INITIAL_OUT_SIZE 512
612 emalloc(obuf, char *, INITIAL_OUT_SIZE, "format_tree");
613 obufout = obuf;
614 osiz = INITIAL_OUT_SIZE;
615 ofre = osiz - 2;
616
617 /*
618 * Icky problem. If the args make a nested call to printf/sprintf,
619 * we end up clobbering the static variable `the_args'. Not good.
620 * We don't just malloc and free the_args each time, since most of the
621 * time there aren't nested calls. But if this is a nested call,
622 * save the memory pointed to by the_args and allocate a fresh
623 * array. Then free it on end.
624 */
625 if (++call_level > 1) { /* nested */
626 save_args = the_args;
627 save_args_size = args_size;
628
629 args_size = 0; /* force fresh allocation */
630 }
631
632 if (args_size == 0) {
633 /* allocate array */
634 emalloc(the_args, NODE **, (num_args+1) * sizeof(NODE *), "format_tree");
635 args_size = num_args + 1;
636 } else if (num_args + 1 > args_size) {
637 /* grow it */
638 erealloc(the_args, NODE **, (num_args+1) * sizeof(NODE *), "format_tree");
639 args_size = num_args + 1;
640 }
641
642
643 /* fill it in */
644 /*
645 * We ignore the_args[0] since format strings use
646 * 1-based numbers to indicate the arguments. It's
647 * easiest to just convert to int and index, without
648 * having to remember to subtract 1.
649 */
650 memset(the_args, '\0', num_args * sizeof(NODE *));
651 for (i = 1; carg != NULL; i++, carg = carg->rnode) {
652 NODE *tmp;
653
654 /* Here lies the wumpus's other brother. R.I.P. */
655 tmp = tree_eval(carg->lnode);
656 the_args[i] = dupnode(tmp);
657 free_temp(tmp);
658 }
659 assert(i == num_args);
660 cur_arg = 1;
661
662 /*
663 * Check first for use of `count$'.
664 * If plain argument retrieval was used earlier, choke.
665 * Otherwise, return the requested argument.
666 * If not `count$' now, but it was used earlier, choke.
667 * If this format is more than total number of args, choke.
668 * Otherwise, return the current argument.
669 */
670#define parse_next_arg() { \
671 if (argnum > 0) { \
672 if (cur_arg > 1) \
673 fatal(_("must use `count$' on all formats or none")); \
674 arg = the_args[argnum]; \
675 } else if (used_dollar) { \
676 fatal(_("must use `count$' on all formats or none")); \
677 arg = 0; /* shutup the compiler */ \
678 } else if (cur_arg >= num_args) { \
679 arg = 0; /* shutup the compiler */ \
680 toofew = TRUE; \
681 break; \
682 } else { \
683 arg = the_args[cur_arg]; \
684 cur_arg++; \
685 } \
686}
687
688 need_format = FALSE;
689 used_dollar = FALSE;
690
691 s0 = s1 = fmt_string;
692 while (n0-- > 0) {
693 if (*s1 != '%') {
694 s1++;
695 continue;
696 }
697 need_format = TRUE;
698 bchunk(s0, s1 - s0);
699 s0 = s1;
700 cur = &fw;
701 fw = 0;
702 prec = 0;
703 argnum = 0;
704 have_prec = FALSE;
705 signchar = FALSE;
706 zero_flag = FALSE;
707 lj = alt = big = bigbig = small = FALSE;
708 fill = sp;
709 cp = cend;
710 chbuf = lchbuf;
711 s1++;
712
713retry:
714 if (n0-- == 0) /* ran out early! */
715 break;
716
717 switch (cs1 = *s1++) {
718 case (-1): /* dummy case to allow for checking */
719check_pos:
720 if (cur != &fw)
721 break; /* reject as a valid format */
722 goto retry;
723 case '%':
724 need_format = FALSE;
725 /*
726 * 29 Oct. 2002:
727 * The C99 standard pages 274 and 279 seem to imply that
728 * since there's no arg converted, the field width doesn't
729 * apply. The code already was that way, but this
730 * comment documents it, at least in the code.
731 */
732 bchunk_one("%");
733 s0 = s1;
734 break;
735
736 case '0':
737 /*
738 * Only turn on zero_flag if we haven't seen
739 * the field width or precision yet. Otherwise,
740 * screws up floating point formatting.
741 */
742 if (cur == & fw)
743 zero_flag = TRUE;
744 if (lj)
745 goto retry;
746 /* FALL through */
747 case '1':
748 case '2':
749 case '3':
750 case '4':
751 case '5':
752 case '6':
753 case '7':
754 case '8':
755 case '9':
756 if (cur == NULL)
757 break;
758 if (prec >= 0)
759 *cur = cs1 - '0';
760 /*
761 * with a negative precision *cur is already set
762 * to -1, so it will remain negative, but we have
763 * to "eat" precision digits in any case
764 */
765 while (n0 > 0 && *s1 >= '0' && *s1 <= '9') {
766 --n0;
767 *cur = *cur * 10 + *s1++ - '0';
768 }
769 if (prec < 0) /* negative precision is discarded */
770 have_prec = FALSE;
771 if (cur == &prec)
772 cur = NULL;
773 if (n0 == 0) /* badly formatted control string */
774 continue;
775 goto retry;
776 case '$':
777 if (do_traditional)
778 fatal(_("`$' is not permitted in awk formats"));
779 if (cur == &fw) {
780 argnum = fw;
781 fw = 0;
782 used_dollar = TRUE;
783 if (argnum <= 0)
784 fatal(_("arg count with `$' must be > 0"));
785 if (argnum >= num_args)
786 fatal(_("arg count %ld greater than total number of supplied arguments"), argnum);
787 } else
788 fatal(_("`$' not permitted after period in format"));
789 goto retry;
790 case '*':
791 if (cur == NULL)
792 break;
793 if (! do_traditional && ISDIGIT(*s1)) {
794 int val = 0;
795
796 for (; n0 > 0 && *s1 && ISDIGIT(*s1); s1++, n0--) {
797 val *= 10;
798 val += *s1 - '0';
799 }
800 if (*s1 != '$') {
801 fatal(_("no `$' supplied for positional field width or precision"));
802 } else {
803 s1++;
804 n0--;
805 }
806 if (val >= num_args) {
807 toofew = TRUE;
808 break;
809 }
810 arg = the_args[val];
811 } else {
812 parse_next_arg();
813 }
814 *cur = force_number(arg);
815 if (*cur < 0 && cur == &fw) {
816 *cur = -*cur;
817 lj++;
818 }
819 if (cur == &prec) {
820 if (*cur >= 0)
821 have_prec = TRUE;
822 else
823 have_prec = FALSE;
824 cur = NULL;
825 }
826 goto retry;
827 case ' ': /* print ' ' or '-' */
828 /* 'space' flag is ignored */
829 /* if '+' already present */
830 if (signchar != FALSE)
831 goto check_pos;
832 /* FALL THROUGH */
833 case '+': /* print '+' or '-' */
834 signchar = cs1;
835 goto check_pos;
836 case '-':
837 if (prec < 0)
838 break;
839 if (cur == &prec) {
840 prec = -1;
841 goto retry;
842 }
843 fill = sp; /* if left justified then other */
844 lj++; /* filling is ignored */
845 goto check_pos;
846 case '.':
847 if (cur != &fw)
848 break;
849 cur = &prec;
850 have_prec = TRUE;
851 goto retry;
852 case '#':
853 alt = TRUE;
854 goto check_pos;
855#if defined(HAVE_LOCALE_H)
856 case '\'':
857 quote_flag = TRUE;
858 goto check_pos;
859#endif
860 case 'l':
861 if (big)
862 break;
863 else {
864 static int warned = FALSE;
865
866 if (do_lint && ! warned) {
867 lintwarn(_("`l' is meaningless in awk formats; ignored"));
868 warned = TRUE;
869 }
870 if (do_posix)
871 fatal(_("`l' is not permitted in POSIX awk formats"));
872 }
873 big = TRUE;
874 goto retry;
875 case 'L':
876 if (bigbig)
877 break;
878 else {
879 static int warned = FALSE;
880
881 if (do_lint && ! warned) {
882 lintwarn(_("`L' is meaningless in awk formats; ignored"));
883 warned = TRUE;
884 }
885 if (do_posix)
886 fatal(_("`L' is not permitted in POSIX awk formats"));
887 }
888 bigbig = TRUE;
889 goto retry;
890 case 'h':
891 if (small)
892 break;
893 else {
894 static int warned = FALSE;
895
896 if (do_lint && ! warned) {
897 lintwarn(_("`h' is meaningless in awk formats; ignored"));
898 warned = TRUE;
899 }
900 if (do_posix)
901 fatal(_("`h' is not permitted in POSIX awk formats"));
902 }
903 small = TRUE;
904 goto retry;
905 case 'c':
906 need_format = FALSE;
907 if (zero_flag && ! lj)
908 fill = zero_string;
909 parse_next_arg();
910 /* user input that looks numeric is numeric */
911 if ((arg->flags & (MAYBE_NUM|NUMBER)) == MAYBE_NUM)
912 (void) force_number(arg);
913 if (arg->flags & NUMBER) {
914#ifdef sun386
915 tmp_uval = arg->numbr;
916 uval = (unsigned long) tmp_uval;
917#else
918 uval = (uintmax_t) arg->numbr;
919#endif
920 cpbuf[0] = uval;
921 prec = 1;
922 cp = cpbuf;
923 goto pr_tail;
924 }
925 /*
926 * As per POSIX, only output first character of a
927 * string value. Thus, we ignore any provided
928 * precision, forcing it to 1. (Didn't this
929 * used to work? 6/2003.)
930 */
931 prec = 1;
932 cp = arg->stptr;
933 goto pr_tail;
934 case 's':
935 need_format = FALSE;
936 if (zero_flag && ! lj)
937 fill = zero_string;
938 parse_next_arg();
939 arg = force_string(arg);
940 if (! have_prec || prec > arg->stlen)
941 prec = arg->stlen;
942 cp = arg->stptr;
943 goto pr_tail;
944 case 'd':
945 case 'i':
946 need_format = FALSE;
947 parse_next_arg();
948 tmpval = force_number(arg);
949
950 /*
951 * ``The result of converting a zero value with a
952 * precision of zero is no characters.''
953 */
954 if (have_prec && prec == 0 && tmpval == 0)
955 goto pr_tail;
956
957 if (tmpval < 0) {
958 if (tmpval < INTMAX_MIN)
959 goto out_of_range;
960 sgn = TRUE;
961 uval = - (uintmax_t) (intmax_t) tmpval;
962 } else {
963 /* Use !, so that NaNs are out of range. */
964 if (! (tmpval <= UINTMAX_MAX))
965 goto out_of_range;
966 sgn = FALSE;
967 uval = (uintmax_t) tmpval;
968 }
969 ii = jj = 0;
970 do {
971 *--cp = (char) ('0' + uval % 10);
972 uval /= 10;
973#if defined(HAVE_LOCALE_H)
974 if (quote_flag && loc.grouping[ii] && ++jj == loc.grouping[ii]) {
975 if (uval) /* only add if more digits coming */
976 *--cp = loc.thousands_sep[0]; /* XXX - assumption it's one char */
977 if (loc.grouping[ii+1] == 0)
978 jj = 0; /* keep using current val in loc.grouping[ii] */
979 else if (loc.grouping[ii+1] == CHAR_MAX)
980 quote_flag = FALSE;
981 else {
982 ii++;
983 jj = 0;
984 }
985 }
986#endif
987 } while (uval > 0);
988
989 /* add more output digits to match the precision */
990 if (have_prec) {
991 while (cend - cp < prec)
992 *--cp = '0';
993 }
994
995 if (sgn)
996 *--cp = '-';
997 else if (signchar)
998 *--cp = signchar;
999 /*
1000 * When to fill with zeroes is of course not simple.
1001 * First: No zero fill if left-justifying.
1002 * Next: There seem to be two cases:
1003 * A '0' without a precision, e.g. %06d
1004 * A precision with no field width, e.g. %.10d
1005 * Any other case, we don't want to fill with zeroes.
1006 */
1007 if (! lj
1008 && ((zero_flag && ! have_prec)
1009 || (fw == 0 && have_prec)))
1010 fill = zero_string;
1011 if (prec > fw)
1012 fw = prec;
1013 prec = cend - cp;
1014 if (fw > prec && ! lj && fill != sp
1015 && (*cp == '-' || signchar)) {
1016 bchunk_one(cp);
1017 cp++;
1018 prec--;
1019 fw--;
1020 }
1021 goto pr_tail;
1022 case 'X':
1023 chbuf = Uchbuf; /* FALL THROUGH */
1024 case 'x':
1025 base += 6; /* FALL THROUGH */
1026 case 'u':
1027 base += 2; /* FALL THROUGH */
1028 case 'o':
1029 base += 8;
1030 need_format = FALSE;
1031 parse_next_arg();
1032 tmpval = force_number(arg);
1033
1034 /*
1035 * ``The result of converting a zero value with a
1036 * precision of zero is no characters.''
1037 *
1038 * If I remember the ANSI C standard, though,
1039 * it says that for octal conversions
1040 * the precision is artificially increased
1041 * to add an extra 0 if # is supplied.
1042 * Indeed, in C,
1043 * printf("%#.0o\n", 0);
1044 * prints a single 0.
1045 */
1046 if (! alt && have_prec && prec == 0 && tmpval == 0)
1047 goto pr_tail;
1048
1049 if (tmpval < 0) {
1050 if (tmpval < INTMAX_MIN)
1051 goto out_of_range;
1052 uval = (uintmax_t) (intmax_t) tmpval;
1053 } else {
1054 /* Use !, so that NaNs are out of range. */
1055 if (! (tmpval <= UINTMAX_MAX))
1056 goto out_of_range;
1057 uval = (uintmax_t) tmpval;
1058 }
1059 /*
1060 * When to fill with zeroes is of course not simple.
1061 * First: No zero fill if left-justifying.
1062 * Next: There seem to be two cases:
1063 * A '0' without a precision, e.g. %06d
1064 * A precision with no field width, e.g. %.10d
1065 * Any other case, we don't want to fill with zeroes.
1066 */
1067 if (! lj
1068 && ((zero_flag && ! have_prec)
1069 || (fw == 0 && have_prec)))
1070 fill = zero_string;
1071
1072 ii = jj = 0;
1073 do {
1074 *--cp = chbuf[uval % base];
1075 uval /= base;
1076#if defined(HAVE_LOCALE_H)
1077 if (base == 10 && quote_flag && loc.grouping[ii] && ++jj == loc.grouping[ii]) {
1078 if (uval) /* only add if more digits coming */
1079 *--cp = loc.thousands_sep[0]; /* XXX --- assumption it's one char */
1080 if (loc.grouping[ii+1] == 0)
1081 jj = 0; /* keep using current val in loc.grouping[ii] */
1082 else if (loc.grouping[ii+1] == CHAR_MAX)
1083 quote_flag = FALSE;
1084 else {
1085 ii++;
1086 jj = 0;
1087 }
1088 }
1089#endif
1090 } while (uval > 0);
1091
1092 /* add more output digits to match the precision */
1093 if (have_prec) {
1094 while (cend - cp < prec)
1095 *--cp = '0';
1096 }
1097
1098 if (alt && tmpval != 0) {
1099 if (base == 16) {
1100 *--cp = cs1;
1101 *--cp = '0';
1102 if (fill != sp) {
1103 bchunk(cp, 2);
1104 cp += 2;
1105 fw -= 2;
1106 }
1107 } else if (base == 8)
1108 *--cp = '0';
1109 }
1110 base = 0;
1111 if (prec > fw)
1112 fw = prec;
1113 prec = cend - cp;
1114 pr_tail:
1115 if (! lj) {
1116 while (fw > prec) {
1117 bchunk_one(fill);
1118 fw--;
1119 }
1120 }
1121 bchunk(cp, (int) prec);
1122 while (fw > prec) {
1123 bchunk_one(fill);
1124 fw--;
1125 }
1126 s0 = s1;
1127 break;
1128
1129 out_of_range:
1130 /* out of range - emergency use of %g format */
1131 if (do_lint)
1132 lintwarn(_("[s]printf: value %g is out of range for `%%%c' format"),
1133 tmpval, cs1);
1134 cs1 = 'g';
1135 goto format_float;
1136
1137 case 'F':
1138#if ! defined(PRINTF_HAS_F_FORMAT) || PRINTF_HAS_F_FORMAT != 1
1139 cs1 = 'f';
1140 /* FALL THROUGH */
1141#endif
1142 case 'g':
1143 case 'G':
1144 case 'e':
1145 case 'f':
1146 case 'E':
1147 need_format = FALSE;
1148 parse_next_arg();
1149 tmpval = force_number(arg);
1150 format_float:
1151 if (! have_prec)
1152 prec = DEFAULT_G_PRECISION;
1153 chksize(fw + prec + 9); /* 9 == slop */
1154#ifdef VAXCRTL
1155 /* pre-ANSI library doesn't handle '0' flag
1156 correctly in many cases; reject it */
1157 if (zero_flag
1158 && (lj || (signchar && signchar != '+')))
1159 zero_flag = FALSE;
1160#endif
1161 cp = cpbuf;
1162 *cp++ = '%';
1163 if (lj)
1164 *cp++ = '-';
1165 if (signchar)
1166 *cp++ = signchar;
1167 if (alt)
1168 *cp++ = '#';
1169 if (zero_flag)
1170 *cp++ = '0';
1171 if (quote_flag)
1172 *cp++ = '\'';
1173 strcpy(cp, "*.*");
1174 cp += 3;
1175 *cp++ = cs1;
1176 *cp = '\0';
1177#ifndef GFMT_WORKAROUND
1178 (void) sprintf(obufout, cpbuf,
1179 (int) fw, (int) prec, (double) tmpval);
1180#else /* GFMT_WORKAROUND */
1181 if (cs1 == 'g' || cs1 == 'G')
1182 sgfmt(obufout, cpbuf, (int) alt,
1183 (int) fw, (int) prec, (double) tmpval);
1184 else
1185 (void) sprintf(obufout, cpbuf,
1186 (int) fw, (int) prec, (double) tmpval);
1187#endif /* GFMT_WORKAROUND */
1188 len = strlen(obufout);
1189 ofre -= len;
1190 obufout += len;
1191 s0 = s1;
1192 break;
1193 default:
1194 break;
1195 }
1196 if (toofew)
1197 fatal("%s\n\t`%s'\n\t%*s%s",
1198 _("not enough arguments to satisfy format string"),
1199 fmt_string, (int) (s1 - fmt_string - 1), "",
1200 _("^ ran out for this one"));
1201 }
1202 if (do_lint) {
1203 if (need_format)
1204 lintwarn(
1205 _("[s]printf: format specifier does not have control letter"));
1206 if (cur_arg < num_args)
1207 lintwarn(
1208 _("too many arguments supplied for format string"));
1209 }
1210 bchunk(s0, s1 - s0);
1211 r = make_str_node(obuf, obufout - obuf, ALREADY_MALLOCED);
1212 r->flags |= TEMP;
1213
1214 for (i = 1; i < num_args; i++) {
1215 unref(the_args[i]);
1216 }
1217
1218 if (call_level-- > 1) {
1219 free(the_args);
1220 the_args = save_args;
1221 args_size = save_args_size;
1222 }
1223
1224 return r;
1225}
1226
1227/* do_sprintf --- perform sprintf */
1228
1229NODE *
1230do_sprintf(NODE *tree)
1231{
1232 NODE *r;
1233 NODE *sfmt = force_string(tree_eval(tree->lnode));
1234
1235 r = format_tree(sfmt->stptr, sfmt->stlen, tree->rnode, tree->printf_count);
1236 free_temp(sfmt);
1237 return r;
1238}
1239
1240/*
1241 * redirect_to_fp --- return fp for redirection, NULL on failure
1242 * or stdout if no redirection, used by all print routines
1243 */
1244
1245static inline FILE *
1246redirect_to_fp(NODE *tree, struct redirect **rpp)
1247{
1248 int errflg; /* not used, sigh */
1249 struct redirect *rp;
1250
1251 if (tree == NULL)
1252 return stdout;
1253
1254 rp = redirect(tree, &errflg);
1255 if (rp != NULL) {
1256 *rpp = rp;
1257 return rp->fp;
1258 }
1259
1260 return NULL;
1261}
1262
1263/* do_printf --- perform printf, including redirection */
1264
1265void
1266do_printf(NODE *tree)
1267{
1268 struct redirect *rp = NULL;
1269 register FILE *fp;
1270
1271 if (tree->lnode == NULL) {
1272 if (do_traditional) {
1273 if (do_lint)
1274 lintwarn(_("printf: no arguments"));
1275 return; /* bwk accepts it silently */
1276 }
1277 fatal(_("printf: no arguments"));
1278 }
1279
1280 fp = redirect_to_fp(tree->rnode, & rp);
1281 if (fp == NULL)
1282 return;
1283 tree->lnode->printf_count = tree->printf_count;
1284 tree = do_sprintf(tree->lnode);
1285 efwrite(tree->stptr, sizeof(char), tree->stlen, fp, "printf", rp, TRUE);
1286 if (rp != NULL && (rp->flag & RED_TWOWAY) != 0)
1287 fflush(rp->fp);
1288 free_temp(tree);
1289}
1290
1291/* do_sqrt --- do the sqrt function */
1292
1293NODE *
1294do_sqrt(NODE *tree)
1295{
1296 NODE *tmp;
1297 double arg;
1298
1299 tmp = tree_eval(tree->lnode);
1300 if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0)
1301 lintwarn(_("sqrt: received non-numeric argument"));
1302 arg = (double) force_number(tmp);
1303 free_temp(tmp);
1304 if (arg < 0.0)
1305 warning(_("sqrt: called with negative argument %g"), arg);
1306 return tmp_number((AWKNUM) sqrt(arg));
1307}
1308
1309/* do_substr --- do the substr function */
1310
1311NODE *
1312do_substr(NODE *tree)
1313{
1314 NODE *t1, *t2, *t3;
1315 NODE *r;
1316 register size_t indx;
1317 size_t length;
1318 double d_index, d_length;
1319 size_t src_len;
1320
1321 t1 = force_string(tree_eval(tree->lnode));
1322 t2 = tree_eval(tree->rnode->lnode);
1323 d_index = force_number(t2);
1324 free_temp(t2);
1325
1326 /* the weird `! (foo)' tests help catch NaN values. */
1327 if (! (d_index >= 1)) {
1328 if (do_lint)
1329 lintwarn(_("substr: start index %g is invalid, using 1"),
1330 d_index);
1331 d_index = 1;
1332 }
1333 if (do_lint && double_to_int(d_index) != d_index)
1334 lintwarn(_("substr: non-integer start index %g will be truncated"),
1335 d_index);
1336
1337 /* awk indices are from 1, C's are from 0 */
1338 if (d_index <= SIZE_MAX)
1339 indx = d_index - 1;
1340 else
1341 indx = SIZE_MAX;
1342
1343 if (tree->rnode->rnode == NULL) { /* third arg. missing */
1344 /* use remainder of string */
1345 length = t1->stlen - indx;
1346 d_length = length; /* set here in case used in diagnostics, below */
1347 } else {
1348 t3 = tree_eval(tree->rnode->rnode->lnode);
1349 d_length = force_number(t3);
1350 free_temp(t3);
1351 if (! (d_length >= 1)) {
1352 if (do_lint == LINT_ALL)
1353 lintwarn(_("substr: length %g is not >= 1"), d_length);
1354 else if (do_lint == LINT_INVALID && ! (d_length >= 0))
1355 lintwarn(_("substr: length %g is not >= 0"), d_length);
1356 free_temp(t1);
1357 return Nnull_string;
1358 }
1359 if (do_lint) {
1360 if (double_to_int(d_length) != d_length)
1361 lintwarn(
1362 _("substr: non-integer length %g will be truncated"),
1363 d_length);
1364
1365 if (d_length > SIZE_MAX)
1366 lintwarn(
1367 _("substr: length %g too big for string indexing, truncating to %g"),
1368 d_length, (double) SIZE_MAX);
1369 }
1370 if (d_length < SIZE_MAX)
1371 length = d_length;
1372 else
1373 length = SIZE_MAX;
1374 }
1375
1376 if (t1->stlen == 0) {
1377 /* substr("", 1, 0) produces a warning only if LINT_ALL */
1378 if (do_lint && (do_lint == LINT_ALL || ((indx | length) != 0)))
1379 lintwarn(_("substr: source string is zero length"));
1380 free_temp(t1);
1381 return Nnull_string;
1382 }
1383
1384 /* get total len of input string, for following checks */
1385#ifdef MBS_SUPPORT
1386 if (gawk_mb_cur_max > 1) {
1387 t1 = force_wstring(t1);
1388 src_len = t1->wstlen;
1389 } else
1390#endif
1391 src_len = t1->stlen;
1392
1393 if (indx >= src_len) {
1394 if (do_lint)
1395 lintwarn(_("substr: start index %g is past end of string"),
1396 d_index);
1397 free_temp(t1);
1398 return Nnull_string;
1399 }
1400 if (length > src_len - indx) {
1401 if (do_lint)
1402 lintwarn(
1403 _("substr: length %g at start index %g exceeds length of first argument (%lu)"),
1404 d_length, d_index, (unsigned long int) src_len);
1405 length = src_len - indx;
1406 }
1407
1408#ifdef MBS_SUPPORT
1409 if (gawk_mb_cur_max > 1) {
1410 /* multibyte case, more work */
1411 size_t result;
1412 wchar_t *wp;
1413 mbstate_t mbs;
1414 char *substr, *cp;
1415
1416 /* force_wstring() already called */
1417
1418 if (t1->stlen == t1->wstlen)
1419 goto single_byte_case;
1420
1421 /*
1422 * Convert the wide chars in t1->wstptr back into m.b. chars.
1423 * This is pretty grotty, but it's the most straightforward
1424 * way to do things.
1425 */
1426 memset(& mbs, 0, sizeof(mbs));
1427 emalloc(substr, char *, (length * gawk_mb_cur_max) + 2, "do_substr");
1428 wp = t1->wstptr + indx;
1429 for (cp = substr; length > 0; length--) {
1430 result = wcrtomb(cp, *wp, & mbs);
1431 if (result == (size_t) -1) /* what to do? break seems best */
1432 break;
1433 cp += result;
1434 wp++;
1435 }
1436 *cp = '\0';
1437 r = make_str_node(substr, cp - substr, ALREADY_MALLOCED);
1438 r->flags |= TEMP;
1439 } else {
1440 /* single byte case, easy */
1441single_byte_case:
1442 r = tmp_string(t1->stptr + indx, length);
1443 }
1444#else
1445 r = tmp_string(t1->stptr + indx, length);
1446#endif
1447
1448 free_temp(t1);
1449 return r;
1450}
1451
1452/* do_strftime --- format a time stamp */
1453
1454NODE *
1455do_strftime(NODE *tree)
1456{
1457 NODE *t1, *t2, *ret;
1458 struct tm *tm;
1459 time_t fclock;
1460 char *bufp;
1461 size_t buflen, bufsize;
1462 char buf[BUFSIZ];
1463 /* FIXME: One day make %d be %e, after C 99 is common. */
1464 static const char def_format[] = "%a %b %d %H:%M:%S %Z %Y";
1465 const char *format;
1466 int formatlen;
1467
1468 /* set defaults first */
1469 format = def_format; /* traditional date format */
1470 formatlen = strlen(format);
1471 (void) time(&fclock); /* current time of day */
1472
1473 t1 = t2 = NULL;
1474 if (tree != NULL) { /* have args */
1475 if (tree->lnode != NULL) {
1476 NODE *tmp = tree_eval(tree->lnode);
1477 if (do_lint && (tmp->flags & (STRING|STRCUR)) == 0)
1478 lintwarn(_("strftime: received non-string first argument"));
1479 t1 = force_string(tmp);
1480 format = t1->stptr;
1481 formatlen = t1->stlen;
1482 if (formatlen == 0) {
1483 if (do_lint)
1484 lintwarn(_("strftime: received empty format string"));
1485 free_temp(t1);
1486 return tmp_string("", 0);
1487 }
1488 }
1489
1490 if (tree->rnode != NULL) {
1491 t2 = tree_eval(tree->rnode->lnode);
1492 if (do_lint && (t2->flags & (NUMCUR|NUMBER)) == 0)
1493 lintwarn(_("strftime: received non-numeric second argument"));
1494 fclock = (time_t) force_number(t2);
1495 free_temp(t2);
1496 }
1497 }
1498
1499 tm = localtime(&fclock);
1500
1501 bufp = buf;
1502 bufsize = sizeof(buf);
1503 for (;;) {
1504 *bufp = '\0';
1505 buflen = strftime(bufp, bufsize, format, tm);
1506 /*
1507 * buflen can be zero EITHER because there's not enough
1508 * room in the string, or because the control command
1509 * goes to the empty string. Make a reasonable guess that
1510 * if the buffer is 1024 times bigger than the length of the
1511 * format string, it's not failing for lack of room.
1512 * Thanks to Paul Eggert for pointing out this issue.
1513 */
1514 if (buflen > 0 || bufsize >= 1024 * formatlen)
1515 break;
1516 bufsize *= 2;
1517 if (bufp == buf)
1518 emalloc(bufp, char *, bufsize, "do_strftime");
1519 else
1520 erealloc(bufp, char *, bufsize, "do_strftime");
1521 }
1522 ret = tmp_string(bufp, buflen);
1523 if (bufp != buf)
1524 free(bufp);
1525 if (t1)
1526 free_temp(t1);
1527 return ret;
1528}
1529
1530/* do_systime --- get the time of day */
1531
1532NODE *
1533do_systime(NODE *tree ATTRIBUTE_UNUSED)
1534{
1535 time_t lclock;
1536
1537 (void) time(&lclock);
1538 return tmp_number((AWKNUM) lclock);
1539}
1540
1541/* do_mktime --- turn a time string into a timestamp */
1542
1543NODE *
1544do_mktime(NODE *tree)
1545{
1546 NODE *t1;
1547 struct tm then;
1548 long year;
1549 int month, day, hour, minute, second, count;
1550 int dst = -1; /* default is unknown */
1551 time_t then_stamp;
1552 char save;
1553
1554 t1 = tree_eval(tree->lnode);
1555 if (do_lint && (t1->flags & (STRING|STRCUR)) == 0)
1556 lintwarn(_("mktime: received non-string argument"));
1557 t1 = force_string(t1);
1558
1559 save = t1->stptr[t1->stlen];
1560 t1->stptr[t1->stlen] = '\0';
1561
1562 count = sscanf(t1->stptr, "%ld %d %d %d %d %d %d",
1563 & year, & month, & day,
1564 & hour, & minute, & second,
1565 & dst);
1566
1567 t1->stptr[t1->stlen] = save;
1568 free_temp(t1);
1569
1570 if (count < 6
1571 || month < month - 1
1572 || year < year - 1900 || year - 1900 != (int) (year - 1900))
1573 return tmp_number((AWKNUM) -1);
1574
1575 memset(& then, '\0', sizeof(then));
1576 then.tm_sec = second;
1577 then.tm_min = minute;
1578 then.tm_hour = hour;
1579 then.tm_mday = day;
1580 then.tm_mon = month - 1;
1581 then.tm_year = year - 1900;
1582 then.tm_isdst = dst;
1583
1584 then_stamp = mktime(& then);
1585 return tmp_number((AWKNUM) then_stamp);
1586}
1587
1588/* do_system --- run an external command */
1589
1590NODE *
1591do_system(NODE *tree)
1592{
1593 NODE *tmp;
1594 int ret = 0;
1595 char *cmd;
1596 char save;
1597
1598 (void) flush_io(); /* so output is synchronous with gawk's */
1599 tmp = tree_eval(tree->lnode);
1600 if (do_lint && (tmp->flags & (STRING|STRCUR)) == 0)
1601 lintwarn(_("system: received non-string argument"));
1602 cmd = force_string(tmp)->stptr;
1603
1604 if (cmd && *cmd) {
1605 /* insure arg to system is zero-terminated */
1606
1607 /*
1608 * From: David Trueman <david@cs.dal.ca>
1609 * To: arnold@cc.gatech.edu (Arnold Robbins)
1610 * Date: Wed, 3 Nov 1993 12:49:41 -0400
1611 *
1612 * It may not be necessary to save the character, but
1613 * I'm not sure. It would normally be the field
1614 * separator. If the parse has not yet gone beyond
1615 * that, it could mess up (although I doubt it). If
1616 * FIELDWIDTHS is being used, it might be the first
1617 * character of the next field. Unless someone wants
1618 * to check it out exhaustively, I suggest saving it
1619 * for now...
1620 */
1621 save = cmd[tmp->stlen];
1622 cmd[tmp->stlen] = '\0';
1623
1624 os_restore_mode(fileno(stdin));
1625 ret = system(cmd);
1626 if (ret != -1)
1627 ret = WEXITSTATUS(ret);
1628 if ((BINMODE & 1) != 0)
1629 os_setbinmode(fileno(stdin), O_BINARY);
1630
1631 cmd[tmp->stlen] = save;
1632 }
1633 free_temp(tmp);
1634 return tmp_number((AWKNUM) ret);
1635}
1636
1637extern NODE **fmt_list; /* declared in eval.c */
1638
1639/* do_print --- print items, separated by OFS, terminated with ORS */
1640
1641void
1642do_print(register NODE *tree)
1643{
1644 register NODE **t;
1645 struct redirect *rp = NULL;
1646 register FILE *fp;
1647 int numnodes, i;
1648 NODE *save;
1649 NODE *tval;
1650
1651 fp = redirect_to_fp(tree->rnode, & rp);
1652 if (fp == NULL)
1653 return;
1654
1655 /*
1656 * General idea is to evaluate all the expressions first and
1657 * then print them, otherwise you get suprising behavior.
1658 * See test/prtoeval.awk for an example program.
1659 */
1660 save = tree = tree->lnode;
1661 for (numnodes = 0; tree != NULL; tree = tree->rnode)
1662 numnodes++;
1663 emalloc(t, NODE **, numnodes * sizeof(NODE *), "do_print");
1664
1665 tree = save;
1666 for (i = 0; tree != NULL; i++, tree = tree->rnode) {
1667 NODE *n;
1668
1669 /* Here lies the wumpus. R.I.P. */
1670 n = tree_eval(tree->lnode);
1671 t[i] = dupnode(n);
1672 free_temp(n);
1673
1674 if ((t[i]->flags & (NUMBER|STRING)) == NUMBER) {
1675 if (OFMTidx == CONVFMTidx)
1676 (void) force_string(t[i]);
1677 else {
1678 tval = tmp_number(t[i]->numbr);
1679 unref(t[i]);
1680 t[i] = format_val(OFMT, OFMTidx, tval);
1681 }
1682 }
1683 }
1684
1685 for (i = 0; i < numnodes; i++) {
1686 efwrite(t[i]->stptr, sizeof(char), t[i]->stlen, fp, "print", rp, FALSE);
1687 unref(t[i]);
1688
1689 if (i != numnodes - 1 && OFSlen > 0)
1690 efwrite(OFS, sizeof(char), (size_t) OFSlen,
1691 fp, "print", rp, FALSE);
1692
1693 }
1694 if (ORSlen > 0)
1695 efwrite(ORS, sizeof(char), (size_t) ORSlen, fp, "print", rp, TRUE);
1696
1697 if (rp != NULL && (rp->flag & RED_TWOWAY) != 0)
1698 fflush(rp->fp);
1699
1700 free(t);
1701}
1702
1703/* do_print_rec --- special case printing of $0, for speed */
1704
1705void
1706do_print_rec(register NODE *tree)
1707{
1708 struct redirect *rp = NULL;
1709 register FILE *fp;
1710 register NODE *f0;
1711
1712 fp = redirect_to_fp(tree->rnode, & rp);
1713 if (fp == NULL)
1714 return;
1715
1716 if (! field0_valid)
1717 (void) get_field(0L, NULL); /* rebuild record */
1718
1719 f0 = fields_arr[0];
1720
1721 if (do_lint && f0 == Nnull_string)
1722 lintwarn(_("reference to uninitialized field `$%d'"), 0);
1723
1724 efwrite(f0->stptr, sizeof(char), f0->stlen, fp, "print", rp, FALSE);
1725
1726 if (ORSlen > 0)
1727 efwrite(ORS, sizeof(char), (size_t) ORSlen, fp, "print", rp, TRUE);
1728
1729 if (rp != NULL && (rp->flag & RED_TWOWAY) != 0)
1730 fflush(rp->fp);
1731}
1732
1733#ifdef MBS_SUPPORT
1734/* wide_tolower_toupper --- lower- or uppercase a multibute string */
1735
1736typedef int (*isw_func)(wint_t);
1737typedef wint_t (*tow_func)(wint_t);
1738
1739static NODE *
1740wide_tolower_toupper(NODE *t1, isw_func iswu, tow_func towl)
1741{
1742 register unsigned char *cp, *cpe;
1743 register unsigned char *cp2;
1744 size_t mbclen;
1745 mbstate_t mbs, prev_mbs;
1746 wchar_t wc;
1747 NODE *t2;
1748 /*
1749 * Since the lowercase char and its uppercase equivalent may occupy
1750 * different number of bytes (Turkish `i'), we cannot say the length
1751 * of the output string.
1752 * This approach is adapted from format_tree().
1753 */
1754 unsigned char *obuf;
1755 size_t osiz, ofre;
1756
1757 /*
1758 * Better 2 spare bytes than 1, consistently with make_str_node().
1759 * And we need gawk_mb_cur_max free bytes before we convert the last
1760 * char, so we add (gawk_mb_cur_max - 1).
1761 */
1762 osiz = t1->stlen + 2 + (gawk_mb_cur_max - 1);
1763 ofre = osiz - 2;
1764 emalloc(obuf, char *, osiz, "wide_tolower_toupper");
1765
1766 memset(&mbs, 0, sizeof(mbstate_t));
1767 cp = (unsigned char *)t1->stptr;
1768 cpe = (unsigned char *)(t1->stptr + t1->stlen);
1769 cp2 = obuf;
1770 while (cp < cpe) {
1771 if (ofre < gawk_mb_cur_max) {
1772 size_t olen = cp2 - obuf;
1773 ofre += osiz;
1774 osiz *= 2;
1775 erealloc(obuf, char *, osiz, "wide_tolower_toupper");
1776 cp2 = obuf + olen;
1777 }
1778 prev_mbs = mbs;
1779 mbclen = (size_t) mbrtowc(&wc, (char *) cp, cpe - cp,
1780 &mbs);
1781 if (mbclen == 0 || mbclen == (size_t) -1 || mbclen == (size_t) -2) {
1782 /* Null wide char, or a problem appeared. */
1783 *cp2++ = *cp++;
1784 ofre--;
1785 continue;
1786 }
1787
1788 /* If the character doesn't need change, copy it. */
1789 if (!(*iswu)(wc)) {
1790 ofre -= mbclen;
1791 while (mbclen--)
1792 *cp2++ = *cp++;
1793 continue;
1794 }
1795
1796 /* Increment the input pointer. */
1797 cp += mbclen;
1798
1799 /* Write the modified wide character. */
1800 mbclen = wcrtomb((char *) cp2, (*towl)(wc), &prev_mbs);
1801
1802 if (mbclen > 0 && mbclen < (size_t) -2) {
1803 /* Increment the output pointer. */
1804 cp2 += mbclen;
1805 ofre -= mbclen;
1806 } else {
1807 /* A problem appeared. */
1808 cp2++;
1809 ofre--;
1810 }
1811 }
1812 t2 = make_str_node(obuf, cp2 - obuf, ALREADY_MALLOCED);
1813 t2->flags |= TEMP;
1814 return t2;
1815}
1816#endif
1817
1818/* do_tolower --- lower case a string */
1819
1820NODE *
1821do_tolower(NODE *tree)
1822{
1823 NODE *t1, *t2;
1824
1825 t1 = tree_eval(tree->lnode);
1826 if (do_lint && (t1->flags & (STRING|STRCUR)) == 0)
1827 lintwarn(_("tolower: received non-string argument"));
1828 t1 = force_string(t1);
1829
1830#ifdef MBS_SUPPORT
1831 if (gawk_mb_cur_max > 1)
1832 t2 = wide_tolower_toupper(t1, iswupper, towlower);
1833 else
1834#endif
1835 {
1836 register unsigned char *cp, *cpe;
1837
1838 t2 = tmp_string(t1->stptr, t1->stlen);
1839 for (cp = (unsigned char *)t2->stptr,
1840 cpe = (unsigned char *)(t2->stptr + t2->stlen); cp < cpe; cp++)
1841 if (ISUPPER(*cp))
1842 *cp = TOLOWER(*cp);
1843 }
1844 free_temp(t1);
1845 return t2;
1846}
1847
1848/* do_toupper --- upper case a string */
1849
1850NODE *
1851do_toupper(NODE *tree)
1852{
1853 NODE *t1, *t2;
1854
1855 t1 = tree_eval(tree->lnode);
1856 if (do_lint && (t1->flags & (STRING|STRCUR)) == 0)
1857 lintwarn(_("toupper: received non-string argument"));
1858 t1 = force_string(t1);
1859
1860#ifdef MBS_SUPPORT
1861 if (gawk_mb_cur_max > 1)
1862 t2 = wide_tolower_toupper(t1, iswlower, towupper);
1863 else
1864#endif
1865 {
1866 register unsigned char *cp, *cpe;
1867
1868 t2 = tmp_string(t1->stptr, t1->stlen);
1869 for (cp = (unsigned char *)t2->stptr,
1870 cpe = (unsigned char *)(t2->stptr + t2->stlen); cp < cpe; cp++)
1871 if (ISLOWER(*cp))
1872 *cp = TOUPPER(*cp);
1873 }
1874 free_temp(t1);
1875 return t2;
1876}
1877
1878/* do_atan2 --- do the atan2 function */
1879
1880NODE *
1881do_atan2(NODE *tree)
1882{
1883 NODE *t1, *t2;
1884 double d1, d2;
1885
1886 t1 = tree_eval(tree->lnode);
1887 t2 = tree_eval(tree->rnode->lnode);
1888 if (do_lint) {
1889 if ((t1->flags & (NUMCUR|NUMBER)) == 0)
1890 lintwarn(_("atan2: received non-numeric first argument"));
1891 if ((t2->flags & (NUMCUR|NUMBER)) == 0)
1892 lintwarn(_("atan2: received non-numeric second argument"));
1893 }
1894 d1 = force_number(t1);
1895 d2 = force_number(t2);
1896 free_temp(t1);
1897 free_temp(t2);
1898 return tmp_number((AWKNUM) atan2(d1, d2));
1899}
1900
1901/* do_sin --- do the sin function */
1902
1903NODE *
1904do_sin(NODE *tree)
1905{
1906 NODE *tmp;
1907 double d;
1908
1909 tmp = tree_eval(tree->lnode);
1910 if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0)
1911 lintwarn(_("sin: received non-numeric argument"));
1912 d = sin((double) force_number(tmp));
1913 free_temp(tmp);
1914 return tmp_number((AWKNUM) d);
1915}
1916
1917/* do_cos --- do the cos function */
1918
1919NODE *
1920do_cos(NODE *tree)
1921{
1922 NODE *tmp;
1923 double d;
1924
1925 tmp = tree_eval(tree->lnode);
1926 if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0)
1927 lintwarn(_("cos: received non-numeric argument"));
1928 d = cos((double) force_number(tmp));
1929 free_temp(tmp);
1930 return tmp_number((AWKNUM) d);
1931}
1932
1933/* do_rand --- do the rand function */
1934
1935static int firstrand = TRUE;
1936static char state[256];
1937
1938/* ARGSUSED */
1939NODE *
1940do_rand(NODE *tree ATTRIBUTE_UNUSED)
1941{
1942 if (firstrand) {
1943 (void) initstate((unsigned) 1, state, sizeof state);
1944 /* don't need to srandom(1), initstate() does it for us. */
1945 firstrand = FALSE;
1946 setstate(state);
1947 }
1948 /*
1949 * Per historical practice and POSIX, return value N is
1950 *
1951 * 0 <= n < 1
1952 */
1953 return tmp_number((AWKNUM) (random() % GAWK_RANDOM_MAX) / GAWK_RANDOM_MAX);
1954}
1955
1956/* do_srand --- seed the random number generator */
1957
1958NODE *
1959do_srand(NODE *tree)
1960{
1961 NODE *tmp;
1962 static long save_seed = 1;
1963 long ret = save_seed; /* SVR4 awk srand returns previous seed */
1964
1965 if (firstrand) {
1966 (void) initstate((unsigned) 1, state, sizeof state);
1967 /* don't need to srandom(1), we're changing the seed below */
1968 firstrand = FALSE;
1969 (void) setstate(state);
1970 }
1971
1972 if (tree == NULL)
1973 srandom((unsigned int) (save_seed = (long) time((time_t *) 0)));
1974 else {
1975 tmp = tree_eval(tree->lnode);
1976 if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0)
1977 lintwarn(_("srand: received non-numeric argument"));
1978 srandom((unsigned int) (save_seed = (long) force_number(tmp)));
1979 free_temp(tmp);
1980 }
1981 return tmp_number((AWKNUM) ret);
1982}
1983
1984/* do_match --- match a regexp, set RSTART and RLENGTH,
1985 * optional third arg is array filled with text of
1986 * subpatterns enclosed in parens and start and len info.
1987 */
1988
1989NODE *
1990do_match(NODE *tree)
1991{
1992 NODE *t1, *dest, *it;
1993 int rstart, len, ii;
1994 int rlength;
1995 Regexp *rp;
1996 regoff_t s;
1997 char *start;
1998 char *buf = NULL;
1999 char buff[100];
2000 size_t amt, oldamt = 0, ilen, slen;
2001 char *subsepstr;
2002 size_t subseplen;
2003
2004 t1 = force_string(tree_eval(tree->lnode));
2005 tree = tree->rnode;
2006 rp = re_update(tree->lnode);
2007
2008 dest = NULL;
2009 if (tree->rnode != NULL) { /* 3rd optional arg for the subpatterns */
2010 dest = get_param(tree->rnode->lnode);
2011 if (dest->type != Node_var_array)
2012 fatal(_("match: third argument is not an array"));
2013
2014 assoc_clear(dest);
2015 }
2016
2017 rstart = research(rp, t1->stptr, 0, t1->stlen, RE_NEED_START);
2018 if (rstart >= 0) { /* match succeded */
2019 size_t *wc_indices = NULL;
2020
2021 rlength = REEND(rp, t1->stptr) - RESTART(rp, t1->stptr); /* byte length */
2022#ifdef MBS_SUPPORT
2023 if (gawk_mb_cur_max > 1) {
2024 t1 = str2wstr(t1, & wc_indices);
2025 rlength = wc_indices[rstart + rlength - 1] - wc_indices[rstart] + 1;
2026 rstart = wc_indices[rstart];
2027 }
2028#endif
2029 rstart++; /* now it's 1-based indexing */
2030
2031 /* Build the array only if the caller wants the optional subpatterns */
2032 if (dest != NULL) {
2033 subsepstr = SUBSEP_node->var_value->stptr;
2034 subseplen = SUBSEP_node->var_value->stlen;
2035
2036 for (ii = 0; ii < NUMSUBPATS(rp, t1->stptr); ii++) {
2037 /*
2038 * Loop over all the subpats; some of them may have
2039 * matched even if all of them did not.
2040 */
2041 if ((s = SUBPATSTART(rp, t1->stptr, ii)) != -1) {
2042 size_t subpat_start;
2043 size_t subpat_len;
2044
2045 start = t1->stptr + s;
2046 subpat_start = s;
2047 subpat_len = len = SUBPATEND(rp, t1->stptr, ii) - s;
2048#ifdef MBS_SUPPORT
2049 if (gawk_mb_cur_max > 1) {
2050 subpat_start = wc_indices[s];
2051 subpat_len = wc_indices[s + len - 1] - subpat_start + 1;
2052 }
2053#endif
2054
2055 it = make_string(start, len);
2056 /*
2057 * assoc_lookup() does free_temp() on 2nd arg.
2058 */
2059 *assoc_lookup(dest, tmp_number((AWKNUM) (ii)), FALSE) = it;
2060
2061 sprintf(buff, "%d", ii);
2062 ilen = strlen(buff);
2063 amt = ilen + subseplen + strlen("length") + 2;
2064
2065 if (oldamt == 0) {
2066 emalloc(buf, char *, amt, "do_match");
2067 } else if (amt > oldamt) {
2068 erealloc(buf, char *, amt, "do_match");
2069 }
2070 oldamt = amt;
2071 memcpy(buf, buff, ilen);
2072 memcpy(buf + ilen, subsepstr, subseplen);
2073 memcpy(buf + ilen + subseplen, "start", 6);
2074
2075 slen = ilen + subseplen + 5;
2076
2077 it = make_number((AWKNUM) subpat_start + 1);
2078 *assoc_lookup(dest, tmp_string(buf, slen), FALSE) = it;
2079
2080 memcpy(buf, buff, ilen);
2081 memcpy(buf + ilen, subsepstr, subseplen);
2082 memcpy(buf + ilen + subseplen, "length", 7);
2083
2084 slen = ilen + subseplen + 6;
2085
2086 it = make_number((AWKNUM) subpat_len);
2087 *assoc_lookup(dest, tmp_string(buf, slen), FALSE) = it;
2088 }
2089 }
2090
2091 free(buf);
2092 if (wc_indices != NULL)
2093 free(wc_indices);
2094 }
2095 } else { /* match failed */
2096 rstart = 0;
2097 rlength = -1;
2098 }
2099 free_temp(t1);
2100 unref(RSTART_node->var_value);
2101 RSTART_node->var_value = make_number((AWKNUM) rstart);
2102 unref(RLENGTH_node->var_value);
2103 RLENGTH_node->var_value = make_number((AWKNUM) rlength);
2104 return tmp_number((AWKNUM) rstart);
2105}
2106
2107/* sub_common --- the common code (does the work) for sub, gsub, and gensub */
2108
2109/*
2110 * Gsub can be tricksy; particularly when handling the case of null strings.
2111 * The following awk code was useful in debugging problems. It is too bad
2112 * that it does not readily translate directly into the C code, below.
2113 *
2114 * #! /usr/local/bin/mawk -f
2115 *
2116 * BEGIN {
2117 * TRUE = 1; FALSE = 0
2118 * print "--->", mygsub("abc", "b+", "FOO")
2119 * print "--->", mygsub("abc", "x*", "X")
2120 * print "--->", mygsub("abc", "b*", "X")
2121 * print "--->", mygsub("abc", "c", "X")
2122 * print "--->", mygsub("abc", "c+", "X")
2123 * print "--->", mygsub("abc", "x*$", "X")
2124 * }
2125 *
2126 * function mygsub(str, regex, replace, origstr, newstr, eosflag, nonzeroflag)
2127 * {
2128 * origstr = str;
2129 * eosflag = nonzeroflag = FALSE
2130 * while (match(str, regex)) {
2131 * if (RLENGTH > 0) { # easy case
2132 * nonzeroflag = TRUE
2133 * if (RSTART == 1) { # match at front of string
2134 * newstr = newstr replace
2135 * } else {
2136 * newstr = newstr substr(str, 1, RSTART-1) replace
2137 * }
2138 * str = substr(str, RSTART+RLENGTH)
2139 * } else if (nonzeroflag) {
2140 * # last match was non-zero in length, and at the
2141 * # current character, we get a zero length match,
2142 * # which we don't really want, so skip over it
2143 * newstr = newstr substr(str, 1, 1)
2144 * str = substr(str, 2)
2145 * nonzeroflag = FALSE
2146 * } else {
2147 * # 0-length match
2148 * if (RSTART == 1) {
2149 * newstr = newstr replace substr(str, 1, 1)
2150 * str = substr(str, 2)
2151 * } else {
2152 * return newstr str replace
2153 * }
2154 * }
2155 * if (length(str) == 0)
2156 * if (eosflag)
2157 * break
2158 * else
2159 * eosflag = TRUE
2160 * }
2161 * if (length(str) > 0)
2162 * newstr = newstr str # rest of string
2163 *
2164 * return newstr
2165 * }
2166 */
2167
2168/*
2169 * 1/2004: The gawk sub/gsub behavior dates from 1996, when we proposed it
2170 * for POSIX. The proposal fell through the cracks, and the 2001 POSIX
2171 * standard chose a more simple behavior.
2172 *
2173 * The relevant text is to be found on lines 6394-6407 (pages 166, 167) of the
2174 * 2001 standard:
2175 *
2176 * sub(ere, repl[, in ])
2177 * Substitute the string repl in place of the first instance of the extended regular
2178 * expression ERE in string in and return the number of substitutions. An ampersand
2179 * ('&') appearing in the string repl shall be replaced by the string from in that
2180 * matches the ERE. An ampersand preceded with a backslash ('\') shall be
2181 * interpreted as the literal ampersand character. An occurrence of two consecutive
2182 * backslashes shall be interpreted as just a single literal backslash character. Any
2183 * other occurrence of a backslash (for example, preceding any other character) shall
2184 * be treated as a literal backslash character. Note that if repl is a string literal (the
2185 * lexical token STRING; see Grammar (on page 170)), the handling of the
2186 * ampersand character occurs after any lexical processing, including any lexical
2187 * backslash escape sequence processing. If in is specified and it is not an lvalue (see
2188 * Expressions in awk (on page 156)), the behavior is undefined. If in is omitted, awk
2189 * shall use the current record ($0) in its place.
2190 *
2191 * Because gawk has had its behavior for 7+ years, that behavior is remaining as
2192 * the default, with the POSIX behavior available for do_posix. Fun, fun, fun.
2193 */
2194
2195/*
2196 * NB: `howmany' conflicts with a SunOS 4.x macro in <sys/param.h>.
2197 */
2198
2199static NODE *
2200sub_common(NODE *tree, long how_many, int backdigs)
2201{
2202 register char *scan;
2203 register char *bp, *cp;
2204 char *buf;
2205 size_t buflen;
2206 register char *matchend;
2207 register size_t len;
2208 char *matchstart;
2209 char *text;
2210 size_t textlen;
2211 char *repl;
2212 char *replend;
2213 size_t repllen;
2214 int sofar;
2215 int ampersands;
2216 int matches = 0;
2217 Regexp *rp;
2218 NODE *s; /* subst. pattern */
2219 NODE *t; /* string to make sub. in; $0 if none given */
2220 NODE *tmp;
2221 NODE **lhs = &tree; /* value not used -- just different from NULL */
2222 int priv = FALSE;
2223 Func_ptr after_assign = NULL;
2224
2225 int global = (how_many == -1);
2226 long current;
2227 int lastmatchnonzero;
2228 char *mb_indices = NULL;
2229
2230 tmp = tree->lnode; /* regexp */
2231 rp = re_update(tmp);
2232
2233 tree = tree->rnode; /* replacement text */
2234 s = tree->lnode;
2235 s = force_string(tree_eval(s));
2236
2237 tree = tree->rnode; /* original string */
2238 tmp = tree->lnode;
2239 t = force_string(tree_eval(tmp));
2240
2241 /* do the search early to avoid work on non-match */
2242 if (research(rp, t->stptr, 0, t->stlen, RE_NEED_START) == -1 ||
2243 RESTART(rp, t->stptr) > t->stlen) {
2244 free_temp(t);
2245 free_temp(s);
2246 return tmp_number((AWKNUM) 0.0);
2247 }
2248
2249 if (tmp->type == Node_val)
2250 lhs = NULL;
2251 else
2252 lhs = get_lhs(tmp, &after_assign, FALSE);
2253 t->flags |= STRING;
2254 /*
2255 * create a private copy of the string
2256 */
2257 if (t->stref > 1 || (t->flags & (PERM|FIELD)) != 0) {
2258 tmp = copynode(t);
2259 t = tmp;
2260 priv = TRUE;
2261 }
2262 text = t->stptr;
2263 textlen = t->stlen;
2264 buflen = textlen + 2;
2265
2266 repl = s->stptr;
2267 replend = repl + s->stlen;
2268 repllen = replend - repl;
2269 emalloc(buf, char *, buflen + 2, "sub_common");
2270 buf[buflen] = '\0';
2271 buf[buflen + 1] = '\0';
2272 ampersands = 0;
2273
2274 /*
2275 * Some systems' malloc() can't handle being called with an
2276 * argument of zero. Thus we have to have some special case
2277 * code to check for `repllen == 0'. This can occur for
2278 * something like:
2279 * sub(/foo/, "", mystring)
2280 * for example.
2281 */
2282 if (gawk_mb_cur_max > 1 && repllen > 0) {
2283 emalloc(mb_indices, char *, repllen * sizeof(char), "sub_common");
2284 index_multibyte_buffer(repl, mb_indices, repllen);
2285 }
2286
2287 for (scan = repl; scan < replend; scan++) {
2288 if ((gawk_mb_cur_max == 1 || (repllen > 0 && mb_indices[scan - repl] == 1))
2289 && (*scan == '&')) {
2290 repllen--;
2291 ampersands++;
2292 } else if (*scan == '\\') {
2293 if (backdigs) { /* gensub, behave sanely */
2294 if (ISDIGIT(scan[1])) {
2295 ampersands++;
2296 scan++;
2297 } else { /* \q for any q --> q */
2298 repllen--;
2299 scan++;
2300 }
2301 } else if (do_posix) {
2302 /* \& --> &, \\ --> \ */
2303 if (scan[1] == '&' || scan[1] == '\\') {
2304 repllen--;
2305 scan++;
2306 } /* else
2307 leave alone, it goes into the output */
2308 } else {
2309 /* gawk default behavior since 1996 */
2310 if (strncmp(scan, "\\\\\\&", 4) == 0) {
2311 /* \\\& --> \& */
2312 repllen -= 2;
2313 scan += 3;
2314 } else if (strncmp(scan, "\\\\&", 3) == 0) {
2315 /* \\& --> \<string> */
2316 ampersands++;
2317 repllen--;
2318 scan += 2;
2319 } else if (scan[1] == '&') {
2320 /* \& --> & */
2321 repllen--;
2322 scan++;
2323 } /* else
2324 leave alone, it goes into the output */
2325 }
2326 }
2327 }
2328
2329 lastmatchnonzero = FALSE;
2330 bp = buf;
2331 for (current = 1;; current++) {
2332 matches++;
2333 matchstart = t->stptr + RESTART(rp, t->stptr);
2334 matchend = t->stptr + REEND(rp, t->stptr);
2335
2336 /*
2337 * create the result, copying in parts of the original
2338 * string
2339 */
2340 len = matchstart - text + repllen
2341 + ampersands * (matchend - matchstart);
2342 sofar = bp - buf;
2343 while (buflen < (sofar + len + 1)) {
2344 buflen *= 2;
2345 erealloc(buf, char *, buflen, "sub_common");
2346 bp = buf + sofar;
2347 }
2348 for (scan = text; scan < matchstart; scan++)
2349 *bp++ = *scan;
2350 if (global || current == how_many) {
2351 /*
2352 * If the current match matched the null string,
2353 * and the last match didn't and did a replacement,
2354 * and the match of the null string is at the front of
2355 * the text (meaning right after end of the previous
2356 * replacement), then skip this one.
2357 */
2358 if (matchstart == matchend
2359 && lastmatchnonzero
2360 && matchstart == text) {
2361 lastmatchnonzero = FALSE;
2362 matches--;
2363 goto empty;
2364 }
2365 /*
2366 * If replacing all occurrences, or this is the
2367 * match we want, copy in the replacement text,
2368 * making substitutions as we go.
2369 */
2370 for (scan = repl; scan < replend; scan++)
2371 if (*scan == '&'
2372 /*
2373 * Don't test repllen here. A simple "&" could
2374 * end up with repllen == 0.
2375 */
2376 && (gawk_mb_cur_max == 1
2377 || mb_indices[scan - repl] == 1)
2378 ) {
2379 for (cp = matchstart; cp < matchend; cp++)
2380 *bp++ = *cp;
2381 } else if (*scan == '\\'
2382 && (gawk_mb_cur_max == 1
2383 || (repllen > 0 && mb_indices[scan - repl] == 1))
2384 ) {
2385 if (backdigs) { /* gensub, behave sanely */
2386 if (ISDIGIT(scan[1])) {
2387 int dig = scan[1] - '0';
2388 char *start, *end;
2389
2390 start = t->stptr
2391 + SUBPATSTART(rp, t->stptr, dig);
2392 end = t->stptr
2393 + SUBPATEND(rp, t->stptr, dig);
2394
2395 for (cp = start; cp < end; cp++)
2396 *bp++ = *cp;
2397 scan++;
2398 } else /* \q for any q --> q */
2399 *bp++ = *++scan;
2400 } else if (do_posix) {
2401 /* \& --> &, \\ --> \ */
2402 if (scan[1] == '&' || scan[1] == '\\')
2403 scan++;
2404 *bp++ = *scan;
2405 } else {
2406 /* gawk default behavior since 1996 */
2407 if (strncmp(scan, "\\\\\\&", 4) == 0) {
2408 /* \\\& --> \& */
2409 *bp++ = '\\';
2410 *bp++ = '&';
2411 scan += 3;
2412 } else if (strncmp(scan, "\\\\&", 3) == 0) {
2413 /* \\& --> \<string> */
2414 *bp++ = '\\';
2415 for (cp = matchstart; cp < matchend; cp++)
2416 *bp++ = *cp;
2417 scan += 2;
2418 } else if (scan[1] == '&') {
2419 /* \& --> & */
2420 *bp++ = '&';
2421 scan++;
2422 } else
2423 *bp++ = *scan;
2424 }
2425 } else
2426 *bp++ = *scan;
2427 if (matchstart != matchend)
2428 lastmatchnonzero = TRUE;
2429 } else {
2430 /*
2431 * don't want this match, skip over it by copying
2432 * in current text.
2433 */
2434 for (cp = matchstart; cp < matchend; cp++)
2435 *bp++ = *cp;
2436 }
2437 empty:
2438 /* catch the case of gsub(//, "blah", whatever), i.e. empty regexp */
2439 if (matchstart == matchend && matchend < text + textlen) {
2440 *bp++ = *matchend;
2441 matchend++;
2442 }
2443 textlen = text + textlen - matchend;
2444 text = matchend;
2445
2446 if ((current >= how_many && !global)
2447 || ((long) textlen <= 0 && matchstart == matchend)
2448 || research(rp, t->stptr, text - t->stptr, textlen, RE_NEED_START) == -1)
2449 break;
2450
2451 }
2452 sofar = bp - buf;
2453 if (buflen - sofar - textlen - 1) {
2454 buflen = sofar + textlen + 2;
2455 erealloc(buf, char *, buflen, "sub_common");
2456 bp = buf + sofar;
2457 }
2458 for (scan = matchend; scan < text + textlen; scan++)
2459 *bp++ = *scan;
2460 *bp = '\0';
2461 textlen = bp - buf;
2462 free(t->stptr);
2463 t->stptr = buf;
2464 t->stlen = textlen;
2465
2466 free_temp(s);
2467 if (matches > 0 && lhs) {
2468 if (priv) {
2469 unref(*lhs);
2470 *lhs = t;
2471 }
2472 if (after_assign != NULL)
2473 (*after_assign)();
2474 t->flags &= ~(NUMCUR|NUMBER);
2475 }
2476 if (mb_indices != NULL)
2477 free(mb_indices);
2478
2479 return tmp_number((AWKNUM) matches);
2480}
2481
2482/* do_gsub --- global substitution */
2483
2484NODE *
2485do_gsub(NODE *tree)
2486{
2487 return sub_common(tree, -1, FALSE);
2488}
2489
2490/* do_sub --- single substitution */
2491
2492NODE *
2493do_sub(NODE *tree)
2494{
2495 return sub_common(tree, 1, FALSE);
2496}
2497
2498/* do_gensub --- fix up the tree for sub_common for the gensub function */
2499
2500NODE *
2501do_gensub(NODE *tree)
2502{
2503 NODE n1, n2, n3, *t, *tmp, *target, *ret;
2504 long how_many = 1; /* default is one substitution */
2505 double d;
2506
2507 /*
2508 * We have to pull out the value of the global flag, and
2509 * build up a tree without the flag in it, turning it into the
2510 * kind of tree that sub_common() expects. It helps to draw
2511 * a picture of this ...
2512 */
2513 n1 = *tree;
2514 n2 = *(tree->rnode);
2515 n1.rnode = & n2;
2516
2517 t = tree_eval(n2.rnode->lnode); /* value of global flag */
2518
2519 tmp = force_string(tree_eval(n2.rnode->rnode->lnode)); /* target */
2520
2521 /*
2522 * We make copy of the original target string, and pass that
2523 * in to sub_common() as the target to make the substitution in.
2524 * We will then return the result string as the return value of
2525 * this function.
2526 */
2527 target = make_string(tmp->stptr, tmp->stlen);
2528 free_temp(tmp);
2529
2530 n3 = *(n2.rnode->rnode);
2531 n3.lnode = target;
2532 n2.rnode = & n3;
2533
2534 if ((t->flags & (STRCUR|STRING)) != 0) {
2535 if (t->stlen > 0 && (t->stptr[0] == 'g' || t->stptr[0] == 'G'))
2536 how_many = -1;
2537 else {
2538 d = force_number(t);
2539
2540 if ((t->flags & NUMCUR) != 0)
2541 goto set_how_many;
2542
2543 how_many = 1;
2544 }
2545 } else {
2546 d = force_number(t);
2547set_how_many:
2548 if (d < 1)
2549 how_many = 1;
2550 else if (d < LONG_MAX)
2551 how_many = d;
2552 else
2553 how_many = LONG_MAX;
2554 if (d == 0)
2555 warning(_("gensub: third argument of 0 treated as 1"));
2556 }
2557
2558 free_temp(t);
2559
2560 ret = sub_common(&n1, how_many, TRUE);
2561 free_temp(ret);
2562
2563 /*
2564 * Note that we don't care what sub_common() returns, since the
2565 * easiest thing for the programmer is to return the string, even
2566 * if no substitutions were done.
2567 */
2568 target->flags |= TEMP;
2569 return target;
2570}
2571
2572#ifdef GFMT_WORKAROUND
2573/*
2574 * printf's %g format [can't rely on gcvt()]
2575 * caveat: don't use as argument to *printf()!
2576 * 'format' string HAS to be of "<flags>*.*g" kind, or we bomb!
2577 */
2578static void
2579sgfmt(char *buf, /* return buffer; assumed big enough to hold result */
2580 const char *format,
2581 int alt, /* use alternate form flag */
2582 int fwidth, /* field width in a format */
2583 int prec, /* indicates desired significant digits, not decimal places */
2584 double g) /* value to format */
2585{
2586 char dform[40];
2587 register char *gpos;
2588 register char *d, *e, *p;
2589 int again = FALSE;
2590
2591 strncpy(dform, format, sizeof dform - 1);
2592 dform[sizeof dform - 1] = '\0';
2593 gpos = strrchr(dform, '.');
2594
2595 if (g == 0.0 && ! alt) { /* easy special case */
2596 *gpos++ = 'd';
2597 *gpos = '\0';
2598 (void) sprintf(buf, dform, fwidth, 0);
2599 return;
2600 }
2601
2602 /* advance to location of 'g' in the format */
2603 while (*gpos && *gpos != 'g' && *gpos != 'G')
2604 gpos++;
2605
2606 if (prec <= 0) /* negative precision is ignored */
2607 prec = (prec < 0 ? DEFAULT_G_PRECISION : 1);
2608
2609 if (*gpos == 'G')
2610 again = TRUE;
2611 /* start with 'e' format (it'll provide nice exponent) */
2612 *gpos = 'e';
2613 prec--;
2614 (void) sprintf(buf, dform, fwidth, prec, g);
2615 if ((e = strrchr(buf, 'e')) != NULL) { /* find exponent */
2616 int expn = atoi(e+1); /* fetch exponent */
2617 if (expn >= -4 && expn <= prec) { /* per K&R2, B1.2 */
2618 /* switch to 'f' format and re-do */
2619 *gpos = 'f';
2620 prec -= expn; /* decimal precision */
2621 (void) sprintf(buf, dform, fwidth, prec, g);
2622 e = buf + strlen(buf);
2623 while (*--e == ' ')
2624 continue;
2625 e++;
2626 }
2627 else if (again)
2628 *gpos = 'E';
2629
2630 /* if 'alt' in force, then trailing zeros are not removed */
2631 if (! alt && (d = strrchr(buf, '.')) != NULL) {
2632 /* throw away an excess of precision */
2633 for (p = e; p > d && *--p == '0'; )
2634 prec--;
2635 if (d == p)
2636 prec--;
2637 if (prec < 0)
2638 prec = 0;
2639 /* and do that once again */
2640 again = TRUE;
2641 }
2642 if (again)
2643 (void) sprintf(buf, dform, fwidth, prec, g);
2644 }
2645}
2646#endif /* GFMT_WORKAROUND */
2647
2648/*
2649 * The number of base-FLT_RADIX digits in an AWKNUM fraction, assuming
2650 * that AWKNUM is not long double.
2651 */
2652#define AWKSMALL_MANT_DIG \
2653 (sizeof (AWKNUM) == sizeof (double) ? DBL_MANT_DIG : FLT_MANT_DIG)
2654
2655/*
2656 * The number of base-FLT_DIGIT digits in an AWKNUM fraction, even if
2657 * AWKNUM is long double. Don't mention 'long double' unless
2658 * LDBL_MANT_DIG is defined, for the sake of ancient compilers that
2659 * lack 'long double'.
2660 */
2661#ifdef LDBL_MANT_DIG
2662#define AWKNUM_MANT_DIG \
2663 (sizeof (AWKNUM) == sizeof (long double) ? LDBL_MANT_DIG : AWKSMALL_MANT_DIG)
2664#else
2665#define AWKNUM_MANT_DIG AWKSMALL_MANT_DIG
2666#endif
2667
2668/*
2669 * The number of bits in an AWKNUM fraction, assuming FLT_RADIX is
2670 * either 2 or 16. IEEE and VAX formats use radix 2, and IBM
2671 * mainframe format uses radix 16; we know of no other radices in
2672 * practical use.
2673 */
2674#if FLT_RADIX != 2 && FLT_RADIX != 16
2675Please port the following code to your weird host;
2676#endif
2677#define AWKNUM_FRACTION_BITS (AWKNUM_MANT_DIG * (FLT_RADIX == 2 ? 1 : 4))
2678
2679/* tmp_integer - Convert an integer to a temporary number node. */
2680
2681static NODE *
2682tmp_integer(uintmax_t n)
2683{
2684#ifdef HAVE_UINTMAX_T
2685/* #ifndef LDBL_MANT_DIG */
2686 /*
2687 * If uintmax_t is so wide that AWKNUM cannot represent all its
2688 * values, strip leading nonzero bits of integers that are so large
2689 * that they cannot be represented exactly as AWKNUMs, so that their
2690 * low order bits are represented exactly, without rounding errors.
2691 * This is more desirable in practice, since it means the user sees
2692 * integers that are the same width as the AWKNUM fractions.
2693 */
2694 if (AWKNUM_FRACTION_BITS < CHAR_BIT * sizeof n)
2695 n &= ((uintmax_t) 1 << AWKNUM_FRACTION_BITS) - 1;
2696/* #endif */ /* LDBL_MANT_DIG */
2697#endif /* HAVE_UINTMAX_T */
2698
2699 return tmp_number((AWKNUM) n);
2700}
2701
2702/* do_lshift --- perform a << operation */
2703
2704NODE *
2705do_lshift(NODE *tree)
2706{
2707 NODE *s1, *s2;
2708 uintmax_t uval, ushift, res;
2709 AWKNUM val, shift;
2710
2711 s1 = tree_eval(tree->lnode);
2712 s2 = tree_eval(tree->rnode->lnode);
2713 if (do_lint) {
2714 if ((s1->flags & (NUMCUR|NUMBER)) == 0)
2715 lintwarn(_("lshift: received non-numeric first argument"));
2716 if ((s2->flags & (NUMCUR|NUMBER)) == 0)
2717 lintwarn(_("lshift: received non-numeric second argument"));
2718 }
2719 val = force_number(s1);
2720 shift = force_number(s2);
2721 if (do_lint) {
2722 if (val < 0 || shift < 0)
2723 lintwarn(_("lshift(%lf, %lf): negative values will give strange results"), val, shift);
2724 if (double_to_int(val) != val || double_to_int(shift) != shift)
2725 lintwarn(_("lshift(%lf, %lf): fractional values will be truncated"), val, shift);
2726 if (shift >= sizeof(uintmax_t) * CHAR_BIT)
2727 lintwarn(_("lshift(%lf, %lf): too large shift value will give strange results"), val, shift);
2728 }
2729
2730 free_temp(s1);
2731 free_temp(s2);
2732
2733 uval = (uintmax_t) val;
2734 ushift = (uintmax_t) shift;
2735
2736 res = uval << ushift;
2737 return tmp_integer(res);
2738}
2739
2740/* do_rshift --- perform a >> operation */
2741
2742NODE *
2743do_rshift(NODE *tree)
2744{
2745 NODE *s1, *s2;
2746 uintmax_t uval, ushift, res;
2747 AWKNUM val, shift;
2748
2749 s1 = tree_eval(tree->lnode);
2750 s2 = tree_eval(tree->rnode->lnode);
2751 if (do_lint) {
2752 if ((s1->flags & (NUMCUR|NUMBER)) == 0)
2753 lintwarn(_("rshift: received non-numeric first argument"));
2754 if ((s2->flags & (NUMCUR|NUMBER)) == 0)
2755 lintwarn(_("rshift: received non-numeric second argument"));
2756 }
2757 val = force_number(s1);
2758 shift = force_number(s2);
2759 if (do_lint) {
2760 if (val < 0 || shift < 0)
2761 lintwarn(_("rshift(%lf, %lf): negative values will give strange results"), val, shift);
2762 if (double_to_int(val) != val || double_to_int(shift) != shift)
2763 lintwarn(_("rshift(%lf, %lf): fractional values will be truncated"), val, shift);
2764 if (shift >= sizeof(uintmax_t) * CHAR_BIT)
2765 lintwarn(_("rshift(%lf, %lf): too large shift value will give strange results"), val, shift);
2766 }
2767
2768 free_temp(s1);
2769 free_temp(s2);
2770
2771 uval = (uintmax_t) val;
2772 ushift = (uintmax_t) shift;
2773
2774 res = uval >> ushift;
2775 return tmp_integer(res);
2776}
2777
2778/* do_and --- perform an & operation */
2779
2780NODE *
2781do_and(NODE *tree)
2782{
2783 NODE *s1, *s2;
2784 uintmax_t uleft, uright, res;
2785 AWKNUM left, right;
2786
2787 s1 = tree_eval(tree->lnode);
2788 s2 = tree_eval(tree->rnode->lnode);
2789 if (do_lint) {
2790 if ((s1->flags & (NUMCUR|NUMBER)) == 0)
2791 lintwarn(_("and: received non-numeric first argument"));
2792 if ((s2->flags & (NUMCUR|NUMBER)) == 0)
2793 lintwarn(_("and: received non-numeric second argument"));
2794 }
2795 left = force_number(s1);
2796 right = force_number(s2);
2797 if (do_lint) {
2798 if (left < 0 || right < 0)
2799 lintwarn(_("and(%lf, %lf): negative values will give strange results"), left, right);
2800 if (double_to_int(left) != left || double_to_int(right) != right)
2801 lintwarn(_("and(%lf, %lf): fractional values will be truncated"), left, right);
2802 }
2803
2804 free_temp(s1);
2805 free_temp(s2);
2806
2807 uleft = (uintmax_t) left;
2808 uright = (uintmax_t) right;
2809
2810 res = uleft & uright;
2811 return tmp_integer(res);
2812}
2813
2814/* do_or --- perform an | operation */
2815
2816NODE *
2817do_or(NODE *tree)
2818{
2819 NODE *s1, *s2;
2820 uintmax_t uleft, uright, res;
2821 AWKNUM left, right;
2822
2823 s1 = tree_eval(tree->lnode);
2824 s2 = tree_eval(tree->rnode->lnode);
2825 if (do_lint) {
2826 if ((s1->flags & (NUMCUR|NUMBER)) == 0)
2827 lintwarn(_("or: received non-numeric first argument"));
2828 if ((s2->flags & (NUMCUR|NUMBER)) == 0)
2829 lintwarn(_("or: received non-numeric second argument"));
2830 }
2831 left = force_number(s1);
2832 right = force_number(s2);
2833 if (do_lint) {
2834 if (left < 0 || right < 0)
2835 lintwarn(_("or(%lf, %lf): negative values will give strange results"), left, right);
2836 if (double_to_int(left) != left || double_to_int(right) != right)
2837 lintwarn(_("or(%lf, %lf): fractional values will be truncated"), left, right);
2838 }
2839
2840 free_temp(s1);
2841 free_temp(s2);
2842
2843 uleft = (uintmax_t) left;
2844 uright = (uintmax_t) right;
2845
2846 res = uleft | uright;
2847 return tmp_integer(res);
2848}
2849
2850/* do_xor --- perform an ^ operation */
2851
2852NODE *
2853do_xor(NODE *tree)
2854{
2855 NODE *s1, *s2;
2856 uintmax_t uleft, uright, res;
2857 AWKNUM left, right;
2858
2859 s1 = tree_eval(tree->lnode);
2860 s2 = tree_eval(tree->rnode->lnode);
2861 if (do_lint) {
2862 if ((s1->flags & (NUMCUR|NUMBER)) == 0)
2863 lintwarn(_("xor: received non-numeric first argument"));
2864 if ((s2->flags & (NUMCUR|NUMBER)) == 0)
2865 lintwarn(_("xor: received non-numeric second argument"));
2866 }
2867 left = force_number(s1);
2868 right = force_number(s2);
2869 if (do_lint) {
2870 if (left < 0 || right < 0)
2871 lintwarn(_("xor(%lf, %lf): negative values will give strange results"), left, right);
2872 if (double_to_int(left) != left || double_to_int(right) != right)
2873 lintwarn(_("xor(%lf, %lf): fractional values will be truncated"), left, right);
2874 }
2875
2876 free_temp(s1);
2877 free_temp(s2);
2878
2879 uleft = (uintmax_t) left;
2880 uright = (uintmax_t) right;
2881
2882 res = uleft ^ uright;
2883 return tmp_integer(res);
2884}
2885
2886/* do_compl --- perform a ~ operation */
2887
2888NODE *
2889do_compl(NODE *tree)
2890{
2891 NODE *tmp;
2892 double d;
2893 uintmax_t uval;
2894
2895 tmp = tree_eval(tree->lnode);
2896 if (do_lint && (tmp->flags & (NUMCUR|NUMBER)) == 0)
2897 lintwarn(_("compl: received non-numeric argument"));
2898 d = force_number(tmp);
2899 free_temp(tmp);
2900
2901 if (do_lint) {
2902 if (d < 0)
2903 lintwarn(_("compl(%lf): negative value will give strange results"), d);
2904 if (double_to_int(d) != d)
2905 lintwarn(_("compl(%lf): fractional value will be truncated"), d);
2906 }
2907
2908 uval = (uintmax_t) d;
2909 uval = ~ uval;
2910 return tmp_integer(uval);
2911}
2912
2913/* do_strtonum --- the strtonum function */
2914
2915NODE *
2916do_strtonum(NODE *tree)
2917{
2918 NODE *tmp;
2919 AWKNUM d;
2920
2921 tmp = tree_eval(tree->lnode);
2922
2923 if ((tmp->flags & (NUMBER|NUMCUR)) != 0)
2924 d = (AWKNUM) force_number(tmp);
2925 else if (isnondecimal(tmp->stptr, TRUE))
2926 d = nondec2awknum(tmp->stptr, tmp->stlen);
2927 else
2928 d = (AWKNUM) force_number(tmp);
2929
2930 free_temp(tmp);
2931 return tmp_number((AWKNUM) d);
2932}
2933
2934/* nondec2awknum --- convert octal or hex value to double */
2935
2936/*
2937 * Because of awk's concatenation rules and the way awk.y:yylex()
2938 * collects a number, this routine has to be willing to stop on the
2939 * first invalid character.
2940 */
2941
2942AWKNUM
2943nondec2awknum(char *str, size_t len)
2944{
2945 AWKNUM retval = 0.0;
2946 char save;
2947 short val;
2948 char *start = str;
2949
2950 if (*str == '0' && (str[1] == 'x' || str[1] == 'X')) {
2951 /*
2952 * User called strtonum("0x") or some such,
2953 * so just quit early.
2954 */
2955 if (len <= 2)
2956 return (AWKNUM) 0.0;
2957
2958 for (str += 2, len -= 2; len > 0; len--, str++) {
2959 switch (*str) {
2960 case '0':
2961 case '1':
2962 case '2':
2963 case '3':
2964 case '4':
2965 case '5':
2966 case '6':
2967 case '7':
2968 case '8':
2969 case '9':
2970 val = *str - '0';
2971 break;
2972 case 'a':
2973 case 'b':
2974 case 'c':
2975 case 'd':
2976 case 'e':
2977 case 'f':
2978 val = *str - 'a' + 10;
2979 break;
2980 case 'A':
2981 case 'B':
2982 case 'C':
2983 case 'D':
2984 case 'E':
2985 case 'F':
2986 val = *str - 'A' + 10;
2987 break;
2988 default:
2989 goto done;
2990 }
2991 retval = (retval * 16) + val;
2992 }
2993 } else if (*str == '0') {
2994 for (; len > 0; len--) {
2995 if (! ISDIGIT(*str))
2996 goto done;
2997 else if (*str == '8' || *str == '9') {
2998 str = start;
2999 goto decimal;
3000 }
3001 retval = (retval * 8) + (*str - '0');
3002 str++;
3003 }
3004 } else {
3005decimal:
3006 save = str[len];
3007 retval = strtod(str, NULL);
3008 str[len] = save;
3009 }
3010done:
3011 return retval;
3012}
3013
3014/* do_dcgettext, do_dcngettext --- handle i18n translations */
3015
3016#if ENABLE_NLS && HAVE_LC_MESSAGES && HAVE_DCGETTEXT
3017
3018static int
3019localecategory_from_argument(NODE *tree)
3020{
3021 static const struct category_table {
3022 int val;
3023 const char *name;
3024 } cat_tab[] = {
3025#ifdef LC_ALL
3026 { LC_ALL, "LC_ALL" },
3027#endif /* LC_ALL */
3028#ifdef LC_COLLATE
3029 { LC_COLLATE, "LC_COLLATE" },
3030#endif /* LC_COLLATE */
3031#ifdef LC_CTYPE
3032 { LC_CTYPE, "LC_CTYPE" },
3033#endif /* LC_CTYPE */
3034#ifdef LC_MESSAGES
3035 { LC_MESSAGES, "LC_MESSAGES" },
3036#endif /* LC_MESSAGES */
3037#ifdef LC_MONETARY
3038 { LC_MONETARY, "LC_MONETARY" },
3039#endif /* LC_MONETARY */
3040#ifdef LC_NUMERIC
3041 { LC_NUMERIC, "LC_NUMERIC" },
3042#endif /* LC_NUMERIC */
3043#ifdef LC_RESPONSE
3044 { LC_RESPONSE, "LC_RESPONSE" },
3045#endif /* LC_RESPONSE */
3046#ifdef LC_TIME
3047 { LC_TIME, "LC_TIME" },
3048#endif /* LC_TIME */
3049 };
3050
3051 if (tree != NULL) {
3052 int low, high, i, mid;
3053 NODE *tmp, *t;
3054 char *category;
3055 int lc_cat = -1;
3056
3057 tmp = tree->lnode;
3058 t = force_string(tree_eval(tmp));
3059 category = t->stptr;
3060
3061 /* binary search the table */
3062 low = 0;
3063 high = (sizeof(cat_tab) / sizeof(cat_tab[0])) - 1;
3064 while (low <= high) {
3065 mid = (low + high) / 2;
3066 i = strcmp(category, cat_tab[mid].name);
3067
3068 if (i < 0) /* category < mid */
3069 high = mid - 1;
3070 else if (i > 0) /* category > mid */
3071 low = mid + 1;
3072 else {
3073 lc_cat = cat_tab[mid].val;
3074 break;
3075 }
3076 }
3077 if (lc_cat == -1) /* not there */
3078 fatal(_("dcgettext: `%s' is not a valid locale category"), category);
3079
3080 free_temp(t);
3081 return lc_cat;
3082 } else
3083 return LC_MESSAGES;
3084}
3085
3086#endif
3087
3088/*
3089 * awk usage is
3090 *
3091 * str = dcgettext(string [, domain [, category]])
3092 * str = dcngettext(string1, string2, number [, domain [, category]])
3093 *
3094 * Default domain is TEXTDOMAIN, default category is LC_MESSAGES.
3095 */
3096
3097NODE *
3098do_dcgettext(NODE *tree)
3099{
3100 NODE *tmp, *t1, *t2;
3101 char *string;
3102 char *the_result;
3103#if ENABLE_NLS && HAVE_LC_MESSAGES && HAVE_DCGETTEXT
3104 int lc_cat;
3105 char *domain;
3106#endif /* ENABLE_NLS */
3107
3108 tmp = tree->lnode; /* first argument */
3109 t1 = force_string(tree_eval(tmp));
3110 string = t1->stptr;
3111
3112 t2 = NULL;
3113#if ENABLE_NLS && HAVE_LC_MESSAGES && HAVE_DCGETTEXT
3114 tree = tree->rnode; /* second argument */
3115 if (tree != NULL) {
3116 tmp = tree->lnode;
3117 t2 = force_string(tree_eval(tmp));
3118 domain = t2->stptr;
3119 } else
3120 domain = TEXTDOMAIN;
3121
3122 if (tree && tree->rnode != NULL) { /* third argument */
3123 lc_cat = localecategory_from_argument(tree->rnode);
3124 } else
3125 lc_cat = LC_MESSAGES;
3126
3127 the_result = dcgettext(domain, string, lc_cat);
3128#else
3129 the_result = string;
3130#endif
3131 free_temp(t1);
3132 if (t2 != NULL)
3133 free_temp(t2);
3134
3135 return tmp_string(the_result, strlen(the_result));
3136}
3137
3138NODE *
3139do_dcngettext(NODE *tree)
3140{
3141 NODE *tmp, *t1, *t2, *t3;
3142 char *string1, *string2;
3143 unsigned long number;
3144 char *the_result;
3145#if ENABLE_NLS && HAVE_LC_MESSAGES && HAVE_DCGETTEXT
3146 int lc_cat;
3147 char *domain;
3148#endif /* ENABLE_NLS */
3149
3150 tmp = tree->lnode; /* first argument */
3151 t1 = force_string(tree_eval(tmp));
3152 string1 = t1->stptr;
3153
3154 tmp = tree->rnode->lnode; /* second argument */
3155 t2 = force_string(tree_eval(tmp));
3156 string2 = t2->stptr;
3157
3158 tmp = tree->rnode->rnode->lnode; /* third argument */
3159 number = (unsigned long) double_to_int(force_number(tree_eval(tmp)));
3160
3161 t3 = NULL;
3162#if ENABLE_NLS && HAVE_LC_MESSAGES && HAVE_DCGETTEXT
3163 tree = tree->rnode->rnode->rnode; /* fourth argument */
3164 if (tree != NULL) {
3165 tmp = tree->lnode;
3166 t3 = force_string(tree_eval(tmp));
3167 domain = t3->stptr;
3168 } else
3169 domain = TEXTDOMAIN;
3170
3171 if (tree && tree->rnode != NULL) { /* fifth argument */
3172 lc_cat = localecategory_from_argument(tree->rnode);
3173 } else
3174 lc_cat = LC_MESSAGES;
3175
3176 the_result = dcngettext(domain, string1, string2, number, lc_cat);
3177#else
3178 the_result = (number == 1 ? string1 : string2);
3179#endif
3180 free_temp(t1);
3181 free_temp(t2);
3182 if (t3 != NULL)
3183 free_temp(t3);
3184
3185 return tmp_string(the_result, strlen(the_result));
3186}
3187
3188/* do_bindtextdomain --- set the directory for a text domain */
3189
3190/*
3191 * awk usage is
3192 *
3193 * binding = bindtextdomain(dir [, domain])
3194 *
3195 * If dir is "", pass NULL to C version.
3196 * Default domain is TEXTDOMAIN.
3197 */
3198
3199NODE *
3200do_bindtextdomain(NODE *tree)
3201{
3202 NODE *tmp, *t1, *t2;
3203 char *directory, *domain;
3204 char *the_result;
3205
3206 t1 = t2 = NULL;
3207 /* set defaults */
3208 directory = NULL;
3209 domain = TEXTDOMAIN;
3210
3211 tmp = tree->lnode; /* first argument */
3212 t1 = force_string(tree_eval(tmp));
3213 if (t1->stlen > 0)
3214 directory = t1->stptr;
3215
3216 tree = tree->rnode; /* second argument */
3217 if (tree != NULL) {
3218 tmp = tree->lnode;
3219 t2 = force_string(tree_eval(tmp));
3220 domain = t2->stptr;
3221 }
3222
3223 the_result = bindtextdomain(domain, directory);
3224
3225 free_temp(t1);
3226 if (t2 != NULL)
3227 free_temp(t2);
3228
3229 return tmp_string(the_result, strlen(the_result));
3230}
Note: See TracBrowser for help on using the repository browser.