source: trunk/essentials/sys-apps/gawk/array.c@ 3590

Last change on this file since 3590 was 3076, checked in by bird, 19 years ago

gawk 3.1.5

File size: 28.6 KB
Line 
1/*
2 * array.c - routines for associative arrays.
3 */
4
5/*
6 * Copyright (C) 1986, 1988, 1989, 1991-2005 the Free Software Foundation, Inc.
7 *
8 * This file is part of GAWK, the GNU implementation of the
9 * AWK Programming Language.
10 *
11 * GAWK is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * GAWK is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 */
25
26/*
27 * Tree walks (``for (iggy in foo)'') and array deletions use expensive
28 * linear searching. So what we do is start out with small arrays and
29 * grow them as needed, so that our arrays are hopefully small enough,
30 * most of the time, that they're pretty full and we're not looking at
31 * wasted space.
32 *
33 * The decision is made to grow the array if the average chain length is
34 * ``too big''. This is defined as the total number of entries in the table
35 * divided by the size of the array being greater than some constant.
36 *
37 * 11/2002: We make the constant a variable, so that it can be tweaked
38 * via environment variable.
39 */
40
41static int AVG_CHAIN_MAX = 2; /* 11/2002: Modern machines are bigger, cut this down from 10. */
42
43#include "awk.h"
44
45static NODE *assoc_find P((NODE *symbol, NODE *subs, unsigned long hash1));
46static void grow_table P((NODE *symbol));
47
48static unsigned long gst_hash_string P((const char *str, size_t len, unsigned long hsize));
49static unsigned long scramble P((unsigned long x));
50static unsigned long awk_hash P((const char *s, size_t len, unsigned long hsize));
51
52unsigned long (*hash)P((const char *s, size_t len, unsigned long hsize)) = awk_hash;
53
54/* array_init --- possibly temporary function for experimentation purposes */
55
56void
57array_init()
58{
59 const char *val;
60 int newval;
61
62 if ((val = getenv("AVG_CHAIN_MAX")) != NULL && ISDIGIT(*val)) {
63 for (newval = 0; *val && ISDIGIT(*val); val++)
64 newval = (newval * 10) + *val - '0';
65
66 AVG_CHAIN_MAX = newval;
67 }
68
69 if ((val = getenv("AWK_HASH")) != NULL && strcmp(val, "gst") == 0)
70 hash = gst_hash_string;
71}
72
73/*
74 * get_actual --- proceed to the actual Node_var_array,
75 * change Node_var_new to an array.
76 * If canfatal and type isn't good, die fatally,
77 * otherwise return the final actual value.
78 */
79
80NODE *
81get_actual(NODE *symbol, int canfatal)
82{
83 int isparam = (symbol->type == Node_param_list
84 && (symbol->flags & FUNC) == 0);
85 NODE *save_symbol = symbol;
86
87 if (isparam) {
88 save_symbol = symbol = stack_ptr[symbol->param_cnt];
89 if (symbol->type == Node_array_ref)
90 symbol = symbol->orig_array;
91 }
92
93 switch (symbol->type) {
94 case Node_var_new:
95 symbol->type = Node_var_array;
96 symbol->var_array = NULL;
97 /* fall through */
98 case Node_var_array:
99 break;
100
101 case Node_array_ref:
102 case Node_param_list:
103 if ((symbol->flags & FUNC) == 0)
104 cant_happen();
105 /* else
106 fall through */
107
108 default:
109 /* notably Node_var but catches also e.g. FS[1] = "x" */
110 if (canfatal) {
111 if ((symbol->flags & FUNC) != 0)
112 fatal(_("attempt to use function `%s' as an array"),
113 save_symbol->vname);
114 else if (isparam)
115 fatal(_("attempt to use scalar parameter `%s' as an array"),
116 save_symbol->vname);
117 else
118 fatal(_("attempt to use scalar `%s' as array"),
119 save_symbol->vname);
120 } else
121 break;
122 }
123
124 return symbol;
125}
126
127/*
128 * array_vname --- print the name of the array
129 *
130 * Returns a pointer to a statically maintained dynamically allocated string.
131 * It's appropriate for printing the name once; if the caller wants
132 * to save it, they have to make a copy.
133 *
134 * Setting MAX_LEN to a positive value (eg. 140) would limit the length
135 * of the output to _roughly_ that length.
136 *
137 * If MAX_LEN == 0, which is the default, the whole stack is printed.
138 */
139#define MAX_LEN 0
140
141char *
142array_vname(register const NODE *symbol)
143{
144 if (symbol->type == Node_param_list)
145 symbol = stack_ptr[symbol->param_cnt];
146
147 if (symbol->type != Node_array_ref || symbol->orig_array->type != Node_var_array)
148 return symbol->vname;
149 else {
150 static char *message = NULL;
151 static size_t msglen = 0;
152 char *s;
153 size_t len;
154 int n;
155 const NODE *save_symbol = symbol;
156 const char *from = _("from %s");
157
158#if (MAX_LEN <= 0) || !defined(HAVE_SNPRINTF)
159 /* This is the default branch. */
160
161 /* First, we have to compute the length of the string: */
162 len = strlen(symbol->vname) + 2; /* "%s (" */
163 n = 0;
164 do {
165 symbol = symbol->prev_array;
166 len += strlen(symbol->vname);
167 n++;
168 } while (symbol->type == Node_array_ref);
169 /*
170 * Each node contributes by strlen(from) minus the length
171 * of "%s" in the translation (which is at least 2)
172 * plus 2 for ", " or ")\0"; this adds up to strlen(from).
173 */
174 len += n * strlen(from);
175
176 /* (Re)allocate memory: */
177 if (message == NULL) {
178 emalloc(message, char *, len, "array_vname");
179 msglen = len;
180 } else if (len > msglen) {
181 erealloc(message, char *, len, "array_vname");
182 msglen = len;
183 } /* else
184 current buffer can hold new name */
185
186 /* We're ready to print: */
187 symbol = save_symbol;
188 s = message;
189 /*
190 * Ancient systems have sprintf() returning char *, not int.
191 * Thus, `s += sprintf(s, from, name);' is a no-no.
192 */
193 sprintf(s, "%s (", symbol->vname);
194 s += strlen(s);
195 for (;;) {
196 symbol = symbol->prev_array;
197 sprintf(s, from, symbol->vname);
198 s += strlen(s);
199 if (symbol->type != Node_array_ref)
200 break;
201 sprintf(s, ", ");
202 s += strlen(s);
203 }
204 sprintf(s, ")");
205
206#else /* MAX_LEN > 0 */
207
208 /*
209 * The following check fails only on
210 * abnormally_long_variable_name.
211 */
212#define PRINT_CHECK \
213 if (n <= 0 || n >= len) \
214 return save_symbol->vname; \
215 s += n; len -= n
216#define PRINT(str) \
217 n = snprintf(s, len, str); \
218 PRINT_CHECK
219#define PRINT_vname(str) \
220 n = snprintf(s, len, str, symbol->vname); \
221 PRINT_CHECK
222
223 if (message == NULL)
224 emalloc(message, char *, MAX_LEN, "array_vname");
225
226 s = message;
227 len = MAX_LEN;
228
229 /* First, print the vname of the node. */
230 PRINT_vname("%s (");
231
232 for (;;) {
233 symbol = symbol->prev_array;
234 /*
235 * When we don't have enough space and this is not
236 * the last node, shorten the list.
237 */
238 if (len < 40 && symbol->type == Node_array_ref) {
239 PRINT("..., ");
240 symbol = symbol->orig_array;
241 }
242 PRINT_vname(from);
243 if (symbol->type != Node_array_ref)
244 break;
245 PRINT(", ");
246 }
247 PRINT(")");
248
249#undef PRINT_CHECK
250#undef PRINT
251#undef PRINT_vname
252#endif /* MAX_LEN <= 0 */
253
254 return message;
255 }
256}
257#undef MAX_LEN
258
259/* concat_exp --- concatenate expression list into a single string */
260
261NODE *
262concat_exp(register NODE *tree)
263{
264 register NODE *r;
265 char *str;
266 char *s;
267 size_t len;
268 int offset;
269 size_t subseplen;
270 const char *subsep;
271
272 if (tree->type != Node_expression_list)
273 return force_string(tree_eval(tree));
274 r = force_string(tree_eval(tree->lnode));
275 if (tree->rnode == NULL)
276 return r;
277 subseplen = SUBSEP_node->var_value->stlen;
278 subsep = SUBSEP_node->var_value->stptr;
279 len = r->stlen + subseplen + 2;
280 emalloc(str, char *, len, "concat_exp");
281 memcpy(str, r->stptr, r->stlen+1);
282 s = str + r->stlen;
283 free_temp(r);
284 for (tree = tree->rnode; tree != NULL; tree = tree->rnode) {
285 if (subseplen == 1)
286 *s++ = *subsep;
287 else {
288 memcpy(s, subsep, subseplen+1);
289 s += subseplen;
290 }
291 r = force_string(tree_eval(tree->lnode));
292 len += r->stlen + subseplen;
293 offset = s - str;
294 erealloc(str, char *, len, "concat_exp");
295 s = str + offset;
296 memcpy(s, r->stptr, r->stlen+1);
297 s += r->stlen;
298 free_temp(r);
299 }
300 r = make_str_node(str, s - str, ALREADY_MALLOCED);
301 r->flags |= TEMP;
302 return r;
303}
304
305/* assoc_clear --- flush all the values in symbol[] before doing a split() */
306
307void
308assoc_clear(NODE *symbol)
309{
310 long i;
311 NODE *bucket, *next;
312
313 if (symbol->var_array == NULL)
314 return;
315 for (i = 0; i < symbol->array_size; i++) {
316 for (bucket = symbol->var_array[i]; bucket != NULL; bucket = next) {
317 next = bucket->ahnext;
318 unref(bucket->ahvalue);
319 unref(bucket); /* unref() will free the ahname_str */
320 }
321 symbol->var_array[i] = NULL;
322 }
323 free(symbol->var_array);
324 symbol->var_array = NULL;
325 symbol->array_size = symbol->table_size = 0;
326 symbol->flags &= ~ARRAYMAXED;
327}
328
329/* hash --- calculate the hash function of the string in subs */
330
331static unsigned long
332awk_hash(register const char *s, register size_t len, unsigned long hsize)
333{
334 register unsigned long h = 0;
335
336 /*
337 * This is INCREDIBLY ugly, but fast. We break the string up into
338 * 8 byte units. On the first time through the loop we get the
339 * "leftover bytes" (strlen % 8). On every other iteration, we
340 * perform 8 HASHC's so we handle all 8 bytes. Essentially, this
341 * saves us 7 cmp & branch instructions. If this routine is
342 * heavily used enough, it's worth the ugly coding.
343 *
344 * OZ's original sdbm hash, copied from Margo Seltzers db package.
345 */
346
347 /*
348 * Even more speed:
349 * #define HASHC h = *s++ + 65599 * h
350 * Because 65599 = pow(2, 6) + pow(2, 16) - 1 we multiply by shifts
351 */
352#define HASHC htmp = (h << 6); \
353 h = *s++ + htmp + (htmp << 10) - h
354
355 unsigned long htmp;
356
357 h = 0;
358
359#if defined(VAXC)
360 /*
361 * This was an implementation of "Duff's Device", but it has been
362 * redone, separating the switch for extra iterations from the
363 * loop. This is necessary because the DEC VAX-C compiler is
364 * STOOPID.
365 */
366 switch (len & (8 - 1)) {
367 case 7: HASHC;
368 case 6: HASHC;
369 case 5: HASHC;
370 case 4: HASHC;
371 case 3: HASHC;
372 case 2: HASHC;
373 case 1: HASHC;
374 default: break;
375 }
376
377 if (len > (8 - 1)) {
378 register size_t loop = len >> 3;
379 do {
380 HASHC;
381 HASHC;
382 HASHC;
383 HASHC;
384 HASHC;
385 HASHC;
386 HASHC;
387 HASHC;
388 } while (--loop);
389 }
390#else /* ! VAXC */
391 /* "Duff's Device" for those who can handle it */
392 if (len > 0) {
393 register size_t loop = (len + 8 - 1) >> 3;
394
395 switch (len & (8 - 1)) {
396 case 0:
397 do { /* All fall throughs */
398 HASHC;
399 case 7: HASHC;
400 case 6: HASHC;
401 case 5: HASHC;
402 case 4: HASHC;
403 case 3: HASHC;
404 case 2: HASHC;
405 case 1: HASHC;
406 } while (--loop);
407 }
408 }
409#endif /* ! VAXC */
410
411 if (h >= hsize)
412 h %= hsize;
413 return h;
414}
415
416/* assoc_find --- locate symbol[subs] */
417
418static NODE * /* NULL if not found */
419assoc_find(NODE *symbol, register NODE *subs, unsigned long hash1)
420{
421 register NODE *bucket;
422 const char *s1_str;
423 size_t s1_len;
424 NODE *s2;
425
426 for (bucket = symbol->var_array[hash1]; bucket != NULL;
427 bucket = bucket->ahnext) {
428 /*
429 * This used to use cmp_nodes() here. That's wrong.
430 * Array indexes are strings; compare as such, always!
431 */
432 s1_str = bucket->ahname_str;
433 s1_len = bucket->ahname_len;
434 s2 = subs;
435
436 if (s1_len == s2->stlen) {
437 if (s1_len == 0 /* "" is a valid index */
438 || memcmp(s1_str, s2->stptr, s1_len) == 0)
439 return bucket;
440 }
441 }
442 return NULL;
443}
444
445/* in_array --- test whether the array element symbol[subs] exists or not,
446 * return pointer to value if it does.
447 */
448
449NODE *
450in_array(NODE *symbol, NODE *subs)
451{
452 register unsigned long hash1;
453 NODE *ret;
454
455 symbol = get_array(symbol);
456
457 /*
458 * Evaluate subscript first, it could have side effects.
459 */
460 subs = concat_exp(subs); /* concat_exp returns a string node */
461 if (symbol->var_array == NULL) {
462 free_temp(subs);
463 return NULL;
464 }
465 hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size);
466 ret = assoc_find(symbol, subs, hash1);
467 free_temp(subs);
468 if (ret)
469 return ret->ahvalue;
470 else
471 return NULL;
472}
473
474/*
475 * assoc_lookup:
476 * Find SYMBOL[SUBS] in the assoc array. Install it with value "" if it
477 * isn't there. Returns a pointer ala get_lhs to where its value is stored.
478 *
479 * SYMBOL is the address of the node (or other pointer) being dereferenced.
480 * SUBS is a number or string used as the subscript.
481 */
482
483NODE **
484assoc_lookup(NODE *symbol, NODE *subs, int reference)
485{
486 register unsigned long hash1;
487 register NODE *bucket;
488
489 assert(symbol->type == Node_var_array);
490
491 (void) force_string(subs);
492
493 if (symbol->var_array == NULL) {
494 symbol->array_size = symbol->table_size = 0; /* sanity */
495 symbol->flags &= ~ARRAYMAXED;
496 grow_table(symbol);
497 hash1 = hash(subs->stptr, subs->stlen,
498 (unsigned long) symbol->array_size);
499 } else {
500 hash1 = hash(subs->stptr, subs->stlen,
501 (unsigned long) symbol->array_size);
502 bucket = assoc_find(symbol, subs, hash1);
503 if (bucket != NULL) {
504 free_temp(subs);
505 return &(bucket->ahvalue);
506 }
507 }
508
509 if (do_lint && reference) {
510 subs->stptr[subs->stlen] = '\0';
511 lintwarn(_("reference to uninitialized element `%s[\"%s\"]'"),
512 array_vname(symbol), subs->stptr);
513 }
514
515 /* It's not there, install it. */
516 if (do_lint && subs->stlen == 0)
517 lintwarn(_("subscript of array `%s' is null string"),
518 array_vname(symbol));
519
520 /* first see if we would need to grow the array, before installing */
521 symbol->table_size++;
522 if ((symbol->flags & ARRAYMAXED) == 0
523 && (symbol->table_size / symbol->array_size) > AVG_CHAIN_MAX) {
524 grow_table(symbol);
525 /* have to recompute hash value for new size */
526 hash1 = hash(subs->stptr, subs->stlen,
527 (unsigned long) symbol->array_size);
528 }
529
530 getnode(bucket);
531 bucket->type = Node_ahash;
532
533 /*
534 * Freeze this string value --- it must never
535 * change, no matter what happens to the value
536 * that created it or to CONVFMT, etc.
537 *
538 * One day: Use an atom table to track array indices,
539 * and avoid the extra memory overhead.
540 */
541 bucket->flags |= MALLOC;
542 bucket->ahname_ref = 1;
543
544 /* For TEMP node, reuse the storage directly */
545 if ((subs->flags & TEMP) != 0) {
546 bucket->ahname_str = subs->stptr;
547 bucket->ahname_len = subs->stlen;
548 bucket->ahname_str[bucket->ahname_len] = '\0';
549 subs->flags &= ~TEMP; /* for good measure */
550 freenode(subs);
551 } else {
552 emalloc(bucket->ahname_str, char *, subs->stlen + 2, "assoc_lookup");
553 bucket->ahname_len = subs->stlen;
554 memcpy(bucket->ahname_str, subs->stptr, subs->stlen);
555 bucket->ahname_str[bucket->ahname_len] = '\0';
556 }
557
558 bucket->ahvalue = Nnull_string;
559 bucket->ahnext = symbol->var_array[hash1];
560 symbol->var_array[hash1] = bucket;
561 return &(bucket->ahvalue);
562}
563
564/* do_delete --- perform `delete array[s]' */
565
566/*
567 * `symbol' is array
568 * `tree' is subscript
569 */
570
571void
572do_delete(NODE *sym, NODE *tree)
573{
574 register unsigned long hash1;
575 register NODE *bucket, *last;
576 NODE *subs;
577 register NODE *symbol = get_array(sym);
578
579 if (tree == NULL) { /* delete array */
580 assoc_clear(symbol);
581 return;
582 }
583
584 last = NULL; /* shut up gcc -Wall */
585 hash1 = 0; /* ditto */
586
587 /*
588 * Always evaluate subscript, it could have side effects.
589 */
590 subs = concat_exp(tree); /* concat_exp returns string node */
591
592 if (symbol->var_array != NULL) {
593 hash1 = hash(subs->stptr, subs->stlen,
594 (unsigned long) symbol->array_size);
595 last = NULL;
596 for (bucket = symbol->var_array[hash1]; bucket != NULL;
597 last = bucket, bucket = bucket->ahnext) {
598 /*
599 * This used to use cmp_nodes() here. That's wrong.
600 * Array indexes are strings; compare as such, always!
601 */
602 const char *s1_str;
603 size_t s1_len;
604 NODE *s2;
605
606 s1_str = bucket->ahname_str;
607 s1_len = bucket->ahname_len;
608 s2 = subs;
609
610 if (s1_len == s2->stlen) {
611 if (s1_len == 0 /* "" is a valid index */
612 || memcmp(s1_str, s2->stptr, s1_len) == 0)
613 break;
614 }
615 }
616 } else
617 bucket = NULL; /* The array is empty. */
618
619 if (bucket == NULL) {
620 if (do_lint)
621 lintwarn(_("delete: index `%s' not in array `%s'"),
622 subs->stptr, array_vname(sym));
623 free_temp(subs);
624 return;
625 }
626
627 free_temp(subs);
628
629 if (last != NULL)
630 last->ahnext = bucket->ahnext;
631 else
632 symbol->var_array[hash1] = bucket->ahnext;
633 unref(bucket->ahvalue);
634 unref(bucket); /* unref() will free the ahname_str */
635 symbol->table_size--;
636 if (symbol->table_size <= 0) {
637 memset(symbol->var_array, '\0',
638 sizeof(NODE *) * symbol->array_size);
639 symbol->table_size = symbol->array_size = 0;
640 symbol->flags &= ~ARRAYMAXED;
641 free((char *) symbol->var_array);
642 symbol->var_array = NULL;
643 }
644}
645
646/* do_delete_loop --- simulate ``for (iggy in foo) delete foo[iggy]'' */
647
648/*
649 * The primary hassle here is that `iggy' needs to have some arbitrary
650 * array index put in it before we can clear the array, we can't
651 * just replace the loop with `delete foo'.
652 */
653
654void
655do_delete_loop(NODE *symbol, NODE *tree)
656{
657 long i;
658 NODE **lhs;
659 Func_ptr after_assign = NULL;
660
661 symbol = get_array(symbol);
662
663 if (symbol->var_array == NULL)
664 return;
665
666 /* get first index value */
667 for (i = 0; i < symbol->array_size; i++) {
668 if (symbol->var_array[i] != NULL) {
669 lhs = get_lhs(tree->lnode, & after_assign, FALSE);
670 unref(*lhs);
671 *lhs = make_string(symbol->var_array[i]->ahname_str,
672 symbol->var_array[i]->ahname_len);
673 if (after_assign)
674 (*after_assign)();
675 break;
676 }
677 }
678
679 /* blast the array in one shot */
680 assoc_clear(symbol);
681}
682
683/* grow_table --- grow a hash table */
684
685static void
686grow_table(NODE *symbol)
687{
688 NODE **old, **new, *chain, *next;
689 int i, j;
690 unsigned long hash1;
691 unsigned long oldsize, newsize, k;
692 /*
693 * This is an array of primes. We grow the table by an order of
694 * magnitude each time (not just doubling) so that growing is a
695 * rare operation. We expect, on average, that it won't happen
696 * more than twice. The final size is also chosen to be small
697 * enough so that MS-DOG mallocs can handle it. When things are
698 * very large (> 8K), we just double more or less, instead of
699 * just jumping from 8K to 64K.
700 */
701 static const long sizes[] = { 13, 127, 1021, 8191, 16381, 32749, 65497,
702#if ! defined(MSDOS) && ! defined(OS2) && ! defined(atarist)
703 131101, 262147, 524309, 1048583, 2097169,
704 4194319, 8388617, 16777259, 33554467,
705 67108879, 134217757, 268435459, 536870923,
706 1073741827
707#endif
708 };
709
710 /* find next biggest hash size */
711 newsize = oldsize = symbol->array_size;
712 for (i = 0, j = sizeof(sizes)/sizeof(sizes[0]); i < j; i++) {
713 if (oldsize < sizes[i]) {
714 newsize = sizes[i];
715 break;
716 }
717 }
718
719 if (newsize == oldsize) { /* table already at max (!) */
720 symbol->flags |= ARRAYMAXED;
721 return;
722 }
723
724 /* allocate new table */
725 emalloc(new, NODE **, newsize * sizeof(NODE *), "grow_table");
726 memset(new, '\0', newsize * sizeof(NODE *));
727
728 /* brand new hash table, set things up and return */
729 if (symbol->var_array == NULL) {
730 symbol->table_size = 0;
731 goto done;
732 }
733
734 /* old hash table there, move stuff to new, free old */
735 old = symbol->var_array;
736 for (k = 0; k < oldsize; k++) {
737 if (old[k] == NULL)
738 continue;
739
740 for (chain = old[k]; chain != NULL; chain = next) {
741 next = chain->ahnext;
742 hash1 = hash(chain->ahname_str,
743 chain->ahname_len, newsize);
744
745 /* remove from old list, add to new */
746 chain->ahnext = new[hash1];
747 new[hash1] = chain;
748 }
749 }
750 free(old);
751
752done:
753 /*
754 * note that symbol->table_size does not change if an old array,
755 * and is explicitly set to 0 if a new one.
756 */
757 symbol->var_array = new;
758 symbol->array_size = newsize;
759}
760
761/* set_SUBSEP --- make sure SUBSEP always has a string value */
762
763void
764set_SUBSEP(void)
765{
766
767 (void) force_string(SUBSEP_node->var_value);
768 return;
769}
770
771/* pr_node --- print simple node info */
772
773static void
774pr_node(NODE *n)
775{
776 if ((n->flags & (NUMCUR|NUMBER)) != 0)
777 printf("%g", n->numbr);
778 else
779 printf("%.*s", (int) n->stlen, n->stptr);
780}
781
782/* assoc_dump --- dump the contents of an array */
783
784NODE *
785assoc_dump(NODE *symbol)
786{
787 long i;
788 NODE *bucket;
789
790 if (symbol->var_array == NULL) {
791 printf(_("%s: empty (null)\n"), symbol->vname);
792 return tmp_number((AWKNUM) 0);
793 }
794
795 if (symbol->table_size == 0) {
796 printf(_("%s: empty (zero)\n"), symbol->vname);
797 return tmp_number((AWKNUM) 0);
798 }
799
800 printf(_("%s: table_size = %d, array_size = %d\n"), symbol->vname,
801 (int) symbol->table_size, (int) symbol->array_size);
802
803 for (i = 0; i < symbol->array_size; i++) {
804 for (bucket = symbol->var_array[i]; bucket != NULL;
805 bucket = bucket->ahnext) {
806 printf("%s: I: [len %d <%.*s>] V: [",
807 symbol->vname,
808 (int) bucket->ahname_len,
809 (int) bucket->ahname_len,
810 bucket->ahname_str);
811 pr_node(bucket->ahvalue);
812 printf("]\n");
813 }
814 }
815
816 return tmp_number((AWKNUM) 0);
817}
818
819/* do_adump --- dump an array: interface to assoc_dump */
820
821NODE *
822do_adump(NODE *tree)
823{
824 NODE *r, *a;
825
826 a = tree->lnode;
827
828 if (a->type == Node_param_list) {
829 printf(_("%s: is parameter\n"), a->vname);
830 a = stack_ptr[a->param_cnt];
831 }
832
833 if (a->type == Node_array_ref) {
834 printf(_("%s: array_ref to %s\n"), a->vname,
835 a->orig_array->vname);
836 a = a->orig_array;
837 }
838
839 r = assoc_dump(a);
840
841 return r;
842}
843
844/*
845 * The following functions implement the builtin
846 * asort function. Initial work by Alan J. Broder,
847 * ajb@woti.com.
848 */
849
850/* dup_table --- duplicate input symbol table "symbol" */
851
852static void
853dup_table(NODE *symbol, NODE *newsymb)
854{
855 NODE **old, **new, *chain, *bucket;
856 long i;
857 unsigned long cursize;
858
859 /* find the current hash size */
860 cursize = symbol->array_size;
861
862 new = NULL;
863
864 /* input is a brand new hash table, so there's nothing to copy */
865 if (symbol->var_array == NULL)
866 newsymb->table_size = 0;
867 else {
868 /* old hash table there, dupnode stuff into a new table */
869
870 /* allocate new table */
871 emalloc(new, NODE **, cursize * sizeof(NODE *), "dup_table");
872 memset(new, '\0', cursize * sizeof(NODE *));
873
874 /* do the copying/dupnode'ing */
875 old = symbol->var_array;
876 for (i = 0; i < cursize; i++) {
877 if (old[i] != NULL) {
878 for (chain = old[i]; chain != NULL;
879 chain = chain->ahnext) {
880 /* get a node for the linked list */
881 getnode(bucket);
882 bucket->type = Node_ahash;
883 bucket->flags |= MALLOC;
884 bucket->ahname_ref = 1;
885
886 /*
887 * copy the corresponding name and
888 * value from the original input list
889 */
890 emalloc(bucket->ahname_str, char *, chain->ahname_len + 2, "dup_table");
891 bucket->ahname_len = chain->ahname_len;
892
893 memcpy(bucket->ahname_str, chain->ahname_str, chain->ahname_len);
894 bucket->ahname_str[bucket->ahname_len] = '\0';
895
896 bucket->ahvalue = dupnode(chain->ahvalue);
897
898 /*
899 * put the node on the corresponding
900 * linked list in the new table
901 */
902 bucket->ahnext = new[i];
903 new[i] = bucket;
904 }
905 }
906 }
907 newsymb->table_size = symbol->table_size;
908 }
909
910 newsymb->var_array = new;
911 newsymb->array_size = cursize;
912}
913
914/* merge --- do a merge of two sorted lists */
915
916static NODE *
917merge(NODE *left, NODE *right)
918{
919 NODE *ans, *cur;
920
921 /*
922 * The use of cmp_nodes() here means that IGNORECASE influences the
923 * comparison. This is OK, but it may be surprising. This comment
924 * serves to remind us that we know about this and that it's OK.
925 */
926 if (cmp_nodes(left->ahvalue, right->ahvalue) <= 0) {
927 ans = cur = left;
928 left = left->ahnext;
929 } else {
930 ans = cur = right;
931 right = right->ahnext;
932 }
933
934 while (left != NULL && right != NULL) {
935 if (cmp_nodes(left->ahvalue, right->ahvalue) <= 0) {
936 cur->ahnext = left;
937 cur = left;
938 left = left->ahnext;
939 } else {
940 cur->ahnext = right;
941 cur = right;
942 right = right->ahnext;
943 }
944 }
945
946 cur->ahnext = (left != NULL ? left : right);
947
948 return ans;
949}
950
951/* merge_sort --- recursively sort the left and right sides of a list */
952
953static NODE *
954merge_sort(NODE *left, unsigned long size)
955{
956 NODE *right, *tmp;
957 int i, half;
958
959 if (size <= 1)
960 return left;
961
962 /* walk down the list, till just one before the midpoint */
963 tmp = left;
964 half = size / 2;
965 for (i = 0; i < half-1; i++)
966 tmp = tmp->ahnext;
967
968 /* split the list into two parts */
969 right = tmp->ahnext;
970 tmp->ahnext = NULL;
971
972 /* sort the left and right parts of the list */
973 left = merge_sort(left, half);
974 right = merge_sort(right, size-half);
975
976 /* merge the two sorted parts of the list */
977 return merge(left, right);
978}
979
980
981/*
982 * assoc_from_list -- Populate an array with the contents of a list of NODEs,
983 * using increasing integers as the key.
984 */
985
986static void
987assoc_from_list(NODE *symbol, NODE *list)
988{
989 NODE *next;
990 unsigned long i = 0;
991 register unsigned long hash1;
992 char buf[100];
993
994 for (; list != NULL; list = next) {
995 next = list->ahnext;
996
997 /* make an int out of i++ */
998 i++;
999 sprintf(buf, "%lu", i);
1000 assert(list->ahname_str == NULL);
1001 assert(list->ahname_ref == 1);
1002 emalloc(list->ahname_str, char *, strlen(buf) + 2, "assoc_from_list");
1003 list->ahname_len = strlen(buf);
1004 strcpy(list->ahname_str, buf);
1005
1006 /* find the bucket where it belongs */
1007 hash1 = hash(list->ahname_str, list->ahname_len,
1008 symbol->array_size);
1009
1010 /* link the node into the chain at that bucket */
1011 list->ahnext = symbol->var_array[hash1];
1012 symbol->var_array[hash1] = list;
1013 }
1014}
1015
1016/*
1017 * assoc_sort_inplace --- sort all the values in symbol[], replacing
1018 * the sorted values back into symbol[], indexed by integers starting with 1.
1019 */
1020
1021typedef enum asort_how { VALUE, INDEX } ASORT_TYPE;
1022
1023static NODE *
1024assoc_sort_inplace(NODE *symbol, ASORT_TYPE how)
1025{
1026 unsigned long i, num;
1027 NODE *bucket, *next, *list;
1028
1029 if (symbol->var_array == NULL
1030 || symbol->array_size <= 0
1031 || symbol->table_size <= 0)
1032 return tmp_number((AWKNUM) 0);
1033
1034 /* build a linked list out of all the entries in the table */
1035 if (how == VALUE) {
1036 list = NULL;
1037 num = 0;
1038 for (i = 0; i < symbol->array_size; i++) {
1039 for (bucket = symbol->var_array[i]; bucket != NULL; bucket = next) {
1040 next = bucket->ahnext;
1041 if (bucket->ahname_ref == 1) {
1042 free(bucket->ahname_str);
1043 bucket->ahname_str = NULL;
1044 bucket->ahname_len = 0;
1045 } else {
1046 NODE *r;
1047
1048 getnode(r);
1049 *r = *bucket;
1050 unref(bucket);
1051 bucket = r;
1052 bucket->flags |= MALLOC;
1053 bucket->ahname_ref = 1;
1054 bucket->ahname_str = NULL;
1055 bucket->ahname_len = 0;
1056 }
1057 bucket->ahnext = list;
1058 list = bucket;
1059 num++;
1060 }
1061 symbol->var_array[i] = NULL;
1062 }
1063 } else { /* how == INDEX */
1064 list = NULL;
1065 num = 0;
1066 for (i = 0; i < symbol->array_size; i++) {
1067 for (bucket = symbol->var_array[i]; bucket != NULL; bucket = next) {
1068 next = bucket->ahnext;
1069
1070 /* toss old value */
1071 unref(bucket->ahvalue);
1072
1073 /* move index into value */
1074 if (bucket->ahname_ref == 1) {
1075 bucket->ahvalue = make_str_node(bucket->ahname_str,
1076 bucket->ahname_len, ALREADY_MALLOCED);
1077 bucket->ahname_str = NULL;
1078 bucket->ahname_len = 0;
1079 } else {
1080 NODE *r;
1081
1082 bucket->ahvalue = make_string(bucket->ahname_str, bucket->ahname_len);
1083 getnode(r);
1084 *r = *bucket;
1085 unref(bucket);
1086 bucket = r;
1087 bucket->flags |= MALLOC;
1088 bucket->ahname_ref = 1;
1089 bucket->ahname_str = NULL;
1090 bucket->ahname_len = 0;
1091 }
1092
1093 bucket->ahnext = list;
1094 list = bucket;
1095 num++;
1096 }
1097 symbol->var_array[i] = NULL;
1098 }
1099 }
1100
1101 /*
1102 * Sort the linked list of NODEs.
1103 * (The especially nice thing about using a merge sort here is that
1104 * we require absolutely no additional storage. This is handy if the
1105 * array has grown to be very large.)
1106 */
1107 list = merge_sort(list, num);
1108
1109 /*
1110 * now repopulate the original array, using increasing
1111 * integers as the key
1112 */
1113 assoc_from_list(symbol, list);
1114
1115 return tmp_number((AWKNUM) num);
1116}
1117
1118/* asort_actual --- do the actual work to sort the input array */
1119
1120static NODE *
1121asort_actual(NODE *tree, ASORT_TYPE how)
1122{
1123 NODE *array = get_array(tree->lnode);
1124
1125 if (tree->rnode != NULL) { /* 2nd optional arg */
1126 NODE *dest = get_array(tree->rnode->lnode);
1127
1128 assoc_clear(dest);
1129 dup_table(array, dest);
1130 array = dest;
1131 }
1132
1133 return assoc_sort_inplace(array, how);
1134}
1135
1136/* do_asort --- sort array by value */
1137
1138NODE *
1139do_asort(NODE *tree)
1140{
1141 return asort_actual(tree, VALUE);
1142}
1143
1144/* do_asorti --- sort array by index */
1145
1146NODE *
1147do_asorti(NODE *tree)
1148{
1149 return asort_actual(tree, INDEX);
1150}
1151
1152/*
1153From bonzini@gnu.org Mon Oct 28 16:05:26 2002
1154Date: Mon, 28 Oct 2002 13:33:03 +0100
1155From: Paolo Bonzini <bonzini@gnu.org>
1156To: arnold@skeeve.com
1157Subject: Hash function
1158Message-ID: <20021028123303.GA6832@biancaneve>
1159
1160Here is the hash function I'm using in GNU Smalltalk. The scrambling is
1161needed if you use powers of two as the table sizes. If you use primes it
1162is not needed.
1163
1164To use double-hashing with power-of-two size, you should use the
1165_gst_hash_string(str, len) as the primary hash and
1166scramble(_gst_hash_string (str, len)) | 1 as the secondary hash.
1167
1168Paolo
1169
1170*/
1171/*
1172 * ADR: Slightly modified to work w/in the context of gawk.
1173 */
1174
1175static unsigned long
1176gst_hash_string(const char *str, size_t len, unsigned long hsize)
1177{
1178 unsigned long hashVal = 1497032417; /* arbitrary value */
1179 unsigned long ret;
1180
1181 while (len--) {
1182 hashVal += *str++;
1183 hashVal += (hashVal << 10);
1184 hashVal ^= (hashVal >> 6);
1185 }
1186
1187 ret = scramble(hashVal);
1188 if (ret >= hsize)
1189 ret %= hsize;
1190
1191 return ret;
1192}
1193
1194static unsigned long
1195scramble(unsigned long x)
1196{
1197 if (sizeof(long) == 4) {
1198 int y = ~x;
1199
1200 x += (y << 10) | (y >> 22);
1201 x += (x << 6) | (x >> 26);
1202 x -= (x << 16) | (x >> 16);
1203 } else {
1204 x ^= (~x) >> 31;
1205 x += (x << 21) | (x >> 11);
1206 x += (x << 5) | (x >> 27);
1207 x += (x << 27) | (x >> 5);
1208 x += (x << 31);
1209 }
1210
1211 return x;
1212}
Note: See TracBrowser for help on using the repository browser.