Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

array.c@ 3934

Visit:

Last change on this file since 3934 was 3076, checked in by bird, 19 years ago
gawk 3.1.5
File size: 28.6 KB

Line
1	/*
2	* array.c - routines for associative arrays.
3	*/
4
5	/*
6	* Copyright (C) 1986, 1988, 1989, 1991-2005 the Free Software Foundation, Inc.
7	*
8	* This file is part of GAWK, the GNU implementation of the
9	* AWK Programming Language.
10	*
11	* GAWK is free software; you can redistribute it and/or modify
12	* it under the terms of the GNU General Public License as published by
13	* the Free Software Foundation; either version 2 of the License, or
14	* (at your option) any later version.
15	*
16	* GAWK is distributed in the hope that it will be useful,
17	* but WITHOUT ANY WARRANTY; without even the implied warranty of
18	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19	* GNU General Public License for more details.
20	*
21	* You should have received a copy of the GNU General Public License
22	* along with this program; if not, write to the Free Software
23	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24	*/
25
26	/*
27	* Tree walks (``for (iggy in foo)'') and array deletions use expensive
28	* linear searching. So what we do is start out with small arrays and
29	* grow them as needed, so that our arrays are hopefully small enough,
30	* most of the time, that they're pretty full and we're not looking at
31	* wasted space.
32	*
33	* The decision is made to grow the array if the average chain length is
34	* ``too big''. This is defined as the total number of entries in the table
35	* divided by the size of the array being greater than some constant.
36	*
37	* 11/2002: We make the constant a variable, so that it can be tweaked
38	* via environment variable.
39	*/
40
41	static int AVG_CHAIN_MAX = 2; /* 11/2002: Modern machines are bigger, cut this down from 10. */
42
43	#include "awk.h"
44
45	static NODE assoc_find P((NODE symbol, NODE *subs, unsigned long hash1));
46	static void grow_table P((NODE *symbol));
47
48	static unsigned long gst_hash_string P((const char *str, size_t len, unsigned long hsize));
49	static unsigned long scramble P((unsigned long x));
50	static unsigned long awk_hash P((const char *s, size_t len, unsigned long hsize));
51
52	unsigned long (hash)P((const char s, size_t len, unsigned long hsize)) = awk_hash;
53
54	/* array_init --- possibly temporary function for experimentation purposes */
55
56	void
57	array_init()
58	{
59	const char *val;
60	int newval;
61
62	if ((val = getenv("AVG_CHAIN_MAX")) != NULL && ISDIGIT(*val)) {
63	for (newval = 0; val && ISDIGIT(val); val++)
64	newval = (newval * 10) + *val - '0';
65
66	AVG_CHAIN_MAX = newval;
67	}
68
69	if ((val = getenv("AWK_HASH")) != NULL && strcmp(val, "gst") == 0)
70	hash = gst_hash_string;
71	}
72
73	/*
74	* get_actual --- proceed to the actual Node_var_array,
75	* change Node_var_new to an array.
76	* If canfatal and type isn't good, die fatally,
77	* otherwise return the final actual value.
78	*/
79
80	NODE *
81	get_actual(NODE *symbol, int canfatal)
82	{
83	int isparam = (symbol->type == Node_param_list
84	&& (symbol->flags & FUNC) == 0);
85	NODE *save_symbol = symbol;
86
87	if (isparam) {
88	save_symbol = symbol = stack_ptr[symbol->param_cnt];
89	if (symbol->type == Node_array_ref)
90	symbol = symbol->orig_array;
91	}
92
93	switch (symbol->type) {
94	case Node_var_new:
95	symbol->type = Node_var_array;
96	symbol->var_array = NULL;
97	/* fall through */
98	case Node_var_array:
99	break;
100
101	case Node_array_ref:
102	case Node_param_list:
103	if ((symbol->flags & FUNC) == 0)
104	cant_happen();
105	/* else
106	fall through */
107
108	default:
109	/* notably Node_var but catches also e.g. FS[1] = "x" */
110	if (canfatal) {
111	if ((symbol->flags & FUNC) != 0)
112	fatal(_("attempt to use function `%s' as an array"),
113	save_symbol->vname);
114	else if (isparam)
115	fatal(_("attempt to use scalar parameter `%s' as an array"),
116	save_symbol->vname);
117	else
118	fatal(_("attempt to use scalar `%s' as array"),
119	save_symbol->vname);
120	} else
121	break;
122	}
123
124	return symbol;
125	}
126
127	/*
128	* array_vname --- print the name of the array
129	*
130	* Returns a pointer to a statically maintained dynamically allocated string.
131	* It's appropriate for printing the name once; if the caller wants
132	* to save it, they have to make a copy.
133	*
134	* Setting MAX_LEN to a positive value (eg. 140) would limit the length
135	* of the output to _roughly_ that length.
136	*
137	* If MAX_LEN == 0, which is the default, the whole stack is printed.
138	*/
139	#define MAX_LEN 0
140
141	char *
142	array_vname(register const NODE *symbol)
143	{
144	if (symbol->type == Node_param_list)
145	symbol = stack_ptr[symbol->param_cnt];
146
147	if (symbol->type != Node_array_ref \|\| symbol->orig_array->type != Node_var_array)
148	return symbol->vname;
149	else {
150	static char *message = NULL;
151	static size_t msglen = 0;
152	char *s;
153	size_t len;
154	int n;
155	const NODE *save_symbol = symbol;
156	const char *from = _("from %s");
157
158	#if (MAX_LEN <= 0) \|\| !defined(HAVE_SNPRINTF)
159	/* This is the default branch. */
160
161	/* First, we have to compute the length of the string: */
162	len = strlen(symbol->vname) + 2; /* "%s (" */
163	n = 0;
164	do {
165	symbol = symbol->prev_array;
166	len += strlen(symbol->vname);
167	n++;
168	} while (symbol->type == Node_array_ref);
169	/*
170	* Each node contributes by strlen(from) minus the length
171	* of "%s" in the translation (which is at least 2)
172	* plus 2 for ", " or ")\0"; this adds up to strlen(from).
173	*/
174	len += n * strlen(from);
175
176	/* (Re)allocate memory: */
177	if (message == NULL) {
178	emalloc(message, char *, len, "array_vname");
179	msglen = len;
180	} else if (len > msglen) {
181	erealloc(message, char *, len, "array_vname");
182	msglen = len;
183	} /* else
184	current buffer can hold new name */
185
186	/* We're ready to print: */
187	symbol = save_symbol;
188	s = message;
189	/*
190	* Ancient systems have sprintf() returning char *, not int.
191	* Thus, `s += sprintf(s, from, name);' is a no-no.
192	*/
193	sprintf(s, "%s (", symbol->vname);
194	s += strlen(s);
195	for (;;) {
196	symbol = symbol->prev_array;
197	sprintf(s, from, symbol->vname);
198	s += strlen(s);
199	if (symbol->type != Node_array_ref)
200	break;
201	sprintf(s, ", ");
202	s += strlen(s);
203	}
204	sprintf(s, ")");
205
206	#else /* MAX_LEN > 0 */
207
208	/*
209	* The following check fails only on
210	* abnormally_long_variable_name.
211	*/
212	#define PRINT_CHECK \
213	if (n <= 0 \|\| n >= len) \
214	return save_symbol->vname; \
215	s += n; len -= n
216	#define PRINT(str) \
217	n = snprintf(s, len, str); \
218	PRINT_CHECK
219	#define PRINT_vname(str) \
220	n = snprintf(s, len, str, symbol->vname); \
221	PRINT_CHECK
222
223	if (message == NULL)
224	emalloc(message, char *, MAX_LEN, "array_vname");
225
226	s = message;
227	len = MAX_LEN;
228
229	/* First, print the vname of the node. */
230	PRINT_vname("%s (");
231
232	for (;;) {
233	symbol = symbol->prev_array;
234	/*
235	* When we don't have enough space and this is not
236	* the last node, shorten the list.
237	*/
238	if (len < 40 && symbol->type == Node_array_ref) {
239	PRINT("..., ");
240	symbol = symbol->orig_array;
241	}
242	PRINT_vname(from);
243	if (symbol->type != Node_array_ref)
244	break;
245	PRINT(", ");
246	}
247	PRINT(")");
248
249	#undef PRINT_CHECK
250	#undef PRINT
251	#undef PRINT_vname
252	#endif /* MAX_LEN <= 0 */
253
254	return message;
255	}
256	}
257	#undef MAX_LEN
258
259	/* concat_exp --- concatenate expression list into a single string */
260
261	NODE *
262	concat_exp(register NODE *tree)
263	{
264	register NODE *r;
265	char *str;
266	char *s;
267	size_t len;
268	int offset;
269	size_t subseplen;
270	const char *subsep;
271
272	if (tree->type != Node_expression_list)
273	return force_string(tree_eval(tree));
274	r = force_string(tree_eval(tree->lnode));
275	if (tree->rnode == NULL)
276	return r;
277	subseplen = SUBSEP_node->var_value->stlen;
278	subsep = SUBSEP_node->var_value->stptr;
279	len = r->stlen + subseplen + 2;
280	emalloc(str, char *, len, "concat_exp");
281	memcpy(str, r->stptr, r->stlen+1);
282	s = str + r->stlen;
283	free_temp(r);
284	for (tree = tree->rnode; tree != NULL; tree = tree->rnode) {
285	if (subseplen == 1)
286	s++ = subsep;
287	else {
288	memcpy(s, subsep, subseplen+1);
289	s += subseplen;
290	}
291	r = force_string(tree_eval(tree->lnode));
292	len += r->stlen + subseplen;
293	offset = s - str;
294	erealloc(str, char *, len, "concat_exp");
295	s = str + offset;
296	memcpy(s, r->stptr, r->stlen+1);
297	s += r->stlen;
298	free_temp(r);
299	}
300	r = make_str_node(str, s - str, ALREADY_MALLOCED);
301	r->flags \|= TEMP;
302	return r;
303	}
304
305	/* assoc_clear --- flush all the values in symbol[] before doing a split() */
306
307	void
308	assoc_clear(NODE *symbol)
309	{
310	long i;
311	NODE bucket, next;
312
313	if (symbol->var_array == NULL)
314	return;
315	for (i = 0; i < symbol->array_size; i++) {
316	for (bucket = symbol->var_array[i]; bucket != NULL; bucket = next) {
317	next = bucket->ahnext;
318	unref(bucket->ahvalue);
319	unref(bucket); /* unref() will free the ahname_str */
320	}
321	symbol->var_array[i] = NULL;
322	}
323	free(symbol->var_array);
324	symbol->var_array = NULL;
325	symbol->array_size = symbol->table_size = 0;
326	symbol->flags &= ~ARRAYMAXED;
327	}
328
/*
 * awk_hash --- calculate the hash function of the LEN bytes at S
 *
 * Each byte is folded in as h = byte + 65599 * h (credited in the
 * original comments to OZ's sdbm hash, via Margo Seltzer's db package),
 * and the final value is reduced below HSIZE.
 */

static unsigned long
awk_hash(register const char *s, register size_t len, unsigned long hsize)
{
	register unsigned long h = 0;

	/*
	 * 65599 = pow(2, 6) + pow(2, 16) - 1, so the multiplication is
	 * written with shifts: 65599 * h == (h << 6) + (h << 16) - h.
	 * A plain loop over the bytes visits them in the same order as
	 * the unrolled forms and produces the same value.
	 */
	for (; len > 0; len--) {
		unsigned long shifted = (h << 6);

		h = *s++ + shifted + (shifted << 10) - h;
	}

	return (h >= hsize) ? h % hsize : h;
}
415
416	/* assoc_find --- locate symbol[subs] */
417
418	static NODE * /* NULL if not found */
419	assoc_find(NODE symbol, register NODE subs, unsigned long hash1)
420	{
421	register NODE *bucket;
422	const char *s1_str;
423	size_t s1_len;
424	NODE *s2;
425
426	for (bucket = symbol->var_array[hash1]; bucket != NULL;
427	bucket = bucket->ahnext) {
428	/*
429	* This used to use cmp_nodes() here. That's wrong.
430	* Array indexes are strings; compare as such, always!
431	*/
432	s1_str = bucket->ahname_str;
433	s1_len = bucket->ahname_len;
434	s2 = subs;
435
436	if (s1_len == s2->stlen) {
437	if (s1_len == 0 /* "" is a valid index */
438	\|\| memcmp(s1_str, s2->stptr, s1_len) == 0)
439	return bucket;
440	}
441	}
442	return NULL;
443	}
444
445	/* in_array --- test whether the array element symbol[subs] exists or not,
446	* return pointer to value if it does.
447	*/
448
449	NODE *
450	in_array(NODE symbol, NODE subs)
451	{
452	register unsigned long hash1;
453	NODE *ret;
454
455	symbol = get_array(symbol);
456
457	/*
458	* Evaluate subscript first, it could have side effects.
459	*/
460	subs = concat_exp(subs); /* concat_exp returns a string node */
461	if (symbol->var_array == NULL) {
462	free_temp(subs);
463	return NULL;
464	}
465	hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size);
466	ret = assoc_find(symbol, subs, hash1);
467	free_temp(subs);
468	if (ret)
469	return ret->ahvalue;
470	else
471	return NULL;
472	}
473
474	/*
475	* assoc_lookup:
476	* Find SYMBOL[SUBS] in the assoc array. Install it with value "" if it
477	* isn't there. Returns a pointer ala get_lhs to where its value is stored.
478	*
479	* SYMBOL is the address of the node (or other pointer) being dereferenced.
480	* SUBS is a number or string used as the subscript.
481	*/
482
483	NODE **
484	assoc_lookup(NODE symbol, NODE subs, int reference)
485	{
486	register unsigned long hash1;
487	register NODE *bucket;
488
489	assert(symbol->type == Node_var_array);
490
491	(void) force_string(subs);
492
493	if (symbol->var_array == NULL) {
494	symbol->array_size = symbol->table_size = 0; /* sanity */
495	symbol->flags &= ~ARRAYMAXED;
496	grow_table(symbol);
497	hash1 = hash(subs->stptr, subs->stlen,
498	(unsigned long) symbol->array_size);
499	} else {
500	hash1 = hash(subs->stptr, subs->stlen,
501	(unsigned long) symbol->array_size);
502	bucket = assoc_find(symbol, subs, hash1);
503	if (bucket != NULL) {
504	free_temp(subs);
505	return &(bucket->ahvalue);
506	}
507	}
508
509	if (do_lint && reference) {
510	subs->stptr[subs->stlen] = '\0';
511	lintwarn(_("reference to uninitialized element `%s[\"%s\"]'"),
512	array_vname(symbol), subs->stptr);
513	}
514
515	/* It's not there, install it. */
516	if (do_lint && subs->stlen == 0)
517	lintwarn(_("subscript of array `%s' is null string"),
518	array_vname(symbol));
519
520	/* first see if we would need to grow the array, before installing */
521	symbol->table_size++;
522	if ((symbol->flags & ARRAYMAXED) == 0
523	&& (symbol->table_size / symbol->array_size) > AVG_CHAIN_MAX) {
524	grow_table(symbol);
525	/* have to recompute hash value for new size */
526	hash1 = hash(subs->stptr, subs->stlen,
527	(unsigned long) symbol->array_size);
528	}
529
530	getnode(bucket);
531	bucket->type = Node_ahash;
532
533	/*
534	* Freeze this string value --- it must never
535	* change, no matter what happens to the value
536	* that created it or to CONVFMT, etc.
537	*
538	* One day: Use an atom table to track array indices,
539	* and avoid the extra memory overhead.
540	*/
541	bucket->flags \|= MALLOC;
542	bucket->ahname_ref = 1;
543
544	/* For TEMP node, reuse the storage directly */
545	if ((subs->flags & TEMP) != 0) {
546	bucket->ahname_str = subs->stptr;
547	bucket->ahname_len = subs->stlen;
548	bucket->ahname_str[bucket->ahname_len] = '\0';
549	subs->flags &= ~TEMP; /* for good measure */
550	freenode(subs);
551	} else {
552	emalloc(bucket->ahname_str, char *, subs->stlen + 2, "assoc_lookup");
553	bucket->ahname_len = subs->stlen;
554	memcpy(bucket->ahname_str, subs->stptr, subs->stlen);
555	bucket->ahname_str[bucket->ahname_len] = '\0';
556	}
557
558	bucket->ahvalue = Nnull_string;
559	bucket->ahnext = symbol->var_array[hash1];
560	symbol->var_array[hash1] = bucket;
561	return &(bucket->ahvalue);
562	}
563
564	/* do_delete --- perform `delete array[s]' */
565
566	/*
567	* `symbol' is array
568	* `tree' is subscript
569	*/
570
571	void
572	do_delete(NODE sym, NODE tree)
573	{
574	register unsigned long hash1;
575	register NODE bucket, last;
576	NODE *subs;
577	register NODE *symbol = get_array(sym);
578
579	if (tree == NULL) { /* delete array */
580	assoc_clear(symbol);
581	return;
582	}
583
584	last = NULL; /* shut up gcc -Wall */
585	hash1 = 0; /* ditto */
586
587	/*
588	* Always evaluate subscript, it could have side effects.
589	*/
590	subs = concat_exp(tree); /* concat_exp returns string node */
591
592	if (symbol->var_array != NULL) {
593	hash1 = hash(subs->stptr, subs->stlen,
594	(unsigned long) symbol->array_size);
595	last = NULL;
596	for (bucket = symbol->var_array[hash1]; bucket != NULL;
597	last = bucket, bucket = bucket->ahnext) {
598	/*
599	* This used to use cmp_nodes() here. That's wrong.
600	* Array indexes are strings; compare as such, always!
601	*/
602	const char *s1_str;
603	size_t s1_len;
604	NODE *s2;
605
606	s1_str = bucket->ahname_str;
607	s1_len = bucket->ahname_len;
608	s2 = subs;
609
610	if (s1_len == s2->stlen) {
611	if (s1_len == 0 /* "" is a valid index */
612	\|\| memcmp(s1_str, s2->stptr, s1_len) == 0)
613	break;
614	}
615	}
616	} else
617	bucket = NULL; /* The array is empty. */
618
619	if (bucket == NULL) {
620	if (do_lint)
621	lintwarn(_("delete: index `%s' not in array `%s'"),
622	subs->stptr, array_vname(sym));
623	free_temp(subs);
624	return;
625	}
626
627	free_temp(subs);
628
629	if (last != NULL)
630	last->ahnext = bucket->ahnext;
631	else
632	symbol->var_array[hash1] = bucket->ahnext;
633	unref(bucket->ahvalue);
634	unref(bucket); /* unref() will free the ahname_str */
635	symbol->table_size--;
636	if (symbol->table_size <= 0) {
637	memset(symbol->var_array, '\0',
638	sizeof(NODE ) symbol->array_size);
639	symbol->table_size = symbol->array_size = 0;
640	symbol->flags &= ~ARRAYMAXED;
641	free((char *) symbol->var_array);
642	symbol->var_array = NULL;
643	}
644	}
645
646	/* do_delete_loop --- simulate ``for (iggy in foo) delete foo[iggy]'' */
647
648	/*
649	* The primary hassle here is that `iggy' needs to have some arbitrary
650	* array index put in it before we can clear the array, we can't
651	* just replace the loop with `delete foo'.
652	*/
653
654	void
655	do_delete_loop(NODE symbol, NODE tree)
656	{
657	long i;
658	NODE **lhs;
659	Func_ptr after_assign = NULL;
660
661	symbol = get_array(symbol);
662
663	if (symbol->var_array == NULL)
664	return;
665
666	/* get first index value */
667	for (i = 0; i < symbol->array_size; i++) {
668	if (symbol->var_array[i] != NULL) {
669	lhs = get_lhs(tree->lnode, & after_assign, FALSE);
670	unref(*lhs);
671	*lhs = make_string(symbol->var_array[i]->ahname_str,
672	symbol->var_array[i]->ahname_len);
673	if (after_assign)
674	(*after_assign)();
675	break;
676	}
677	}
678
679	/* blast the array in one shot */
680	assoc_clear(symbol);
681	}
682
683	/* grow_table --- grow a hash table */
684
685	static void
686	grow_table(NODE *symbol)
687	{
688	NODE old, new, chain, next;
689	int i, j;
690	unsigned long hash1;
691	unsigned long oldsize, newsize, k;
692	/*
693	* This is an array of primes. We grow the table by an order of
694	* magnitude each time (not just doubling) so that growing is a
695	* rare operation. We expect, on average, that it won't happen
696	* more than twice. The final size is also chosen to be small
697	* enough so that MS-DOG mallocs can handle it. When things are
698	* very large (> 8K), we just double more or less, instead of
699	* just jumping from 8K to 64K.
700	*/
701	static const long sizes[] = { 13, 127, 1021, 8191, 16381, 32749, 65497,
702	#if ! defined(MSDOS) && ! defined(OS2) && ! defined(atarist)
703	131101, 262147, 524309, 1048583, 2097169,
704	4194319, 8388617, 16777259, 33554467,
705	67108879, 134217757, 268435459, 536870923,
706	1073741827
707	#endif
708	};
709
710	/* find next biggest hash size */
711	newsize = oldsize = symbol->array_size;
712	for (i = 0, j = sizeof(sizes)/sizeof(sizes[0]); i < j; i++) {
713	if (oldsize < sizes[i]) {
714	newsize = sizes[i];
715	break;
716	}
717	}
718
719	if (newsize == oldsize) { /* table already at max (!) */
720	symbol->flags \|= ARRAYMAXED;
721	return;
722	}
723
724	/* allocate new table */
725	emalloc(new, NODE *, newsize sizeof(NODE *), "grow_table");
726	memset(new, '\0', newsize * sizeof(NODE *));
727
728	/* brand new hash table, set things up and return */
729	if (symbol->var_array == NULL) {
730	symbol->table_size = 0;
731	goto done;
732	}
733
734	/* old hash table there, move stuff to new, free old */
735	old = symbol->var_array;
736	for (k = 0; k < oldsize; k++) {
737	if (old[k] == NULL)
738	continue;
739
740	for (chain = old[k]; chain != NULL; chain = next) {
741	next = chain->ahnext;
742	hash1 = hash(chain->ahname_str,
743	chain->ahname_len, newsize);
744
745	/* remove from old list, add to new */
746	chain->ahnext = new[hash1];
747	new[hash1] = chain;
748	}
749	}
750	free(old);
751
752	done:
753	/*
754	* note that symbol->table_size does not change if an old array,
755	* and is explicitly set to 0 if a new one.
756	*/
757	symbol->var_array = new;
758	symbol->array_size = newsize;
759	}
760
761	/* set_SUBSEP --- make sure SUBSEP always has a string value */
762
763	void
764	set_SUBSEP(void)
765	{
766
767	(void) force_string(SUBSEP_node->var_value);
768	return;
769	}
770
771	/* pr_node --- print simple node info */
772
773	static void
774	pr_node(NODE *n)
775	{
776	if ((n->flags & (NUMCUR\|NUMBER)) != 0)
777	printf("%g", n->numbr);
778	else
779	printf("%.*s", (int) n->stlen, n->stptr);
780	}
781
782	/* assoc_dump --- dump the contents of an array */
783
784	NODE *
785	assoc_dump(NODE *symbol)
786	{
787	long i;
788	NODE *bucket;
789
790	if (symbol->var_array == NULL) {
791	printf(_("%s: empty (null)\n"), symbol->vname);
792	return tmp_number((AWKNUM) 0);
793	}
794
795	if (symbol->table_size == 0) {
796	printf(_("%s: empty (zero)\n"), symbol->vname);
797	return tmp_number((AWKNUM) 0);
798	}
799
800	printf(_("%s: table_size = %d, array_size = %d\n"), symbol->vname,
801	(int) symbol->table_size, (int) symbol->array_size);
802
803	for (i = 0; i < symbol->array_size; i++) {
804	for (bucket = symbol->var_array[i]; bucket != NULL;
805	bucket = bucket->ahnext) {
806	printf("%s: I: [len %d <%.*s>] V: [",
807	symbol->vname,
808	(int) bucket->ahname_len,
809	(int) bucket->ahname_len,
810	bucket->ahname_str);
811	pr_node(bucket->ahvalue);
812	printf("]\n");
813	}
814	}
815
816	return tmp_number((AWKNUM) 0);
817	}
818
819	/* do_adump --- dump an array: interface to assoc_dump */
820
821	NODE *
822	do_adump(NODE *tree)
823	{
824	NODE r, a;
825
826	a = tree->lnode;
827
828	if (a->type == Node_param_list) {
829	printf(_("%s: is parameter\n"), a->vname);
830	a = stack_ptr[a->param_cnt];
831	}
832
833	if (a->type == Node_array_ref) {
834	printf(_("%s: array_ref to %s\n"), a->vname,
835	a->orig_array->vname);
836	a = a->orig_array;
837	}
838
839	r = assoc_dump(a);
840
841	return r;
842	}
843
844	/*
845	* The following functions implement the builtin
846	* asort function. Initial work by Alan J. Broder,
847	* ajb@woti.com.
848	*/
849
850	/* dup_table --- duplicate input symbol table "symbol" */
851
852	static void
853	dup_table(NODE symbol, NODE newsymb)
854	{
855	NODE old, new, chain, bucket;
856	long i;
857	unsigned long cursize;
858
859	/* find the current hash size */
860	cursize = symbol->array_size;
861
862	new = NULL;
863
864	/* input is a brand new hash table, so there's nothing to copy */
865	if (symbol->var_array == NULL)
866	newsymb->table_size = 0;
867	else {
868	/* old hash table there, dupnode stuff into a new table */
869
870	/* allocate new table */
871	emalloc(new, NODE *, cursize sizeof(NODE *), "dup_table");
872	memset(new, '\0', cursize * sizeof(NODE *));
873
874	/* do the copying/dupnode'ing */
875	old = symbol->var_array;
876	for (i = 0; i < cursize; i++) {
877	if (old[i] != NULL) {
878	for (chain = old[i]; chain != NULL;
879	chain = chain->ahnext) {
880	/* get a node for the linked list */
881	getnode(bucket);
882	bucket->type = Node_ahash;
883	bucket->flags \|= MALLOC;
884	bucket->ahname_ref = 1;
885
886	/*
887	* copy the corresponding name and
888	* value from the original input list
889	*/
890	emalloc(bucket->ahname_str, char *, chain->ahname_len + 2, "dup_table");
891	bucket->ahname_len = chain->ahname_len;
892
893	memcpy(bucket->ahname_str, chain->ahname_str, chain->ahname_len);
894	bucket->ahname_str[bucket->ahname_len] = '\0';
895
896	bucket->ahvalue = dupnode(chain->ahvalue);
897
898	/*
899	* put the node on the corresponding
900	* linked list in the new table
901	*/
902	bucket->ahnext = new[i];
903	new[i] = bucket;
904	}
905	}
906	}
907	newsymb->table_size = symbol->table_size;
908	}
909
910	newsymb->var_array = new;
911	newsymb->array_size = cursize;
912	}
913
914	/* merge --- do a merge of two sorted lists */
915
916	static NODE *
917	merge(NODE left, NODE right)
918	{
919	NODE ans, cur;
920
921	/*
922	* The use of cmp_nodes() here means that IGNORECASE influences the
923	* comparison. This is OK, but it may be surprising. This comment
924	* serves to remind us that we know about this and that it's OK.
925	*/
926	if (cmp_nodes(left->ahvalue, right->ahvalue) <= 0) {
927	ans = cur = left;
928	left = left->ahnext;
929	} else {
930	ans = cur = right;
931	right = right->ahnext;
932	}
933
934	while (left != NULL && right != NULL) {
935	if (cmp_nodes(left->ahvalue, right->ahvalue) <= 0) {
936	cur->ahnext = left;
937	cur = left;
938	left = left->ahnext;
939	} else {
940	cur->ahnext = right;
941	cur = right;
942	right = right->ahnext;
943	}
944	}
945
946	cur->ahnext = (left != NULL ? left : right);
947
948	return ans;
949	}
950
951	/* merge_sort --- recursively sort the left and right sides of a list */
952
953	static NODE *
954	merge_sort(NODE *left, unsigned long size)
955	{
956	NODE right, tmp;
957	int i, half;
958
959	if (size <= 1)
960	return left;
961
962	/* walk down the list, till just one before the midpoint */
963	tmp = left;
964	half = size / 2;
965	for (i = 0; i < half-1; i++)
966	tmp = tmp->ahnext;
967
968	/* split the list into two parts */
969	right = tmp->ahnext;
970	tmp->ahnext = NULL;
971
972	/* sort the left and right parts of the list */
973	left = merge_sort(left, half);
974	right = merge_sort(right, size-half);
975
976	/* merge the two sorted parts of the list */
977	return merge(left, right);
978	}
979
980
981	/*
982	* assoc_from_list -- Populate an array with the contents of a list of NODEs,
983	* using increasing integers as the key.
984	*/
985
986	static void
987	assoc_from_list(NODE symbol, NODE list)
988	{
989	NODE *next;
990	unsigned long i = 0;
991	register unsigned long hash1;
992	char buf[100];
993
994	for (; list != NULL; list = next) {
995	next = list->ahnext;
996
997	/* make an int out of i++ */
998	i++;
999	sprintf(buf, "%lu", i);
1000	assert(list->ahname_str == NULL);
1001	assert(list->ahname_ref == 1);
1002	emalloc(list->ahname_str, char *, strlen(buf) + 2, "assoc_from_list");
1003	list->ahname_len = strlen(buf);
1004	strcpy(list->ahname_str, buf);
1005
1006	/* find the bucket where it belongs */
1007	hash1 = hash(list->ahname_str, list->ahname_len,
1008	symbol->array_size);
1009
1010	/* link the node into the chain at that bucket */
1011	list->ahnext = symbol->var_array[hash1];
1012	symbol->var_array[hash1] = list;
1013	}
1014	}
1015
1016	/*
1017	* assoc_sort_inplace --- sort all the values in symbol[], replacing
1018	* the sorted values back into symbol[], indexed by integers starting with 1.
1019	*/
1020
1021	typedef enum asort_how { VALUE, INDEX } ASORT_TYPE;
1022
1023	static NODE *
1024	assoc_sort_inplace(NODE *symbol, ASORT_TYPE how)
1025	{
1026	unsigned long i, num;
1027	NODE bucket, next, *list;
1028
1029	if (symbol->var_array == NULL
1030	\|\| symbol->array_size <= 0
1031	\|\| symbol->table_size <= 0)
1032	return tmp_number((AWKNUM) 0);
1033
1034	/* build a linked list out of all the entries in the table */
1035	if (how == VALUE) {
1036	list = NULL;
1037	num = 0;
1038	for (i = 0; i < symbol->array_size; i++) {
1039	for (bucket = symbol->var_array[i]; bucket != NULL; bucket = next) {
1040	next = bucket->ahnext;
1041	if (bucket->ahname_ref == 1) {
1042	free(bucket->ahname_str);
1043	bucket->ahname_str = NULL;
1044	bucket->ahname_len = 0;
1045	} else {
1046	NODE *r;
1047
1048	getnode(r);
1049	r = bucket;
1050	unref(bucket);
1051	bucket = r;
1052	bucket->flags \|= MALLOC;
1053	bucket->ahname_ref = 1;
1054	bucket->ahname_str = NULL;
1055	bucket->ahname_len = 0;
1056	}
1057	bucket->ahnext = list;
1058	list = bucket;
1059	num++;
1060	}
1061	symbol->var_array[i] = NULL;
1062	}
1063	} else { /* how == INDEX */
1064	list = NULL;
1065	num = 0;
1066	for (i = 0; i < symbol->array_size; i++) {
1067	for (bucket = symbol->var_array[i]; bucket != NULL; bucket = next) {
1068	next = bucket->ahnext;
1069
1070	/* toss old value */
1071	unref(bucket->ahvalue);
1072
1073	/* move index into value */
1074	if (bucket->ahname_ref == 1) {
1075	bucket->ahvalue = make_str_node(bucket->ahname_str,
1076	bucket->ahname_len, ALREADY_MALLOCED);
1077	bucket->ahname_str = NULL;
1078	bucket->ahname_len = 0;
1079	} else {
1080	NODE *r;
1081
1082	bucket->ahvalue = make_string(bucket->ahname_str, bucket->ahname_len);
1083	getnode(r);
1084	r = bucket;
1085	unref(bucket);
1086	bucket = r;
1087	bucket->flags \|= MALLOC;
1088	bucket->ahname_ref = 1;
1089	bucket->ahname_str = NULL;
1090	bucket->ahname_len = 0;
1091	}
1092
1093	bucket->ahnext = list;
1094	list = bucket;
1095	num++;
1096	}
1097	symbol->var_array[i] = NULL;
1098	}
1099	}
1100
1101	/*
1102	* Sort the linked list of NODEs.
1103	* (The especially nice thing about using a merge sort here is that
1104	* we require absolutely no additional storage. This is handy if the
1105	* array has grown to be very large.)
1106	*/
1107	list = merge_sort(list, num);
1108
1109	/*
1110	* now repopulate the original array, using increasing
1111	* integers as the key
1112	*/
1113	assoc_from_list(symbol, list);
1114
1115	return tmp_number((AWKNUM) num);
1116	}
1117
1118	/* asort_actual --- do the actual work to sort the input array */
1119
1120	static NODE *
1121	asort_actual(NODE *tree, ASORT_TYPE how)
1122	{
1123	NODE *array = get_array(tree->lnode);
1124
1125	if (tree->rnode != NULL) { /* 2nd optional arg */
1126	NODE *dest = get_array(tree->rnode->lnode);
1127
1128	assoc_clear(dest);
1129	dup_table(array, dest);
1130	array = dest;
1131	}
1132
1133	return assoc_sort_inplace(array, how);
1134	}
1135
1136	/* do_asort --- sort array by value */
1137
1138	NODE *
1139	do_asort(NODE *tree)
1140	{
1141	return asort_actual(tree, VALUE);
1142	}
1143
1144	/* do_asorti --- sort array by index */
1145
1146	NODE *
1147	do_asorti(NODE *tree)
1148	{
1149	return asort_actual(tree, INDEX);
1150	}
1151
1152	/*
1153	From bonzini@gnu.org Mon Oct 28 16:05:26 2002
1154	Date: Mon, 28 Oct 2002 13:33:03 +0100
1155	From: Paolo Bonzini <bonzini@gnu.org>
1156	To: arnold@skeeve.com
1157	Subject: Hash function
1158	Message-ID: <20021028123303.GA6832@biancaneve>
1159
1160	Here is the hash function I'm using in GNU Smalltalk. The scrambling is
1161	needed if you use powers of two as the table sizes. If you use primes it
1162	is not needed.
1163
1164	To use double-hashing with power-of-two size, you should use the
1165	_gst_hash_string(str, len) as the primary hash and
1166	scramble(_gst_hash_string (str, len)) \| 1 as the secondary hash.
1167
1168	Paolo
1169
1170	*/
1171	/*
1172	* ADR: Slightly modified to work w/in the context of gawk.
1173	*/
1174
/*
 * gst_hash_string --- alternative hash function, selected by array_init()
 *
 * Mixes the LEN bytes at STR into an accumulator, passes the result
 * through scramble(), and reduces it below HSIZE.
 */
static unsigned long
gst_hash_string(const char *str, size_t len, unsigned long hsize)
{
	unsigned long acc = 1497032417;    /* arbitrary value */
	size_t pos;

	for (pos = 0; pos < len; pos++) {
		acc += str[pos];
		acc += (acc << 10);
		acc ^= (acc >> 6);
	}

	acc = scramble(acc);

	return (acc >= hsize) ? acc % hsize : acc;
}
1193
/*
 * scramble --- final bit-mixing step used by gst_hash_string()
 *
 * Uses one sequence of shift/add steps when long is 4 bytes wide and
 * another one otherwise.
 */
static unsigned long
scramble(unsigned long x)
{
	if (sizeof(long) == 4) {
		/*
		 * NOTE(review): y is a signed int, so `y << 10' on a negative
		 * value and `y >> 22' depend on the compiler's treatment of
		 * signed shifts.  Left as is, since switching to unsigned
		 * would change the hash values produced on such platforms.
		 */
		int y = ~x;

		x += (y << 10) | (y >> 22);
		x += (x << 6)  | (x >> 26);
		x -= (x << 16) | (x >> 16);
	} else {
		x ^= (~x) >> 31;
		x += (x << 21) | (x >> 11);
		x += (x << 5) | (x >> 27);
		x += (x << 27) | (x >> 5);
		x += (x << 31);
	}

	return x;
}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: vendor/gawk/3.1.5/array.c@ 3934

Download in other formats: