Context Navigation

array.c

Visit:

Last change on this file was 3076, checked in by bird, 18 years ago
gawk 3.1.5
File size: 28.6 KB

Rev	Line
[3076]	1	/*
	2	* array.c - routines for associative arrays.
	3	*/
	4
	5	/*
	6	* Copyright (C) 1986, 1988, 1989, 1991-2005 the Free Software Foundation, Inc.
	7	*
	8	* This file is part of GAWK, the GNU implementation of the
	9	* AWK Programming Language.
	10	*
	11	* GAWK is free software; you can redistribute it and/or modify
	12	* it under the terms of the GNU General Public License as published by
	13	* the Free Software Foundation; either version 2 of the License, or
	14	* (at your option) any later version.
	15	*
	16	* GAWK is distributed in the hope that it will be useful,
	17	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	18	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	19	* GNU General Public License for more details.
	20	*
	21	* You should have received a copy of the GNU General Public License
	22	* along with this program; if not, write to the Free Software
	23	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
	24	*/
	25
	26	/*
	27	* Tree walks (``for (iggy in foo)'') and array deletions use expensive
	28	* linear searching. So what we do is start out with small arrays and
	29	* grow them as needed, so that our arrays are hopefully small enough,
	30	* most of the time, that they're pretty full and we're not looking at
	31	* wasted space.
	32	*
	33	* The decision is made to grow the array if the average chain length is
	34	* ``too big''. This is defined as the total number of entries in the table
	35	* divided by the size of the array being greater than some constant.
	36	*
	37	* 11/2002: We make the constant a variable, so that it can be tweaked
	38	* via environment variable.
	39	*/
	40
	41	static int AVG_CHAIN_MAX = 2; /* 11/2002: Modern machines are bigger, cut this down from 10. */
	42
	43	#include "awk.h"
	44
	45	static NODE assoc_find P((NODE symbol, NODE *subs, unsigned long hash1));
	46	static void grow_table P((NODE *symbol));
	47
	48	static unsigned long gst_hash_string P((const char *str, size_t len, unsigned long hsize));
	49	static unsigned long scramble P((unsigned long x));
	50	static unsigned long awk_hash P((const char *s, size_t len, unsigned long hsize));
	51
	52	unsigned long (hash)P((const char s, size_t len, unsigned long hsize)) = awk_hash;
	53
	54	/* array_init --- possibly temporary function for experimentation purposes */
	55
	56	void
	57	array_init()
	58	{
	59	const char *val;
	60	int newval;
	61
	62	if ((val = getenv("AVG_CHAIN_MAX")) != NULL && ISDIGIT(*val)) {
	63	for (newval = 0; val && ISDIGIT(val); val++)
	64	newval = (newval * 10) + *val - '0';
	65
	66	AVG_CHAIN_MAX = newval;
	67	}
	68
	69	if ((val = getenv("AWK_HASH")) != NULL && strcmp(val, "gst") == 0)
	70	hash = gst_hash_string;
	71	}
	72
	73	/*
	74	* get_actual --- proceed to the actual Node_var_array,
	75	* change Node_var_new to an array.
	76	* If canfatal and type isn't good, die fatally,
	77	* otherwise return the final actual value.
	78	*/
	79
	80	NODE *
	81	get_actual(NODE *symbol, int canfatal)
	82	{
	83	int isparam = (symbol->type == Node_param_list
	84	&& (symbol->flags & FUNC) == 0);
	85	NODE *save_symbol = symbol;
	86
	87	if (isparam) {
	88	save_symbol = symbol = stack_ptr[symbol->param_cnt];
	89	if (symbol->type == Node_array_ref)
	90	symbol = symbol->orig_array;
	91	}
	92
	93	switch (symbol->type) {
	94	case Node_var_new:
	95	symbol->type = Node_var_array;
	96	symbol->var_array = NULL;
	97	/* fall through */
	98	case Node_var_array:
	99	break;
	100
	101	case Node_array_ref:
	102	case Node_param_list:
	103	if ((symbol->flags & FUNC) == 0)
	104	cant_happen();
	105	/* else
	106	fall through */
	107
	108	default:
	109	/* notably Node_var but catches also e.g. FS[1] = "x" */
	110	if (canfatal) {
	111	if ((symbol->flags & FUNC) != 0)
	112	fatal(_("attempt to use function `%s' as an array"),
	113	save_symbol->vname);
	114	else if (isparam)
	115	fatal(_("attempt to use scalar parameter `%s' as an array"),
	116	save_symbol->vname);
	117	else
	118	fatal(_("attempt to use scalar `%s' as array"),
	119	save_symbol->vname);
	120	} else
	121	break;
	122	}
	123
	124	return symbol;
	125	}
	126
	127	/*
	128	* array_vname --- print the name of the array
	129	*
	130	* Returns a pointer to a statically maintained dynamically allocated string.
	131	* It's appropriate for printing the name once; if the caller wants
	132	* to save it, they have to make a copy.
	133	*
	134	* Setting MAX_LEN to a positive value (eg. 140) would limit the length
	135	* of the output to _roughly_ that length.
	136	*
	137	* If MAX_LEN == 0, which is the default, the whole stack is printed.
	138	*/
	139	#define MAX_LEN 0
	140
	141	char *
	142	array_vname(register const NODE *symbol)
	143	{
	144	if (symbol->type == Node_param_list)
	145	symbol = stack_ptr[symbol->param_cnt];
	146
	147	if (symbol->type != Node_array_ref \|\| symbol->orig_array->type != Node_var_array)
	148	return symbol->vname;
	149	else {
	150	static char *message = NULL;
	151	static size_t msglen = 0;
	152	char *s;
	153	size_t len;
	154	int n;
	155	const NODE *save_symbol = symbol;
	156	const char *from = _("from %s");
	157
	158	#if (MAX_LEN <= 0) \|\| !defined(HAVE_SNPRINTF)
	159	/* This is the default branch. */
	160
	161	/* First, we have to compute the length of the string: */
	162	len = strlen(symbol->vname) + 2; /* "%s (" */
	163	n = 0;
	164	do {
	165	symbol = symbol->prev_array;
	166	len += strlen(symbol->vname);
	167	n++;
	168	} while (symbol->type == Node_array_ref);
	169	/*
	170	* Each node contributes by strlen(from) minus the length
	171	* of "%s" in the translation (which is at least 2)
	172	* plus 2 for ", " or ")\0"; this adds up to strlen(from).
	173	*/
	174	len += n * strlen(from);
	175
	176	/* (Re)allocate memory: */
	177	if (message == NULL) {
	178	emalloc(message, char *, len, "array_vname");
	179	msglen = len;
	180	} else if (len > msglen) {
	181	erealloc(message, char *, len, "array_vname");
	182	msglen = len;
	183	} /* else
	184	current buffer can hold new name */
	185
	186	/* We're ready to print: */
	187	symbol = save_symbol;
	188	s = message;
	189	/*
	190	* Ancient systems have sprintf() returning char *, not int.
	191	* Thus, `s += sprintf(s, from, name);' is a no-no.
	192	*/
	193	sprintf(s, "%s (", symbol->vname);
	194	s += strlen(s);
	195	for (;;) {
	196	symbol = symbol->prev_array;
	197	sprintf(s, from, symbol->vname);
	198	s += strlen(s);
	199	if (symbol->type != Node_array_ref)
	200	break;
	201	sprintf(s, ", ");
	202	s += strlen(s);
	203	}
	204	sprintf(s, ")");
	205
	206	#else /* MAX_LEN > 0 */
	207
	208	/*
	209	* The following check fails only on
	210	* abnormally_long_variable_name.
	211	*/
	212	#define PRINT_CHECK \
	213	if (n <= 0 \|\| n >= len) \
	214	return save_symbol->vname; \
	215	s += n; len -= n
	216	#define PRINT(str) \
	217	n = snprintf(s, len, str); \
	218	PRINT_CHECK
	219	#define PRINT_vname(str) \
	220	n = snprintf(s, len, str, symbol->vname); \
	221	PRINT_CHECK
	222
	223	if (message == NULL)
	224	emalloc(message, char *, MAX_LEN, "array_vname");
	225
	226	s = message;
	227	len = MAX_LEN;
	228
	229	/* First, print the vname of the node. */
	230	PRINT_vname("%s (");
	231
	232	for (;;) {
	233	symbol = symbol->prev_array;
	234	/*
	235	* When we don't have enough space and this is not
	236	* the last node, shorten the list.
	237	*/
	238	if (len < 40 && symbol->type == Node_array_ref) {
	239	PRINT("..., ");
	240	symbol = symbol->orig_array;
	241	}
	242	PRINT_vname(from);
	243	if (symbol->type != Node_array_ref)
	244	break;
	245	PRINT(", ");
	246	}
	247	PRINT(")");
	248
	249	#undef PRINT_CHECK
	250	#undef PRINT
	251	#undef PRINT_vname
	252	#endif /* MAX_LEN <= 0 */
	253
	254	return message;
	255	}
	256	}
	257	#undef MAX_LEN
	258
	259	/* concat_exp --- concatenate expression list into a single string */
	260
	261	NODE *
	262	concat_exp(register NODE *tree)
	263	{
	264	register NODE *r;
	265	char *str;
	266	char *s;
	267	size_t len;
	268	int offset;
	269	size_t subseplen;
	270	const char *subsep;
	271
	272	if (tree->type != Node_expression_list)
	273	return force_string(tree_eval(tree));
	274	r = force_string(tree_eval(tree->lnode));
	275	if (tree->rnode == NULL)
	276	return r;
	277	subseplen = SUBSEP_node->var_value->stlen;
	278	subsep = SUBSEP_node->var_value->stptr;
	279	len = r->stlen + subseplen + 2;
	280	emalloc(str, char *, len, "concat_exp");
	281	memcpy(str, r->stptr, r->stlen+1);
	282	s = str + r->stlen;
	283	free_temp(r);
	284	for (tree = tree->rnode; tree != NULL; tree = tree->rnode) {
	285	if (subseplen == 1)
	286	s++ = subsep;
	287	else {
	288	memcpy(s, subsep, subseplen+1);
	289	s += subseplen;
	290	}
	291	r = force_string(tree_eval(tree->lnode));
	292	len += r->stlen + subseplen;
	293	offset = s - str;
	294	erealloc(str, char *, len, "concat_exp");
	295	s = str + offset;
	296	memcpy(s, r->stptr, r->stlen+1);
	297	s += r->stlen;
	298	free_temp(r);
	299	}
	300	r = make_str_node(str, s - str, ALREADY_MALLOCED);
	301	r->flags \|= TEMP;
	302	return r;
	303	}
	304
	305	/* assoc_clear --- flush all the values in symbol[] before doing a split() */
	306
	307	void
	308	assoc_clear(NODE *symbol)
	309	{
	310	long i;
	311	NODE bucket, next;
	312
	313	if (symbol->var_array == NULL)
	314	return;
	315	for (i = 0; i < symbol->array_size; i++) {
	316	for (bucket = symbol->var_array[i]; bucket != NULL; bucket = next) {
	317	next = bucket->ahnext;
	318	unref(bucket->ahvalue);
	319	unref(bucket); /* unref() will free the ahname_str */
	320	}
	321	symbol->var_array[i] = NULL;
	322	}
	323	free(symbol->var_array);
	324	symbol->var_array = NULL;
	325	symbol->array_size = symbol->table_size = 0;
	326	symbol->flags &= ~ARRAYMAXED;
	327	}
	328
	329	/* hash --- calculate the hash function of the string in subs */
	330
	331	static unsigned long
	332	awk_hash(register const char *s, register size_t len, unsigned long hsize)
	333	{
	334	register unsigned long h = 0;
	335
	336	/*
	337	* This is INCREDIBLY ugly, but fast. We break the string up into
	338	* 8 byte units. On the first time through the loop we get the
	339	* "leftover bytes" (strlen % 8). On every other iteration, we
	340	* perform 8 HASHC's so we handle all 8 bytes. Essentially, this
	341	* saves us 7 cmp & branch instructions. If this routine is
	342	* heavily used enough, it's worth the ugly coding.
	343	*
	344	* OZ's original sdbm hash, copied from Margo Seltzers db package.
	345	*/
	346
	347	/*
	348	* Even more speed:
	349	* #define HASHC h = s++ + 65599 h
	350	* Because 65599 = pow(2, 6) + pow(2, 16) - 1 we multiply by shifts
	351	*/
	352	#define HASHC htmp = (h << 6); \
	353	h = *s++ + htmp + (htmp << 10) - h
	354
	355	unsigned long htmp;
	356
	357	h = 0;
	358
	359	#if defined(VAXC)
	360	/*
	361	* This was an implementation of "Duff's Device", but it has been
	362	* redone, separating the switch for extra iterations from the
	363	* loop. This is necessary because the DEC VAX-C compiler is
	364	* STOOPID.
	365	*/
	366	switch (len & (8 - 1)) {
	367	case 7: HASHC;
	368	case 6: HASHC;
	369	case 5: HASHC;
	370	case 4: HASHC;
	371	case 3: HASHC;
	372	case 2: HASHC;
	373	case 1: HASHC;
	374	default: break;
	375	}
	376
	377	if (len > (8 - 1)) {
	378	register size_t loop = len >> 3;
	379	do {
	380	HASHC;
	381	HASHC;
	382	HASHC;
	383	HASHC;
	384	HASHC;
	385	HASHC;
	386	HASHC;
	387	HASHC;
	388	} while (--loop);
	389	}
	390	#else /* ! VAXC */
	391	/* "Duff's Device" for those who can handle it */
	392	if (len > 0) {
	393	register size_t loop = (len + 8 - 1) >> 3;
	394
	395	switch (len & (8 - 1)) {
	396	case 0:
	397	do { /* All fall throughs */
	398	HASHC;
	399	case 7: HASHC;
	400	case 6: HASHC;
	401	case 5: HASHC;
	402	case 4: HASHC;
	403	case 3: HASHC;
	404	case 2: HASHC;
	405	case 1: HASHC;
	406	} while (--loop);
	407	}
	408	}
	409	#endif /* ! VAXC */
	410
	411	if (h >= hsize)
	412	h %= hsize;
	413	return h;
	414	}
	415
	416	/* assoc_find --- locate symbol[subs] */
	417
	418	static NODE * /* NULL if not found */
	419	assoc_find(NODE symbol, register NODE subs, unsigned long hash1)
	420	{
	421	register NODE *bucket;
	422	const char *s1_str;
	423	size_t s1_len;
	424	NODE *s2;
	425
	426	for (bucket = symbol->var_array[hash1]; bucket != NULL;
	427	bucket = bucket->ahnext) {
	428	/*
	429	* This used to use cmp_nodes() here. That's wrong.
	430	* Array indexes are strings; compare as such, always!
	431	*/
	432	s1_str = bucket->ahname_str;
	433	s1_len = bucket->ahname_len;
	434	s2 = subs;
	435
	436	if (s1_len == s2->stlen) {
	437	if (s1_len == 0 /* "" is a valid index */
	438	\|\| memcmp(s1_str, s2->stptr, s1_len) == 0)
	439	return bucket;
	440	}
	441	}
	442	return NULL;
	443	}
	444
	445	/* in_array --- test whether the array element symbol[subs] exists or not,
	446	* return pointer to value if it does.
	447	*/
	448
	449	NODE *
	450	in_array(NODE symbol, NODE subs)
	451	{
	452	register unsigned long hash1;
	453	NODE *ret;
	454
	455	symbol = get_array(symbol);
	456
	457	/*
	458	* Evaluate subscript first, it could have side effects.
	459	*/
	460	subs = concat_exp(subs); /* concat_exp returns a string node */
	461	if (symbol->var_array == NULL) {
	462	free_temp(subs);
	463	return NULL;
	464	}
	465	hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size);
	466	ret = assoc_find(symbol, subs, hash1);
	467	free_temp(subs);
	468	if (ret)
	469	return ret->ahvalue;
	470	else
	471	return NULL;
	472	}
	473
	474	/*
	475	* assoc_lookup:
	476	* Find SYMBOL[SUBS] in the assoc array. Install it with value "" if it
	477	* isn't there. Returns a pointer ala get_lhs to where its value is stored.
	478	*
	479	* SYMBOL is the address of the node (or other pointer) being dereferenced.
	480	* SUBS is a number or string used as the subscript.
	481	*/
	482
	483	NODE **
	484	assoc_lookup(NODE symbol, NODE subs, int reference)
	485	{
	486	register unsigned long hash1;
	487	register NODE *bucket;
	488
	489	assert(symbol->type == Node_var_array);
	490
	491	(void) force_string(subs);
	492
	493	if (symbol->var_array == NULL) {
	494	symbol->array_size = symbol->table_size = 0; /* sanity */
	495	symbol->flags &= ~ARRAYMAXED;
	496	grow_table(symbol);
	497	hash1 = hash(subs->stptr, subs->stlen,
	498	(unsigned long) symbol->array_size);
	499	} else {
	500	hash1 = hash(subs->stptr, subs->stlen,
	501	(unsigned long) symbol->array_size);
	502	bucket = assoc_find(symbol, subs, hash1);
	503	if (bucket != NULL) {
	504	free_temp(subs);
	505	return &(bucket->ahvalue);
	506	}
	507	}
	508
	509	if (do_lint && reference) {
	510	subs->stptr[subs->stlen] = '\0';
	511	lintwarn(_("reference to uninitialized element `%s[\"%s\"]'"),
	512	array_vname(symbol), subs->stptr);
	513	}
	514
	515	/* It's not there, install it. */
	516	if (do_lint && subs->stlen == 0)
	517	lintwarn(_("subscript of array `%s' is null string"),
	518	array_vname(symbol));
	519
	520	/* first see if we would need to grow the array, before installing */
	521	symbol->table_size++;
	522	if ((symbol->flags & ARRAYMAXED) == 0
	523	&& (symbol->table_size / symbol->array_size) > AVG_CHAIN_MAX) {
	524	grow_table(symbol);
	525	/* have to recompute hash value for new size */
	526	hash1 = hash(subs->stptr, subs->stlen,
	527	(unsigned long) symbol->array_size);
	528	}
	529
	530	getnode(bucket);
	531	bucket->type = Node_ahash;
	532
	533	/*
	534	* Freeze this string value --- it must never
	535	* change, no matter what happens to the value
	536	* that created it or to CONVFMT, etc.
	537	*
	538	* One day: Use an atom table to track array indices,
	539	* and avoid the extra memory overhead.
	540	*/
	541	bucket->flags \|= MALLOC;
	542	bucket->ahname_ref = 1;
	543
	544	/* For TEMP node, reuse the storage directly */
	545	if ((subs->flags & TEMP) != 0) {
	546	bucket->ahname_str = subs->stptr;
	547	bucket->ahname_len = subs->stlen;
	548	bucket->ahname_str[bucket->ahname_len] = '\0';
	549	subs->flags &= ~TEMP; /* for good measure */
	550	freenode(subs);
	551	} else {
	552	emalloc(bucket->ahname_str, char *, subs->stlen + 2, "assoc_lookup");
	553	bucket->ahname_len = subs->stlen;
	554	memcpy(bucket->ahname_str, subs->stptr, subs->stlen);
	555	bucket->ahname_str[bucket->ahname_len] = '\0';
	556	}
	557
	558	bucket->ahvalue = Nnull_string;
	559	bucket->ahnext = symbol->var_array[hash1];
	560	symbol->var_array[hash1] = bucket;
	561	return &(bucket->ahvalue);
	562	}
	563
	564	/* do_delete --- perform `delete array[s]' */
	565
	566	/*
	567	* `symbol' is array
	568	* `tree' is subscript
	569	*/
	570
	571	void
	572	do_delete(NODE sym, NODE tree)
	573	{
	574	register unsigned long hash1;
	575	register NODE bucket, last;
	576	NODE *subs;
	577	register NODE *symbol = get_array(sym);
	578
	579	if (tree == NULL) { /* delete array */
	580	assoc_clear(symbol);
	581	return;
	582	}
	583
	584	last = NULL; /* shut up gcc -Wall */
	585	hash1 = 0; /* ditto */
	586
	587	/*
	588	* Always evaluate subscript, it could have side effects.
	589	*/
	590	subs = concat_exp(tree); /* concat_exp returns string node */
	591
	592	if (symbol->var_array != NULL) {
	593	hash1 = hash(subs->stptr, subs->stlen,
	594	(unsigned long) symbol->array_size);
	595	last = NULL;
	596	for (bucket = symbol->var_array[hash1]; bucket != NULL;
	597	last = bucket, bucket = bucket->ahnext) {
	598	/*
	599	* This used to use cmp_nodes() here. That's wrong.
	600	* Array indexes are strings; compare as such, always!
	601	*/
	602	const char *s1_str;
	603	size_t s1_len;
	604	NODE *s2;
	605
	606	s1_str = bucket->ahname_str;
	607	s1_len = bucket->ahname_len;
	608	s2 = subs;
	609
	610	if (s1_len == s2->stlen) {
	611	if (s1_len == 0 /* "" is a valid index */
	612	\|\| memcmp(s1_str, s2->stptr, s1_len) == 0)
	613	break;
	614	}
	615	}
	616	} else
	617	bucket = NULL; /* The array is empty. */
	618
	619	if (bucket == NULL) {
	620	if (do_lint)
	621	lintwarn(_("delete: index `%s' not in array `%s'"),
	622	subs->stptr, array_vname(sym));
	623	free_temp(subs);
	624	return;
	625	}
	626
	627	free_temp(subs);
	628
	629	if (last != NULL)
	630	last->ahnext = bucket->ahnext;
	631	else
	632	symbol->var_array[hash1] = bucket->ahnext;
	633	unref(bucket->ahvalue);
	634	unref(bucket); /* unref() will free the ahname_str */
	635	symbol->table_size--;
	636	if (symbol->table_size <= 0) {
	637	memset(symbol->var_array, '\0',
	638	sizeof(NODE ) symbol->array_size);
	639	symbol->table_size = symbol->array_size = 0;
	640	symbol->flags &= ~ARRAYMAXED;
	641	free((char *) symbol->var_array);
	642	symbol->var_array = NULL;
	643	}
	644	}
	645
	646	/* do_delete_loop --- simulate ``for (iggy in foo) delete foo[iggy]'' */
	647
	648	/*
	649	* The primary hassle here is that `iggy' needs to have some arbitrary
	650	* array index put in it before we can clear the array, we can't
	651	* just replace the loop with `delete foo'.
	652	*/
	653
	654	void
	655	do_delete_loop(NODE symbol, NODE tree)
	656	{
	657	long i;
	658	NODE **lhs;
	659	Func_ptr after_assign = NULL;
	660
	661	symbol = get_array(symbol);
	662
	663	if (symbol->var_array == NULL)
	664	return;
	665
	666	/* get first index value */
	667	for (i = 0; i < symbol->array_size; i++) {
	668	if (symbol->var_array[i] != NULL) {
	669	lhs = get_lhs(tree->lnode, & after_assign, FALSE);
	670	unref(*lhs);
	671	*lhs = make_string(symbol->var_array[i]->ahname_str,
	672	symbol->var_array[i]->ahname_len);
	673	if (after_assign)
	674	(*after_assign)();
	675	break;
	676	}
	677	}
	678
	679	/* blast the array in one shot */
	680	assoc_clear(symbol);
	681	}
	682
	683	/* grow_table --- grow a hash table */
	684
	685	static void
	686	grow_table(NODE *symbol)
	687	{
	688	NODE old, new, chain, next;
	689	int i, j;
	690	unsigned long hash1;
	691	unsigned long oldsize, newsize, k;
	692	/*
	693	* This is an array of primes. We grow the table by an order of
	694	* magnitude each time (not just doubling) so that growing is a
	695	* rare operation. We expect, on average, that it won't happen
	696	* more than twice. The final size is also chosen to be small
	697	* enough so that MS-DOG mallocs can handle it. When things are
	698	* very large (> 8K), we just double more or less, instead of
	699	* just jumping from 8K to 64K.
	700	*/
	701	static const long sizes[] = { 13, 127, 1021, 8191, 16381, 32749, 65497,
	702	#if ! defined(MSDOS) && ! defined(OS2) && ! defined(atarist)
	703	131101, 262147, 524309, 1048583, 2097169,
	704	4194319, 8388617, 16777259, 33554467,
	705	67108879, 134217757, 268435459, 536870923,
	706	1073741827
	707	#endif
	708	};
	709
	710	/* find next biggest hash size */
	711	newsize = oldsize = symbol->array_size;
	712	for (i = 0, j = sizeof(sizes)/sizeof(sizes[0]); i < j; i++) {
	713	if (oldsize < sizes[i]) {
	714	newsize = sizes[i];
	715	break;
	716	}
	717	}
	718
	719	if (newsize == oldsize) { /* table already at max (!) */
	720	symbol->flags \|= ARRAYMAXED;
	721	return;
	722	}
	723
	724	/* allocate new table */
	725	emalloc(new, NODE *, newsize sizeof(NODE *), "grow_table");
	726	memset(new, '\0', newsize * sizeof(NODE *));
	727
	728	/* brand new hash table, set things up and return */
	729	if (symbol->var_array == NULL) {
	730	symbol->table_size = 0;
	731	goto done;
	732	}
	733
	734	/* old hash table there, move stuff to new, free old */
	735	old = symbol->var_array;
	736	for (k = 0; k < oldsize; k++) {
	737	if (old[k] == NULL)
	738	continue;
	739
	740	for (chain = old[k]; chain != NULL; chain = next) {
	741	next = chain->ahnext;
	742	hash1 = hash(chain->ahname_str,
	743	chain->ahname_len, newsize);
	744
	745	/* remove from old list, add to new */
	746	chain->ahnext = new[hash1];
	747	new[hash1] = chain;
	748	}
	749	}
	750	free(old);
	751
	752	done:
	753	/*
	754	* note that symbol->table_size does not change if an old array,
	755	* and is explicitly set to 0 if a new one.
	756	*/
	757	symbol->var_array = new;
	758	symbol->array_size = newsize;
	759	}
	760
	761	/* set_SUBSEP --- make sure SUBSEP always has a string value */
	762
	763	void
	764	set_SUBSEP(void)
	765	{
	766
	767	(void) force_string(SUBSEP_node->var_value);
	768	return;
	769	}
	770
	771	/* pr_node --- print simple node info */
	772
	773	static void
	774	pr_node(NODE *n)
	775	{
	776	if ((n->flags & (NUMCUR\|NUMBER)) != 0)
	777	printf("%g", n->numbr);
	778	else
	779	printf("%.*s", (int) n->stlen, n->stptr);
	780	}
	781
	782	/* assoc_dump --- dump the contents of an array */
	783
	784	NODE *
	785	assoc_dump(NODE *symbol)
	786	{
	787	long i;
	788	NODE *bucket;
	789
	790	if (symbol->var_array == NULL) {
	791	printf(_("%s: empty (null)\n"), symbol->vname);
	792	return tmp_number((AWKNUM) 0);
	793	}
	794
	795	if (symbol->table_size == 0) {
	796	printf(_("%s: empty (zero)\n"), symbol->vname);
	797	return tmp_number((AWKNUM) 0);
	798	}
	799
	800	printf(_("%s: table_size = %d, array_size = %d\n"), symbol->vname,
	801	(int) symbol->table_size, (int) symbol->array_size);
	802
	803	for (i = 0; i < symbol->array_size; i++) {
	804	for (bucket = symbol->var_array[i]; bucket != NULL;
	805	bucket = bucket->ahnext) {
	806	printf("%s: I: [len %d <%.*s>] V: [",
	807	symbol->vname,
	808	(int) bucket->ahname_len,
	809	(int) bucket->ahname_len,
	810	bucket->ahname_str);
	811	pr_node(bucket->ahvalue);
	812	printf("]\n");
	813	}
	814	}
	815
	816	return tmp_number((AWKNUM) 0);
	817	}
	818
	819	/* do_adump --- dump an array: interface to assoc_dump */
	820
	821	NODE *
	822	do_adump(NODE *tree)
	823	{
	824	NODE r, a;
	825
	826	a = tree->lnode;
	827
	828	if (a->type == Node_param_list) {
	829	printf(_("%s: is parameter\n"), a->vname);
	830	a = stack_ptr[a->param_cnt];
	831	}
	832
	833	if (a->type == Node_array_ref) {
	834	printf(_("%s: array_ref to %s\n"), a->vname,
	835	a->orig_array->vname);
	836	a = a->orig_array;
	837	}
	838
	839	r = assoc_dump(a);
	840
	841	return r;
	842	}
	843
	844	/*
	845	* The following functions implement the builtin
	846	* asort function. Initial work by Alan J. Broder,
	847	* ajb@woti.com.
	848	*/
	849
	850	/* dup_table --- duplicate input symbol table "symbol" */
	851
	852	static void
	853	dup_table(NODE symbol, NODE newsymb)
	854	{
	855	NODE old, new, chain, bucket;
	856	long i;
	857	unsigned long cursize;
	858
	859	/* find the current hash size */
	860	cursize = symbol->array_size;
	861
	862	new = NULL;
	863
	864	/* input is a brand new hash table, so there's nothing to copy */
	865	if (symbol->var_array == NULL)
	866	newsymb->table_size = 0;
	867	else {
	868	/* old hash table there, dupnode stuff into a new table */
	869
	870	/* allocate new table */
	871	emalloc(new, NODE *, cursize sizeof(NODE *), "dup_table");
	872	memset(new, '\0', cursize * sizeof(NODE *));
	873
	874	/* do the copying/dupnode'ing */
	875	old = symbol->var_array;
	876	for (i = 0; i < cursize; i++) {
	877	if (old[i] != NULL) {
	878	for (chain = old[i]; chain != NULL;
	879	chain = chain->ahnext) {
	880	/* get a node for the linked list */
	881	getnode(bucket);
	882	bucket->type = Node_ahash;
	883	bucket->flags \|= MALLOC;
	884	bucket->ahname_ref = 1;
	885
	886	/*
	887	* copy the corresponding name and
	888	* value from the original input list
	889	*/
	890	emalloc(bucket->ahname_str, char *, chain->ahname_len + 2, "dup_table");
	891	bucket->ahname_len = chain->ahname_len;
	892
	893	memcpy(bucket->ahname_str, chain->ahname_str, chain->ahname_len);
	894	bucket->ahname_str[bucket->ahname_len] = '\0';
	895
	896	bucket->ahvalue = dupnode(chain->ahvalue);
	897
	898	/*
	899	* put the node on the corresponding
	900	* linked list in the new table
	901	*/
	902	bucket->ahnext = new[i];
	903	new[i] = bucket;
	904	}
	905	}
	906	}
	907	newsymb->table_size = symbol->table_size;
	908	}
	909
	910	newsymb->var_array = new;
	911	newsymb->array_size = cursize;
	912	}
	913
	914	/* merge --- do a merge of two sorted lists */
	915
	916	static NODE *
	917	merge(NODE left, NODE right)
	918	{
	919	NODE ans, cur;
	920
	921	/*
	922	* The use of cmp_nodes() here means that IGNORECASE influences the
	923	* comparison. This is OK, but it may be surprising. This comment
	924	* serves to remind us that we know about this and that it's OK.
	925	*/
	926	if (cmp_nodes(left->ahvalue, right->ahvalue) <= 0) {
	927	ans = cur = left;
	928	left = left->ahnext;
	929	} else {
	930	ans = cur = right;
	931	right = right->ahnext;
	932	}
	933
	934	while (left != NULL && right != NULL) {
	935	if (cmp_nodes(left->ahvalue, right->ahvalue) <= 0) {
	936	cur->ahnext = left;
	937	cur = left;
	938	left = left->ahnext;
	939	} else {
	940	cur->ahnext = right;
	941	cur = right;
	942	right = right->ahnext;
	943	}
	944	}
	945
	946	cur->ahnext = (left != NULL ? left : right);
	947
	948	return ans;
	949	}
	950
	951	/* merge_sort --- recursively sort the left and right sides of a list */
	952
	953	static NODE *
	954	merge_sort(NODE *left, unsigned long size)
	955	{
	956	NODE right, tmp;
	957	int i, half;
	958
	959	if (size <= 1)
	960	return left;
	961
	962	/* walk down the list, till just one before the midpoint */
	963	tmp = left;
	964	half = size / 2;
	965	for (i = 0; i < half-1; i++)
	966	tmp = tmp->ahnext;
	967
	968	/* split the list into two parts */
	969	right = tmp->ahnext;
	970	tmp->ahnext = NULL;
	971
	972	/* sort the left and right parts of the list */
	973	left = merge_sort(left, half);
	974	right = merge_sort(right, size-half);
	975
	976	/* merge the two sorted parts of the list */
	977	return merge(left, right);
	978	}
	979
	980
	981	/*
	982	* assoc_from_list -- Populate an array with the contents of a list of NODEs,
	983	* using increasing integers as the key.
	984	*/
	985
	986	static void
	987	assoc_from_list(NODE symbol, NODE list)
	988	{
	989	NODE *next;
	990	unsigned long i = 0;
	991	register unsigned long hash1;
	992	char buf[100];
	993
	994	for (; list != NULL; list = next) {
	995	next = list->ahnext;
	996
	997	/* make an int out of i++ */
	998	i++;
	999	sprintf(buf, "%lu", i);
	1000	assert(list->ahname_str == NULL);
	1001	assert(list->ahname_ref == 1);
	1002	emalloc(list->ahname_str, char *, strlen(buf) + 2, "assoc_from_list");
	1003	list->ahname_len = strlen(buf);
	1004	strcpy(list->ahname_str, buf);
	1005
	1006	/* find the bucket where it belongs */
	1007	hash1 = hash(list->ahname_str, list->ahname_len,
	1008	symbol->array_size);
	1009
	1010	/* link the node into the chain at that bucket */
	1011	list->ahnext = symbol->var_array[hash1];
	1012	symbol->var_array[hash1] = list;
	1013	}
	1014	}
	1015
	1016	/*
	1017	* assoc_sort_inplace --- sort all the values in symbol[], replacing
	1018	* the sorted values back into symbol[], indexed by integers starting with 1.
	1019	*/
	1020
	1021	typedef enum asort_how { VALUE, INDEX } ASORT_TYPE;
	1022
	1023	static NODE *
	1024	assoc_sort_inplace(NODE *symbol, ASORT_TYPE how)
	1025	{
	1026	unsigned long i, num;
	1027	NODE bucket, next, *list;
	1028
	1029	if (symbol->var_array == NULL
	1030	\|\| symbol->array_size <= 0
	1031	\|\| symbol->table_size <= 0)
	1032	return tmp_number((AWKNUM) 0);
	1033
	1034	/* build a linked list out of all the entries in the table */
	1035	if (how == VALUE) {
	1036	list = NULL;
	1037	num = 0;
	1038	for (i = 0; i < symbol->array_size; i++) {
	1039	for (bucket = symbol->var_array[i]; bucket != NULL; bucket = next) {
	1040	next = bucket->ahnext;
	1041	if (bucket->ahname_ref == 1) {
	1042	free(bucket->ahname_str);
	1043	bucket->ahname_str = NULL;
	1044	bucket->ahname_len = 0;
	1045	} else {
	1046	NODE *r;
	1047
	1048	getnode(r);
	1049	r = bucket;
	1050	unref(bucket);
	1051	bucket = r;
	1052	bucket->flags \|= MALLOC;
	1053	bucket->ahname_ref = 1;
	1054	bucket->ahname_str = NULL;
	1055	bucket->ahname_len = 0;
	1056	}
	1057	bucket->ahnext = list;
	1058	list = bucket;
	1059	num++;
	1060	}
	1061	symbol->var_array[i] = NULL;
	1062	}
	1063	} else { /* how == INDEX */
	1064	list = NULL;
	1065	num = 0;
	1066	for (i = 0; i < symbol->array_size; i++) {
	1067	for (bucket = symbol->var_array[i]; bucket != NULL; bucket = next) {
	1068	next = bucket->ahnext;
	1069
	1070	/* toss old value */
	1071	unref(bucket->ahvalue);
	1072
	1073	/* move index into value */
	1074	if (bucket->ahname_ref == 1) {
	1075	bucket->ahvalue = make_str_node(bucket->ahname_str,
	1076	bucket->ahname_len, ALREADY_MALLOCED);
	1077	bucket->ahname_str = NULL;
	1078	bucket->ahname_len = 0;
	1079	} else {
	1080	NODE *r;
	1081
	1082	bucket->ahvalue = make_string(bucket->ahname_str, bucket->ahname_len);
	1083	getnode(r);
	1084	r = bucket;
	1085	unref(bucket);
	1086	bucket = r;
	1087	bucket->flags \|= MALLOC;
	1088	bucket->ahname_ref = 1;
	1089	bucket->ahname_str = NULL;
	1090	bucket->ahname_len = 0;
	1091	}
	1092
	1093	bucket->ahnext = list;
	1094	list = bucket;
	1095	num++;
	1096	}
	1097	symbol->var_array[i] = NULL;
	1098	}
	1099	}
	1100
	1101	/*
	1102	* Sort the linked list of NODEs.
	1103	* (The especially nice thing about using a merge sort here is that
	1104	* we require absolutely no additional storage. This is handy if the
	1105	* array has grown to be very large.)
	1106	*/
	1107	list = merge_sort(list, num);
	1108
	1109	/*
	1110	* now repopulate the original array, using increasing
	1111	* integers as the key
	1112	*/
	1113	assoc_from_list(symbol, list);
	1114
	1115	return tmp_number((AWKNUM) num);
	1116	}
	1117
	1118	/* asort_actual --- do the actual work to sort the input array */
	1119
	1120	static NODE *
	1121	asort_actual(NODE *tree, ASORT_TYPE how)
	1122	{
	1123	NODE *array = get_array(tree->lnode);
	1124
	1125	if (tree->rnode != NULL) { /* 2nd optional arg */
	1126	NODE *dest = get_array(tree->rnode->lnode);
	1127
	1128	assoc_clear(dest);
	1129	dup_table(array, dest);
	1130	array = dest;
	1131	}
	1132
	1133	return assoc_sort_inplace(array, how);
	1134	}
	1135
	1136	/* do_asort --- sort array by value */
	1137
	1138	NODE *
	1139	do_asort(NODE *tree)
	1140	{
	1141	return asort_actual(tree, VALUE);
	1142	}
	1143
	1144	/* do_asorti --- sort array by index */
	1145
	1146	NODE *
	1147	do_asorti(NODE *tree)
	1148	{
	1149	return asort_actual(tree, INDEX);
	1150	}
	1151
	1152	/*
	1153	From bonzini@gnu.org Mon Oct 28 16:05:26 2002
	1154	Date: Mon, 28 Oct 2002 13:33:03 +0100
	1155	From: Paolo Bonzini <bonzini@gnu.org>
	1156	To: arnold@skeeve.com
	1157	Subject: Hash function
	1158	Message-ID: <20021028123303.GA6832@biancaneve>
	1159
	1160	Here is the hash function I'm using in GNU Smalltalk. The scrambling is
	1161	needed if you use powers of two as the table sizes. If you use primes it
	1162	is not needed.
	1163
	1164	To use double-hashing with power-of-two size, you should use the
	1165	_gst_hash_string(str, len) as the primary hash and
	1166	scramble(_gst_hash_string (str, len)) | 1 as the secondary hash.
	1167
	1168	Paolo
	1169
	1170	*/
	1171	/*
	1172	* ADR: Slightly modified to work w/in the context of gawk.
	1173	*/
	1174
	1175	static unsigned long
	1176	gst_hash_string(const char *str, size_t len, unsigned long hsize)
	1177	{
	1178	unsigned long hashVal = 1497032417; /* arbitrary value */
	1179	unsigned long ret;
	1180
	1181	while (len--) {
	1182	hashVal += *str++;
	1183	hashVal += (hashVal << 10);
	1184	hashVal ^= (hashVal >> 6);
	1185	}
	1186
	1187	ret = scramble(hashVal);
	1188	if (ret >= hsize)
	1189	ret %= hsize;
	1190
	1191	return ret;
	1192	}
	1193
	1194	static unsigned long
	1195	scramble(unsigned long x)
	1196	{
	1197	if (sizeof(long) == 4) {
	1198	int y = ~x;
	1199
	1200	x += (y << 10) \| (y >> 22);
	1201	x += (x << 6) \| (x >> 26);
	1202	x -= (x << 16) \| (x >> 16);
	1203	} else {
	1204	x ^= (~x) >> 31;
	1205	x += (x << 21) \| (x >> 11);
	1206	x += (x << 5) \| (x >> 27);
	1207	x += (x << 27) \| (x >> 5);
	1208	x += (x << 31);
	1209	}
	1210
	1211	return x;
	1212	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: vendor/gawk/3.1.5/array.c

Download in other formats: