/* hash - hashing table processing.

   Copyright (C) 1998-2004, 2006-2007, 2009-2021 Free Software Foundation, Inc.

   Written by Jim Meyering, 1992.

   This file is free software: you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as
   published by the Free Software Foundation; either version 2.1 of the
   License, or (at your option) any later version.

   This file is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

/* A generic hash table package.  */

/* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
   of malloc.  If you change USE_OBSTACK, you have to recompile!  */

#include <config.h>

#include "hash.h"

#include "bitrotate.h"
#include "xalloc-oversized.h"

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#if USE_OBSTACK
# include "obstack.h"
# ifndef obstack_chunk_alloc
#  define obstack_chunk_alloc malloc
# endif
# ifndef obstack_chunk_free
#  define obstack_chunk_free free
# endif
#endif
struct hash_entry
  {
    void *data;
    struct hash_entry *next;
  };

struct hash_table
  {
    /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
       for a possibility of N_BUCKETS.  Among those, N_BUCKETS_USED buckets
       are not empty, and there are N_ENTRIES active entries in the table.  */
    struct hash_entry *bucket;
    struct hash_entry const *bucket_limit;
    size_t n_buckets;
    size_t n_buckets_used;
    size_t n_entries;

    /* Tuning arguments, kept in a physically separate structure.  */
    const Hash_tuning *tuning;

    /* Three functions are given to 'hash_initialize', see the documentation
       block for that function.  In a word, HASHER randomizes a user entry
       into a number from 0 up to some maximum minus 1; COMPARATOR returns
       true if two user entries compare equally; and DATA_FREER is the
       cleanup function for a user entry.  */
    Hash_hasher hasher;
    Hash_comparator comparator;
    Hash_data_freer data_freer;

    /* A linked list of freed struct hash_entry structs.  */
    struct hash_entry *free_entry_list;

#if USE_OBSTACK
    /* Whenever obstacks are used, it is possible to allocate all overflowed
       entries into a single stack, so they all can be freed in a single
       operation.  It is not clear if the speedup is worth the trouble.  */
    struct obstack entry_stack;
#endif
  };
/* A hash table contains many internal entries, each holding a pointer to
   some user-provided data (also called a user entry).  The word "entry" is
   used below for both the internal entry and its associated user entry.  A
   user entry's contents may be hashed by a randomization function (the
   hashing function, or just "hasher" for short) into a number (or "slot")
   between 0 and the current table size, exclusive.  At each slot position
   in the hash table starts a linked chain of entries for which the user
   data all hash to this slot.  A bucket is the collection of all entries
   hashing to the same slot.

   A good "hasher" function will distribute entries rather evenly in buckets.
   In the ideal case, the length of each bucket is roughly the number of
   entries divided by the table size.  Finding the slot for a given datum is
   usually done in constant time by the "hasher", and the later finding of a
   precise entry is linear in time with the size of the bucket.  Consequently,
   a larger hash table size (that is, a larger number of buckets) is prone to
   yielding shorter chains, *given* the "hasher" function behaves properly.

   Long buckets slow down the lookup algorithm.  One might use big hash table
   sizes in the hope of reducing the average length of buckets, but this can
   become wasteful, as unused slots in the hash table take some space.  The
   best bet is to make sure you are using a good "hasher" function (beware
   that those are not that easy to write! :-), and to use a table size
   larger than the actual number of entries.  */
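
/* As a concrete illustration of the API sketched above, here is a minimal
   usage example, assuming only the declarations in "hash.h" (plus
   <string.h> for strcmp).  The wrappers 'str_hasher' and 'str_comparator'
   are hypothetical caller-side helpers, not part of this file; they adapt
   hash_string (defined further below) and strcmp to the Hash_hasher and
   Hash_comparator signatures:

     static size_t
     str_hasher (const void *entry, size_t n_buckets)
     {
       return hash_string (entry, n_buckets);
     }

     static bool
     str_comparator (const void *a, const void *b)
     {
       return strcmp (a, b) == 0;
     }

     Hash_table *ht = hash_initialize (31, NULL, str_hasher,
                                       str_comparator, NULL);
     if (ht == NULL)
       abort ();
     hash_insert (ht, "blue");
     if (hash_lookup (ht, "blue"))
       puts ("found");
     hash_free (ht);

   The candidate size 31 and the NULL tuning (requesting the defaults) are
   illustrative values only.  */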

/* If an insertion makes the ratio of nonempty buckets to table size larger
   than the growth threshold (a number between 0.0 and 1.0), then increase
   the table size by multiplying by the growth factor (a number greater than
   1.0).  The growth threshold defaults to 0.8, and the growth factor
   defaults to 1.414, meaning that the table will have doubled its size
   every second time 80% of the buckets get used.  */
#define DEFAULT_GROWTH_THRESHOLD 0.8f
#define DEFAULT_GROWTH_FACTOR 1.414f

/* If a deletion empties a bucket and causes the ratio of used buckets to
   table size to become smaller than the shrink threshold (a number between
   0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
   number greater than the shrink threshold but smaller than 1.0).  The shrink
   threshold and factor default to 0.0 and 1.0, meaning that the table never
   shrinks.  */
#define DEFAULT_SHRINK_THRESHOLD 0.0f
#define DEFAULT_SHRINK_FACTOR 1.0f

/* Use this to initialize or reset a TUNING structure to
   some sensible values.  */
static const Hash_tuning default_tuning =
  {
    DEFAULT_SHRINK_THRESHOLD,
    DEFAULT_SHRINK_FACTOR,
    DEFAULT_GROWTH_THRESHOLD,
    DEFAULT_GROWTH_FACTOR,
    false
  };
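
/* As an illustrative sketch only: a caller expecting many deletions might
   install a tuning that allows shrinking.  The values below are examples
   chosen to satisfy check_tuning (defined further below), not
   recommendations; the fields are spelled out with C99 designated
   initializers for clarity:

     static const Hash_tuning shrink_friendly_tuning =
       {
         .shrink_threshold = 0.2f,
         .shrink_factor = 0.5f,
         .growth_threshold = 0.8f,
         .growth_factor = 2.0f,
         .is_n_buckets = false
       };

     Hash_table *ht = hash_initialize (100, &shrink_friendly_tuning,
                                       NULL, NULL, NULL);

   With is_n_buckets false, the CANDIDATE argument (here 100) is read as an
   expected number of entries rather than a bucket count.  */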

/* Information and lookup.  */

size_t
hash_get_n_buckets (const Hash_table *table)
{
  return table->n_buckets;
}

size_t
hash_get_n_buckets_used (const Hash_table *table)
{
  return table->n_buckets_used;
}

size_t
hash_get_n_entries (const Hash_table *table)
{
  return table->n_entries;
}

size_t
hash_get_max_bucket_length (const Hash_table *table)
{
  struct hash_entry const *bucket;
  size_t max_bucket_length = 0;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry const *cursor = bucket;
          size_t bucket_length = 1;

          while (cursor = cursor->next, cursor)
            bucket_length++;

          if (bucket_length > max_bucket_length)
            max_bucket_length = bucket_length;
        }
    }

  return max_bucket_length;
}
|
|---|
| 182 |
|
|---|
| 183 | bool
|
|---|
| 184 | hash_table_ok (const Hash_table *table)
|
|---|
| 185 | {
|
|---|
| 186 | struct hash_entry const *bucket;
|
|---|
| 187 | size_t n_buckets_used = 0;
|
|---|
| 188 | size_t n_entries = 0;
|
|---|
| 189 |
|
|---|
| 190 | for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
|
|---|
| 191 | {
|
|---|
| 192 | if (bucket->data)
|
|---|
| 193 | {
|
|---|
| 194 | struct hash_entry const *cursor = bucket;
|
|---|
| 195 |
|
|---|
| 196 | /* Count bucket head. */
|
|---|
| 197 | n_buckets_used++;
|
|---|
| 198 | n_entries++;
|
|---|
| 199 |
|
|---|
| 200 | /* Count bucket overflow. */
|
|---|
| 201 | while (cursor = cursor->next, cursor)
|
|---|
| 202 | n_entries++;
|
|---|
| 203 | }
|
|---|
| 204 | }
|
|---|
| 205 |
|
|---|
| 206 | if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
|
|---|
| 207 | return true;
|
|---|
| 208 |
|
|---|
| 209 | return false;
|
|---|
| 210 | }
|
|---|
| 211 |
|
|---|
| 212 | void
|
|---|
| 213 | hash_print_statistics (const Hash_table *table, FILE *stream)
|
|---|
| 214 | {
|
|---|
| 215 | size_t n_entries = hash_get_n_entries (table);
|
|---|
| 216 | size_t n_buckets = hash_get_n_buckets (table);
|
|---|
| 217 | size_t n_buckets_used = hash_get_n_buckets_used (table);
|
|---|
| 218 | size_t max_bucket_length = hash_get_max_bucket_length (table);
|
|---|
| 219 |
|
|---|
| 220 | fprintf (stream, "# entries: %lu\n", (unsigned long int) n_entries);
|
|---|
| 221 | fprintf (stream, "# buckets: %lu\n", (unsigned long int) n_buckets);
|
|---|
| 222 | fprintf (stream, "# buckets used: %lu (%.2f%%)\n",
|
|---|
| 223 | (unsigned long int) n_buckets_used,
|
|---|
| 224 | (100.0 * n_buckets_used) / n_buckets);
|
|---|
| 225 | fprintf (stream, "max bucket length: %lu\n",
|
|---|
| 226 | (unsigned long int) max_bucket_length);
|
|---|
| 227 | }
|
|---|
| 228 |
|
|---|
| 229 | /* Hash KEY and return a pointer to the selected bucket.
|
|---|
| 230 | If TABLE->hasher misbehaves, abort. */
|
|---|
| 231 | static struct hash_entry *
|
|---|
| 232 | safe_hasher (const Hash_table *table, const void *key)
|
|---|
| 233 | {
|
|---|
| 234 | size_t n = table->hasher (key, table->n_buckets);
|
|---|
| 235 | if (! (n < table->n_buckets))
|
|---|
| 236 | abort ();
|
|---|
| 237 | return table->bucket + n;
|
|---|
| 238 | }
|
|---|
| 239 |
|
|---|
| 240 | void *
|
|---|
| 241 | hash_lookup (const Hash_table *table, const void *entry)
|
|---|
| 242 | {
|
|---|
| 243 | struct hash_entry const *bucket = safe_hasher (table, entry);
|
|---|
| 244 | struct hash_entry const *cursor;
|
|---|
| 245 |
|
|---|
| 246 | if (bucket->data == NULL)
|
|---|
| 247 | return NULL;
|
|---|
| 248 |
|
|---|
| 249 | for (cursor = bucket; cursor; cursor = cursor->next)
|
|---|
| 250 | if (entry == cursor->data || table->comparator (entry, cursor->data))
|
|---|
| 251 | return cursor->data;
|
|---|
| 252 |
|
|---|
| 253 | return NULL;
|
|---|
| 254 | }
|
|---|
| 255 |
|
|---|
| 256 | /* Walking. */
|
|---|
| 257 |
|
|---|
| 258 | void *
|
|---|
| 259 | hash_get_first (const Hash_table *table)
|
|---|
| 260 | {
|
|---|
| 261 | struct hash_entry const *bucket;
|
|---|
| 262 |
|
|---|
| 263 | if (table->n_entries == 0)
|
|---|
| 264 | return NULL;
|
|---|
| 265 |
|
|---|
| 266 | for (bucket = table->bucket; ; bucket++)
|
|---|
| 267 | if (! (bucket < table->bucket_limit))
|
|---|
| 268 | abort ();
|
|---|
| 269 | else if (bucket->data)
|
|---|
| 270 | return bucket->data;
|
|---|
| 271 | }
|
|---|
| 272 |
|
|---|
| 273 | void *
|
|---|
| 274 | hash_get_next (const Hash_table *table, const void *entry)
|
|---|
| 275 | {
|
|---|
| 276 | struct hash_entry const *bucket = safe_hasher (table, entry);
|
|---|
| 277 | struct hash_entry const *cursor;
|
|---|
| 278 |
|
|---|
| 279 | /* Find next entry in the same bucket. */
|
|---|
| 280 | cursor = bucket;
|
|---|
| 281 | do
|
|---|
| 282 | {
|
|---|
| 283 | if (cursor->data == entry && cursor->next)
|
|---|
| 284 | return cursor->next->data;
|
|---|
| 285 | cursor = cursor->next;
|
|---|
| 286 | }
|
|---|
| 287 | while (cursor != NULL);
|
|---|
| 288 |
|
|---|
| 289 | /* Find first entry in any subsequent bucket. */
|
|---|
| 290 | while (++bucket < table->bucket_limit)
|
|---|
| 291 | if (bucket->data)
|
|---|
| 292 | return bucket->data;
|
|---|
| 293 |
|
|---|
| 294 | /* None found. */
|
|---|
| 295 | return NULL;
|
|---|
| 296 | }
|
|---|
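
/* A typical walk over every user entry, combining the two functions above;
   a minimal sketch, where 'process' stands for arbitrary caller code:

     void *item;
     for (item = hash_get_first (table);
          item != NULL;
          item = hash_get_next (table, item))
       process (item);

   The traversal order is unspecified, and the table must not be modified
   while the walk is in progress.  */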

size_t
hash_get_entries (const Hash_table *table, void **buffer,
                  size_t buffer_size)
{
  size_t counter = 0;
  struct hash_entry const *bucket;
  struct hash_entry const *cursor;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          for (cursor = bucket; cursor; cursor = cursor->next)
            {
              if (counter >= buffer_size)
                return counter;
              buffer[counter++] = cursor->data;
            }
        }
    }

  return counter;
}

size_t
hash_do_for_each (const Hash_table *table, Hash_processor processor,
                  void *processor_data)
{
  size_t counter = 0;
  struct hash_entry const *bucket;
  struct hash_entry const *cursor;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          for (cursor = bucket; cursor; cursor = cursor->next)
            {
              if (! processor (cursor->data, processor_data))
                return counter;
              counter++;
            }
        }
    }

  return counter;
}
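
/* The same walk can also be driven by the table itself via hash_do_for_each;
   a minimal sketch, where 'print_entry' is a hypothetical Hash_processor
   that treats each user entry as a string and never stops early:

     static bool
     print_entry (void *data, void *stream)
     {
       fputs (data, stream);
       fputc ('\n', stream);
       return true;
     }

     size_t n_printed = hash_do_for_each (table, print_entry, stdout);

   Returning false from the processor stops the walk early; the result
   counts only the entries for which the processor returned true.  */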

/* Allocation and clean-up.  */

#if USE_DIFF_HASH

/* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
   B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
   Software--practice & experience 20, 2 (Feb 1990), 209-224.  Good hash
   algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
   may not be good for your application."  */

size_t
hash_string (const char *string, size_t n_buckets)
{
# define HASH_ONE_CHAR(Value, Byte) \
  ((Byte) + rotl_sz (Value, 7))

  size_t value = 0;
  unsigned char ch;

  for (; (ch = *string); string++)
    value = HASH_ONE_CHAR (value, ch);
  return value % n_buckets;

# undef HASH_ONE_CHAR
}

#else /* not USE_DIFF_HASH */

/* This one comes from 'recode', and performs a bit better than the above in
   a few experiments.  It is inspired by a hashing routine found in the very
   old Cyber 'snoop', itself written in typical Greg Mansfield style.  (By
   the way, what happened to this excellent man?  Is he still alive?)  */

size_t
hash_string (const char *string, size_t n_buckets)
{
  size_t value = 0;
  unsigned char ch;

  for (; (ch = *string); string++)
    value = (value * 31 + ch) % n_buckets;
  return value;
}

#endif /* not USE_DIFF_HASH */
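
/* As a worked example of the multiplicative variant above,
   hash_string ("ab", 101) computes:

     value = (0 * 31 + 'a') % 101 = 97 % 101 = 97
     value = (97 * 31 + 'b') % 101 = 3105 % 101 = 75

   so "ab" lands in slot 75 of a 101-bucket table.  Reducing modulo
   N_BUCKETS at every step keeps VALUE small, avoiding overflow, and by
   modular arithmetic yields the same slot as reducing only once at the
   end.  */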

/* Return true if CANDIDATE is a prime number.  CANDIDATE should be an odd
   number at least equal to 11.  */

static bool _GL_ATTRIBUTE_CONST
is_prime (size_t candidate)
{
  size_t divisor = 3;
  size_t square = divisor * divisor;

  while (square < candidate && (candidate % divisor))
    {
      /* Advance DIVISOR to the next odd number, maintaining the invariant
         SQUARE == DIVISOR * DIVISOR: (d + 2)^2 - d^2 == 4 * (d + 1).  */
      divisor++;
      square += 4 * divisor;
      divisor++;
    }

  return (candidate % divisor ? true : false);
}

/* Round a given CANDIDATE number up to the nearest prime, and return that
   prime.  Primes lower than 10 are merely skipped.  */

static size_t _GL_ATTRIBUTE_CONST
next_prime (size_t candidate)
{
  /* Skip small primes.  */
  if (candidate < 10)
    candidate = 10;

  /* Make it definitely odd.  */
  candidate |= 1;

  while (SIZE_MAX != candidate && !is_prime (candidate))
    candidate += 2;

  return candidate;
}
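
/* For example, next_prime (24) first forces the candidate to the odd
   number 25, then tests successive odd numbers with is_prime:

     25 == 5 * 5    rejected
     27 == 3 * 9    rejected
     29             prime, returned

   so a table asked for 24 buckets actually gets 29.  */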

void
hash_reset_tuning (Hash_tuning *tuning)
{
  *tuning = default_tuning;
}

/* If the user passes a NULL hasher, we hash the raw pointer.  */
static size_t
raw_hasher (const void *data, size_t n)
{
  /* When hashing unique pointers, it is often the case that they were
     generated by malloc and thus have the property that the low-order
     bits are 0.  As this tends to give poorer performance with small
     tables, we rotate the pointer value before performing division,
     in an attempt to improve hash quality.  */
  size_t val = rotr_sz ((size_t) data, 3);
  return val % n;
}

/* If the user passes a NULL comparator, we use pointer comparison.  */
static bool
raw_comparator (const void *a, const void *b)
{
  return a == b;
}


/* For the given hash TABLE, check the user supplied tuning structure for
   reasonable values, and return true if there is no gross error with it.
   Otherwise, definitively reset the TUNING field to some acceptable default
   in the hash table (that is, the user loses the right of further modifying
   tuning arguments), and return false.  */

static bool
check_tuning (Hash_table *table)
{
  const Hash_tuning *tuning = table->tuning;
  float epsilon;
  if (tuning == &default_tuning)
    return true;

  /* Be a bit stricter than mathematics would require, so that
     rounding errors in size calculations do not cause allocations to
     fail to grow or shrink as they should.  The smallest allocation
     is 11 (due to next_prime's algorithm), so an epsilon of 0.1
     should be good enough.  */
  epsilon = 0.1f;

  if (epsilon < tuning->growth_threshold
      && tuning->growth_threshold < 1 - epsilon
      && 1 + epsilon < tuning->growth_factor
      && 0 <= tuning->shrink_threshold
      && tuning->shrink_threshold + epsilon < tuning->shrink_factor
      && tuning->shrink_factor <= 1
      && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
    return true;

  table->tuning = &default_tuning;
  return false;
}

/* Compute the size of the bucket array for the given CANDIDATE and
   TUNING, or return 0 if there is no possible way to allocate that
   many entries.  */

static size_t _GL_ATTRIBUTE_PURE
compute_bucket_size (size_t candidate, const Hash_tuning *tuning)
{
  if (!tuning->is_n_buckets)
    {
      float new_candidate = candidate / tuning->growth_threshold;
      if ((float) SIZE_MAX <= new_candidate)
        return 0;
      candidate = new_candidate;
    }
  candidate = next_prime (candidate);
  if (xalloc_oversized (candidate, sizeof (struct hash_entry *)))
    return 0;
  return candidate;
}

Hash_table *
hash_initialize (size_t candidate, const Hash_tuning *tuning,
                 Hash_hasher hasher, Hash_comparator comparator,
                 Hash_data_freer data_freer)
{
  Hash_table *table;

  if (hasher == NULL)
    hasher = raw_hasher;
  if (comparator == NULL)
    comparator = raw_comparator;

  table = malloc (sizeof *table);
  if (table == NULL)
    return NULL;

  if (!tuning)
    tuning = &default_tuning;
  table->tuning = tuning;
  if (!check_tuning (table))
    {
      /* Fail if the tuning options are invalid.  This is the only occasion
         when the user gets some feedback about it.  Once the table is
         created, if the user provides invalid tuning options, we silently
         revert to using the defaults, and ignore further requests to change
         the tuning options.  */
      goto fail;
    }

  table->n_buckets = compute_bucket_size (candidate, tuning);
  if (!table->n_buckets)
    goto fail;

  table->bucket = calloc (table->n_buckets, sizeof *table->bucket);
  if (table->bucket == NULL)
    goto fail;
  table->bucket_limit = table->bucket + table->n_buckets;
  table->n_buckets_used = 0;
  table->n_entries = 0;

  table->hasher = hasher;
  table->comparator = comparator;
  table->data_freer = data_freer;

  table->free_entry_list = NULL;
#if USE_OBSTACK
  obstack_init (&table->entry_stack);
#endif
  return table;

 fail:
  free (table);
  return NULL;
}

void
hash_clear (Hash_table *table)
{
  struct hash_entry *bucket;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry *cursor;
          struct hash_entry *next;

          /* Free the bucket overflow.  */
          for (cursor = bucket->next; cursor; cursor = next)
            {
              if (table->data_freer)
                table->data_freer (cursor->data);
              cursor->data = NULL;

              next = cursor->next;
              /* Relinking is done one entry at a time, as it is to be
                 expected that overflows are either rare or short.  */
              cursor->next = table->free_entry_list;
              table->free_entry_list = cursor;
            }

          /* Free the bucket head.  */
          if (table->data_freer)
            table->data_freer (bucket->data);
          bucket->data = NULL;
          bucket->next = NULL;
        }
    }

  table->n_buckets_used = 0;
  table->n_entries = 0;
}

void
hash_free (Hash_table *table)
{
  struct hash_entry *bucket;
  struct hash_entry *cursor;
  struct hash_entry *next;

  /* Call the user data_freer function.  */
  if (table->data_freer && table->n_entries)
    {
      for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
        {
          if (bucket->data)
            {
              for (cursor = bucket; cursor; cursor = cursor->next)
                table->data_freer (cursor->data);
            }
        }
    }

#if USE_OBSTACK

  obstack_free (&table->entry_stack, NULL);

#else

  /* Free all bucket overflowed entries.  */
  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      for (cursor = bucket->next; cursor; cursor = next)
        {
          next = cursor->next;
          free (cursor);
        }
    }

  /* Also reclaim the internal list of previously freed entries.  */
  for (cursor = table->free_entry_list; cursor; cursor = next)
    {
      next = cursor->next;
      free (cursor);
    }

#endif

  /* Free the remainder of the hash table structure.  */
  free (table->bucket);
  free (table);
}

/* Insertion and deletion.  */

/* Get a new hash entry for a bucket overflow, possibly by recycling a
   previously freed one.  If this is not possible, allocate a new one.  */

static struct hash_entry *
allocate_entry (Hash_table *table)
{
  struct hash_entry *new;

  if (table->free_entry_list)
    {
      new = table->free_entry_list;
      table->free_entry_list = new->next;
    }
  else
    {
#if USE_OBSTACK
      new = obstack_alloc (&table->entry_stack, sizeof *new);
#else
      new = malloc (sizeof *new);
#endif
    }

  return new;
}

/* Free a hash entry which was part of some bucket overflow,
   saving it for later recycling.  */

static void
free_entry (Hash_table *table, struct hash_entry *entry)
{
  entry->data = NULL;
  entry->next = table->free_entry_list;
  table->free_entry_list = entry;
}

/* This private function is used to help with insertion and deletion.  When
   ENTRY matches an entry in the table, return a pointer to the corresponding
   user data and set *BUCKET_HEAD to the head of the selected bucket.
   Otherwise, return NULL.  When DELETE is true and ENTRY matches an entry in
   the table, unlink the matching entry.  */

static void *
hash_find_entry (Hash_table *table, const void *entry,
                 struct hash_entry **bucket_head, bool delete)
{
  struct hash_entry *bucket = safe_hasher (table, entry);
  struct hash_entry *cursor;

  *bucket_head = bucket;

  /* Test for empty bucket.  */
  if (bucket->data == NULL)
    return NULL;

  /* See if the entry is the first in the bucket.  */
  if (entry == bucket->data || table->comparator (entry, bucket->data))
    {
      void *data = bucket->data;

      if (delete)
        {
          if (bucket->next)
            {
              struct hash_entry *next = bucket->next;

              /* Bump the first overflow entry into the bucket head, then save
                 the previous first overflow entry for later recycling.  */
              *bucket = *next;
              free_entry (table, next);
            }
          else
            {
              bucket->data = NULL;
            }
        }

      return data;
    }

  /* Scan the bucket overflow.  */
  for (cursor = bucket; cursor->next; cursor = cursor->next)
    {
      if (entry == cursor->next->data
          || table->comparator (entry, cursor->next->data))
        {
          void *data = cursor->next->data;

          if (delete)
            {
              struct hash_entry *next = cursor->next;

              /* Unlink the entry to delete, then save the freed entry for
                 later recycling.  */
              cursor->next = next->next;
              free_entry (table, next);
            }

          return data;
        }
    }

  /* No entry found.  */
  return NULL;
}

/* Internal helper, to move entries from SRC to DST.  Both tables must
   share the same free entry list.  If SAFE, only move overflow
   entries, saving bucket heads for later, so that no allocations will
   occur.  Return false if the free entry list is exhausted and an
   allocation fails.  */

static bool
transfer_entries (Hash_table *dst, Hash_table *src, bool safe)
{
  struct hash_entry *bucket;
  struct hash_entry *cursor;
  struct hash_entry *next;
  for (bucket = src->bucket; bucket < src->bucket_limit; bucket++)
    if (bucket->data)
      {
        void *data;
        struct hash_entry *new_bucket;

        /* Within each bucket, transfer overflow entries first and
           then the bucket head, to minimize memory pressure.  After
           all, the only time we might allocate is when moving the
           bucket head, but moving overflow entries first may create
           free entries that can be recycled by the time we finally
           get to the bucket head.  */
        for (cursor = bucket->next; cursor; cursor = next)
          {
            data = cursor->data;
            new_bucket = safe_hasher (dst, data);

            next = cursor->next;

            if (new_bucket->data)
              {
                /* Merely relink an existing entry, when moving from a
                   bucket overflow into a bucket overflow.  */
                cursor->next = new_bucket->next;
                new_bucket->next = cursor;
              }
            else
              {
                /* Free an existing entry, when moving from a bucket
                   overflow into a bucket header.  */
                new_bucket->data = data;
                dst->n_buckets_used++;
                free_entry (dst, cursor);
              }
          }
        /* Now move the bucket head.  Be sure that if we fail due to
           allocation failure that the src table is in a consistent
           state.  */
        data = bucket->data;
        bucket->next = NULL;
        if (safe)
          continue;
        new_bucket = safe_hasher (dst, data);

        if (new_bucket->data)
          {
            /* Allocate or recycle an entry, when moving from a bucket
               header into a bucket overflow.  */
            struct hash_entry *new_entry = allocate_entry (dst);

            if (new_entry == NULL)
              return false;

            new_entry->data = data;
            new_entry->next = new_bucket->next;
            new_bucket->next = new_entry;
          }
        else
          {
            /* Move from one bucket header to another.  */
            new_bucket->data = data;
            dst->n_buckets_used++;
          }
        bucket->data = NULL;
        src->n_buckets_used--;
      }
  return true;
}

bool
hash_rehash (Hash_table *table, size_t candidate)
{
  Hash_table storage;
  Hash_table *new_table;
  size_t new_size = compute_bucket_size (candidate, table->tuning);

  if (!new_size)
    return false;
  if (new_size == table->n_buckets)
    return true;
  new_table = &storage;
  new_table->bucket = calloc (new_size, sizeof *new_table->bucket);
  if (new_table->bucket == NULL)
    return false;
  new_table->n_buckets = new_size;
  new_table->bucket_limit = new_table->bucket + new_size;
  new_table->n_buckets_used = 0;
  new_table->n_entries = 0;
  new_table->tuning = table->tuning;
  new_table->hasher = table->hasher;
  new_table->comparator = table->comparator;
  new_table->data_freer = table->data_freer;

  /* In order for the transfer to successfully complete, we need
     additional overflow entries when distinct buckets in the old
     table collide into a common bucket in the new table.  The worst
     case possible is a hasher that gives a good spread with the old
     size, but returns a constant with the new size; if we were to
     guarantee table->n_buckets_used-1 free entries in advance, then
     the transfer would be guaranteed to not allocate memory.
     However, for large tables, a guarantee of no further allocation
     introduces a lot of extra memory pressure, all for an unlikely
     corner case (most rehashes reduce, rather than increase, the
     number of overflow entries needed).  So, we instead ensure that
     the transfer process can be reversed if we hit a memory
     allocation failure mid-transfer.  */

  /* Merely reuse the extra old space into the new table.  */
#if USE_OBSTACK
  new_table->entry_stack = table->entry_stack;
#endif
  new_table->free_entry_list = table->free_entry_list;

  if (transfer_entries (new_table, table, false))
    {
      /* Entries transferred successfully; tie up the loose ends.  */
      free (table->bucket);
      table->bucket = new_table->bucket;
      table->bucket_limit = new_table->bucket_limit;
      table->n_buckets = new_table->n_buckets;
      table->n_buckets_used = new_table->n_buckets_used;
      table->free_entry_list = new_table->free_entry_list;
      /* table->n_entries and table->entry_stack already hold their value.  */
      return true;
    }

  /* We've allocated new_table->bucket (and possibly some entries),
     exhausted the free list, and moved some but not all entries into
     new_table.  We must undo the partial move before returning
     failure.  The only way to get into this situation is if new_table
     uses fewer buckets than the old table, so we will reclaim some
     free entries as overflows in the new table are put back into
     distinct buckets in the old table.

     There are some pathological cases where a single pass through the
     table requires more intermediate overflow entries than using two
     passes.  Two passes give worse cache performance and take
     longer, but at this point, we're already out of memory, so slow
     and safe is better than failure.  */
  table->free_entry_list = new_table->free_entry_list;
  if (! (transfer_entries (table, new_table, true)
         && transfer_entries (table, new_table, false)))
    abort ();
  /* table->n_entries already holds its value.  */
  free (new_table->bucket);
  return false;
}
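
/* Because hash_rehash is part of the public interface, a caller anticipating
   a large batch of insertions can size the table once, up front, instead of
   letting it grow repeatedly; a minimal sketch, where 'expected' is a
   hypothetical caller-supplied estimate of the final number of entries:

     size_t expected = 1000000;
     if (!hash_rehash (table, expected))
       abort ();

   On failure the table is left intact, so a caller may also simply continue
   with the smaller table.  With the default tuning (is_n_buckets false),
   compute_bucket_size divides the candidate by the growth threshold, so the
   resulting table can hold roughly 'expected' entries before the next
   automatic growth.  */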

int
hash_insert_if_absent (Hash_table *table, void const *entry,
                       void const **matched_ent)
{
  void *data;
  struct hash_entry *bucket;

  /* The caller cannot insert a NULL entry, since hash_lookup returns NULL
     to indicate "not found", and hash_find_entry uses "bucket->data == NULL"
     to indicate an empty bucket.  */
  if (! entry)
    abort ();

  /* If there's a matching entry already in the table, return that.  */
  if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
    {
      if (matched_ent)
        *matched_ent = data;
      return 0;
    }

  /* If the growth threshold of the buckets in use has been reached, increase
     the table size and rehash.  There's no point in checking the number of
     entries: if the hashing function is ill-conditioned, rehashing is not
     likely to improve it.  */

  if (table->n_buckets_used
      > table->tuning->growth_threshold * table->n_buckets)
    {
      /* Check more fully, before starting real work.  If tuning arguments
         became invalid, the second check will rely on proper defaults.  */
      check_tuning (table);
      if (table->n_buckets_used
          > table->tuning->growth_threshold * table->n_buckets)
        {
          const Hash_tuning *tuning = table->tuning;
          float candidate =
            (tuning->is_n_buckets
             ? (table->n_buckets * tuning->growth_factor)
             : (table->n_buckets * tuning->growth_factor
                * tuning->growth_threshold));

          if ((float) SIZE_MAX <= candidate)
            return -1;

          /* If the rehash fails, return -1 to report the allocation
             failure.  */
          if (!hash_rehash (table, candidate))
            return -1;

          /* Update the bucket we are interested in.  */
          if (hash_find_entry (table, entry, &bucket, false) != NULL)
            abort ();
        }
    }

  /* ENTRY is not matched; it should be inserted.  */

  if (bucket->data)
    {
      struct hash_entry *new_entry = allocate_entry (table);

      if (new_entry == NULL)
        return -1;

      /* Add ENTRY in the overflow of the bucket.  */

      new_entry->data = (void *) entry;
      new_entry->next = bucket->next;
      bucket->next = new_entry;
      table->n_entries++;
      return 1;
    }

  /* Add ENTRY right in the bucket head.  */

  bucket->data = (void *) entry;
  table->n_entries++;
  table->n_buckets_used++;

  return 1;
}

void *
hash_insert (Hash_table *table, void const *entry)
{
  void const *matched_ent;
  int err = hash_insert_if_absent (table, entry, &matched_ent);
  return (err == -1
          ? NULL
          : (void *) (err == 0 ? matched_ent : entry));
}
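
/* A sketch of the three-way result of hash_insert_if_absent, assuming a
   string table like the one in the example near the top of this file:

     void const *matched;
     int r = hash_insert_if_absent (table, "blue", &matched);
     if (r == -1)
       abort ();
     else if (r == 0)
       printf ("duplicate of %s\n", (char const *) matched);
     else
       printf ("inserted\n");

   hash_insert, just above, folds this into a single pointer: NULL on
   allocation failure, the matching entry on a duplicate, or ENTRY itself
   when newly inserted.  */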

void *
hash_remove (Hash_table *table, const void *entry)
{
  void *data;
  struct hash_entry *bucket;

  data = hash_find_entry (table, entry, &bucket, true);
  if (!data)
    return NULL;

  table->n_entries--;
  if (!bucket->data)
    {
      table->n_buckets_used--;

      /* If the shrink threshold of the buckets in use has been reached,
         rehash into a smaller table.  */

      if (table->n_buckets_used
          < table->tuning->shrink_threshold * table->n_buckets)
        {
          /* Check more fully, before starting real work.  If tuning arguments
             became invalid, the second check will rely on proper defaults.  */
          check_tuning (table);
          if (table->n_buckets_used
              < table->tuning->shrink_threshold * table->n_buckets)
            {
              const Hash_tuning *tuning = table->tuning;
              size_t candidate =
                (tuning->is_n_buckets
                 ? table->n_buckets * tuning->shrink_factor
                 : (table->n_buckets * tuning->shrink_factor
                    * tuning->growth_threshold));

              if (!hash_rehash (table, candidate))
                {
                  /* Failure to allocate memory in an attempt to
                     shrink the table is not fatal.  But since memory
                     is low, we can at least be kind and free any
                     spare entries, rather than keeping them tied up
                     in the free entry list.  */
#if ! USE_OBSTACK
                  struct hash_entry *cursor = table->free_entry_list;
                  struct hash_entry *next;
                  while (cursor)
                    {
                      next = cursor->next;
                      free (cursor);
                      cursor = next;
                    }
                  table->free_entry_list = NULL;
#endif
                }
            }
        }
    }

  return data;
}

void *
hash_delete (Hash_table *table, const void *entry)
{
  return hash_remove (table, entry);
}

/* Testing.  */

#if TESTING

void
hash_print (const Hash_table *table)
{
  struct hash_entry *bucket = (struct hash_entry *) table->bucket;

  for ( ; bucket < table->bucket_limit; bucket++)
    {
      struct hash_entry *cursor;

      /* Print the slot number only for nonempty buckets.  */
      if (bucket->data)
        printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));

      for (cursor = bucket; cursor; cursor = cursor->next)
        {
          char const *s = cursor->data;
          /* FIXME */
          if (s)
            printf ("  %s\n", s);
        }
    }
}

#endif /* TESTING */