source: trunk/essentials/app-arch/tar/lib/hash.c

Last change on this file was 3342, checked in by bird, 18 years ago

tar 1.16.1

File size: 29.9 KB
Line 
1/* hash - hashing table processing.
2
3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006 Free
4 Software Foundation, Inc.
5
6 Written by Jim Meyering, 1992.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software Foundation,
20 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
21
22/* A generic hash table package. */
23
24/* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
25 of malloc. If you change USE_OBSTACK, you have to recompile! */
26
27#include <config.h>
28
29#include "hash.h"
30#include "xalloc.h"
31
32#include <limits.h>
33#include <stdio.h>
34#include <stdlib.h>
35
36#if USE_OBSTACK
37# include "obstack.h"
38# ifndef obstack_chunk_alloc
39# define obstack_chunk_alloc malloc
40# endif
41# ifndef obstack_chunk_free
42# define obstack_chunk_free free
43# endif
44#endif
45
46#ifndef SIZE_MAX
47# define SIZE_MAX ((size_t) -1)
48#endif
49
50struct hash_table
51 {
52 /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
53 for a possibility of N_BUCKETS. Among those, N_BUCKETS_USED buckets
54 are not empty, there are N_ENTRIES active entries in the table. */
55 struct hash_entry *bucket;
56 struct hash_entry const *bucket_limit;
57 size_t n_buckets;
58 size_t n_buckets_used;
59 size_t n_entries;
60
61 /* Tuning arguments, kept in a physicaly separate structure. */
62 const Hash_tuning *tuning;
63
64 /* Three functions are given to `hash_initialize', see the documentation
65 block for this function. In a word, HASHER randomizes a user entry
66 into a number up from 0 up to some maximum minus 1; COMPARATOR returns
67 true if two user entries compare equally; and DATA_FREER is the cleanup
68 function for a user entry. */
69 Hash_hasher hasher;
70 Hash_comparator comparator;
71 Hash_data_freer data_freer;
72
73 /* A linked list of freed struct hash_entry structs. */
74 struct hash_entry *free_entry_list;
75
76#if USE_OBSTACK
77 /* Whenever obstacks are used, it is possible to allocate all overflowed
78 entries into a single stack, so they all can be freed in a single
79 operation. It is not clear if the speedup is worth the trouble. */
80 struct obstack entry_stack;
81#endif
82 };
83
84/* A hash table contains many internal entries, each holding a pointer to
85 some user provided data (also called a user entry). An entry indistinctly
86 refers to both the internal entry and its associated user entry. A user
87 entry contents may be hashed by a randomization function (the hashing
88 function, or just `hasher' for short) into a number (or `slot') between 0
89 and the current table size. At each slot position in the hash table,
90 starts a linked chain of entries for which the user data all hash to this
91 slot. A bucket is the collection of all entries hashing to the same slot.
92
93 A good `hasher' function will distribute entries rather evenly in buckets.
94 In the ideal case, the length of each bucket is roughly the number of
95 entries divided by the table size. Finding the slot for a data is usually
96 done in constant time by the `hasher', and the later finding of a precise
97 entry is linear in time with the size of the bucket. Consequently, a
98 larger hash table size (that is, a larger number of buckets) is prone to
99 yielding shorter chains, *given* the `hasher' function behaves properly.
100
101 Long buckets slow down the lookup algorithm. One might use big hash table
102 sizes in hope to reduce the average length of buckets, but this might
103 become inordinate, as unused slots in the hash table take some space. The
104 best bet is to make sure you are using a good `hasher' function (beware
105 that those are not that easy to write! :-), and to use a table size
106 larger than the actual number of entries. */
107
108/* If an insertion makes the ratio of nonempty buckets to table size larger
109 than the growth threshold (a number between 0.0 and 1.0), then increase
110 the table size by multiplying by the growth factor (a number greater than
111 1.0). The growth threshold defaults to 0.8, and the growth factor
112 defaults to 1.414, meaning that the table will have doubled its size
113 every second time 80% of the buckets get used. */
114#define DEFAULT_GROWTH_THRESHOLD 0.8
115#define DEFAULT_GROWTH_FACTOR 1.414
116
117/* If a deletion empties a bucket and causes the ratio of used buckets to
118 table size to become smaller than the shrink threshold (a number between
119 0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
120 number greater than the shrink threshold but smaller than 1.0). The shrink
121 threshold and factor default to 0.0 and 1.0, meaning that the table never
122 shrinks. */
123#define DEFAULT_SHRINK_THRESHOLD 0.0
124#define DEFAULT_SHRINK_FACTOR 1.0
125
126/* Use this to initialize or reset a TUNING structure to
127 some sensible values. */
128static const Hash_tuning default_tuning =
129 {
130 DEFAULT_SHRINK_THRESHOLD,
131 DEFAULT_SHRINK_FACTOR,
132 DEFAULT_GROWTH_THRESHOLD,
133 DEFAULT_GROWTH_FACTOR,
134 false
135 };
136
137/* Information and lookup. */
138
139/* The following few functions provide information about the overall hash
140 table organization: the number of entries, number of buckets and maximum
141 length of buckets. */
142
143/* Return the number of buckets in the hash table. The table size, the total
144 number of buckets (used plus unused), or the maximum number of slots, are
145 the same quantity. */
146
147size_t
148hash_get_n_buckets (const Hash_table *table)
149{
150 return table->n_buckets;
151}
152
153/* Return the number of slots in use (non-empty buckets). */
154
155size_t
156hash_get_n_buckets_used (const Hash_table *table)
157{
158 return table->n_buckets_used;
159}
160
161/* Return the number of active entries. */
162
163size_t
164hash_get_n_entries (const Hash_table *table)
165{
166 return table->n_entries;
167}
168
169/* Return the length of the longest chain (bucket). */
170
171size_t
172hash_get_max_bucket_length (const Hash_table *table)
173{
174 struct hash_entry const *bucket;
175 size_t max_bucket_length = 0;
176
177 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
178 {
179 if (bucket->data)
180 {
181 struct hash_entry const *cursor = bucket;
182 size_t bucket_length = 1;
183
184 while (cursor = cursor->next, cursor)
185 bucket_length++;
186
187 if (bucket_length > max_bucket_length)
188 max_bucket_length = bucket_length;
189 }
190 }
191
192 return max_bucket_length;
193}
194
195/* Do a mild validation of a hash table, by traversing it and checking two
196 statistics. */
197
198bool
199hash_table_ok (const Hash_table *table)
200{
201 struct hash_entry const *bucket;
202 size_t n_buckets_used = 0;
203 size_t n_entries = 0;
204
205 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
206 {
207 if (bucket->data)
208 {
209 struct hash_entry const *cursor = bucket;
210
211 /* Count bucket head. */
212 n_buckets_used++;
213 n_entries++;
214
215 /* Count bucket overflow. */
216 while (cursor = cursor->next, cursor)
217 n_entries++;
218 }
219 }
220
221 if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
222 return true;
223
224 return false;
225}
226
227void
228hash_print_statistics (const Hash_table *table, FILE *stream)
229{
230 size_t n_entries = hash_get_n_entries (table);
231 size_t n_buckets = hash_get_n_buckets (table);
232 size_t n_buckets_used = hash_get_n_buckets_used (table);
233 size_t max_bucket_length = hash_get_max_bucket_length (table);
234
235 fprintf (stream, "# entries: %lu\n", (unsigned long int) n_entries);
236 fprintf (stream, "# buckets: %lu\n", (unsigned long int) n_buckets);
237 fprintf (stream, "# buckets used: %lu (%.2f%%)\n",
238 (unsigned long int) n_buckets_used,
239 (100.0 * n_buckets_used) / n_buckets);
240 fprintf (stream, "max bucket length: %lu\n",
241 (unsigned long int) max_bucket_length);
242}
243
244/* If ENTRY matches an entry already in the hash table, return the
245 entry from the table. Otherwise, return NULL. */
246
247void *
248hash_lookup (const Hash_table *table, const void *entry)
249{
250 struct hash_entry const *bucket
251 = table->bucket + table->hasher (entry, table->n_buckets);
252 struct hash_entry const *cursor;
253
254 if (! (bucket < table->bucket_limit))
255 abort ();
256
257 if (bucket->data == NULL)
258 return NULL;
259
260 for (cursor = bucket; cursor; cursor = cursor->next)
261 if (table->comparator (entry, cursor->data))
262 return cursor->data;
263
264 return NULL;
265}
266
267/* Walking. */
268
269/* The functions in this page traverse the hash table and process the
270 contained entries. For the traversal to work properly, the hash table
271 should not be resized nor modified while any particular entry is being
272 processed. In particular, entries should not be added or removed. */
273
274/* Return the first data in the table, or NULL if the table is empty. */
275
276void *
277hash_get_first (const Hash_table *table)
278{
279 struct hash_entry const *bucket;
280
281 if (table->n_entries == 0)
282 return NULL;
283
284 for (bucket = table->bucket; ; bucket++)
285 if (! (bucket < table->bucket_limit))
286 abort ();
287 else if (bucket->data)
288 return bucket->data;
289}
290
291/* Return the user data for the entry following ENTRY, where ENTRY has been
292 returned by a previous call to either `hash_get_first' or `hash_get_next'.
293 Return NULL if there are no more entries. */
294
295void *
296hash_get_next (const Hash_table *table, const void *entry)
297{
298 struct hash_entry const *bucket
299 = table->bucket + table->hasher (entry, table->n_buckets);
300 struct hash_entry const *cursor;
301
302 if (! (bucket < table->bucket_limit))
303 abort ();
304
305 /* Find next entry in the same bucket. */
306 for (cursor = bucket; cursor; cursor = cursor->next)
307 if (cursor->data == entry && cursor->next)
308 return cursor->next->data;
309
310 /* Find first entry in any subsequent bucket. */
311 while (++bucket < table->bucket_limit)
312 if (bucket->data)
313 return bucket->data;
314
315 /* None found. */
316 return NULL;
317}
318
319/* Fill BUFFER with pointers to active user entries in the hash table, then
320 return the number of pointers copied. Do not copy more than BUFFER_SIZE
321 pointers. */
322
323size_t
324hash_get_entries (const Hash_table *table, void **buffer,
325 size_t buffer_size)
326{
327 size_t counter = 0;
328 struct hash_entry const *bucket;
329 struct hash_entry const *cursor;
330
331 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
332 {
333 if (bucket->data)
334 {
335 for (cursor = bucket; cursor; cursor = cursor->next)
336 {
337 if (counter >= buffer_size)
338 return counter;
339 buffer[counter++] = cursor->data;
340 }
341 }
342 }
343
344 return counter;
345}
346
347/* Call a PROCESSOR function for each entry of a hash table, and return the
348 number of entries for which the processor function returned success. A
349 pointer to some PROCESSOR_DATA which will be made available to each call to
350 the processor function. The PROCESSOR accepts two arguments: the first is
351 the user entry being walked into, the second is the value of PROCESSOR_DATA
352 as received. The walking continue for as long as the PROCESSOR function
353 returns nonzero. When it returns zero, the walking is interrupted. */
354
355size_t
356hash_do_for_each (const Hash_table *table, Hash_processor processor,
357 void *processor_data)
358{
359 size_t counter = 0;
360 struct hash_entry const *bucket;
361 struct hash_entry const *cursor;
362
363 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
364 {
365 if (bucket->data)
366 {
367 for (cursor = bucket; cursor; cursor = cursor->next)
368 {
369 if (!(*processor) (cursor->data, processor_data))
370 return counter;
371 counter++;
372 }
373 }
374 }
375
376 return counter;
377}
378
379/* Allocation and clean-up. */
380
381/* Return a hash index for a NUL-terminated STRING between 0 and N_BUCKETS-1.
382 This is a convenience routine for constructing other hashing functions. */
383
384#if USE_DIFF_HASH
385
386/* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
387 B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
388 Software--practice & experience 20, 2 (Feb 1990), 209-224. Good hash
389 algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
390 may not be good for your application." */
391
392size_t
393hash_string (const char *string, size_t n_buckets)
394{
395# define ROTATE_LEFT(Value, Shift) \
396 ((Value) << (Shift) | (Value) >> ((sizeof (size_t) * CHAR_BIT) - (Shift)))
397# define HASH_ONE_CHAR(Value, Byte) \
398 ((Byte) + ROTATE_LEFT (Value, 7))
399
400 size_t value = 0;
401 unsigned char ch;
402
403 for (; (ch = *string); string++)
404 value = HASH_ONE_CHAR (value, ch);
405 return value % n_buckets;
406
407# undef ROTATE_LEFT
408# undef HASH_ONE_CHAR
409}
410
411#else /* not USE_DIFF_HASH */
412
413/* This one comes from `recode', and performs a bit better than the above as
414 per a few experiments. It is inspired from a hashing routine found in the
415 very old Cyber `snoop', itself written in typical Greg Mansfield style.
416 (By the way, what happened to this excellent man? Is he still alive?) */
417
418size_t
419hash_string (const char *string, size_t n_buckets)
420{
421 size_t value = 0;
422 unsigned char ch;
423
424 for (; (ch = *string); string++)
425 value = (value * 31 + ch) % n_buckets;
426 return value;
427}
428
429#endif /* not USE_DIFF_HASH */
430
431/* Return true if CANDIDATE is a prime number. CANDIDATE should be an odd
432 number at least equal to 11. */
433
434static bool
435is_prime (size_t candidate)
436{
437 size_t divisor = 3;
438 size_t square = divisor * divisor;
439
440 while (square < candidate && (candidate % divisor))
441 {
442 divisor++;
443 square += 4 * divisor;
444 divisor++;
445 }
446
447 return (candidate % divisor ? true : false);
448}
449
450/* Round a given CANDIDATE number up to the nearest prime, and return that
451 prime. Primes lower than 10 are merely skipped. */
452
453static size_t
454next_prime (size_t candidate)
455{
456 /* Skip small primes. */
457 if (candidate < 10)
458 candidate = 10;
459
460 /* Make it definitely odd. */
461 candidate |= 1;
462
463 while (!is_prime (candidate))
464 candidate += 2;
465
466 return candidate;
467}
468
469void
470hash_reset_tuning (Hash_tuning *tuning)
471{
472 *tuning = default_tuning;
473}
474
475/* For the given hash TABLE, check the user supplied tuning structure for
476 reasonable values, and return true if there is no gross error with it.
477 Otherwise, definitively reset the TUNING field to some acceptable default
478 in the hash table (that is, the user loses the right of further modifying
479 tuning arguments), and return false. */
480
481static bool
482check_tuning (Hash_table *table)
483{
484 const Hash_tuning *tuning = table->tuning;
485
486 /* Be a bit stricter than mathematics would require, so that
487 rounding errors in size calculations do not cause allocations to
488 fail to grow or shrink as they should. The smallest allocation
489 is 11 (due to next_prime's algorithm), so an epsilon of 0.1
490 should be good enough. */
491 float epsilon = 0.1f;
492
493 if (epsilon < tuning->growth_threshold
494 && tuning->growth_threshold < 1 - epsilon
495 && 1 + epsilon < tuning->growth_factor
496 && 0 <= tuning->shrink_threshold
497 && tuning->shrink_threshold + epsilon < tuning->shrink_factor
498 && tuning->shrink_factor <= 1
499 && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
500 return true;
501
502 table->tuning = &default_tuning;
503 return false;
504}
505
506/* Allocate and return a new hash table, or NULL upon failure. The initial
507 number of buckets is automatically selected so as to _guarantee_ that you
508 may insert at least CANDIDATE different user entries before any growth of
509 the hash table size occurs. So, if have a reasonably tight a-priori upper
510 bound on the number of entries you intend to insert in the hash table, you
511 may save some table memory and insertion time, by specifying it here. If
512 the IS_N_BUCKETS field of the TUNING structure is true, the CANDIDATE
513 argument has its meaning changed to the wanted number of buckets.
514
515 TUNING points to a structure of user-supplied values, in case some fine
516 tuning is wanted over the default behavior of the hasher. If TUNING is
517 NULL, the default tuning parameters are used instead.
518
519 The user-supplied HASHER function should be provided. It accepts two
520 arguments ENTRY and TABLE_SIZE. It computes, by hashing ENTRY contents, a
521 slot number for that entry which should be in the range 0..TABLE_SIZE-1.
522 This slot number is then returned.
523
524 The user-supplied COMPARATOR function should be provided. It accepts two
525 arguments pointing to user data, it then returns true for a pair of entries
526 that compare equal, or false otherwise. This function is internally called
527 on entries which are already known to hash to the same bucket index.
528
529 The user-supplied DATA_FREER function, when not NULL, may be later called
530 with the user data as an argument, just before the entry containing the
531 data gets freed. This happens from within `hash_free' or `hash_clear'.
532 You should specify this function only if you want these functions to free
533 all of your `data' data. This is typically the case when your data is
534 simply an auxiliary struct that you have malloc'd to aggregate several
535 values. */
536
537Hash_table *
538hash_initialize (size_t candidate, const Hash_tuning *tuning,
539 Hash_hasher hasher, Hash_comparator comparator,
540 Hash_data_freer data_freer)
541{
542 Hash_table *table;
543
544 if (hasher == NULL || comparator == NULL)
545 return NULL;
546
547 table = malloc (sizeof *table);
548 if (table == NULL)
549 return NULL;
550
551 if (!tuning)
552 tuning = &default_tuning;
553 table->tuning = tuning;
554 if (!check_tuning (table))
555 {
556 /* Fail if the tuning options are invalid. This is the only occasion
557 when the user gets some feedback about it. Once the table is created,
558 if the user provides invalid tuning options, we silently revert to
559 using the defaults, and ignore further request to change the tuning
560 options. */
561 goto fail;
562 }
563
564 if (!tuning->is_n_buckets)
565 {
566 float new_candidate = candidate / tuning->growth_threshold;
567 if (SIZE_MAX <= new_candidate)
568 goto fail;
569 candidate = new_candidate;
570 }
571
572 if (xalloc_oversized (candidate, sizeof *table->bucket))
573 goto fail;
574 table->n_buckets = next_prime (candidate);
575 if (xalloc_oversized (table->n_buckets, sizeof *table->bucket))
576 goto fail;
577
578 table->bucket = calloc (table->n_buckets, sizeof *table->bucket);
579 table->bucket_limit = table->bucket + table->n_buckets;
580 table->n_buckets_used = 0;
581 table->n_entries = 0;
582
583 table->hasher = hasher;
584 table->comparator = comparator;
585 table->data_freer = data_freer;
586
587 table->free_entry_list = NULL;
588#if USE_OBSTACK
589 obstack_init (&table->entry_stack);
590#endif
591 return table;
592
593 fail:
594 free (table);
595 return NULL;
596}
597
598/* Make all buckets empty, placing any chained entries on the free list.
599 Apply the user-specified function data_freer (if any) to the datas of any
600 affected entries. */
601
602void
603hash_clear (Hash_table *table)
604{
605 struct hash_entry *bucket;
606
607 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
608 {
609 if (bucket->data)
610 {
611 struct hash_entry *cursor;
612 struct hash_entry *next;
613
614 /* Free the bucket overflow. */
615 for (cursor = bucket->next; cursor; cursor = next)
616 {
617 if (table->data_freer)
618 (*table->data_freer) (cursor->data);
619 cursor->data = NULL;
620
621 next = cursor->next;
622 /* Relinking is done one entry at a time, as it is to be expected
623 that overflows are either rare or short. */
624 cursor->next = table->free_entry_list;
625 table->free_entry_list = cursor;
626 }
627
628 /* Free the bucket head. */
629 if (table->data_freer)
630 (*table->data_freer) (bucket->data);
631 bucket->data = NULL;
632 bucket->next = NULL;
633 }
634 }
635
636 table->n_buckets_used = 0;
637 table->n_entries = 0;
638}
639
640/* Reclaim all storage associated with a hash table. If a data_freer
641 function has been supplied by the user when the hash table was created,
642 this function applies it to the data of each entry before freeing that
643 entry. */
644
645void
646hash_free (Hash_table *table)
647{
648 struct hash_entry *bucket;
649 struct hash_entry *cursor;
650 struct hash_entry *next;
651
652 /* Call the user data_freer function. */
653 if (table->data_freer && table->n_entries)
654 {
655 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
656 {
657 if (bucket->data)
658 {
659 for (cursor = bucket; cursor; cursor = cursor->next)
660 {
661 (*table->data_freer) (cursor->data);
662 }
663 }
664 }
665 }
666
667#if USE_OBSTACK
668
669 obstack_free (&table->entry_stack, NULL);
670
671#else
672
673 /* Free all bucket overflowed entries. */
674 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
675 {
676 for (cursor = bucket->next; cursor; cursor = next)
677 {
678 next = cursor->next;
679 free (cursor);
680 }
681 }
682
683 /* Also reclaim the internal list of previously freed entries. */
684 for (cursor = table->free_entry_list; cursor; cursor = next)
685 {
686 next = cursor->next;
687 free (cursor);
688 }
689
690#endif
691
692 /* Free the remainder of the hash table structure. */
693 free (table->bucket);
694 free (table);
695}
696
697/* Insertion and deletion. */
698
699/* Get a new hash entry for a bucket overflow, possibly by reclying a
700 previously freed one. If this is not possible, allocate a new one. */
701
702static struct hash_entry *
703allocate_entry (Hash_table *table)
704{
705 struct hash_entry *new;
706
707 if (table->free_entry_list)
708 {
709 new = table->free_entry_list;
710 table->free_entry_list = new->next;
711 }
712 else
713 {
714#if USE_OBSTACK
715 new = obstack_alloc (&table->entry_stack, sizeof *new);
716#else
717 new = malloc (sizeof *new);
718#endif
719 }
720
721 return new;
722}
723
724/* Free a hash entry which was part of some bucket overflow,
725 saving it for later recycling. */
726
727static void
728free_entry (Hash_table *table, struct hash_entry *entry)
729{
730 entry->data = NULL;
731 entry->next = table->free_entry_list;
732 table->free_entry_list = entry;
733}
734
735/* This private function is used to help with insertion and deletion. When
736 ENTRY matches an entry in the table, return a pointer to the corresponding
737 user data and set *BUCKET_HEAD to the head of the selected bucket.
738 Otherwise, return NULL. When DELETE is true and ENTRY matches an entry in
739 the table, unlink the matching entry. */
740
741static void *
742hash_find_entry (Hash_table *table, const void *entry,
743 struct hash_entry **bucket_head, bool delete)
744{
745 struct hash_entry *bucket
746 = table->bucket + table->hasher (entry, table->n_buckets);
747 struct hash_entry *cursor;
748
749 if (! (bucket < table->bucket_limit))
750 abort ();
751
752 *bucket_head = bucket;
753
754 /* Test for empty bucket. */
755 if (bucket->data == NULL)
756 return NULL;
757
758 /* See if the entry is the first in the bucket. */
759 if ((*table->comparator) (entry, bucket->data))
760 {
761 void *data = bucket->data;
762
763 if (delete)
764 {
765 if (bucket->next)
766 {
767 struct hash_entry *next = bucket->next;
768
769 /* Bump the first overflow entry into the bucket head, then save
770 the previous first overflow entry for later recycling. */
771 *bucket = *next;
772 free_entry (table, next);
773 }
774 else
775 {
776 bucket->data = NULL;
777 }
778 }
779
780 return data;
781 }
782
783 /* Scan the bucket overflow. */
784 for (cursor = bucket; cursor->next; cursor = cursor->next)
785 {
786 if ((*table->comparator) (entry, cursor->next->data))
787 {
788 void *data = cursor->next->data;
789
790 if (delete)
791 {
792 struct hash_entry *next = cursor->next;
793
794 /* Unlink the entry to delete, then save the freed entry for later
795 recycling. */
796 cursor->next = next->next;
797 free_entry (table, next);
798 }
799
800 return data;
801 }
802 }
803
804 /* No entry found. */
805 return NULL;
806}
807
808/* For an already existing hash table, change the number of buckets through
809 specifying CANDIDATE. The contents of the hash table are preserved. The
810 new number of buckets is automatically selected so as to _guarantee_ that
811 the table may receive at least CANDIDATE different user entries, including
812 those already in the table, before any other growth of the hash table size
813 occurs. If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the
814 exact number of buckets desired. */
815
816bool
817hash_rehash (Hash_table *table, size_t candidate)
818{
819 Hash_table *new_table;
820 struct hash_entry *bucket;
821 struct hash_entry *cursor;
822 struct hash_entry *next;
823
824 new_table = hash_initialize (candidate, table->tuning, table->hasher,
825 table->comparator, table->data_freer);
826 if (new_table == NULL)
827 return false;
828
829 /* Merely reuse the extra old space into the new table. */
830#if USE_OBSTACK
831 obstack_free (&new_table->entry_stack, NULL);
832 new_table->entry_stack = table->entry_stack;
833#endif
834 new_table->free_entry_list = table->free_entry_list;
835
836 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
837 if (bucket->data)
838 for (cursor = bucket; cursor; cursor = next)
839 {
840 void *data = cursor->data;
841 struct hash_entry *new_bucket
842 = (new_table->bucket
843 + new_table->hasher (data, new_table->n_buckets));
844
845 if (! (new_bucket < new_table->bucket_limit))
846 abort ();
847
848 next = cursor->next;
849
850 if (new_bucket->data)
851 {
852 if (cursor == bucket)
853 {
854 /* Allocate or recycle an entry, when moving from a bucket
855 header into a bucket overflow. */
856 struct hash_entry *new_entry = allocate_entry (new_table);
857
858 if (new_entry == NULL)
859 return false;
860
861 new_entry->data = data;
862 new_entry->next = new_bucket->next;
863 new_bucket->next = new_entry;
864 }
865 else
866 {
867 /* Merely relink an existing entry, when moving from a
868 bucket overflow into a bucket overflow. */
869 cursor->next = new_bucket->next;
870 new_bucket->next = cursor;
871 }
872 }
873 else
874 {
875 /* Free an existing entry, when moving from a bucket
876 overflow into a bucket header. Also take care of the
877 simple case of moving from a bucket header into a bucket
878 header. */
879 new_bucket->data = data;
880 new_table->n_buckets_used++;
881 if (cursor != bucket)
882 free_entry (new_table, cursor);
883 }
884 }
885
886 free (table->bucket);
887 table->bucket = new_table->bucket;
888 table->bucket_limit = new_table->bucket_limit;
889 table->n_buckets = new_table->n_buckets;
890 table->n_buckets_used = new_table->n_buckets_used;
891 table->free_entry_list = new_table->free_entry_list;
892 /* table->n_entries already holds its value. */
893#if USE_OBSTACK
894 table->entry_stack = new_table->entry_stack;
895#endif
896 free (new_table);
897
898 return true;
899}
900
901/* If ENTRY matches an entry already in the hash table, return the pointer
902 to the entry from the table. Otherwise, insert ENTRY and return ENTRY.
903 Return NULL if the storage required for insertion cannot be allocated. */
904
905void *
906hash_insert (Hash_table *table, const void *entry)
907{
908 void *data;
909 struct hash_entry *bucket;
910
911 /* The caller cannot insert a NULL entry. */
912 if (! entry)
913 abort ();
914
915 /* If there's a matching entry already in the table, return that. */
916 if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
917 return data;
918
919 /* ENTRY is not matched, it should be inserted. */
920
921 if (bucket->data)
922 {
923 struct hash_entry *new_entry = allocate_entry (table);
924
925 if (new_entry == NULL)
926 return NULL;
927
928 /* Add ENTRY in the overflow of the bucket. */
929
930 new_entry->data = (void *) entry;
931 new_entry->next = bucket->next;
932 bucket->next = new_entry;
933 table->n_entries++;
934 return (void *) entry;
935 }
936
937 /* Add ENTRY right in the bucket head. */
938
939 bucket->data = (void *) entry;
940 table->n_entries++;
941 table->n_buckets_used++;
942
943 /* If the growth threshold of the buckets in use has been reached, increase
944 the table size and rehash. There's no point in checking the number of
945 entries: if the hashing function is ill-conditioned, rehashing is not
946 likely to improve it. */
947
948 if (table->n_buckets_used
949 > table->tuning->growth_threshold * table->n_buckets)
950 {
951 /* Check more fully, before starting real work. If tuning arguments
952 became invalid, the second check will rely on proper defaults. */
953 check_tuning (table);
954 if (table->n_buckets_used
955 > table->tuning->growth_threshold * table->n_buckets)
956 {
957 const Hash_tuning *tuning = table->tuning;
958 float candidate =
959 (tuning->is_n_buckets
960 ? (table->n_buckets * tuning->growth_factor)
961 : (table->n_buckets * tuning->growth_factor
962 * tuning->growth_threshold));
963
964 if (SIZE_MAX <= candidate)
965 return NULL;
966
967 /* If the rehash fails, arrange to return NULL. */
968 if (!hash_rehash (table, candidate))
969 entry = NULL;
970 }
971 }
972
973 return (void *) entry;
974}
975
976/* If ENTRY is already in the table, remove it and return the just-deleted
977 data (the user may want to deallocate its storage). If ENTRY is not in the
978 table, don't modify the table and return NULL. */
979
980void *
981hash_delete (Hash_table *table, const void *entry)
982{
983 void *data;
984 struct hash_entry *bucket;
985
986 data = hash_find_entry (table, entry, &bucket, true);
987 if (!data)
988 return NULL;
989
990 table->n_entries--;
991 if (!bucket->data)
992 {
993 table->n_buckets_used--;
994
995 /* If the shrink threshold of the buckets in use has been reached,
996 rehash into a smaller table. */
997
998 if (table->n_buckets_used
999 < table->tuning->shrink_threshold * table->n_buckets)
1000 {
1001 /* Check more fully, before starting real work. If tuning arguments
1002 became invalid, the second check will rely on proper defaults. */
1003 check_tuning (table);
1004 if (table->n_buckets_used
1005 < table->tuning->shrink_threshold * table->n_buckets)
1006 {
1007 const Hash_tuning *tuning = table->tuning;
1008 size_t candidate =
1009 (tuning->is_n_buckets
1010 ? table->n_buckets * tuning->shrink_factor
1011 : (table->n_buckets * tuning->shrink_factor
1012 * tuning->growth_threshold));
1013
1014 hash_rehash (table, candidate);
1015 }
1016 }
1017 }
1018
1019 return data;
1020}
1021
1022/* Testing. */
1023
1024#if TESTING
1025
1026void
1027hash_print (const Hash_table *table)
1028{
1029 struct hash_entry const *bucket;
1030
1031 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
1032 {
1033 struct hash_entry *cursor;
1034
1035 if (bucket)
1036 printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));
1037
1038 for (cursor = bucket; cursor; cursor = cursor->next)
1039 {
1040 char const *s = cursor->data;
1041 /* FIXME */
1042 if (s)
1043 printf (" %s\n", s);
1044 }
1045 }
1046}
1047
1048#endif /* TESTING */
Note: See TracBrowser for help on using the repository browser.