1 | /* hash - hashing table processing.
|
---|
2 |
|
---|
3 | Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006 Free
|
---|
4 | Software Foundation, Inc.
|
---|
5 |
|
---|
6 | Written by Jim Meyering, 1992.
|
---|
7 |
|
---|
8 | This program is free software; you can redistribute it and/or modify
|
---|
9 | it under the terms of the GNU General Public License as published by
|
---|
10 | the Free Software Foundation; either version 2, or (at your option)
|
---|
11 | any later version.
|
---|
12 |
|
---|
13 | This program is distributed in the hope that it will be useful,
|
---|
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
16 | GNU General Public License for more details.
|
---|
17 |
|
---|
18 | You should have received a copy of the GNU General Public License
|
---|
19 | along with this program; if not, write to the Free Software Foundation,
|
---|
20 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
---|
21 |
|
---|
22 | /* A generic hash table package. */
|
---|
23 |
|
---|
24 | /* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
|
---|
25 | of malloc. If you change USE_OBSTACK, you have to recompile! */
|
---|
26 |
|
---|
27 | #include <config.h>
|
---|
28 |
|
---|
29 | #include "hash.h"
|
---|
30 | #include "xalloc.h"
|
---|
31 |
|
---|
32 | #include <limits.h>
|
---|
33 | #include <stdio.h>
|
---|
34 | #include <stdlib.h>
|
---|
35 |
|
---|
36 | #if USE_OBSTACK
|
---|
37 | # include "obstack.h"
|
---|
38 | # ifndef obstack_chunk_alloc
|
---|
39 | # define obstack_chunk_alloc malloc
|
---|
40 | # endif
|
---|
41 | # ifndef obstack_chunk_free
|
---|
42 | # define obstack_chunk_free free
|
---|
43 | # endif
|
---|
44 | #endif
|
---|
45 |
|
---|
46 | #ifndef SIZE_MAX
|
---|
47 | # define SIZE_MAX ((size_t) -1)
|
---|
48 | #endif
|
---|
49 |
|
---|
struct hash_table
  {
    /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
       for a possibility of N_BUCKETS.  Among those, N_BUCKETS_USED buckets
       are not empty, there are N_ENTRIES active entries in the table.  */
    struct hash_entry *bucket;
    struct hash_entry const *bucket_limit;
    size_t n_buckets;
    size_t n_buckets_used;
    size_t n_entries;

    /* Tuning arguments, kept in a physically separate structure.  */
    const Hash_tuning *tuning;

    /* Three functions are given to `hash_initialize', see the documentation
       block for this function.  In a word, HASHER randomizes a user entry
       into a number up from 0 up to some maximum minus 1; COMPARATOR returns
       true if two user entries compare equally; and DATA_FREER is the cleanup
       function for a user entry.  */
    Hash_hasher hasher;
    Hash_comparator comparator;
    Hash_data_freer data_freer;

    /* A linked list of freed struct hash_entry structs, kept so that
       deletions can recycle entries instead of calling free/malloc.  */
    struct hash_entry *free_entry_list;

#if USE_OBSTACK
    /* Whenever obstacks are used, it is possible to allocate all overflowed
       entries into a single stack, so they all can be freed in a single
       operation.  It is not clear if the speedup is worth the trouble.  */
    struct obstack entry_stack;
#endif
  };
|
---|
83 |
|
---|
84 | /* A hash table contains many internal entries, each holding a pointer to
|
---|
85 | some user provided data (also called a user entry). An entry indistinctly
|
---|
86 | refers to both the internal entry and its associated user entry. A user
|
---|
87 | entry contents may be hashed by a randomization function (the hashing
|
---|
88 | function, or just `hasher' for short) into a number (or `slot') between 0
|
---|
89 | and the current table size. At each slot position in the hash table,
|
---|
90 | starts a linked chain of entries for which the user data all hash to this
|
---|
91 | slot. A bucket is the collection of all entries hashing to the same slot.
|
---|
92 |
|
---|
93 | A good `hasher' function will distribute entries rather evenly in buckets.
|
---|
94 | In the ideal case, the length of each bucket is roughly the number of
|
---|
95 | entries divided by the table size. Finding the slot for a data is usually
|
---|
96 | done in constant time by the `hasher', and the later finding of a precise
|
---|
97 | entry is linear in time with the size of the bucket. Consequently, a
|
---|
98 | larger hash table size (that is, a larger number of buckets) is prone to
|
---|
99 | yielding shorter chains, *given* the `hasher' function behaves properly.
|
---|
100 |
|
---|
101 | Long buckets slow down the lookup algorithm. One might use big hash table
|
---|
102 | sizes in hope to reduce the average length of buckets, but this might
|
---|
103 | become inordinate, as unused slots in the hash table take some space. The
|
---|
104 | best bet is to make sure you are using a good `hasher' function (beware
|
---|
105 | that those are not that easy to write! :-), and to use a table size
|
---|
106 | larger than the actual number of entries. */
|
---|
107 |
|
---|
108 | /* If an insertion makes the ratio of nonempty buckets to table size larger
|
---|
109 | than the growth threshold (a number between 0.0 and 1.0), then increase
|
---|
110 | the table size by multiplying by the growth factor (a number greater than
|
---|
111 | 1.0). The growth threshold defaults to 0.8, and the growth factor
|
---|
112 | defaults to 1.414, meaning that the table will have doubled its size
|
---|
113 | every second time 80% of the buckets get used. */
|
---|
114 | #define DEFAULT_GROWTH_THRESHOLD 0.8
|
---|
115 | #define DEFAULT_GROWTH_FACTOR 1.414
|
---|
116 |
|
---|
117 | /* If a deletion empties a bucket and causes the ratio of used buckets to
|
---|
118 | table size to become smaller than the shrink threshold (a number between
|
---|
119 | 0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
|
---|
120 | number greater than the shrink threshold but smaller than 1.0). The shrink
|
---|
121 | threshold and factor default to 0.0 and 1.0, meaning that the table never
|
---|
122 | shrinks. */
|
---|
123 | #define DEFAULT_SHRINK_THRESHOLD 0.0
|
---|
124 | #define DEFAULT_SHRINK_FACTOR 1.0
|
---|
125 |
|
---|
126 | /* Use this to initialize or reset a TUNING structure to
|
---|
127 | some sensible values. */
|
---|
128 | static const Hash_tuning default_tuning =
|
---|
129 | {
|
---|
130 | DEFAULT_SHRINK_THRESHOLD,
|
---|
131 | DEFAULT_SHRINK_FACTOR,
|
---|
132 | DEFAULT_GROWTH_THRESHOLD,
|
---|
133 | DEFAULT_GROWTH_FACTOR,
|
---|
134 | false
|
---|
135 | };
|
---|
136 |
|
---|
137 | /* Information and lookup. */
|
---|
138 |
|
---|
139 | /* The following few functions provide information about the overall hash
|
---|
140 | table organization: the number of entries, number of buckets and maximum
|
---|
141 | length of buckets. */
|
---|
142 |
|
---|
143 | /* Return the number of buckets in the hash table. The table size, the total
|
---|
144 | number of buckets (used plus unused), or the maximum number of slots, are
|
---|
145 | the same quantity. */
|
---|
146 |
|
---|
size_t
hash_get_n_buckets (const Hash_table *table)
{
  /* Table size: total number of buckets, used plus unused.  */
  return table->n_buckets;
}
|
---|
152 |
|
---|
153 | /* Return the number of slots in use (non-empty buckets). */
|
---|
154 |
|
---|
size_t
hash_get_n_buckets_used (const Hash_table *table)
{
  /* Number of non-empty buckets (slots whose chain holds at least
     one entry).  */
  return table->n_buckets_used;
}
|
---|
160 |
|
---|
161 | /* Return the number of active entries. */
|
---|
162 |
|
---|
size_t
hash_get_n_entries (const Hash_table *table)
{
  /* Number of active user entries currently stored in the table.  */
  return table->n_entries;
}
|
---|
168 |
|
---|
169 | /* Return the length of the longest chain (bucket). */
|
---|
170 |
|
---|
171 | size_t
|
---|
172 | hash_get_max_bucket_length (const Hash_table *table)
|
---|
173 | {
|
---|
174 | struct hash_entry const *bucket;
|
---|
175 | size_t max_bucket_length = 0;
|
---|
176 |
|
---|
177 | for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
|
---|
178 | {
|
---|
179 | if (bucket->data)
|
---|
180 | {
|
---|
181 | struct hash_entry const *cursor = bucket;
|
---|
182 | size_t bucket_length = 1;
|
---|
183 |
|
---|
184 | while (cursor = cursor->next, cursor)
|
---|
185 | bucket_length++;
|
---|
186 |
|
---|
187 | if (bucket_length > max_bucket_length)
|
---|
188 | max_bucket_length = bucket_length;
|
---|
189 | }
|
---|
190 | }
|
---|
191 |
|
---|
192 | return max_bucket_length;
|
---|
193 | }
|
---|
194 |
|
---|
195 | /* Do a mild validation of a hash table, by traversing it and checking two
|
---|
196 | statistics. */
|
---|
197 |
|
---|
198 | bool
|
---|
199 | hash_table_ok (const Hash_table *table)
|
---|
200 | {
|
---|
201 | struct hash_entry const *bucket;
|
---|
202 | size_t n_buckets_used = 0;
|
---|
203 | size_t n_entries = 0;
|
---|
204 |
|
---|
205 | for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
|
---|
206 | {
|
---|
207 | if (bucket->data)
|
---|
208 | {
|
---|
209 | struct hash_entry const *cursor = bucket;
|
---|
210 |
|
---|
211 | /* Count bucket head. */
|
---|
212 | n_buckets_used++;
|
---|
213 | n_entries++;
|
---|
214 |
|
---|
215 | /* Count bucket overflow. */
|
---|
216 | while (cursor = cursor->next, cursor)
|
---|
217 | n_entries++;
|
---|
218 | }
|
---|
219 | }
|
---|
220 |
|
---|
221 | if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
|
---|
222 | return true;
|
---|
223 |
|
---|
224 | return false;
|
---|
225 | }
|
---|
226 |
|
---|
/* Write a short human-readable summary of TABLE's statistics (entry
   count, bucket count, occupancy percentage, longest chain) to STREAM.
   Intended for debugging and tuning, not machine parsing.  */

void
hash_print_statistics (const Hash_table *table, FILE *stream)
{
  size_t n_entries = hash_get_n_entries (table);
  size_t n_buckets = hash_get_n_buckets (table);
  size_t n_buckets_used = hash_get_n_buckets_used (table);
  size_t max_bucket_length = hash_get_max_bucket_length (table);

  /* Cast to unsigned long for %lu, as C89-era printf lacks %zu.  */
  fprintf (stream, "# entries: %lu\n", (unsigned long int) n_entries);
  fprintf (stream, "# buckets: %lu\n", (unsigned long int) n_buckets);
  fprintf (stream, "# buckets used: %lu (%.2f%%)\n",
	   (unsigned long int) n_buckets_used,
	   (100.0 * n_buckets_used) / n_buckets);
  fprintf (stream, "max bucket length: %lu\n",
	   (unsigned long int) max_bucket_length);
}
|
---|
243 |
|
---|
244 | /* If ENTRY matches an entry already in the hash table, return the
|
---|
245 | entry from the table. Otherwise, return NULL. */
|
---|
246 |
|
---|
247 | void *
|
---|
248 | hash_lookup (const Hash_table *table, const void *entry)
|
---|
249 | {
|
---|
250 | struct hash_entry const *bucket
|
---|
251 | = table->bucket + table->hasher (entry, table->n_buckets);
|
---|
252 | struct hash_entry const *cursor;
|
---|
253 |
|
---|
254 | if (! (bucket < table->bucket_limit))
|
---|
255 | abort ();
|
---|
256 |
|
---|
257 | if (bucket->data == NULL)
|
---|
258 | return NULL;
|
---|
259 |
|
---|
260 | for (cursor = bucket; cursor; cursor = cursor->next)
|
---|
261 | if (table->comparator (entry, cursor->data))
|
---|
262 | return cursor->data;
|
---|
263 |
|
---|
264 | return NULL;
|
---|
265 | }
|
---|
266 |
|
---|
267 | /* Walking. */
|
---|
268 |
|
---|
269 | /* The functions in this page traverse the hash table and process the
|
---|
270 | contained entries. For the traversal to work properly, the hash table
|
---|
271 | should not be resized nor modified while any particular entry is being
|
---|
272 | processed. In particular, entries should not be added or removed. */
|
---|
273 |
|
---|
274 | /* Return the first data in the table, or NULL if the table is empty. */
|
---|
275 |
|
---|
276 | void *
|
---|
277 | hash_get_first (const Hash_table *table)
|
---|
278 | {
|
---|
279 | struct hash_entry const *bucket;
|
---|
280 |
|
---|
281 | if (table->n_entries == 0)
|
---|
282 | return NULL;
|
---|
283 |
|
---|
284 | for (bucket = table->bucket; ; bucket++)
|
---|
285 | if (! (bucket < table->bucket_limit))
|
---|
286 | abort ();
|
---|
287 | else if (bucket->data)
|
---|
288 | return bucket->data;
|
---|
289 | }
|
---|
290 |
|
---|
291 | /* Return the user data for the entry following ENTRY, where ENTRY has been
|
---|
292 | returned by a previous call to either `hash_get_first' or `hash_get_next'.
|
---|
293 | Return NULL if there are no more entries. */
|
---|
294 |
|
---|
295 | void *
|
---|
296 | hash_get_next (const Hash_table *table, const void *entry)
|
---|
297 | {
|
---|
298 | struct hash_entry const *bucket
|
---|
299 | = table->bucket + table->hasher (entry, table->n_buckets);
|
---|
300 | struct hash_entry const *cursor;
|
---|
301 |
|
---|
302 | if (! (bucket < table->bucket_limit))
|
---|
303 | abort ();
|
---|
304 |
|
---|
305 | /* Find next entry in the same bucket. */
|
---|
306 | for (cursor = bucket; cursor; cursor = cursor->next)
|
---|
307 | if (cursor->data == entry && cursor->next)
|
---|
308 | return cursor->next->data;
|
---|
309 |
|
---|
310 | /* Find first entry in any subsequent bucket. */
|
---|
311 | while (++bucket < table->bucket_limit)
|
---|
312 | if (bucket->data)
|
---|
313 | return bucket->data;
|
---|
314 |
|
---|
315 | /* None found. */
|
---|
316 | return NULL;
|
---|
317 | }
|
---|
318 |
|
---|
319 | /* Fill BUFFER with pointers to active user entries in the hash table, then
|
---|
320 | return the number of pointers copied. Do not copy more than BUFFER_SIZE
|
---|
321 | pointers. */
|
---|
322 |
|
---|
323 | size_t
|
---|
324 | hash_get_entries (const Hash_table *table, void **buffer,
|
---|
325 | size_t buffer_size)
|
---|
326 | {
|
---|
327 | size_t counter = 0;
|
---|
328 | struct hash_entry const *bucket;
|
---|
329 | struct hash_entry const *cursor;
|
---|
330 |
|
---|
331 | for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
|
---|
332 | {
|
---|
333 | if (bucket->data)
|
---|
334 | {
|
---|
335 | for (cursor = bucket; cursor; cursor = cursor->next)
|
---|
336 | {
|
---|
337 | if (counter >= buffer_size)
|
---|
338 | return counter;
|
---|
339 | buffer[counter++] = cursor->data;
|
---|
340 | }
|
---|
341 | }
|
---|
342 | }
|
---|
343 |
|
---|
344 | return counter;
|
---|
345 | }
|
---|
346 |
|
---|
347 | /* Call a PROCESSOR function for each entry of a hash table, and return the
|
---|
348 | number of entries for which the processor function returned success. A
|
---|
349 | pointer to some PROCESSOR_DATA which will be made available to each call to
|
---|
350 | the processor function. The PROCESSOR accepts two arguments: the first is
|
---|
351 | the user entry being walked into, the second is the value of PROCESSOR_DATA
|
---|
as received.  The walking continues for as long as the PROCESSOR function
|
---|
353 | returns nonzero. When it returns zero, the walking is interrupted. */
|
---|
354 |
|
---|
355 | size_t
|
---|
356 | hash_do_for_each (const Hash_table *table, Hash_processor processor,
|
---|
357 | void *processor_data)
|
---|
358 | {
|
---|
359 | size_t counter = 0;
|
---|
360 | struct hash_entry const *bucket;
|
---|
361 | struct hash_entry const *cursor;
|
---|
362 |
|
---|
363 | for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
|
---|
364 | {
|
---|
365 | if (bucket->data)
|
---|
366 | {
|
---|
367 | for (cursor = bucket; cursor; cursor = cursor->next)
|
---|
368 | {
|
---|
369 | if (!(*processor) (cursor->data, processor_data))
|
---|
370 | return counter;
|
---|
371 | counter++;
|
---|
372 | }
|
---|
373 | }
|
---|
374 | }
|
---|
375 |
|
---|
376 | return counter;
|
---|
377 | }
|
---|
378 |
|
---|
379 | /* Allocation and clean-up. */
|
---|
380 |
|
---|
381 | /* Return a hash index for a NUL-terminated STRING between 0 and N_BUCKETS-1.
|
---|
382 | This is a convenience routine for constructing other hashing functions. */
|
---|
383 |
|
---|
384 | #if USE_DIFF_HASH
|
---|
385 |
|
---|
386 | /* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
|
---|
387 | B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
|
---|
388 | Software--practice & experience 20, 2 (Feb 1990), 209-224. Good hash
|
---|
389 | algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
|
---|
390 | may not be good for your application." */
|
---|
391 |
|
---|
size_t
hash_string (const char *string, size_t n_buckets)
{
  /* Fold each byte into VALUE after rotating VALUE left by 7 bits;
     reduce modulo N_BUCKETS only once, at the end.  */
  size_t value = 0;
  const char *p;

  for (p = string; *p != '\0'; p++)
    {
      unsigned char ch = *p;
      value = ch + ((value << 7)
                    | (value >> ((sizeof (size_t) * CHAR_BIT) - 7)));
    }

  return value % n_buckets;
}
|
---|
410 |
|
---|
411 | #else /* not USE_DIFF_HASH */
|
---|
412 |
|
---|
413 | /* This one comes from `recode', and performs a bit better than the above as
|
---|
414 | per a few experiments. It is inspired from a hashing routine found in the
|
---|
415 | very old Cyber `snoop', itself written in typical Greg Mansfield style.
|
---|
416 | (By the way, what happened to this excellent man? Is he still alive?) */
|
---|
417 |
|
---|
size_t
hash_string (const char *string, size_t n_buckets)
{
  /* Classic multiplicative hash: value = value * 31 + byte, reduced
     modulo N_BUCKETS at each step so VALUE never overflows.  */
  size_t value = 0;
  const char *p;

  for (p = string; *p != '\0'; p++)
    value = (value * 31 + (unsigned char) *p) % n_buckets;

  return value;
}
|
---|
428 |
|
---|
429 | #endif /* not USE_DIFF_HASH */
|
---|
430 |
|
---|
431 | /* Return true if CANDIDATE is a prime number. CANDIDATE should be an odd
|
---|
432 | number at least equal to 11. */
|
---|
433 |
|
---|
static bool
is_prime (size_t candidate)
{
  /* Trial division by 3, 5, 7, ... (CANDIDATE is assumed odd and >= 11,
     so 2 need not be tried).  SQUARE tracks divisor * divisor
     incrementally: (d + 2)^2 = d^2 + 4 * (d + 1).  */
  size_t divisor = 3;
  size_t square = 9;

  for (;;)
    {
      if (square >= candidate || candidate % divisor == 0)
        break;
      divisor++;
      square += 4 * divisor;
      divisor++;
    }

  /* Prime iff the loop stopped without finding an exact divisor.  */
  return candidate % divisor != 0;
}
|
---|
449 |
|
---|
450 | /* Round a given CANDIDATE number up to the nearest prime, and return that
|
---|
451 | prime. Primes lower than 10 are merely skipped. */
|
---|
452 |
|
---|
static size_t
next_prime (size_t candidate)
{
  /* Primes below 11 are skipped: anything smaller maps to 11.  */
  if (candidate < 10)
    candidate = 10;

  /* Force oddness, then step by 2 until is_prime accepts.  */
  for (candidate |= 1; !is_prime (candidate); candidate += 2)
    continue;

  return candidate;
}
|
---|
468 |
|
---|
void
hash_reset_tuning (Hash_tuning *tuning)
{
  /* Reset *TUNING to the compiled-in defaults: never shrink, grow by
     a factor of 1.414 once 80% of the buckets are in use.  */
  *tuning = default_tuning;
}
|
---|
474 |
|
---|
475 | /* For the given hash TABLE, check the user supplied tuning structure for
|
---|
476 | reasonable values, and return true if there is no gross error with it.
|
---|
477 | Otherwise, definitively reset the TUNING field to some acceptable default
|
---|
478 | in the hash table (that is, the user loses the right of further modifying
|
---|
479 | tuning arguments), and return false. */
|
---|
480 |
|
---|
481 | static bool
|
---|
482 | check_tuning (Hash_table *table)
|
---|
483 | {
|
---|
484 | const Hash_tuning *tuning = table->tuning;
|
---|
485 |
|
---|
486 | /* Be a bit stricter than mathematics would require, so that
|
---|
487 | rounding errors in size calculations do not cause allocations to
|
---|
488 | fail to grow or shrink as they should. The smallest allocation
|
---|
489 | is 11 (due to next_prime's algorithm), so an epsilon of 0.1
|
---|
490 | should be good enough. */
|
---|
491 | float epsilon = 0.1f;
|
---|
492 |
|
---|
493 | if (epsilon < tuning->growth_threshold
|
---|
494 | && tuning->growth_threshold < 1 - epsilon
|
---|
495 | && 1 + epsilon < tuning->growth_factor
|
---|
496 | && 0 <= tuning->shrink_threshold
|
---|
497 | && tuning->shrink_threshold + epsilon < tuning->shrink_factor
|
---|
498 | && tuning->shrink_factor <= 1
|
---|
499 | && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
|
---|
500 | return true;
|
---|
501 |
|
---|
502 | table->tuning = &default_tuning;
|
---|
503 | return false;
|
---|
504 | }
|
---|
505 |
|
---|
506 | /* Allocate and return a new hash table, or NULL upon failure. The initial
|
---|
507 | number of buckets is automatically selected so as to _guarantee_ that you
|
---|
508 | may insert at least CANDIDATE different user entries before any growth of
|
---|
the hash table size occurs.  So, if you have a reasonably tight a-priori upper
|
---|
510 | bound on the number of entries you intend to insert in the hash table, you
|
---|
511 | may save some table memory and insertion time, by specifying it here. If
|
---|
512 | the IS_N_BUCKETS field of the TUNING structure is true, the CANDIDATE
|
---|
513 | argument has its meaning changed to the wanted number of buckets.
|
---|
514 |
|
---|
515 | TUNING points to a structure of user-supplied values, in case some fine
|
---|
516 | tuning is wanted over the default behavior of the hasher. If TUNING is
|
---|
517 | NULL, the default tuning parameters are used instead.
|
---|
518 |
|
---|
519 | The user-supplied HASHER function should be provided. It accepts two
|
---|
520 | arguments ENTRY and TABLE_SIZE. It computes, by hashing ENTRY contents, a
|
---|
521 | slot number for that entry which should be in the range 0..TABLE_SIZE-1.
|
---|
522 | This slot number is then returned.
|
---|
523 |
|
---|
524 | The user-supplied COMPARATOR function should be provided. It accepts two
|
---|
525 | arguments pointing to user data, it then returns true for a pair of entries
|
---|
526 | that compare equal, or false otherwise. This function is internally called
|
---|
527 | on entries which are already known to hash to the same bucket index.
|
---|
528 |
|
---|
529 | The user-supplied DATA_FREER function, when not NULL, may be later called
|
---|
530 | with the user data as an argument, just before the entry containing the
|
---|
531 | data gets freed. This happens from within `hash_free' or `hash_clear'.
|
---|
532 | You should specify this function only if you want these functions to free
|
---|
533 | all of your `data' data. This is typically the case when your data is
|
---|
534 | simply an auxiliary struct that you have malloc'd to aggregate several
|
---|
535 | values. */
|
---|
536 |
|
---|
537 | Hash_table *
|
---|
538 | hash_initialize (size_t candidate, const Hash_tuning *tuning,
|
---|
539 | Hash_hasher hasher, Hash_comparator comparator,
|
---|
540 | Hash_data_freer data_freer)
|
---|
541 | {
|
---|
542 | Hash_table *table;
|
---|
543 |
|
---|
544 | if (hasher == NULL || comparator == NULL)
|
---|
545 | return NULL;
|
---|
546 |
|
---|
547 | table = malloc (sizeof *table);
|
---|
548 | if (table == NULL)
|
---|
549 | return NULL;
|
---|
550 |
|
---|
551 | if (!tuning)
|
---|
552 | tuning = &default_tuning;
|
---|
553 | table->tuning = tuning;
|
---|
554 | if (!check_tuning (table))
|
---|
555 | {
|
---|
556 | /* Fail if the tuning options are invalid. This is the only occasion
|
---|
557 | when the user gets some feedback about it. Once the table is created,
|
---|
558 | if the user provides invalid tuning options, we silently revert to
|
---|
559 | using the defaults, and ignore further request to change the tuning
|
---|
560 | options. */
|
---|
561 | goto fail;
|
---|
562 | }
|
---|
563 |
|
---|
564 | if (!tuning->is_n_buckets)
|
---|
565 | {
|
---|
566 | float new_candidate = candidate / tuning->growth_threshold;
|
---|
567 | if (SIZE_MAX <= new_candidate)
|
---|
568 | goto fail;
|
---|
569 | candidate = new_candidate;
|
---|
570 | }
|
---|
571 |
|
---|
572 | if (xalloc_oversized (candidate, sizeof *table->bucket))
|
---|
573 | goto fail;
|
---|
574 | table->n_buckets = next_prime (candidate);
|
---|
575 | if (xalloc_oversized (table->n_buckets, sizeof *table->bucket))
|
---|
576 | goto fail;
|
---|
577 |
|
---|
578 | table->bucket = calloc (table->n_buckets, sizeof *table->bucket);
|
---|
579 | table->bucket_limit = table->bucket + table->n_buckets;
|
---|
580 | table->n_buckets_used = 0;
|
---|
581 | table->n_entries = 0;
|
---|
582 |
|
---|
583 | table->hasher = hasher;
|
---|
584 | table->comparator = comparator;
|
---|
585 | table->data_freer = data_freer;
|
---|
586 |
|
---|
587 | table->free_entry_list = NULL;
|
---|
588 | #if USE_OBSTACK
|
---|
589 | obstack_init (&table->entry_stack);
|
---|
590 | #endif
|
---|
591 | return table;
|
---|
592 |
|
---|
593 | fail:
|
---|
594 | free (table);
|
---|
595 | return NULL;
|
---|
596 | }
|
---|
597 |
|
---|
598 | /* Make all buckets empty, placing any chained entries on the free list.
|
---|
599 | Apply the user-specified function data_freer (if any) to the datas of any
|
---|
600 | affected entries. */
|
---|
601 |
|
---|
602 | void
|
---|
603 | hash_clear (Hash_table *table)
|
---|
604 | {
|
---|
605 | struct hash_entry *bucket;
|
---|
606 |
|
---|
607 | for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
|
---|
608 | {
|
---|
609 | if (bucket->data)
|
---|
610 | {
|
---|
611 | struct hash_entry *cursor;
|
---|
612 | struct hash_entry *next;
|
---|
613 |
|
---|
614 | /* Free the bucket overflow. */
|
---|
615 | for (cursor = bucket->next; cursor; cursor = next)
|
---|
616 | {
|
---|
617 | if (table->data_freer)
|
---|
618 | (*table->data_freer) (cursor->data);
|
---|
619 | cursor->data = NULL;
|
---|
620 |
|
---|
621 | next = cursor->next;
|
---|
622 | /* Relinking is done one entry at a time, as it is to be expected
|
---|
623 | that overflows are either rare or short. */
|
---|
624 | cursor->next = table->free_entry_list;
|
---|
625 | table->free_entry_list = cursor;
|
---|
626 | }
|
---|
627 |
|
---|
628 | /* Free the bucket head. */
|
---|
629 | if (table->data_freer)
|
---|
630 | (*table->data_freer) (bucket->data);
|
---|
631 | bucket->data = NULL;
|
---|
632 | bucket->next = NULL;
|
---|
633 | }
|
---|
634 | }
|
---|
635 |
|
---|
636 | table->n_buckets_used = 0;
|
---|
637 | table->n_entries = 0;
|
---|
638 | }
|
---|
639 |
|
---|
640 | /* Reclaim all storage associated with a hash table. If a data_freer
|
---|
641 | function has been supplied by the user when the hash table was created,
|
---|
642 | this function applies it to the data of each entry before freeing that
|
---|
643 | entry. */
|
---|
644 |
|
---|
645 | void
|
---|
646 | hash_free (Hash_table *table)
|
---|
647 | {
|
---|
648 | struct hash_entry *bucket;
|
---|
649 | struct hash_entry *cursor;
|
---|
650 | struct hash_entry *next;
|
---|
651 |
|
---|
652 | /* Call the user data_freer function. */
|
---|
653 | if (table->data_freer && table->n_entries)
|
---|
654 | {
|
---|
655 | for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
|
---|
656 | {
|
---|
657 | if (bucket->data)
|
---|
658 | {
|
---|
659 | for (cursor = bucket; cursor; cursor = cursor->next)
|
---|
660 | {
|
---|
661 | (*table->data_freer) (cursor->data);
|
---|
662 | }
|
---|
663 | }
|
---|
664 | }
|
---|
665 | }
|
---|
666 |
|
---|
667 | #if USE_OBSTACK
|
---|
668 |
|
---|
669 | obstack_free (&table->entry_stack, NULL);
|
---|
670 |
|
---|
671 | #else
|
---|
672 |
|
---|
673 | /* Free all bucket overflowed entries. */
|
---|
674 | for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
|
---|
675 | {
|
---|
676 | for (cursor = bucket->next; cursor; cursor = next)
|
---|
677 | {
|
---|
678 | next = cursor->next;
|
---|
679 | free (cursor);
|
---|
680 | }
|
---|
681 | }
|
---|
682 |
|
---|
683 | /* Also reclaim the internal list of previously freed entries. */
|
---|
684 | for (cursor = table->free_entry_list; cursor; cursor = next)
|
---|
685 | {
|
---|
686 | next = cursor->next;
|
---|
687 | free (cursor);
|
---|
688 | }
|
---|
689 |
|
---|
690 | #endif
|
---|
691 |
|
---|
692 | /* Free the remainder of the hash table structure. */
|
---|
693 | free (table->bucket);
|
---|
694 | free (table);
|
---|
695 | }
|
---|
696 |
|
---|
697 | /* Insertion and deletion. */
|
---|
698 |
|
---|
/* Get a new hash entry for a bucket overflow, possibly by recycling a
   previously freed one.  If this is not possible, allocate a new one.  */
|
---|
701 |
|
---|
702 | static struct hash_entry *
|
---|
703 | allocate_entry (Hash_table *table)
|
---|
704 | {
|
---|
705 | struct hash_entry *new;
|
---|
706 |
|
---|
707 | if (table->free_entry_list)
|
---|
708 | {
|
---|
709 | new = table->free_entry_list;
|
---|
710 | table->free_entry_list = new->next;
|
---|
711 | }
|
---|
712 | else
|
---|
713 | {
|
---|
714 | #if USE_OBSTACK
|
---|
715 | new = obstack_alloc (&table->entry_stack, sizeof *new);
|
---|
716 | #else
|
---|
717 | new = malloc (sizeof *new);
|
---|
718 | #endif
|
---|
719 | }
|
---|
720 |
|
---|
721 | return new;
|
---|
722 | }
|
---|
723 |
|
---|
724 | /* Free a hash entry which was part of some bucket overflow,
|
---|
725 | saving it for later recycling. */
|
---|
726 |
|
---|
static void
free_entry (Hash_table *table, struct hash_entry *entry)
{
  /* ENTRY is not really freed: its data pointer is blanked and the
     entry is pushed onto the recycling list for reuse by
     allocate_entry.  */
  entry->data = NULL;
  entry->next = table->free_entry_list;
  table->free_entry_list = entry;
}
|
---|
734 |
|
---|
735 | /* This private function is used to help with insertion and deletion. When
|
---|
736 | ENTRY matches an entry in the table, return a pointer to the corresponding
|
---|
737 | user data and set *BUCKET_HEAD to the head of the selected bucket.
|
---|
738 | Otherwise, return NULL. When DELETE is true and ENTRY matches an entry in
|
---|
739 | the table, unlink the matching entry. */
|
---|
740 |
|
---|
static void *
hash_find_entry (Hash_table *table, const void *entry,
		 struct hash_entry **bucket_head, bool delete)
{
  struct hash_entry *bucket
    = table->bucket + table->hasher (entry, table->n_buckets);
  struct hash_entry *cursor;

  /* A hasher returning a slot outside 0..n_buckets-1 is a fatal bug.  */
  if (! (bucket < table->bucket_limit))
    abort ();

  /* Report the selected bucket to the caller even when no match is
     found (insertion uses it to chain a new entry).  */
  *bucket_head = bucket;

  /* Test for empty bucket.  */
  if (bucket->data == NULL)
    return NULL;

  /* See if the entry is the first in the bucket.  */
  if ((*table->comparator) (entry, bucket->data))
    {
      void *data = bucket->data;

      if (delete)
	{
	  if (bucket->next)
	    {
	      struct hash_entry *next = bucket->next;

	      /* Bump the first overflow entry into the bucket head (the
		 head is embedded in the bucket array and cannot be
		 unlinked), then save the previous first overflow entry
		 for later recycling.  */
	      *bucket = *next;
	      free_entry (table, next);
	    }
	  else
	    {
	      /* No overflow: blanking the head data empties the bucket.  */
	      bucket->data = NULL;
	    }
	}

      /* DATA was saved before any unlinking, so it is still valid.  */
      return data;
    }

  /* Scan the bucket overflow.  CURSOR trails one entry behind the one
     being compared, so unlinking is a single pointer update.  */
  for (cursor = bucket; cursor->next; cursor = cursor->next)
    {
      if ((*table->comparator) (entry, cursor->next->data))
	{
	  void *data = cursor->next->data;

	  if (delete)
	    {
	      struct hash_entry *next = cursor->next;

	      /* Unlink the entry to delete, then save the freed entry
		 for later recycling.  */
	      cursor->next = next->next;
	      free_entry (table, next);
	    }

	  return data;
	}
    }

  /* No entry found.  */
  return NULL;
}
|
---|
807 |
|
---|
/* For an already existing hash table, change the number of buckets through
   specifying CANDIDATE.  The contents of the hash table are preserved.  The
   new number of buckets is automatically selected so as to _guarantee_ that
   the table may receive at least CANDIDATE different user entries, including
   those already in the table, before any other growth of the hash table size
   occurs.  If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the
   exact number of buckets desired.  Return true on success, false on
   allocation failure.  */

bool
hash_rehash (Hash_table *table, size_t candidate)
{
  Hash_table *new_table;
  struct hash_entry *bucket;
  struct hash_entry *cursor;
  struct hash_entry *next;

  /* Build a fresh table shell sized for CANDIDATE, sharing the same
     tuning and callbacks; only its bucket array is ultimately kept.  */
  new_table = hash_initialize (candidate, table->tuning, table->hasher,
                               table->comparator, table->data_freer);
  if (new_table == NULL)
    return false;

  /* Merely reuse the extra old space into the new table.  */
#if USE_OBSTACK
  obstack_free (&new_table->entry_stack, NULL);
  new_table->entry_stack = table->entry_stack;
#endif
  new_table->free_entry_list = table->free_entry_list;

  /* Move every user datum into the new bucket array, re-hashing each
     one against the new bucket count.  Overflow nodes are relinked in
     place where possible instead of being reallocated.  */
  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    if (bucket->data)
      for (cursor = bucket; cursor; cursor = next)
        {
          void *data = cursor->data;
          struct hash_entry *new_bucket
            = (new_table->bucket
               + new_table->hasher (data, new_table->n_buckets));

          /* The hasher must return a value strictly below n_buckets.  */
          if (! (new_bucket < new_table->bucket_limit))
            abort ();

          /* Capture the successor now: relinking below clobbers
             cursor->next.  */
          next = cursor->next;

          if (new_bucket->data)
            {
              if (cursor == bucket)
                {
                  /* Allocate or recycle an entry, when moving from a bucket
                     header into a bucket overflow.  */
                  struct hash_entry *new_entry = allocate_entry (new_table);

                  /* NOTE(review): on allocation failure the entries are
                     left split between the old and new bucket arrays and
                     NEW_TABLE itself leaks -- known limitation of this
                     version; callers should treat a false return here as
                     fatal.  */
                  if (new_entry == NULL)
                    return false;

                  new_entry->data = data;
                  new_entry->next = new_bucket->next;
                  new_bucket->next = new_entry;
                }
              else
                {
                  /* Merely relink an existing entry, when moving from a
                     bucket overflow into a bucket overflow.  */
                  cursor->next = new_bucket->next;
                  new_bucket->next = cursor;
                }
            }
          else
            {
              /* Free an existing entry, when moving from a bucket
                 overflow into a bucket header.  Also take care of the
                 simple case of moving from a bucket header into a bucket
                 header.  */
              new_bucket->data = data;
              new_table->n_buckets_used++;
              if (cursor != bucket)
                free_entry (new_table, cursor);
            }
        }

  /* Steal the new table's bucket array for the original table, then
     discard the now-empty shell of the new table.  */
  free (table->bucket);
  table->bucket = new_table->bucket;
  table->bucket_limit = new_table->bucket_limit;
  table->n_buckets = new_table->n_buckets;
  table->n_buckets_used = new_table->n_buckets_used;
  table->free_entry_list = new_table->free_entry_list;
  /* table->n_entries already holds its value.  */
#if USE_OBSTACK
  table->entry_stack = new_table->entry_stack;
#endif
  free (new_table);

  return true;
}
|
---|
900 |
|
---|
901 | /* If ENTRY matches an entry already in the hash table, return the pointer
|
---|
902 | to the entry from the table. Otherwise, insert ENTRY and return ENTRY.
|
---|
903 | Return NULL if the storage required for insertion cannot be allocated. */
|
---|
904 |
|
---|
905 | void *
|
---|
906 | hash_insert (Hash_table *table, const void *entry)
|
---|
907 | {
|
---|
908 | void *data;
|
---|
909 | struct hash_entry *bucket;
|
---|
910 |
|
---|
911 | /* The caller cannot insert a NULL entry. */
|
---|
912 | if (! entry)
|
---|
913 | abort ();
|
---|
914 |
|
---|
915 | /* If there's a matching entry already in the table, return that. */
|
---|
916 | if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
|
---|
917 | return data;
|
---|
918 |
|
---|
919 | /* ENTRY is not matched, it should be inserted. */
|
---|
920 |
|
---|
921 | if (bucket->data)
|
---|
922 | {
|
---|
923 | struct hash_entry *new_entry = allocate_entry (table);
|
---|
924 |
|
---|
925 | if (new_entry == NULL)
|
---|
926 | return NULL;
|
---|
927 |
|
---|
928 | /* Add ENTRY in the overflow of the bucket. */
|
---|
929 |
|
---|
930 | new_entry->data = (void *) entry;
|
---|
931 | new_entry->next = bucket->next;
|
---|
932 | bucket->next = new_entry;
|
---|
933 | table->n_entries++;
|
---|
934 | return (void *) entry;
|
---|
935 | }
|
---|
936 |
|
---|
937 | /* Add ENTRY right in the bucket head. */
|
---|
938 |
|
---|
939 | bucket->data = (void *) entry;
|
---|
940 | table->n_entries++;
|
---|
941 | table->n_buckets_used++;
|
---|
942 |
|
---|
943 | /* If the growth threshold of the buckets in use has been reached, increase
|
---|
944 | the table size and rehash. There's no point in checking the number of
|
---|
945 | entries: if the hashing function is ill-conditioned, rehashing is not
|
---|
946 | likely to improve it. */
|
---|
947 |
|
---|
948 | if (table->n_buckets_used
|
---|
949 | > table->tuning->growth_threshold * table->n_buckets)
|
---|
950 | {
|
---|
951 | /* Check more fully, before starting real work. If tuning arguments
|
---|
952 | became invalid, the second check will rely on proper defaults. */
|
---|
953 | check_tuning (table);
|
---|
954 | if (table->n_buckets_used
|
---|
955 | > table->tuning->growth_threshold * table->n_buckets)
|
---|
956 | {
|
---|
957 | const Hash_tuning *tuning = table->tuning;
|
---|
958 | float candidate =
|
---|
959 | (tuning->is_n_buckets
|
---|
960 | ? (table->n_buckets * tuning->growth_factor)
|
---|
961 | : (table->n_buckets * tuning->growth_factor
|
---|
962 | * tuning->growth_threshold));
|
---|
963 |
|
---|
964 | if (SIZE_MAX <= candidate)
|
---|
965 | return NULL;
|
---|
966 |
|
---|
967 | /* If the rehash fails, arrange to return NULL. */
|
---|
968 | if (!hash_rehash (table, candidate))
|
---|
969 | entry = NULL;
|
---|
970 | }
|
---|
971 | }
|
---|
972 |
|
---|
973 | return (void *) entry;
|
---|
974 | }
|
---|
975 |
|
---|
976 | /* If ENTRY is already in the table, remove it and return the just-deleted
|
---|
977 | data (the user may want to deallocate its storage). If ENTRY is not in the
|
---|
978 | table, don't modify the table and return NULL. */
|
---|
979 |
|
---|
980 | void *
|
---|
981 | hash_delete (Hash_table *table, const void *entry)
|
---|
982 | {
|
---|
983 | void *data;
|
---|
984 | struct hash_entry *bucket;
|
---|
985 |
|
---|
986 | data = hash_find_entry (table, entry, &bucket, true);
|
---|
987 | if (!data)
|
---|
988 | return NULL;
|
---|
989 |
|
---|
990 | table->n_entries--;
|
---|
991 | if (!bucket->data)
|
---|
992 | {
|
---|
993 | table->n_buckets_used--;
|
---|
994 |
|
---|
995 | /* If the shrink threshold of the buckets in use has been reached,
|
---|
996 | rehash into a smaller table. */
|
---|
997 |
|
---|
998 | if (table->n_buckets_used
|
---|
999 | < table->tuning->shrink_threshold * table->n_buckets)
|
---|
1000 | {
|
---|
1001 | /* Check more fully, before starting real work. If tuning arguments
|
---|
1002 | became invalid, the second check will rely on proper defaults. */
|
---|
1003 | check_tuning (table);
|
---|
1004 | if (table->n_buckets_used
|
---|
1005 | < table->tuning->shrink_threshold * table->n_buckets)
|
---|
1006 | {
|
---|
1007 | const Hash_tuning *tuning = table->tuning;
|
---|
1008 | size_t candidate =
|
---|
1009 | (tuning->is_n_buckets
|
---|
1010 | ? table->n_buckets * tuning->shrink_factor
|
---|
1011 | : (table->n_buckets * tuning->shrink_factor
|
---|
1012 | * tuning->growth_threshold));
|
---|
1013 |
|
---|
1014 | hash_rehash (table, candidate);
|
---|
1015 | }
|
---|
1016 | }
|
---|
1017 | }
|
---|
1018 |
|
---|
1019 | return data;
|
---|
1020 | }
|
---|
1021 |
|
---|
1022 | /* Testing. */
|
---|
1023 |
|
---|
1024 | #if TESTING
|
---|
1025 |
|
---|
1026 | void
|
---|
1027 | hash_print (const Hash_table *table)
|
---|
1028 | {
|
---|
1029 | struct hash_entry const *bucket;
|
---|
1030 |
|
---|
1031 | for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
|
---|
1032 | {
|
---|
1033 | struct hash_entry *cursor;
|
---|
1034 |
|
---|
1035 | if (bucket)
|
---|
1036 | printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));
|
---|
1037 |
|
---|
1038 | for (cursor = bucket; cursor; cursor = cursor->next)
|
---|
1039 | {
|
---|
1040 | char const *s = cursor->data;
|
---|
1041 | /* FIXME */
|
---|
1042 | if (s)
|
---|
1043 | printf (" %s\n", s);
|
---|
1044 | }
|
---|
1045 | }
|
---|
1046 | }
|
---|
1047 |
|
---|
1048 | #endif /* TESTING */
|
---|