/* hash - hashing table processing.

   Copyright (C) 1998-2004, 2006-2007, 2009-2021 Free Software Foundation, Inc.

   Written by Jim Meyering, 1992.

   This file is free software: you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as
   published by the Free Software Foundation; either version 2.1 of the
   License, or (at your option) any later version.

   This file is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

/* A generic hash table package.  */

/* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
   of malloc.  If you change USE_OBSTACK, you have to recompile!  */

#include <config.h>

#include "hash.h"

#include "bitrotate.h"
#include "xalloc-oversized.h"

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#if USE_OBSTACK
# include "obstack.h"
# ifndef obstack_chunk_alloc
#  define obstack_chunk_alloc malloc
# endif
# ifndef obstack_chunk_free
#  define obstack_chunk_free free
# endif
#endif
struct hash_entry
  {
    void *data;
    struct hash_entry *next;
  };

struct hash_table
  {
    /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
       for a possibility of N_BUCKETS.  Among those, N_BUCKETS_USED buckets
       are not empty, and there are N_ENTRIES active entries in the table.  */
    struct hash_entry *bucket;
    struct hash_entry const *bucket_limit;
    size_t n_buckets;
    size_t n_buckets_used;
    size_t n_entries;

    /* Tuning arguments, kept in a physically separate structure.  */
    const Hash_tuning *tuning;

    /* Three functions are given to 'hash_initialize', see the documentation
       block for that function.  In a word, HASHER randomizes a user entry
       into a number from 0 up to some maximum minus 1; COMPARATOR returns
       true if two user entries compare equally; and DATA_FREER is the
       cleanup function for a user entry.  */
    Hash_hasher hasher;
    Hash_comparator comparator;
    Hash_data_freer data_freer;

    /* A linked list of freed struct hash_entry structs.  */
    struct hash_entry *free_entry_list;

#if USE_OBSTACK
    /* Whenever obstacks are used, it is possible to allocate all overflowed
       entries into a single stack, so they all can be freed in a single
       operation.  It is not clear if the speedup is worth the trouble.  */
    struct obstack entry_stack;
#endif
  };
/* A hash table contains many internal entries, each holding a pointer to
   some user-provided data (also called a user entry).  The word "entry" is
   used below for both the internal entry and its associated user entry.  A
   user entry's contents may be hashed by a randomization function (the
   hashing function, or just "hasher" for short) into a number (or "slot")
   between 0 and the current table size, exclusive.  At each slot position
   in the hash table starts a linked chain of entries for which the user
   data all hash to this slot.  A bucket is the collection of all entries
   hashing to the same slot.

   A good "hasher" function will distribute entries rather evenly in buckets.
   In the ideal case, the length of each bucket is roughly the number of
   entries divided by the table size.  Finding the slot for a given datum is
   usually done in constant time by the "hasher", and the later finding of a
   precise entry is linear in time with the size of the bucket.  Consequently,
   a larger hash table size (that is, a larger number of buckets) is prone to
   yielding shorter chains, *given* the "hasher" function behaves properly.

   Long buckets slow down the lookup algorithm.  One might use big hash table
   sizes in the hope of reducing the average length of buckets, but this can
   become wasteful, as unused slots in the hash table take some space.  The
   best bet is to make sure you are using a good "hasher" function (beware
   that those are not that easy to write! :-), and to use a table size
   larger than the actual number of entries.  */
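
/* As a concrete illustration of the API sketched above, here is a minimal
   usage example, assuming only the declarations in "hash.h" (plus
   <string.h> for strcmp).  The wrappers 'str_hasher' and 'str_comparator'
   are hypothetical caller-side helpers, not part of this file; they adapt
   hash_string (defined further below) and strcmp to the Hash_hasher and
   Hash_comparator signatures:

     static size_t
     str_hasher (const void *entry, size_t n_buckets)
     {
       return hash_string (entry, n_buckets);
     }

     static bool
     str_comparator (const void *a, const void *b)
     {
       return strcmp (a, b) == 0;
     }

     Hash_table *ht = hash_initialize (31, NULL, str_hasher,
                                       str_comparator, NULL);
     if (ht == NULL)
       abort ();
     hash_insert (ht, "blue");
     if (hash_lookup (ht, "blue"))
       puts ("found");
     hash_free (ht);

   The candidate size 31 and the NULL tuning (requesting the defaults) are
   illustrative values only.  */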

/* If an insertion makes the ratio of nonempty buckets to table size larger
   than the growth threshold (a number between 0.0 and 1.0), then increase
   the table size by multiplying by the growth factor (a number greater than
   1.0).  The growth threshold defaults to 0.8, and the growth factor
   defaults to 1.414, meaning that the table will have doubled its size
   every second time 80% of the buckets get used.  */
#define DEFAULT_GROWTH_THRESHOLD 0.8f
#define DEFAULT_GROWTH_FACTOR 1.414f

/* If a deletion empties a bucket and causes the ratio of used buckets to
   table size to become smaller than the shrink threshold (a number between
   0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
   number greater than the shrink threshold but smaller than 1.0).  The shrink
   threshold and factor default to 0.0 and 1.0, meaning that the table never
   shrinks.  */
#define DEFAULT_SHRINK_THRESHOLD 0.0f
#define DEFAULT_SHRINK_FACTOR 1.0f

/* Use this to initialize or reset a TUNING structure to
   some sensible values.  */
static const Hash_tuning default_tuning =
  {
    DEFAULT_SHRINK_THRESHOLD,
    DEFAULT_SHRINK_FACTOR,
    DEFAULT_GROWTH_THRESHOLD,
    DEFAULT_GROWTH_FACTOR,
    false
  };
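
/* As an illustrative sketch only: a caller expecting many deletions might
   install a tuning that allows shrinking.  The values below are examples
   chosen to satisfy check_tuning (defined further below), not
   recommendations; the fields are spelled out with C99 designated
   initializers for clarity:

     static const Hash_tuning shrink_friendly_tuning =
       {
         .shrink_threshold = 0.2f,
         .shrink_factor = 0.5f,
         .growth_threshold = 0.8f,
         .growth_factor = 2.0f,
         .is_n_buckets = false
       };

     Hash_table *ht = hash_initialize (100, &shrink_friendly_tuning,
                                       NULL, NULL, NULL);

   With is_n_buckets false, the CANDIDATE argument (here 100) is read as an
   expected number of entries rather than a bucket count.  */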

/* Information and lookup.  */

size_t
hash_get_n_buckets (const Hash_table *table)
{
  return table->n_buckets;
}

size_t
hash_get_n_buckets_used (const Hash_table *table)
{
  return table->n_buckets_used;
}

size_t
hash_get_n_entries (const Hash_table *table)
{
  return table->n_entries;
}

size_t
hash_get_max_bucket_length (const Hash_table *table)
{
  struct hash_entry const *bucket;
  size_t max_bucket_length = 0;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry const *cursor = bucket;
          size_t bucket_length = 1;

          while (cursor = cursor->next, cursor)
            bucket_length++;

          if (bucket_length > max_bucket_length)
            max_bucket_length = bucket_length;
        }
    }

  return max_bucket_length;
}
|
|---|
| 182 |
|
|---|
| 183 | bool
|
|---|
| 184 | hash_table_ok (const Hash_table *table)
|
|---|
| 185 | {
|
|---|
| 186 | struct hash_entry const *bucket;
|
|---|
| 187 | size_t n_buckets_used = 0;
|
|---|
| 188 | size_t n_entries = 0;
|
|---|
| 189 |
|
|---|
| 190 | for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
|
|---|
| 191 | {
|
|---|
| 192 | if (bucket->data)
|
|---|
| 193 | {
|
|---|
| 194 | struct hash_entry const *cursor = bucket;
|
|---|
| 195 |
|
|---|
| 196 | /* Count bucket head. */
|
|---|
| 197 | n_buckets_used++;
|
|---|
| 198 | n_entries++;
|
|---|
| 199 |
|
|---|
| 200 | /* Count bucket overflow. */
|
|---|
| 201 | while (cursor = cursor->next, cursor)
|
|---|
| 202 | n_entries++;
|
|---|
| 203 | }
|
|---|
| 204 | }
|
|---|
| 205 |
|
|---|
| 206 | if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
|
|---|
| 207 | return true;
|
|---|
| 208 |
|
|---|
| 209 | return false;
|
|---|
| 210 | }
|
|---|
| 211 |
|
|---|
| 212 | void
|
|---|
| 213 | hash_print_statistics (const Hash_table *table, FILE *stream)
|
|---|
| 214 | {
|
|---|
| 215 | size_t n_entries = hash_get_n_entries (table);
|
|---|
| 216 | size_t n_buckets = hash_get_n_buckets (table);
|
|---|
| 217 | size_t n_buckets_used = hash_get_n_buckets_used (table);
|
|---|
| 218 | size_t max_bucket_length = hash_get_max_bucket_length (table);
|
|---|
| 219 |
|
|---|
| 220 | fprintf (stream, "# entries: %lu\n", (unsigned long int) n_entries);
|
|---|
| 221 | fprintf (stream, "# buckets: %lu\n", (unsigned long int) n_buckets);
|
|---|
| 222 | fprintf (stream, "# buckets used: %lu (%.2f%%)\n",
|
|---|
| 223 | (unsigned long int) n_buckets_used,
|
|---|
| 224 | (100.0 * n_buckets_used) / n_buckets);
|
|---|
| 225 | fprintf (stream, "max bucket length: %lu\n",
|
|---|
| 226 | (unsigned long int) max_bucket_length);
|
|---|
| 227 | }
|
|---|
| 228 |
|
|---|
| 229 | /* Hash KEY and return a pointer to the selected bucket.
|
|---|
| 230 | If TABLE->hasher misbehaves, abort. */
|
|---|
| 231 | static struct hash_entry *
|
|---|
| 232 | safe_hasher (const Hash_table *table, const void *key)
|
|---|
| 233 | {
|
|---|
| 234 | size_t n = table->hasher (key, table->n_buckets);
|
|---|
| 235 | if (! (n < table->n_buckets))
|
|---|
| 236 | abort ();
|
|---|
| 237 | return table->bucket + n;
|
|---|
| 238 | }
|
|---|
| 239 |
|
|---|
| 240 | void *
|
|---|
| 241 | hash_lookup (const Hash_table *table, const void *entry)
|
|---|
| 242 | {
|
|---|
| 243 | struct hash_entry const *bucket = safe_hasher (table, entry);
|
|---|
| 244 | struct hash_entry const *cursor;
|
|---|
| 245 |
|
|---|
| 246 | if (bucket->data == NULL)
|
|---|
| 247 | return NULL;
|
|---|
| 248 |
|
|---|
| 249 | for (cursor = bucket; cursor; cursor = cursor->next)
|
|---|
| 250 | if (entry == cursor->data || table->comparator (entry, cursor->data))
|
|---|
| 251 | return cursor->data;
|
|---|
| 252 |
|
|---|
| 253 | return NULL;
|
|---|
| 254 | }
|
|---|
| 255 |
|
|---|
| 256 | /* Walking. */
|
|---|
| 257 |
|
|---|
| 258 | void *
|
|---|
| 259 | hash_get_first (const Hash_table *table)
|
|---|
| 260 | {
|
|---|
| 261 | struct hash_entry const *bucket;
|
|---|
| 262 |
|
|---|
| 263 | if (table->n_entries == 0)
|
|---|
| 264 | return NULL;
|
|---|
| 265 |
|
|---|
| 266 | for (bucket = table->bucket; ; bucket++)
|
|---|
| 267 | if (! (bucket < table->bucket_limit))
|
|---|
| 268 | abort ();
|
|---|
| 269 | else if (bucket->data)
|
|---|
| 270 | return bucket->data;
|
|---|
| 271 | }
|
|---|
| 272 |
|
|---|
| 273 | void *
|
|---|
| 274 | hash_get_next (const Hash_table *table, const void *entry)
|
|---|
| 275 | {
|
|---|
| 276 | struct hash_entry const *bucket = safe_hasher (table, entry);
|
|---|
| 277 | struct hash_entry const *cursor;
|
|---|
| 278 |
|
|---|
| 279 | /* Find next entry in the same bucket. */
|
|---|
| 280 | cursor = bucket;
|
|---|
| 281 | do
|
|---|
| 282 | {
|
|---|
| 283 | if (cursor->data == entry && cursor->next)
|
|---|
| 284 | return cursor->next->data;
|
|---|
| 285 | cursor = cursor->next;
|
|---|
| 286 | }
|
|---|
| 287 | while (cursor != NULL);
|
|---|
| 288 |
|
|---|
| 289 | /* Find first entry in any subsequent bucket. */
|
|---|
| 290 | while (++bucket < table->bucket_limit)
|
|---|
| 291 | if (bucket->data)
|
|---|
| 292 | return bucket->data;
|
|---|
| 293 |
|
|---|
| 294 | /* None found. */
|
|---|
| 295 | return NULL;
|
|---|
| 296 | }
|
|---|
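
/* A typical walk over every user entry, combining the two functions above;
   a minimal sketch, where 'process' stands for arbitrary caller code:

     void *item;
     for (item = hash_get_first (table);
          item != NULL;
          item = hash_get_next (table, item))
       process (item);

   The traversal order is unspecified, and the table must not be modified
   while the walk is in progress.  */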

size_t
hash_get_entries (const Hash_table *table, void **buffer,
                  size_t buffer_size)
{
  size_t counter = 0;
  struct hash_entry const *bucket;
  struct hash_entry const *cursor;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          for (cursor = bucket; cursor; cursor = cursor->next)
            {
              if (counter >= buffer_size)
                return counter;
              buffer[counter++] = cursor->data;
            }
        }
    }

  return counter;
}

size_t
hash_do_for_each (const Hash_table *table, Hash_processor processor,
                  void *processor_data)
{
  size_t counter = 0;
  struct hash_entry const *bucket;
  struct hash_entry const *cursor;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          for (cursor = bucket; cursor; cursor = cursor->next)
            {
              if (! processor (cursor->data, processor_data))
                return counter;
              counter++;
            }
        }
    }

  return counter;
}
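
/* The same walk can also be driven by the table itself via hash_do_for_each;
   a minimal sketch, where 'print_entry' is a hypothetical Hash_processor
   that treats each user entry as a string and never stops early:

     static bool
     print_entry (void *data, void *stream)
     {
       fputs (data, stream);
       fputc ('\n', stream);
       return true;
     }

     size_t n_printed = hash_do_for_each (table, print_entry, stdout);

   Returning false from the processor stops the walk early; the result
   counts only the entries for which the processor returned true.  */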

/* Allocation and clean-up.  */

#if USE_DIFF_HASH

/* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
   B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
   Software--practice & experience 20, 2 (Feb 1990), 209-224.  Good hash
   algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
   may not be good for your application."  */

size_t
hash_string (const char *string, size_t n_buckets)
{
# define HASH_ONE_CHAR(Value, Byte) \
  ((Byte) + rotl_sz (Value, 7))

  size_t value = 0;
  unsigned char ch;

  for (; (ch = *string); string++)
    value = HASH_ONE_CHAR (value, ch);
  return value % n_buckets;

# undef HASH_ONE_CHAR
}

#else /* not USE_DIFF_HASH */

/* This one comes from 'recode', and performs a bit better than the above in
   a few experiments.  It is inspired by a hashing routine found in the very
   old Cyber 'snoop', itself written in typical Greg Mansfield style.  (By
   the way, what happened to this excellent man?  Is he still alive?)  */

size_t
hash_string (const char *string, size_t n_buckets)
{
  size_t value = 0;
  unsigned char ch;

  for (; (ch = *string); string++)
    value = (value * 31 + ch) % n_buckets;
  return value;
}

#endif /* not USE_DIFF_HASH */
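
/* As a worked example of the multiplicative variant above,
   hash_string ("ab", 101) computes:

     value = (0 * 31 + 'a') % 101 = 97 % 101 = 97
     value = (97 * 31 + 'b') % 101 = 3105 % 101 = 75

   so "ab" lands in slot 75 of a 101-bucket table.  Reducing modulo
   N_BUCKETS at every step keeps VALUE small, avoiding overflow, and by
   modular arithmetic yields the same slot as reducing only once at the
   end.  */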

/* Return true if CANDIDATE is a prime number.  CANDIDATE should be an odd
   number at least equal to 11.  */

static bool _GL_ATTRIBUTE_CONST
is_prime (size_t candidate)
{
  size_t divisor = 3;
  size_t square = divisor * divisor;

  while (square < candidate && (candidate % divisor))
    {
      /* Advance DIVISOR to the next odd number, maintaining the invariant
         SQUARE == DIVISOR * DIVISOR: (d + 2)^2 - d^2 == 4 * (d + 1).  */
      divisor++;
      square += 4 * divisor;
      divisor++;
    }

  return (candidate % divisor ? true : false);
}

/* Round a given CANDIDATE number up to the nearest prime, and return that
   prime.  Primes lower than 10 are merely skipped.  */

static size_t _GL_ATTRIBUTE_CONST
next_prime (size_t candidate)
{
  /* Skip small primes.  */
  if (candidate < 10)
    candidate = 10;

  /* Make it definitely odd.  */
  candidate |= 1;

  while (SIZE_MAX != candidate && !is_prime (candidate))
    candidate += 2;

  return candidate;
}
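
/* For example, next_prime (24) first forces the candidate to the odd
   number 25, then tests successive odd numbers with is_prime:

     25 == 5 * 5    rejected
     27 == 3 * 9    rejected
     29             prime, returned

   so a table asked for 24 buckets actually gets 29.  */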

void
hash_reset_tuning (Hash_tuning *tuning)
{
  *tuning = default_tuning;
}

/* If the user passes a NULL hasher, we hash the raw pointer.  */
static size_t
raw_hasher (const void *data, size_t n)
{
  /* When hashing unique pointers, it is often the case that they were
     generated by malloc and thus have the property that the low-order
     bits are 0.  As this tends to give poorer performance with small
     tables, we rotate the pointer value before performing division,
     in an attempt to improve hash quality.  */
  size_t val = rotr_sz ((size_t) data, 3);
  return val % n;
}

/* If the user passes a NULL comparator, we use pointer comparison.  */
static bool
raw_comparator (const void *a, const void *b)
{
  return a == b;
}


/* For the given hash TABLE, check the user supplied tuning structure for
   reasonable values, and return true if there is no gross error with it.
   Otherwise, definitively reset the TUNING field to some acceptable default
   in the hash table (that is, the user loses the right of further modifying
   tuning arguments), and return false.  */

static bool
check_tuning (Hash_table *table)
{
  const Hash_tuning *tuning = table->tuning;
  float epsilon;
  if (tuning == &default_tuning)
    return true;

  /* Be a bit stricter than mathematics would require, so that
     rounding errors in size calculations do not cause allocations to
     fail to grow or shrink as they should.  The smallest allocation
     is 11 (due to next_prime's algorithm), so an epsilon of 0.1
     should be good enough.  */
  epsilon = 0.1f;

  if (epsilon < tuning->growth_threshold
      && tuning->growth_threshold < 1 - epsilon
      && 1 + epsilon < tuning->growth_factor
      && 0 <= tuning->shrink_threshold
      && tuning->shrink_threshold + epsilon < tuning->shrink_factor
      && tuning->shrink_factor <= 1
      && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
    return true;

  table->tuning = &default_tuning;
  return false;
}

/* Compute the size of the bucket array for the given CANDIDATE and
   TUNING, or return 0 if there is no possible way to allocate that
   many entries.  */

static size_t _GL_ATTRIBUTE_PURE
compute_bucket_size (size_t candidate, const Hash_tuning *tuning)
{
  if (!tuning->is_n_buckets)
    {
      float new_candidate = candidate / tuning->growth_threshold;
      if ((float) SIZE_MAX <= new_candidate)
        return 0;
      candidate = new_candidate;
    }
  candidate = next_prime (candidate);
  if (xalloc_oversized (candidate, sizeof (struct hash_entry *)))
    return 0;
  return candidate;
}

Hash_table *
hash_initialize (size_t candidate, const Hash_tuning *tuning,
                 Hash_hasher hasher, Hash_comparator comparator,
                 Hash_data_freer data_freer)
{
  Hash_table *table;

  if (hasher == NULL)
    hasher = raw_hasher;
  if (comparator == NULL)
    comparator = raw_comparator;

  table = malloc (sizeof *table);
  if (table == NULL)
    return NULL;

  if (!tuning)
    tuning = &default_tuning;
  table->tuning = tuning;
  if (!check_tuning (table))
    {
      /* Fail if the tuning options are invalid.  This is the only occasion
         when the user gets some feedback about it.  Once the table is
         created, if the user provides invalid tuning options, we silently
         revert to using the defaults, and ignore further requests to change
         the tuning options.  */
      goto fail;
    }

  table->n_buckets = compute_bucket_size (candidate, tuning);
  if (!table->n_buckets)
    goto fail;

  table->bucket = calloc (table->n_buckets, sizeof *table->bucket);
  if (table->bucket == NULL)
    goto fail;
  table->bucket_limit = table->bucket + table->n_buckets;
  table->n_buckets_used = 0;
  table->n_entries = 0;

  table->hasher = hasher;
  table->comparator = comparator;
  table->data_freer = data_freer;

  table->free_entry_list = NULL;
#if USE_OBSTACK
  obstack_init (&table->entry_stack);
#endif
  return table;

 fail:
  free (table);
  return NULL;
}

void
hash_clear (Hash_table *table)
{
  struct hash_entry *bucket;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry *cursor;
          struct hash_entry *next;

          /* Free the bucket overflow.  */
          for (cursor = bucket->next; cursor; cursor = next)
            {
              if (table->data_freer)
                table->data_freer (cursor->data);
              cursor->data = NULL;

              next = cursor->next;
              /* Relinking is done one entry at a time, as it is to be
                 expected that overflows are either rare or short.  */
              cursor->next = table->free_entry_list;
              table->free_entry_list = cursor;
            }

          /* Free the bucket head.  */
          if (table->data_freer)
            table->data_freer (bucket->data);
          bucket->data = NULL;
          bucket->next = NULL;
        }
    }

  table->n_buckets_used = 0;
  table->n_entries = 0;
}

void
hash_free (Hash_table *table)
{
  struct hash_entry *bucket;
  struct hash_entry *cursor;
  struct hash_entry *next;

  /* Call the user data_freer function.  */
  if (table->data_freer && table->n_entries)
    {
      for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
        {
          if (bucket->data)
            {
              for (cursor = bucket; cursor; cursor = cursor->next)
                table->data_freer (cursor->data);
            }
        }
    }

#if USE_OBSTACK

  obstack_free (&table->entry_stack, NULL);

#else

  /* Free all bucket overflowed entries.  */
  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      for (cursor = bucket->next; cursor; cursor = next)
        {
          next = cursor->next;
          free (cursor);
        }
    }

  /* Also reclaim the internal list of previously freed entries.  */
  for (cursor = table->free_entry_list; cursor; cursor = next)
    {
      next = cursor->next;
      free (cursor);
    }

#endif

  /* Free the remainder of the hash table structure.  */
  free (table->bucket);
  free (table);
}

/* Insertion and deletion.  */

/* Get a new hash entry for a bucket overflow, possibly by recycling a
   previously freed one.  If this is not possible, allocate a new one.  */

static struct hash_entry *
allocate_entry (Hash_table *table)
{
  struct hash_entry *new;

  if (table->free_entry_list)
    {
      new = table->free_entry_list;
      table->free_entry_list = new->next;
    }
  else
    {
#if USE_OBSTACK
      new = obstack_alloc (&table->entry_stack, sizeof *new);
#else
      new = malloc (sizeof *new);
#endif
    }

  return new;
}

/* Free a hash entry which was part of some bucket overflow,
   saving it for later recycling.  */

static void
free_entry (Hash_table *table, struct hash_entry *entry)
{
  entry->data = NULL;
  entry->next = table->free_entry_list;
  table->free_entry_list = entry;
}

/* This private function is used to help with insertion and deletion.  When
   ENTRY matches an entry in the table, return a pointer to the corresponding
   user data and set *BUCKET_HEAD to the head of the selected bucket.
   Otherwise, return NULL.  When DELETE is true and ENTRY matches an entry in
   the table, unlink the matching entry.  */

static void *
hash_find_entry (Hash_table *table, const void *entry,
                 struct hash_entry **bucket_head, bool delete)
{
  struct hash_entry *bucket = safe_hasher (table, entry);
  struct hash_entry *cursor;

  *bucket_head = bucket;

  /* Test for empty bucket.  */
  if (bucket->data == NULL)
    return NULL;

  /* See if the entry is the first in the bucket.  */
  if (entry == bucket->data || table->comparator (entry, bucket->data))
    {
      void *data = bucket->data;

      if (delete)
        {
          if (bucket->next)
            {
              struct hash_entry *next = bucket->next;

              /* Bump the first overflow entry into the bucket head, then save
                 the previous first overflow entry for later recycling.  */
              *bucket = *next;
              free_entry (table, next);
            }
          else
            {
              bucket->data = NULL;
            }
        }

      return data;
    }

  /* Scan the bucket overflow.  */
  for (cursor = bucket; cursor->next; cursor = cursor->next)
    {
      if (entry == cursor->next->data
          || table->comparator (entry, cursor->next->data))
        {
          void *data = cursor->next->data;

          if (delete)
            {
              struct hash_entry *next = cursor->next;

              /* Unlink the entry to delete, then save the freed entry for
                 later recycling.  */
              cursor->next = next->next;
              free_entry (table, next);
            }

          return data;
        }
    }

  /* No entry found.  */
  return NULL;
}

/* Internal helper, to move entries from SRC to DST.  Both tables must
   share the same free entry list.  If SAFE, only move overflow
   entries, saving bucket heads for later, so that no allocations will
   occur.  Return false if the free entry list is exhausted and an
   allocation fails.  */

static bool
transfer_entries (Hash_table *dst, Hash_table *src, bool safe)
{
  struct hash_entry *bucket;
  struct hash_entry *cursor;
  struct hash_entry *next;
  for (bucket = src->bucket; bucket < src->bucket_limit; bucket++)
    if (bucket->data)
      {
        void *data;
        struct hash_entry *new_bucket;

        /* Within each bucket, transfer overflow entries first and
           then the bucket head, to minimize memory pressure.  After
           all, the only time we might allocate is when moving the
           bucket head, but moving overflow entries first may create
           free entries that can be recycled by the time we finally
           get to the bucket head.  */
        for (cursor = bucket->next; cursor; cursor = next)
          {
            data = cursor->data;
            new_bucket = safe_hasher (dst, data);

            next = cursor->next;

            if (new_bucket->data)
              {
                /* Merely relink an existing entry, when moving from a
                   bucket overflow into a bucket overflow.  */
                cursor->next = new_bucket->next;
                new_bucket->next = cursor;
              }
            else
              {
                /* Free an existing entry, when moving from a bucket
                   overflow into a bucket header.  */
                new_bucket->data = data;
                dst->n_buckets_used++;
                free_entry (dst, cursor);
              }
          }
        /* Now move the bucket head.  Be sure that if we fail due to
           allocation failure that the src table is in a consistent
           state.  */
        data = bucket->data;
        bucket->next = NULL;
        if (safe)
          continue;
        new_bucket = safe_hasher (dst, data);

        if (new_bucket->data)
          {
            /* Allocate or recycle an entry, when moving from a bucket
               header into a bucket overflow.  */
            struct hash_entry *new_entry = allocate_entry (dst);

            if (new_entry == NULL)
              return false;

            new_entry->data = data;
            new_entry->next = new_bucket->next;
            new_bucket->next = new_entry;
          }
        else
          {
            /* Move from one bucket header to another.  */
            new_bucket->data = data;
            dst->n_buckets_used++;
          }
        bucket->data = NULL;
        src->n_buckets_used--;
      }
  return true;
}

bool
hash_rehash (Hash_table *table, size_t candidate)
{
  Hash_table storage;
  Hash_table *new_table;
  size_t new_size = compute_bucket_size (candidate, table->tuning);

  if (!new_size)
    return false;
  if (new_size == table->n_buckets)
    return true;
  new_table = &storage;
  new_table->bucket = calloc (new_size, sizeof *new_table->bucket);
  if (new_table->bucket == NULL)
    return false;
  new_table->n_buckets = new_size;
  new_table->bucket_limit = new_table->bucket + new_size;
  new_table->n_buckets_used = 0;
  new_table->n_entries = 0;
  new_table->tuning = table->tuning;
  new_table->hasher = table->hasher;
  new_table->comparator = table->comparator;
  new_table->data_freer = table->data_freer;

  /* In order for the transfer to successfully complete, we need
     additional overflow entries when distinct buckets in the old
     table collide into a common bucket in the new table.  The worst
     case possible is a hasher that gives a good spread with the old
     size, but returns a constant with the new size; if we were to
     guarantee table->n_buckets_used-1 free entries in advance, then
     the transfer would be guaranteed to not allocate memory.
     However, for large tables, a guarantee of no further allocation
     introduces a lot of extra memory pressure, all for an unlikely
     corner case (most rehashes reduce, rather than increase, the
     number of overflow entries needed).  So, we instead ensure that
     the transfer process can be reversed if we hit a memory
     allocation failure mid-transfer.  */

  /* Merely reuse the extra old space into the new table.  */
#if USE_OBSTACK
  new_table->entry_stack = table->entry_stack;
#endif
  new_table->free_entry_list = table->free_entry_list;

  if (transfer_entries (new_table, table, false))
    {
      /* Entries transferred successfully; tie up the loose ends.  */
      free (table->bucket);
      table->bucket = new_table->bucket;
      table->bucket_limit = new_table->bucket_limit;
      table->n_buckets = new_table->n_buckets;
      table->n_buckets_used = new_table->n_buckets_used;
      table->free_entry_list = new_table->free_entry_list;
      /* table->n_entries and table->entry_stack already hold their value.  */
      return true;
    }

  /* We've allocated new_table->bucket (and possibly some entries),
     exhausted the free list, and moved some but not all entries into
     new_table.  We must undo the partial move before returning
     failure.  The only way to get into this situation is if new_table
     uses fewer buckets than the old table, so we will reclaim some
     free entries as overflows in the new table are put back into
     distinct buckets in the old table.

     There are some pathological cases where a single pass through the
     table requires more intermediate overflow entries than using two
     passes.  Two passes give worse cache performance and take
     longer, but at this point, we're already out of memory, so slow
     and safe is better than failure.  */
  table->free_entry_list = new_table->free_entry_list;
  if (! (transfer_entries (table, new_table, true)
         && transfer_entries (table, new_table, false)))
    abort ();
  /* table->n_entries already holds its value.  */
  free (new_table->bucket);
  return false;
}
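
/* Because hash_rehash is part of the public interface, a caller anticipating
   a large batch of insertions can size the table once, up front, instead of
   letting it grow repeatedly; a minimal sketch, where 'expected' is a
   hypothetical caller-supplied estimate of the final number of entries:

     size_t expected = 1000000;
     if (!hash_rehash (table, expected))
       abort ();

   On failure the table is left intact, so a caller may also simply continue
   with the smaller table.  With the default tuning (is_n_buckets false),
   compute_bucket_size divides the candidate by the growth threshold, so the
   resulting table can hold roughly 'expected' entries before the next
   automatic growth.  */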

int
hash_insert_if_absent (Hash_table *table, void const *entry,
                       void const **matched_ent)
{
  void *data;
  struct hash_entry *bucket;

  /* The caller cannot insert a NULL entry, since hash_lookup returns NULL
     to indicate "not found", and hash_find_entry uses "bucket->data == NULL"
     to indicate an empty bucket.  */
  if (! entry)
    abort ();

  /* If there's a matching entry already in the table, return that.  */
  if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
    {
      if (matched_ent)
        *matched_ent = data;
      return 0;
    }

  /* If the growth threshold of the buckets in use has been reached, increase
     the table size and rehash.  There's no point in checking the number of
     entries: if the hashing function is ill-conditioned, rehashing is not
     likely to improve it.  */

  if (table->n_buckets_used
      > table->tuning->growth_threshold * table->n_buckets)
    {
      /* Check more fully, before starting real work.  If tuning arguments
         became invalid, the second check will rely on proper defaults.  */
      check_tuning (table);
      if (table->n_buckets_used
          > table->tuning->growth_threshold * table->n_buckets)
        {
          const Hash_tuning *tuning = table->tuning;
          float candidate =
            (tuning->is_n_buckets
             ? (table->n_buckets * tuning->growth_factor)
             : (table->n_buckets * tuning->growth_factor
                * tuning->growth_threshold));

          if ((float) SIZE_MAX <= candidate)
            return -1;

          /* If the rehash fails, return -1 to report the allocation
             failure.  */
          if (!hash_rehash (table, candidate))
            return -1;

          /* Update the bucket we are interested in.  */
          if (hash_find_entry (table, entry, &bucket, false) != NULL)
            abort ();
        }
    }

  /* ENTRY is not matched; it should be inserted.  */

  if (bucket->data)
    {
      struct hash_entry *new_entry = allocate_entry (table);

      if (new_entry == NULL)
        return -1;

      /* Add ENTRY in the overflow of the bucket.  */

      new_entry->data = (void *) entry;
      new_entry->next = bucket->next;
      bucket->next = new_entry;
      table->n_entries++;
      return 1;
    }

  /* Add ENTRY right in the bucket head.  */

  bucket->data = (void *) entry;
  table->n_entries++;
  table->n_buckets_used++;

  return 1;
}

void *
hash_insert (Hash_table *table, void const *entry)
{
  void const *matched_ent;
  int err = hash_insert_if_absent (table, entry, &matched_ent);
  return (err == -1
          ? NULL
          : (void *) (err == 0 ? matched_ent : entry));
}
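
/* A sketch of the three-way result of hash_insert_if_absent, assuming a
   string table like the one in the example near the top of this file:

     void const *matched;
     int r = hash_insert_if_absent (table, "blue", &matched);
     if (r == -1)
       abort ();
     else if (r == 0)
       printf ("duplicate of %s\n", (char const *) matched);
     else
       printf ("inserted\n");

   hash_insert, just above, folds this into a single pointer: NULL on
   allocation failure, the matching entry on a duplicate, or ENTRY itself
   when newly inserted.  */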

void *
hash_remove (Hash_table *table, const void *entry)
{
  void *data;
  struct hash_entry *bucket;

  data = hash_find_entry (table, entry, &bucket, true);
  if (!data)
    return NULL;

  table->n_entries--;
  if (!bucket->data)
    {
      table->n_buckets_used--;

      /* If the shrink threshold of the buckets in use has been reached,
         rehash into a smaller table.  */

      if (table->n_buckets_used
          < table->tuning->shrink_threshold * table->n_buckets)
        {
          /* Check more fully, before starting real work.  If tuning arguments
             became invalid, the second check will rely on proper defaults.  */
          check_tuning (table);
          if (table->n_buckets_used
              < table->tuning->shrink_threshold * table->n_buckets)
            {
              const Hash_tuning *tuning = table->tuning;
              size_t candidate =
                (tuning->is_n_buckets
                 ? table->n_buckets * tuning->shrink_factor
                 : (table->n_buckets * tuning->shrink_factor
                    * tuning->growth_threshold));

              if (!hash_rehash (table, candidate))
                {
                  /* Failure to allocate memory in an attempt to
                     shrink the table is not fatal.  But since memory
                     is low, we can at least be kind and free any
                     spare entries, rather than keeping them tied up
                     in the free entry list.  */
#if ! USE_OBSTACK
                  struct hash_entry *cursor = table->free_entry_list;
                  struct hash_entry *next;
                  while (cursor)
                    {
                      next = cursor->next;
                      free (cursor);
                      cursor = next;
                    }
                  table->free_entry_list = NULL;
#endif
                }
            }
        }
    }

  return data;
}

void *
hash_delete (Hash_table *table, const void *entry)
{
  return hash_remove (table, entry);
}

/* Testing.  */

#if TESTING

void
hash_print (const Hash_table *table)
{
  struct hash_entry *bucket = (struct hash_entry *) table->bucket;

  for ( ; bucket < table->bucket_limit; bucket++)
    {
      struct hash_entry *cursor;

      /* Print the slot number only for nonempty buckets.  */
      if (bucket->data)
        printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));

      for (cursor = bucket; cursor; cursor = cursor->next)
        {
          char const *s = cursor->data;
          /* FIXME */
          if (s)
            printf ("  %s\n", s);
        }
    }
}

#endif /* TESTING */