source: trunk/src/kmk/strcache2.c@ 1913

Last change on this file since 1913 was 1913, checked in by bird, 17 years ago

strcache2: hash1 -> hash; -hash2.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 35.3 KB
Line 
1/* $Id: strcache2.c 1913 2008-10-22 21:24:04Z bird $ */
2/** @file
3 * strcache2 - New string cache.
4 */
5
6/*
7 * Copyright (c) 2008 knut st. osmundsen <bird-src-spam@anduin.net>
8 *
9 * This file is part of kBuild.
10 *
11 * kBuild is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * kBuild is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with kBuild; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 *
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include "make.h"
32#include "strcache2.h"
33
34#include <assert.h>
35
36#include "debug.h"
37
38#ifdef _MSC_VER
39typedef unsigned char uint8_t;
40typedef unsigned short uint16_t;
41typedef unsigned int uint32_t;
42#else
43# include <stdint.h>
44#endif
45
46#ifdef WINDOWS32
47# include <io.h>
48# include <process.h>
49# include <Windows.h>
50# define PARSE_IN_WORKER
51#endif
52
53#ifdef __OS2__
54# include <sys/fmutex.h>
55#endif
56
57#ifdef HAVE_PTHREAD
58# include <pthread.h>
59#endif
60
61
62/*******************************************************************************
63* Defined Constants And Macros *
64*******************************************************************************/
65/* The default size of a memory segment (1MB). */
66#define STRCACHE2_SEG_SIZE (1024U*1024U)
67/* The default hash table shift (hash size give as a power of two). */
68#define STRCACHE2_HASH_SHIFT 16
69/** Does the modding / masking of a hash number into an index. */
70#ifdef STRCACHE2_USE_MASK
71# define STRCACHE2_MOD_IT(cache, hash) ((hash) & (cache)->hash_mask)
72#else
73# define STRCACHE2_MOD_IT(cache, hash) ((hash) % (cache)->hash_div)
74#endif
75
76
77/*******************************************************************************
78* Global Variables *
79*******************************************************************************/
80/* List of initialized string caches. */
81static struct strcache2 *strcache_head;
82
83
84/** Finds the closest primary number for power of two value (or something else
85 * useful if not support). */
86MY_INLINE unsigned int strcache2_find_prime(unsigned int shift)
87{
88 switch (shift)
89 {
90 case 5: return 31;
91 case 6: return 61;
92 case 7: return 127;
93 case 8: return 251;
94 case 9: return 509;
95 case 10: return 1021;
96 case 11: return 2039;
97 case 12: return 4093;
98 case 13: return 8191;
99 case 14: return 16381;
100 case 15: return 32749;
101 case 16: return 65521;
102 case 17: return 131063;
103 case 18: return 262139;
104 case 19: return 524269;
105 case 20: return 1048573;
106 case 21: return 2097143;
107 case 22: return 4194301;
108 case 23: return 8388593;
109
110 default:
111 assert (0);
112 return (1 << shift) - 1;
113 }
114}
115
116MY_INLINE unsigned int
117strcache2_case_sensitive_hash_1 (const char *str, unsigned int len)
118{
119#if 1
120 /* Paul Hsieh hash SuperFast function:
121 http://www.azillionmonkeys.com/qed/hash.html
122
123 This performs very good and as a sligtly better distribution than
124 STRING_N_HASH_1 on a typical kBuild run.
125
126 It is also 37% faster than return_STRING_N_HASH_1 when running the
127 two 100 times over typical kBuild strings that end up here (did a
128 fprintf here and built kBuild). Compiler was 32-bit gcc 4.0.1, darwin,
129 with -O2.
130
131 FIXME: A path for well aligned data should be added to speed up
132 execution on alignment sensitive systems. */
133
134# if defined(__amd64__) || defined(__x86_64__) || defined(__AMD64__) || defined(_M_X64) || defined(__amd64) \
135 || defined(__i386__) || defined(__x86__) || defined(__X86__) || defined(_M_IX86) || defined(__i386)
136# define strcache2_get_unaligned_16bits(ptr) ( *((const uint16_t *)(ptr)))
137# else
138 /* (endian doesn't matter) */
139# define strcache2_get_unaligned_16bits(ptr) ( (((const uint8_t *)(ptr))[0] << 8) \
140 | (((const uint8_t *)(ptr))[1]) )
141# endif
142 uint32_t hash = len;
143 uint32_t tmp;
144 int rem;
145
146 assert (sizeof (uint8_t) == sizeof (char));
147 if (len == 0)
148 return 0;
149
150 /* main loop, walking on 2 x uint16_t */
151 rem = len & 3;
152 len >>= 2;
153 while (len > 0)
154 {
155 hash += strcache2_get_unaligned_16bits (str);
156 tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
157 hash = (hash << 16) ^ tmp;
158 str += 2 * sizeof (uint16_t);
159 hash += hash >> 11;
160 len--;
161 }
162
163 /* the remainer */
164 switch (rem)
165 {
166 case 3:
167 hash += strcache2_get_unaligned_16bits (str);
168 hash ^= hash << 16;
169 hash ^= str[sizeof (uint16_t)] << 18;
170 hash += hash >> 11;
171 break;
172 case 2:
173 hash += strcache2_get_unaligned_16bits (str);
174 hash ^= hash << 11;
175 hash += hash >> 17;
176 break;
177 case 1:
178 hash += *str;
179 hash ^= hash << 10;
180 hash += hash >> 1;
181 break;
182 }
183
184 /* force "avalanching" of final 127 bits. */
185 hash ^= hash << 3;
186 hash += hash >> 5;
187 hash ^= hash << 4;
188 hash += hash >> 17;
189 hash ^= hash << 25;
190 hash += hash >> 6;
191
192 return hash;
193
194#elif 1
195 /* Note! This implementation is 18% faster than return_STRING_N_HASH_1
196 when running the two 100 times over typical kBuild strings that
197 end up here (did a fprintf here and built kBuild).
198 Compiler was 32-bit gcc 4.0.1, darwin, with -O2. */
199
200 unsigned int hash = 0;
201 if (MY_PREDICT_TRUE(len >= 2))
202 {
203 unsigned int ch0 = *str++;
204 hash = 0;
205 len--;
206 while (len >= 2)
207 {
208 unsigned int ch1 = *str++;
209 hash += ch0 << (ch1 & 0xf);
210
211 ch0 = *str++;
212 hash += ch1 << (ch0 & 0xf);
213
214 len -= 2;
215 }
216 if (len == 1)
217 {
218 unsigned ch1 = *str;
219 hash += ch0 << (ch1 & 0xf);
220
221 hash += ch1;
222 }
223 else
224 hash += ch0;
225 }
226 else if (len)
227 {
228 hash = *str;
229 hash += hash << (hash & 0xf);
230 }
231 else
232 hash = 0;
233 return hash;
234
235#elif 1
236# if 0
237 /* This is SDBM. This specific form/unroll was benchmarked to be 28% faster
238 than return_STRING_N_HASH_1. (Now the weird thing is that putting the (ch)
239 first in the assignment made it noticably slower.)
240
241 However, it is noticably slower in practice, most likely because of more
242 collisions. Hrmpf. */
243
244# define UPDATE_HASH(ch) hash = (hash << 6) + (hash << 16) - hash + (ch)
245 unsigned int hash = 0;
246
247# else
248 /* This is DJB2. This specific form/unroll was benchmarked to be 27%
249 fast than return_STRING_N_HASH_1.
250
251 Ditto. */
252
253# define UPDATE_HASH(ch) hash = (hash << 5) + hash + (ch)
254 unsigned int hash = 5381;
255# endif
256
257
258 while (len >= 4)
259 {
260 UPDATE_HASH (str[0]);
261 UPDATE_HASH (str[1]);
262 UPDATE_HASH (str[2]);
263 UPDATE_HASH (str[3]);
264 str += 4;
265 len -= 4;
266 }
267 switch (len)
268 {
269 default:
270 case 0:
271 return hash;
272 case 1:
273 UPDATE_HASH (str[0]);
274 return hash;
275 case 2:
276 UPDATE_HASH (str[0]);
277 UPDATE_HASH (str[1]);
278 return hash;
279 case 3:
280 UPDATE_HASH (str[0]);
281 UPDATE_HASH (str[1]);
282 UPDATE_HASH (str[2]);
283 return hash;
284 }
285#endif
286}
287
288MY_INLINE unsigned int
289strcache2_case_insensitive_hash_1 (const char *str, unsigned int len)
290{
291 unsigned int hash = 0;
292 if (MY_PREDICT_TRUE(len >= 2))
293 {
294 unsigned int ch0 = *str++;
295 ch0 = tolower (ch0);
296 hash = 0;
297 len--;
298 while (len >= 2)
299 {
300 unsigned int ch1 = *str++;
301 ch1 = tolower (ch1);
302 hash += ch0 << (ch1 & 0xf);
303
304 ch0 = *str++;
305 ch0 = tolower (ch0);
306 hash += ch1 << (ch0 & 0xf);
307
308 len -= 2;
309 }
310 if (len == 1)
311 {
312 unsigned ch1 = *str;
313 ch1 = tolower (ch1);
314 hash += ch0 << (ch1 & 0xf);
315
316 hash += ch1;
317 }
318 else
319 hash += ch0;
320 }
321 else if (len)
322 {
323 hash = *str;
324 hash += hash << (hash & 0xf);
325 }
326 else
327 hash = 0;
328 return hash;
329}
330
331MY_INLINE int
332strcache2_memcmp_inline_short (const char *xs, const char *ys, unsigned int length)
333{
334 if (length <= 8)
335 {
336 /* short string compare - ~50% of the kBuild calls. */
337 assert ( !((size_t)ys & 3) );
338 if (!((size_t)xs & 3))
339 {
340 /* aligned */
341 int result;
342 switch (length)
343 {
344 default: /* memcmp for longer strings */
345 return memcmp (xs, ys, length);
346 case 8:
347 result = *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
348 result |= *(int32_t*)xs - *(int32_t*)ys;
349 return result;
350 case 7:
351 result = xs[6] - ys[6];
352 result |= xs[5] - ys[5];
353 result |= xs[4] - ys[4];
354 result |= *(int32_t*)xs - *(int32_t*)ys;
355 return result;
356 case 6:
357 result = xs[5] - ys[5];
358 result |= xs[4] - ys[4];
359 result |= *(int32_t*)xs - *(int32_t*)ys;
360 return result;
361 case 5:
362 result = xs[4] - ys[4];
363 result |= *(int32_t*)xs - *(int32_t*)ys;
364 return result;
365 case 4:
366 return *(int32_t*)xs - *(int32_t*)ys;
367 case 3:
368 result = xs[2] - ys[2];
369 result |= xs[1] - ys[1];
370 result |= xs[0] - ys[0];
371 return result;
372 case 2:
373 result = xs[1] - ys[1];
374 result |= xs[0] - ys[0];
375 return result;
376 case 1:
377 return *xs - *ys;
378 case 0:
379 return 0;
380 }
381 }
382 else
383 {
384 /* unaligned */
385 int result = 0;
386 switch (length)
387 {
388 case 8: result |= xs[7] - ys[7];
389 case 7: result |= xs[6] - ys[6];
390 case 6: result |= xs[5] - ys[5];
391 case 5: result |= xs[4] - ys[4];
392 case 4: result |= xs[3] - ys[3];
393 case 3: result |= xs[2] - ys[2];
394 case 2: result |= xs[1] - ys[1];
395 case 1: result |= xs[0] - ys[0];
396 case 0:
397 return result;
398 }
399 }
400 }
401
402 /* memcmp for longer strings */
403 return memcmp (xs, ys, length);
404}
405
406MY_INLINE int
407strcache2_memcmp_inlined (const char *xs, const char *ys, unsigned int length)
408{
409#ifndef ELECTRIC_HEAP
410 assert ( !((size_t)ys & 3) );
411#endif
412 if (!((size_t)xs & 3))
413 {
414 int result;
415 /* aligned */
416 while (length >= 8)
417 {
418 result = *(int32_t*)xs - *(int32_t*)ys;
419 result |= *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
420 if (MY_PREDICT_FALSE(result))
421 return result;
422 xs += 8;
423 ys += 8;
424 length -= 8;
425 }
426 switch (length)
427 {
428 case 7:
429 result = *(int32_t*)xs - *(int32_t*)ys;
430 result |= xs[6] - ys[6];
431 result |= xs[5] - ys[5];
432 result |= xs[4] - ys[4];
433 return result;
434 case 6:
435 result = *(int32_t*)xs - *(int32_t*)ys;
436 result |= xs[5] - ys[5];
437 result |= xs[4] - ys[4];
438 return result;
439 case 5:
440 result = *(int32_t*)xs - *(int32_t*)ys;
441 result |= xs[4] - ys[4];
442 return result;
443 case 4:
444 return *(int32_t*)xs - *(int32_t*)ys;
445 case 3:
446 result = xs[2] - ys[2];
447 result |= xs[1] - ys[1];
448 result |= xs[0] - ys[0];
449 return result;
450 case 2:
451 result = xs[1] - ys[1];
452 result |= xs[0] - ys[0];
453 return result;
454 case 1:
455 return *xs - *ys;
456 default:
457 case 0:
458 return 0;
459 }
460 }
461 else
462 {
463 /* unaligned */
464 int result;
465 while (length >= 8)
466 {
467#if defined(__i386__) || defined(__x86_64__)
468 result = ( ((int32_t)xs[3] << 24)
469 | ((int32_t)xs[2] << 16)
470 | ((int32_t)xs[1] << 8)
471 | xs[0] )
472 - *(int32_t*)ys;
473 result |= ( ((int32_t)xs[7] << 24)
474 | ((int32_t)xs[6] << 16)
475 | ((int32_t)xs[5] << 8)
476 | xs[4] )
477 - *(int32_t*)(ys + 4);
478#else
479 result = xs[3] - ys[3];
480 result |= xs[2] - ys[2];
481 result |= xs[1] - ys[1];
482 result |= xs[0] - ys[0];
483 result |= xs[7] - ys[7];
484 result |= xs[6] - ys[6];
485 result |= xs[5] - ys[5];
486 result |= xs[4] - ys[4];
487#endif
488 if (MY_PREDICT_FALSE(result))
489 return result;
490 xs += 8;
491 ys += 8;
492 length -= 8;
493 }
494 result = 0;
495 switch (length)
496 {
497 case 7: result |= xs[6] - ys[6];
498 case 6: result |= xs[5] - ys[5];
499 case 5: result |= xs[4] - ys[4];
500 case 4: result |= xs[3] - ys[3];
501 case 3: result |= xs[2] - ys[2];
502 case 2: result |= xs[1] - ys[1];
503 case 1: result |= xs[0] - ys[0];
504 return result;
505 default:
506 case 0:
507 return 0;
508 }
509 }
510}
511
512MY_INLINE int
513strcache2_is_equal (struct strcache2 *cache, struct strcache2_entry const *entry,
514 const char *str, unsigned int length, unsigned int hash)
515{
516 assert (!cache->case_insensitive);
517
518 /* the simple stuff first. */
519 if ( entry->hash != hash
520 || entry->length != length)
521 return 0;
522
523#if 0
524 return memcmp (str, entry + 1, length) == 0;
525#elif 1
526 return strcache2_memcmp_inlined (str, (const char *)(entry + 1), length) == 0;
527#else
528 return strcache2_memcmp_inline_short (str, (const char *)(entry + 1), length) == 0;
529#endif
530}
531
532MY_INLINE int
533strcache2_is_iequal (struct strcache2 *cache, struct strcache2_entry const *entry,
534 const char *str, unsigned int length, unsigned int hash)
535{
536 assert (cache->case_insensitive);
537
538 /* the simple stuff first. */
539 if ( entry->hash != hash
540 || entry->length != length)
541 return 0;
542
543#if defined(_MSC_VER) || defined(__OS2__)
544 return _memicmp (entry + 1, str, length) == 0;
545#else
546 return strncasecmp ((const char *)(entry + 1), str, length) == 0;
547#endif
548}
549
550static void
551strcache2_rehash (struct strcache2 *cache)
552{
553 unsigned int src = cache->hash_size;
554 struct strcache2_entry **src_tab = cache->hash_tab;
555 struct strcache2_entry **dst_tab;
556#ifndef STRCACHE2_USE_MASK
557 unsigned int hash_shift;
558#endif
559
560 /* Allocate a new hash table twice the size of the current. */
561 cache->hash_size <<= 1;
562#ifdef STRCACHE2_USE_MASK
563 cache->hash_mask <<= 1;
564 cache->hash_mask |= 1;
565#else
566 for (hash_shift = 1; (1U << hash_shift) < cache->hash_size; hash_shift++)
567 /* nothing */;
568 cache->hash_div = strcache2_find_prime (hash_shift);
569#endif
570 cache->rehash_count <<= 1;
571 cache->hash_tab = dst_tab = (struct strcache2_entry **)
572 xmalloc (cache->hash_size * sizeof (struct strcache2_entry *));
573 memset (dst_tab, '\0', cache->hash_size * sizeof (struct strcache2_entry *));
574
575 /* Copy the entries from the old to the new hash table. */
576 cache->collision_count = 0;
577 while (src-- > 0)
578 {
579 struct strcache2_entry *entry = src_tab[src];
580 while (entry)
581 {
582 struct strcache2_entry *next = entry->next;
583 unsigned int dst = STRCACHE2_MOD_IT (cache, entry->hash);
584 if ((entry->next = dst_tab[dst]) != 0)
585 cache->collision_count++;
586 dst_tab[dst] = entry;
587
588 entry = next;
589 }
590 }
591
592 /* That's it, just free the old table and we're done. */
593 free (src_tab);
594}
595
596static struct strcache2_seg *
597strcache2_new_seg (struct strcache2 *cache, unsigned int minlen)
598{
599 struct strcache2_seg *seg;
600 size_t size;
601 size_t off;
602
603 size = cache->def_seg_size;
604 if (size < (size_t)minlen + sizeof (struct strcache2_seg) + STRCACHE2_ENTRY_ALIGNMENT)
605 {
606 size = (size_t)minlen * 2;
607 size = (size + 0xfff) & ~(size_t)0xfff;
608 }
609
610 seg = xmalloc (size);
611 seg->start = (char *)(seg + 1);
612 seg->size = size - sizeof (struct strcache2_seg);
613 off = (size_t)seg->start & (STRCACHE2_ENTRY_ALIGNMENT - 1);
614 if (off)
615 {
616 seg->start += off;
617 seg->size -= off;
618 }
619 assert (seg->size > minlen);
620 seg->cursor = seg->start;
621 seg->avail = seg->size;
622
623 seg->next = cache->seg_head;
624 cache->seg_head = seg;
625
626 return seg;
627}
628
629/* Internal worker that enters a new string into the cache. */
630static const char *
631strcache2_enter_string (struct strcache2 *cache, unsigned int idx,
632 const char *str, unsigned int length,
633 unsigned int hash)
634{
635 struct strcache2_entry *entry;
636 struct strcache2_seg *seg;
637 unsigned int size;
638 char *str_copy;
639
640 /* Allocate space for the string. */
641
642 size = length + 1 + sizeof (struct strcache2_entry);
643 size = (size + STRCACHE2_ENTRY_ALIGNMENT - 1) & ~(STRCACHE2_ENTRY_ALIGNMENT - 1U);
644
645 seg = cache->seg_head;
646 if (MY_PREDICT_FALSE(seg->avail < size))
647 {
648 do
649 seg = seg->next;
650 while (seg && seg->avail < size);
651 if (!seg)
652 seg = strcache2_new_seg (cache, size);
653 }
654
655 entry = (struct strcache2_entry *) seg->cursor;
656 assert ((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1));
657 seg->cursor += size;
658 seg->avail -= size;
659
660 /* Setup the entry, copy the string and insert it into the hash table. */
661
662 entry->user = NULL;
663 entry->length = length;
664 entry->hash = hash;
665 str_copy = (char *) memcpy (entry + 1, str, length);
666 str_copy[length] = '\0';
667
668 if ((entry->next = cache->hash_tab[idx]) != 0)
669 cache->collision_count++;
670 cache->hash_tab[idx] = entry;
671 cache->count++;
672 if (cache->count >= cache->rehash_count)
673 strcache2_rehash (cache);
674
675 return str_copy;
676}
677
678/* The public add string interface. */
679const char *
680strcache2_add (struct strcache2 *cache, const char *str, unsigned int length)
681{
682 struct strcache2_entry const *entry;
683 unsigned int hash = strcache2_case_sensitive_hash_1 (str, length);
684 unsigned int idx;
685
686 assert (!cache->case_insensitive);
687
688 cache->lookup_count++;
689
690 /* Lookup the entry in the hash table, hoping for an
691 early match. If not found, enter the string at IDX. */
692 idx = STRCACHE2_MOD_IT (cache, hash);
693 entry = cache->hash_tab[idx];
694 if (!entry)
695 return strcache2_enter_string (cache, idx, str, length, hash);
696 if (strcache2_is_equal (cache, entry, str, length, hash))
697 return (const char *)(entry + 1);
698 cache->collision_1st_count++;
699
700 entry = entry->next;
701 if (!entry)
702 return strcache2_enter_string (cache, idx, str, length, hash);
703 if (strcache2_is_equal (cache, entry, str, length, hash))
704 return (const char *)(entry + 1);
705 cache->collision_2nd_count++;
706
707 /* Loop the rest. */
708 for (;;)
709 {
710 entry = entry->next;
711 if (!entry)
712 return strcache2_enter_string (cache, idx, str, length, hash);
713 if (strcache2_is_equal (cache, entry, str, length, hash))
714 return (const char *)(entry + 1);
715 cache->collision_3rd_count++;
716 }
717 /* not reached */
718}
719
720/* The public add string interface for prehashed strings.
721 Use strcache2_hash_str to calculate the hash of a string. */
722const char *
723strcache2_add_hashed (struct strcache2 *cache, const char *str,
724 unsigned int length, unsigned int hash)
725{
726 struct strcache2_entry const *entry;
727 unsigned int idx;
728#ifndef NDEBUG
729 unsigned correct_hash;
730
731 assert (!cache->case_insensitive);
732 correct_hash = strcache2_case_sensitive_hash_1 (str, length);
733 MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
734#endif /* NDEBUG */
735
736 cache->lookup_count++;
737
738 /* Lookup the entry in the hash table, hoping for an
739 early match. If not found, enter the string at IDX. */
740 idx = STRCACHE2_MOD_IT (cache, hash);
741 entry = cache->hash_tab[idx];
742 if (!entry)
743 return strcache2_enter_string (cache, idx, str, length, hash);
744 if (strcache2_is_equal (cache, entry, str, length, hash))
745 return (const char *)(entry + 1);
746 cache->collision_1st_count++;
747
748 entry = entry->next;
749 if (!entry)
750 return strcache2_enter_string (cache, idx, str, length, hash);
751 if (strcache2_is_equal (cache, entry, str, length, hash))
752 return (const char *)(entry + 1);
753 cache->collision_2nd_count++;
754
755 /* Loop the rest. */
756 for (;;)
757 {
758 entry = entry->next;
759 if (!entry)
760 return strcache2_enter_string (cache, idx, str, length, hash);
761 if (strcache2_is_equal (cache, entry, str, length, hash))
762 return (const char *)(entry + 1);
763 cache->collision_3rd_count++;
764 }
765 /* not reached */
766}
767
768/* The public lookup (case sensitive) string interface. */
769const char *
770strcache2_lookup (struct strcache2 *cache, const char *str, unsigned int length)
771{
772 struct strcache2_entry const *entry;
773 unsigned int hash = strcache2_case_sensitive_hash_1 (str, length);
774 unsigned int idx;
775
776 assert (!cache->case_insensitive);
777
778 cache->lookup_count++;
779
780 /* Lookup the entry in the hash table, hoping for an
781 early match. */
782 idx = STRCACHE2_MOD_IT (cache, hash);
783 entry = cache->hash_tab[idx];
784 if (!entry)
785 return NULL;
786 if (strcache2_is_equal (cache, entry, str, length, hash))
787 return (const char *)(entry + 1);
788 cache->collision_1st_count++;
789
790 entry = entry->next;
791 if (!entry)
792 return NULL;
793 if (strcache2_is_equal (cache, entry, str, length, hash))
794 return (const char *)(entry + 1);
795 cache->collision_2nd_count++;
796
797 /* Loop the rest. */
798 for (;;)
799 {
800 entry = entry->next;
801 if (!entry)
802 return NULL;
803 if (strcache2_is_equal (cache, entry, str, length, hash))
804 return (const char *)(entry + 1);
805 cache->collision_3rd_count++;
806 }
807 /* not reached */
808}
809
810#if defined(HAVE_CASE_INSENSITIVE_FS)
811
812/* The public add string interface for case insensitive strings. */
813const char *
814strcache2_iadd (struct strcache2 *cache, const char *str, unsigned int length)
815{
816 struct strcache2_entry const *entry;
817 unsigned int hash = strcache2_case_insensitive_hash_1 (str, length);
818 unsigned int idx;
819
820 assert (!cache->case_insensitive);
821
822 cache->lookup_count++;
823
824 /* Lookup the entry in the hash table, hoping for an
825 early match. If not found, enter the string at IDX. */
826 idx = STRCACHE2_MOD_IT (cache, hash);
827 entry = cache->hash_tab[idx];
828 if (!entry)
829 return strcache2_enter_string (cache, idx, str, length, hash);
830 if (strcache2_is_equal (cache, entry, str, length, hash))
831 return (const char *)(entry + 1);
832 cache->collision_1st_count++;
833
834 entry = entry->next;
835 if (!entry)
836 return strcache2_enter_string (cache, idx, str, length, hash);
837 if (strcache2_is_equal (cache, entry, str, length, hash))
838 return (const char *)(entry + 1);
839 cache->collision_2nd_count++;
840
841 /* Loop the rest. */
842 for (;;)
843 {
844 entry = entry->next;
845 if (!entry)
846 return strcache2_enter_string (cache, idx, str, length, hash);
847 if (strcache2_is_equal (cache, entry, str, length, hash))
848 return (const char *)(entry + 1);
849 cache->collision_3rd_count++;
850 }
851 /* not reached */
852}
853
854/* The public add string interface for prehashed case insensitive strings.
855 Use strcache2_hash_istr to calculate the hash of a string. */
856const char *
857strcache2_iadd_hashed (struct strcache2 *cache, const char *str,
858 unsigned int length, unsigned int hash)
859{
860 struct strcache2_entry const *entry;
861 unsigned int idx;
862#ifndef NDEBUG
863 unsigned correct_hash;
864
865 assert (!cache->case_insensitive);
866 correct_hash = strcache2_case_insensitive_hash_1 (str, length);
867 MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
868#endif /* NDEBUG */
869
870 cache->lookup_count++;
871
872 /* Lookup the entry in the hash table, hoping for an
873 early match. If not found, enter the string at IDX. */
874 idx = STRCACHE2_MOD_IT (cache, hash);
875 entry = cache->hash_tab[idx];
876 if (!entry)
877 return strcache2_enter_string (cache, idx, str, length, hash);
878 if (strcache2_is_equal (cache, entry, str, length, hash))
879 return (const char *)(entry + 1);
880 cache->collision_1st_count++;
881
882 entry = entry->next;
883 if (!entry)
884 return strcache2_enter_string (cache, idx, str, length, hash);
885 if (strcache2_is_equal (cache, entry, str, length, hash))
886 return (const char *)(entry + 1);
887 cache->collision_2nd_count++;
888
889 /* Loop the rest. */
890 for (;;)
891 {
892 entry = entry->next;
893 if (!entry)
894 return strcache2_enter_string (cache, idx, str, length, hash);
895 if (strcache2_is_equal (cache, entry, str, length, hash))
896 return (const char *)(entry + 1);
897 cache->collision_3rd_count++;
898 }
899 /* not reached */
900}
901
902/* The public lookup (case insensitive) string interface. */
903const char *
904strcache2_ilookup (struct strcache2 *cache, const char *str, unsigned int length)
905{
906 struct strcache2_entry const *entry;
907 unsigned int hash = strcache2_case_insensitive_hash_1 (str, length);
908 unsigned int idx;
909
910 assert (!cache->case_insensitive);
911
912 cache->lookup_count++;
913
914 /* Lookup the entry in the hash table, hoping for an
915 early match. */
916 idx = STRCACHE2_MOD_IT (cache, hash);
917 entry = cache->hash_tab[idx];
918 if (!entry)
919 return NULL;
920 if (strcache2_is_equal (cache, entry, str, length, hash))
921 return (const char *)(entry + 1);
922 cache->collision_1st_count++;
923
924 entry = entry->next;
925 if (!entry)
926 return NULL;
927 if (strcache2_is_equal (cache, entry, str, length, hash))
928 return (const char *)(entry + 1);
929 cache->collision_2nd_count++;
930
931 /* Loop the rest. */
932 for (;;)
933 {
934 entry = entry->next;
935 if (!entry)
936 return NULL;
937 if (strcache2_is_equal (cache, entry, str, length, hash))
938 return (const char *)(entry + 1);
939 cache->collision_3rd_count++;
940 }
941 /* not reached */
942}
943
944#endif /* HAVE_CASE_INSENSITIVE_FS */
945
946/* Is the given string cached? returns 1 if it is, 0 if it isn't. */
947int
948strcache2_is_cached (struct strcache2 *cache, const char *str)
949{
950 /* Check mandatory alignment first. */
951 if (!((size_t)str & (sizeof (void *) - 1)))
952 {
953 /* Check the segment list and consider the question answered if the
954 string is within one of them. (Could check it more thoroughly...) */
955 struct strcache2_seg const *seg;
956 for (seg = cache->seg_head; seg; seg = seg->next)
957 if ((size_t)(str - seg->start) < seg->size)
958 return 1;
959 }
960
961 return 0;
962}
963
964
965/* Verify the integrity of the specified string, returning 0 if OK. */
966int
967strcache2_verify_entry (struct strcache2 *cache, const char *str)
968{
969 struct strcache2_entry const *entry;
970 unsigned hash;
971 const char *end;
972
973 if ((size_t)str & (sizeof (void *) - 1))
974 {
975 fprintf (stderr,
976 "strcache2[%s]: missaligned string %p\nstring: %s\n",
977 cache->name, (void *)str, str);
978 return -1;
979 }
980
981 entry = (struct strcache2_entry const *)str - 1;
982 end = memchr (str, '\0', entry->length);
983 if ((size_t)(end - str) == entry->length)
984 {
985 fprintf (stderr,
986 "strcache2[%s]: corrupt entry %p, length: %lu, expected %u;\nstring: %s\n",
987 cache->name, (void *)entry, (unsigned long)(end - str), entry->length, str);
988 return -1;
989 }
990
991 hash = cache->case_insensitive
992 ? strcache2_case_insensitive_hash_1 (str, entry->length)
993 : strcache2_case_sensitive_hash_1 (str, entry->length);
994 if (hash != entry->hash)
995 {
996 fprintf (stderr,
997 "strcache2[%s]: corrupt entry %p, hash#1: %x, expected %x;\nstring: %s\n",
998 cache->name, (void *)entry, hash, entry->hash, str);
999 return -1;
1000 }
1001
1002 return 0;
1003}
1004
1005
1006/* Calculates the case sensitive hash values of the string.
1007 The first hash is returned, the other is put at HASH2P. */
1008unsigned int strcache2_hash_str (const char *str, unsigned int length, unsigned int *hash2p)
1009{
1010 *hash2p = 1;
1011 return strcache2_case_sensitive_hash_1 (str, length);
1012}
1013
1014/* Calculates the case insensitive hash values of the string.
1015 The first hash is returned, the other is put at HASH2P. */
1016unsigned int strcache2_hash_istr (const char *str, unsigned int length, unsigned int *hash2p)
1017{
1018 *hash2p = 1;
1019 return strcache2_case_insensitive_hash_1 (str, length);
1020}
1021
1022
1023
1024/* Initalizes a new cache. */
1025void
1026strcache2_init (struct strcache2 *cache, const char *name, unsigned int size,
1027 unsigned int def_seg_size, int case_insensitive, int thread_safe)
1028{
1029 unsigned hash_shift;
1030 assert (!thread_safe);
1031
1032 /* calc the size as a power of two */
1033 if (!size)
1034 hash_shift = STRCACHE2_HASH_SHIFT;
1035 else
1036 {
1037 assert (size <= (~0U / 2 + 1));
1038 for (hash_shift = 8; (1U << hash_shift) < size; hash_shift++)
1039 /* nothing */;
1040 }
1041
1042 /* adjust the default segment size */
1043 if (!def_seg_size)
1044 def_seg_size = STRCACHE2_SEG_SIZE;
1045 else if (def_seg_size < sizeof (struct strcache2_seg) * 10)
1046 def_seg_size = sizeof (struct strcache2_seg) * 10;
1047 else if ((def_seg_size & 0xfff) < 0xf00)
1048 def_seg_size = ((def_seg_size + 0xfff) & ~0xfffU);
1049
1050
1051 /* init the structure. */
1052 cache->case_insensitive = case_insensitive;
1053#ifdef STRCACHE2_USE_MASK
1054 cache->hash_mask = (1U << hash_shift) - 1U;
1055#else
1056 cache->hash_div = strcache2_find_prime(hash_shift);
1057#endif
1058 cache->count = 0;
1059 cache->collision_count = 0;
1060 cache->lookup_count = 0;
1061 cache->collision_1st_count = 0;
1062 cache->collision_2nd_count = 0;
1063 cache->collision_3rd_count = 0;
1064 cache->rehash_count = (1U << hash_shift) / 4 * 3; /* rehash at 75% */
1065 cache->init_size = 1U << hash_shift;
1066 cache->hash_size = 1U << hash_shift;
1067 cache->def_seg_size = def_seg_size;
1068 cache->lock = NULL;
1069 cache->name = name;
1070
1071 /* allocate the hash table and first segment. */
1072 cache->hash_tab = (struct strcache2_entry **)
1073 xmalloc (cache->init_size * sizeof (struct strcache2_entry *));
1074 memset (cache->hash_tab, '\0', cache->init_size * sizeof (struct strcache2_entry *));
1075 strcache2_new_seg (cache, 0);
1076
1077 /* link it */
1078 cache->next = strcache_head;
1079 strcache_head = cache;
1080}
1081
1082
1083/* Terminates a string cache, freeing all memory and other
1084 associated resources. */
1085void
1086strcache2_term (struct strcache2 *cache)
1087{
1088 /* unlink it */
1089 if (strcache_head == cache)
1090 strcache_head = cache->next;
1091 else
1092 {
1093 struct strcache2 *prev = strcache_head;
1094 while (prev->next != cache)
1095 prev = prev->next;
1096 assert (prev);
1097 prev->next = cache->next;
1098 }
1099
1100 /* free the memory segments */
1101 do
1102 {
1103 void *free_it = cache->seg_head;
1104 cache->seg_head = cache->seg_head->next;
1105 free (free_it);
1106 }
1107 while (cache->seg_head);
1108
1109 /* free the hash and clear the structure. */
1110 free (cache->hash_tab);
1111 memset (cache, '\0', sizeof (struct strcache2));
1112}
1113
1114/* Print statistics a string cache. */
1115void
1116strcache2_print_stats (struct strcache2 *cache, const char *prefix)
1117{
1118 unsigned int seg_count = 0;
1119 unsigned long seg_total_bytes = 0;
1120 unsigned long seg_avg_bytes;
1121 unsigned long seg_avail_bytes = 0;
1122 unsigned long seg_max_bytes = 0;
1123 struct strcache2_seg *seg;
1124 unsigned int str_count = 0;
1125 unsigned long str_total_len = 0;
1126 unsigned int str_avg_len;
1127 unsigned int str_min_len = ~0U;
1128 unsigned int str_max_len = 0;
1129 unsigned int idx;
1130 unsigned int rehashes;
1131 unsigned int chain_depths[32];
1132
1133 printf (_("\n%s strcache2: %s\n"), prefix, cache->name);
1134
1135 /* Segment statistics. */
1136 for (seg = cache->seg_head; seg; seg = seg->next)
1137 {
1138 seg_count++;
1139 seg_total_bytes += seg->size;
1140 seg_avail_bytes += seg->avail;
1141 if (seg->size > seg_max_bytes)
1142 seg_max_bytes = seg->size;
1143 }
1144 seg_avg_bytes = seg_total_bytes / seg_count;
1145 printf (_("%s %u segments: total = %lu / max = %lu / avg = %lu / def = %u avail = %lu\n"),
1146 prefix, seg_count, seg_total_bytes, seg_max_bytes, seg_avg_bytes,
1147 cache->def_seg_size, seg_avail_bytes);
1148
1149 /* String statistics. */
1150 memset (chain_depths, '\0', sizeof (chain_depths));
1151 idx = cache->hash_size;
1152 while (idx-- > 0)
1153 {
1154 struct strcache2_entry const *entry = cache->hash_tab[idx];
1155 unsigned int depth = 0;
1156 for (; entry != 0; entry = entry->next, depth++)
1157 {
1158 unsigned int length = entry->length;
1159 str_total_len += length;
1160 if (length > str_max_len)
1161 str_max_len = length;
1162 if (length < str_min_len)
1163 str_min_len = length;
1164 str_count++;
1165 }
1166 chain_depths[depth >= 32 ? 31 : depth]++;
1167 }
1168 str_avg_len = cache->count ? str_total_len / cache->count : 0;
1169 printf (_("%s %u strings: total len = %lu / max = %u / avg = %u / min = %u\n"),
1170 prefix, cache->count, str_total_len, str_max_len, str_avg_len, str_min_len);
1171 if (str_count != cache->count)
1172 printf (_("%s string count mismatch! cache->count=%u, actual count is %u\n"), prefix,
1173 cache->count, str_count);
1174
1175 /* Hash statistics. */
1176 idx = cache->init_size;
1177 rehashes = 0;
1178 while (idx < cache->hash_size)
1179 {
1180 idx *= 2;
1181 rehashes++;
1182 }
1183
1184#ifdef STRCACHE2_USE_MASK
1185 printf (_("%s hash size = %u mask = %#x rehashed %u times lookups = %lu\n"),
1186 prefix, cache->hash_size, cache->hash_mask, rehashes, cache->lookup_count);
1187#else
1188 printf (_("%s hash size = %u div = %#x rehashed %u times lookups = %lu\n"),
1189 prefix, cache->hash_size, cache->hash_div, rehashes, cache->lookup_count);
1190#endif
1191 printf (_("%s hash collisions 1st = %lu (%u%%) 2nd = %lu (%u%%) 3rd = %lu (%u%%)\n"),
1192 prefix,
1193 cache->collision_1st_count, (unsigned int)((100.0 * cache->collision_1st_count) / cache->lookup_count),
1194 cache->collision_2nd_count, (unsigned int)((100.0 * cache->collision_2nd_count) / cache->lookup_count),
1195 cache->collision_3rd_count, (unsigned int)((100.0 * cache->collision_3rd_count) / cache->lookup_count));
1196 printf (_("%s hash insert collisions = %u (%u%%)\n"),
1197 prefix, cache->collision_count,(unsigned int)((100.0 * cache->collision_count) / cache->count));
1198 printf (_("%s %5u (%u%%) empty hash table slots\n"),
1199 prefix, chain_depths[0], (unsigned int)((100.0 * chain_depths[0]) / cache->hash_size));
1200 printf (_("%s %5u (%u%%) occupied hash table slots\n"),
1201 prefix, chain_depths[1], (unsigned int)((100.0 * chain_depths[1]) / cache->hash_size));
1202 for (idx = 2; idx < 32; idx++)
1203 {
1204 unsigned strs_at_this_depth = chain_depths[idx];
1205 unsigned i;
1206 for (i = idx + 1; i < 32; i++)
1207 strs_at_this_depth += chain_depths[i];
1208 if (strs_at_this_depth)
1209 printf (_("%s %5u (%2u%%) with %u string%s chained on; %5u (%2u%%) strings at depth %u.\n"),
1210 prefix, chain_depths[idx], (unsigned int)((100.0 * chain_depths[idx]) / (cache->count - cache->collision_count)),
1211 idx - 1, idx == 2 ? " " : "s",
1212 strs_at_this_depth, (unsigned int)((100.0 * strs_at_this_depth) / cache->count), idx - 1);
1213 }
1214}
1215
1216/* Print statistics for all string caches. */
1217void
1218strcache2_print_stats_all (const char *prefix)
1219{
1220 struct strcache2 *cur;
1221 for (cur = strcache_head; cur; cur = cur->next)
1222 strcache2_print_stats (cur, prefix);
1223}
1224
Note: See TracBrowser for help on using the repository browser.