source: trunk/src/kmk/strcache2.c@ 1910

Last change on this file since 1910 was 1910, checked in by bird, 17 years ago

strcache2: switch to Paul Hsieh's superfash hash function (finally found one which is both faster and better). Implemented rehashing of chained hashing.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 43.2 KB
Line 
1/* $Id: strcache2.c 1910 2008-10-22 19:50:39Z bird $ */
2/** @file
3 * strcache2 - New string cache.
4 */
5
6/*
7 * Copyright (c) 2008 knut st. osmundsen <bird-src-spam@anduin.net>
8 *
9 * This file is part of kBuild.
10 *
11 * kBuild is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * kBuild is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with kBuild; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 *
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include "make.h"
32#include "strcache2.h"
33
34#include <assert.h>
35
36#include "debug.h"
37
38#ifdef _MSC_VER
39typedef unsigned char uint8_t;
40typedef unsigned short uint16_t;
41typedef unsigned int uint32_t;
42#else
43# include <stdint.h>
44#endif
45
46#ifdef WINDOWS32
47# include <io.h>
48# include <process.h>
49# include <Windows.h>
50# define PARSE_IN_WORKER
51#endif
52
53#ifdef __OS2__
54# include <sys/fmutex.h>
55#endif
56
57#ifdef HAVE_PTHREAD
58# include <pthread.h>
59#endif
60
61
62/*******************************************************************************
63* Defined Constants And Macros *
64*******************************************************************************/
65/* The default size of a memory segment (1MB). */
66#define STRCACHE2_SEG_SIZE (1024U*1024U)
67/* The default hash table shift (hash size give as a power of two). */
68#define STRCACHE2_HASH_SHIFT 16
69/** Does the modding / masking of a hash number into an index. */
70#ifdef STRCACHE2_USE_MASK
71# define STRCACHE2_MOD_IT(cache, hash) ((hash) & (cache)->hash_mask)
72#else
73# define STRCACHE2_MOD_IT(cache, hash) ((hash) % (cache)->hash_div)
74#endif
75
76
77/*******************************************************************************
78* Global Variables *
79*******************************************************************************/
80/* List of initialized string caches. */
81static struct strcache2 *strcache_head;
82
83
84/** Finds the closest primary number for power of two value (or something else
85 * useful if not support). */
86MY_INLINE unsigned int strcache2_find_prime(unsigned int shift)
87{
88 switch (shift)
89 {
90 case 5: return 31;
91 case 6: return 61;
92 case 7: return 127;
93 case 8: return 251;
94 case 9: return 509;
95 case 10: return 1021;
96 case 11: return 2039;
97 case 12: return 4093;
98 case 13: return 8191;
99 case 14: return 16381;
100 case 15: return 32749;
101 case 16: return 65521;
102 case 17: return 131063;
103 case 18: return 262139;
104 case 19: return 524269;
105 case 20: return 1048573;
106 case 21: return 2097143;
107 case 22: return 4194301;
108 case 23: return 8388593;
109
110 default:
111 assert (0);
112 return (1 << shift) - 1;
113 }
114}
115
116MY_INLINE unsigned int
117strcache2_case_sensitive_hash_1 (const char *str, unsigned int len)
118{
119#if 1
120 /* Paul Hsieh hash SuperFast function:
121 http://www.azillionmonkeys.com/qed/hash.html
122
123 This performs very good and as a sligtly better distribution than
124 STRING_N_HASH_1 on a typical kBuild run.
125
126 It is also 37% faster than return_STRING_N_HASH_1 when running the
127 two 100 times over typical kBuild strings that end up here (did a
128 fprintf here and built kBuild). Compiler was 32-bit gcc 4.0.1, darwin,
129 with -O2.
130
131 FIXME: A path for well aligned data should be added to speed up
132 execution on alignment sensitive systems. */
133
134# if defined(__amd64__) || defined(__x86_64__) || defined(__AMD64__) || defined(_M_X64) || defined(__amd64) \
135 || defined(__i386__) || defined(__x86__) || defined(__X86__) || defined(_M_IX86) || defined(__i386)
136# define strcache2_get_unaligned_16bits(ptr) ( *((const uint16_t *)(ptr)))
137# else
138 /* (endian doesn't matter) */
139# define strcache2_get_unaligned_16bits(ptr) ( (((const uint8_t *)(ptr))[0] << 8) \
140 | (((const uint8_t *)(ptr))[1]) )
141# endif
142 uint32_t hash = len;
143 uint32_t tmp;
144 int rem;
145
146 assert (sizeof (uint8_t) == sizeof (char));
147 if (len == 0)
148 return 0;
149
150 /* main loop, walking on 2 x uint16_t */
151 rem = len & 3;
152 len >>= 2;
153 while (len > 0)
154 {
155 hash += strcache2_get_unaligned_16bits (str);
156 tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
157 hash = (hash << 16) ^ tmp;
158 str += 2 * sizeof (uint16_t);
159 hash += hash >> 11;
160 len--;
161 }
162
163 /* the remainer */
164 switch (rem)
165 {
166 case 3:
167 hash += strcache2_get_unaligned_16bits (str);
168 hash ^= hash << 16;
169 hash ^= str[sizeof (uint16_t)] << 18;
170 hash += hash >> 11;
171 break;
172 case 2:
173 hash += strcache2_get_unaligned_16bits (str);
174 hash ^= hash << 11;
175 hash += hash >> 17;
176 break;
177 case 1:
178 hash += *str;
179 hash ^= hash << 10;
180 hash += hash >> 1;
181 break;
182 }
183
184 /* force "avalanching" of final 127 bits. */
185 hash ^= hash << 3;
186 hash += hash >> 5;
187 hash ^= hash << 4;
188 hash += hash >> 17;
189 hash ^= hash << 25;
190 hash += hash >> 6;
191
192 return hash;
193
194#elif 1
195 /* Note! This implementation is 18% faster than return_STRING_N_HASH_1
196 when running the two 100 times over typical kBuild strings that
197 end up here (did a fprintf here and built kBuild).
198 Compiler was 32-bit gcc 4.0.1, darwin, with -O2. */
199
200 unsigned int hash = 0;
201 if (MY_PREDICT_TRUE(len >= 2))
202 {
203 unsigned int ch0 = *str++;
204 hash = 0;
205 len--;
206 while (len >= 2)
207 {
208 unsigned int ch1 = *str++;
209 hash += ch0 << (ch1 & 0xf);
210
211 ch0 = *str++;
212 hash += ch1 << (ch0 & 0xf);
213
214 len -= 2;
215 }
216 if (len == 1)
217 {
218 unsigned ch1 = *str;
219 hash += ch0 << (ch1 & 0xf);
220
221 hash += ch1;
222 }
223 else
224 hash += ch0;
225 }
226 else if (len)
227 {
228 hash = *str;
229 hash += hash << (hash & 0xf);
230 }
231 else
232 hash = 0;
233 return hash;
234
235#elif 1
236# if 0
237 /* This is SDBM. This specific form/unroll was benchmarked to be 28% faster
238 than return_STRING_N_HASH_1. (Now the weird thing is that putting the (ch)
239 first in the assignment made it noticably slower.)
240
241 However, it is noticably slower in practice, most likely because of more
242 collisions. Hrmpf. */
243
244# define UPDATE_HASH(ch) hash = (hash << 6) + (hash << 16) - hash + (ch)
245 unsigned int hash = 0;
246
247# else
248 /* This is DJB2. This specific form/unroll was benchmarked to be 27%
249 fast than return_STRING_N_HASH_1.
250
251 Ditto. */
252
253# define UPDATE_HASH(ch) hash = (hash << 5) + hash + (ch)
254 unsigned int hash = 5381;
255# endif
256
257
258 while (len >= 4)
259 {
260 UPDATE_HASH (str[0]);
261 UPDATE_HASH (str[1]);
262 UPDATE_HASH (str[2]);
263 UPDATE_HASH (str[3]);
264 str += 4;
265 len -= 4;
266 }
267 switch (len)
268 {
269 default:
270 case 0:
271 return hash;
272 case 1:
273 UPDATE_HASH (str[0]);
274 return hash;
275 case 2:
276 UPDATE_HASH (str[0]);
277 UPDATE_HASH (str[1]);
278 return hash;
279 case 3:
280 UPDATE_HASH (str[0]);
281 UPDATE_HASH (str[1]);
282 UPDATE_HASH (str[2]);
283 return hash;
284 }
285#endif
286}
287
288MY_INLINE unsigned int
289strcache2_case_sensitive_hash_2 (const char *str, unsigned int len)
290{
291#ifndef STRCACHE2_USE_CHAINING
292 unsigned int hash = 0;
293 if (MY_PREDICT_TRUE(len >= 2))
294 {
295 unsigned int ch0 = *str++;
296 hash = 0;
297 len--;
298 while (len >= 2)
299 {
300 unsigned int ch1 = *str++;
301 hash += ch0 << (ch1 & 0x7);
302
303 ch0 = *str++;
304 hash += ch1 << (ch0 & 0x7);
305
306 len -= 2;
307 }
308 if (len == 1)
309 {
310 unsigned ch1 = *str;
311 hash += ch0 << (ch1 & 0x7);
312
313 hash += ch1;
314 }
315 else
316 hash += ch0;
317 }
318 else if (len)
319 {
320 hash = *str;
321 hash += hash << (hash & 0x7);
322 }
323 else
324 hash = 1;
325 hash |= 1;
326 return hash;
327#else /* STRCACHE2_USE_CHAINING */
328 return 1;
329#endif /* STRCACHE2_USE_CHAINING */
330}
331
332MY_INLINE unsigned int
333strcache2_case_insensitive_hash_1 (const char *str, unsigned int len)
334{
335 unsigned int hash = 0;
336 if (MY_PREDICT_TRUE(len >= 2))
337 {
338 unsigned int ch0 = *str++;
339 ch0 = tolower (ch0);
340 hash = 0;
341 len--;
342 while (len >= 2)
343 {
344 unsigned int ch1 = *str++;
345 ch1 = tolower (ch1);
346 hash += ch0 << (ch1 & 0xf);
347
348 ch0 = *str++;
349 ch0 = tolower (ch0);
350 hash += ch1 << (ch0 & 0xf);
351
352 len -= 2;
353 }
354 if (len == 1)
355 {
356 unsigned ch1 = *str;
357 ch1 = tolower (ch1);
358 hash += ch0 << (ch1 & 0xf);
359
360 hash += ch1;
361 }
362 else
363 hash += ch0;
364 }
365 else if (len)
366 {
367 hash = *str;
368 hash += hash << (hash & 0xf);
369 }
370 else
371 hash = 0;
372 return hash;
373}
374
375MY_INLINE unsigned int
376strcache2_case_insensitive_hash_2 (const char *str, unsigned int len)
377{
378#ifndef STRCACHE2_USE_CHAINING
379 unsigned int hash = 0;
380 if (MY_PREDICT_TRUE(len >= 2))
381 {
382 unsigned int ch0 = *str++;
383 ch0 = tolower (ch0);
384 hash = 0;
385 len--;
386 while (len >= 2)
387 {
388 unsigned int ch1 = *str++;
389 ch1 = tolower (ch1);
390 hash += ch0 << (ch1 & 0x7);
391
392 ch0 = *str++;
393 ch0 = tolower (ch0);
394 hash += ch1 << (ch0 & 0x7);
395
396 len -= 2;
397 }
398 if (len == 1)
399 {
400 unsigned ch1 = *str;
401 ch1 = tolower (ch1);
402 hash += ch0 << (ch1 & 0x7);
403
404 hash += ch1;
405 }
406 else
407 hash += ch0;
408 }
409 else if (len)
410 {
411 hash = *str;
412 hash += hash << (hash & 0x7);
413 }
414 else
415 hash = 1;
416 hash |= 1;
417 return hash;
418#else /* STRCACHE2_USE_CHAINING */
419 return 1;
420#endif /* STRCACHE2_USE_CHAINING */
421}
422
423MY_INLINE int
424strcache2_memcmp_inline_short (const char *xs, const char *ys, unsigned int length)
425{
426 if (length <= 8)
427 {
428 /* short string compare - ~50% of the kBuild calls. */
429 assert ( !((size_t)ys & 3) );
430 if (!((size_t)xs & 3))
431 {
432 /* aligned */
433 int result;
434 switch (length)
435 {
436 default: /* memcmp for longer strings */
437 return memcmp (xs, ys, length);
438 case 8:
439 result = *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
440 result |= *(int32_t*)xs - *(int32_t*)ys;
441 return result;
442 case 7:
443 result = xs[6] - ys[6];
444 result |= xs[5] - ys[5];
445 result |= xs[4] - ys[4];
446 result |= *(int32_t*)xs - *(int32_t*)ys;
447 return result;
448 case 6:
449 result = xs[5] - ys[5];
450 result |= xs[4] - ys[4];
451 result |= *(int32_t*)xs - *(int32_t*)ys;
452 return result;
453 case 5:
454 result = xs[4] - ys[4];
455 result |= *(int32_t*)xs - *(int32_t*)ys;
456 return result;
457 case 4:
458 return *(int32_t*)xs - *(int32_t*)ys;
459 case 3:
460 result = xs[2] - ys[2];
461 result |= xs[1] - ys[1];
462 result |= xs[0] - ys[0];
463 return result;
464 case 2:
465 result = xs[1] - ys[1];
466 result |= xs[0] - ys[0];
467 return result;
468 case 1:
469 return *xs - *ys;
470 case 0:
471 return 0;
472 }
473 }
474 else
475 {
476 /* unaligned */
477 int result = 0;
478 switch (length)
479 {
480 case 8: result |= xs[7] - ys[7];
481 case 7: result |= xs[6] - ys[6];
482 case 6: result |= xs[5] - ys[5];
483 case 5: result |= xs[4] - ys[4];
484 case 4: result |= xs[3] - ys[3];
485 case 3: result |= xs[2] - ys[2];
486 case 2: result |= xs[1] - ys[1];
487 case 1: result |= xs[0] - ys[0];
488 case 0:
489 return result;
490 }
491 }
492 }
493
494 /* memcmp for longer strings */
495 return memcmp (xs, ys, length);
496}
497
498MY_INLINE int
499strcache2_memcmp_inlined (const char *xs, const char *ys, unsigned int length)
500{
501#ifndef ELECTRIC_HEAP
502 assert ( !((size_t)ys & 3) );
503#endif
504 if (!((size_t)xs & 3))
505 {
506 int result;
507 /* aligned */
508 while (length >= 8)
509 {
510 result = *(int32_t*)xs - *(int32_t*)ys;
511 result |= *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
512 if (MY_PREDICT_FALSE(result))
513 return result;
514 xs += 8;
515 ys += 8;
516 length -= 8;
517 }
518 switch (length)
519 {
520 case 7:
521 result = *(int32_t*)xs - *(int32_t*)ys;
522 result |= xs[6] - ys[6];
523 result |= xs[5] - ys[5];
524 result |= xs[4] - ys[4];
525 return result;
526 case 6:
527 result = *(int32_t*)xs - *(int32_t*)ys;
528 result |= xs[5] - ys[5];
529 result |= xs[4] - ys[4];
530 return result;
531 case 5:
532 result = *(int32_t*)xs - *(int32_t*)ys;
533 result |= xs[4] - ys[4];
534 return result;
535 case 4:
536 return *(int32_t*)xs - *(int32_t*)ys;
537 case 3:
538 result = xs[2] - ys[2];
539 result |= xs[1] - ys[1];
540 result |= xs[0] - ys[0];
541 return result;
542 case 2:
543 result = xs[1] - ys[1];
544 result |= xs[0] - ys[0];
545 return result;
546 case 1:
547 return *xs - *ys;
548 default:
549 case 0:
550 return 0;
551 }
552 }
553 else
554 {
555 /* unaligned */
556 int result;
557 while (length >= 8)
558 {
559#if defined(__i386__) || defined(__x86_64__)
560 result = ( ((int32_t)xs[3] << 24)
561 | ((int32_t)xs[2] << 16)
562 | ((int32_t)xs[1] << 8)
563 | xs[0] )
564 - *(int32_t*)ys;
565 result |= ( ((int32_t)xs[7] << 24)
566 | ((int32_t)xs[6] << 16)
567 | ((int32_t)xs[5] << 8)
568 | xs[4] )
569 - *(int32_t*)(ys + 4);
570#else
571 result = xs[3] - ys[3];
572 result |= xs[2] - ys[2];
573 result |= xs[1] - ys[1];
574 result |= xs[0] - ys[0];
575 result |= xs[7] - ys[7];
576 result |= xs[6] - ys[6];
577 result |= xs[5] - ys[5];
578 result |= xs[4] - ys[4];
579#endif
580 if (MY_PREDICT_FALSE(result))
581 return result;
582 xs += 8;
583 ys += 8;
584 length -= 8;
585 }
586 result = 0;
587 switch (length)
588 {
589 case 7: result |= xs[6] - ys[6];
590 case 6: result |= xs[5] - ys[5];
591 case 5: result |= xs[4] - ys[4];
592 case 4: result |= xs[3] - ys[3];
593 case 3: result |= xs[2] - ys[2];
594 case 2: result |= xs[1] - ys[1];
595 case 1: result |= xs[0] - ys[0];
596 return result;
597 default:
598 case 0:
599 return 0;
600 }
601 }
602}
603
604MY_INLINE int
605strcache2_is_equal (struct strcache2 *cache, struct strcache2_entry const *entry,
606 const char *str, unsigned int length, unsigned int hash1)
607{
608 assert (!cache->case_insensitive);
609
610 /* the simple stuff first. */
611 if ( entry->hash1 != hash1
612 || entry->length != length)
613 return 0;
614
615#if 0
616 return memcmp (str, entry + 1, length) == 0;
617#elif 1
618 return strcache2_memcmp_inlined (str, (const char *)(entry + 1), length) == 0;
619#else
620 return strcache2_memcmp_inline_short (str, (const char *)(entry + 1), length) == 0;
621#endif
622}
623
624MY_INLINE int
625strcache2_is_iequal (struct strcache2 *cache, struct strcache2_entry const *entry,
626 const char *str, unsigned int length, unsigned int hash1)
627{
628 assert (cache->case_insensitive);
629
630 /* the simple stuff first. */
631 if ( entry->hash1 != hash1
632 || entry->length != length)
633 return 0;
634
635#if defined(_MSC_VER) || defined(__OS2__)
636 return _memicmp (entry + 1, str, length) == 0;
637#else
638 return strncasecmp ((const char *)(entry + 1), str, length) == 0;
639#endif
640}
641
642static void
643strcache2_rehash (struct strcache2 *cache)
644{
645 unsigned int src = cache->hash_size;
646 struct strcache2_entry **src_tab = cache->hash_tab;
647 struct strcache2_entry **dst_tab;
648#ifndef STRCACHE2_USE_MASK
649 unsigned int hash_shift;
650#endif
651
652 /* Allocate a new hash table twice the size of the current. */
653 cache->hash_size <<= 1;
654#ifdef STRCACHE2_USE_MASK
655 cache->hash_mask <<= 1;
656 cache->hash_mask |= 1;
657#else
658 for (hash_shift = 1; (1U << hash_shift) < cache->hash_size; hash_shift++)
659 /* nothing */;
660 cache->hash_div = strcache2_find_prime (hash_shift);
661#endif
662 cache->rehash_count <<= 1;
663 cache->hash_tab = dst_tab = (struct strcache2_entry **)
664 xmalloc (cache->hash_size * sizeof (struct strcache2_entry *));
665 memset (dst_tab, '\0', cache->hash_size * sizeof (struct strcache2_entry *));
666
667 /* Copy the entries from the old to the new hash table. */
668#ifdef STRCACHE2_USE_CHAINING
669 cache->collision_count = 0;
670 while (src-- > 0)
671 {
672 struct strcache2_entry *entry = src_tab[src];
673 while (entry)
674 {
675 struct strcache2_entry *next = entry->next;
676 unsigned int dst = STRCACHE2_MOD_IT (cache, entry->hash1);
677 if ((entry->next = dst_tab[dst]) != 0)
678 cache->collision_count++;
679 dst_tab[dst] = entry;
680
681 entry = next;
682 }
683 }
684#else /* !STRCACHE2_USE_CHAINING */
685 while (src-- > 0)
686 {
687 struct strcache2_entry *entry = src_tab[src];
688 if (entry)
689 {
690 unsigned int dst = STRCACHE2_MOD_IT (cache, entry->hash1);
691 if (dst_tab[dst])
692 {
693 unsigned int hash2 = entry->hash2;
694 if (!hash2)
695 entry->hash2 = hash2 = cache->case_insensitive
696 ? strcache2_case_insensitive_hash_2 ((const char *)(entry + 1), entry->length)
697 : strcache2_case_sensitive_hash_2 ((const char *)(entry + 1), entry->length);
698 dst += hash2;
699 dst = STRCACHE2_MOD_IT (cache, dst);
700 while (dst_tab[dst])
701 {
702 dst += hash2;
703 dst = STRCACHE2_MOD_IT (cache, dst);
704 }
705 }
706 dst_tab[dst] = entry;
707 }
708 }
709#endif /* !STRCACHE2_USE_CHAINING */
710
711 /* That's it, just free the old table and we're done. */
712 free (src_tab);
713}
714
715static struct strcache2_seg *
716strcache2_new_seg (struct strcache2 *cache, unsigned int minlen)
717{
718 struct strcache2_seg *seg;
719 size_t size;
720 size_t off;
721
722 size = cache->def_seg_size;
723 if (size < (size_t)minlen + sizeof (struct strcache2_seg) + STRCACHE2_ENTRY_ALIGNMENT)
724 {
725 size = (size_t)minlen * 2;
726 size = (size + 0xfff) & ~(size_t)0xfff;
727 }
728
729 seg = xmalloc (size);
730 seg->start = (char *)(seg + 1);
731 seg->size = size - sizeof (struct strcache2_seg);
732 off = (size_t)seg->start & (STRCACHE2_ENTRY_ALIGNMENT - 1);
733 if (off)
734 {
735 seg->start += off;
736 seg->size -= off;
737 }
738 assert (seg->size > minlen);
739 seg->cursor = seg->start;
740 seg->avail = seg->size;
741
742 seg->next = cache->seg_head;
743 cache->seg_head = seg;
744
745 return seg;
746}
747
748/* Internal worker that enters a new string into the cache. */
749static const char *
750strcache2_enter_string (struct strcache2 *cache, unsigned int idx,
751 const char *str, unsigned int length,
752 unsigned int hash1, unsigned hash2)
753{
754 struct strcache2_entry *entry;
755 struct strcache2_seg *seg;
756 unsigned int size;
757 char *str_copy;
758
759 /* Allocate space for the string. */
760
761 size = length + 1 + sizeof (struct strcache2_entry);
762 size = (size + STRCACHE2_ENTRY_ALIGNMENT - 1) & ~(STRCACHE2_ENTRY_ALIGNMENT - 1U);
763
764 seg = cache->seg_head;
765 if (MY_PREDICT_FALSE(seg->avail < size))
766 {
767 do
768 seg = seg->next;
769 while (seg && seg->avail < size);
770 if (!seg)
771 seg = strcache2_new_seg (cache, size);
772 }
773
774 entry = (struct strcache2_entry *) seg->cursor;
775 assert ((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1));
776 seg->cursor += size;
777 seg->avail -= size;
778
779 /* Setup the entry, copy the string and insert it into the hash table. */
780
781 entry->user = NULL;
782 entry->length = length;
783 entry->hash1 = hash1;
784#ifndef STRCACHE2_USE_CHAINING
785 entry->hash2 = hash2;
786#endif
787 str_copy = (char *) memcpy (entry + 1, str, length);
788 str_copy[length] = '\0';
789
790#ifdef STRCACHE2_USE_CHAINING
791 if ((entry->next = cache->hash_tab[idx]) != 0)
792 cache->collision_count++;
793#endif /* STRCACHE2_USE_CHAINING */
794 cache->hash_tab[idx] = entry;
795 cache->count++;
796 if (cache->count >= cache->rehash_count)
797 strcache2_rehash (cache);
798
799 return str_copy;
800}
801
802/* The public add string interface. */
803const char *
804strcache2_add (struct strcache2 *cache, const char *str, unsigned int length)
805{
806 struct strcache2_entry const *entry;
807 unsigned int hash1 = strcache2_case_sensitive_hash_1 (str, length);
808 unsigned int hash2 = 0;
809 unsigned int idx;
810
811 assert (!cache->case_insensitive);
812
813 cache->lookup_count++;
814
815 /* Lookup the entry in the hash table, hoping for an
816 early match. If not found, enter the string at IDX. */
817 idx = STRCACHE2_MOD_IT (cache, hash1);
818 entry = cache->hash_tab[idx];
819 if (!entry)
820 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
821 if (strcache2_is_equal (cache, entry, str, length, hash1))
822 return (const char *)(entry + 1);
823 cache->collision_1st_count++;
824
825#ifdef STRCACHE2_USE_CHAINING
826 entry = entry->next;
827#else /* !STRCACHE2_USE_CHAINING */
828 hash2 = strcache2_case_sensitive_hash_2 (str, length);
829 idx += hash2;
830 idx = STRCACHE2_MOD_IT (cache, idx);
831 entry = cache->hash_tab[idx];
832#endif /* !STRCACHE2_USE_CHAINING */
833 if (!entry)
834 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
835 if (strcache2_is_equal (cache, entry, str, length, hash1))
836 return (const char *)(entry + 1);
837 cache->collision_2nd_count++;
838
839 /* (We've established hash2, so we can do a straight loop now.) */
840 for (;;)
841 {
842#ifdef STRCACHE2_USE_CHAINING
843 entry = entry->next;
844#else /* !STRCACHE2_USE_CHAINING */
845 idx += hash2;
846 idx = STRCACHE2_MOD_IT (cache, idx);
847 entry = cache->hash_tab[idx];
848#endif /* !STRCACHE2_USE_CHAINING */
849 if (!entry)
850 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
851 if (strcache2_is_equal (cache, entry, str, length, hash1))
852 return (const char *)(entry + 1);
853 cache->collision_3rd_count++;
854 }
855 /* not reached */
856}
857
858/* The public add string interface for prehashed strings.
859 Use strcache2_hash_str to calculate the hash of a string. */
860const char *
861strcache2_add_hashed (struct strcache2 *cache, const char *str, unsigned int length,
862 unsigned int hash1, unsigned int hash2)
863{
864 struct strcache2_entry const *entry;
865 unsigned int idx;
866#ifndef NDEBUG
867 unsigned correct_hash;
868
869 assert (!cache->case_insensitive);
870 correct_hash = strcache2_case_sensitive_hash_1 (str, length);
871 MY_ASSERT_MSG (hash1 == correct_hash, ("%#x != %#x\n", hash1, correct_hash));
872 if (hash2)
873 {
874 correct_hash = strcache2_case_sensitive_hash_2 (str, length);
875 MY_ASSERT_MSG (hash2 == correct_hash, ("%#x != %#x\n", hash2, correct_hash));
876 }
877#endif /* NDEBUG */
878
879 cache->lookup_count++;
880
881 /* Lookup the entry in the hash table, hoping for an
882 early match. If not found, enter the string at IDX. */
883 idx = STRCACHE2_MOD_IT (cache, hash1);
884 entry = cache->hash_tab[idx];
885 if (!entry)
886 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
887 if (strcache2_is_equal (cache, entry, str, length, hash1))
888 return (const char *)(entry + 1);
889 cache->collision_1st_count++;
890
891#ifdef STRCACHE2_USE_CHAINING
892 entry = entry->next;
893#else /* !STRCACHE2_USE_CHAINING */
894 if (!hash2)
895 hash2 = strcache2_case_sensitive_hash_2 (str, length);
896 idx += hash2;
897 idx = STRCACHE2_MOD_IT (cache, idx);
898 entry = cache->hash_tab[idx];
899#endif /* !STRCACHE2_USE_CHAINING */
900 if (!entry)
901 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
902 if (strcache2_is_equal (cache, entry, str, length, hash1))
903 return (const char *)(entry + 1);
904 cache->collision_2nd_count++;
905
906 /* (We've established hash2, so we can do a straight loop now.) */
907 for (;;)
908 {
909#ifdef STRCACHE2_USE_CHAINING
910 entry = entry->next;
911#else /* !STRCACHE2_USE_CHAINING */
912 idx += hash2;
913 idx = STRCACHE2_MOD_IT (cache, idx);
914 entry = cache->hash_tab[idx];
915#endif /* !STRCACHE2_USE_CHAINING */
916 if (!entry)
917 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
918 if (strcache2_is_equal (cache, entry, str, length, hash1))
919 return (const char *)(entry + 1);
920 cache->collision_3rd_count++;
921 }
922 /* not reached */
923}
924
925/* The public lookup (case sensitive) string interface. */
926const char *
927strcache2_lookup (struct strcache2 *cache, const char *str, unsigned int length)
928{
929 struct strcache2_entry const *entry;
930 unsigned int hash1 = strcache2_case_sensitive_hash_1 (str, length);
931#ifndef STRCACHE2_USE_CHAINING
932 unsigned int hash2;
933#endif
934 unsigned int idx;
935
936 assert (!cache->case_insensitive);
937
938 cache->lookup_count++;
939
940 /* Lookup the entry in the hash table, hoping for an
941 early match. */
942 idx = STRCACHE2_MOD_IT (cache, hash1);
943 entry = cache->hash_tab[idx];
944 if (!entry)
945 return NULL;
946 if (strcache2_is_equal (cache, entry, str, length, hash1))
947 return (const char *)(entry + 1);
948 cache->collision_1st_count++;
949
950#ifdef STRCACHE2_USE_CHAINING
951 entry = entry->next;
952#else /* !STRCACHE2_USE_CHAINING */
953 hash2 = strcache2_case_sensitive_hash_2 (str, length);
954 idx += hash2;
955 idx = STRCACHE2_MOD_IT (cache, idx);
956 entry = cache->hash_tab[idx];
957#endif /* !STRCACHE2_USE_CHAINING */
958 if (!entry)
959 return NULL;
960 if (strcache2_is_equal (cache, entry, str, length, hash1))
961 return (const char *)(entry + 1);
962 cache->collision_2nd_count++;
963
964 /* (We've established hash2, so we can do a straight loop now.) */
965 for (;;)
966 {
967#ifdef STRCACHE2_USE_CHAINING
968 entry = entry->next;
969#else /* !STRCACHE2_USE_CHAINING */
970 idx += hash2;
971 idx = STRCACHE2_MOD_IT (cache, idx);
972 entry = cache->hash_tab[idx];
973#endif /* !STRCACHE2_USE_CHAINING */
974 if (!entry)
975 return NULL;
976 if (strcache2_is_equal (cache, entry, str, length, hash1))
977 return (const char *)(entry + 1);
978 cache->collision_3rd_count++;
979 }
980 /* not reached */
981}
982
983#if defined(HAVE_CASE_INSENSITIVE_FS)
984
985/* The public add string interface for case insensitive strings. */
986const char *
987strcache2_iadd (struct strcache2 *cache, const char *str, unsigned int length)
988{
989 struct strcache2_entry const *entry;
990 unsigned int hash1 = strcache2_case_insensitive_hash_1 (str, length);
991 unsigned int hash2 = 0;
992 unsigned int idx;
993
994 assert (!cache->case_insensitive);
995
996 cache->lookup_count++;
997
998 /* Lookup the entry in the hash table, hoping for an
999 early match. If not found, enter the string at IDX. */
1000 idx = STRCACHE2_MOD_IT (cache, hash1);
1001 entry = cache->hash_tab[idx];
1002 if (!entry)
1003 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
1004 if (strcache2_is_equal (cache, entry, str, length, hash1))
1005 return (const char *)(entry + 1);
1006 cache->collision_1st_count++;
1007
1008#ifdef STRCACHE2_USE_CHAINING
1009 entry = entry->next;
1010#else /* !STRCACHE2_USE_CHAINING */
1011 hash2 = strcache2_case_insensitive_hash_2 (str, length);
1012 idx += hash2;
1013 idx = STRCACHE2_MOD_IT (cache, idx);
1014 entry = cache->hash_tab[idx];
1015#endif /* !STRCACHE2_USE_CHAINING */
1016 if (!entry)
1017 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
1018 if (strcache2_is_equal (cache, entry, str, length, hash1))
1019 return (const char *)(entry + 1);
1020 cache->collision_2nd_count++;
1021
1022 /* (We've established hash2, so we can do a straight loop now.) */
1023 for (;;)
1024 {
1025#ifdef STRCACHE2_USE_CHAINING
1026 entry = entry->next;
1027#else /* !STRCACHE2_USE_CHAINING */
1028 idx += hash2;
1029 idx = STRCACHE2_MOD_IT (cache, idx);
1030 entry = cache->hash_tab[idx];
1031#endif /* !STRCACHE2_USE_CHAINING */
1032 if (!entry)
1033 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
1034 if (strcache2_is_equal (cache, entry, str, length, hash1))
1035 return (const char *)(entry + 1);
1036 cache->collision_3rd_count++;
1037 }
1038 /* not reached */
1039}
1040
1041/* The public add string interface for prehashed case insensitive strings.
1042 Use strcache2_hash_istr to calculate the hash of a string. */
1043const char *
1044strcache2_iadd_hashed (struct strcache2 *cache, const char *str, unsigned int length,
1045 unsigned int hash1, unsigned int hash2)
1046{
1047 struct strcache2_entry const *entry;
1048 unsigned int idx;
1049#ifndef NDEBUG
1050 unsigned correct_hash;
1051
1052 assert (!cache->case_insensitive);
1053 correct_hash = strcache2_case_insensitive_hash_1 (str, length);
1054 MY_ASSERT_MSG (hash1 == correct_hash, ("%#x != %#x\n", hash1, correct_hash));
1055 if (hash2)
1056 {
1057 correct_hash = strcache2_case_insensitive_hash_2 (str, length);
1058 MY_ASSERT_MSG (hash2 == correct_hash, ("%#x != %#x\n", hash2, correct_hash));
1059 }
1060#endif /* NDEBUG */
1061
1062 cache->lookup_count++;
1063
1064 /* Lookup the entry in the hash table, hoping for an
1065 early match. If not found, enter the string at IDX. */
1066 idx = STRCACHE2_MOD_IT (cache, hash1);
1067 entry = cache->hash_tab[idx];
1068 if (!entry)
1069 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
1070 if (strcache2_is_equal (cache, entry, str, length, hash1))
1071 return (const char *)(entry + 1);
1072 cache->collision_1st_count++;
1073
1074#ifdef STRCACHE2_USE_CHAINING
1075 entry = entry->next;
1076#else /* !STRCACHE2_USE_CHAINING */
1077 if (!hash2)
1078 hash2 = strcache2_case_insensitive_hash_2 (str, length);
1079 idx += hash2;
1080 idx = STRCACHE2_MOD_IT (cache, idx);
1081 entry = cache->hash_tab[idx];
1082#endif /* !STRCACHE2_USE_CHAINING */
1083 if (!entry)
1084 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
1085 if (strcache2_is_equal (cache, entry, str, length, hash1))
1086 return (const char *)(entry + 1);
1087 cache->collision_2nd_count++;
1088
1089 /* (We've established hash2, so we can do a straight loop now.) */
1090 for (;;)
1091 {
1092#ifdef STRCACHE2_USE_CHAINING
1093 entry = entry->next;
1094#else /* !STRCACHE2_USE_CHAINING */
1095 idx += hash2;
1096 idx = STRCACHE2_MOD_IT (cache, idx);
1097 entry = cache->hash_tab[idx];
1098#endif /* !STRCACHE2_USE_CHAINING */
1099 if (!entry)
1100 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
1101 if (strcache2_is_equal (cache, entry, str, length, hash1))
1102 return (const char *)(entry + 1);
1103 cache->collision_3rd_count++;
1104 }
1105 /* not reached */
1106}
1107
1108/* The public lookup (case insensitive) string interface. */
1109const char *
1110strcache2_ilookup (struct strcache2 *cache, const char *str, unsigned int length)
1111{
1112 struct strcache2_entry const *entry;
1113 unsigned int hash1 = strcache2_case_insensitive_hash_1 (str, length);
1114#ifndef STRCACHE2_USE_CHAINING
1115 unsigned int hash2;
1116#endif
1117 unsigned int idx;
1118
1119 assert (!cache->case_insensitive);
1120
1121 cache->lookup_count++;
1122
1123 /* Lookup the entry in the hash table, hoping for an
1124 early match. */
1125 idx = STRCACHE2_MOD_IT (cache, hash1);
1126 entry = cache->hash_tab[idx];
1127 if (!entry)
1128 return NULL;
1129 if (strcache2_is_equal (cache, entry, str, length, hash1))
1130 return (const char *)(entry + 1);
1131 cache->collision_1st_count++;
1132
1133#ifdef STRCACHE2_USE_CHAINING
1134 entry = entry->next;
1135#else /* !STRCACHE2_USE_CHAINING */
1136 hash2 = strcache2_case_insensitive_hash_2 (str, length);
1137 idx += hash2;
1138 idx = STRCACHE2_MOD_IT (cache, idx);
1139 entry = cache->hash_tab[idx];
1140#endif /* !STRCACHE2_USE_CHAINING */
1141 if (!entry)
1142 return NULL;
1143 if (strcache2_is_equal (cache, entry, str, length, hash1))
1144 return (const char *)(entry + 1);
1145 cache->collision_2nd_count++;
1146
1147 /* (We've established hash2, so we can do a straight loop now.) */
1148 for (;;)
1149 {
1150#ifdef STRCACHE2_USE_CHAINING
1151 entry = entry->next;
1152#else /* !STRCACHE2_USE_CHAINING */
1153 idx += hash2;
1154 idx = STRCACHE2_MOD_IT (cache, idx);
1155 entry = cache->hash_tab[idx];
1156#endif /* !STRCACHE2_USE_CHAINING */
1157 if (!entry)
1158 return NULL;
1159 if (strcache2_is_equal (cache, entry, str, length, hash1))
1160 return (const char *)(entry + 1);
1161 cache->collision_3rd_count++;
1162 }
1163 /* not reached */
1164}
1165
1166#endif /* HAVE_CASE_INSENSITIVE_FS */
1167
1168/* Is the given string cached? returns 1 if it is, 0 if it isn't. */
1169int
1170strcache2_is_cached (struct strcache2 *cache, const char *str)
1171{
1172 /* Check mandatory alignment first. */
1173 if (!((size_t)str & (sizeof (void *) - 1)))
1174 {
1175 /* Check the segment list and consider the question answered if the
1176 string is within one of them. (Could check it more thoroughly...) */
1177 struct strcache2_seg const *seg;
1178 for (seg = cache->seg_head; seg; seg = seg->next)
1179 if ((size_t)(str - seg->start) < seg->size)
1180 return 1;
1181 }
1182
1183 return 0;
1184}
1185
1186
1187/* Verify the integrity of the specified string, returning 0 if OK. */
1188int
1189strcache2_verify_entry (struct strcache2 *cache, const char *str)
1190{
1191 struct strcache2_entry const *entry;
1192 unsigned hash;
1193 const char *end;
1194
1195 if ((size_t)str & (sizeof (void *) - 1))
1196 {
1197 fprintf (stderr,
1198 "strcache2[%s]: missaligned string %p\nstring: %s\n",
1199 cache->name, (void *)str, str);
1200 return -1;
1201 }
1202
1203 entry = (struct strcache2_entry const *)str - 1;
1204 end = memchr (str, '\0', entry->length);
1205 if ((size_t)(end - str) == entry->length)
1206 {
1207 fprintf (stderr,
1208 "strcache2[%s]: corrupt entry %p, length: %lu, expected %u;\nstring: %s\n",
1209 cache->name, (void *)entry, (unsigned long)(end - str), entry->length, str);
1210 return -1;
1211 }
1212
1213 hash = cache->case_insensitive
1214 ? strcache2_case_insensitive_hash_1 (str, entry->length)
1215 : strcache2_case_sensitive_hash_1 (str, entry->length);
1216 if (hash != entry->hash1)
1217 {
1218 fprintf (stderr,
1219 "strcache2[%s]: corrupt entry %p, hash#1: %x, expected %x;\nstring: %s\n",
1220 cache->name, (void *)entry, hash, entry->hash1, str);
1221 return -1;
1222 }
1223
1224#ifndef STRCACHE2_USE_CHAINING
1225 if (entry->hash2)
1226 {
1227 hash = cache->case_insensitive
1228 ? strcache2_case_insensitive_hash_2 (str, entry->length)
1229 : strcache2_case_sensitive_hash_2 (str, entry->length);
1230 if (hash != entry->hash2)
1231 {
1232 fprintf (stderr,
1233 "strcache2[%s]: corrupt entry %p, hash#2: %x, expected %x;\nstring: %s\n",
1234 cache->name, (void *)entry, hash, entry->hash2, str);
1235 return -1;
1236 }
1237 }
1238#endif
1239
1240 return 0;
1241}
1242
1243#ifndef STRCACHE2_USE_CHAINING
1244/* Fallback for calculating and returning the 2nd hash. */
1245unsigned int
1246strcache2_get_hash2_fallback (struct strcache2 *cache, const char *str)
1247{
1248 struct strcache2_entry *entry = (struct strcache2_entry *) str - 1;
1249 unsigned hash2 = cache->case_insensitive
1250 ? strcache2_case_insensitive_hash_2 (str, entry->length)
1251 : strcache2_case_sensitive_hash_2 (str, entry->length);
1252 entry->hash2 = hash2;
1253 return hash2;
1254}
1255#endif /* !STRCACHE2_USE_CHAINING */
1256
1257/* Calculates the case sensitive hash values of the string.
1258 The first hash is returned, the other is put at HASH2P. */
1259unsigned int strcache2_hash_str (const char *str, unsigned int length, unsigned int *hash2p)
1260{
1261#ifndef STRCACHE2_USE_CHAINING
1262 *hash2p = strcache2_case_sensitive_hash_2 (str, length);
1263#else
1264 *hash2p = 1;
1265#endif
1266 return strcache2_case_sensitive_hash_1 (str, length);
1267}
1268
1269/* Calculates the case insensitive hash values of the string.
1270 The first hash is returned, the other is put at HASH2P. */
1271unsigned int strcache2_hash_istr (const char *str, unsigned int length, unsigned int *hash2p)
1272{
1273#ifndef STRCACHE2_USE_CHAINING
1274 *hash2p = strcache2_case_insensitive_hash_2 (str, length);
1275#else
1276 *hash2p = 1;
1277#endif
1278 return strcache2_case_insensitive_hash_1 (str, length);
1279}
1280
1281
1282
1283/* Initalizes a new cache. */
1284void
1285strcache2_init (struct strcache2 *cache, const char *name, unsigned int size,
1286 unsigned int def_seg_size, int case_insensitive, int thread_safe)
1287{
1288 unsigned hash_shift;
1289 assert (!thread_safe);
1290
1291 /* calc the size as a power of two */
1292 if (!size)
1293 hash_shift = STRCACHE2_HASH_SHIFT;
1294 else
1295 {
1296 assert (size <= (~0U / 2 + 1));
1297 for (hash_shift = 8; (1U << hash_shift) < size; hash_shift++)
1298 /* nothing */;
1299 }
1300
1301 /* adjust the default segment size */
1302 if (!def_seg_size)
1303 def_seg_size = STRCACHE2_SEG_SIZE;
1304 else if (def_seg_size < sizeof (struct strcache2_seg) * 10)
1305 def_seg_size = sizeof (struct strcache2_seg) * 10;
1306 else if ((def_seg_size & 0xfff) < 0xf00)
1307 def_seg_size = ((def_seg_size + 0xfff) & ~0xfffU);
1308
1309
1310 /* init the structure. */
1311 cache->case_insensitive = case_insensitive;
1312#ifdef STRCACHE2_USE_MASK
1313 cache->hash_mask = (1U << hash_shift) - 1U;
1314#else
1315 cache->hash_div = strcache2_find_prime(hash_shift);
1316#endif
1317 cache->count = 0;
1318#ifdef STRCACHE2_USE_CHAINING
1319 cache->collision_count = 0;
1320#endif
1321 cache->lookup_count = 0;
1322 cache->collision_1st_count = 0;
1323 cache->collision_2nd_count = 0;
1324 cache->collision_3rd_count = 0;
1325 cache->rehash_count = (1U << hash_shift) / 4 * 3; /* rehash at 75% */
1326 cache->init_size = 1U << hash_shift;
1327 cache->hash_size = 1U << hash_shift;
1328 cache->def_seg_size = def_seg_size;
1329 cache->lock = NULL;
1330 cache->name = name;
1331
1332 /* allocate the hash table and first segment. */
1333 cache->hash_tab = (struct strcache2_entry **)
1334 xmalloc (cache->init_size * sizeof (struct strcache2_entry *));
1335 memset (cache->hash_tab, '\0', cache->init_size * sizeof (struct strcache2_entry *));
1336 strcache2_new_seg (cache, 0);
1337
1338 /* link it */
1339 cache->next = strcache_head;
1340 strcache_head = cache;
1341}
1342
1343
1344/* Terminates a string cache, freeing all memory and other
1345 associated resources. */
1346void
1347strcache2_term (struct strcache2 *cache)
1348{
1349 /* unlink it */
1350 if (strcache_head == cache)
1351 strcache_head = cache->next;
1352 else
1353 {
1354 struct strcache2 *prev = strcache_head;
1355 while (prev->next != cache)
1356 prev = prev->next;
1357 assert (prev);
1358 prev->next = cache->next;
1359 }
1360
1361 /* free the memory segments */
1362 do
1363 {
1364 void *free_it = cache->seg_head;
1365 cache->seg_head = cache->seg_head->next;
1366 free (free_it);
1367 }
1368 while (cache->seg_head);
1369
1370 /* free the hash and clear the structure. */
1371 free (cache->hash_tab);
1372 memset (cache, '\0', sizeof (struct strcache2));
1373}
1374
1375/* Print statistics a string cache. */
1376void
1377strcache2_print_stats (struct strcache2 *cache, const char *prefix)
1378{
1379 unsigned int seg_count = 0;
1380 unsigned long seg_total_bytes = 0;
1381 unsigned long seg_avg_bytes;
1382 unsigned long seg_avail_bytes = 0;
1383 unsigned long seg_max_bytes = 0;
1384 struct strcache2_seg *seg;
1385 unsigned int str_count = 0;
1386 unsigned long str_total_len = 0;
1387 unsigned int str_avg_len;
1388 unsigned int str_min_len = ~0U;
1389 unsigned int str_max_len = 0;
1390 unsigned int idx;
1391 unsigned int rehashes;
1392#ifdef STRCACHE2_USE_CHAINING
1393 unsigned int chain_depths[32];
1394#endif
1395
1396 printf (_("\n%s strcache2: %s\n"), prefix, cache->name);
1397
1398 /* Segment statistics. */
1399 for (seg = cache->seg_head; seg; seg = seg->next)
1400 {
1401 seg_count++;
1402 seg_total_bytes += seg->size;
1403 seg_avail_bytes += seg->avail;
1404 if (seg->size > seg_max_bytes)
1405 seg_max_bytes = seg->size;
1406 }
1407 seg_avg_bytes = seg_total_bytes / seg_count;
1408 printf (_("%s %u segments: total = %lu / max = %lu / avg = %lu / def = %u avail = %lu\n"),
1409 prefix, seg_count, seg_total_bytes, seg_max_bytes, seg_avg_bytes,
1410 cache->def_seg_size, seg_avail_bytes);
1411
1412 /* String statistics. */
1413#ifdef STRCACHE2_USE_CHAINING
1414 memset (chain_depths, '\0', sizeof (chain_depths));
1415#endif
1416 idx = cache->hash_size;
1417 while (idx-- > 0)
1418 {
1419 struct strcache2_entry const *entry = cache->hash_tab[idx];
1420#ifdef STRCACHE2_USE_CHAINING
1421 unsigned int depth = 0;
1422 for (; entry != 0; entry = entry->next, depth++)
1423#else
1424 if (entry)
1425#endif
1426 {
1427 unsigned int length = entry->length;
1428 str_total_len += length;
1429 if (length > str_max_len)
1430 str_max_len = length;
1431 if (length < str_min_len)
1432 str_min_len = length;
1433 str_count++;
1434 }
1435#ifdef STRCACHE2_USE_CHAINING
1436 chain_depths[depth >= 32 ? 31 : depth]++;
1437#endif
1438 }
1439 str_avg_len = cache->count ? str_total_len / cache->count : 0;
1440 printf (_("%s %u strings: total len = %lu / max = %u / avg = %u / min = %u\n"),
1441 prefix, cache->count, str_total_len, str_max_len, str_avg_len, str_min_len);
1442 if (str_count != cache->count)
1443 printf (_("%s string count mismatch! cache->count=%u, actual count is %u\n"), prefix,
1444 cache->count, str_count);
1445
1446 /* Hash statistics. */
1447 idx = cache->init_size;
1448 rehashes = 0;
1449 while (idx < cache->hash_size)
1450 {
1451 idx *= 2;
1452 rehashes++;
1453 }
1454
1455#ifdef STRCACHE2_USE_MASK
1456 printf (_("%s hash size = %u mask = %#x rehashed %u times lookups = %lu\n"),
1457 prefix, cache->hash_size, cache->hash_mask, rehashes, cache->lookup_count);
1458#else
1459 printf (_("%s hash size = %u div = %#x rehashed %u times lookups = %lu\n"),
1460 prefix, cache->hash_size, cache->hash_div, rehashes, cache->lookup_count);
1461#endif
1462 printf (_("%s hash collisions 1st = %lu (%u%%) 2nd = %lu (%u%%) 3rd = %lu (%u%%)\n"),
1463 prefix,
1464 cache->collision_1st_count, (unsigned int)((100.0 * cache->collision_1st_count) / cache->lookup_count),
1465 cache->collision_2nd_count, (unsigned int)((100.0 * cache->collision_2nd_count) / cache->lookup_count),
1466 cache->collision_3rd_count, (unsigned int)((100.0 * cache->collision_3rd_count) / cache->lookup_count));
1467#ifdef STRCACHE2_USE_CHAINING
1468 printf (_("%s hash insert collisions = %u (%u%%)\n"),
1469 prefix, cache->collision_count,(unsigned int)((100.0 * cache->collision_count) / cache->count));
1470 printf (_("%s %5u (%u%%) empty hash table slots\n"),
1471 prefix, chain_depths[0], (unsigned int)((100.0 * chain_depths[0]) / cache->hash_size));
1472 printf (_("%s %5u (%u%%) occupied hash table slots\n"),
1473 prefix, chain_depths[1], (unsigned int)((100.0 * chain_depths[1]) / cache->hash_size));
1474 for (idx = 2; idx < 32; idx++)
1475 {
1476 unsigned strs_at_this_depth = chain_depths[idx];
1477 unsigned i;
1478 for (i = idx + 1; i < 32; i++)
1479 strs_at_this_depth += chain_depths[i];
1480 if (strs_at_this_depth)
1481 printf (_("%s %5u (%2u%%) with %u string%s chained on; %5u (%2u%%) strings at depth %u.\n"),
1482 prefix, chain_depths[idx], (unsigned int)((100.0 * chain_depths[idx]) / (cache->count - cache->collision_count)),
1483 idx - 1, idx == 2 ? " " : "s",
1484 strs_at_this_depth, (unsigned int)((100.0 * strs_at_this_depth) / cache->count), idx - 1);
1485 }
1486#endif
1487}
1488
1489/* Print statistics for all string caches. */
1490void
1491strcache2_print_stats_all (const char *prefix)
1492{
1493 struct strcache2 *cur;
1494 for (cur = strcache_head; cur; cur = cur->next)
1495 strcache2_print_stats (cur, prefix);
1496}
1497
Note: See TracBrowser for help on using the repository browser.