source: trunk/src/kmk/strcache2.c@ 1908

Last change on this file since 1908 was 1908, checked in by bird, 17 years ago

strchache2: hash hacking (no results or changes yet).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 36.6 KB
Line 
1/* $Id: strcache2.c 1908 2008-10-21 21:35:29Z bird $ */
2/** @file
3 * strcache2 - New string cache.
4 */
5
6/*
7 * Copyright (c) 2008 knut st. osmundsen <bird-src-spam@anduin.net>
8 *
9 * This file is part of kBuild.
10 *
11 * kBuild is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * kBuild is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with kBuild; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 *
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include "make.h"
32#include "strcache2.h"
33
34#include <assert.h>
35
36#include "debug.h"
37
38#ifdef WINDOWS32
39# include <io.h>
40# include <process.h>
41# include <Windows.h>
42# define PARSE_IN_WORKER
43#endif
44
45#ifdef __OS2__
46# include <sys/fmutex.h>
47#endif
48
49#ifdef HAVE_PTHREAD
50# include <pthread.h>
51#endif
52
53
54/*******************************************************************************
55* Defined Constants And Macros *
56*******************************************************************************/
57/* The default size of a memory segment (1MB). */
58#define STRCACHE2_SEG_SIZE (1024U*1024U)
59/* The default hash table shift (hash size give as a power of two). */
60#define STRCACHE2_HASH_SHIFT 16
61/** Does the modding / masking of a hash number into an index. */
62#ifdef STRCACHE2_USE_MASK
63# define STRCACHE2_MOD_IT(cache, hash) ((hash) & (cache)->hash_mask)
64#else
65# define STRCACHE2_MOD_IT(cache, hash) ((hash) % (cache)->hash_div)
66#endif
67
68
69/*******************************************************************************
70* Global Variables *
71*******************************************************************************/
72/* List of initialized string caches. */
73static struct strcache2 *strcache_head;
74
75
76
77/** Finds the closest primary number for power of two value (or something else
78 * useful if not support). */
79MY_INLINE strcache2_find_prime(unsigned int shift)
80{
81 switch (shift)
82 {
83 case 5: return 31;
84 case 6: return 61;
85 case 7: return 127;
86 case 8: return 251;
87 case 9: return 509;
88 case 10: return 1021;
89 case 11: return 2039;
90 case 12: return 4093;
91 case 13: return 8191;
92 case 14: return 16381;
93 case 15: return 32749;
94 case 16: return 65521;
95 case 17: return 131063;
96 case 18: return 262139;
97 case 19: return 524269;
98 case 20: return 1048573;
99 case 21: return 2097143;
100 case 22: return 4194301;
101 case 23: return 8388593;
102
103 default:
104 assert (0);
105 return (1 << shift) - 1;
106 }
107}
108
109static struct strcache2_seg *
110strcache2_new_seg (struct strcache2 *cache, unsigned int minlen)
111{
112 struct strcache2_seg *seg;
113 size_t size;
114 size_t off;
115
116 size = cache->def_seg_size;
117 if (size < (size_t)minlen + sizeof (struct strcache2_seg) + STRCACHE2_ENTRY_ALIGNMENT)
118 {
119 size = (size_t)minlen * 2;
120 size = (size + 0xfff) & ~(size_t)0xfff;
121 }
122
123 seg = xmalloc (size);
124 seg->start = (char *)(seg + 1);
125 seg->size = size - sizeof (struct strcache2_seg);
126 off = (size_t)seg->start & (STRCACHE2_ENTRY_ALIGNMENT - 1);
127 if (off)
128 {
129 seg->start += off;
130 seg->size -= off;
131 }
132 assert (seg->size > minlen);
133 seg->cursor = seg->start;
134 seg->avail = seg->size;
135
136 seg->next = cache->seg_head;
137 cache->seg_head = seg;
138
139 return seg;
140}
141
142MY_INLINE unsigned int
143strcache2_case_sensitive_hash_1 (const char *str, unsigned int len)
144{
145#if 1
146 /* Note! This implementation is 18% faster than return_STRING_N_HASH_1
147 when running the two 100 times over typical kBuild strings that
148 end up here (did a fprintf here and built kBuild).
149 Compiler was 32-bit gcc 4.0.1, darwin, with -O2. */
150
151 unsigned int hash = 0;
152 if (MY_PREDICT_TRUE(len >= 2))
153 {
154 unsigned int ch0 = *str++;
155 hash = 0;
156 len--;
157 while (len >= 2)
158 {
159 unsigned int ch1 = *str++;
160 hash += ch0 << (ch1 & 0xf);
161
162 ch0 = *str++;
163 hash += ch1 << (ch0 & 0xf);
164
165 len -= 2;
166 }
167 if (len == 1)
168 {
169 unsigned ch1 = *str;
170 hash += ch0 << (ch1 & 0xf);
171
172 hash += ch1;
173 }
174 else
175 hash += ch0;
176 }
177 else if (len)
178 {
179 hash = *str;
180 hash += hash << (hash & 0xf);
181 }
182 else
183 hash = 0;
184 return hash;
185#else
186# if 1
187 /* This is SDBM. This specific form and loop unroll was benchmarked to
188 be 28% faster than return_STRING_N_HASH_1. (Now the weird thing is
189 that putting the (ch) first in the assignment made it noticably slower.)
190
191 However, it is noticably slower in practice, most likely because of more
192 collisions. Hrmpf.
193 */
194# define UPDATE_HASH(ch) hash = (hash << 6) + (hash << 16) - hash + (ch)
195 unsigned int hash = 0;
196# else
197 /* This is DJB2. This specific form/unroll was benchmarked to be 27%
198 fast than return_STRING_N_HASH_1.
199
200 Ditto. 2x Hrmpf */
201# define UPDATE_HASH(ch) hash = (hash << 5) + hash + (ch)
202 unsigned int hash = 5381;
203# endif
204 while (len >= 4)
205 {
206 UPDATE_HASH (str[0]);
207 UPDATE_HASH (str[1]);
208 UPDATE_HASH (str[2]);
209 UPDATE_HASH (str[3]);
210 str += 4;
211 len -= 4;
212 }
213 switch (len)
214 {
215 default:
216 case 0:
217 return hash;
218 case 1:
219 UPDATE_HASH (str[0]);
220 return hash;
221 case 2:
222 UPDATE_HASH (str[0]);
223 UPDATE_HASH (str[1]);
224 return hash;
225 case 3:
226 UPDATE_HASH (str[0]);
227 UPDATE_HASH (str[1]);
228 UPDATE_HASH (str[2]);
229 return hash;
230 }
231#endif
232}
233
234MY_INLINE unsigned int
235strcache2_case_sensitive_hash_2 (const char *str, unsigned int len)
236{
237 unsigned int hash = 0;
238 if (MY_PREDICT_TRUE(len >= 2))
239 {
240 unsigned int ch0 = *str++;
241 hash = 0;
242 len--;
243 while (len >= 2)
244 {
245 unsigned int ch1 = *str++;
246 hash += ch0 << (ch1 & 0x7);
247
248 ch0 = *str++;
249 hash += ch1 << (ch0 & 0x7);
250
251 len -= 2;
252 }
253 if (len == 1)
254 {
255 unsigned ch1 = *str;
256 hash += ch0 << (ch1 & 0x7);
257
258 hash += ch1;
259 }
260 else
261 hash += ch0;
262 }
263 else if (len)
264 {
265 hash = *str;
266 hash += hash << (hash & 0x7);
267 }
268 else
269 hash = 1;
270 hash |= 1;
271 return hash;
272}
273
274MY_INLINE unsigned int
275strcache2_case_insensitive_hash_1 (const char *str, unsigned int len)
276{
277 unsigned int hash = 0;
278 if (MY_PREDICT_TRUE(len >= 2))
279 {
280 unsigned int ch0 = *str++;
281 ch0 = tolower (ch0);
282 hash = 0;
283 len--;
284 while (len >= 2)
285 {
286 unsigned int ch1 = *str++;
287 ch1 = tolower (ch1);
288 hash += ch0 << (ch1 & 0xf);
289
290 ch0 = *str++;
291 ch0 = tolower (ch0);
292 hash += ch1 << (ch0 & 0xf);
293
294 len -= 2;
295 }
296 if (len == 1)
297 {
298 unsigned ch1 = *str;
299 ch1 = tolower (ch1);
300 hash += ch0 << (ch1 & 0xf);
301
302 hash += ch1;
303 }
304 else
305 hash += ch0;
306 }
307 else if (len)
308 {
309 hash = *str;
310 hash += hash << (hash & 0xf);
311 }
312 else
313 hash = 0;
314 return hash;
315}
316
317MY_INLINE unsigned int
318strcache2_case_insensitive_hash_2 (const char *str, unsigned int len)
319{
320 unsigned int hash = 0;
321 if (MY_PREDICT_TRUE(len >= 2))
322 {
323 unsigned int ch0 = *str++;
324 ch0 = tolower (ch0);
325 hash = 0;
326 len--;
327 while (len >= 2)
328 {
329 unsigned int ch1 = *str++;
330 ch1 = tolower (ch1);
331 hash += ch0 << (ch1 & 0x7);
332
333 ch0 = *str++;
334 ch0 = tolower (ch0);
335 hash += ch1 << (ch0 & 0x7);
336
337 len -= 2;
338 }
339 if (len == 1)
340 {
341 unsigned ch1 = *str;
342 ch1 = tolower (ch1);
343 hash += ch0 << (ch1 & 0x7);
344
345 hash += ch1;
346 }
347 else
348 hash += ch0;
349 }
350 else if (len)
351 {
352 hash = *str;
353 hash += hash << (hash & 0x7);
354 }
355 else
356 hash = 1;
357 hash |= 1;
358 return hash;
359}
360
361#ifdef _MSC_VER
362 typedef unsigned int int32_t;
363#else
364# include <stdint.h>
365#endif
366
367MY_INLINE int
368strcache2_memcmp_inline_short (const char *xs, const char *ys, unsigned int length)
369{
370 if (length <= 8)
371 {
372 /* short string compare - ~50% of the kBuild calls. */
373 assert ( !((size_t)ys & 3) );
374 if (!((size_t)xs & 3))
375 {
376 /* aligned */
377 int result;
378 switch (length)
379 {
380 default: /* memcmp for longer strings */
381 return memcmp (xs, ys, length);
382 case 8:
383 result = *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
384 result |= *(int32_t*)xs - *(int32_t*)ys;
385 return result;
386 case 7:
387 result = xs[6] - ys[6];
388 result |= xs[5] - ys[5];
389 result |= xs[4] - ys[4];
390 result |= *(int32_t*)xs - *(int32_t*)ys;
391 return result;
392 case 6:
393 result = xs[5] - ys[5];
394 result |= xs[4] - ys[4];
395 result |= *(int32_t*)xs - *(int32_t*)ys;
396 return result;
397 case 5:
398 result = xs[4] - ys[4];
399 result |= *(int32_t*)xs - *(int32_t*)ys;
400 return result;
401 case 4:
402 return *(int32_t*)xs - *(int32_t*)ys;
403 case 3:
404 result = xs[2] - ys[2];
405 result |= xs[1] - ys[1];
406 result |= xs[0] - ys[0];
407 return result;
408 case 2:
409 result = xs[1] - ys[1];
410 result |= xs[0] - ys[0];
411 return result;
412 case 1:
413 return *xs - *ys;
414 case 0:
415 return 0;
416 }
417 }
418 else
419 {
420 /* unaligned */
421 int result = 0;
422 switch (length)
423 {
424 case 8: result |= xs[7] - ys[7];
425 case 7: result |= xs[6] - ys[6];
426 case 6: result |= xs[5] - ys[5];
427 case 5: result |= xs[4] - ys[4];
428 case 4: result |= xs[3] - ys[3];
429 case 3: result |= xs[2] - ys[2];
430 case 2: result |= xs[1] - ys[1];
431 case 1: result |= xs[0] - ys[0];
432 case 0:
433 return result;
434 }
435 }
436 }
437
438 /* memcmp for longer strings */
439 return memcmp (xs, ys, length);
440}
441
442MY_INLINE int
443strcache2_memcmp_inlined (const char *xs, const char *ys, unsigned int length)
444{
445#ifndef ELECTRIC_HEAP
446 assert ( !((size_t)ys & 3) );
447#endif
448 if (!((size_t)xs & 3))
449 {
450 int result;
451 /* aligned */
452 while (length >= 8)
453 {
454 result = *(int32_t*)xs - *(int32_t*)ys;
455 result |= *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
456 if (MY_PREDICT_FALSE(result))
457 return result;
458 xs += 8;
459 ys += 8;
460 length -= 8;
461 }
462 switch (length)
463 {
464 case 7:
465 result = *(int32_t*)xs - *(int32_t*)ys;
466 result |= xs[6] - ys[6];
467 result |= xs[5] - ys[5];
468 result |= xs[4] - ys[4];
469 return result;
470 case 6:
471 result = *(int32_t*)xs - *(int32_t*)ys;
472 result |= xs[5] - ys[5];
473 result |= xs[4] - ys[4];
474 return result;
475 case 5:
476 result = *(int32_t*)xs - *(int32_t*)ys;
477 result |= xs[4] - ys[4];
478 return result;
479 case 4:
480 return *(int32_t*)xs - *(int32_t*)ys;
481 case 3:
482 result = xs[2] - ys[2];
483 result |= xs[1] - ys[1];
484 result |= xs[0] - ys[0];
485 return result;
486 case 2:
487 result = xs[1] - ys[1];
488 result |= xs[0] - ys[0];
489 return result;
490 case 1:
491 return *xs - *ys;
492 default:
493 case 0:
494 return 0;
495 }
496 }
497 else
498 {
499 /* unaligned */
500 int result;
501 while (length >= 8)
502 {
503#if defined(__i386__) || defined(__x86_64__)
504 result = ( ((int32_t)xs[3] << 24)
505 | ((int32_t)xs[2] << 16)
506 | ((int32_t)xs[1] << 8)
507 | xs[0] )
508 - *(int32_t*)ys;
509 result |= ( ((int32_t)xs[7] << 24)
510 | ((int32_t)xs[6] << 16)
511 | ((int32_t)xs[5] << 8)
512 | xs[4] )
513 - *(int32_t*)(ys + 4);
514#else
515 result = xs[3] - ys[3];
516 result |= xs[2] - ys[2];
517 result |= xs[1] - ys[1];
518 result |= xs[0] - ys[0];
519 result |= xs[7] - ys[7];
520 result |= xs[6] - ys[6];
521 result |= xs[5] - ys[5];
522 result |= xs[4] - ys[4];
523#endif
524 if (MY_PREDICT_FALSE(result))
525 return result;
526 xs += 8;
527 ys += 8;
528 length -= 8;
529 }
530 result = 0;
531 switch (length)
532 {
533 case 7: result |= xs[6] - ys[6];
534 case 6: result |= xs[5] - ys[5];
535 case 5: result |= xs[4] - ys[4];
536 case 4: result |= xs[3] - ys[3];
537 case 3: result |= xs[2] - ys[2];
538 case 2: result |= xs[1] - ys[1];
539 case 1: result |= xs[0] - ys[0];
540 return result;
541 default:
542 case 0:
543 return 0;
544 }
545 }
546}
547
548MY_INLINE int
549strcache2_is_equal (struct strcache2 *cache, struct strcache2_entry const *entry,
550 const char *str, unsigned int length, unsigned int hash1)
551{
552 assert (!cache->case_insensitive);
553
554 /* the simple stuff first. */
555 if ( entry == NULL
556 || entry->hash1 != hash1
557 || entry->length != length)
558 return 0;
559
560#if 0
561 return memcmp (str, entry + 1, length) == 0;
562#elif 1
563 return strcache2_memcmp_inlined (str, (const char *)(entry + 1), length) == 0;
564#else
565 return strcache2_memcmp_inline_short (str, (const char *)(entry + 1), length) == 0;
566#endif
567}
568
569MY_INLINE int
570strcache2_is_iequal (struct strcache2 *cache, struct strcache2_entry const *entry,
571 const char *str, unsigned int length, unsigned int hash1)
572{
573 assert (cache->case_insensitive);
574
575 /* the simple stuff first. */
576 if ( entry == NULL
577 || entry->hash1 != hash1
578 || entry->length != length)
579 return 0;
580
581#if defined(_MSC_VER) || defined(__OS2__)
582 return _memicmp (entry + 1, str, length) == 0;
583#else
584 return strncasecmp ((const char *)(entry + 1), str, length) == 0;
585#endif
586}
587
588static void
589strcache2_rehash (struct strcache2 *cache)
590{
591 unsigned int src = cache->hash_size;
592 struct strcache2_entry **src_tab = cache->hash_tab;
593 struct strcache2_entry **dst_tab;
594#ifdef STRCACHE2_USE_MASK
595 unsigned int hash_mask;
596#else
597 unsigned int hash_div;
598#endif
599
600 /* Allocate a new hash table twice the size of the current. */
601 cache->hash_size <<= 1;
602#ifdef STRCACHE2_USE_MASK
603 cache->hash_mask <<= 1;
604 cache->hash_mask |= 1;
605#else
606 for (hash_div = 1; (1U << hash_div) < cache->hash_size; hash_div++)
607 /* nothing */;
608 cache->hash_div = strcache2_find_prime(hash_div);
609#endif
610 cache->rehash_count <<= 1;
611 cache->hash_tab = dst_tab = (struct strcache2_entry **)
612 xmalloc (cache->hash_size * sizeof (struct strcache2_entry *));
613 memset (dst_tab, '\0', cache->hash_size * sizeof (struct strcache2_entry *));
614
615 /* Copy the entries from the old to the new hash table. */
616#ifdef STRCACHE2_USE_MASK
617 hash_mask = cache->hash_mask;
618#else
619 hash_div = cache->hash_div;
620#endif
621 while (src-- > 0)
622 {
623 struct strcache2_entry *entry = src_tab[src];
624 if (entry)
625 {
626#ifdef STRCACHE2_USE_MASK
627 unsigned int dst = entry->hash1 & hash_mask;
628#else
629 unsigned int dst = entry->hash1 % hash_div;
630#endif
631 if (dst_tab[dst])
632 {
633 unsigned int hash2 = entry->hash2;
634 if (!hash2)
635 entry->hash2 = hash2 = cache->case_insensitive
636 ? strcache2_case_insensitive_hash_2 ((const char *)(entry + 1), entry->length)
637 : strcache2_case_sensitive_hash_2 ((const char *)(entry + 1), entry->length);
638 dst += hash2;
639#ifdef STRCACHE2_USE_MASK
640 dst &= hash_mask;
641#else
642 dst %= hash_div;
643#endif
644 while (dst_tab[dst])
645 {
646 dst += hash2;
647#ifdef STRCACHE2_USE_MASK
648 dst &= hash_mask;
649#else
650 dst %= hash_div;
651#endif
652 }
653 }
654
655 dst_tab[dst] = entry;
656 }
657 }
658
659 /* That's it, just free the old table and we're done. */
660 free (src_tab);
661}
662
663/* Internal worker that enters a new string into the cache. */
664static const char *
665strcache2_enter_string (struct strcache2 *cache, unsigned int idx,
666 const char *str, unsigned int length,
667 unsigned int hash1, unsigned hash2)
668{
669 struct strcache2_entry *entry;
670 struct strcache2_seg *seg;
671 unsigned int size;
672 char *str_copy;
673
674 /* Allocate space for the string. */
675
676 size = length + 1 + sizeof (struct strcache2_entry);
677 size = (size + STRCACHE2_ENTRY_ALIGNMENT - 1) & ~(STRCACHE2_ENTRY_ALIGNMENT - 1U);
678
679 seg = cache->seg_head;
680 if (MY_PREDICT_FALSE(seg->avail < size))
681 {
682 do
683 seg = seg->next;
684 while (seg && seg->avail < size);
685 if (!seg)
686 seg = strcache2_new_seg (cache, size);
687 }
688
689 entry = (struct strcache2_entry *) seg->cursor;
690 assert ((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1));
691 seg->cursor += size;
692 seg->avail -= size;
693
694 /* Setup the entry, copy the string and insert it into the hash table. */
695
696 entry->user = NULL;
697 entry->length = length;
698 entry->hash1 = hash1;
699 entry->hash2 = hash2;
700 str_copy = (char *) memcpy (entry + 1, str, length);
701 str_copy[length] = '\0';
702
703 cache->hash_tab[idx] = entry;
704 cache->count++;
705 if (cache->count >= cache->rehash_count)
706 strcache2_rehash (cache);
707
708 return str_copy;
709}
710
711/* The public add string interface. */
712const char *
713strcache2_add (struct strcache2 *cache, const char *str, unsigned int length)
714{
715 struct strcache2_entry const *entry;
716 unsigned int hash2;
717 unsigned int hash1 = strcache2_case_sensitive_hash_1 (str, length);
718 unsigned int idx;
719
720 assert (!cache->case_insensitive);
721
722 cache->lookup_count++;
723
724 /* Lookup the entry in the hash table, hoping for an
725 early match. */
726 idx = STRCACHE2_MOD_IT(cache, hash1);
727 entry = cache->hash_tab[idx];
728 if (strcache2_is_equal (cache, entry, str, length, hash1))
729 return (const char *)(entry + 1);
730 if (!entry)
731 hash2 = 0;
732 else
733 {
734 cache->collision_1st_count++;
735
736 hash2 = strcache2_case_sensitive_hash_2 (str, length);
737 idx += hash2;
738 idx = STRCACHE2_MOD_IT(cache, idx);
739 entry = cache->hash_tab[idx];
740 if (strcache2_is_equal (cache, entry, str, length, hash1))
741 return (const char *)(entry + 1);
742
743 if (entry)
744 {
745 cache->collision_2nd_count++;
746 do
747 {
748 idx += hash2;
749 idx = STRCACHE2_MOD_IT(cache, idx);
750 entry = cache->hash_tab[idx];
751 cache->collision_3rd_count++;
752 if (strcache2_is_equal (cache, entry, str, length, hash1))
753 return (const char *)(entry + 1);
754 }
755 while (entry);
756 }
757 }
758
759 /* Not found, add it at IDX. */
760 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
761}
762
763/* The public add string interface for prehashed strings.
764 Use strcache2_hash_str to calculate the hash of a string. */
765const char *
766strcache2_add_hashed (struct strcache2 *cache, const char *str, unsigned int length,
767 unsigned int hash1, unsigned int hash2)
768{
769 struct strcache2_entry const *entry;
770 unsigned int idx;
771#ifndef NDEBUG
772 unsigned correct_hash;
773
774 assert (!cache->case_insensitive);
775 correct_hash = strcache2_case_sensitive_hash_1 (str, length);
776 MY_ASSERT_MSG (hash1 == correct_hash, ("%#x != %#x\n", hash1, correct_hash));
777 if (hash2)
778 {
779 correct_hash = strcache2_case_sensitive_hash_2 (str, length);
780 MY_ASSERT_MSG (hash2 == correct_hash, ("%#x != %#x\n", hash2, correct_hash));
781 }
782#endif /* NDEBUG */
783
784 cache->lookup_count++;
785
786 /* Lookup the entry in the hash table, hoping for an
787 early match. */
788 idx = STRCACHE2_MOD_IT(cache, hash1);
789 entry = cache->hash_tab[idx];
790 if (strcache2_is_equal (cache, entry, str, length, hash1))
791 return (const char *)(entry + 1);
792 if (entry)
793 {
794 cache->collision_1st_count++;
795
796 if (!hash2)
797 hash2 = cache->case_insensitive
798 ? strcache2_case_insensitive_hash_2 (str, length)
799 : strcache2_case_sensitive_hash_2 (str, length);
800 idx += hash2;
801 idx = STRCACHE2_MOD_IT(cache, idx);
802 entry = cache->hash_tab[idx];
803 if (strcache2_is_equal (cache, entry, str, length, hash1))
804 return (const char *)(entry + 1);
805
806 if (entry)
807 {
808 cache->collision_2nd_count++;
809 do
810 {
811 idx += hash2;
812 idx = STRCACHE2_MOD_IT(cache, idx);
813 entry = cache->hash_tab[idx];
814 cache->collision_3rd_count++;
815 if (strcache2_is_equal (cache, entry, str, length, hash1))
816 return (const char *)(entry + 1);
817 }
818 while (entry);
819 }
820 }
821
822 /* Not found, add it at IDX. */
823 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
824}
825
826/* The public lookup (case sensitive) string interface. */
827const char *
828strcache2_lookup (struct strcache2 *cache, const char *str, unsigned int length)
829{
830 struct strcache2_entry const *entry;
831 unsigned int hash2;
832 unsigned int hash1 = strcache2_case_sensitive_hash_1 (str, length);
833 unsigned int idx;
834
835 assert (!cache->case_insensitive);
836
837 cache->lookup_count++;
838
839 /* Lookup the entry in the hash table, hoping for an
840 early match. */
841 idx = STRCACHE2_MOD_IT(cache, hash1);
842 entry = cache->hash_tab[idx];
843 if (strcache2_is_equal (cache, entry, str, length, hash1))
844 return (const char *)(entry + 1);
845 if (!entry)
846 hash2 = 0;
847 else
848 {
849 cache->collision_1st_count++;
850
851 hash2 = strcache2_case_sensitive_hash_2 (str, length);
852 idx += hash2;
853 idx = STRCACHE2_MOD_IT(cache, idx);
854 entry = cache->hash_tab[idx];
855 if (strcache2_is_equal (cache, entry, str, length, hash1))
856 return (const char *)(entry + 1);
857
858 if (entry)
859 {
860 cache->collision_2nd_count++;
861 do
862 {
863 idx += hash2;
864 idx = STRCACHE2_MOD_IT(cache, idx);
865 entry = cache->hash_tab[idx];
866 cache->collision_3rd_count++;
867 if (strcache2_is_equal (cache, entry, str, length, hash1))
868 return (const char *)(entry + 1);
869 }
870 while (entry);
871 }
872 }
873
874 /* Not found. */
875 return NULL;
876}
877
878#if defined(HAVE_CASE_INSENSITIVE_FS)
879
880/* The public add string interface for case insensitive strings. */
881const char *
882strcache2_iadd (struct strcache2 *cache, const char *str, unsigned int length)
883{
884 struct strcache2_entry const *entry;
885 unsigned int hash2;
886 unsigned int hash1 = strcache2_case_insensitive_hash_1 (str, length);
887 unsigned int idx;
888
889 assert (cache->case_insensitive);
890 cache->lookup_count++;
891
892 /* Lookup the entry in the hash table, hoping for an
893 early match. */
894 idx = STRCACHE2_MOD_IT(cache, hash1);
895 entry = cache->hash_tab[idx];
896 if (strcache2_is_iequal (cache, entry, str, length, hash1))
897 return (const char *)(entry + 1);
898 if (!entry)
899 hash2 = 0;
900 else
901 {
902 cache->collision_1st_count++;
903
904 hash2 = strcache2_case_insensitive_hash_2 (str, length);
905 idx += hash2;
906 idx = STRCACHE2_MOD_IT(cache, idx);
907 entry = cache->hash_tab[idx];
908 if (strcache2_is_iequal (cache, entry, str, length, hash1))
909 return (const char *)(entry + 1);
910
911 if (entry)
912 {
913 cache->collision_2nd_count++;
914 do
915 {
916 idx += hash2;
917 idx = STRCACHE2_MOD_IT(cache, idx);
918 entry = cache->hash_tab[idx];
919 cache->collision_3rd_count++;
920 if (strcache2_is_iequal (cache, entry, str, length, hash1))
921 return (const char *)(entry + 1);
922 }
923 while (entry);
924 }
925 }
926
927 /* Not found, add it at IDX. */
928 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
929}
930
931/* The public add string interface for prehashed case insensitive strings.
932 Use strcache2_hash_istr to calculate the hash of a string. */
933const char *
934strcache2_iadd_hashed (struct strcache2 *cache, const char *str, unsigned int length,
935 unsigned int hash1, unsigned int hash2)
936{
937 struct strcache2_entry const *entry;
938 unsigned int idx;
939#ifndef NDEBUG
940 unsigned correct_hash;
941
942 assert (cache->case_insensitive);
943 correct_hash = strcache2_case_insensitive_hash_1 (str, length);
944 MY_ASSERT_MSG (hash1 == correct_hash, ("%#x != %#x\n", hash1, correct_hash));
945 if (hash2)
946 {
947 correct_hash = strcache2_case_insensitive_hash_2 (str, length);
948 MY_ASSERT_MSG (hash2 == correct_hash, ("%#x != %#x\n", hash2, correct_hash));
949 }
950#endif /* NDEBUG */
951
952 cache->lookup_count++;
953
954 /* Lookup the entry in the hash table, hoping for an
955 early match. */
956 idx = STRCACHE2_MOD_IT(cache, hash1);
957 entry = cache->hash_tab[idx];
958 if (strcache2_is_iequal (cache, entry, str, length, hash1))
959 return (const char *)(entry + 1);
960 if (entry)
961 {
962 cache->collision_1st_count++;
963
964 if (!hash2)
965 hash2 = strcache2_case_insensitive_hash_2 (str, length);
966 idx += hash2;
967 idx = STRCACHE2_MOD_IT(cache, idx);
968 entry = cache->hash_tab[idx];
969 if (strcache2_is_iequal (cache, entry, str, length, hash1))
970 return (const char *)(entry + 1);
971
972 if (entry)
973 {
974 cache->collision_2nd_count++;
975 do
976 {
977 idx += hash2;
978 idx = STRCACHE2_MOD_IT(cache, idx);
979 entry = cache->hash_tab[idx];
980 cache->collision_3rd_count++;
981 if (strcache2_is_iequal (cache, entry, str, length, hash1))
982 return (const char *)(entry + 1);
983 }
984 while (entry);
985 }
986 }
987
988 /* Not found, add it at IDX. */
989 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
990}
991
992/* The public lookup (case insensitive) string interface. */
993const char *
994strcache2_lookup (struct strcache2 *cache, const char *str, unsigned int length)
995{
996 struct strcache2_entry const *entry;
997 unsigned int hash2;
998 unsigned int hash1 = strcache2_case_insensitive_hash_1 (str, length);
999 unsigned int idx;
1000
1001 assert (cache->case_insensitive);
1002
1003 cache->lookup_count++;
1004
1005 /* Lookup the entry in the hash table, hoping for an
1006 early match. */
1007 idx = STRCACHE2_MOD_IT(cache, hash1);
1008 entry = cache->hash_tab[idx];
1009 if (strcache2_is_iequal (cache, entry, str, length, hash1))
1010 return (const char *)(entry + 1);
1011 if (!entry)
1012 hash2 = 0;
1013 else
1014 {
1015 cache->collision_1st_count++;
1016
1017 hash2 = strcache2_case_insensitive_hash_2 (str, length);
1018 idx += hash2;
1019 idx = STRCACHE2_MOD_IT(cache, idx);
1020 entry = cache->hash_tab[idx];
1021 if (strcache2_is_iequal (cache, entry, str, length, hash1))
1022 return (const char *)(entry + 1);
1023
1024 if (entry)
1025 {
1026 cache->collision_2nd_count++;
1027 do
1028 {
1029 idx += hash2;
1030 idx = STRCACHE2_MOD_IT(cache, idx);
1031 entry = cache->hash_tab[idx];
1032 cache->collision_3rd_count++;
1033 if (strcache2_is_iequal (cache, entry, str, length, hash1))
1034 return (const char *)(entry + 1);
1035 }
1036 while (entry);
1037 }
1038 }
1039
1040 /* Not found. */
1041 return NULL;
1042}
1043
1044#endif /* HAVE_CASE_INSENSITIVE_FS */
1045
1046/* Is the given string cached? returns 1 if it is, 0 if it isn't. */
1047int
1048strcache2_is_cached (struct strcache2 *cache, const char *str)
1049{
1050 /* Check mandatory alignment first. */
1051 if (!((size_t)str & (sizeof (void *) - 1)))
1052 {
1053 /* Check the segment list and consider the question answered if the
1054 string is within one of them. (Could check it more thoroughly...) */
1055 struct strcache2_seg const *seg;
1056 for (seg = cache->seg_head; seg; seg = seg->next)
1057 if ((size_t)(str - seg->start) < seg->size)
1058 return 1;
1059 }
1060
1061 return 0;
1062}
1063
1064
1065/* Verify the integrity of the specified string, returning 0 if OK. */
1066int
1067strcache2_verify_entry (struct strcache2 *cache, const char *str)
1068{
1069 struct strcache2_entry const *entry;
1070 unsigned hash;
1071 const char *end;
1072
1073 if ((size_t)str & (sizeof (void *) - 1))
1074 {
1075 fprintf (stderr,
1076 "strcache2[%s]: missaligned string %p\nstring: %s\n",
1077 cache->name, (void *)str, str);
1078 return -1;
1079 }
1080
1081 entry = (struct strcache2_entry const *)str - 1;
1082 end = memchr (str, '\0', entry->length);
1083 if ((size_t)(end - str) == entry->length)
1084 {
1085 fprintf (stderr,
1086 "strcache2[%s]: corrupt entry %p, length: %lu, expected %u;\nstring: %s\n",
1087 cache->name, (void *)entry, (unsigned long)(end - str), entry->length, str);
1088 return -1;
1089 }
1090
1091 hash = cache->case_insensitive
1092 ? strcache2_case_insensitive_hash_1 (str, entry->length)
1093 : strcache2_case_sensitive_hash_1 (str, entry->length);
1094 if (hash != entry->hash1)
1095 {
1096 fprintf (stderr,
1097 "strcache2[%s]: corrupt entry %p, hash#1: %x, expected %x;\nstring: %s\n",
1098 cache->name, (void *)entry, hash, entry->hash1, str);
1099 return -1;
1100 }
1101
1102 if (entry->hash2)
1103 {
1104 hash = cache->case_insensitive
1105 ? strcache2_case_insensitive_hash_2 (str, entry->length)
1106 : strcache2_case_sensitive_hash_2 (str, entry->length);
1107 if (hash != entry->hash2)
1108 {
1109 fprintf (stderr,
1110 "strcache2[%s]: corrupt entry %p, hash#2: %x, expected %x;\nstring: %s\n",
1111 cache->name, (void *)entry, hash, entry->hash2, str);
1112 return -1;
1113 }
1114 }
1115
1116 return 0;
1117}
1118
1119/* Fallback for calculating and returning the 2nd hash. */
1120unsigned int
1121strcache2_get_hash2_fallback (struct strcache2 *cache, const char *str)
1122{
1123 struct strcache2_entry *entry = (struct strcache2_entry *) str - 1;
1124 unsigned hash2 = cache->case_insensitive
1125 ? strcache2_case_insensitive_hash_2 (str, entry->length)
1126 : strcache2_case_sensitive_hash_2 (str, entry->length);
1127 entry->hash2 = hash2;
1128 return hash2;
1129}
1130
1131/* Calculates the case sensitive hash values of the string.
1132 The first hash is returned, the other is put at HASH2P. */
1133unsigned int strcache2_hash_str (const char *str, unsigned int length, unsigned int *hash2p)
1134{
1135 *hash2p = strcache2_case_sensitive_hash_2 (str, length);
1136 return strcache2_case_sensitive_hash_1 (str, length);
1137}
1138
1139/* Calculates the case insensitive hash values of the string.
1140 The first hash is returned, the other is put at HASH2P. */
1141unsigned int strcache2_hash_istr (const char *str, unsigned int length, unsigned int *hash2p)
1142{
1143 *hash2p = strcache2_case_insensitive_hash_2 (str, length);
1144 return strcache2_case_insensitive_hash_1 (str, length);
1145}
1146
1147
1148
1149/* Initalizes a new cache. */
1150void
1151strcache2_init (struct strcache2 *cache, const char *name, unsigned int size,
1152 unsigned int def_seg_size, int case_insensitive, int thread_safe)
1153{
1154 unsigned hash_shift;
1155 assert (!thread_safe);
1156
1157 /* calc the size as a power of two */
1158 if (!size)
1159 hash_shift = STRCACHE2_HASH_SHIFT;
1160 else
1161 {
1162 assert (size <= (~0U / 2 + 1));
1163 for (hash_shift = 8; (1U << hash_shift) < size; hash_shift++)
1164 /* nothing */;
1165 }
1166
1167 /* adjust the default segment size */
1168 if (!def_seg_size)
1169 def_seg_size = STRCACHE2_SEG_SIZE;
1170 else if (def_seg_size < sizeof (struct strcache2_seg) * 10)
1171 def_seg_size = sizeof (struct strcache2_seg) * 10;
1172 else if ((def_seg_size & 0xfff) < 0xf00)
1173 def_seg_size = ((def_seg_size + 0xfff) & ~0xfffU);
1174
1175
1176 /* init the structure. */
1177 cache->case_insensitive = case_insensitive;
1178#ifdef STRCACHE2_USE_MASK
1179 cache->hash_mask = (1U << hash_shift) - 1U;
1180#else
1181 cache->hash_div = strcache2_find_prime(hash_shift);
1182#endif
1183 cache->count = 0;
1184 cache->lookup_count = 0;
1185 cache->collision_1st_count = 0;
1186 cache->collision_2nd_count = 0;
1187 cache->collision_3rd_count = 0;
1188 cache->rehash_count = (1U << hash_shift) / 4 * 3; /* rehash at 75% */
1189 cache->init_size = 1U << hash_shift;
1190 cache->hash_size = 1U << hash_shift;
1191 cache->def_seg_size = def_seg_size;
1192 cache->lock = NULL;
1193 cache->name = name;
1194
1195 /* allocate the hash table and first segment. */
1196 cache->hash_tab = (struct strcache2_entry **)
1197 xmalloc (cache->init_size * sizeof (struct strcache2_entry *));
1198 memset (cache->hash_tab, '\0', cache->init_size * sizeof (struct strcache2_entry *));
1199 strcache2_new_seg (cache, 0);
1200
1201 /* link it */
1202 cache->next = strcache_head;
1203 strcache_head = cache;
1204}
1205
1206
1207/* Terminates a string cache, freeing all memory and other
1208 associated resources. */
1209void
1210strcache2_term (struct strcache2 *cache)
1211{
1212 /* unlink it */
1213 if (strcache_head == cache)
1214 strcache_head = cache->next;
1215 else
1216 {
1217 struct strcache2 *prev = strcache_head;
1218 while (prev->next != cache)
1219 prev = prev->next;
1220 assert (prev);
1221 prev->next = cache->next;
1222 }
1223
1224 /* free the memory segments */
1225 do
1226 {
1227 void *free_it = cache->seg_head;
1228 cache->seg_head = cache->seg_head->next;
1229 free (free_it);
1230 }
1231 while (cache->seg_head);
1232
1233 /* free the hash and clear the structure. */
1234 free (cache->hash_tab);
1235 memset (cache, '\0', sizeof (struct strcache2));
1236}
1237
1238/* Print statistics a string cache. */
1239void
1240strcache2_print_stats (struct strcache2 *cache, const char *prefix)
1241{
1242 unsigned int seg_count = 0;
1243 unsigned long seg_total_bytes = 0;
1244 unsigned long seg_avg_bytes;
1245 unsigned long seg_avail_bytes = 0;
1246 unsigned long seg_max_bytes = 0;
1247 struct strcache2_seg *seg;
1248 unsigned long str_total_len = 0;
1249 unsigned int str_avg_len;
1250 unsigned int str_min_len = ~0U;
1251 unsigned int str_max_len = 0;
1252 unsigned int idx;
1253 unsigned int rehashes;
1254
1255 printf (_("\n%s strcache2: %s\n"), prefix, cache->name);
1256
1257 /* Segment statistics. */
1258 for (seg = cache->seg_head; seg; seg = seg->next)
1259 {
1260 seg_count++;
1261 seg_total_bytes += seg->size;
1262 seg_avail_bytes += seg->avail;
1263 if (seg->size > seg_max_bytes)
1264 seg_max_bytes = seg->size;
1265 }
1266 seg_avg_bytes = seg_total_bytes / seg_count;
1267 printf (_("%s %u segments: total = %lu / max = %lu / avg = %lu / def = %u avail = %lu\n"),
1268 prefix, seg_count, seg_total_bytes, seg_max_bytes, seg_avg_bytes,
1269 cache->def_seg_size, seg_avail_bytes);
1270
1271 /* String statistics. */
1272 idx = cache->hash_size;
1273 while (idx-- > 0)
1274 {
1275 struct strcache2_entry const *entry = cache->hash_tab[idx];
1276 if (entry)
1277 {
1278 unsigned int length = entry->length;
1279 str_total_len += length;
1280 if (length > str_max_len)
1281 str_max_len = length;
1282 if (length < str_min_len)
1283 str_min_len = length;
1284 }
1285 }
1286 str_avg_len = cache->count ? str_total_len / cache->count : 0;
1287 printf (_("%s %u strings: total len = %lu / max = %u / avg = %u / min = %u\n"),
1288 prefix, cache->count, str_total_len, str_max_len, str_avg_len, str_min_len);
1289
1290 /* Hash statistics. */
1291 idx = cache->init_size;
1292 rehashes = 0;
1293 while (idx < cache->hash_size)
1294 {
1295 idx *= 2;
1296 rehashes++;
1297 }
1298
1299#ifdef STRCACHE2_USE_MASK
1300 printf (_("%s hash size = %u mask = %#x rehashed %u times lookups = %lu\n"),
1301 prefix, cache->hash_size, cache->hash_mask, rehashes, cache->lookup_count);
1302#else
1303 printf (_("%s hash size = %u div = %#x rehashed %u times lookups = %lu\n"),
1304 prefix, cache->hash_size, cache->hash_div, rehashes, cache->lookup_count);
1305#endif
1306 printf (_("%s hash collisions 1st = %lu (%u%%) 2nd = %lu (%u%%) 3rd = %lu (%u%%)\n"),
1307 prefix,
1308 cache->collision_1st_count, (unsigned int)((float)cache->collision_1st_count * 100 / cache->lookup_count),
1309 cache->collision_2nd_count, (unsigned int)((float)cache->collision_2nd_count * 100 / cache->lookup_count),
1310 cache->collision_3rd_count, (unsigned int)((float)cache->collision_3rd_count * 100 / cache->lookup_count));
1311}
1312
1313/* Print statistics for all string caches. */
1314void
1315strcache2_print_stats_all (const char *prefix)
1316{
1317 struct strcache2 *cur;
1318 for (cur = strcache_head; cur; cur = cur->next)
1319 strcache2_print_stats (cur, prefix);
1320}
1321
Note: See TracBrowser for help on using the repository browser.