source: trunk/src/kmk/strcache2.c@ 1897

Last change on this file since 1897 was 1897, checked in by bird, 17 years ago

kmk: added strcache2_get_ptr_hash for more efficent hashing by string pool users; replacing hash1 with that and hash2 with hash1, thus avoiding unnecessary hash2 calculations.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 28.9 KB
Line 
1/* $Id: strcache2.c 1897 2008-10-21 01:21:39Z bird $ */
2/** @file
3 * strcache2 - New string cache.
4 */
5
6/*
7 * Copyright (c) 2008 knut st. osmundsen <bird-src-spam@anduin.net>
8 *
9 * This file is part of kBuild.
10 *
11 * kBuild is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * kBuild is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with kBuild; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 *
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include "make.h"
32#include "strcache2.h"
33
34#include <assert.h>
35
36#include "debug.h"
37
38#ifdef WINDOWS32
39# include <io.h>
40# include <process.h>
41# include <Windows.h>
42# define PARSE_IN_WORKER
43#endif
44
45#ifdef __OS2__
46# include <sys/fmutex.h>
47#endif
48
49#ifdef HAVE_PTHREAD
50# include <pthread.h>
51#endif
52
53
54/*******************************************************************************
55* Defined Constants And Macros *
56*******************************************************************************/
57/* The default size of a memory segment (1MB). */
58#define STRCACHE2_SEG_SIZE (1024U*1024U)
59/* The default hash table shift (hash size give as a power of two). */
60#define STRCACHE2_HASH_SHIFT 16
61
62
63/*******************************************************************************
64* Global Variables *
65*******************************************************************************/
66/* List of initialized string caches. */
67static struct strcache2 *strcache_head;
68
69
70
71static struct strcache2_seg *
72strcache2_new_seg (struct strcache2 *cache, unsigned int minlen)
73{
74 struct strcache2_seg *seg;
75 size_t size;
76
77 size = cache->def_seg_size;
78 if (size < (size_t)minlen + sizeof (struct strcache2_seg))
79 {
80 size = (size_t)minlen * 2;
81 size = (size + 0xfff) & ~(size_t)0xfff;
82 }
83
84 seg = xmalloc (size);
85 seg->cursor = seg->start = (char *)(seg + 1);
86 seg->size = seg->avail = size - sizeof (struct strcache2_seg);
87 assert (seg->size > minlen);
88
89 seg->next = cache->seg_head;
90 cache->seg_head = seg;
91
92 return seg;
93}
94
95MY_INLINE unsigned int
96strcache2_case_sensitive_hash_1 (const char *str, unsigned int len)
97{
98 unsigned int hash = 0;
99 if (MY_PREDICT_TRUE(len >= 2))
100 {
101 unsigned int ch0 = *str++;
102 hash = 0;
103 len--;
104 while (len >= 2)
105 {
106 unsigned int ch1 = *str++;
107 hash += ch0 << (ch1 & 0xf);
108
109 ch0 = *str++;
110 hash += ch1 << (ch0 & 0xf);
111
112 len -= 2;
113 }
114 if (len == 1)
115 {
116 unsigned ch1 = *str;
117 hash += ch0 << (ch1 & 0xf);
118
119 hash += ch1;
120 }
121 else
122 hash += ch0;
123 }
124 else if (len)
125 {
126 hash = *str;
127 hash += hash << (hash & 0xf);
128 }
129 else
130 hash = 0;
131 return hash;
132}
133
134MY_INLINE unsigned int
135strcache2_case_sensitive_hash_2 (const char *str, unsigned int len)
136{
137 unsigned int hash = 0;
138 if (MY_PREDICT_TRUE(len >= 2))
139 {
140 unsigned int ch0 = *str++;
141 hash = 0;
142 len--;
143 while (len >= 2)
144 {
145 unsigned int ch1 = *str++;
146 hash += ch0 << (ch1 & 0x7);
147
148 ch0 = *str++;
149 hash += ch1 << (ch0 & 0x7);
150
151 len -= 2;
152 }
153 if (len == 1)
154 {
155 unsigned ch1 = *str;
156 hash += ch0 << (ch1 & 0x7);
157
158 hash += ch1;
159 }
160 else
161 hash += ch0;
162 }
163 else if (len)
164 {
165 hash = *str;
166 hash += hash << (hash & 0x7);
167 }
168 else
169 hash = 1;
170 hash |= 1;
171 return hash;
172}
173
174MY_INLINE unsigned int
175strcache2_case_insensitive_hash_1 (const char *str, unsigned int len)
176{
177 unsigned int hash = 0;
178 if (MY_PREDICT_TRUE(len >= 2))
179 {
180 unsigned int ch0 = *str++;
181 ch0 = tolower (ch0);
182 hash = 0;
183 len--;
184 while (len >= 2)
185 {
186 unsigned int ch1 = *str++;
187 ch1 = tolower (ch1);
188 hash += ch0 << (ch1 & 0xf);
189
190 ch0 = *str++;
191 ch0 = tolower (ch0);
192 hash += ch1 << (ch0 & 0xf);
193
194 len -= 2;
195 }
196 if (len == 1)
197 {
198 unsigned ch1 = *str;
199 ch1 = tolower (ch1);
200 hash += ch0 << (ch1 & 0xf);
201
202 hash += ch1;
203 }
204 else
205 hash += ch0;
206 }
207 else if (len)
208 {
209 hash = *str;
210 hash += hash << (hash & 0xf);
211 }
212 else
213 hash = 0;
214 return hash;
215}
216
217MY_INLINE unsigned int
218strcache2_case_insensitive_hash_2 (const char *str, unsigned int len)
219{
220 unsigned int hash = 0;
221 if (MY_PREDICT_TRUE(len >= 2))
222 {
223 unsigned int ch0 = *str++;
224 ch0 = tolower (ch0);
225 hash = 0;
226 len--;
227 while (len >= 2)
228 {
229 unsigned int ch1 = *str++;
230 ch1 = tolower (ch1);
231 hash += ch0 << (ch1 & 0x7);
232
233 ch0 = *str++;
234 ch0 = tolower (ch0);
235 hash += ch1 << (ch0 & 0x7);
236
237 len -= 2;
238 }
239 if (len == 1)
240 {
241 unsigned ch1 = *str;
242 ch1 = tolower (ch1);
243 hash += ch0 << (ch1 & 0x7);
244
245 hash += ch1;
246 }
247 else
248 hash += ch0;
249 }
250 else if (len)
251 {
252 hash = *str;
253 hash += hash << (hash & 0x7);
254 }
255 else
256 hash = 1;
257 hash |= 1;
258 return hash;
259}
260
261#if 0 /* FIXME: Use this (salvaged from variable.c) */
262
263MY_INLINE int
264variable_hash_cmp_2_memcmp (const char *xs, const char *ys, unsigned int length)
265{
266 /* short string compare - ~50% of the kBuild calls. */
267 assert ( !((size_t)ys & 3) );
268 if (!((size_t)xs & 3))
269 {
270 /* aligned */
271 int result;
272 switch (length)
273 {
274 case 8:
275 result = *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
276 result |= *(int32_t*)xs - *(int32_t*)ys;
277 return result;
278 case 7:
279 result = xs[6] - ys[6];
280 result |= xs[5] - ys[5];
281 result |= xs[4] - ys[4];
282 result |= *(int32_t*)xs - *(int32_t*)ys;
283 return result;
284 case 6:
285 result = xs[5] - ys[5];
286 result |= xs[4] - ys[4];
287 result |= *(int32_t*)xs - *(int32_t*)ys;
288 return result;
289 case 5:
290 result = xs[4] - ys[4];
291 result |= *(int32_t*)xs - *(int32_t*)ys;
292 return result;
293 case 4:
294 return *(int32_t*)xs - *(int32_t*)ys;
295 case 3:
296 result = xs[2] - ys[2];
297 result |= xs[1] - ys[1];
298 result |= xs[0] - ys[0];
299 return result;
300 case 2:
301 result = xs[1] - ys[1];
302 result |= xs[0] - ys[0];
303 return result;
304 case 1:
305 return *xs - *ys;
306 case 0:
307 return 0;
308 }
309 }
310 else
311 {
312 /* unaligned */
313 int result = 0;
314 switch (length)
315 {
316 case 8: result |= xs[7] - ys[7];
317 case 7: result |= xs[6] - ys[6];
318 case 6: result |= xs[5] - ys[5];
319 case 5: result |= xs[4] - ys[4];
320 case 4: result |= xs[3] - ys[3];
321 case 3: result |= xs[2] - ys[2];
322 case 2: result |= xs[1] - ys[1];
323 case 1: result |= xs[0] - ys[0];
324 case 0:
325 return result;
326 }
327 }
328
329 /* memcmp for longer strings */
330# ifdef __GNUC__
331 return __builtin_memcmp (xs, ys, length);
332# else
333 return memcmp (xs, ys, length);
334# endif
335}
336
337MY_INLINE int
338variable_hash_cmp_2_inlined (const char *xs, const char *ys, unsigned int length)
339{
340#ifndef ELECTRIC_HEAP
341 assert ( !((size_t)ys & 3) );
342#endif
343 if (!((size_t)xs & 3))
344 {
345 int result;
346 /* aligned */
347 while (length >= 8)
348 {
349 result = *(int32_t*)xs - *(int32_t*)ys;
350 result |= *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
351 if (MY_PREDICT_FALSE(result))
352 return result;
353 xs += 8;
354 ys += 8;
355 length -= 8;
356 }
357 switch (length)
358 {
359 case 7:
360 result = *(int32_t*)xs - *(int32_t*)ys;
361 result |= xs[6] - ys[6];
362 result |= xs[5] - ys[5];
363 result |= xs[4] - ys[4];
364 return result;
365 case 6:
366 result = *(int32_t*)xs - *(int32_t*)ys;
367 result |= xs[5] - ys[5];
368 result |= xs[4] - ys[4];
369 return result;
370 case 5:
371 result = *(int32_t*)xs - *(int32_t*)ys;
372 result |= xs[4] - ys[4];
373 return result;
374 case 4:
375 return *(int32_t*)xs - *(int32_t*)ys;
376 case 3:
377 result = xs[2] - ys[2];
378 result |= xs[1] - ys[1];
379 result |= xs[0] - ys[0];
380 return result;
381 case 2:
382 result = xs[1] - ys[1];
383 result |= xs[0] - ys[0];
384 return result;
385 case 1:
386 return *xs - *ys;
387 default:
388 case 0:
389 return 0;
390 }
391 }
392 else
393 {
394 /* unaligned */
395 int result;
396 while (length >= 8)
397 {
398#if defined(__i386__) || defined(__x86_64__)
399 result = ( ((int32_t)xs[3] << 24)
400 | ((int32_t)xs[2] << 16)
401 | ((int32_t)xs[1] << 8)
402 | xs[0] )
403 - *(int32_t*)ys;
404 result |= ( ((int32_t)xs[7] << 24)
405 | ((int32_t)xs[6] << 16)
406 | ((int32_t)xs[5] << 8)
407 | xs[4] )
408 - *(int32_t*)(ys + 4);
409#else
410 result = xs[3] - ys[3];
411 result |= xs[2] - ys[2];
412 result |= xs[1] - ys[1];
413 result |= xs[0] - ys[0];
414 result |= xs[7] - ys[7];
415 result |= xs[6] - ys[6];
416 result |= xs[5] - ys[5];
417 result |= xs[4] - ys[4];
418#endif
419 if (MY_PREDICT_FALSE(result))
420 return result;
421 xs += 8;
422 ys += 8;
423 length -= 8;
424 }
425 result = 0;
426 switch (length)
427 {
428 case 7: result |= xs[6] - ys[6];
429 case 6: result |= xs[5] - ys[5];
430 case 5: result |= xs[4] - ys[4];
431 case 4: result |= xs[3] - ys[3];
432 case 3: result |= xs[2] - ys[2];
433 case 2: result |= xs[1] - ys[1];
434 case 1: result |= xs[0] - ys[0];
435 return result;
436 default:
437 case 0:
438 return 0;
439 }
440 }
441}
442
443#endif
444
445MY_INLINE int
446strcache2_is_equal (struct strcache2 *cache, struct strcache2_entry const *entry,
447 const char *str, unsigned int length, unsigned int hash1)
448{
449 /* the simple stuff first. */
450 if ( entry == NULL
451 || entry->hash1 != hash1
452 || entry->length != length)
453 return 0;
454
455 return !cache->case_insensitive
456 ? memcmp (entry + 1, str, length) == 0
457#if defined(_MSC_VER) || defined(__OS2__)
458 : _memicmp (entry + 1, str, length) == 0
459#else
460 : strncasecmp ((const char *)(entry + 1), str, length) == 0
461#endif
462 ;
463}
464
465static void
466strcache2_rehash (struct strcache2 *cache)
467{
468 unsigned int src = cache->hash_size;
469 struct strcache2_entry **src_tab = cache->hash_tab;
470 struct strcache2_entry **dst_tab;
471 unsigned int hash_mask;
472
473 /* Allocate a new hash table twice the size of the current. */
474 cache->hash_size <<= 1;
475 cache->hash_mask <<= 1;
476 cache->hash_mask |= 1;
477 cache->rehash_count <<= 1;
478 cache->hash_tab = dst_tab = (struct strcache2_entry **)
479 xmalloc (cache->hash_size * sizeof (struct strcache2_entry *));
480 memset (dst_tab, '\0', cache->hash_size * sizeof (struct strcache2_entry *));
481
482 /* Copy the entries from the old to the new hash table. */
483 hash_mask = cache->hash_mask;
484 while (src-- > 0)
485 {
486 struct strcache2_entry *entry = src_tab[src];
487 if (entry)
488 {
489 unsigned int dst = entry->hash1 & hash_mask;
490 if (dst_tab[dst])
491 {
492 unsigned int hash2 = entry->hash2;
493 if (!hash2)
494 entry->hash2 = hash2 = cache->case_insensitive
495 ? strcache2_case_insensitive_hash_2 ((const char *)(entry + 1), entry->length)
496 : strcache2_case_sensitive_hash_2 ((const char *)(entry + 1), entry->length);
497 dst += hash2;
498 dst &= hash_mask;
499 while (dst_tab[dst])
500 {
501 dst += hash2;
502 dst &= hash_mask;
503 }
504 }
505
506 dst_tab[dst] = entry;
507 }
508 }
509
510 /* That's it, just free the old table and we're done. */
511 free (src_tab);
512}
513
514/* Internal worker that enters a new string into the cache. */
515static const char *
516strcache2_enter_string (struct strcache2 *cache, unsigned int idx,
517 const char *str, unsigned int length,
518 unsigned int hash1, unsigned hash2)
519{
520 struct strcache2_entry *entry;
521 struct strcache2_seg *seg;
522 unsigned int size;
523 char *str_copy;
524
525 /* Allocate space for the string. */
526
527 size = length + 1 + sizeof (struct strcache2_entry);
528 size = (size + STRCACHE2_ENTRY_ALIGNMENT - 1) & ~(STRCACHE2_ENTRY_ALIGNMENT - 1U);
529
530 seg = cache->seg_head;
531 if (MY_PREDICT_FALSE(seg->avail < size))
532 {
533 do
534 seg = seg->next;
535 while (seg && seg->avail < size);
536 if (!seg)
537 seg = strcache2_new_seg (cache, size);
538 }
539
540 entry = (struct strcache2_entry *) seg->cursor;
541 seg->cursor += size;
542 seg->avail -= size;
543
544 /* Setup the entry, copy the string and insert it into the hash table. */
545
546 entry->user = NULL;
547 entry->length = length;
548 entry->hash1 = hash1;
549 entry->hash2 = hash2;
550 str_copy = (char *) memcpy (entry + 1, str, length);
551 str_copy[length] = '\0';
552
553 cache->hash_tab[idx] = entry;
554 cache->count++;
555 if (cache->count >= cache->rehash_count)
556 strcache2_rehash (cache);
557
558 return str_copy;
559}
560
561/* The public add string interface. */
562const char *
563strcache2_add (struct strcache2 *cache, const char *str, unsigned int length)
564{
565 struct strcache2_entry const *entry;
566 unsigned int hash2;
567 unsigned int hash1 = strcache2_case_sensitive_hash_1 (str, length);
568 unsigned int idx;
569
570 assert (!cache->case_insensitive);
571
572 cache->lookup_count++;
573
574 /* Lookup the entry in the hash table, hoping for an
575 early match. */
576 idx = hash1 & cache->hash_mask;
577 entry = cache->hash_tab[idx];
578 if (strcache2_is_equal (cache, entry, str, length, hash1))
579 return (const char *)(entry + 1);
580 if (!entry)
581 hash2 = 0;
582 else
583 {
584 cache->collision_1st_count++;
585
586 hash2 = strcache2_case_sensitive_hash_2 (str, length);
587 idx += hash2;
588 idx &= cache->hash_mask;
589 entry = cache->hash_tab[idx];
590 if (strcache2_is_equal (cache, entry, str, length, hash1))
591 return (const char *)(entry + 1);
592
593 if (entry)
594 {
595 cache->collision_2nd_count++;
596 do
597 {
598 idx += hash2;
599 idx &= cache->hash_mask;
600 entry = cache->hash_tab[idx];
601 cache->collision_3rd_count++;
602 if (strcache2_is_equal (cache, entry, str, length, hash1))
603 return (const char *)(entry + 1);
604 }
605 while (entry);
606 }
607 }
608
609 /* Not found, add it at IDX. */
610 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
611}
612
613/* The public add string interface for prehashed strings.
614 Use strcache2_hash_str to calculate the hash of a string. */
615const char *
616strcache2_add_hashed (struct strcache2 *cache, const char *str, unsigned int length,
617 unsigned int hash1, unsigned int hash2)
618{
619 struct strcache2_entry const *entry;
620 unsigned int idx;
621#ifndef NDEBUG
622 unsigned correct_hash;
623
624 correct_hash = cache->case_insensitive
625 ? strcache2_case_insensitive_hash_1 (str, length)
626 : strcache2_case_sensitive_hash_1 (str, length);
627 MY_ASSERT_MSG (hash1 == correct_hash, ("%#x != %#x\n", hash1, correct_hash));
628 if (hash2)
629 {
630 correct_hash = cache->case_insensitive
631 ? strcache2_case_insensitive_hash_2 (str, length)
632 : strcache2_case_sensitive_hash_2 (str, length);
633 MY_ASSERT_MSG (hash2 == correct_hash, ("%#x != %#x\n", hash2, correct_hash));
634 }
635#endif /* NDEBUG */
636
637 cache->lookup_count++;
638
639 /* Lookup the entry in the hash table, hoping for an
640 early match. */
641 idx = hash1 & cache->hash_mask;
642 entry = cache->hash_tab[idx];
643 if (strcache2_is_equal (cache, entry, str, length, hash1))
644 return (const char *)(entry + 1);
645 if (entry)
646 {
647 cache->collision_1st_count++;
648
649 if (!hash2)
650 hash2 = cache->case_insensitive
651 ? strcache2_case_insensitive_hash_2 (str, length)
652 : strcache2_case_sensitive_hash_2 (str, length);
653 idx += hash2;
654 idx &= cache->hash_mask;
655 entry = cache->hash_tab[idx];
656 if (strcache2_is_equal (cache, entry, str, length, hash1))
657 return (const char *)(entry + 1);
658
659 if (entry)
660 {
661 cache->collision_2nd_count++;
662 do
663 {
664 idx += hash2;
665 idx &= cache->hash_mask;
666 entry = cache->hash_tab[idx];
667 cache->collision_3rd_count++;
668 if (strcache2_is_equal (cache, entry, str, length, hash1))
669 return (const char *)(entry + 1);
670 }
671 while (entry);
672 }
673 }
674
675 /* Not found, add it at IDX. */
676 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
677}
678
679/* The public lookup string interface. */
680const char *
681strcache2_lookup (struct strcache2 *cache, const char *str, unsigned int length)
682{
683 struct strcache2_entry const *entry;
684 unsigned int hash2;
685 unsigned int hash1 = strcache2_case_sensitive_hash_1 (str, length);
686 unsigned int idx;
687
688 assert (!cache->case_insensitive);
689
690 cache->lookup_count++;
691
692 /* Lookup the entry in the hash table, hoping for an
693 early match. */
694 idx = hash1 & cache->hash_mask;
695 entry = cache->hash_tab[idx];
696 if (strcache2_is_equal (cache, entry, str, length, hash1))
697 return (const char *)(entry + 1);
698 if (!entry)
699 hash2 = 0;
700 else
701 {
702 cache->collision_1st_count++;
703
704 hash2 = strcache2_case_sensitive_hash_2 (str, length);
705 idx += hash2;
706 idx &= cache->hash_mask;
707 entry = cache->hash_tab[idx];
708 if (strcache2_is_equal (cache, entry, str, length, hash1))
709 return (const char *)(entry + 1);
710
711 if (entry)
712 {
713 cache->collision_2nd_count++;
714 do
715 {
716 idx += hash2;
717 idx &= cache->hash_mask;
718 entry = cache->hash_tab[idx];
719 cache->collision_3rd_count++;
720 if (strcache2_is_equal (cache, entry, str, length, hash1))
721 return (const char *)(entry + 1);
722 }
723 while (entry);
724 }
725 }
726
727 /* Not found. */
728 return NULL;
729}
730
731#if defined(HAVE_CASE_INSENSITIVE_FS)
732
733/* The public add string interface for case insensitive strings. */
734const char *
735strcache2_iadd (struct strcache2 *cache, const char *str, unsigned int length)
736{
737 struct strcache2_entry const *entry;
738 unsigned int hash2;
739 unsigned int hash1 = strcache2_case_insensitive_hash_1 (str, length);
740 unsigned int idx;
741
742 assert (cache->case_insensitive);
743 cache->lookup_count++;
744
745 /* Lookup the entry in the hash table, hoping for an
746 early match. */
747 idx = hash1 & cache->hash_mask;
748 entry = cache->hash_tab[idx];
749 if (strcache2_is_equal (cache, entry, str, length, hash1))
750 return (const char *)(entry + 1);
751 if (!entry)
752 hash2 = 0;
753 else
754 {
755 cache->collision_1st_count++;
756
757 hash2 = strcache2_case_insensitive_hash_2 (str, length);
758 idx += hash2;
759 idx &= cache->hash_mask;
760 entry = cache->hash_tab[idx];
761 if (strcache2_is_equal (cache, entry, str, length, hash1))
762 return (const char *)(entry + 1);
763
764 if (entry)
765 {
766 cache->collision_2nd_count++;
767 do
768 {
769 idx += hash2;
770 idx &= cache->hash_mask;
771 entry = cache->hash_tab[idx];
772 cache->collision_3rd_count++;
773 if (strcache2_is_equal (cache, entry, str, length, hash1))
774 return (const char *)(entry + 1);
775 }
776 while (entry);
777 }
778 }
779
780 /* Not found, add it at IDX. */
781 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
782}
783
784/* strcache_ilookup later */
785
786#endif /* HAVE_CASE_INSENSITIVE_FS */
787
788/* Is the given string cached? returns 1 if it is, 0 if it isn't. */
789int
790strcache2_is_cached (struct strcache2 *cache, const char *str)
791{
792 /* Check mandatory alignment first. */
793 if (!((size_t)str & (sizeof (void *) - 1)))
794 {
795 /* Check the segment list and consider the question answered if the
796 string is within one of them. (Could check it more thoroughly...) */
797 struct strcache2_seg const *seg;
798 for (seg = cache->seg_head; seg; seg = seg->next)
799 if ((size_t)(str - seg->start) < seg->size)
800 return 1;
801 }
802
803 return 0;
804}
805
806
807/* Verify the integrity of the specified string, returning 0 if OK. */
808int
809strcache2_verify_entry (struct strcache2 *cache, const char *str)
810{
811 struct strcache2_entry const *entry;
812 unsigned hash;
813 const char *end;
814
815 if ((size_t)str & (sizeof (void *) - 1))
816 {
817 fprintf (stderr,
818 "strcache2[%s]: missaligned string %p\nstring: %s\n",
819 cache->name, (void *)str, str);
820 return -1;
821 }
822
823 entry = (struct strcache2_entry const *)str - 1;
824 end = memchr (str, '\0', entry->length);
825 if ((size_t)(end - str) == entry->length)
826 {
827 fprintf (stderr,
828 "strcache2[%s]: corrupt entry %p, length: %lu, expected %u;\nstring: %s\n",
829 cache->name, (void *)entry, (unsigned long)(end - str), entry->length, str);
830 return -1;
831 }
832
833 hash = cache->case_insensitive
834 ? strcache2_case_insensitive_hash_1 (str, entry->length)
835 : strcache2_case_sensitive_hash_1 (str, entry->length);
836 if (hash != entry->hash1)
837 {
838 fprintf (stderr,
839 "strcache2[%s]: corrupt entry %p, hash#1: %x, expected %x;\nstring: %s\n",
840 cache->name, (void *)entry, hash, entry->hash1, str);
841 return -1;
842 }
843
844 if (entry->hash2)
845 {
846 hash = cache->case_insensitive
847 ? strcache2_case_insensitive_hash_2 (str, entry->length)
848 : strcache2_case_sensitive_hash_2 (str, entry->length);
849 if (hash != entry->hash2)
850 {
851 fprintf (stderr,
852 "strcache2[%s]: corrupt entry %p, hash#2: %x, expected %x;\nstring: %s\n",
853 cache->name, (void *)entry, hash, entry->hash2, str);
854 return -1;
855 }
856 }
857
858 return 0;
859}
860
861/* Fallback for calculating and returning the 2nd hash. */
862unsigned int
863strcache2_get_hash2_fallback (struct strcache2 *cache, const char *str)
864{
865 struct strcache2_entry *entry = (struct strcache2_entry *) str - 1;
866 unsigned hash2 = cache->case_insensitive
867 ? strcache2_case_insensitive_hash_2 (str, entry->length)
868 : strcache2_case_sensitive_hash_2 (str, entry->length);
869 entry->hash2 = hash2;
870 return hash2;
871}
872
873/* Calculates the case sensitive hash values of the string.
874 The first hash is returned, the other is put at HASH2P. */
875unsigned int strcache2_hash_str (const char *str, unsigned int length, unsigned int *hash2p)
876{
877 *hash2p = strcache2_case_sensitive_hash_2 (str, length);
878 return strcache2_case_sensitive_hash_1 (str, length);
879}
880
881/* Calculates the case insensitive hash values of the string.
882 The first hash is returned, the other is put at HASH2P. */
883unsigned int strcache2_hash_istr (const char *str, unsigned int length, unsigned int *hash2p)
884{
885 *hash2p = strcache2_case_insensitive_hash_2 (str, length);
886 return strcache2_case_insensitive_hash_1 (str, length);
887}
888
889
890
891/* Initalizes a new cache. */
892void
893strcache2_init (struct strcache2 *cache, const char *name, unsigned int size,
894 unsigned int def_seg_size, int case_insensitive, int thread_safe)
895{
896 unsigned hash_shift;
897 assert (!thread_safe);
898
899 /* calc the size as a power of two */
900 if (!size)
901 hash_shift = STRCACHE2_HASH_SHIFT;
902 else
903 {
904 assert (size <= (~0U / 2 + 1));
905 for (hash_shift = 8; (1U << hash_shift) < size; hash_shift++)
906 /* nothing */;
907 }
908
909 /* adjust the default segment size */
910 if (!def_seg_size)
911 def_seg_size = STRCACHE2_SEG_SIZE;
912 else if (def_seg_size < sizeof (struct strcache2_seg) * 10)
913 def_seg_size = sizeof (struct strcache2_seg) * 10;
914 else if ((def_seg_size & 0xfff) < 0xf00)
915 def_seg_size = ((def_seg_size + 0xfff) & ~0xfffU);
916
917
918 /* init the structure. */
919 cache->case_insensitive = case_insensitive;
920 cache->hash_mask = (1U << hash_shift) - 1U;
921 cache->count = 0;
922 cache->lookup_count = 0;
923 cache->collision_1st_count = 0;
924 cache->collision_2nd_count = 0;
925 cache->collision_3rd_count = 0;
926 cache->rehash_count = (1U << hash_shift) / 4 * 3; /* rehash at 75% */
927 cache->init_size = 1U << hash_shift;
928 cache->hash_size = 1U << hash_shift;
929 cache->def_seg_size = def_seg_size;
930 cache->lock = NULL;
931 cache->name = name;
932
933 /* allocate the hash table and first segment. */
934 cache->hash_tab = (struct strcache2_entry **)
935 xmalloc (cache->init_size * sizeof (struct strcache2_entry *));
936 memset (cache->hash_tab, '\0', cache->init_size * sizeof (struct strcache2_entry *));
937 strcache2_new_seg (cache, 0);
938
939 /* link it */
940 cache->next = strcache_head;
941 strcache_head = cache;
942}
943
944
945/* Terminates a string cache, freeing all memory and other
946 associated resources. */
947void
948strcache2_term (struct strcache2 *cache)
949{
950 /* unlink it */
951 if (strcache_head == cache)
952 strcache_head = cache->next;
953 else
954 {
955 struct strcache2 *prev = strcache_head;
956 while (prev->next != cache)
957 prev = prev->next;
958 assert (prev);
959 prev->next = cache->next;
960 }
961
962 /* free the memory segments */
963 do
964 {
965 void *free_it = cache->seg_head;
966 cache->seg_head = cache->seg_head->next;
967 free (free_it);
968 }
969 while (cache->seg_head);
970
971 /* free the hash and clear the structure. */
972 free (cache->hash_tab);
973 memset (cache, '\0', sizeof (struct strcache2));
974}
975
976/* Print statistics a string cache. */
977void
978strcache2_print_stats (struct strcache2 *cache, const char *prefix)
979{
980 unsigned int seg_count = 0;
981 unsigned long seg_total_bytes = 0;
982 unsigned long seg_avg_bytes;
983 unsigned long seg_avail_bytes = 0;
984 unsigned long seg_max_bytes = 0;
985 struct strcache2_seg *seg;
986 unsigned long str_total_len = 0;
987 unsigned int str_avg_len;
988 unsigned int str_min_len = ~0U;
989 unsigned int str_max_len = 0;
990 unsigned int idx;
991 unsigned int rehashes;
992
993 printf (_("\n%s strcache2: %s\n"), prefix, cache->name);
994
995 /* Segment statistics. */
996 for (seg = cache->seg_head; seg; seg = seg->next)
997 {
998 seg_count++;
999 seg_total_bytes += seg->size;
1000 seg_avail_bytes += seg->avail;
1001 if (seg->size > seg_max_bytes)
1002 seg_max_bytes = seg->size;
1003 }
1004 seg_avg_bytes = seg_total_bytes / seg_count;
1005 printf (_("%s %u segments: total = %lu / max = %lu / avg = %lu / def = %u avail = %lu\n"),
1006 prefix, seg_count, seg_total_bytes, seg_max_bytes, seg_avg_bytes,
1007 cache->def_seg_size, seg_avail_bytes);
1008
1009 /* String statistics. */
1010 idx = cache->hash_size;
1011 while (idx-- > 0)
1012 {
1013 struct strcache2_entry const *entry = cache->hash_tab[idx];
1014 if (entry)
1015 {
1016 unsigned int length = entry->length;
1017 str_total_len += length;
1018 if (length > str_max_len)
1019 str_max_len = length;
1020 if (length < str_min_len)
1021 str_min_len = length;
1022 }
1023 }
1024 str_avg_len = cache->count ? str_total_len / cache->count : 0;
1025 printf (_("%s %u strings: total len = %lu / max = %u / avg = %u / min = %u\n"),
1026 prefix, cache->count, str_total_len, str_max_len, str_avg_len, str_min_len);
1027
1028 /* Hash statistics. */
1029 idx = cache->init_size;
1030 rehashes = 0;
1031 while (idx < cache->hash_size)
1032 {
1033 idx *= 2;
1034 rehashes++;
1035 }
1036
1037 printf (_("%s hash size = %u mask = %#x rehashed %u times lookups = %lu\n"),
1038 prefix, cache->hash_size, cache->hash_mask, rehashes, cache->lookup_count);
1039 printf (_("%s hash collisions 1st = %lu (%u%%) 2nd = %lu (%u%%) 3rd = %lu (%u%%)\n"),
1040 prefix,
1041 cache->collision_1st_count, (unsigned int)((float)cache->collision_1st_count * 100 / cache->lookup_count),
1042 cache->collision_2nd_count, (unsigned int)((float)cache->collision_2nd_count * 100 / cache->lookup_count),
1043 cache->collision_3rd_count, (unsigned int)((float)cache->collision_3rd_count * 100 / cache->lookup_count));
1044}
1045
1046/* Print statistics for all string caches. */
1047void
1048strcache2_print_stats_all (const char *prefix)
1049{
1050 struct strcache2 *cur;
1051 for (cur = strcache_head; cur; cur = cur->next)
1052 strcache2_print_stats (cur, prefix);
1053}
1054
Note: See TracBrowser for help on using the repository browser.