source: trunk/src/kmk/strcache2.c@ 1887

Last change on this file since 1887 was 1887, checked in by bird, 17 years ago

kmk: delegating variable string hashing to the strcache, dropping the VARIALBE_HASH hack (remaining code to be removed).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 28.9 KB
Line 
1/* $Id: strcache2.c 1887 2008-10-19 23:08:10Z bird $ */
2/** @file
3 * strcache2 - New string cache.
4 */
5
6/*
7 * Copyright (c) 2008 knut st. osmundsen <bird-src-spam@anduin.net>
8 *
9 * This file is part of kBuild.
10 *
11 * kBuild is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * kBuild is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with kBuild; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 *
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include "make.h"
32#include "strcache2.h"
33
34#include <assert.h>
35
36#include "debug.h"
37
38#ifdef WINDOWS32
39# include <io.h>
40# include <process.h>
41# include <Windows.h>
42# define PARSE_IN_WORKER
43#endif
44
45#ifdef __OS2__
46# include <sys/fmutex.h>
47#endif
48
49#ifdef HAVE_PTHREAD
50# include <pthread.h>
51#endif
52
53
54/*******************************************************************************
55* Defined Constants And Macros *
56*******************************************************************************/
57/* The default size of a memory segment (1MB). */
58#define STRCACHE2_SEG_SIZE (1024U*1024U)
59/* The default hash table shift (hash size give as a power of two). */
60#define STRCACHE2_HASH_SHIFT 16
61
62
63/*******************************************************************************
64* Global Variables *
65*******************************************************************************/
66/* List of initialized string caches. */
67static struct strcache2 *strcache_head;
68
69
70
71static struct strcache2_seg *
72strcache2_new_seg (struct strcache2 *cache, unsigned int minlen)
73{
74 struct strcache2_seg *seg;
75 size_t size;
76
77 size = cache->def_seg_size;
78 if (size < (size_t)minlen + sizeof (struct strcache2_seg))
79 {
80 size = (size_t)minlen * 2;
81 size = (size + 0xfff) & ~(size_t)0xfff;
82 }
83
84 seg = xmalloc (size);
85 seg->cursor = seg->start = (char *)(seg + 1);
86 seg->size = seg->avail = size - sizeof (struct strcache2_seg);
87 assert (seg->size > minlen);
88
89 seg->next = cache->seg_head;
90 cache->seg_head = seg;
91
92 return seg;
93}
94
95MY_INLINE unsigned int
96strcache2_case_sensitive_hash_1 (const char *str, unsigned int len)
97{
98 unsigned int hash = 0;
99 if (MY_PREDICT_TRUE(len >= 2))
100 {
101 unsigned int ch0 = *str++;
102 hash = 0;
103 len--;
104 while (len >= 2)
105 {
106 unsigned int ch1 = *str++;
107 hash += ch0 << (ch1 & 0xf);
108
109 ch0 = *str++;
110 hash += ch1 << (ch0 & 0xf);
111
112 len -= 2;
113 }
114 if (len == 1)
115 {
116 unsigned ch1 = *str;
117 hash += ch0 << (ch1 & 0xf);
118
119 hash += ch1;
120 }
121 else
122 hash += ch0;
123 }
124 else if (len)
125 {
126 hash = *str;
127 hash += hash << (hash & 0xf);
128 }
129 else
130 hash = 0;
131 return hash;
132}
133
134MY_INLINE unsigned int
135strcache2_case_sensitive_hash_2 (const char *str, unsigned int len)
136{
137 unsigned int hash = 0;
138 if (MY_PREDICT_TRUE(len >= 2))
139 {
140 unsigned int ch0 = *str++;
141 hash = 0;
142 len--;
143 while (len >= 2)
144 {
145 unsigned int ch1 = *str++;
146 hash += ch0 << (ch1 & 0x7);
147
148 ch0 = *str++;
149 hash += ch1 << (ch0 & 0x7);
150
151 len -= 2;
152 }
153 if (len == 1)
154 {
155 unsigned ch1 = *str;
156 hash += ch0 << (ch1 & 0x7);
157
158 hash += ch1;
159 }
160 else
161 hash += ch0;
162 }
163 else if (len)
164 {
165 hash = *str;
166 hash += hash << (hash & 0x7);
167 }
168 else
169 hash = 1;
170 hash |= 1;
171 return hash;
172}
173
174MY_INLINE unsigned int
175strcache2_case_insensitive_hash_1 (const char *str, unsigned int len)
176{
177 unsigned int hash = 0;
178 if (MY_PREDICT_TRUE(len >= 2))
179 {
180 unsigned int ch0 = *str++;
181 ch0 = tolower (ch0);
182 hash = 0;
183 len--;
184 while (len >= 2)
185 {
186 unsigned int ch1 = *str++;
187 ch1 = tolower (ch1);
188 hash += ch0 << (ch1 & 0xf);
189
190 ch0 = *str++;
191 ch0 = tolower (ch0);
192 hash += ch1 << (ch0 & 0xf);
193
194 len -= 2;
195 }
196 if (len == 1)
197 {
198 unsigned ch1 = *str;
199 ch1 = tolower (ch1);
200 hash += ch0 << (ch1 & 0xf);
201
202 hash += ch1;
203 }
204 else
205 hash += ch0;
206 }
207 else if (len)
208 {
209 hash = *str;
210 hash += hash << (hash & 0xf);
211 }
212 else
213 hash = 0;
214 return hash;
215}
216
217MY_INLINE unsigned int
218strcache2_case_insensitive_hash_2 (const char *str, unsigned int len)
219{
220 unsigned int hash = 0;
221 if (MY_PREDICT_TRUE(len >= 2))
222 {
223 unsigned int ch0 = *str++;
224 ch0 = tolower (ch0);
225 hash = 0;
226 len--;
227 while (len >= 2)
228 {
229 unsigned int ch1 = *str++;
230 ch1 = tolower (ch1);
231 hash += ch0 << (ch1 & 0x7);
232
233 ch0 = *str++;
234 ch0 = tolower (ch0);
235 hash += ch1 << (ch0 & 0x7);
236
237 len -= 2;
238 }
239 if (len == 1)
240 {
241 unsigned ch1 = *str;
242 ch1 = tolower (ch1);
243 hash += ch0 << (ch1 & 0x7);
244
245 hash += ch1;
246 }
247 else
248 hash += ch0;
249 }
250 else if (len)
251 {
252 hash = *str;
253 hash += hash << (hash & 0x7);
254 }
255 else
256 hash = 1;
257 hash |= 1;
258 return hash;
259}
260
261#if 0 /* FIXME: Use this (salvaged from variable.c) */
262
263MY_INLINE int
264variable_hash_cmp_2_memcmp (const char *xs, const char *ys, unsigned int length)
265{
266 /* short string compare - ~50% of the kBuild calls. */
267 assert ( !((size_t)ys & 3) );
268 if (!((size_t)xs & 3))
269 {
270 /* aligned */
271 int result;
272 switch (length)
273 {
274 case 8:
275 result = *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
276 result |= *(int32_t*)xs - *(int32_t*)ys;
277 return result;
278 case 7:
279 result = xs[6] - ys[6];
280 result |= xs[5] - ys[5];
281 result |= xs[4] - ys[4];
282 result |= *(int32_t*)xs - *(int32_t*)ys;
283 return result;
284 case 6:
285 result = xs[5] - ys[5];
286 result |= xs[4] - ys[4];
287 result |= *(int32_t*)xs - *(int32_t*)ys;
288 return result;
289 case 5:
290 result = xs[4] - ys[4];
291 result |= *(int32_t*)xs - *(int32_t*)ys;
292 return result;
293 case 4:
294 return *(int32_t*)xs - *(int32_t*)ys;
295 case 3:
296 result = xs[2] - ys[2];
297 result |= xs[1] - ys[1];
298 result |= xs[0] - ys[0];
299 return result;
300 case 2:
301 result = xs[1] - ys[1];
302 result |= xs[0] - ys[0];
303 return result;
304 case 1:
305 return *xs - *ys;
306 case 0:
307 return 0;
308 }
309 }
310 else
311 {
312 /* unaligned */
313 int result = 0;
314 switch (length)
315 {
316 case 8: result |= xs[7] - ys[7];
317 case 7: result |= xs[6] - ys[6];
318 case 6: result |= xs[5] - ys[5];
319 case 5: result |= xs[4] - ys[4];
320 case 4: result |= xs[3] - ys[3];
321 case 3: result |= xs[2] - ys[2];
322 case 2: result |= xs[1] - ys[1];
323 case 1: result |= xs[0] - ys[0];
324 case 0:
325 return result;
326 }
327 }
328
329 /* memcmp for longer strings */
330# ifdef __GNUC__
331 return __builtin_memcmp (xs, ys, length);
332# else
333 return memcmp (xs, ys, length);
334# endif
335}
336
337MY_INLINE int
338variable_hash_cmp_2_inlined (const char *xs, const char *ys, unsigned int length)
339{
340#ifndef ELECTRIC_HEAP
341 assert ( !((size_t)ys & 3) );
342#endif
343 if (!((size_t)xs & 3))
344 {
345 int result;
346 /* aligned */
347 while (length >= 8)
348 {
349 result = *(int32_t*)xs - *(int32_t*)ys;
350 result |= *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
351 if (MY_PREDICT_FALSE(result))
352 return result;
353 xs += 8;
354 ys += 8;
355 length -= 8;
356 }
357 switch (length)
358 {
359 case 7:
360 result = *(int32_t*)xs - *(int32_t*)ys;
361 result |= xs[6] - ys[6];
362 result |= xs[5] - ys[5];
363 result |= xs[4] - ys[4];
364 return result;
365 case 6:
366 result = *(int32_t*)xs - *(int32_t*)ys;
367 result |= xs[5] - ys[5];
368 result |= xs[4] - ys[4];
369 return result;
370 case 5:
371 result = *(int32_t*)xs - *(int32_t*)ys;
372 result |= xs[4] - ys[4];
373 return result;
374 case 4:
375 return *(int32_t*)xs - *(int32_t*)ys;
376 case 3:
377 result = xs[2] - ys[2];
378 result |= xs[1] - ys[1];
379 result |= xs[0] - ys[0];
380 return result;
381 case 2:
382 result = xs[1] - ys[1];
383 result |= xs[0] - ys[0];
384 return result;
385 case 1:
386 return *xs - *ys;
387 default:
388 case 0:
389 return 0;
390 }
391 }
392 else
393 {
394 /* unaligned */
395 int result;
396 while (length >= 8)
397 {
398#if defined(__i386__) || defined(__x86_64__)
399 result = ( ((int32_t)xs[3] << 24)
400 | ((int32_t)xs[2] << 16)
401 | ((int32_t)xs[1] << 8)
402 | xs[0] )
403 - *(int32_t*)ys;
404 result |= ( ((int32_t)xs[7] << 24)
405 | ((int32_t)xs[6] << 16)
406 | ((int32_t)xs[5] << 8)
407 | xs[4] )
408 - *(int32_t*)(ys + 4);
409#else
410 result = xs[3] - ys[3];
411 result |= xs[2] - ys[2];
412 result |= xs[1] - ys[1];
413 result |= xs[0] - ys[0];
414 result |= xs[7] - ys[7];
415 result |= xs[6] - ys[6];
416 result |= xs[5] - ys[5];
417 result |= xs[4] - ys[4];
418#endif
419 if (MY_PREDICT_FALSE(result))
420 return result;
421 xs += 8;
422 ys += 8;
423 length -= 8;
424 }
425 result = 0;
426 switch (length)
427 {
428 case 7: result |= xs[6] - ys[6];
429 case 6: result |= xs[5] - ys[5];
430 case 5: result |= xs[4] - ys[4];
431 case 4: result |= xs[3] - ys[3];
432 case 3: result |= xs[2] - ys[2];
433 case 2: result |= xs[1] - ys[1];
434 case 1: result |= xs[0] - ys[0];
435 return result;
436 default:
437 case 0:
438 return 0;
439 }
440 }
441}
442
443#endif
444
445MY_INLINE int
446strcache2_is_equal (struct strcache2 *cache, struct strcache2_entry const *entry,
447 const char *str, unsigned int length, unsigned int hash1)
448{
449 /* the simple stuff first. */
450 if ( entry == NULL
451 || entry->hash1 != hash1
452 || entry->length != length)
453 return 0;
454
455 return !cache->case_insensitive
456 ? memcmp (entry + 1, str, length) == 0
457#if defined(_MSC_VER) || defined(__OS2__)
458 : _memicmp (entry + 1, str, length) == 0
459#else
460 : strncasecmp ((const char *)(entry + 1), str, length) == 0
461#endif
462 ;
463}
464
465static void
466strcache2_rehash (struct strcache2 *cache)
467{
468 unsigned int src = cache->hash_size;
469 struct strcache2_entry **src_tab = cache->hash_tab;
470 struct strcache2_entry **dst_tab;
471 unsigned int hash_mask;
472
473 /* Allocate a new hash table twice the size of the current. */
474 cache->hash_size <<= 1;
475 cache->hash_mask <<= 1;
476 cache->hash_mask |= 1;
477 cache->rehash_count <<= 1;
478 cache->hash_tab = dst_tab = (struct strcache2_entry **)
479 xmalloc (cache->hash_size * sizeof (struct strcache2_entry *));
480 memset (dst_tab, '\0', cache->hash_size * sizeof (struct strcache2_entry *));
481
482 /* Copy the entries from the old to the new hash table. */
483 hash_mask = cache->hash_mask;
484 while (src-- > 0)
485 {
486 struct strcache2_entry *entry = src_tab[src];
487 if (entry)
488 {
489 unsigned int dst = entry->hash1 & hash_mask;
490 if (dst_tab[dst])
491 {
492 unsigned int hash2 = entry->hash2;
493 if (!hash2)
494 entry->hash2 = hash2 = cache->case_insensitive
495 ? strcache2_case_insensitive_hash_2 ((const char *)(entry + 1), entry->length)
496 : strcache2_case_sensitive_hash_2 ((const char *)(entry + 1), entry->length);
497 dst += hash2;
498 dst &= hash_mask;
499 while (dst_tab[dst])
500 {
501 dst += hash2;
502 dst &= hash_mask;
503 }
504 }
505
506 dst_tab[dst] = entry;
507 }
508 }
509
510 /* That's it, just free the old table and we're done. */
511 free (src_tab);
512}
513
514/* Internal worker that enters a new string into the cache. */
515static const char *
516strcache2_enter_string (struct strcache2 *cache, unsigned int idx,
517 const char *str, unsigned int length,
518 unsigned int hash1, unsigned hash2)
519{
520 struct strcache2_entry *entry;
521 struct strcache2_seg *seg;
522 unsigned int size;
523 char *str_copy;
524
525 /* Allocate space for the string. */
526
527 size = length + 1 + sizeof (struct strcache2_entry);
528 size = (size + sizeof (void *) - 1) & ~(sizeof (void *) - 1U);
529
530 seg = cache->seg_head;
531 if (MY_PREDICT_FALSE(seg->avail < size))
532 {
533 do
534 seg = seg->next;
535 while (seg && seg->avail < size);
536 if (!seg)
537 seg = strcache2_new_seg (cache, size);
538 }
539
540 entry = (struct strcache2_entry *) seg->cursor;
541 seg->cursor += size;
542 seg->avail -= size;
543
544 /* Setup the entry, copy the string and insert it into the hash table. */
545
546 entry->user = NULL;
547 entry->length = length;
548 entry->hash1 = hash1;
549 entry->hash2 = hash2;
550 str_copy = (char *) memcpy (entry + 1, str, length);
551 str_copy[length] = '\0';
552
553 cache->hash_tab[idx] = entry;
554 cache->count++;
555 if (cache->count >= cache->rehash_count)
556 strcache2_rehash (cache);
557
558 return str_copy;
559}
560
561/* The public add string interface. */
562const char *
563strcache2_add (struct strcache2 *cache, const char *str, unsigned int length)
564{
565 struct strcache2_entry const *entry;
566 unsigned int hash2;
567 unsigned int hash1 = strcache2_case_sensitive_hash_1 (str, length);
568 unsigned int idx;
569
570 assert (!cache->case_insensitive);
571
572
573 cache->lookup_count++;
574
575 /* Lookup the entry in the hash table, hoping for an
576 early match. */
577 idx = hash1 & cache->hash_mask;
578 entry = cache->hash_tab[idx];
579 if (strcache2_is_equal (cache, entry, str, length, hash1))
580 return (const char *)(entry + 1);
581 if (!entry)
582 hash2 = 0;
583 else
584 {
585 cache->collision_1st_count++;
586
587 hash2 = strcache2_case_sensitive_hash_2 (str, length);
588 idx += hash2;
589 idx &= cache->hash_mask;
590 entry = cache->hash_tab[idx];
591 if (strcache2_is_equal (cache, entry, str, length, hash1))
592 return (const char *)(entry + 1);
593
594 if (entry)
595 {
596 cache->collision_2nd_count++;
597 do
598 {
599 idx += hash2;
600 idx &= cache->hash_mask;
601 entry = cache->hash_tab[idx];
602 cache->collision_3rd_count++;
603 if (strcache2_is_equal (cache, entry, str, length, hash1))
604 return (const char *)(entry + 1);
605 }
606 while (entry);
607 }
608 }
609
610 /* Not found, add it at IDX. */
611 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
612}
613
614/* The public add string interface for prehashed strings.
615 Use strcache2_hash_str to calculate the hash of a string. */
616const char *
617strcache2_add_hashed (struct strcache2 *cache, const char *str, unsigned int length,
618 unsigned int hash1, unsigned int hash2)
619{
620 struct strcache2_entry const *entry;
621 unsigned int idx;
622#ifndef NDEBUG
623 unsigned correct_hash;
624
625 correct_hash = cache->case_insensitive
626 ? strcache2_case_insensitive_hash_1 (str, length)
627 : strcache2_case_sensitive_hash_1 (str, length);
628 MY_ASSERT_MSG (hash1 == correct_hash, ("%#x != %#x\n", hash1, correct_hash));
629 if (hash2)
630 {
631 correct_hash = cache->case_insensitive
632 ? strcache2_case_insensitive_hash_2 (str, length)
633 : strcache2_case_sensitive_hash_2 (str, length);
634 MY_ASSERT_MSG (hash2 == correct_hash, ("%#x != %#x\n", hash2, correct_hash));
635 }
636#endif /* NDEBUG */
637
638 cache->lookup_count++;
639
640 /* Lookup the entry in the hash table, hoping for an
641 early match. */
642 idx = hash1 & cache->hash_mask;
643 entry = cache->hash_tab[idx];
644 if (strcache2_is_equal (cache, entry, str, length, hash1))
645 return (const char *)(entry + 1);
646 if (entry)
647 {
648 cache->collision_1st_count++;
649
650 if (!hash2)
651 hash2 = cache->case_insensitive
652 ? strcache2_case_insensitive_hash_2 (str, length)
653 : strcache2_case_sensitive_hash_2 (str, length);
654 idx += hash2;
655 idx &= cache->hash_mask;
656 entry = cache->hash_tab[idx];
657 if (strcache2_is_equal (cache, entry, str, length, hash1))
658 return (const char *)(entry + 1);
659
660 if (entry)
661 {
662 cache->collision_2nd_count++;
663 do
664 {
665 idx += hash2;
666 idx &= cache->hash_mask;
667 entry = cache->hash_tab[idx];
668 cache->collision_3rd_count++;
669 if (strcache2_is_equal (cache, entry, str, length, hash1))
670 return (const char *)(entry + 1);
671 }
672 while (entry);
673 }
674 }
675
676 /* Not found, add it at IDX. */
677 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
678}
679
680/* The public lookup string interface. */
681const char *
682strcache2_lookup (struct strcache2 *cache, const char *str, unsigned int length)
683{
684 struct strcache2_entry const *entry;
685 unsigned int hash2;
686 unsigned int hash1 = strcache2_case_sensitive_hash_1 (str, length);
687 unsigned int idx;
688
689 assert (!cache->case_insensitive);
690
691 cache->lookup_count++;
692
693 /* Lookup the entry in the hash table, hoping for an
694 early match. */
695 idx = hash1 & cache->hash_mask;
696 entry = cache->hash_tab[idx];
697 if (strcache2_is_equal (cache, entry, str, length, hash1))
698 return (const char *)(entry + 1);
699 if (!entry)
700 hash2 = 0;
701 else
702 {
703 cache->collision_1st_count++;
704
705 hash2 = strcache2_case_sensitive_hash_2 (str, length);
706 idx += hash2;
707 idx &= cache->hash_mask;
708 entry = cache->hash_tab[idx];
709 if (strcache2_is_equal (cache, entry, str, length, hash1))
710 return (const char *)(entry + 1);
711
712 if (entry)
713 {
714 cache->collision_2nd_count++;
715 do
716 {
717 idx += hash2;
718 idx &= cache->hash_mask;
719 entry = cache->hash_tab[idx];
720 cache->collision_3rd_count++;
721 if (strcache2_is_equal (cache, entry, str, length, hash1))
722 return (const char *)(entry + 1);
723 }
724 while (entry);
725 }
726 }
727
728 /* Not found. */
729 return NULL;
730}
731
732#if defined(HAVE_CASE_INSENSITIVE_FS)
733
734/* The public add string interface for case insensitive strings. */
735const char *
736strcache2_iadd (struct strcache2 *cache, const char *str, unsigned int length)
737{
738 struct strcache2_entry const *entry;
739 unsigned int hash2;
740 unsigned int hash1 = strcache2_case_insensitive_hash_1 (str, length);
741 unsigned int idx;
742
743 assert (cache->case_insensitive);
744 cache->lookup_count++;
745
746 /* Lookup the entry in the hash table, hoping for an
747 early match. */
748 idx = hash1 & cache->hash_mask;
749 entry = cache->hash_tab[idx];
750 if (strcache2_is_equal (cache, entry, str, length, hash1))
751 return (const char *)(entry + 1);
752 if (!entry)
753 hash2 = 0;
754 else
755 {
756 cache->collision_1st_count++;
757
758 hash2 = strcache2_case_insensitive_hash_2 (str, length);
759 idx += hash2;
760 idx &= cache->hash_mask;
761 entry = cache->hash_tab[idx];
762 if (strcache2_is_equal (cache, entry, str, length, hash1))
763 return (const char *)(entry + 1);
764
765 if (entry)
766 {
767 cache->collision_2nd_count++;
768 do
769 {
770 idx += hash2;
771 idx &= cache->hash_mask;
772 entry = cache->hash_tab[idx];
773 cache->collision_3rd_count++;
774 if (strcache2_is_equal (cache, entry, str, length, hash1))
775 return (const char *)(entry + 1);
776 }
777 while (entry);
778 }
779 }
780
781 /* Not found, add it at IDX. */
782 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
783}
784
785/* strcache_ilookup later */
786
787#endif /* HAVE_CASE_INSENSITIVE_FS */
788
789/* Is the given string cached? returns 1 if it is, 0 if it isn't. */
790int
791strcache2_is_cached (struct strcache2 *cache, const char *str)
792{
793 /* Check mandatory alignment first. */
794 if (!((size_t)str & (sizeof (void *) - 1)))
795 {
796 /* Check the segment list and consider the question answered if the
797 string is within one of them. (Could check it more thoroughly...) */
798 struct strcache2_seg const *seg;
799 for (seg = cache->seg_head; seg; seg = seg->next)
800 if ((size_t)(str - seg->start) < seg->size)
801 return 1;
802 }
803
804 return 0;
805}
806
807
808/* Verify the integrity of the specified string, returning 0 if OK. */
809int
810strcache2_verify_entry (struct strcache2 *cache, const char *str)
811{
812 struct strcache2_entry const *entry;
813 unsigned hash;
814 const char *end;
815
816 if ((size_t)str & (sizeof (void *) - 1))
817 {
818 fprintf (stderr,
819 "strcache2[%s]: missaligned string %p\nstring: %s\n",
820 cache->name, (void *)str, str);
821 return -1;
822 }
823
824 entry = (struct strcache2_entry const *)str - 1;
825 end = memchr (str, '\0', entry->length);
826 if ((size_t)(end - str) == entry->length)
827 {
828 fprintf (stderr,
829 "strcache2[%s]: corrupt entry %p, length: %lu, expected %u;\nstring: %s\n",
830 cache->name, (void *)entry, (unsigned long)(end - str), entry->length, str);
831 return -1;
832 }
833
834 hash = cache->case_insensitive
835 ? strcache2_case_insensitive_hash_1 (str, entry->length)
836 : strcache2_case_sensitive_hash_1 (str, entry->length);
837 if (hash != entry->hash1)
838 {
839 fprintf (stderr,
840 "strcache2[%s]: corrupt entry %p, hash#1: %x, expected %x;\nstring: %s\n",
841 cache->name, (void *)entry, hash, entry->hash1, str);
842 return -1;
843 }
844
845 if (entry->hash2)
846 {
847 hash = cache->case_insensitive
848 ? strcache2_case_insensitive_hash_2 (str, entry->length)
849 : strcache2_case_sensitive_hash_2 (str, entry->length);
850 if (hash != entry->hash2)
851 {
852 fprintf (stderr,
853 "strcache2[%s]: corrupt entry %p, hash#2: %x, expected %x;\nstring: %s\n",
854 cache->name, (void *)entry, hash, entry->hash2, str);
855 return -1;
856 }
857 }
858
859 return 0;
860}
861
862/* Fallback for calculating and returning the 2nd hash. */
863unsigned int
864strcache2_get_hash2_fallback (struct strcache2 *cache, const char *str)
865{
866 struct strcache2_entry *entry = (struct strcache2_entry *) str - 1;
867 unsigned hash2 = cache->case_insensitive
868 ? strcache2_case_insensitive_hash_2 (str, entry->length)
869 : strcache2_case_sensitive_hash_2 (str, entry->length);
870 entry->hash2 = hash2;
871 return hash2;
872}
873
874/* Calculates the case sensitive hash values of the string.
875 The first hash is returned, the other is put at HASH2P. */
876unsigned int strcache2_hash_str (const char *str, unsigned int length, unsigned int *hash2p)
877{
878 *hash2p = strcache2_case_sensitive_hash_2 (str, length);
879 return strcache2_case_sensitive_hash_1 (str, length);
880}
881
882/* Calculates the case insensitive hash values of the string.
883 The first hash is returned, the other is put at HASH2P. */
884unsigned int strcache2_hash_istr (const char *str, unsigned int length, unsigned int *hash2p)
885{
886 *hash2p = strcache2_case_insensitive_hash_2 (str, length);
887 return strcache2_case_insensitive_hash_1 (str, length);
888}
889
890
891
892/* Initalizes a new cache. */
893void
894strcache2_init (struct strcache2 *cache, const char *name, unsigned int size,
895 unsigned int def_seg_size, int case_insensitive, int thread_safe)
896{
897 unsigned hash_shift;
898 assert (!thread_safe);
899
900 /* calc the size as a power of two */
901 if (!size)
902 hash_shift = STRCACHE2_HASH_SHIFT;
903 else
904 {
905 assert (size <= (~0U / 2 + 1));
906 for (hash_shift = 8; (1U << hash_shift) < size; hash_shift++)
907 /* nothing */;
908 }
909
910 /* adjust the default segment size */
911 if (!def_seg_size)
912 def_seg_size = STRCACHE2_SEG_SIZE;
913 else if (def_seg_size < sizeof (struct strcache2_seg) * 10)
914 def_seg_size = sizeof (struct strcache2_seg) * 10;
915 else if ((def_seg_size & 0xfff) < 0xf00)
916 def_seg_size = ((def_seg_size + 0xfff) & ~0xfffU);
917
918
919 /* init the structure. */
920 cache->case_insensitive = case_insensitive;
921 cache->hash_mask = (1U << hash_shift) - 1U;
922 cache->count = 0;
923 cache->lookup_count = 0;
924 cache->collision_1st_count = 0;
925 cache->collision_2nd_count = 0;
926 cache->collision_3rd_count = 0;
927 cache->rehash_count = (1U << hash_shift) / 4 * 3; /* rehash at 75% */
928 cache->init_size = 1U << hash_shift;
929 cache->hash_size = 1U << hash_shift;
930 cache->def_seg_size = def_seg_size;
931 cache->lock = NULL;
932 cache->name = name;
933
934 /* allocate the hash table and first segment. */
935 cache->hash_tab = (struct strcache2_entry **)
936 xmalloc (cache->init_size * sizeof (struct strcache2_entry *));
937 memset (cache->hash_tab, '\0', cache->init_size * sizeof (struct strcache2_entry *));
938 strcache2_new_seg (cache, 0);
939
940 /* link it */
941 cache->next = strcache_head;
942 strcache_head = cache;
943}
944
945
946/* Terminates a string cache, freeing all memory and other
947 associated resources. */
948void
949strcache2_term (struct strcache2 *cache)
950{
951 /* unlink it */
952 if (strcache_head == cache)
953 strcache_head = cache->next;
954 else
955 {
956 struct strcache2 *prev = strcache_head;
957 while (prev->next != cache)
958 prev = prev->next;
959 assert (prev);
960 prev->next = cache->next;
961 }
962
963 /* free the memory segments */
964 do
965 {
966 void *free_it = cache->seg_head;
967 cache->seg_head = cache->seg_head->next;
968 free (free_it);
969 }
970 while (cache->seg_head);
971
972 /* free the hash and clear the structure. */
973 free (cache->hash_tab);
974 memset (cache, '\0', sizeof (struct strcache2));
975}
976
977/* Print statistics a string cache. */
978void
979strcache2_print_stats (struct strcache2 *cache, const char *prefix)
980{
981 unsigned int seg_count = 0;
982 unsigned long seg_total_bytes = 0;
983 unsigned long seg_avg_bytes;
984 unsigned long seg_avail_bytes = 0;
985 unsigned long seg_max_bytes = 0;
986 struct strcache2_seg *seg;
987 unsigned long str_total_len = 0;
988 unsigned int str_avg_len;
989 unsigned int str_min_len = ~0U;
990 unsigned int str_max_len = 0;
991 unsigned int idx;
992 unsigned int rehashes;
993
994 printf (_("\n%s strcache2: %s\n"), prefix, cache->name);
995
996 /* Segment statistics. */
997 for (seg = cache->seg_head; seg; seg = seg->next)
998 {
999 seg_count++;
1000 seg_total_bytes += seg->size;
1001 seg_avail_bytes += seg->avail;
1002 if (seg->size > seg_max_bytes)
1003 seg_max_bytes = seg->size;
1004 }
1005 seg_avg_bytes = seg_total_bytes / seg_count;
1006 printf (_("%s %u segments: total = %lu / max = %lu / avg = %lu / def = %u avail = %lu\n"),
1007 prefix, seg_count, seg_total_bytes, seg_max_bytes, seg_avg_bytes,
1008 cache->def_seg_size, seg_avail_bytes);
1009
1010 /* String statistics. */
1011 idx = cache->hash_size;
1012 while (idx-- > 0)
1013 {
1014 struct strcache2_entry const *entry = cache->hash_tab[idx];
1015 if (entry)
1016 {
1017 unsigned int length = entry->length;
1018 str_total_len += length;
1019 if (length > str_max_len)
1020 str_max_len = length;
1021 if (length < str_min_len)
1022 str_min_len = length;
1023 }
1024 }
1025 str_avg_len = cache->count ? str_total_len / cache->count : 0;
1026 printf (_("%s %u strings: total len = %lu / max = %u / avg = %u / min = %u\n"),
1027 prefix, cache->count, str_total_len, str_max_len, str_avg_len, str_min_len);
1028
1029 /* Hash statistics. */
1030 idx = cache->init_size;
1031 rehashes = 0;
1032 while (idx < cache->hash_size)
1033 {
1034 idx *= 2;
1035 rehashes++;
1036 }
1037
1038 printf (_("%s hash size = %u mask = %#x rehashed %u times lookups = %lu\n"),
1039 prefix, cache->hash_size, cache->hash_mask, rehashes, cache->lookup_count);
1040 printf (_("%s hash collisions 1st = %lu (%u%%) 2nd = %lu (%u%%) 3rd = %lu (%u%%)\n"),
1041 prefix,
1042 cache->collision_1st_count, (unsigned int)((float)cache->collision_1st_count * 100 / cache->lookup_count),
1043 cache->collision_2nd_count, (unsigned int)((float)cache->collision_2nd_count * 100 / cache->lookup_count),
1044 cache->collision_3rd_count, (unsigned int)((float)cache->collision_3rd_count * 100 / cache->lookup_count));
1045}
1046
1047/* Print statistics for all string caches. */
1048void
1049strcache2_print_stats_all (const char *prefix)
1050{
1051 struct strcache2 *cur;
1052 for (cur = strcache_head; cur; cur = cur->next)
1053 strcache2_print_stats (cur, prefix);
1054}
1055
Note: See TracBrowser for help on using the repository browser.