source: trunk/src/kmk/strcache2.c@ 1905

Last change on this file since 1905 was 1905, checked in by bird, 17 years ago

strcache2: benchmarked strcache2_case_sensitive_hash_1 against return_STRING_N_HASH_1, finding it 18% faster.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 33.4 KB
Line 
1/* $Id: strcache2.c 1905 2008-10-21 05:13:12Z bird $ */
2/** @file
3 * strcache2 - New string cache.
4 */
5
6/*
7 * Copyright (c) 2008 knut st. osmundsen <bird-src-spam@anduin.net>
8 *
9 * This file is part of kBuild.
10 *
11 * kBuild is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * kBuild is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with kBuild; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 *
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include "make.h"
32#include "strcache2.h"
33
34#include <assert.h>
35
36#include "debug.h"
37
38#ifdef WINDOWS32
39# include <io.h>
40# include <process.h>
41# include <Windows.h>
42# define PARSE_IN_WORKER
43#endif
44
45#ifdef __OS2__
46# include <sys/fmutex.h>
47#endif
48
49#ifdef HAVE_PTHREAD
50# include <pthread.h>
51#endif
52
53
54/*******************************************************************************
55* Defined Constants And Macros *
56*******************************************************************************/
57/* The default size of a memory segment (1MB). */
58#define STRCACHE2_SEG_SIZE (1024U*1024U)
59/* The default hash table shift (hash size give as a power of two). */
60#define STRCACHE2_HASH_SHIFT 16
61
62
63/*******************************************************************************
64* Global Variables *
65*******************************************************************************/
66/* List of initialized string caches. */
67static struct strcache2 *strcache_head;
68
69
70
71static struct strcache2_seg *
72strcache2_new_seg (struct strcache2 *cache, unsigned int minlen)
73{
74 struct strcache2_seg *seg;
75 size_t size;
76 size_t off;
77
78 size = cache->def_seg_size;
79 if (size < (size_t)minlen + sizeof (struct strcache2_seg) + STRCACHE2_ENTRY_ALIGNMENT)
80 {
81 size = (size_t)minlen * 2;
82 size = (size + 0xfff) & ~(size_t)0xfff;
83 }
84
85 seg = xmalloc (size);
86 seg->start = (char *)(seg + 1);
87 seg->size = size - sizeof (struct strcache2_seg);
88 off = (size_t)seg->start & (STRCACHE2_ENTRY_ALIGNMENT - 1);
89 if (off)
90 {
91 seg->start += off;
92 seg->size -= off;
93 }
94 assert (seg->size > minlen);
95 seg->cursor = seg->start;
96 seg->avail = seg->size;
97
98 seg->next = cache->seg_head;
99 cache->seg_head = seg;
100
101 return seg;
102}
103
104MY_INLINE unsigned int
105strcache2_case_sensitive_hash_1 (const char *str, unsigned int len)
106{
107 /* Note! This implementation is 18% faster than return_STRING_N_HASH_1
108 when running the two 100 times over typical kBuild strings that
109 end up here (did a fprintf here and built kBuild).
110 Compiler was 32-bit gcc 4.0.1, darwin, with -O2. */
111
112 unsigned int hash = 0;
113 if (MY_PREDICT_TRUE(len >= 2))
114 {
115 unsigned int ch0 = *str++;
116 hash = 0;
117 len--;
118 while (len >= 2)
119 {
120 unsigned int ch1 = *str++;
121 hash += ch0 << (ch1 & 0xf);
122
123 ch0 = *str++;
124 hash += ch1 << (ch0 & 0xf);
125
126 len -= 2;
127 }
128 if (len == 1)
129 {
130 unsigned ch1 = *str;
131 hash += ch0 << (ch1 & 0xf);
132
133 hash += ch1;
134 }
135 else
136 hash += ch0;
137 }
138 else if (len)
139 {
140 hash = *str;
141 hash += hash << (hash & 0xf);
142 }
143 else
144 hash = 0;
145 return hash;
146}
147
148MY_INLINE unsigned int
149strcache2_case_sensitive_hash_2 (const char *str, unsigned int len)
150{
151 unsigned int hash = 0;
152 if (MY_PREDICT_TRUE(len >= 2))
153 {
154 unsigned int ch0 = *str++;
155 hash = 0;
156 len--;
157 while (len >= 2)
158 {
159 unsigned int ch1 = *str++;
160 hash += ch0 << (ch1 & 0x7);
161
162 ch0 = *str++;
163 hash += ch1 << (ch0 & 0x7);
164
165 len -= 2;
166 }
167 if (len == 1)
168 {
169 unsigned ch1 = *str;
170 hash += ch0 << (ch1 & 0x7);
171
172 hash += ch1;
173 }
174 else
175 hash += ch0;
176 }
177 else if (len)
178 {
179 hash = *str;
180 hash += hash << (hash & 0x7);
181 }
182 else
183 hash = 1;
184 hash |= 1;
185 return hash;
186}
187
188MY_INLINE unsigned int
189strcache2_case_insensitive_hash_1 (const char *str, unsigned int len)
190{
191 unsigned int hash = 0;
192 if (MY_PREDICT_TRUE(len >= 2))
193 {
194 unsigned int ch0 = *str++;
195 ch0 = tolower (ch0);
196 hash = 0;
197 len--;
198 while (len >= 2)
199 {
200 unsigned int ch1 = *str++;
201 ch1 = tolower (ch1);
202 hash += ch0 << (ch1 & 0xf);
203
204 ch0 = *str++;
205 ch0 = tolower (ch0);
206 hash += ch1 << (ch0 & 0xf);
207
208 len -= 2;
209 }
210 if (len == 1)
211 {
212 unsigned ch1 = *str;
213 ch1 = tolower (ch1);
214 hash += ch0 << (ch1 & 0xf);
215
216 hash += ch1;
217 }
218 else
219 hash += ch0;
220 }
221 else if (len)
222 {
223 hash = *str;
224 hash += hash << (hash & 0xf);
225 }
226 else
227 hash = 0;
228 return hash;
229}
230
231MY_INLINE unsigned int
232strcache2_case_insensitive_hash_2 (const char *str, unsigned int len)
233{
234 unsigned int hash = 0;
235 if (MY_PREDICT_TRUE(len >= 2))
236 {
237 unsigned int ch0 = *str++;
238 ch0 = tolower (ch0);
239 hash = 0;
240 len--;
241 while (len >= 2)
242 {
243 unsigned int ch1 = *str++;
244 ch1 = tolower (ch1);
245 hash += ch0 << (ch1 & 0x7);
246
247 ch0 = *str++;
248 ch0 = tolower (ch0);
249 hash += ch1 << (ch0 & 0x7);
250
251 len -= 2;
252 }
253 if (len == 1)
254 {
255 unsigned ch1 = *str;
256 ch1 = tolower (ch1);
257 hash += ch0 << (ch1 & 0x7);
258
259 hash += ch1;
260 }
261 else
262 hash += ch0;
263 }
264 else if (len)
265 {
266 hash = *str;
267 hash += hash << (hash & 0x7);
268 }
269 else
270 hash = 1;
271 hash |= 1;
272 return hash;
273}
274
275#ifdef _MSC_VER
276 typedef unsigned int int32_t;
277#else
278# include <stdint.h>
279#endif
280
281MY_INLINE int
282strcache2_memcmp_inline_short (const char *xs, const char *ys, unsigned int length)
283{
284 if (length <= 8)
285 {
286 /* short string compare - ~50% of the kBuild calls. */
287 assert ( !((size_t)ys & 3) );
288 if (!((size_t)xs & 3))
289 {
290 /* aligned */
291 int result;
292 switch (length)
293 {
294 default: /* memcmp for longer strings */
295 return memcmp (xs, ys, length);
296 case 8:
297 result = *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
298 result |= *(int32_t*)xs - *(int32_t*)ys;
299 return result;
300 case 7:
301 result = xs[6] - ys[6];
302 result |= xs[5] - ys[5];
303 result |= xs[4] - ys[4];
304 result |= *(int32_t*)xs - *(int32_t*)ys;
305 return result;
306 case 6:
307 result = xs[5] - ys[5];
308 result |= xs[4] - ys[4];
309 result |= *(int32_t*)xs - *(int32_t*)ys;
310 return result;
311 case 5:
312 result = xs[4] - ys[4];
313 result |= *(int32_t*)xs - *(int32_t*)ys;
314 return result;
315 case 4:
316 return *(int32_t*)xs - *(int32_t*)ys;
317 case 3:
318 result = xs[2] - ys[2];
319 result |= xs[1] - ys[1];
320 result |= xs[0] - ys[0];
321 return result;
322 case 2:
323 result = xs[1] - ys[1];
324 result |= xs[0] - ys[0];
325 return result;
326 case 1:
327 return *xs - *ys;
328 case 0:
329 return 0;
330 }
331 }
332 else
333 {
334 /* unaligned */
335 int result = 0;
336 switch (length)
337 {
338 case 8: result |= xs[7] - ys[7];
339 case 7: result |= xs[6] - ys[6];
340 case 6: result |= xs[5] - ys[5];
341 case 5: result |= xs[4] - ys[4];
342 case 4: result |= xs[3] - ys[3];
343 case 3: result |= xs[2] - ys[2];
344 case 2: result |= xs[1] - ys[1];
345 case 1: result |= xs[0] - ys[0];
346 case 0:
347 return result;
348 }
349 }
350 }
351
352 /* memcmp for longer strings */
353 return memcmp (xs, ys, length);
354}
355
356MY_INLINE int
357strcache2_memcmp_inlined (const char *xs, const char *ys, unsigned int length)
358{
359#ifndef ELECTRIC_HEAP
360 assert ( !((size_t)ys & 3) );
361#endif
362 if (!((size_t)xs & 3))
363 {
364 int result;
365 /* aligned */
366 while (length >= 8)
367 {
368 result = *(int32_t*)xs - *(int32_t*)ys;
369 result |= *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
370 if (MY_PREDICT_FALSE(result))
371 return result;
372 xs += 8;
373 ys += 8;
374 length -= 8;
375 }
376 switch (length)
377 {
378 case 7:
379 result = *(int32_t*)xs - *(int32_t*)ys;
380 result |= xs[6] - ys[6];
381 result |= xs[5] - ys[5];
382 result |= xs[4] - ys[4];
383 return result;
384 case 6:
385 result = *(int32_t*)xs - *(int32_t*)ys;
386 result |= xs[5] - ys[5];
387 result |= xs[4] - ys[4];
388 return result;
389 case 5:
390 result = *(int32_t*)xs - *(int32_t*)ys;
391 result |= xs[4] - ys[4];
392 return result;
393 case 4:
394 return *(int32_t*)xs - *(int32_t*)ys;
395 case 3:
396 result = xs[2] - ys[2];
397 result |= xs[1] - ys[1];
398 result |= xs[0] - ys[0];
399 return result;
400 case 2:
401 result = xs[1] - ys[1];
402 result |= xs[0] - ys[0];
403 return result;
404 case 1:
405 return *xs - *ys;
406 default:
407 case 0:
408 return 0;
409 }
410 }
411 else
412 {
413 /* unaligned */
414 int result;
415 while (length >= 8)
416 {
417#if defined(__i386__) || defined(__x86_64__)
418 result = ( ((int32_t)xs[3] << 24)
419 | ((int32_t)xs[2] << 16)
420 | ((int32_t)xs[1] << 8)
421 | xs[0] )
422 - *(int32_t*)ys;
423 result |= ( ((int32_t)xs[7] << 24)
424 | ((int32_t)xs[6] << 16)
425 | ((int32_t)xs[5] << 8)
426 | xs[4] )
427 - *(int32_t*)(ys + 4);
428#else
429 result = xs[3] - ys[3];
430 result |= xs[2] - ys[2];
431 result |= xs[1] - ys[1];
432 result |= xs[0] - ys[0];
433 result |= xs[7] - ys[7];
434 result |= xs[6] - ys[6];
435 result |= xs[5] - ys[5];
436 result |= xs[4] - ys[4];
437#endif
438 if (MY_PREDICT_FALSE(result))
439 return result;
440 xs += 8;
441 ys += 8;
442 length -= 8;
443 }
444 result = 0;
445 switch (length)
446 {
447 case 7: result |= xs[6] - ys[6];
448 case 6: result |= xs[5] - ys[5];
449 case 5: result |= xs[4] - ys[4];
450 case 4: result |= xs[3] - ys[3];
451 case 3: result |= xs[2] - ys[2];
452 case 2: result |= xs[1] - ys[1];
453 case 1: result |= xs[0] - ys[0];
454 return result;
455 default:
456 case 0:
457 return 0;
458 }
459 }
460}
461
462MY_INLINE int
463strcache2_is_equal (struct strcache2 *cache, struct strcache2_entry const *entry,
464 const char *str, unsigned int length, unsigned int hash1)
465{
466 assert (!cache->case_insensitive);
467
468 /* the simple stuff first. */
469 if ( entry == NULL
470 || entry->hash1 != hash1
471 || entry->length != length)
472 return 0;
473
474#if 0
475 return memcmp (entry + 1, str, length) == 0;
476#elif 1
477 return strcache2_memcmp_inlined ((const char *)(entry + 1), str, length) == 0;
478#else
479 return strcache2_memcmp_inline_short ((const char *)(entry + 1), str, length) == 0;
480#endif
481}
482
483MY_INLINE int
484strcache2_is_iequal (struct strcache2 *cache, struct strcache2_entry const *entry,
485 const char *str, unsigned int length, unsigned int hash1)
486{
487 assert (cache->case_insensitive);
488
489 /* the simple stuff first. */
490 if ( entry == NULL
491 || entry->hash1 != hash1
492 || entry->length != length)
493 return 0;
494
495#if defined(_MSC_VER) || defined(__OS2__)
496 return _memicmp (entry + 1, str, length) == 0;
497#else
498 return strncasecmp ((const char *)(entry + 1), str, length) == 0;
499#endif
500}
501
502static void
503strcache2_rehash (struct strcache2 *cache)
504{
505 unsigned int src = cache->hash_size;
506 struct strcache2_entry **src_tab = cache->hash_tab;
507 struct strcache2_entry **dst_tab;
508 unsigned int hash_mask;
509
510 /* Allocate a new hash table twice the size of the current. */
511 cache->hash_size <<= 1;
512 cache->hash_mask <<= 1;
513 cache->hash_mask |= 1;
514 cache->rehash_count <<= 1;
515 cache->hash_tab = dst_tab = (struct strcache2_entry **)
516 xmalloc (cache->hash_size * sizeof (struct strcache2_entry *));
517 memset (dst_tab, '\0', cache->hash_size * sizeof (struct strcache2_entry *));
518
519 /* Copy the entries from the old to the new hash table. */
520 hash_mask = cache->hash_mask;
521 while (src-- > 0)
522 {
523 struct strcache2_entry *entry = src_tab[src];
524 if (entry)
525 {
526 unsigned int dst = entry->hash1 & hash_mask;
527 if (dst_tab[dst])
528 {
529 unsigned int hash2 = entry->hash2;
530 if (!hash2)
531 entry->hash2 = hash2 = cache->case_insensitive
532 ? strcache2_case_insensitive_hash_2 ((const char *)(entry + 1), entry->length)
533 : strcache2_case_sensitive_hash_2 ((const char *)(entry + 1), entry->length);
534 dst += hash2;
535 dst &= hash_mask;
536 while (dst_tab[dst])
537 {
538 dst += hash2;
539 dst &= hash_mask;
540 }
541 }
542
543 dst_tab[dst] = entry;
544 }
545 }
546
547 /* That's it, just free the old table and we're done. */
548 free (src_tab);
549}
550
551/* Internal worker that enters a new string into the cache. */
552static const char *
553strcache2_enter_string (struct strcache2 *cache, unsigned int idx,
554 const char *str, unsigned int length,
555 unsigned int hash1, unsigned hash2)
556{
557 struct strcache2_entry *entry;
558 struct strcache2_seg *seg;
559 unsigned int size;
560 char *str_copy;
561
562 /* Allocate space for the string. */
563
564 size = length + 1 + sizeof (struct strcache2_entry);
565 size = (size + STRCACHE2_ENTRY_ALIGNMENT - 1) & ~(STRCACHE2_ENTRY_ALIGNMENT - 1U);
566
567 seg = cache->seg_head;
568 if (MY_PREDICT_FALSE(seg->avail < size))
569 {
570 do
571 seg = seg->next;
572 while (seg && seg->avail < size);
573 if (!seg)
574 seg = strcache2_new_seg (cache, size);
575 }
576
577 entry = (struct strcache2_entry *) seg->cursor;
578 assert ((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1));
579 seg->cursor += size;
580 seg->avail -= size;
581
582 /* Setup the entry, copy the string and insert it into the hash table. */
583
584 entry->user = NULL;
585 entry->length = length;
586 entry->hash1 = hash1;
587 entry->hash2 = hash2;
588 str_copy = (char *) memcpy (entry + 1, str, length);
589 str_copy[length] = '\0';
590
591 cache->hash_tab[idx] = entry;
592 cache->count++;
593 if (cache->count >= cache->rehash_count)
594 strcache2_rehash (cache);
595
596 return str_copy;
597}
598
599/* The public add string interface. */
600const char *
601strcache2_add (struct strcache2 *cache, const char *str, unsigned int length)
602{
603 struct strcache2_entry const *entry;
604 unsigned int hash2;
605 unsigned int hash1 = strcache2_case_sensitive_hash_1 (str, length);
606 unsigned int idx;
607
608 assert (!cache->case_insensitive);
609
610 cache->lookup_count++;
611
612 /* Lookup the entry in the hash table, hoping for an
613 early match. */
614 idx = hash1 & cache->hash_mask;
615 entry = cache->hash_tab[idx];
616 if (strcache2_is_equal (cache, entry, str, length, hash1))
617 return (const char *)(entry + 1);
618 if (!entry)
619 hash2 = 0;
620 else
621 {
622 cache->collision_1st_count++;
623
624 hash2 = strcache2_case_sensitive_hash_2 (str, length);
625 idx += hash2;
626 idx &= cache->hash_mask;
627 entry = cache->hash_tab[idx];
628 if (strcache2_is_equal (cache, entry, str, length, hash1))
629 return (const char *)(entry + 1);
630
631 if (entry)
632 {
633 cache->collision_2nd_count++;
634 do
635 {
636 idx += hash2;
637 idx &= cache->hash_mask;
638 entry = cache->hash_tab[idx];
639 cache->collision_3rd_count++;
640 if (strcache2_is_equal (cache, entry, str, length, hash1))
641 return (const char *)(entry + 1);
642 }
643 while (entry);
644 }
645 }
646
647 /* Not found, add it at IDX. */
648 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
649}
650
651/* The public add string interface for prehashed strings.
652 Use strcache2_hash_str to calculate the hash of a string. */
653const char *
654strcache2_add_hashed (struct strcache2 *cache, const char *str, unsigned int length,
655 unsigned int hash1, unsigned int hash2)
656{
657 struct strcache2_entry const *entry;
658 unsigned int idx;
659#ifndef NDEBUG
660 unsigned correct_hash;
661
662 assert (!cache->case_insensitive);
663 correct_hash = strcache2_case_sensitive_hash_1 (str, length);
664 MY_ASSERT_MSG (hash1 == correct_hash, ("%#x != %#x\n", hash1, correct_hash));
665 if (hash2)
666 {
667 correct_hash = strcache2_case_sensitive_hash_2 (str, length);
668 MY_ASSERT_MSG (hash2 == correct_hash, ("%#x != %#x\n", hash2, correct_hash));
669 }
670#endif /* NDEBUG */
671
672 cache->lookup_count++;
673
674 /* Lookup the entry in the hash table, hoping for an
675 early match. */
676 idx = hash1 & cache->hash_mask;
677 entry = cache->hash_tab[idx];
678 if (strcache2_is_equal (cache, entry, str, length, hash1))
679 return (const char *)(entry + 1);
680 if (entry)
681 {
682 cache->collision_1st_count++;
683
684 if (!hash2)
685 hash2 = cache->case_insensitive
686 ? strcache2_case_insensitive_hash_2 (str, length)
687 : strcache2_case_sensitive_hash_2 (str, length);
688 idx += hash2;
689 idx &= cache->hash_mask;
690 entry = cache->hash_tab[idx];
691 if (strcache2_is_equal (cache, entry, str, length, hash1))
692 return (const char *)(entry + 1);
693
694 if (entry)
695 {
696 cache->collision_2nd_count++;
697 do
698 {
699 idx += hash2;
700 idx &= cache->hash_mask;
701 entry = cache->hash_tab[idx];
702 cache->collision_3rd_count++;
703 if (strcache2_is_equal (cache, entry, str, length, hash1))
704 return (const char *)(entry + 1);
705 }
706 while (entry);
707 }
708 }
709
710 /* Not found, add it at IDX. */
711 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
712}
713
714/* The public lookup (case sensitive) string interface. */
715const char *
716strcache2_lookup (struct strcache2 *cache, const char *str, unsigned int length)
717{
718 struct strcache2_entry const *entry;
719 unsigned int hash2;
720 unsigned int hash1 = strcache2_case_sensitive_hash_1 (str, length);
721 unsigned int idx;
722
723 assert (!cache->case_insensitive);
724
725 cache->lookup_count++;
726
727 /* Lookup the entry in the hash table, hoping for an
728 early match. */
729 idx = hash1 & cache->hash_mask;
730 entry = cache->hash_tab[idx];
731 if (strcache2_is_equal (cache, entry, str, length, hash1))
732 return (const char *)(entry + 1);
733 if (!entry)
734 hash2 = 0;
735 else
736 {
737 cache->collision_1st_count++;
738
739 hash2 = strcache2_case_sensitive_hash_2 (str, length);
740 idx += hash2;
741 idx &= cache->hash_mask;
742 entry = cache->hash_tab[idx];
743 if (strcache2_is_equal (cache, entry, str, length, hash1))
744 return (const char *)(entry + 1);
745
746 if (entry)
747 {
748 cache->collision_2nd_count++;
749 do
750 {
751 idx += hash2;
752 idx &= cache->hash_mask;
753 entry = cache->hash_tab[idx];
754 cache->collision_3rd_count++;
755 if (strcache2_is_equal (cache, entry, str, length, hash1))
756 return (const char *)(entry + 1);
757 }
758 while (entry);
759 }
760 }
761
762 /* Not found. */
763 return NULL;
764}
765
766#if defined(HAVE_CASE_INSENSITIVE_FS)
767
768/* The public add string interface for case insensitive strings. */
769const char *
770strcache2_iadd (struct strcache2 *cache, const char *str, unsigned int length)
771{
772 struct strcache2_entry const *entry;
773 unsigned int hash2;
774 unsigned int hash1 = strcache2_case_insensitive_hash_1 (str, length);
775 unsigned int idx;
776
777 assert (cache->case_insensitive);
778 cache->lookup_count++;
779
780 /* Lookup the entry in the hash table, hoping for an
781 early match. */
782 idx = hash1 & cache->hash_mask;
783 entry = cache->hash_tab[idx];
784 if (strcache2_is_iequal (cache, entry, str, length, hash1))
785 return (const char *)(entry + 1);
786 if (!entry)
787 hash2 = 0;
788 else
789 {
790 cache->collision_1st_count++;
791
792 hash2 = strcache2_case_insensitive_hash_2 (str, length);
793 idx += hash2;
794 idx &= cache->hash_mask;
795 entry = cache->hash_tab[idx];
796 if (strcache2_is_iequal (cache, entry, str, length, hash1))
797 return (const char *)(entry + 1);
798
799 if (entry)
800 {
801 cache->collision_2nd_count++;
802 do
803 {
804 idx += hash2;
805 idx &= cache->hash_mask;
806 entry = cache->hash_tab[idx];
807 cache->collision_3rd_count++;
808 if (strcache2_is_iequal (cache, entry, str, length, hash1))
809 return (const char *)(entry + 1);
810 }
811 while (entry);
812 }
813 }
814
815 /* Not found, add it at IDX. */
816 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
817}
818
819/* The public add string interface for prehashed case insensitive strings.
820 Use strcache2_hash_istr to calculate the hash of a string. */
821const char *
822strcache2_iadd_hashed (struct strcache2 *cache, const char *str, unsigned int length,
823 unsigned int hash1, unsigned int hash2)
824{
825 struct strcache2_entry const *entry;
826 unsigned int idx;
827#ifndef NDEBUG
828 unsigned correct_hash;
829
830 assert (cache->case_insensitive);
831 correct_hash = strcache2_case_insensitive_hash_1 (str, length);
832 MY_ASSERT_MSG (hash1 == correct_hash, ("%#x != %#x\n", hash1, correct_hash));
833 if (hash2)
834 {
835 correct_hash = strcache2_case_insensitive_hash_2 (str, length);
836 MY_ASSERT_MSG (hash2 == correct_hash, ("%#x != %#x\n", hash2, correct_hash));
837 }
838#endif /* NDEBUG */
839
840 cache->lookup_count++;
841
842 /* Lookup the entry in the hash table, hoping for an
843 early match. */
844 idx = hash1 & cache->hash_mask;
845 entry = cache->hash_tab[idx];
846 if (strcache2_is_iequal (cache, entry, str, length, hash1))
847 return (const char *)(entry + 1);
848 if (entry)
849 {
850 cache->collision_1st_count++;
851
852 if (!hash2)
853 hash2 = strcache2_case_insensitive_hash_2 (str, length);
854 idx += hash2;
855 idx &= cache->hash_mask;
856 entry = cache->hash_tab[idx];
857 if (strcache2_is_iequal (cache, entry, str, length, hash1))
858 return (const char *)(entry + 1);
859
860 if (entry)
861 {
862 cache->collision_2nd_count++;
863 do
864 {
865 idx += hash2;
866 idx &= cache->hash_mask;
867 entry = cache->hash_tab[idx];
868 cache->collision_3rd_count++;
869 if (strcache2_is_iequal (cache, entry, str, length, hash1))
870 return (const char *)(entry + 1);
871 }
872 while (entry);
873 }
874 }
875
876 /* Not found, add it at IDX. */
877 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
878}
879
880/* The public lookup (case insensitive) string interface. */
881const char *
882strcache2_lookup (struct strcache2 *cache, const char *str, unsigned int length)
883{
884 struct strcache2_entry const *entry;
885 unsigned int hash2;
886 unsigned int hash1 = strcache2_case_insensitive_hash_1 (str, length);
887 unsigned int idx;
888
889 assert (cache->case_insensitive);
890
891 cache->lookup_count++;
892
893 /* Lookup the entry in the hash table, hoping for an
894 early match. */
895 idx = hash1 & cache->hash_mask;
896 entry = cache->hash_tab[idx];
897 if (strcache2_is_iequal (cache, entry, str, length, hash1))
898 return (const char *)(entry + 1);
899 if (!entry)
900 hash2 = 0;
901 else
902 {
903 cache->collision_1st_count++;
904
905 hash2 = strcache2_case_insensitive_hash_2 (str, length);
906 idx += hash2;
907 idx &= cache->hash_mask;
908 entry = cache->hash_tab[idx];
909 if (strcache2_is_iequal (cache, entry, str, length, hash1))
910 return (const char *)(entry + 1);
911
912 if (entry)
913 {
914 cache->collision_2nd_count++;
915 do
916 {
917 idx += hash2;
918 idx &= cache->hash_mask;
919 entry = cache->hash_tab[idx];
920 cache->collision_3rd_count++;
921 if (strcache2_is_iequal (cache, entry, str, length, hash1))
922 return (const char *)(entry + 1);
923 }
924 while (entry);
925 }
926 }
927
928 /* Not found. */
929 return NULL;
930}
931
932#endif /* HAVE_CASE_INSENSITIVE_FS */
933
934/* Is the given string cached? returns 1 if it is, 0 if it isn't. */
935int
936strcache2_is_cached (struct strcache2 *cache, const char *str)
937{
938 /* Check mandatory alignment first. */
939 if (!((size_t)str & (sizeof (void *) - 1)))
940 {
941 /* Check the segment list and consider the question answered if the
942 string is within one of them. (Could check it more thoroughly...) */
943 struct strcache2_seg const *seg;
944 for (seg = cache->seg_head; seg; seg = seg->next)
945 if ((size_t)(str - seg->start) < seg->size)
946 return 1;
947 }
948
949 return 0;
950}
951
952
953/* Verify the integrity of the specified string, returning 0 if OK. */
954int
955strcache2_verify_entry (struct strcache2 *cache, const char *str)
956{
957 struct strcache2_entry const *entry;
958 unsigned hash;
959 const char *end;
960
961 if ((size_t)str & (sizeof (void *) - 1))
962 {
963 fprintf (stderr,
964 "strcache2[%s]: missaligned string %p\nstring: %s\n",
965 cache->name, (void *)str, str);
966 return -1;
967 }
968
969 entry = (struct strcache2_entry const *)str - 1;
970 end = memchr (str, '\0', entry->length);
971 if ((size_t)(end - str) == entry->length)
972 {
973 fprintf (stderr,
974 "strcache2[%s]: corrupt entry %p, length: %lu, expected %u;\nstring: %s\n",
975 cache->name, (void *)entry, (unsigned long)(end - str), entry->length, str);
976 return -1;
977 }
978
979 hash = cache->case_insensitive
980 ? strcache2_case_insensitive_hash_1 (str, entry->length)
981 : strcache2_case_sensitive_hash_1 (str, entry->length);
982 if (hash != entry->hash1)
983 {
984 fprintf (stderr,
985 "strcache2[%s]: corrupt entry %p, hash#1: %x, expected %x;\nstring: %s\n",
986 cache->name, (void *)entry, hash, entry->hash1, str);
987 return -1;
988 }
989
990 if (entry->hash2)
991 {
992 hash = cache->case_insensitive
993 ? strcache2_case_insensitive_hash_2 (str, entry->length)
994 : strcache2_case_sensitive_hash_2 (str, entry->length);
995 if (hash != entry->hash2)
996 {
997 fprintf (stderr,
998 "strcache2[%s]: corrupt entry %p, hash#2: %x, expected %x;\nstring: %s\n",
999 cache->name, (void *)entry, hash, entry->hash2, str);
1000 return -1;
1001 }
1002 }
1003
1004 return 0;
1005}
1006
1007/* Fallback for calculating and returning the 2nd hash. */
1008unsigned int
1009strcache2_get_hash2_fallback (struct strcache2 *cache, const char *str)
1010{
1011 struct strcache2_entry *entry = (struct strcache2_entry *) str - 1;
1012 unsigned hash2 = cache->case_insensitive
1013 ? strcache2_case_insensitive_hash_2 (str, entry->length)
1014 : strcache2_case_sensitive_hash_2 (str, entry->length);
1015 entry->hash2 = hash2;
1016 return hash2;
1017}
1018
1019/* Calculates the case sensitive hash values of the string.
1020 The first hash is returned, the other is put at HASH2P. */
1021unsigned int strcache2_hash_str (const char *str, unsigned int length, unsigned int *hash2p)
1022{
1023 *hash2p = strcache2_case_sensitive_hash_2 (str, length);
1024 return strcache2_case_sensitive_hash_1 (str, length);
1025}
1026
1027/* Calculates the case insensitive hash values of the string.
1028 The first hash is returned, the other is put at HASH2P. */
1029unsigned int strcache2_hash_istr (const char *str, unsigned int length, unsigned int *hash2p)
1030{
1031 *hash2p = strcache2_case_insensitive_hash_2 (str, length);
1032 return strcache2_case_insensitive_hash_1 (str, length);
1033}
1034
1035
1036
1037/* Initalizes a new cache. */
1038void
1039strcache2_init (struct strcache2 *cache, const char *name, unsigned int size,
1040 unsigned int def_seg_size, int case_insensitive, int thread_safe)
1041{
1042 unsigned hash_shift;
1043 assert (!thread_safe);
1044
1045 /* calc the size as a power of two */
1046 if (!size)
1047 hash_shift = STRCACHE2_HASH_SHIFT;
1048 else
1049 {
1050 assert (size <= (~0U / 2 + 1));
1051 for (hash_shift = 8; (1U << hash_shift) < size; hash_shift++)
1052 /* nothing */;
1053 }
1054
1055 /* adjust the default segment size */
1056 if (!def_seg_size)
1057 def_seg_size = STRCACHE2_SEG_SIZE;
1058 else if (def_seg_size < sizeof (struct strcache2_seg) * 10)
1059 def_seg_size = sizeof (struct strcache2_seg) * 10;
1060 else if ((def_seg_size & 0xfff) < 0xf00)
1061 def_seg_size = ((def_seg_size + 0xfff) & ~0xfffU);
1062
1063
1064 /* init the structure. */
1065 cache->case_insensitive = case_insensitive;
1066 cache->hash_mask = (1U << hash_shift) - 1U;
1067 cache->count = 0;
1068 cache->lookup_count = 0;
1069 cache->collision_1st_count = 0;
1070 cache->collision_2nd_count = 0;
1071 cache->collision_3rd_count = 0;
1072 cache->rehash_count = (1U << hash_shift) / 4 * 3; /* rehash at 75% */
1073 cache->init_size = 1U << hash_shift;
1074 cache->hash_size = 1U << hash_shift;
1075 cache->def_seg_size = def_seg_size;
1076 cache->lock = NULL;
1077 cache->name = name;
1078
1079 /* allocate the hash table and first segment. */
1080 cache->hash_tab = (struct strcache2_entry **)
1081 xmalloc (cache->init_size * sizeof (struct strcache2_entry *));
1082 memset (cache->hash_tab, '\0', cache->init_size * sizeof (struct strcache2_entry *));
1083 strcache2_new_seg (cache, 0);
1084
1085 /* link it */
1086 cache->next = strcache_head;
1087 strcache_head = cache;
1088}
1089
1090
1091/* Terminates a string cache, freeing all memory and other
1092 associated resources. */
1093void
1094strcache2_term (struct strcache2 *cache)
1095{
1096 /* unlink it */
1097 if (strcache_head == cache)
1098 strcache_head = cache->next;
1099 else
1100 {
1101 struct strcache2 *prev = strcache_head;
1102 while (prev->next != cache)
1103 prev = prev->next;
1104 assert (prev);
1105 prev->next = cache->next;
1106 }
1107
1108 /* free the memory segments */
1109 do
1110 {
1111 void *free_it = cache->seg_head;
1112 cache->seg_head = cache->seg_head->next;
1113 free (free_it);
1114 }
1115 while (cache->seg_head);
1116
1117 /* free the hash and clear the structure. */
1118 free (cache->hash_tab);
1119 memset (cache, '\0', sizeof (struct strcache2));
1120}
1121
1122/* Print statistics a string cache. */
1123void
1124strcache2_print_stats (struct strcache2 *cache, const char *prefix)
1125{
1126 unsigned int seg_count = 0;
1127 unsigned long seg_total_bytes = 0;
1128 unsigned long seg_avg_bytes;
1129 unsigned long seg_avail_bytes = 0;
1130 unsigned long seg_max_bytes = 0;
1131 struct strcache2_seg *seg;
1132 unsigned long str_total_len = 0;
1133 unsigned int str_avg_len;
1134 unsigned int str_min_len = ~0U;
1135 unsigned int str_max_len = 0;
1136 unsigned int idx;
1137 unsigned int rehashes;
1138
1139 printf (_("\n%s strcache2: %s\n"), prefix, cache->name);
1140
1141 /* Segment statistics. */
1142 for (seg = cache->seg_head; seg; seg = seg->next)
1143 {
1144 seg_count++;
1145 seg_total_bytes += seg->size;
1146 seg_avail_bytes += seg->avail;
1147 if (seg->size > seg_max_bytes)
1148 seg_max_bytes = seg->size;
1149 }
1150 seg_avg_bytes = seg_total_bytes / seg_count;
1151 printf (_("%s %u segments: total = %lu / max = %lu / avg = %lu / def = %u avail = %lu\n"),
1152 prefix, seg_count, seg_total_bytes, seg_max_bytes, seg_avg_bytes,
1153 cache->def_seg_size, seg_avail_bytes);
1154
1155 /* String statistics. */
1156 idx = cache->hash_size;
1157 while (idx-- > 0)
1158 {
1159 struct strcache2_entry const *entry = cache->hash_tab[idx];
1160 if (entry)
1161 {
1162 unsigned int length = entry->length;
1163 str_total_len += length;
1164 if (length > str_max_len)
1165 str_max_len = length;
1166 if (length < str_min_len)
1167 str_min_len = length;
1168 }
1169 }
1170 str_avg_len = cache->count ? str_total_len / cache->count : 0;
1171 printf (_("%s %u strings: total len = %lu / max = %u / avg = %u / min = %u\n"),
1172 prefix, cache->count, str_total_len, str_max_len, str_avg_len, str_min_len);
1173
1174 /* Hash statistics. */
1175 idx = cache->init_size;
1176 rehashes = 0;
1177 while (idx < cache->hash_size)
1178 {
1179 idx *= 2;
1180 rehashes++;
1181 }
1182
1183 printf (_("%s hash size = %u mask = %#x rehashed %u times lookups = %lu\n"),
1184 prefix, cache->hash_size, cache->hash_mask, rehashes, cache->lookup_count);
1185 printf (_("%s hash collisions 1st = %lu (%u%%) 2nd = %lu (%u%%) 3rd = %lu (%u%%)\n"),
1186 prefix,
1187 cache->collision_1st_count, (unsigned int)((float)cache->collision_1st_count * 100 / cache->lookup_count),
1188 cache->collision_2nd_count, (unsigned int)((float)cache->collision_2nd_count * 100 / cache->lookup_count),
1189 cache->collision_3rd_count, (unsigned int)((float)cache->collision_3rd_count * 100 / cache->lookup_count));
1190}
1191
1192/* Print statistics for all string caches. */
1193void
1194strcache2_print_stats_all (const char *prefix)
1195{
1196 struct strcache2 *cur;
1197 for (cur = strcache_head; cur; cur = cur->next)
1198 strcache2_print_stats (cur, prefix);
1199}
1200
Note: See TracBrowser for help on using the repository browser.