source: trunk/src/kmk/strcache2.c@ 1916

Last change on this file since 1916 was 1916, checked in by bird, 17 years ago

build fix.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 35.7 KB
Line 
1/* $Id: strcache2.c 1916 2008-10-23 13:34:40Z bird $ */
2/** @file
3 * strcache2 - New string cache.
4 */
5
6/*
7 * Copyright (c) 2008 knut st. osmundsen <bird-src-spam@anduin.net>
8 *
9 * This file is part of kBuild.
10 *
11 * kBuild is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * kBuild is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with kBuild; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 *
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include "make.h"
32#include "strcache2.h"
33
34#include <assert.h>
35
36#include "debug.h"
37
38#ifdef _MSC_VER
39typedef unsigned char uint8_t;
40typedef unsigned short uint16_t;
41typedef unsigned int uint32_t;
42typedef signed char int8_t;
43typedef signed short int16_t;
44typedef signed int int32_t;
45#else
46# include <stdint.h>
47#endif
48
49#ifdef WINDOWS32
50# include <io.h>
51# include <process.h>
52# include <Windows.h>
53# define PARSE_IN_WORKER
54#endif
55
56#ifdef __OS2__
57# include <sys/fmutex.h>
58#endif
59
60#ifdef HAVE_PTHREAD
61# include <pthread.h>
62#endif
63
64
65/*******************************************************************************
66* Defined Constants And Macros *
67*******************************************************************************/
68/* The default size of a memory segment (1MB). */
69#define STRCACHE2_SEG_SIZE (1024U*1024U)
70/* The default hash table shift (hash size give as a power of two). */
71#define STRCACHE2_HASH_SHIFT 16
72/** Does the modding / masking of a hash number into an index. */
73#ifdef STRCACHE2_USE_MASK
74# define STRCACHE2_MOD_IT(cache, hash) ((hash) & (cache)->hash_mask)
75#else
76# define STRCACHE2_MOD_IT(cache, hash) ((hash) % (cache)->hash_div)
77#endif
78
79
80/*******************************************************************************
81* Global Variables *
82*******************************************************************************/
83/* List of initialized string caches. */
84static struct strcache2 *strcache_head;
85
86
87/** Finds the closest primary number for power of two value (or something else
88 * useful if not support). */
89MY_INLINE unsigned int strcache2_find_prime(unsigned int shift)
90{
91 switch (shift)
92 {
93 case 5: return 31;
94 case 6: return 61;
95 case 7: return 127;
96 case 8: return 251;
97 case 9: return 509;
98 case 10: return 1021;
99 case 11: return 2039;
100 case 12: return 4093;
101 case 13: return 8191;
102 case 14: return 16381;
103 case 15: return 32749;
104 case 16: return 65521;
105 case 17: return 131063;
106 case 18: return 262139;
107 case 19: return 524269;
108 case 20: return 1048573;
109 case 21: return 2097143;
110 case 22: return 4194301;
111 case 23: return 8388593;
112
113 default:
114 assert (0);
115 return (1 << shift) - 1;
116 }
117}
118
119MY_INLINE unsigned int
120strcache2_case_sensitive_hash_1 (const char *str, unsigned int len)
121{
122#if 1
123 /* Paul Hsieh hash SuperFast function:
124 http://www.azillionmonkeys.com/qed/hash.html
125
126 This performs very good and as a sligtly better distribution than
127 STRING_N_HASH_1 on a typical kBuild run.
128
129 It is also 37% faster than return_STRING_N_HASH_1 when running the
130 two 100 times over typical kBuild strings that end up here (did a
131 fprintf here and built kBuild). Compiler was 32-bit gcc 4.0.1, darwin,
132 with -O2.
133
134 FIXME: A path for well aligned data should be added to speed up
135 execution on alignment sensitive systems. */
136
137# if defined(__amd64__) || defined(__x86_64__) || defined(__AMD64__) || defined(_M_X64) || defined(__amd64) \
138 || defined(__i386__) || defined(__x86__) || defined(__X86__) || defined(_M_IX86) || defined(__i386)
139# define strcache2_get_unaligned_16bits(ptr) ( *((const uint16_t *)(ptr)))
140# else
141 /* (endian doesn't matter) */
142# define strcache2_get_unaligned_16bits(ptr) ( (((const uint8_t *)(ptr))[0] << 8) \
143 | (((const uint8_t *)(ptr))[1]) )
144# endif
145 uint32_t hash = len;
146 uint32_t tmp;
147 int rem;
148
149 assert (sizeof (uint8_t) == sizeof (char));
150 if (len == 0)
151 return 0;
152
153 /* main loop, walking on 2 x uint16_t */
154 rem = len & 3;
155 len >>= 2;
156 while (len > 0)
157 {
158 hash += strcache2_get_unaligned_16bits (str);
159 tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
160 hash = (hash << 16) ^ tmp;
161 str += 2 * sizeof (uint16_t);
162 hash += hash >> 11;
163 len--;
164 }
165
166 /* the remainer */
167 switch (rem)
168 {
169 case 3:
170 hash += strcache2_get_unaligned_16bits (str);
171 hash ^= hash << 16;
172 hash ^= str[sizeof (uint16_t)] << 18;
173 hash += hash >> 11;
174 break;
175 case 2:
176 hash += strcache2_get_unaligned_16bits (str);
177 hash ^= hash << 11;
178 hash += hash >> 17;
179 break;
180 case 1:
181 hash += *str;
182 hash ^= hash << 10;
183 hash += hash >> 1;
184 break;
185 }
186
187 /* force "avalanching" of final 127 bits. */
188 hash ^= hash << 3;
189 hash += hash >> 5;
190 hash ^= hash << 4;
191 hash += hash >> 17;
192 hash ^= hash << 25;
193 hash += hash >> 6;
194
195 return hash;
196
197#elif 1
198 /* Note! This implementation is 18% faster than return_STRING_N_HASH_1
199 when running the two 100 times over typical kBuild strings that
200 end up here (did a fprintf here and built kBuild).
201 Compiler was 32-bit gcc 4.0.1, darwin, with -O2. */
202
203 unsigned int hash = 0;
204 if (MY_PREDICT_TRUE(len >= 2))
205 {
206 unsigned int ch0 = *str++;
207 hash = 0;
208 len--;
209 while (len >= 2)
210 {
211 unsigned int ch1 = *str++;
212 hash += ch0 << (ch1 & 0xf);
213
214 ch0 = *str++;
215 hash += ch1 << (ch0 & 0xf);
216
217 len -= 2;
218 }
219 if (len == 1)
220 {
221 unsigned ch1 = *str;
222 hash += ch0 << (ch1 & 0xf);
223
224 hash += ch1;
225 }
226 else
227 hash += ch0;
228 }
229 else if (len)
230 {
231 hash = *str;
232 hash += hash << (hash & 0xf);
233 }
234 else
235 hash = 0;
236 return hash;
237
238#elif 1
239# if 0
240 /* This is SDBM. This specific form/unroll was benchmarked to be 28% faster
241 than return_STRING_N_HASH_1. (Now the weird thing is that putting the (ch)
242 first in the assignment made it noticably slower.)
243
244 However, it is noticably slower in practice, most likely because of more
245 collisions. Hrmpf. */
246
247# define UPDATE_HASH(ch) hash = (hash << 6) + (hash << 16) - hash + (ch)
248 unsigned int hash = 0;
249
250# else
251 /* This is DJB2. This specific form/unroll was benchmarked to be 27%
252 fast than return_STRING_N_HASH_1.
253
254 Ditto. */
255
256# define UPDATE_HASH(ch) hash = (hash << 5) + hash + (ch)
257 unsigned int hash = 5381;
258# endif
259
260
261 while (len >= 4)
262 {
263 UPDATE_HASH (str[0]);
264 UPDATE_HASH (str[1]);
265 UPDATE_HASH (str[2]);
266 UPDATE_HASH (str[3]);
267 str += 4;
268 len -= 4;
269 }
270 switch (len)
271 {
272 default:
273 case 0:
274 return hash;
275 case 1:
276 UPDATE_HASH (str[0]);
277 return hash;
278 case 2:
279 UPDATE_HASH (str[0]);
280 UPDATE_HASH (str[1]);
281 return hash;
282 case 3:
283 UPDATE_HASH (str[0]);
284 UPDATE_HASH (str[1]);
285 UPDATE_HASH (str[2]);
286 return hash;
287 }
288#endif
289}
290
291MY_INLINE unsigned int
292strcache2_case_insensitive_hash_1 (const char *str, unsigned int len)
293{
294 unsigned int hash = 0;
295 if (MY_PREDICT_TRUE(len >= 2))
296 {
297 unsigned int ch0 = *str++;
298 ch0 = tolower (ch0);
299 hash = 0;
300 len--;
301 while (len >= 2)
302 {
303 unsigned int ch1 = *str++;
304 ch1 = tolower (ch1);
305 hash += ch0 << (ch1 & 0xf);
306
307 ch0 = *str++;
308 ch0 = tolower (ch0);
309 hash += ch1 << (ch0 & 0xf);
310
311 len -= 2;
312 }
313 if (len == 1)
314 {
315 unsigned ch1 = *str;
316 ch1 = tolower (ch1);
317 hash += ch0 << (ch1 & 0xf);
318
319 hash += ch1;
320 }
321 else
322 hash += ch0;
323 }
324 else if (len)
325 {
326 hash = *str;
327 hash += hash << (hash & 0xf);
328 }
329 else
330 hash = 0;
331 return hash;
332}
333
334MY_INLINE int
335strcache2_memcmp_inline_short (const char *xs, const char *ys, unsigned int length)
336{
337 if (length <= 8)
338 {
339 /* short string compare - ~50% of the kBuild calls. */
340 assert ( !((size_t)ys & 3) );
341 if (!((size_t)xs & 3))
342 {
343 /* aligned */
344 int result;
345 switch (length)
346 {
347 default: /* memcmp for longer strings */
348 return memcmp (xs, ys, length);
349 case 8:
350 result = *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
351 result |= *(int32_t*)xs - *(int32_t*)ys;
352 return result;
353 case 7:
354 result = xs[6] - ys[6];
355 result |= xs[5] - ys[5];
356 result |= xs[4] - ys[4];
357 result |= *(int32_t*)xs - *(int32_t*)ys;
358 return result;
359 case 6:
360 result = xs[5] - ys[5];
361 result |= xs[4] - ys[4];
362 result |= *(int32_t*)xs - *(int32_t*)ys;
363 return result;
364 case 5:
365 result = xs[4] - ys[4];
366 result |= *(int32_t*)xs - *(int32_t*)ys;
367 return result;
368 case 4:
369 return *(int32_t*)xs - *(int32_t*)ys;
370 case 3:
371 result = xs[2] - ys[2];
372 result |= xs[1] - ys[1];
373 result |= xs[0] - ys[0];
374 return result;
375 case 2:
376 result = xs[1] - ys[1];
377 result |= xs[0] - ys[0];
378 return result;
379 case 1:
380 return *xs - *ys;
381 case 0:
382 return 0;
383 }
384 }
385 else
386 {
387 /* unaligned */
388 int result = 0;
389 switch (length)
390 {
391 case 8: result |= xs[7] - ys[7];
392 case 7: result |= xs[6] - ys[6];
393 case 6: result |= xs[5] - ys[5];
394 case 5: result |= xs[4] - ys[4];
395 case 4: result |= xs[3] - ys[3];
396 case 3: result |= xs[2] - ys[2];
397 case 2: result |= xs[1] - ys[1];
398 case 1: result |= xs[0] - ys[0];
399 case 0:
400 return result;
401 }
402 }
403 }
404
405 /* memcmp for longer strings */
406 return memcmp (xs, ys, length);
407}
408
409MY_INLINE int
410strcache2_memcmp_inlined (const char *xs, const char *ys, unsigned int length)
411{
412#ifndef ELECTRIC_HEAP
413 assert ( !((size_t)ys & 3) );
414#endif
415 if (!((size_t)xs & 3))
416 {
417 int result;
418 /* aligned */
419 while (length >= 8)
420 {
421 result = *(int32_t*)xs - *(int32_t*)ys;
422 result |= *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
423 if (MY_PREDICT_FALSE(result))
424 return result;
425 xs += 8;
426 ys += 8;
427 length -= 8;
428 }
429 switch (length)
430 {
431 case 7:
432 result = *(int32_t*)xs - *(int32_t*)ys;
433 result |= xs[6] - ys[6];
434 result |= xs[5] - ys[5];
435 result |= xs[4] - ys[4];
436 return result;
437 case 6:
438 result = *(int32_t*)xs - *(int32_t*)ys;
439 result |= xs[5] - ys[5];
440 result |= xs[4] - ys[4];
441 return result;
442 case 5:
443 result = *(int32_t*)xs - *(int32_t*)ys;
444 result |= xs[4] - ys[4];
445 return result;
446 case 4:
447 return *(int32_t*)xs - *(int32_t*)ys;
448 case 3:
449 result = xs[2] - ys[2];
450 result |= xs[1] - ys[1];
451 result |= xs[0] - ys[0];
452 return result;
453 case 2:
454 result = xs[1] - ys[1];
455 result |= xs[0] - ys[0];
456 return result;
457 case 1:
458 return *xs - *ys;
459 default:
460 case 0:
461 return 0;
462 }
463 }
464 else
465 {
466 /* unaligned */
467 int result;
468 while (length >= 8)
469 {
470#if defined(__i386__) || defined(__x86_64__)
471 result = ( ((int32_t)xs[3] << 24)
472 | ((int32_t)xs[2] << 16)
473 | ((int32_t)xs[1] << 8)
474 | xs[0] )
475 - *(int32_t*)ys;
476 result |= ( ((int32_t)xs[7] << 24)
477 | ((int32_t)xs[6] << 16)
478 | ((int32_t)xs[5] << 8)
479 | xs[4] )
480 - *(int32_t*)(ys + 4);
481#else
482 result = xs[3] - ys[3];
483 result |= xs[2] - ys[2];
484 result |= xs[1] - ys[1];
485 result |= xs[0] - ys[0];
486 result |= xs[7] - ys[7];
487 result |= xs[6] - ys[6];
488 result |= xs[5] - ys[5];
489 result |= xs[4] - ys[4];
490#endif
491 if (MY_PREDICT_FALSE(result))
492 return result;
493 xs += 8;
494 ys += 8;
495 length -= 8;
496 }
497 result = 0;
498 switch (length)
499 {
500 case 7: result |= xs[6] - ys[6];
501 case 6: result |= xs[5] - ys[5];
502 case 5: result |= xs[4] - ys[4];
503 case 4: result |= xs[3] - ys[3];
504 case 3: result |= xs[2] - ys[2];
505 case 2: result |= xs[1] - ys[1];
506 case 1: result |= xs[0] - ys[0];
507 return result;
508 default:
509 case 0:
510 return 0;
511 }
512 }
513}
514
515MY_INLINE int
516strcache2_is_equal (struct strcache2 *cache, struct strcache2_entry const *entry,
517 const char *str, unsigned int length, unsigned int hash)
518{
519 assert (!cache->case_insensitive);
520
521 /* the simple stuff first. */
522 if ( entry->hash != hash
523 || entry->length != length)
524 return 0;
525
526#if 0
527 return memcmp (str, entry + 1, length) == 0;
528#elif 1
529 return strcache2_memcmp_inlined (str, (const char *)(entry + 1), length) == 0;
530#else
531 return strcache2_memcmp_inline_short (str, (const char *)(entry + 1), length) == 0;
532#endif
533}
534
535MY_INLINE int
536strcache2_is_iequal (struct strcache2 *cache, struct strcache2_entry const *entry,
537 const char *str, unsigned int length, unsigned int hash)
538{
539 assert (cache->case_insensitive);
540
541 /* the simple stuff first. */
542 if ( entry->hash != hash
543 || entry->length != length)
544 return 0;
545
546#if defined(_MSC_VER) || defined(__OS2__)
547 return _memicmp (entry + 1, str, length) == 0;
548#else
549 return strncasecmp ((const char *)(entry + 1), str, length) == 0;
550#endif
551}
552
553static void
554strcache2_rehash (struct strcache2 *cache)
555{
556 unsigned int src = cache->hash_size;
557 struct strcache2_entry **src_tab = cache->hash_tab;
558 struct strcache2_entry **dst_tab;
559#ifndef STRCACHE2_USE_MASK
560 unsigned int hash_shift;
561#endif
562
563 /* Allocate a new hash table twice the size of the current. */
564 cache->hash_size <<= 1;
565#ifdef STRCACHE2_USE_MASK
566 cache->hash_mask <<= 1;
567 cache->hash_mask |= 1;
568#else
569 for (hash_shift = 1; (1U << hash_shift) < cache->hash_size; hash_shift++)
570 /* nothing */;
571 cache->hash_div = strcache2_find_prime (hash_shift);
572#endif
573 cache->rehash_count <<= 1;
574 cache->hash_tab = dst_tab = (struct strcache2_entry **)
575 xmalloc (cache->hash_size * sizeof (struct strcache2_entry *));
576 memset (dst_tab, '\0', cache->hash_size * sizeof (struct strcache2_entry *));
577
578 /* Copy the entries from the old to the new hash table. */
579 cache->collision_count = 0;
580 while (src-- > 0)
581 {
582 struct strcache2_entry *entry = src_tab[src];
583 while (entry)
584 {
585 struct strcache2_entry *next = entry->next;
586 unsigned int dst = STRCACHE2_MOD_IT (cache, entry->hash);
587 if ((entry->next = dst_tab[dst]) != 0)
588 cache->collision_count++;
589 dst_tab[dst] = entry;
590
591 entry = next;
592 }
593 }
594
595 /* That's it, just free the old table and we're done. */
596 free (src_tab);
597}
598
599static struct strcache2_seg *
600strcache2_new_seg (struct strcache2 *cache, unsigned int minlen)
601{
602 struct strcache2_seg *seg;
603 size_t size;
604 size_t off;
605
606 size = cache->def_seg_size;
607 if (size < (size_t)minlen + sizeof (struct strcache2_seg) + STRCACHE2_ENTRY_ALIGNMENT)
608 {
609 size = (size_t)minlen * 2;
610 size = (size + 0xfff) & ~(size_t)0xfff;
611 }
612
613 seg = xmalloc (size);
614 seg->start = (char *)(seg + 1);
615 seg->size = size - sizeof (struct strcache2_seg);
616 off = (size_t)seg->start & (STRCACHE2_ENTRY_ALIGNMENT - 1);
617 if (off)
618 {
619 seg->start += off;
620 seg->size -= off;
621 }
622 assert (seg->size > minlen);
623 seg->cursor = seg->start;
624 seg->avail = seg->size;
625
626 seg->next = cache->seg_head;
627 cache->seg_head = seg;
628
629 return seg;
630}
631
632/* Internal worker that enters a new string into the cache. */
633static const char *
634strcache2_enter_string (struct strcache2 *cache, unsigned int idx,
635 const char *str, unsigned int length,
636 unsigned int hash)
637{
638 struct strcache2_entry *entry;
639 struct strcache2_seg *seg;
640 unsigned int size;
641 char *str_copy;
642
643 /* Allocate space for the string. */
644
645 size = length + 1 + sizeof (struct strcache2_entry);
646 size = (size + STRCACHE2_ENTRY_ALIGNMENT - 1) & ~(STRCACHE2_ENTRY_ALIGNMENT - 1U);
647
648 seg = cache->seg_head;
649 if (MY_PREDICT_FALSE(seg->avail < size))
650 {
651 do
652 seg = seg->next;
653 while (seg && seg->avail < size);
654 if (!seg)
655 seg = strcache2_new_seg (cache, size);
656 }
657
658 entry = (struct strcache2_entry *) seg->cursor;
659 assert ((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1));
660 seg->cursor += size;
661 seg->avail -= size;
662
663 /* Setup the entry, copy the string and insert it into the hash table. */
664
665 entry->user = NULL;
666 entry->length = length;
667 entry->hash = hash;
668 str_copy = (char *) memcpy (entry + 1, str, length);
669 str_copy[length] = '\0';
670
671 if ((entry->next = cache->hash_tab[idx]) != 0)
672 cache->collision_count++;
673 cache->hash_tab[idx] = entry;
674 cache->count++;
675 if (cache->count >= cache->rehash_count)
676 strcache2_rehash (cache);
677
678 return str_copy;
679}
680
681/* The public add string interface. */
682const char *
683strcache2_add (struct strcache2 *cache, const char *str, unsigned int length)
684{
685 struct strcache2_entry const *entry;
686 unsigned int hash = strcache2_case_sensitive_hash_1 (str, length);
687 unsigned int idx;
688
689 assert (!cache->case_insensitive);
690
691 MAKE_STATS (cache->lookup_count++);
692
693 /* Lookup the entry in the hash table, hoping for an
694 early match. If not found, enter the string at IDX. */
695 idx = STRCACHE2_MOD_IT (cache, hash);
696 entry = cache->hash_tab[idx];
697 if (!entry)
698 return strcache2_enter_string (cache, idx, str, length, hash);
699 if (strcache2_is_equal (cache, entry, str, length, hash))
700 return (const char *)(entry + 1);
701 MAKE_STATS (cache->collision_1st_count++);
702
703 entry = entry->next;
704 if (!entry)
705 return strcache2_enter_string (cache, idx, str, length, hash);
706 if (strcache2_is_equal (cache, entry, str, length, hash))
707 return (const char *)(entry + 1);
708 MAKE_STATS (cache->collision_2nd_count++);
709
710 /* Loop the rest. */
711 for (;;)
712 {
713 entry = entry->next;
714 if (!entry)
715 return strcache2_enter_string (cache, idx, str, length, hash);
716 if (strcache2_is_equal (cache, entry, str, length, hash))
717 return (const char *)(entry + 1);
718 MAKE_STATS (cache->collision_3rd_count++);
719 }
720 /* not reached */
721}
722
723/* The public add string interface for prehashed strings.
724 Use strcache2_hash_str to calculate the hash of a string. */
725const char *
726strcache2_add_hashed (struct strcache2 *cache, const char *str,
727 unsigned int length, unsigned int hash)
728{
729 struct strcache2_entry const *entry;
730 unsigned int idx;
731#ifndef NDEBUG
732 unsigned correct_hash;
733
734 assert (!cache->case_insensitive);
735 correct_hash = strcache2_case_sensitive_hash_1 (str, length);
736 MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
737#endif /* NDEBUG */
738
739 MAKE_STATS (cache->lookup_count++);
740
741 /* Lookup the entry in the hash table, hoping for an
742 early match. If not found, enter the string at IDX. */
743 idx = STRCACHE2_MOD_IT (cache, hash);
744 entry = cache->hash_tab[idx];
745 if (!entry)
746 return strcache2_enter_string (cache, idx, str, length, hash);
747 if (strcache2_is_equal (cache, entry, str, length, hash))
748 return (const char *)(entry + 1);
749 MAKE_STATS (cache->collision_1st_count++);
750
751 entry = entry->next;
752 if (!entry)
753 return strcache2_enter_string (cache, idx, str, length, hash);
754 if (strcache2_is_equal (cache, entry, str, length, hash))
755 return (const char *)(entry + 1);
756 MAKE_STATS (cache->collision_2nd_count++);
757
758 /* Loop the rest. */
759 for (;;)
760 {
761 entry = entry->next;
762 if (!entry)
763 return strcache2_enter_string (cache, idx, str, length, hash);
764 if (strcache2_is_equal (cache, entry, str, length, hash))
765 return (const char *)(entry + 1);
766 MAKE_STATS (cache->collision_3rd_count++);
767 }
768 /* not reached */
769}
770
771/* The public lookup (case sensitive) string interface. */
772const char *
773strcache2_lookup (struct strcache2 *cache, const char *str, unsigned int length)
774{
775 struct strcache2_entry const *entry;
776 unsigned int hash = strcache2_case_sensitive_hash_1 (str, length);
777 unsigned int idx;
778
779 assert (!cache->case_insensitive);
780
781 MAKE_STATS (cache->lookup_count++);
782
783 /* Lookup the entry in the hash table, hoping for an
784 early match. */
785 idx = STRCACHE2_MOD_IT (cache, hash);
786 entry = cache->hash_tab[idx];
787 if (!entry)
788 return NULL;
789 if (strcache2_is_equal (cache, entry, str, length, hash))
790 return (const char *)(entry + 1);
791 MAKE_STATS (cache->collision_1st_count++);
792
793 entry = entry->next;
794 if (!entry)
795 return NULL;
796 if (strcache2_is_equal (cache, entry, str, length, hash))
797 return (const char *)(entry + 1);
798 MAKE_STATS (cache->collision_2nd_count++);
799
800 /* Loop the rest. */
801 for (;;)
802 {
803 entry = entry->next;
804 if (!entry)
805 return NULL;
806 if (strcache2_is_equal (cache, entry, str, length, hash))
807 return (const char *)(entry + 1);
808 MAKE_STATS (cache->collision_3rd_count++);
809 }
810 /* not reached */
811}
812
813#if defined(HAVE_CASE_INSENSITIVE_FS)
814
815/* The public add string interface for case insensitive strings. */
816const char *
817strcache2_iadd (struct strcache2 *cache, const char *str, unsigned int length)
818{
819 struct strcache2_entry const *entry;
820 unsigned int hash = strcache2_case_insensitive_hash_1 (str, length);
821 unsigned int idx;
822
823 assert (!cache->case_insensitive);
824
825 MAKE_STATS (cache->lookup_count++);
826
827 /* Lookup the entry in the hash table, hoping for an
828 early match. If not found, enter the string at IDX. */
829 idx = STRCACHE2_MOD_IT (cache, hash);
830 entry = cache->hash_tab[idx];
831 if (!entry)
832 return strcache2_enter_string (cache, idx, str, length, hash);
833 if (strcache2_is_equal (cache, entry, str, length, hash))
834 return (const char *)(entry + 1);
835 MAKE_STATS (cache->collision_1st_count++);
836
837 entry = entry->next;
838 if (!entry)
839 return strcache2_enter_string (cache, idx, str, length, hash);
840 if (strcache2_is_equal (cache, entry, str, length, hash))
841 return (const char *)(entry + 1);
842 MAKE_STATS (cache->collision_2nd_count++);
843
844 /* Loop the rest. */
845 for (;;)
846 {
847 entry = entry->next;
848 if (!entry)
849 return strcache2_enter_string (cache, idx, str, length, hash);
850 if (strcache2_is_equal (cache, entry, str, length, hash))
851 return (const char *)(entry + 1);
852 MAKE_STATS (cache->collision_3rd_count++);
853 }
854 /* not reached */
855}
856
857/* The public add string interface for prehashed case insensitive strings.
858 Use strcache2_hash_istr to calculate the hash of a string. */
859const char *
860strcache2_iadd_hashed (struct strcache2 *cache, const char *str,
861 unsigned int length, unsigned int hash)
862{
863 struct strcache2_entry const *entry;
864 unsigned int idx;
865#ifndef NDEBUG
866 unsigned correct_hash;
867
868 assert (!cache->case_insensitive);
869 correct_hash = strcache2_case_insensitive_hash_1 (str, length);
870 MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
871#endif /* NDEBUG */
872
873 MAKE_STATS (cache->lookup_count++);
874
875 /* Lookup the entry in the hash table, hoping for an
876 early match. If not found, enter the string at IDX. */
877 idx = STRCACHE2_MOD_IT (cache, hash);
878 entry = cache->hash_tab[idx];
879 if (!entry)
880 return strcache2_enter_string (cache, idx, str, length, hash);
881 if (strcache2_is_equal (cache, entry, str, length, hash))
882 return (const char *)(entry + 1);
883 MAKE_STATS (cache->collision_1st_count++);
884
885 entry = entry->next;
886 if (!entry)
887 return strcache2_enter_string (cache, idx, str, length, hash);
888 if (strcache2_is_equal (cache, entry, str, length, hash))
889 return (const char *)(entry + 1);
890 MAKE_STATS (cache->collision_2nd_count++);
891
892 /* Loop the rest. */
893 for (;;)
894 {
895 entry = entry->next;
896 if (!entry)
897 return strcache2_enter_string (cache, idx, str, length, hash);
898 if (strcache2_is_equal (cache, entry, str, length, hash))
899 return (const char *)(entry + 1);
900 MAKE_STATS (cache->collision_3rd_count++);
901 }
902 /* not reached */
903}
904
905/* The public lookup (case insensitive) string interface. */
906const char *
907strcache2_ilookup (struct strcache2 *cache, const char *str, unsigned int length)
908{
909 struct strcache2_entry const *entry;
910 unsigned int hash = strcache2_case_insensitive_hash_1 (str, length);
911 unsigned int idx;
912
913 assert (!cache->case_insensitive);
914
915 MAKE_STATS (cache->lookup_count++);
916
917 /* Lookup the entry in the hash table, hoping for an
918 early match. */
919 idx = STRCACHE2_MOD_IT (cache, hash);
920 entry = cache->hash_tab[idx];
921 if (!entry)
922 return NULL;
923 if (strcache2_is_equal (cache, entry, str, length, hash))
924 return (const char *)(entry + 1);
925 MAKE_STATS (cache->collision_1st_count++);
926
927 entry = entry->next;
928 if (!entry)
929 return NULL;
930 if (strcache2_is_equal (cache, entry, str, length, hash))
931 return (const char *)(entry + 1);
932 MAKE_STATS (cache->collision_2nd_count++);
933
934 /* Loop the rest. */
935 for (;;)
936 {
937 entry = entry->next;
938 if (!entry)
939 return NULL;
940 if (strcache2_is_equal (cache, entry, str, length, hash))
941 return (const char *)(entry + 1);
942 MAKE_STATS (cache->collision_3rd_count++);
943 }
944 /* not reached */
945}
946
947#endif /* HAVE_CASE_INSENSITIVE_FS */
948
949/* Is the given string cached? returns 1 if it is, 0 if it isn't. */
950int
951strcache2_is_cached (struct strcache2 *cache, const char *str)
952{
953 /* Check mandatory alignment first. */
954 if (!((size_t)str & (sizeof (void *) - 1)))
955 {
956 /* Check the segment list and consider the question answered if the
957 string is within one of them. (Could check it more thoroughly...) */
958 struct strcache2_seg const *seg;
959 for (seg = cache->seg_head; seg; seg = seg->next)
960 if ((size_t)(str - seg->start) < seg->size)
961 return 1;
962 }
963
964 return 0;
965}
966
967
968/* Verify the integrity of the specified string, returning 0 if OK. */
969int
970strcache2_verify_entry (struct strcache2 *cache, const char *str)
971{
972 struct strcache2_entry const *entry;
973 unsigned hash;
974 const char *end;
975
976 if ((size_t)str & (sizeof (void *) - 1))
977 {
978 fprintf (stderr,
979 "strcache2[%s]: missaligned string %p\nstring: %s\n",
980 cache->name, (void *)str, str);
981 return -1;
982 }
983
984 entry = (struct strcache2_entry const *)str - 1;
985 end = memchr (str, '\0', entry->length);
986 if ((size_t)(end - str) == entry->length)
987 {
988 fprintf (stderr,
989 "strcache2[%s]: corrupt entry %p, length: %lu, expected %u;\nstring: %s\n",
990 cache->name, (void *)entry, (unsigned long)(end - str), entry->length, str);
991 return -1;
992 }
993
994 hash = cache->case_insensitive
995 ? strcache2_case_insensitive_hash_1 (str, entry->length)
996 : strcache2_case_sensitive_hash_1 (str, entry->length);
997 if (hash != entry->hash)
998 {
999 fprintf (stderr,
1000 "strcache2[%s]: corrupt entry %p, hash#1: %x, expected %x;\nstring: %s\n",
1001 cache->name, (void *)entry, hash, entry->hash, str);
1002 return -1;
1003 }
1004
1005 return 0;
1006}
1007
1008
1009/* Calculates the case sensitive hash values of the string.
1010 The first hash is returned, the other is put at HASH2P. */
1011unsigned int strcache2_hash_str (const char *str, unsigned int length, unsigned int *hash2p)
1012{
1013 *hash2p = 1;
1014 return strcache2_case_sensitive_hash_1 (str, length);
1015}
1016
1017/* Calculates the case insensitive hash values of the string.
1018 The first hash is returned, the other is put at HASH2P. */
1019unsigned int strcache2_hash_istr (const char *str, unsigned int length, unsigned int *hash2p)
1020{
1021 *hash2p = 1;
1022 return strcache2_case_insensitive_hash_1 (str, length);
1023}
1024
1025
1026
1027/* Initalizes a new cache. */
1028void
1029strcache2_init (struct strcache2 *cache, const char *name, unsigned int size,
1030 unsigned int def_seg_size, int case_insensitive, int thread_safe)
1031{
1032 unsigned hash_shift;
1033 assert (!thread_safe);
1034
1035 /* calc the size as a power of two */
1036 if (!size)
1037 hash_shift = STRCACHE2_HASH_SHIFT;
1038 else
1039 {
1040 assert (size <= (~0U / 2 + 1));
1041 for (hash_shift = 8; (1U << hash_shift) < size; hash_shift++)
1042 /* nothing */;
1043 }
1044
1045 /* adjust the default segment size */
1046 if (!def_seg_size)
1047 def_seg_size = STRCACHE2_SEG_SIZE;
1048 else if (def_seg_size < sizeof (struct strcache2_seg) * 10)
1049 def_seg_size = sizeof (struct strcache2_seg) * 10;
1050 else if ((def_seg_size & 0xfff) < 0xf00)
1051 def_seg_size = ((def_seg_size + 0xfff) & ~0xfffU);
1052
1053
1054 /* init the structure. */
1055 cache->case_insensitive = case_insensitive;
1056#ifdef STRCACHE2_USE_MASK
1057 cache->hash_mask = (1U << hash_shift) - 1U;
1058#else
1059 cache->hash_div = strcache2_find_prime(hash_shift);
1060#endif
1061 cache->count = 0;
1062 cache->collision_count = 0;
1063 cache->lookup_count = 0;
1064 cache->collision_1st_count = 0;
1065 cache->collision_2nd_count = 0;
1066 cache->collision_3rd_count = 0;
1067 cache->rehash_count = (1U << hash_shift) / 4 * 3; /* rehash at 75% */
1068 cache->init_size = 1U << hash_shift;
1069 cache->hash_size = 1U << hash_shift;
1070 cache->def_seg_size = def_seg_size;
1071 cache->lock = NULL;
1072 cache->name = name;
1073
1074 /* allocate the hash table and first segment. */
1075 cache->hash_tab = (struct strcache2_entry **)
1076 xmalloc (cache->init_size * sizeof (struct strcache2_entry *));
1077 memset (cache->hash_tab, '\0', cache->init_size * sizeof (struct strcache2_entry *));
1078 strcache2_new_seg (cache, 0);
1079
1080 /* link it */
1081 cache->next = strcache_head;
1082 strcache_head = cache;
1083}
1084
1085
1086/* Terminates a string cache, freeing all memory and other
1087 associated resources. */
1088void
1089strcache2_term (struct strcache2 *cache)
1090{
1091 /* unlink it */
1092 if (strcache_head == cache)
1093 strcache_head = cache->next;
1094 else
1095 {
1096 struct strcache2 *prev = strcache_head;
1097 while (prev->next != cache)
1098 prev = prev->next;
1099 assert (prev);
1100 prev->next = cache->next;
1101 }
1102
1103 /* free the memory segments */
1104 do
1105 {
1106 void *free_it = cache->seg_head;
1107 cache->seg_head = cache->seg_head->next;
1108 free (free_it);
1109 }
1110 while (cache->seg_head);
1111
1112 /* free the hash and clear the structure. */
1113 free (cache->hash_tab);
1114 memset (cache, '\0', sizeof (struct strcache2));
1115}
1116
1117/* Print statistics a string cache. */
1118void
1119strcache2_print_stats (struct strcache2 *cache, const char *prefix)
1120{
1121 unsigned int seg_count = 0;
1122 unsigned long seg_total_bytes = 0;
1123 unsigned long seg_avg_bytes;
1124 unsigned long seg_avail_bytes = 0;
1125 unsigned long seg_max_bytes = 0;
1126 struct strcache2_seg *seg;
1127 unsigned int str_count = 0;
1128 unsigned long str_total_len = 0;
1129 unsigned int str_avg_len;
1130 unsigned int str_min_len = ~0U;
1131 unsigned int str_max_len = 0;
1132 unsigned int idx;
1133 unsigned int rehashes;
1134 unsigned int chain_depths[32];
1135
1136 printf (_("\n%s strcache2: %s\n"), prefix, cache->name);
1137
1138 /* Segment statistics. */
1139 for (seg = cache->seg_head; seg; seg = seg->next)
1140 {
1141 seg_count++;
1142 seg_total_bytes += seg->size;
1143 seg_avail_bytes += seg->avail;
1144 if (seg->size > seg_max_bytes)
1145 seg_max_bytes = seg->size;
1146 }
1147 seg_avg_bytes = seg_total_bytes / seg_count;
1148 printf (_("%s %u segments: total = %lu / max = %lu / avg = %lu / def = %u avail = %lu\n"),
1149 prefix, seg_count, seg_total_bytes, seg_max_bytes, seg_avg_bytes,
1150 cache->def_seg_size, seg_avail_bytes);
1151
1152 /* String statistics. */
1153 memset (chain_depths, '\0', sizeof (chain_depths));
1154 idx = cache->hash_size;
1155 while (idx-- > 0)
1156 {
1157 struct strcache2_entry const *entry = cache->hash_tab[idx];
1158 unsigned int depth = 0;
1159 for (; entry != 0; entry = entry->next, depth++)
1160 {
1161 unsigned int length = entry->length;
1162 str_total_len += length;
1163 if (length > str_max_len)
1164 str_max_len = length;
1165 if (length < str_min_len)
1166 str_min_len = length;
1167 str_count++;
1168 }
1169 chain_depths[depth >= 32 ? 31 : depth]++;
1170 }
1171 str_avg_len = cache->count ? str_total_len / cache->count : 0;
1172 printf (_("%s %u strings: total len = %lu / max = %u / avg = %u / min = %u\n"),
1173 prefix, cache->count, str_total_len, str_max_len, str_avg_len, str_min_len);
1174 if (str_count != cache->count)
1175 printf (_("%s string count mismatch! cache->count=%u, actual count is %u\n"), prefix,
1176 cache->count, str_count);
1177
1178 /* Hash statistics. */
1179 idx = cache->init_size;
1180 rehashes = 0;
1181 while (idx < cache->hash_size)
1182 {
1183 idx *= 2;
1184 rehashes++;
1185 }
1186
1187#ifdef STRCACHE2_USE_MASK
1188 printf (_("%s hash size = %u mask = %#x rehashed %u times lookups = %lu\n"),
1189 prefix, cache->hash_size, cache->hash_mask, rehashes, cache->lookup_count);
1190#else
1191 printf (_("%s hash size = %u div = %#x rehashed %u times lookups = %lu\n"),
1192 prefix, cache->hash_size, cache->hash_div, rehashes, cache->lookup_count);
1193#endif
1194 printf (_("%s hash collisions 1st = %lu (%u%%) 2nd = %lu (%u%%) 3rd = %lu (%u%%)\n"),
1195 prefix,
1196 cache->collision_1st_count, (unsigned int)((100.0 * cache->collision_1st_count) / cache->lookup_count),
1197 cache->collision_2nd_count, (unsigned int)((100.0 * cache->collision_2nd_count) / cache->lookup_count),
1198 cache->collision_3rd_count, (unsigned int)((100.0 * cache->collision_3rd_count) / cache->lookup_count));
1199 printf (_("%s hash insert collisions = %u (%u%%)\n"),
1200 prefix, cache->collision_count,(unsigned int)((100.0 * cache->collision_count) / cache->count));
1201 printf (_("%s %5u (%u%%) empty hash table slots\n"),
1202 prefix, chain_depths[0], (unsigned int)((100.0 * chain_depths[0]) / cache->hash_size));
1203 printf (_("%s %5u (%u%%) occupied hash table slots\n"),
1204 prefix, chain_depths[1], (unsigned int)((100.0 * chain_depths[1]) / cache->hash_size));
1205 for (idx = 2; idx < 32; idx++)
1206 {
1207 unsigned strs_at_this_depth = chain_depths[idx];
1208 unsigned i;
1209 for (i = idx + 1; i < 32; i++)
1210 strs_at_this_depth += chain_depths[i];
1211 if (strs_at_this_depth)
1212 printf (_("%s %5u (%2u%%) with %u string%s chained on; %5u (%2u%%) strings at depth %u.\n"),
1213 prefix, chain_depths[idx], (unsigned int)((100.0 * chain_depths[idx]) / (cache->count - cache->collision_count)),
1214 idx - 1, idx == 2 ? " " : "s",
1215 strs_at_this_depth, (unsigned int)((100.0 * strs_at_this_depth) / cache->count), idx - 1);
1216 }
1217}
1218
1219/* Print statistics for all string caches. */
1220void
1221strcache2_print_stats_all (const char *prefix)
1222{
1223 struct strcache2 *cur;
1224 for (cur = strcache_head; cur; cur = cur->next)
1225 strcache2_print_stats (cur, prefix);
1226}
1227
Note: See TracBrowser for help on using the repository browser.