Context Navigation

source: trunk/src/kmk/strcache2.c@ 1912

Last change on this file since 1912 was 1912, checked in by bird, 17 years ago
strcache2: collapsed the STRCACHE2_USE_CHAINING checks.
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 36.2 KB

Line
1	/* $Id: strcache2.c 1912 2008-10-22 21:17:27Z bird $ */
2	/** @file
3	* strcache2 - New string cache.
4	*/
5
6	/*
7	* Copyright (c) 2008 knut st. osmundsen <bird-src-spam@anduin.net>
8	*
9	* This file is part of kBuild.
10	*
11	* kBuild is free software; you can redistribute it and/or modify
12	* it under the terms of the GNU General Public License as published by
13	* the Free Software Foundation; either version 2 of the License, or
14	* (at your option) any later version.
15	*
16	* kBuild is distributed in the hope that it will be useful,
17	* but WITHOUT ANY WARRANTY; without even the implied warranty of
18	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19	* GNU General Public License for more details.
20	*
21	* You should have received a copy of the GNU General Public License
22	* along with kBuild; if not, write to the Free Software
23	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24	*
25	*/
26
27
28	/*******************************************************************************
29	* Header Files *
30	*******************************************************************************/
31	#include "make.h"
32	#include "strcache2.h"
33
34	#include <assert.h>
35
36	#include "debug.h"
37
38	#ifdef _MSC_VER
39	typedef unsigned char uint8_t;
40	typedef unsigned short uint16_t;
41	typedef unsigned int uint32_t;
42	#else
43	# include <stdint.h>
44	#endif
45
46	#ifdef WINDOWS32
47	# include <io.h>
48	# include <process.h>
49	# include <Windows.h>
50	# define PARSE_IN_WORKER
51	#endif
52
53	#ifdef __OS2__
54	# include <sys/fmutex.h>
55	#endif
56
57	#ifdef HAVE_PTHREAD
58	# include <pthread.h>
59	#endif
60
61
62	/*******************************************************************************
63	* Defined Constants And Macros *
64	*******************************************************************************/
65	/* The default size of a memory segment (1MB). */
66	#define STRCACHE2_SEG_SIZE (1024U*1024U)
67	/* The default hash table shift (hash size give as a power of two). */
68	#define STRCACHE2_HASH_SHIFT 16
69	/** Does the modding / masking of a hash number into an index. */
70	#ifdef STRCACHE2_USE_MASK
71	# define STRCACHE2_MOD_IT(cache, hash) ((hash) & (cache)->hash_mask)
72	#else
73	# define STRCACHE2_MOD_IT(cache, hash) ((hash) % (cache)->hash_div)
74	#endif
75
76
77	/*******************************************************************************
78	* Global Variables *
79	*******************************************************************************/
80	/* List of initialized string caches. */
81	static struct strcache2 *strcache_head;
82
83
84	/** Finds the closest primary number for power of two value (or something else
85	* useful if not support). */
86	MY_INLINE unsigned int strcache2_find_prime(unsigned int shift)
87	{
88	switch (shift)
89	{
90	case 5: return 31;
91	case 6: return 61;
92	case 7: return 127;
93	case 8: return 251;
94	case 9: return 509;
95	case 10: return 1021;
96	case 11: return 2039;
97	case 12: return 4093;
98	case 13: return 8191;
99	case 14: return 16381;
100	case 15: return 32749;
101	case 16: return 65521;
102	case 17: return 131063;
103	case 18: return 262139;
104	case 19: return 524269;
105	case 20: return 1048573;
106	case 21: return 2097143;
107	case 22: return 4194301;
108	case 23: return 8388593;
109
110	default:
111	assert (0);
112	return (1 << shift) - 1;
113	}
114	}
115
116	MY_INLINE unsigned int
117	strcache2_case_sensitive_hash_1 (const char *str, unsigned int len)
118	{
119	#if 1
120	/* Paul Hsieh hash SuperFast function:
121	http://www.azillionmonkeys.com/qed/hash.html
122
123	This performs very good and as a sligtly better distribution than
124	STRING_N_HASH_1 on a typical kBuild run.
125
126	It is also 37% faster than return_STRING_N_HASH_1 when running the
127	two 100 times over typical kBuild strings that end up here (did a
128	fprintf here and built kBuild). Compiler was 32-bit gcc 4.0.1, darwin,
129	with -O2.
130
131	FIXME: A path for well aligned data should be added to speed up
132	execution on alignment sensitive systems. */
133
134	# if defined(__amd64__) \|\| defined(__x86_64__) \|\| defined(__AMD64__) \|\| defined(_M_X64) \|\| defined(__amd64) \
135	\|\| defined(__i386__) \|\| defined(__x86__) \|\| defined(__X86__) \|\| defined(_M_IX86) \|\| defined(__i386)
136	# define strcache2_get_unaligned_16bits(ptr) ( ((const uint16_t )(ptr)))
137	# else
138	/* (endian doesn't matter) */
139	# define strcache2_get_unaligned_16bits(ptr) ( (((const uint8_t *)(ptr))[0] << 8) \
140	\| (((const uint8_t *)(ptr))[1]) )
141	# endif
142	uint32_t hash = len;
143	uint32_t tmp;
144	int rem;
145
146	assert (sizeof (uint8_t) == sizeof (char));
147	if (len == 0)
148	return 0;
149
150	/* main loop, walking on 2 x uint16_t */
151	rem = len & 3;
152	len >>= 2;
153	while (len > 0)
154	{
155	hash += strcache2_get_unaligned_16bits (str);
156	tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
157	hash = (hash << 16) ^ tmp;
158	str += 2 * sizeof (uint16_t);
159	hash += hash >> 11;
160	len--;
161	}
162
163	/* the remainer */
164	switch (rem)
165	{
166	case 3:
167	hash += strcache2_get_unaligned_16bits (str);
168	hash ^= hash << 16;
169	hash ^= str[sizeof (uint16_t)] << 18;
170	hash += hash >> 11;
171	break;
172	case 2:
173	hash += strcache2_get_unaligned_16bits (str);
174	hash ^= hash << 11;
175	hash += hash >> 17;
176	break;
177	case 1:
178	hash += *str;
179	hash ^= hash << 10;
180	hash += hash >> 1;
181	break;
182	}
183
184	/* force "avalanching" of final 127 bits. */
185	hash ^= hash << 3;
186	hash += hash >> 5;
187	hash ^= hash << 4;
188	hash += hash >> 17;
189	hash ^= hash << 25;
190	hash += hash >> 6;
191
192	return hash;
193
194	#elif 1
195	/* Note! This implementation is 18% faster than return_STRING_N_HASH_1
196	when running the two 100 times over typical kBuild strings that
197	end up here (did a fprintf here and built kBuild).
198	Compiler was 32-bit gcc 4.0.1, darwin, with -O2. */
199
200	unsigned int hash = 0;
201	if (MY_PREDICT_TRUE(len >= 2))
202	{
203	unsigned int ch0 = *str++;
204	hash = 0;
205	len--;
206	while (len >= 2)
207	{
208	unsigned int ch1 = *str++;
209	hash += ch0 << (ch1 & 0xf);
210
211	ch0 = *str++;
212	hash += ch1 << (ch0 & 0xf);
213
214	len -= 2;
215	}
216	if (len == 1)
217	{
218	unsigned ch1 = *str;
219	hash += ch0 << (ch1 & 0xf);
220
221	hash += ch1;
222	}
223	else
224	hash += ch0;
225	}
226	else if (len)
227	{
228	hash = *str;
229	hash += hash << (hash & 0xf);
230	}
231	else
232	hash = 0;
233	return hash;
234
235	#elif 1
236	# if 0
237	/* This is SDBM. This specific form/unroll was benchmarked to be 28% faster
238	than return_STRING_N_HASH_1. (Now the weird thing is that putting the (ch)
239	first in the assignment made it noticably slower.)
240
241	However, it is noticably slower in practice, most likely because of more
242	collisions. Hrmpf. */
243
244	# define UPDATE_HASH(ch) hash = (hash << 6) + (hash << 16) - hash + (ch)
245	unsigned int hash = 0;
246
247	# else
248	/* This is DJB2. This specific form/unroll was benchmarked to be 27%
249	fast than return_STRING_N_HASH_1.
250
251	Ditto. */
252
253	# define UPDATE_HASH(ch) hash = (hash << 5) + hash + (ch)
254	unsigned int hash = 5381;
255	# endif
256
257
258	while (len >= 4)
259	{
260	UPDATE_HASH (str[0]);
261	UPDATE_HASH (str[1]);
262	UPDATE_HASH (str[2]);
263	UPDATE_HASH (str[3]);
264	str += 4;
265	len -= 4;
266	}
267	switch (len)
268	{
269	default:
270	case 0:
271	return hash;
272	case 1:
273	UPDATE_HASH (str[0]);
274	return hash;
275	case 2:
276	UPDATE_HASH (str[0]);
277	UPDATE_HASH (str[1]);
278	return hash;
279	case 3:
280	UPDATE_HASH (str[0]);
281	UPDATE_HASH (str[1]);
282	UPDATE_HASH (str[2]);
283	return hash;
284	}
285	#endif
286	}
287
288	MY_INLINE unsigned int
289	strcache2_case_insensitive_hash_1 (const char *str, unsigned int len)
290	{
291	unsigned int hash = 0;
292	if (MY_PREDICT_TRUE(len >= 2))
293	{
294	unsigned int ch0 = *str++;
295	ch0 = tolower (ch0);
296	hash = 0;
297	len--;
298	while (len >= 2)
299	{
300	unsigned int ch1 = *str++;
301	ch1 = tolower (ch1);
302	hash += ch0 << (ch1 & 0xf);
303
304	ch0 = *str++;
305	ch0 = tolower (ch0);
306	hash += ch1 << (ch0 & 0xf);
307
308	len -= 2;
309	}
310	if (len == 1)
311	{
312	unsigned ch1 = *str;
313	ch1 = tolower (ch1);
314	hash += ch0 << (ch1 & 0xf);
315
316	hash += ch1;
317	}
318	else
319	hash += ch0;
320	}
321	else if (len)
322	{
323	hash = *str;
324	hash += hash << (hash & 0xf);
325	}
326	else
327	hash = 0;
328	return hash;
329	}
330
331	MY_INLINE int
332	strcache2_memcmp_inline_short (const char xs, const char ys, unsigned int length)
333	{
334	if (length <= 8)
335	{
336	/* short string compare - ~50% of the kBuild calls. */
337	assert ( !((size_t)ys & 3) );
338	if (!((size_t)xs & 3))
339	{
340	/* aligned */
341	int result;
342	switch (length)
343	{
344	default: /* memcmp for longer strings */
345	return memcmp (xs, ys, length);
346	case 8:
347	result = (int32_t)(xs + 4) - (int32_t)(ys + 4);
348	result \|= (int32_t)xs - (int32_t)ys;
349	return result;
350	case 7:
351	result = xs[6] - ys[6];
352	result \|= xs[5] - ys[5];
353	result \|= xs[4] - ys[4];
354	result \|= (int32_t)xs - (int32_t)ys;
355	return result;
356	case 6:
357	result = xs[5] - ys[5];
358	result \|= xs[4] - ys[4];
359	result \|= (int32_t)xs - (int32_t)ys;
360	return result;
361	case 5:
362	result = xs[4] - ys[4];
363	result \|= (int32_t)xs - (int32_t)ys;
364	return result;
365	case 4:
366	return (int32_t)xs - (int32_t)ys;
367	case 3:
368	result = xs[2] - ys[2];
369	result \|= xs[1] - ys[1];
370	result \|= xs[0] - ys[0];
371	return result;
372	case 2:
373	result = xs[1] - ys[1];
374	result \|= xs[0] - ys[0];
375	return result;
376	case 1:
377	return xs - ys;
378	case 0:
379	return 0;
380	}
381	}
382	else
383	{
384	/* unaligned */
385	int result = 0;
386	switch (length)
387	{
388	case 8: result \|= xs[7] - ys[7];
389	case 7: result \|= xs[6] - ys[6];
390	case 6: result \|= xs[5] - ys[5];
391	case 5: result \|= xs[4] - ys[4];
392	case 4: result \|= xs[3] - ys[3];
393	case 3: result \|= xs[2] - ys[2];
394	case 2: result \|= xs[1] - ys[1];
395	case 1: result \|= xs[0] - ys[0];
396	case 0:
397	return result;
398	}
399	}
400	}
401
402	/* memcmp for longer strings */
403	return memcmp (xs, ys, length);
404	}
405
406	MY_INLINE int
407	strcache2_memcmp_inlined (const char xs, const char ys, unsigned int length)
408	{
409	#ifndef ELECTRIC_HEAP
410	assert ( !((size_t)ys & 3) );
411	#endif
412	if (!((size_t)xs & 3))
413	{
414	int result;
415	/* aligned */
416	while (length >= 8)
417	{
418	result = (int32_t)xs - (int32_t)ys;
419	result \|= (int32_t)(xs + 4) - (int32_t)(ys + 4);
420	if (MY_PREDICT_FALSE(result))
421	return result;
422	xs += 8;
423	ys += 8;
424	length -= 8;
425	}
426	switch (length)
427	{
428	case 7:
429	result = (int32_t)xs - (int32_t)ys;
430	result \|= xs[6] - ys[6];
431	result \|= xs[5] - ys[5];
432	result \|= xs[4] - ys[4];
433	return result;
434	case 6:
435	result = (int32_t)xs - (int32_t)ys;
436	result \|= xs[5] - ys[5];
437	result \|= xs[4] - ys[4];
438	return result;
439	case 5:
440	result = (int32_t)xs - (int32_t)ys;
441	result \|= xs[4] - ys[4];
442	return result;
443	case 4:
444	return (int32_t)xs - (int32_t)ys;
445	case 3:
446	result = xs[2] - ys[2];
447	result \|= xs[1] - ys[1];
448	result \|= xs[0] - ys[0];
449	return result;
450	case 2:
451	result = xs[1] - ys[1];
452	result \|= xs[0] - ys[0];
453	return result;
454	case 1:
455	return xs - ys;
456	default:
457	case 0:
458	return 0;
459	}
460	}
461	else
462	{
463	/* unaligned */
464	int result;
465	while (length >= 8)
466	{
467	#if defined(__i386__) \|\| defined(__x86_64__)
468	result = ( ((int32_t)xs[3] << 24)
469	\| ((int32_t)xs[2] << 16)
470	\| ((int32_t)xs[1] << 8)
471	\| xs[0] )
472	- (int32_t)ys;
473	result \|= ( ((int32_t)xs[7] << 24)
474	\| ((int32_t)xs[6] << 16)
475	\| ((int32_t)xs[5] << 8)
476	\| xs[4] )
477	- (int32_t)(ys + 4);
478	#else
479	result = xs[3] - ys[3];
480	result \|= xs[2] - ys[2];
481	result \|= xs[1] - ys[1];
482	result \|= xs[0] - ys[0];
483	result \|= xs[7] - ys[7];
484	result \|= xs[6] - ys[6];
485	result \|= xs[5] - ys[5];
486	result \|= xs[4] - ys[4];
487	#endif
488	if (MY_PREDICT_FALSE(result))
489	return result;
490	xs += 8;
491	ys += 8;
492	length -= 8;
493	}
494	result = 0;
495	switch (length)
496	{
497	case 7: result \|= xs[6] - ys[6];
498	case 6: result \|= xs[5] - ys[5];
499	case 5: result \|= xs[4] - ys[4];
500	case 4: result \|= xs[3] - ys[3];
501	case 3: result \|= xs[2] - ys[2];
502	case 2: result \|= xs[1] - ys[1];
503	case 1: result \|= xs[0] - ys[0];
504	return result;
505	default:
506	case 0:
507	return 0;
508	}
509	}
510	}
511
512	MY_INLINE int
513	strcache2_is_equal (struct strcache2 cache, struct strcache2_entry const entry,
514	const char *str, unsigned int length, unsigned int hash1)
515	{
516	assert (!cache->case_insensitive);
517
518	/* the simple stuff first. */
519	if ( entry->hash1 != hash1
520	\|\| entry->length != length)
521	return 0;
522
523	#if 0
524	return memcmp (str, entry + 1, length) == 0;
525	#elif 1
526	return strcache2_memcmp_inlined (str, (const char *)(entry + 1), length) == 0;
527	#else
528	return strcache2_memcmp_inline_short (str, (const char *)(entry + 1), length) == 0;
529	#endif
530	}
531
532	MY_INLINE int
533	strcache2_is_iequal (struct strcache2 cache, struct strcache2_entry const entry,
534	const char *str, unsigned int length, unsigned int hash1)
535	{
536	assert (cache->case_insensitive);
537
538	/* the simple stuff first. */
539	if ( entry->hash1 != hash1
540	\|\| entry->length != length)
541	return 0;
542
543	#if defined(_MSC_VER) \|\| defined(__OS2__)
544	return _memicmp (entry + 1, str, length) == 0;
545	#else
546	return strncasecmp ((const char *)(entry + 1), str, length) == 0;
547	#endif
548	}
549
550	static void
551	strcache2_rehash (struct strcache2 *cache)
552	{
553	unsigned int src = cache->hash_size;
554	struct strcache2_entry **src_tab = cache->hash_tab;
555	struct strcache2_entry **dst_tab;
556	#ifndef STRCACHE2_USE_MASK
557	unsigned int hash_shift;
558	#endif
559
560	/* Allocate a new hash table twice the size of the current. */
561	cache->hash_size <<= 1;
562	#ifdef STRCACHE2_USE_MASK
563	cache->hash_mask <<= 1;
564	cache->hash_mask \|= 1;
565	#else
566	for (hash_shift = 1; (1U << hash_shift) < cache->hash_size; hash_shift++)
567	/* nothing */;
568	cache->hash_div = strcache2_find_prime (hash_shift);
569	#endif
570	cache->rehash_count <<= 1;
571	cache->hash_tab = dst_tab = (struct strcache2_entry **)
572	xmalloc (cache->hash_size * sizeof (struct strcache2_entry *));
573	memset (dst_tab, '\0', cache->hash_size * sizeof (struct strcache2_entry *));
574
575	/* Copy the entries from the old to the new hash table. */
576	cache->collision_count = 0;
577	while (src-- > 0)
578	{
579	struct strcache2_entry *entry = src_tab[src];
580	while (entry)
581	{
582	struct strcache2_entry *next = entry->next;
583	unsigned int dst = STRCACHE2_MOD_IT (cache, entry->hash1);
584	if ((entry->next = dst_tab[dst]) != 0)
585	cache->collision_count++;
586	dst_tab[dst] = entry;
587
588	entry = next;
589	}
590	}
591
592	/* That's it, just free the old table and we're done. */
593	free (src_tab);
594	}
595
596	static struct strcache2_seg *
597	strcache2_new_seg (struct strcache2 *cache, unsigned int minlen)
598	{
599	struct strcache2_seg *seg;
600	size_t size;
601	size_t off;
602
603	size = cache->def_seg_size;
604	if (size < (size_t)minlen + sizeof (struct strcache2_seg) + STRCACHE2_ENTRY_ALIGNMENT)
605	{
606	size = (size_t)minlen * 2;
607	size = (size + 0xfff) & ~(size_t)0xfff;
608	}
609
610	seg = xmalloc (size);
611	seg->start = (char *)(seg + 1);
612	seg->size = size - sizeof (struct strcache2_seg);
613	off = (size_t)seg->start & (STRCACHE2_ENTRY_ALIGNMENT - 1);
614	if (off)
615	{
616	seg->start += off;
617	seg->size -= off;
618	}
619	assert (seg->size > minlen);
620	seg->cursor = seg->start;
621	seg->avail = seg->size;
622
623	seg->next = cache->seg_head;
624	cache->seg_head = seg;
625
626	return seg;
627	}
628
629	/* Internal worker that enters a new string into the cache. */
630	static const char *
631	strcache2_enter_string (struct strcache2 *cache, unsigned int idx,
632	const char *str, unsigned int length,
633	unsigned int hash1, unsigned hash2)
634	{
635	struct strcache2_entry *entry;
636	struct strcache2_seg *seg;
637	unsigned int size;
638	char *str_copy;
639
640	/* Allocate space for the string. */
641
642	size = length + 1 + sizeof (struct strcache2_entry);
643	size = (size + STRCACHE2_ENTRY_ALIGNMENT - 1) & ~(STRCACHE2_ENTRY_ALIGNMENT - 1U);
644
645	seg = cache->seg_head;
646	if (MY_PREDICT_FALSE(seg->avail < size))
647	{
648	do
649	seg = seg->next;
650	while (seg && seg->avail < size);
651	if (!seg)
652	seg = strcache2_new_seg (cache, size);
653	}
654
655	entry = (struct strcache2_entry *) seg->cursor;
656	assert ((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1));
657	seg->cursor += size;
658	seg->avail -= size;
659
660	/* Setup the entry, copy the string and insert it into the hash table. */
661
662	entry->user = NULL;
663	entry->length = length;
664	entry->hash1 = hash1;
665	str_copy = (char *) memcpy (entry + 1, str, length);
666	str_copy[length] = '\0';
667
668	if ((entry->next = cache->hash_tab[idx]) != 0)
669	cache->collision_count++;
670	cache->hash_tab[idx] = entry;
671	cache->count++;
672	if (cache->count >= cache->rehash_count)
673	strcache2_rehash (cache);
674
675	return str_copy;
676	}
677
678	/* The public add string interface. */
679	const char *
680	strcache2_add (struct strcache2 cache, const char str, unsigned int length)
681	{
682	struct strcache2_entry const *entry;
683	unsigned int hash1 = strcache2_case_sensitive_hash_1 (str, length);
684	unsigned int hash2 = 0;
685	unsigned int idx;
686
687	assert (!cache->case_insensitive);
688
689	cache->lookup_count++;
690
691	/* Lookup the entry in the hash table, hoping for an
692	early match. If not found, enter the string at IDX. */
693	idx = STRCACHE2_MOD_IT (cache, hash1);
694	entry = cache->hash_tab[idx];
695	if (!entry)
696	return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
697	if (strcache2_is_equal (cache, entry, str, length, hash1))
698	return (const char *)(entry + 1);
699	cache->collision_1st_count++;
700
701	entry = entry->next;
702	if (!entry)
703	return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
704	if (strcache2_is_equal (cache, entry, str, length, hash1))
705	return (const char *)(entry + 1);
706	cache->collision_2nd_count++;
707
708	/* (We've established hash2, so we can do a straight loop now.) */
709	for (;;)
710	{
711	entry = entry->next;
712	if (!entry)
713	return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
714	if (strcache2_is_equal (cache, entry, str, length, hash1))
715	return (const char *)(entry + 1);
716	cache->collision_3rd_count++;
717	}
718	/* not reached */
719	}
720
721	/* The public add string interface for prehashed strings.
722	Use strcache2_hash_str to calculate the hash of a string. */
723	const char *
724	strcache2_add_hashed (struct strcache2 cache, const char str, unsigned int length,
725	unsigned int hash1, unsigned int hash2)
726	{
727	struct strcache2_entry const *entry;
728	unsigned int idx;
729	#ifndef NDEBUG
730	unsigned correct_hash;
731
732	assert (!cache->case_insensitive);
733	correct_hash = strcache2_case_sensitive_hash_1 (str, length);
734	MY_ASSERT_MSG (hash1 == correct_hash, ("%#x != %#x\n", hash1, correct_hash));
735	if (hash2)
736	{
737	correct_hash = strcache2_case_sensitive_hash_2 (str, length);
738	MY_ASSERT_MSG (hash2 == correct_hash, ("%#x != %#x\n", hash2, correct_hash));
739	}
740	#endif /* NDEBUG */
741
742	cache->lookup_count++;
743
744	/* Lookup the entry in the hash table, hoping for an
745	early match. If not found, enter the string at IDX. */
746	idx = STRCACHE2_MOD_IT (cache, hash1);
747	entry = cache->hash_tab[idx];
748	if (!entry)
749	return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
750	if (strcache2_is_equal (cache, entry, str, length, hash1))
751	return (const char *)(entry + 1);
752	cache->collision_1st_count++;
753
754	entry = entry->next;
755	if (!entry)
756	return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
757	if (strcache2_is_equal (cache, entry, str, length, hash1))
758	return (const char *)(entry + 1);
759	cache->collision_2nd_count++;
760
761	/* (We've established hash2, so we can do a straight loop now.) */
762	for (;;)
763	{
764	entry = entry->next;
765	if (!entry)
766	return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
767	if (strcache2_is_equal (cache, entry, str, length, hash1))
768	return (const char *)(entry + 1);
769	cache->collision_3rd_count++;
770	}
771	/* not reached */
772	}
773
774	/* The public lookup (case sensitive) string interface. */
775	const char *
776	strcache2_lookup (struct strcache2 cache, const char str, unsigned int length)
777	{
778	struct strcache2_entry const *entry;
779	unsigned int hash1 = strcache2_case_sensitive_hash_1 (str, length);
780	unsigned int idx;
781
782	assert (!cache->case_insensitive);
783
784	cache->lookup_count++;
785
786	/* Lookup the entry in the hash table, hoping for an
787	early match. */
788	idx = STRCACHE2_MOD_IT (cache, hash1);
789	entry = cache->hash_tab[idx];
790	if (!entry)
791	return NULL;
792	if (strcache2_is_equal (cache, entry, str, length, hash1))
793	return (const char *)(entry + 1);
794	cache->collision_1st_count++;
795
796	entry = entry->next;
797	if (!entry)
798	return NULL;
799	if (strcache2_is_equal (cache, entry, str, length, hash1))
800	return (const char *)(entry + 1);
801	cache->collision_2nd_count++;
802
803	/* (We've established hash2, so we can do a straight loop now.) */
804	for (;;)
805	{
806	entry = entry->next;
807	if (!entry)
808	return NULL;
809	if (strcache2_is_equal (cache, entry, str, length, hash1))
810	return (const char *)(entry + 1);
811	cache->collision_3rd_count++;
812	}
813	/* not reached */
814	}
815
816	#if defined(HAVE_CASE_INSENSITIVE_FS)
817
818	/* The public add string interface for case insensitive strings. */
819	const char *
820	strcache2_iadd (struct strcache2 cache, const char str, unsigned int length)
821	{
822	struct strcache2_entry const *entry;
823	unsigned int hash1 = strcache2_case_insensitive_hash_1 (str, length);
824	unsigned int hash2 = 0;
825	unsigned int idx;
826
827	assert (!cache->case_insensitive);
828
829	cache->lookup_count++;
830
831	/* Lookup the entry in the hash table, hoping for an
832	early match. If not found, enter the string at IDX. */
833	idx = STRCACHE2_MOD_IT (cache, hash1);
834	entry = cache->hash_tab[idx];
835	if (!entry)
836	return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
837	if (strcache2_is_equal (cache, entry, str, length, hash1))
838	return (const char *)(entry + 1);
839	cache->collision_1st_count++;
840
841	entry = entry->next;
842	if (!entry)
843	return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
844	if (strcache2_is_equal (cache, entry, str, length, hash1))
845	return (const char *)(entry + 1);
846	cache->collision_2nd_count++;
847
848	/* (We've established hash2, so we can do a straight loop now.) */
849	for (;;)
850	{
851	entry = entry->next;
852	if (!entry)
853	return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
854	if (strcache2_is_equal (cache, entry, str, length, hash1))
855	return (const char *)(entry + 1);
856	cache->collision_3rd_count++;
857	}
858	/* not reached */
859	}
860
861	/* The public add string interface for prehashed case insensitive strings.
862	Use strcache2_hash_istr to calculate the hash of a string. */
863	const char *
864	strcache2_iadd_hashed (struct strcache2 cache, const char str, unsigned int length,
865	unsigned int hash1, unsigned int hash2)
866	{
867	struct strcache2_entry const *entry;
868	unsigned int idx;
869	#ifndef NDEBUG
870	unsigned correct_hash;
871
872	assert (!cache->case_insensitive);
873	correct_hash = strcache2_case_insensitive_hash_1 (str, length);
874	MY_ASSERT_MSG (hash1 == correct_hash, ("%#x != %#x\n", hash1, correct_hash));
875	if (hash2)
876	{
877	correct_hash = strcache2_case_insensitive_hash_2 (str, length);
878	MY_ASSERT_MSG (hash2 == correct_hash, ("%#x != %#x\n", hash2, correct_hash));
879	}
880	#endif /* NDEBUG */
881
882	cache->lookup_count++;
883
884	/* Lookup the entry in the hash table, hoping for an
885	early match. If not found, enter the string at IDX. */
886	idx = STRCACHE2_MOD_IT (cache, hash1);
887	entry = cache->hash_tab[idx];
888	if (!entry)
889	return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
890	if (strcache2_is_equal (cache, entry, str, length, hash1))
891	return (const char *)(entry + 1);
892	cache->collision_1st_count++;
893
894	entry = entry->next;
895	if (!entry)
896	return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
897	if (strcache2_is_equal (cache, entry, str, length, hash1))
898	return (const char *)(entry + 1);
899	cache->collision_2nd_count++;
900
901	/* (We've established hash2, so we can do a straight loop now.) */
902	for (;;)
903	{
904	entry = entry->next;
905	if (!entry)
906	return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
907	if (strcache2_is_equal (cache, entry, str, length, hash1))
908	return (const char *)(entry + 1);
909	cache->collision_3rd_count++;
910	}
911	/* not reached */
912	}
913
914	/* The public lookup (case insensitive) string interface. */
915	const char *
916	strcache2_ilookup (struct strcache2 cache, const char str, unsigned int length)
917	{
918	struct strcache2_entry const *entry;
919	unsigned int hash1 = strcache2_case_insensitive_hash_1 (str, length);
920	unsigned int idx;
921
922	assert (!cache->case_insensitive);
923
924	cache->lookup_count++;
925
926	/* Lookup the entry in the hash table, hoping for an
927	early match. */
928	idx = STRCACHE2_MOD_IT (cache, hash1);
929	entry = cache->hash_tab[idx];
930	if (!entry)
931	return NULL;
932	if (strcache2_is_equal (cache, entry, str, length, hash1))
933	return (const char *)(entry + 1);
934	cache->collision_1st_count++;
935
936	entry = entry->next;
937	if (!entry)
938	return NULL;
939	if (strcache2_is_equal (cache, entry, str, length, hash1))
940	return (const char *)(entry + 1);
941	cache->collision_2nd_count++;
942
943	/* (We've established hash2, so we can do a straight loop now.) */
944	for (;;)
945	{
946	entry = entry->next;
947	if (!entry)
948	return NULL;
949	if (strcache2_is_equal (cache, entry, str, length, hash1))
950	return (const char *)(entry + 1);
951	cache->collision_3rd_count++;
952	}
953	/* not reached */
954	}
955
956	#endif /* HAVE_CASE_INSENSITIVE_FS */
957
958	/* Is the given string cached? returns 1 if it is, 0 if it isn't. */
959	int
960	strcache2_is_cached (struct strcache2 cache, const char str)
961	{
962	/* Check mandatory alignment first. */
963	if (!((size_t)str & (sizeof (void *) - 1)))
964	{
965	/* Check the segment list and consider the question answered if the
966	string is within one of them. (Could check it more thoroughly...) */
967	struct strcache2_seg const *seg;
968	for (seg = cache->seg_head; seg; seg = seg->next)
969	if ((size_t)(str - seg->start) < seg->size)
970	return 1;
971	}
972
973	return 0;
974	}
975
976
977	/* Verify the integrity of the specified string, returning 0 if OK. */
978	int
979	strcache2_verify_entry (struct strcache2 cache, const char str)
980	{
981	struct strcache2_entry const *entry;
982	unsigned hash;
983	const char *end;
984
985	if ((size_t)str & (sizeof (void *) - 1))
986	{
987	fprintf (stderr,
988	"strcache2[%s]: missaligned string %p\nstring: %s\n",
989	cache->name, (void *)str, str);
990	return -1;
991	}
992
993	entry = (struct strcache2_entry const *)str - 1;
994	end = memchr (str, '\0', entry->length);
995	if ((size_t)(end - str) == entry->length)
996	{
997	fprintf (stderr,
998	"strcache2[%s]: corrupt entry %p, length: %lu, expected %u;\nstring: %s\n",
999	cache->name, (void *)entry, (unsigned long)(end - str), entry->length, str);
1000	return -1;
1001	}
1002
1003	hash = cache->case_insensitive
1004	? strcache2_case_insensitive_hash_1 (str, entry->length)
1005	: strcache2_case_sensitive_hash_1 (str, entry->length);
1006	if (hash != entry->hash1)
1007	{
1008	fprintf (stderr,
1009	"strcache2[%s]: corrupt entry %p, hash#1: %x, expected %x;\nstring: %s\n",
1010	cache->name, (void *)entry, hash, entry->hash1, str);
1011	return -1;
1012	}
1013
1014	return 0;
1015	}
1016
1017
1018	/* Calculates the case sensitive hash values of the string.
1019	The first hash is returned, the other is put at HASH2P. */
1020	unsigned int strcache2_hash_str (const char str, unsigned int length, unsigned int hash2p)
1021	{
1022	*hash2p = 1;
1023	return strcache2_case_sensitive_hash_1 (str, length);
1024	}
1025
1026	/* Calculates the case insensitive hash values of the string.
1027	The first hash is returned, the other is put at HASH2P. */
1028	unsigned int strcache2_hash_istr (const char str, unsigned int length, unsigned int hash2p)
1029	{
1030	*hash2p = 1;
1031	return strcache2_case_insensitive_hash_1 (str, length);
1032	}
1033
1034
1035
1036	/* Initalizes a new cache. */
1037	void
1038	strcache2_init (struct strcache2 cache, const char name, unsigned int size,
1039	unsigned int def_seg_size, int case_insensitive, int thread_safe)
1040	{
1041	unsigned hash_shift;
1042	assert (!thread_safe);
1043
1044	/* calc the size as a power of two */
1045	if (!size)
1046	hash_shift = STRCACHE2_HASH_SHIFT;
1047	else
1048	{
1049	assert (size <= (~0U / 2 + 1));
1050	for (hash_shift = 8; (1U << hash_shift) < size; hash_shift++)
1051	/* nothing */;
1052	}
1053
1054	/* adjust the default segment size */
1055	if (!def_seg_size)
1056	def_seg_size = STRCACHE2_SEG_SIZE;
1057	else if (def_seg_size < sizeof (struct strcache2_seg) * 10)
1058	def_seg_size = sizeof (struct strcache2_seg) * 10;
1059	else if ((def_seg_size & 0xfff) < 0xf00)
1060	def_seg_size = ((def_seg_size + 0xfff) & ~0xfffU);
1061
1062
1063	/* init the structure. */
1064	cache->case_insensitive = case_insensitive;
1065	#ifdef STRCACHE2_USE_MASK
1066	cache->hash_mask = (1U << hash_shift) - 1U;
1067	#else
1068	cache->hash_div = strcache2_find_prime(hash_shift);
1069	#endif
1070	cache->count = 0;
1071	cache->collision_count = 0;
1072	cache->lookup_count = 0;
1073	cache->collision_1st_count = 0;
1074	cache->collision_2nd_count = 0;
1075	cache->collision_3rd_count = 0;
1076	cache->rehash_count = (1U << hash_shift) / 4 * 3; /* rehash at 75% */
1077	cache->init_size = 1U << hash_shift;
1078	cache->hash_size = 1U << hash_shift;
1079	cache->def_seg_size = def_seg_size;
1080	cache->lock = NULL;
1081	cache->name = name;
1082
1083	/* allocate the hash table and first segment. */
1084	cache->hash_tab = (struct strcache2_entry **)
1085	xmalloc (cache->init_size * sizeof (struct strcache2_entry *));
1086	memset (cache->hash_tab, '\0', cache->init_size * sizeof (struct strcache2_entry *));
1087	strcache2_new_seg (cache, 0);
1088
1089	/* link it */
1090	cache->next = strcache_head;
1091	strcache_head = cache;
1092	}
1093
1094
1095	/* Terminates a string cache, freeing all memory and other
1096	associated resources. */
1097	void
1098	strcache2_term (struct strcache2 *cache)
1099	{
1100	/* unlink it */
1101	if (strcache_head == cache)
1102	strcache_head = cache->next;
1103	else
1104	{
1105	struct strcache2 *prev = strcache_head;
1106	while (prev->next != cache)
1107	prev = prev->next;
1108	assert (prev);
1109	prev->next = cache->next;
1110	}
1111
1112	/* free the memory segments */
1113	do
1114	{
1115	void *free_it = cache->seg_head;
1116	cache->seg_head = cache->seg_head->next;
1117	free (free_it);
1118	}
1119	while (cache->seg_head);
1120
1121	/* free the hash and clear the structure. */
1122	free (cache->hash_tab);
1123	memset (cache, '\0', sizeof (struct strcache2));
1124	}
1125
1126	/* Print statistics a string cache. */
1127	void
1128	strcache2_print_stats (struct strcache2 cache, const char prefix)
1129	{
1130	unsigned int seg_count = 0;
1131	unsigned long seg_total_bytes = 0;
1132	unsigned long seg_avg_bytes;
1133	unsigned long seg_avail_bytes = 0;
1134	unsigned long seg_max_bytes = 0;
1135	struct strcache2_seg *seg;
1136	unsigned int str_count = 0;
1137	unsigned long str_total_len = 0;
1138	unsigned int str_avg_len;
1139	unsigned int str_min_len = ~0U;
1140	unsigned int str_max_len = 0;
1141	unsigned int idx;
1142	unsigned int rehashes;
1143	unsigned int chain_depths[32];
1144
1145	printf (_("\n%s strcache2: %s\n"), prefix, cache->name);
1146
1147	/* Segment statistics. */
1148	for (seg = cache->seg_head; seg; seg = seg->next)
1149	{
1150	seg_count++;
1151	seg_total_bytes += seg->size;
1152	seg_avail_bytes += seg->avail;
1153	if (seg->size > seg_max_bytes)
1154	seg_max_bytes = seg->size;
1155	}
1156	seg_avg_bytes = seg_total_bytes / seg_count;
1157	printf (_("%s %u segments: total = %lu / max = %lu / avg = %lu / def = %u avail = %lu\n"),
1158	prefix, seg_count, seg_total_bytes, seg_max_bytes, seg_avg_bytes,
1159	cache->def_seg_size, seg_avail_bytes);
1160
1161	/* String statistics. */
1162	memset (chain_depths, '\0', sizeof (chain_depths));
1163	idx = cache->hash_size;
1164	while (idx-- > 0)
1165	{
1166	struct strcache2_entry const *entry = cache->hash_tab[idx];
1167	unsigned int depth = 0;
1168	for (; entry != 0; entry = entry->next, depth++)
1169	{
1170	unsigned int length = entry->length;
1171	str_total_len += length;
1172	if (length > str_max_len)
1173	str_max_len = length;
1174	if (length < str_min_len)
1175	str_min_len = length;
1176	str_count++;
1177	}
1178	chain_depths[depth >= 32 ? 31 : depth]++;
1179	}
1180	str_avg_len = cache->count ? str_total_len / cache->count : 0;
1181	printf (_("%s %u strings: total len = %lu / max = %u / avg = %u / min = %u\n"),
1182	prefix, cache->count, str_total_len, str_max_len, str_avg_len, str_min_len);
1183	if (str_count != cache->count)
1184	printf (_("%s string count mismatch! cache->count=%u, actual count is %u\n"), prefix,
1185	cache->count, str_count);
1186
1187	/* Hash statistics. */
1188	idx = cache->init_size;
1189	rehashes = 0;
1190	while (idx < cache->hash_size)
1191	{
1192	idx *= 2;
1193	rehashes++;
1194	}
1195
1196	#ifdef STRCACHE2_USE_MASK
1197	printf (_("%s hash size = %u mask = %#x rehashed %u times lookups = %lu\n"),
1198	prefix, cache->hash_size, cache->hash_mask, rehashes, cache->lookup_count);
1199	#else
1200	printf (_("%s hash size = %u div = %#x rehashed %u times lookups = %lu\n"),
1201	prefix, cache->hash_size, cache->hash_div, rehashes, cache->lookup_count);
1202	#endif
1203	printf (_("%s hash collisions 1st = %lu (%u%%) 2nd = %lu (%u%%) 3rd = %lu (%u%%)\n"),
1204	prefix,
1205	cache->collision_1st_count, (unsigned int)((100.0 * cache->collision_1st_count) / cache->lookup_count),
1206	cache->collision_2nd_count, (unsigned int)((100.0 * cache->collision_2nd_count) / cache->lookup_count),
1207	cache->collision_3rd_count, (unsigned int)((100.0 * cache->collision_3rd_count) / cache->lookup_count));
1208	printf (_("%s hash insert collisions = %u (%u%%)\n"),
1209	prefix, cache->collision_count,(unsigned int)((100.0 * cache->collision_count) / cache->count));
1210	printf (_("%s %5u (%u%%) empty hash table slots\n"),
1211	prefix, chain_depths[0], (unsigned int)((100.0 * chain_depths[0]) / cache->hash_size));
1212	printf (_("%s %5u (%u%%) occupied hash table slots\n"),
1213	prefix, chain_depths[1], (unsigned int)((100.0 * chain_depths[1]) / cache->hash_size));
1214	for (idx = 2; idx < 32; idx++)
1215	{
1216	unsigned strs_at_this_depth = chain_depths[idx];
1217	unsigned i;
1218	for (i = idx + 1; i < 32; i++)
1219	strs_at_this_depth += chain_depths[i];
1220	if (strs_at_this_depth)
1221	printf (_("%s %5u (%2u%%) with %u string%s chained on; %5u (%2u%%) strings at depth %u.\n"),
1222	prefix, chain_depths[idx], (unsigned int)((100.0 * chain_depths[idx]) / (cache->count - cache->collision_count)),
1223	idx - 1, idx == 2 ? " " : "s",
1224	strs_at_this_depth, (unsigned int)((100.0 * strs_at_this_depth) / cache->count), idx - 1);
1225	}
1226	}
1227
1228	/* Print statistics for all string caches. */
1229	void
1230	strcache2_print_stats_all (const char *prefix)
1231	{
1232	struct strcache2 *cur;
1233	for (cur = strcache_head; cur; cur = cur->next)
1234	strcache2_print_stats (cur, prefix);
1235	}
1236

Note: See TracBrowser for help on using the repository browser.

Download in other formats: