source: trunk/src/kmk/strcache.c@ 1857

Last change on this file since 1857 was 1857, checked in by bird, 17 years ago

kmk: improved the hashing of file table entries by making the strcache cache their hash values along with the string length.

  • Property svn:eol-style set to native
File size: 12.6 KB
Line 
1/* Constant string caching for GNU Make.
2Copyright (C) 2006 Free Software Foundation, Inc.
3This file is part of GNU Make.
4
5GNU Make is free software; you can redistribute it and/or modify it under the
6terms of the GNU General Public License as published by the Free Software
7Foundation; either version 2, or (at your option) any later version.
8
9GNU Make is distributed in the hope that it will be useful, but WITHOUT ANY
10WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
11A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12
13You should have received a copy of the GNU General Public License along with
14GNU Make; see the file COPYING. If not, write to the Free Software
15Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. */
16
17#include "make.h"
18
19#include <assert.h>
20
21#include "hash.h"
22
23/* The size (in bytes) of each cache buffer.
24 Try to pick something that will map well into the heap. */
25#define CACHE_BUFFER_SIZE (8192 - 16)
26
27
28/* A string cached here will never be freed, so we don't need to worry about
29 reference counting. We just store the string, and then remember it in a
30 hash so it can be looked up again. */
31
32struct strcache {
33 struct strcache *next; /* The next block of strings. */
34 char *end; /* Pointer to the beginning of the free space. */
35 int count; /* # of strings in this buffer (for stats). */
36 int bytesfree; /* The amount of the buffer that is free. */
37 char buffer[1]; /* The buffer comes after this. */
38};
39
40static int bufsize = CACHE_BUFFER_SIZE;
41static struct strcache *strcache = NULL;
42
43/* Add a new buffer to the cache. Add it at the front to reduce search time.
44 This can also increase the overhead, since it's less likely that older
45 buffers will be filled in. However, GNU make has so many smaller strings
46 that this doesn't seem to be much of an issue in practice.
47 */
48static struct strcache *
49new_cache()
50{
51 struct strcache *new;
52 new = xmalloc (sizeof (*new) + bufsize);
53 new->end = new->buffer;
54 new->count = 0;
55 new->bytesfree = bufsize;
56
57 new->next = strcache;
58 strcache = new;
59
60 return new;
61}
62
63#ifndef CONFIG_WITH_VALUE_LENGTH
64static const char *
65add_string(const char *str, int len)
66{
67 struct strcache *best = NULL;
68 struct strcache *sp;
69 const char *res;
70
71 /* If the string we want is too large to fit into a single buffer, then
72 we're screwed; nothing will ever fit! Change the maximum size of the
73 cache to be big enough. */
74 if (len > bufsize)
75 bufsize = len * 2;
76
77 /* First, find a cache with enough free space. We always look through all
78 the blocks and choose the one with the best fit (the one that leaves the
79 least amount of space free). */
80 for (sp = strcache; sp != NULL; sp = sp->next)
81 if (sp->bytesfree > len && (!best || best->bytesfree > sp->bytesfree))
82 best = sp;
83
84 /* If nothing is big enough, make a new cache. */
85 if (!best)
86 best = new_cache();
87
88 assert (best->bytesfree > len);
89
90 /* Add the string to the best cache. */
91 res = best->end;
92 memcpy (best->end, str, len);
93 best->end += len;
94 *(best->end++) = '\0';
95 best->bytesfree -= len + 1;
96 ++best->count;
97 return res;
98}
99
100#else /* CONFIG_WITH_VALUE_LENGTH */
101
102static const char *
103add_string(const char *str, struct strcache_pref *prefix)
104{
105 struct strcache *best = NULL;
106 struct strcache *sp;
107 struct strcache_pref *real_prefix;
108 int len;
109 const char *res;
110 char *dst;
111
112 /* Calc the entry length; the prefix data + the string + alignment. */
113 len = sizeof(struct strcache_pref) + prefix->len + 1;
114 len = (len + sizeof(void *) - 1) & ~(sizeof(void *) - 1);
115
116 /* If the string we want is too large to fit into a single buffer, then
117 we're screwed; nothing will ever fit! Change the maximum size of the
118 cache to be big enough. */
119 if (len > bufsize)
120 bufsize = len * 2;
121
122 /* First, find a cache with enough free space. We always look through all
123 the blocks and choose the one with the best fit (the one that leaves the
124 least amount of space free). */
125 for (sp = strcache; sp != NULL; sp = sp->next)
126 if (sp->bytesfree >= len && (!best || best->bytesfree > sp->bytesfree))
127 best = sp;
128
129 /* If nothing is big enough, make a new cache. */
130 if (!best)
131 best = new_cache();
132
133 assert (best->bytesfree >= len);
134
135 /* Add the string to the best cache. */
136 real_prefix = (struct strcache_pref *)best->end;
137 *real_prefix = *prefix;
138
139 res = dst = (char *)(real_prefix + 1);
140 assert(!((size_t)res & (sizeof(void *) - 1)));
141 memcpy (dst, str, prefix->len);
142 dst += prefix->len;
143 *(dst++) = '\0';
144
145 best->end += len;
146 best->bytesfree -= len;
147 ++best->count;
148
149 return res;
150}
151
152/* Hackish globals for passing data to the hash functions.
153 There isn't really any other way without running the
154 risk of breaking rehashing. */
155static const char *lookup_string;
156static struct strcache_pref *lookup_prefix;
157
158#endif /* CONFIG_WITH_VALUE_LENGTH */
159
160
161/* Hash table of strings in the cache. */
162
163static unsigned long
164str_hash_1 (const void *key)
165{
166#ifdef CONFIG_WITH_VALUE_LENGTH
167 if (MY_PREDICT_TRUE ((const char *) key == lookup_string))
168 return lookup_prefix->hash1;
169#endif
170 return_ISTRING_HASH_1 ((const char *) key);
171}
172
173static unsigned long
174str_hash_2 (const void *key)
175{
176#ifdef CONFIG_WITH_VALUE_LENGTH
177 if (MY_PREDICT_TRUE ((const char *) key == lookup_string))
178 {
179 if (lookup_prefix->hash2)
180 {
181 unsigned long hash2 = 0;
182 ISTRING_HASH_2 ((const char *)key, hash2);
183 lookup_prefix->hash2 = hash2;
184 }
185 return lookup_prefix->hash2;
186 }
187#endif
188 return_ISTRING_HASH_2 ((const char *) key);
189}
190
191static int
192str_hash_cmp (const void *x, const void *y)
193{
194#ifdef CONFIG_WITH_VALUE_LENGTH
195 /* Use the string length to avoid some unncessary comparing.
196 X is either the add_hash input (during hash_find_slot)
197 or a cache entry (during rare hash_insert_at calls).
198 This catches 520253 out of 1341947 calls in the typical
199 kBuild scenario. */
200
201 if (MY_PREDICT_TRUE ((const char *) x == lookup_string))
202 {
203 assert (lookup_prefix->len == strlen ((const char *)x));
204 if (strcache_get_len ((const char *)y) != lookup_prefix->len)
205 return -1;
206 }
207#endif
208 return_ISTRING_COMPARE ((const char *) x, (const char *) y);
209}
210
211static struct hash_table strings;
212static unsigned long total_adds = 0;
213
214#ifndef CONFIG_WITH_VALUE_LENGTH
215static const char *
216add_hash (const char *str, int len)
217{
218 /* Look up the string in the hash. If it's there, return it. */
219 char *const *slot = (char *const *) hash_find_slot (&strings, str);
220 const char *key = *slot;
221
222 /* Count the total number of adds we performed. */
223 ++total_adds;
224
225 if (!HASH_VACANT (key))
226 return key;
227
228 /* Not there yet so add it to a buffer, then into the hash table. */
229 key = add_string (str, len);
230 hash_insert_at (&strings, key, slot);
231 return key;
232}
233
234#else /* CONFIG_WITH_VALUE_LENGTH */
235
236static const char *
237add_hash (const char *str, int len, unsigned long hash1, unsigned long hash2)
238{
239 char *const *slot;
240 const char *key;
241 struct strcache_pref prefix;
242
243 /* Look up the string in the hash. If it's there, return it. */
244 prefix.len = len;
245 prefix.hash2 = hash2;
246 if (!hash1 && !hash2)
247 ISTRING_HASH_1 (str, hash1);
248 prefix.hash1 = hash1;
249
250 lookup_string = str;
251 lookup_prefix = &prefix;
252
253 slot = (char *const *) hash_find_slot (&strings, str);
254 key = *slot;
255
256 /* Count the total number of adds we performed. */
257 ++total_adds;
258
259 if (!HASH_VACANT (key))
260 return key;
261
262 /* Not there yet so add it to a buffer, then into the hash table. */
263 key = add_string (str, &prefix);
264 hash_insert_at (&strings, key, slot);
265 return key;
266}
267
268/* Verifies that a string cache entry didn't change and that the
269 prefix is still valid. */
270int
271strcache_check_sanity (const char *str)
272{
273 struct strcache_pref const *prefix = (struct strcache_pref const *)str - 1;
274 unsigned long hash;
275
276 if (strlen (str) != prefix->len)
277 {
278 MY_ASSERT_MSG (0, ("len: %u != %u - '%s'\n", (unsigned int)strlen (str),
279 prefix->len, str));
280 return -1;
281 }
282
283 hash = 0;
284 ISTRING_HASH_1 (str, hash);
285 if (hash != prefix->hash1)
286 {
287 MY_ASSERT_MSG (0, ("hash1: %lx != %lx - '%s'\n", hash, prefix->hash1, str));
288 return -1;
289 }
290
291 if (prefix->hash2)
292 {
293 hash = 0;
294 ISTRING_HASH_2 (str, hash);
295 if (hash != prefix->hash2)
296 {
297 MY_ASSERT_MSG (0, ("hash2: %lx != %lx - '%s'\n", hash, prefix->hash2, str));
298 return -1;
299 }
300 }
301
302 return 0;
303}
304
305/* Fallback for when the hash2 value isn't present. */
306unsigned long
307strcache_get_hash2_fallback (const char *str)
308{
309 struct strcache_pref *prefix = (struct strcache_pref *)str - 1;
310 unsigned long hash2 = 0;
311
312 ISTRING_HASH_2 (str, hash2);
313 prefix->hash2 = hash2;
314
315 return hash2;
316}
317
318#endif /* CONFIG_WITH_VALUE_LENGTH */
319
320/* Returns true if the string is in the cache; false if not. */
321int
322strcache_iscached (const char *str)
323{
324 struct strcache *sp;
325
326 for (sp = strcache; sp != 0; sp = sp->next)
327 if (str >= sp->buffer && str < sp->end)
328 return 1;
329
330 return 0;
331}
332
333/* If the string is already in the cache, return a pointer to the cached
334 version. If not, add it then return a pointer to the cached version.
335 Note we do NOT take control of the string passed in. */
336const char *
337strcache_add (const char *str)
338{
339#ifndef CONFIG_WITH_VALUE_LENGTH
340 return add_hash (str, strlen (str));
341#else
342 /* XXX: calc the hash1 while determining the string length. */
343 return add_hash (str, strlen (str), 0, 0);
344#endif
345}
346
347const char *
348strcache_add_len (const char *str, int len)
349{
350 /* If we're not given a nul-terminated string we have to create one, because
351 the hashing functions expect it. */
352 if (str[len] != '\0')
353 {
354 char *key = alloca (len + 1);
355 memcpy (key, str, len);
356 key[len] = '\0';
357 str = key;
358 }
359
360#ifndef CONFIG_WITH_VALUE_LENGTH
361 return add_hash (str, len);
362#else
363 /* XXX: eliminate the alloca mess using the prefixing? */
364 return add_hash (str, len, 0, 0);
365#endif
366}
367
368#ifdef CONFIG_WITH_INCLUDEDEP
369
370/* A special variant used by the includedep worker threads, it off loads
371 the main thread when it adds the strings to the cache later. */
372const char *
373strcache_add_prehashed (const char *str, int len, unsigned long hash1,
374 unsigned long hash2)
375{
376 return add_hash (str, len, hash1, hash2);
377}
378
379/* Performs the prehashing for use with strcache_add_prehashed(). */
380void
381strcache_prehash_str (const char *str, unsigned long *hash1p,
382 unsigned long *hash2p)
383{
384 *hash1p = str_hash_1 (str);
385 *hash2p = str_hash_2 (str);
386}
387
388#endif /* CONFIG_WITH_INCLUDEDEP */
389
390int
391strcache_setbufsize(int size)
392{
393 if (size > bufsize)
394 bufsize = size;
395 return bufsize;
396}
397
398void
399strcache_init (void)
400{
401#ifdef KMK
402 hash_init (&strings, 65535, str_hash_1, str_hash_2, str_hash_cmp);
403#else
404 hash_init (&strings, 8000, str_hash_1, str_hash_2, str_hash_cmp);
405#endif
406}
407
408
409/* Generate some stats output. */
410
411void
412strcache_print_stats (const char *prefix)
413{
414 int numbuffs = 0, numstrs = 0;
415 int totsize = 0, avgsize, maxsize = 0, minsize = bufsize;
416 int totfree = 0, avgfree, maxfree = 0, minfree = bufsize;
417 int lastused = 0, lastfree = 0;
418
419 if (strcache)
420 {
421 const struct strcache *sp;
422
423 /* Count the first buffer separately since it's not full. */
424 lastused = strcache->end - strcache->buffer;
425 lastfree = strcache->bytesfree;
426
427 for (sp = strcache->next; sp != NULL; sp = sp->next)
428 {
429 int bf = sp->bytesfree;
430 int sz = sp->end - sp->buffer;
431
432 ++numbuffs;
433 numstrs += sp->count;
434
435 totsize += sz;
436 maxsize = (sz > maxsize ? sz : maxsize);
437 minsize = (sz < minsize ? sz : minsize);
438
439 totfree += bf;
440 maxfree = (bf > maxfree ? bf : maxfree);
441 minfree = (bf < minfree ? bf : minfree);
442 }
443 }
444
445 avgsize = numbuffs ? (int)(totsize / numbuffs) : 0;
446 avgfree = numbuffs ? (int)(totfree / numbuffs) : 0;
447
448 printf (_("\n%s # of strings in strcache: %d / lookups = %lu / hits = %lu\n"),
449 prefix, numstrs, total_adds, (total_adds - numstrs));
450 printf (_("%s # of strcache buffers: %d (* %d B/buffer = %d B)\n"),
451 prefix, (numbuffs + 1), bufsize, ((numbuffs + 1) * bufsize));
452 printf (_("%s strcache used: total = %d (%d) / max = %d / min = %d / avg = %d\n"),
453 prefix, totsize, lastused, maxsize, minsize, avgsize);
454 printf (_("%s strcache free: total = %d (%d) / max = %d / min = %d / avg = %d\n"),
455 prefix, totfree, lastfree, maxfree, minfree, avgfree);
456
457 fputs (_("\n# strcache hash-table stats:\n# "), stdout);
458 hash_print_stats (&strings, stdout);
459}
Note: See TracBrowser for help on using the repository browser.