source: trunk/src/kmk/strcache.c@ 1854

Last change on this file since 1854 was 1854, checked in by bird, 17 years ago

kmk: offload hashing of strcache entries to the includedep thread(s).

  • Property svn:eol-style set to native
File size: 10.0 KB
Line 
1/* Constant string caching for GNU Make.
2Copyright (C) 2006 Free Software Foundation, Inc.
3This file is part of GNU Make.
4
5GNU Make is free software; you can redistribute it and/or modify it under the
6terms of the GNU General Public License as published by the Free Software
7Foundation; either version 2, or (at your option) any later version.
8
9GNU Make is distributed in the hope that it will be useful, but WITHOUT ANY
10WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
11A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12
13You should have received a copy of the GNU General Public License along with
14GNU Make; see the file COPYING. If not, write to the Free Software
15Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. */
16
17#include "make.h"
18
19#include <assert.h>
20
21#include "hash.h"
22
23/* The size (in bytes) of each cache buffer.
24 Try to pick something that will map well into the heap. */
25#define CACHE_BUFFER_SIZE (8192 - 16)
26
27
28/* A string cached here will never be freed, so we don't need to worry about
29 reference counting. We just store the string, and then remember it in a
30 hash so it can be looked up again. */
31
32struct strcache {
33 struct strcache *next; /* The next block of strings. */
34 char *end; /* Pointer to the beginning of the free space. */
35 int count; /* # of strings in this buffer (for stats). */
36 int bytesfree; /* The amount of the buffer that is free. */
37 char buffer[1]; /* The buffer comes after this. */
38};
39
40static int bufsize = CACHE_BUFFER_SIZE;
41static struct strcache *strcache = NULL;
42
43/* Add a new buffer to the cache. Add it at the front to reduce search time.
44 This can also increase the overhead, since it's less likely that older
45 buffers will be filled in. However, GNU make has so many smaller strings
46 that this doesn't seem to be much of an issue in practice.
47 */
48static struct strcache *
49new_cache()
50{
51 struct strcache *new;
52 new = xmalloc (sizeof (*new) + bufsize);
53 new->end = new->buffer;
54 new->count = 0;
55 new->bytesfree = bufsize;
56
57 new->next = strcache;
58 strcache = new;
59
60 return new;
61}
62
63static const char *
64add_string(const char *str, int len)
65{
66 struct strcache *best = NULL;
67 struct strcache *sp;
68 const char *res;
69#ifdef CONFIG_WITH_VALUE_LENGTH
70 int str_len = len;
71 char *tmp;
72
73 /* Add space a length prefix and the terminator and assure
74 (somewhat) natural alignment. */
75 len += sizeof(unsigned int) + 1;
76 len = (len + sizeof(void *) - 1) & ~(sizeof(void *) - 1);
77 --len;
78#endif
79
80 /* If the string we want is too large to fit into a single buffer, then
81 we're screwed; nothing will ever fit! Change the maximum size of the
82 cache to be big enough. */
83 if (len > bufsize)
84 bufsize = len * 2;
85
86 /* First, find a cache with enough free space. We always look through all
87 the blocks and choose the one with the best fit (the one that leaves the
88 least amount of space free). */
89 for (sp = strcache; sp != NULL; sp = sp->next)
90 if (sp->bytesfree > len && (!best || best->bytesfree > sp->bytesfree))
91 best = sp;
92
93 /* If nothing is big enough, make a new cache. */
94 if (!best)
95 best = new_cache();
96
97 assert (best->bytesfree > len);
98
99 /* Add the string to the best cache. */
100#ifndef CONFIG_WITH_VALUE_LENGTH
101 res = best->end;
102 memcpy (best->end, str, len);
103 best->end += len;
104 *(best->end++) = '\0';
105 best->bytesfree -= len + 1;
106 ++best->count;
107#else /* CONFIG_WITH_VALUE_LENGTH */
108 tmp = best->end;
109 best->end += len + 1;
110 assert(!((size_t)tmp & (sizeof(void *) - 1)));
111
112 *(unsigned int *)tmp = str_len; /* length prefix */
113 tmp += sizeof(unsigned int);
114
115 res = tmp;
116 memcpy (tmp, str, str_len);
117 tmp += str_len;
118 *(tmp++) = '\0';
119
120 best->bytesfree -= len + 1;
121 ++best->count;
122
123 assert (tmp <= best->end);
124 assert (!((size_t)res & (sizeof(void *) - 1)));
125#endif /* CONFIG_WITH_VALUE_LENGTH */
126 return res;
127}
128
129
130/* Hash table of strings in the cache. */
131
132#ifdef CONFIG_WITH_VALUE_LENGTH
133/* Hackish globals for passing data to the hash functions.
134 There isn't really any other way without running the
135 risk of breaking rehashing. */
136static const char *lookup_string;
137static unsigned int lookup_string_len;
138# ifdef CONFIG_WITH_INCLUDEDEP
139static unsigned long lookup_string_hash1;
140static unsigned long lookup_string_hash2;
141# endif /* CONFIG_WITH_INCLUDEDEP */
142#endif /* CONFIG_WITH_VALUE_LENGTH */
143
144static unsigned long
145str_hash_1 (const void *key)
146{
147#ifdef CONFIG_WITH_INCLUDEDEP
148 if ((const char *) key == lookup_string && lookup_string_hash1)
149 return lookup_string_hash1;
150#endif
151 return_ISTRING_HASH_1 ((const char *) key);
152}
153
154static unsigned long
155str_hash_2 (const void *key)
156{
157#ifdef CONFIG_WITH_INCLUDEDEP
158 if ((const char *) key == lookup_string && lookup_string_hash2)
159 return lookup_string_hash2;
160#endif
161 return_ISTRING_HASH_2 ((const char *) key);
162}
163
164static int
165str_hash_cmp (const void *x, const void *y)
166{
167#ifdef CONFIG_WITH_VALUE_LENGTH
168 /* Use the string length to avoid some unncessary comparing.
169 X is either the add_hash input (during hash_find_slot)
170 or a cache entry (during rare hash_insert_at calls).
171 This catches 520253 out of 1341947 calls in the typical
172 kBuild scenario. */
173
174 if ((const char *) x == lookup_string)
175 {
176 assert (lookup_string_len == strlen ((const char *)x));
177 if (strcache_get_len ((const char *)y) != lookup_string_len)
178 return -1;
179 }
180#endif
181 return_ISTRING_COMPARE ((const char *) x, (const char *) y);
182}
183
184static struct hash_table strings;
185static unsigned long total_adds = 0;
186
187static const char *
188add_hash (const char *str, int len)
189{
190 /* Look up the string in the hash. If it's there, return it. */
191#ifndef CONFIG_WITH_VALUE_LENGTH
192 char *const *slot = (char *const *) hash_find_slot (&strings, str);
193 const char *key = *slot;
194#else /* CONFIG_WITH_VALUE_LENGTH */
195 char *const *slot;
196 const char *key;
197
198 lookup_string = str;
199 lookup_string_len = len;
200 slot = (char *const *) hash_find_slot (&strings, str);
201 key = *slot;
202#endif /* CONFIG_WITH_VALUE_LENGTH */
203
204 /* Count the total number of adds we performed. */
205 ++total_adds;
206
207 if (!HASH_VACANT (key))
208 return key;
209
210 /* Not there yet so add it to a buffer, then into the hash table. */
211 key = add_string (str, len);
212 hash_insert_at (&strings, key, slot);
213 return key;
214}
215
216/* Returns true if the string is in the cache; false if not. */
217int
218strcache_iscached (const char *str)
219{
220 struct strcache *sp;
221
222 for (sp = strcache; sp != 0; sp = sp->next)
223 if (str >= sp->buffer && str < sp->end)
224 return 1;
225
226 return 0;
227}
228
229/* If the string is already in the cache, return a pointer to the cached
230 version. If not, add it then return a pointer to the cached version.
231 Note we do NOT take control of the string passed in. */
232const char *
233strcache_add (const char *str)
234{
235 return add_hash (str, strlen (str));
236}
237
238const char *
239strcache_add_len (const char *str, int len)
240{
241 /* If we're not given a nul-terminated string we have to create one, because
242 the hashing functions expect it. */
243 if (str[len] != '\0')
244 {
245 char *key = alloca (len + 1);
246 memcpy (key, str, len);
247 key[len] = '\0';
248 str = key;
249 }
250
251 return add_hash (str, len);
252}
253
254#ifdef CONFIG_WITH_INCLUDEDEP
255
256/* A special variant used by the includedep worker threads, it off loads
257 the main thread when it adds the strings to the cache later. */
258const char *
259strcache_add_prehashed (const char *str, int len, unsigned long hash1,
260 unsigned long hash2)
261{
262 const char *retstr;
263
264 assert (hash1 == str_hash_1 (str));
265 assert (hash2 == str_hash_2 (str));
266
267 lookup_string_hash1 = hash1;
268 lookup_string_hash2 = hash2;
269
270 retstr = add_hash (str, len);
271
272 lookup_string_hash1 = 0;
273 lookup_string_hash2 = 0;
274
275 return retstr;
276}
277
278/* Performs the prehashing for use with strcache_add_prehashed(). */
279void
280strcache_prehash_str (const char *str, unsigned long *hash1p,
281 unsigned long *hash2p)
282{
283 *hash1p = str_hash_1 (str);
284 *hash2p = str_hash_2 (str);
285}
286
287#endif /* CONFIG_WITH_INCLUDEDEP */
288
289int
290strcache_setbufsize(int size)
291{
292 if (size > bufsize)
293 bufsize = size;
294 return bufsize;
295}
296
297void
298strcache_init (void)
299{
300#ifdef KMK
301 hash_init (&strings, 65535, str_hash_1, str_hash_2, str_hash_cmp);
302#else
303 hash_init (&strings, 8000, str_hash_1, str_hash_2, str_hash_cmp);
304#endif
305}
306
307
308/* Generate some stats output. */
309
310void
311strcache_print_stats (const char *prefix)
312{
313 int numbuffs = 0, numstrs = 0;
314 int totsize = 0, avgsize, maxsize = 0, minsize = bufsize;
315 int totfree = 0, avgfree, maxfree = 0, minfree = bufsize;
316 int lastused = 0, lastfree = 0;
317
318 if (strcache)
319 {
320 const struct strcache *sp;
321
322 /* Count the first buffer separately since it's not full. */
323 lastused = strcache->end - strcache->buffer;
324 lastfree = strcache->bytesfree;
325
326 for (sp = strcache->next; sp != NULL; sp = sp->next)
327 {
328 int bf = sp->bytesfree;
329 int sz = sp->end - sp->buffer;
330
331 ++numbuffs;
332 numstrs += sp->count;
333
334 totsize += sz;
335 maxsize = (sz > maxsize ? sz : maxsize);
336 minsize = (sz < minsize ? sz : minsize);
337
338 totfree += bf;
339 maxfree = (bf > maxfree ? bf : maxfree);
340 minfree = (bf < minfree ? bf : minfree);
341 }
342 }
343
344 avgsize = numbuffs ? (int)(totsize / numbuffs) : 0;
345 avgfree = numbuffs ? (int)(totfree / numbuffs) : 0;
346
347 printf (_("\n%s # of strings in strcache: %d / lookups = %lu / hits = %lu\n"),
348 prefix, numstrs, total_adds, (total_adds - numstrs));
349 printf (_("%s # of strcache buffers: %d (* %d B/buffer = %d B)\n"),
350 prefix, (numbuffs + 1), bufsize, ((numbuffs + 1) * bufsize));
351 printf (_("%s strcache used: total = %d (%d) / max = %d / min = %d / avg = %d\n"),
352 prefix, totsize, lastused, maxsize, minsize, avgsize);
353 printf (_("%s strcache free: total = %d (%d) / max = %d / min = %d / avg = %d\n"),
354 prefix, totfree, lastfree, maxfree, minfree, avgfree);
355
356 fputs (_("\n# strcache hash-table stats:\n# "), stdout);
357 hash_print_stats (&strings, stdout);
358}
Note: See TracBrowser for help on using the repository browser.