source: trunk/src/kmk/strcache.c@ 1847

Last change on this file since 1847 was 1845, checked in by bird, 17 years ago

kmk/strcache: Make use of the string length to avoid expensive compare calls. (sorted out 38% of the calls in a kBuild test)

  • Property svn:eol-style set to native
File size: 8.6 KB
Line 
1/* Constant string caching for GNU Make.
2Copyright (C) 2006 Free Software Foundation, Inc.
3This file is part of GNU Make.
4
5GNU Make is free software; you can redistribute it and/or modify it under the
6terms of the GNU General Public License as published by the Free Software
7Foundation; either version 2, or (at your option) any later version.
8
9GNU Make is distributed in the hope that it will be useful, but WITHOUT ANY
10WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
11A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12
13You should have received a copy of the GNU General Public License along with
14GNU Make; see the file COPYING. If not, write to the Free Software
15Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. */
16
17#include "make.h"
18
19#include <assert.h>
20
21#include "hash.h"
22
23/* The size (in bytes) of each cache buffer.
24 Try to pick something that will map well into the heap. */
25#define CACHE_BUFFER_SIZE (8192 - 16)
26
27
28/* A string cached here will never be freed, so we don't need to worry about
29 reference counting. We just store the string, and then remember it in a
30 hash so it can be looked up again. */
31
32struct strcache {
33 struct strcache *next; /* The next block of strings. */
34 char *end; /* Pointer to the beginning of the free space. */
35 int count; /* # of strings in this buffer (for stats). */
36 int bytesfree; /* The amount of the buffer that is free. */
37 char buffer[1]; /* The buffer comes after this. */
38};
39
40static int bufsize = CACHE_BUFFER_SIZE;
41static struct strcache *strcache = NULL;
42
43/* Add a new buffer to the cache. Add it at the front to reduce search time.
44 This can also increase the overhead, since it's less likely that older
45 buffers will be filled in. However, GNU make has so many smaller strings
46 that this doesn't seem to be much of an issue in practice.
47 */
48static struct strcache *
49new_cache()
50{
51 struct strcache *new;
52 new = xmalloc (sizeof (*new) + bufsize);
53 new->end = new->buffer;
54 new->count = 0;
55 new->bytesfree = bufsize;
56
57 new->next = strcache;
58 strcache = new;
59
60 return new;
61}
62
63static const char *
64add_string(const char *str, int len)
65{
66 struct strcache *best = NULL;
67 struct strcache *sp;
68 const char *res;
69#ifdef CONFIG_WITH_VALUE_LENGTH
70 int str_len = len;
71 char *tmp;
72
73 /* Add space a length prefix and the terminator and assure
74 (somewhat) natural alignment. */
75 len += sizeof(unsigned int) + 1;
76 len = (len + sizeof(void *) - 1) & ~(sizeof(void *) - 1);
77 --len;
78#endif
79
80 /* If the string we want is too large to fit into a single buffer, then
81 we're screwed; nothing will ever fit! Change the maximum size of the
82 cache to be big enough. */
83 if (len > bufsize)
84 bufsize = len * 2;
85
86 /* First, find a cache with enough free space. We always look through all
87 the blocks and choose the one with the best fit (the one that leaves the
88 least amount of space free). */
89 for (sp = strcache; sp != NULL; sp = sp->next)
90 if (sp->bytesfree > len && (!best || best->bytesfree > sp->bytesfree))
91 best = sp;
92
93 /* If nothing is big enough, make a new cache. */
94 if (!best)
95 best = new_cache();
96
97 assert (best->bytesfree > len);
98
99 /* Add the string to the best cache. */
100#ifndef CONFIG_WITH_VALUE_LENGTH
101 res = best->end;
102 memcpy (best->end, str, len);
103 best->end += len;
104 *(best->end++) = '\0';
105 best->bytesfree -= len + 1;
106 ++best->count;
107#else /* CONFIG_WITH_VALUE_LENGTH */
108 tmp = best->end;
109 best->end += len + 1;
110 assert(!((size_t)tmp & (sizeof(void *) - 1)));
111
112 *(unsigned int *)tmp = str_len; /* length prefix */
113 tmp += sizeof(unsigned int);
114
115 res = tmp;
116 memcpy (tmp, str, str_len);
117 tmp += str_len;
118 *(tmp++) = '\0';
119
120 best->bytesfree -= len + 1;
121 ++best->count;
122
123 assert (tmp <= best->end);
124 assert (!((size_t)res & (sizeof(void *) - 1)));
125#endif /* CONFIG_WITH_VALUE_LENGTH */
126 return res;
127}
128
129
130/* Hash table of strings in the cache. */
131
132#ifdef CONFIG_WITH_VALUE_LENGTH
133static const char *lookup_string;
134static unsigned int lookup_string_len;
135#endif
136
137static unsigned long
138str_hash_1 (const void *key)
139{
140 return_ISTRING_HASH_1 ((const char *) key);
141}
142
143static unsigned long
144str_hash_2 (const void *key)
145{
146 return_ISTRING_HASH_2 ((const char *) key);
147}
148
149static int
150str_hash_cmp (const void *x, const void *y)
151{
152#ifdef CONFIG_WITH_VALUE_LENGTH
153 /* Use the string length to avoid some unncessary comparing.
154 X is either the add_hash input (during hash_find_slot)
155 or a cache entry (during rare hash_insert_at calls).
156 This catches 520253 out of 1341947 calls in the typical
157 kBuild scenario. */
158
159 if (x == lookup_string)
160 {
161 assert (lookup_string_len == strlen ((const char *)x));
162 if (strcache_get_len ((const char *)y) != lookup_string_len)
163 return -1;
164 }
165#endif
166 return_ISTRING_COMPARE ((const char *) x, (const char *) y);
167}
168
169static struct hash_table strings;
170static unsigned long total_adds = 0;
171
172static const char *
173add_hash (const char *str, int len)
174{
175 /* Look up the string in the hash. If it's there, return it. */
176#ifndef CONFIG_WITH_VALUE_LENGTH
177 char *const *slot = (char *const *) hash_find_slot (&strings, str);
178 const char *key = *slot;
179#else /* CONFIG_WITH_VALUE_LENGTH */
180 char *const *slot;
181 const char *key;
182
183 lookup_string = str;
184 lookup_string_len = len;
185 slot = (char *const *) hash_find_slot (&strings, str);
186 key = *slot;
187#endif /* CONFIG_WITH_VALUE_LENGTH */
188
189 /* Count the total number of adds we performed. */
190 ++total_adds;
191
192 if (!HASH_VACANT (key))
193 return key;
194
195 /* Not there yet so add it to a buffer, then into the hash table. */
196 key = add_string (str, len);
197 hash_insert_at (&strings, key, slot);
198 return key;
199}
200
201/* Returns true if the string is in the cache; false if not. */
202int
203strcache_iscached (const char *str)
204{
205 struct strcache *sp;
206
207 for (sp = strcache; sp != 0; sp = sp->next)
208 if (str >= sp->buffer && str < sp->end)
209 return 1;
210
211 return 0;
212}
213
214/* If the string is already in the cache, return a pointer to the cached
215 version. If not, add it then return a pointer to the cached version.
216 Note we do NOT take control of the string passed in. */
217const char *
218strcache_add (const char *str)
219{
220 return add_hash (str, strlen (str));
221}
222
223const char *
224strcache_add_len (const char *str, int len)
225{
226 /* If we're not given a nul-terminated string we have to create one, because
227 the hashing functions expect it. */
228 if (str[len] != '\0')
229 {
230 char *key = alloca (len + 1);
231 memcpy (key, str, len);
232 key[len] = '\0';
233 str = key;
234 }
235
236 return add_hash (str, len);
237}
238
239int
240strcache_setbufsize(int size)
241{
242 if (size > bufsize)
243 bufsize = size;
244 return bufsize;
245}
246
247void
248strcache_init (void)
249{
250#ifdef KMK
251 hash_init (&strings, 65535, str_hash_1, str_hash_2, str_hash_cmp);
252#else
253 hash_init (&strings, 8000, str_hash_1, str_hash_2, str_hash_cmp);
254#endif
255}
256
257
258/* Generate some stats output. */
259
260void
261strcache_print_stats (const char *prefix)
262{
263 int numbuffs = 0, numstrs = 0;
264 int totsize = 0, avgsize, maxsize = 0, minsize = bufsize;
265 int totfree = 0, avgfree, maxfree = 0, minfree = bufsize;
266 int lastused = 0, lastfree = 0;
267
268 if (strcache)
269 {
270 const struct strcache *sp;
271
272 /* Count the first buffer separately since it's not full. */
273 lastused = strcache->end - strcache->buffer;
274 lastfree = strcache->bytesfree;
275
276 for (sp = strcache->next; sp != NULL; sp = sp->next)
277 {
278 int bf = sp->bytesfree;
279 int sz = sp->end - sp->buffer;
280
281 ++numbuffs;
282 numstrs += sp->count;
283
284 totsize += sz;
285 maxsize = (sz > maxsize ? sz : maxsize);
286 minsize = (sz < minsize ? sz : minsize);
287
288 totfree += bf;
289 maxfree = (bf > maxfree ? bf : maxfree);
290 minfree = (bf < minfree ? bf : minfree);
291 }
292 }
293
294 avgsize = numbuffs ? (int)(totsize / numbuffs) : 0;
295 avgfree = numbuffs ? (int)(totfree / numbuffs) : 0;
296
297 printf (_("\n%s # of strings in strcache: %d / lookups = %lu / hits = %lu\n"),
298 prefix, numstrs, total_adds, (total_adds - numstrs));
299 printf (_("%s # of strcache buffers: %d (* %d B/buffer = %d B)\n"),
300 prefix, (numbuffs + 1), bufsize, ((numbuffs + 1) * bufsize));
301 printf (_("%s strcache used: total = %d (%d) / max = %d / min = %d / avg = %d\n"),
302 prefix, totsize, lastused, maxsize, minsize, avgsize);
303 printf (_("%s strcache free: total = %d (%d) / max = %d / min = %d / avg = %d\n"),
304 prefix, totfree, lastfree, maxfree, minfree, avgfree);
305
306 fputs (_("\n# strcache hash-table stats:\n# "), stdout);
307 hash_print_stats (&strings, stdout);
308}
Note: See TracBrowser for help on using the repository browser.