source: branches/samba-3.5.x/lib/tdb/common/check.c

Last change on this file was 414, checked in by Herwig Bauernfeind, 15 years ago

Samba 3.5.0: Initial import

File size: 11.5 KB
Line 
1 /*
2 Unix SMB/CIFS implementation.
3
4 trivial database library
5
6 Copyright (C) Rusty Russell 2009
7
8 ** NOTE! The following LGPL license applies to the tdb
9 ** library. This does NOT imply that all of Samba is released
10 ** under the LGPL
11
12 This library is free software; you can redistribute it and/or
13 modify it under the terms of the GNU Lesser General Public
14 License as published by the Free Software Foundation; either
15 version 3 of the License, or (at your option) any later version.
16
17 This library is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 Lesser General Public License for more details.
21
22 You should have received a copy of the GNU Lesser General Public
23 License along with this library; if not, see <http://www.gnu.org/licenses/>.
24*/
25#include "tdb_private.h"
26
27/* Since we opened it, these shouldn't fail unless it's recent corruption. */
28static bool tdb_check_header(struct tdb_context *tdb, tdb_off_t *recovery)
29{
30 struct tdb_header hdr;
31
32 if (tdb->methods->tdb_read(tdb, 0, &hdr, sizeof(hdr), DOCONV()) == -1)
33 return false;
34 if (strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0)
35 goto corrupt;
36
37 CONVERT(hdr);
38 if (hdr.version != TDB_VERSION)
39 goto corrupt;
40
41 if (hdr.rwlocks != 0)
42 goto corrupt;
43
44 if (hdr.hash_size == 0)
45 goto corrupt;
46
47 if (hdr.hash_size != tdb->header.hash_size)
48 goto corrupt;
49
50 if (hdr.recovery_start != 0 &&
51 hdr.recovery_start < TDB_DATA_START(tdb->header.hash_size))
52 goto corrupt;
53
54 *recovery = hdr.recovery_start;
55 return true;
56
57corrupt:
58 tdb->ecode = TDB_ERR_CORRUPT;
59 TDB_LOG((tdb, TDB_DEBUG_ERROR, "Header is corrupt\n"));
60 return false;
61}
62
63/* Generic record header check. */
64static bool tdb_check_record(struct tdb_context *tdb,
65 tdb_off_t off,
66 const struct tdb_record *rec)
67{
68 tdb_off_t tailer;
69
70 /* Check rec->next: 0 or points to record offset, aligned. */
71 if (rec->next > 0 && rec->next < TDB_DATA_START(tdb->header.hash_size)){
72 TDB_LOG((tdb, TDB_DEBUG_ERROR,
73 "Record offset %d too small next %d\n",
74 off, rec->next));
75 goto corrupt;
76 }
77 if (rec->next + sizeof(*rec) < rec->next) {
78 TDB_LOG((tdb, TDB_DEBUG_ERROR,
79 "Record offset %d too large next %d\n",
80 off, rec->next));
81 goto corrupt;
82 }
83 if ((rec->next % TDB_ALIGNMENT) != 0) {
84 TDB_LOG((tdb, TDB_DEBUG_ERROR,
85 "Record offset %d misaligned next %d\n",
86 off, rec->next));
87 goto corrupt;
88 }
89 if (tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0))
90 goto corrupt;
91
92 /* Check rec_len: similar to rec->next, implies next record. */
93 if ((rec->rec_len % TDB_ALIGNMENT) != 0) {
94 TDB_LOG((tdb, TDB_DEBUG_ERROR,
95 "Record offset %d misaligned length %d\n",
96 off, rec->rec_len));
97 goto corrupt;
98 }
99 /* Must fit tailer. */
100 if (rec->rec_len < sizeof(tailer)) {
101 TDB_LOG((tdb, TDB_DEBUG_ERROR,
102 "Record offset %d too short length %d\n",
103 off, rec->rec_len));
104 goto corrupt;
105 }
106 /* OOB allows "right at the end" access, so this works for last rec. */
107 if (tdb->methods->tdb_oob(tdb, off+sizeof(*rec)+rec->rec_len, 0))
108 goto corrupt;
109
110 /* Check tailer. */
111 if (tdb_ofs_read(tdb, off+sizeof(*rec)+rec->rec_len-sizeof(tailer),
112 &tailer) == -1)
113 goto corrupt;
114 if (tailer != sizeof(*rec) + rec->rec_len) {
115 TDB_LOG((tdb, TDB_DEBUG_ERROR,
116 "Record offset %d invalid tailer\n", off));
117 goto corrupt;
118 }
119
120 return true;
121
122corrupt:
123 tdb->ecode = TDB_ERR_CORRUPT;
124 return false;
125}
126
127/* Grab some bytes: may copy if can't use mmap.
128 Caller has already done bounds check. */
129static TDB_DATA get_bytes(struct tdb_context *tdb,
130 tdb_off_t off, tdb_len_t len)
131{
132 TDB_DATA d;
133
134 d.dsize = len;
135
136 if (tdb->transaction == NULL && tdb->map_ptr != NULL)
137 d.dptr = (unsigned char *)tdb->map_ptr + off;
138 else
139 d.dptr = tdb_alloc_read(tdb, off, d.dsize);
140 return d;
141}
142
143/* Frees data if we're not able to simply use mmap. */
144static void put_bytes(struct tdb_context *tdb, TDB_DATA d)
145{
146 if (tdb->transaction == NULL && tdb->map_ptr != NULL)
147 return;
148 free(d.dptr);
149}
150
151/* We use the excellent Jenkins lookup3 hash; this is based on hash_word2.
152 * See: http://burtleburtle.net/bob/c/lookup3.c
153 */
154#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
155static void hash(uint32_t key, uint32_t *pc, uint32_t *pb)
156{
157 uint32_t a,b,c;
158
159 /* Set up the internal state */
160 a = b = c = 0xdeadbeef + *pc;
161 c += *pb;
162 a += key;
163 c ^= b; c -= rot(b,14);
164 a ^= c; a -= rot(c,11);
165 b ^= a; b -= rot(a,25);
166 c ^= b; c -= rot(b,16);
167 a ^= c; a -= rot(c,4);
168 b ^= a; b -= rot(a,14);
169 c ^= b; c -= rot(b,24);
170 *pc=c; *pb=b;
171}
172
173/*
174 We want to check that all free records are in the free list
175 (only once), and all free list entries are free records. Similarly
176 for each hash chain of used records.
177
178 Doing that naively (without walking hash chains, since we want to be
179 linear) means keeping a list of records which have been seen in each
180 hash chain, and another of records pointed to (ie. next pointers
181 from records and the initial hash chain heads). These two lists
182 should be equal. This will take 8 bytes per record, and require
183 sorting at the end.
184
185 So instead, we record each offset in a bitmap such a way that
186 recording it twice will cancel out. Since each offset should appear
187 exactly twice, the bitmap should be zero at the end.
188
189 The approach was inspired by Bloom Filters (see Wikipedia). For
190 each value, we flip K bits in a bitmap of size N. The number of
191 distinct arrangements is:
192
193 N! / (K! * (N-K)!)
194
195 Of course, not all arrangements are actually distinct, but testing
196 shows this formula to be close enough.
197
198 So, if K == 8 and N == 256, the probability of two things flipping the same
199 bits is 1 in 409,663,695,276,000.
200
201 Given that ldb uses a hash size of 10000, using 32 bytes per hash chain
202 (320k) seems reasonable.
203*/
204#define NUM_HASHES 8
205#define BITMAP_BITS 256
206
207static void bit_flip(unsigned char bits[], unsigned int idx)
208{
209 bits[idx / CHAR_BIT] ^= (1 << (idx % CHAR_BIT));
210}
211
212/* We record offsets in a bitmap for the particular chain it should be in. */
213static void record_offset(unsigned char bits[], tdb_off_t off)
214{
215 uint32_t h1 = off, h2 = 0;
216 unsigned int i;
217
218 /* We get two good hash values out of jhash2, so we use both. Then
219 * we keep going to produce further hash values. */
220 for (i = 0; i < NUM_HASHES / 2; i++) {
221 hash(off, &h1, &h2);
222 bit_flip(bits, h1 % BITMAP_BITS);
223 bit_flip(bits, h2 % BITMAP_BITS);
224 h2++;
225 }
226}
227
228/* Check that an in-use record is valid. */
229static bool tdb_check_used_record(struct tdb_context *tdb,
230 tdb_off_t off,
231 const struct tdb_record *rec,
232 unsigned char **hashes,
233 int (*check)(TDB_DATA, TDB_DATA, void *),
234 void *private_data)
235{
236 TDB_DATA key, data;
237
238 if (!tdb_check_record(tdb, off, rec))
239 return false;
240
241 /* key + data + tailer must fit in record */
242 if (rec->key_len + rec->data_len + sizeof(tdb_off_t) > rec->rec_len) {
243 TDB_LOG((tdb, TDB_DEBUG_ERROR,
244 "Record offset %d too short for contents\n", off));
245 return false;
246 }
247
248 key = get_bytes(tdb, off + sizeof(*rec), rec->key_len);
249 if (!key.dptr)
250 return false;
251
252 if (tdb->hash_fn(&key) != rec->full_hash) {
253 TDB_LOG((tdb, TDB_DEBUG_ERROR,
254 "Record offset %d has incorrect hash\n", off));
255 goto fail_put_key;
256 }
257
258 /* Mark this offset as a known value for this hash bucket. */
259 record_offset(hashes[BUCKET(rec->full_hash)+1], off);
260 /* And similarly if the next pointer is valid. */
261 if (rec->next)
262 record_offset(hashes[BUCKET(rec->full_hash)+1], rec->next);
263
264 /* If they supply a check function and this record isn't dead,
265 get data and feed it. */
266 if (check && rec->magic != TDB_DEAD_MAGIC) {
267 data = get_bytes(tdb, off + sizeof(*rec) + rec->key_len,
268 rec->data_len);
269 if (!data.dptr)
270 goto fail_put_key;
271
272 if (check(key, data, private_data) == -1)
273 goto fail_put_data;
274 put_bytes(tdb, data);
275 }
276
277 put_bytes(tdb, key);
278 return true;
279
280fail_put_data:
281 put_bytes(tdb, data);
282fail_put_key:
283 put_bytes(tdb, key);
284 return false;
285}
286
287/* Check that an unused record is valid. */
288static bool tdb_check_free_record(struct tdb_context *tdb,
289 tdb_off_t off,
290 const struct tdb_record *rec,
291 unsigned char **hashes)
292{
293 if (!tdb_check_record(tdb, off, rec))
294 return false;
295
296 /* Mark this offset as a known value for the free list. */
297 record_offset(hashes[0], off);
298 /* And similarly if the next pointer is valid. */
299 if (rec->next)
300 record_offset(hashes[0], rec->next);
301 return true;
302}
303
304int tdb_check(struct tdb_context *tdb,
305 int (*check)(TDB_DATA key, TDB_DATA data, void *private_data),
306 void *private_data)
307{
308 unsigned int h;
309 unsigned char **hashes;
310 tdb_off_t off, recovery_start;
311 struct tdb_record rec;
312 bool found_recovery = false;
313
314 if (tdb_lockall(tdb) == -1)
315 return -1;
316
317 /* Make sure we know true size of the underlying file. */
318 tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1);
319
320 /* Header must be OK: also gets us the recovery ptr, if any. */
321 if (!tdb_check_header(tdb, &recovery_start))
322 goto unlock;
323
324 /* We should have the whole header, too. */
325 if (tdb->map_size < TDB_DATA_START(tdb->header.hash_size)) {
326 tdb->ecode = TDB_ERR_CORRUPT;
327 TDB_LOG((tdb, TDB_DEBUG_ERROR, "File too short for hashes\n"));
328 goto unlock;
329 }
330
331 /* One big malloc: pointers then bit arrays. */
332 hashes = (unsigned char **)calloc(
333 1, sizeof(hashes[0]) * (1+tdb->header.hash_size)
334 + BITMAP_BITS / CHAR_BIT * (1+tdb->header.hash_size));
335 if (!hashes) {
336 tdb->ecode = TDB_ERR_OOM;
337 goto unlock;
338 }
339
340 /* Initialize pointers */
341 hashes[0] = (unsigned char *)(&hashes[1+tdb->header.hash_size]);
342 for (h = 1; h < 1+tdb->header.hash_size; h++)
343 hashes[h] = hashes[h-1] + BITMAP_BITS / CHAR_BIT;
344
345 /* Freelist and hash headers are all in a row: read them. */
346 for (h = 0; h < 1+tdb->header.hash_size; h++) {
347 if (tdb_ofs_read(tdb, FREELIST_TOP + h*sizeof(tdb_off_t),
348 &off) == -1)
349 goto free;
350 if (off)
351 record_offset(hashes[h], off);
352 }
353
354 /* For each record, read it in and check it's ok. */
355 for (off = TDB_DATA_START(tdb->header.hash_size);
356 off < tdb->map_size;
357 off += sizeof(rec) + rec.rec_len) {
358 if (tdb->methods->tdb_read(tdb, off, &rec, sizeof(rec),
359 DOCONV()) == -1)
360 goto free;
361 switch (rec.magic) {
362 case TDB_MAGIC:
363 case TDB_DEAD_MAGIC:
364 if (!tdb_check_used_record(tdb, off, &rec, hashes,
365 check, private_data))
366 goto free;
367 break;
368 case TDB_FREE_MAGIC:
369 if (!tdb_check_free_record(tdb, off, &rec, hashes))
370 goto free;
371 break;
372 case TDB_RECOVERY_MAGIC:
373 case 0: /* Used for invalid (or in-progress) recovery area. */
374 if (recovery_start != off) {
375 TDB_LOG((tdb, TDB_DEBUG_ERROR,
376 "Unexpected recovery record at offset %d\n",
377 off));
378 goto free;
379 }
380 found_recovery = true;
381 break;
382 default:
383 tdb->ecode = TDB_ERR_CORRUPT;
384 TDB_LOG((tdb, TDB_DEBUG_ERROR,
385 "Bad magic 0x%x at offset %d\n",
386 rec.magic, off));
387 goto free;
388 }
389 }
390
391 /* Now, hashes should all be empty: each record exists and is referred
392 * to by one other. */
393 for (h = 0; h < 1+tdb->header.hash_size; h++) {
394 unsigned int i;
395 for (i = 0; i < BITMAP_BITS / CHAR_BIT; i++) {
396 if (hashes[h][i] != 0) {
397 tdb->ecode = TDB_ERR_CORRUPT;
398 TDB_LOG((tdb, TDB_DEBUG_ERROR,
399 "Hashes do not match records\n"));
400 goto free;
401 }
402 }
403 }
404
405 /* We must have found recovery area if there was one. */
406 if (recovery_start != 0 && !found_recovery) {
407 TDB_LOG((tdb, TDB_DEBUG_ERROR,
408 "Expected %s recovery area, got %s\n",
409 recovery_start ? "a" : "no",
410 found_recovery ? "one" : "none"));
411 goto free;
412 }
413
414 free(hashes);
415 tdb_unlockall(tdb);
416 return 0;
417
418free:
419 free(hashes);
420unlock:
421 tdb_unlockall(tdb);
422 return -1;
423}
Note: See TracBrowser for help on using the repository browser.