source: trunk/server/lib/tdb/common/check.c

Last change on this file was 987, checked in by Silvan Scherrer, 9 years ago

samba server: fix crlf in tdb trunk code

File size: 12.6 KB
Line 
1 /*
2 Unix SMB/CIFS implementation.
3
4 trivial database library
5
6 Copyright (C) Rusty Russell 2009
7
8 ** NOTE! The following LGPL license applies to the tdb
9 ** library. This does NOT imply that all of Samba is released
10 ** under the LGPL
11
12 This library is free software; you can redistribute it and/or
13 modify it under the terms of the GNU Lesser General Public
14 License as published by the Free Software Foundation; either
15 version 3 of the License, or (at your option) any later version.
16
17 This library is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 Lesser General Public License for more details.
21
22 You should have received a copy of the GNU Lesser General Public
23 License along with this library; if not, see <http://www.gnu.org/licenses/>.
24*/
25#include "tdb_private.h"
26
27/* Since we opened it, these shouldn't fail unless it's recent corruption. */
28static bool tdb_check_header(struct tdb_context *tdb, tdb_off_t *recovery)
29{
30 struct tdb_header hdr;
31 uint32_t h1, h2;
32
33 if (tdb->methods->tdb_read(tdb, 0, &hdr, sizeof(hdr), 0) == -1)
34 return false;
35 if (strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0)
36 goto corrupt;
37
38 CONVERT(hdr);
39 if (hdr.version != TDB_VERSION)
40 goto corrupt;
41
42 if (hdr.rwlocks != 0 && hdr.rwlocks != TDB_HASH_RWLOCK_MAGIC)
43 goto corrupt;
44
45 tdb_header_hash(tdb, &h1, &h2);
46 if (hdr.magic1_hash && hdr.magic2_hash &&
47 (hdr.magic1_hash != h1 || hdr.magic2_hash != h2))
48 goto corrupt;
49
50 if (hdr.hash_size == 0)
51 goto corrupt;
52
53 if (hdr.hash_size != tdb->header.hash_size)
54 goto corrupt;
55
56 if (hdr.recovery_start != 0 &&
57 hdr.recovery_start < TDB_DATA_START(tdb->header.hash_size))
58 goto corrupt;
59
60 *recovery = hdr.recovery_start;
61 return true;
62
63corrupt:
64 tdb->ecode = TDB_ERR_CORRUPT;
65 TDB_LOG((tdb, TDB_DEBUG_ERROR, "Header is corrupt\n"));
66 return false;
67}
68
69/* Generic record header check. */
70static bool tdb_check_record(struct tdb_context *tdb,
71 tdb_off_t off,
72 const struct tdb_record *rec)
73{
74 tdb_off_t tailer;
75
76 /* Check rec->next: 0 or points to record offset, aligned. */
77 if (rec->next > 0 && rec->next < TDB_DATA_START(tdb->header.hash_size)){
78 TDB_LOG((tdb, TDB_DEBUG_ERROR,
79 "Record offset %d too small next %d\n",
80 off, rec->next));
81 goto corrupt;
82 }
83 if (rec->next + sizeof(*rec) < rec->next) {
84 TDB_LOG((tdb, TDB_DEBUG_ERROR,
85 "Record offset %d too large next %d\n",
86 off, rec->next));
87 goto corrupt;
88 }
89 if ((rec->next % TDB_ALIGNMENT) != 0) {
90 TDB_LOG((tdb, TDB_DEBUG_ERROR,
91 "Record offset %d misaligned next %d\n",
92 off, rec->next));
93 goto corrupt;
94 }
95 if (tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0))
96 goto corrupt;
97
98 /* Check rec_len: similar to rec->next, implies next record. */
99 if ((rec->rec_len % TDB_ALIGNMENT) != 0) {
100 TDB_LOG((tdb, TDB_DEBUG_ERROR,
101 "Record offset %d misaligned length %d\n",
102 off, rec->rec_len));
103 goto corrupt;
104 }
105 /* Must fit tailer. */
106 if (rec->rec_len < sizeof(tailer)) {
107 TDB_LOG((tdb, TDB_DEBUG_ERROR,
108 "Record offset %d too short length %d\n",
109 off, rec->rec_len));
110 goto corrupt;
111 }
112 /* OOB allows "right at the end" access, so this works for last rec. */
113 if (tdb->methods->tdb_oob(tdb, off+sizeof(*rec)+rec->rec_len, 0))
114 goto corrupt;
115
116 /* Check tailer. */
117 if (tdb_ofs_read(tdb, off+sizeof(*rec)+rec->rec_len-sizeof(tailer),
118 &tailer) == -1)
119 goto corrupt;
120 if (tailer != sizeof(*rec) + rec->rec_len) {
121 TDB_LOG((tdb, TDB_DEBUG_ERROR,
122 "Record offset %d invalid tailer\n", off));
123 goto corrupt;
124 }
125
126 return true;
127
128corrupt:
129 tdb->ecode = TDB_ERR_CORRUPT;
130 return false;
131}
132
133/* Grab some bytes: may copy if can't use mmap.
134 Caller has already done bounds check. */
135static TDB_DATA get_bytes(struct tdb_context *tdb,
136 tdb_off_t off, tdb_len_t len)
137{
138 TDB_DATA d;
139
140 d.dsize = len;
141
142 if (tdb->transaction == NULL && tdb->map_ptr != NULL)
143 d.dptr = (unsigned char *)tdb->map_ptr + off;
144 else
145 d.dptr = tdb_alloc_read(tdb, off, d.dsize);
146 return d;
147}
148
149/* Frees data if we're not able to simply use mmap. */
150static void put_bytes(struct tdb_context *tdb, TDB_DATA d)
151{
152 if (tdb->transaction == NULL && tdb->map_ptr != NULL)
153 return;
154 free(d.dptr);
155}
156
/* We use the excellent Jenkins lookup3 hash; this is based on hash_word2.
 * See: http://burtleburtle.net/bob/c/lookup3.c
 */
static inline uint32_t rotl32(uint32_t x, int k)
{
	return (x << k) | (x >> (32 - k));
}

static void hash(uint32_t key, uint32_t *pc, uint32_t *pb)
{
	uint32_t a, b, c;

	/* Seed the internal state from the caller-supplied values. */
	a = b = c = 0xdeadbeef + *pc;
	c += *pb;
	a += key;

	/* Final mixing round (lookup3's final() sequence). */
	c ^= b; c -= rotl32(b, 14);
	a ^= c; a -= rotl32(c, 11);
	b ^= a; b -= rotl32(a, 25);
	c ^= b; c -= rotl32(b, 16);
	a ^= c; a -= rotl32(c, 4);
	b ^= a; b -= rotl32(a, 14);
	c ^= b; c -= rotl32(b, 24);

	/* Hand back both well-mixed words. */
	*pc = c;
	*pb = b;
}
178
179/*
180 We want to check that all free records are in the free list
181 (only once), and all free list entries are free records. Similarly
182 for each hash chain of used records.
183
184 Doing that naively (without walking hash chains, since we want to be
185 linear) means keeping a list of records which have been seen in each
186 hash chain, and another of records pointed to (ie. next pointers
187 from records and the initial hash chain heads). These two lists
188 should be equal. This will take 8 bytes per record, and require
189 sorting at the end.
190
191 So instead, we record each offset in a bitmap in such a way that
192 recording it twice will cancel out. Since each offset should appear
193 exactly twice, the bitmap should be zero at the end.
194
195 The approach was inspired by Bloom Filters (see Wikipedia). For
196 each value, we flip K bits in a bitmap of size N. The number of
197 distinct arrangements is:
198
199 N! / (K! * (N-K)!)
200
201 Of course, not all arrangements are actually distinct, but testing
202 shows this formula to be close enough.
203
204 So, if K == 8 and N == 256, the probability of two things flipping the same
205 bits is 1 in 409,663,695,276,000.
206
207 Given that ldb uses a hash size of 10000, using 32 bytes per hash chain
208 (320k) seems reasonable.
209*/
210#define NUM_HASHES 8
211#define BITMAP_BITS 256
212
/* Toggle bit @idx within the bitmap @bits. */
static void bit_flip(unsigned char bits[], unsigned int idx)
{
	unsigned int byte = idx / CHAR_BIT;
	unsigned int mask = 1u << (idx % CHAR_BIT);

	bits[byte] ^= mask;
}
217
218/* We record offsets in a bitmap for the particular chain it should be in. */
219static void record_offset(unsigned char bits[], tdb_off_t off)
220{
221 uint32_t h1 = off, h2 = 0;
222 unsigned int i;
223
224 /* We get two good hash values out of jhash2, so we use both. Then
225 * we keep going to produce further hash values. */
226 for (i = 0; i < NUM_HASHES / 2; i++) {
227 hash(off, &h1, &h2);
228 bit_flip(bits, h1 % BITMAP_BITS);
229 bit_flip(bits, h2 % BITMAP_BITS);
230 h2++;
231 }
232}
233
234/* Check that an in-use record is valid. */
235static bool tdb_check_used_record(struct tdb_context *tdb,
236 tdb_off_t off,
237 const struct tdb_record *rec,
238 unsigned char **hashes,
239 int (*check)(TDB_DATA, TDB_DATA, void *),
240 void *private_data)
241{
242 TDB_DATA key, data;
243
244 if (!tdb_check_record(tdb, off, rec))
245 return false;
246
247 /* key + data + tailer must fit in record */
248 if (rec->key_len + rec->data_len + sizeof(tdb_off_t) > rec->rec_len) {
249 TDB_LOG((tdb, TDB_DEBUG_ERROR,
250 "Record offset %d too short for contents\n", off));
251 return false;
252 }
253
254 key = get_bytes(tdb, off + sizeof(*rec), rec->key_len);
255 if (!key.dptr)
256 return false;
257
258 if (tdb->hash_fn(&key) != rec->full_hash) {
259 TDB_LOG((tdb, TDB_DEBUG_ERROR,
260 "Record offset %d has incorrect hash\n", off));
261 goto fail_put_key;
262 }
263
264 /* Mark this offset as a known value for this hash bucket. */
265 record_offset(hashes[BUCKET(rec->full_hash)+1], off);
266 /* And similarly if the next pointer is valid. */
267 if (rec->next)
268 record_offset(hashes[BUCKET(rec->full_hash)+1], rec->next);
269
270 /* If they supply a check function and this record isn't dead,
271 get data and feed it. */
272 if (check && rec->magic != TDB_DEAD_MAGIC) {
273 data = get_bytes(tdb, off + sizeof(*rec) + rec->key_len,
274 rec->data_len);
275 if (!data.dptr)
276 goto fail_put_key;
277
278 if (check(key, data, private_data) == -1)
279 goto fail_put_data;
280 put_bytes(tdb, data);
281 }
282
283 put_bytes(tdb, key);
284 return true;
285
286fail_put_data:
287 put_bytes(tdb, data);
288fail_put_key:
289 put_bytes(tdb, key);
290 return false;
291}
292
293/* Check that an unused record is valid. */
294static bool tdb_check_free_record(struct tdb_context *tdb,
295 tdb_off_t off,
296 const struct tdb_record *rec,
297 unsigned char **hashes)
298{
299 if (!tdb_check_record(tdb, off, rec))
300 return false;
301
302 /* Mark this offset as a known value for the free list. */
303 record_offset(hashes[0], off);
304 /* And similarly if the next pointer is valid. */
305 if (rec->next)
306 record_offset(hashes[0], rec->next);
307 return true;
308}
309
310/* Slow, but should be very rare. */
311size_t tdb_dead_space(struct tdb_context *tdb, tdb_off_t off)
312{
313 size_t len;
314
315 for (len = 0; off + len < tdb->map_size; len++) {
316 char c;
317 if (tdb->methods->tdb_read(tdb, off, &c, 1, 0))
318 return 0;
319 if (c != 0 && c != 0x42)
320 break;
321 }
322 return len;
323}
324
/* Check the consistency of the entire database.
 *
 * Walks every record linearly, validating used and free records, and
 * verifies that the freelist and each hash chain contain exactly the
 * records that point into them: every offset is flipped into a
 * per-chain bitmap once when seen as a record and once when seen as a
 * pointer, so a consistent chain leaves its bitmap all-zero.  If @check
 * is non-NULL it is called for each live key/data pair; returning -1
 * from it fails the check.
 *
 * Returns 0 on success, -1 (with tdb->ecode set) on failure.
 */
_PUBLIC_ int tdb_check(struct tdb_context *tdb,
	      int (*check)(TDB_DATA key, TDB_DATA data, void *private_data),
	      void *private_data)
{
	unsigned int h;
	unsigned char **hashes;	/* [0] = freelist bitmap, [1..hash_size] = chains */
	tdb_off_t off, recovery_start;
	struct tdb_record rec;
	bool found_recovery = false;
	tdb_len_t dead;
	bool locked;		/* did we take the allrecord read lock? */

	/* Read-only databases use no locking at all: it's best-effort.
	 * We may have a write lock already, so skip that case too. */
	if (tdb->read_only || tdb->allrecord_lock.count != 0) {
		locked = false;
	} else {
		if (tdb_lockall_read(tdb) == -1)
			return -1;
		locked = true;
	}

	/* Make sure we know true size of the underlying file. */
	tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1);

	/* Header must be OK: also gets us the recovery ptr, if any. */
	if (!tdb_check_header(tdb, &recovery_start))
		goto unlock;

	/* We should have the whole header, too. */
	if (tdb->map_size < TDB_DATA_START(tdb->header.hash_size)) {
		tdb->ecode = TDB_ERR_CORRUPT;
		TDB_LOG((tdb, TDB_DEBUG_ERROR, "File too short for hashes\n"));
		goto unlock;
	}

	/* One big malloc: pointers then bit arrays. */
	hashes = (unsigned char **)calloc(
			1, sizeof(hashes[0]) * (1+tdb->header.hash_size)
			+ BITMAP_BITS / CHAR_BIT * (1+tdb->header.hash_size));
	if (!hashes) {
		tdb->ecode = TDB_ERR_OOM;
		goto unlock;
	}

	/* Initialize pointers: bitmaps live immediately after the pointer
	 * array inside the single allocation. */
	hashes[0] = (unsigned char *)(&hashes[1+tdb->header.hash_size]);
	for (h = 1; h < 1+tdb->header.hash_size; h++)
		hashes[h] = hashes[h-1] + BITMAP_BITS / CHAR_BIT;

	/* Freelist and hash headers are all in a row: read them. */
	for (h = 0; h < 1+tdb->header.hash_size; h++) {
		if (tdb_ofs_read(tdb, FREELIST_TOP + h*sizeof(tdb_off_t),
				 &off) == -1)
			goto free;
		if (off)
			record_offset(hashes[h], off);
	}

	/* For each record, read it in and check it's ok.  The loop steps
	 * to the next record using the (already validated) rec_len. */
	for (off = TDB_DATA_START(tdb->header.hash_size);
	     off < tdb->map_size;
	     off += sizeof(rec) + rec.rec_len) {
		if (tdb->methods->tdb_read(tdb, off, &rec, sizeof(rec),
					   DOCONV()) == -1)
			goto free;
		switch (rec.magic) {
		case TDB_MAGIC:
		case TDB_DEAD_MAGIC:
			if (!tdb_check_used_record(tdb, off, &rec, hashes,
						   check, private_data))
				goto free;
			break;
		case TDB_FREE_MAGIC:
			if (!tdb_check_free_record(tdb, off, &rec, hashes))
				goto free;
			break;
		/* If we crash after ftruncate, we can get zeroes or fill. */
		case TDB_RECOVERY_INVALID_MAGIC:
		case 0x42424242:
			if (recovery_start == off) {
				found_recovery = true;
				break;
			}
			/* Tolerate a run of dead bytes if it is at least a
			 * record header long; fake up rec_len so the loop
			 * skips over it. */
			dead = tdb_dead_space(tdb, off);
			if (dead < sizeof(rec))
				goto corrupt;

			TDB_LOG((tdb, TDB_DEBUG_ERROR,
				 "Dead space at %d-%d (of %u)\n",
				 off, off + dead, tdb->map_size));
			rec.rec_len = dead - sizeof(rec);
			break;
		case TDB_RECOVERY_MAGIC:
			if (recovery_start != off) {
				TDB_LOG((tdb, TDB_DEBUG_ERROR,
					 "Unexpected recovery record at offset %d\n",
					 off));
				goto free;
			}
			found_recovery = true;
			break;
		default: ;
		corrupt:
			tdb->ecode = TDB_ERR_CORRUPT;
			TDB_LOG((tdb, TDB_DEBUG_ERROR,
				 "Bad magic 0x%x at offset %d\n",
				 rec.magic, off));
			goto free;
		}
	}

	/* Now, hashes should all be empty: each record exists and is referred
	 * to by one other. */
	for (h = 0; h < 1+tdb->header.hash_size; h++) {
		unsigned int i;
		for (i = 0; i < BITMAP_BITS / CHAR_BIT; i++) {
			if (hashes[h][i] != 0) {
				tdb->ecode = TDB_ERR_CORRUPT;
				TDB_LOG((tdb, TDB_DEBUG_ERROR,
					 "Hashes do not match records\n"));
				goto free;
			}
		}
	}

	/* We must have found recovery area if there was one. */
	if (recovery_start != 0 && !found_recovery) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Expected a recovery area at %u\n",
			 recovery_start));
		goto free;
	}

	free(hashes);
	if (locked) {
		tdb_unlockall_read(tdb);
	}
	return 0;

free:
	free(hashes);
unlock:
	if (locked) {
		tdb_unlockall_read(tdb);
	}
	return -1;
}
Note: See TracBrowser for help on using the repository browser.