source: vendor/current/lib/tdb/common/check.c

Last change on this file was 988, checked in by Silvan Scherrer, 9 years ago

Samba Server: update vendor to version 4.4.3

/*
   Unix SMB/CIFS implementation.

   trivial database library

   Copyright (C) Rusty Russell 2009

   ** NOTE! The following LGPL license applies to the tdb
   ** library. This does NOT imply that all of Samba is released
   ** under the LGPL

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "tdb_private.h"

/* Since we opened it, these shouldn't fail unless it's recent corruption. */
static bool tdb_check_header(struct tdb_context *tdb, tdb_off_t *recovery)
{
	struct tdb_header hdr;
	uint32_t h1, h2;

	if (tdb->methods->tdb_read(tdb, 0, &hdr, sizeof(hdr), 0) == -1)
		return false;
	if (strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0)
		goto corrupt;

	CONVERT(hdr);
	if (hdr.version != TDB_VERSION)
		goto corrupt;

	if (hdr.rwlocks != 0 &&
	    hdr.rwlocks != TDB_FEATURE_FLAG_MAGIC &&
	    hdr.rwlocks != TDB_HASH_RWLOCK_MAGIC)
		goto corrupt;

	tdb_header_hash(tdb, &h1, &h2);
	if (hdr.magic1_hash && hdr.magic2_hash &&
	    (hdr.magic1_hash != h1 || hdr.magic2_hash != h2))
		goto corrupt;

	if (hdr.hash_size == 0)
		goto corrupt;

	if (hdr.hash_size != tdb->hash_size)
		goto corrupt;

	if (hdr.recovery_start != 0 &&
	    hdr.recovery_start < TDB_DATA_START(tdb->hash_size))
		goto corrupt;

	*recovery = hdr.recovery_start;
	return true;

corrupt:
	tdb->ecode = TDB_ERR_CORRUPT;
	TDB_LOG((tdb, TDB_DEBUG_ERROR, "Header is corrupt\n"));
	return false;
}

/* Generic record header check. */
static bool tdb_check_record(struct tdb_context *tdb,
			     tdb_off_t off,
			     const struct tdb_record *rec)
{
	tdb_off_t tailer;

	/* Check rec->next: 0 or points to record offset, aligned. */
	if (rec->next > 0 && rec->next < TDB_DATA_START(tdb->hash_size)) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %u too small next %u\n",
			 off, rec->next));
		goto corrupt;
	}
	if (rec->next + sizeof(*rec) < rec->next) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %u too large next %u\n",
			 off, rec->next));
		goto corrupt;
	}
	if ((rec->next % TDB_ALIGNMENT) != 0) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %u misaligned next %u\n",
			 off, rec->next));
		goto corrupt;
	}
	if (tdb->methods->tdb_oob(tdb, rec->next, sizeof(*rec), 0))
		goto corrupt;

	/* Check rec_len: similar to rec->next, implies next record. */
	if ((rec->rec_len % TDB_ALIGNMENT) != 0) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %u misaligned length %u\n",
			 off, rec->rec_len));
		goto corrupt;
	}
	/* Must fit tailer. */
	if (rec->rec_len < sizeof(tailer)) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %u too short length %u\n",
			 off, rec->rec_len));
		goto corrupt;
	}
	/* OOB allows "right at the end" access, so this works for last rec. */
	if (tdb->methods->tdb_oob(tdb, off, sizeof(*rec)+rec->rec_len, 0))
		goto corrupt;

	/* Check tailer. */
	if (tdb_ofs_read(tdb, off+sizeof(*rec)+rec->rec_len-sizeof(tailer),
			 &tailer) == -1)
		goto corrupt;
	if (tailer != sizeof(*rec) + rec->rec_len) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %u invalid tailer\n", off));
		goto corrupt;
	}

	return true;

corrupt:
	tdb->ecode = TDB_ERR_CORRUPT;
	return false;
}

/* Grab some bytes: may copy if can't use mmap.
   Caller has already done bounds check. */
static TDB_DATA get_bytes(struct tdb_context *tdb,
			  tdb_off_t off, tdb_len_t len)
{
	TDB_DATA d;

	d.dsize = len;

	if (tdb->transaction == NULL && tdb->map_ptr != NULL)
		d.dptr = (unsigned char *)tdb->map_ptr + off;
	else
		d.dptr = tdb_alloc_read(tdb, off, d.dsize);
	return d;
}

/* Frees data if we're not able to simply use mmap. */
static void put_bytes(struct tdb_context *tdb, TDB_DATA d)
{
	if (tdb->transaction == NULL && tdb->map_ptr != NULL)
		return;
	free(d.dptr);
}
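
/*
 * Typical pairing (a minimal sketch, mirroring how tdb_check_used_record()
 * below uses these helpers): a buffer obtained from get_bytes() must be
 * handed back to put_bytes() on the same tdb, since only the non-mmap path
 * actually owns an allocation:
 *
 *	TDB_DATA d = get_bytes(tdb, off, len);
 *	if (d.dptr) {
 *		... use d.dptr / d.dsize ...
 *		put_bytes(tdb, d);
 *	}
 */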

/* We use the excellent Jenkins lookup3 hash; this is based on hash_word2.
 * See: http://burtleburtle.net/bob/c/lookup3.c
 */
#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
static void hash(uint32_t key, uint32_t *pc, uint32_t *pb)
{
	uint32_t a,b,c;

	/* Set up the internal state */
	a = b = c = 0xdeadbeef + *pc;
	c += *pb;
	a += key;
	c ^= b; c -= rot(b,14);
	a ^= c; a -= rot(c,11);
	b ^= a; b -= rot(a,25);
	c ^= b; c -= rot(b,16);
	a ^= c; a -= rot(c,4);
	b ^= a; b -= rot(a,14);
	c ^= b; c -= rot(b,24);
	*pc=c; *pb=b;
}

/*
  We want to check that all free records are in the free list
  (only once), and all free list entries are free records. Similarly
  for each hash chain of used records.

  Doing that naively (without walking hash chains, since we want to be
  linear) means keeping a list of records which have been seen in each
  hash chain, and another of records pointed to (ie. next pointers
  from records and the initial hash chain heads).  These two lists
  should be equal.  This will take 8 bytes per record, and require
  sorting at the end.

  So instead, we record each offset in a bitmap such a way that
  recording it twice will cancel out.  Since each offset should appear
  exactly twice, the bitmap should be zero at the end.

  The approach was inspired by Bloom Filters (see Wikipedia).  For
  each value, we flip K bits in a bitmap of size N.  The number of
  distinct arrangements is:

	N! / (K! * (N-K)!)

  Of course, not all arrangements are actually distinct, but testing
  shows this formula to be close enough.

  So, if K == 8 and N == 256, the probability of two things flipping the same
  bits is 1 in 409,663,695,276,000.

  Given that ldb uses a hash size of 10000, using 32 bytes per hash chain
  (320k) seems reasonable.
*/
#define NUM_HASHES 8
#define BITMAP_BITS 256
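
/* A worked check of the figures above, using only the constants defined
 * here: with N == BITMAP_BITS == 256 and K == NUM_HASHES == 8,
 *
 *	N! / (K! * (N-K)!) = (256*255*254*253*252*251*250*249) / 8!
 *			   = 16,517,640,193,528,320,000 / 40,320
 *			   = 409,663,695,276,000
 *
 * which matches the "1 in 409,663,695,276,000" quoted above.  Each chain's
 * bitmap is BITMAP_BITS / CHAR_BIT == 32 bytes, so a hash size of 10000
 * (plus one freelist bitmap) costs about 10001 * 32 bytes, i.e. the ~320k
 * mentioned in the comment. */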

static void bit_flip(unsigned char bits[], unsigned int idx)
{
	bits[idx / CHAR_BIT] ^= (1 << (idx % CHAR_BIT));
}

/* We record offsets in a bitmap for the particular chain it should be in. */
static void record_offset(unsigned char bits[], tdb_off_t off)
{
	uint32_t h1 = off, h2 = 0;
	unsigned int i;

	/* We get two good hash values out of jhash2, so we use both.  Then
	 * we keep going to produce further hash values. */
	for (i = 0; i < NUM_HASHES / 2; i++) {
		hash(off, &h1, &h2);
		bit_flip(bits, h1 % BITMAP_BITS);
		bit_flip(bits, h2 % BITMAP_BITS);
		h2++;
	}
}
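
/*
 * Illustration of the cancellation property (a minimal sketch, not used by
 * the library; the helper below is hypothetical).  record_offset() derives
 * its bit positions purely from 'off', so recording the same offset twice
 * flips each chosen bit an even number of times and the bitmap ends up all
 * zero again:
 */
#if 0
static bool bitmap_cancels(tdb_off_t off)
{
	unsigned char bits[BITMAP_BITS / CHAR_BIT] = { 0 };
	unsigned int i;

	record_offset(bits, off);	/* e.g. seen as a chain head / next ptr */
	record_offset(bits, off);	/* e.g. seen as the record itself */

	for (i = 0; i < sizeof(bits); i++) {
		if (bits[i] != 0)
			return false;	/* would indicate a broken hash() */
	}
	return true;
}
#endif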

/* Check that an in-use record is valid. */
static bool tdb_check_used_record(struct tdb_context *tdb,
				  tdb_off_t off,
				  const struct tdb_record *rec,
				  unsigned char **hashes,
				  int (*check)(TDB_DATA, TDB_DATA, void *),
				  void *private_data)
{
	TDB_DATA key, data;

	if (!tdb_check_record(tdb, off, rec))
		return false;

	/* key + data + tailer must fit in record */
	if (rec->key_len + rec->data_len + sizeof(tdb_off_t) > rec->rec_len) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %u too short for contents\n", off));
		return false;
	}

	key = get_bytes(tdb, off + sizeof(*rec), rec->key_len);
	if (!key.dptr)
		return false;

	if (tdb->hash_fn(&key) != rec->full_hash) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Record offset %u has incorrect hash\n", off));
		goto fail_put_key;
	}

	/* Mark this offset as a known value for this hash bucket. */
	record_offset(hashes[BUCKET(rec->full_hash)+1], off);
	/* And similarly if the next pointer is valid. */
	if (rec->next)
		record_offset(hashes[BUCKET(rec->full_hash)+1], rec->next);

	/* If they supply a check function and this record isn't dead,
	   get data and feed it. */
	if (check && rec->magic != TDB_DEAD_MAGIC) {
		data = get_bytes(tdb, off + sizeof(*rec) + rec->key_len,
				 rec->data_len);
		if (!data.dptr)
			goto fail_put_key;

		if (check(key, data, private_data) == -1)
			goto fail_put_data;
		put_bytes(tdb, data);
	}

	put_bytes(tdb, key);
	return true;

fail_put_data:
	put_bytes(tdb, data);
fail_put_key:
	put_bytes(tdb, key);
	return false;
}

/* Check that an unused record is valid. */
static bool tdb_check_free_record(struct tdb_context *tdb,
				  tdb_off_t off,
				  const struct tdb_record *rec,
				  unsigned char **hashes)
{
	if (!tdb_check_record(tdb, off, rec))
		return false;

	/* Mark this offset as a known value for the free list. */
	record_offset(hashes[0], off);
	/* And similarly if the next pointer is valid. */
	if (rec->next)
		record_offset(hashes[0], rec->next);
	return true;
}

/* Slow, but should be very rare. */
size_t tdb_dead_space(struct tdb_context *tdb, tdb_off_t off)
{
	size_t len;

	for (len = 0; off + len < tdb->map_size; len++) {
		char c;
		if (tdb->methods->tdb_read(tdb, off, &c, 1, 0))
			return 0;
		if (c != 0 && c != 0x42)
			break;
	}
	return len;
}

_PUBLIC_ int tdb_check(struct tdb_context *tdb,
		       int (*check)(TDB_DATA key, TDB_DATA data, void *private_data),
		       void *private_data)
{
	unsigned int h;
	unsigned char **hashes;
	tdb_off_t off, recovery_start;
	struct tdb_record rec;
	bool found_recovery = false;
	tdb_len_t dead;
	bool locked;

	/* Read-only databases use no locking at all: it's best-effort.
	 * We may have a write lock already, so skip that case too. */
	if (tdb->read_only || tdb->allrecord_lock.count != 0) {
		locked = false;
	} else {
		if (tdb_lockall_read(tdb) == -1)
			return -1;
		locked = true;
	}

	/* Make sure we know true size of the underlying file. */
	tdb->methods->tdb_oob(tdb, tdb->map_size, 1, 1);

	/* Header must be OK: also gets us the recovery ptr, if any. */
	if (!tdb_check_header(tdb, &recovery_start))
		goto unlock;

	/* We should have the whole header, too. */
	if (tdb->map_size < TDB_DATA_START(tdb->hash_size)) {
		tdb->ecode = TDB_ERR_CORRUPT;
		TDB_LOG((tdb, TDB_DEBUG_ERROR, "File too short for hashes\n"));
		goto unlock;
	}

	/* One big malloc: pointers then bit arrays. */
	hashes = (unsigned char **)calloc(
			1, sizeof(hashes[0]) * (1+tdb->hash_size)
			+ BITMAP_BITS / CHAR_BIT * (1+tdb->hash_size));
	if (!hashes) {
		tdb->ecode = TDB_ERR_OOM;
		goto unlock;
	}

	/* Initialize pointers */
	hashes[0] = (unsigned char *)(&hashes[1+tdb->hash_size]);
	for (h = 1; h < 1+tdb->hash_size; h++)
		hashes[h] = hashes[h-1] + BITMAP_BITS / CHAR_BIT;

	/* Freelist and hash headers are all in a row: read them. */
	for (h = 0; h < 1+tdb->hash_size; h++) {
		if (tdb_ofs_read(tdb, FREELIST_TOP + h*sizeof(tdb_off_t),
				 &off) == -1)
			goto free;
		if (off)
			record_offset(hashes[h], off);
	}

	/* For each record, read it in and check it's ok. */
	for (off = TDB_DATA_START(tdb->hash_size);
	     off < tdb->map_size;
	     off += sizeof(rec) + rec.rec_len) {
		if (tdb->methods->tdb_read(tdb, off, &rec, sizeof(rec),
					   DOCONV()) == -1)
			goto free;
		switch (rec.magic) {
		case TDB_MAGIC:
		case TDB_DEAD_MAGIC:
			if (!tdb_check_used_record(tdb, off, &rec, hashes,
						   check, private_data))
				goto free;
			break;
		case TDB_FREE_MAGIC:
			if (!tdb_check_free_record(tdb, off, &rec, hashes))
				goto free;
			break;
		/* If we crash after ftruncate, we can get zeroes or fill. */
		case TDB_RECOVERY_INVALID_MAGIC:
		case 0x42424242:
			if (recovery_start == off) {
				found_recovery = true;
				break;
			}
			dead = tdb_dead_space(tdb, off);
			if (dead < sizeof(rec))
				goto corrupt;

			TDB_LOG((tdb, TDB_DEBUG_ERROR,
				 "Dead space at %u-%u (of %u)\n",
				 off, off + dead, tdb->map_size));
			rec.rec_len = dead - sizeof(rec);
			break;
		case TDB_RECOVERY_MAGIC:
			if (recovery_start != off) {
				TDB_LOG((tdb, TDB_DEBUG_ERROR,
					 "Unexpected recovery record at offset %u\n",
					 off));
				goto free;
			}
			found_recovery = true;
			break;
		default: ;
		corrupt:
			tdb->ecode = TDB_ERR_CORRUPT;
			TDB_LOG((tdb, TDB_DEBUG_ERROR,
				 "Bad magic 0x%x at offset %u\n",
				 rec.magic, off));
			goto free;
		}
	}

	/* Now, hashes should all be empty: each record exists and is referred
	 * to by one other. */
	for (h = 0; h < 1+tdb->hash_size; h++) {
		unsigned int i;
		for (i = 0; i < BITMAP_BITS / CHAR_BIT; i++) {
			if (hashes[h][i] != 0) {
				tdb->ecode = TDB_ERR_CORRUPT;
				TDB_LOG((tdb, TDB_DEBUG_ERROR,
					 "Hashes do not match records\n"));
				goto free;
			}
		}
	}

	/* We must have found recovery area if there was one. */
	if (recovery_start != 0 && !found_recovery) {
		TDB_LOG((tdb, TDB_DEBUG_ERROR,
			 "Expected a recovery area at %u\n",
			 recovery_start));
		goto free;
	}

	free(hashes);
	if (locked) {
		tdb_unlockall_read(tdb);
	}
	return 0;

free:
	free(hashes);
unlock:
	if (locked) {
		tdb_unlockall_read(tdb);
	}
	return -1;
}
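
#if 0
/* Example usage (a minimal sketch; the database name and callback below are
 * hypothetical).  The optional callback sees each live record's key and
 * data; returning -1 from it aborts the traversal and makes tdb_check()
 * return -1. */
#include <stdio.h>
#include <fcntl.h>
#include "tdb.h"

static int count_record(TDB_DATA key, TDB_DATA data, void *private_data)
{
	unsigned int *count = private_data;
	(*count)++;
	return 0;
}

int main(void)
{
	struct tdb_context *tdb = tdb_open("example.tdb", 0, TDB_DEFAULT,
					   O_RDWR, 0);
	unsigned int count = 0;

	if (tdb == NULL)
		return 1;
	if (tdb_check(tdb, count_record, &count) != 0) {
		fprintf(stderr, "tdb_check failed: %s\n", tdb_errorstr(tdb));
		tdb_close(tdb);
		return 1;
	}
	printf("tdb OK: %u records\n", count);
	tdb_close(tdb);
	return 0;
}
#endif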