Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

check.c

Visit:

Last change on this file was 987, checked in by Silvan Scherrer, 9 years ago
samba server: fix crlf in tdb trunk code
File size: 12.6 KB

Line
1	/*
2	Unix SMB/CIFS implementation.
3
4	trivial database library
5
6	Copyright (C) Rusty Russell 2009
7
8	** NOTE! The following LGPL license applies to the tdb
9	** library. This does NOT imply that all of Samba is released
10	** under the LGPL
11
12	This library is free software; you can redistribute it and/or
13	modify it under the terms of the GNU Lesser General Public
14	License as published by the Free Software Foundation; either
15	version 3 of the License, or (at your option) any later version.
16
17	This library is distributed in the hope that it will be useful,
18	but WITHOUT ANY WARRANTY; without even the implied warranty of
19	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20	Lesser General Public License for more details.
21
22	You should have received a copy of the GNU Lesser General Public
23	License along with this library; if not, see <http://www.gnu.org/licenses/>.
24	*/
25	#include "tdb_private.h"
26
27	/* Since we opened it, these shouldn't fail unless it's recent corruption. */
28	static bool tdb_check_header(struct tdb_context tdb, tdb_off_t recovery)
29	{
30	struct tdb_header hdr;
31	uint32_t h1, h2;
32
33	if (tdb->methods->tdb_read(tdb, 0, &hdr, sizeof(hdr), 0) == -1)
34	return false;
35	if (strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0)
36	goto corrupt;
37
38	CONVERT(hdr);
39	if (hdr.version != TDB_VERSION)
40	goto corrupt;
41
42	if (hdr.rwlocks != 0 && hdr.rwlocks != TDB_HASH_RWLOCK_MAGIC)
43	goto corrupt;
44
45	tdb_header_hash(tdb, &h1, &h2);
46	if (hdr.magic1_hash && hdr.magic2_hash &&
47	(hdr.magic1_hash != h1 \|\| hdr.magic2_hash != h2))
48	goto corrupt;
49
50	if (hdr.hash_size == 0)
51	goto corrupt;
52
53	if (hdr.hash_size != tdb->header.hash_size)
54	goto corrupt;
55
56	if (hdr.recovery_start != 0 &&
57	hdr.recovery_start < TDB_DATA_START(tdb->header.hash_size))
58	goto corrupt;
59
60	*recovery = hdr.recovery_start;
61	return true;
62
63	corrupt:
64	tdb->ecode = TDB_ERR_CORRUPT;
65	TDB_LOG((tdb, TDB_DEBUG_ERROR, "Header is corrupt\n"));
66	return false;
67	}
68
69	/* Generic record header check. */
70	static bool tdb_check_record(struct tdb_context *tdb,
71	tdb_off_t off,
72	const struct tdb_record *rec)
73	{
74	tdb_off_t tailer;
75
76	/* Check rec->next: 0 or points to record offset, aligned. */
77	if (rec->next > 0 && rec->next < TDB_DATA_START(tdb->header.hash_size)){
78	TDB_LOG((tdb, TDB_DEBUG_ERROR,
79	"Record offset %d too small next %d\n",
80	off, rec->next));
81	goto corrupt;
82	}
83	if (rec->next + sizeof(*rec) < rec->next) {
84	TDB_LOG((tdb, TDB_DEBUG_ERROR,
85	"Record offset %d too large next %d\n",
86	off, rec->next));
87	goto corrupt;
88	}
89	if ((rec->next % TDB_ALIGNMENT) != 0) {
90	TDB_LOG((tdb, TDB_DEBUG_ERROR,
91	"Record offset %d misaligned next %d\n",
92	off, rec->next));
93	goto corrupt;
94	}
95	if (tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0))
96	goto corrupt;
97
98	/* Check rec_len: similar to rec->next, implies next record. */
99	if ((rec->rec_len % TDB_ALIGNMENT) != 0) {
100	TDB_LOG((tdb, TDB_DEBUG_ERROR,
101	"Record offset %d misaligned length %d\n",
102	off, rec->rec_len));
103	goto corrupt;
104	}
105	/* Must fit tailer. */
106	if (rec->rec_len < sizeof(tailer)) {
107	TDB_LOG((tdb, TDB_DEBUG_ERROR,
108	"Record offset %d too short length %d\n",
109	off, rec->rec_len));
110	goto corrupt;
111	}
112	/* OOB allows "right at the end" access, so this works for last rec. */
113	if (tdb->methods->tdb_oob(tdb, off+sizeof(*rec)+rec->rec_len, 0))
114	goto corrupt;
115
116	/* Check tailer. */
117	if (tdb_ofs_read(tdb, off+sizeof(*rec)+rec->rec_len-sizeof(tailer),
118	&tailer) == -1)
119	goto corrupt;
120	if (tailer != sizeof(*rec) + rec->rec_len) {
121	TDB_LOG((tdb, TDB_DEBUG_ERROR,
122	"Record offset %d invalid tailer\n", off));
123	goto corrupt;
124	}
125
126	return true;
127
128	corrupt:
129	tdb->ecode = TDB_ERR_CORRUPT;
130	return false;
131	}
132
133	/* Grab some bytes: may copy if can't use mmap.
134	Caller has already done bounds check. */
135	static TDB_DATA get_bytes(struct tdb_context *tdb,
136	tdb_off_t off, tdb_len_t len)
137	{
138	TDB_DATA d;
139
140	d.dsize = len;
141
142	if (tdb->transaction == NULL && tdb->map_ptr != NULL)
143	d.dptr = (unsigned char *)tdb->map_ptr + off;
144	else
145	d.dptr = tdb_alloc_read(tdb, off, d.dsize);
146	return d;
147	}
148
149	/* Frees data if we're not able to simply use mmap. */
150	static void put_bytes(struct tdb_context *tdb, TDB_DATA d)
151	{
152	if (tdb->transaction == NULL && tdb->map_ptr != NULL)
153	return;
154	free(d.dptr);
155	}
156
157	/* We use the excellent Jenkins lookup3 hash; this is based on hash_word2.
158	* See: http://burtleburtle.net/bob/c/lookup3.c
159	*/
160	#define rot(x,k) (((x)<<(k)) \| ((x)>>(32-(k))))
161	static void hash(uint32_t key, uint32_t pc, uint32_t pb)
162	{
163	uint32_t a,b,c;
164
165	/* Set up the internal state */
166	a = b = c = 0xdeadbeef + *pc;
167	c += *pb;
168	a += key;
169	c ^= b; c -= rot(b,14);
170	a ^= c; a -= rot(c,11);
171	b ^= a; b -= rot(a,25);
172	c ^= b; c -= rot(b,16);
173	a ^= c; a -= rot(c,4);
174	b ^= a; b -= rot(a,14);
175	c ^= b; c -= rot(b,24);
176	pc=c; pb=b;
177	}
178
179	/*
180	We want to check that all free records are in the free list
181	(only once), and all free list entries are free records. Similarly
182	for each hash chain of used records.
183
184	Doing that naively (without walking hash chains, since we want to be
185	linear) means keeping a list of records which have been seen in each
186	hash chain, and another of records pointed to (ie. next pointers
187	from records and the initial hash chain heads). These two lists
188	should be equal. This will take 8 bytes per record, and require
189	sorting at the end.
190
191	So instead, we record each offset in a bitmap such a way that
192	recording it twice will cancel out. Since each offset should appear
193	exactly twice, the bitmap should be zero at the end.
194
195	The approach was inspired by Bloom Filters (see Wikipedia). For
196	each value, we flip K bits in a bitmap of size N. The number of
197	distinct arrangements is:
198
199	N! / (K! * (N-K)!)
200
201	Of course, not all arrangements are actually distinct, but testing
202	shows this formula to be close enough.
203
204	So, if K == 8 and N == 256, the probability of two things flipping the same
205	bits is 1 in 409,663,695,276,000.
206
207	Given that ldb uses a hash size of 10000, using 32 bytes per hash chain
208	(320k) seems reasonable.
209	*/
210	#define NUM_HASHES 8
211	#define BITMAP_BITS 256
212
213	static void bit_flip(unsigned char bits[], unsigned int idx)
214	{
215	bits[idx / CHAR_BIT] ^= (1 << (idx % CHAR_BIT));
216	}
217
218	/* We record offsets in a bitmap for the particular chain it should be in. */
219	static void record_offset(unsigned char bits[], tdb_off_t off)
220	{
221	uint32_t h1 = off, h2 = 0;
222	unsigned int i;
223
224	/* We get two good hash values out of jhash2, so we use both. Then
225	* we keep going to produce further hash values. */
226	for (i = 0; i < NUM_HASHES / 2; i++) {
227	hash(off, &h1, &h2);
228	bit_flip(bits, h1 % BITMAP_BITS);
229	bit_flip(bits, h2 % BITMAP_BITS);
230	h2++;
231	}
232	}
233
234	/* Check that an in-use record is valid. */
235	static bool tdb_check_used_record(struct tdb_context *tdb,
236	tdb_off_t off,
237	const struct tdb_record *rec,
238	unsigned char **hashes,
239	int (check)(TDB_DATA, TDB_DATA, void ),
240	void *private_data)
241	{
242	TDB_DATA key, data;
243
244	if (!tdb_check_record(tdb, off, rec))
245	return false;
246
247	/* key + data + tailer must fit in record */
248	if (rec->key_len + rec->data_len + sizeof(tdb_off_t) > rec->rec_len) {
249	TDB_LOG((tdb, TDB_DEBUG_ERROR,
250	"Record offset %d too short for contents\n", off));
251	return false;
252	}
253
254	key = get_bytes(tdb, off + sizeof(*rec), rec->key_len);
255	if (!key.dptr)
256	return false;
257
258	if (tdb->hash_fn(&key) != rec->full_hash) {
259	TDB_LOG((tdb, TDB_DEBUG_ERROR,
260	"Record offset %d has incorrect hash\n", off));
261	goto fail_put_key;
262	}
263
264	/* Mark this offset as a known value for this hash bucket. */
265	record_offset(hashes[BUCKET(rec->full_hash)+1], off);
266	/* And similarly if the next pointer is valid. */
267	if (rec->next)
268	record_offset(hashes[BUCKET(rec->full_hash)+1], rec->next);
269
270	/* If they supply a check function and this record isn't dead,
271	get data and feed it. */
272	if (check && rec->magic != TDB_DEAD_MAGIC) {
273	data = get_bytes(tdb, off + sizeof(*rec) + rec->key_len,
274	rec->data_len);
275	if (!data.dptr)
276	goto fail_put_key;
277
278	if (check(key, data, private_data) == -1)
279	goto fail_put_data;
280	put_bytes(tdb, data);
281	}
282
283	put_bytes(tdb, key);
284	return true;
285
286	fail_put_data:
287	put_bytes(tdb, data);
288	fail_put_key:
289	put_bytes(tdb, key);
290	return false;
291	}
292
293	/* Check that an unused record is valid. */
294	static bool tdb_check_free_record(struct tdb_context *tdb,
295	tdb_off_t off,
296	const struct tdb_record *rec,
297	unsigned char **hashes)
298	{
299	if (!tdb_check_record(tdb, off, rec))
300	return false;
301
302	/* Mark this offset as a known value for the free list. */
303	record_offset(hashes[0], off);
304	/* And similarly if the next pointer is valid. */
305	if (rec->next)
306	record_offset(hashes[0], rec->next);
307	return true;
308	}
309
310	/* Slow, but should be very rare. */
311	size_t tdb_dead_space(struct tdb_context *tdb, tdb_off_t off)
312	{
313	size_t len;
314
315	for (len = 0; off + len < tdb->map_size; len++) {
316	char c;
317	if (tdb->methods->tdb_read(tdb, off, &c, 1, 0))
318	return 0;
319	if (c != 0 && c != 0x42)
320	break;
321	}
322	return len;
323	}
324
325	_PUBLIC_ int tdb_check(struct tdb_context *tdb,
326	int (check)(TDB_DATA key, TDB_DATA data, void private_data),
327	void *private_data)
328	{
329	unsigned int h;
330	unsigned char **hashes;
331	tdb_off_t off, recovery_start;
332	struct tdb_record rec;
333	bool found_recovery = false;
334	tdb_len_t dead;
335	bool locked;
336
337	/* Read-only databases use no locking at all: it's best-effort.
338	* We may have a write lock already, so skip that case too. */
339	if (tdb->read_only \|\| tdb->allrecord_lock.count != 0) {
340	locked = false;
341	} else {
342	if (tdb_lockall_read(tdb) == -1)
343	return -1;
344	locked = true;
345	}
346
347	/* Make sure we know true size of the underlying file. */
348	tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1);
349
350	/* Header must be OK: also gets us the recovery ptr, if any. */
351	if (!tdb_check_header(tdb, &recovery_start))
352	goto unlock;
353
354	/* We should have the whole header, too. */
355	if (tdb->map_size < TDB_DATA_START(tdb->header.hash_size)) {
356	tdb->ecode = TDB_ERR_CORRUPT;
357	TDB_LOG((tdb, TDB_DEBUG_ERROR, "File too short for hashes\n"));
358	goto unlock;
359	}
360
361	/* One big malloc: pointers then bit arrays. */
362	hashes = (unsigned char **)calloc(
363	1, sizeof(hashes[0]) * (1+tdb->header.hash_size)
364	+ BITMAP_BITS / CHAR_BIT * (1+tdb->header.hash_size));
365	if (!hashes) {
366	tdb->ecode = TDB_ERR_OOM;
367	goto unlock;
368	}
369
370	/* Initialize pointers */
371	hashes[0] = (unsigned char *)(&hashes[1+tdb->header.hash_size]);
372	for (h = 1; h < 1+tdb->header.hash_size; h++)
373	hashes[h] = hashes[h-1] + BITMAP_BITS / CHAR_BIT;
374
375	/* Freelist and hash headers are all in a row: read them. */
376	for (h = 0; h < 1+tdb->header.hash_size; h++) {
377	if (tdb_ofs_read(tdb, FREELIST_TOP + h*sizeof(tdb_off_t),
378	&off) == -1)
379	goto free;
380	if (off)
381	record_offset(hashes[h], off);
382	}
383
384	/* For each record, read it in and check it's ok. */
385	for (off = TDB_DATA_START(tdb->header.hash_size);
386	off < tdb->map_size;
387	off += sizeof(rec) + rec.rec_len) {
388	if (tdb->methods->tdb_read(tdb, off, &rec, sizeof(rec),
389	DOCONV()) == -1)
390	goto free;
391	switch (rec.magic) {
392	case TDB_MAGIC:
393	case TDB_DEAD_MAGIC:
394	if (!tdb_check_used_record(tdb, off, &rec, hashes,
395	check, private_data))
396	goto free;
397	break;
398	case TDB_FREE_MAGIC:
399	if (!tdb_check_free_record(tdb, off, &rec, hashes))
400	goto free;
401	break;
402	/* If we crash after ftruncate, we can get zeroes or fill. */
403	case TDB_RECOVERY_INVALID_MAGIC:
404	case 0x42424242:
405	if (recovery_start == off) {
406	found_recovery = true;
407	break;
408	}
409	dead = tdb_dead_space(tdb, off);
410	if (dead < sizeof(rec))
411	goto corrupt;
412
413	TDB_LOG((tdb, TDB_DEBUG_ERROR,
414	"Dead space at %d-%d (of %u)\n",
415	off, off + dead, tdb->map_size));
416	rec.rec_len = dead - sizeof(rec);
417	break;
418	case TDB_RECOVERY_MAGIC:
419	if (recovery_start != off) {
420	TDB_LOG((tdb, TDB_DEBUG_ERROR,
421	"Unexpected recovery record at offset %d\n",
422	off));
423	goto free;
424	}
425	found_recovery = true;
426	break;
427	default: ;
428	corrupt:
429	tdb->ecode = TDB_ERR_CORRUPT;
430	TDB_LOG((tdb, TDB_DEBUG_ERROR,
431	"Bad magic 0x%x at offset %d\n",
432	rec.magic, off));
433	goto free;
434	}
435	}
436
437	/* Now, hashes should all be empty: each record exists and is referred
438	* to by one other. */
439	for (h = 0; h < 1+tdb->header.hash_size; h++) {
440	unsigned int i;
441	for (i = 0; i < BITMAP_BITS / CHAR_BIT; i++) {
442	if (hashes[h][i] != 0) {
443	tdb->ecode = TDB_ERR_CORRUPT;
444	TDB_LOG((tdb, TDB_DEBUG_ERROR,
445	"Hashes do not match records\n"));
446	goto free;
447	}
448	}
449	}
450
451	/* We must have found recovery area if there was one. */
452	if (recovery_start != 0 && !found_recovery) {
453	TDB_LOG((tdb, TDB_DEBUG_ERROR,
454	"Expected a recovery area at %u\n",
455	recovery_start));
456	goto free;
457	}
458
459	free(hashes);
460	if (locked) {
461	tdb_unlockall_read(tdb);
462	}
463	return 0;
464
465	free:
466	free(hashes);
467	unlock:
468	if (locked) {
469	tdb_unlockall_read(tdb);
470	}
471	return -1;
472	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/server/lib/tdb/common/check.c

Download in other formats: