Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

source: branches/samba-3.5.x/lib/tdb/common/check.c

Visit:

Last change on this file was 414, checked in by Herwig Bauernfeind, 15 years ago
Samba 3.5.0: Initial import
File size: 11.5 KB

Line
1	/*
2	Unix SMB/CIFS implementation.
3
4	trivial database library
5
6	Copyright (C) Rusty Russell 2009
7
8	** NOTE! The following LGPL license applies to the tdb
9	** library. This does NOT imply that all of Samba is released
10	** under the LGPL
11
12	This library is free software; you can redistribute it and/or
13	modify it under the terms of the GNU Lesser General Public
14	License as published by the Free Software Foundation; either
15	version 3 of the License, or (at your option) any later version.
16
17	This library is distributed in the hope that it will be useful,
18	but WITHOUT ANY WARRANTY; without even the implied warranty of
19	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20	Lesser General Public License for more details.
21
22	You should have received a copy of the GNU Lesser General Public
23	License along with this library; if not, see <http://www.gnu.org/licenses/>.
24	*/
25	#include "tdb_private.h"
26
27	/* Since we opened it, these shouldn't fail unless it's recent corruption. */
28	static bool tdb_check_header(struct tdb_context tdb, tdb_off_t recovery)
29	{
30	struct tdb_header hdr;
31
32	if (tdb->methods->tdb_read(tdb, 0, &hdr, sizeof(hdr), DOCONV()) == -1)
33	return false;
34	if (strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0)
35	goto corrupt;
36
37	CONVERT(hdr);
38	if (hdr.version != TDB_VERSION)
39	goto corrupt;
40
41	if (hdr.rwlocks != 0)
42	goto corrupt;
43
44	if (hdr.hash_size == 0)
45	goto corrupt;
46
47	if (hdr.hash_size != tdb->header.hash_size)
48	goto corrupt;
49
50	if (hdr.recovery_start != 0 &&
51	hdr.recovery_start < TDB_DATA_START(tdb->header.hash_size))
52	goto corrupt;
53
54	*recovery = hdr.recovery_start;
55	return true;
56
57	corrupt:
58	tdb->ecode = TDB_ERR_CORRUPT;
59	TDB_LOG((tdb, TDB_DEBUG_ERROR, "Header is corrupt\n"));
60	return false;
61	}
62
63	/* Generic record header check. */
64	static bool tdb_check_record(struct tdb_context *tdb,
65	tdb_off_t off,
66	const struct tdb_record *rec)
67	{
68	tdb_off_t tailer;
69
70	/* Check rec->next: 0 or points to record offset, aligned. */
71	if (rec->next > 0 && rec->next < TDB_DATA_START(tdb->header.hash_size)){
72	TDB_LOG((tdb, TDB_DEBUG_ERROR,
73	"Record offset %d too small next %d\n",
74	off, rec->next));
75	goto corrupt;
76	}
77	if (rec->next + sizeof(*rec) < rec->next) {
78	TDB_LOG((tdb, TDB_DEBUG_ERROR,
79	"Record offset %d too large next %d\n",
80	off, rec->next));
81	goto corrupt;
82	}
83	if ((rec->next % TDB_ALIGNMENT) != 0) {
84	TDB_LOG((tdb, TDB_DEBUG_ERROR,
85	"Record offset %d misaligned next %d\n",
86	off, rec->next));
87	goto corrupt;
88	}
89	if (tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0))
90	goto corrupt;
91
92	/* Check rec_len: similar to rec->next, implies next record. */
93	if ((rec->rec_len % TDB_ALIGNMENT) != 0) {
94	TDB_LOG((tdb, TDB_DEBUG_ERROR,
95	"Record offset %d misaligned length %d\n",
96	off, rec->rec_len));
97	goto corrupt;
98	}
99	/* Must fit tailer. */
100	if (rec->rec_len < sizeof(tailer)) {
101	TDB_LOG((tdb, TDB_DEBUG_ERROR,
102	"Record offset %d too short length %d\n",
103	off, rec->rec_len));
104	goto corrupt;
105	}
106	/* OOB allows "right at the end" access, so this works for last rec. */
107	if (tdb->methods->tdb_oob(tdb, off+sizeof(*rec)+rec->rec_len, 0))
108	goto corrupt;
109
110	/* Check tailer. */
111	if (tdb_ofs_read(tdb, off+sizeof(*rec)+rec->rec_len-sizeof(tailer),
112	&tailer) == -1)
113	goto corrupt;
114	if (tailer != sizeof(*rec) + rec->rec_len) {
115	TDB_LOG((tdb, TDB_DEBUG_ERROR,
116	"Record offset %d invalid tailer\n", off));
117	goto corrupt;
118	}
119
120	return true;
121
122	corrupt:
123	tdb->ecode = TDB_ERR_CORRUPT;
124	return false;
125	}
126
127	/* Grab some bytes: may copy if can't use mmap.
128	Caller has already done bounds check. */
129	static TDB_DATA get_bytes(struct tdb_context *tdb,
130	tdb_off_t off, tdb_len_t len)
131	{
132	TDB_DATA d;
133
134	d.dsize = len;
135
136	if (tdb->transaction == NULL && tdb->map_ptr != NULL)
137	d.dptr = (unsigned char *)tdb->map_ptr + off;
138	else
139	d.dptr = tdb_alloc_read(tdb, off, d.dsize);
140	return d;
141	}
142
143	/* Frees data if we're not able to simply use mmap. */
144	static void put_bytes(struct tdb_context *tdb, TDB_DATA d)
145	{
146	if (tdb->transaction == NULL && tdb->map_ptr != NULL)
147	return;
148	free(d.dptr);
149	}
150
151	/* We use the excellent Jenkins lookup3 hash; this is based on hash_word2.
152	* See: http://burtleburtle.net/bob/c/lookup3.c
153	*/
154	#define rot(x,k) (((x)<<(k)) \| ((x)>>(32-(k))))
155	static void hash(uint32_t key, uint32_t pc, uint32_t pb)
156	{
157	uint32_t a,b,c;
158
159	/* Set up the internal state */
160	a = b = c = 0xdeadbeef + *pc;
161	c += *pb;
162	a += key;
163	c ^= b; c -= rot(b,14);
164	a ^= c; a -= rot(c,11);
165	b ^= a; b -= rot(a,25);
166	c ^= b; c -= rot(b,16);
167	a ^= c; a -= rot(c,4);
168	b ^= a; b -= rot(a,14);
169	c ^= b; c -= rot(b,24);
170	pc=c; pb=b;
171	}
172
173	/*
174	We want to check that all free records are in the free list
175	(only once), and all free list entries are free records. Similarly
176	for each hash chain of used records.
177
178	Doing that naively (without walking hash chains, since we want to be
179	linear) means keeping a list of records which have been seen in each
180	hash chain, and another of records pointed to (ie. next pointers
181	from records and the initial hash chain heads). These two lists
182	should be equal. This will take 8 bytes per record, and require
183	sorting at the end.
184
185	So instead, we record each offset in a bitmap such a way that
186	recording it twice will cancel out. Since each offset should appear
187	exactly twice, the bitmap should be zero at the end.
188
189	The approach was inspired by Bloom Filters (see Wikipedia). For
190	each value, we flip K bits in a bitmap of size N. The number of
191	distinct arrangements is:
192
193	N! / (K! * (N-K)!)
194
195	Of course, not all arrangements are actually distinct, but testing
196	shows this formula to be close enough.
197
198	So, if K == 8 and N == 256, the probability of two things flipping the same
199	bits is 1 in 409,663,695,276,000.
200
201	Given that ldb uses a hash size of 10000, using 32 bytes per hash chain
202	(320k) seems reasonable.
203	*/
204	#define NUM_HASHES 8
205	#define BITMAP_BITS 256
206
207	static void bit_flip(unsigned char bits[], unsigned int idx)
208	{
209	bits[idx / CHAR_BIT] ^= (1 << (idx % CHAR_BIT));
210	}
211
212	/* We record offsets in a bitmap for the particular chain it should be in. */
213	static void record_offset(unsigned char bits[], tdb_off_t off)
214	{
215	uint32_t h1 = off, h2 = 0;
216	unsigned int i;
217
218	/* We get two good hash values out of jhash2, so we use both. Then
219	* we keep going to produce further hash values. */
220	for (i = 0; i < NUM_HASHES / 2; i++) {
221	hash(off, &h1, &h2);
222	bit_flip(bits, h1 % BITMAP_BITS);
223	bit_flip(bits, h2 % BITMAP_BITS);
224	h2++;
225	}
226	}
227
228	/* Check that an in-use record is valid. */
229	static bool tdb_check_used_record(struct tdb_context *tdb,
230	tdb_off_t off,
231	const struct tdb_record *rec,
232	unsigned char **hashes,
233	int (check)(TDB_DATA, TDB_DATA, void ),
234	void *private_data)
235	{
236	TDB_DATA key, data;
237
238	if (!tdb_check_record(tdb, off, rec))
239	return false;
240
241	/* key + data + tailer must fit in record */
242	if (rec->key_len + rec->data_len + sizeof(tdb_off_t) > rec->rec_len) {
243	TDB_LOG((tdb, TDB_DEBUG_ERROR,
244	"Record offset %d too short for contents\n", off));
245	return false;
246	}
247
248	key = get_bytes(tdb, off + sizeof(*rec), rec->key_len);
249	if (!key.dptr)
250	return false;
251
252	if (tdb->hash_fn(&key) != rec->full_hash) {
253	TDB_LOG((tdb, TDB_DEBUG_ERROR,
254	"Record offset %d has incorrect hash\n", off));
255	goto fail_put_key;
256	}
257
258	/* Mark this offset as a known value for this hash bucket. */
259	record_offset(hashes[BUCKET(rec->full_hash)+1], off);
260	/* And similarly if the next pointer is valid. */
261	if (rec->next)
262	record_offset(hashes[BUCKET(rec->full_hash)+1], rec->next);
263
264	/* If they supply a check function and this record isn't dead,
265	get data and feed it. */
266	if (check && rec->magic != TDB_DEAD_MAGIC) {
267	data = get_bytes(tdb, off + sizeof(*rec) + rec->key_len,
268	rec->data_len);
269	if (!data.dptr)
270	goto fail_put_key;
271
272	if (check(key, data, private_data) == -1)
273	goto fail_put_data;
274	put_bytes(tdb, data);
275	}
276
277	put_bytes(tdb, key);
278	return true;
279
280	fail_put_data:
281	put_bytes(tdb, data);
282	fail_put_key:
283	put_bytes(tdb, key);
284	return false;
285	}
286
287	/* Check that an unused record is valid. */
288	static bool tdb_check_free_record(struct tdb_context *tdb,
289	tdb_off_t off,
290	const struct tdb_record *rec,
291	unsigned char **hashes)
292	{
293	if (!tdb_check_record(tdb, off, rec))
294	return false;
295
296	/* Mark this offset as a known value for the free list. */
297	record_offset(hashes[0], off);
298	/* And similarly if the next pointer is valid. */
299	if (rec->next)
300	record_offset(hashes[0], rec->next);
301	return true;
302	}
303
304	int tdb_check(struct tdb_context *tdb,
305	int (check)(TDB_DATA key, TDB_DATA data, void private_data),
306	void *private_data)
307	{
308	unsigned int h;
309	unsigned char **hashes;
310	tdb_off_t off, recovery_start;
311	struct tdb_record rec;
312	bool found_recovery = false;
313
314	if (tdb_lockall(tdb) == -1)
315	return -1;
316
317	/* Make sure we know true size of the underlying file. */
318	tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1);
319
320	/* Header must be OK: also gets us the recovery ptr, if any. */
321	if (!tdb_check_header(tdb, &recovery_start))
322	goto unlock;
323
324	/* We should have the whole header, too. */
325	if (tdb->map_size < TDB_DATA_START(tdb->header.hash_size)) {
326	tdb->ecode = TDB_ERR_CORRUPT;
327	TDB_LOG((tdb, TDB_DEBUG_ERROR, "File too short for hashes\n"));
328	goto unlock;
329	}
330
331	/* One big malloc: pointers then bit arrays. */
332	hashes = (unsigned char **)calloc(
333	1, sizeof(hashes[0]) * (1+tdb->header.hash_size)
334	+ BITMAP_BITS / CHAR_BIT * (1+tdb->header.hash_size));
335	if (!hashes) {
336	tdb->ecode = TDB_ERR_OOM;
337	goto unlock;
338	}
339
340	/* Initialize pointers */
341	hashes[0] = (unsigned char *)(&hashes[1+tdb->header.hash_size]);
342	for (h = 1; h < 1+tdb->header.hash_size; h++)
343	hashes[h] = hashes[h-1] + BITMAP_BITS / CHAR_BIT;
344
345	/* Freelist and hash headers are all in a row: read them. */
346	for (h = 0; h < 1+tdb->header.hash_size; h++) {
347	if (tdb_ofs_read(tdb, FREELIST_TOP + h*sizeof(tdb_off_t),
348	&off) == -1)
349	goto free;
350	if (off)
351	record_offset(hashes[h], off);
352	}
353
354	/* For each record, read it in and check it's ok. */
355	for (off = TDB_DATA_START(tdb->header.hash_size);
356	off < tdb->map_size;
357	off += sizeof(rec) + rec.rec_len) {
358	if (tdb->methods->tdb_read(tdb, off, &rec, sizeof(rec),
359	DOCONV()) == -1)
360	goto free;
361	switch (rec.magic) {
362	case TDB_MAGIC:
363	case TDB_DEAD_MAGIC:
364	if (!tdb_check_used_record(tdb, off, &rec, hashes,
365	check, private_data))
366	goto free;
367	break;
368	case TDB_FREE_MAGIC:
369	if (!tdb_check_free_record(tdb, off, &rec, hashes))
370	goto free;
371	break;
372	case TDB_RECOVERY_MAGIC:
373	case 0: /* Used for invalid (or in-progress) recovery area. */
374	if (recovery_start != off) {
375	TDB_LOG((tdb, TDB_DEBUG_ERROR,
376	"Unexpected recovery record at offset %d\n",
377	off));
378	goto free;
379	}
380	found_recovery = true;
381	break;
382	default:
383	tdb->ecode = TDB_ERR_CORRUPT;
384	TDB_LOG((tdb, TDB_DEBUG_ERROR,
385	"Bad magic 0x%x at offset %d\n",
386	rec.magic, off));
387	goto free;
388	}
389	}
390
391	/* Now, hashes should all be empty: each record exists and is referred
392	* to by one other. */
393	for (h = 0; h < 1+tdb->header.hash_size; h++) {
394	unsigned int i;
395	for (i = 0; i < BITMAP_BITS / CHAR_BIT; i++) {
396	if (hashes[h][i] != 0) {
397	tdb->ecode = TDB_ERR_CORRUPT;
398	TDB_LOG((tdb, TDB_DEBUG_ERROR,
399	"Hashes do not match records\n"));
400	goto free;
401	}
402	}
403	}
404
405	/* We must have found recovery area if there was one. */
406	if (recovery_start != 0 && !found_recovery) {
407	TDB_LOG((tdb, TDB_DEBUG_ERROR,
408	"Expected %s recovery area, got %s\n",
409	recovery_start ? "a" : "no",
410	found_recovery ? "one" : "none"));
411	goto free;
412	}
413
414	free(hashes);
415	tdb_unlockall(tdb);
416	return 0;
417
418	free:
419	free(hashes);
420	unlock:
421	tdb_unlockall(tdb);
422	return -1;
423	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: