Context Navigation

source: trunk-3.0/source/tdb/common/tdb.c@ 103

Visit:

Last change on this file since 103 was 62, checked in by Paul Smedley, 18 years ago
Update source to 3.0.25c level
File size: 16.4 KB

Line
1	/*
2	Unix SMB/CIFS implementation.
3
4	trivial database library
5
6	Copyright (C) Andrew Tridgell 1999-2005
7	Copyright (C) Paul `Rusty' Russell 2000
8	Copyright (C) Jeremy Allison 2000-2003
9
10	** NOTE! The following LGPL license applies to the tdb
11	** library. This does NOT imply that all of Samba is released
12	** under the LGPL
13
14	This library is free software; you can redistribute it and/or
15	modify it under the terms of the GNU Lesser General Public
16	License as published by the Free Software Foundation; either
17	version 2 of the License, or (at your option) any later version.
18
19	This library is distributed in the hope that it will be useful,
20	but WITHOUT ANY WARRANTY; without even the implied warranty of
21	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22	Lesser General Public License for more details.
23
24	You should have received a copy of the GNU Lesser General Public
25	License along with this library; if not, write to the Free Software
26	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27	*/
28
29	#include "tdb_private.h"
30
/* File-scope TDB_DATA with no initializer, so it starts out all-zero.
   tdb_fetch() returns it when the key lookup fails. */
31	TDB_DATA tdb_null;
32
33	/*
34	increment the tdb sequence number if the tdb has been opened using
35	the TDB_SEQNUM flag
36	*/
37	static void tdb_increment_seqnum(struct tdb_context *tdb)
38	{
39	tdb_off_t seqnum=0;
40
41	if (!(tdb->flags & TDB_SEQNUM)) {
42	return;
43	}
44
45	if (tdb_brlock(tdb, TDB_SEQNUM_OFS, F_WRLCK, F_SETLKW, 1, 1) != 0) {
46	return;
47	}
48
49	/* we ignore errors from this, as we have no sane way of
50	dealing with them.
51	*/
52	tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
53	seqnum++;
54	tdb_ofs_write(tdb, TDB_SEQNUM_OFS, &seqnum);
55
56	tdb_brlock(tdb, TDB_SEQNUM_OFS, F_UNLCK, F_SETLKW, 1, 1);
57	}
58
59	static int tdb_key_compare(TDB_DATA key, TDB_DATA data, void *private_data)
60	{
61	return memcmp(data.dptr, key.dptr, data.dsize);
62	}
63
64	/* Returns 0 on fail. On success, return offset of record, and fills
65	in rec */
66	static tdb_off_t tdb_find(struct tdb_context *tdb, TDB_DATA key, u32 hash,
67	struct list_struct *r)
68	{
69	tdb_off_t rec_ptr;
70
71	/* read in the hash top */
72	if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
73	return 0;
74
75	/* keep looking until we find the right record */
76	while (rec_ptr) {
77	if (tdb_rec_read(tdb, rec_ptr, r) == -1)
78	return 0;
79
80	if (!TDB_DEAD(r) && hash==r->full_hash
81	&& key.dsize==r->key_len
82	&& tdb_parse_data(tdb, key, rec_ptr + sizeof(*r),
83	r->key_len, tdb_key_compare,
84	NULL) == 0) {
85	return rec_ptr;
86	}
87	rec_ptr = r->next;
88	}
89	return TDB_ERRCODE(TDB_ERR_NOEXIST, 0);
90	}
91
92	/* As tdb_find, but if you succeed, keep the lock */
93	tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash, int locktype,
94	struct list_struct *rec)
95	{
96	u32 rec_ptr;
97
98	if (tdb_lock(tdb, BUCKET(hash), locktype) == -1)
99	return 0;
100	if (!(rec_ptr = tdb_find(tdb, key, hash, rec)))
101	tdb_unlock(tdb, BUCKET(hash), locktype);
102	return rec_ptr;
103	}
104
105
106	/* update an entry in place - this only works if the new data size
107	is <= the old data size and the key exists.
108	on failure return -1.
109	*/
110	static int tdb_update_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash, TDB_DATA dbuf)
111	{
112	struct list_struct rec;
113	tdb_off_t rec_ptr;
114
115	/* find entry */
116	if (!(rec_ptr = tdb_find(tdb, key, hash, &rec)))
117	return -1;
118
119	/* must be long enough key, data and tailer */
120	if (rec.rec_len < key.dsize + dbuf.dsize + sizeof(tdb_off_t)) {
121	tdb->ecode = TDB_SUCCESS; /* Not really an error */
122	return -1;
123	}
124
125	if (tdb->methods->tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
126	dbuf.dptr, dbuf.dsize) == -1)
127	return -1;
128
129	if (dbuf.dsize != rec.data_len) {
130	/* update size */
131	rec.data_len = dbuf.dsize;
132	return tdb_rec_write(tdb, rec_ptr, &rec);
133	}
134
135	return 0;
136	}
137
138	/* find an entry in the database given a key */
139	/* If an entry doesn't exist tdb_err will be set to
140	* TDB_ERR_NOEXIST. If a key has no data attached
141	* then the TDB_DATA will have zero length but
142	* a non-zero pointer
143	*/
144	TDB_DATA tdb_fetch(struct tdb_context *tdb, TDB_DATA key)
145	{
146	tdb_off_t rec_ptr;
147	struct list_struct rec;
148	TDB_DATA ret;
149	u32 hash;
150
151	/* find which hash bucket it is in */
152	hash = tdb->hash_fn(&key);
153	if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec)))
154	return tdb_null;
155
156	ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec) + rec.key_len,
157	rec.data_len);
158	ret.dsize = rec.data_len;
159	tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
160	return ret;
161	}
162
163	/*
164	* Find an entry in the database and hand the record's data to a parsing
165	* function. The parsing function is executed under the chain read lock, so it
166	* should be fast and should not block on other syscalls.
167	*
168	* DONT CALL OTHER TDB CALLS FROM THE PARSER, THIS MIGHT LEAD TO SEGFAULTS.
169	*
170	* For mmapped tdb's that do not have a transaction open it points the parsing
171	* function directly at the mmap area, it avoids the malloc/memcpy in this
172	* case. If a transaction is open or no mmap is available, it has to do
173	* malloc/read/parse/free.
174	*
175	* This is interesting for all readers of potentially large data structures in
176	* the tdb records, ldb indexes being one example.
177	*/
178
179	int tdb_parse_record(struct tdb_context *tdb, TDB_DATA key,
180	int (*parser)(TDB_DATA key, TDB_DATA data,
181	void *private_data),
182	void *private_data)
183	{
184	tdb_off_t rec_ptr;
185	struct list_struct rec;
186	int ret;
187	u32 hash;
188
189	/* find which hash bucket it is in */
190	hash = tdb->hash_fn(&key);
191
192	if (!(rec_ptr = tdb_find_lock_hash(tdb,key,hash,F_RDLCK,&rec))) {
193	return TDB_ERRCODE(TDB_ERR_NOEXIST, 0);
194	}
195
196	ret = tdb_parse_data(tdb, key, rec_ptr + sizeof(rec) + rec.key_len,
197	rec.data_len, parser, private_data);
198
199	tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
200
201	return ret;
202	}
203
204	/* check if an entry in the database exists
205
206	note that 1 is returned if the key is found and 0 is returned if not found
207	this doesn't match the conventions in the rest of this module, but is
208	compatible with gdbm
209	*/
210	static int tdb_exists_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash)
211	{
212	struct list_struct rec;
213
214	if (tdb_find_lock_hash(tdb, key, hash, F_RDLCK, &rec) == 0)
215	return 0;
216	tdb_unlock(tdb, BUCKET(rec.full_hash), F_RDLCK);
217	return 1;
218	}
219
220	int tdb_exists(struct tdb_context *tdb, TDB_DATA key)
221	{
222	u32 hash = tdb->hash_fn(&key);
223	return tdb_exists_hash(tdb, key, hash);
224	}
225
226	/* actually delete an entry in the database given the offset */
227	int tdb_do_delete(struct tdb_context tdb, tdb_off_t rec_ptr, struct list_structrec)
228	{
229	tdb_off_t last_ptr, i;
230	struct list_struct lastrec;
231
232	if (tdb->read_only \|\| tdb->traverse_read) return -1;
233
234	if (tdb_write_lock_record(tdb, rec_ptr) == -1) {
235	/* Someone traversing here: mark it as dead */
236	rec->magic = TDB_DEAD_MAGIC;
237	return tdb_rec_write(tdb, rec_ptr, rec);
238	}
239	if (tdb_write_unlock_record(tdb, rec_ptr) != 0)
240	return -1;
241
242	/* find previous record in hash chain */
243	if (tdb_ofs_read(tdb, TDB_HASH_TOP(rec->full_hash), &i) == -1)
244	return -1;
245	for (last_ptr = 0; i != rec_ptr; last_ptr = i, i = lastrec.next)
246	if (tdb_rec_read(tdb, i, &lastrec) == -1)
247	return -1;
248
249	/* unlink it: next ptr is at start of record. */
250	if (last_ptr == 0)
251	last_ptr = TDB_HASH_TOP(rec->full_hash);
252	if (tdb_ofs_write(tdb, last_ptr, &rec->next) == -1)
253	return -1;
254
255	/* recover the space */
256	if (tdb_free(tdb, rec_ptr, rec) == -1)
257	return -1;
258	return 0;
259	}
260
261	static int tdb_count_dead(struct tdb_context *tdb, u32 hash)
262	{
263	int res = 0;
264	tdb_off_t rec_ptr;
265	struct list_struct rec;
266
267	/* read in the hash top */
268	if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
269	return 0;
270
271	while (rec_ptr) {
272	if (tdb_rec_read(tdb, rec_ptr, &rec) == -1)
273	return 0;
274
275	if (rec.magic == TDB_DEAD_MAGIC) {
276	res += 1;
277	}
278	rec_ptr = rec.next;
279	}
280	return res;
281	}
282
283	/*
284	* Purge all DEAD records from a hash chain
285	*/
286	static int tdb_purge_dead(struct tdb_context *tdb, u32 hash)
287	{
288	int res = -1;
289	struct list_struct rec;
290	tdb_off_t rec_ptr;
291
292	if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
293	return -1;
294	}
295
296	/* read in the hash top */
297	if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
298	goto fail;
299
300	while (rec_ptr) {
301	tdb_off_t next;
302
303	if (tdb_rec_read(tdb, rec_ptr, &rec) == -1) {
304	goto fail;
305	}
306
307	next = rec.next;
308
309	if (rec.magic == TDB_DEAD_MAGIC
310	&& tdb_do_delete(tdb, rec_ptr, &rec) == -1) {
311	goto fail;
312	}
313	rec_ptr = next;
314	}
315	res = 0;
316	fail:
317	tdb_unlock(tdb, -1, F_WRLCK);
318	return res;
319	}
320
321	/* delete an entry in the database given a key */
322	static int tdb_delete_hash(struct tdb_context *tdb, TDB_DATA key, u32 hash)
323	{
324	tdb_off_t rec_ptr;
325	struct list_struct rec;
326	int ret;
327
328	if (tdb->max_dead_records != 0) {
329
330	/*
331	* Allow for some dead records per hash chain, mainly for
332	* tdb's with a very high create/delete rate like locking.tdb.
333	*/
334
335	if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
336	return -1;
337
338	if (tdb_count_dead(tdb, hash) >= tdb->max_dead_records) {
339	/*
340	* Don't let the per-chain freelist grow too large,
341	* delete all existing dead records
342	*/
343	tdb_purge_dead(tdb, hash);
344	}
345
346	if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) {
347	tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
348	return -1;
349	}
350
351	/*
352	* Just mark the record as dead.
353	*/
354	rec.magic = TDB_DEAD_MAGIC;
355	ret = tdb_rec_write(tdb, rec_ptr, &rec);
356	}
357	else {
358	if (!(rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK,
359	&rec)))
360	return -1;
361
362	ret = tdb_do_delete(tdb, rec_ptr, &rec);
363	}
364
365	if (ret == 0) {
366	tdb_increment_seqnum(tdb);
367	}
368
369	if (tdb_unlock(tdb, BUCKET(rec.full_hash), F_WRLCK) != 0)
370	TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n"));
371	return ret;
372	}
373
374	int tdb_delete(struct tdb_context *tdb, TDB_DATA key)
375	{
376	u32 hash = tdb->hash_fn(&key);
377	return tdb_delete_hash(tdb, key, hash);
378	}
379
380	/*
381	* See if we have a dead record around with enough space
382	*/
383	static tdb_off_t tdb_find_dead(struct tdb_context *tdb, u32 hash,
384	struct list_struct *r, tdb_len_t length)
385	{
386	tdb_off_t rec_ptr;
387
388	/* read in the hash top */
389	if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)
390	return 0;
391
392	/* keep looking until we find the right record */
393	while (rec_ptr) {
394	if (tdb_rec_read(tdb, rec_ptr, r) == -1)
395	return 0;
396
397	if (TDB_DEAD(r) && r->rec_len >= length) {
398	/*
399	* First fit for simple coding, TODO: change to best
400	* fit
401	*/
402	return rec_ptr;
403	}
404	rec_ptr = r->next;
405	}
406	return 0;
407	}
408
409	/* store an element in the database, replacing any existing element
410	with the same key
411
412	return 0 on success, -1 on failure
413	*/
414	int tdb_store(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
415	{
416	struct list_struct rec;
417	u32 hash;
418	tdb_off_t rec_ptr;
419	char *p = NULL;
420	int ret = -1;
421
422	if (tdb->read_only \|\| tdb->traverse_read) {
423	tdb->ecode = TDB_ERR_RDONLY;
424	return -1;
425	}
426
427	/* find which hash bucket it is in */
428	hash = tdb->hash_fn(&key);
429	if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
430	return -1;
431
432	/* check for it existing, on insert. */
433	if (flag == TDB_INSERT) {
434	if (tdb_exists_hash(tdb, key, hash)) {
435	tdb->ecode = TDB_ERR_EXISTS;
436	goto fail;
437	}
438	} else {
439	/* first try in-place update, on modify or replace. */
440	if (tdb_update_hash(tdb, key, hash, dbuf) == 0) {
441	goto done;
442	}
443	if (tdb->ecode == TDB_ERR_NOEXIST &&
444	flag == TDB_MODIFY) {
445	/* if the record doesn't exist and we are in TDB_MODIFY mode then
446	we should fail the store */
447	goto fail;
448	}
449	}
450	/* reset the error code potentially set by the tdb_update() */
451	tdb->ecode = TDB_SUCCESS;
452
453	/* delete any existing record - if it doesn't exist we don't
454	care. Doing this first reduces fragmentation, and avoids
455	coalescing with `allocated' block before it's updated. */
456	if (flag != TDB_INSERT)
457	tdb_delete_hash(tdb, key, hash);
458
459	/* Copy key+value before allocating free space in case malloc
460	fails and we are left with a dead spot in the tdb. */
461
462	if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) {
463	tdb->ecode = TDB_ERR_OOM;
464	goto fail;
465	}
466
467	memcpy(p, key.dptr, key.dsize);
468	if (dbuf.dsize)
469	memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);
470
471	if (tdb->max_dead_records != 0) {
472	/*
473	* Allow for some dead records per hash chain, look if we can
474	* find one that can hold the new record. We need enough space
475	* for key, data and tailer. If we find one, we don't have to
476	* consult the central freelist.
477	*/
478	rec_ptr = tdb_find_dead(
479	tdb, hash, &rec,
480	key.dsize + dbuf.dsize + sizeof(tdb_off_t));
481
482	if (rec_ptr != 0) {
483	rec.key_len = key.dsize;
484	rec.data_len = dbuf.dsize;
485	rec.full_hash = hash;
486	rec.magic = TDB_MAGIC;
487	if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
488	\|\| tdb->methods->tdb_write(
489	tdb, rec_ptr + sizeof(rec),
490	p, key.dsize + dbuf.dsize) == -1) {
491	goto fail;
492	}
493	goto done;
494	}
495	}
496
497	/*
498	* We have to allocate some space from the freelist, so this means we
499	* have to lock it. Use the chance to purge all the DEAD records from
500	* the hash chain under the freelist lock.
501	*/
502
503	if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
504	goto fail;
505	}
506
507	if ((tdb->max_dead_records != 0)
508	&& (tdb_purge_dead(tdb, hash) == -1)) {
509	tdb_unlock(tdb, -1, F_WRLCK);
510	goto fail;
511	}
512
513	/* we have to allocate some space */
514	rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize, &rec);
515
516	tdb_unlock(tdb, -1, F_WRLCK);
517
518	if (rec_ptr == 0) {
519	goto fail;
520	}
521
522	/* Read hash top into next ptr */
523	if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec.next) == -1)
524	goto fail;
525
526	rec.key_len = key.dsize;
527	rec.data_len = dbuf.dsize;
528	rec.full_hash = hash;
529	rec.magic = TDB_MAGIC;
530
531	/* write out and point the top of the hash chain at it */
532	if (tdb_rec_write(tdb, rec_ptr, &rec) == -1
533	\|\| tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1
534	\|\| tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) {
535	/* Need to tdb_unallocate() here */
536	goto fail;
537	}
538
539	done:
540	ret = 0;
541	fail:
542	if (ret == 0) {
543	tdb_increment_seqnum(tdb);
544	}
545
546	SAFE_FREE(p);
547	tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
548	return ret;
549	}
550
551
552	/* Append to an entry. Create if not exist. */
553	int tdb_append(struct tdb_context *tdb, TDB_DATA key, TDB_DATA new_dbuf)
554	{
555	u32 hash;
556	TDB_DATA dbuf;
557	int ret = -1;
558
559	/* find which hash bucket it is in */
560	hash = tdb->hash_fn(&key);
561	if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)
562	return -1;
563
564	dbuf = tdb_fetch(tdb, key);
565
566	if (dbuf.dptr == NULL) {
567	dbuf.dptr = (char *)malloc(new_dbuf.dsize);
568	} else {
569	char new_dptr = (char )realloc(dbuf.dptr,
570	dbuf.dsize + new_dbuf.dsize);
571	if (new_dptr == NULL) {
572	free(dbuf.dptr);
573	}
574	dbuf.dptr = new_dptr;
575	}
576
577	if (dbuf.dptr == NULL) {
578	tdb->ecode = TDB_ERR_OOM;
579	goto failed;
580	}
581
582	memcpy(dbuf.dptr + dbuf.dsize, new_dbuf.dptr, new_dbuf.dsize);
583	dbuf.dsize += new_dbuf.dsize;
584
585	ret = tdb_store(tdb, key, dbuf, 0);
586
587	failed:
588	tdb_unlock(tdb, BUCKET(hash), F_WRLCK);
589	SAFE_FREE(dbuf.dptr);
590	return ret;
591	}
592
593
594	/*
595	return the name of the current tdb file
596	useful for external logging functions
597	*/
598	const char tdb_name(struct tdb_context tdb)
599	{
600	return tdb->name;
601	}
602
603	/*
604	return the underlying file descriptor being used by tdb, or -1
605	useful for external routines that want to check the device/inode
606	of the fd
607	*/
608	int tdb_fd(struct tdb_context *tdb)
609	{
610	return tdb->fd;
611	}
612
613	/*
614	return the current logging function
615	useful for external tdb routines that wish to log tdb errors
616	*/
617	tdb_log_func tdb_log_fn(struct tdb_context *tdb)
618	{
619	return tdb->log.log_fn;
620	}
621
622
623	/*
624	get the tdb sequence number. Only makes sense if the writers opened
625	with TDB_SEQNUM set. Note that this sequence number will wrap quite
626	quickly, so it should only be used for a 'has something changed'
627	test, not for code that relies on the count of the number of changes
628	made. If you want a counter then use a tdb record.
629
630	The aim of this sequence number is to allow for a very lightweight
631	test of a possible tdb change.
632	*/
633	int tdb_get_seqnum(struct tdb_context *tdb)
634	{
635	tdb_off_t seqnum=0;
636
637	tdb_ofs_read(tdb, TDB_SEQNUM_OFS, &seqnum);
638	return seqnum;
639	}
640
641	int tdb_hash_size(struct tdb_context *tdb)
642	{
643	return tdb->header.hash_size;
644	}
645
646	size_t tdb_map_size(struct tdb_context *tdb)
647	{
648	return tdb->map_size;
649	}
650
651	int tdb_get_flags(struct tdb_context *tdb)
652	{
653	return tdb->flags;
654	}
655

Note: See TracBrowser for help on using the repository browser.

Download in other formats: