source: branches/samba-3.2.x/source/lib/tdb/common/io.c

Last change on this file was 204, checked in by Herwig Bauernfeind, 16 years ago

Update 3.2 branch to 3.2.4

File size: 13.4 KB
Line 
1 /*
2 Unix SMB/CIFS implementation.
3
4 trivial database library
5
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
12 ** under the LGPL
13
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
18
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
23
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
26*/
27
28
29#include "tdb_private.h"
30
31/* check for an out of bounds access - if it is out of bounds then
32 see if the database has been expanded by someone else and expand
33 if necessary
34 note that "len" is the minimum length needed for the db
35*/
36static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, int probe)
37{
38 struct stat st;
39 if (len <= tdb->map_size)
40 return 0;
41 if (tdb->flags & TDB_INTERNAL) {
42 if (!probe) {
43 /* Ensure ecode is set for log fn. */
44 tdb->ecode = TDB_ERR_IO;
45 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %d beyond internal malloc size %d\n",
46 (int)len, (int)tdb->map_size));
47 }
48 return TDB_ERRCODE(TDB_ERR_IO, -1);
49 }
50
51 if (fstat(tdb->fd, &st) == -1) {
52 return TDB_ERRCODE(TDB_ERR_IO, -1);
53 }
54
55 if (st.st_size < (size_t)len) {
56 if (!probe) {
57 /* Ensure ecode is set for log fn. */
58 tdb->ecode = TDB_ERR_IO;
59 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %d beyond eof at %d\n",
60 (int)len, (int)st.st_size));
61 }
62 return TDB_ERRCODE(TDB_ERR_IO, -1);
63 }
64
65 /* Unmap, update size, remap */
66 if (tdb_munmap(tdb) == -1)
67 return TDB_ERRCODE(TDB_ERR_IO, -1);
68 tdb->map_size = st.st_size;
69 tdb_mmap(tdb);
70 return 0;
71}
72
73/* write a lump of data at a specified offset */
74static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
75 const void *buf, tdb_len_t len)
76{
77 if (len == 0) {
78 return 0;
79 }
80
81 if (tdb->read_only || tdb->traverse_read) {
82 tdb->ecode = TDB_ERR_RDONLY;
83 return -1;
84 }
85
86 if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0)
87 return -1;
88#ifdef __OS2__
89 // YD we must upgrade read locks to write locks (exclusive), otherwise
90 // the owner (us) is not allowed to write to the file (different from unix)
91 TDB_LOG((tdb, TDB_DEBUG_TRACE,"unlocking at %d len=%d before writing.\n", off, len));
92 tdb_brlock( tdb, off, F_UNLCK, F_SETLK, 0, 1);
93 // if a wider previous lock is in effect, we cannot write lock our segment
94 // (e.g. a lock_upgrade locks all the file), so we hope the previous lock
95 // is a write lock: do not wait for lock.
96 tdb_brlock( tdb, off, F_WRLCK, F_SETLK, 0, len);
97#endif
98
99 if (tdb->map_ptr) {
100 memcpy(off + (char *)tdb->map_ptr, buf, len);
101 } else {
102 ssize_t written = pwrite(tdb->fd, buf, len, off);
103 if ((written != (ssize_t)len) && (written != -1)) {
104 /* try once more */
105 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only "
106 "%d of %d bytes at %d, trying once more\n",
107 (int)written, len, off));
108 errno = ENOSPC;
109 written = pwrite(tdb->fd, (const void *)((const char *)buf+written),
110 len-written,
111 off+written);
112 }
113 if (written == -1) {
114 /* Ensure ecode is set for log fn. */
115 tdb->ecode = TDB_ERR_IO;
116 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %d "
117 "len=%d (%s)\n", off, len, strerror(errno)));
118 return TDB_ERRCODE(TDB_ERR_IO, -1);
119 } else if (written != (ssize_t)len) {
120 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: failed to "
121 "write %d bytes at %d in two attempts\n",
122 len, off));
123 errno = ENOSPC;
124#ifdef __OS2__
125 // remove our lock
126 tdb_brlock( tdb, off, F_UNLCK, F_SETLK, 0, len);
127#endif
128 return TDB_ERRCODE(TDB_ERR_IO, -1);
129 }
130 }
131#ifdef __OS2__
132 // remove our lock
133 tdb_brlock( tdb, off, F_UNLCK, F_SETLK, 0, len);
134#endif
135 return 0;
136}
137
/*
  Endian conversion of a buffer, done in place.

  "size" is a byte count. The buffer is treated as size/4 consecutive
  32-bit words and TDB_BYTEREV is applied to each of them; bytes beyond
  the last whole word are not touched. Returns "buf".
*/
void *tdb_convert(void *buf, uint32_t size)
{
	uint32_t *word = (uint32_t *)buf;
	uint32_t remaining = size / 4;

	while (remaining > 0) {
		/* keep the increment out of the macro argument */
		*word = TDB_BYTEREV(*word);
		word++;
		remaining--;
	}
	return buf;
}
146
147
148/* read a lump of data at a specified offset, maybe convert */
149static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
150 tdb_len_t len, int cv)
151{
152 if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0) {
153 return -1;
154 }
155
156 if (tdb->map_ptr) {
157 memcpy(buf, off + (char *)tdb->map_ptr, len);
158 } else {
159 ssize_t ret = pread(tdb->fd, buf, len, off);
160 if (ret != (ssize_t)len) {
161 /* Ensure ecode is set for log fn. */
162 tdb->ecode = TDB_ERR_IO;
163 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_read failed at %d "
164 "len=%d ret=%d (%s) map_size=%d\n",
165 (int)off, (int)len, (int)ret, strerror(errno),
166 (int)tdb->map_size));
167 return TDB_ERRCODE(TDB_ERR_IO, -1);
168 }
169 }
170 if (cv) {
171 tdb_convert(buf, len);
172 }
173 return 0;
174}
175
176
177
178/*
179 do an unlocked scan of the hash table heads to find the next non-zero head. The value
180 will then be confirmed with the lock held
181*/
182static void tdb_next_hash_chain(struct tdb_context *tdb, uint32_t *chain)
183{
184 uint32_t h = *chain;
185 if (tdb->map_ptr) {
186 for (;h < tdb->header.hash_size;h++) {
187 if (0 != *(uint32_t *)(TDB_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) {
188 break;
189 }
190 }
191 } else {
192 uint32_t off=0;
193 for (;h < tdb->header.hash_size;h++) {
194 if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &off) != 0 || off != 0) {
195 break;
196 }
197 }
198 }
199 (*chain) = h;
200}
201
202
203int tdb_munmap(struct tdb_context *tdb)
204{
205 if (tdb->flags & TDB_INTERNAL)
206 return 0;
207
208#ifdef HAVE_MMAP
209 if (tdb->map_ptr) {
210 int ret;
211
212 ret = munmap(tdb->map_ptr, tdb->map_size);
213 if (ret != 0)
214 return ret;
215 }
216#endif
217 tdb->map_ptr = NULL;
218 return 0;
219}
220
221void tdb_mmap(struct tdb_context *tdb)
222{
223 if (tdb->flags & TDB_INTERNAL)
224 return;
225
226#ifdef HAVE_MMAP
227 if (!(tdb->flags & TDB_NOMMAP)) {
228 tdb->map_ptr = mmap(NULL, tdb->map_size,
229 PROT_READ|(tdb->read_only? 0:PROT_WRITE),
230 MAP_SHARED|MAP_FILE, tdb->fd, 0);
231
232 /*
233 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
234 */
235
236 if (tdb->map_ptr == MAP_FAILED) {
237 tdb->map_ptr = NULL;
238 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_mmap failed for size %d (%s)\n",
239 tdb->map_size, strerror(errno)));
240 }
241 } else {
242 tdb->map_ptr = NULL;
243 }
244#else
245 tdb->map_ptr = NULL;
246#endif
247}
248
249/* expand a file. we prefer to use ftruncate, as that is what posix
250 says to use for mmap expansion */
251static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t addition)
252{
253 char buf[8192];
254
255 if (tdb->read_only || tdb->traverse_read) {
256 tdb->ecode = TDB_ERR_RDONLY;
257 return -1;
258 }
259
260 if (ftruncate(tdb->fd, size+addition) == -1) {
261 char b = 0;
262 ssize_t written = pwrite(tdb->fd, &b, 1, (size+addition) - 1);
263 if (written == 0) {
264 /* try once more, potentially revealing errno */
265 written = pwrite(tdb->fd, &b, 1, (size+addition) - 1);
266 }
267 if (written == 0) {
268 /* again - give up, guessing errno */
269 errno = ENOSPC;
270 }
271 if (written != 1) {
272 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file to %d failed (%s)\n",
273 size+addition, strerror(errno)));
274 return -1;
275 }
276 }
277
278 /* now fill the file with something. This ensures that the
279 file isn't sparse, which would be very bad if we ran out of
280 disk. This must be done with write, not via mmap */
281 memset(buf, TDB_PAD_BYTE, sizeof(buf));
282 while (addition) {
283 size_t n = addition>sizeof(buf)?sizeof(buf):addition;
284 ssize_t written = pwrite(tdb->fd, buf, n, size);
285 if (written == 0) {
286 /* prevent infinite loops: try _once_ more */
287 written = pwrite(tdb->fd, buf, n, size);
288 }
289 if (written == 0) {
290 /* give up, trying to provide a useful errno */
291 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write "
292 "returned 0 twice: giving up!\n"));
293 errno = ENOSPC;
294 return -1;
295 } else if (written == -1) {
296 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write of "
297 "%d bytes failed (%s)\n", (int)n,
298 strerror(errno)));
299 return -1;
300 } else if (written != n) {
301 TDB_LOG((tdb, TDB_DEBUG_WARNING, "expand_file: wrote "
302 "only %d of %d bytes - retrying\n", (int)written,
303 (int)n));
304 }
305 addition -= written;
306 size += written;
307 }
308 return 0;
309}
310
311
312/* expand the database at least size bytes by expanding the underlying
313 file and doing the mmap again if necessary */
314int tdb_expand(struct tdb_context *tdb, tdb_off_t size)
315{
316 struct list_struct rec;
317 tdb_off_t offset, new_size;
318
319 if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
320 TDB_LOG((tdb, TDB_DEBUG_ERROR, "lock failed in tdb_expand\n"));
321 return -1;
322 }
323
324 /* must know about any previous expansions by another process */
325 tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1);
326
327 /* always make room for at least 100 more records, and at
328 least 25% more space. Round the database up to a multiple
329 of the page size */
330 new_size = MAX(tdb->map_size + size*100, tdb->map_size * 1.25);
331 size = TDB_ALIGN(new_size, tdb->page_size) - tdb->map_size;
332
333 if (!(tdb->flags & TDB_INTERNAL))
334 tdb_munmap(tdb);
335
336 /*
337 * We must ensure the file is unmapped before doing this
338 * to ensure consistency with systems like OpenBSD where
339 * writes and mmaps are not consistent.
340 */
341
342 /* expand the file itself */
343 if (!(tdb->flags & TDB_INTERNAL)) {
344 if (tdb->methods->tdb_expand_file(tdb, tdb->map_size, size) != 0)
345 goto fail;
346 }
347
348 tdb->map_size += size;
349
350 if (tdb->flags & TDB_INTERNAL) {
351 char *new_map_ptr = (char *)realloc(tdb->map_ptr,
352 tdb->map_size);
353 if (!new_map_ptr) {
354 tdb->map_size -= size;
355 goto fail;
356 }
357 tdb->map_ptr = new_map_ptr;
358 } else {
359 /*
360 * We must ensure the file is remapped before adding the space
361 * to ensure consistency with systems like OpenBSD where
362 * writes and mmaps are not consistent.
363 */
364
365 /* We're ok if the mmap fails as we'll fallback to read/write */
366 tdb_mmap(tdb);
367 }
368
369 /* form a new freelist record */
370 memset(&rec,'\0',sizeof(rec));
371 rec.rec_len = size - sizeof(rec);
372
373 /* link it into the free list */
374 offset = tdb->map_size - size;
375 if (tdb_free(tdb, offset, &rec) == -1)
376 goto fail;
377
378 tdb_unlock(tdb, -1, F_WRLCK);
379 return 0;
380 fail:
381 tdb_unlock(tdb, -1, F_WRLCK);
382 return -1;
383}
384
385/* read/write a tdb_off_t */
386int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
387{
388 return tdb->methods->tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV());
389}
390
391int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
392{
393 tdb_off_t off = *d;
394 return tdb->methods->tdb_write(tdb, offset, CONVERT(off), sizeof(*d));
395}
396
397
398/* read a lump of data, allocating the space for it */
399unsigned char *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
400{
401 unsigned char *buf;
402
403 /* some systems don't like zero length malloc */
404 if (len == 0) {
405 len = 1;
406 }
407
408 if (!(buf = (unsigned char *)malloc(len))) {
409 /* Ensure ecode is set for log fn. */
410 tdb->ecode = TDB_ERR_OOM;
411 TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_alloc_read malloc failed len=%d (%s)\n",
412 len, strerror(errno)));
413 return TDB_ERRCODE(TDB_ERR_OOM, buf);
414 }
415 if (tdb->methods->tdb_read(tdb, offset, buf, len, 0) == -1) {
416 SAFE_FREE(buf);
417 return NULL;
418 }
419 return buf;
420}
421
422/* Give a piece of tdb data to a parser */
423
424int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key,
425 tdb_off_t offset, tdb_len_t len,
426 int (*parser)(TDB_DATA key, TDB_DATA data,
427 void *private_data),
428 void *private_data)
429{
430 TDB_DATA data;
431 int result;
432
433 data.dsize = len;
434
435 if ((tdb->transaction == NULL) && (tdb->map_ptr != NULL)) {
436 /*
437 * Optimize by avoiding the malloc/memcpy/free, point the
438 * parser directly at the mmap area.
439 */
440 if (tdb->methods->tdb_oob(tdb, offset+len, 0) != 0) {
441 return -1;
442 }
443 data.dptr = offset + (unsigned char *)tdb->map_ptr;
444 return parser(key, data, private_data);
445 }
446
447 if (!(data.dptr = tdb_alloc_read(tdb, offset, len))) {
448 return -1;
449 }
450
451 result = parser(key, data, private_data);
452 free(data.dptr);
453 return result;
454}
455
456/* read/write a record */
457int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec)
458{
459 if (tdb->methods->tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1)
460 return -1;
461 if (TDB_BAD_MAGIC(rec)) {
462 /* Ensure ecode is set for log fn. */
463 tdb->ecode = TDB_ERR_CORRUPT;
464 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset));
465 return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
466 }
467 return tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0);
468}
469
470int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec)
471{
472 struct list_struct r = *rec;
473 return tdb->methods->tdb_write(tdb, offset, CONVERT(r), sizeof(r));
474}
475
476static const struct tdb_methods io_methods = {
477 tdb_read,
478 tdb_write,
479 tdb_next_hash_chain,
480 tdb_oob,
481 tdb_expand_file,
482 tdb_brlock
483};
484
485/*
486 initialise the default methods table
487*/
488void tdb_io_init(struct tdb_context *tdb)
489{
490 tdb->methods = &io_methods;
491}
Note: See TracBrowser for help on using the repository browser.