source: branches/samba-3.3.x/source/lib/tdb/common/io.c

Last change on this file was 206, checked in by Herwig Bauernfeind, 16 years ago

Import Samba 3.3 branch at 3.0.0 level (psmedley's port)

File size: 13.4 KB
Line 
1 /*
2 Unix SMB/CIFS implementation.
3
4 trivial database library
5
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
12 ** under the LGPL
13
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
18
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
23
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
26*/
27
28
29#include "tdb_private.h"
30
31/* check for an out of bounds access - if it is out of bounds then
32 see if the database has been expanded by someone else and expand
33 if necessary
34 note that "len" is the minimum length needed for the db
35*/
36static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, int probe)
37{
38 struct stat st;
39 if (len <= tdb->map_size)
40 return 0;
41 if (tdb->flags & TDB_INTERNAL) {
42 if (!probe) {
43 /* Ensure ecode is set for log fn. */
44 tdb->ecode = TDB_ERR_IO;
45 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %d beyond internal malloc size %d\n",
46 (int)len, (int)tdb->map_size));
47 }
48 return TDB_ERRCODE(TDB_ERR_IO, -1);
49 }
50
51 if (fstat(tdb->fd, &st) == -1) {
52 return TDB_ERRCODE(TDB_ERR_IO, -1);
53 }
54
55 if (st.st_size < (size_t)len) {
56 if (!probe) {
57 /* Ensure ecode is set for log fn. */
58 tdb->ecode = TDB_ERR_IO;
59 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %d beyond eof at %d\n",
60 (int)len, (int)st.st_size));
61 }
62 return TDB_ERRCODE(TDB_ERR_IO, -1);
63 }
64
65 /* Unmap, update size, remap */
66 if (tdb_munmap(tdb) == -1)
67 return TDB_ERRCODE(TDB_ERR_IO, -1);
68 tdb->map_size = st.st_size;
69 tdb_mmap(tdb);
70 return 0;
71}
72
73/* write a lump of data at a specified offset */
74static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
75 const void *buf, tdb_len_t len)
76{
77 if (len == 0) {
78 return 0;
79 }
80
81 if (tdb->read_only || tdb->traverse_read) {
82 tdb->ecode = TDB_ERR_RDONLY;
83 return -1;
84 }
85
86 if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0)
87 return -1;
88
89#ifdef __OS2__
90 // YD we must upgrade read locks to write locks (exclusive), otherwise
91 // the owner (us) is not allowed to write to the file (different from unix)
92 TDB_LOG((tdb, TDB_DEBUG_TRACE,"unlocking at %d len=%d before writing.\n", off, len));
93 tdb_brlock( tdb, off, F_UNLCK, F_SETLK, 0, 1);
94 // if a wider previous lock is in effect, we cannot write lock our segment
95 // (e.g. a lock_upgrade locks all the file), so we hope the previous lock
96 // is a write lock: do not wait for lock.
97 tdb_brlock( tdb, off, F_WRLCK, F_SETLK, 0, len);
98#endif
99
100 if (tdb->map_ptr) {
101 memcpy(off + (char *)tdb->map_ptr, buf, len);
102 } else {
103 ssize_t written = pwrite(tdb->fd, buf, len, off);
104 if ((written != (ssize_t)len) && (written != -1)) {
105 /* try once more */
106 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only "
107 "%d of %d bytes at %d, trying once more\n",
108 (int)written, len, off));
109 errno = ENOSPC;
110 written = pwrite(tdb->fd, (const void *)((const char *)buf+written),
111 len-written,
112 off+written);
113 }
114 if (written == -1) {
115 /* Ensure ecode is set for log fn. */
116 tdb->ecode = TDB_ERR_IO;
117 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %d "
118 "len=%d (%s)\n", off, len, strerror(errno)));
119 return TDB_ERRCODE(TDB_ERR_IO, -1);
120 } else if (written != (ssize_t)len) {
121 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: failed to "
122 "write %d bytes at %d in two attempts\n",
123 len, off));
124 errno = ENOSPC;
125#ifdef __OS2__
126 // remove our lock
127 tdb_brlock( tdb, off, F_UNLCK, F_SETLK, 0, len);
128#endif
129 return TDB_ERRCODE(TDB_ERR_IO, -1);
130 }
131 }
132#ifdef __OS2__
133 // remove our lock
134 tdb_brlock( tdb, off, F_UNLCK, F_SETLK, 0, len);
135#endif
136 return 0;
137}
138
/*
  Endian conversion: we only ever deal with 4 byte quantities.

  Applies TDB_BYTEREV in place to each complete 4 byte word in the first
  "size" bytes of buf; a trailing remainder of fewer than 4 bytes is
  left alone. Returns buf so the call can sit inside an expression.
*/
void *tdb_convert(void *buf, uint32_t size)
{
	uint32_t *word = (uint32_t *)buf;
	uint32_t remaining = size / 4;

	while (remaining > 0) {
		*word = TDB_BYTEREV(*word);
		word++;
		remaining--;
	}
	return buf;
}
147
148
149/* read a lump of data at a specified offset, maybe convert */
150static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
151 tdb_len_t len, int cv)
152{
153 if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0) {
154 return -1;
155 }
156
157 if (tdb->map_ptr) {
158 memcpy(buf, off + (char *)tdb->map_ptr, len);
159 } else {
160 ssize_t ret = pread(tdb->fd, buf, len, off);
161#if 0
162 if (ret != (ssize_t)len) {
163 /* Ensure ecode is set for log fn. */
164 tdb->ecode = TDB_ERR_IO;
165 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_read failed at %d "
166 "len=%d ret=%d (%s) map_size=%d\n",
167 (int)off, (int)len, (int)ret, strerror(errno),
168 (int)tdb->map_size));
169 return TDB_ERRCODE(TDB_ERR_IO, -1);
170 }
171#endif
172 }
173 if (cv) {
174 tdb_convert(buf, len);
175 }
176 return 0;
177}
178
179
180
181/*
182 do an unlocked scan of the hash table heads to find the next non-zero head. The value
183 will then be confirmed with the lock held
184*/
185static void tdb_next_hash_chain(struct tdb_context *tdb, uint32_t *chain)
186{
187 uint32_t h = *chain;
188 if (tdb->map_ptr) {
189 for (;h < tdb->header.hash_size;h++) {
190 if (0 != *(uint32_t *)(TDB_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) {
191 break;
192 }
193 }
194 } else {
195 uint32_t off=0;
196 for (;h < tdb->header.hash_size;h++) {
197 if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &off) != 0 || off != 0) {
198 break;
199 }
200 }
201 }
202 (*chain) = h;
203}
204
205
206int tdb_munmap(struct tdb_context *tdb)
207{
208 if (tdb->flags & TDB_INTERNAL)
209 return 0;
210
211#ifdef HAVE_MMAP
212 if (tdb->map_ptr) {
213 int ret;
214
215 ret = munmap(tdb->map_ptr, tdb->map_size);
216 if (ret != 0)
217 return ret;
218 }
219#endif
220 tdb->map_ptr = NULL;
221 return 0;
222}
223
224void tdb_mmap(struct tdb_context *tdb)
225{
226 if (tdb->flags & TDB_INTERNAL)
227 return;
228
229#ifdef HAVE_MMAP
230 if (!(tdb->flags & TDB_NOMMAP)) {
231 tdb->map_ptr = mmap(NULL, tdb->map_size,
232 PROT_READ|(tdb->read_only? 0:PROT_WRITE),
233 MAP_SHARED|MAP_FILE, tdb->fd, 0);
234
235 /*
236 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
237 */
238
239 if (tdb->map_ptr == MAP_FAILED) {
240 tdb->map_ptr = NULL;
241 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_mmap failed for size %d (%s)\n",
242 tdb->map_size, strerror(errno)));
243 }
244 } else {
245 tdb->map_ptr = NULL;
246 }
247#else
248 tdb->map_ptr = NULL;
249#endif
250}
251
252/* expand a file. we prefer to use ftruncate, as that is what posix
253 says to use for mmap expansion */
254static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t addition)
255{
256 char buf[8192];
257
258 if (tdb->read_only || tdb->traverse_read) {
259 tdb->ecode = TDB_ERR_RDONLY;
260 return -1;
261 }
262
263 if (ftruncate(tdb->fd, size+addition) == -1) {
264 char b = 0;
265 ssize_t written = pwrite(tdb->fd, &b, 1, (size+addition) - 1);
266 if (written == 0) {
267 /* try once more, potentially revealing errno */
268 written = pwrite(tdb->fd, &b, 1, (size+addition) - 1);
269 }
270 if (written == 0) {
271 /* again - give up, guessing errno */
272 errno = ENOSPC;
273 }
274 if (written != 1) {
275 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file to %d failed (%s)\n",
276 size+addition, strerror(errno)));
277 return -1;
278 }
279 }
280
281 /* now fill the file with something. This ensures that the
282 file isn't sparse, which would be very bad if we ran out of
283 disk. This must be done with write, not via mmap */
284 memset(buf, TDB_PAD_BYTE, sizeof(buf));
285 while (addition) {
286 size_t n = addition>sizeof(buf)?sizeof(buf):addition;
287 ssize_t written = pwrite(tdb->fd, buf, n, size);
288 if (written == 0) {
289 /* prevent infinite loops: try _once_ more */
290 written = pwrite(tdb->fd, buf, n, size);
291 }
292 if (written == 0) {
293 /* give up, trying to provide a useful errno */
294 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write "
295 "returned 0 twice: giving up!\n"));
296 errno = ENOSPC;
297 return -1;
298 } else if (written == -1) {
299 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write of "
300 "%d bytes failed (%s)\n", (int)n,
301 strerror(errno)));
302 return -1;
303 } else if (written != n) {
304 TDB_LOG((tdb, TDB_DEBUG_WARNING, "expand_file: wrote "
305 "only %d of %d bytes - retrying\n", (int)written,
306 (int)n));
307 }
308 addition -= written;
309 size += written;
310 }
311 return 0;
312}
313
314
315/* expand the database at least size bytes by expanding the underlying
316 file and doing the mmap again if necessary */
317int tdb_expand(struct tdb_context *tdb, tdb_off_t size)
318{
319 struct list_struct rec;
320 tdb_off_t offset, new_size;
321
322 if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
323 TDB_LOG((tdb, TDB_DEBUG_ERROR, "lock failed in tdb_expand\n"));
324 return -1;
325 }
326
327 /* must know about any previous expansions by another process */
328 tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1);
329
330 /* always make room for at least 100 more records, and at
331 least 25% more space. Round the database up to a multiple
332 of the page size */
333 new_size = MAX(tdb->map_size + size*100, tdb->map_size * 1.25);
334 size = TDB_ALIGN(new_size, tdb->page_size) - tdb->map_size;
335
336 if (!(tdb->flags & TDB_INTERNAL))
337 tdb_munmap(tdb);
338
339 /*
340 * We must ensure the file is unmapped before doing this
341 * to ensure consistency with systems like OpenBSD where
342 * writes and mmaps are not consistent.
343 */
344
345 /* expand the file itself */
346 if (!(tdb->flags & TDB_INTERNAL)) {
347 if (tdb->methods->tdb_expand_file(tdb, tdb->map_size, size) != 0)
348 goto fail;
349 }
350
351 tdb->map_size += size;
352
353 if (tdb->flags & TDB_INTERNAL) {
354 char *new_map_ptr = (char *)realloc(tdb->map_ptr,
355 tdb->map_size);
356 if (!new_map_ptr) {
357 tdb->map_size -= size;
358 goto fail;
359 }
360 tdb->map_ptr = new_map_ptr;
361 } else {
362 /*
363 * We must ensure the file is remapped before adding the space
364 * to ensure consistency with systems like OpenBSD where
365 * writes and mmaps are not consistent.
366 */
367
368 /* We're ok if the mmap fails as we'll fallback to read/write */
369 tdb_mmap(tdb);
370 }
371
372 /* form a new freelist record */
373 memset(&rec,'\0',sizeof(rec));
374 rec.rec_len = size - sizeof(rec);
375
376 /* link it into the free list */
377 offset = tdb->map_size - size;
378 if (tdb_free(tdb, offset, &rec) == -1)
379 goto fail;
380
381 tdb_unlock(tdb, -1, F_WRLCK);
382 return 0;
383 fail:
384 tdb_unlock(tdb, -1, F_WRLCK);
385 return -1;
386}
387
388/* read/write a tdb_off_t */
389int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
390{
391 return tdb->methods->tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV());
392}
393
394int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
395{
396 tdb_off_t off = *d;
397 return tdb->methods->tdb_write(tdb, offset, CONVERT(off), sizeof(*d));
398}
399
400
401/* read a lump of data, allocating the space for it */
402unsigned char *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
403{
404 unsigned char *buf;
405
406 /* some systems don't like zero length malloc */
407 if (len == 0) {
408 len = 1;
409 }
410
411 if (!(buf = (unsigned char *)malloc(len))) {
412 /* Ensure ecode is set for log fn. */
413 tdb->ecode = TDB_ERR_OOM;
414 TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_alloc_read malloc failed len=%d (%s)\n",
415 len, strerror(errno)));
416 return TDB_ERRCODE(TDB_ERR_OOM, buf);
417 }
418 if (tdb->methods->tdb_read(tdb, offset, buf, len, 0) == -1) {
419 SAFE_FREE(buf);
420 return NULL;
421 }
422 return buf;
423}
424
425/* Give a piece of tdb data to a parser */
426
427int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key,
428 tdb_off_t offset, tdb_len_t len,
429 int (*parser)(TDB_DATA key, TDB_DATA data,
430 void *private_data),
431 void *private_data)
432{
433 TDB_DATA data;
434 int result;
435
436 data.dsize = len;
437
438 if ((tdb->transaction == NULL) && (tdb->map_ptr != NULL)) {
439 /*
440 * Optimize by avoiding the malloc/memcpy/free, point the
441 * parser directly at the mmap area.
442 */
443 if (tdb->methods->tdb_oob(tdb, offset+len, 0) != 0) {
444 return -1;
445 }
446 data.dptr = offset + (unsigned char *)tdb->map_ptr;
447 return parser(key, data, private_data);
448 }
449
450 if (!(data.dptr = tdb_alloc_read(tdb, offset, len))) {
451 return -1;
452 }
453
454 result = parser(key, data, private_data);
455 free(data.dptr);
456 return result;
457}
458
459/* read/write a record */
460int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec)
461{
462 if (tdb->methods->tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1)
463 return -1;
464 if (TDB_BAD_MAGIC(rec)) {
465 /* Ensure ecode is set for log fn. */
466 tdb->ecode = TDB_ERR_CORRUPT;
467 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset));
468 return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
469 }
470 return tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0);
471}
472
473int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec)
474{
475 struct list_struct r = *rec;
476 return tdb->methods->tdb_write(tdb, offset, CONVERT(r), sizeof(r));
477}
478
/*
  Default I/O method table, installed by tdb_io_init().

  This is a positional initializer: the entries must stay in the field
  order of struct tdb_methods, which is declared outside this file
  (presumably in tdb_private.h - confirm before reordering). All
  entries except tdb_brlock are the static functions defined above;
  tdb_brlock is defined elsewhere.
*/
static const struct tdb_methods io_methods = {
tdb_read,
tdb_write,
tdb_next_hash_chain,
tdb_oob,
tdb_expand_file,
tdb_brlock
};
487
488/*
489 initialise the default methods table
490*/
491void tdb_io_init(struct tdb_context *tdb)
492{
493 tdb->methods = &io_methods;
494}
Note: See TracBrowser for help on using the repository browser.