source: trunk-3.0/source/tdb/common/io.c@ 101

Last change on this file since 101 was 71, checked in by Paul Smedley, 18 years ago

Update source to 3.0.26a

File size: 13.5 KB
Line 
1 /*
2 Unix SMB/CIFS implementation.
3
4 trivial database library
5
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
12 ** under the LGPL
13
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 2 of the License, or (at your option) any later version.
18
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
23
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27*/
28
29
30#include "tdb_private.h"
31
32/* check for an out of bounds access - if it is out of bounds then
33 see if the database has been expanded by someone else and expand
34 if necessary
35 note that "len" is the minimum length needed for the db
36*/
37static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, int probe)
38{
39 struct stat st;
40 if (len <= tdb->map_size)
41 return 0;
42 if (tdb->flags & TDB_INTERNAL) {
43 if (!probe) {
44 /* Ensure ecode is set for log fn. */
45 tdb->ecode = TDB_ERR_IO;
46 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %d beyond internal malloc size %d\n",
47 (int)len, (int)tdb->map_size));
48 }
49 return TDB_ERRCODE(TDB_ERR_IO, -1);
50 }
51
52 if (fstat(tdb->fd, &st) == -1) {
53 return TDB_ERRCODE(TDB_ERR_IO, -1);
54 }
55
56 if (st.st_size < (size_t)len) {
57 if (!probe) {
58 /* Ensure ecode is set for log fn. */
59 tdb->ecode = TDB_ERR_IO;
60 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %d beyond eof at %d\n",
61 (int)len, (int)st.st_size));
62 }
63 return TDB_ERRCODE(TDB_ERR_IO, -1);
64 }
65
66 /* Unmap, update size, remap */
67 if (tdb_munmap(tdb) == -1)
68 return TDB_ERRCODE(TDB_ERR_IO, -1);
69 tdb->map_size = st.st_size;
70 tdb_mmap(tdb);
71 return 0;
72}
73
74/* write a lump of data at a specified offset */
75static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
76 const void *buf, tdb_len_t len)
77{
78 if (len == 0) {
79 return 0;
80 }
81
82 if (tdb->read_only || tdb->traverse_read) {
83 tdb->ecode = TDB_ERR_RDONLY;
84 return -1;
85 }
86
87 if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0)
88 return -1;
89
90#ifdef __OS2__
91 // YD we must upgrade read locks to write locks (exclusive), otherwise
92 // the owner (us) is not allowed to write to the file (different from unix)
93 TDB_LOG((tdb, TDB_DEBUG_TRACE,"unlocking at %d len=%d before writing.\n", off, len));
94 tdb_brlock( tdb, off, F_UNLCK, F_SETLK, 0, 1);
95 // if a wider previous lock is in effect, we cannot write lock our segment
96 // (e.g. a lock_upgrade locks all the file), so we hope the previous lock
97 // is a write lock: do not wait for lock.
98 tdb_brlock( tdb, off, F_WRLCK, F_SETLK, 0, len);
99#endif
100
101 if (tdb->map_ptr) {
102 memcpy(off + (char *)tdb->map_ptr, buf, len);
103 } else if (pwrite(tdb->fd, buf, len, off) != (ssize_t)len) {
104 /* Ensure ecode is set for log fn. */
105 tdb->ecode = TDB_ERR_IO;
106 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %d len=%d (%s)\n",
107 off, len, strerror(errno)));
108 } else {
109 ssize_t written = pwrite(tdb->fd, buf, len, off);
110 if ((written != (ssize_t)len) && (written != -1)) {
111 /* try once more */
112 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only "
113 "%d of %d bytes at %d, trying once more\n",
114 written, len, off));
115 errno = ENOSPC;
116 written = pwrite(tdb->fd, (void *)((char *)buf+written),
117 len-written,
118 off+written);
119 }
120 if (written == -1) {
121 /* Ensure ecode is set for log fn. */
122 tdb->ecode = TDB_ERR_IO;
123 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %d "
124 "len=%d (%s)\n", off, len, strerror(errno)));
125 return TDB_ERRCODE(TDB_ERR_IO, -1);
126 } else if (written != (ssize_t)len) {
127 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: failed to "
128 "write %d bytes at %d in two attempts\n",
129 len, off));
130 errno = ENOSPC;
131#ifdef __OS2__
132 // remove our lock
133 tdb_brlock( tdb, off, F_UNLCK, F_SETLK, 0, len);
134#endif
135 return TDB_ERRCODE(TDB_ERR_IO, -1);
136 }
137 }
138#ifdef __OS2__
139 // remove our lock
140 tdb_brlock( tdb, off, F_UNLCK, F_SETLK, 0, len);
141#endif
142 return 0;
143}
144
145/* Endian conversion: we only ever deal with 4 byte quantities */
146void *tdb_convert(void *buf, u32 size)
147{
148 u32 i, *p = (u32 *)buf;
149 for (i = 0; i < size / 4; i++)
150 p[i] = TDB_BYTEREV(p[i]);
151 return buf;
152}
153
154
155/* read a lump of data at a specified offset, maybe convert */
156static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
157 tdb_len_t len, int cv)
158{
159 if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0) {
160 return -1;
161 }
162
163 if (tdb->map_ptr) {
164 memcpy(buf, off + (char *)tdb->map_ptr, len);
165 } else {
166 ssize_t ret = pread(tdb->fd, buf, len, off);
167 if (ret != (ssize_t)len) {
168 /* Ensure ecode is set for log fn. */
169 tdb->ecode = TDB_ERR_IO;
170 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_read failed at %d "
171 "len=%d ret=%d (%s) map_size=%d\n",
172 (int)off, (int)len, (int)ret, strerror(errno),
173 (int)tdb->map_size));
174 return TDB_ERRCODE(TDB_ERR_IO, -1);
175 }
176 }
177 if (cv) {
178 tdb_convert(buf, len);
179 }
180 return 0;
181}
182
183
184
185/*
186 do an unlocked scan of the hash table heads to find the next non-zero head. The value
187 will then be confirmed with the lock held
188*/
189static void tdb_next_hash_chain(struct tdb_context *tdb, u32 *chain)
190{
191 u32 h = *chain;
192 if (tdb->map_ptr) {
193 for (;h < tdb->header.hash_size;h++) {
194 if (0 != *(u32 *)(TDB_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) {
195 break;
196 }
197 }
198 } else {
199 u32 off=0;
200 for (;h < tdb->header.hash_size;h++) {
201 if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &off) != 0 || off != 0) {
202 break;
203 }
204 }
205 }
206 (*chain) = h;
207}
208
209
210int tdb_munmap(struct tdb_context *tdb)
211{
212 if (tdb->flags & TDB_INTERNAL)
213 return 0;
214
215#ifdef HAVE_MMAP
216 if (tdb->map_ptr) {
217 int ret = munmap(tdb->map_ptr, tdb->map_size);
218 if (ret != 0)
219 return ret;
220 }
221#endif
222 tdb->map_ptr = NULL;
223 return 0;
224}
225
226void tdb_mmap(struct tdb_context *tdb)
227{
228 if (tdb->flags & TDB_INTERNAL)
229 return;
230
231#ifdef HAVE_MMAP
232 if (!(tdb->flags & TDB_NOMMAP)) {
233 tdb->map_ptr = mmap(NULL, tdb->map_size,
234 PROT_READ|(tdb->read_only? 0:PROT_WRITE),
235 MAP_SHARED|MAP_FILE, tdb->fd, 0);
236
237 /*
238 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
239 */
240
241 if (tdb->map_ptr == MAP_FAILED) {
242 tdb->map_ptr = NULL;
243 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_mmap failed for size %d (%s)\n",
244 tdb->map_size, strerror(errno)));
245 }
246 } else {
247 tdb->map_ptr = NULL;
248 }
249#else
250 tdb->map_ptr = NULL;
251#endif
252}
253
254/* expand a file. we prefer to use ftruncate, as that is what posix
255 says to use for mmap expansion */
256static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t addition)
257{
258 char buf[1024];
259
260 if (tdb->read_only || tdb->traverse_read) {
261 tdb->ecode = TDB_ERR_RDONLY;
262 return -1;
263 }
264
265 if (ftruncate(tdb->fd, size+addition) == -1) {
266 char b = 0;
267 ssize_t written = pwrite(tdb->fd, &b, 1, (size+addition) - 1);
268 if (written == 0) {
269 /* try once more, potentially revealing errno */
270 written = pwrite(tdb->fd, &b, 1, (size+addition) - 1);
271 }
272 if (written == 0) {
273 /* again - give up, guessing errno */
274 errno = ENOSPC;
275 }
276 if (written != 1) {
277 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file to %d failed (%s)\n",
278 size+addition, strerror(errno)));
279 return -1;
280 }
281 }
282
283 /* now fill the file with something. This ensures that the
284 file isn't sparse, which would be very bad if we ran out of
285 disk. This must be done with write, not via mmap */
286 memset(buf, TDB_PAD_BYTE, sizeof(buf));
287 while (addition) {
288 size_t n = addition>sizeof(buf)?sizeof(buf):addition;
289 ssize_t written = pwrite(tdb->fd, buf, n, size);
290 if (written == 0) {
291 /* prevent infinite loops: try _once_ more */
292 written = pwrite(tdb->fd, buf, n, size);
293 }
294 if (written == 0) {
295 /* give up, trying to provide a useful errno */
296 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write "
297 "returned 0 twice: giving up!\n"));
298 errno = ENOSPC;
299 return -1;
300 } else if (written == -1) {
301 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write of "
302 "%d bytes failed (%s)\n", n, strerror(errno)));
303 return -1;
304 } else if (written != n) {
305 TDB_LOG((tdb, TDB_DEBUG_WARNING, "expand_file: wrote "
306 "only %d of %d bytes - retrying\n", written,n));
307 }
308 addition -= written;
309 size += written;
310 }
311 return 0;
312}
313
314
315/* expand the database at least size bytes by expanding the underlying
316 file and doing the mmap again if necessary */
317int tdb_expand(struct tdb_context *tdb, tdb_off_t size)
318{
319 struct list_struct rec;
320 tdb_off_t offset;
321
322 if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
323 TDB_LOG((tdb, TDB_DEBUG_ERROR, "lock failed in tdb_expand\n"));
324 return -1;
325 }
326
327 /* must know about any previous expansions by another process */
328 tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1);
329
330 /* always make room for at least 10 more records, and round
331 the database up to a multiple of the page size */
332 size = TDB_ALIGN(tdb->map_size + size*10, tdb->page_size) - tdb->map_size;
333
334 if (!(tdb->flags & TDB_INTERNAL))
335 tdb_munmap(tdb);
336
337 /*
338 * We must ensure the file is unmapped before doing this
339 * to ensure consistency with systems like OpenBSD where
340 * writes and mmaps are not consistent.
341 */
342
343 /* expand the file itself */
344 if (!(tdb->flags & TDB_INTERNAL)) {
345 if (tdb->methods->tdb_expand_file(tdb, tdb->map_size, size) != 0)
346 goto fail;
347 }
348
349 tdb->map_size += size;
350
351 if (tdb->flags & TDB_INTERNAL) {
352 char *new_map_ptr = (char *)realloc(tdb->map_ptr,
353 tdb->map_size);
354 if (!new_map_ptr) {
355 tdb->map_size -= size;
356 goto fail;
357 }
358 tdb->map_ptr = new_map_ptr;
359 } else {
360 /*
361 * We must ensure the file is remapped before adding the space
362 * to ensure consistency with systems like OpenBSD where
363 * writes and mmaps are not consistent.
364 */
365
366 /* We're ok if the mmap fails as we'll fallback to read/write */
367 tdb_mmap(tdb);
368 }
369
370 /* form a new freelist record */
371 memset(&rec,'\0',sizeof(rec));
372 rec.rec_len = size - sizeof(rec);
373
374 /* link it into the free list */
375 offset = tdb->map_size - size;
376 if (tdb_free(tdb, offset, &rec) == -1)
377 goto fail;
378
379 tdb_unlock(tdb, -1, F_WRLCK);
380 return 0;
381 fail:
382 tdb_unlock(tdb, -1, F_WRLCK);
383 return -1;
384}
385
386/* read/write a tdb_off_t */
387int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
388{
389 return tdb->methods->tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV());
390}
391
392int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
393{
394 tdb_off_t off = *d;
395 return tdb->methods->tdb_write(tdb, offset, CONVERT(off), sizeof(*d));
396}
397
398
399/* read a lump of data, allocating the space for it */
400char *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
401{
402 char *buf;
403
404 /* some systems don't like zero length malloc */
405 if (len == 0) {
406 len = 1;
407 }
408
409 if (!(buf = (char *)malloc(len))) {
410 /* Ensure ecode is set for log fn. */
411 tdb->ecode = TDB_ERR_OOM;
412 TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_alloc_read malloc failed len=%d (%s)\n",
413 len, strerror(errno)));
414 return TDB_ERRCODE(TDB_ERR_OOM, buf);
415 }
416 if (tdb->methods->tdb_read(tdb, offset, buf, len, 0) == -1) {
417 SAFE_FREE(buf);
418 return NULL;
419 }
420 return buf;
421}
422
423/* Give a piece of tdb data to a parser */
424
425int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key,
426 tdb_off_t offset, tdb_len_t len,
427 int (*parser)(TDB_DATA key, TDB_DATA data,
428 void *private_data),
429 void *private_data)
430{
431 TDB_DATA data;
432 int result;
433
434 data.dsize = len;
435
436 if ((tdb->transaction == NULL) && (tdb->map_ptr != NULL)) {
437 /*
438 * Optimize by avoiding the malloc/memcpy/free, point the
439 * parser directly at the mmap area.
440 */
441 if (tdb->methods->tdb_oob(tdb, offset+len, 0) != 0) {
442 return -1;
443 }
444 data.dptr = offset + (char *)tdb->map_ptr;
445 return parser(key, data, private_data);
446 }
447
448 if (!(data.dptr = tdb_alloc_read(tdb, offset, len))) {
449 return -1;
450 }
451
452 result = parser(key, data, private_data);
453 free(data.dptr);
454 return result;
455}
456
457/* read/write a record */
458int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec)
459{
460 if (tdb->methods->tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1)
461 return -1;
462 if (TDB_BAD_MAGIC(rec)) {
463 /* Ensure ecode is set for log fn. */
464 tdb->ecode = TDB_ERR_CORRUPT;
465 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset));
466 return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
467 }
468 return tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0);
469}
470
471int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec)
472{
473 struct list_struct r = *rec;
474 return tdb->methods->tdb_write(tdb, offset, CONVERT(r), sizeof(r));
475}
476
477static const struct tdb_methods io_methods = {
478 tdb_read,
479 tdb_write,
480 tdb_next_hash_chain,
481 tdb_oob,
482 tdb_expand_file,
483 tdb_brlock
484};
485
486/*
487 initialise the default methods table
488*/
489void tdb_io_init(struct tdb_context *tdb)
490{
491 tdb->methods = &io_methods;
492}
Note: See TracBrowser for help on using the repository browser.