source: branches/samba-3.0/source/tdb/common/io.c

Last change on this file was 165, checked in by Paul Smedley, 16 years ago

Add 'missing' 3.0.34 diffs

File size: 13.3 KB
Line 
1 /*
2 Unix SMB/CIFS implementation.
3
4 trivial database library
5
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
12 ** under the LGPL
13
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 2 of the License, or (at your option) any later version.
18
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
23
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27*/
28
29
30#include "tdb_private.h"
31
32/* check for an out of bounds access - if it is out of bounds then
33 see if the database has been expanded by someone else and expand
34 if necessary
35 note that "len" is the minimum length needed for the db
36*/
37static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, int probe)
38{
39 struct stat st;
40 if (len <= tdb->map_size)
41 return 0;
42 if (tdb->flags & TDB_INTERNAL) {
43 if (!probe) {
44 /* Ensure ecode is set for log fn. */
45 tdb->ecode = TDB_ERR_IO;
46 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %d beyond internal malloc size %d\n",
47 (int)len, (int)tdb->map_size));
48 }
49 return TDB_ERRCODE(TDB_ERR_IO, -1);
50 }
51
52 if (fstat(tdb->fd, &st) == -1) {
53 return TDB_ERRCODE(TDB_ERR_IO, -1);
54 }
55
56 if (st.st_size < (size_t)len) {
57 if (!probe) {
58 /* Ensure ecode is set for log fn. */
59 tdb->ecode = TDB_ERR_IO;
60 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %d beyond eof at %d\n",
61 (int)len, (int)st.st_size));
62 }
63 return TDB_ERRCODE(TDB_ERR_IO, -1);
64 }
65
66 /* Unmap, update size, remap */
67 if (tdb_munmap(tdb) == -1)
68 return TDB_ERRCODE(TDB_ERR_IO, -1);
69 tdb->map_size = st.st_size;
70 tdb_mmap(tdb);
71 return 0;
72}
73
74/* write a lump of data at a specified offset */
75static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
76 const void *buf, tdb_len_t len)
77{
78 if (len == 0) {
79 return 0;
80 }
81
82 if (tdb->read_only || tdb->traverse_read) {
83 tdb->ecode = TDB_ERR_RDONLY;
84 return -1;
85 }
86
87 if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0)
88 return -1;
89
90#ifdef __OS2__
91 // YD we must upgrade read locks to write locks (exclusive), otherwise
92 // the owner (us) is not allowed to write to the file (different from unix)
93 TDB_LOG((tdb, TDB_DEBUG_TRACE,"unlocking at %d len=%d before writing.\n", off, len));
94 tdb_brlock( tdb, off, F_UNLCK, F_SETLK, 0, 1);
95 // if a wider previous lock is in effect, we cannot write lock our segment
96 // (e.g. a lock_upgrade locks all the file), so we hope the previous lock
97 // is a write lock: do not wait for lock.
98 tdb_brlock( tdb, off, F_WRLCK, F_SETLK, 0, len);
99#endif
100
101 if (tdb->map_ptr) {
102 memcpy(off + (char *)tdb->map_ptr, buf, len);
103 } else {
104 ssize_t written = pwrite(tdb->fd, buf, len, off);
105 if ((written != (ssize_t)len) && (written != -1)) {
106 /* try once more */
107 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only "
108 "%d of %d bytes at %d, trying once more\n",
109 (uint32_t)written, len, off));
110 errno = ENOSPC;
111 written = pwrite(tdb->fd, (void *)((char *)buf+written),
112 len-written,
113 off+written);
114 }
115 if (written == -1) {
116 /* Ensure ecode is set for log fn. */
117 tdb->ecode = TDB_ERR_IO;
118 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %d "
119 "len=%d (%s)\n", off, len, strerror(errno)));
120 return TDB_ERRCODE(TDB_ERR_IO, -1);
121 } else if (written != (ssize_t)len) {
122 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: failed to "
123 "write %d bytes at %d in two attempts\n",
124 len, off));
125 errno = ENOSPC;
126#ifdef __OS2__
127 // remove our lock
128 tdb_brlock( tdb, off, F_UNLCK, F_SETLK, 0, len);
129#endif
130 return TDB_ERRCODE(TDB_ERR_IO, -1);
131 }
132 }
133#ifdef __OS2__
134 // remove our lock
135 tdb_brlock( tdb, off, F_UNLCK, F_SETLK, 0, len);
136#endif
137 return 0;
138}
139
140/* Endian conversion: we only ever deal with 4 byte quantities */
141void *tdb_convert(void *buf, u32 size)
142{
143 u32 i, *p = (u32 *)buf;
144 for (i = 0; i < size / 4; i++)
145 p[i] = TDB_BYTEREV(p[i]);
146 return buf;
147}
148
149
150/* read a lump of data at a specified offset, maybe convert */
151static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
152 tdb_len_t len, int cv)
153{
154 if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0) {
155 return -1;
156 }
157
158 if (tdb->map_ptr) {
159 memcpy(buf, off + (char *)tdb->map_ptr, len);
160 } else {
161 ssize_t ret = pread(tdb->fd, buf, len, off);
162 if (ret != (ssize_t)len) {
163 /* Ensure ecode is set for log fn. */
164 tdb->ecode = TDB_ERR_IO;
165 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_read failed at %d "
166 "len=%d ret=%d (%s) map_size=%d\n",
167 (int)off, (int)len, (int)ret, strerror(errno),
168 (int)tdb->map_size));
169 return TDB_ERRCODE(TDB_ERR_IO, -1);
170 }
171 }
172 if (cv) {
173 tdb_convert(buf, len);
174 }
175 return 0;
176}
177
178
179
180/*
181 do an unlocked scan of the hash table heads to find the next non-zero head. The value
182 will then be confirmed with the lock held
183*/
184static void tdb_next_hash_chain(struct tdb_context *tdb, u32 *chain)
185{
186 u32 h = *chain;
187 if (tdb->map_ptr) {
188 for (;h < tdb->header.hash_size;h++) {
189 if (0 != *(u32 *)(TDB_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) {
190 break;
191 }
192 }
193 } else {
194 u32 off=0;
195 for (;h < tdb->header.hash_size;h++) {
196 if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &off) != 0 || off != 0) {
197 break;
198 }
199 }
200 }
201 (*chain) = h;
202}
203
204
205int tdb_munmap(struct tdb_context *tdb)
206{
207 if (tdb->flags & TDB_INTERNAL)
208 return 0;
209
210#ifdef HAVE_MMAP
211 if (tdb->map_ptr) {
212 int ret;
213
214 ret = munmap(tdb->map_ptr, tdb->map_size);
215 if (ret != 0)
216 return ret;
217 }
218#endif
219 tdb->map_ptr = NULL;
220 return 0;
221}
222
223void tdb_mmap(struct tdb_context *tdb)
224{
225 if (tdb->flags & TDB_INTERNAL)
226 return;
227
228#ifdef HAVE_MMAP
229 if (!(tdb->flags & TDB_NOMMAP)) {
230 tdb->map_ptr = mmap(NULL, tdb->map_size,
231 PROT_READ|(tdb->read_only? 0:PROT_WRITE),
232 MAP_SHARED|MAP_FILE, tdb->fd, 0);
233
234 /*
235 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
236 */
237
238 if (tdb->map_ptr == MAP_FAILED) {
239 tdb->map_ptr = NULL;
240 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_mmap failed for size %d (%s)\n",
241 tdb->map_size, strerror(errno)));
242 }
243 } else {
244 tdb->map_ptr = NULL;
245 }
246#else
247 tdb->map_ptr = NULL;
248#endif
249}
250
251/* expand a file. we prefer to use ftruncate, as that is what posix
252 says to use for mmap expansion */
253static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t addition)
254{
255 char buf[1024];
256
257 if (tdb->read_only || tdb->traverse_read) {
258 tdb->ecode = TDB_ERR_RDONLY;
259 return -1;
260 }
261
262 if (ftruncate(tdb->fd, size+addition) == -1) {
263 char b = 0;
264 ssize_t written = pwrite(tdb->fd, &b, 1, (size+addition) - 1);
265 if (written == 0) {
266 /* try once more, potentially revealing errno */
267 written = pwrite(tdb->fd, &b, 1, (size+addition) - 1);
268 }
269 if (written == 0) {
270 /* again - give up, guessing errno */
271 errno = ENOSPC;
272 }
273 if (written != 1) {
274 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file to %d failed (%s)\n",
275 size+addition, strerror(errno)));
276 return -1;
277 }
278 }
279
280 /* now fill the file with something. This ensures that the
281 file isn't sparse, which would be very bad if we ran out of
282 disk. This must be done with write, not via mmap */
283 memset(buf, TDB_PAD_BYTE, sizeof(buf));
284 while (addition) {
285 size_t n = addition>sizeof(buf)?sizeof(buf):addition;
286 ssize_t written = pwrite(tdb->fd, buf, n, size);
287 if (written == 0) {
288 /* prevent infinite loops: try _once_ more */
289 written = pwrite(tdb->fd, buf, n, size);
290 }
291 if (written == 0) {
292 /* give up, trying to provide a useful errno */
293 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write "
294 "returned 0 twice: giving up!\n"));
295 errno = ENOSPC;
296 return -1;
297 } else if (written == -1) {
298 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write of "
299 "%d bytes failed (%s)\n", (uint32_t)n, strerror(errno)));
300 return -1;
301 } else if (written != n) {
302 TDB_LOG((tdb, TDB_DEBUG_WARNING, "expand_file: wrote "
303 "only %d of %d bytes - retrying\n",
304 (uint32_t)written, (uint32_t)n));
305 }
306 addition -= written;
307 size += written;
308 }
309 return 0;
310}
311
312
313/* expand the database at least size bytes by expanding the underlying
314 file and doing the mmap again if necessary */
315int tdb_expand(struct tdb_context *tdb, tdb_off_t size)
316{
317 struct list_struct rec;
318 tdb_off_t offset;
319
320 if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
321 TDB_LOG((tdb, TDB_DEBUG_ERROR, "lock failed in tdb_expand\n"));
322 return -1;
323 }
324
325 /* must know about any previous expansions by another process */
326 tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1);
327
328 /* always make room for at least 10 more records, and round
329 the database up to a multiple of the page size */
330 size = TDB_ALIGN(tdb->map_size + size*10, tdb->page_size) - tdb->map_size;
331
332 if (!(tdb->flags & TDB_INTERNAL))
333 tdb_munmap(tdb);
334
335 /*
336 * We must ensure the file is unmapped before doing this
337 * to ensure consistency with systems like OpenBSD where
338 * writes and mmaps are not consistent.
339 */
340
341 /* expand the file itself */
342 if (!(tdb->flags & TDB_INTERNAL)) {
343 if (tdb->methods->tdb_expand_file(tdb, tdb->map_size, size) != 0)
344 goto fail;
345 }
346
347 tdb->map_size += size;
348
349 if (tdb->flags & TDB_INTERNAL) {
350 char *new_map_ptr = (char *)realloc(tdb->map_ptr,
351 tdb->map_size);
352 if (!new_map_ptr) {
353 tdb->map_size -= size;
354 goto fail;
355 }
356 tdb->map_ptr = new_map_ptr;
357 } else {
358 /*
359 * We must ensure the file is remapped before adding the space
360 * to ensure consistency with systems like OpenBSD where
361 * writes and mmaps are not consistent.
362 */
363
364 /* We're ok if the mmap fails as we'll fallback to read/write */
365 tdb_mmap(tdb);
366 }
367
368 /* form a new freelist record */
369 memset(&rec,'\0',sizeof(rec));
370 rec.rec_len = size - sizeof(rec);
371
372 /* link it into the free list */
373 offset = tdb->map_size - size;
374 if (tdb_free(tdb, offset, &rec) == -1)
375 goto fail;
376
377 tdb_unlock(tdb, -1, F_WRLCK);
378 return 0;
379 fail:
380 tdb_unlock(tdb, -1, F_WRLCK);
381 return -1;
382}
383
384/* read/write a tdb_off_t */
385int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
386{
387 return tdb->methods->tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV());
388}
389
390int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
391{
392 tdb_off_t off = *d;
393 return tdb->methods->tdb_write(tdb, offset, CONVERT(off), sizeof(*d));
394}
395
396
397/* read a lump of data, allocating the space for it */
398char *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
399{
400 char *buf;
401
402 /* some systems don't like zero length malloc */
403 if (len == 0) {
404 len = 1;
405 }
406
407 if (!(buf = (char *)malloc(len))) {
408 /* Ensure ecode is set for log fn. */
409 tdb->ecode = TDB_ERR_OOM;
410 TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_alloc_read malloc failed len=%d (%s)\n",
411 len, strerror(errno)));
412 return TDB_ERRCODE(TDB_ERR_OOM, buf);
413 }
414 if (tdb->methods->tdb_read(tdb, offset, buf, len, 0) == -1) {
415 SAFE_FREE(buf);
416 return NULL;
417 }
418 return buf;
419}
420
421/* Give a piece of tdb data to a parser */
422
423int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key,
424 tdb_off_t offset, tdb_len_t len,
425 int (*parser)(TDB_DATA key, TDB_DATA data,
426 void *private_data),
427 void *private_data)
428{
429 TDB_DATA data;
430 int result;
431
432 data.dsize = len;
433
434 if ((tdb->transaction == NULL) && (tdb->map_ptr != NULL)) {
435 /*
436 * Optimize by avoiding the malloc/memcpy/free, point the
437 * parser directly at the mmap area.
438 */
439 if (tdb->methods->tdb_oob(tdb, offset+len, 0) != 0) {
440 return -1;
441 }
442 data.dptr = offset + (char *)tdb->map_ptr;
443 return parser(key, data, private_data);
444 }
445
446 if (!(data.dptr = tdb_alloc_read(tdb, offset, len))) {
447 return -1;
448 }
449
450 result = parser(key, data, private_data);
451 free(data.dptr);
452 return result;
453}
454
455/* read/write a record */
456int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec)
457{
458 if (tdb->methods->tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1)
459 return -1;
460 if (TDB_BAD_MAGIC(rec)) {
461 /* Ensure ecode is set for log fn. */
462 tdb->ecode = TDB_ERR_CORRUPT;
463 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset));
464 return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
465 }
466 return tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0);
467}
468
469int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec)
470{
471 struct list_struct r = *rec;
472 return tdb->methods->tdb_write(tdb, offset, CONVERT(r), sizeof(r));
473}
474
475static const struct tdb_methods io_methods = {
476 tdb_read,
477 tdb_write,
478 tdb_next_hash_chain,
479 tdb_oob,
480 tdb_expand_file,
481 tdb_brlock
482};
483
484/*
485 initialise the default methods table
486*/
487void tdb_io_init(struct tdb_context *tdb)
488{
489 tdb->methods = &io_methods;
490}
Note: See TracBrowser for help on using the repository browser.