source: vendor/current/lib/tdb/common/io.c

Last change on this file was 988, checked in by Silvan Scherrer, 9 years ago

Samba Server: update vendor to version 4.4.3

File size: 16.1 KB
Line 
1 /*
2 Unix SMB/CIFS implementation.
3
4 trivial database library
5
6 Copyright (C) Andrew Tridgell 1999-2005
7 Copyright (C) Paul `Rusty' Russell 2000
8 Copyright (C) Jeremy Allison 2000-2003
9
10 ** NOTE! The following LGPL license applies to the tdb
11 ** library. This does NOT imply that all of Samba is released
12 ** under the LGPL
13
14 This library is free software; you can redistribute it and/or
15 modify it under the terms of the GNU Lesser General Public
16 License as published by the Free Software Foundation; either
17 version 3 of the License, or (at your option) any later version.
18
19 This library is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 Lesser General Public License for more details.
23
24 You should have received a copy of the GNU Lesser General Public
25 License along with this library; if not, see <http://www.gnu.org/licenses/>.
26*/
27
28
29#include "tdb_private.h"
30
31/*
32 * We prepend the mutex area, so fixup offsets. See mutex.c for details.
33 * tdb->hdr_ofs is 0 or header.mutex_size.
34 *
35 * Note: that we only have the 4GB limit of tdb_off_t for
36 * tdb->map_size. The file size on disk can be 4GB + tdb->hdr_ofs!
37 */
38
39static bool tdb_adjust_offset(struct tdb_context *tdb, off_t *off)
40{
41 off_t tmp = tdb->hdr_ofs + *off;
42
43 if ((tmp < tdb->hdr_ofs) || (tmp < *off)) {
44 errno = EIO;
45 return false;
46 }
47
48 *off = tmp;
49 return true;
50}
51
52static ssize_t tdb_pwrite(struct tdb_context *tdb, const void *buf,
53 size_t count, off_t offset)
54{
55 if (!tdb_adjust_offset(tdb, &offset)) {
56 return -1;
57 }
58 return pwrite(tdb->fd, buf, count, offset);
59}
60
61static ssize_t tdb_pread(struct tdb_context *tdb, void *buf,
62 size_t count, off_t offset)
63{
64 if (!tdb_adjust_offset(tdb, &offset)) {
65 return -1;
66 }
67 return pread(tdb->fd, buf, count, offset);
68}
69
70static int tdb_ftruncate(struct tdb_context *tdb, off_t length)
71{
72 if (!tdb_adjust_offset(tdb, &length)) {
73 return -1;
74 }
75 return ftruncate(tdb->fd, length);
76}
77
78static int tdb_fstat(struct tdb_context *tdb, struct stat *buf)
79{
80 int ret;
81
82 ret = fstat(tdb->fd, buf);
83 if (ret == -1) {
84 return -1;
85 }
86
87 if (buf->st_size < tdb->hdr_ofs) {
88 errno = EIO;
89 return -1;
90 }
91 buf->st_size -= tdb->hdr_ofs;
92
93 return ret;
94}
95
96/* check for an out of bounds access - if it is out of bounds then
97 see if the database has been expanded by someone else and expand
98 if necessary
99*/
100static int tdb_oob(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len,
101 int probe)
102{
103 struct stat st;
104 if (len + off < len) {
105 if (!probe) {
106 /* Ensure ecode is set for log fn. */
107 tdb->ecode = TDB_ERR_IO;
108 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob off %u len %u wrap\n",
109 off, len));
110 }
111 return -1;
112 }
113
114 if (off + len <= tdb->map_size)
115 return 0;
116 if (tdb->flags & TDB_INTERNAL) {
117 if (!probe) {
118 /* Ensure ecode is set for log fn. */
119 tdb->ecode = TDB_ERR_IO;
120 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %u beyond internal malloc size %u\n",
121 (int)(off + len), (int)tdb->map_size));
122 }
123 return -1;
124 }
125
126 if (tdb_fstat(tdb, &st) == -1) {
127 tdb->ecode = TDB_ERR_IO;
128 return -1;
129 }
130
131 /* Beware >4G files! */
132 if ((tdb_off_t)st.st_size != st.st_size) {
133 /* Ensure ecode is set for log fn. */
134 tdb->ecode = TDB_ERR_IO;
135 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_oob len %llu too large!\n",
136 (long long)st.st_size));
137 return -1;
138 }
139
140 /* Unmap, update size, remap. We do this unconditionally, to handle
141 * the unusual case where the db is truncated.
142 *
143 * This can happen to a child using tdb_reopen_all(true) on a
144 * TDB_CLEAR_IF_FIRST tdb whose parent crashes: the next
145 * opener will truncate the database. */
146 if (tdb_munmap(tdb) == -1) {
147 tdb->ecode = TDB_ERR_IO;
148 return -1;
149 }
150 tdb->map_size = st.st_size;
151 if (tdb_mmap(tdb) != 0) {
152 return -1;
153 }
154
155 if (st.st_size < (size_t)off + len) {
156 if (!probe) {
157 /* Ensure ecode is set for log fn. */
158 tdb->ecode = TDB_ERR_IO;
159 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %u beyond eof at %u\n",
160 (int)(off + len), (int)st.st_size));
161 }
162 return -1;
163 }
164 return 0;
165}
166
167/* write a lump of data at a specified offset */
168static int tdb_write(struct tdb_context *tdb, tdb_off_t off,
169 const void *buf, tdb_len_t len)
170{
171 if (len == 0) {
172 return 0;
173 }
174
175 if (tdb->read_only || tdb->traverse_read) {
176 tdb->ecode = TDB_ERR_RDONLY;
177 return -1;
178 }
179
180 if (tdb->methods->tdb_oob(tdb, off, len, 0) != 0)
181 return -1;
182
183 if (tdb->map_ptr) {
184 memcpy(off + (char *)tdb->map_ptr, buf, len);
185 } else {
186#ifdef HAVE_INCOHERENT_MMAP
187 tdb->ecode = TDB_ERR_IO;
188 return -1;
189#else
190 ssize_t written;
191
192 written = tdb_pwrite(tdb, buf, len, off);
193
194 if ((written != (ssize_t)len) && (written != -1)) {
195 /* try once more */
196 tdb->ecode = TDB_ERR_IO;
197 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only "
198 "%zi of %u bytes at %u, trying once more\n",
199 written, len, off));
200 written = tdb_pwrite(tdb, (const char *)buf+written,
201 len-written, off+written);
202 }
203 if (written == -1) {
204 /* Ensure ecode is set for log fn. */
205 tdb->ecode = TDB_ERR_IO;
206 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %u "
207 "len=%u (%s)\n", off, len, strerror(errno)));
208 return -1;
209 } else if (written != (ssize_t)len) {
210 tdb->ecode = TDB_ERR_IO;
211 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: failed to "
212 "write %u bytes at %u in two attempts\n",
213 len, off));
214 return -1;
215 }
216#endif
217 }
218 return 0;
219}
220
/*
 * Endian conversion, in place: we only ever deal with 4 byte
 * quantities, so the buffer is treated as size / 4 32-bit words and
 * each one is passed through TDB_BYTEREV. Trailing bytes that do not
 * fill a whole word are left alone. Returns 'buf'.
 */
void *tdb_convert(void *buf, uint32_t size)
{
	uint32_t *words = (uint32_t *)buf;
	const uint32_t nwords = size / 4;
	uint32_t idx = 0;

	while (idx < nwords) {
		words[idx] = TDB_BYTEREV(words[idx]);
		idx++;
	}
	return buf;
}
229
230
231/* read a lump of data at a specified offset, maybe convert */
232static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf,
233 tdb_len_t len, int cv)
234{
235 if (tdb->methods->tdb_oob(tdb, off, len, 0) != 0) {
236 return -1;
237 }
238
239 if (tdb->map_ptr) {
240 memcpy(buf, off + (char *)tdb->map_ptr, len);
241 } else {
242#ifdef HAVE_INCOHERENT_MMAP
243 tdb->ecode = TDB_ERR_IO;
244 return -1;
245#else
246 ssize_t ret;
247
248 ret = tdb_pread(tdb, buf, len, off);
249 if (ret != (ssize_t)len) {
250 /* Ensure ecode is set for log fn. */
251 tdb->ecode = TDB_ERR_IO;
252 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_read failed at %u "
253 "len=%u ret=%zi (%s) map_size=%u\n",
254 off, len, ret, strerror(errno),
255 tdb->map_size));
256 return -1;
257 }
258#endif
259 }
260 if (cv) {
261 tdb_convert(buf, len);
262 }
263 return 0;
264}
265
266
267
268/*
269 do an unlocked scan of the hash table heads to find the next non-zero head. The value
270 will then be confirmed with the lock held
271*/
272static void tdb_next_hash_chain(struct tdb_context *tdb, uint32_t *chain)
273{
274 uint32_t h = *chain;
275 if (tdb->map_ptr) {
276 for (;h < tdb->hash_size;h++) {
277 if (0 != *(uint32_t *)(TDB_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) {
278 break;
279 }
280 }
281 } else {
282 uint32_t off=0;
283 for (;h < tdb->hash_size;h++) {
284 if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &off) != 0 || off != 0) {
285 break;
286 }
287 }
288 }
289 (*chain) = h;
290}
291
292
293int tdb_munmap(struct tdb_context *tdb)
294{
295 if (tdb->flags & TDB_INTERNAL)
296 return 0;
297
298#ifdef HAVE_MMAP
299 if (tdb->map_ptr) {
300 int ret;
301
302 ret = munmap(tdb->map_ptr, tdb->map_size);
303 if (ret != 0)
304 return ret;
305 }
306#endif
307 tdb->map_ptr = NULL;
308 return 0;
309}
310
311/* If mmap isn't coherent, *everyone* must always mmap. */
312static bool should_mmap(const struct tdb_context *tdb)
313{
314#ifdef HAVE_INCOHERENT_MMAP
315 return true;
316#else
317 return !(tdb->flags & TDB_NOMMAP);
318#endif
319}
320
321int tdb_mmap(struct tdb_context *tdb)
322{
323 if (tdb->flags & TDB_INTERNAL)
324 return 0;
325
326#ifdef HAVE_MMAP
327 if (should_mmap(tdb)) {
328 tdb->map_ptr = mmap(NULL, tdb->map_size,
329 PROT_READ|(tdb->read_only? 0:PROT_WRITE),
330 MAP_SHARED|MAP_FILE, tdb->fd,
331 tdb->hdr_ofs);
332
333 /*
334 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
335 */
336
337 if (tdb->map_ptr == MAP_FAILED) {
338 tdb->map_ptr = NULL;
339 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_mmap failed for size %u (%s)\n",
340 tdb->map_size, strerror(errno)));
341#ifdef HAVE_INCOHERENT_MMAP
342 tdb->ecode = TDB_ERR_IO;
343 return -1;
344#endif
345 }
346 } else {
347 tdb->map_ptr = NULL;
348 }
349#else
350 tdb->map_ptr = NULL;
351#endif
352 return 0;
353}
354
355/* expand a file. we prefer to use ftruncate, as that is what posix
356 says to use for mmap expansion */
357static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t addition)
358{
359 char buf[8192];
360 tdb_off_t new_size;
361
362 if (tdb->read_only || tdb->traverse_read) {
363 tdb->ecode = TDB_ERR_RDONLY;
364 return -1;
365 }
366
367 if (!tdb_add_off_t(size, addition, &new_size)) {
368 tdb->ecode = TDB_ERR_OOM;
369 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write "
370 "overflow detected current size[%u] addition[%u]!\n",
371 (unsigned)size, (unsigned)addition));
372 errno = ENOSPC;
373 return -1;
374 }
375
376 if (tdb_ftruncate(tdb, new_size) == -1) {
377 char b = 0;
378 ssize_t written = tdb_pwrite(tdb, &b, 1, new_size - 1);
379 if (written == 0) {
380 /* try once more, potentially revealing errno */
381 written = tdb_pwrite(tdb, &b, 1, new_size - 1);
382 }
383 if (written == 0) {
384 /* again - give up, guessing errno */
385 errno = ENOSPC;
386 }
387 if (written != 1) {
388 tdb->ecode = TDB_ERR_OOM;
389 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file to %u failed (%s)\n",
390 (unsigned)new_size, strerror(errno)));
391 return -1;
392 }
393 }
394
395 /* now fill the file with something. This ensures that the
396 file isn't sparse, which would be very bad if we ran out of
397 disk. This must be done with write, not via mmap */
398 memset(buf, TDB_PAD_BYTE, sizeof(buf));
399 while (addition) {
400 size_t n = addition>sizeof(buf)?sizeof(buf):addition;
401 ssize_t written = tdb_pwrite(tdb, buf, n, size);
402 if (written == 0) {
403 /* prevent infinite loops: try _once_ more */
404 written = tdb_pwrite(tdb, buf, n, size);
405 }
406 if (written == 0) {
407 /* give up, trying to provide a useful errno */
408 tdb->ecode = TDB_ERR_OOM;
409 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write "
410 "returned 0 twice: giving up!\n"));
411 errno = ENOSPC;
412 return -1;
413 }
414 if (written == -1) {
415 tdb->ecode = TDB_ERR_OOM;
416 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write of "
417 "%u bytes failed (%s)\n", (int)n,
418 strerror(errno)));
419 return -1;
420 }
421 if (written != n) {
422 TDB_LOG((tdb, TDB_DEBUG_WARNING, "expand_file: wrote "
423 "only %zu of %zi bytes - retrying\n", written,
424 n));
425 }
426 addition -= written;
427 size += written;
428 }
429 return 0;
430}
431
432
433/* You need 'size', this tells you how much you should expand by. */
434tdb_off_t tdb_expand_adjust(tdb_off_t map_size, tdb_off_t size, int page_size)
435{
436 tdb_off_t new_size, top_size, increment;
437 tdb_off_t max_size = UINT32_MAX - map_size;
438
439 if (size > max_size) {
440 /*
441 * We can't round up anymore, just give back
442 * what we're asked for.
443 *
444 * The caller has to take care of the ENOSPC handling.
445 */
446 return size;
447 }
448
449 /* limit size in order to avoid using up huge amounts of memory for
450 * in memory tdbs if an oddball huge record creeps in */
451 if (size > 100 * 1024) {
452 increment = size * 2;
453 } else {
454 increment = size * 100;
455 }
456 if (increment < size) {
457 goto overflow;
458 }
459
460 if (!tdb_add_off_t(map_size, increment, &top_size)) {
461 goto overflow;
462 }
463
464 /* always make room for at least top_size more records, and at
465 least 25% more space. if the DB is smaller than 100MiB,
466 otherwise grow it by 10% only. */
467 if (map_size > 100 * 1024 * 1024) {
468 new_size = map_size * 1.10;
469 } else {
470 new_size = map_size * 1.25;
471 }
472 if (new_size < map_size) {
473 goto overflow;
474 }
475
476 /* Round the database up to a multiple of the page size */
477 new_size = MAX(top_size, new_size);
478
479 if (new_size + page_size < new_size) {
480 /* There's a "+" in TDB_ALIGN that might overflow... */
481 goto overflow;
482 }
483
484 return TDB_ALIGN(new_size, page_size) - map_size;
485
486overflow:
487 /*
488 * Somewhere in between we went over 4GB. Make one big jump to
489 * exactly 4GB database size.
490 */
491 return max_size;
492}
493
494/* expand the database at least size bytes by expanding the underlying
495 file and doing the mmap again if necessary */
496int tdb_expand(struct tdb_context *tdb, tdb_off_t size)
497{
498 struct tdb_record rec;
499 tdb_off_t offset;
500 tdb_off_t new_size;
501
502 if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
503 TDB_LOG((tdb, TDB_DEBUG_ERROR, "lock failed in tdb_expand\n"));
504 return -1;
505 }
506
507 /* must know about any previous expansions by another process */
508 tdb->methods->tdb_oob(tdb, tdb->map_size, 1, 1);
509
510 /*
511 * Note: that we don't care about tdb->hdr_ofs != 0 here
512 *
513 * The 4GB limitation is just related to tdb->map_size
514 * and the offset calculation in the records.
515 *
516 * The file on disk can be up to 4GB + tdb->hdr_ofs
517 */
518 size = tdb_expand_adjust(tdb->map_size, size, tdb->page_size);
519
520 if (!tdb_add_off_t(tdb->map_size, size, &new_size)) {
521 tdb->ecode = TDB_ERR_OOM;
522 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_expand "
523 "overflow detected current map_size[%u] size[%u]!\n",
524 (unsigned)tdb->map_size, (unsigned)size));
525 goto fail;
526 }
527
528 /* form a new freelist record */
529 offset = tdb->map_size;
530 memset(&rec,'\0',sizeof(rec));
531 rec.rec_len = size - sizeof(rec);
532
533 if (tdb->flags & TDB_INTERNAL) {
534 char *new_map_ptr;
535
536 new_map_ptr = (char *)realloc(tdb->map_ptr, new_size);
537 if (!new_map_ptr) {
538 tdb->ecode = TDB_ERR_OOM;
539 goto fail;
540 }
541 tdb->map_ptr = new_map_ptr;
542 tdb->map_size = new_size;
543 } else {
544 int ret;
545
546 /*
547 * expand the file itself
548 */
549 ret = tdb->methods->tdb_expand_file(tdb, tdb->map_size, size);
550 if (ret != 0) {
551 goto fail;
552 }
553
554 /* Explicitly remap: if we're in a transaction, this won't
555 * happen automatically! */
556 tdb_munmap(tdb);
557 tdb->map_size = new_size;
558 if (tdb_mmap(tdb) != 0) {
559 goto fail;
560 }
561 }
562
563 /* link it into the free list */
564 if (tdb_free(tdb, offset, &rec) == -1)
565 goto fail;
566
567 tdb_unlock(tdb, -1, F_WRLCK);
568 return 0;
569 fail:
570 tdb_unlock(tdb, -1, F_WRLCK);
571 return -1;
572}
573
574/* read/write a tdb_off_t */
575int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
576{
577 return tdb->methods->tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV());
578}
579
580int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
581{
582 tdb_off_t off = *d;
583 return tdb->methods->tdb_write(tdb, offset, CONVERT(off), sizeof(*d));
584}
585
586
587/* read a lump of data, allocating the space for it */
588unsigned char *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
589{
590 unsigned char *buf;
591
592 /* some systems don't like zero length malloc */
593
594 if (!(buf = (unsigned char *)malloc(len ? len : 1))) {
595 /* Ensure ecode is set for log fn. */
596 tdb->ecode = TDB_ERR_OOM;
597 TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_alloc_read malloc failed len=%u (%s)\n",
598 len, strerror(errno)));
599 return NULL;
600 }
601 if (tdb->methods->tdb_read(tdb, offset, buf, len, 0) == -1) {
602 SAFE_FREE(buf);
603 return NULL;
604 }
605 return buf;
606}
607
608/* Give a piece of tdb data to a parser */
609
610int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key,
611 tdb_off_t offset, tdb_len_t len,
612 int (*parser)(TDB_DATA key, TDB_DATA data,
613 void *private_data),
614 void *private_data)
615{
616 TDB_DATA data;
617 int result;
618
619 data.dsize = len;
620
621 if ((tdb->transaction == NULL) && (tdb->map_ptr != NULL)) {
622 /*
623 * Optimize by avoiding the malloc/memcpy/free, point the
624 * parser directly at the mmap area.
625 */
626 if (tdb->methods->tdb_oob(tdb, offset, len, 0) != 0) {
627 return -1;
628 }
629 data.dptr = offset + (unsigned char *)tdb->map_ptr;
630 return parser(key, data, private_data);
631 }
632
633 if (!(data.dptr = tdb_alloc_read(tdb, offset, len))) {
634 return -1;
635 }
636
637 result = parser(key, data, private_data);
638 free(data.dptr);
639 return result;
640}
641
642/* read/write a record */
643int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
644{
645 if (tdb->methods->tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1)
646 return -1;
647 if (TDB_BAD_MAGIC(rec)) {
648 /* Ensure ecode is set for log fn. */
649 tdb->ecode = TDB_ERR_CORRUPT;
650 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=%u\n", rec->magic, offset));
651 return -1;
652 }
653 return tdb->methods->tdb_oob(tdb, rec->next, sizeof(*rec), 0);
654}
655
656int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec)
657{
658 struct tdb_record r = *rec;
659 return tdb->methods->tdb_write(tdb, offset, CONVERT(r), sizeof(r));
660}
661
662static const struct tdb_methods io_methods = {
663 tdb_read,
664 tdb_write,
665 tdb_next_hash_chain,
666 tdb_oob,
667 tdb_expand_file,
668};
669
670/*
671 initialise the default methods table
672*/
673void tdb_io_init(struct tdb_context *tdb)
674{
675 tdb->methods = &io_methods;
676}
Note: See TracBrowser for help on using the repository browser.