Changeset 988 for vendor/current/lib/tdb/common
- Timestamp: Nov 24, 2016, 1:14:11 PM
- Location: vendor/current/lib/tdb/common
- Files: 2 added, 14 edited
vendor/current/lib/tdb/common/check.c
r986 r988 40 40 goto corrupt; 41 41 42 if (hdr.rwlocks != 0 && hdr.rwlocks != TDB_HASH_RWLOCK_MAGIC) 42 if (hdr.rwlocks != 0 && 43 hdr.rwlocks != TDB_FEATURE_FLAG_MAGIC && 44 hdr.rwlocks != TDB_HASH_RWLOCK_MAGIC) 43 45 goto corrupt; 44 46 … … 51 53 goto corrupt; 52 54 53 if (hdr.hash_size != tdb->h eader.hash_size)55 if (hdr.hash_size != tdb->hash_size) 54 56 goto corrupt; 55 57 56 58 if (hdr.recovery_start != 0 && 57 hdr.recovery_start < TDB_DATA_START(tdb->h eader.hash_size))59 hdr.recovery_start < TDB_DATA_START(tdb->hash_size)) 58 60 goto corrupt; 59 61 … … 75 77 76 78 /* Check rec->next: 0 or points to record offset, aligned. */ 77 if (rec->next > 0 && rec->next < TDB_DATA_START(tdb->h eader.hash_size)){78 TDB_LOG((tdb, TDB_DEBUG_ERROR, 79 "Record offset % d too small next %d\n",79 if (rec->next > 0 && rec->next < TDB_DATA_START(tdb->hash_size)){ 80 TDB_LOG((tdb, TDB_DEBUG_ERROR, 81 "Record offset %u too small next %u\n", 80 82 off, rec->next)); 81 83 goto corrupt; … … 83 85 if (rec->next + sizeof(*rec) < rec->next) { 84 86 TDB_LOG((tdb, TDB_DEBUG_ERROR, 85 "Record offset % d too large next %d\n",87 "Record offset %u too large next %u\n", 86 88 off, rec->next)); 87 89 goto corrupt; … … 89 91 if ((rec->next % TDB_ALIGNMENT) != 0) { 90 92 TDB_LOG((tdb, TDB_DEBUG_ERROR, 91 "Record offset % d misaligned next %d\n",93 "Record offset %u misaligned next %u\n", 92 94 off, rec->next)); 93 95 goto corrupt; 94 96 } 95 if (tdb->methods->tdb_oob(tdb, rec->next +sizeof(*rec), 0))97 if (tdb->methods->tdb_oob(tdb, rec->next, sizeof(*rec), 0)) 96 98 goto corrupt; 97 99 … … 99 101 if ((rec->rec_len % TDB_ALIGNMENT) != 0) { 100 102 TDB_LOG((tdb, TDB_DEBUG_ERROR, 101 "Record offset % d misaligned length %d\n",103 "Record offset %u misaligned length %u\n", 102 104 off, rec->rec_len)); 103 105 goto corrupt; … … 106 108 if (rec->rec_len < sizeof(tailer)) { 107 109 TDB_LOG((tdb, TDB_DEBUG_ERROR, 108 "Record offset % d too short length %d\n",110 "Record offset %u too short length %u\n", 109 111 off, rec->rec_len)); 110 112 goto corrupt; 111 113 } 112 114 /* OOB allows "right at the end" access, so this works for last rec. */ 113 if (tdb->methods->tdb_oob(tdb, off +sizeof(*rec)+rec->rec_len, 0))115 if (tdb->methods->tdb_oob(tdb, off, sizeof(*rec)+rec->rec_len, 0)) 114 116 goto corrupt; 115 117 … … 120 122 if (tailer != sizeof(*rec) + rec->rec_len) { 121 123 TDB_LOG((tdb, TDB_DEBUG_ERROR, 122 "Record offset % dinvalid tailer\n", off));124 "Record offset %u invalid tailer\n", off)); 123 125 goto corrupt; 124 126 } … … 248 250 if (rec->key_len + rec->data_len + sizeof(tdb_off_t) > rec->rec_len) { 249 251 TDB_LOG((tdb, TDB_DEBUG_ERROR, 250 "Record offset % dtoo short for contents\n", off));252 "Record offset %u too short for contents\n", off)); 251 253 return false; 252 254 } … … 258 260 if (tdb->hash_fn(&key) != rec->full_hash) { 259 261 TDB_LOG((tdb, TDB_DEBUG_ERROR, 260 "Record offset % dhas incorrect hash\n", off));262 "Record offset %u has incorrect hash\n", off)); 261 263 goto fail_put_key; 262 264 } … … 346 348 347 349 /* Make sure we know true size of the underlying file. */ 348 tdb->methods->tdb_oob(tdb, tdb->map_size +1, 1);350 tdb->methods->tdb_oob(tdb, tdb->map_size, 1, 1); 349 351 350 352 /* Header must be OK: also gets us the recovery ptr, if any. */ … … 353 355 354 356 /* We should have the whole header, too. 
*/ 355 if (tdb->map_size < TDB_DATA_START(tdb->h eader.hash_size)) {357 if (tdb->map_size < TDB_DATA_START(tdb->hash_size)) { 356 358 tdb->ecode = TDB_ERR_CORRUPT; 357 359 TDB_LOG((tdb, TDB_DEBUG_ERROR, "File too short for hashes\n")); … … 361 363 /* One big malloc: pointers then bit arrays. */ 362 364 hashes = (unsigned char **)calloc( 363 1, sizeof(hashes[0]) * (1+tdb->h eader.hash_size)364 + BITMAP_BITS / CHAR_BIT * (1+tdb->h eader.hash_size));365 1, sizeof(hashes[0]) * (1+tdb->hash_size) 366 + BITMAP_BITS / CHAR_BIT * (1+tdb->hash_size)); 365 367 if (!hashes) { 366 368 tdb->ecode = TDB_ERR_OOM; … … 369 371 370 372 /* Initialize pointers */ 371 hashes[0] = (unsigned char *)(&hashes[1+tdb->h eader.hash_size]);372 for (h = 1; h < 1+tdb->h eader.hash_size; h++)373 hashes[0] = (unsigned char *)(&hashes[1+tdb->hash_size]); 374 for (h = 1; h < 1+tdb->hash_size; h++) 373 375 hashes[h] = hashes[h-1] + BITMAP_BITS / CHAR_BIT; 374 376 375 377 /* Freelist and hash headers are all in a row: read them. */ 376 for (h = 0; h < 1+tdb->h eader.hash_size; h++) {378 for (h = 0; h < 1+tdb->hash_size; h++) { 377 379 if (tdb_ofs_read(tdb, FREELIST_TOP + h*sizeof(tdb_off_t), 378 380 &off) == -1) … … 383 385 384 386 /* For each record, read it in and check it's ok. */ 385 for (off = TDB_DATA_START(tdb->h eader.hash_size);387 for (off = TDB_DATA_START(tdb->hash_size); 386 388 off < tdb->map_size; 387 389 off += sizeof(rec) + rec.rec_len) { … … 412 414 413 415 TDB_LOG((tdb, TDB_DEBUG_ERROR, 414 "Dead space at % d-%d(of %u)\n",416 "Dead space at %u-%u (of %u)\n", 415 417 off, off + dead, tdb->map_size)); 416 418 rec.rec_len = dead - sizeof(rec); … … 419 421 if (recovery_start != off) { 420 422 TDB_LOG((tdb, TDB_DEBUG_ERROR, 421 "Unexpected recovery record at offset % d\n",423 "Unexpected recovery record at offset %u\n", 422 424 off)); 423 425 goto free; … … 429 431 tdb->ecode = TDB_ERR_CORRUPT; 430 432 TDB_LOG((tdb, TDB_DEBUG_ERROR, 431 "Bad magic 0x%x at offset % d\n",433 "Bad magic 0x%x at offset %u\n", 432 434 rec.magic, off)); 433 435 goto free; … … 437 439 /* Now, hashes should all be empty: each record exists and is referred 438 440 * to by one other. */ 439 for (h = 0; h < 1+tdb->h eader.hash_size; h++) {441 for (h = 0; h < 1+tdb->hash_size; h++) { 440 442 unsigned int i; 441 443 for (i = 0; i < BITMAP_BITS / CHAR_BIT; i++) { -
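A recurring mechanical change in this file (and in io.c below) is the tdb_oob() signature: tdb_oob(tdb, off + len, probe) becomes tdb_oob(tdb, off, len, probe), so the bounds check itself can notice when the 32-bit sum off + len wraps. A minimal standalone sketch of the failure mode the split arguments catch — toy names, not the tdb sources:

    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t tdb_off_t;
    typedef uint32_t tdb_len_t;

    /* Overflow-aware bounds check, mirroring the new tdb_oob() shape. */
    static int oob(tdb_off_t map_size, tdb_off_t off, tdb_len_t len)
    {
            if (len + off < len) {
                    return -1;      /* off + len wrapped past 2^32 */
            }
            if (off + len > map_size) {
                    return -1;      /* genuinely beyond the mapped file */
            }
            return 0;
    }

    int main(void)
    {
            /* The old single-argument form would have been handed
             * 0xfffffff8 + 0x10 == 0x8, which looks comfortably
             * in-bounds; the split form rejects it. */
            printf("%d\n", oob(0x1000, 0xfffffff8, 0x10));  /* -1 */
            printf("%d\n", oob(0x1000, 0x0f00, 0x100));     /*  0 */
            return 0;
    }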
vendor/current/lib/tdb/common/dump.c
r986 r988 1 /* 1 /* 2 2 Unix SMB/CIFS implementation. 3 3 … … 34 34 tdb_off_t tailer_ofs, tailer; 35 35 36 if (tdb->methods->tdb_read(tdb, offset, (char *)&rec, 36 if (tdb->methods->tdb_read(tdb, offset, (char *)&rec, 37 37 sizeof(rec), DOCONV()) == -1) { 38 38 printf("ERROR: failed to read record at %u\n", offset); … … 40 40 } 41 41 42 printf(" rec: hash=%d offset=0x%08x next=0x%08x rec_len=% d"43 "key_len=% d data_len=%d full_hash=0x%x magic=0x%x\n",42 printf(" rec: hash=%d offset=0x%08x next=0x%08x rec_len=%u " 43 "key_len=%u data_len=%u full_hash=0x%08x magic=0x%08x\n", 44 44 hash, offset, rec.next, rec.rec_len, rec.key_len, rec.data_len, 45 45 rec.full_hash, rec.magic); … … 84 84 { 85 85 int i; 86 for (i=0;i<tdb->h eader.hash_size;i++) {86 for (i=0;i<tdb->hash_size;i++) { 87 87 tdb_dump_chain(tdb, i); 88 88 } … … 111 111 printf("freelist top=[0x%08x]\n", rec_ptr ); 112 112 while (rec_ptr) { 113 if (tdb->methods->tdb_read(tdb, rec_ptr, (char *)&rec, 113 if (tdb->methods->tdb_read(tdb, rec_ptr, (char *)&rec, 114 114 sizeof(rec), DOCONV()) == -1) { 115 115 tdb_unlock(tdb, -1, F_WRLCK); … … 123 123 } 124 124 125 printf("entry offset=[0x%08x], rec.rec_len = [0x%08x (% d)] (end = 0x%08x)\n",125 printf("entry offset=[0x%08x], rec.rec_len = [0x%08x (%u)] (end = 0x%08x)\n", 126 126 rec_ptr, rec.rec_len, rec.rec_len, rec_ptr + rec.rec_len); 127 127 total_free += rec.rec_len; … … 130 130 rec_ptr = rec.next; 131 131 } 132 printf("total rec_len = [0x%08x (%d)]\n", (int)total_free, 133 (int)total_free); 132 printf("total rec_len = [0x%08lx (%lu)]\n", total_free, total_free); 134 133 135 134 return tdb_unlock(tdb, -1, F_WRLCK); -
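Most of the dump.c hunks are format-string hygiene: tdb offsets and lengths are unsigned 32-bit values, and %d renders anything past 2 GiB as a negative number. Illustrative only (the negative value shown assumes the usual two's-complement int):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t off = 0x90000000;     /* an offset in a >2 GiB tdb */

            printf("%d\n", (int)off);      /* -1879048192: misleading */
            printf("%u\n", off);           /* 2415919104: what %u prints */
            return 0;
    }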
vendor/current/lib/tdb/common/error.c
r986 r988 1 /* 1 /* 2 2 Unix SMB/CIFS implementation. 3 3 -
vendor/current/lib/tdb/common/freelist.c
r986 r988 1 /* 1 /* 2 2 Unix SMB/CIFS implementation. 3 3 … … 29 29 30 30 /* 'right' merges can involve O(n^2) cost when combined with a 31 traverse, so they are disabled until we find a way to do them in 31 traverse, so they are disabled until we find a way to do them in 32 32 O(1) time 33 33 */ … … 43 43 /* this happens when a app is showdown while deleting a record - we should 44 44 not completely fail when this happens */ 45 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_rec_free_read non-free magic 0x%x at offset=% d - fixing\n",45 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_rec_free_read non-free magic 0x%x at offset=%u - fixing\n", 46 46 rec->magic, off)); 47 47 rec->magic = TDB_FREE_MAGIC; 48 if (tdb ->methods->tdb_write(tdb, off, rec, sizeof(*rec)) == -1)48 if (tdb_rec_write(tdb, off, rec) == -1) 49 49 return -1; 50 50 } … … 53 53 /* Ensure ecode is set for log fn. */ 54 54 tdb->ecode = TDB_ERR_CORRUPT; 55 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_rec_free_read bad magic 0x%x at offset=% d\n",55 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_rec_free_read bad magic 0x%x at offset=%u\n", 56 56 rec->magic, off)); 57 57 return -1; 58 58 } 59 if (tdb->methods->tdb_oob(tdb, rec->next +sizeof(*rec), 0) != 0)59 if (tdb->methods->tdb_oob(tdb, rec->next, sizeof(*rec), 0) != 0) 60 60 return -1; 61 61 return 0; … … 80 80 } 81 81 tdb->ecode = TDB_ERR_CORRUPT; 82 TDB_LOG((tdb, TDB_DEBUG_FATAL,"remove_from_freelist: not on list at off=% d\n", off));82 TDB_LOG((tdb, TDB_DEBUG_FATAL,"remove_from_freelist: not on list at off=%u\n", off)); 83 83 return -1; 84 84 } … … 98 98 } 99 99 100 /* Add an element into the freelist. Merge adjacent records if 101 necessary. */ 100 /** 101 * Read the record directly on the left. 102 * Fail if there is no record on the left. 103 */ 104 static int read_record_on_left(struct tdb_context *tdb, tdb_off_t rec_ptr, 105 tdb_off_t *left_p, 106 struct tdb_record *left_r) 107 { 108 tdb_off_t left_ptr; 109 tdb_off_t left_size; 110 struct tdb_record left_rec; 111 int ret; 112 113 left_ptr = rec_ptr - sizeof(tdb_off_t); 114 115 if (left_ptr <= TDB_DATA_START(tdb->hash_size)) { 116 /* no record on the left */ 117 return -1; 118 } 119 120 /* Read in tailer and jump back to header */ 121 ret = tdb_ofs_read(tdb, left_ptr, &left_size); 122 if (ret == -1) { 123 TDB_LOG((tdb, TDB_DEBUG_FATAL, 124 "tdb_free: left offset read failed at %u\n", left_ptr)); 125 return -1; 126 } 127 128 /* it could be uninitialised data */ 129 if (left_size == 0 || left_size == TDB_PAD_U32) { 130 return -1; 131 } 132 133 if (left_size > rec_ptr) { 134 return -1; 135 } 136 137 left_ptr = rec_ptr - left_size; 138 139 if (left_ptr < TDB_DATA_START(tdb->hash_size)) { 140 return -1; 141 } 142 143 /* Now read in the left record */ 144 ret = tdb->methods->tdb_read(tdb, left_ptr, &left_rec, 145 sizeof(left_rec), DOCONV()); 146 if (ret == -1) { 147 TDB_LOG((tdb, TDB_DEBUG_FATAL, 148 "tdb_free: left read failed at %u (%u)\n", 149 left_ptr, left_size)); 150 return -1; 151 } 152 153 *left_p = left_ptr; 154 *left_r = left_rec; 155 156 return 0; 157 } 158 159 /** 160 * Merge new freelist record with the direct left neighbour. 161 * This assumes that left_rec represents the record 162 * directly to the left of right_rec and that this is 163 * a freelist record. 
164 */ 165 static int merge_with_left_record(struct tdb_context *tdb, 166 tdb_off_t left_ptr, 167 struct tdb_record *left_rec, 168 struct tdb_record *right_rec) 169 { 170 int ret; 171 172 left_rec->rec_len += sizeof(*right_rec) + right_rec->rec_len; 173 174 ret = tdb_rec_write(tdb, left_ptr, left_rec); 175 if (ret == -1) { 176 TDB_LOG((tdb, TDB_DEBUG_FATAL, 177 "merge_with_left_record: update_left failed at %u\n", 178 left_ptr)); 179 return -1; 180 } 181 182 ret = update_tailer(tdb, left_ptr, left_rec); 183 if (ret == -1) { 184 TDB_LOG((tdb, TDB_DEBUG_FATAL, 185 "merge_with_left_record: update_tailer failed at %u\n", 186 left_ptr)); 187 return -1; 188 } 189 190 return 0; 191 } 192 193 /** 194 * Check whether the record left of a given freelist record is 195 * also a freelist record, and if so, merge the two records. 196 * 197 * Return code: 198 * -1 upon error 199 * 0 if left was not a free record 200 * 1 if left was free and successfully merged. 201 * 202 * The currend record is handed in with pointer and fully read record. 203 * 204 * The left record pointer and struct can be retrieved as result 205 * in lp and lr; 206 */ 207 static int check_merge_with_left_record(struct tdb_context *tdb, 208 tdb_off_t rec_ptr, 209 struct tdb_record *rec, 210 tdb_off_t *lp, 211 struct tdb_record *lr) 212 { 213 tdb_off_t left_ptr; 214 struct tdb_record left_rec; 215 int ret; 216 217 ret = read_record_on_left(tdb, rec_ptr, &left_ptr, &left_rec); 218 if (ret != 0) { 219 return 0; 220 } 221 222 if (left_rec.magic != TDB_FREE_MAGIC) { 223 return 0; 224 } 225 226 /* It's free - expand to include it. */ 227 ret = merge_with_left_record(tdb, left_ptr, &left_rec, rec); 228 if (ret != 0) { 229 return -1; 230 } 231 232 if (lp != NULL) { 233 *lp = left_ptr; 234 } 235 236 if (lr != NULL) { 237 *lr = left_rec; 238 } 239 240 return 1; 241 } 242 243 /** 244 * Check whether the record left of a given freelist record is 245 * also a freelist record, and if so, merge the two records. 246 * 247 * Return code: 248 * -1 upon error 249 * 0 if left was not a free record 250 * 1 if left was free and successfully merged. 251 * 252 * In this variant, the input record is specified just as the pointer 253 * and is read from the database if needed. 254 * 255 * next_ptr will contain the original record's next pointer after 256 * successful merging (which will be lost after merging), so that 257 * the caller can update the last pointer. 258 */ 259 static int check_merge_ptr_with_left_record(struct tdb_context *tdb, 260 tdb_off_t rec_ptr, 261 tdb_off_t *next_ptr) 262 { 263 tdb_off_t left_ptr; 264 struct tdb_record rec, left_rec; 265 int ret; 266 267 ret = read_record_on_left(tdb, rec_ptr, &left_ptr, &left_rec); 268 if (ret != 0) { 269 return 0; 270 } 271 272 if (left_rec.magic != TDB_FREE_MAGIC) { 273 return 0; 274 } 275 276 /* It's free - expand to include it. */ 277 278 ret = tdb->methods->tdb_read(tdb, rec_ptr, &rec, 279 sizeof(rec), DOCONV()); 280 if (ret != 0) { 281 return -1; 282 } 283 284 ret = merge_with_left_record(tdb, left_ptr, &left_rec, &rec); 285 if (ret != 0) { 286 return -1; 287 } 288 289 if (next_ptr != NULL) { 290 *next_ptr = rec.next; 291 } 292 293 return 1; 294 } 295 296 /** 297 * Add an element into the freelist. 298 * 299 * We merge the new record into the left record if it is also a 300 * free record, but not with the right one. This makes the 301 * operation O(1) instead of O(n): merging with the right record 302 * requires a traverse of the freelist to find the previous 303 * record in the free list. 
304 * 305 * This prevents db traverses from being O(n^2) after a lot of deletes. 306 */ 102 307 int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec) 103 308 { 309 int ret; 310 104 311 /* Allocation and tailer lock */ 105 312 if (tdb_lock(tdb, -1, F_WRLCK) != 0) … … 139 346 #endif 140 347 141 /* Look left */ 142 if (offset - sizeof(tdb_off_t) > TDB_DATA_START(tdb->header.hash_size)) { 143 tdb_off_t left = offset - sizeof(tdb_off_t); 144 struct tdb_record l; 145 tdb_off_t leftsize; 146 147 /* Read in tailer and jump back to header */ 148 if (tdb_ofs_read(tdb, left, &leftsize) == -1) { 149 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: left offset read failed at %u\n", left)); 150 goto update; 151 } 152 153 /* it could be uninitialised data */ 154 if (leftsize == 0 || leftsize == TDB_PAD_U32) { 155 goto update; 156 } 157 158 left = offset - leftsize; 159 160 if (leftsize > offset || 161 left < TDB_DATA_START(tdb->header.hash_size)) { 162 goto update; 163 } 164 165 /* Now read in the left record */ 166 if (tdb->methods->tdb_read(tdb, left, &l, sizeof(l), DOCONV()) == -1) { 167 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: left read failed at %u (%u)\n", left, leftsize)); 168 goto update; 169 } 170 171 /* If it's free, expand to include it. */ 172 if (l.magic == TDB_FREE_MAGIC) { 173 /* we now merge the new record into the left record, rather than the other 174 way around. This makes the operation O(1) instead of O(n). This change 175 prevents traverse from being O(n^2) after a lot of deletes */ 176 l.rec_len += sizeof(*rec) + rec->rec_len; 177 if (tdb_rec_write(tdb, left, &l) == -1) { 178 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_left failed at %u\n", left)); 179 goto fail; 180 } 181 if (update_tailer(tdb, left, &l) == -1) { 182 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_tailer failed at %u\n", offset)); 183 goto fail; 184 } 185 tdb_unlock(tdb, -1, F_WRLCK); 186 return 0; 187 } 188 } 189 190 update: 191 192 /* Now, prepend to free list */ 348 ret = check_merge_with_left_record(tdb, offset, rec, NULL, NULL); 349 if (ret == -1) { 350 goto fail; 351 } 352 if (ret == 1) { 353 /* merged */ 354 goto done; 355 } 356 357 /* Nothing to merge, prepend to free list */ 358 193 359 rec->magic = TDB_FREE_MAGIC; 194 360 … … 196 362 tdb_rec_write(tdb, offset, rec) == -1 || 197 363 tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) { 198 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free record write failed at offset=% d\n", offset));364 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free record write failed at offset=%u\n", offset)); 199 365 goto fail; 200 366 } 201 367 368 done: 202 369 /* And we're done. 
*/ 203 370 tdb_unlock(tdb, -1, F_WRLCK); … … 211 378 212 379 213 /* 380 /* 214 381 the core of tdb_allocate - called when we have decided which 215 382 free list entry to use … … 219 386 able to free up the record without fragmentation 220 387 */ 221 static tdb_off_t tdb_allocate_ofs(struct tdb_context *tdb, 388 static tdb_off_t tdb_allocate_ofs(struct tdb_context *tdb, 222 389 tdb_len_t length, tdb_off_t rec_ptr, 223 390 struct tdb_record *rec, tdb_off_t last_ptr) … … 251 418 252 419 /* and setup the new record */ 253 rec_ptr += sizeof(*rec) + rec->rec_len; 420 rec_ptr += sizeof(*rec) + rec->rec_len; 254 421 255 422 memset(rec, '\0', sizeof(*rec)); … … 274 441 0 is returned if the space could not be allocated 275 442 */ 276 tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct tdb_record *rec) 443 static tdb_off_t tdb_allocate_from_freelist( 444 struct tdb_context *tdb, tdb_len_t length, struct tdb_record *rec) 277 445 { 278 446 tdb_off_t rec_ptr, last_ptr, newrec_ptr; … … 282 450 } bestfit; 283 451 float multiplier = 1.0; 284 285 if (tdb_lock(tdb, -1, F_WRLCK) == -1) 286 return 0; 452 bool merge_created_candidate; 287 453 288 454 /* over-allocate to reduce fragmentation */ … … 294 460 295 461 again: 462 merge_created_candidate = false; 296 463 last_ptr = FREELIST_TOP; 297 464 298 465 /* read in the freelist top */ 299 466 if (tdb_ofs_read(tdb, FREELIST_TOP, &rec_ptr) == -1) 300 goto fail;467 return 0; 301 468 302 469 bestfit.rec_ptr = 0; … … 304 471 bestfit.rec_len = 0; 305 472 306 /* 473 /* 307 474 this is a best fit allocation strategy. Originally we used 308 475 a first fit strategy, but it suffered from massive fragmentation … … 310 477 */ 311 478 while (rec_ptr) { 479 int ret; 480 tdb_off_t left_ptr; 481 struct tdb_record left_rec; 482 312 483 if (tdb_rec_free_read(tdb, rec_ptr, rec) == -1) { 313 goto fail; 484 return 0; 485 } 486 487 ret = check_merge_with_left_record(tdb, rec_ptr, rec, 488 &left_ptr, &left_rec); 489 if (ret == -1) { 490 return 0; 491 } 492 if (ret == 1) { 493 /* merged */ 494 rec_ptr = rec->next; 495 ret = tdb_ofs_write(tdb, last_ptr, &rec->next); 496 if (ret == -1) { 497 return 0; 498 } 499 500 /* 501 * We have merged the current record into the left 502 * neighbour. So our traverse of the freelist will 503 * skip it and consider the next record in the chain. 504 * 505 * But the enlarged left neighbour may be a candidate. 506 * If it is, we can not directly use it, though. 507 * The only thing we can do and have to do here is to 508 * update the current best fit size in the chain if the 509 * current best fit is the left record. (By that we may 510 * worsen the best fit we already had, bit this is not a 511 * problem.) 512 * 513 * If the current best fit is not the left record, 514 * all we can do is remember the fact that a merge 515 * created a new candidate so that we can trigger 516 * a second walk of the freelist if at the end of 517 * the first walk we have not found any fit. 518 * This way we can avoid expanding the database. 
519 */ 520 521 if (bestfit.rec_ptr == left_ptr) { 522 bestfit.rec_len = left_rec.rec_len; 523 } 524 525 if (left_rec.rec_len > length) { 526 merge_created_candidate = true; 527 } 528 529 continue; 314 530 } 315 531 … … 345 561 if (bestfit.rec_ptr != 0) { 346 562 if (tdb_rec_free_read(tdb, bestfit.rec_ptr, rec) == -1) { 347 goto fail;348 } 349 350 newrec_ptr = tdb_allocate_ofs(tdb, length, bestfit.rec_ptr, 563 return 0; 564 } 565 566 newrec_ptr = tdb_allocate_ofs(tdb, length, bestfit.rec_ptr, 351 567 rec, bestfit.last_ptr); 352 tdb_unlock(tdb, -1, F_WRLCK);353 568 return newrec_ptr; 569 } 570 571 if (merge_created_candidate) { 572 goto again; 354 573 } 355 574 … … 358 577 if (tdb_expand(tdb, length + sizeof(*rec)) == 0) 359 578 goto again; 360 fail: 579 580 return 0; 581 } 582 583 static bool tdb_alloc_dead( 584 struct tdb_context *tdb, int hash, tdb_len_t length, 585 tdb_off_t *rec_ptr, struct tdb_record *rec) 586 { 587 tdb_off_t last_ptr; 588 589 *rec_ptr = tdb_find_dead(tdb, hash, rec, length, &last_ptr); 590 if (*rec_ptr == 0) { 591 return false; 592 } 593 /* 594 * Unlink the record from the hash chain, it's about to be moved into 595 * another one. 596 */ 597 return (tdb_ofs_write(tdb, last_ptr, &rec->next) == 0); 598 } 599 600 /* 601 * Chain "hash" is assumed to be locked 602 */ 603 604 tdb_off_t tdb_allocate(struct tdb_context *tdb, int hash, tdb_len_t length, 605 struct tdb_record *rec) 606 { 607 tdb_off_t ret; 608 int i; 609 610 if (tdb->max_dead_records == 0) { 611 /* 612 * No dead records to expect anywhere. Do the blocking 613 * freelist lock without trying to steal from others 614 */ 615 goto blocking_freelist_allocate; 616 } 617 618 /* 619 * The following loop tries to get the freelist lock nonblocking. If 620 * it gets the lock, allocate from there. If the freelist is busy, 621 * instead of waiting we try to steal dead records from other hash 622 * chains. 623 * 624 * Be aware that we do nonblocking locks on the other hash chains as 625 * well and fail gracefully. This way we avoid deadlocks (we block two 626 * hash chains, something which is pretty bad normally) 627 */ 628 629 for (i=0; i<tdb->hash_size; i++) { 630 631 int list; 632 633 list = BUCKET(hash+i); 634 635 if (tdb_lock_nonblock(tdb, list, F_WRLCK) == 0) { 636 bool got_dead; 637 638 got_dead = tdb_alloc_dead(tdb, list, length, &ret, rec); 639 tdb_unlock(tdb, list, F_WRLCK); 640 641 if (got_dead) { 642 return ret; 643 } 644 } 645 646 if (tdb_lock_nonblock(tdb, -1, F_WRLCK) == 0) { 647 /* 648 * Under the freelist lock take the chance to give 649 * back our dead records. 650 */ 651 tdb_purge_dead(tdb, hash); 652 653 ret = tdb_allocate_from_freelist(tdb, length, rec); 654 tdb_unlock(tdb, -1, F_WRLCK); 655 return ret; 656 } 657 } 658 659 blocking_freelist_allocate: 660 661 if (tdb_lock(tdb, -1, F_WRLCK) == -1) { 662 return 0; 663 } 664 ret = tdb_allocate_from_freelist(tdb, length, rec); 361 665 tdb_unlock(tdb, -1, F_WRLCK); 362 return 0; 363 } 364 365 366 367 /* 368 return the size of the freelist - used to decide if we should repack 369 */ 370 _PUBLIC_ int tdb_freelist_size(struct tdb_context *tdb) 666 return ret; 667 } 668 669 /** 670 * Merge adjacent records in the freelist. 
671 */ 672 static int tdb_freelist_merge_adjacent(struct tdb_context *tdb, 673 int *count_records, int *count_merged) 674 { 675 tdb_off_t cur, next; 676 int count = 0; 677 int merged = 0; 678 int ret; 679 680 ret = tdb_lock(tdb, -1, F_RDLCK); 681 if (ret == -1) { 682 return -1; 683 } 684 685 cur = FREELIST_TOP; 686 while (tdb_ofs_read(tdb, cur, &next) == 0 && next != 0) { 687 tdb_off_t next2; 688 689 count++; 690 691 ret = check_merge_ptr_with_left_record(tdb, next, &next2); 692 if (ret == -1) { 693 goto done; 694 } 695 if (ret == 1) { 696 /* 697 * merged: 698 * now let cur->next point to next2 instead of next 699 */ 700 701 ret = tdb_ofs_write(tdb, cur, &next2); 702 if (ret != 0) { 703 goto done; 704 } 705 706 next = next2; 707 merged++; 708 } 709 710 cur = next; 711 } 712 713 if (count_records != NULL) { 714 *count_records = count; 715 } 716 717 if (count_merged != NULL) { 718 *count_merged = merged; 719 } 720 721 ret = 0; 722 723 done: 724 tdb_unlock(tdb, -1, F_RDLCK); 725 return ret; 726 } 727 728 /** 729 * return the size of the freelist - no merging done 730 */ 731 static int tdb_freelist_size_no_merge(struct tdb_context *tdb) 371 732 { 372 733 tdb_off_t ptr; … … 385 746 return count; 386 747 } 748 749 /** 750 * return the size of the freelist - used to decide if we should repack 751 * 752 * As a side effect, adjacent records are merged unless the 753 * database is read-only, in order to reduce the fragmentation 754 * without repacking. 755 */ 756 _PUBLIC_ int tdb_freelist_size(struct tdb_context *tdb) 757 { 758 759 int count = 0; 760 761 if (tdb->read_only) { 762 count = tdb_freelist_size_no_merge(tdb); 763 } else { 764 int ret; 765 ret = tdb_freelist_merge_adjacent(tdb, &count, NULL); 766 if (ret != 0) { 767 return -1; 768 } 769 } 770 771 return count; 772 } -
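The new read_record_on_left()/merge_with_left_record() helpers lean on tdb's tailer convention: the last four bytes of every record hold sizeof(header) + rec_len, so the start of the left neighbour is a single read away. That is what keeps the merge O(1), while right-merges (which need a freelist walk) stay disabled. A self-contained toy model of the lookup; sizes and names are illustrative, not tdb's real structs:

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    #define HDR 8                  /* toy header size */
    static uint8_t db[256];        /* toy mapped file */

    /* Lay down a record at 'off': HDR bytes of header, then rec_len bytes
     * of data whose final 4 bytes are the tailer, HDR + rec_len. */
    static void put_rec(uint32_t off, uint32_t rec_len)
    {
            uint32_t tailer = HDR + rec_len;
            memcpy(db + off + HDR + rec_len - 4, &tailer, 4);
    }

    /* The tailer of the left neighbour sits just below rec_ptr. */
    static uint32_t left_of(uint32_t rec_ptr)
    {
            uint32_t left_size;
            memcpy(&left_size, db + rec_ptr - 4, 4);
            return rec_ptr - left_size;
    }

    int main(void)
    {
            put_rec(16, 24);       /* record A: bytes 16..47 */
            put_rec(48, 24);       /* record B starts right after A */
            printf("left of 48 is %u\n", left_of(48));   /* 16 */
            return 0;
    }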
vendor/current/lib/tdb/common/freelistcheck.c
r986 r988 36 36 static int seen_insert(struct tdb_context *mem_tdb, tdb_off_t rec_ptr) 37 37 { 38 TDB_DATA key , data;38 TDB_DATA key; 39 39 40 memset(&data, '\0', sizeof(data));41 40 key.dptr = (unsigned char *)&rec_ptr; 42 41 key.dsize = sizeof(rec_ptr); 43 return tdb_store(mem_tdb, key, data, TDB_INSERT);42 return tdb_store(mem_tdb, key, tdb_null, TDB_INSERT); 44 43 } 45 44 … … 53 52 *pnum_entries = 0; 54 53 55 mem_tdb = tdb_open("flval", tdb->h eader.hash_size,54 mem_tdb = tdb_open("flval", tdb->hash_size, 56 55 TDB_INTERNAL, O_RDWR, 0600); 57 56 if (!mem_tdb) { -
vendor/current/lib/tdb/common/hash.c
r986 r988 215 215 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { 216 216 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ 217 #ifdef VALGRIND218 217 const uint8_t *k8; 219 #endif220 218 221 219 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ … … 231 229 232 230 /*----------------------------- handle the last (probably partial) block */ 233 /*234 * "k[2]&0xffffff" actually reads beyond the end of the string, but235 * then masks off the part it's not allowed to read. Because the236 * string is aligned, the masked-off tail is in the same word as the237 * rest of the string. Every machine with memory protection I've seen238 * does it on word boundaries, so is OK with this. But VALGRIND will239 * still catch it and complain. The masking trick does make the hash240 * noticably faster for short strings (like English words).241 */242 #ifndef VALGRIND243 244 switch(length)245 {246 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;247 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;248 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;249 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;250 case 8 : b+=k[1]; a+=k[0]; break;251 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;252 case 6 : b+=k[1]&0xffff; a+=k[0]; break;253 case 5 : b+=k[1]&0xff; a+=k[0]; break;254 case 4 : a+=k[0]; break;255 case 3 : a+=k[0]&0xffffff; break;256 case 2 : a+=k[0]&0xffff; break;257 case 1 : a+=k[0]&0xff; break;258 case 0 : return c; /* zero length strings require no mixing */259 }260 261 #else /* make valgrind happy */262 263 231 k8 = (const uint8_t *)k; 264 232 switch(length) … … 278 246 case 0 : return c; 279 247 } 280 281 #endif /* !valgrind */282 283 248 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { 284 249 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ -
vendor/current/lib/tdb/common/io.c
r986 r988 1 /* 1 /* 2 2 Unix SMB/CIFS implementation. 3 3 … … 29 29 #include "tdb_private.h" 30 30 31 /* 32 * We prepend the mutex area, so fixup offsets. See mutex.c for details. 33 * tdb->hdr_ofs is 0 or header.mutex_size. 34 * 35 * Note: that we only have the 4GB limit of tdb_off_t for 36 * tdb->map_size. The file size on disk can be 4GB + tdb->hdr_ofs! 37 */ 38 39 static bool tdb_adjust_offset(struct tdb_context *tdb, off_t *off) 40 { 41 off_t tmp = tdb->hdr_ofs + *off; 42 43 if ((tmp < tdb->hdr_ofs) || (tmp < *off)) { 44 errno = EIO; 45 return false; 46 } 47 48 *off = tmp; 49 return true; 50 } 51 52 static ssize_t tdb_pwrite(struct tdb_context *tdb, const void *buf, 53 size_t count, off_t offset) 54 { 55 if (!tdb_adjust_offset(tdb, &offset)) { 56 return -1; 57 } 58 return pwrite(tdb->fd, buf, count, offset); 59 } 60 61 static ssize_t tdb_pread(struct tdb_context *tdb, void *buf, 62 size_t count, off_t offset) 63 { 64 if (!tdb_adjust_offset(tdb, &offset)) { 65 return -1; 66 } 67 return pread(tdb->fd, buf, count, offset); 68 } 69 70 static int tdb_ftruncate(struct tdb_context *tdb, off_t length) 71 { 72 if (!tdb_adjust_offset(tdb, &length)) { 73 return -1; 74 } 75 return ftruncate(tdb->fd, length); 76 } 77 78 static int tdb_fstat(struct tdb_context *tdb, struct stat *buf) 79 { 80 int ret; 81 82 ret = fstat(tdb->fd, buf); 83 if (ret == -1) { 84 return -1; 85 } 86 87 if (buf->st_size < tdb->hdr_ofs) { 88 errno = EIO; 89 return -1; 90 } 91 buf->st_size -= tdb->hdr_ofs; 92 93 return ret; 94 } 95 31 96 /* check for an out of bounds access - if it is out of bounds then 32 97 see if the database has been expanded by someone else and expand 33 if necessary 34 note that "len" is the minimum length needed for the db 98 if necessary 35 99 */ 36 static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, int probe) 100 static int tdb_oob(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len, 101 int probe) 37 102 { 38 103 struct stat st; 39 if (len <= tdb->map_size) 104 if (len + off < len) { 105 if (!probe) { 106 /* Ensure ecode is set for log fn. */ 107 tdb->ecode = TDB_ERR_IO; 108 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob off %u len %u wrap\n", 109 off, len)); 110 } 111 return -1; 112 } 113 114 if (off + len <= tdb->map_size) 40 115 return 0; 41 116 if (tdb->flags & TDB_INTERNAL) { … … 43 118 /* Ensure ecode is set for log fn. */ 44 119 tdb->ecode = TDB_ERR_IO; 45 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len % d beyond internal malloc size %d\n",46 (int) len, (int)tdb->map_size));47 } 48 return -1; 49 } 50 51 if ( fstat(tdb->fd, &st) == -1) {120 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %u beyond internal malloc size %u\n", 121 (int)(off + len), (int)tdb->map_size)); 122 } 123 return -1; 124 } 125 126 if (tdb_fstat(tdb, &st) == -1) { 52 127 tdb->ecode = TDB_ERR_IO; 53 128 return -1; 54 129 } 55 130 56 /* Unmap, update size, remap */ 131 /* Beware >4G files! */ 132 if ((tdb_off_t)st.st_size != st.st_size) { 133 /* Ensure ecode is set for log fn. */ 134 tdb->ecode = TDB_ERR_IO; 135 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_oob len %llu too large!\n", 136 (long long)st.st_size)); 137 return -1; 138 } 139 140 /* Unmap, update size, remap. We do this unconditionally, to handle 141 * the unusual case where the db is truncated. 142 * 143 * This can happen to a child using tdb_reopen_all(true) on a 144 * TDB_CLEAR_IF_FIRST tdb whose parent crashes: the next 145 * opener will truncate the database. 
*/ 57 146 if (tdb_munmap(tdb) == -1) { 58 147 tdb->ecode = TDB_ERR_IO; … … 60 149 } 61 150 tdb->map_size = st.st_size; 62 tdb_mmap(tdb); 63 64 if (st.st_size < (size_t)len) { 151 if (tdb_mmap(tdb) != 0) { 152 return -1; 153 } 154 155 if (st.st_size < (size_t)off + len) { 65 156 if (!probe) { 66 157 /* Ensure ecode is set for log fn. */ 67 158 tdb->ecode = TDB_ERR_IO; 68 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %d beyond eof at %d\n", 69 (int)len, (int)st.st_size)); 70 } 71 return -1; 72 } 73 159 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %u beyond eof at %u\n", 160 (int)(off + len), (int)st.st_size)); 161 } 162 return -1; 163 } 74 164 return 0; 75 165 } 76 166 77 167 /* write a lump of data at a specified offset */ 78 static int tdb_write(struct tdb_context *tdb, tdb_off_t off, 168 static int tdb_write(struct tdb_context *tdb, tdb_off_t off, 79 169 const void *buf, tdb_len_t len) 80 170 { … … 88 178 } 89 179 90 if (tdb->methods->tdb_oob(tdb, off +len, 0) != 0)180 if (tdb->methods->tdb_oob(tdb, off, len, 0) != 0) 91 181 return -1; 92 182 … … 94 184 memcpy(off + (char *)tdb->map_ptr, buf, len); 95 185 } else { 96 ssize_t written = pwrite(tdb->fd, buf, len, off); 186 #ifdef HAVE_INCOHERENT_MMAP 187 tdb->ecode = TDB_ERR_IO; 188 return -1; 189 #else 190 ssize_t written; 191 192 written = tdb_pwrite(tdb, buf, len, off); 193 97 194 if ((written != (ssize_t)len) && (written != -1)) { 98 195 /* try once more */ 99 196 tdb->ecode = TDB_ERR_IO; 100 197 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only " 101 "%d of %d bytes at %d, trying once more\n", 102 (int)written, len, off)); 103 written = pwrite(tdb->fd, (const char *)buf+written, 104 len-written, 105 off+written); 198 "%zi of %u bytes at %u, trying once more\n", 199 written, len, off)); 200 written = tdb_pwrite(tdb, (const char *)buf+written, 201 len-written, off+written); 106 202 } 107 203 if (written == -1) { 108 204 /* Ensure ecode is set for log fn. */ 109 205 tdb->ecode = TDB_ERR_IO; 110 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at % d"111 "len=% d(%s)\n", off, len, strerror(errno)));206 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %u " 207 "len=%u (%s)\n", off, len, strerror(errno))); 112 208 return -1; 113 209 } else if (written != (ssize_t)len) { 114 210 tdb->ecode = TDB_ERR_IO; 115 211 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: failed to " 116 "write % d bytes at %din two attempts\n",212 "write %u bytes at %u in two attempts\n", 117 213 len, off)); 118 214 return -1; 119 215 } 216 #endif 120 217 } 121 218 return 0; … … 133 230 134 231 /* read a lump of data at a specified offset, maybe convert */ 135 static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf, 232 static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf, 136 233 tdb_len_t len, int cv) 137 234 { 138 if (tdb->methods->tdb_oob(tdb, off +len, 0) != 0) {235 if (tdb->methods->tdb_oob(tdb, off, len, 0) != 0) { 139 236 return -1; 140 237 } … … 143 240 memcpy(buf, off + (char *)tdb->map_ptr, len); 144 241 } else { 145 ssize_t ret = pread(tdb->fd, buf, len, off); 242 #ifdef HAVE_INCOHERENT_MMAP 243 tdb->ecode = TDB_ERR_IO; 244 return -1; 245 #else 246 ssize_t ret; 247 248 ret = tdb_pread(tdb, buf, len, off); 146 249 if (ret != (ssize_t)len) { 147 250 /* Ensure ecode is set for log fn. 
*/ 148 251 tdb->ecode = TDB_ERR_IO; 149 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_read failed at %d " 150 "len=%d ret=%d (%s) map_size=%d\n", 151 (int)off, (int)len, (int)ret, strerror(errno), 152 (int)tdb->map_size)); 153 return -1; 154 } 252 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_read failed at %u " 253 "len=%u ret=%zi (%s) map_size=%u\n", 254 off, len, ret, strerror(errno), 255 tdb->map_size)); 256 return -1; 257 } 258 #endif 155 259 } 156 260 if (cv) { … … 165 269 do an unlocked scan of the hash table heads to find the next non-zero head. The value 166 270 will then be confirmed with the lock held 167 */ 271 */ 168 272 static void tdb_next_hash_chain(struct tdb_context *tdb, uint32_t *chain) 169 273 { 170 274 uint32_t h = *chain; 171 275 if (tdb->map_ptr) { 172 for (;h < tdb->h eader.hash_size;h++) {276 for (;h < tdb->hash_size;h++) { 173 277 if (0 != *(uint32_t *)(TDB_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) { 174 278 break; … … 177 281 } else { 178 282 uint32_t off=0; 179 for (;h < tdb->h eader.hash_size;h++) {283 for (;h < tdb->hash_size;h++) { 180 284 if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &off) != 0 || off != 0) { 181 285 break; … … 205 309 } 206 310 207 void tdb_mmap(struct tdb_context *tdb) 311 /* If mmap isn't coherent, *everyone* must always mmap. */ 312 static bool should_mmap(const struct tdb_context *tdb) 313 { 314 #ifdef HAVE_INCOHERENT_MMAP 315 return true; 316 #else 317 return !(tdb->flags & TDB_NOMMAP); 318 #endif 319 } 320 321 int tdb_mmap(struct tdb_context *tdb) 208 322 { 209 323 if (tdb->flags & TDB_INTERNAL) 210 return ;324 return 0; 211 325 212 326 #ifdef HAVE_MMAP 213 if (!(tdb->flags & TDB_NOMMAP)) { 214 tdb->map_ptr = mmap(NULL, tdb->map_size, 215 PROT_READ|(tdb->read_only? 0:PROT_WRITE), 216 MAP_SHARED|MAP_FILE, tdb->fd, 0); 327 if (should_mmap(tdb)) { 328 tdb->map_ptr = mmap(NULL, tdb->map_size, 329 PROT_READ|(tdb->read_only? 
0:PROT_WRITE), 330 MAP_SHARED|MAP_FILE, tdb->fd, 331 tdb->hdr_ofs); 217 332 218 333 /* … … 222 337 if (tdb->map_ptr == MAP_FAILED) { 223 338 tdb->map_ptr = NULL; 224 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_mmap failed for size % d (%s)\n",339 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_mmap failed for size %u (%s)\n", 225 340 tdb->map_size, strerror(errno))); 341 #ifdef HAVE_INCOHERENT_MMAP 342 tdb->ecode = TDB_ERR_IO; 343 return -1; 344 #endif 226 345 } 227 346 } else { … … 231 350 tdb->map_ptr = NULL; 232 351 #endif 352 return 0; 233 353 } 234 354 … … 238 358 { 239 359 char buf[8192]; 360 tdb_off_t new_size; 240 361 241 362 if (tdb->read_only || tdb->traverse_read) { … … 244 365 } 245 366 246 if (ftruncate(tdb->fd, size+addition) == -1) { 367 if (!tdb_add_off_t(size, addition, &new_size)) { 368 tdb->ecode = TDB_ERR_OOM; 369 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write " 370 "overflow detected current size[%u] addition[%u]!\n", 371 (unsigned)size, (unsigned)addition)); 372 errno = ENOSPC; 373 return -1; 374 } 375 376 if (tdb_ftruncate(tdb, new_size) == -1) { 247 377 char b = 0; 248 ssize_t written = pwrite(tdb->fd, &b, 1, (size+addition)- 1);378 ssize_t written = tdb_pwrite(tdb, &b, 1, new_size - 1); 249 379 if (written == 0) { 250 380 /* try once more, potentially revealing errno */ 251 written = pwrite(tdb->fd, &b, 1, (size+addition)- 1);381 written = tdb_pwrite(tdb, &b, 1, new_size - 1); 252 382 } 253 383 if (written == 0) { … … 256 386 } 257 387 if (written != 1) { 258 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file to %d failed (%s)\n", 259 size+addition, strerror(errno))); 388 tdb->ecode = TDB_ERR_OOM; 389 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file to %u failed (%s)\n", 390 (unsigned)new_size, strerror(errno))); 260 391 return -1; 261 392 } … … 268 399 while (addition) { 269 400 size_t n = addition>sizeof(buf)?sizeof(buf):addition; 270 ssize_t written = pwrite(tdb->fd, buf, n, size);401 ssize_t written = tdb_pwrite(tdb, buf, n, size); 271 402 if (written == 0) { 272 403 /* prevent infinite loops: try _once_ more */ 273 written = pwrite(tdb->fd, buf, n, size);404 written = tdb_pwrite(tdb, buf, n, size); 274 405 } 275 406 if (written == 0) { 276 407 /* give up, trying to provide a useful errno */ 408 tdb->ecode = TDB_ERR_OOM; 277 409 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write " 278 410 "returned 0 twice: giving up!\n")); 279 411 errno = ENOSPC; 280 412 return -1; 281 } else if (written == -1) { 413 } 414 if (written == -1) { 415 tdb->ecode = TDB_ERR_OOM; 282 416 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write of " 283 "% dbytes failed (%s)\n", (int)n,417 "%u bytes failed (%s)\n", (int)n, 284 418 strerror(errno))); 285 419 return -1; 286 } else if (written != n) { 420 } 421 if (written != n) { 287 422 TDB_LOG((tdb, TDB_DEBUG_WARNING, "expand_file: wrote " 288 "only % d of %d bytes - retrying\n", (int)written,289 (int)n));423 "only %zu of %zi bytes - retrying\n", written, 424 n)); 290 425 } 291 426 addition -= written; … … 295 430 } 296 431 432 433 /* You need 'size', this tells you how much you should expand by. */ 434 tdb_off_t tdb_expand_adjust(tdb_off_t map_size, tdb_off_t size, int page_size) 435 { 436 tdb_off_t new_size, top_size, increment; 437 tdb_off_t max_size = UINT32_MAX - map_size; 438 439 if (size > max_size) { 440 /* 441 * We can't round up anymore, just give back 442 * what we're asked for. 443 * 444 * The caller has to take care of the ENOSPC handling. 
445 */ 446 return size; 447 } 448 449 /* limit size in order to avoid using up huge amounts of memory for 450 * in memory tdbs if an oddball huge record creeps in */ 451 if (size > 100 * 1024) { 452 increment = size * 2; 453 } else { 454 increment = size * 100; 455 } 456 if (increment < size) { 457 goto overflow; 458 } 459 460 if (!tdb_add_off_t(map_size, increment, &top_size)) { 461 goto overflow; 462 } 463 464 /* always make room for at least top_size more records, and at 465 least 25% more space. if the DB is smaller than 100MiB, 466 otherwise grow it by 10% only. */ 467 if (map_size > 100 * 1024 * 1024) { 468 new_size = map_size * 1.10; 469 } else { 470 new_size = map_size * 1.25; 471 } 472 if (new_size < map_size) { 473 goto overflow; 474 } 475 476 /* Round the database up to a multiple of the page size */ 477 new_size = MAX(top_size, new_size); 478 479 if (new_size + page_size < new_size) { 480 /* There's a "+" in TDB_ALIGN that might overflow... */ 481 goto overflow; 482 } 483 484 return TDB_ALIGN(new_size, page_size) - map_size; 485 486 overflow: 487 /* 488 * Somewhere in between we went over 4GB. Make one big jump to 489 * exactly 4GB database size. 490 */ 491 return max_size; 492 } 297 493 298 494 /* expand the database at least size bytes by expanding the underlying … … 301 497 { 302 498 struct tdb_record rec; 303 tdb_off_t offset, new_size, top_size, map_size; 499 tdb_off_t offset; 500 tdb_off_t new_size; 304 501 305 502 if (tdb_lock(tdb, -1, F_WRLCK) == -1) { … … 309 506 310 507 /* must know about any previous expansions by another process */ 311 tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1); 312 313 /* limit size in order to avoid using up huge amounts of memory for 314 * in memory tdbs if an oddball huge record creeps in */ 315 if (size > 100 * 1024) { 316 top_size = tdb->map_size + size * 2; 317 } else { 318 top_size = tdb->map_size + size * 100; 319 } 320 321 /* always make room for at least top_size more records, and at 322 least 25% more space. if the DB is smaller than 100MiB, 323 otherwise grow it by 10% only. */ 324 if (tdb->map_size > 100 * 1024 * 1024) { 325 map_size = tdb->map_size * 1.10; 326 } else { 327 map_size = tdb->map_size * 1.25; 328 } 329 330 /* Round the database up to a multiple of the page size */ 331 new_size = MAX(top_size, map_size); 332 size = TDB_ALIGN(new_size, tdb->page_size) - tdb->map_size; 333 334 if (!(tdb->flags & TDB_INTERNAL)) 335 tdb_munmap(tdb); 508 tdb->methods->tdb_oob(tdb, tdb->map_size, 1, 1); 336 509 337 510 /* 338 * We must ensure the file is unmapped before doing this 339 * to ensure consistency with systems like OpenBSD where 340 * writes and mmaps are not consistent. 511 * Note: that we don't care about tdb->hdr_ofs != 0 here 512 * 513 * The 4GB limitation is just related to tdb->map_size 514 * and the offset calculation in the records. 515 * 516 * The file on disk can be up to 4GB + tdb->hdr_ofs 341 517 */ 342 343 /* expand the file itself */ 344 if (!(tdb->flags & TDB_INTERNAL)) { 345 if (tdb->methods->tdb_expand_file(tdb, tdb->map_size, size) != 0) 346 goto fail; 347 } 348 349 tdb->map_size += size; 350 351 if (tdb->flags & TDB_INTERNAL) { 352 char *new_map_ptr = (char *)realloc(tdb->map_ptr, 353 tdb->map_size); 354 if (!new_map_ptr) { 355 tdb->map_size -= size; 356 goto fail; 357 } 358 tdb->map_ptr = new_map_ptr; 359 } else { 360 /* 361 * We must ensure the file is remapped before adding the space 362 * to ensure consistency with systems like OpenBSD where 363 * writes and mmaps are not consistent. 
364 */ 365 366 /* We're ok if the mmap fails as we'll fallback to read/write */ 367 tdb_mmap(tdb); 518 size = tdb_expand_adjust(tdb->map_size, size, tdb->page_size); 519 520 if (!tdb_add_off_t(tdb->map_size, size, &new_size)) { 521 tdb->ecode = TDB_ERR_OOM; 522 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_expand " 523 "overflow detected current map_size[%u] size[%u]!\n", 524 (unsigned)tdb->map_size, (unsigned)size)); 525 goto fail; 368 526 } 369 527 370 528 /* form a new freelist record */ 529 offset = tdb->map_size; 371 530 memset(&rec,'\0',sizeof(rec)); 372 531 rec.rec_len = size - sizeof(rec); 373 532 533 if (tdb->flags & TDB_INTERNAL) { 534 char *new_map_ptr; 535 536 new_map_ptr = (char *)realloc(tdb->map_ptr, new_size); 537 if (!new_map_ptr) { 538 tdb->ecode = TDB_ERR_OOM; 539 goto fail; 540 } 541 tdb->map_ptr = new_map_ptr; 542 tdb->map_size = new_size; 543 } else { 544 int ret; 545 546 /* 547 * expand the file itself 548 */ 549 ret = tdb->methods->tdb_expand_file(tdb, tdb->map_size, size); 550 if (ret != 0) { 551 goto fail; 552 } 553 554 /* Explicitly remap: if we're in a transaction, this won't 555 * happen automatically! */ 556 tdb_munmap(tdb); 557 tdb->map_size = new_size; 558 if (tdb_mmap(tdb) != 0) { 559 goto fail; 560 } 561 } 562 374 563 /* link it into the free list */ 375 offset = tdb->map_size - size;376 564 if (tdb_free(tdb, offset, &rec) == -1) 377 565 goto fail; … … 407 595 /* Ensure ecode is set for log fn. */ 408 596 tdb->ecode = TDB_ERR_OOM; 409 TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_alloc_read malloc failed len=% d(%s)\n",597 TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_alloc_read malloc failed len=%u (%s)\n", 410 598 len, strerror(errno))); 411 599 return NULL; … … 436 624 * parser directly at the mmap area. 437 625 */ 438 if (tdb->methods->tdb_oob(tdb, offset +len, 0) != 0) {626 if (tdb->methods->tdb_oob(tdb, offset, len, 0) != 0) { 439 627 return -1; 440 628 } … … 460 648 /* Ensure ecode is set for log fn. */ 461 649 tdb->ecode = TDB_ERR_CORRUPT; 462 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=% d\n", rec->magic, offset));463 return -1; 464 } 465 return tdb->methods->tdb_oob(tdb, rec->next +sizeof(*rec), 0);650 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=%u\n", rec->magic, offset)); 651 return -1; 652 } 653 return tdb->methods->tdb_oob(tdb, rec->next, sizeof(*rec), 0); 466 654 } 467 655 -
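Several hunks above guard additions with tdb_add_off_t(), which is not part of this directory's diff (it lives elsewhere in the tree). To make the expand/oob logic self-explanatory, here is an assumed reading of its contract — a sketch, not the actual definition:

    #include <stdint.h>
    #include <stdbool.h>

    typedef uint32_t tdb_off_t;

    /* Assumed contract: overflow-checked addition of two 32-bit offsets;
     * report failure instead of silently wrapping. */
    static bool tdb_add_off_t(tdb_off_t a, tdb_off_t b, tdb_off_t *sum)
    {
            tdb_off_t ret = a + b;         /* wraps modulo 2^32 */

            if (ret < a || ret < b) {
                    return false;          /* sum went backwards: overflow */
            }
            *sum = ret;
            return true;
    }

    int main(void)
    {
            tdb_off_t new_size;

            /* Mirrors the expand_file() guard: growing 0xfffff000 by
             * 0x2000 must fail rather than wrap to 0x1000. */
            if (!tdb_add_off_t(0xfffff000, 0x2000, &new_size)) {
                    return 1;              /* overflow detected */
            }
            return 0;
    }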
vendor/current/lib/tdb/common/lock.c
r986 r988 1 /* 1 /* 2 2 Unix SMB/CIFS implementation. 3 3 … … 37 37 { 38 38 struct flock fl; 39 int cmd; 40 41 #ifdef USE_TDB_MUTEX_LOCKING 42 { 43 int ret; 44 if (tdb_mutex_lock(tdb, rw, off, len, waitflag, &ret)) { 45 return ret; 46 } 47 } 48 #endif 39 49 40 50 fl.l_type = rw; … … 44 54 fl.l_pid = 0; 45 55 46 if (waitflag) 47 return fcntl(tdb->fd, F_SETLKW, &fl); 48 else 49 return fcntl(tdb->fd, F_SETLK, &fl); 56 cmd = waitflag ? F_SETLKW : F_SETLK; 57 58 return fcntl(tdb->fd, cmd, &fl); 50 59 } 51 60 … … 111 120 #endif 112 121 122 #ifdef USE_TDB_MUTEX_LOCKING 123 { 124 int ret; 125 if (tdb_mutex_unlock(tdb, rw, off, len, &ret)) { 126 return ret; 127 } 128 } 129 #endif 130 113 131 fl.l_type = F_UNLCK; 114 132 fl.l_whence = SEEK_SET; … … 127 145 128 146 /* a byte range locking function - return 0 on success 129 this functions locks/unlocks 1byte at the specified offset.147 this functions locks/unlocks "len" byte at the specified offset. 130 148 131 149 On error, errno is also set so that errors are passed back properly 132 through tdb_open(). 150 through tdb_open(). 133 151 134 152 note that a len of zero means lock to end of file … … 170 188 * locks. */ 171 189 if (!(flags & TDB_LOCK_PROBE) && errno != EAGAIN) { 172 TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brlock failed (fd=%d) at offset % d rw_type=%d flags=%d len=%d\n",173 tdb->fd, offset, rw_type, flags, (int)len));190 TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brlock failed (fd=%d) at offset %u rw_type=%d flags=%d len=%zu\n", 191 tdb->fd, offset, rw_type, flags, len)); 174 192 } 175 193 return -1; … … 192 210 193 211 if (ret == -1) { 194 TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brunlock failed (fd=%d) at offset % d rw_type=%d len=%d\n",195 tdb->fd, offset, rw_type, (int)len));212 TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brunlock failed (fd=%d) at offset %u rw_type=%u len=%zu\n", 213 tdb->fd, offset, rw_type, len)); 196 214 } 197 215 return ret; … … 199 217 200 218 /* 201 upgrade a read lock to a write lock. This needs to be handled in a 202 special way as some OSes (such as solaris) have too conservative 203 deadlock detection and claim a deadlock when progress can be 204 made. For those OSes we may loop for a while. 219 * Do a tdb_brlock in a loop. Some OSes (such as solaris) have too 220 * conservative deadlock detection and claim a deadlock when progress can be 221 * made. For those OSes we may loop for a while. 222 */ 223 224 static int tdb_brlock_retry(struct tdb_context *tdb, 225 int rw_type, tdb_off_t offset, size_t len, 226 enum tdb_lock_flags flags) 227 { 228 int count = 1000; 229 230 while (count--) { 231 struct timeval tv; 232 int ret; 233 234 ret = tdb_brlock(tdb, rw_type, offset, len, flags); 235 if (ret == 0) { 236 return 0; 237 } 238 if (errno != EDEADLK) { 239 break; 240 } 241 /* sleep for as short a time as we can - more portable than usleep() */ 242 tv.tv_sec = 0; 243 tv.tv_usec = 1; 244 select(0, NULL, NULL, NULL, &tv); 245 } 246 return -1; 247 } 248 249 /* 250 upgrade a read lock to a write lock. 
205 251 */ 206 252 int tdb_allrecord_upgrade(struct tdb_context *tdb) 207 253 { 208 int count = 1000;254 int ret; 209 255 210 256 if (tdb->allrecord_lock.count != 1) { … … 221 267 } 222 268 223 while (count--) { 224 struct timeval tv; 225 if (tdb_brlock(tdb, F_WRLCK, FREELIST_TOP, 0, 226 TDB_LOCK_WAIT|TDB_LOCK_PROBE) == 0) { 227 tdb->allrecord_lock.ltype = F_WRLCK; 228 tdb->allrecord_lock.off = 0; 229 return 0; 230 } 231 if (errno != EDEADLK) { 232 break; 233 } 234 /* sleep for as short a time as we can - more portable than usleep() */ 235 tv.tv_sec = 0; 236 tv.tv_usec = 1; 237 select(0, NULL, NULL, NULL, &tv); 238 } 269 if (tdb_have_mutexes(tdb)) { 270 ret = tdb_mutex_allrecord_upgrade(tdb); 271 if (ret == -1) { 272 goto fail; 273 } 274 ret = tdb_brlock_retry(tdb, F_WRLCK, lock_offset(tdb->hash_size), 275 0, TDB_LOCK_WAIT|TDB_LOCK_PROBE); 276 if (ret == -1) { 277 tdb_mutex_allrecord_downgrade(tdb); 278 } 279 } else { 280 ret = tdb_brlock_retry(tdb, F_WRLCK, FREELIST_TOP, 0, 281 TDB_LOCK_WAIT|TDB_LOCK_PROBE); 282 } 283 284 if (ret == 0) { 285 tdb->allrecord_lock.ltype = F_WRLCK; 286 tdb->allrecord_lock.off = 0; 287 return 0; 288 } 289 fail: 239 290 TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_allrecord_upgrade failed\n")); 240 291 return -1; … … 260 311 struct tdb_lock_type *new_lck; 261 312 262 if (offset >= lock_offset(tdb->h eader.hash_size)) {313 if (offset >= lock_offset(tdb->hash_size)) { 263 314 tdb->ecode = TDB_ERR_LOCK; 264 315 TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_lock: invalid offset %u for ltype=%d\n", … … 279 330 } 280 331 281 new_lck = (struct tdb_lock_type *)realloc( 282 tdb->lockrecs, 283 sizeof(*tdb->lockrecs) * (tdb->num_lockrecs+1)); 284 if (new_lck == NULL) { 285 errno = ENOMEM; 286 return -1; 287 } 288 tdb->lockrecs = new_lck; 332 if (tdb->num_lockrecs == tdb->lockrecs_array_length) { 333 new_lck = (struct tdb_lock_type *)realloc( 334 tdb->lockrecs, 335 sizeof(*tdb->lockrecs) * (tdb->num_lockrecs+1)); 336 if (new_lck == NULL) { 337 errno = ENOMEM; 338 return -1; 339 } 340 tdb->lockrecs_array_length = tdb->num_lockrecs+1; 341 tdb->lockrecs = new_lck; 342 } 289 343 290 344 /* Since fcntl locks don't nest, we do a lock for the first one, … … 294 348 } 295 349 296 tdb->lockrecs[tdb->num_lockrecs].off = offset; 297 tdb->lockrecs[tdb->num_lockrecs].count = 1; 298 tdb->lockrecs[tdb->num_lockrecs].ltype = ltype; 350 new_lck = &tdb->lockrecs[tdb->num_lockrecs]; 351 352 new_lck->off = offset; 353 new_lck->count = 1; 354 new_lck->ltype = ltype; 299 355 tdb->num_lockrecs++; 300 356 … … 335 391 } 336 392 393 /* 394 * A allrecord lock allows us to avoid per chain locks. Check if the allrecord 395 * lock is strong enough. 396 */ 397 static int tdb_lock_covered_by_allrecord_lock(struct tdb_context *tdb, 398 int ltype) 399 { 400 if (ltype == F_RDLCK) { 401 /* 402 * The allrecord_lock is equal (F_RDLCK) or stronger 403 * (F_WRLCK). Pass. 404 */ 405 return 0; 406 } 407 408 if (tdb->allrecord_lock.ltype == F_RDLCK) { 409 /* 410 * We ask for ltype==F_WRLCK, but the allrecord_lock 411 * is too weak. We can't upgrade here, so fail. 412 */ 413 tdb->ecode = TDB_ERR_LOCK; 414 return -1; 415 } 416 417 /* 418 * Asking for F_WRLCK, allrecord is F_WRLCK as well. Pass. 
419 */ 420 return 0; 421 } 422 337 423 static int tdb_lock_list(struct tdb_context *tdb, int list, int ltype, 338 424 enum tdb_lock_flags waitflag) … … 341 427 bool check = false; 342 428 343 /* a allrecord lock allows us to avoid per chain locks */344 if (tdb->allrecord_lock.count &&345 (ltype == tdb->allrecord_lock.ltype || ltype == F_RDLCK)) {346 return 0;347 }348 349 429 if (tdb->allrecord_lock.count) { 350 tdb->ecode = TDB_ERR_LOCK; 351 ret = -1; 352 } else { 353 /* Only check when we grab first data lock. */ 354 check = !have_data_locks(tdb); 355 ret = tdb_nest_lock(tdb, lock_offset(list), ltype, waitflag); 356 357 if (ret == 0 && check && tdb_needs_recovery(tdb)) { 358 tdb_nest_unlock(tdb, lock_offset(list), ltype, false); 359 360 if (tdb_lock_and_recover(tdb) == -1) { 361 return -1; 362 } 363 return tdb_lock_list(tdb, list, ltype, waitflag); 364 } 430 return tdb_lock_covered_by_allrecord_lock(tdb, ltype); 431 } 432 433 /* 434 * Check for recoveries: Someone might have kill -9'ed a process 435 * during a commit. 436 */ 437 check = !have_data_locks(tdb); 438 ret = tdb_nest_lock(tdb, lock_offset(list), ltype, waitflag); 439 440 if (ret == 0 && check && tdb_needs_recovery(tdb)) { 441 tdb_nest_unlock(tdb, lock_offset(list), ltype, false); 442 443 if (tdb_lock_and_recover(tdb) == -1) { 444 return -1; 445 } 446 return tdb_lock_list(tdb, list, ltype, waitflag); 365 447 } 366 448 return ret; … … 381 463 382 464 /* lock a list in the database. list -1 is the alloc list. non-blocking lock */ 383 int tdb_lock_nonblock(struct tdb_context *tdb, int list, int ltype)465 _PUBLIC_ int tdb_lock_nonblock(struct tdb_context *tdb, int list, int ltype) 384 466 { 385 467 return tdb_lock_list(tdb, list, ltype, TDB_LOCK_NOWAIT); … … 397 479 398 480 /* Sanity checks */ 399 if (offset >= lock_offset(tdb->h eader.hash_size)) {400 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: offset %u invalid (%d)\n", offset, tdb->h eader.hash_size));481 if (offset >= lock_offset(tdb->hash_size)) { 482 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: offset %u invalid (%d)\n", offset, tdb->hash_size)); 401 483 return ret; 402 484 } … … 437 519 */ 438 520 439 if (tdb->num_lockrecs == 0) {440 SAFE_FREE(tdb->lockrecs);441 }442 443 521 if (ret) 444 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: An error occurred unlocking!\n")); 445 return ret; 446 } 447 448 int tdb_unlock(struct tdb_context *tdb, int list, int ltype)522 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: An error occurred unlocking!\n")); 523 return ret; 524 } 525 526 _PUBLIC_ int tdb_unlock(struct tdb_context *tdb, int list, int ltype) 449 527 { 450 528 /* a global lock allows us to avoid per chain locks */ 451 if (tdb->allrecord_lock.count &&452 (ltype == tdb->allrecord_lock.ltype || ltype == F_RDLCK)) {453 return 0;454 }455 456 529 if (tdb->allrecord_lock.count) { 457 tdb->ecode = TDB_ERR_LOCK; 458 return -1; 530 return tdb_lock_covered_by_allrecord_lock(tdb, ltype); 459 531 } 460 532 … … 554 626 enum tdb_lock_flags flags, bool upgradable) 555 627 { 628 int ret; 629 556 630 switch (tdb_allrecord_check(tdb, ltype, flags, upgradable)) { 557 631 case -1: … … 563 637 /* We cover two kinds of locks: 564 638 * 1) Normal chain locks. Taken for almost all operations. 565 * 3) Individual records locks. Taken after normal or free639 * 2) Individual records locks. Taken after normal or free 566 640 * chain locks. 567 641 * 568 642 * It is (1) which cause the starvation problem, so we're only 569 643 * gradual for that. 
*/ 570 if (tdb_chainlock_gradual(tdb, ltype, flags, FREELIST_TOP, 571 tdb->header.hash_size * 4) == -1) { 644 645 if (tdb_have_mutexes(tdb)) { 646 ret = tdb_mutex_allrecord_lock(tdb, ltype, flags); 647 } else { 648 ret = tdb_chainlock_gradual(tdb, ltype, flags, FREELIST_TOP, 649 tdb->hash_size * 4); 650 } 651 652 if (ret == -1) { 572 653 return -1; 573 654 } 574 655 575 656 /* Grab individual record locks. */ 576 if (tdb_brlock(tdb, ltype, lock_offset(tdb->h eader.hash_size), 0,657 if (tdb_brlock(tdb, ltype, lock_offset(tdb->hash_size), 0, 577 658 flags) == -1) { 578 tdb_brunlock(tdb, ltype, FREELIST_TOP, 579 tdb->header.hash_size * 4); 659 if (tdb_have_mutexes(tdb)) { 660 tdb_mutex_allrecord_unlock(tdb); 661 } else { 662 tdb_brunlock(tdb, ltype, FREELIST_TOP, 663 tdb->hash_size * 4); 664 } 580 665 return -1; 581 666 } … … 633 718 } 634 719 635 if (!mark_lock && tdb_brunlock(tdb, ltype, FREELIST_TOP, 0)) { 636 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlockall failed (%s)\n", strerror(errno))); 637 return -1; 720 if (!mark_lock) { 721 int ret; 722 723 if (tdb_have_mutexes(tdb)) { 724 ret = tdb_mutex_allrecord_unlock(tdb); 725 if (ret == 0) { 726 ret = tdb_brunlock(tdb, ltype, 727 lock_offset(tdb->hash_size), 728 0); 729 } 730 } else { 731 ret = tdb_brunlock(tdb, ltype, FREELIST_TOP, 0); 732 } 733 734 if (ret != 0) { 735 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlockall failed " 736 "(%s)\n", strerror(errno))); 737 return -1; 738 } 638 739 } 639 740 … … 756 857 tdb_trace_1rec(tdb, "tdb_chainunlock_read", key); 757 858 return tdb_unlock(tdb, BUCKET(tdb->hash_fn(&key)), F_RDLCK); 859 } 860 861 _PUBLIC_ int tdb_chainlock_read_nonblock(struct tdb_context *tdb, TDB_DATA key) 862 { 863 int ret = tdb_lock_nonblock(tdb, BUCKET(tdb->hash_fn(&key)), F_RDLCK); 864 tdb_trace_1rec_ret(tdb, "tdb_chainlock_read_nonblock", key, ret); 865 return ret; 758 866 } 759 867 … … 841 949 842 950 if (tdb->allrecord_lock.count != 0) { 843 tdb_ brunlock(tdb, tdb->allrecord_lock.ltype, FREELIST_TOP, 0);951 tdb_allrecord_unlock(tdb, tdb->allrecord_lock.ltype, false); 844 952 tdb->allrecord_lock.count = 0; 845 953 } … … 856 964 } 857 965 tdb->num_lockrecs = active; 858 if (tdb->num_lockrecs == 0) { 859 SAFE_FREE(tdb->lockrecs); 860 } 861 } 966 } 967 968 /* Following functions are added specifically to support CTDB. */ 969 970 /* Don't do actual fcntl locking, just mark tdb locked */ 971 int tdb_transaction_write_lock_mark(struct tdb_context *tdb); 972 _PUBLIC_ int tdb_transaction_write_lock_mark(struct tdb_context *tdb) 973 { 974 return tdb_transaction_lock(tdb, F_WRLCK, TDB_LOCK_MARK_ONLY); 975 } 976 977 /* Don't do actual fcntl unlocking, just mark tdb unlocked */ 978 int tdb_transaction_write_lock_unmark(struct tdb_context *tdb); 979 _PUBLIC_ int tdb_transaction_write_lock_unmark(struct tdb_context *tdb) 980 { 981 return tdb_nest_unlock(tdb, TRANSACTION_LOCK, F_WRLCK, true); 982 } -
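Both tdb_lock_list() and tdb_unlock() now delegate the allrecord-lock short-circuit to one rule, tdb_lock_covered_by_allrecord_lock(): a chain lock request is satisfied without touching fcntl iff the held allrecord lock is at least as strong as the request. A tiny standalone restatement of that rule (the real function also sets tdb->ecode on failure):

    #include <fcntl.h>
    #include <stdio.h>

    /*  0: the held allrecord lock already covers the request.
     * -1: a write was requested under a read-only allrecord lock;
     *     there is no in-place upgrade on this path, so it fails. */
    static int covered(int held_ltype, int asked_ltype)
    {
            if (asked_ltype == F_RDLCK)
                    return 0;
            if (held_ltype == F_RDLCK)
                    return -1;
            return 0;
    }

    int main(void)
    {
            printf("%d\n", covered(F_RDLCK, F_RDLCK));  /*  0 */
            printf("%d\n", covered(F_RDLCK, F_WRLCK));  /* -1 */
            printf("%d\n", covered(F_WRLCK, F_WRLCK));  /*  0 */
            return 0;
    }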
vendor/current/lib/tdb/common/open.c
r914 r988 1 /* 1 /* 2 2 Unix SMB/CIFS implementation. 3 3 … … 52 52 53 53 /* initialise a new database with a specified hash size */ 54 static int tdb_new_database(struct tdb_context *tdb, int hash_size) 54 static int tdb_new_database(struct tdb_context *tdb, struct tdb_header *header, 55 int hash_size) 55 56 { 56 57 struct tdb_header *newdb; … … 76 77 newdb->rwlocks = TDB_HASH_RWLOCK_MAGIC; 77 78 79 /* 80 * We create a tdb with TDB_FEATURE_FLAG_MUTEX support, 81 * the flag combination and runtime feature checks 82 * are done by the caller already. 83 */ 84 if (tdb->flags & TDB_MUTEX_LOCKING) { 85 newdb->feature_flags |= TDB_FEATURE_FLAG_MUTEX; 86 } 87 88 /* 89 * If we have any features we add the FEATURE_FLAG_MAGIC, overwriting the 90 * TDB_HASH_RWLOCK_MAGIC above. 91 */ 92 if (newdb->feature_flags != 0) { 93 newdb->rwlocks = TDB_FEATURE_FLAG_MAGIC; 94 } 95 96 /* 97 * It's required for some following code pathes 98 * to have the fields on 'tdb' up-to-date. 99 * 100 * E.g. tdb_mutex_size() requires it 101 */ 102 tdb->feature_flags = newdb->feature_flags; 103 tdb->hash_size = newdb->hash_size; 104 78 105 if (tdb->flags & TDB_INTERNAL) { 79 106 tdb->map_size = size; 80 107 tdb->map_ptr = (char *)newdb; 81 memcpy( &tdb->header, newdb, sizeof(tdb->header));108 memcpy(header, newdb, sizeof(*header)); 82 109 /* Convert the `ondisk' version if asked. */ 83 110 CONVERT(*newdb); … … 90 117 goto fail; 91 118 119 if (newdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) { 120 newdb->mutex_size = tdb_mutex_size(tdb); 121 tdb->hdr_ofs = newdb->mutex_size; 122 } 123 92 124 /* This creates an endian-converted header, as if read from disk */ 93 125 CONVERT(*newdb); 94 memcpy( &tdb->header, newdb, sizeof(tdb->header));126 memcpy(header, newdb, sizeof(*header)); 95 127 /* Don't endian-convert the magic food! */ 96 128 memcpy(newdb->magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1); 97 /* we still have "ret == -1" here */ 98 if (tdb_write_all(tdb->fd, newdb, size)) 99 ret = 0; 100 129 130 if (!tdb_write_all(tdb->fd, newdb, size)) 131 goto fail; 132 133 if (newdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) { 134 135 /* 136 * Now we init the mutex area 137 * followed by a second header. 138 */ 139 140 ret = ftruncate( 141 tdb->fd, 142 newdb->mutex_size + sizeof(struct tdb_header)); 143 if (ret == -1) { 144 goto fail; 145 } 146 ret = tdb_mutex_init(tdb); 147 if (ret == -1) { 148 goto fail; 149 } 150 151 /* 152 * Write a second header behind the mutexes. That's the area 153 * that will be mmapp'ed. 154 */ 155 ret = lseek(tdb->fd, newdb->mutex_size, SEEK_SET); 156 if (ret == -1) { 157 goto fail; 158 } 159 if (!tdb_write_all(tdb->fd, newdb, size)) { 160 goto fail; 161 } 162 } 163 164 ret = 0; 101 165 fail: 102 166 SAFE_FREE(newdb); … … 120 184 } 121 185 122 /* open the database, creating it if necessary 186 /* open the database, creating it if necessary 123 187 124 188 The open_flags and mode are passed straight to the open call on the … … 126 190 is advisory, use zero for a default value. 127 191 128 Return is NULL on error, in which case errno is also set. Don't 192 Return is NULL on error, in which case errno is also set. Don't 129 193 try to call tdb_error or tdb_errname, just do strerror(errno). 
130 194 … … 143 207 144 208 static bool check_header_hash(struct tdb_context *tdb, 209 struct tdb_header *header, 145 210 bool default_hash, uint32_t *m1, uint32_t *m2) 146 211 { 147 212 tdb_header_hash(tdb, m1, m2); 148 if ( tdb->header.magic1_hash == *m1 &&149 tdb->header.magic2_hash == *m2) {213 if (header->magic1_hash == *m1 && 214 header->magic2_hash == *m2) { 150 215 return true; 151 216 } … … 160 225 else 161 226 tdb->hash_fn = tdb_old_hash; 162 return check_header_hash(tdb, false, m1, m2); 227 return check_header_hash(tdb, header, false, m1, m2); 228 } 229 230 static bool tdb_mutex_open_ok(struct tdb_context *tdb, 231 const struct tdb_header *header) 232 { 233 int locked; 234 235 if (tdb->flags & TDB_NOLOCK) { 236 /* 237 * We don't look at locks, so it does not matter to have a 238 * compatible mutex implementation. Allow the open. 239 */ 240 return true; 241 } 242 243 locked = tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK, 244 TDB_LOCK_NOWAIT|TDB_LOCK_PROBE); 245 246 if ((locked == -1) && (tdb->ecode == TDB_ERR_LOCK)) { 247 /* 248 * CLEAR_IF_FIRST still active. The tdb was created on this 249 * host, so we can assume the mutex implementation is 250 * compatible. Important for tools like tdbdump on a still 251 * open locking.tdb. 252 */ 253 goto check_local_settings; 254 } 255 256 /* 257 * We got the CLEAR_IF_FIRST lock. That means the database was 258 * potentially copied from somewhere else. The mutex implementation 259 * might be incompatible. 260 */ 261 262 if (tdb_nest_unlock(tdb, ACTIVE_LOCK, F_WRLCK, false) == -1) { 263 /* 264 * Should not happen 265 */ 266 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_mutex_open_ok: " 267 "failed to release ACTIVE_LOCK on %s: %s\n", 268 tdb->name, strerror(errno))); 269 return false; 270 } 271 272 check_local_settings: 273 274 if (!(tdb->flags & TDB_MUTEX_LOCKING)) { 275 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_mutex_open_ok[%s]: " 276 "Can use mutexes only with " 277 "MUTEX_LOCKING or NOLOCK\n", 278 tdb->name)); 279 return false; 280 } 281 282 if (tdb_mutex_size(tdb) != header->mutex_size) { 283 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_mutex_open_ok[%s]: " 284 "Mutex size changed from %u to %u\n.", 285 tdb->name, 286 (unsigned int)header->mutex_size, 287 (unsigned int)tdb_mutex_size(tdb))); 288 return false; 289 } 290 291 return true; 163 292 } 164 293 … … 168 297 tdb_hash_func hash_fn) 169 298 { 299 int orig_errno = errno; 300 struct tdb_header header; 170 301 struct tdb_context *tdb; 171 302 struct stat st; … … 176 307 const char *hash_alg; 177 308 uint32_t magic1, magic2; 309 int ret; 310 311 ZERO_STRUCT(header); 178 312 179 313 if (!(tdb = (struct tdb_context *)calloc(1, sizeof *tdb))) { … … 183 317 } 184 318 tdb_io_init(tdb); 319 320 if (tdb_flags & TDB_INTERNAL) { 321 tdb_flags |= TDB_INCOMPATIBLE_HASH; 322 } 323 if (tdb_flags & TDB_MUTEX_LOCKING) { 324 tdb_flags |= TDB_INCOMPATIBLE_HASH; 325 } 326 185 327 tdb->fd = -1; 186 328 #ifdef TDB_TRACE … … 210 352 } 211 353 212 /* now make a copy of the name, as the caller memory might wentaway */354 /* now make a copy of the name, as the caller memory might go away */ 213 355 if (!(tdb->name = (char *)strdup(name))) { 214 356 /* … … 258 400 /* read only databases don't do locking or clear if first */ 259 401 tdb->flags |= TDB_NOLOCK; 260 tdb->flags &= ~ TDB_CLEAR_IF_FIRST;402 tdb->flags &= ~(TDB_CLEAR_IF_FIRST|TDB_MUTEX_LOCKING); 261 403 } 262 404 … … 270 412 } 271 413 414 if (tdb->flags & TDB_MUTEX_LOCKING) { 415 /* 416 * Here we catch bugs in the callers, 417 * the runtime check for existing tdb's comes later. 
418 */ 419 420 if (!(tdb->flags & TDB_CLEAR_IF_FIRST)) { 421 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " 422 "invalid flags for %s - TDB_MUTEX_LOCKING " 423 "requires TDB_CLEAR_IF_FIRST\n", name)); 424 errno = EINVAL; 425 goto fail; 426 } 427 428 if (tdb->flags & TDB_INTERNAL) { 429 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " 430 "invalid flags for %s - TDB_MUTEX_LOCKING and " 431 "TDB_INTERNAL are not allowed together\n", name)); 432 errno = EINVAL; 433 goto fail; 434 } 435 436 if (tdb->flags & TDB_NOMMAP) { 437 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " 438 "invalid flags for %s - TDB_MUTEX_LOCKING and " 439 "TDB_NOMMAP are not allowed together\n", name)); 440 errno = EINVAL; 441 goto fail; 442 } 443 444 if (tdb->read_only) { 445 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " 446 "invalid flags for %s - TDB_MUTEX_LOCKING " 447 "not allowed read only\n", name)); 448 errno = EINVAL; 449 goto fail; 450 } 451 452 /* 453 * The callers should have called 454 * tdb_runtime_check_for_robust_mutexes() 455 * before using TDB_MUTEX_LOCKING! 456 * 457 * This makes sure the caller understands 458 * that the locking may behave a bit differently 459 * than with pure fcntl locking. E.g. multiple 460 * read locks are not supported. 461 */ 462 if (!tdb_runtime_check_for_robust_mutexes()) { 463 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " 464 "invalid flags for %s - TDB_MUTEX_LOCKING " 465 "requires support for robust_mutexes\n", 466 name)); 467 errno = ENOSYS; 468 goto fail; 469 } 470 } 471 272 472 if (getenv("TDB_NO_FSYNC")) { 273 473 tdb->flags |= TDB_NOSYNC; … … 286 486 tdb->flags |= (TDB_NOLOCK | TDB_NOMMAP); 287 487 tdb->flags &= ~TDB_CLEAR_IF_FIRST; 288 if (tdb_new_database(tdb, hash_size) != 0) {488 if (tdb_new_database(tdb, &header, hash_size) != 0) { 289 489 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: tdb_new_database failed!")); 290 490 goto fail; 291 491 } 492 tdb->hash_size = hash_size; 292 493 goto internal; 293 494 } … … 314 515 (!tdb->read_only) && 315 516 (locked = (tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK, TDB_LOCK_NOWAIT|TDB_LOCK_PROBE) == 0))) { 316 int ret;317 517 ret = tdb_brlock(tdb, F_WRLCK, FREELIST_TOP, 0, 318 TDB_LOCK_WAIT);518 TDB_LOCK_WAIT); 319 519 if (ret == -1) { 320 520 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " 321 "tdb_brlock failed for %s: %s\n",322 name, strerror(errno)));323 goto fail; 324 } 325 ret = tdb_new_database(tdb, hash_size);521 "tdb_brlock failed for %s: %s\n", 522 name, strerror(errno))); 523 goto fail; 524 } 525 ret = tdb_new_database(tdb, &header, hash_size); 326 526 if (ret == -1) { 327 527 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " 328 "tdb_new_database failed for %s: %s\n",329 name, strerror(errno)));528 "tdb_new_database failed for %s: %s\n", 529 name, strerror(errno))); 330 530 tdb_unlockall(tdb); 331 531 goto fail; … … 334 534 if (ret == -1) { 335 535 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " 336 "tdb_unlockall failed for %s: %s\n",337 name, strerror(errno)));536 "tdb_unlockall failed for %s: %s\n", 537 name, strerror(errno))); 338 538 goto fail; 339 539 } … … 341 541 if (ret == -1) { 342 542 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " 343 "lseek failed for %s: %s\n",344 name, strerror(errno)));543 "lseek failed for %s: %s\n", 544 name, strerror(errno))); 345 545 goto fail; 346 546 } … … 348 548 349 549 errno = 0; 350 if (read(tdb->fd, &tdb->header, sizeof(tdb->header)) != sizeof(tdb->header) 351 || strcmp(tdb->header.magic_food, TDB_MAGIC_FOOD) != 0) { 352 if (!(open_flags & O_CREAT) || tdb_new_database(tdb, hash_size) == 
-1) { 550 if (read(tdb->fd, &header, sizeof(header)) != sizeof(header) 551 || strcmp(header.magic_food, TDB_MAGIC_FOOD) != 0) { 552 if (!(open_flags & O_CREAT) || 553 tdb_new_database(tdb, &header, hash_size) == -1) { 353 554 if (errno == 0) { 354 555 errno = EIO; /* ie bad format or something */ … … 357 558 } 358 559 rev = (tdb->flags & TDB_CONVERT); 359 } else if ( tdb->header.version != TDB_VERSION360 && !(rev = ( tdb->header.version==TDB_BYTEREV(TDB_VERSION)))) {560 } else if (header.version != TDB_VERSION 561 && !(rev = (header.version==TDB_BYTEREV(TDB_VERSION)))) { 361 562 /* wrong version */ 362 563 errno = EIO; 363 564 goto fail; 364 565 } 365 vp = (unsigned char *)& tdb->header.version;566 vp = (unsigned char *)&header.version; 366 567 vertest = (((uint32_t)vp[0]) << 24) | (((uint32_t)vp[1]) << 16) | 367 568 (((uint32_t)vp[2]) << 8) | (uint32_t)vp[3]; … … 371 572 else { 372 573 tdb->flags |= TDB_CONVERT; 373 tdb_convert(&tdb->header, sizeof(tdb->header)); 374 } 375 if (fstat(tdb->fd, &st) == -1) 376 goto fail; 377 378 if (tdb->header.rwlocks != 0 && 379 tdb->header.rwlocks != TDB_HASH_RWLOCK_MAGIC) { 574 tdb_convert(&header, sizeof(header)); 575 } 576 577 /* 578 * We only use st.st_dev and st.st_ino from the raw fstat() 579 * call, everything else needs to use tdb_fstat() in order 580 * to skip tdb->hdr_ofs! 581 */ 582 if (fstat(tdb->fd, &st) == -1) { 583 goto fail; 584 } 585 tdb->device = st.st_dev; 586 tdb->inode = st.st_ino; 587 ZERO_STRUCT(st); 588 589 if (header.rwlocks != 0 && 590 header.rwlocks != TDB_FEATURE_FLAG_MAGIC && 591 header.rwlocks != TDB_HASH_RWLOCK_MAGIC) { 380 592 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: spinlocks no longer supported\n")); 381 goto fail; 382 } 383 384 if ((tdb->header.magic1_hash == 0) && (tdb->header.magic2_hash == 0)) { 593 errno = ENOSYS; 594 goto fail; 595 } 596 597 if (header.hash_size == 0) { 598 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: invalid database: 0 hash_size\n")); 599 errno = ENOSYS; 600 goto fail; 601 } 602 603 tdb->hash_size = header.hash_size; 604 605 if (header.rwlocks == TDB_FEATURE_FLAG_MAGIC) { 606 tdb->feature_flags = header.feature_flags; 607 } 608 609 if (tdb->feature_flags & ~TDB_SUPPORTED_FEATURE_FLAGS) { 610 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: unsupported " 611 "features in tdb %s: 0x%08x (supported: 0x%08x)\n", 612 name, (unsigned)tdb->feature_flags, 613 (unsigned)TDB_SUPPORTED_FEATURE_FLAGS)); 614 errno = ENOSYS; 615 goto fail; 616 } 617 618 if (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) { 619 if (!tdb_mutex_open_ok(tdb, &header)) { 620 errno = EINVAL; 621 goto fail; 622 } 623 624 /* 625 * We need to remember the hdr_ofs 626 * also for the TDB_NOLOCK case 627 * if the current library doesn't support 628 * mutex locking. 629 */ 630 tdb->hdr_ofs = header.mutex_size; 631 } 632 633 if ((header.magic1_hash == 0) && (header.magic2_hash == 0)) { 385 634 /* older TDB without magic hash references */ 386 635 tdb->hash_fn = tdb_old_hash; 387 } else if (!check_header_hash(tdb, !hash_fn, &magic1, &magic2)) { 636 } else if (!check_header_hash(tdb, &header, !hash_fn, 637 &magic1, &magic2)) { 388 638 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " 389 639 "%s was not created with %s hash function we are using\n" … … 391 641 "magic2_hash[0x%08X %s 0x%08X]\n", 392 642 name, hash_alg, 393 tdb->header.magic1_hash,394 ( tdb->header.magic1_hash == magic1) ? "==" : "!=",643 header.magic1_hash, 644 (header.magic1_hash == magic1) ? 
"==" : "!=", 395 645 magic1, 396 tdb->header.magic2_hash,397 ( tdb->header.magic2_hash == magic2) ? "==" : "!=",646 header.magic2_hash, 647 (header.magic2_hash == magic2) ? "==" : "!=", 398 648 magic2)); 399 649 errno = EINVAL; … … 402 652 403 653 /* Is it already in the open list? If so, fail. */ 404 if (tdb_already_open( st.st_dev, st.st_ino)) {654 if (tdb_already_open(tdb->device, tdb->inode)) { 405 655 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " 406 656 "%s (%d,%d) is already open in this process\n", 407 name, (int) st.st_dev, (int)st.st_ino));657 name, (int)tdb->device, (int)tdb->inode)); 408 658 errno = EBUSY; 409 659 goto fail; 410 660 } 411 661 412 tdb->map_size = st.st_size; 413 tdb->device = st.st_dev; 414 tdb->inode = st.st_ino; 415 tdb_mmap(tdb); 662 /* 663 * We had tdb_mmap(tdb) here before, 664 * but we need to use tdb_fstat(), 665 * which is triggered from tdb_oob() before calling tdb_mmap(). 666 * As this skips tdb->hdr_ofs. 667 */ 668 tdb->map_size = 0; 669 ret = tdb->methods->tdb_oob(tdb, 0, 1, 0); 670 if (ret == -1) { 671 errno = EIO; 672 goto fail; 673 } 674 675 if (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) { 676 if (!(tdb->flags & TDB_NOLOCK)) { 677 ret = tdb_mutex_mmap(tdb); 678 if (ret != 0) { 679 goto fail; 680 } 681 } 682 } 683 416 684 if (locked) { 417 685 if (tdb_nest_unlock(tdb, ACTIVE_LOCK, F_WRLCK, false) == -1) { … … 465 733 tdb->next = tdbs; 466 734 tdbs = tdb; 735 errno = orig_errno; 467 736 return tdb; 468 737 … … 523 792 tdb_munmap(tdb); 524 793 } 794 795 tdb_mutex_munmap(tdb); 796 525 797 SAFE_FREE(tdb->name); 526 798 if (tdb->fd != -1) { … … 594 866 goto fail; 595 867 } 868 /* 869 * We only use st.st_dev and st.st_ino from the raw fstat() 870 * call, everything else needs to use tdb_fstat() in order 871 * to skip tdb->hdr_ofs! 872 */ 596 873 if (fstat(tdb->fd, &st) != 0) { 597 874 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: fstat failed (%s)\n", strerror(errno))); … … 602 879 goto fail; 603 880 } 604 tdb_mmap(tdb); 881 ZERO_STRUCT(st); 882 883 /* 884 * We had tdb_mmap(tdb) here before, 885 * but we need to use tdb_fstat(), 886 * which is triggered from tdb_oob() before calling tdb_mmap(). 887 * As this skips tdb->hdr_ofs. 888 */ 889 tdb->map_size = 0; 890 if (tdb->methods->tdb_oob(tdb, 0, 1, 0) != 0) { 891 goto fail; 892 } 605 893 #endif /* fake pread or pwrite */ 606 894 … … 608 896 tdb->num_lockrecs = 0; 609 897 SAFE_FREE(tdb->lockrecs); 898 tdb->lockrecs_array_length = 0; 610 899 611 900 if (active_lock && tdb_nest_lock(tdb, ACTIVE_LOCK, F_RDLCK, TDB_LOCK_WAIT) == -1) { -
vendor/current/lib/tdb/common/summary.c
r986 r988 1 /* 1 /* 2 2 Trivial Database: human-readable summary code 3 3 Copyright (C) Rusty Russell 2010 4 4 5 5 This library is free software; you can redistribute it and/or 6 6 modify it under the terms of the GNU Lesser General Public … … 19 19 20 20 #define SUMMARY_FORMAT \ 21 "Size of file/data: %u/%zu\n" \ 21 "Size of file/data: %llu/%zu\n" \ 22 "Header offset/logical size: %zu/%zu\n" \ 22 23 "Number of records: %zu\n" \ 24 "Incompatible hash: %s\n" \ 25 "Active/supported feature flags: 0x%08x/0x%08x\n" \ 26 "Robust mutexes locking: %s\n" \ 23 27 "Smallest/average/largest keys: %zu/%zu/%zu\n" \ 24 28 "Smallest/average/largest data: %zu/%zu/%zu\n" \ … … 87 91 _PUBLIC_ char *tdb_summary(struct tdb_context *tdb) 88 92 { 93 off_t file_size; 89 94 tdb_off_t off, rec_off; 90 struct tally freet, keys, data, dead, extra, hash , uncoal;95 struct tally freet, keys, data, dead, extra, hashval, uncoal; 91 96 struct tdb_record rec; 92 97 char *ret = NULL; 93 98 bool locked; 94 size_t len, unc = 0; 99 size_t unc = 0; 100 int len; 95 101 struct tdb_record recovery; 96 102 … … 114 120 tally_init(&dead); 115 121 tally_init(&extra); 116 tally_init(&hash );122 tally_init(&hashval); 117 123 tally_init(&uncoal); 118 124 119 for (off = TDB_DATA_START(tdb->h eader.hash_size);125 for (off = TDB_DATA_START(tdb->hash_size); 120 126 off < tdb->map_size - 1; 121 127 off += sizeof(rec) + rec.rec_len) { … … 152 158 default: 153 159 TDB_LOG((tdb, TDB_DEBUG_ERROR, 154 "Unexpected record magic 0x%x at offset % d\n",160 "Unexpected record magic 0x%x at offset %u\n", 155 161 rec.magic, off)); 156 162 goto unlock; … … 160 166 tally_add(&uncoal, unc - 1); 161 167 162 for (off = 0; off < tdb->header.hash_size; off++) 163 tally_add(&hash, get_hash_length(tdb, off)); 164 165 /* 20 is max length of a %zu. 
*/ 166 len = strlen(SUMMARY_FORMAT) + 35*20 + 1; 167 ret = (char *)malloc(len); 168 if (!ret) 169 goto unlock; 170 171 snprintf(ret, len, SUMMARY_FORMAT, 172 tdb->map_size, keys.total+data.total, 168 for (off = 0; off < tdb->hash_size; off++) 169 tally_add(&hashval, get_hash_length(tdb, off)); 170 171 file_size = tdb->hdr_ofs + tdb->map_size; 172 173 len = asprintf(&ret, SUMMARY_FORMAT, 174 (unsigned long long)file_size, keys.total+data.total, 175 (size_t)tdb->hdr_ofs, (size_t)tdb->map_size, 173 176 keys.num, 177 (tdb->hash_fn == tdb_jenkins_hash)?"yes":"no", 178 (unsigned)tdb->feature_flags, TDB_SUPPORTED_FEATURE_FLAGS, 179 (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX)?"yes":"no", 174 180 keys.min, tally_mean(&keys), keys.max, 175 181 data.min, tally_mean(&data), data.max, … … 179 185 freet.num, 180 186 freet.min, tally_mean(&freet), freet.max, 181 hash .num,182 hash .min, tally_mean(&hash), hash.max,187 hashval.num, 188 hashval.min, tally_mean(&hashval), hashval.max, 183 189 uncoal.total, 184 190 uncoal.min, tally_mean(&uncoal), uncoal.max, 185 keys.total * 100.0 / tdb->map_size,186 data.total * 100.0 / tdb->map_size,187 extra.total * 100.0 / tdb->map_size,188 freet.total * 100.0 / tdb->map_size,189 dead.total * 100.0 / tdb->map_size,191 keys.total * 100.0 / file_size, 192 data.total * 100.0 / file_size, 193 extra.total * 100.0 / file_size, 194 freet.total * 100.0 / file_size, 195 dead.total * 100.0 / file_size, 190 196 (keys.num + freet.num + dead.num) 191 197 * (sizeof(struct tdb_record) + sizeof(uint32_t)) 192 * 100.0 / tdb->map_size, 193 tdb->header.hash_size * sizeof(tdb_off_t) 194 * 100.0 / tdb->map_size); 198 * 100.0 / file_size, 199 tdb->hash_size * sizeof(tdb_off_t) 200 * 100.0 / file_size); 201 if (len == -1) { 202 goto unlock; 203 } 195 204 196 205 unlock: -
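Two things change in the summary code: percentages are now computed against the real file size (tdb->hdr_ofs + tdb->map_size, since the mutex area is not part of map_size), and the fixed-size malloc()/snprintf() pair with its hand-counted "35*20 + 1" estimate gives way to asprintf(), which sizes the buffer itself. The asprintf() contract (a GNU/BSD extension) that the code now relies on, in minimal form with illustrative variable names:

    char *buf = NULL;
    int len;

    len = asprintf(&buf, "Size of file/data: %llu/%zu\n",
                   (unsigned long long)file_size, total_bytes);
    if (len == -1) {
            /* Allocation or formatting failed: buf is undefined here
             * and must be neither used nor freed. */
            return NULL;
    }
    /* ... use buf, then ... */
    free(buf);

The rename of the local tally from "hash" to "hashval" is cosmetic, apparently to stop it shadowing the hash identifiers used elsewhere in tdb.
-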
vendor/current/lib/tdb/common/tdb.c
r986 r988 1 /* 1 /* 2 2 Unix SMB/CIFS implementation. 3 3 … … 125 125 static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key); 126 126 127 static int tdb_update_hash_cmp(TDB_DATA key, TDB_DATA data, void *private_data) 128 { 129 TDB_DATA *dbuf = (TDB_DATA *)private_data; 130 131 if (dbuf->dsize != data.dsize) { 132 return -1; 133 } 134 if (memcmp(dbuf->dptr, data.dptr, data.dsize) != 0) { 135 return -1; 136 } 137 return 0; 138 } 139 127 140 /* update an entry in place - this only works if the new data size 128 141 is <= the old data size and the key exists. … … 140 153 /* it could be an exact duplicate of what is there - this is 141 154 * surprisingly common (eg. with a ldb re-index). */ 142 if (rec.key_len == key.dsize && 155 if (rec.key_len == key.dsize && 143 156 rec.data_len == dbuf.dsize && 144 rec.full_hash == hash) { 145 TDB_DATA data = _tdb_fetch(tdb, key); 146 if (data.dsize == dbuf.dsize && 147 memcmp(data.dptr, dbuf.dptr, data.dsize) == 0) { 148 if (data.dptr) { 149 free(data.dptr); 150 } 151 return 0; 152 } 153 if (data.dptr) { 154 free(data.dptr); 155 } 157 rec.full_hash == hash && 158 tdb_parse_record(tdb, key, tdb_update_hash_cmp, &dbuf) == 0) { 159 return 0; 156 160 } 157 161 … … 255 259 } 256 260 257 /* check if an entry in the database exists 261 /* check if an entry in the database exists 258 262 259 263 note that 1 is returned if the key is found and 0 is returned if not found … … 342 346 * Purge all DEAD records from a hash chain 343 347 */ 344 staticint tdb_purge_dead(struct tdb_context *tdb, uint32_t hash)348 int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash) 345 349 { 346 350 int res = -1; … … 348 352 tdb_off_t rec_ptr; 349 353 350 if (tdb_lock(tdb, -1, F_WRLCK) == -1) { 354 if (tdb_lock_nonblock(tdb, -1, F_WRLCK) == -1) { 355 /* 356 * Don't block the freelist if not strictly necessary 357 */ 351 358 return -1; 352 359 } … … 384 391 int ret; 385 392 393 rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK, &rec); 394 if (rec_ptr == 0) { 395 return -1; 396 } 397 386 398 if (tdb->max_dead_records != 0) { 399 400 uint32_t magic = TDB_DEAD_MAGIC; 387 401 388 402 /* … … 390 404 * tdb's with a very high create/delete rate like locking.tdb. 391 405 */ 392 393 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)394 return -1;395 406 396 407 if (tdb_count_dead(tdb, hash) >= tdb->max_dead_records) { … … 402 413 } 403 414 404 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) {405 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);406 return -1;407 }408 409 415 /* 410 416 * Just mark the record as dead. 
411 417 */ 412 rec.magic = TDB_DEAD_MAGIC; 413 ret = tdb_rec_write(tdb, rec_ptr, &rec); 418 ret = tdb_ofs_write( 419 tdb, rec_ptr + offsetof(struct tdb_record, magic), 420 &magic); 414 421 } 415 422 else { 416 if (!(rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK,417 &rec)))418 return -1;419 420 423 ret = tdb_do_delete(tdb, rec_ptr, &rec); 421 424 } … … 425 428 } 426 429 427 if (tdb_unlock(tdb, BUCKET( rec.full_hash), F_WRLCK) != 0)430 if (tdb_unlock(tdb, BUCKET(hash), F_WRLCK) != 0) 428 431 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n")); 429 432 return ret; … … 443 446 * See if we have a dead record around with enough space 444 447 */ 445 static tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash, 446 struct tdb_record *r, tdb_len_t length) 447 { 448 tdb_off_t rec_ptr; 448 tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash, 449 struct tdb_record *r, tdb_len_t length, 450 tdb_off_t *p_last_ptr) 451 { 452 tdb_off_t rec_ptr, last_ptr; 453 tdb_off_t best_rec_ptr = 0; 454 tdb_off_t best_last_ptr = 0; 455 struct tdb_record best = { .rec_len = UINT32_MAX }; 456 457 length += sizeof(tdb_off_t); /* tailer */ 458 459 last_ptr = TDB_HASH_TOP(hash); 449 460 450 461 /* read in the hash top */ 451 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)462 if (tdb_ofs_read(tdb, last_ptr, &rec_ptr) == -1) 452 463 return 0; 453 464 … … 457 468 return 0; 458 469 459 if (TDB_DEAD(r) && r->rec_len >= length) {460 /*461 * First fit for simple coding, TODO: change to best462 * fit463 */464 return rec_ptr;465 }470 if (TDB_DEAD(r) && (r->rec_len >= length) && 471 (r->rec_len < best.rec_len)) { 472 best_rec_ptr = rec_ptr; 473 best_last_ptr = last_ptr; 474 best = *r; 475 } 476 last_ptr = rec_ptr; 466 477 rec_ptr = r->next; 467 478 } 468 return 0; 479 480 if (best.rec_len == UINT32_MAX) { 481 return 0; 482 } 483 484 *r = best; 485 *p_last_ptr = best_last_ptr; 486 return best_rec_ptr; 469 487 } 470 488 … … 474 492 struct tdb_record rec; 475 493 tdb_off_t rec_ptr; 476 char *p = NULL;477 494 int ret = -1; 478 495 … … 495 512 } 496 513 } 497 /* reset the error code potentially set by the tdb_update () */514 /* reset the error code potentially set by the tdb_update_hash() */ 498 515 tdb->ecode = TDB_SUCCESS; 499 516 … … 504 521 tdb_delete_hash(tdb, key, hash); 505 522 506 /* Copy key+value *before* allocating free space in case malloc507 fails and we are left with a dead spot in the tdb. */508 509 if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) {510 tdb->ecode = TDB_ERR_OOM;511 goto fail;512 }513 514 memcpy(p, key.dptr, key.dsize);515 if (dbuf.dsize)516 memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);517 518 if (tdb->max_dead_records != 0) {519 /*520 * Allow for some dead records per hash chain, look if we can521 * find one that can hold the new record. We need enough space522 * for key, data and tailer. If we find one, we don't have to523 * consult the central freelist.524 */525 rec_ptr = tdb_find_dead(526 tdb, hash, &rec,527 key.dsize + dbuf.dsize + sizeof(tdb_off_t));528 529 if (rec_ptr != 0) {530 rec.key_len = key.dsize;531 rec.data_len = dbuf.dsize;532 rec.full_hash = hash;533 rec.magic = TDB_MAGIC;534 if (tdb_rec_write(tdb, rec_ptr, &rec) == -1535 || tdb->methods->tdb_write(536 tdb, rec_ptr + sizeof(rec),537 p, key.dsize + dbuf.dsize) == -1) {538 goto fail;539 }540 goto done;541 }542 }543 544 /*545 * We have to allocate some space from the freelist, so this means we546 * have to lock it. 
Use the chance to purge all the DEAD records from547 * the hash chain under the freelist lock.548 */549 550 if (tdb_lock(tdb, -1, F_WRLCK) == -1) {551 goto fail;552 }553 554 if ((tdb->max_dead_records != 0)555 && (tdb_purge_dead(tdb, hash) == -1)) {556 tdb_unlock(tdb, -1, F_WRLCK);557 goto fail;558 }559 560 523 /* we have to allocate some space */ 561 rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize, &rec); 562 563 tdb_unlock(tdb, -1, F_WRLCK); 524 rec_ptr = tdb_allocate(tdb, hash, key.dsize + dbuf.dsize, &rec); 564 525 565 526 if (rec_ptr == 0) { … … 578 539 /* write out and point the top of the hash chain at it */ 579 540 if (tdb_rec_write(tdb, rec_ptr, &rec) == -1 580 || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1 541 || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec), 542 key.dptr, key.dsize) == -1 543 || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec)+key.dsize, 544 dbuf.dptr, dbuf.dsize) == -1 581 545 || tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) { 582 546 /* Need to tdb_unallocate() here */ … … 590 554 tdb_increment_seqnum(tdb); 591 555 } 592 593 SAFE_FREE(p);594 556 return ret; 595 557 } … … 719 681 _PUBLIC_ int tdb_hash_size(struct tdb_context *tdb) 720 682 { 721 return tdb->h eader.hash_size;683 return tdb->hash_size; 722 684 } 723 685 … … 762 724 } 763 725 726 if ((flags & TDB_NOLOCK) && 727 (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) && 728 (tdb->mutexes == NULL)) { 729 tdb->ecode = TDB_ERR_LOCK; 730 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: " 731 "Can not remove NOLOCK flag on mutexed databases")); 732 return; 733 } 734 764 735 if (flags & TDB_ALLOW_NESTING) { 765 736 tdb->flags |= TDB_DISALLOW_NESTING; … … 783 754 784 755 /* 785 add a region of the file to the freelist. Length is the size of the region in bytes, 756 add a region of the file to the freelist. 
Length is the size of the region in bytes, 786 757 which includes the free list header that needs to be added 787 758 */ … … 795 766 if (length + offset > tdb->map_size) { 796 767 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: adding region beyond end of file\n")); 797 return -1; 768 return -1; 798 769 } 799 770 memset(&rec,'\0',sizeof(rec)); … … 841 812 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery record\n")); 842 813 return -1; 843 } 814 } 844 815 recovery_size = rec.rec_len + sizeof(rec); 845 816 } 846 817 847 818 /* wipe the hashes */ 848 for (i=0;i<tdb->h eader.hash_size;i++) {819 for (i=0;i<tdb->hash_size;i++) { 849 820 if (tdb_ofs_write(tdb, TDB_HASH_TOP(i), &offset) == -1) { 850 821 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write hash %d\n", i)); … … 859 830 } 860 831 861 /* add all the rest of the file to the freelist, possibly leaving a gap 832 /* add all the rest of the file to the freelist, possibly leaving a gap 862 833 for the recovery area */ 863 834 if (recovery_size == 0) { 864 835 /* the simple case - the whole file can be used as a freelist */ 865 data_len = (tdb->map_size - TDB_DATA_START(tdb->h eader.hash_size));866 if (tdb_free_region(tdb, TDB_DATA_START(tdb->h eader.hash_size), data_len) != 0) {836 data_len = (tdb->map_size - TDB_DATA_START(tdb->hash_size)); 837 if (tdb_free_region(tdb, TDB_DATA_START(tdb->hash_size), data_len) != 0) { 867 838 goto failed; 868 839 } 869 840 } else { 870 841 /* we need to add two freelist entries - one on either 871 side of the recovery area 842 side of the recovery area 872 843 873 844 Note that we cannot shift the recovery area during … … 876 847 corruption 877 848 */ 878 data_len = (recovery_head - TDB_DATA_START(tdb->h eader.hash_size));879 if (tdb_free_region(tdb, TDB_DATA_START(tdb->h eader.hash_size), data_len) != 0) {849 data_len = (recovery_head - TDB_DATA_START(tdb->hash_size)); 850 if (tdb_free_region(tdb, TDB_DATA_START(tdb->hash_size), data_len) != 0) { 880 851 goto failed; 881 852 } … … 887 858 } 888 859 860 tdb_increment_seqnum_nonblock(tdb); 861 889 862 if (tdb_unlockall(tdb) != 0) { 890 863 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to unlock\n")); … … 946 919 tdb_transaction_cancel(tdb); 947 920 tdb_close(tmp_db); 948 return -1; 921 return -1; 949 922 } 950 923 … … 970 943 tdb_transaction_cancel(tdb); 971 944 tdb_close(tmp_db); 972 return -1; 945 return -1; 973 946 } 974 947 … … 1004 977 } 1005 978 979 bool tdb_add_off_t(tdb_off_t a, tdb_off_t b, tdb_off_t *pret) 980 { 981 tdb_off_t ret = a + b; 982 983 if ((ret < a) || (ret < b)) { 984 return false; 985 } 986 *pret = ret; 987 return true; 988 } 989 1006 990 #ifdef TDB_TRACE 1007 991 static void tdb_trace_write(struct tdb_context *tdb, const char *str) 1008 992 { 1009 if (!tdb_write_all tdb->tracefd, str, strlen(str)) {993 if (!tdb_write_all(tdb->tracefd, str, strlen(str))) { 1010 994 close(tdb->tracefd); 1011 995 tdb->tracefd = -1; -
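tdb_add_off_t() is the overflow guard several hunks in this changeset lean on: tdb_off_t is 32 bits wide, so a plain "a + b" can silently wrap and defeat a later bounds check. A sketch of the intended usage pattern when validating a record header (the helper name is illustrative; the fields match struct tdb_record):

    /* Reject a record whose rec_len would wrap past the end of file. */
    static int check_rec_bounds(struct tdb_context *tdb, tdb_off_t rec_ptr,
                                const struct tdb_record *rec)
    {
            tdb_off_t end;

            /* rec_ptr + sizeof(*rec) + rec->rec_len must not wrap, or a
             * corrupt rec_len could slip past the map_size test below. */
            if (!tdb_add_off_t(rec_ptr, sizeof(*rec), &end) ||
                !tdb_add_off_t(end, rec->rec_len, &end) ||
                end > tdb->map_size) {
                    tdb->ecode = TDB_ERR_CORRUPT;
                    return -1;
            }
            return 0;
    }

The same guard is reused for lengths through the tdb_add_len_t alias in tdb_private.h, which only works because tdb_off_t and tdb_len_t are currently the same 32-bit type.
-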
vendor/current/lib/tdb/common/tdb_private.h
r986 r988 1 /* 1 #ifndef TDB_PRIVATE_H 2 #define TDB_PRIVATE_H 3 /* 2 4 Unix SMB/CIFS implementation. 3 5 … … 52 54 #define TDB_RECOVERY_INVALID_MAGIC (0x0) 53 55 #define TDB_HASH_RWLOCK_MAGIC (0xbad1a51U) 56 #define TDB_FEATURE_FLAG_MAGIC (0xbad1a52U) 54 57 #define TDB_ALIGNMENT 4 55 58 #define DEFAULT_HASH_SIZE 131 … … 60 63 #define TDB_BAD_MAGIC(r) ((r)->magic != TDB_MAGIC && !TDB_DEAD(r)) 61 64 #define TDB_HASH_TOP(hash) (FREELIST_TOP + (BUCKET(hash)+1)*sizeof(tdb_off_t)) 62 #define TDB_HASHTABLE_SIZE(tdb) ((tdb->h eader.hash_size+1)*sizeof(tdb_off_t))65 #define TDB_HASHTABLE_SIZE(tdb) ((tdb->hash_size+1)*sizeof(tdb_off_t)) 63 66 #define TDB_DATA_START(hash_size) (TDB_HASH_TOP(hash_size-1) + sizeof(tdb_off_t)) 64 67 #define TDB_RECOVERY_HEAD offsetof(struct tdb_header, recovery_start) … … 66 69 #define TDB_PAD_BYTE 0x42 67 70 #define TDB_PAD_U32 0x42424242 71 72 #define TDB_FEATURE_FLAG_MUTEX 0x00000001 73 74 #define TDB_SUPPORTED_FEATURE_FLAGS ( \ 75 TDB_FEATURE_FLAG_MUTEX | \ 76 0) 68 77 69 78 /* NB assumes there is a local variable called "tdb" that is the … … 113 122 #endif 114 123 115 #define BUCKET(hash) ((hash) % tdb->h eader.hash_size)124 #define BUCKET(hash) ((hash) % tdb->hash_size) 116 125 117 126 #define DOCONV() (tdb->flags & TDB_CONVERT) … … 151 160 uint32_t magic1_hash; /* hash of TDB_MAGIC_FOOD. */ 152 161 uint32_t magic2_hash; /* hash of TDB_MAGIC. */ 153 tdb_off_t reserved[27]; 162 uint32_t feature_flags; 163 tdb_len_t mutex_size; /* set if TDB_FEATURE_FLAG_MUTEX is set */ 164 tdb_off_t reserved[25]; 154 165 }; 155 166 … … 181 192 int (*tdb_write)(struct tdb_context *, tdb_off_t, const void *, tdb_len_t); 182 193 void (*next_hash_chain)(struct tdb_context *, uint32_t *); 183 int (*tdb_oob)(struct tdb_context *, tdb_off_t , int );194 int (*tdb_oob)(struct tdb_context *, tdb_off_t , tdb_len_t, int ); 184 195 int (*tdb_expand_file)(struct tdb_context *, tdb_off_t , tdb_off_t ); 185 196 }; 197 198 struct tdb_mutexes; 186 199 187 200 struct tdb_context { … … 196 209 int num_lockrecs; 197 210 struct tdb_lock_type *lockrecs; /* only real locks, all with count>0 */ 211 int lockrecs_array_length; 212 213 tdb_off_t hdr_ofs; /* this is 0 or header.mutex_size */ 214 struct tdb_mutexes *mutexes; /* mmap of the mutex area */ 215 198 216 enum TDB_ERROR ecode; /* error code for last tdb error */ 199 struct tdb_header header; /* a cached copy of the header */ 217 uint32_t hash_size; 218 uint32_t feature_flags; 200 219 uint32_t flags; /* the flags passed to tdb_open */ 201 220 struct tdb_traverse_lock travlocks; /* current traversal locks */ … … 221 240 */ 222 241 int tdb_munmap(struct tdb_context *tdb); 223 voidtdb_mmap(struct tdb_context *tdb);242 int tdb_mmap(struct tdb_context *tdb); 224 243 int tdb_lock(struct tdb_context *tdb, int list, int ltype); 225 244 int tdb_lock_nonblock(struct tdb_context *tdb, int list, int ltype); … … 253 272 void *tdb_convert(void *buf, uint32_t size); 254 273 int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec); 255 tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct tdb_record *rec); 274 tdb_off_t tdb_allocate(struct tdb_context *tdb, int hash, tdb_len_t length, 275 struct tdb_record *rec); 256 276 int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d); 257 277 int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d); … … 270 290 tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype, 271 291 struct tdb_record *rec); 292 
tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash, 293 struct tdb_record *r, tdb_len_t length, 294 tdb_off_t *p_last_ptr); 295 int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash); 272 296 void tdb_io_init(struct tdb_context *tdb); 273 297 int tdb_expand(struct tdb_context *tdb, tdb_off_t size); 298 tdb_off_t tdb_expand_adjust(tdb_off_t map_size, tdb_off_t size, int page_size); 274 299 int tdb_rec_free_read(struct tdb_context *tdb, tdb_off_t off, 275 300 struct tdb_record *rec); … … 280 305 unsigned int tdb_old_hash(TDB_DATA *key); 281 306 size_t tdb_dead_space(struct tdb_context *tdb, tdb_off_t off); 307 bool tdb_add_off_t(tdb_off_t a, tdb_off_t b, tdb_off_t *pret); 308 309 /* tdb_off_t and tdb_len_t right now are both uint32_t */ 310 #define tdb_add_len_t tdb_add_off_t 311 312 size_t tdb_mutex_size(struct tdb_context *tdb); 313 bool tdb_have_mutexes(struct tdb_context *tdb); 314 int tdb_mutex_init(struct tdb_context *tdb); 315 int tdb_mutex_mmap(struct tdb_context *tdb); 316 int tdb_mutex_munmap(struct tdb_context *tdb); 317 bool tdb_mutex_lock(struct tdb_context *tdb, int rw, off_t off, off_t len, 318 bool waitflag, int *pret); 319 bool tdb_mutex_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len, 320 int *pret); 321 int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype, 322 enum tdb_lock_flags flags); 323 int tdb_mutex_allrecord_unlock(struct tdb_context *tdb); 324 int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb); 325 void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb); 326 327 #endif /* TDB_PRIVATE_H */ -
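The header growth in tdb_private.h (feature_flags and mutex_size carved out of the reserved array, so reserved[27] becomes reserved[25]) is what makes the format change detectable both ways: old libraries see an unknown rwlocks magic and refuse the file, while new libraries reject unknown bits via TDB_SUPPORTED_FEATURE_FLAGS. For a mutexed database the new hdr_ofs field implies an on-disk layout along these lines (a sketch inferred from the open.c and summary.c hunks, not to scale):

    /*
     *  offset 0                           hdr_ofs == header.mutex_size
     *  +---------------------------+      +------------+------------+-----
     *  | tdb_header + mutex array  |      | tdb_header | hash table | data
     *  | (used only by the mutex   |      | (the copy  |            | ...
     *  |  code, skipped by mmap)   |      |  that gets |            |
     *  +---------------------------+      |  mmapped)  |            |
     *                                     +------------+------------+-----
     *
     * tdb_oob() and tdb_mmap() operate relative to hdr_ofs, so map_size
     * excludes the mutex area and the physical file size is
     * hdr_ofs + map_size, exactly what tdb_summary() now reports.
     */

For plain fcntl databases hdr_ofs stays 0 and the layout is unchanged.
-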
vendor/current/lib/tdb/common/transaction.c
r986 r988 1 /* 1 /* 2 2 Unix SMB/CIFS implementation. 3 3 … … 83 83 84 84 - if TDB_NOSYNC is passed to flags in tdb_open then transactions are 85 still available, but no transaction recovery area is used and no 86 fsync/msync calls are made. 85 still available, but no fsync/msync calls are made. This means we 86 are still proof against a process dying during transaction commit, 87 but not against machine reboot. 87 88 88 89 - if TDB_ALLOW_NESTING is passed to flags in tdb open, or added using … … 148 149 of transaction elements, then if not do a real read 149 150 */ 150 static int transaction_read(struct tdb_context *tdb, tdb_off_t off, void *buf, 151 static int transaction_read(struct tdb_context *tdb, tdb_off_t off, void *buf, 151 152 tdb_len_t len, int cv) 152 153 { … … 195 196 196 197 fail: 197 TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_read: failed at off=% d len=%d\n", off, len));198 TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_read: failed at off=%u len=%u\n", off, len)); 198 199 tdb->ecode = TDB_ERR_IO; 199 200 tdb->transaction->transaction_error = 1; … … 205 206 write while in a transaction 206 207 */ 207 static int transaction_write(struct tdb_context *tdb, tdb_off_t off, 208 static int transaction_write(struct tdb_context *tdb, tdb_off_t off, 208 209 const void *buf, tdb_len_t len) 209 210 { … … 249 250 uint8_t **new_blocks; 250 251 /* expand the blocks array */ 251 if (tdb->transaction->blocks == NULL) { 252 new_blocks = (uint8_t **)malloc( 253 (blk+1)*sizeof(uint8_t *)); 254 } else { 255 new_blocks = (uint8_t **)realloc( 256 tdb->transaction->blocks, 257 (blk+1)*sizeof(uint8_t *)); 258 } 252 new_blocks = (uint8_t **)realloc(tdb->transaction->blocks, 253 (blk+1)*sizeof(uint8_t *)); 259 254 if (new_blocks == NULL) { 260 255 tdb->ecode = TDB_ERR_OOM; 261 256 goto fail; 262 257 } 263 memset(&new_blocks[tdb->transaction->num_blocks], 0, 258 memset(&new_blocks[tdb->transaction->num_blocks], 0, 264 259 (1+(blk - tdb->transaction->num_blocks))*sizeof(uint8_t *)); 265 260 tdb->transaction->blocks = new_blocks; … … 274 269 tdb->ecode = TDB_ERR_OOM; 275 270 tdb->transaction->transaction_error = 1; 276 return -1; 271 return -1; 277 272 } 278 273 if (tdb->transaction->old_map_size > blk * tdb->transaction->block_size) { … … 281 276 len2 = tdb->transaction->old_map_size - (blk * tdb->transaction->block_size); 282 277 } 283 if (tdb->transaction->io_methods->tdb_read(tdb, blk * tdb->transaction->block_size, 284 tdb->transaction->blocks[blk], 278 if (tdb->transaction->io_methods->tdb_read(tdb, blk * tdb->transaction->block_size, 279 tdb->transaction->blocks[blk], 285 280 len2, 0) != 0) { 286 SAFE_FREE(tdb->transaction->blocks[blk]); 281 SAFE_FREE(tdb->transaction->blocks[blk]); 287 282 tdb->ecode = TDB_ERR_IO; 288 283 goto fail; … … 290 285 if (blk == tdb->transaction->num_blocks-1) { 291 286 tdb->transaction->last_block_size = len2; 292 } 287 } 293 288 } 294 289 } … … 309 304 310 305 fail: 311 TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_write: failed at off=% d len=%d\n",306 TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_write: failed at off=%u len=%u\n", 312 307 (blk*tdb->transaction->block_size) + off, len)); 313 308 tdb->transaction->transaction_error = 1; … … 317 312 318 313 /* 319 write while in a transaction - this vari ent never expands the transaction blocks, it only314 write while in a transaction - this variant never expands the transaction blocks, it only 320 315 updates existing blocks. 
This means it cannot change the recovery size 321 316 */ 322 static int transaction_write_existing(struct tdb_context *tdb, tdb_off_t off, 317 static int transaction_write_existing(struct tdb_context *tdb, tdb_off_t off, 323 318 const void *buf, tdb_len_t len) 324 319 { … … 371 366 { 372 367 uint32_t h = *chain; 373 for (;h < tdb->h eader.hash_size;h++) {368 for (;h < tdb->hash_size;h++) { 374 369 /* the +1 takes account of the freelist */ 375 370 if (0 != tdb->transaction->hash_heads[h+1]) { … … 383 378 out of bounds check during a transaction 384 379 */ 385 static int transaction_oob(struct tdb_context *tdb, tdb_off_t len, int probe) 386 { 387 if (len <= tdb->map_size) { 380 static int transaction_oob(struct tdb_context *tdb, tdb_off_t off, 381 tdb_len_t len, int probe) 382 { 383 if (off + len >= off && off + len <= tdb->map_size) { 388 384 return 0; 389 385 } … … 395 391 transaction version of tdb_expand(). 396 392 */ 397 static int transaction_expand_file(struct tdb_context *tdb, tdb_off_t size, 393 static int transaction_expand_file(struct tdb_context *tdb, tdb_off_t size, 398 394 tdb_off_t addition) 399 395 { … … 426 422 { 427 423 /* some sanity checks */ 428 if (tdb->read_only || (tdb->flags & TDB_INTERNAL) || tdb->traverse_read) { 424 if (tdb->read_only || (tdb->flags & TDB_INTERNAL) 425 || tdb->traverse_read) { 429 426 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: cannot start a transaction on a read-only or internal db\n")); 430 427 tdb->ecode = TDB_ERR_EINVAL; … … 439 436 } 440 437 tdb->transaction->nesting++; 441 TDB_LOG((tdb, TDB_DEBUG_TRACE, "tdb_transaction_start: nesting %d\n", 438 TDB_LOG((tdb, TDB_DEBUG_TRACE, "tdb_transaction_start: nesting %d\n", 442 439 tdb->transaction->nesting)); 443 440 return 0; … … 494 491 traverse can be fast */ 495 492 tdb->transaction->hash_heads = (uint32_t *) 496 calloc(tdb->h eader.hash_size+1, sizeof(uint32_t));493 calloc(tdb->hash_size+1, sizeof(uint32_t)); 497 494 if (tdb->transaction->hash_heads == NULL) { 498 495 tdb->ecode = TDB_ERR_OOM; … … 508 505 /* make sure we know about any file expansions already done by 509 506 anyone else */ 510 tdb->methods->tdb_oob(tdb, tdb->map_size +1, 1);507 tdb->methods->tdb_oob(tdb, tdb->map_size, 1, 1); 511 508 tdb->transaction->old_map_size = tdb->map_size; 512 509 … … 544 541 */ 545 542 static int transaction_sync(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t length) 546 { 543 { 547 544 if (tdb->flags & TDB_NOSYNC) { 548 545 return 0; … … 561 558 if (tdb->map_ptr) { 562 559 tdb_off_t moffset = offset & ~(tdb->page_size-1); 563 if (msync(moffset + (char *)tdb->map_ptr, 560 if (msync(moffset + (char *)tdb->map_ptr, 564 561 length + (offset - moffset), MS_SYNC) != 0) { 565 562 tdb->ecode = TDB_ERR_IO; … … 575 572 576 573 static int _tdb_transaction_cancel(struct tdb_context *tdb) 577 { 574 { 578 575 int i, ret = 0; 579 576 … … 587 584 tdb->transaction->nesting--; 588 585 return 0; 589 } 586 } 590 587 591 588 tdb->map_size = tdb->transaction->old_map_size; … … 635 632 work out how much space the linearised recovery data will consume 636 633 */ 637 static tdb_len_t tdb_recovery_size(struct tdb_context *tdb)634 static bool tdb_recovery_size(struct tdb_context *tdb, tdb_len_t *result) 638 635 { 639 636 tdb_len_t recovery_size = 0; … … 642 639 recovery_size = sizeof(uint32_t); 643 640 for (i=0;i<tdb->transaction->num_blocks;i++) { 641 tdb_len_t block_size; 644 642 if (i * tdb->transaction->block_size >= tdb->transaction->old_map_size) { 645 643 break; … … 648 646 continue; 649 647 } 650 
recovery_size += 2*sizeof(tdb_off_t); 648 if (!tdb_add_len_t(recovery_size, 2*sizeof(tdb_off_t), 649 &recovery_size)) { 650 return false; 651 } 651 652 if (i == tdb->transaction->num_blocks-1) { 652 recovery_size += tdb->transaction->last_block_size;653 block_size = tdb->transaction->last_block_size; 653 654 } else { 654 recovery_size += tdb->transaction->block_size; 655 } 656 } 657 658 return recovery_size; 655 block_size = tdb->transaction->block_size; 656 } 657 if (!tdb_add_len_t(recovery_size, block_size, 658 &recovery_size)) { 659 return false; 660 } 661 } 662 663 *result = recovery_size; 664 return true; 659 665 } 660 666 … … 691 697 large enough 692 698 */ 693 static int tdb_recovery_allocate(struct tdb_context *tdb, 699 static int tdb_recovery_allocate(struct tdb_context *tdb, 694 700 tdb_len_t *recovery_size, 695 701 tdb_off_t *recovery_offset, … … 698 704 struct tdb_record rec; 699 705 const struct tdb_methods *methods = tdb->transaction->io_methods; 700 tdb_off_t recovery_head ;706 tdb_off_t recovery_head, new_end; 701 707 702 708 if (tdb_recovery_area(tdb, methods, &recovery_head, &rec) == -1) { … … 705 711 } 706 712 707 *recovery_size = tdb_recovery_size(tdb); 708 713 if (!tdb_recovery_size(tdb, recovery_size)) { 714 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: " 715 "overflow recovery size\n")); 716 return -1; 717 } 718 719 /* Existing recovery area? */ 709 720 if (recovery_head != 0 && *recovery_size <= rec.rec_len) { 710 721 /* it fits in the existing area */ … … 714 725 } 715 726 716 /* we need to free up the old recovery area, then allocate a 717 new one at the end of the file. Note that we cannot use 718 tdb_allocate() to allocate the new one as that might return 719 us an area that is being currently used (as of the start of 720 the transaction) */ 721 if (recovery_head != 0) { 722 if (tdb_free(tdb, recovery_head, &rec) == -1) { 723 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to free previous recovery area\n")); 724 return -1; 725 } 726 } 727 728 /* the tdb_free() call might have increased the recovery size */ 729 *recovery_size = tdb_recovery_size(tdb); 730 731 /* round up to a multiple of page size */ 732 *recovery_max_size = TDB_ALIGN(sizeof(rec) + *recovery_size, tdb->page_size) - sizeof(rec); 733 *recovery_offset = tdb->map_size; 734 recovery_head = *recovery_offset; 735 736 if (methods->tdb_expand_file(tdb, tdb->transaction->old_map_size, 737 (tdb->map_size - tdb->transaction->old_map_size) + 738 sizeof(rec) + *recovery_max_size) == -1) { 727 /* If recovery area in middle of file, we need a new one. */ 728 if (recovery_head == 0 729 || recovery_head + sizeof(rec) + rec.rec_len != tdb->map_size) { 730 /* we need to free up the old recovery area, then allocate a 731 new one at the end of the file. Note that we cannot use 732 tdb_allocate() to allocate the new one as that might return 733 us an area that is being currently used (as of the start of 734 the transaction) */ 735 if (recovery_head) { 736 if (tdb_free(tdb, recovery_head, &rec) == -1) { 737 TDB_LOG((tdb, TDB_DEBUG_FATAL, 738 "tdb_recovery_allocate: failed to" 739 " free previous recovery area\n")); 740 return -1; 741 } 742 743 /* the tdb_free() call might have increased 744 * the recovery size */ 745 if (!tdb_recovery_size(tdb, recovery_size)) { 746 TDB_LOG((tdb, TDB_DEBUG_FATAL, 747 "tdb_recovery_allocate: " 748 "overflow recovery size\n")); 749 return -1; 750 } 751 } 752 753 /* New head will be at end of file. 
*/ 754 recovery_head = tdb->map_size; 755 } 756 757 /* Now we know where it will be. */ 758 *recovery_offset = recovery_head; 759 760 /* Expand by more than we need, so we don't do it often. */ 761 *recovery_max_size = tdb_expand_adjust(tdb->map_size, 762 *recovery_size, 763 tdb->page_size) 764 - sizeof(rec); 765 766 if (!tdb_add_off_t(recovery_head, sizeof(rec), &new_end) || 767 !tdb_add_off_t(new_end, *recovery_max_size, &new_end)) { 768 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: " 769 "overflow recovery area\n")); 770 return -1; 771 } 772 773 if (methods->tdb_expand_file(tdb, tdb->transaction->old_map_size, 774 new_end - tdb->transaction->old_map_size) 775 == -1) { 739 776 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to create recovery area\n")); 740 777 return -1; … … 742 779 743 780 /* remap the file (if using mmap) */ 744 methods->tdb_oob(tdb, tdb->map_size +1, 1);781 methods->tdb_oob(tdb, tdb->map_size, 1, 1); 745 782 746 783 /* we have to reset the old map size so that we don't try to expand the file … … 751 788 as the magic ptr in the recovery record has not been set */ 752 789 CONVERT(recovery_head); 753 if (methods->tdb_write(tdb, TDB_RECOVERY_HEAD, 790 if (methods->tdb_write(tdb, TDB_RECOVERY_HEAD, 754 791 &recovery_head, sizeof(tdb_off_t)) == -1) { 755 792 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to write recovery head\n")); … … 768 805 setup the recovery data that will be used on a crash during commit 769 806 */ 770 static int transaction_setup_recovery(struct tdb_context *tdb, 807 static int transaction_setup_recovery(struct tdb_context *tdb, 771 808 tdb_off_t *magic_offset) 772 809 { … … 783 820 check that the recovery area has enough space 784 821 */ 785 if (tdb_recovery_allocate(tdb, &recovery_size, 822 if (tdb_recovery_allocate(tdb, &recovery_size, 786 823 &recovery_offset, &recovery_max_size) == -1) { 787 824 return -1; … … 901 938 902 939 static int _tdb_transaction_prepare_commit(struct tdb_context *tdb) 903 { 940 { 904 941 const struct tdb_methods *methods; 905 942 … … 926 963 if (tdb->transaction->nesting != 0) { 927 964 return 0; 928 } 965 } 929 966 930 967 /* check for a null transaction */ … … 959 996 } 960 997 961 if (!(tdb->flags & TDB_NOSYNC)) { 962 /* write the recovery data to the end of the file */ 963 if (transaction_setup_recovery(tdb, &tdb->transaction->magic_offset) == -1) { 964 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_prepare_commit: failed to setup recovery data\n")); 965 _tdb_transaction_cancel(tdb); 966 return -1; 967 } 998 /* write the recovery data to the end of the file */ 999 if (transaction_setup_recovery(tdb, &tdb->transaction->magic_offset) == -1) { 1000 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_prepare_commit: failed to setup recovery data\n")); 1001 _tdb_transaction_cancel(tdb); 1002 return -1; 968 1003 } 969 1004 … … 972 1007 /* expand the file to the new size if needed */ 973 1008 if (tdb->map_size != tdb->transaction->old_map_size) { 974 if (methods->tdb_expand_file(tdb, tdb->transaction->old_map_size, 975 tdb->map_size - 1009 if (methods->tdb_expand_file(tdb, tdb->transaction->old_map_size, 1010 tdb->map_size - 976 1011 tdb->transaction->old_map_size) == -1) { 977 1012 tdb->ecode = TDB_ERR_IO; … … 981 1016 } 982 1017 tdb->map_size = tdb->transaction->old_map_size; 983 methods->tdb_oob(tdb, tdb->map_size +1, 1);1018 methods->tdb_oob(tdb, tdb->map_size, 1, 1); 984 1019 } 985 1020 … … 1085 1120 run the crash recovery code */ 1086 1121 tdb->methods = methods; 1087 
tdb_transaction_recover(tdb); 1122 tdb_transaction_recover(tdb); 1088 1123 1089 1124 _tdb_transaction_cancel(tdb); … … 1093 1128 } 1094 1129 SAFE_FREE(tdb->transaction->blocks[i]); 1095 } 1130 } 1096 1131 1097 1132 /* Do this before we drop lock or blocks. */ … … 1160 1195 1161 1196 /* read the recovery record */ 1162 if (tdb->methods->tdb_read(tdb, recovery_head, &rec, 1197 if (tdb->methods->tdb_read(tdb, recovery_head, &rec, 1163 1198 sizeof(rec), DOCONV()) == -1) { 1164 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to read recovery record\n")); 1199 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to read recovery record\n")); 1165 1200 tdb->ecode = TDB_ERR_IO; 1166 1201 return -1; … … 1182 1217 data = (unsigned char *)malloc(rec.data_len); 1183 1218 if (data == NULL) { 1184 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to allocate recovery data\n")); 1219 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to allocate recovery data\n")); 1185 1220 tdb->ecode = TDB_ERR_OOM; 1186 1221 return -1; … … 1190 1225 if (tdb->methods->tdb_read(tdb, recovery_head + sizeof(rec), data, 1191 1226 rec.data_len, 0) == -1) { 1192 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to read recovery data\n")); 1227 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to read recovery data\n")); 1193 1228 tdb->ecode = TDB_ERR_IO; 1194 1229 return -1; … … 1207 1242 if (tdb->methods->tdb_write(tdb, ofs, p+8, len) == -1) { 1208 1243 free(data); 1209 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to recover % d bytes at offset %d\n", len, ofs));1244 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to recover %u bytes at offset %u\n", len, ofs)); 1210 1245 tdb->ecode = TDB_ERR_IO; 1211 1246 return -1; … … 1227 1262 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to remove recovery head\n")); 1228 1263 tdb->ecode = TDB_ERR_IO; 1229 return -1; 1264 return -1; 1230 1265 } 1231 1266 } … … 1236 1271 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to remove recovery magic\n")); 1237 1272 tdb->ecode = TDB_ERR_IO; 1238 return -1; 1273 return -1; 1239 1274 } 1240 1275 … … 1245 1280 } 1246 1281 1247 TDB_LOG((tdb, TDB_DEBUG_TRACE, "tdb_transaction_recover: recovered % d byte database\n",1282 TDB_LOG((tdb, TDB_DEBUG_TRACE, "tdb_transaction_recover: recovered %u byte database\n", 1248 1283 recovery_eof)); 1249 1284 -
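The reworded comment at the top of transaction.c records a real behaviour change: TDB_NOSYNC no longer skips the recovery setup, only the fsync/msync calls, so a process killed mid-commit can always be repaired. What is given up under TDB_NOSYNC is durability across a machine crash. The call pattern this protects, as a sketch with placeholder key/value handling:

    static int store_one(struct tdb_context *tdb, TDB_DATA key, TDB_DATA val)
    {
            if (tdb_transaction_start(tdb) == -1) {
                    return -1;
            }
            if (tdb_store(tdb, key, val, TDB_REPLACE) == -1) {
                    tdb_transaction_cancel(tdb);
                    return -1;
            }
            /* Even under TDB_NOSYNC, a kill -9 anywhere in here is
             * repaired by tdb_transaction_recover() on the next lock of
             * the database; an OS crash may still lose the commit, since
             * nothing was fsync'ed. */
            return tdb_transaction_commit(tdb);
    }

The tdb_recovery_allocate() rework serves the same goal: a recovery area found mid-file is freed and recreated at the end of the file, an area already at the end is grown in place, and all of the size arithmetic is overflow-checked through tdb_add_off_t().
-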
vendor/current/lib/tdb/common/traverse.c
r986 r988 1 /* 1 /* 2 2 Unix SMB/CIFS implementation. 3 3 … … 38 38 39 39 /* Lock each chain from the start one. */ 40 for (; tlock->hash < tdb->header.hash_size; tlock->hash++) { 40 for (; tlock->hash < tdb->hash_size; tlock->hash++) { 41 41 if (!tlock->off && tlock->hash != 0) { 42 42 /* this is an optimisation for the common case where … … 69 69 */ 70 70 tdb->methods->next_hash_chain(tdb, &tlock->hash); 71 if (tlock->hash == tdb->header.hash_size) { 71 if (tlock->hash == tdb->hash_size) { 72 72 continue; 73 73 } … … 118 118 current = tlock->off; 119 119 tlock->off = rec->next; 120 if (!(tdb->read_only || tdb->traverse_read) && 120 if (!(tdb->read_only || tdb->traverse_read) && 121 121 tdb_do_delete(tdb, current, rec) != 0) 122 122 goto fail; … … 141 141 a non-zero return value from fn() indicates that the traversal should stop 142 142 */ 143 static int tdb_traverse_internal(struct tdb_context *tdb, 143 static int tdb_traverse_internal(struct tdb_context *tdb, 144 144 tdb_traverse_func fn, void *private_data, 145 145 struct tdb_traverse_lock *tl) … … 150 150 tdb_off_t off; 151 151 152 /* This was in the initializaton, above, but the IRIX compiler 152 /* This was in the initialization, above, but the IRIX compiler 153 153 * did not like it. crh 154 154 */ … … 166 166 count++; 167 /* now read the full record */ 168 key.dptr = tdb_alloc_read(tdb, tl->off + sizeof(rec), 168 key.dptr = tdb_alloc_read(tdb, tl->off + sizeof(rec), 169 169 rec.key_len + rec.data_len); 170 170 if (!key.dptr) { … … 211 211 212 212 /* 213 a write style traverse - temporarily marks the db read only 213 a read style traverse - temporarily marks the db read only 214 214 */ 215 _PUBLIC_ int tdb_traverse_read(struct tdb_context *tdb, 215 _PUBLIC_ int tdb_traverse_read(struct tdb_context *tdb, 216 216 tdb_traverse_func fn, void *private_data) 217 217 { … … 240 240 241 241 WARNING: The data buffer given to the callback fn does NOT meet the 242 alignment restrictions malloc gives you. 242 alignment guarantees malloc gives you. 243 243 */ 244 _PUBLIC_ int tdb_traverse(struct tdb_context *tdb, 244 _PUBLIC_ int tdb_traverse(struct tdb_context *tdb, 245 245 tdb_traverse_func fn, void *private_data) 246 246 { 247 247 struct tdb_traverse_lock tl = { NULL, 0, 0, F_WRLCK }; 248 enum tdb_lock_flags lock_flags; 248 249 int ret; 249 250 … … 252 253 } 253 254 254 if (tdb_transaction_lock(tdb, F_WRLCK, TDB_LOCK_WAIT)) { 255 lock_flags = TDB_LOCK_WAIT; 256 257 if (tdb->allrecord_lock.count != 0) { 258 /* 259 * This avoids a deadlock between tdb_lockall() and 260 * tdb_traverse(). See 261 * https://bugzilla.samba.org/show_bug.cgi?id=11381 262 */ 263 lock_flags = TDB_LOCK_NOWAIT; 264 } 265 266 if (tdb_transaction_lock(tdb, F_WRLCK, lock_flags)) { 255 267 return -1; 256 268 }
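The TDB_LOCK_NOWAIT special case at the bottom is the fix for the deadlock cited in the new comment: one process holds the allrecord lock and wants the transaction lock to traverse, while another holds the transaction lock inside a commit and waits for the allrecord lock. Failing fast breaks the cycle. The affected pattern, with an illustrative callback:

    static int count_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA val,
                        void *private_data)
    {
            unsigned int *count = (unsigned int *)private_data;
            (*count)++;
            return 0;               /* non-zero stops the traversal */
    }

    static int count_under_lockall(struct tdb_context *tdb,
                                   unsigned int *count)
    {
            int ret;

            if (tdb_lockall(tdb) == -1) {
                    return -1;
            }
            /* With the allrecord lock held, tdb_traverse() now tries the
             * transaction lock with TDB_LOCK_NOWAIT and returns -1 rather
             * than blocking forever against a concurrent commit. */
            ret = tdb_traverse(tdb, count_fn, count);
            tdb_unlockall(tdb);
            return ret;
    }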