Changeset 988 for vendor/current/lib/tdb/common
- Timestamp: Nov 24, 2016, 1:14:11 PM
- Location: vendor/current/lib/tdb/common
- Files: 2 added, 14 edited
vendor/current/lib/tdb/common/check.c
r986 r988 40 40 goto corrupt; 41 41 42 if (hdr.rwlocks != 0 && hdr.rwlocks != TDB_HASH_RWLOCK_MAGIC) 42 if (hdr.rwlocks != 0 && 43 hdr.rwlocks != TDB_FEATURE_FLAG_MAGIC && 44 hdr.rwlocks != TDB_HASH_RWLOCK_MAGIC) 43 45 goto corrupt; 44 46 … … 51 53 goto corrupt; 52 54 53 if (hdr.hash_size != tdb->h eader.hash_size)55 if (hdr.hash_size != tdb->hash_size) 54 56 goto corrupt; 55 57 56 58 if (hdr.recovery_start != 0 && 57 hdr.recovery_start < TDB_DATA_START(tdb->h eader.hash_size))59 hdr.recovery_start < TDB_DATA_START(tdb->hash_size)) 58 60 goto corrupt; 59 61 … … 75 77 76 78 /* Check rec->next: 0 or points to record offset, aligned. */ 77 if (rec->next > 0 && rec->next < TDB_DATA_START(tdb->h eader.hash_size)){78 TDB_LOG((tdb, TDB_DEBUG_ERROR, 79 "Record offset % d too small next %d\n",79 if (rec->next > 0 && rec->next < TDB_DATA_START(tdb->hash_size)){ 80 TDB_LOG((tdb, TDB_DEBUG_ERROR, 81 "Record offset %u too small next %u\n", 80 82 off, rec->next)); 81 83 goto corrupt; … … 83 85 if (rec->next + sizeof(*rec) < rec->next) { 84 86 TDB_LOG((tdb, TDB_DEBUG_ERROR, 85 "Record offset % d too large next %d\n",87 "Record offset %u too large next %u\n", 86 88 off, rec->next)); 87 89 goto corrupt; … … 89 91 if ((rec->next % TDB_ALIGNMENT) != 0) { 90 92 TDB_LOG((tdb, TDB_DEBUG_ERROR, 91 "Record offset % d misaligned next %d\n",93 "Record offset %u misaligned next %u\n", 92 94 off, rec->next)); 93 95 goto corrupt; 94 96 } 95 if (tdb->methods->tdb_oob(tdb, rec->next +sizeof(*rec), 0))97 if (tdb->methods->tdb_oob(tdb, rec->next, sizeof(*rec), 0)) 96 98 goto corrupt; 97 99 … … 99 101 if ((rec->rec_len % TDB_ALIGNMENT) != 0) { 100 102 TDB_LOG((tdb, TDB_DEBUG_ERROR, 101 "Record offset % d misaligned length %d\n",103 "Record offset %u misaligned length %u\n", 102 104 off, rec->rec_len)); 103 105 goto corrupt; … … 106 108 if (rec->rec_len < sizeof(tailer)) { 107 109 TDB_LOG((tdb, TDB_DEBUG_ERROR, 108 "Record offset % d too short length %d\n",110 "Record offset %u too short length %u\n", 109 111 off, rec->rec_len)); 110 112 goto corrupt; 111 113 } 112 114 /* OOB allows "right at the end" access, so this works for last rec. */ 113 if (tdb->methods->tdb_oob(tdb, off +sizeof(*rec)+rec->rec_len, 0))115 if (tdb->methods->tdb_oob(tdb, off, sizeof(*rec)+rec->rec_len, 0)) 114 116 goto corrupt; 115 117 … … 120 122 if (tailer != sizeof(*rec) + rec->rec_len) { 121 123 TDB_LOG((tdb, TDB_DEBUG_ERROR, 122 "Record offset % dinvalid tailer\n", off));124 "Record offset %u invalid tailer\n", off)); 123 125 goto corrupt; 124 126 } … … 248 250 if (rec->key_len + rec->data_len + sizeof(tdb_off_t) > rec->rec_len) { 249 251 TDB_LOG((tdb, TDB_DEBUG_ERROR, 250 "Record offset % dtoo short for contents\n", off));252 "Record offset %u too short for contents\n", off)); 251 253 return false; 252 254 } … … 258 260 if (tdb->hash_fn(&key) != rec->full_hash) { 259 261 TDB_LOG((tdb, TDB_DEBUG_ERROR, 260 "Record offset % dhas incorrect hash\n", off));262 "Record offset %u has incorrect hash\n", off)); 261 263 goto fail_put_key; 262 264 } … … 346 348 347 349 /* Make sure we know true size of the underlying file. */ 348 tdb->methods->tdb_oob(tdb, tdb->map_size +1, 1);350 tdb->methods->tdb_oob(tdb, tdb->map_size, 1, 1); 349 351 350 352 /* Header must be OK: also gets us the recovery ptr, if any. */ … … 353 355 354 356 /* We should have the whole header, too. 
*/ 355 if (tdb->map_size < TDB_DATA_START(tdb->h eader.hash_size)) {357 if (tdb->map_size < TDB_DATA_START(tdb->hash_size)) { 356 358 tdb->ecode = TDB_ERR_CORRUPT; 357 359 TDB_LOG((tdb, TDB_DEBUG_ERROR, "File too short for hashes\n")); … … 361 363 /* One big malloc: pointers then bit arrays. */ 362 364 hashes = (unsigned char **)calloc( 363 1, sizeof(hashes[0]) * (1+tdb->h eader.hash_size)364 + BITMAP_BITS / CHAR_BIT * (1+tdb->h eader.hash_size));365 1, sizeof(hashes[0]) * (1+tdb->hash_size) 366 + BITMAP_BITS / CHAR_BIT * (1+tdb->hash_size)); 365 367 if (!hashes) { 366 368 tdb->ecode = TDB_ERR_OOM; … … 369 371 370 372 /* Initialize pointers */ 371 hashes[0] = (unsigned char *)(&hashes[1+tdb->h eader.hash_size]);372 for (h = 1; h < 1+tdb->h eader.hash_size; h++)373 hashes[0] = (unsigned char *)(&hashes[1+tdb->hash_size]); 374 for (h = 1; h < 1+tdb->hash_size; h++) 373 375 hashes[h] = hashes[h-1] + BITMAP_BITS / CHAR_BIT; 374 376 375 377 /* Freelist and hash headers are all in a row: read them. */ 376 for (h = 0; h < 1+tdb->h eader.hash_size; h++) {378 for (h = 0; h < 1+tdb->hash_size; h++) { 377 379 if (tdb_ofs_read(tdb, FREELIST_TOP + h*sizeof(tdb_off_t), 378 380 &off) == -1) … … 383 385 384 386 /* For each record, read it in and check it's ok. */ 385 for (off = TDB_DATA_START(tdb->h eader.hash_size);387 for (off = TDB_DATA_START(tdb->hash_size); 386 388 off < tdb->map_size; 387 389 off += sizeof(rec) + rec.rec_len) { … … 412 414 413 415 TDB_LOG((tdb, TDB_DEBUG_ERROR, 414 "Dead space at % d-%d(of %u)\n",416 "Dead space at %u-%u (of %u)\n", 415 417 off, off + dead, tdb->map_size)); 416 418 rec.rec_len = dead - sizeof(rec); … … 419 421 if (recovery_start != off) { 420 422 TDB_LOG((tdb, TDB_DEBUG_ERROR, 421 "Unexpected recovery record at offset % d\n",423 "Unexpected recovery record at offset %u\n", 422 424 off)); 423 425 goto free; … … 429 431 tdb->ecode = TDB_ERR_CORRUPT; 430 432 TDB_LOG((tdb, TDB_DEBUG_ERROR, 431 "Bad magic 0x%x at offset % d\n",433 "Bad magic 0x%x at offset %u\n", 432 434 rec.magic, off)); 433 435 goto free; … … 437 439 /* Now, hashes should all be empty: each record exists and is referred 438 440 * to by one other. */ 439 for (h = 0; h < 1+tdb->h eader.hash_size; h++) {441 for (h = 0; h < 1+tdb->hash_size; h++) { 440 442 unsigned int i; 441 443 for (i = 0; i < BITMAP_BITS / CHAR_BIT; i++) { -
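A recurring mechanical change in this file (and in io.c below) is the tdb_oob() signature: tdb_oob(tdb, off + len, probe) becomes tdb_oob(tdb, off, len, probe), so the bounds check itself can notice when the 32-bit sum off + len wraps. A minimal standalone sketch of the failure mode the split arguments catch — toy names, not the tdb sources:

    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t tdb_off_t;
    typedef uint32_t tdb_len_t;

    /* Overflow-aware bounds check, mirroring the new tdb_oob() shape. */
    static int oob(tdb_off_t map_size, tdb_off_t off, tdb_len_t len)
    {
            if (len + off < len) {
                    return -1;      /* off + len wrapped past 2^32 */
            }
            if (off + len > map_size) {
                    return -1;      /* genuinely beyond the mapped file */
            }
            return 0;
    }

    int main(void)
    {
            /* The old single-argument form would have been handed
             * 0xfffffff8 + 0x10 == 0x8, which looks comfortably
             * in-bounds; the split form rejects it. */
            printf("%d\n", oob(0x1000, 0xfffffff8, 0x10));  /* -1 */
            printf("%d\n", oob(0x1000, 0x0f00, 0x100));     /*  0 */
            return 0;
    }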
vendor/current/lib/tdb/common/dump.c
r986 r988 1 /* 1 /* 2 2 Unix SMB/CIFS implementation. 3 3 … … 34 34 tdb_off_t tailer_ofs, tailer; 35 35 36 if (tdb->methods->tdb_read(tdb, offset, (char *)&rec, 36 if (tdb->methods->tdb_read(tdb, offset, (char *)&rec, 37 37 sizeof(rec), DOCONV()) == -1) { 38 38 printf("ERROR: failed to read record at %u\n", offset); … … 40 40 } 41 41 42 printf(" rec: hash=%d offset=0x%08x next=0x%08x rec_len=% d"43 "key_len=% d data_len=%d full_hash=0x%x magic=0x%x\n",42 printf(" rec: hash=%d offset=0x%08x next=0x%08x rec_len=%u " 43 "key_len=%u data_len=%u full_hash=0x%08x magic=0x%08x\n", 44 44 hash, offset, rec.next, rec.rec_len, rec.key_len, rec.data_len, 45 45 rec.full_hash, rec.magic); … … 84 84 { 85 85 int i; 86 for (i=0;i<tdb->h eader.hash_size;i++) {86 for (i=0;i<tdb->hash_size;i++) { 87 87 tdb_dump_chain(tdb, i); 88 88 } … … 111 111 printf("freelist top=[0x%08x]\n", rec_ptr ); 112 112 while (rec_ptr) { 113 if (tdb->methods->tdb_read(tdb, rec_ptr, (char *)&rec, 113 if (tdb->methods->tdb_read(tdb, rec_ptr, (char *)&rec, 114 114 sizeof(rec), DOCONV()) == -1) { 115 115 tdb_unlock(tdb, -1, F_WRLCK); … … 123 123 } 124 124 125 printf("entry offset=[0x%08x], rec.rec_len = [0x%08x (% d)] (end = 0x%08x)\n",125 printf("entry offset=[0x%08x], rec.rec_len = [0x%08x (%u)] (end = 0x%08x)\n", 126 126 rec_ptr, rec.rec_len, rec.rec_len, rec_ptr + rec.rec_len); 127 127 total_free += rec.rec_len; … … 130 130 rec_ptr = rec.next; 131 131 } 132 printf("total rec_len = [0x%08x (%d)]\n", (int)total_free, 133 (int)total_free); 132 printf("total rec_len = [0x%08lx (%lu)]\n", total_free, total_free); 134 133 135 134 return tdb_unlock(tdb, -1, F_WRLCK); -
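Most of the dump.c hunks are format-string hygiene: tdb offsets and lengths are unsigned 32-bit values, and %d renders anything past 2 GiB as a negative number. Illustrative only (the negative value shown assumes the usual two's-complement int):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t off = 0x90000000;     /* an offset in a >2 GiB tdb */

            printf("%d\n", (int)off);      /* -1879048192: misleading */
            printf("%u\n", off);           /* 2415919104: what %u prints */
            return 0;
    }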
vendor/current/lib/tdb/common/error.c
r986 r988 1 /* 1 /* 2 2 Unix SMB/CIFS implementation. 3 3 -
vendor/current/lib/tdb/common/freelist.c
r986 r988 1 /* 1 /* 2 2 Unix SMB/CIFS implementation. 3 3 … … 29 29 30 30 /* 'right' merges can involve O(n^2) cost when combined with a 31 traverse, so they are disabled until we find a way to do them in 31 traverse, so they are disabled until we find a way to do them in 32 32 O(1) time 33 33 */ … … 43 43 /* this happens when a app is showdown while deleting a record - we should 44 44 not completely fail when this happens */ 45 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_rec_free_read non-free magic 0x%x at offset=% d - fixing\n",45 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_rec_free_read non-free magic 0x%x at offset=%u - fixing\n", 46 46 rec->magic, off)); 47 47 rec->magic = TDB_FREE_MAGIC; 48 if (tdb ->methods->tdb_write(tdb, off, rec, sizeof(*rec)) == -1)48 if (tdb_rec_write(tdb, off, rec) == -1) 49 49 return -1; 50 50 } … … 53 53 /* Ensure ecode is set for log fn. */ 54 54 tdb->ecode = TDB_ERR_CORRUPT; 55 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_rec_free_read bad magic 0x%x at offset=% d\n",55 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_rec_free_read bad magic 0x%x at offset=%u\n", 56 56 rec->magic, off)); 57 57 return -1; 58 58 } 59 if (tdb->methods->tdb_oob(tdb, rec->next +sizeof(*rec), 0) != 0)59 if (tdb->methods->tdb_oob(tdb, rec->next, sizeof(*rec), 0) != 0) 60 60 return -1; 61 61 return 0; … … 80 80 } 81 81 tdb->ecode = TDB_ERR_CORRUPT; 82 TDB_LOG((tdb, TDB_DEBUG_FATAL,"remove_from_freelist: not on list at off=% d\n", off));82 TDB_LOG((tdb, TDB_DEBUG_FATAL,"remove_from_freelist: not on list at off=%u\n", off)); 83 83 return -1; 84 84 } … … 98 98 } 99 99 100 /* Add an element into the freelist. Merge adjacent records if 101 necessary. */ 100 /** 101 * Read the record directly on the left. 102 * Fail if there is no record on the left. 103 */ 104 static int read_record_on_left(struct tdb_context *tdb, tdb_off_t rec_ptr, 105 tdb_off_t *left_p, 106 struct tdb_record *left_r) 107 { 108 tdb_off_t left_ptr; 109 tdb_off_t left_size; 110 struct tdb_record left_rec; 111 int ret; 112 113 left_ptr = rec_ptr - sizeof(tdb_off_t); 114 115 if (left_ptr <= TDB_DATA_START(tdb->hash_size)) { 116 /* no record on the left */ 117 return -1; 118 } 119 120 /* Read in tailer and jump back to header */ 121 ret = tdb_ofs_read(tdb, left_ptr, &left_size); 122 if (ret == -1) { 123 TDB_LOG((tdb, TDB_DEBUG_FATAL, 124 "tdb_free: left offset read failed at %u\n", left_ptr)); 125 return -1; 126 } 127 128 /* it could be uninitialised data */ 129 if (left_size == 0 || left_size == TDB_PAD_U32) { 130 return -1; 131 } 132 133 if (left_size > rec_ptr) { 134 return -1; 135 } 136 137 left_ptr = rec_ptr - left_size; 138 139 if (left_ptr < TDB_DATA_START(tdb->hash_size)) { 140 return -1; 141 } 142 143 /* Now read in the left record */ 144 ret = tdb->methods->tdb_read(tdb, left_ptr, &left_rec, 145 sizeof(left_rec), DOCONV()); 146 if (ret == -1) { 147 TDB_LOG((tdb, TDB_DEBUG_FATAL, 148 "tdb_free: left read failed at %u (%u)\n", 149 left_ptr, left_size)); 150 return -1; 151 } 152 153 *left_p = left_ptr; 154 *left_r = left_rec; 155 156 return 0; 157 } 158 159 /** 160 * Merge new freelist record with the direct left neighbour. 161 * This assumes that left_rec represents the record 162 * directly to the left of right_rec and that this is 163 * a freelist record. 
164 */ 165 static int merge_with_left_record(struct tdb_context *tdb, 166 tdb_off_t left_ptr, 167 struct tdb_record *left_rec, 168 struct tdb_record *right_rec) 169 { 170 int ret; 171 172 left_rec->rec_len += sizeof(*right_rec) + right_rec->rec_len; 173 174 ret = tdb_rec_write(tdb, left_ptr, left_rec); 175 if (ret == -1) { 176 TDB_LOG((tdb, TDB_DEBUG_FATAL, 177 "merge_with_left_record: update_left failed at %u\n", 178 left_ptr)); 179 return -1; 180 } 181 182 ret = update_tailer(tdb, left_ptr, left_rec); 183 if (ret == -1) { 184 TDB_LOG((tdb, TDB_DEBUG_FATAL, 185 "merge_with_left_record: update_tailer failed at %u\n", 186 left_ptr)); 187 return -1; 188 } 189 190 return 0; 191 } 192 193 /** 194 * Check whether the record left of a given freelist record is 195 * also a freelist record, and if so, merge the two records. 196 * 197 * Return code: 198 * -1 upon error 199 * 0 if left was not a free record 200 * 1 if left was free and successfully merged. 201 * 202 * The currend record is handed in with pointer and fully read record. 203 * 204 * The left record pointer and struct can be retrieved as result 205 * in lp and lr; 206 */ 207 static int check_merge_with_left_record(struct tdb_context *tdb, 208 tdb_off_t rec_ptr, 209 struct tdb_record *rec, 210 tdb_off_t *lp, 211 struct tdb_record *lr) 212 { 213 tdb_off_t left_ptr; 214 struct tdb_record left_rec; 215 int ret; 216 217 ret = read_record_on_left(tdb, rec_ptr, &left_ptr, &left_rec); 218 if (ret != 0) { 219 return 0; 220 } 221 222 if (left_rec.magic != TDB_FREE_MAGIC) { 223 return 0; 224 } 225 226 /* It's free - expand to include it. */ 227 ret = merge_with_left_record(tdb, left_ptr, &left_rec, rec); 228 if (ret != 0) { 229 return -1; 230 } 231 232 if (lp != NULL) { 233 *lp = left_ptr; 234 } 235 236 if (lr != NULL) { 237 *lr = left_rec; 238 } 239 240 return 1; 241 } 242 243 /** 244 * Check whether the record left of a given freelist record is 245 * also a freelist record, and if so, merge the two records. 246 * 247 * Return code: 248 * -1 upon error 249 * 0 if left was not a free record 250 * 1 if left was free and successfully merged. 251 * 252 * In this variant, the input record is specified just as the pointer 253 * and is read from the database if needed. 254 * 255 * next_ptr will contain the original record's next pointer after 256 * successful merging (which will be lost after merging), so that 257 * the caller can update the last pointer. 258 */ 259 static int check_merge_ptr_with_left_record(struct tdb_context *tdb, 260 tdb_off_t rec_ptr, 261 tdb_off_t *next_ptr) 262 { 263 tdb_off_t left_ptr; 264 struct tdb_record rec, left_rec; 265 int ret; 266 267 ret = read_record_on_left(tdb, rec_ptr, &left_ptr, &left_rec); 268 if (ret != 0) { 269 return 0; 270 } 271 272 if (left_rec.magic != TDB_FREE_MAGIC) { 273 return 0; 274 } 275 276 /* It's free - expand to include it. */ 277 278 ret = tdb->methods->tdb_read(tdb, rec_ptr, &rec, 279 sizeof(rec), DOCONV()); 280 if (ret != 0) { 281 return -1; 282 } 283 284 ret = merge_with_left_record(tdb, left_ptr, &left_rec, &rec); 285 if (ret != 0) { 286 return -1; 287 } 288 289 if (next_ptr != NULL) { 290 *next_ptr = rec.next; 291 } 292 293 return 1; 294 } 295 296 /** 297 * Add an element into the freelist. 298 * 299 * We merge the new record into the left record if it is also a 300 * free record, but not with the right one. This makes the 301 * operation O(1) instead of O(n): merging with the right record 302 * requires a traverse of the freelist to find the previous 303 * record in the free list. 
304 * 305 * This prevents db traverses from being O(n^2) after a lot of deletes. 306 */ 102 307 int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec) 103 308 { 309 int ret; 310 104 311 /* Allocation and tailer lock */ 105 312 if (tdb_lock(tdb, -1, F_WRLCK) != 0) … … 139 346 #endif 140 347 141 /* Look left */ 142 if (offset - sizeof(tdb_off_t) > TDB_DATA_START(tdb->header.hash_size)) { 143 tdb_off_t left = offset - sizeof(tdb_off_t); 144 struct tdb_record l; 145 tdb_off_t leftsize; 146 147 /* Read in tailer and jump back to header */ 148 if (tdb_ofs_read(tdb, left, &leftsize) == -1) { 149 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: left offset read failed at %u\n", left)); 150 goto update; 151 } 152 153 /* it could be uninitialised data */ 154 if (leftsize == 0 || leftsize == TDB_PAD_U32) { 155 goto update; 156 } 157 158 left = offset - leftsize; 159 160 if (leftsize > offset || 161 left < TDB_DATA_START(tdb->header.hash_size)) { 162 goto update; 163 } 164 165 /* Now read in the left record */ 166 if (tdb->methods->tdb_read(tdb, left, &l, sizeof(l), DOCONV()) == -1) { 167 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: left read failed at %u (%u)\n", left, leftsize)); 168 goto update; 169 } 170 171 /* If it's free, expand to include it. */ 172 if (l.magic == TDB_FREE_MAGIC) { 173 /* we now merge the new record into the left record, rather than the other 174 way around. This makes the operation O(1) instead of O(n). This change 175 prevents traverse from being O(n^2) after a lot of deletes */ 176 l.rec_len += sizeof(*rec) + rec->rec_len; 177 if (tdb_rec_write(tdb, left, &l) == -1) { 178 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_left failed at %u\n", left)); 179 goto fail; 180 } 181 if (update_tailer(tdb, left, &l) == -1) { 182 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free: update_tailer failed at %u\n", offset)); 183 goto fail; 184 } 185 tdb_unlock(tdb, -1, F_WRLCK); 186 return 0; 187 } 188 } 189 190 update: 191 192 /* Now, prepend to free list */ 348 ret = check_merge_with_left_record(tdb, offset, rec, NULL, NULL); 349 if (ret == -1) { 350 goto fail; 351 } 352 if (ret == 1) { 353 /* merged */ 354 goto done; 355 } 356 357 /* Nothing to merge, prepend to free list */ 358 193 359 rec->magic = TDB_FREE_MAGIC; 194 360 … … 196 362 tdb_rec_write(tdb, offset, rec) == -1 || 197 363 tdb_ofs_write(tdb, FREELIST_TOP, &offset) == -1) { 198 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free record write failed at offset=% d\n", offset));364 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_free record write failed at offset=%u\n", offset)); 199 365 goto fail; 200 366 } 201 367 368 done: 202 369 /* And we're done. 
*/ 203 370 tdb_unlock(tdb, -1, F_WRLCK); … … 211 378 212 379 213 /* 380 /* 214 381 the core of tdb_allocate - called when we have decided which 215 382 free list entry to use … … 219 386 able to free up the record without fragmentation 220 387 */ 221 static tdb_off_t tdb_allocate_ofs(struct tdb_context *tdb, 388 static tdb_off_t tdb_allocate_ofs(struct tdb_context *tdb, 222 389 tdb_len_t length, tdb_off_t rec_ptr, 223 390 struct tdb_record *rec, tdb_off_t last_ptr) … … 251 418 252 419 /* and setup the new record */ 253 rec_ptr += sizeof(*rec) + rec->rec_len; 420 rec_ptr += sizeof(*rec) + rec->rec_len; 254 421 255 422 memset(rec, '\0', sizeof(*rec)); … … 274 441 0 is returned if the space could not be allocated 275 442 */ 276 tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct tdb_record *rec) 443 static tdb_off_t tdb_allocate_from_freelist( 444 struct tdb_context *tdb, tdb_len_t length, struct tdb_record *rec) 277 445 { 278 446 tdb_off_t rec_ptr, last_ptr, newrec_ptr; … … 282 450 } bestfit; 283 451 float multiplier = 1.0; 284 285 if (tdb_lock(tdb, -1, F_WRLCK) == -1) 286 return 0; 452 bool merge_created_candidate; 287 453 288 454 /* over-allocate to reduce fragmentation */ … … 294 460 295 461 again: 462 merge_created_candidate = false; 296 463 last_ptr = FREELIST_TOP; 297 464 298 465 /* read in the freelist top */ 299 466 if (tdb_ofs_read(tdb, FREELIST_TOP, &rec_ptr) == -1) 300 goto fail;467 return 0; 301 468 302 469 bestfit.rec_ptr = 0; … … 304 471 bestfit.rec_len = 0; 305 472 306 /* 473 /* 307 474 this is a best fit allocation strategy. Originally we used 308 475 a first fit strategy, but it suffered from massive fragmentation … … 310 477 */ 311 478 while (rec_ptr) { 479 int ret; 480 tdb_off_t left_ptr; 481 struct tdb_record left_rec; 482 312 483 if (tdb_rec_free_read(tdb, rec_ptr, rec) == -1) { 313 goto fail; 484 return 0; 485 } 486 487 ret = check_merge_with_left_record(tdb, rec_ptr, rec, 488 &left_ptr, &left_rec); 489 if (ret == -1) { 490 return 0; 491 } 492 if (ret == 1) { 493 /* merged */ 494 rec_ptr = rec->next; 495 ret = tdb_ofs_write(tdb, last_ptr, &rec->next); 496 if (ret == -1) { 497 return 0; 498 } 499 500 /* 501 * We have merged the current record into the left 502 * neighbour. So our traverse of the freelist will 503 * skip it and consider the next record in the chain. 504 * 505 * But the enlarged left neighbour may be a candidate. 506 * If it is, we can not directly use it, though. 507 * The only thing we can do and have to do here is to 508 * update the current best fit size in the chain if the 509 * current best fit is the left record. (By that we may 510 * worsen the best fit we already had, bit this is not a 511 * problem.) 512 * 513 * If the current best fit is not the left record, 514 * all we can do is remember the fact that a merge 515 * created a new candidate so that we can trigger 516 * a second walk of the freelist if at the end of 517 * the first walk we have not found any fit. 518 * This way we can avoid expanding the database. 
519 */ 520 521 if (bestfit.rec_ptr == left_ptr) { 522 bestfit.rec_len = left_rec.rec_len; 523 } 524 525 if (left_rec.rec_len > length) { 526 merge_created_candidate = true; 527 } 528 529 continue; 314 530 } 315 531 … … 345 561 if (bestfit.rec_ptr != 0) { 346 562 if (tdb_rec_free_read(tdb, bestfit.rec_ptr, rec) == -1) { 347 goto fail;348 } 349 350 newrec_ptr = tdb_allocate_ofs(tdb, length, bestfit.rec_ptr, 563 return 0; 564 } 565 566 newrec_ptr = tdb_allocate_ofs(tdb, length, bestfit.rec_ptr, 351 567 rec, bestfit.last_ptr); 352 tdb_unlock(tdb, -1, F_WRLCK);353 568 return newrec_ptr; 569 } 570 571 if (merge_created_candidate) { 572 goto again; 354 573 } 355 574 … … 358 577 if (tdb_expand(tdb, length + sizeof(*rec)) == 0) 359 578 goto again; 360 fail: 579 580 return 0; 581 } 582 583 static bool tdb_alloc_dead( 584 struct tdb_context *tdb, int hash, tdb_len_t length, 585 tdb_off_t *rec_ptr, struct tdb_record *rec) 586 { 587 tdb_off_t last_ptr; 588 589 *rec_ptr = tdb_find_dead(tdb, hash, rec, length, &last_ptr); 590 if (*rec_ptr == 0) { 591 return false; 592 } 593 /* 594 * Unlink the record from the hash chain, it's about to be moved into 595 * another one. 596 */ 597 return (tdb_ofs_write(tdb, last_ptr, &rec->next) == 0); 598 } 599 600 /* 601 * Chain "hash" is assumed to be locked 602 */ 603 604 tdb_off_t tdb_allocate(struct tdb_context *tdb, int hash, tdb_len_t length, 605 struct tdb_record *rec) 606 { 607 tdb_off_t ret; 608 int i; 609 610 if (tdb->max_dead_records == 0) { 611 /* 612 * No dead records to expect anywhere. Do the blocking 613 * freelist lock without trying to steal from others 614 */ 615 goto blocking_freelist_allocate; 616 } 617 618 /* 619 * The following loop tries to get the freelist lock nonblocking. If 620 * it gets the lock, allocate from there. If the freelist is busy, 621 * instead of waiting we try to steal dead records from other hash 622 * chains. 623 * 624 * Be aware that we do nonblocking locks on the other hash chains as 625 * well and fail gracefully. This way we avoid deadlocks (we block two 626 * hash chains, something which is pretty bad normally) 627 */ 628 629 for (i=0; i<tdb->hash_size; i++) { 630 631 int list; 632 633 list = BUCKET(hash+i); 634 635 if (tdb_lock_nonblock(tdb, list, F_WRLCK) == 0) { 636 bool got_dead; 637 638 got_dead = tdb_alloc_dead(tdb, list, length, &ret, rec); 639 tdb_unlock(tdb, list, F_WRLCK); 640 641 if (got_dead) { 642 return ret; 643 } 644 } 645 646 if (tdb_lock_nonblock(tdb, -1, F_WRLCK) == 0) { 647 /* 648 * Under the freelist lock take the chance to give 649 * back our dead records. 650 */ 651 tdb_purge_dead(tdb, hash); 652 653 ret = tdb_allocate_from_freelist(tdb, length, rec); 654 tdb_unlock(tdb, -1, F_WRLCK); 655 return ret; 656 } 657 } 658 659 blocking_freelist_allocate: 660 661 if (tdb_lock(tdb, -1, F_WRLCK) == -1) { 662 return 0; 663 } 664 ret = tdb_allocate_from_freelist(tdb, length, rec); 361 665 tdb_unlock(tdb, -1, F_WRLCK); 362 return 0; 363 } 364 365 366 367 /* 368 return the size of the freelist - used to decide if we should repack 369 */ 370 _PUBLIC_ int tdb_freelist_size(struct tdb_context *tdb) 666 return ret; 667 } 668 669 /** 670 * Merge adjacent records in the freelist. 
671 */ 672 static int tdb_freelist_merge_adjacent(struct tdb_context *tdb, 673 int *count_records, int *count_merged) 674 { 675 tdb_off_t cur, next; 676 int count = 0; 677 int merged = 0; 678 int ret; 679 680 ret = tdb_lock(tdb, -1, F_RDLCK); 681 if (ret == -1) { 682 return -1; 683 } 684 685 cur = FREELIST_TOP; 686 while (tdb_ofs_read(tdb, cur, &next) == 0 && next != 0) { 687 tdb_off_t next2; 688 689 count++; 690 691 ret = check_merge_ptr_with_left_record(tdb, next, &next2); 692 if (ret == -1) { 693 goto done; 694 } 695 if (ret == 1) { 696 /* 697 * merged: 698 * now let cur->next point to next2 instead of next 699 */ 700 701 ret = tdb_ofs_write(tdb, cur, &next2); 702 if (ret != 0) { 703 goto done; 704 } 705 706 next = next2; 707 merged++; 708 } 709 710 cur = next; 711 } 712 713 if (count_records != NULL) { 714 *count_records = count; 715 } 716 717 if (count_merged != NULL) { 718 *count_merged = merged; 719 } 720 721 ret = 0; 722 723 done: 724 tdb_unlock(tdb, -1, F_RDLCK); 725 return ret; 726 } 727 728 /** 729 * return the size of the freelist - no merging done 730 */ 731 static int tdb_freelist_size_no_merge(struct tdb_context *tdb) 371 732 { 372 733 tdb_off_t ptr; … … 385 746 return count; 386 747 } 748 749 /** 750 * return the size of the freelist - used to decide if we should repack 751 * 752 * As a side effect, adjacent records are merged unless the 753 * database is read-only, in order to reduce the fragmentation 754 * without repacking. 755 */ 756 _PUBLIC_ int tdb_freelist_size(struct tdb_context *tdb) 757 { 758 759 int count = 0; 760 761 if (tdb->read_only) { 762 count = tdb_freelist_size_no_merge(tdb); 763 } else { 764 int ret; 765 ret = tdb_freelist_merge_adjacent(tdb, &count, NULL); 766 if (ret != 0) { 767 return -1; 768 } 769 } 770 771 return count; 772 } -
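The new read_record_on_left()/merge_with_left_record() helpers lean on tdb's tailer convention: the last four bytes of every record hold sizeof(header) + rec_len, so the start of the left neighbour is a single read away. That is what keeps the merge O(1), while right-merges (which need a freelist walk) stay disabled. A self-contained toy model of the lookup; sizes and names are illustrative, not tdb's real structs:

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    #define HDR 8                  /* toy header size */
    static uint8_t db[256];        /* toy mapped file */

    /* Lay down a record at 'off': HDR bytes of header, then rec_len bytes
     * of data whose final 4 bytes are the tailer, HDR + rec_len. */
    static void put_rec(uint32_t off, uint32_t rec_len)
    {
            uint32_t tailer = HDR + rec_len;
            memcpy(db + off + HDR + rec_len - 4, &tailer, 4);
    }

    /* The tailer of the left neighbour sits just below rec_ptr. */
    static uint32_t left_of(uint32_t rec_ptr)
    {
            uint32_t left_size;
            memcpy(&left_size, db + rec_ptr - 4, 4);
            return rec_ptr - left_size;
    }

    int main(void)
    {
            put_rec(16, 24);       /* record A: bytes 16..47 */
            put_rec(48, 24);       /* record B starts right after A */
            printf("left of 48 is %u\n", left_of(48));   /* 16 */
            return 0;
    }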
vendor/current/lib/tdb/common/freelistcheck.c
r986 r988 36 36 static int seen_insert(struct tdb_context *mem_tdb, tdb_off_t rec_ptr) 37 37 { 38 TDB_DATA key , data;38 TDB_DATA key; 39 39 40 memset(&data, '\0', sizeof(data));41 40 key.dptr = (unsigned char *)&rec_ptr; 42 41 key.dsize = sizeof(rec_ptr); 43 return tdb_store(mem_tdb, key, data, TDB_INSERT);42 return tdb_store(mem_tdb, key, tdb_null, TDB_INSERT); 44 43 } 45 44 … … 53 52 *pnum_entries = 0; 54 53 55 mem_tdb = tdb_open("flval", tdb->h eader.hash_size,54 mem_tdb = tdb_open("flval", tdb->hash_size, 56 55 TDB_INTERNAL, O_RDWR, 0600); 57 56 if (!mem_tdb) { -
vendor/current/lib/tdb/common/hash.c
r986 r988 215 215 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { 216 216 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ 217 #ifdef VALGRIND218 217 const uint8_t *k8; 219 #endif220 218 221 219 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ … … 231 229 232 230 /*----------------------------- handle the last (probably partial) block */ 233 /*234 * "k[2]&0xffffff" actually reads beyond the end of the string, but235 * then masks off the part it's not allowed to read. Because the236 * string is aligned, the masked-off tail is in the same word as the237 * rest of the string. Every machine with memory protection I've seen238 * does it on word boundaries, so is OK with this. But VALGRIND will239 * still catch it and complain. The masking trick does make the hash240 * noticably faster for short strings (like English words).241 */242 #ifndef VALGRIND243 244 switch(length)245 {246 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;247 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;248 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;249 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;250 case 8 : b+=k[1]; a+=k[0]; break;251 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;252 case 6 : b+=k[1]&0xffff; a+=k[0]; break;253 case 5 : b+=k[1]&0xff; a+=k[0]; break;254 case 4 : a+=k[0]; break;255 case 3 : a+=k[0]&0xffffff; break;256 case 2 : a+=k[0]&0xffff; break;257 case 1 : a+=k[0]&0xff; break;258 case 0 : return c; /* zero length strings require no mixing */259 }260 261 #else /* make valgrind happy */262 263 231 k8 = (const uint8_t *)k; 264 232 switch(length) … … 278 246 case 0 : return c; 279 247 } 280 281 #endif /* !valgrind */282 283 248 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { 284 249 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ -
vendor/current/lib/tdb/common/io.c
r986 r988 1 /* 1 /* 2 2 Unix SMB/CIFS implementation. 3 3 … … 29 29 #include "tdb_private.h" 30 30 31 /* 32 * We prepend the mutex area, so fixup offsets. See mutex.c for details. 33 * tdb->hdr_ofs is 0 or header.mutex_size. 34 * 35 * Note: that we only have the 4GB limit of tdb_off_t for 36 * tdb->map_size. The file size on disk can be 4GB + tdb->hdr_ofs! 37 */ 38 39 static bool tdb_adjust_offset(struct tdb_context *tdb, off_t *off) 40 { 41 off_t tmp = tdb->hdr_ofs + *off; 42 43 if ((tmp < tdb->hdr_ofs) || (tmp < *off)) { 44 errno = EIO; 45 return false; 46 } 47 48 *off = tmp; 49 return true; 50 } 51 52 static ssize_t tdb_pwrite(struct tdb_context *tdb, const void *buf, 53 size_t count, off_t offset) 54 { 55 if (!tdb_adjust_offset(tdb, &offset)) { 56 return -1; 57 } 58 return pwrite(tdb->fd, buf, count, offset); 59 } 60 61 static ssize_t tdb_pread(struct tdb_context *tdb, void *buf, 62 size_t count, off_t offset) 63 { 64 if (!tdb_adjust_offset(tdb, &offset)) { 65 return -1; 66 } 67 return pread(tdb->fd, buf, count, offset); 68 } 69 70 static int tdb_ftruncate(struct tdb_context *tdb, off_t length) 71 { 72 if (!tdb_adjust_offset(tdb, &length)) { 73 return -1; 74 } 75 return ftruncate(tdb->fd, length); 76 } 77 78 static int tdb_fstat(struct tdb_context *tdb, struct stat *buf) 79 { 80 int ret; 81 82 ret = fstat(tdb->fd, buf); 83 if (ret == -1) { 84 return -1; 85 } 86 87 if (buf->st_size < tdb->hdr_ofs) { 88 errno = EIO; 89 return -1; 90 } 91 buf->st_size -= tdb->hdr_ofs; 92 93 return ret; 94 } 95 31 96 /* check for an out of bounds access - if it is out of bounds then 32 97 see if the database has been expanded by someone else and expand 33 if necessary 34 note that "len" is the minimum length needed for the db 98 if necessary 35 99 */ 36 static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, int probe) 100 static int tdb_oob(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len, 101 int probe) 37 102 { 38 103 struct stat st; 39 if (len <= tdb->map_size) 104 if (len + off < len) { 105 if (!probe) { 106 /* Ensure ecode is set for log fn. */ 107 tdb->ecode = TDB_ERR_IO; 108 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob off %u len %u wrap\n", 109 off, len)); 110 } 111 return -1; 112 } 113 114 if (off + len <= tdb->map_size) 40 115 return 0; 41 116 if (tdb->flags & TDB_INTERNAL) { … … 43 118 /* Ensure ecode is set for log fn. */ 44 119 tdb->ecode = TDB_ERR_IO; 45 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len % d beyond internal malloc size %d\n",46 (int) len, (int)tdb->map_size));47 } 48 return -1; 49 } 50 51 if ( fstat(tdb->fd, &st) == -1) {120 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %u beyond internal malloc size %u\n", 121 (int)(off + len), (int)tdb->map_size)); 122 } 123 return -1; 124 } 125 126 if (tdb_fstat(tdb, &st) == -1) { 52 127 tdb->ecode = TDB_ERR_IO; 53 128 return -1; 54 129 } 55 130 56 /* Unmap, update size, remap */ 131 /* Beware >4G files! */ 132 if ((tdb_off_t)st.st_size != st.st_size) { 133 /* Ensure ecode is set for log fn. */ 134 tdb->ecode = TDB_ERR_IO; 135 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_oob len %llu too large!\n", 136 (long long)st.st_size)); 137 return -1; 138 } 139 140 /* Unmap, update size, remap. We do this unconditionally, to handle 141 * the unusual case where the db is truncated. 142 * 143 * This can happen to a child using tdb_reopen_all(true) on a 144 * TDB_CLEAR_IF_FIRST tdb whose parent crashes: the next 145 * opener will truncate the database. 
*/ 57 146 if (tdb_munmap(tdb) == -1) { 58 147 tdb->ecode = TDB_ERR_IO; … … 60 149 } 61 150 tdb->map_size = st.st_size; 62 tdb_mmap(tdb); 63 64 if (st.st_size < (size_t)len) { 151 if (tdb_mmap(tdb) != 0) { 152 return -1; 153 } 154 155 if (st.st_size < (size_t)off + len) { 65 156 if (!probe) { 66 157 /* Ensure ecode is set for log fn. */ 67 158 tdb->ecode = TDB_ERR_IO; 68 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %d beyond eof at %d\n", 69 (int)len, (int)st.st_size)); 70 } 71 return -1; 72 } 73 159 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %u beyond eof at %u\n", 160 (int)(off + len), (int)st.st_size)); 161 } 162 return -1; 163 } 74 164 return 0; 75 165 } 76 166 77 167 /* write a lump of data at a specified offset */ 78 static int tdb_write(struct tdb_context *tdb, tdb_off_t off, 168 static int tdb_write(struct tdb_context *tdb, tdb_off_t off, 79 169 const void *buf, tdb_len_t len) 80 170 { … … 88 178 } 89 179 90 if (tdb->methods->tdb_oob(tdb, off +len, 0) != 0)180 if (tdb->methods->tdb_oob(tdb, off, len, 0) != 0) 91 181 return -1; 92 182 … … 94 184 memcpy(off + (char *)tdb->map_ptr, buf, len); 95 185 } else { 96 ssize_t written = pwrite(tdb->fd, buf, len, off); 186 #ifdef HAVE_INCOHERENT_MMAP 187 tdb->ecode = TDB_ERR_IO; 188 return -1; 189 #else 190 ssize_t written; 191 192 written = tdb_pwrite(tdb, buf, len, off); 193 97 194 if ((written != (ssize_t)len) && (written != -1)) { 98 195 /* try once more */ 99 196 tdb->ecode = TDB_ERR_IO; 100 197 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only " 101 "%d of %d bytes at %d, trying once more\n", 102 (int)written, len, off)); 103 written = pwrite(tdb->fd, (const char *)buf+written, 104 len-written, 105 off+written); 198 "%zi of %u bytes at %u, trying once more\n", 199 written, len, off)); 200 written = tdb_pwrite(tdb, (const char *)buf+written, 201 len-written, off+written); 106 202 } 107 203 if (written == -1) { 108 204 /* Ensure ecode is set for log fn. */ 109 205 tdb->ecode = TDB_ERR_IO; 110 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at % d"111 "len=% d(%s)\n", off, len, strerror(errno)));206 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %u " 207 "len=%u (%s)\n", off, len, strerror(errno))); 112 208 return -1; 113 209 } else if (written != (ssize_t)len) { 114 210 tdb->ecode = TDB_ERR_IO; 115 211 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: failed to " 116 "write % d bytes at %din two attempts\n",212 "write %u bytes at %u in two attempts\n", 117 213 len, off)); 118 214 return -1; 119 215 } 216 #endif 120 217 } 121 218 return 0; … … 133 230 134 231 /* read a lump of data at a specified offset, maybe convert */ 135 static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf, 232 static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf, 136 233 tdb_len_t len, int cv) 137 234 { 138 if (tdb->methods->tdb_oob(tdb, off +len, 0) != 0) {235 if (tdb->methods->tdb_oob(tdb, off, len, 0) != 0) { 139 236 return -1; 140 237 } … … 143 240 memcpy(buf, off + (char *)tdb->map_ptr, len); 144 241 } else { 145 ssize_t ret = pread(tdb->fd, buf, len, off); 242 #ifdef HAVE_INCOHERENT_MMAP 243 tdb->ecode = TDB_ERR_IO; 244 return -1; 245 #else 246 ssize_t ret; 247 248 ret = tdb_pread(tdb, buf, len, off); 146 249 if (ret != (ssize_t)len) { 147 250 /* Ensure ecode is set for log fn. 
*/ 148 251 tdb->ecode = TDB_ERR_IO; 149 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_read failed at %d " 150 "len=%d ret=%d (%s) map_size=%d\n", 151 (int)off, (int)len, (int)ret, strerror(errno), 152 (int)tdb->map_size)); 153 return -1; 154 } 252 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_read failed at %u " 253 "len=%u ret=%zi (%s) map_size=%u\n", 254 off, len, ret, strerror(errno), 255 tdb->map_size)); 256 return -1; 257 } 258 #endif 155 259 } 156 260 if (cv) { … … 165 269 do an unlocked scan of the hash table heads to find the next non-zero head. The value 166 270 will then be confirmed with the lock held 167 */ 271 */ 168 272 static void tdb_next_hash_chain(struct tdb_context *tdb, uint32_t *chain) 169 273 { 170 274 uint32_t h = *chain; 171 275 if (tdb->map_ptr) { 172 for (;h < tdb->h eader.hash_size;h++) {276 for (;h < tdb->hash_size;h++) { 173 277 if (0 != *(uint32_t *)(TDB_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) { 174 278 break; … … 177 281 } else { 178 282 uint32_t off=0; 179 for (;h < tdb->h eader.hash_size;h++) {283 for (;h < tdb->hash_size;h++) { 180 284 if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &off) != 0 || off != 0) { 181 285 break; … … 205 309 } 206 310 207 void tdb_mmap(struct tdb_context *tdb) 311 /* If mmap isn't coherent, *everyone* must always mmap. */ 312 static bool should_mmap(const struct tdb_context *tdb) 313 { 314 #ifdef HAVE_INCOHERENT_MMAP 315 return true; 316 #else 317 return !(tdb->flags & TDB_NOMMAP); 318 #endif 319 } 320 321 int tdb_mmap(struct tdb_context *tdb) 208 322 { 209 323 if (tdb->flags & TDB_INTERNAL) 210 return ;324 return 0; 211 325 212 326 #ifdef HAVE_MMAP 213 if (!(tdb->flags & TDB_NOMMAP)) { 214 tdb->map_ptr = mmap(NULL, tdb->map_size, 215 PROT_READ|(tdb->read_only? 0:PROT_WRITE), 216 MAP_SHARED|MAP_FILE, tdb->fd, 0); 327 if (should_mmap(tdb)) { 328 tdb->map_ptr = mmap(NULL, tdb->map_size, 329 PROT_READ|(tdb->read_only? 
0:PROT_WRITE), 330 MAP_SHARED|MAP_FILE, tdb->fd, 331 tdb->hdr_ofs); 217 332 218 333 /* … … 222 337 if (tdb->map_ptr == MAP_FAILED) { 223 338 tdb->map_ptr = NULL; 224 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_mmap failed for size % d (%s)\n",339 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_mmap failed for size %u (%s)\n", 225 340 tdb->map_size, strerror(errno))); 341 #ifdef HAVE_INCOHERENT_MMAP 342 tdb->ecode = TDB_ERR_IO; 343 return -1; 344 #endif 226 345 } 227 346 } else { … … 231 350 tdb->map_ptr = NULL; 232 351 #endif 352 return 0; 233 353 } 234 354 … … 238 358 { 239 359 char buf[8192]; 360 tdb_off_t new_size; 240 361 241 362 if (tdb->read_only || tdb->traverse_read) { … … 244 365 } 245 366 246 if (ftruncate(tdb->fd, size+addition) == -1) { 367 if (!tdb_add_off_t(size, addition, &new_size)) { 368 tdb->ecode = TDB_ERR_OOM; 369 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write " 370 "overflow detected current size[%u] addition[%u]!\n", 371 (unsigned)size, (unsigned)addition)); 372 errno = ENOSPC; 373 return -1; 374 } 375 376 if (tdb_ftruncate(tdb, new_size) == -1) { 247 377 char b = 0; 248 ssize_t written = pwrite(tdb->fd, &b, 1, (size+addition)- 1);378 ssize_t written = tdb_pwrite(tdb, &b, 1, new_size - 1); 249 379 if (written == 0) { 250 380 /* try once more, potentially revealing errno */ 251 written = pwrite(tdb->fd, &b, 1, (size+addition)- 1);381 written = tdb_pwrite(tdb, &b, 1, new_size - 1); 252 382 } 253 383 if (written == 0) { … … 256 386 } 257 387 if (written != 1) { 258 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file to %d failed (%s)\n", 259 size+addition, strerror(errno))); 388 tdb->ecode = TDB_ERR_OOM; 389 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file to %u failed (%s)\n", 390 (unsigned)new_size, strerror(errno))); 260 391 return -1; 261 392 } … … 268 399 while (addition) { 269 400 size_t n = addition>sizeof(buf)?sizeof(buf):addition; 270 ssize_t written = pwrite(tdb->fd, buf, n, size);401 ssize_t written = tdb_pwrite(tdb, buf, n, size); 271 402 if (written == 0) { 272 403 /* prevent infinite loops: try _once_ more */ 273 written = pwrite(tdb->fd, buf, n, size);404 written = tdb_pwrite(tdb, buf, n, size); 274 405 } 275 406 if (written == 0) { 276 407 /* give up, trying to provide a useful errno */ 408 tdb->ecode = TDB_ERR_OOM; 277 409 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write " 278 410 "returned 0 twice: giving up!\n")); 279 411 errno = ENOSPC; 280 412 return -1; 281 } else if (written == -1) { 413 } 414 if (written == -1) { 415 tdb->ecode = TDB_ERR_OOM; 282 416 TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write of " 283 "% dbytes failed (%s)\n", (int)n,417 "%u bytes failed (%s)\n", (int)n, 284 418 strerror(errno))); 285 419 return -1; 286 } else if (written != n) { 420 } 421 if (written != n) { 287 422 TDB_LOG((tdb, TDB_DEBUG_WARNING, "expand_file: wrote " 288 "only % d of %d bytes - retrying\n", (int)written,289 (int)n));423 "only %zu of %zi bytes - retrying\n", written, 424 n)); 290 425 } 291 426 addition -= written; … … 295 430 } 296 431 432 433 /* You need 'size', this tells you how much you should expand by. */ 434 tdb_off_t tdb_expand_adjust(tdb_off_t map_size, tdb_off_t size, int page_size) 435 { 436 tdb_off_t new_size, top_size, increment; 437 tdb_off_t max_size = UINT32_MAX - map_size; 438 439 if (size > max_size) { 440 /* 441 * We can't round up anymore, just give back 442 * what we're asked for. 443 * 444 * The caller has to take care of the ENOSPC handling. 
445 */ 446 return size; 447 } 448 449 /* limit size in order to avoid using up huge amounts of memory for 450 * in memory tdbs if an oddball huge record creeps in */ 451 if (size > 100 * 1024) { 452 increment = size * 2; 453 } else { 454 increment = size * 100; 455 } 456 if (increment < size) { 457 goto overflow; 458 } 459 460 if (!tdb_add_off_t(map_size, increment, &top_size)) { 461 goto overflow; 462 } 463 464 /* always make room for at least top_size more records, and at 465 least 25% more space. if the DB is smaller than 100MiB, 466 otherwise grow it by 10% only. */ 467 if (map_size > 100 * 1024 * 1024) { 468 new_size = map_size * 1.10; 469 } else { 470 new_size = map_size * 1.25; 471 } 472 if (new_size < map_size) { 473 goto overflow; 474 } 475 476 /* Round the database up to a multiple of the page size */ 477 new_size = MAX(top_size, new_size); 478 479 if (new_size + page_size < new_size) { 480 /* There's a "+" in TDB_ALIGN that might overflow... */ 481 goto overflow; 482 } 483 484 return TDB_ALIGN(new_size, page_size) - map_size; 485 486 overflow: 487 /* 488 * Somewhere in between we went over 4GB. Make one big jump to 489 * exactly 4GB database size. 490 */ 491 return max_size; 492 } 297 493 298 494 /* expand the database at least size bytes by expanding the underlying … … 301 497 { 302 498 struct tdb_record rec; 303 tdb_off_t offset, new_size, top_size, map_size; 499 tdb_off_t offset; 500 tdb_off_t new_size; 304 501 305 502 if (tdb_lock(tdb, -1, F_WRLCK) == -1) { … … 309 506 310 507 /* must know about any previous expansions by another process */ 311 tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1); 312 313 /* limit size in order to avoid using up huge amounts of memory for 314 * in memory tdbs if an oddball huge record creeps in */ 315 if (size > 100 * 1024) { 316 top_size = tdb->map_size + size * 2; 317 } else { 318 top_size = tdb->map_size + size * 100; 319 } 320 321 /* always make room for at least top_size more records, and at 322 least 25% more space. if the DB is smaller than 100MiB, 323 otherwise grow it by 10% only. */ 324 if (tdb->map_size > 100 * 1024 * 1024) { 325 map_size = tdb->map_size * 1.10; 326 } else { 327 map_size = tdb->map_size * 1.25; 328 } 329 330 /* Round the database up to a multiple of the page size */ 331 new_size = MAX(top_size, map_size); 332 size = TDB_ALIGN(new_size, tdb->page_size) - tdb->map_size; 333 334 if (!(tdb->flags & TDB_INTERNAL)) 335 tdb_munmap(tdb); 508 tdb->methods->tdb_oob(tdb, tdb->map_size, 1, 1); 336 509 337 510 /* 338 * We must ensure the file is unmapped before doing this 339 * to ensure consistency with systems like OpenBSD where 340 * writes and mmaps are not consistent. 511 * Note: that we don't care about tdb->hdr_ofs != 0 here 512 * 513 * The 4GB limitation is just related to tdb->map_size 514 * and the offset calculation in the records. 515 * 516 * The file on disk can be up to 4GB + tdb->hdr_ofs 341 517 */ 342 343 /* expand the file itself */ 344 if (!(tdb->flags & TDB_INTERNAL)) { 345 if (tdb->methods->tdb_expand_file(tdb, tdb->map_size, size) != 0) 346 goto fail; 347 } 348 349 tdb->map_size += size; 350 351 if (tdb->flags & TDB_INTERNAL) { 352 char *new_map_ptr = (char *)realloc(tdb->map_ptr, 353 tdb->map_size); 354 if (!new_map_ptr) { 355 tdb->map_size -= size; 356 goto fail; 357 } 358 tdb->map_ptr = new_map_ptr; 359 } else { 360 /* 361 * We must ensure the file is remapped before adding the space 362 * to ensure consistency with systems like OpenBSD where 363 * writes and mmaps are not consistent. 
364 */ 365 366 /* We're ok if the mmap fails as we'll fallback to read/write */ 367 tdb_mmap(tdb); 518 size = tdb_expand_adjust(tdb->map_size, size, tdb->page_size); 519 520 if (!tdb_add_off_t(tdb->map_size, size, &new_size)) { 521 tdb->ecode = TDB_ERR_OOM; 522 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_expand " 523 "overflow detected current map_size[%u] size[%u]!\n", 524 (unsigned)tdb->map_size, (unsigned)size)); 525 goto fail; 368 526 } 369 527 370 528 /* form a new freelist record */ 529 offset = tdb->map_size; 371 530 memset(&rec,'\0',sizeof(rec)); 372 531 rec.rec_len = size - sizeof(rec); 373 532 533 if (tdb->flags & TDB_INTERNAL) { 534 char *new_map_ptr; 535 536 new_map_ptr = (char *)realloc(tdb->map_ptr, new_size); 537 if (!new_map_ptr) { 538 tdb->ecode = TDB_ERR_OOM; 539 goto fail; 540 } 541 tdb->map_ptr = new_map_ptr; 542 tdb->map_size = new_size; 543 } else { 544 int ret; 545 546 /* 547 * expand the file itself 548 */ 549 ret = tdb->methods->tdb_expand_file(tdb, tdb->map_size, size); 550 if (ret != 0) { 551 goto fail; 552 } 553 554 /* Explicitly remap: if we're in a transaction, this won't 555 * happen automatically! */ 556 tdb_munmap(tdb); 557 tdb->map_size = new_size; 558 if (tdb_mmap(tdb) != 0) { 559 goto fail; 560 } 561 } 562 374 563 /* link it into the free list */ 375 offset = tdb->map_size - size;376 564 if (tdb_free(tdb, offset, &rec) == -1) 377 565 goto fail; … … 407 595 /* Ensure ecode is set for log fn. */ 408 596 tdb->ecode = TDB_ERR_OOM; 409 TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_alloc_read malloc failed len=% d(%s)\n",597 TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_alloc_read malloc failed len=%u (%s)\n", 410 598 len, strerror(errno))); 411 599 return NULL; … … 436 624 * parser directly at the mmap area. 437 625 */ 438 if (tdb->methods->tdb_oob(tdb, offset +len, 0) != 0) {626 if (tdb->methods->tdb_oob(tdb, offset, len, 0) != 0) { 439 627 return -1; 440 628 } … … 460 648 /* Ensure ecode is set for log fn. */ 461 649 tdb->ecode = TDB_ERR_CORRUPT; 462 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=% d\n", rec->magic, offset));463 return -1; 464 } 465 return tdb->methods->tdb_oob(tdb, rec->next +sizeof(*rec), 0);650 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=%u\n", rec->magic, offset)); 651 return -1; 652 } 653 return tdb->methods->tdb_oob(tdb, rec->next, sizeof(*rec), 0); 466 654 } 467 655 -
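Several hunks above guard additions with tdb_add_off_t(), which is not part of this directory's diff (it lives elsewhere in the tree). To make the expand/oob logic self-explanatory, here is an assumed reading of its contract — a sketch, not the actual definition:

    #include <stdint.h>
    #include <stdbool.h>

    typedef uint32_t tdb_off_t;

    /* Assumed contract: overflow-checked addition of two 32-bit offsets;
     * report failure instead of silently wrapping. */
    static bool tdb_add_off_t(tdb_off_t a, tdb_off_t b, tdb_off_t *sum)
    {
            tdb_off_t ret = a + b;         /* wraps modulo 2^32 */

            if (ret < a || ret < b) {
                    return false;          /* sum went backwards: overflow */
            }
            *sum = ret;
            return true;
    }

    int main(void)
    {
            tdb_off_t new_size;

            /* Mirrors the expand_file() guard: growing 0xfffff000 by
             * 0x2000 must fail rather than wrap to 0x1000. */
            if (!tdb_add_off_t(0xfffff000, 0x2000, &new_size)) {
                    return 1;              /* overflow detected */
            }
            return 0;
    }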
vendor/current/lib/tdb/common/lock.c
r986 r988 1 /* 1 /* 2 2 Unix SMB/CIFS implementation. 3 3 … … 37 37 { 38 38 struct flock fl; 39 int cmd; 40 41 #ifdef USE_TDB_MUTEX_LOCKING 42 { 43 int ret; 44 if (tdb_mutex_lock(tdb, rw, off, len, waitflag, &ret)) { 45 return ret; 46 } 47 } 48 #endif 39 49 40 50 fl.l_type = rw; … … 44 54 fl.l_pid = 0; 45 55 46 if (waitflag) 47 return fcntl(tdb->fd, F_SETLKW, &fl); 48 else 49 return fcntl(tdb->fd, F_SETLK, &fl); 56 cmd = waitflag ? F_SETLKW : F_SETLK; 57 58 return fcntl(tdb->fd, cmd, &fl); 50 59 } 51 60 … … 111 120 #endif 112 121 122 #ifdef USE_TDB_MUTEX_LOCKING 123 { 124 int ret; 125 if (tdb_mutex_unlock(tdb, rw, off, len, &ret)) { 126 return ret; 127 } 128 } 129 #endif 130 113 131 fl.l_type = F_UNLCK; 114 132 fl.l_whence = SEEK_SET; … … 127 145 128 146 /* a byte range locking function - return 0 on success 129 this functions locks/unlocks 1byte at the specified offset.147 this functions locks/unlocks "len" byte at the specified offset. 130 148 131 149 On error, errno is also set so that errors are passed back properly 132 through tdb_open(). 150 through tdb_open(). 133 151 134 152 note that a len of zero means lock to end of file … … 170 188 * locks. */ 171 189 if (!(flags & TDB_LOCK_PROBE) && errno != EAGAIN) { 172 TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brlock failed (fd=%d) at offset % d rw_type=%d flags=%d len=%d\n",173 tdb->fd, offset, rw_type, flags, (int)len));190 TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brlock failed (fd=%d) at offset %u rw_type=%d flags=%d len=%zu\n", 191 tdb->fd, offset, rw_type, flags, len)); 174 192 } 175 193 return -1; … … 192 210 193 211 if (ret == -1) { 194 TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brunlock failed (fd=%d) at offset % d rw_type=%d len=%d\n",195 tdb->fd, offset, rw_type, (int)len));212 TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_brunlock failed (fd=%d) at offset %u rw_type=%u len=%zu\n", 213 tdb->fd, offset, rw_type, len)); 196 214 } 197 215 return ret; … … 199 217 200 218 /* 201 upgrade a read lock to a write lock. This needs to be handled in a 202 special way as some OSes (such as solaris) have too conservative 203 deadlock detection and claim a deadlock when progress can be 204 made. For those OSes we may loop for a while. 219 * Do a tdb_brlock in a loop. Some OSes (such as solaris) have too 220 * conservative deadlock detection and claim a deadlock when progress can be 221 * made. For those OSes we may loop for a while. 222 */ 223 224 static int tdb_brlock_retry(struct tdb_context *tdb, 225 int rw_type, tdb_off_t offset, size_t len, 226 enum tdb_lock_flags flags) 227 { 228 int count = 1000; 229 230 while (count--) { 231 struct timeval tv; 232 int ret; 233 234 ret = tdb_brlock(tdb, rw_type, offset, len, flags); 235 if (ret == 0) { 236 return 0; 237 } 238 if (errno != EDEADLK) { 239 break; 240 } 241 /* sleep for as short a time as we can - more portable than usleep() */ 242 tv.tv_sec = 0; 243 tv.tv_usec = 1; 244 select(0, NULL, NULL, NULL, &tv); 245 } 246 return -1; 247 } 248 249 /* 250 upgrade a read lock to a write lock. 
205 251 */ 206 252 int tdb_allrecord_upgrade(struct tdb_context *tdb) 207 253 { 208 int count = 1000;254 int ret; 209 255 210 256 if (tdb->allrecord_lock.count != 1) { … … 221 267 } 222 268 223 while (count--) { 224 struct timeval tv; 225 if (tdb_brlock(tdb, F_WRLCK, FREELIST_TOP, 0, 226 TDB_LOCK_WAIT|TDB_LOCK_PROBE) == 0) { 227 tdb->allrecord_lock.ltype = F_WRLCK; 228 tdb->allrecord_lock.off = 0; 229 return 0; 230 } 231 if (errno != EDEADLK) { 232 break; 233 } 234 /* sleep for as short a time as we can - more portable than usleep() */ 235 tv.tv_sec = 0; 236 tv.tv_usec = 1; 237 select(0, NULL, NULL, NULL, &tv); 238 } 269 if (tdb_have_mutexes(tdb)) { 270 ret = tdb_mutex_allrecord_upgrade(tdb); 271 if (ret == -1) { 272 goto fail; 273 } 274 ret = tdb_brlock_retry(tdb, F_WRLCK, lock_offset(tdb->hash_size), 275 0, TDB_LOCK_WAIT|TDB_LOCK_PROBE); 276 if (ret == -1) { 277 tdb_mutex_allrecord_downgrade(tdb); 278 } 279 } else { 280 ret = tdb_brlock_retry(tdb, F_WRLCK, FREELIST_TOP, 0, 281 TDB_LOCK_WAIT|TDB_LOCK_PROBE); 282 } 283 284 if (ret == 0) { 285 tdb->allrecord_lock.ltype = F_WRLCK; 286 tdb->allrecord_lock.off = 0; 287 return 0; 288 } 289 fail: 239 290 TDB_LOG((tdb, TDB_DEBUG_TRACE,"tdb_allrecord_upgrade failed\n")); 240 291 return -1; … … 260 311 struct tdb_lock_type *new_lck; 261 312 262 if (offset >= lock_offset(tdb->h eader.hash_size)) {313 if (offset >= lock_offset(tdb->hash_size)) { 263 314 tdb->ecode = TDB_ERR_LOCK; 264 315 TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_lock: invalid offset %u for ltype=%d\n", … … 279 330 } 280 331 281 new_lck = (struct tdb_lock_type *)realloc( 282 tdb->lockrecs, 283 sizeof(*tdb->lockrecs) * (tdb->num_lockrecs+1)); 284 if (new_lck == NULL) { 285 errno = ENOMEM; 286 return -1; 287 } 288 tdb->lockrecs = new_lck; 332 if (tdb->num_lockrecs == tdb->lockrecs_array_length) { 333 new_lck = (struct tdb_lock_type *)realloc( 334 tdb->lockrecs, 335 sizeof(*tdb->lockrecs) * (tdb->num_lockrecs+1)); 336 if (new_lck == NULL) { 337 errno = ENOMEM; 338 return -1; 339 } 340 tdb->lockrecs_array_length = tdb->num_lockrecs+1; 341 tdb->lockrecs = new_lck; 342 } 289 343 290 344 /* Since fcntl locks don't nest, we do a lock for the first one, … … 294 348 } 295 349 296 tdb->lockrecs[tdb->num_lockrecs].off = offset; 297 tdb->lockrecs[tdb->num_lockrecs].count = 1; 298 tdb->lockrecs[tdb->num_lockrecs].ltype = ltype; 350 new_lck = &tdb->lockrecs[tdb->num_lockrecs]; 351 352 new_lck->off = offset; 353 new_lck->count = 1; 354 new_lck->ltype = ltype; 299 355 tdb->num_lockrecs++; 300 356 … … 335 391 } 336 392 393 /* 394 * A allrecord lock allows us to avoid per chain locks. Check if the allrecord 395 * lock is strong enough. 396 */ 397 static int tdb_lock_covered_by_allrecord_lock(struct tdb_context *tdb, 398 int ltype) 399 { 400 if (ltype == F_RDLCK) { 401 /* 402 * The allrecord_lock is equal (F_RDLCK) or stronger 403 * (F_WRLCK). Pass. 404 */ 405 return 0; 406 } 407 408 if (tdb->allrecord_lock.ltype == F_RDLCK) { 409 /* 410 * We ask for ltype==F_WRLCK, but the allrecord_lock 411 * is too weak. We can't upgrade here, so fail. 412 */ 413 tdb->ecode = TDB_ERR_LOCK; 414 return -1; 415 } 416 417 /* 418 * Asking for F_WRLCK, allrecord is F_WRLCK as well. Pass. 
419 */ 420 return 0; 421 } 422 337 423 static int tdb_lock_list(struct tdb_context *tdb, int list, int ltype, 338 424 enum tdb_lock_flags waitflag) … … 341 427 bool check = false; 342 428 343 /* a allrecord lock allows us to avoid per chain locks */344 if (tdb->allrecord_lock.count &&345 (ltype == tdb->allrecord_lock.ltype || ltype == F_RDLCK)) {346 return 0;347 }348 349 429 if (tdb->allrecord_lock.count) { 350 tdb->ecode = TDB_ERR_LOCK; 351 ret = -1; 352 } else { 353 /* Only check when we grab first data lock. */ 354 check = !have_data_locks(tdb); 355 ret = tdb_nest_lock(tdb, lock_offset(list), ltype, waitflag); 356 357 if (ret == 0 && check && tdb_needs_recovery(tdb)) { 358 tdb_nest_unlock(tdb, lock_offset(list), ltype, false); 359 360 if (tdb_lock_and_recover(tdb) == -1) { 361 return -1; 362 } 363 return tdb_lock_list(tdb, list, ltype, waitflag); 364 } 430 return tdb_lock_covered_by_allrecord_lock(tdb, ltype); 431 } 432 433 /* 434 * Check for recoveries: Someone might have kill -9'ed a process 435 * during a commit. 436 */ 437 check = !have_data_locks(tdb); 438 ret = tdb_nest_lock(tdb, lock_offset(list), ltype, waitflag); 439 440 if (ret == 0 && check && tdb_needs_recovery(tdb)) { 441 tdb_nest_unlock(tdb, lock_offset(list), ltype, false); 442 443 if (tdb_lock_and_recover(tdb) == -1) { 444 return -1; 445 } 446 return tdb_lock_list(tdb, list, ltype, waitflag); 365 447 } 366 448 return ret; … … 381 463 382 464 /* lock a list in the database. list -1 is the alloc list. non-blocking lock */ 383 int tdb_lock_nonblock(struct tdb_context *tdb, int list, int ltype)465 _PUBLIC_ int tdb_lock_nonblock(struct tdb_context *tdb, int list, int ltype) 384 466 { 385 467 return tdb_lock_list(tdb, list, ltype, TDB_LOCK_NOWAIT); … … 397 479 398 480 /* Sanity checks */ 399 if (offset >= lock_offset(tdb->h eader.hash_size)) {400 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: offset %u invalid (%d)\n", offset, tdb->h eader.hash_size));481 if (offset >= lock_offset(tdb->hash_size)) { 482 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: offset %u invalid (%d)\n", offset, tdb->hash_size)); 401 483 return ret; 402 484 } … … 437 519 */ 438 520 439 if (tdb->num_lockrecs == 0) {440 SAFE_FREE(tdb->lockrecs);441 }442 443 521 if (ret) 444 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: An error occurred unlocking!\n")); 445 return ret; 446 } 447 448 int tdb_unlock(struct tdb_context *tdb, int list, int ltype)522 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlock: An error occurred unlocking!\n")); 523 return ret; 524 } 525 526 _PUBLIC_ int tdb_unlock(struct tdb_context *tdb, int list, int ltype) 449 527 { 450 528 /* a global lock allows us to avoid per chain locks */ 451 if (tdb->allrecord_lock.count &&452 (ltype == tdb->allrecord_lock.ltype || ltype == F_RDLCK)) {453 return 0;454 }455 456 529 if (tdb->allrecord_lock.count) { 457 tdb->ecode = TDB_ERR_LOCK; 458 return -1; 530 return tdb_lock_covered_by_allrecord_lock(tdb, ltype); 459 531 } 460 532 … … 554 626 enum tdb_lock_flags flags, bool upgradable) 555 627 { 628 int ret; 629 556 630 switch (tdb_allrecord_check(tdb, ltype, flags, upgradable)) { 557 631 case -1: … … 563 637 /* We cover two kinds of locks: 564 638 * 1) Normal chain locks. Taken for almost all operations. 565 * 3) Individual records locks. Taken after normal or free639 * 2) Individual records locks. Taken after normal or free 566 640 * chain locks. 567 641 * 568 642 * It is (1) which cause the starvation problem, so we're only 569 643 * gradual for that. 
*/ 570 if (tdb_chainlock_gradual(tdb, ltype, flags, FREELIST_TOP, 571 tdb->header.hash_size * 4) == -1) { 644 645 if (tdb_have_mutexes(tdb)) { 646 ret = tdb_mutex_allrecord_lock(tdb, ltype, flags); 647 } else { 648 ret = tdb_chainlock_gradual(tdb, ltype, flags, FREELIST_TOP, 649 tdb->hash_size * 4); 650 } 651 652 if (ret == -1) { 572 653 return -1; 573 654 } 574 655 575 656 /* Grab individual record locks. */ 576 if (tdb_brlock(tdb, ltype, lock_offset(tdb->h eader.hash_size), 0,657 if (tdb_brlock(tdb, ltype, lock_offset(tdb->hash_size), 0, 577 658 flags) == -1) { 578 tdb_brunlock(tdb, ltype, FREELIST_TOP, 579 tdb->header.hash_size * 4); 659 if (tdb_have_mutexes(tdb)) { 660 tdb_mutex_allrecord_unlock(tdb); 661 } else { 662 tdb_brunlock(tdb, ltype, FREELIST_TOP, 663 tdb->hash_size * 4); 664 } 580 665 return -1; 581 666 } … … 633 718 } 634 719 635 if (!mark_lock && tdb_brunlock(tdb, ltype, FREELIST_TOP, 0)) { 636 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlockall failed (%s)\n", strerror(errno))); 637 return -1; 720 if (!mark_lock) { 721 int ret; 722 723 if (tdb_have_mutexes(tdb)) { 724 ret = tdb_mutex_allrecord_unlock(tdb); 725 if (ret == 0) { 726 ret = tdb_brunlock(tdb, ltype, 727 lock_offset(tdb->hash_size), 728 0); 729 } 730 } else { 731 ret = tdb_brunlock(tdb, ltype, FREELIST_TOP, 0); 732 } 733 734 if (ret != 0) { 735 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_unlockall failed " 736 "(%s)\n", strerror(errno))); 737 return -1; 738 } 638 739 } 639 740 … … 756 857 tdb_trace_1rec(tdb, "tdb_chainunlock_read", key); 757 858 return tdb_unlock(tdb, BUCKET(tdb->hash_fn(&key)), F_RDLCK); 859 } 860 861 _PUBLIC_ int tdb_chainlock_read_nonblock(struct tdb_context *tdb, TDB_DATA key) 862 { 863 int ret = tdb_lock_nonblock(tdb, BUCKET(tdb->hash_fn(&key)), F_RDLCK); 864 tdb_trace_1rec_ret(tdb, "tdb_chainlock_read_nonblock", key, ret); 865 return ret; 758 866 } 759 867 … … 841 949 842 950 if (tdb->allrecord_lock.count != 0) { 843 tdb_ brunlock(tdb, tdb->allrecord_lock.ltype, FREELIST_TOP, 0);951 tdb_allrecord_unlock(tdb, tdb->allrecord_lock.ltype, false); 844 952 tdb->allrecord_lock.count = 0; 845 953 } … … 856 964 } 857 965 tdb->num_lockrecs = active; 858 if (tdb->num_lockrecs == 0) { 859 SAFE_FREE(tdb->lockrecs); 860 } 861 } 966 } 967 968 /* Following functions are added specifically to support CTDB. */ 969 970 /* Don't do actual fcntl locking, just mark tdb locked */ 971 int tdb_transaction_write_lock_mark(struct tdb_context *tdb); 972 _PUBLIC_ int tdb_transaction_write_lock_mark(struct tdb_context *tdb) 973 { 974 return tdb_transaction_lock(tdb, F_WRLCK, TDB_LOCK_MARK_ONLY); 975 } 976 977 /* Don't do actual fcntl unlocking, just mark tdb unlocked */ 978 int tdb_transaction_write_lock_unmark(struct tdb_context *tdb); 979 _PUBLIC_ int tdb_transaction_write_lock_unmark(struct tdb_context *tdb) 980 { 981 return tdb_nest_unlock(tdb, TRANSACTION_LOCK, F_WRLCK, true); 982 } -
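Both tdb_lock_list() and tdb_unlock() now delegate the allrecord-lock short-circuit to one rule, tdb_lock_covered_by_allrecord_lock(): a chain lock request is satisfied without touching fcntl iff the held allrecord lock is at least as strong as the request. A tiny standalone restatement of that rule (the real function also sets tdb->ecode on failure):

    #include <fcntl.h>
    #include <stdio.h>

    /*  0: the held allrecord lock already covers the request.
     * -1: a write was requested under a read-only allrecord lock;
     *     there is no in-place upgrade on this path, so it fails. */
    static int covered(int held_ltype, int asked_ltype)
    {
            if (asked_ltype == F_RDLCK)
                    return 0;
            if (held_ltype == F_RDLCK)
                    return -1;
            return 0;
    }

    int main(void)
    {
            printf("%d\n", covered(F_RDLCK, F_RDLCK));  /*  0 */
            printf("%d\n", covered(F_RDLCK, F_WRLCK));  /* -1 */
            printf("%d\n", covered(F_WRLCK, F_WRLCK));  /*  0 */
            return 0;
    }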
vendor/current/lib/tdb/common/open.c
r914 r988 1 /* 1 /* 2 2 Unix SMB/CIFS implementation. 3 3 … … 52 52 53 53 /* initialise a new database with a specified hash size */ 54 static int tdb_new_database(struct tdb_context *tdb, int hash_size) 54 static int tdb_new_database(struct tdb_context *tdb, struct tdb_header *header, 55 int hash_size) 55 56 { 56 57 struct tdb_header *newdb; … … 76 77 newdb->rwlocks = TDB_HASH_RWLOCK_MAGIC; 77 78 79 /* 80 * We create a tdb with TDB_FEATURE_FLAG_MUTEX support, 81 * the flag combination and runtime feature checks 82 * are done by the caller already. 83 */ 84 if (tdb->flags & TDB_MUTEX_LOCKING) { 85 newdb->feature_flags |= TDB_FEATURE_FLAG_MUTEX; 86 } 87 88 /* 89 * If we have any features we add the FEATURE_FLAG_MAGIC, overwriting the 90 * TDB_HASH_RWLOCK_MAGIC above. 91 */ 92 if (newdb->feature_flags != 0) { 93 newdb->rwlocks = TDB_FEATURE_FLAG_MAGIC; 94 } 95 96 /* 97 * It's required for some following code pathes 98 * to have the fields on 'tdb' up-to-date. 99 * 100 * E.g. tdb_mutex_size() requires it 101 */ 102 tdb->feature_flags = newdb->feature_flags; 103 tdb->hash_size = newdb->hash_size; 104 78 105 if (tdb->flags & TDB_INTERNAL) { 79 106 tdb->map_size = size; 80 107 tdb->map_ptr = (char *)newdb; 81 memcpy( &tdb->header, newdb, sizeof(tdb->header));108 memcpy(header, newdb, sizeof(*header)); 82 109 /* Convert the `ondisk' version if asked. */ 83 110 CONVERT(*newdb); … … 90 117 goto fail; 91 118 119 if (newdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) { 120 newdb->mutex_size = tdb_mutex_size(tdb); 121 tdb->hdr_ofs = newdb->mutex_size; 122 } 123 92 124 /* This creates an endian-converted header, as if read from disk */ 93 125 CONVERT(*newdb); 94 memcpy( &tdb->header, newdb, sizeof(tdb->header));126 memcpy(header, newdb, sizeof(*header)); 95 127 /* Don't endian-convert the magic food! */ 96 128 memcpy(newdb->magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1); 97 /* we still have "ret == -1" here */ 98 if (tdb_write_all(tdb->fd, newdb, size)) 99 ret = 0; 100 129 130 if (!tdb_write_all(tdb->fd, newdb, size)) 131 goto fail; 132 133 if (newdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) { 134 135 /* 136 * Now we init the mutex area 137 * followed by a second header. 138 */ 139 140 ret = ftruncate( 141 tdb->fd, 142 newdb->mutex_size + sizeof(struct tdb_header)); 143 if (ret == -1) { 144 goto fail; 145 } 146 ret = tdb_mutex_init(tdb); 147 if (ret == -1) { 148 goto fail; 149 } 150 151 /* 152 * Write a second header behind the mutexes. That's the area 153 * that will be mmapp'ed. 154 */ 155 ret = lseek(tdb->fd, newdb->mutex_size, SEEK_SET); 156 if (ret == -1) { 157 goto fail; 158 } 159 if (!tdb_write_all(tdb->fd, newdb, size)) { 160 goto fail; 161 } 162 } 163 164 ret = 0; 101 165 fail: 102 166 SAFE_FREE(newdb); … … 120 184 } 121 185 122 /* open the database, creating it if necessary 186 /* open the database, creating it if necessary 123 187 124 188 The open_flags and mode are passed straight to the open call on the … … 126 190 is advisory, use zero for a default value. 127 191 128 Return is NULL on error, in which case errno is also set. Don't 192 Return is NULL on error, in which case errno is also set. Don't 129 193 try to call tdb_error or tdb_errname, just do strerror(errno). 
130 194 … … 143 207 144 208 static bool check_header_hash(struct tdb_context *tdb, 209 struct tdb_header *header, 145 210 bool default_hash, uint32_t *m1, uint32_t *m2) 146 211 { 147 212 tdb_header_hash(tdb, m1, m2); 148 if ( tdb->header.magic1_hash == *m1 &&149 tdb->header.magic2_hash == *m2) {213 if (header->magic1_hash == *m1 && 214 header->magic2_hash == *m2) { 150 215 return true; 151 216 } … … 160 225 else 161 226 tdb->hash_fn = tdb_old_hash; 162 return check_header_hash(tdb, false, m1, m2); 227 return check_header_hash(tdb, header, false, m1, m2); 228 } 229 230 static bool tdb_mutex_open_ok(struct tdb_context *tdb, 231 const struct tdb_header *header) 232 { 233 int locked; 234 235 if (tdb->flags & TDB_NOLOCK) { 236 /* 237 * We don't look at locks, so it does not matter to have a 238 * compatible mutex implementation. Allow the open. 239 */ 240 return true; 241 } 242 243 locked = tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK, 244 TDB_LOCK_NOWAIT|TDB_LOCK_PROBE); 245 246 if ((locked == -1) && (tdb->ecode == TDB_ERR_LOCK)) { 247 /* 248 * CLEAR_IF_FIRST still active. The tdb was created on this 249 * host, so we can assume the mutex implementation is 250 * compatible. Important for tools like tdbdump on a still 251 * open locking.tdb. 252 */ 253 goto check_local_settings; 254 } 255 256 /* 257 * We got the CLEAR_IF_FIRST lock. That means the database was 258 * potentially copied from somewhere else. The mutex implementation 259 * might be incompatible. 260 */ 261 262 if (tdb_nest_unlock(tdb, ACTIVE_LOCK, F_WRLCK, false) == -1) { 263 /* 264 * Should not happen 265 */ 266 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_mutex_open_ok: " 267 "failed to release ACTIVE_LOCK on %s: %s\n", 268 tdb->name, strerror(errno))); 269 return false; 270 } 271 272 check_local_settings: 273 274 if (!(tdb->flags & TDB_MUTEX_LOCKING)) { 275 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_mutex_open_ok[%s]: " 276 "Can use mutexes only with " 277 "MUTEX_LOCKING or NOLOCK\n", 278 tdb->name)); 279 return false; 280 } 281 282 if (tdb_mutex_size(tdb) != header->mutex_size) { 283 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_mutex_open_ok[%s]: " 284 "Mutex size changed from %u to %u\n.", 285 tdb->name, 286 (unsigned int)header->mutex_size, 287 (unsigned int)tdb_mutex_size(tdb))); 288 return false; 289 } 290 291 return true; 163 292 } 164 293 … … 168 297 tdb_hash_func hash_fn) 169 298 { 299 int orig_errno = errno; 300 struct tdb_header header; 170 301 struct tdb_context *tdb; 171 302 struct stat st; … … 176 307 const char *hash_alg; 177 308 uint32_t magic1, magic2; 309 int ret; 310 311 ZERO_STRUCT(header); 178 312 179 313 if (!(tdb = (struct tdb_context *)calloc(1, sizeof *tdb))) { … … 183 317 } 184 318 tdb_io_init(tdb); 319 320 if (tdb_flags & TDB_INTERNAL) { 321 tdb_flags |= TDB_INCOMPATIBLE_HASH; 322 } 323 if (tdb_flags & TDB_MUTEX_LOCKING) { 324 tdb_flags |= TDB_INCOMPATIBLE_HASH; 325 } 326 185 327 tdb->fd = -1; 186 328 #ifdef TDB_TRACE … … 210 352 } 211 353 212 /* now make a copy of the name, as the caller memory might wentaway */354 /* now make a copy of the name, as the caller memory might go away */ 213 355 if (!(tdb->name = (char *)strdup(name))) { 214 356 /* … … 258 400 /* read only databases don't do locking or clear if first */ 259 401 tdb->flags |= TDB_NOLOCK; 260 tdb->flags &= ~ TDB_CLEAR_IF_FIRST;402 tdb->flags &= ~(TDB_CLEAR_IF_FIRST|TDB_MUTEX_LOCKING); 261 403 } 262 404 … … 270 412 } 271 413 414 if (tdb->flags & TDB_MUTEX_LOCKING) { 415 /* 416 * Here we catch bugs in the callers, 417 * the runtime check for existing tdb's comes later. 
418 */ 419 420 if (!(tdb->flags & TDB_CLEAR_IF_FIRST)) { 421 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " 422 "invalid flags for %s - TDB_MUTEX_LOCKING " 423 "requires TDB_CLEAR_IF_FIRST\n", name)); 424 errno = EINVAL; 425 goto fail; 426 } 427 428 if (tdb->flags & TDB_INTERNAL) { 429 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " 430 "invalid flags for %s - TDB_MUTEX_LOCKING and " 431 "TDB_INTERNAL are not allowed together\n", name)); 432 errno = EINVAL; 433 goto fail; 434 } 435 436 if (tdb->flags & TDB_NOMMAP) { 437 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " 438 "invalid flags for %s - TDB_MUTEX_LOCKING and " 439 "TDB_NOMMAP are not allowed together\n", name)); 440 errno = EINVAL; 441 goto fail; 442 } 443 444 if (tdb->read_only) { 445 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " 446 "invalid flags for %s - TDB_MUTEX_LOCKING " 447 "not allowed read only\n", name)); 448 errno = EINVAL; 449 goto fail; 450 } 451 452 /* 453 * The callers should have called 454 * tdb_runtime_check_for_robust_mutexes() 455 * before using TDB_MUTEX_LOCKING! 456 * 457 * This makes sure the caller understands 458 * that the locking may behave a bit differently 459 * than with pure fcntl locking. E.g. multiple 460 * read locks are not supported. 461 */ 462 if (!tdb_runtime_check_for_robust_mutexes()) { 463 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " 464 "invalid flags for %s - TDB_MUTEX_LOCKING " 465 "requires support for robust_mutexes\n", 466 name)); 467 errno = ENOSYS; 468 goto fail; 469 } 470 } 471 272 472 if (getenv("TDB_NO_FSYNC")) { 273 473 tdb->flags |= TDB_NOSYNC; … … 286 486 tdb->flags |= (TDB_NOLOCK | TDB_NOMMAP); 287 487 tdb->flags &= ~TDB_CLEAR_IF_FIRST; 288 if (tdb_new_database(tdb, hash_size) != 0) {488 if (tdb_new_database(tdb, &header, hash_size) != 0) { 289 489 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: tdb_new_database failed!")); 290 490 goto fail; 291 491 } 492 tdb->hash_size = hash_size; 292 493 goto internal; 293 494 } … … 314 515 (!tdb->read_only) && 315 516 (locked = (tdb_nest_lock(tdb, ACTIVE_LOCK, F_WRLCK, TDB_LOCK_NOWAIT|TDB_LOCK_PROBE) == 0))) { 316 int ret;317 517 ret = tdb_brlock(tdb, F_WRLCK, FREELIST_TOP, 0, 318 TDB_LOCK_WAIT);518 TDB_LOCK_WAIT); 319 519 if (ret == -1) { 320 520 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " 321 "tdb_brlock failed for %s: %s\n",322 name, strerror(errno)));323 goto fail; 324 } 325 ret = tdb_new_database(tdb, hash_size);521 "tdb_brlock failed for %s: %s\n", 522 name, strerror(errno))); 523 goto fail; 524 } 525 ret = tdb_new_database(tdb, &header, hash_size); 326 526 if (ret == -1) { 327 527 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " 328 "tdb_new_database failed for %s: %s\n",329 name, strerror(errno)));528 "tdb_new_database failed for %s: %s\n", 529 name, strerror(errno))); 330 530 tdb_unlockall(tdb); 331 531 goto fail; … … 334 534 if (ret == -1) { 335 535 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " 336 "tdb_unlockall failed for %s: %s\n",337 name, strerror(errno)));536 "tdb_unlockall failed for %s: %s\n", 537 name, strerror(errno))); 338 538 goto fail; 339 539 } … … 341 541 if (ret == -1) { 342 542 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " 343 "lseek failed for %s: %s\n",344 name, strerror(errno)));543 "lseek failed for %s: %s\n", 544 name, strerror(errno))); 345 545 goto fail; 346 546 } … … 348 548 349 549 errno = 0; 350 if (read(tdb->fd, &tdb->header, sizeof(tdb->header)) != sizeof(tdb->header) 351 || strcmp(tdb->header.magic_food, TDB_MAGIC_FOOD) != 0) { 352 if (!(open_flags & O_CREAT) || tdb_new_database(tdb, hash_size) == 
-1) { 550 if (read(tdb->fd, &header, sizeof(header)) != sizeof(header) 551 || strcmp(header.magic_food, TDB_MAGIC_FOOD) != 0) { 552 if (!(open_flags & O_CREAT) || 553 tdb_new_database(tdb, &header, hash_size) == -1) { 353 554 if (errno == 0) { 354 555 errno = EIO; /* ie bad format or something */ … … 357 558 } 358 559 rev = (tdb->flags & TDB_CONVERT); 359 } else if ( tdb->header.version != TDB_VERSION360 && !(rev = ( tdb->header.version==TDB_BYTEREV(TDB_VERSION)))) {560 } else if (header.version != TDB_VERSION 561 && !(rev = (header.version==TDB_BYTEREV(TDB_VERSION)))) { 361 562 /* wrong version */ 362 563 errno = EIO; 363 564 goto fail; 364 565 } 365 vp = (unsigned char *)& tdb->header.version;566 vp = (unsigned char *)&header.version; 366 567 vertest = (((uint32_t)vp[0]) << 24) | (((uint32_t)vp[1]) << 16) | 367 568 (((uint32_t)vp[2]) << 8) | (uint32_t)vp[3]; … … 371 572 else { 372 573 tdb->flags |= TDB_CONVERT; 373 tdb_convert(&tdb->header, sizeof(tdb->header)); 374 } 375 if (fstat(tdb->fd, &st) == -1) 376 goto fail; 377 378 if (tdb->header.rwlocks != 0 && 379 tdb->header.rwlocks != TDB_HASH_RWLOCK_MAGIC) { 574 tdb_convert(&header, sizeof(header)); 575 } 576 577 /* 578 * We only use st.st_dev and st.st_ino from the raw fstat() 579 * call, everything else needs to use tdb_fstat() in order 580 * to skip tdb->hdr_ofs! 581 */ 582 if (fstat(tdb->fd, &st) == -1) { 583 goto fail; 584 } 585 tdb->device = st.st_dev; 586 tdb->inode = st.st_ino; 587 ZERO_STRUCT(st); 588 589 if (header.rwlocks != 0 && 590 header.rwlocks != TDB_FEATURE_FLAG_MAGIC && 591 header.rwlocks != TDB_HASH_RWLOCK_MAGIC) { 380 592 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: spinlocks no longer supported\n")); 381 goto fail; 382 } 383 384 if ((tdb->header.magic1_hash == 0) && (tdb->header.magic2_hash == 0)) { 593 errno = ENOSYS; 594 goto fail; 595 } 596 597 if (header.hash_size == 0) { 598 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: invalid database: 0 hash_size\n")); 599 errno = ENOSYS; 600 goto fail; 601 } 602 603 tdb->hash_size = header.hash_size; 604 605 if (header.rwlocks == TDB_FEATURE_FLAG_MAGIC) { 606 tdb->feature_flags = header.feature_flags; 607 } 608 609 if (tdb->feature_flags & ~TDB_SUPPORTED_FEATURE_FLAGS) { 610 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: unsupported " 611 "features in tdb %s: 0x%08x (supported: 0x%08x)\n", 612 name, (unsigned)tdb->feature_flags, 613 (unsigned)TDB_SUPPORTED_FEATURE_FLAGS)); 614 errno = ENOSYS; 615 goto fail; 616 } 617 618 if (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) { 619 if (!tdb_mutex_open_ok(tdb, &header)) { 620 errno = EINVAL; 621 goto fail; 622 } 623 624 /* 625 * We need to remember the hdr_ofs 626 * also for the TDB_NOLOCK case 627 * if the current library doesn't support 628 * mutex locking. 629 */ 630 tdb->hdr_ofs = header.mutex_size; 631 } 632 633 if ((header.magic1_hash == 0) && (header.magic2_hash == 0)) { 385 634 /* older TDB without magic hash references */ 386 635 tdb->hash_fn = tdb_old_hash; 387 } else if (!check_header_hash(tdb, !hash_fn, &magic1, &magic2)) { 636 } else if (!check_header_hash(tdb, &header, !hash_fn, 637 &magic1, &magic2)) { 388 638 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_open_ex: " 389 639 "%s was not created with %s hash function we are using\n" … … 391 641 "magic2_hash[0x%08X %s 0x%08X]\n", 392 642 name, hash_alg, 393 tdb->header.magic1_hash,394 ( tdb->header.magic1_hash == magic1) ? "==" : "!=",643 header.magic1_hash, 644 (header.magic1_hash == magic1) ? 
"==" : "!=", 395 645 magic1, 396 tdb->header.magic2_hash,397 ( tdb->header.magic2_hash == magic2) ? "==" : "!=",646 header.magic2_hash, 647 (header.magic2_hash == magic2) ? "==" : "!=", 398 648 magic2)); 399 649 errno = EINVAL; … … 402 652 403 653 /* Is it already in the open list? If so, fail. */ 404 if (tdb_already_open( st.st_dev, st.st_ino)) {654 if (tdb_already_open(tdb->device, tdb->inode)) { 405 655 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_open_ex: " 406 656 "%s (%d,%d) is already open in this process\n", 407 name, (int) st.st_dev, (int)st.st_ino));657 name, (int)tdb->device, (int)tdb->inode)); 408 658 errno = EBUSY; 409 659 goto fail; 410 660 } 411 661 412 tdb->map_size = st.st_size; 413 tdb->device = st.st_dev; 414 tdb->inode = st.st_ino; 415 tdb_mmap(tdb); 662 /* 663 * We had tdb_mmap(tdb) here before, 664 * but we need to use tdb_fstat(), 665 * which is triggered from tdb_oob() before calling tdb_mmap(). 666 * As this skips tdb->hdr_ofs. 667 */ 668 tdb->map_size = 0; 669 ret = tdb->methods->tdb_oob(tdb, 0, 1, 0); 670 if (ret == -1) { 671 errno = EIO; 672 goto fail; 673 } 674 675 if (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) { 676 if (!(tdb->flags & TDB_NOLOCK)) { 677 ret = tdb_mutex_mmap(tdb); 678 if (ret != 0) { 679 goto fail; 680 } 681 } 682 } 683 416 684 if (locked) { 417 685 if (tdb_nest_unlock(tdb, ACTIVE_LOCK, F_WRLCK, false) == -1) { … … 465 733 tdb->next = tdbs; 466 734 tdbs = tdb; 735 errno = orig_errno; 467 736 return tdb; 468 737 … … 523 792 tdb_munmap(tdb); 524 793 } 794 795 tdb_mutex_munmap(tdb); 796 525 797 SAFE_FREE(tdb->name); 526 798 if (tdb->fd != -1) { … … 594 866 goto fail; 595 867 } 868 /* 869 * We only use st.st_dev and st.st_ino from the raw fstat() 870 * call, everything else needs to use tdb_fstat() in order 871 * to skip tdb->hdr_ofs! 872 */ 596 873 if (fstat(tdb->fd, &st) != 0) { 597 874 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_reopen: fstat failed (%s)\n", strerror(errno))); … … 602 879 goto fail; 603 880 } 604 tdb_mmap(tdb); 881 ZERO_STRUCT(st); 882 883 /* 884 * We had tdb_mmap(tdb) here before, 885 * but we need to use tdb_fstat(), 886 * which is triggered from tdb_oob() before calling tdb_mmap(). 887 * As this skips tdb->hdr_ofs. 888 */ 889 tdb->map_size = 0; 890 if (tdb->methods->tdb_oob(tdb, 0, 1, 0) != 0) { 891 goto fail; 892 } 605 893 #endif /* fake pread or pwrite */ 606 894 … … 608 896 tdb->num_lockrecs = 0; 609 897 SAFE_FREE(tdb->lockrecs); 898 tdb->lockrecs_array_length = 0; 610 899 611 900 if (active_lock && tdb_nest_lock(tdb, ACTIVE_LOCK, F_RDLCK, TDB_LOCK_WAIT) == -1) { -
vendor/current/lib/tdb/common/summary.c
r986 r988 1 /* 1 /* 2 2 Trivial Database: human-readable summary code 3 3 Copyright (C) Rusty Russell 2010 4 4 5 5 This library is free software; you can redistribute it and/or 6 6 modify it under the terms of the GNU Lesser General Public … … 19 19 20 20 #define SUMMARY_FORMAT \ 21 "Size of file/data: %u/%zu\n" \ 21 "Size of file/data: %llu/%zu\n" \ 22 "Header offset/logical size: %zu/%zu\n" \ 22 23 "Number of records: %zu\n" \ 24 "Incompatible hash: %s\n" \ 25 "Active/supported feature flags: 0x%08x/0x%08x\n" \ 26 "Robust mutexes locking: %s\n" \ 23 27 "Smallest/average/largest keys: %zu/%zu/%zu\n" \ 24 28 "Smallest/average/largest data: %zu/%zu/%zu\n" \ … … 87 91 _PUBLIC_ char *tdb_summary(struct tdb_context *tdb) 88 92 { 93 off_t file_size; 89 94 tdb_off_t off, rec_off; 90 struct tally freet, keys, data, dead, extra, hash , uncoal;95 struct tally freet, keys, data, dead, extra, hashval, uncoal; 91 96 struct tdb_record rec; 92 97 char *ret = NULL; 93 98 bool locked; 94 size_t len, unc = 0; 99 size_t unc = 0; 100 int len; 95 101 struct tdb_record recovery; 96 102 … … 114 120 tally_init(&dead); 115 121 tally_init(&extra); 116 tally_init(&hash );122 tally_init(&hashval); 117 123 tally_init(&uncoal); 118 124 119 for (off = TDB_DATA_START(tdb->h eader.hash_size);125 for (off = TDB_DATA_START(tdb->hash_size); 120 126 off < tdb->map_size - 1; 121 127 off += sizeof(rec) + rec.rec_len) { … … 152 158 default: 153 159 TDB_LOG((tdb, TDB_DEBUG_ERROR, 154 "Unexpected record magic 0x%x at offset % d\n",160 "Unexpected record magic 0x%x at offset %u\n", 155 161 rec.magic, off)); 156 162 goto unlock; … … 160 166 tally_add(&uncoal, unc - 1); 161 167 162 for (off = 0; off < tdb->header.hash_size; off++) 163 tally_add(&hash, get_hash_length(tdb, off)); 164 165 /* 20 is max length of a %zu. 
*/ 166 len = strlen(SUMMARY_FORMAT) + 35*20 + 1; 167 ret = (char *)malloc(len); 168 if (!ret) 169 goto unlock; 170 171 snprintf(ret, len, SUMMARY_FORMAT, 172 tdb->map_size, keys.total+data.total, 168 for (off = 0; off < tdb->hash_size; off++) 169 tally_add(&hashval, get_hash_length(tdb, off)); 170 171 file_size = tdb->hdr_ofs + tdb->map_size; 172 173 len = asprintf(&ret, SUMMARY_FORMAT, 174 (unsigned long long)file_size, keys.total+data.total, 175 (size_t)tdb->hdr_ofs, (size_t)tdb->map_size, 173 176 keys.num, 177 (tdb->hash_fn == tdb_jenkins_hash)?"yes":"no", 178 (unsigned)tdb->feature_flags, TDB_SUPPORTED_FEATURE_FLAGS, 179 (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX)?"yes":"no", 174 180 keys.min, tally_mean(&keys), keys.max, 175 181 data.min, tally_mean(&data), data.max, … … 179 185 freet.num, 180 186 freet.min, tally_mean(&freet), freet.max, 181 hash .num,182 hash .min, tally_mean(&hash), hash.max,187 hashval.num, 188 hashval.min, tally_mean(&hashval), hashval.max, 183 189 uncoal.total, 184 190 uncoal.min, tally_mean(&uncoal), uncoal.max, 185 keys.total * 100.0 / tdb->map_size,186 data.total * 100.0 / tdb->map_size,187 extra.total * 100.0 / tdb->map_size,188 freet.total * 100.0 / tdb->map_size,189 dead.total * 100.0 / tdb->map_size,191 keys.total * 100.0 / file_size, 192 data.total * 100.0 / file_size, 193 extra.total * 100.0 / file_size, 194 freet.total * 100.0 / file_size, 195 dead.total * 100.0 / file_size, 190 196 (keys.num + freet.num + dead.num) 191 197 * (sizeof(struct tdb_record) + sizeof(uint32_t)) 192 * 100.0 / tdb->map_size, 193 tdb->header.hash_size * sizeof(tdb_off_t) 194 * 100.0 / tdb->map_size); 198 * 100.0 / file_size, 199 tdb->hash_size * sizeof(tdb_off_t) 200 * 100.0 / file_size); 201 if (len == -1) { 202 goto unlock; 203 } 195 204 196 205 unlock: -
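Two things change in the summary code: percentages are now computed against the real file size (tdb->hdr_ofs + tdb->map_size, since the mutex area is not part of map_size), and the fixed-size malloc()/snprintf() pair with its hand-counted "35*20 + 1" estimate gives way to asprintf(), which sizes the buffer itself. The asprintf() contract (a GNU/BSD extension) that the code now relies on, in minimal form with illustrative variable names:

    char *buf = NULL;
    int len;

    len = asprintf(&buf, "Size of file/data: %llu/%zu\n",
                   (unsigned long long)file_size, total_bytes);
    if (len == -1) {
            /* Allocation or formatting failed: buf is undefined here
             * and must be neither used nor freed. */
            return NULL;
    }
    /* ... use buf, then ... */
    free(buf);

The rename of the local tally from "hash" to "hashval" is cosmetic, apparently to stop it shadowing the hash identifiers used elsewhere in tdb.
-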
vendor/current/lib/tdb/common/tdb.c
r986 r988 1 /* 1 /* 2 2 Unix SMB/CIFS implementation. 3 3 … … 125 125 static TDB_DATA _tdb_fetch(struct tdb_context *tdb, TDB_DATA key); 126 126 127 static int tdb_update_hash_cmp(TDB_DATA key, TDB_DATA data, void *private_data) 128 { 129 TDB_DATA *dbuf = (TDB_DATA *)private_data; 130 131 if (dbuf->dsize != data.dsize) { 132 return -1; 133 } 134 if (memcmp(dbuf->dptr, data.dptr, data.dsize) != 0) { 135 return -1; 136 } 137 return 0; 138 } 139 127 140 /* update an entry in place - this only works if the new data size 128 141 is <= the old data size and the key exists. … … 140 153 /* it could be an exact duplicate of what is there - this is 141 154 * surprisingly common (eg. with a ldb re-index). */ 142 if (rec.key_len == key.dsize && 155 if (rec.key_len == key.dsize && 143 156 rec.data_len == dbuf.dsize && 144 rec.full_hash == hash) { 145 TDB_DATA data = _tdb_fetch(tdb, key); 146 if (data.dsize == dbuf.dsize && 147 memcmp(data.dptr, dbuf.dptr, data.dsize) == 0) { 148 if (data.dptr) { 149 free(data.dptr); 150 } 151 return 0; 152 } 153 if (data.dptr) { 154 free(data.dptr); 155 } 157 rec.full_hash == hash && 158 tdb_parse_record(tdb, key, tdb_update_hash_cmp, &dbuf) == 0) { 159 return 0; 156 160 } 157 161 … … 255 259 } 256 260 257 /* check if an entry in the database exists 261 /* check if an entry in the database exists 258 262 259 263 note that 1 is returned if the key is found and 0 is returned if not found … … 342 346 * Purge all DEAD records from a hash chain 343 347 */ 344 staticint tdb_purge_dead(struct tdb_context *tdb, uint32_t hash)348 int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash) 345 349 { 346 350 int res = -1; … … 348 352 tdb_off_t rec_ptr; 349 353 350 if (tdb_lock(tdb, -1, F_WRLCK) == -1) { 354 if (tdb_lock_nonblock(tdb, -1, F_WRLCK) == -1) { 355 /* 356 * Don't block the freelist if not strictly necessary 357 */ 351 358 return -1; 352 359 } … … 384 391 int ret; 385 392 393 rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK, &rec); 394 if (rec_ptr == 0) { 395 return -1; 396 } 397 386 398 if (tdb->max_dead_records != 0) { 399 400 uint32_t magic = TDB_DEAD_MAGIC; 387 401 388 402 /* … … 390 404 * tdb's with a very high create/delete rate like locking.tdb. 391 405 */ 392 393 if (tdb_lock(tdb, BUCKET(hash), F_WRLCK) == -1)394 return -1;395 406 396 407 if (tdb_count_dead(tdb, hash) >= tdb->max_dead_records) { … … 402 413 } 403 414 404 if (!(rec_ptr = tdb_find(tdb, key, hash, &rec))) {405 tdb_unlock(tdb, BUCKET(hash), F_WRLCK);406 return -1;407 }408 409 415 /* 410 416 * Just mark the record as dead. 
411 417 */ 412 rec.magic = TDB_DEAD_MAGIC; 413 ret = tdb_rec_write(tdb, rec_ptr, &rec); 418 ret = tdb_ofs_write( 419 tdb, rec_ptr + offsetof(struct tdb_record, magic), 420 &magic); 414 421 } 415 422 else { 416 if (!(rec_ptr = tdb_find_lock_hash(tdb, key, hash, F_WRLCK,417 &rec)))418 return -1;419 420 423 ret = tdb_do_delete(tdb, rec_ptr, &rec); 421 424 } … … 425 428 } 426 429 427 if (tdb_unlock(tdb, BUCKET( rec.full_hash), F_WRLCK) != 0)430 if (tdb_unlock(tdb, BUCKET(hash), F_WRLCK) != 0) 428 431 TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_delete: WARNING tdb_unlock failed!\n")); 429 432 return ret; … … 443 446 * See if we have a dead record around with enough space 444 447 */ 445 static tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash, 446 struct tdb_record *r, tdb_len_t length) 447 { 448 tdb_off_t rec_ptr; 448 tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash, 449 struct tdb_record *r, tdb_len_t length, 450 tdb_off_t *p_last_ptr) 451 { 452 tdb_off_t rec_ptr, last_ptr; 453 tdb_off_t best_rec_ptr = 0; 454 tdb_off_t best_last_ptr = 0; 455 struct tdb_record best = { .rec_len = UINT32_MAX }; 456 457 length += sizeof(tdb_off_t); /* tailer */ 458 459 last_ptr = TDB_HASH_TOP(hash); 449 460 450 461 /* read in the hash top */ 451 if (tdb_ofs_read(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1)462 if (tdb_ofs_read(tdb, last_ptr, &rec_ptr) == -1) 452 463 return 0; 453 464 … … 457 468 return 0; 458 469 459 if (TDB_DEAD(r) && r->rec_len >= length) {460 /*461 * First fit for simple coding, TODO: change to best462 * fit463 */464 return rec_ptr;465 }470 if (TDB_DEAD(r) && (r->rec_len >= length) && 471 (r->rec_len < best.rec_len)) { 472 best_rec_ptr = rec_ptr; 473 best_last_ptr = last_ptr; 474 best = *r; 475 } 476 last_ptr = rec_ptr; 466 477 rec_ptr = r->next; 467 478 } 468 return 0; 479 480 if (best.rec_len == UINT32_MAX) { 481 return 0; 482 } 483 484 *r = best; 485 *p_last_ptr = best_last_ptr; 486 return best_rec_ptr; 469 487 } 470 488 … … 474 492 struct tdb_record rec; 475 493 tdb_off_t rec_ptr; 476 char *p = NULL;477 494 int ret = -1; 478 495 … … 495 512 } 496 513 } 497 /* reset the error code potentially set by the tdb_update () */514 /* reset the error code potentially set by the tdb_update_hash() */ 498 515 tdb->ecode = TDB_SUCCESS; 499 516 … … 504 521 tdb_delete_hash(tdb, key, hash); 505 522 506 /* Copy key+value *before* allocating free space in case malloc507 fails and we are left with a dead spot in the tdb. */508 509 if (!(p = (char *)malloc(key.dsize + dbuf.dsize))) {510 tdb->ecode = TDB_ERR_OOM;511 goto fail;512 }513 514 memcpy(p, key.dptr, key.dsize);515 if (dbuf.dsize)516 memcpy(p+key.dsize, dbuf.dptr, dbuf.dsize);517 518 if (tdb->max_dead_records != 0) {519 /*520 * Allow for some dead records per hash chain, look if we can521 * find one that can hold the new record. We need enough space522 * for key, data and tailer. If we find one, we don't have to523 * consult the central freelist.524 */525 rec_ptr = tdb_find_dead(526 tdb, hash, &rec,527 key.dsize + dbuf.dsize + sizeof(tdb_off_t));528 529 if (rec_ptr != 0) {530 rec.key_len = key.dsize;531 rec.data_len = dbuf.dsize;532 rec.full_hash = hash;533 rec.magic = TDB_MAGIC;534 if (tdb_rec_write(tdb, rec_ptr, &rec) == -1535 || tdb->methods->tdb_write(536 tdb, rec_ptr + sizeof(rec),537 p, key.dsize + dbuf.dsize) == -1) {538 goto fail;539 }540 goto done;541 }542 }543 544 /*545 * We have to allocate some space from the freelist, so this means we546 * have to lock it. 
Use the chance to purge all the DEAD records from547 * the hash chain under the freelist lock.548 */549 550 if (tdb_lock(tdb, -1, F_WRLCK) == -1) {551 goto fail;552 }553 554 if ((tdb->max_dead_records != 0)555 && (tdb_purge_dead(tdb, hash) == -1)) {556 tdb_unlock(tdb, -1, F_WRLCK);557 goto fail;558 }559 560 523 /* we have to allocate some space */ 561 rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize, &rec); 562 563 tdb_unlock(tdb, -1, F_WRLCK); 524 rec_ptr = tdb_allocate(tdb, hash, key.dsize + dbuf.dsize, &rec); 564 525 565 526 if (rec_ptr == 0) { … … 578 539 /* write out and point the top of the hash chain at it */ 579 540 if (tdb_rec_write(tdb, rec_ptr, &rec) == -1 580 || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec), p, key.dsize+dbuf.dsize)==-1 541 || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec), 542 key.dptr, key.dsize) == -1 543 || tdb->methods->tdb_write(tdb, rec_ptr+sizeof(rec)+key.dsize, 544 dbuf.dptr, dbuf.dsize) == -1 581 545 || tdb_ofs_write(tdb, TDB_HASH_TOP(hash), &rec_ptr) == -1) { 582 546 /* Need to tdb_unallocate() here */ … … 590 554 tdb_increment_seqnum(tdb); 591 555 } 592 593 SAFE_FREE(p);594 556 return ret; 595 557 } … … 719 681 _PUBLIC_ int tdb_hash_size(struct tdb_context *tdb) 720 682 { 721 return tdb->h eader.hash_size;683 return tdb->hash_size; 722 684 } 723 685 … … 762 724 } 763 725 726 if ((flags & TDB_NOLOCK) && 727 (tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) && 728 (tdb->mutexes == NULL)) { 729 tdb->ecode = TDB_ERR_LOCK; 730 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_remove_flags: " 731 "Can not remove NOLOCK flag on mutexed databases")); 732 return; 733 } 734 764 735 if (flags & TDB_ALLOW_NESTING) { 765 736 tdb->flags |= TDB_DISALLOW_NESTING; … … 783 754 784 755 /* 785 add a region of the file to the freelist. Length is the size of the region in bytes, 756 add a region of the file to the freelist. 
Length is the size of the region in bytes, 786 757 which includes the free list header that needs to be added 787 758 */ … … 795 766 if (length + offset > tdb->map_size) { 796 767 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_free_region: adding region beyond end of file\n")); 797 return -1; 768 return -1; 798 769 } 799 770 memset(&rec,'\0',sizeof(rec)); … … 841 812 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_wipe_all: failed to read recovery record\n")); 842 813 return -1; 843 } 814 } 844 815 recovery_size = rec.rec_len + sizeof(rec); 845 816 } 846 817 847 818 /* wipe the hashes */ 848 for (i=0;i<tdb->h eader.hash_size;i++) {819 for (i=0;i<tdb->hash_size;i++) { 849 820 if (tdb_ofs_write(tdb, TDB_HASH_TOP(i), &offset) == -1) { 850 821 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to write hash %d\n", i)); … … 859 830 } 860 831 861 /* add all the rest of the file to the freelist, possibly leaving a gap 832 /* add all the rest of the file to the freelist, possibly leaving a gap 862 833 for the recovery area */ 863 834 if (recovery_size == 0) { 864 835 /* the simple case - the whole file can be used as a freelist */ 865 data_len = (tdb->map_size - TDB_DATA_START(tdb->h eader.hash_size));866 if (tdb_free_region(tdb, TDB_DATA_START(tdb->h eader.hash_size), data_len) != 0) {836 data_len = (tdb->map_size - TDB_DATA_START(tdb->hash_size)); 837 if (tdb_free_region(tdb, TDB_DATA_START(tdb->hash_size), data_len) != 0) { 867 838 goto failed; 868 839 } 869 840 } else { 870 841 /* we need to add two freelist entries - one on either 871 side of the recovery area 842 side of the recovery area 872 843 873 844 Note that we cannot shift the recovery area during … … 876 847 corruption 877 848 */ 878 data_len = (recovery_head - TDB_DATA_START(tdb->h eader.hash_size));879 if (tdb_free_region(tdb, TDB_DATA_START(tdb->h eader.hash_size), data_len) != 0) {849 data_len = (recovery_head - TDB_DATA_START(tdb->hash_size)); 850 if (tdb_free_region(tdb, TDB_DATA_START(tdb->hash_size), data_len) != 0) { 880 851 goto failed; 881 852 } … … 887 858 } 888 859 860 tdb_increment_seqnum_nonblock(tdb); 861 889 862 if (tdb_unlockall(tdb) != 0) { 890 863 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_wipe_all: failed to unlock\n")); … … 946 919 tdb_transaction_cancel(tdb); 947 920 tdb_close(tmp_db); 948 return -1; 921 return -1; 949 922 } 950 923 … … 970 943 tdb_transaction_cancel(tdb); 971 944 tdb_close(tmp_db); 972 return -1; 945 return -1; 973 946 } 974 947 … … 1004 977 } 1005 978 979 bool tdb_add_off_t(tdb_off_t a, tdb_off_t b, tdb_off_t *pret) 980 { 981 tdb_off_t ret = a + b; 982 983 if ((ret < a) || (ret < b)) { 984 return false; 985 } 986 *pret = ret; 987 return true; 988 } 989 1006 990 #ifdef TDB_TRACE 1007 991 static void tdb_trace_write(struct tdb_context *tdb, const char *str) 1008 992 { 1009 if (!tdb_write_all tdb->tracefd, str, strlen(str)) {993 if (!tdb_write_all(tdb->tracefd, str, strlen(str))) { 1010 994 close(tdb->tracefd); 1011 995 tdb->tracefd = -1; -
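tdb_add_off_t() is the overflow guard several hunks in this changeset lean on: tdb_off_t is 32 bits wide, so a plain "a + b" can silently wrap and defeat a later bounds check. A sketch of the intended usage pattern when validating a record header (the helper name is illustrative; the fields match struct tdb_record):

    /* Reject a record whose rec_len would wrap past the end of file. */
    static int check_rec_bounds(struct tdb_context *tdb, tdb_off_t rec_ptr,
                                const struct tdb_record *rec)
    {
            tdb_off_t end;

            /* rec_ptr + sizeof(*rec) + rec->rec_len must not wrap, or a
             * corrupt rec_len could slip past the map_size test below. */
            if (!tdb_add_off_t(rec_ptr, sizeof(*rec), &end) ||
                !tdb_add_off_t(end, rec->rec_len, &end) ||
                end > tdb->map_size) {
                    tdb->ecode = TDB_ERR_CORRUPT;
                    return -1;
            }
            return 0;
    }

The same guard is reused for lengths through the tdb_add_len_t alias in tdb_private.h, which only works because tdb_off_t and tdb_len_t are currently the same 32-bit type.
-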
vendor/current/lib/tdb/common/tdb_private.h
r986 r988 1 /* 1 #ifndef TDB_PRIVATE_H 2 #define TDB_PRIVATE_H 3 /* 2 4 Unix SMB/CIFS implementation. 3 5 … … 52 54 #define TDB_RECOVERY_INVALID_MAGIC (0x0) 53 55 #define TDB_HASH_RWLOCK_MAGIC (0xbad1a51U) 56 #define TDB_FEATURE_FLAG_MAGIC (0xbad1a52U) 54 57 #define TDB_ALIGNMENT 4 55 58 #define DEFAULT_HASH_SIZE 131 … … 60 63 #define TDB_BAD_MAGIC(r) ((r)->magic != TDB_MAGIC && !TDB_DEAD(r)) 61 64 #define TDB_HASH_TOP(hash) (FREELIST_TOP + (BUCKET(hash)+1)*sizeof(tdb_off_t)) 62 #define TDB_HASHTABLE_SIZE(tdb) ((tdb->h eader.hash_size+1)*sizeof(tdb_off_t))65 #define TDB_HASHTABLE_SIZE(tdb) ((tdb->hash_size+1)*sizeof(tdb_off_t)) 63 66 #define TDB_DATA_START(hash_size) (TDB_HASH_TOP(hash_size-1) + sizeof(tdb_off_t)) 64 67 #define TDB_RECOVERY_HEAD offsetof(struct tdb_header, recovery_start) … … 66 69 #define TDB_PAD_BYTE 0x42 67 70 #define TDB_PAD_U32 0x42424242 71 72 #define TDB_FEATURE_FLAG_MUTEX 0x00000001 73 74 #define TDB_SUPPORTED_FEATURE_FLAGS ( \ 75 TDB_FEATURE_FLAG_MUTEX | \ 76 0) 68 77 69 78 /* NB assumes there is a local variable called "tdb" that is the … … 113 122 #endif 114 123 115 #define BUCKET(hash) ((hash) % tdb->h eader.hash_size)124 #define BUCKET(hash) ((hash) % tdb->hash_size) 116 125 117 126 #define DOCONV() (tdb->flags & TDB_CONVERT) … … 151 160 uint32_t magic1_hash; /* hash of TDB_MAGIC_FOOD. */ 152 161 uint32_t magic2_hash; /* hash of TDB_MAGIC. */ 153 tdb_off_t reserved[27]; 162 uint32_t feature_flags; 163 tdb_len_t mutex_size; /* set if TDB_FEATURE_FLAG_MUTEX is set */ 164 tdb_off_t reserved[25]; 154 165 }; 155 166 … … 181 192 int (*tdb_write)(struct tdb_context *, tdb_off_t, const void *, tdb_len_t); 182 193 void (*next_hash_chain)(struct tdb_context *, uint32_t *); 183 int (*tdb_oob)(struct tdb_context *, tdb_off_t , int );194 int (*tdb_oob)(struct tdb_context *, tdb_off_t , tdb_len_t, int ); 184 195 int (*tdb_expand_file)(struct tdb_context *, tdb_off_t , tdb_off_t ); 185 196 }; 197 198 struct tdb_mutexes; 186 199 187 200 struct tdb_context { … … 196 209 int num_lockrecs; 197 210 struct tdb_lock_type *lockrecs; /* only real locks, all with count>0 */ 211 int lockrecs_array_length; 212 213 tdb_off_t hdr_ofs; /* this is 0 or header.mutex_size */ 214 struct tdb_mutexes *mutexes; /* mmap of the mutex area */ 215 198 216 enum TDB_ERROR ecode; /* error code for last tdb error */ 199 struct tdb_header header; /* a cached copy of the header */ 217 uint32_t hash_size; 218 uint32_t feature_flags; 200 219 uint32_t flags; /* the flags passed to tdb_open */ 201 220 struct tdb_traverse_lock travlocks; /* current traversal locks */ … … 221 240 */ 222 241 int tdb_munmap(struct tdb_context *tdb); 223 voidtdb_mmap(struct tdb_context *tdb);242 int tdb_mmap(struct tdb_context *tdb); 224 243 int tdb_lock(struct tdb_context *tdb, int list, int ltype); 225 244 int tdb_lock_nonblock(struct tdb_context *tdb, int list, int ltype); … … 253 272 void *tdb_convert(void *buf, uint32_t size); 254 273 int tdb_free(struct tdb_context *tdb, tdb_off_t offset, struct tdb_record *rec); 255 tdb_off_t tdb_allocate(struct tdb_context *tdb, tdb_len_t length, struct tdb_record *rec); 274 tdb_off_t tdb_allocate(struct tdb_context *tdb, int hash, tdb_len_t length, 275 struct tdb_record *rec); 256 276 int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d); 257 277 int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d); … … 270 290 tdb_off_t tdb_find_lock_hash(struct tdb_context *tdb, TDB_DATA key, uint32_t hash, int locktype, 271 291 struct tdb_record *rec); 292 
tdb_off_t tdb_find_dead(struct tdb_context *tdb, uint32_t hash, 293 struct tdb_record *r, tdb_len_t length, 294 tdb_off_t *p_last_ptr); 295 int tdb_purge_dead(struct tdb_context *tdb, uint32_t hash); 272 296 void tdb_io_init(struct tdb_context *tdb); 273 297 int tdb_expand(struct tdb_context *tdb, tdb_off_t size); 298 tdb_off_t tdb_expand_adjust(tdb_off_t map_size, tdb_off_t size, int page_size); 274 299 int tdb_rec_free_read(struct tdb_context *tdb, tdb_off_t off, 275 300 struct tdb_record *rec); … … 280 305 unsigned int tdb_old_hash(TDB_DATA *key); 281 306 size_t tdb_dead_space(struct tdb_context *tdb, tdb_off_t off); 307 bool tdb_add_off_t(tdb_off_t a, tdb_off_t b, tdb_off_t *pret); 308 309 /* tdb_off_t and tdb_len_t right now are both uint32_t */ 310 #define tdb_add_len_t tdb_add_off_t 311 312 size_t tdb_mutex_size(struct tdb_context *tdb); 313 bool tdb_have_mutexes(struct tdb_context *tdb); 314 int tdb_mutex_init(struct tdb_context *tdb); 315 int tdb_mutex_mmap(struct tdb_context *tdb); 316 int tdb_mutex_munmap(struct tdb_context *tdb); 317 bool tdb_mutex_lock(struct tdb_context *tdb, int rw, off_t off, off_t len, 318 bool waitflag, int *pret); 319 bool tdb_mutex_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len, 320 int *pret); 321 int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype, 322 enum tdb_lock_flags flags); 323 int tdb_mutex_allrecord_unlock(struct tdb_context *tdb); 324 int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb); 325 void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb); 326 327 #endif /* TDB_PRIVATE_H */ -
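The header growth in tdb_private.h (feature_flags and mutex_size carved out of the reserved array, so reserved[27] becomes reserved[25]) is what makes the format change detectable both ways: old libraries see an unknown rwlocks magic and refuse the file, while new libraries reject unknown bits via TDB_SUPPORTED_FEATURE_FLAGS. For a mutexed database the new hdr_ofs field implies an on-disk layout along these lines (a sketch inferred from the open.c and summary.c hunks, not to scale):

    /*
     *  offset 0                           hdr_ofs == header.mutex_size
     *  +---------------------------+      +------------+------------+-----
     *  | tdb_header + mutex array  |      | tdb_header | hash table | data
     *  | (used only by the mutex   |      | (the copy  |            | ...
     *  |  code, skipped by mmap)   |      |  that gets |            |
     *  +---------------------------+      |  mmapped)  |            |
     *                                     +------------+------------+-----
     *
     * tdb_oob() and tdb_mmap() operate relative to hdr_ofs, so map_size
     * excludes the mutex area and the physical file size is
     * hdr_ofs + map_size, exactly what tdb_summary() now reports.
     */

For plain fcntl databases hdr_ofs stays 0 and the layout is unchanged.
-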
vendor/current/lib/tdb/common/transaction.c
r986 r988 1 /* 1 /* 2 2 Unix SMB/CIFS implementation. 3 3 … … 83 83 84 84 - if TDB_NOSYNC is passed to flags in tdb_open then transactions are 85 still available, but no transaction recovery area is used and no 86 fsync/msync calls are made. 85 still available, but no fsync/msync calls are made. This means we 86 are still proof against a process dying during transaction commit, 87 but not against machine reboot. 87 88 88 89 - if TDB_ALLOW_NESTING is passed to flags in tdb open, or added using … … 148 149 of transaction elements, then if not do a real read 149 150 */ 150 static int transaction_read(struct tdb_context *tdb, tdb_off_t off, void *buf, 151 static int transaction_read(struct tdb_context *tdb, tdb_off_t off, void *buf, 151 152 tdb_len_t len, int cv) 152 153 { … … 195 196 196 197 fail: 197 TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_read: failed at off=% d len=%d\n", off, len));198 TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_read: failed at off=%u len=%u\n", off, len)); 198 199 tdb->ecode = TDB_ERR_IO; 199 200 tdb->transaction->transaction_error = 1; … … 205 206 write while in a transaction 206 207 */ 207 static int transaction_write(struct tdb_context *tdb, tdb_off_t off, 208 static int transaction_write(struct tdb_context *tdb, tdb_off_t off, 208 209 const void *buf, tdb_len_t len) 209 210 { … … 249 250 uint8_t **new_blocks; 250 251 /* expand the blocks array */ 251 if (tdb->transaction->blocks == NULL) { 252 new_blocks = (uint8_t **)malloc( 253 (blk+1)*sizeof(uint8_t *)); 254 } else { 255 new_blocks = (uint8_t **)realloc( 256 tdb->transaction->blocks, 257 (blk+1)*sizeof(uint8_t *)); 258 } 252 new_blocks = (uint8_t **)realloc(tdb->transaction->blocks, 253 (blk+1)*sizeof(uint8_t *)); 259 254 if (new_blocks == NULL) { 260 255 tdb->ecode = TDB_ERR_OOM; 261 256 goto fail; 262 257 } 263 memset(&new_blocks[tdb->transaction->num_blocks], 0, 258 memset(&new_blocks[tdb->transaction->num_blocks], 0, 264 259 (1+(blk - tdb->transaction->num_blocks))*sizeof(uint8_t *)); 265 260 tdb->transaction->blocks = new_blocks; … … 274 269 tdb->ecode = TDB_ERR_OOM; 275 270 tdb->transaction->transaction_error = 1; 276 return -1; 271 return -1; 277 272 } 278 273 if (tdb->transaction->old_map_size > blk * tdb->transaction->block_size) { … … 281 276 len2 = tdb->transaction->old_map_size - (blk * tdb->transaction->block_size); 282 277 } 283 if (tdb->transaction->io_methods->tdb_read(tdb, blk * tdb->transaction->block_size, 284 tdb->transaction->blocks[blk], 278 if (tdb->transaction->io_methods->tdb_read(tdb, blk * tdb->transaction->block_size, 279 tdb->transaction->blocks[blk], 285 280 len2, 0) != 0) { 286 SAFE_FREE(tdb->transaction->blocks[blk]); 281 SAFE_FREE(tdb->transaction->blocks[blk]); 287 282 tdb->ecode = TDB_ERR_IO; 288 283 goto fail; … … 290 285 if (blk == tdb->transaction->num_blocks-1) { 291 286 tdb->transaction->last_block_size = len2; 292 } 287 } 293 288 } 294 289 } … … 309 304 310 305 fail: 311 TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_write: failed at off=% d len=%d\n",306 TDB_LOG((tdb, TDB_DEBUG_FATAL, "transaction_write: failed at off=%u len=%u\n", 312 307 (blk*tdb->transaction->block_size) + off, len)); 313 308 tdb->transaction->transaction_error = 1; … … 317 312 318 313 /* 319 write while in a transaction - this vari ent never expands the transaction blocks, it only314 write while in a transaction - this variant never expands the transaction blocks, it only 320 315 updates existing blocks. 
This means it cannot change the recovery size 321 316 */ 322 static int transaction_write_existing(struct tdb_context *tdb, tdb_off_t off, 317 static int transaction_write_existing(struct tdb_context *tdb, tdb_off_t off, 323 318 const void *buf, tdb_len_t len) 324 319 { … … 371 366 { 372 367 uint32_t h = *chain; 373 for (;h < tdb->h eader.hash_size;h++) {368 for (;h < tdb->hash_size;h++) { 374 369 /* the +1 takes account of the freelist */ 375 370 if (0 != tdb->transaction->hash_heads[h+1]) { … … 383 378 out of bounds check during a transaction 384 379 */ 385 static int transaction_oob(struct tdb_context *tdb, tdb_off_t len, int probe) 386 { 387 if (len <= tdb->map_size) { 380 static int transaction_oob(struct tdb_context *tdb, tdb_off_t off, 381 tdb_len_t len, int probe) 382 { 383 if (off + len >= off && off + len <= tdb->map_size) { 388 384 return 0; 389 385 } … … 395 391 transaction version of tdb_expand(). 396 392 */ 397 static int transaction_expand_file(struct tdb_context *tdb, tdb_off_t size, 393 static int transaction_expand_file(struct tdb_context *tdb, tdb_off_t size, 398 394 tdb_off_t addition) 399 395 { … … 426 422 { 427 423 /* some sanity checks */ 428 if (tdb->read_only || (tdb->flags & TDB_INTERNAL) || tdb->traverse_read) { 424 if (tdb->read_only || (tdb->flags & TDB_INTERNAL) 425 || tdb->traverse_read) { 429 426 TDB_LOG((tdb, TDB_DEBUG_ERROR, "tdb_transaction_start: cannot start a transaction on a read-only or internal db\n")); 430 427 tdb->ecode = TDB_ERR_EINVAL; … … 439 436 } 440 437 tdb->transaction->nesting++; 441 TDB_LOG((tdb, TDB_DEBUG_TRACE, "tdb_transaction_start: nesting %d\n", 438 TDB_LOG((tdb, TDB_DEBUG_TRACE, "tdb_transaction_start: nesting %d\n", 442 439 tdb->transaction->nesting)); 443 440 return 0; … … 494 491 traverse can be fast */ 495 492 tdb->transaction->hash_heads = (uint32_t *) 496 calloc(tdb->h eader.hash_size+1, sizeof(uint32_t));493 calloc(tdb->hash_size+1, sizeof(uint32_t)); 497 494 if (tdb->transaction->hash_heads == NULL) { 498 495 tdb->ecode = TDB_ERR_OOM; … … 508 505 /* make sure we know about any file expansions already done by 509 506 anyone else */ 510 tdb->methods->tdb_oob(tdb, tdb->map_size +1, 1);507 tdb->methods->tdb_oob(tdb, tdb->map_size, 1, 1); 511 508 tdb->transaction->old_map_size = tdb->map_size; 512 509 … … 544 541 */ 545 542 static int transaction_sync(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t length) 546 { 543 { 547 544 if (tdb->flags & TDB_NOSYNC) { 548 545 return 0; … … 561 558 if (tdb->map_ptr) { 562 559 tdb_off_t moffset = offset & ~(tdb->page_size-1); 563 if (msync(moffset + (char *)tdb->map_ptr, 560 if (msync(moffset + (char *)tdb->map_ptr, 564 561 length + (offset - moffset), MS_SYNC) != 0) { 565 562 tdb->ecode = TDB_ERR_IO; … … 575 572 576 573 static int _tdb_transaction_cancel(struct tdb_context *tdb) 577 { 574 { 578 575 int i, ret = 0; 579 576 … … 587 584 tdb->transaction->nesting--; 588 585 return 0; 589 } 586 } 590 587 591 588 tdb->map_size = tdb->transaction->old_map_size; … … 635 632 work out how much space the linearised recovery data will consume 636 633 */ 637 static tdb_len_t tdb_recovery_size(struct tdb_context *tdb)634 static bool tdb_recovery_size(struct tdb_context *tdb, tdb_len_t *result) 638 635 { 639 636 tdb_len_t recovery_size = 0; … … 642 639 recovery_size = sizeof(uint32_t); 643 640 for (i=0;i<tdb->transaction->num_blocks;i++) { 641 tdb_len_t block_size; 644 642 if (i * tdb->transaction->block_size >= tdb->transaction->old_map_size) { 645 643 break; … … 648 646 continue; 649 647 } 650 
recovery_size += 2*sizeof(tdb_off_t); 648 if (!tdb_add_len_t(recovery_size, 2*sizeof(tdb_off_t), 649 &recovery_size)) { 650 return false; 651 } 651 652 if (i == tdb->transaction->num_blocks-1) { 652 recovery_size += tdb->transaction->last_block_size;653 block_size = tdb->transaction->last_block_size; 653 654 } else { 654 recovery_size += tdb->transaction->block_size; 655 } 656 } 657 658 return recovery_size; 655 block_size = tdb->transaction->block_size; 656 } 657 if (!tdb_add_len_t(recovery_size, block_size, 658 &recovery_size)) { 659 return false; 660 } 661 } 662 663 *result = recovery_size; 664 return true; 659 665 } 660 666 … … 691 697 large enough 692 698 */ 693 static int tdb_recovery_allocate(struct tdb_context *tdb, 699 static int tdb_recovery_allocate(struct tdb_context *tdb, 694 700 tdb_len_t *recovery_size, 695 701 tdb_off_t *recovery_offset, … … 698 704 struct tdb_record rec; 699 705 const struct tdb_methods *methods = tdb->transaction->io_methods; 700 tdb_off_t recovery_head ;706 tdb_off_t recovery_head, new_end; 701 707 702 708 if (tdb_recovery_area(tdb, methods, &recovery_head, &rec) == -1) { … … 705 711 } 706 712 707 *recovery_size = tdb_recovery_size(tdb); 708 713 if (!tdb_recovery_size(tdb, recovery_size)) { 714 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: " 715 "overflow recovery size\n")); 716 return -1; 717 } 718 719 /* Existing recovery area? */ 709 720 if (recovery_head != 0 && *recovery_size <= rec.rec_len) { 710 721 /* it fits in the existing area */ … … 714 725 } 715 726 716 /* we need to free up the old recovery area, then allocate a 717 new one at the end of the file. Note that we cannot use 718 tdb_allocate() to allocate the new one as that might return 719 us an area that is being currently used (as of the start of 720 the transaction) */ 721 if (recovery_head != 0) { 722 if (tdb_free(tdb, recovery_head, &rec) == -1) { 723 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to free previous recovery area\n")); 724 return -1; 725 } 726 } 727 728 /* the tdb_free() call might have increased the recovery size */ 729 *recovery_size = tdb_recovery_size(tdb); 730 731 /* round up to a multiple of page size */ 732 *recovery_max_size = TDB_ALIGN(sizeof(rec) + *recovery_size, tdb->page_size) - sizeof(rec); 733 *recovery_offset = tdb->map_size; 734 recovery_head = *recovery_offset; 735 736 if (methods->tdb_expand_file(tdb, tdb->transaction->old_map_size, 737 (tdb->map_size - tdb->transaction->old_map_size) + 738 sizeof(rec) + *recovery_max_size) == -1) { 727 /* If recovery area in middle of file, we need a new one. */ 728 if (recovery_head == 0 729 || recovery_head + sizeof(rec) + rec.rec_len != tdb->map_size) { 730 /* we need to free up the old recovery area, then allocate a 731 new one at the end of the file. Note that we cannot use 732 tdb_allocate() to allocate the new one as that might return 733 us an area that is being currently used (as of the start of 734 the transaction) */ 735 if (recovery_head) { 736 if (tdb_free(tdb, recovery_head, &rec) == -1) { 737 TDB_LOG((tdb, TDB_DEBUG_FATAL, 738 "tdb_recovery_allocate: failed to" 739 " free previous recovery area\n")); 740 return -1; 741 } 742 743 /* the tdb_free() call might have increased 744 * the recovery size */ 745 if (!tdb_recovery_size(tdb, recovery_size)) { 746 TDB_LOG((tdb, TDB_DEBUG_FATAL, 747 "tdb_recovery_allocate: " 748 "overflow recovery size\n")); 749 return -1; 750 } 751 } 752 753 /* New head will be at end of file. 
*/ 754 recovery_head = tdb->map_size; 755 } 756 757 /* Now we know where it will be. */ 758 *recovery_offset = recovery_head; 759 760 /* Expand by more than we need, so we don't do it often. */ 761 *recovery_max_size = tdb_expand_adjust(tdb->map_size, 762 *recovery_size, 763 tdb->page_size) 764 - sizeof(rec); 765 766 if (!tdb_add_off_t(recovery_head, sizeof(rec), &new_end) || 767 !tdb_add_off_t(new_end, *recovery_max_size, &new_end)) { 768 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: " 769 "overflow recovery area\n")); 770 return -1; 771 } 772 773 if (methods->tdb_expand_file(tdb, tdb->transaction->old_map_size, 774 new_end - tdb->transaction->old_map_size) 775 == -1) { 739 776 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to create recovery area\n")); 740 777 return -1; … … 742 779 743 780 /* remap the file (if using mmap) */ 744 methods->tdb_oob(tdb, tdb->map_size +1, 1);781 methods->tdb_oob(tdb, tdb->map_size, 1, 1); 745 782 746 783 /* we have to reset the old map size so that we don't try to expand the file … … 751 788 as the magic ptr in the recovery record has not been set */ 752 789 CONVERT(recovery_head); 753 if (methods->tdb_write(tdb, TDB_RECOVERY_HEAD, 790 if (methods->tdb_write(tdb, TDB_RECOVERY_HEAD, 754 791 &recovery_head, sizeof(tdb_off_t)) == -1) { 755 792 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_recovery_allocate: failed to write recovery head\n")); … … 768 805 setup the recovery data that will be used on a crash during commit 769 806 */ 770 static int transaction_setup_recovery(struct tdb_context *tdb, 807 static int transaction_setup_recovery(struct tdb_context *tdb, 771 808 tdb_off_t *magic_offset) 772 809 { … … 783 820 check that the recovery area has enough space 784 821 */ 785 if (tdb_recovery_allocate(tdb, &recovery_size, 822 if (tdb_recovery_allocate(tdb, &recovery_size, 786 823 &recovery_offset, &recovery_max_size) == -1) { 787 824 return -1; … … 901 938 902 939 static int _tdb_transaction_prepare_commit(struct tdb_context *tdb) 903 { 940 { 904 941 const struct tdb_methods *methods; 905 942 … … 926 963 if (tdb->transaction->nesting != 0) { 927 964 return 0; 928 } 965 } 929 966 930 967 /* check for a null transaction */ … … 959 996 } 960 997 961 if (!(tdb->flags & TDB_NOSYNC)) { 962 /* write the recovery data to the end of the file */ 963 if (transaction_setup_recovery(tdb, &tdb->transaction->magic_offset) == -1) { 964 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_prepare_commit: failed to setup recovery data\n")); 965 _tdb_transaction_cancel(tdb); 966 return -1; 967 } 998 /* write the recovery data to the end of the file */ 999 if (transaction_setup_recovery(tdb, &tdb->transaction->magic_offset) == -1) { 1000 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_prepare_commit: failed to setup recovery data\n")); 1001 _tdb_transaction_cancel(tdb); 1002 return -1; 968 1003 } 969 1004 … … 972 1007 /* expand the file to the new size if needed */ 973 1008 if (tdb->map_size != tdb->transaction->old_map_size) { 974 if (methods->tdb_expand_file(tdb, tdb->transaction->old_map_size, 975 tdb->map_size - 1009 if (methods->tdb_expand_file(tdb, tdb->transaction->old_map_size, 1010 tdb->map_size - 976 1011 tdb->transaction->old_map_size) == -1) { 977 1012 tdb->ecode = TDB_ERR_IO; … … 981 1016 } 982 1017 tdb->map_size = tdb->transaction->old_map_size; 983 methods->tdb_oob(tdb, tdb->map_size +1, 1);1018 methods->tdb_oob(tdb, tdb->map_size, 1, 1); 984 1019 } 985 1020 … … 1085 1120 run the crash recovery code */ 1086 1121 tdb->methods = methods; 1087 
tdb_transaction_recover(tdb); 1122 tdb_transaction_recover(tdb); 1088 1123 1089 1124 _tdb_transaction_cancel(tdb); … … 1093 1128 } 1094 1129 SAFE_FREE(tdb->transaction->blocks[i]); 1095 } 1130 } 1096 1131 1097 1132 /* Do this before we drop lock or blocks. */ … … 1160 1195 1161 1196 /* read the recovery record */ 1162 if (tdb->methods->tdb_read(tdb, recovery_head, &rec, 1197 if (tdb->methods->tdb_read(tdb, recovery_head, &rec, 1163 1198 sizeof(rec), DOCONV()) == -1) { 1164 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to read recovery record\n")); 1199 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to read recovery record\n")); 1165 1200 tdb->ecode = TDB_ERR_IO; 1166 1201 return -1; … … 1182 1217 data = (unsigned char *)malloc(rec.data_len); 1183 1218 if (data == NULL) { 1184 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to allocate recovery data\n")); 1219 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to allocate recovery data\n")); 1185 1220 tdb->ecode = TDB_ERR_OOM; 1186 1221 return -1; … … 1190 1225 if (tdb->methods->tdb_read(tdb, recovery_head + sizeof(rec), data, 1191 1226 rec.data_len, 0) == -1) { 1192 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to read recovery data\n")); 1227 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to read recovery data\n")); 1193 1228 tdb->ecode = TDB_ERR_IO; 1194 1229 return -1; … … 1207 1242 if (tdb->methods->tdb_write(tdb, ofs, p+8, len) == -1) { 1208 1243 free(data); 1209 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to recover % d bytes at offset %d\n", len, ofs));1244 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to recover %u bytes at offset %u\n", len, ofs)); 1210 1245 tdb->ecode = TDB_ERR_IO; 1211 1246 return -1; … … 1227 1262 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to remove recovery head\n")); 1228 1263 tdb->ecode = TDB_ERR_IO; 1229 return -1; 1264 return -1; 1230 1265 } 1231 1266 } … … 1236 1271 TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_transaction_recover: failed to remove recovery magic\n")); 1237 1272 tdb->ecode = TDB_ERR_IO; 1238 return -1; 1273 return -1; 1239 1274 } 1240 1275 … … 1245 1280 } 1246 1281 1247 TDB_LOG((tdb, TDB_DEBUG_TRACE, "tdb_transaction_recover: recovered % d byte database\n",1282 TDB_LOG((tdb, TDB_DEBUG_TRACE, "tdb_transaction_recover: recovered %u byte database\n", 1248 1283 recovery_eof)); 1249 1284 -
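The reworded comment at the top of transaction.c records a real behaviour change: TDB_NOSYNC no longer skips the recovery setup, only the fsync/msync calls, so a process killed mid-commit can always be repaired. What is given up under TDB_NOSYNC is durability across a machine crash. The call pattern this protects, as a sketch with placeholder key/value handling:

    static int store_one(struct tdb_context *tdb, TDB_DATA key, TDB_DATA val)
    {
            if (tdb_transaction_start(tdb) == -1) {
                    return -1;
            }
            if (tdb_store(tdb, key, val, TDB_REPLACE) == -1) {
                    tdb_transaction_cancel(tdb);
                    return -1;
            }
            /* Even under TDB_NOSYNC, a kill -9 anywhere in here is
             * repaired by tdb_transaction_recover() on the next lock of
             * the database; an OS crash may still lose the commit, since
             * nothing was fsync'ed. */
            return tdb_transaction_commit(tdb);
    }

The tdb_recovery_allocate() rework serves the same goal: a recovery area found mid-file is freed and recreated at the end of the file, an area already at the end is grown in place, and all of the size arithmetic is overflow-checked through tdb_add_off_t().
-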
vendor/current/lib/tdb/common/traverse.c
r986 r988 1 /* 1 /* 2 2 Unix SMB/CIFS implementation. 3 3 … … 38 38 39 39 /* Lock each chain from the start one. */ 40 for (; tlock->hash < tdb->header.hash_size; tlock->hash++) { 40 for (; tlock->hash < tdb->hash_size; tlock->hash++) { 41 41 if (!tlock->off && tlock->hash != 0) { 42 42 /* this is an optimisation for the common case where … … 69 69 */ 70 70 tdb->methods->next_hash_chain(tdb, &tlock->hash); 71 if (tlock->hash == tdb->header.hash_size) { 71 if (tlock->hash == tdb->hash_size) { 72 72 continue; 73 73 } … … 118 118 current = tlock->off; 119 119 tlock->off = rec->next; 120 if (!(tdb->read_only || tdb->traverse_read) && 120 if (!(tdb->read_only || tdb->traverse_read) && 121 121 tdb_do_delete(tdb, current, rec) != 0) 122 122 goto fail; … … 141 141 a non-zero return value from fn() indicates that the traversal should stop 142 142 */ 143 static int tdb_traverse_internal(struct tdb_context *tdb, 143 static int tdb_traverse_internal(struct tdb_context *tdb, 144 144 tdb_traverse_func fn, void *private_data, 145 145 struct tdb_traverse_lock *tl) … … 150 150 tdb_off_t off; 151 151 152 /* This was in the initializaton, above, but the IRIX compiler 152 /* This was in the initialization, above, but the IRIX compiler 153 153 * did not like it. crh 154 154 */ … … 166 166 count++; 167 /* now read the full record */ 168 key.dptr = tdb_alloc_read(tdb, tl->off + sizeof(rec), 168 key.dptr = tdb_alloc_read(tdb, tl->off + sizeof(rec), 169 169 rec.key_len + rec.data_len); 170 170 if (!key.dptr) { … … 211 211 212 212 /* 213 a write style traverse - temporarily marks the db read only 213 a read style traverse - temporarily marks the db read only 214 214 */ 215 _PUBLIC_ int tdb_traverse_read(struct tdb_context *tdb, 215 _PUBLIC_ int tdb_traverse_read(struct tdb_context *tdb, 216 216 tdb_traverse_func fn, void *private_data) 217 217 { … … 240 240 241 241 WARNING: The data buffer given to the callback fn does NOT meet the 242 alignment restrictions malloc gives you. 242 alignment guarantees malloc gives you. 243 243 */ 244 _PUBLIC_ int tdb_traverse(struct tdb_context *tdb, 244 _PUBLIC_ int tdb_traverse(struct tdb_context *tdb, 245 245 tdb_traverse_func fn, void *private_data) 246 246 { 247 247 struct tdb_traverse_lock tl = { NULL, 0, 0, F_WRLCK }; 248 enum tdb_lock_flags lock_flags; 248 249 int ret; 249 250 … … 252 253 } 253 254 254 if (tdb_transaction_lock(tdb, F_WRLCK, TDB_LOCK_WAIT)) { 255 lock_flags = TDB_LOCK_WAIT; 256 257 if (tdb->allrecord_lock.count != 0) { 258 /* 259 * This avoids a deadlock between tdb_lockall() and 260 * tdb_traverse(). See 261 * https://bugzilla.samba.org/show_bug.cgi?id=11381 262 */ 263 lock_flags = TDB_LOCK_NOWAIT; 264 } 265 266 if (tdb_transaction_lock(tdb, F_WRLCK, lock_flags)) { 255 267 return -1; 256 268 }
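The TDB_LOCK_NOWAIT special case at the bottom is the fix for the deadlock cited in the new comment: one process holds the allrecord lock and wants the transaction lock to traverse, while another holds the transaction lock inside a commit and waits for the allrecord lock. Failing fast breaks the cycle. The affected pattern, with an illustrative callback:

    static int count_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA val,
                        void *private_data)
    {
            unsigned int *count = (unsigned int *)private_data;
            (*count)++;
            return 0;               /* non-zero stops the traversal */
    }

    static int count_under_lockall(struct tdb_context *tdb,
                                   unsigned int *count)
    {
            int ret;

            if (tdb_lockall(tdb) == -1) {
                    return -1;
            }
            /* With the allrecord lock held, tdb_traverse() now tries the
             * transaction lock with TDB_LOCK_NOWAIT and returns -1 rather
             * than blocking forever against a concurrent commit. */
            ret = tdb_traverse(tdb, count_fn, count);
            tdb_unlockall(tdb);
            return ret;
    }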