/*
   Unix SMB/CIFS implementation.

   trivial database library

   Copyright (C) Volker Lendecke 2012,2013
   Copyright (C) Stefan Metzmacher 2013,2014
   Copyright (C) Michael Adam 2014

   ** NOTE! The following LGPL license applies to the tdb
   ** library. This does NOT imply that all of Samba is released
   ** under the LGPL

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "tdb_private.h"
#include "system/threads.h"

#ifdef USE_TDB_MUTEX_LOCKING

/*
 * If we run with mutexes, we store the "struct tdb_mutexes" at the
 * beginning of the file. We store an additional tdb_header right
 * beyond the mutex area, page aligned. All the offsets within the tdb
 * are relative to the area behind the mutex area. tdb->map_ptr points
 * behind the mutex area as well, so the read and write path in the
 * mutex case can remain unchanged.
 *
 * Early in the mutex development the mutexes were placed between the hash
 * chain pointers and the real tdb data. This had two drawbacks: First, it
 * made pointer calculations more complex. Second, we had to mmap the mutex
 * area twice. One was the normal map_ptr in the tdb. This frequently changed
 * from within tdb_oob. At least the Linux glibc robust mutex code assumes
 * constant pointers in memory, so a constantly changing mmap area destroys
 * the mutex list. So we had to mmap the first bytes of the file with a second
 * mmap call. With that scheme, very weird errors happened that could be
 * easily fixed by doing the mutex mmap in a second file. It seemed that
 * mapping the same memory area twice does not end up accessing the same
 * physical page; looking at the mutexes in gdb, it seemed that old data
 * showed up after some re-mapping. To avoid a separate mutex file, the code
 * now puts the real content of the tdb file after the mutex area. This way
 * we do not have overlapping mmap areas: the mutex area is mmapped once and
 * not changed, while the tdb data area's mmap is constantly changed but does
 * not overlap.
 */
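
/*
 * Illustrative layout sketch of the scheme described above (offsets
 * are examples only; the real values depend on hash_size, the page
 * size and the platform's sizeof(pthread_mutex_t)):
 *
 *   file offset 0    struct tdb_mutexes
 *                      hdr (the tdb_header stored at offset 0)
 *                      allrecord_mutex, allrecord_lock
 *                      hashchains[0]        the freelist mutex
 *                      hashchains[1..n]     one mutex per hash chain
 *   ...              padding up to the next page boundary
 *   page boundary    the real tdb_header; tdb->map_ptr points here,
 *                    and all tdb offsets are relative to this point
 */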

struct tdb_mutexes {
	struct tdb_header hdr;

	/* protect allrecord_lock */
	pthread_mutex_t allrecord_mutex;

	/*
	 * F_UNLCK: free,
	 * F_RDLCK: shared,
	 * F_WRLCK: exclusive
	 */
	short int allrecord_lock;

	/*
	 * Index 0 is the freelist mutex, followed by
	 * one mutex per hashchain.
	 */
	pthread_mutex_t hashchains[1];
};

bool tdb_have_mutexes(struct tdb_context *tdb)
{
	return ((tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) != 0);
}

size_t tdb_mutex_size(struct tdb_context *tdb)
{
	size_t mutex_size;

	if (!tdb_have_mutexes(tdb)) {
		return 0;
	}

	mutex_size = sizeof(struct tdb_mutexes);
	mutex_size += tdb->hash_size * sizeof(pthread_mutex_t);

	return TDB_ALIGN(mutex_size, tdb->page_size);
}
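
/*
 * Worked example of the size computation above (the concrete numbers
 * are illustrative assumptions, not guaranteed by any ABI): note that
 * sizeof(struct tdb_mutexes) already contains hashchains[1], the
 * freelist mutex, so the array effectively holds hash_size + 1
 * mutexes. Assuming sizeof(pthread_mutex_t) == 40 (as on Linux/x86_64
 * with glibc), hash_size == 131 and page_size == 4096, the mutex
 * payload alone is (131 + 1) * 40 == 5280 bytes; together with the
 * embedded header and padding this is still below two pages, so
 * TDB_ALIGN() rounds the result up to 8192.
 */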

/*
 * Get the index for a chain mutex
 */
static bool tdb_mutex_index(struct tdb_context *tdb, off_t off, off_t len,
			    unsigned *idx)
{
	/*
	 * Weird, but true: We fcntl lock 1 byte at an offset 4 bytes before
	 * the 4 bytes of the freelist start and the hash chain that is about
	 * to be locked. See lock_offset() where the freelist is -1 vs the
	 * "+1" in TDB_HASH_TOP(). Because the mutex array is represented in
	 * the tdb file itself as data, we need to adjust the offset here.
	 */
	const off_t freelist_lock_ofs = FREELIST_TOP - sizeof(tdb_off_t);

	if (!tdb_have_mutexes(tdb)) {
		return false;
	}
	if (len != 1) {
		/* Possibly the allrecord lock */
		return false;
	}
	if (off < freelist_lock_ofs) {
		/* One of the special locks */
		return false;
	}
	if (tdb->hash_size == 0) {
		/* tdb not initialized yet, called from tdb_open_ex() */
		return false;
	}
	if (off >= TDB_DATA_START(tdb->hash_size)) {
		/* Single record lock from traverses */
		return false;
	}

	/*
	 * Now we know it's a freelist or hash chain lock. Those are always 4
	 * byte aligned. Paranoia check.
	 */
	if ((off % sizeof(tdb_off_t)) != 0) {
		abort();
	}

	/*
	 * Re-index the fcntl offset into an offset into the mutex array
	 */
	off -= freelist_lock_ofs; /* rebase to index 0 */
	off /= sizeof(tdb_off_t); /* 0 for the freelist, 1..n for hash chains */

	*idx = off;
	return true;
}
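
/*
 * Example of the offset -> index mapping implemented above (derived
 * directly from the code; lock_offset() and TDB_HASH_TOP() live
 * elsewhere in tdb):
 *
 *   fcntl lock byte at FREELIST_TOP - 4     -> idx 0 (freelist)
 *   fcntl lock byte at FREELIST_TOP         -> idx 1 (hash chain 0)
 *   fcntl lock byte at FREELIST_TOP + 4     -> idx 2 (hash chain 1)
 *   ...
 *   anything >= TDB_DATA_START(hash_size)   -> no mutex (record lock)
 */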

static bool tdb_have_mutex_chainlocks(struct tdb_context *tdb)
{
	size_t i;

	for (i=0; i < tdb->num_lockrecs; i++) {
		bool ret;
		unsigned idx;

		ret = tdb_mutex_index(tdb,
				      tdb->lockrecs[i].off,
				      tdb->lockrecs[i].count,
				      &idx);
		if (!ret) {
			continue;
		}

		if (idx == 0) {
			/* this is the freelist mutex */
			continue;
		}

		return true;
	}

	return false;
}
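
/*
 * Why idx 0 (the freelist mutex) is skipped above: as the comments in
 * tdb_mutex_lock() below explain, the freelist mutex is independent
 * of the allrecord lock, and the allrecord locker only walks
 * hashchains[1..n]. Holding the freelist mutex therefore cannot
 * contribute to the chain-mutex/allrecord deadlock this helper
 * exists to detect.
 */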

static int chain_mutex_lock(pthread_mutex_t *m, bool waitflag)
{
	int ret;

	if (waitflag) {
		ret = pthread_mutex_lock(m);
	} else {
		ret = pthread_mutex_trylock(m);
	}
	if (ret != EOWNERDEAD) {
		return ret;
	}

	/*
	 * For chainlocks, we don't do any cleanup (yet?)
	 */
	return pthread_mutex_consistent(m);
}
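
/*
 * Background on the EOWNERDEAD handling above (the standard POSIX
 * robust-mutex contract, not anything tdb-specific): if the previous
 * owner of a robust mutex died while holding it, the next
 * pthread_mutex_lock() succeeds but returns EOWNERDEAD. The new
 * owner then holds the mutex and must call pthread_mutex_consistent()
 * before unlocking; otherwise the mutex becomes permanently unusable
 * and later lock attempts fail with ENOTRECOVERABLE. A minimal
 * caller-side sketch:
 *
 *	ret = pthread_mutex_lock(m);
 *	if (ret == EOWNERDEAD) {
 *		// repair shared state here, then:
 *		ret = pthread_mutex_consistent(m);
 *	}
 *	if (ret != 0) {
 *		// genuine error, we do not hold the mutex
 *	}
 */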

static int allrecord_mutex_lock(struct tdb_mutexes *m, bool waitflag)
{
	int ret;

	if (waitflag) {
		ret = pthread_mutex_lock(&m->allrecord_mutex);
	} else {
		ret = pthread_mutex_trylock(&m->allrecord_mutex);
	}
	if (ret != EOWNERDEAD) {
		return ret;
	}

	/*
	 * The allrecord lock holder died. We need to reset the allrecord_lock
	 * to F_UNLCK. This should also be the indication for
	 * tdb_needs_recovery.
	 */
	m->allrecord_lock = F_UNLCK;

	return pthread_mutex_consistent(&m->allrecord_mutex);
}

bool tdb_mutex_lock(struct tdb_context *tdb, int rw, off_t off, off_t len,
		    bool waitflag, int *pret)
{
	struct tdb_mutexes *m = tdb->mutexes;
	pthread_mutex_t *chain;
	int ret;
	unsigned idx;
	bool allrecord_ok;

	if (!tdb_mutex_index(tdb, off, len, &idx)) {
		return false;
	}
	chain = &m->hashchains[idx];

again:
	ret = chain_mutex_lock(chain, waitflag);
	if (ret == EBUSY) {
		ret = EAGAIN;
	}
	if (ret != 0) {
		errno = ret;
		goto fail;
	}

	if (idx == 0) {
		/*
		 * This is a freelist lock, which is independent of
		 * the allrecord lock. So we're done once we got the
		 * freelist mutex.
		 */
		*pret = 0;
		return true;
	}

	if (tdb_have_mutex_chainlocks(tdb)) {
		/*
		 * We can only check the allrecord lock once. If we do it with
		 * one chain mutex locked, we will deadlock with the allrecord
		 * locker process in the following way: We lock the first hash
		 * chain, we check for the allrecord lock. We keep the hash
		 * chain locked. Then the allrecord locker locks the
		 * allrecord_mutex. It walks the list of chain mutexes,
		 * locking them all in sequence. Meanwhile, we have the chain
		 * mutex locked, so the allrecord locker blocks trying to lock
		 * our chain mutex. Then we come in and try to lock the second
		 * chain lock, which in most cases will be the freelist. We
		 * see that the allrecord lock is locked and put ourselves on
		 * the allrecord_mutex. This will never be signalled though
		 * because the allrecord locker waits for us to give up the
		 * chain lock.
		 */

		*pret = 0;
		return true;
	}

	/*
	 * Check if someone has the allrecord lock: queue if so.
	 */

	allrecord_ok = false;

	if (m->allrecord_lock == F_UNLCK) {
		/*
		 * allrecord lock not taken
		 */
		allrecord_ok = true;
	}

	if ((m->allrecord_lock == F_RDLCK) && (rw == F_RDLCK)) {
		/*
		 * allrecord shared lock taken, but we only want to read
		 */
		allrecord_ok = true;
	}

	if (allrecord_ok) {
		*pret = 0;
		return true;
	}

	ret = pthread_mutex_unlock(chain);
	if (ret != 0) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
			 "(chain_mutex) failed: %s\n", strerror(ret)));
		errno = ret;
		goto fail;
	}
	ret = allrecord_mutex_lock(m, waitflag);
	if (ret == EBUSY) {
		ret = EAGAIN;
	}
	if (ret != 0) {
		if (waitflag || (ret != EAGAIN)) {
			TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_%slock"
				 "(allrecord_mutex) failed: %s\n",
				 waitflag ? "" : "try_", strerror(ret)));
		}
		errno = ret;
		goto fail;
	}
	ret = pthread_mutex_unlock(&m->allrecord_mutex);
	if (ret != 0) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
			 "(allrecord_mutex) failed: %s\n", strerror(ret)));
		errno = ret;
		goto fail;
	}
	goto again;

fail:
	*pret = -1;
	return true;
}
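
/*
 * Condensed view of the retry protocol implemented above (a
 * restatement of the in-function comments, not additional API):
 *
 *   1. lock our chain mutex
 *   2. if it is the freelist mutex, or we already hold another chain
 *      mutex, stop here (re-checking the allrecord lock now could
 *      deadlock)
 *   3. if the allrecord lock is free, or shared while we only want
 *      to read, stop here
 *   4. otherwise drop the chain mutex, queue on the allrecord_mutex
 *      until its holder releases it, drop it again and retry from
 *      step 1
 */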

bool tdb_mutex_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len,
		      int *pret)
{
	struct tdb_mutexes *m = tdb->mutexes;
	pthread_mutex_t *chain;
	int ret;
	unsigned idx;

	if (!tdb_mutex_index(tdb, off, len, &idx)) {
		return false;
	}
	chain = &m->hashchains[idx];

	ret = pthread_mutex_unlock(chain);
	if (ret == 0) {
		*pret = 0;
		return true;
	}
	errno = ret;
	*pret = -1;
	return true;
}

int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype,
			     enum tdb_lock_flags flags)
{
	struct tdb_mutexes *m = tdb->mutexes;
	int ret;
	uint32_t i;
	bool waitflag = (flags & TDB_LOCK_WAIT);
	int saved_errno;

	if (tdb->flags & TDB_NOLOCK) {
		return 0;
	}

	if (flags & TDB_LOCK_MARK_ONLY) {
		return 0;
	}

	ret = allrecord_mutex_lock(m, waitflag);
	if (!waitflag && (ret == EBUSY)) {
		errno = EAGAIN;
		tdb->ecode = TDB_ERR_LOCK;
		return -1;
	}
	if (ret != 0) {
		if (!(flags & TDB_LOCK_PROBE)) {
			TDB_LOG((tdb, TDB_DEBUG_TRACE,
				 "allrecord_mutex_lock() failed: %s\n",
				 strerror(ret)));
		}
		tdb->ecode = TDB_ERR_LOCK;
		return -1;
	}

	if (m->allrecord_lock != F_UNLCK) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
			 (int)m->allrecord_lock));
		goto fail_unlock_allrecord_mutex;
	}
	m->allrecord_lock = (ltype == F_RDLCK) ? F_RDLCK : F_WRLCK;

	for (i=0; i<tdb->hash_size; i++) {

		/* ignore hashchains[0], the freelist */
		pthread_mutex_t *chain = &m->hashchains[i+1];

		ret = chain_mutex_lock(chain, waitflag);
		if (!waitflag && (ret == EBUSY)) {
			errno = EAGAIN;
			goto fail_unroll_allrecord_lock;
		}
		if (ret != 0) {
			if (!(flags & TDB_LOCK_PROBE)) {
				TDB_LOG((tdb, TDB_DEBUG_TRACE,
					 "chain_mutex_lock() failed: %s\n",
					 strerror(ret)));
			}
			errno = ret;
			goto fail_unroll_allrecord_lock;
		}

		ret = pthread_mutex_unlock(chain);
		if (ret != 0) {
			TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
				 "(chainlock) failed: %s\n", strerror(ret)));
			errno = ret;
			goto fail_unroll_allrecord_lock;
		}
	}
	/*
	 * We leave this routine with m->allrecord_mutex locked
	 */
	return 0;

fail_unroll_allrecord_lock:
	m->allrecord_lock = F_UNLCK;

fail_unlock_allrecord_mutex:
	saved_errno = errno;
	ret = pthread_mutex_unlock(&m->allrecord_mutex);
	if (ret != 0) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
			 "(allrecord_mutex) failed: %s\n", strerror(ret)));
	}
	errno = saved_errno;
	tdb->ecode = TDB_ERR_LOCK;
	return -1;
}
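
/*
 * Note on the lock/unlock loop above: taking and immediately
 * releasing every chain mutex acts as a barrier. Once
 * m->allrecord_lock is set, new lockers in tdb_mutex_lock() queue on
 * the allrecord_mutex instead of proceeding, so each chain mutex we
 * manage to acquire proves that any pre-existing holder of that
 * chain has finished. After the loop we effectively own the whole
 * file while holding only the allrecord_mutex itself.
 */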

int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb)
{
	struct tdb_mutexes *m = tdb->mutexes;
	int ret;
	uint32_t i;

	if (tdb->flags & TDB_NOLOCK) {
		return 0;
	}

	/*
	 * Our only caller tdb_allrecord_upgrade()
	 * guarantees that we already own the allrecord lock.
	 *
	 * Which means m->allrecord_mutex is still locked by us.
	 */

	if (m->allrecord_lock != F_RDLCK) {
		tdb->ecode = TDB_ERR_LOCK;
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
			 (int)m->allrecord_lock));
		return -1;
	}

	m->allrecord_lock = F_WRLCK;

	for (i=0; i<tdb->hash_size; i++) {

		/* ignore hashchains[0], the freelist */
		pthread_mutex_t *chain = &m->hashchains[i+1];

		ret = chain_mutex_lock(chain, true);
		if (ret != 0) {
			TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_lock"
				 "(chainlock) failed: %s\n", strerror(ret)));
			goto fail_unroll_allrecord_lock;
		}

		ret = pthread_mutex_unlock(chain);
		if (ret != 0) {
			TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
				 "(chainlock) failed: %s\n", strerror(ret)));
			goto fail_unroll_allrecord_lock;
		}
	}

	return 0;

fail_unroll_allrecord_lock:
	m->allrecord_lock = F_RDLCK;
	tdb->ecode = TDB_ERR_LOCK;
	return -1;
}

void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb)
{
	struct tdb_mutexes *m = tdb->mutexes;

	/*
	 * Our only caller tdb_allrecord_upgrade() (in the error case)
	 * guarantees that we already own the allrecord lock.
	 *
	 * Which means m->allrecord_mutex is still locked by us.
	 */

	if (m->allrecord_lock != F_WRLCK) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
			 (int)m->allrecord_lock));
		return;
	}

	m->allrecord_lock = F_RDLCK;
	return;
}


int tdb_mutex_allrecord_unlock(struct tdb_context *tdb)
{
	struct tdb_mutexes *m = tdb->mutexes;
	short old;
	int ret;

	if (tdb->flags & TDB_NOLOCK) {
		return 0;
	}

	/*
	 * Our only callers tdb_allrecord_unlock() and
	 * tdb_allrecord_lock() (in the error path)
	 * guarantee that we already own the allrecord lock.
	 *
	 * Which means m->allrecord_mutex is still locked by us.
	 */

	if ((m->allrecord_lock != F_RDLCK) && (m->allrecord_lock != F_WRLCK)) {
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
			 (int)m->allrecord_lock));
		return -1;
	}

	old = m->allrecord_lock;
	m->allrecord_lock = F_UNLCK;

	ret = pthread_mutex_unlock(&m->allrecord_mutex);
	if (ret != 0) {
		m->allrecord_lock = old;
		TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
			 "(allrecord_mutex) failed: %s\n", strerror(ret)));
		return -1;
	}
	return 0;
}

int tdb_mutex_init(struct tdb_context *tdb)
{
	struct tdb_mutexes *m;
	pthread_mutexattr_t ma;
	int i, ret;

	ret = tdb_mutex_mmap(tdb);
	if (ret == -1) {
		return -1;
	}
	m = tdb->mutexes;

	ret = pthread_mutexattr_init(&ma);
	if (ret != 0) {
		goto fail_munmap;
	}
	ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
	if (ret != 0) {
		goto fail;
	}
	ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
	if (ret != 0) {
		goto fail;
	}
	ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
	if (ret != 0) {
		goto fail;
	}

	for (i=0; i<tdb->hash_size+1; i++) {
		pthread_mutex_t *chain = &m->hashchains[i];

		ret = pthread_mutex_init(chain, &ma);
		if (ret != 0) {
			goto fail;
		}
	}

	m->allrecord_lock = F_UNLCK;

	ret = pthread_mutex_init(&m->allrecord_mutex, &ma);
	if (ret != 0) {
		goto fail;
	}
	ret = 0;
fail:
	pthread_mutexattr_destroy(&ma);
fail_munmap:
	tdb_mutex_munmap(tdb);

	if (ret == 0) {
		return 0;
	}

	errno = ret;
	return -1;
}
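
/*
 * Note that tdb_mutex_init() unmaps the mutex area again on both the
 * success and the failure path (the fall-through from "fail:" into
 * "fail_munmap:" is deliberate): it only writes the initialized
 * mutex state into the file. The caller is expected to re-map the
 * area via tdb_mutex_mmap() below when the tdb is actually used.
 */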

int tdb_mutex_mmap(struct tdb_context *tdb)
{
	size_t len;
	void *ptr;

	len = tdb_mutex_size(tdb);
	if (len == 0) {
		return 0;
	}

	ptr = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FILE,
		   tdb->fd, 0);
	if (ptr == MAP_FAILED) {
		return -1;
	}
	tdb->mutexes = (struct tdb_mutexes *)ptr;

	return 0;
}

int tdb_mutex_munmap(struct tdb_context *tdb)
{
	size_t len;

	len = tdb_mutex_size(tdb);
	if (len == 0) {
		return 0;
	}

	return munmap(tdb->mutexes, len);
}

static bool tdb_mutex_locking_cached;

static bool tdb_mutex_locking_supported(void)
{
	pthread_mutexattr_t ma;
	pthread_mutex_t m;
	int ret;
	static bool initialized;

	if (initialized) {
		return tdb_mutex_locking_cached;
	}

	initialized = true;

	ret = pthread_mutexattr_init(&ma);
	if (ret != 0) {
		return false;
	}
	ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
	if (ret != 0) {
		goto cleanup_ma;
	}
	ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
	if (ret != 0) {
		goto cleanup_ma;
	}
	ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
	if (ret != 0) {
		goto cleanup_ma;
	}
	ret = pthread_mutex_init(&m, &ma);
	if (ret != 0) {
		goto cleanup_ma;
	}
	ret = pthread_mutex_lock(&m);
	if (ret != 0) {
		goto cleanup_m;
	}
	/*
	 * This makes sure we have real mutexes
	 * from a threading library instead of just
	 * stubs from libc.
	 */
	ret = pthread_mutex_lock(&m);
	if (ret != EDEADLK) {
		goto cleanup_lock;
	}
	ret = pthread_mutex_unlock(&m);
	if (ret != 0) {
		goto cleanup_m;
	}

	tdb_mutex_locking_cached = true;
	goto cleanup_m;

cleanup_lock:
	pthread_mutex_unlock(&m);
cleanup_m:
	pthread_mutex_destroy(&m);
cleanup_ma:
	pthread_mutexattr_destroy(&ma);
	return tdb_mutex_locking_cached;
}
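
/*
 * The double pthread_mutex_lock() above is the actual probe: with
 * PTHREAD_MUTEX_ERRORCHECK, a second lock attempt by the same thread
 * must fail with EDEADLK. Non-functional stub implementations (for
 * example a libc built without real threading support) typically
 * return 0 again, which makes the probe report "unsupported".
 */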

static void (*tdb_robust_mutext_old_handler)(int) = SIG_ERR;
static pid_t tdb_robust_mutex_pid = -1;

static bool tdb_robust_mutex_setup_sigchild(void (*handler)(int),
					    void (**p_old_handler)(int))
{
#ifdef HAVE_SIGACTION
	struct sigaction act;
	struct sigaction oldact;

	memset(&act, '\0', sizeof(act));

	act.sa_handler = handler;
#ifdef SA_RESTART
	act.sa_flags = SA_RESTART;
#endif
	sigemptyset(&act.sa_mask);
	sigaddset(&act.sa_mask, SIGCHLD);
	sigaction(SIGCHLD, &act, &oldact);
	if (p_old_handler) {
		*p_old_handler = oldact.sa_handler;
	}
	return true;
#else /* !HAVE_SIGACTION */
	return false;
#endif
}

static void tdb_robust_mutex_handler(int sig)
{
	if (tdb_robust_mutex_pid != -1) {
		pid_t pid;
		int status;

		pid = waitpid(tdb_robust_mutex_pid, &status, WNOHANG);
		if (pid == tdb_robust_mutex_pid) {
			tdb_robust_mutex_pid = -1;
			return;
		}
	}

	if (tdb_robust_mutext_old_handler == SIG_DFL) {
		return;
	}
	if (tdb_robust_mutext_old_handler == SIG_IGN) {
		return;
	}
	if (tdb_robust_mutext_old_handler == SIG_ERR) {
		return;
	}

	tdb_robust_mutext_old_handler(sig);
}

_PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void)
{
	void *ptr;
	pthread_mutex_t *m;
	pthread_mutexattr_t ma;
	int ret = 1;
	int pipe_down[2] = { -1, -1 };
	int pipe_up[2] = { -1, -1 };
	ssize_t nread;
	char c = 0;
	bool ok;
	int status;
	static bool initialized;

	if (initialized) {
		return tdb_mutex_locking_cached;
	}

	initialized = true;

	ok = tdb_mutex_locking_supported();
	if (!ok) {
		return false;
	}

	tdb_mutex_locking_cached = false;

	ptr = mmap(NULL, sizeof(pthread_mutex_t), PROT_READ|PROT_WRITE,
		   MAP_SHARED|MAP_ANON, -1 /* fd */, 0);
	if (ptr == MAP_FAILED) {
		return false;
	}
	m = (pthread_mutex_t *)ptr;

	ret = pipe(pipe_down);
	if (ret != 0) {
		goto cleanup_mmap;
	}
	ret = pipe(pipe_up);
	if (ret != 0) {
		goto cleanup_pipe;
	}

	ret = pthread_mutexattr_init(&ma);
	if (ret != 0) {
		goto cleanup_pipe;
	}
	ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
	if (ret != 0) {
		goto cleanup_ma;
	}
	ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
	if (ret != 0) {
		goto cleanup_ma;
	}
	ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
	if (ret != 0) {
		goto cleanup_ma;
	}
	ret = pthread_mutex_init(m, &ma);
	if (ret != 0) {
		goto cleanup_ma;
	}

	if (tdb_robust_mutex_setup_sigchild(tdb_robust_mutex_handler,
			&tdb_robust_mutext_old_handler) == false) {
		goto cleanup_ma;
	}

	tdb_robust_mutex_pid = fork();
	if (tdb_robust_mutex_pid == 0) {
		size_t nwritten;
		close(pipe_down[1]);
		close(pipe_up[0]);
		ret = pthread_mutex_lock(m);
		nwritten = write(pipe_up[1], &ret, sizeof(ret));
		if (nwritten != sizeof(ret)) {
			_exit(1);
		}
		if (ret != 0) {
			_exit(1);
		}
		nread = read(pipe_down[0], &c, 1);
		if (nread != 1) {
			_exit(1);
		}
		/* leave locked */
		_exit(0);
	}
	if (tdb_robust_mutex_pid == -1) {
		goto cleanup_sig_child;
	}
	close(pipe_down[0]);
	pipe_down[0] = -1;
	close(pipe_up[1]);
	pipe_up[1] = -1;

	nread = read(pipe_up[0], &ret, sizeof(ret));
	if (nread != sizeof(ret)) {
		goto cleanup_child;
	}

	ret = pthread_mutex_trylock(m);
	if (ret != EBUSY) {
		if (ret == 0) {
			pthread_mutex_unlock(m);
		}
		goto cleanup_child;
	}

	if (write(pipe_down[1], &c, 1) != 1) {
		goto cleanup_child;
	}

	nread = read(pipe_up[0], &c, 1);
	if (nread != 0) {
		goto cleanup_child;
	}

	while (tdb_robust_mutex_pid > 0) {
		pid_t pid;

		errno = 0;
		pid = waitpid(tdb_robust_mutex_pid, &status, 0);
		if (pid == tdb_robust_mutex_pid) {
			tdb_robust_mutex_pid = -1;
			break;
		}
		if (pid == -1 && errno != EINTR) {
			goto cleanup_child;
		}
	}
	tdb_robust_mutex_setup_sigchild(tdb_robust_mutext_old_handler, NULL);

	ret = pthread_mutex_trylock(m);
	if (ret != EOWNERDEAD) {
		if (ret == 0) {
			pthread_mutex_unlock(m);
		}
		goto cleanup_m;
	}

	ret = pthread_mutex_consistent(m);
	if (ret != 0) {
		goto cleanup_m;
	}

	ret = pthread_mutex_trylock(m);
	if (ret != EDEADLK) {
		pthread_mutex_unlock(m);
		goto cleanup_m;
	}

	ret = pthread_mutex_unlock(m);
	if (ret != 0) {
		goto cleanup_m;
	}

	tdb_mutex_locking_cached = true;
	goto cleanup_m;

cleanup_child:
	while (tdb_robust_mutex_pid > 0) {
		pid_t pid;

		kill(tdb_robust_mutex_pid, SIGKILL);

		errno = 0;
		pid = waitpid(tdb_robust_mutex_pid, &status, 0);
		if (pid == tdb_robust_mutex_pid) {
			tdb_robust_mutex_pid = -1;
			break;
		}
		if (pid == -1 && errno != EINTR) {
			break;
		}
	}
cleanup_sig_child:
	tdb_robust_mutex_setup_sigchild(tdb_robust_mutext_old_handler, NULL);
cleanup_m:
	pthread_mutex_destroy(m);
cleanup_ma:
	pthread_mutexattr_destroy(&ma);
cleanup_pipe:
	if (pipe_down[0] != -1) {
		close(pipe_down[0]);
	}
	if (pipe_down[1] != -1) {
		close(pipe_down[1]);
	}
	if (pipe_up[0] != -1) {
		close(pipe_up[0]);
	}
	if (pipe_up[1] != -1) {
		close(pipe_up[1]);
	}
cleanup_mmap:
	munmap(ptr, sizeof(pthread_mutex_t));

	return tdb_mutex_locking_cached;
}
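
/*
 * Summary of the runtime probe above: a child process locks a
 * process-shared robust mutex in MAP_ANON shared memory and exits
 * without unlocking. The parent then expects, in order, EBUSY from
 * trylock while the child is alive, EOWNERDEAD after it died, a
 * successful pthread_mutex_consistent(), and EDEADLK from a second
 * trylock. Only if the whole sequence behaves does the check report
 * robust mutexes as usable.
 *
 * Caller-side usage is typically something like the following sketch
 * (tdb_open() and the TDB_MUTEX_LOCKING/TDB_CLEAR_IF_FIRST flags are
 * the real tdb API; the file name and mode are placeholders):
 *
 *	int tdb_flags = TDB_CLEAR_IF_FIRST;
 *
 *	if (tdb_runtime_check_for_robust_mutexes()) {
 *		tdb_flags |= TDB_MUTEX_LOCKING;
 *	}
 *	tdb = tdb_open("example.tdb", 0, tdb_flags, O_RDWR|O_CREAT, 0644);
 */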

#else

size_t tdb_mutex_size(struct tdb_context *tdb)
{
	return 0;
}

bool tdb_have_mutexes(struct tdb_context *tdb)
{
	return false;
}

int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype,
			     enum tdb_lock_flags flags)
{
	tdb->ecode = TDB_ERR_LOCK;
	return -1;
}

int tdb_mutex_allrecord_unlock(struct tdb_context *tdb)
{
	return -1;
}

int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb)
{
	tdb->ecode = TDB_ERR_LOCK;
	return -1;
}

void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb)
{
	return;
}

int tdb_mutex_mmap(struct tdb_context *tdb)
{
	errno = ENOSYS;
	return -1;
}

int tdb_mutex_munmap(struct tdb_context *tdb)
{
	errno = ENOSYS;
	return -1;
}

int tdb_mutex_init(struct tdb_context *tdb)
{
	errno = ENOSYS;
	return -1;
}

_PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void)
{
	return false;
}

#endif