/*
   Unix SMB/CIFS implementation.
   global locks based on dbwrap and messaging
   Copyright (C) 2009 by Volker Lendecke

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

| 19 |
|
|---|
#include "includes.h"
#include "g_lock.h"

static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
				    struct server_id pid);

struct g_lock_ctx {
	struct db_context *db;
	struct messaging_context *msg;
};

/*
 * The "g_lock.tdb" file contains records, indexed by the 0-terminated
 * lockname. The record contains an array of "struct g_lock_rec"
 * structures. Waiters have the lock_type with G_LOCK_PENDING or'ed.
 */

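/*
 * Illustrative example (hypothetical lockname and pids): a record for
 * "mylock" with one lock holder and one waiter would contain two
 * entries:
 *
 *   { .lock_type = G_LOCK_WRITE,                .pid = <holder pid> }
 *   { .lock_type = G_LOCK_WRITE|G_LOCK_PENDING, .pid = <waiter pid> }
 */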
struct g_lock_rec {
	enum g_lock_type lock_type;
	struct server_id pid;
};

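/*
 * Create a g_lock context: open (or create) g_lock.tdb under the lock
 * directory. TDB_CLEAR_IF_FIRST wipes the database when the first
 * process attaches, discarding stale records from a previous run.
 */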
struct g_lock_ctx *g_lock_ctx_init(TALLOC_CTX *mem_ctx,
				   struct messaging_context *msg)
{
	struct g_lock_ctx *result;

	result = talloc(mem_ctx, struct g_lock_ctx);
	if (result == NULL) {
		return NULL;
	}
	result->msg = msg;

	result->db = db_open(result, lock_path("g_lock.tdb"), 0,
			     TDB_CLEAR_IF_FIRST, O_RDWR|O_CREAT, 0700);
	if (result->db == NULL) {
		DEBUG(1, ("g_lock_ctx_init: Could not open g_lock.tdb\n"));
		TALLOC_FREE(result);
		return NULL;
	}
	return result;
}

static bool g_lock_conflicts(enum g_lock_type lock_type,
			     const struct g_lock_rec *rec)
{
	enum g_lock_type rec_lock = rec->lock_type;

	if ((rec_lock & G_LOCK_PENDING) != 0) {
		return false;
	}

	/*
	 * Only tested write locks so far. Very likely this routine
	 * needs to be fixed for read locks....
	 */
	if ((lock_type == G_LOCK_READ) && (rec_lock == G_LOCK_READ)) {
		return false;
	}
	return true;
}

static bool g_lock_parse(TALLOC_CTX *mem_ctx, TDB_DATA data,
			 int *pnum_locks, struct g_lock_rec **plocks)
{
	int i, num_locks;
	struct g_lock_rec *locks;

	if ((data.dsize % sizeof(struct g_lock_rec)) != 0) {
		DEBUG(1, ("invalid lock record length %d\n", (int)data.dsize));
		return false;
	}

	num_locks = data.dsize / sizeof(struct g_lock_rec);
	locks = talloc_array(mem_ctx, struct g_lock_rec, num_locks);
	if (locks == NULL) {
		DEBUG(1, ("talloc failed\n"));
		return false;
	}

	memcpy(locks, data.dptr, data.dsize);

	DEBUG(10, ("locks:\n"));
	for (i=0; i<num_locks; i++) {
		DEBUGADD(10, ("%s: %s %s\n",
			      procid_str(talloc_tos(), &locks[i].pid),
			      ((locks[i].lock_type & 1) == G_LOCK_READ) ?
			      "read" : "write",
			      (locks[i].lock_type & G_LOCK_PENDING) ?
			      "(pending)" : "(owner)"));

		if (((locks[i].lock_type & G_LOCK_PENDING) == 0)
		    && !process_exists(locks[i].pid)) {

			DEBUGADD(10, ("lock owner %s died -- discarding\n",
				      procid_str(talloc_tos(),
						 &locks[i].pid)));

			if (i < (num_locks-1)) {
				locks[i] = locks[num_locks-1];
			}
			num_locks -= 1;
			/*
			 * Re-examine the entry just swapped into slot
			 * "i", it might be stale as well.
			 */
			i -= 1;
		}
	}

	*plocks = locks;
	*pnum_locks = num_locks;
	return true;
}

static void g_lock_cleanup(int *pnum_locks, struct g_lock_rec *locks)
{
	int i, num_locks;

	num_locks = *pnum_locks;

	DEBUG(10, ("g_lock_cleanup: %d locks\n", num_locks));

	for (i=0; i<num_locks; i++) {
		if (process_exists(locks[i].pid)) {
			continue;
		}
		DEBUGADD(10, ("%s does not exist -- discarding\n",
			      procid_str(talloc_tos(), &locks[i].pid)));

		if (i < (num_locks-1)) {
			locks[i] = locks[num_locks-1];
		}
		num_locks -= 1;
		/* Re-examine the entry just swapped into slot "i". */
		i -= 1;
	}
	*pnum_locks = num_locks;
	return;
}

static struct g_lock_rec *g_lock_addrec(TALLOC_CTX *mem_ctx,
					struct g_lock_rec *locks,
					int *pnum_locks,
					const struct server_id pid,
					enum g_lock_type lock_type)
{
	struct g_lock_rec *result;
	int num_locks = *pnum_locks;

	result = talloc_realloc(mem_ctx, locks, struct g_lock_rec,
				num_locks+1);
	if (result == NULL) {
		return NULL;
	}

	result[num_locks].pid = pid;
	result[num_locks].lock_type = lock_type;
	*pnum_locks += 1;
	return result;
}

static void g_lock_got_retry(struct messaging_context *msg,
			     void *private_data,
			     uint32_t msg_type,
			     struct server_id server_id,
			     DATA_BLOB *data);

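/*
 * Try to acquire "name" in one pass: fetch the record, check every
 * existing entry for conflicts, force-unlock entries whose owner has
 * died, and store ourselves back -- either as lock holder or, if
 * somebody else holds a conflicting lock, with G_LOCK_PENDING set.
 * Returns STATUS_PENDING in the latter case.
 */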
static NTSTATUS g_lock_trylock(struct g_lock_ctx *ctx, const char *name,
			       enum g_lock_type lock_type)
{
	struct db_record *rec = NULL;
	struct g_lock_rec *locks = NULL;
	int i, num_locks;
	struct server_id self;
	int our_index;
	TDB_DATA data;
	NTSTATUS status = NT_STATUS_OK;
	NTSTATUS store_status;

again:
	rec = ctx->db->fetch_locked(ctx->db, talloc_tos(),
				    string_term_tdb_data(name));
	if (rec == NULL) {
		DEBUG(10, ("fetch_locked(\"%s\") failed\n", name));
		status = NT_STATUS_LOCK_NOT_GRANTED;
		goto done;
	}

	if (!g_lock_parse(talloc_tos(), rec->value, &num_locks, &locks)) {
		DEBUG(10, ("g_lock_parse for %s failed\n", name));
		status = NT_STATUS_INTERNAL_ERROR;
		goto done;
	}

	self = procid_self();
	our_index = -1;

	for (i=0; i<num_locks; i++) {
		if (procid_equal(&self, &locks[i].pid)) {
			if (our_index != -1) {
				DEBUG(1, ("g_lock_trylock: Added ourself "
					  "twice!\n"));
				status = NT_STATUS_INTERNAL_ERROR;
				goto done;
			}
			if ((locks[i].lock_type & G_LOCK_PENDING) == 0) {
				DEBUG(1, ("g_lock_trylock: Found ourself not "
					  "pending!\n"));
				status = NT_STATUS_INTERNAL_ERROR;
				goto done;
			}

			our_index = i;

			/* never conflict with ourself */
			continue;
		}
		if (g_lock_conflicts(lock_type, &locks[i])) {
			struct server_id pid = locks[i].pid;

			if (!process_exists(pid)) {
				TALLOC_FREE(locks);
				TALLOC_FREE(rec);
				status = g_lock_force_unlock(ctx, name, pid);
				if (!NT_STATUS_IS_OK(status)) {
					DEBUG(1, ("Could not unlock dead lock "
						  "holder!\n"));
					goto done;
				}
				goto again;
			}
			lock_type |= G_LOCK_PENDING;
		}
	}

	if (our_index == -1) {
		/* First round, add ourself */

		locks = g_lock_addrec(talloc_tos(), locks, &num_locks,
				      self, lock_type);
		if (locks == NULL) {
			DEBUG(10, ("g_lock_addrec failed\n"));
			status = NT_STATUS_NO_MEMORY;
			goto done;
		}
	} else {
		/*
		 * Retry. We were pending last time. Overwrite the
		 * stored lock_type with what we calculated, we might
		 * have acquired the lock this time.
		 */
		locks[our_index].lock_type = lock_type;
	}

	if (NT_STATUS_IS_OK(status) && ((lock_type & G_LOCK_PENDING) == 0)) {
		/*
		 * Walk through the list of locks, search for dead entries
		 */
		g_lock_cleanup(&num_locks, locks);
	}

	data = make_tdb_data((uint8_t *)locks, num_locks * sizeof(*locks));
	store_status = rec->store(rec, data, 0);
	if (!NT_STATUS_IS_OK(store_status)) {
		DEBUG(1, ("rec->store failed: %s\n",
			  nt_errstr(store_status)));
		status = store_status;
	}

done:
	TALLOC_FREE(locks);
	TALLOC_FREE(rec);

	if (NT_STATUS_IS_OK(status) && (lock_type & G_LOCK_PENDING) != 0) {
		return STATUS_PENDING;
	}

	return NT_STATUS_OK;
}

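/*
 * Acquire "name", blocking for at most "timeout". Loops over
 * g_lock_trylock; while pending, waits for a MSG_DBWRAP_G_LOCK_RETRY
 * wakeup (seen as EINTR, or as readability of the ctdbd socket when
 * clustering) and retries at least once a minute.
 */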
NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name,
		     enum g_lock_type lock_type, struct timeval timeout)
{
	struct tevent_timer *te = NULL;
	NTSTATUS status;
	bool retry = false;
	struct timeval timeout_end;
	struct timeval time_now;

	DEBUG(10, ("Trying to acquire lock %d for %s\n", (int)lock_type,
		   name));

	if (lock_type & ~1) {
		DEBUG(1, ("Got invalid lock type %d for %s\n",
			  (int)lock_type, name));
		return NT_STATUS_INVALID_PARAMETER;
	}

#ifdef CLUSTER_SUPPORT
	if (lp_clustering()) {
		status = ctdb_watch_us(messaging_ctdbd_connection());
		if (!NT_STATUS_IS_OK(status)) {
			DEBUG(10, ("could not register retry with ctdb: %s\n",
				   nt_errstr(status)));
			goto done;
		}
	}
#endif

	status = messaging_register(ctx->msg, &retry, MSG_DBWRAP_G_LOCK_RETRY,
				    g_lock_got_retry);
	if (!NT_STATUS_IS_OK(status)) {
		DEBUG(10, ("messaging_register failed: %s\n",
			   nt_errstr(status)));
		return status;
	}

	time_now = timeval_current();
	timeout_end = timeval_sum(&time_now, &timeout);

	while (true) {
#ifdef CLUSTER_SUPPORT
		fd_set _r_fds;
#endif
		fd_set *r_fds = NULL;
		int max_fd = 0;
		int ret;
		struct timeval timeout_remaining, select_timeout;

		status = g_lock_trylock(ctx, name, lock_type);
		if (NT_STATUS_IS_OK(status)) {
			DEBUG(10, ("Got lock %s\n", name));
			break;
		}
		if (!NT_STATUS_EQUAL(status, STATUS_PENDING)) {
			DEBUG(10, ("g_lock_trylock failed: %s\n",
				   nt_errstr(status)));
			break;
		}

		DEBUG(10, ("g_lock_trylock: Did not get lock, waiting...\n"));

		/* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		 * !!! HACK ALERT --- FIX ME !!!
		 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
		 * What we really want to do here is to react to
		 * MSG_DBWRAP_G_LOCK_RETRY messages that are either sent
		 * by a client doing g_lock_unlock or by ourselves when
		 * we receive a CTDB_SRVID_SAMBA_NOTIFY or
		 * CTDB_SRVID_RECONFIGURE message from ctdbd, i.e. when
		 * either a client holding a lock or a complete node
		 * has died.
		 *
		 * Doing this properly involves calling tevent_loop_once(),
		 * but doing this here with the main ctdbd messaging context
		 * creates a nested event loop when g_lock_lock() is called
		 * from the main event loop, e.g. in a tcon_and_X where the
		 * share_info.tdb needs to be initialized and is locked by
		 * another process, or when the remote registry is accessed
		 * for writing and some other process already holds a lock
		 * on the registry.tdb.
		 *
		 * So as a quick fix, we act a little coarsely here: we do
		 * a select on the ctdb connection fd and when it is readable
		 * or we get EINTR, then we retry without actually parsing
		 * any ctdb packets or dispatching messages. This means that
		 * we retry more often than intended by design, but this does
		 * not harm and it is unobtrusive. When we have finished,
		 * the main loop will pick up all the messages and ctdb
		 * packets. The only extra twist is that we cannot use timed
		 * events here but have to handcode a timeout.
		 */

#ifdef CLUSTER_SUPPORT
		if (lp_clustering()) {
			struct ctdbd_connection *conn = messaging_ctdbd_connection();

			r_fds = &_r_fds;
			FD_ZERO(r_fds);
			max_fd = ctdbd_conn_get_fd(conn);
			if (max_fd >= 0 && max_fd < FD_SETSIZE) {
				FD_SET(max_fd, r_fds);
			}
		}
#endif

		time_now = timeval_current();
		timeout_remaining = timeval_until(&time_now, &timeout_end);
		select_timeout = timeval_set(60, 0);

		select_timeout = timeval_min(&select_timeout,
					     &timeout_remaining);

		ret = sys_select(max_fd + 1, r_fds, NULL, NULL,
				 &select_timeout);
		if (ret == -1) {
			if (errno != EINTR) {
				DEBUG(1, ("error calling select: %s\n",
					  strerror(errno)));
				status = NT_STATUS_INTERNAL_ERROR;
				break;
			}
			/*
			 * errno == EINTR:
			 * This means a signal was received.
			 * It might have been a MSG_DBWRAP_G_LOCK_RETRY message.
			 * ==> retry
			 */
		} else if (ret == 0) {
			if (timeval_expired(&timeout_end)) {
				DEBUG(10, ("g_lock_lock timed out\n"));
				status = NT_STATUS_LOCK_NOT_GRANTED;
				break;
			} else {
				DEBUG(10, ("select returned 0 but timeout "
					   "not expired, retrying\n"));
			}
		} else if (ret != 1) {
			DEBUG(1, ("invalid return code of select: %d\n", ret));
			status = NT_STATUS_INTERNAL_ERROR;
			break;
		}
		/*
		 * ret == 1:
		 * This means ctdbd has sent us some data.
		 * Might be a CTDB_SRVID_RECONFIGURE or a
		 * CTDB_SRVID_SAMBA_NOTIFY message.
		 * ==> retry
		 */
	}

#ifdef CLUSTER_SUPPORT
done:
#endif

	if (!NT_STATUS_IS_OK(status)) {
		NTSTATUS unlock_status;

		unlock_status = g_lock_unlock(ctx, name);

		if (!NT_STATUS_IS_OK(unlock_status)) {
			DEBUG(1, ("Could not remove ourself from the locking "
				  "db: %s\n", nt_errstr(unlock_status)));
		}
	}

	messaging_deregister(ctx->msg, MSG_DBWRAP_G_LOCK_RETRY, &retry);
	TALLOC_FREE(te);

	return status;
}

static void g_lock_got_retry(struct messaging_context *msg,
			     void *private_data,
			     uint32_t msg_type,
			     struct server_id server_id,
			     DATA_BLOB *data)
{
	bool *pretry = (bool *)private_data;

	DEBUG(10, ("Got retry message from pid %s\n",
		   procid_str(talloc_tos(), &server_id)));

	*pretry = true;
}

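/*
 * Remove "pid"'s entry from the record for "name". If we removed an
 * actual lock holder (not a pending waiter), wake a few waiters so
 * one of them can take over the lock.
 */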
static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
				    struct server_id pid)
{
	struct db_record *rec = NULL;
	struct g_lock_rec *locks = NULL;
	int i, num_locks;
	enum g_lock_type lock_type;
	NTSTATUS status;

	rec = ctx->db->fetch_locked(ctx->db, talloc_tos(),
				    string_term_tdb_data(name));
	if (rec == NULL) {
		DEBUG(10, ("fetch_locked(\"%s\") failed\n", name));
		status = NT_STATUS_INTERNAL_ERROR;
		goto done;
	}

	if (!g_lock_parse(talloc_tos(), rec->value, &num_locks, &locks)) {
		DEBUG(10, ("g_lock_parse for %s failed\n", name));
		status = NT_STATUS_INTERNAL_ERROR;
		goto done;
	}

	for (i=0; i<num_locks; i++) {
		if (procid_equal(&pid, &locks[i].pid)) {
			break;
		}
	}

	if (i == num_locks) {
		DEBUG(10, ("g_lock_force_unlock: Lock not found\n"));
		status = NT_STATUS_INTERNAL_ERROR;
		goto done;
	}

	lock_type = locks[i].lock_type;

	if (i < (num_locks-1)) {
		locks[i] = locks[num_locks-1];
	}
	num_locks -= 1;

	if (num_locks == 0) {
		status = rec->delete_rec(rec);
	} else {
		TDB_DATA data;
		data = make_tdb_data((uint8_t *)locks,
				     sizeof(struct g_lock_rec) * num_locks);
		status = rec->store(rec, data, 0);
	}

	if (!NT_STATUS_IS_OK(status)) {
		DEBUG(1, ("g_lock_force_unlock: Could not store record: %s\n",
			  nt_errstr(status)));
		goto done;
	}

	TALLOC_FREE(rec);

	if ((lock_type & G_LOCK_PENDING) == 0) {
		int num_wakeups = 0;

		/*
		 * We were the lock holder, so tell the waiters to
		 * retry. Don't wake all of them, to avoid a thundering
		 * herd. In case this leads to a complete stall because
		 * we miss some processes, the loop in g_lock_lock
		 * retries at least once a minute.
		 */

		for (i=0; i<num_locks; i++) {
			if ((locks[i].lock_type & G_LOCK_PENDING) == 0) {
				continue;
			}
			if (!process_exists(locks[i].pid)) {
				continue;
			}

			/*
			 * Ping this waiter to retry
			 */
			status = messaging_send(ctx->msg, locks[i].pid,
						MSG_DBWRAP_G_LOCK_RETRY,
						&data_blob_null);
			if (!NT_STATUS_IS_OK(status)) {
				DEBUG(1, ("sending retry to %s failed: %s\n",
					  procid_str(talloc_tos(),
						     &locks[i].pid),
					  nt_errstr(status)));
			} else {
				num_wakeups += 1;
			}
			if (num_wakeups > 5) {
				break;
			}
		}
	}
done:
	/*
	 * For the error path, TALLOC_FREE(rec) as well. In the good
	 * path we have already freed it.
	 */
	TALLOC_FREE(rec);

	TALLOC_FREE(locks);
	return status;
}

NTSTATUS g_lock_unlock(struct g_lock_ctx *ctx, const char *name)
{
	NTSTATUS status;

	status = g_lock_force_unlock(ctx, name, procid_self());

#ifdef CLUSTER_SUPPORT
	if (lp_clustering()) {
		ctdb_unwatch(messaging_ctdbd_connection());
	}
#endif
	return status;
}

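/*
 * Enumerate the names of all locks in g_lock.tdb via a read traverse,
 * calling "fn" with each 0-terminated lockname.
 */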
struct g_lock_locks_state {
	int (*fn)(const char *name, void *private_data);
	void *private_data;
};

static int g_lock_locks_fn(struct db_record *rec, void *priv)
{
	struct g_lock_locks_state *state = (struct g_lock_locks_state *)priv;

	if ((rec->key.dsize == 0) || (rec->key.dptr[rec->key.dsize-1] != 0)) {
		DEBUG(1, ("invalid key in g_lock.tdb, ignoring\n"));
		return 0;
	}
	return state->fn((char *)rec->key.dptr, state->private_data);
}

int g_lock_locks(struct g_lock_ctx *ctx,
		 int (*fn)(const char *name, void *private_data),
		 void *private_data)
{
	struct g_lock_locks_state state;

	state.fn = fn;
	state.private_data = private_data;

	return ctx->db->traverse_read(ctx->db, g_lock_locks_fn, &state);
}

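/*
 * Fetch the record for "name" and invoke "fn" once per entry, holder
 * and waiters alike. A non-zero return from "fn" stops the iteration.
 */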
NTSTATUS g_lock_dump(struct g_lock_ctx *ctx, const char *name,
		     int (*fn)(struct server_id pid,
			       enum g_lock_type lock_type,
			       void *private_data),
		     void *private_data)
{
	TDB_DATA data;
	int i, num_locks;
	struct g_lock_rec *locks = NULL;
	bool ret;

	if (ctx->db->fetch(ctx->db, talloc_tos(), string_term_tdb_data(name),
			   &data) != 0) {
		return NT_STATUS_NOT_FOUND;
	}

	if ((data.dsize == 0) || (data.dptr == NULL)) {
		return NT_STATUS_OK;
	}

	ret = g_lock_parse(talloc_tos(), data, &num_locks, &locks);

	TALLOC_FREE(data.dptr);

	if (!ret) {
		DEBUG(10, ("g_lock_parse for %s failed\n", name));
		return NT_STATUS_INTERNAL_ERROR;
	}

	for (i=0; i<num_locks; i++) {
		if (fn(locks[i].pid, locks[i].lock_type, private_data) != 0) {
			break;
		}
	}
	TALLOC_FREE(locks);
	return NT_STATUS_OK;
}

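/*
 * g_lock_get below reports the current (non-pending) holder of a
 * lock; g_lock_get_fn is its per-entry callback and skips pending
 * waiters.
 */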
struct g_lock_get_state {
	bool found;
	struct server_id *pid;
};

static int g_lock_get_fn(struct server_id pid, enum g_lock_type lock_type,
			 void *priv)
{
	struct g_lock_get_state *state = (struct g_lock_get_state *)priv;

	if ((lock_type & G_LOCK_PENDING) != 0) {
		return 0;
	}

	state->found = true;
	*state->pid = pid;
	return 1;
}

NTSTATUS g_lock_get(struct g_lock_ctx *ctx, const char *name,
		    struct server_id *pid)
{
	struct g_lock_get_state state;
	NTSTATUS status;

	state.found = false;
	state.pid = pid;

	status = g_lock_dump(ctx, name, g_lock_get_fn, &state);
	if (!NT_STATUS_IS_OK(status)) {
		return status;
	}
	if (!state.found) {
		return NT_STATUS_NOT_FOUND;
	}
	return NT_STATUS_OK;
}

static bool g_lock_init_all(TALLOC_CTX *mem_ctx,
			    struct tevent_context **pev,
			    struct messaging_context **pmsg,
			    struct g_lock_ctx **pg_ctx)
{
	struct tevent_context *ev = NULL;
	struct messaging_context *msg = NULL;
	struct g_lock_ctx *g_ctx = NULL;

	ev = tevent_context_init(mem_ctx);
	if (ev == NULL) {
		d_fprintf(stderr, "ERROR: could not init event context\n");
		goto fail;
	}
	msg = messaging_init(mem_ctx, procid_self(), ev);
	if (msg == NULL) {
		d_fprintf(stderr, "ERROR: could not init messaging context\n");
		goto fail;
	}
	g_ctx = g_lock_ctx_init(mem_ctx, msg);
	if (g_ctx == NULL) {
		d_fprintf(stderr, "ERROR: could not init g_lock context\n");
		goto fail;
	}

	*pev = ev;
	*pmsg = msg;
	*pg_ctx = g_ctx;
	return true;
fail:
	TALLOC_FREE(g_ctx);
	TALLOC_FREE(msg);
	TALLOC_FREE(ev);
	return false;
}

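/*
 * Convenience wrapper: set up a temporary event/messaging/g_lock
 * stack, run "fn" under the named lock, and tear everything down
 * again.
 *
 * Illustrative use (hypothetical callback and lock name):
 *
 *   static void my_fn(void *private_data) { ... }
 *
 *   status = g_lock_do("my_lock", G_LOCK_WRITE, timeval_set(60, 0),
 *                      my_fn, NULL);
 */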
NTSTATUS g_lock_do(const char *name, enum g_lock_type lock_type,
		   struct timeval timeout,
		   void (*fn)(void *private_data), void *private_data)
{
	struct tevent_context *ev = NULL;
	struct messaging_context *msg = NULL;
	struct g_lock_ctx *g_ctx = NULL;
	NTSTATUS status;

	if (!g_lock_init_all(talloc_tos(), &ev, &msg, &g_ctx)) {
		status = NT_STATUS_ACCESS_DENIED;
		goto done;
	}

	status = g_lock_lock(g_ctx, name, lock_type, timeout);
	if (!NT_STATUS_IS_OK(status)) {
		goto done;
	}
	fn(private_data);
	g_lock_unlock(g_ctx, name);

done:
	TALLOC_FREE(g_ctx);
	TALLOC_FREE(msg);
	TALLOC_FREE(ev);
	return status;
}