/*
   Unix SMB/CIFS implementation.
   global locks based on dbwrap and messaging
   Copyright (C) 2009 by Volker Lendecke

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include "includes.h"
#include "system/filesys.h"
#include "g_lock.h"
#include "util_tdb.h"
#include "ctdbd_conn.h"
#include "../lib/util/select.h"
#include "system/select.h"
#include "messages.h"

static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
                                    struct server_id pid);

struct g_lock_ctx {
        struct db_context *db;
        struct messaging_context *msg;
};

/*
 * The "g_lock.tdb" file contains records, indexed by the 0-terminated
 * lockname. The record contains an array of "struct g_lock_rec"
 * structures. Waiters have the lock_type with G_LOCK_PENDING or'ed.
 */

struct g_lock_rec {
        enum g_lock_type lock_type;
        struct server_id pid;
};
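
/*
 * Illustrative sketch of the record layout (an example, not code from this
 * module): with one writer holding the lock and one waiter, the record
 * stored under lockname "foo" is 2 * sizeof(struct g_lock_rec) bytes:
 *
 *   locks[0] = { .lock_type = G_LOCK_WRITE,                .pid = owner  }
 *   locks[1] = { .lock_type = G_LOCK_WRITE|G_LOCK_PENDING, .pid = waiter }
 *
 * g_lock_parse() below recovers this array with a plain memcpy, which is
 * why it insists that the record length is a multiple of
 * sizeof(struct g_lock_rec).
 */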

struct g_lock_ctx *g_lock_ctx_init(TALLOC_CTX *mem_ctx,
                                   struct messaging_context *msg)
{
        struct g_lock_ctx *result;

        result = talloc(mem_ctx, struct g_lock_ctx);
        if (result == NULL) {
                return NULL;
        }
        result->msg = msg;

        result->db = db_open(result, lock_path("g_lock.tdb"), 0,
                             TDB_CLEAR_IF_FIRST|TDB_INCOMPATIBLE_HASH,
                             O_RDWR|O_CREAT, 0700);
        if (result->db == NULL) {
                DEBUG(1, ("g_lock_ctx_init: Could not open g_lock.tdb\n"));
                TALLOC_FREE(result);
                return NULL;
        }
        return result;
}
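
/*
 * A minimal usage sketch, kept under #if 0 because it is illustrative
 * only: create a context, take a write lock with a 10 second timeout, do
 * the protected work, then release. It assumes a valid messaging_context
 * "msg" owned by the caller; the lock name "example_lock" is made up.
 */
#if 0
static NTSTATUS example_with_g_lock(struct messaging_context *msg)
{
        struct g_lock_ctx *ctx;
        NTSTATUS status;

        ctx = g_lock_ctx_init(talloc_tos(), msg);
        if (ctx == NULL) {
                return NT_STATUS_NO_MEMORY;
        }

        status = g_lock_lock(ctx, "example_lock", G_LOCK_WRITE,
                             timeval_set(10, 0));
        if (!NT_STATUS_IS_OK(status)) {
                TALLOC_FREE(ctx);
                return status;
        }

        /* ... critical section protected by "example_lock" ... */

        g_lock_unlock(ctx, "example_lock");
        TALLOC_FREE(ctx);
        return NT_STATUS_OK;
}
#endif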

static bool g_lock_conflicts(enum g_lock_type lock_type,
                             const struct g_lock_rec *rec)
{
        enum g_lock_type rec_lock = rec->lock_type;

        if ((rec_lock & G_LOCK_PENDING) != 0) {
                return false;
        }

        /*
         * Only tested write locks so far. Very likely this routine
         * needs to be fixed for read locks....
         */
        if ((lock_type == G_LOCK_READ) && (rec_lock == G_LOCK_READ)) {
                return false;
        }
        return true;
}

static bool g_lock_parse(TALLOC_CTX *mem_ctx, TDB_DATA data,
                         int *pnum_locks, struct g_lock_rec **plocks)
{
        int i, num_locks;
        struct g_lock_rec *locks;

        if ((data.dsize % sizeof(struct g_lock_rec)) != 0) {
                DEBUG(1, ("invalid lock record length %d\n", (int)data.dsize));
                return false;
        }

        num_locks = data.dsize / sizeof(struct g_lock_rec);
        locks = talloc_array(mem_ctx, struct g_lock_rec, num_locks);
        if (locks == NULL) {
                DEBUG(1, ("talloc failed\n"));
                return false;
        }

        memcpy(locks, data.dptr, data.dsize);

        DEBUG(10, ("locks:\n"));
        for (i=0; i<num_locks; i++) {
                DEBUGADD(10, ("%s: %s %s\n",
                              procid_str(talloc_tos(), &locks[i].pid),
                              ((locks[i].lock_type & 1) == G_LOCK_READ) ?
                              "read" : "write",
                              (locks[i].lock_type & G_LOCK_PENDING) ?
                              "(pending)" : "(owner)"));

                if (((locks[i].lock_type & G_LOCK_PENDING) == 0)
                    && !process_exists(locks[i].pid)) {

                        DEBUGADD(10, ("lock owner %s died -- discarding\n",
                                      procid_str(talloc_tos(),
                                                 &locks[i].pid)));

                        if (i < (num_locks-1)) {
                                locks[i] = locks[num_locks-1];
                                /* re-check the record just moved here */
                                i -= 1;
                        }
                        num_locks -= 1;
                }
        }

        *plocks = locks;
        *pnum_locks = num_locks;
        return true;
}

static void g_lock_cleanup(int *pnum_locks, struct g_lock_rec *locks)
{
        int i, num_locks;

        num_locks = *pnum_locks;

        DEBUG(10, ("g_lock_cleanup: %d locks\n", num_locks));

        for (i=0; i<num_locks; i++) {
                if (process_exists(locks[i].pid)) {
                        continue;
                }
                DEBUGADD(10, ("%s does not exist -- discarding\n",
                              procid_str(talloc_tos(), &locks[i].pid)));

                if (i < (num_locks-1)) {
                        locks[i] = locks[num_locks-1];
                        /* re-check the record just moved here */
                        i -= 1;
                }
                num_locks -= 1;
        }
        *pnum_locks = num_locks;
}

static struct g_lock_rec *g_lock_addrec(TALLOC_CTX *mem_ctx,
                                        struct g_lock_rec *locks,
                                        int *pnum_locks,
                                        const struct server_id pid,
                                        enum g_lock_type lock_type)
{
        struct g_lock_rec *result;
        int num_locks = *pnum_locks;

        result = talloc_realloc(mem_ctx, locks, struct g_lock_rec,
                                num_locks+1);
        if (result == NULL) {
                return NULL;
        }

        result[num_locks].pid = pid;
        result[num_locks].lock_type = lock_type;
        *pnum_locks += 1;
        return result;
}

static void g_lock_got_retry(struct messaging_context *msg,
                             void *private_data,
                             uint32_t msg_type,
                             struct server_id server_id,
                             DATA_BLOB *data);
static NTSTATUS g_lock_trylock(struct g_lock_ctx *ctx, const char *name,
                               enum g_lock_type lock_type)
{
        struct db_record *rec = NULL;
        struct g_lock_rec *locks = NULL;
        int i, num_locks;
        struct server_id self;
        int our_index;
        TDB_DATA data;
        NTSTATUS status = NT_STATUS_OK;
        NTSTATUS store_status;

again:
        rec = ctx->db->fetch_locked(ctx->db, talloc_tos(),
                                    string_term_tdb_data(name));
        if (rec == NULL) {
                DEBUG(10, ("fetch_locked(\"%s\") failed\n", name));
                status = NT_STATUS_LOCK_NOT_GRANTED;
                goto done;
        }

        if (!g_lock_parse(talloc_tos(), rec->value, &num_locks, &locks)) {
                DEBUG(10, ("g_lock_parse for %s failed\n", name));
                status = NT_STATUS_INTERNAL_ERROR;
                goto done;
        }

        self = messaging_server_id(ctx->msg);
        our_index = -1;

        for (i=0; i<num_locks; i++) {
                if (procid_equal(&self, &locks[i].pid)) {
                        if (our_index != -1) {
                                DEBUG(1, ("g_lock_trylock: Added ourselves "
                                          "twice!\n"));
                                status = NT_STATUS_INTERNAL_ERROR;
                                goto done;
                        }
                        if ((locks[i].lock_type & G_LOCK_PENDING) == 0) {
                                DEBUG(1, ("g_lock_trylock: Found ourselves "
                                          "not pending!\n"));
                                status = NT_STATUS_INTERNAL_ERROR;
                                goto done;
                        }

                        our_index = i;

                        /* never conflict with ourselves */
                        continue;
                }
                if (g_lock_conflicts(lock_type, &locks[i])) {
                        struct server_id pid = locks[i].pid;

                        if (!process_exists(pid)) {
                                TALLOC_FREE(locks);
                                TALLOC_FREE(rec);
                                status = g_lock_force_unlock(ctx, name, pid);
                                if (!NT_STATUS_IS_OK(status)) {
                                        DEBUG(1, ("Could not unlock dead lock "
                                                  "holder!\n"));
                                        goto done;
                                }
                                goto again;
                        }
                        lock_type |= G_LOCK_PENDING;
                }
        }

        if (our_index == -1) {
                /* First round, add ourselves */

                locks = g_lock_addrec(talloc_tos(), locks, &num_locks,
                                      self, lock_type);
                if (locks == NULL) {
                        DEBUG(10, ("g_lock_addrec failed\n"));
                        status = NT_STATUS_NO_MEMORY;
                        goto done;
                }
        } else {
                /*
                 * Retry. We were pending last time. Overwrite the
                 * stored lock_type with what we calculated, we might
                 * have acquired the lock this time.
                 */
                locks[our_index].lock_type = lock_type;
        }

        if (NT_STATUS_IS_OK(status) && ((lock_type & G_LOCK_PENDING) == 0)) {
                /*
                 * Walk through the list of locks, search for dead entries
                 */
                g_lock_cleanup(&num_locks, locks);
        }

        data = make_tdb_data((uint8_t *)locks, num_locks * sizeof(*locks));
        store_status = rec->store(rec, data, 0);
        if (!NT_STATUS_IS_OK(store_status)) {
                DEBUG(1, ("rec->store failed: %s\n",
                          nt_errstr(store_status)));
                status = store_status;
        }

done:
        TALLOC_FREE(locks);
        TALLOC_FREE(rec);

        if (NT_STATUS_IS_OK(status) && ((lock_type & G_LOCK_PENDING) != 0)) {
                return STATUS_PENDING;
        }

        return status;
}

NTSTATUS g_lock_lock(struct g_lock_ctx *ctx, const char *name,
                     enum g_lock_type lock_type, struct timeval timeout)
{
        NTSTATUS status;
        bool retry = false;
        struct timeval timeout_end;
        struct timeval time_now;

        DEBUG(10, ("Trying to acquire lock %d for %s\n", (int)lock_type,
                   name));

        if (lock_type & ~1) {
                DEBUG(1, ("Got invalid lock type %d for %s\n",
                          (int)lock_type, name));
                return NT_STATUS_INVALID_PARAMETER;
        }

#ifdef CLUSTER_SUPPORT
        if (lp_clustering()) {
                status = ctdb_watch_us(messaging_ctdbd_connection());
                if (!NT_STATUS_IS_OK(status)) {
                        DEBUG(10, ("could not register retry with ctdb: %s\n",
                                   nt_errstr(status)));
                        goto done;
                }
        }
#endif

        status = messaging_register(ctx->msg, &retry, MSG_DBWRAP_G_LOCK_RETRY,
                                    g_lock_got_retry);
        if (!NT_STATUS_IS_OK(status)) {
                DEBUG(10, ("messaging_register failed: %s\n",
                           nt_errstr(status)));
                return status;
        }

        time_now = timeval_current();
        timeout_end = timeval_sum(&time_now, &timeout);

        while (true) {
                struct pollfd *pollfds;
                int num_pollfds;
                int saved_errno;
                int ret;
                struct timeval timeout_remaining, select_timeout;

                status = g_lock_trylock(ctx, name, lock_type);
                if (NT_STATUS_IS_OK(status)) {
                        DEBUG(10, ("Got lock %s\n", name));
                        break;
                }
                if (!NT_STATUS_EQUAL(status, STATUS_PENDING)) {
                        DEBUG(10, ("g_lock_trylock failed: %s\n",
                                   nt_errstr(status)));
                        break;
                }

                DEBUG(10, ("g_lock_trylock: Did not get lock, waiting...\n"));

                /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                 * !!! HACK ALERT --- FIX ME !!!
                 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                 * What we really want to do here is to react to
                 * MSG_DBWRAP_G_LOCK_RETRY messages that are either sent
                 * by a client doing g_lock_unlock or by ourselves when
                 * we receive a CTDB_SRVID_SAMBA_NOTIFY or
                 * CTDB_SRVID_RECONFIGURE message from ctdbd, i.e. when
                 * either a client holding a lock or a complete node
                 * has died.
                 *
                 * Doing this properly involves calling tevent_loop_once(),
                 * but doing this here with the main ctdbd messaging context
                 * creates a nested event loop when g_lock_lock() is called
                 * from the main event loop, e.g. in a tcon_and_X where the
                 * share_info.tdb needs to be initialized and is locked by
                 * another process, or when the remote registry is accessed
                 * for writing and some other process already holds a lock
                 * on the registry.tdb.
                 *
                 * So as a quick fix, we act a little coarsely here: we do
                 * a poll on the ctdb connection fd and when it is readable
                 * or we get EINTR, then we retry without actually parsing
                 * any ctdb packets or dispatching messages. This means that
                 * we retry more often than intended by design, but this does
                 * not harm and it is unobtrusive. When we have finished,
                 * the main loop will pick up all the messages and ctdb
                 * packets. The only extra twist is that we cannot use timed
                 * events here but have to handcode a timeout.
                 */

                /*
                 * We allocate 2 entries here. One is needed anyway for
                 * sys_poll and in the clustering case we might have to add
                 * the ctdb fd. This avoids the realloc then.
                 */
                pollfds = TALLOC_ARRAY(talloc_tos(), struct pollfd, 2);
                if (pollfds == NULL) {
                        status = NT_STATUS_NO_MEMORY;
                        break;
                }
                num_pollfds = 0;

#ifdef CLUSTER_SUPPORT
                if (lp_clustering()) {
                        struct ctdbd_connection *conn;
                        conn = messaging_ctdbd_connection();

                        pollfds[0].fd = ctdbd_conn_get_fd(conn);
                        pollfds[0].events = POLLIN|POLLHUP;

                        num_pollfds += 1;
                }
#endif

                time_now = timeval_current();
                timeout_remaining = timeval_until(&time_now, &timeout_end);
                select_timeout = timeval_set(60, 0);

                select_timeout = timeval_min(&select_timeout,
                                             &timeout_remaining);

                ret = sys_poll(pollfds, num_pollfds,
                               timeval_to_msec(select_timeout));

                /*
                 * We're not *really* interested in the actual flags. We just
                 * need to retry this whole thing.
                 */
                saved_errno = errno;
                TALLOC_FREE(pollfds);
                errno = saved_errno;

                if (ret == -1) {
                        if (errno != EINTR) {
                                DEBUG(1, ("error calling poll: %s\n",
                                          strerror(errno)));
                                status = NT_STATUS_INTERNAL_ERROR;
                                break;
                        }
                        /*
                         * errno == EINTR:
                         * This means a signal was received.
                         * It might have been a MSG_DBWRAP_G_LOCK_RETRY
                         * message.
                         * ==> retry
                         */
                } else if (ret == 0) {
                        if (timeval_expired(&timeout_end)) {
                                DEBUG(10, ("g_lock_lock timed out\n"));
                                status = NT_STATUS_LOCK_NOT_GRANTED;
                                break;
                        } else {
                                DEBUG(10, ("poll returned 0 but timeout "
                                           "not expired, retrying\n"));
                        }
                } else if (ret != 1) {
                        DEBUG(1, ("invalid return code of poll: %d\n", ret));
                        status = NT_STATUS_INTERNAL_ERROR;
                        break;
                }
                /*
                 * ret == 1:
                 * This means ctdbd has sent us some data.
                 * Might be a CTDB_SRVID_RECONFIGURE or a
                 * CTDB_SRVID_SAMBA_NOTIFY message.
                 * ==> retry
                 */
        }

#ifdef CLUSTER_SUPPORT
done:
#endif

        if (!NT_STATUS_IS_OK(status)) {
                NTSTATUS unlock_status;

                unlock_status = g_lock_unlock(ctx, name);

                if (!NT_STATUS_IS_OK(unlock_status)) {
                        DEBUG(1, ("Could not remove ourselves from the "
                                  "locking db: %s\n",
                                  nt_errstr(unlock_status)));
                }
        }

        messaging_deregister(ctx->msg, MSG_DBWRAP_G_LOCK_RETRY, &retry);

        return status;
}

static void g_lock_got_retry(struct messaging_context *msg,
                             void *private_data,
                             uint32_t msg_type,
                             struct server_id server_id,
                             DATA_BLOB *data)
{
        bool *pretry = (bool *)private_data;

        DEBUG(10, ("Got retry message from pid %s\n",
                   procid_str(talloc_tos(), &server_id)));

        *pretry = true;
}

static NTSTATUS g_lock_force_unlock(struct g_lock_ctx *ctx, const char *name,
                                    struct server_id pid)
{
        struct db_record *rec = NULL;
        struct g_lock_rec *locks = NULL;
        int i, num_locks;
        enum g_lock_type lock_type;
        NTSTATUS status;

        rec = ctx->db->fetch_locked(ctx->db, talloc_tos(),
                                    string_term_tdb_data(name));
        if (rec == NULL) {
                DEBUG(10, ("fetch_locked(\"%s\") failed\n", name));
                status = NT_STATUS_INTERNAL_ERROR;
                goto done;
        }

        if (!g_lock_parse(talloc_tos(), rec->value, &num_locks, &locks)) {
                DEBUG(10, ("g_lock_parse for %s failed\n", name));
                status = NT_STATUS_INTERNAL_ERROR;
                goto done;
        }

        for (i=0; i<num_locks; i++) {
                if (procid_equal(&pid, &locks[i].pid)) {
                        break;
                }
        }

        if (i == num_locks) {
                DEBUG(10, ("g_lock_force_unlock: Lock not found\n"));
                status = NT_STATUS_INTERNAL_ERROR;
                goto done;
        }

        lock_type = locks[i].lock_type;

        if (i < (num_locks-1)) {
                locks[i] = locks[num_locks-1];
        }
        num_locks -= 1;

        if (num_locks == 0) {
                status = rec->delete_rec(rec);
        } else {
                TDB_DATA data;
                data = make_tdb_data((uint8_t *)locks,
                                     sizeof(struct g_lock_rec) * num_locks);
                status = rec->store(rec, data, 0);
        }

        if (!NT_STATUS_IS_OK(status)) {
                DEBUG(1, ("g_lock_force_unlock: Could not store record: %s\n",
                          nt_errstr(status)));
                goto done;
        }

        TALLOC_FREE(rec);

        if ((lock_type & G_LOCK_PENDING) == 0) {
                int num_wakeups = 0;

                /*
                 * We've been the lock holder, so the waiters should
                 * retry. Don't tell all of them, to avoid a thundering
                 * herd. In case this leads to a complete stall because
                 * we miss some processes, the loop in g_lock_lock tries
                 * at least once a minute.
                 */

                for (i=0; i<num_locks; i++) {
                        if ((locks[i].lock_type & G_LOCK_PENDING) == 0) {
                                continue;
                        }
                        if (!process_exists(locks[i].pid)) {
                                continue;
                        }

                        /*
                         * Ping this waiter to retry
                         */
                        status = messaging_send(ctx->msg, locks[i].pid,
                                                MSG_DBWRAP_G_LOCK_RETRY,
                                                &data_blob_null);
                        if (!NT_STATUS_IS_OK(status)) {
                                DEBUG(1, ("sending retry to %s failed: %s\n",
                                          procid_str(talloc_tos(),
                                                     &locks[i].pid),
                                          nt_errstr(status)));
                        } else {
                                num_wakeups += 1;
                        }
                        if (num_wakeups > 5) {
                                break;
                        }
                }
        }
done:
        /*
         * For the error path, TALLOC_FREE(rec) as well. In the good
         * path we have already freed it.
         */
        TALLOC_FREE(rec);

        TALLOC_FREE(locks);
        return status;
}

NTSTATUS g_lock_unlock(struct g_lock_ctx *ctx, const char *name)
{
        NTSTATUS status;

        status = g_lock_force_unlock(ctx, name, messaging_server_id(ctx->msg));

#ifdef CLUSTER_SUPPORT
        if (lp_clustering()) {
                ctdb_unwatch(messaging_ctdbd_connection());
        }
#endif
        return status;
}

struct g_lock_locks_state {
        int (*fn)(const char *name, void *private_data);
        void *private_data;
};

static int g_lock_locks_fn(struct db_record *rec, void *priv)
{
        struct g_lock_locks_state *state = (struct g_lock_locks_state *)priv;

        if ((rec->key.dsize == 0) || (rec->key.dptr[rec->key.dsize-1] != 0)) {
                DEBUG(1, ("invalid key in g_lock.tdb, ignoring\n"));
                return 0;
        }
        return state->fn((char *)rec->key.dptr, state->private_data);
}

int g_lock_locks(struct g_lock_ctx *ctx,
                 int (*fn)(const char *name, void *private_data),
                 void *private_data)
{
        struct g_lock_locks_state state;

        state.fn = fn;
        state.private_data = private_data;

        return ctx->db->traverse_read(ctx->db, g_lock_locks_fn, &state);
}
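
/*
 * An illustrative callback sketch, kept under #if 0 because it is an
 * example only: print every lock name found by g_lock_locks. Returning 0
 * keeps the traversal going; a nonzero return stops it.
 */
#if 0
static int example_print_lockname(const char *name, void *private_data)
{
        d_printf("g_lock: %s\n", name);
        return 0;
}

/* ... g_lock_locks(ctx, example_print_lockname, NULL); ... */
#endif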

NTSTATUS g_lock_dump(struct g_lock_ctx *ctx, const char *name,
                     int (*fn)(struct server_id pid,
                               enum g_lock_type lock_type,
                               void *private_data),
                     void *private_data)
{
        TDB_DATA data;
        int i, num_locks;
        struct g_lock_rec *locks = NULL;
        bool ret;

        if (ctx->db->fetch(ctx->db, talloc_tos(), string_term_tdb_data(name),
                           &data) != 0) {
                return NT_STATUS_NOT_FOUND;
        }

        if ((data.dsize == 0) || (data.dptr == NULL)) {
                return NT_STATUS_OK;
        }

        ret = g_lock_parse(talloc_tos(), data, &num_locks, &locks);

        TALLOC_FREE(data.dptr);

        if (!ret) {
                DEBUG(10, ("g_lock_parse for %s failed\n", name));
                return NT_STATUS_INTERNAL_ERROR;
        }

        for (i=0; i<num_locks; i++) {
                if (fn(locks[i].pid, locks[i].lock_type, private_data) != 0) {
                        break;
                }
        }
        TALLOC_FREE(locks);
        return NT_STATUS_OK;
}

struct g_lock_get_state {
        bool found;
        struct server_id *pid;
};

static int g_lock_get_fn(struct server_id pid, enum g_lock_type lock_type,
                         void *priv)
{
        struct g_lock_get_state *state = (struct g_lock_get_state *)priv;

        if ((lock_type & G_LOCK_PENDING) != 0) {
                return 0;
        }

        state->found = true;
        *state->pid = pid;
        return 1;
}

NTSTATUS g_lock_get(struct g_lock_ctx *ctx, const char *name,
                    struct server_id *pid)
{
        struct g_lock_get_state state;
        NTSTATUS status;

        state.found = false;
        state.pid = pid;

        status = g_lock_dump(ctx, name, g_lock_get_fn, &state);
        if (!NT_STATUS_IS_OK(status)) {
                return status;
        }
        if (!state.found) {
                return NT_STATUS_NOT_FOUND;
        }
        return NT_STATUS_OK;
}

static bool g_lock_init_all(TALLOC_CTX *mem_ctx,
                            struct tevent_context **pev,
                            struct messaging_context **pmsg,
                            const struct server_id self,
                            struct g_lock_ctx **pg_ctx)
{
        struct tevent_context *ev = NULL;
        struct messaging_context *msg = NULL;
        struct g_lock_ctx *g_ctx = NULL;

        ev = tevent_context_init(mem_ctx);
        if (ev == NULL) {
                d_fprintf(stderr, "ERROR: could not init event context\n");
                goto fail;
        }
        msg = messaging_init(mem_ctx, self, ev);
        if (msg == NULL) {
                d_fprintf(stderr, "ERROR: could not init messaging context\n");
                goto fail;
        }
        g_ctx = g_lock_ctx_init(mem_ctx, msg);
        if (g_ctx == NULL) {
                d_fprintf(stderr, "ERROR: could not init g_lock context\n");
                goto fail;
        }

        *pev = ev;
        *pmsg = msg;
        *pg_ctx = g_ctx;
        return true;
fail:
        TALLOC_FREE(g_ctx);
        TALLOC_FREE(msg);
        TALLOC_FREE(ev);
        return false;
}

NTSTATUS g_lock_do(const char *name, enum g_lock_type lock_type,
                   struct timeval timeout, const struct server_id self,
                   void (*fn)(void *private_data), void *private_data)
{
        struct tevent_context *ev = NULL;
        struct messaging_context *msg = NULL;
        struct g_lock_ctx *g_ctx = NULL;
        NTSTATUS status;

        if (!g_lock_init_all(talloc_tos(), &ev, &msg, self, &g_ctx)) {
                status = NT_STATUS_ACCESS_DENIED;
                goto done;
        }

        status = g_lock_lock(g_ctx, name, lock_type, timeout);
        if (!NT_STATUS_IS_OK(status)) {
                goto done;
        }
        fn(private_data);
        g_lock_unlock(g_ctx, name);

done:
        TALLOC_FREE(g_ctx);
        TALLOC_FREE(msg);
        TALLOC_FREE(ev);
        return status;
}
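
/*
 * A minimal usage sketch for g_lock_do, kept under #if 0 because it is an
 * example only: run a callback while holding the write lock "example_do",
 * waiting up to 60 seconds for it. "self" is assumed to be this process's
 * server_id (e.g. from procid_self()); the lock name and callback are
 * made up for illustration.
 */
#if 0
static void example_do_fn(void *private_data)
{
        int *counter = (int *)private_data;
        *counter += 1;          /* work done while the lock is held */
}

static NTSTATUS example_do(const struct server_id self)
{
        int counter = 0;
        return g_lock_do("example_do", G_LOCK_WRITE, timeval_set(60, 0),
                         self, example_do_fn, &counter);
}
#endif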