/*
 * Unix SMB/CIFS implementation.
 * Support for OneFS kernel oplocks
 *
 * Copyright (C) Volker Lendecke 2007
 * Copyright (C) Tim Prouty, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
|
|---|
| 21 |
|
|---|
| 22 | #define DBGC_CLASS DBGC_LOCKING
|
|---|
| 23 |
|
|---|
| 24 | #include "includes.h"
|
|---|
| 25 |
|
|---|
| 26 | #if HAVE_ONEFS
|
|---|
| 27 | #include "oplock_onefs.h"
|
|---|
| 28 | #include "smbd/smbd.h"
|
|---|
| 29 | #include "smbd/globals.h"
|
|---|
| 30 |
|
|---|
| 31 | #include <ifs/ifs_syscalls.h>
|
|---|
| 32 | #include <isi_ecs/isi_ecs_oplocks.h>
|
|---|
| 33 | #include <sys/proc.h>
|
|---|
| 34 |
|
|---|
/*
 * Private state of the OneFS kernel oplock backend.
 */
struct onefs_oplocks_context {
	/* Generic kernel_oplocks object this private state belongs to. */
	struct kernel_oplocks *ctx;

	/* Handler table handed to oplocks_event_dispatcher(). */
	const struct oplocks_event_ops *onefs_ops;

	/* Descriptor obtained from oplocks_event_register(). */
	int onefs_event_fd;

	/* Read event registered for onefs_event_fd. */
	struct fd_event *read_fde;
};
|
|---|
| 41 |
|
|---|
/*
 * State of a callback record; it selects which member of the record's
 * data union is meaningful.
 */
enum onefs_callback_state {
	/* The file is open; the record carries a files_struct pointer. */
	ONEFS_OPEN_FILE,
	/* The open is still pending; the record carries the request mid. */
	ONEFS_WAITING_FOR_OPLOCK
};
|
|---|
| 46 |
|
|---|
| 47 | struct onefs_callback_record {
|
|---|
| 48 | struct onefs_callback_record *prev, *next;
|
|---|
| 49 | uint64_t id;
|
|---|
| 50 | enum onefs_callback_state state;
|
|---|
| 51 | union {
|
|---|
| 52 | files_struct *fsp; /* ONEFS_OPEN_FILE */
|
|---|
| 53 | uint64_t mid; /* ONEFS_WAITING_FOR_OPLOCK */
|
|---|
| 54 | } data;
|
|---|
| 55 | };
|
|---|
| 56 |
|
|---|
/**
 * Head of the internal list of callback records: one entry (with its
 * additional state) for every file that has an outstanding oplock or an
 * outstanding request for an oplock.
 */
struct onefs_callback_record *callback_recs;
|
|---|
| 62 |
|
|---|
| 63 | /**
|
|---|
| 64 | * Convert a onefs_callback_record to a debug string using the dbg_ctx().
|
|---|
| 65 | */
|
|---|
| 66 | const char *onefs_cb_record_str_dbg(const struct onefs_callback_record *r)
|
|---|
| 67 | {
|
|---|
| 68 | char *result;
|
|---|
| 69 |
|
|---|
| 70 | if (r == NULL) {
|
|---|
| 71 | result = talloc_strdup(talloc_tos(), "NULL callback record");
|
|---|
| 72 | return result;
|
|---|
| 73 | }
|
|---|
| 74 |
|
|---|
| 75 | switch (r->state) {
|
|---|
| 76 | case ONEFS_OPEN_FILE:
|
|---|
| 77 | result = talloc_asprintf(talloc_tos(), "cb record %llu for "
|
|---|
| 78 | "file %s", r->id,
|
|---|
| 79 | fsp_str_dbg(r->data.fsp));
|
|---|
| 80 | case ONEFS_WAITING_FOR_OPLOCK:
|
|---|
| 81 | result = talloc_asprintf(talloc_tos(), "cb record %llu for "
|
|---|
| 82 | "pending mid %llu", r->id,
|
|---|
| 83 | (unsigned long long)r->data.mid);
|
|---|
| 84 | break;
|
|---|
| 85 | default:
|
|---|
| 86 | result = talloc_asprintf(talloc_tos(), "cb record %llu unknown "
|
|---|
| 87 | "state %d", r->id, r->state);
|
|---|
| 88 | break;
|
|---|
| 89 | }
|
|---|
| 90 |
|
|---|
| 91 | return result;
|
|---|
| 92 | }
|
|---|
| 93 |
|
|---|
| 94 | /**
|
|---|
| 95 | * Traverse the list of onefs_callback_records and print all entries.
|
|---|
| 96 | */
|
|---|
| 97 | static void debug_cb_records(const char *fn)
|
|---|
| 98 | {
|
|---|
| 99 | struct onefs_callback_record *rec;
|
|---|
| 100 |
|
|---|
| 101 | if (DEBUGLEVEL < 10)
|
|---|
| 102 | return;
|
|---|
| 103 |
|
|---|
| 104 | DEBUG(10, ("cb records (%s):\n", fn));
|
|---|
| 105 |
|
|---|
| 106 | for (rec = callback_recs; rec; rec = rec->next) {
|
|---|
| 107 | DEBUGADD(10, ("%s\n", onefs_cb_record_str_dbg(rec)));
|
|---|
| 108 | }
|
|---|
| 109 | }
|
|---|
| 110 |
|
|---|
| 111 | /**
|
|---|
| 112 | * Find a callback record in the list of outstanding oplock operations.
|
|---|
| 113 | *
|
|---|
| 114 | * Once n ifs_createfile requests an oplock on a file, the kernel communicates
|
|---|
| 115 | * with samba via the oplock event channel by sending events that reference an
|
|---|
| 116 | * id. This function maps that id to the onefs_callback_record that was
|
|---|
| 117 | * created for it during the initial setup on open (onefs_oplock_wait_record).
|
|---|
| 118 | * When a matching id is found in the onefs_callback_record list, the
|
|---|
| 119 | * callback_type is checked to make sure the record is in in the correct
|
|---|
| 120 | * state.
|
|---|
| 121 | */
|
|---|
| 122 | static struct onefs_callback_record *onefs_find_cb(uint64_t id,
|
|---|
| 123 | enum onefs_callback_state expected_state)
|
|---|
| 124 | {
|
|---|
| 125 | struct onefs_callback_record *rec;
|
|---|
| 126 |
|
|---|
| 127 | debug_cb_records("onefs_find_cb");
|
|---|
| 128 |
|
|---|
| 129 | for (rec = callback_recs; rec; rec = rec->next) {
|
|---|
| 130 | if (rec->id == id) {
|
|---|
| 131 | DEBUG(10, ("found %s\n",
|
|---|
| 132 | onefs_cb_record_str_dbg(rec)));
|
|---|
| 133 | break;
|
|---|
| 134 | }
|
|---|
| 135 | }
|
|---|
| 136 |
|
|---|
| 137 | if (rec == NULL) {
|
|---|
| 138 | DEBUG(5, ("Could not find callback record for id %llu\n", id));
|
|---|
| 139 | return NULL;
|
|---|
| 140 | }
|
|---|
| 141 |
|
|---|
| 142 | if (rec->state != expected_state) {
|
|---|
| 143 | DEBUG(0, ("Expected cb type %d, got %s", expected_state,
|
|---|
| 144 | onefs_cb_record_str_dbg(rec)));
|
|---|
| 145 | SMB_ASSERT(0);
|
|---|
| 146 | return NULL;
|
|---|
| 147 | }
|
|---|
| 148 |
|
|---|
| 149 | return rec;
|
|---|
| 150 | }
|
|---|
| 151 |
|
|---|
| 152 | /**
|
|---|
| 153 | * Remove and free a callback record from the callback record list.
|
|---|
| 154 | */
|
|---|
| 155 | void destroy_onefs_callback_record(uint64_t id)
|
|---|
| 156 | {
|
|---|
| 157 | struct onefs_callback_record *rec;
|
|---|
| 158 |
|
|---|
| 159 | debug_cb_records("destroy_onefs_callback_record");
|
|---|
| 160 |
|
|---|
| 161 | if (id == 0) {
|
|---|
| 162 | DEBUG(10, ("destroy_onefs_callback_record: Nothing to "
|
|---|
| 163 | "destroy\n"));
|
|---|
| 164 | return;
|
|---|
| 165 | }
|
|---|
| 166 |
|
|---|
| 167 | for (rec = callback_recs; rec; rec = rec->next) {
|
|---|
| 168 | if (rec->id == id) {
|
|---|
| 169 | DLIST_REMOVE(callback_recs, rec);
|
|---|
| 170 | SAFE_FREE(rec);
|
|---|
| 171 | DEBUG(10, ("removed cb rec %llu\n", id));
|
|---|
| 172 | return;
|
|---|
| 173 | }
|
|---|
| 174 | }
|
|---|
| 175 |
|
|---|
| 176 | DEBUG(0, ("Could not find cb rec %llu to delete", id));
|
|---|
| 177 | SMB_ASSERT(0);
|
|---|
| 178 | }
|
|---|
| 179 |
|
|---|
| 180 | /**
|
|---|
| 181 | * Initialize a callback record and add it to the list of outstanding callback
|
|---|
| 182 | * records.
|
|---|
| 183 | *
|
|---|
| 184 | * This is called in the open path before ifs_createfile so an id can be
|
|---|
| 185 | * passed in. Each callback record can be in one of two states:
|
|---|
| 186 | *
|
|---|
| 187 | * 1. WAITING_FOR_OPLOCK: This is the initial state for all callback
|
|---|
| 188 | * records. If ifs_createfile can be completed syncronously without needing
|
|---|
| 189 | * to break any level I oplocks, the state is transitioned to OPEN_FILE.
|
|---|
| 190 | * Otherwise ifs_createfile will finish asynchronously and the open is
|
|---|
| 191 | * deferred. When the necessary level I opocks have been broken, and the
|
|---|
| 192 | * open can be done, an event is sent by the kernel on the oplock event
|
|---|
| 193 | * channel, which is handled by semlock_available_handler. At this point
|
|---|
| 194 | * the deferred open is retried. Unless a level I oplock was acquired by
|
|---|
| 195 | * another client, ifs_createfile will now complete synchronously.
|
|---|
| 196 | *
|
|---|
| 197 | * 2. OPEN_FILE: Once ifs_createfile completes, the callback record is
|
|---|
| 198 | * transitioned to this state via onefs_set_oplock_callback.
|
|---|
| 199 | */
|
|---|
| 200 | uint64_t onefs_oplock_wait_record(uint64_t mid)
|
|---|
| 201 | {
|
|---|
| 202 | struct onefs_callback_record *result;
|
|---|
| 203 | static uint64_t id_generator = 0;
|
|---|
| 204 |
|
|---|
| 205 | if (!(result = SMB_MALLOC_P(struct onefs_callback_record))) {
|
|---|
| 206 | DEBUG(0, ("talloc failed\n"));
|
|---|
| 207 | return 0;
|
|---|
| 208 | }
|
|---|
| 209 |
|
|---|
| 210 | memset(result, '\0', sizeof(result));
|
|---|
| 211 |
|
|---|
| 212 | id_generator += 1;
|
|---|
| 213 | if (id_generator == 0) {
|
|---|
| 214 | /* Wow, that's a long-running smbd... */
|
|---|
| 215 | id_generator += 1;
|
|---|
| 216 | }
|
|---|
| 217 |
|
|---|
| 218 | result->id = id_generator;
|
|---|
| 219 |
|
|---|
| 220 | result->state = ONEFS_WAITING_FOR_OPLOCK;
|
|---|
| 221 | result->data.mid = mid;
|
|---|
| 222 | DLIST_ADD(callback_recs, result);
|
|---|
| 223 |
|
|---|
| 224 | DEBUG(10, ("New cb rec %llu created\n", result->id));
|
|---|
| 225 |
|
|---|
| 226 | return result->id;
|
|---|
| 227 | }
|
|---|
| 228 |
|
|---|
| 229 | /**
|
|---|
| 230 | * Transition the callback record state to OPEN_FILE.
|
|---|
| 231 | *
|
|---|
| 232 | * This is called after the file is opened and an fsp struct has been
|
|---|
| 233 | * allocated. The mid is dropped in favor of storing the fsp.
|
|---|
| 234 | */
|
|---|
| 235 | void onefs_set_oplock_callback(uint64_t id, files_struct *fsp)
|
|---|
| 236 | {
|
|---|
| 237 | struct onefs_callback_record *cb;
|
|---|
| 238 | char *msg;
|
|---|
| 239 |
|
|---|
| 240 | DEBUG(10, ("onefs_set_oplock_callback called for cb rec %llu\n", id));
|
|---|
| 241 |
|
|---|
| 242 | if (!(cb = onefs_find_cb(id, ONEFS_WAITING_FOR_OPLOCK))) {
|
|---|
| 243 | if (asprintf(&msg, "Got invalid callback %lld\n", id) != -1) {
|
|---|
| 244 | smb_panic(msg);
|
|---|
| 245 | }
|
|---|
| 246 | smb_panic("Got invalid callback id\n");
|
|---|
| 247 | }
|
|---|
| 248 |
|
|---|
| 249 | /*
|
|---|
| 250 | * Paranoia check
|
|---|
| 251 | */
|
|---|
| 252 | if (open_was_deferred(cb->data.mid)) {
|
|---|
| 253 | if (asprintf(&msg, "Trying to upgrade callback for deferred "
|
|---|
| 254 | "open mid=%llu\n", (unsigned long long)cb->data.mid) != -1) {
|
|---|
| 255 | smb_panic(msg);
|
|---|
| 256 | }
|
|---|
| 257 | smb_panic("Trying to upgrade callback for deferred open "
|
|---|
| 258 | "mid\n");
|
|---|
| 259 | }
|
|---|
| 260 |
|
|---|
| 261 | cb->state = ONEFS_OPEN_FILE;
|
|---|
| 262 | cb->data.fsp = fsp;
|
|---|
| 263 | }
|
|---|
| 264 |
|
|---|
| 265 | /**
|
|---|
| 266 | * Using a callback record, initialize a share mode entry to pass to
|
|---|
| 267 | * share_mode_entry_to_message to send samba IPC messages.
|
|---|
| 268 | */
|
|---|
| 269 | static void init_share_mode_entry(struct share_mode_entry *sme,
|
|---|
| 270 | struct onefs_callback_record *cb,
|
|---|
| 271 | int op_type)
|
|---|
| 272 | {
|
|---|
| 273 | ZERO_STRUCT(*sme);
|
|---|
| 274 |
|
|---|
| 275 | sme->pid = procid_self();
|
|---|
| 276 | sme->op_type = op_type;
|
|---|
| 277 | sme->id = cb->data.fsp->file_id;
|
|---|
| 278 | sme->share_file_id = cb->data.fsp->fh->gen_id;
|
|---|
| 279 | }
|
|---|
| 280 |
|
|---|
| 281 | /**
|
|---|
| 282 | * Callback when a break-to-none event is received from the kernel.
|
|---|
| 283 | *
|
|---|
| 284 | * On OneFS level 1 oplocks are always broken to level 2 first, therefore an
|
|---|
| 285 | * async level 2 break message is always sent when breaking to none. The
|
|---|
| 286 | * downside of this is that OneFS currently has no way to express breaking
|
|---|
| 287 | * directly from level 1 to none.
|
|---|
| 288 | */
|
|---|
| 289 | static void oplock_break_to_none_handler(uint64_t id)
|
|---|
| 290 | {
|
|---|
| 291 | struct onefs_callback_record *cb;
|
|---|
| 292 | struct share_mode_entry sme;
|
|---|
| 293 | char msg[MSG_SMB_SHARE_MODE_ENTRY_SIZE];
|
|---|
| 294 |
|
|---|
| 295 | DEBUG(10, ("oplock_break_to_none_handler called for id %llu\n", id));
|
|---|
| 296 |
|
|---|
| 297 | if (!(cb = onefs_find_cb(id, ONEFS_OPEN_FILE))) {
|
|---|
| 298 | DEBUG(3, ("oplock_break_to_none_handler: could not find "
|
|---|
| 299 | "callback id %llu\n", id));
|
|---|
| 300 | return;
|
|---|
| 301 | }
|
|---|
| 302 |
|
|---|
| 303 | DEBUG(10, ("oplock_break_to_none_handler called for file %s\n",
|
|---|
| 304 | fsp_str_dbg(cb->data.fsp)));
|
|---|
| 305 |
|
|---|
| 306 | init_share_mode_entry(&sme, cb, FORCE_OPLOCK_BREAK_TO_NONE);
|
|---|
| 307 | share_mode_entry_to_message(msg, &sme);
|
|---|
| 308 | messaging_send_buf(smbd_messaging_context(),
|
|---|
| 309 | sme.pid,
|
|---|
| 310 | MSG_SMB_ASYNC_LEVEL2_BREAK,
|
|---|
| 311 | (uint8_t *)msg,
|
|---|
| 312 | MSG_SMB_SHARE_MODE_ENTRY_SIZE);
|
|---|
| 313 |
|
|---|
| 314 | /*
|
|---|
| 315 | * We could still receive an OPLOCK_REVOKED message, so keep the
|
|---|
| 316 | * oplock_callback_id around.
|
|---|
| 317 | */
|
|---|
| 318 | }
|
|---|
| 319 |
|
|---|
| 320 | /**
|
|---|
| 321 | * Callback when a break-to-level2 event is received from the kernel.
|
|---|
| 322 | *
|
|---|
| 323 | * Breaks from level 1 to level 2.
|
|---|
| 324 | */
|
|---|
| 325 | static void oplock_break_to_level_two_handler(uint64_t id)
|
|---|
| 326 | {
|
|---|
| 327 | struct onefs_callback_record *cb;
|
|---|
| 328 | struct share_mode_entry sme;
|
|---|
| 329 | char msg[MSG_SMB_SHARE_MODE_ENTRY_SIZE];
|
|---|
| 330 |
|
|---|
| 331 | DEBUG(10, ("oplock_break_to_level_two_handler called for id %llu\n",
|
|---|
| 332 | id));
|
|---|
| 333 |
|
|---|
| 334 | if (!(cb = onefs_find_cb(id, ONEFS_OPEN_FILE))) {
|
|---|
| 335 | DEBUG(3, ("oplock_break_to_level_two_handler: could not find "
|
|---|
| 336 | "callback id %llu\n", id));
|
|---|
| 337 | return;
|
|---|
| 338 | }
|
|---|
| 339 |
|
|---|
| 340 | DEBUG(10, ("oplock_break_to_level_two_handler called for file %s\n",
|
|---|
| 341 | fsp_str_dbg(cb->data.fsp)));
|
|---|
| 342 |
|
|---|
| 343 | init_share_mode_entry(&sme, cb, LEVEL_II_OPLOCK);
|
|---|
| 344 | share_mode_entry_to_message(msg, &sme);
|
|---|
| 345 | messaging_send_buf(smbd_messaging_context(),
|
|---|
| 346 | sme.pid,
|
|---|
| 347 | MSG_SMB_BREAK_REQUEST,
|
|---|
| 348 | (uint8_t *)msg,
|
|---|
| 349 | MSG_SMB_SHARE_MODE_ENTRY_SIZE);
|
|---|
| 350 |
|
|---|
| 351 | /*
|
|---|
| 352 | * We could still receive an OPLOCK_REVOKED or OPLOCK_BREAK_TO_NONE
|
|---|
| 353 | * message, so keep the oplock_callback_id around.
|
|---|
| 354 | */
|
|---|
| 355 | }
|
|---|
| 356 |
|
|---|
| 357 | /**
|
|---|
| 358 | * Revoke an oplock from an unresponsive client.
|
|---|
| 359 | *
|
|---|
| 360 | * The kernel will send this message when it times out waiting for a level 1
|
|---|
| 361 | * oplock break to be acknowledged by the client. The oplock is then
|
|---|
| 362 | * immediately removed.
|
|---|
| 363 | */
|
|---|
| 364 | static void oplock_revoked_handler(uint64_t id)
|
|---|
| 365 | {
|
|---|
| 366 | struct onefs_callback_record *cb;
|
|---|
| 367 | files_struct *fsp = NULL;
|
|---|
| 368 |
|
|---|
| 369 | DEBUG(10, ("oplock_revoked_handler called for id %llu\n", id));
|
|---|
| 370 |
|
|---|
| 371 | if (!(cb = onefs_find_cb(id, ONEFS_OPEN_FILE))) {
|
|---|
| 372 | DEBUG(3, ("oplock_revoked_handler: could not find "
|
|---|
| 373 | "callback id %llu\n", id));
|
|---|
| 374 | return;
|
|---|
| 375 | }
|
|---|
| 376 |
|
|---|
| 377 | fsp = cb->data.fsp;
|
|---|
| 378 |
|
|---|
| 379 | SMB_ASSERT(fsp->oplock_timeout == NULL);
|
|---|
| 380 |
|
|---|
| 381 | DEBUG(0,("Level 1 oplock break failed for file %s. Forcefully "
|
|---|
| 382 | "revoking oplock\n", fsp_str_dbg(fsp)));
|
|---|
| 383 |
|
|---|
| 384 | remove_oplock(fsp);
|
|---|
| 385 |
|
|---|
| 386 | /*
|
|---|
| 387 | * cb record is cleaned up in fsp ext data destructor on close, so
|
|---|
| 388 | * leave it in the list.
|
|---|
| 389 | */
|
|---|
| 390 | }
|
|---|
| 391 |
|
|---|
| 392 | /**
|
|---|
| 393 | * Asynchronous ifs_createfile callback
|
|---|
| 394 | *
|
|---|
| 395 | * If ifs_createfile had to asynchronously break any oplocks, this function is
|
|---|
| 396 | * called when the kernel sends an event that the open can be retried.
|
|---|
| 397 | */
|
|---|
| 398 | static void semlock_available_handler(uint64_t id)
|
|---|
| 399 | {
|
|---|
| 400 | struct onefs_callback_record *cb;
|
|---|
| 401 |
|
|---|
| 402 | DEBUG(10, ("semlock_available_handler called: %llu\n", id));
|
|---|
| 403 |
|
|---|
| 404 | if (!(cb = onefs_find_cb(id, ONEFS_WAITING_FOR_OPLOCK))) {
|
|---|
| 405 | DEBUG(5, ("semlock_available_handler: Did not find callback "
|
|---|
| 406 | "%llu\n", id));
|
|---|
| 407 | return;
|
|---|
| 408 | }
|
|---|
| 409 |
|
|---|
| 410 | DEBUG(10, ("Got semlock available for mid %llu\n",
|
|---|
| 411 | (unsigned long long)cb->data.mid));
|
|---|
| 412 |
|
|---|
| 413 | /* Paranoia check */
|
|---|
| 414 | if (!(open_was_deferred(cb->data.mid))) {
|
|---|
| 415 | char *msg;
|
|---|
| 416 | if (asprintf(&msg, "Semlock available on an open that wasn't "
|
|---|
| 417 | "deferred: %s\n",
|
|---|
| 418 | onefs_cb_record_str_dbg(cb)) != -1) {
|
|---|
| 419 | smb_panic(msg);
|
|---|
| 420 | }
|
|---|
| 421 | smb_panic("Semlock available on an open that wasn't "
|
|---|
| 422 | "deferred\n");
|
|---|
| 423 | }
|
|---|
| 424 |
|
|---|
| 425 | schedule_deferred_open_smb_message(cb->data.mid);
|
|---|
| 426 |
|
|---|
| 427 | /* Cleanup the callback record since the open will be retried. */
|
|---|
| 428 | destroy_onefs_callback_record(id);
|
|---|
| 429 |
|
|---|
| 430 | return;
|
|---|
| 431 | }
|
|---|
| 432 |
|
|---|
| 433 | /**
|
|---|
| 434 | * Asynchronous ifs_createfile failure callback
|
|---|
| 435 | *
|
|---|
| 436 | * If ifs_createfile had to asynchronously break any oplocks, but an error was
|
|---|
| 437 | * encountered in the kernel, the open will be retried with the state->failed
|
|---|
| 438 | * set to true. This will prompt the open path to send an INTERNAL_ERROR
|
|---|
| 439 | * error message to the client.
|
|---|
| 440 | */
|
|---|
| 441 | static void semlock_async_failure_handler(uint64_t id)
|
|---|
| 442 | {
|
|---|
| 443 | struct onefs_callback_record *cb;
|
|---|
| 444 | struct deferred_open_record *state;
|
|---|
| 445 |
|
|---|
| 446 | DEBUG(1, ("semlock_async_failure_handler called: %llu\n", id));
|
|---|
| 447 |
|
|---|
| 448 | if (!(cb = onefs_find_cb(id, ONEFS_WAITING_FOR_OPLOCK))) {
|
|---|
| 449 | DEBUG(5, ("semlock_async_failure_handler: Did not find callback "
|
|---|
| 450 | "%llu\n", id));
|
|---|
| 451 | return;
|
|---|
| 452 | }
|
|---|
| 453 |
|
|---|
| 454 | DEBUG(1, ("Got semlock_async_failure message for mid %llu\n",
|
|---|
| 455 | (unsigned long long)cb->data.mid));
|
|---|
| 456 |
|
|---|
| 457 | /* Paranoia check */
|
|---|
| 458 | if (!(open_was_deferred(cb->data.mid))) {
|
|---|
| 459 | char *msg;
|
|---|
| 460 | if (asprintf(&msg, "Semlock failure on an open that wasn't "
|
|---|
| 461 | "deferred: %s\n",
|
|---|
| 462 | onefs_cb_record_str_dbg(cb)) != -1) {
|
|---|
| 463 | smb_panic(msg);
|
|---|
| 464 | }
|
|---|
| 465 | smb_panic("Semlock failure on an open that wasn't deferred\n");
|
|---|
| 466 | }
|
|---|
| 467 |
|
|---|
| 468 | /* Find the actual deferred open record. */
|
|---|
| 469 | if (!get_open_deferred_message_state(cb->data.mid, NULL, &state)) {
|
|---|
| 470 | DEBUG(0, ("Could not find deferred request for "
|
|---|
| 471 | "mid %d\n", cb->data.mid));
|
|---|
| 472 | destroy_onefs_callback_record(id);
|
|---|
| 473 | return;
|
|---|
| 474 | }
|
|---|
| 475 |
|
|---|
| 476 | /* Update to failed so the client can be notified on retried open. */
|
|---|
| 477 | state->failed = true;
|
|---|
| 478 |
|
|---|
| 479 | /* Schedule deferred open for immediate retry. */
|
|---|
| 480 | schedule_deferred_open_smb_message(cb->data.mid);
|
|---|
| 481 |
|
|---|
| 482 | /* Cleanup the callback record here since the open will be retried. */
|
|---|
| 483 | destroy_onefs_callback_record(id);
|
|---|
| 484 |
|
|---|
| 485 | return;
|
|---|
| 486 | }
|
|---|
| 487 |
|
|---|
| 488 | /**
|
|---|
| 489 | * OneFS acquires all oplocks via ifs_createfile, so this is a no-op.
|
|---|
| 490 | */
|
|---|
| 491 | static bool onefs_set_kernel_oplock(struct kernel_oplocks *_ctx,
|
|---|
| 492 | files_struct *fsp, int oplock_type) {
|
|---|
| 493 | return true;
|
|---|
| 494 | }
|
|---|
| 495 |
|
|---|
| 496 | /**
|
|---|
| 497 | * Release the kernel oplock.
|
|---|
| 498 | */
|
|---|
| 499 | static void onefs_release_kernel_oplock(struct kernel_oplocks *_ctx,
|
|---|
| 500 | files_struct *fsp, int oplock_type)
|
|---|
| 501 | {
|
|---|
| 502 | enum oplock_type oplock = onefs_samba_oplock_to_oplock(oplock_type);
|
|---|
| 503 |
|
|---|
| 504 | DEBUG(10, ("onefs_release_kernel_oplock: Releasing %s to type %s\n",
|
|---|
| 505 | fsp_str_dbg(fsp), onefs_oplock_str(oplock)));
|
|---|
| 506 |
|
|---|
| 507 | if (fsp->fh->fd == -1) {
|
|---|
| 508 | DEBUG(1, ("no fd\n"));
|
|---|
| 509 | return;
|
|---|
| 510 | }
|
|---|
| 511 |
|
|---|
| 512 | /* Downgrade oplock to either SHARED or NONE. */
|
|---|
| 513 | if (ifs_oplock_downgrade(fsp->fh->fd, oplock)) {
|
|---|
| 514 | DEBUG(1,("ifs_oplock_downgrade failed: %s\n",
|
|---|
| 515 | strerror(errno)));
|
|---|
| 516 | }
|
|---|
| 517 | }
|
|---|
| 518 |
|
|---|
| 519 | /**
|
|---|
| 520 | * Wrap ifs_semlock_write so it is only called on operations that aren't
|
|---|
| 521 | * already contended in the kernel.
|
|---|
| 522 | */
|
|---|
| 523 | static void onefs_semlock_write(int fd, enum level2_contention_type type,
|
|---|
| 524 | enum semlock_operation semlock_op)
|
|---|
| 525 | {
|
|---|
| 526 | int ret;
|
|---|
| 527 |
|
|---|
| 528 | switch (type) {
|
|---|
| 529 | case LEVEL2_CONTEND_ALLOC_GROW:
|
|---|
| 530 | case LEVEL2_CONTEND_POSIX_BRL:
|
|---|
| 531 | DEBUG(10, ("Taking %d write semlock for cmd %d on fd: %d\n",
|
|---|
| 532 | semlock_op, type, fd));
|
|---|
| 533 | ret = ifs_semlock_write(fd, semlock_op);
|
|---|
| 534 | if (ret) {
|
|---|
| 535 | DEBUG(0,("ifs_semlock_write failed taking %d write "
|
|---|
| 536 | "semlock for cmd %d on fd: %d: %s",
|
|---|
| 537 | semlock_op, type, fd, strerror(errno)));
|
|---|
| 538 | }
|
|---|
| 539 | break;
|
|---|
| 540 | default:
|
|---|
| 541 | DEBUG(10, ("Skipping write semlock for cmd %d on fd: %d\n",
|
|---|
| 542 | type, fd));
|
|---|
| 543 | }
|
|---|
| 544 | }
|
|---|
| 545 |
|
|---|
| 546 | /**
|
|---|
| 547 | * Contend level 2 oplocks in the kernel and smbd.
|
|---|
| 548 | *
|
|---|
| 549 | * Taking a write semlock will contend all level 2 oplocks in all smbds across
|
|---|
| 550 | * the cluster except the fsp's own level 2 oplock. This lack of
|
|---|
| 551 | * self-contention is a limitation of the current OneFS kernel oplocks
|
|---|
| 552 | * implementation. Luckily it is easy to contend our own level 2 oplock by
|
|---|
| 553 | * checking the the fsp's oplock_type. If it's a level2, send a break message
|
|---|
| 554 | * to the client and remove the oplock.
|
|---|
| 555 | */
|
|---|
| 556 | static void onefs_contend_level2_oplocks_begin(files_struct *fsp,
|
|---|
| 557 | enum level2_contention_type type)
|
|---|
| 558 | {
|
|---|
| 559 | /* Take care of level 2 kernel contention. */
|
|---|
| 560 | onefs_semlock_write(fsp->fh->fd, type, SEMLOCK_LOCK);
|
|---|
| 561 |
|
|---|
| 562 | /* Take care of level 2 self contention. */
|
|---|
| 563 | if (LEVEL_II_OPLOCK_TYPE(fsp->oplock_type))
|
|---|
| 564 | break_level2_to_none_async(fsp);
|
|---|
| 565 | }
|
|---|
| 566 |
|
|---|
| 567 | /**
|
|---|
| 568 | * Unlock the write semlock when the level 2 contending operation ends.
|
|---|
| 569 | */
|
|---|
| 570 | static void onefs_contend_level2_oplocks_end(files_struct *fsp,
|
|---|
| 571 | enum level2_contention_type type)
|
|---|
| 572 | {
|
|---|
| 573 | /* Take care of level 2 kernel contention. */
|
|---|
| 574 | onefs_semlock_write(fsp->fh->fd, type, SEMLOCK_UNLOCK);
|
|---|
| 575 | }
|
|---|
| 576 |
|
|---|
| 577 | /**
|
|---|
| 578 | * Return string value of onefs oplock types.
|
|---|
| 579 | */
|
|---|
| 580 | const char *onefs_oplock_str(enum oplock_type onefs_oplock_type)
|
|---|
| 581 | {
|
|---|
| 582 | switch (onefs_oplock_type) {
|
|---|
| 583 | case OPLOCK_NONE:
|
|---|
| 584 | return "OPLOCK_NONE";
|
|---|
| 585 | case OPLOCK_EXCLUSIVE:
|
|---|
| 586 | return "OPLOCK_EXCLUSIVE";
|
|---|
| 587 | case OPLOCK_BATCH:
|
|---|
| 588 | return "OPLOCK_BATCH";
|
|---|
| 589 | case OPLOCK_SHARED:
|
|---|
| 590 | return "OPLOCK_SHARED";
|
|---|
| 591 | default:
|
|---|
| 592 | break;
|
|---|
| 593 | }
|
|---|
| 594 | return "UNKNOWN";
|
|---|
| 595 | }
|
|---|
| 596 |
|
|---|
| 597 | /**
|
|---|
| 598 | * Convert from onefs to samba oplock.
|
|---|
| 599 | */
|
|---|
| 600 | int onefs_oplock_to_samba_oplock(enum oplock_type onefs_oplock)
|
|---|
| 601 | {
|
|---|
| 602 | switch (onefs_oplock) {
|
|---|
| 603 | case OPLOCK_NONE:
|
|---|
| 604 | return NO_OPLOCK;
|
|---|
| 605 | case OPLOCK_EXCLUSIVE:
|
|---|
| 606 | return EXCLUSIVE_OPLOCK;
|
|---|
| 607 | case OPLOCK_BATCH:
|
|---|
| 608 | return BATCH_OPLOCK;
|
|---|
| 609 | case OPLOCK_SHARED:
|
|---|
| 610 | return LEVEL_II_OPLOCK;
|
|---|
| 611 | default:
|
|---|
| 612 | DEBUG(0, ("unknown oplock type %d found\n", onefs_oplock));
|
|---|
| 613 | break;
|
|---|
| 614 | }
|
|---|
| 615 | return NO_OPLOCK;
|
|---|
| 616 | }
|
|---|
| 617 |
|
|---|
| 618 | /**
|
|---|
| 619 | * Convert from samba to onefs oplock.
|
|---|
| 620 | */
|
|---|
| 621 | enum oplock_type onefs_samba_oplock_to_oplock(int samba_oplock_type)
|
|---|
| 622 | {
|
|---|
| 623 | if (BATCH_OPLOCK_TYPE(samba_oplock_type)) return OPLOCK_BATCH;
|
|---|
| 624 | if (EXCLUSIVE_OPLOCK_TYPE(samba_oplock_type)) return OPLOCK_EXCLUSIVE;
|
|---|
| 625 | if (LEVEL_II_OPLOCK_TYPE(samba_oplock_type)) return OPLOCK_SHARED;
|
|---|
| 626 | return OPLOCK_NONE;
|
|---|
| 627 | }
|
|---|
| 628 |
|
|---|
| 629 | /**
|
|---|
| 630 | * Oplock event handler.
|
|---|
| 631 | *
|
|---|
| 632 | * Call into the event system dispatcher to handle each event.
|
|---|
| 633 | */
|
|---|
| 634 | static void onefs_oplocks_read_fde_handler(struct event_context *ev,
|
|---|
| 635 | struct fd_event *fde,
|
|---|
| 636 | uint16_t flags,
|
|---|
| 637 | void *private_data)
|
|---|
| 638 | {
|
|---|
| 639 | struct onefs_oplocks_context *ctx =
|
|---|
| 640 | talloc_get_type(private_data, struct onefs_oplocks_context);
|
|---|
| 641 |
|
|---|
| 642 | if (oplocks_event_dispatcher(ctx->onefs_ops)) {
|
|---|
| 643 | DEBUG(0, ("oplocks_event_dispatcher failed: %s\n",
|
|---|
| 644 | strerror(errno)));
|
|---|
| 645 | }
|
|---|
| 646 | }
|
|---|
| 647 |
|
|---|
| 648 | /**
|
|---|
| 649 | * Setup kernel oplocks
|
|---|
| 650 | */
|
|---|
| 651 | static const struct kernel_oplocks_ops onefs_koplocks_ops = {
|
|---|
| 652 | .set_oplock = onefs_set_kernel_oplock,
|
|---|
| 653 | .release_oplock = onefs_release_kernel_oplock,
|
|---|
| 654 | .contend_level2_oplocks_begin = onefs_contend_level2_oplocks_begin,
|
|---|
| 655 | .contend_level2_oplocks_end = onefs_contend_level2_oplocks_end,
|
|---|
| 656 | };
|
|---|
| 657 |
|
|---|
| 658 | static const struct oplocks_event_ops onefs_dispatch_ops = {
|
|---|
| 659 | .oplock_break_to_none = oplock_break_to_none_handler,
|
|---|
| 660 | .oplock_break_to_level_two = oplock_break_to_level_two_handler,
|
|---|
| 661 | .oplock_revoked = oplock_revoked_handler,
|
|---|
| 662 | .semlock_available = semlock_available_handler,
|
|---|
| 663 | .semlock_async_failure = semlock_async_failure_handler,
|
|---|
| 664 | };
|
|---|
| 665 |
|
|---|
| 666 | struct kernel_oplocks *onefs_init_kernel_oplocks(TALLOC_CTX *mem_ctx)
|
|---|
| 667 | {
|
|---|
| 668 | struct kernel_oplocks *_ctx = NULL;
|
|---|
| 669 | struct onefs_oplocks_context *ctx = NULL;
|
|---|
| 670 | struct procoptions po = PROCOPTIONS_INIT;
|
|---|
| 671 |
|
|---|
| 672 | DEBUG(10, ("onefs_init_kernel_oplocks called\n"));
|
|---|
| 673 |
|
|---|
| 674 | /* Set the non-blocking proc flag */
|
|---|
| 675 | po.po_flags_on |= P_NON_BLOCKING_SEMLOCK;
|
|---|
| 676 | if (setprocoptions(&po) != 0) {
|
|---|
| 677 | DEBUG(0, ("setprocoptions failed: %s.\n", strerror(errno)));
|
|---|
| 678 | return NULL;
|
|---|
| 679 | }
|
|---|
| 680 |
|
|---|
| 681 | /* Setup the oplock contexts */
|
|---|
| 682 | _ctx = talloc_zero(mem_ctx, struct kernel_oplocks);
|
|---|
| 683 | if (!_ctx) {
|
|---|
| 684 | return NULL;
|
|---|
| 685 | }
|
|---|
| 686 |
|
|---|
| 687 | ctx = talloc_zero(_ctx, struct onefs_oplocks_context);
|
|---|
| 688 | if (!ctx) {
|
|---|
| 689 | goto err_out;
|
|---|
| 690 | }
|
|---|
| 691 |
|
|---|
| 692 | _ctx->ops = &onefs_koplocks_ops;
|
|---|
| 693 | _ctx->flags = (KOPLOCKS_LEVEL2_SUPPORTED |
|
|---|
| 694 | KOPLOCKS_DEFERRED_OPEN_NOTIFICATION |
|
|---|
| 695 | KOPLOCKS_TIMEOUT_NOTIFICATION |
|
|---|
| 696 | KOPLOCKS_OPLOCK_BROKEN_NOTIFICATION);
|
|---|
| 697 | _ctx->private_data = ctx;
|
|---|
| 698 | ctx->ctx = _ctx;
|
|---|
| 699 | ctx->onefs_ops = &onefs_dispatch_ops;
|
|---|
| 700 |
|
|---|
| 701 | /* Register an kernel event channel for oplocks */
|
|---|
| 702 | ctx->onefs_event_fd = oplocks_event_register();
|
|---|
| 703 | if (ctx->onefs_event_fd == -1) {
|
|---|
| 704 | DEBUG(0, ("oplocks_event_register failed: %s\n",
|
|---|
| 705 | strerror(errno)));
|
|---|
| 706 | goto err_out;
|
|---|
| 707 | }
|
|---|
| 708 |
|
|---|
| 709 | DEBUG(10, ("oplock event_fd = %d\n", ctx->onefs_event_fd));
|
|---|
| 710 |
|
|---|
| 711 | /* Register the oplock event_fd with samba's event system */
|
|---|
| 712 | ctx->read_fde = event_add_fd(smbd_event_context(),
|
|---|
| 713 | ctx,
|
|---|
| 714 | ctx->onefs_event_fd,
|
|---|
| 715 | EVENT_FD_READ,
|
|---|
| 716 | onefs_oplocks_read_fde_handler,
|
|---|
| 717 | ctx);
|
|---|
| 718 | return _ctx;
|
|---|
| 719 |
|
|---|
| 720 | err_out:
|
|---|
| 721 | talloc_free(_ctx);
|
|---|
| 722 | return NULL;
|
|---|
| 723 | }
|
|---|
| 724 |
|
|---|
| 725 | #else
|
|---|
/*
 * Empty placeholder compiled when HAVE_ONEFS is not set -- presumably so
 * that this file still defines a symbol in that configuration. The
 * prototype comes first so the definition is not flagged as missing one.
 */
void oplock_onefs_dummy(void);

void oplock_onefs_dummy(void)
{
}
|
|---|
| 728 | #endif /* HAVE_ONEFS */
|
|---|