1 | /*
|
---|
2 | * Unix SMB/CIFS implementation.
|
---|
3 | * Support for OneFS kernel oplocks
|
---|
4 | *
|
---|
5 | * Copyright (C) Volker Lendecke 2007
|
---|
6 | * Copyright (C) Tim Prouty, 2009
|
---|
7 | *
|
---|
8 | * This program is free software; you can redistribute it and/or modify
|
---|
9 | * it under the terms of the GNU General Public License as published by
|
---|
10 | * the Free Software Foundation; either version 3 of the License, or
|
---|
11 | * (at your option) any later version.
|
---|
12 | *
|
---|
13 | * This program is distributed in the hope that it will be useful,
|
---|
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
16 | * GNU General Public License for more details.
|
---|
17 | *
|
---|
18 | * You should have received a copy of the GNU General Public License
|
---|
19 | * along with this program; if not, see <http://www.gnu.org/licenses/>.
|
---|
20 | */
|
---|
21 |
|
---|
22 | #define DBGC_CLASS DBGC_LOCKING
|
---|
23 |
|
---|
24 | #include "includes.h"
|
---|
25 |
|
---|
26 | #if HAVE_ONEFS
|
---|
27 | #include "oplock_onefs.h"
|
---|
28 | #include "smbd/globals.h"
|
---|
29 |
|
---|
30 | #include <ifs/ifs_syscalls.h>
|
---|
31 | #include <isi_ecs/isi_ecs_oplocks.h>
|
---|
32 | #include <sys/proc.h>
|
---|
33 |
|
---|
/*
 * Private state of the OneFS kernel oplock backend. One instance is
 * allocated and filled in by onefs_init_kernel_oplocks().
 */
struct onefs_oplocks_context {
	/* Generic kernel_oplocks object whose private_data points back here. */
	struct kernel_oplocks *ctx;

	/* Handler table handed to oplocks_event_dispatcher(). */
	const struct oplocks_event_ops *onefs_ops;

	/* Descriptor returned by oplocks_event_register(). */
	int onefs_event_fd;

	/* Read event registered for onefs_event_fd. */
	struct fd_event *read_fde;
};
40 |
|
---|
/*
 * State of a callback record. The state selects which member of
 * onefs_callback_record.data is meaningful.
 */
enum onefs_callback_state {
	ONEFS_OPEN_FILE,		/* data.fsp is in use */
	ONEFS_WAITING_FOR_OPLOCK	/* data.mid is in use */
};
45 |
|
---|
46 | struct onefs_callback_record {
|
---|
47 | struct onefs_callback_record *prev, *next;
|
---|
48 | uint64_t id;
|
---|
49 | enum onefs_callback_state state;
|
---|
50 | union {
|
---|
51 | files_struct *fsp; /* ONEFS_OPEN_FILE */
|
---|
52 | uint16_t mid; /* ONEFS_WAITING_FOR_OPLOCK */
|
---|
53 | } data;
|
---|
54 | };
|
---|
55 |
|
---|
/**
 * Head of the internal list of callback records: one entry per file that
 * has an outstanding oplock or a pending request for one, together with
 * the additional state kept for it.
 */
struct onefs_callback_record *callback_recs;
61 |
|
---|
62 | /**
|
---|
63 | * Convert a onefs_callback_record to a debug string using the dbg_ctx().
|
---|
64 | */
|
---|
65 | const char *onefs_cb_record_str_dbg(const struct onefs_callback_record *r)
|
---|
66 | {
|
---|
67 | char *result;
|
---|
68 |
|
---|
69 | if (r == NULL) {
|
---|
70 | result = talloc_strdup(talloc_tos(), "NULL callback record");
|
---|
71 | return result;
|
---|
72 | }
|
---|
73 |
|
---|
74 | switch (r->state) {
|
---|
75 | case ONEFS_OPEN_FILE:
|
---|
76 | result = talloc_asprintf(talloc_tos(), "cb record %llu for "
|
---|
77 | "file %s", r->id,
|
---|
78 | fsp_str_dbg(r->data.fsp));
|
---|
79 | case ONEFS_WAITING_FOR_OPLOCK:
|
---|
80 | result = talloc_asprintf(talloc_tos(), "cb record %llu for "
|
---|
81 | "pending mid %d", r->id,
|
---|
82 | (int)r->data.mid);
|
---|
83 | break;
|
---|
84 | default:
|
---|
85 | result = talloc_asprintf(talloc_tos(), "cb record %llu unknown "
|
---|
86 | "state %d", r->id, r->state);
|
---|
87 | break;
|
---|
88 | }
|
---|
89 |
|
---|
90 | return result;
|
---|
91 | }
|
---|
92 |
|
---|
93 | /**
|
---|
94 | * Traverse the list of onefs_callback_records and print all entries.
|
---|
95 | */
|
---|
96 | static void debug_cb_records(const char *fn)
|
---|
97 | {
|
---|
98 | struct onefs_callback_record *rec;
|
---|
99 |
|
---|
100 | if (DEBUGLEVEL < 10)
|
---|
101 | return;
|
---|
102 |
|
---|
103 | DEBUG(10, ("cb records (%s):\n", fn));
|
---|
104 |
|
---|
105 | for (rec = callback_recs; rec; rec = rec->next) {
|
---|
106 | DEBUGADD(10, ("%s\n", onefs_cb_record_str_dbg(rec)));
|
---|
107 | }
|
---|
108 | }
|
---|
109 |
|
---|
110 | /**
|
---|
111 | * Find a callback record in the list of outstanding oplock operations.
|
---|
112 | *
|
---|
113 | * Once n ifs_createfile requests an oplock on a file, the kernel communicates
|
---|
114 | * with samba via the oplock event channel by sending events that reference an
|
---|
115 | * id. This function maps that id to the onefs_callback_record that was
|
---|
116 | * created for it during the initial setup on open (onefs_oplock_wait_record).
|
---|
117 | * When a matching id is found in the onefs_callback_record list, the
|
---|
118 | * callback_type is checked to make sure the record is in in the correct
|
---|
119 | * state.
|
---|
120 | */
|
---|
121 | static struct onefs_callback_record *onefs_find_cb(uint64_t id,
|
---|
122 | enum onefs_callback_state expected_state)
|
---|
123 | {
|
---|
124 | struct onefs_callback_record *rec;
|
---|
125 |
|
---|
126 | debug_cb_records("onefs_find_cb");
|
---|
127 |
|
---|
128 | for (rec = callback_recs; rec; rec = rec->next) {
|
---|
129 | if (rec->id == id) {
|
---|
130 | DEBUG(10, ("found %s\n",
|
---|
131 | onefs_cb_record_str_dbg(rec)));
|
---|
132 | break;
|
---|
133 | }
|
---|
134 | }
|
---|
135 |
|
---|
136 | if (rec == NULL) {
|
---|
137 | DEBUG(5, ("Could not find callback record for id %llu\n", id));
|
---|
138 | return NULL;
|
---|
139 | }
|
---|
140 |
|
---|
141 | if (rec->state != expected_state) {
|
---|
142 | DEBUG(0, ("Expected cb type %d, got %s", expected_state,
|
---|
143 | onefs_cb_record_str_dbg(rec)));
|
---|
144 | SMB_ASSERT(0);
|
---|
145 | return NULL;
|
---|
146 | }
|
---|
147 |
|
---|
148 | return rec;
|
---|
149 | }
|
---|
150 |
|
---|
151 | /**
|
---|
152 | * Remove and free a callback record from the callback record list.
|
---|
153 | */
|
---|
154 | void destroy_onefs_callback_record(uint64_t id)
|
---|
155 | {
|
---|
156 | struct onefs_callback_record *rec;
|
---|
157 |
|
---|
158 | debug_cb_records("destroy_onefs_callback_record");
|
---|
159 |
|
---|
160 | if (id == 0) {
|
---|
161 | DEBUG(10, ("destroy_onefs_callback_record: Nothing to "
|
---|
162 | "destroy\n"));
|
---|
163 | return;
|
---|
164 | }
|
---|
165 |
|
---|
166 | for (rec = callback_recs; rec; rec = rec->next) {
|
---|
167 | if (rec->id == id) {
|
---|
168 | DLIST_REMOVE(callback_recs, rec);
|
---|
169 | SAFE_FREE(rec);
|
---|
170 | DEBUG(10, ("removed cb rec %llu\n", id));
|
---|
171 | return;
|
---|
172 | }
|
---|
173 | }
|
---|
174 |
|
---|
175 | DEBUG(0, ("Could not find cb rec %llu to delete", id));
|
---|
176 | SMB_ASSERT(0);
|
---|
177 | }
|
---|
178 |
|
---|
179 | /**
|
---|
180 | * Initialize a callback record and add it to the list of outstanding callback
|
---|
181 | * records.
|
---|
182 | *
|
---|
183 | * This is called in the open path before ifs_createfile so an id can be
|
---|
184 | * passed in. Each callback record can be in one of two states:
|
---|
185 | *
|
---|
186 | * 1. WAITING_FOR_OPLOCK: This is the initial state for all callback
|
---|
187 | * records. If ifs_createfile can be completed syncronously without needing
|
---|
188 | * to break any level I oplocks, the state is transitioned to OPEN_FILE.
|
---|
189 | * Otherwise ifs_createfile will finish asynchronously and the open is
|
---|
190 | * deferred. When the necessary level I opocks have been broken, and the
|
---|
191 | * open can be done, an event is sent by the kernel on the oplock event
|
---|
192 | * channel, which is handled by semlock_available_handler. At this point
|
---|
193 | * the deferred open is retried. Unless a level I oplock was acquired by
|
---|
194 | * another client, ifs_createfile will now complete synchronously.
|
---|
195 | *
|
---|
196 | * 2. OPEN_FILE: Once ifs_createfile completes, the callback record is
|
---|
197 | * transitioned to this state via onefs_set_oplock_callback.
|
---|
198 | */
|
---|
199 | uint64_t onefs_oplock_wait_record(uint16_t mid)
|
---|
200 | {
|
---|
201 | struct onefs_callback_record *result;
|
---|
202 | static uint64_t id_generator = 0;
|
---|
203 |
|
---|
204 | if (!(result = SMB_MALLOC_P(struct onefs_callback_record))) {
|
---|
205 | DEBUG(0, ("talloc failed\n"));
|
---|
206 | return 0;
|
---|
207 | }
|
---|
208 |
|
---|
209 | memset(result, '\0', sizeof(result));
|
---|
210 |
|
---|
211 | id_generator += 1;
|
---|
212 | if (id_generator == 0) {
|
---|
213 | /* Wow, that's a long-running smbd... */
|
---|
214 | id_generator += 1;
|
---|
215 | }
|
---|
216 |
|
---|
217 | result->id = id_generator;
|
---|
218 |
|
---|
219 | result->state = ONEFS_WAITING_FOR_OPLOCK;
|
---|
220 | result->data.mid = mid;
|
---|
221 | DLIST_ADD(callback_recs, result);
|
---|
222 |
|
---|
223 | DEBUG(10, ("New cb rec %llu created\n", result->id));
|
---|
224 |
|
---|
225 | return result->id;
|
---|
226 | }
|
---|
227 |
|
---|
228 | /**
|
---|
229 | * Transition the callback record state to OPEN_FILE.
|
---|
230 | *
|
---|
231 | * This is called after the file is opened and an fsp struct has been
|
---|
232 | * allocated. The mid is dropped in favor of storing the fsp.
|
---|
233 | */
|
---|
234 | void onefs_set_oplock_callback(uint64_t id, files_struct *fsp)
|
---|
235 | {
|
---|
236 | struct onefs_callback_record *cb;
|
---|
237 | char *msg;
|
---|
238 |
|
---|
239 | DEBUG(10, ("onefs_set_oplock_callback called for cb rec %llu\n", id));
|
---|
240 |
|
---|
241 | if (!(cb = onefs_find_cb(id, ONEFS_WAITING_FOR_OPLOCK))) {
|
---|
242 | if (asprintf(&msg, "Got invalid callback %lld\n", id) != -1) {
|
---|
243 | smb_panic(msg);
|
---|
244 | }
|
---|
245 | smb_panic("Got invalid callback id\n");
|
---|
246 | }
|
---|
247 |
|
---|
248 | /*
|
---|
249 | * Paranoia check
|
---|
250 | */
|
---|
251 | if (open_was_deferred(cb->data.mid)) {
|
---|
252 | if (asprintf(&msg, "Trying to upgrade callback for deferred "
|
---|
253 | "open mid=%d\n", cb->data.mid) != -1) {
|
---|
254 | smb_panic(msg);
|
---|
255 | }
|
---|
256 | smb_panic("Trying to upgrade callback for deferred open "
|
---|
257 | "mid\n");
|
---|
258 | }
|
---|
259 |
|
---|
260 | cb->state = ONEFS_OPEN_FILE;
|
---|
261 | cb->data.fsp = fsp;
|
---|
262 | }
|
---|
263 |
|
---|
264 | /**
|
---|
265 | * Using a callback record, initialize a share mode entry to pass to
|
---|
266 | * share_mode_entry_to_message to send samba IPC messages.
|
---|
267 | */
|
---|
268 | static void init_share_mode_entry(struct share_mode_entry *sme,
|
---|
269 | struct onefs_callback_record *cb,
|
---|
270 | int op_type)
|
---|
271 | {
|
---|
272 | ZERO_STRUCT(*sme);
|
---|
273 |
|
---|
274 | sme->pid = procid_self();
|
---|
275 | sme->op_type = op_type;
|
---|
276 | sme->id = cb->data.fsp->file_id;
|
---|
277 | sme->share_file_id = cb->data.fsp->fh->gen_id;
|
---|
278 | }
|
---|
279 |
|
---|
280 | /**
|
---|
281 | * Callback when a break-to-none event is received from the kernel.
|
---|
282 | *
|
---|
283 | * On OneFS level 1 oplocks are always broken to level 2 first, therefore an
|
---|
284 | * async level 2 break message is always sent when breaking to none. The
|
---|
285 | * downside of this is that OneFS currently has no way to express breaking
|
---|
286 | * directly from level 1 to none.
|
---|
287 | */
|
---|
288 | static void oplock_break_to_none_handler(uint64_t id)
|
---|
289 | {
|
---|
290 | struct onefs_callback_record *cb;
|
---|
291 | struct share_mode_entry sme;
|
---|
292 | char msg[MSG_SMB_SHARE_MODE_ENTRY_SIZE];
|
---|
293 |
|
---|
294 | DEBUG(10, ("oplock_break_to_none_handler called for id %llu\n", id));
|
---|
295 |
|
---|
296 | if (!(cb = onefs_find_cb(id, ONEFS_OPEN_FILE))) {
|
---|
297 | DEBUG(3, ("oplock_break_to_none_handler: could not find "
|
---|
298 | "callback id %llu\n", id));
|
---|
299 | return;
|
---|
300 | }
|
---|
301 |
|
---|
302 | DEBUG(10, ("oplock_break_to_none_handler called for file %s\n",
|
---|
303 | fsp_str_dbg(cb->data.fsp)));
|
---|
304 |
|
---|
305 | init_share_mode_entry(&sme, cb, FORCE_OPLOCK_BREAK_TO_NONE);
|
---|
306 | share_mode_entry_to_message(msg, &sme);
|
---|
307 | messaging_send_buf(smbd_messaging_context(),
|
---|
308 | sme.pid,
|
---|
309 | MSG_SMB_ASYNC_LEVEL2_BREAK,
|
---|
310 | (uint8_t *)msg,
|
---|
311 | MSG_SMB_SHARE_MODE_ENTRY_SIZE);
|
---|
312 |
|
---|
313 | /*
|
---|
314 | * We could still receive an OPLOCK_REVOKED message, so keep the
|
---|
315 | * oplock_callback_id around.
|
---|
316 | */
|
---|
317 | }
|
---|
318 |
|
---|
319 | /**
|
---|
320 | * Callback when a break-to-level2 event is received from the kernel.
|
---|
321 | *
|
---|
322 | * Breaks from level 1 to level 2.
|
---|
323 | */
|
---|
324 | static void oplock_break_to_level_two_handler(uint64_t id)
|
---|
325 | {
|
---|
326 | struct onefs_callback_record *cb;
|
---|
327 | struct share_mode_entry sme;
|
---|
328 | char msg[MSG_SMB_SHARE_MODE_ENTRY_SIZE];
|
---|
329 |
|
---|
330 | DEBUG(10, ("oplock_break_to_level_two_handler called for id %llu\n",
|
---|
331 | id));
|
---|
332 |
|
---|
333 | if (!(cb = onefs_find_cb(id, ONEFS_OPEN_FILE))) {
|
---|
334 | DEBUG(3, ("oplock_break_to_level_two_handler: could not find "
|
---|
335 | "callback id %llu\n", id));
|
---|
336 | return;
|
---|
337 | }
|
---|
338 |
|
---|
339 | DEBUG(10, ("oplock_break_to_level_two_handler called for file %s\n",
|
---|
340 | fsp_str_dbg(cb->data.fsp)));
|
---|
341 |
|
---|
342 | init_share_mode_entry(&sme, cb, LEVEL_II_OPLOCK);
|
---|
343 | share_mode_entry_to_message(msg, &sme);
|
---|
344 | messaging_send_buf(smbd_messaging_context(),
|
---|
345 | sme.pid,
|
---|
346 | MSG_SMB_BREAK_REQUEST,
|
---|
347 | (uint8_t *)msg,
|
---|
348 | MSG_SMB_SHARE_MODE_ENTRY_SIZE);
|
---|
349 |
|
---|
350 | /*
|
---|
351 | * We could still receive an OPLOCK_REVOKED or OPLOCK_BREAK_TO_NONE
|
---|
352 | * message, so keep the oplock_callback_id around.
|
---|
353 | */
|
---|
354 | }
|
---|
355 |
|
---|
356 | /**
|
---|
357 | * Revoke an oplock from an unresponsive client.
|
---|
358 | *
|
---|
359 | * The kernel will send this message when it times out waiting for a level 1
|
---|
360 | * oplock break to be acknowledged by the client. The oplock is then
|
---|
361 | * immediately removed.
|
---|
362 | */
|
---|
363 | static void oplock_revoked_handler(uint64_t id)
|
---|
364 | {
|
---|
365 | struct onefs_callback_record *cb;
|
---|
366 | files_struct *fsp = NULL;
|
---|
367 |
|
---|
368 | DEBUG(10, ("oplock_revoked_handler called for id %llu\n", id));
|
---|
369 |
|
---|
370 | if (!(cb = onefs_find_cb(id, ONEFS_OPEN_FILE))) {
|
---|
371 | DEBUG(3, ("oplock_revoked_handler: could not find "
|
---|
372 | "callback id %llu\n", id));
|
---|
373 | return;
|
---|
374 | }
|
---|
375 |
|
---|
376 | fsp = cb->data.fsp;
|
---|
377 |
|
---|
378 | SMB_ASSERT(fsp->oplock_timeout == NULL);
|
---|
379 |
|
---|
380 | DEBUG(0,("Level 1 oplock break failed for file %s. Forcefully "
|
---|
381 | "revoking oplock\n", fsp_str_dbg(fsp)));
|
---|
382 |
|
---|
383 | global_client_failed_oplock_break = True;
|
---|
384 | remove_oplock(fsp);
|
---|
385 |
|
---|
386 | /*
|
---|
387 | * cb record is cleaned up in fsp ext data destructor on close, so
|
---|
388 | * leave it in the list.
|
---|
389 | */
|
---|
390 | }
|
---|
391 |
|
---|
392 | /**
|
---|
393 | * Asynchronous ifs_createfile callback
|
---|
394 | *
|
---|
395 | * If ifs_createfile had to asynchronously break any oplocks, this function is
|
---|
396 | * called when the kernel sends an event that the open can be retried.
|
---|
397 | */
|
---|
398 | static void semlock_available_handler(uint64_t id)
|
---|
399 | {
|
---|
400 | struct onefs_callback_record *cb;
|
---|
401 |
|
---|
402 | DEBUG(10, ("semlock_available_handler called: %llu\n", id));
|
---|
403 |
|
---|
404 | if (!(cb = onefs_find_cb(id, ONEFS_WAITING_FOR_OPLOCK))) {
|
---|
405 | DEBUG(5, ("semlock_available_handler: Did not find callback "
|
---|
406 | "%llu\n", id));
|
---|
407 | return;
|
---|
408 | }
|
---|
409 |
|
---|
410 | DEBUG(10, ("Got semlock available for mid %d\n", cb->data.mid));
|
---|
411 |
|
---|
412 | /* Paranoia check */
|
---|
413 | if (!(open_was_deferred(cb->data.mid))) {
|
---|
414 | char *msg;
|
---|
415 | if (asprintf(&msg, "Semlock available on an open that wasn't "
|
---|
416 | "deferred: %s\n",
|
---|
417 | onefs_cb_record_str_dbg(cb)) != -1) {
|
---|
418 | smb_panic(msg);
|
---|
419 | }
|
---|
420 | smb_panic("Semlock available on an open that wasn't "
|
---|
421 | "deferred\n");
|
---|
422 | }
|
---|
423 |
|
---|
424 | schedule_deferred_open_smb_message(cb->data.mid);
|
---|
425 |
|
---|
426 | /* Cleanup the callback record since the open will be retried. */
|
---|
427 | destroy_onefs_callback_record(id);
|
---|
428 |
|
---|
429 | return;
|
---|
430 | }
|
---|
431 |
|
---|
432 | /**
|
---|
433 | * Asynchronous ifs_createfile failure callback
|
---|
434 | *
|
---|
435 | * If ifs_createfile had to asynchronously break any oplocks, but an error was
|
---|
436 | * encountered in the kernel, the open will be retried with the state->failed
|
---|
437 | * set to true. This will prompt the open path to send an INTERNAL_ERROR
|
---|
438 | * error message to the client.
|
---|
439 | */
|
---|
440 | static void semlock_async_failure_handler(uint64_t id)
|
---|
441 | {
|
---|
442 | struct onefs_callback_record *cb;
|
---|
443 | struct pending_message_list *pml;
|
---|
444 | struct deferred_open_record *state;
|
---|
445 |
|
---|
446 | DEBUG(1, ("semlock_async_failure_handler called: %llu\n", id));
|
---|
447 |
|
---|
448 | if (!(cb = onefs_find_cb(id, ONEFS_WAITING_FOR_OPLOCK))) {
|
---|
449 | DEBUG(5, ("semlock_async_failure_handler: Did not find callback "
|
---|
450 | "%llu\n", id));
|
---|
451 | return;
|
---|
452 | }
|
---|
453 |
|
---|
454 | DEBUG(1, ("Got semlock_async_failure message for mid %d\n", cb->data.mid));
|
---|
455 |
|
---|
456 | /* Paranoia check */
|
---|
457 | if (!(open_was_deferred(cb->data.mid))) {
|
---|
458 | char *msg;
|
---|
459 | if (asprintf(&msg, "Semlock failure on an open that wasn't "
|
---|
460 | "deferred: %s\n",
|
---|
461 | onefs_cb_record_str_dbg(cb)) != -1) {
|
---|
462 | smb_panic(msg);
|
---|
463 | }
|
---|
464 | smb_panic("Semlock failure on an open that wasn't deferred\n");
|
---|
465 | }
|
---|
466 |
|
---|
467 | /* Find the actual deferred open record. */
|
---|
468 | if (!(pml = get_open_deferred_message(cb->data.mid))) {
|
---|
469 | DEBUG(0, ("Could not find deferred request for "
|
---|
470 | "mid %d\n", cb->data.mid));
|
---|
471 | destroy_onefs_callback_record(id);
|
---|
472 | return;
|
---|
473 | }
|
---|
474 | state = (struct deferred_open_record *)pml->private_data.data;
|
---|
475 |
|
---|
476 | /* Update to failed so the client can be notified on retried open. */
|
---|
477 | state->failed = true;
|
---|
478 |
|
---|
479 | /* Schedule deferred open for immediate retry. */
|
---|
480 | schedule_deferred_open_smb_message(cb->data.mid);
|
---|
481 |
|
---|
482 | /* Cleanup the callback record here since the open will be retried. */
|
---|
483 | destroy_onefs_callback_record(id);
|
---|
484 |
|
---|
485 | return;
|
---|
486 | }
|
---|
487 |
|
---|
488 | /**
|
---|
489 | * OneFS acquires all oplocks via ifs_createfile, so this is a no-op.
|
---|
490 | */
|
---|
491 | static bool onefs_set_kernel_oplock(struct kernel_oplocks *_ctx,
|
---|
492 | files_struct *fsp, int oplock_type) {
|
---|
493 | return true;
|
---|
494 | }
|
---|
495 |
|
---|
496 | /**
|
---|
497 | * Release the kernel oplock.
|
---|
498 | */
|
---|
499 | static void onefs_release_kernel_oplock(struct kernel_oplocks *_ctx,
|
---|
500 | files_struct *fsp, int oplock_type)
|
---|
501 | {
|
---|
502 | enum oplock_type oplock = onefs_samba_oplock_to_oplock(oplock_type);
|
---|
503 |
|
---|
504 | DEBUG(10, ("onefs_release_kernel_oplock: Releasing %s to type %s\n",
|
---|
505 | fsp_str_dbg(fsp), onefs_oplock_str(oplock)));
|
---|
506 |
|
---|
507 | if (fsp->fh->fd == -1) {
|
---|
508 | DEBUG(1, ("no fd\n"));
|
---|
509 | return;
|
---|
510 | }
|
---|
511 |
|
---|
512 | /* Downgrade oplock to either SHARED or NONE. */
|
---|
513 | if (ifs_oplock_downgrade(fsp->fh->fd, oplock)) {
|
---|
514 | DEBUG(1,("ifs_oplock_downgrade failed: %s\n",
|
---|
515 | strerror(errno)));
|
---|
516 | }
|
---|
517 | }
|
---|
518 |
|
---|
519 | /**
|
---|
520 | * Wrap ifs_semlock_write so it is only called on operations that aren't
|
---|
521 | * already contended in the kernel.
|
---|
522 | */
|
---|
523 | static void onefs_semlock_write(int fd, enum level2_contention_type type,
|
---|
524 | enum semlock_operation semlock_op)
|
---|
525 | {
|
---|
526 | int ret;
|
---|
527 |
|
---|
528 | switch (type) {
|
---|
529 | case LEVEL2_CONTEND_ALLOC_GROW:
|
---|
530 | case LEVEL2_CONTEND_POSIX_BRL:
|
---|
531 | DEBUG(10, ("Taking %d write semlock for cmd %d on fd: %d\n",
|
---|
532 | semlock_op, type, fd));
|
---|
533 | ret = ifs_semlock_write(fd, semlock_op);
|
---|
534 | if (ret) {
|
---|
535 | DEBUG(0,("ifs_semlock_write failed taking %d write "
|
---|
536 | "semlock for cmd %d on fd: %d: %s",
|
---|
537 | semlock_op, type, fd, strerror(errno)));
|
---|
538 | }
|
---|
539 | break;
|
---|
540 | default:
|
---|
541 | DEBUG(10, ("Skipping write semlock for cmd %d on fd: %d\n",
|
---|
542 | type, fd));
|
---|
543 | }
|
---|
544 | }
|
---|
545 |
|
---|
546 | /**
|
---|
547 | * Contend level 2 oplocks in the kernel and smbd.
|
---|
548 | *
|
---|
549 | * Taking a write semlock will contend all level 2 oplocks in all smbds across
|
---|
550 | * the cluster except the fsp's own level 2 oplock. This lack of
|
---|
551 | * self-contention is a limitation of the current OneFS kernel oplocks
|
---|
552 | * implementation. Luckily it is easy to contend our own level 2 oplock by
|
---|
553 | * checking the the fsp's oplock_type. If it's a level2, send a break message
|
---|
554 | * to the client and remove the oplock.
|
---|
555 | */
|
---|
556 | static void onefs_contend_level2_oplocks_begin(files_struct *fsp,
|
---|
557 | enum level2_contention_type type)
|
---|
558 | {
|
---|
559 | /* Take care of level 2 kernel contention. */
|
---|
560 | onefs_semlock_write(fsp->fh->fd, type, SEMLOCK_LOCK);
|
---|
561 |
|
---|
562 | /* Take care of level 2 self contention. */
|
---|
563 | if (LEVEL_II_OPLOCK_TYPE(fsp->oplock_type))
|
---|
564 | break_level2_to_none_async(fsp);
|
---|
565 | }
|
---|
566 |
|
---|
567 | /**
|
---|
568 | * Unlock the write semlock when the level 2 contending operation ends.
|
---|
569 | */
|
---|
570 | static void onefs_contend_level2_oplocks_end(files_struct *fsp,
|
---|
571 | enum level2_contention_type type)
|
---|
572 | {
|
---|
573 | /* Take care of level 2 kernel contention. */
|
---|
574 | onefs_semlock_write(fsp->fh->fd, type, SEMLOCK_UNLOCK);
|
---|
575 | }
|
---|
576 |
|
---|
577 | /**
|
---|
578 | * Return string value of onefs oplock types.
|
---|
579 | */
|
---|
580 | const char *onefs_oplock_str(enum oplock_type onefs_oplock_type)
|
---|
581 | {
|
---|
582 | switch (onefs_oplock_type) {
|
---|
583 | case OPLOCK_NONE:
|
---|
584 | return "OPLOCK_NONE";
|
---|
585 | case OPLOCK_EXCLUSIVE:
|
---|
586 | return "OPLOCK_EXCLUSIVE";
|
---|
587 | case OPLOCK_BATCH:
|
---|
588 | return "OPLOCK_BATCH";
|
---|
589 | case OPLOCK_SHARED:
|
---|
590 | return "OPLOCK_SHARED";
|
---|
591 | default:
|
---|
592 | break;
|
---|
593 | }
|
---|
594 | return "UNKNOWN";
|
---|
595 | }
|
---|
596 |
|
---|
597 | /**
|
---|
598 | * Convert from onefs to samba oplock.
|
---|
599 | */
|
---|
600 | int onefs_oplock_to_samba_oplock(enum oplock_type onefs_oplock)
|
---|
601 | {
|
---|
602 | switch (onefs_oplock) {
|
---|
603 | case OPLOCK_NONE:
|
---|
604 | return NO_OPLOCK;
|
---|
605 | case OPLOCK_EXCLUSIVE:
|
---|
606 | return EXCLUSIVE_OPLOCK;
|
---|
607 | case OPLOCK_BATCH:
|
---|
608 | return BATCH_OPLOCK;
|
---|
609 | case OPLOCK_SHARED:
|
---|
610 | return LEVEL_II_OPLOCK;
|
---|
611 | default:
|
---|
612 | DEBUG(0, ("unknown oplock type %d found\n", onefs_oplock));
|
---|
613 | break;
|
---|
614 | }
|
---|
615 | return NO_OPLOCK;
|
---|
616 | }
|
---|
617 |
|
---|
618 | /**
|
---|
619 | * Convert from samba to onefs oplock.
|
---|
620 | */
|
---|
621 | enum oplock_type onefs_samba_oplock_to_oplock(int samba_oplock_type)
|
---|
622 | {
|
---|
623 | if (BATCH_OPLOCK_TYPE(samba_oplock_type)) return OPLOCK_BATCH;
|
---|
624 | if (EXCLUSIVE_OPLOCK_TYPE(samba_oplock_type)) return OPLOCK_EXCLUSIVE;
|
---|
625 | if (LEVEL_II_OPLOCK_TYPE(samba_oplock_type)) return OPLOCK_SHARED;
|
---|
626 | return OPLOCK_NONE;
|
---|
627 | }
|
---|
628 |
|
---|
629 | /**
|
---|
630 | * Oplock event handler.
|
---|
631 | *
|
---|
632 | * Call into the event system dispatcher to handle each event.
|
---|
633 | */
|
---|
634 | static void onefs_oplocks_read_fde_handler(struct event_context *ev,
|
---|
635 | struct fd_event *fde,
|
---|
636 | uint16_t flags,
|
---|
637 | void *private_data)
|
---|
638 | {
|
---|
639 | struct onefs_oplocks_context *ctx =
|
---|
640 | talloc_get_type(private_data, struct onefs_oplocks_context);
|
---|
641 |
|
---|
642 | if (oplocks_event_dispatcher(ctx->onefs_ops)) {
|
---|
643 | DEBUG(0, ("oplocks_event_dispatcher failed: %s\n",
|
---|
644 | strerror(errno)));
|
---|
645 | }
|
---|
646 | }
|
---|
647 |
|
---|
648 | /**
|
---|
649 | * Setup kernel oplocks
|
---|
650 | */
|
---|
651 | static const struct kernel_oplocks_ops onefs_koplocks_ops = {
|
---|
652 | .set_oplock = onefs_set_kernel_oplock,
|
---|
653 | .release_oplock = onefs_release_kernel_oplock,
|
---|
654 | .contend_level2_oplocks_begin = onefs_contend_level2_oplocks_begin,
|
---|
655 | .contend_level2_oplocks_end = onefs_contend_level2_oplocks_end,
|
---|
656 | };
|
---|
657 |
|
---|
658 | static const struct oplocks_event_ops onefs_dispatch_ops = {
|
---|
659 | .oplock_break_to_none = oplock_break_to_none_handler,
|
---|
660 | .oplock_break_to_level_two = oplock_break_to_level_two_handler,
|
---|
661 | .oplock_revoked = oplock_revoked_handler,
|
---|
662 | .semlock_available = semlock_available_handler,
|
---|
663 | .semlock_async_failure = semlock_async_failure_handler,
|
---|
664 | };
|
---|
665 |
|
---|
666 | struct kernel_oplocks *onefs_init_kernel_oplocks(TALLOC_CTX *mem_ctx)
|
---|
667 | {
|
---|
668 | struct kernel_oplocks *_ctx = NULL;
|
---|
669 | struct onefs_oplocks_context *ctx = NULL;
|
---|
670 | struct procoptions po = PROCOPTIONS_INIT;
|
---|
671 |
|
---|
672 | DEBUG(10, ("onefs_init_kernel_oplocks called\n"));
|
---|
673 |
|
---|
674 | /* Set the non-blocking proc flag */
|
---|
675 | po.po_flags_on |= P_NON_BLOCKING_SEMLOCK;
|
---|
676 | if (setprocoptions(&po) != 0) {
|
---|
677 | DEBUG(0, ("setprocoptions failed: %s.\n", strerror(errno)));
|
---|
678 | return NULL;
|
---|
679 | }
|
---|
680 |
|
---|
681 | /* Setup the oplock contexts */
|
---|
682 | _ctx = talloc_zero(mem_ctx, struct kernel_oplocks);
|
---|
683 | if (!_ctx) {
|
---|
684 | return NULL;
|
---|
685 | }
|
---|
686 |
|
---|
687 | ctx = talloc_zero(_ctx, struct onefs_oplocks_context);
|
---|
688 | if (!ctx) {
|
---|
689 | goto err_out;
|
---|
690 | }
|
---|
691 |
|
---|
692 | _ctx->ops = &onefs_koplocks_ops;
|
---|
693 | _ctx->flags = (KOPLOCKS_LEVEL2_SUPPORTED |
|
---|
694 | KOPLOCKS_DEFERRED_OPEN_NOTIFICATION |
|
---|
695 | KOPLOCKS_TIMEOUT_NOTIFICATION |
|
---|
696 | KOPLOCKS_OPLOCK_BROKEN_NOTIFICATION);
|
---|
697 | _ctx->private_data = ctx;
|
---|
698 | ctx->ctx = _ctx;
|
---|
699 | ctx->onefs_ops = &onefs_dispatch_ops;
|
---|
700 |
|
---|
701 | /* Register an kernel event channel for oplocks */
|
---|
702 | ctx->onefs_event_fd = oplocks_event_register();
|
---|
703 | if (ctx->onefs_event_fd == -1) {
|
---|
704 | DEBUG(0, ("oplocks_event_register failed: %s\n",
|
---|
705 | strerror(errno)));
|
---|
706 | goto err_out;
|
---|
707 | }
|
---|
708 |
|
---|
709 | DEBUG(10, ("oplock event_fd = %d\n", ctx->onefs_event_fd));
|
---|
710 |
|
---|
711 | /* Register the oplock event_fd with samba's event system */
|
---|
712 | ctx->read_fde = event_add_fd(smbd_event_context(),
|
---|
713 | ctx,
|
---|
714 | ctx->onefs_event_fd,
|
---|
715 | EVENT_FD_READ,
|
---|
716 | onefs_oplocks_read_fde_handler,
|
---|
717 | ctx);
|
---|
718 | return _ctx;
|
---|
719 |
|
---|
720 | err_out:
|
---|
721 | talloc_free(_ctx);
|
---|
722 | return NULL;
|
---|
723 | }
|
---|
724 |
|
---|
725 | #else
|
---|
/*
 * Placeholder definition so that this translation unit is not empty when
 * samba is built without OneFS support.
 */
void oplock_onefs_dummy(void);

void oplock_onefs_dummy(void)
{
}
728 | #endif /* HAVE_ONEFS */
|
---|