source: trunk/server/source3/modules/vfs_aio_pthread.c

Last change on this file was 862, checked in by Silvan Scherrer, 11 years ago

Samba Server: update trunk to 3.6.23

File size: 16.6 KB
Line 
1/*
2 * Simulate Posix AIO using pthreads.
3 *
4 * Based on the aio_fork work from Volker and Volker's pthreadpool library.
5 *
6 * Copyright (C) Volker Lendecke 2008
7 * Copyright (C) Jeremy Allison 2012
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 3 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 */
23
24#include "includes.h"
25#include "system/filesys.h"
26#include "system/shmem.h"
27#include "smbd/smbd.h"
28#include "lib/pthreadpool/pthreadpool.h"
29
/* Opaque here; only ever handled as a pointer taken from the aiocb. */
struct aio_extra;

/* Shared worker pool, created on first use by init_aio_threadpool(). */
static struct pthreadpool *pool = NULL;

/* Next job id to hand out; incremented by create_private_data(). */
static int aio_pthread_jobid = 0;
33
34struct aio_private_data {
35 struct aio_private_data *prev, *next;
36 int jobid;
37 SMB_STRUCT_AIOCB *aiocb;
38 ssize_t ret_size;
39 int ret_errno;
40 bool cancelled;
41 bool write_command;
42};
43
/*
 * Head of the list of requests we currently track. Entries are appended
 * by create_private_data() and unlinked by pd_destructor().
 */
static struct aio_private_data *pd_list = NULL;
46
47static void aio_pthread_handle_completion(struct event_context *event_ctx,
48 struct fd_event *event,
49 uint16 flags,
50 void *p);
51
52/************************************************************************
53 How many threads to initialize ?
54 100 per process seems insane as a default until you realize that
55 (a) Threads terminate after 1 second when idle.
56 (b) Throttling is done in SMB2 via the crediting algorithm.
57 (c) SMB1 clients are limited to max_mux (50) outstanding requests and
58 Windows clients don't use this anyway.
59 Essentially we want this to be unlimited unless smb.conf says different.
60***********************************************************************/
61
62static int aio_get_num_threads(struct vfs_handle_struct *handle)
63{
64 return lp_parm_int(SNUM(handle->conn),
65 "aio_pthread", "aio num threads", 100);
66}
67
68/************************************************************************
69 Ensure thread pool is initialized.
70***********************************************************************/
71
72static bool init_aio_threadpool(struct vfs_handle_struct *handle)
73{
74 struct fd_event *sock_event = NULL;
75 int ret = 0;
76 int num_threads;
77 int fd;
78
79 if (pool) {
80 return true;
81 }
82
83 num_threads = aio_get_num_threads(handle);
84 ret = pthreadpool_init(num_threads, &pool);
85 if (ret) {
86 errno = ret;
87 return false;
88 }
89
90 fd = pthreadpool_signal_fd(pool);
91
92 set_blocking(fd, false);
93
94 sock_event = tevent_add_fd(server_event_context(),
95 NULL,
96 fd,
97 TEVENT_FD_READ,
98 aio_pthread_handle_completion,
99 NULL);
100 if (sock_event == NULL) {
101 pthreadpool_destroy(pool);
102 pool = NULL;
103 return false;
104 }
105
106 DEBUG(10,("init_aio_threadpool: initialized with up to %d threads\n",
107 num_threads));
108
109 return true;
110}
111
112
113/************************************************************************
114 Worker function - core of the pthread aio engine.
115 This is the function that actually does the IO.
116***********************************************************************/
117
118static void aio_worker(void *private_data)
119{
120 struct aio_private_data *pd =
121 (struct aio_private_data *)private_data;
122
123 if (pd->write_command) {
124 pd->ret_size = sys_pwrite(pd->aiocb->aio_fildes,
125 (const void *)pd->aiocb->aio_buf,
126 pd->aiocb->aio_nbytes,
127 pd->aiocb->aio_offset);
128 if (pd->ret_size == -1 && errno == ESPIPE) {
129 /* Maintain the fiction that pipes can
130 be seeked (sought?) on. */
131 pd->ret_size = sys_write(pd->aiocb->aio_fildes,
132 (const void *)pd->aiocb->aio_buf,
133 pd->aiocb->aio_nbytes);
134 }
135 } else {
136 pd->ret_size = sys_pread(pd->aiocb->aio_fildes,
137 (void *)pd->aiocb->aio_buf,
138 pd->aiocb->aio_nbytes,
139 pd->aiocb->aio_offset);
140 if (pd->ret_size == -1 && errno == ESPIPE) {
141 /* Maintain the fiction that pipes can
142 be seeked (sought?) on. */
143 pd->ret_size = sys_read(pd->aiocb->aio_fildes,
144 (void *)pd->aiocb->aio_buf,
145 pd->aiocb->aio_nbytes);
146 }
147 }
148 if (pd->ret_size == -1) {
149 pd->ret_errno = errno;
150 } else {
151 pd->ret_errno = 0;
152 }
153}
154
155/************************************************************************
156 Private data destructor.
157***********************************************************************/
158
159static int pd_destructor(struct aio_private_data *pd)
160{
161 DLIST_REMOVE(pd_list, pd);
162 return 0;
163}
164
165/************************************************************************
166 Create and initialize a private data struct.
167***********************************************************************/
168
169static struct aio_private_data *create_private_data(TALLOC_CTX *ctx,
170 SMB_STRUCT_AIOCB *aiocb)
171{
172 struct aio_private_data *pd = talloc_zero(ctx, struct aio_private_data);
173 if (!pd) {
174 return NULL;
175 }
176 pd->jobid = aio_pthread_jobid++;
177 pd->aiocb = aiocb;
178 pd->ret_size = -1;
179 pd->ret_errno = EINPROGRESS;
180 talloc_set_destructor(pd, pd_destructor);
181 DLIST_ADD_END(pd_list, pd, struct aio_private_data *);
182 return pd;
183}
184
185/************************************************************************
186 Spin off a threadpool (if needed) and initiate a pread call.
187***********************************************************************/
188
189static int aio_pthread_read(struct vfs_handle_struct *handle,
190 struct files_struct *fsp,
191 SMB_STRUCT_AIOCB *aiocb)
192{
193 struct aio_extra *aio_ex = (struct aio_extra *)aiocb->aio_sigevent.sigev_value.sival_ptr;
194 struct aio_private_data *pd = NULL;
195 int ret;
196
197 if (!init_aio_threadpool(handle)) {
198 return -1;
199 }
200
201 pd = create_private_data(aio_ex, aiocb);
202 if (pd == NULL) {
203 DEBUG(10, ("aio_pthread_read: Could not create private data.\n"));
204 return -1;
205 }
206
207 ret = pthreadpool_add_job(pool, pd->jobid, aio_worker, (void *)pd);
208 if (ret) {
209 errno = ret;
210 return -1;
211 }
212
213 DEBUG(10, ("aio_pthread_read: jobid=%d pread requested "
214 "of %llu bytes at offset %llu\n",
215 pd->jobid,
216 (unsigned long long)pd->aiocb->aio_nbytes,
217 (unsigned long long)pd->aiocb->aio_offset));
218
219 return 0;
220}
221
222/************************************************************************
223 Spin off a threadpool (if needed) and initiate a pwrite call.
224***********************************************************************/
225
226static int aio_pthread_write(struct vfs_handle_struct *handle,
227 struct files_struct *fsp,
228 SMB_STRUCT_AIOCB *aiocb)
229{
230 struct aio_extra *aio_ex = (struct aio_extra *)aiocb->aio_sigevent.sigev_value.sival_ptr;
231 struct aio_private_data *pd = NULL;
232 int ret;
233
234 if (!init_aio_threadpool(handle)) {
235 return -1;
236 }
237
238 pd = create_private_data(aio_ex, aiocb);
239 if (pd == NULL) {
240 DEBUG(10, ("aio_pthread_write: Could not create private data.\n"));
241 return -1;
242 }
243
244 pd->write_command = true;
245
246 ret = pthreadpool_add_job(pool, pd->jobid, aio_worker, (void *)pd);
247 if (ret) {
248 errno = ret;
249 return -1;
250 }
251
252 DEBUG(10, ("aio_pthread_write: jobid=%d pwrite requested "
253 "of %llu bytes at offset %llu\n",
254 pd->jobid,
255 (unsigned long long)pd->aiocb->aio_nbytes,
256 (unsigned long long)pd->aiocb->aio_offset));
257
258 return 0;
259}
260
261/************************************************************************
262 Find the private data by jobid.
263***********************************************************************/
264
265static struct aio_private_data *find_private_data_by_jobid(int jobid)
266{
267 struct aio_private_data *pd;
268
269 for (pd = pd_list; pd != NULL; pd = pd->next) {
270 if (pd->jobid == jobid) {
271 return pd;
272 }
273 }
274
275 return NULL;
276}
277
278/************************************************************************
279 Callback when an IO completes.
280***********************************************************************/
281
282static void aio_pthread_handle_completion(struct event_context *event_ctx,
283 struct fd_event *event,
284 uint16 flags,
285 void *p)
286{
287 struct aio_extra *aio_ex = NULL;
288 struct aio_private_data *pd = NULL;
289 int jobid = 0;
290 int ret;
291
292 DEBUG(10, ("aio_pthread_handle_completion called with flags=%d\n",
293 (int)flags));
294
295 if ((flags & EVENT_FD_READ) == 0) {
296 return;
297 }
298
299 while (true) {
300 ret = pthreadpool_finished_job(pool, &jobid);
301
302 if (ret == EINTR || ret == EAGAIN) {
303 return;
304 }
305#ifdef EWOULDBLOCK
306 if (ret == EWOULDBLOCK) {
307 return;
308 }
309#endif
310
311 if (ret == ECANCELED) {
312 return;
313 }
314
315 if (ret) {
316 smb_panic("aio_pthread_handle_completion");
317 return;
318 }
319
320 pd = find_private_data_by_jobid(jobid);
321 if (pd == NULL) {
322 DEBUG(1, ("aio_pthread_handle_completion cannot find "
323 "jobid %d\n", jobid));
324 return;
325 }
326
327 aio_ex = (struct aio_extra *)
328 pd->aiocb->aio_sigevent.sigev_value.sival_ptr;
329
330 smbd_aio_complete_aio_ex(aio_ex);
331
332 DEBUG(10,("aio_pthread_handle_completion: jobid %d "
333 "completed\n", jobid ));
334 TALLOC_FREE(aio_ex);
335 }
336}
337
338/************************************************************************
339 Find the private data by aiocb.
340***********************************************************************/
341
342static struct aio_private_data *find_private_data_by_aiocb(SMB_STRUCT_AIOCB *aiocb)
343{
344 struct aio_private_data *pd;
345
346 for (pd = pd_list; pd != NULL; pd = pd->next) {
347 if (pd->aiocb == aiocb) {
348 return pd;
349 }
350 }
351
352 return NULL;
353}
354
355/************************************************************************
356 Called to return the result of a completed AIO.
357 Should only be called if aio_error returns something other than EINPROGRESS.
358 Returns:
359 Any other value - return from IO operation.
360***********************************************************************/
361
362static ssize_t aio_pthread_return_fn(struct vfs_handle_struct *handle,
363 struct files_struct *fsp,
364 SMB_STRUCT_AIOCB *aiocb)
365{
366 struct aio_private_data *pd = find_private_data_by_aiocb(aiocb);
367
368 if (pd == NULL) {
369 errno = EINVAL;
370 DEBUG(0, ("aio_pthread_return_fn: returning EINVAL\n"));
371 return -1;
372 }
373
374 pd->aiocb = NULL;
375
376 if (pd->ret_size == -1) {
377 errno = pd->ret_errno;
378 }
379
380 return pd->ret_size;
381}
382
383/************************************************************************
384 Called to check the result of an AIO.
385 Returns:
386 EINPROGRESS - still in progress.
387 EINVAL - invalid aiocb.
388 ECANCELED - request was cancelled.
389 0 - request completed successfully.
390 Any other value - errno from IO operation.
391***********************************************************************/
392
393static int aio_pthread_error_fn(struct vfs_handle_struct *handle,
394 struct files_struct *fsp,
395 SMB_STRUCT_AIOCB *aiocb)
396{
397 struct aio_private_data *pd = find_private_data_by_aiocb(aiocb);
398
399 if (pd == NULL) {
400 return EINVAL;
401 }
402 if (pd->cancelled) {
403 return ECANCELED;
404 }
405 return pd->ret_errno;
406}
407
408/************************************************************************
409 Called to request the cancel of an AIO, or all of them on a specific
410 fsp if aiocb == NULL.
411***********************************************************************/
412
413static int aio_pthread_cancel(struct vfs_handle_struct *handle,
414 struct files_struct *fsp,
415 SMB_STRUCT_AIOCB *aiocb)
416{
417 struct aio_private_data *pd = NULL;
418
419 for (pd = pd_list; pd != NULL; pd = pd->next) {
420 if (pd->aiocb == NULL) {
421 continue;
422 }
423 if (pd->aiocb->aio_fildes != fsp->fh->fd) {
424 continue;
425 }
426 if ((aiocb != NULL) && (pd->aiocb != aiocb)) {
427 continue;
428 }
429
430 /*
431 * We let the child do its job, but we discard the result when
432 * it's finished.
433 */
434
435 pd->cancelled = true;
436 }
437
438 return AIO_CANCELED;
439}
440
441/************************************************************************
442 Callback for a previously detected job completion.
443***********************************************************************/
444
445static void aio_pthread_handle_immediate(struct tevent_context *ctx,
446 struct tevent_immediate *im,
447 void *private_data)
448{
449 struct aio_extra *aio_ex = NULL;
450 int *pjobid = (int *)private_data;
451 struct aio_private_data *pd = find_private_data_by_jobid(*pjobid);
452
453 if (pd == NULL) {
454 DEBUG(1, ("aio_pthread_handle_immediate cannot find jobid %d\n",
455 *pjobid));
456 TALLOC_FREE(pjobid);
457 return;
458 }
459
460 TALLOC_FREE(pjobid);
461 aio_ex = (struct aio_extra *)pd->aiocb->aio_sigevent.sigev_value.sival_ptr;
462 smbd_aio_complete_aio_ex(aio_ex);
463 TALLOC_FREE(aio_ex);
464}
465
466/************************************************************************
467 Private data struct used in suspend completion code.
468***********************************************************************/
469
470struct suspend_private {
471 int num_entries;
472 int num_finished;
473 const SMB_STRUCT_AIOCB * const *aiocb_array;
474};
475
476/************************************************************************
477 Callback when an IO completes from a suspend call.
478***********************************************************************/
479
480static void aio_pthread_handle_suspend_completion(struct event_context *event_ctx,
481 struct fd_event *event,
482 uint16 flags,
483 void *p)
484{
485 struct suspend_private *sp = (struct suspend_private *)p;
486 struct aio_private_data *pd = NULL;
487 struct tevent_immediate *im = NULL;
488 int *pjobid = NULL;
489 int i;
490
491 DEBUG(10, ("aio_pthread_handle_suspend_completion called with flags=%d\n",
492 (int)flags));
493
494 if ((flags & EVENT_FD_READ) == 0) {
495 return;
496 }
497
498 pjobid = talloc_array(NULL, int, 1);
499 if (pjobid == NULL) {
500 smb_panic("aio_pthread_handle_suspend_completion: no memory.");
501 }
502
503 if (pthreadpool_finished_job(pool, pjobid)) {
504 smb_panic("aio_pthread_handle_suspend_completion: can't find job.");
505 return;
506 }
507
508 pd = find_private_data_by_jobid(*pjobid);
509 if (pd == NULL) {
510 DEBUG(1, ("aio_pthread_handle_completion cannot find jobid %d\n",
511 *pjobid));
512 TALLOC_FREE(pjobid);
513 return;
514 }
515
516 /* Is this a jobid with an aiocb we're interested in ? */
517 for (i = 0; i < sp->num_entries; i++) {
518 if (sp->aiocb_array[i] == pd->aiocb) {
519 sp->num_finished++;
520 TALLOC_FREE(pjobid);
521 return;
522 }
523 }
524
525 /* Jobid completed we weren't waiting for.
526 We must reshedule this as an immediate event
527 on the main event context. */
528 im = tevent_create_immediate(NULL);
529 if (!im) {
530 exit_server_cleanly("aio_pthread_handle_suspend_completion: no memory");
531 }
532
533 DEBUG(10,("aio_pthread_handle_suspend_completion: "
534 "re-scheduling job id %d\n",
535 *pjobid));
536
537 tevent_schedule_immediate(im,
538 server_event_context(),
539 aio_pthread_handle_immediate,
540 (void *)pjobid);
541}
542
543
/************************************************************************
 Timer handler for a suspend call with a timeout.
 private_data points to the caller's bool, which is set to true; the
 timer event itself is freed here.
***********************************************************************/

static void aio_pthread_suspend_timed_out(struct tevent_context *event_ctx,
					struct tevent_timer *te,
					struct timeval now,
					void *private_data)
{
	bool *expired = (bool *)private_data;

	*expired = true;

	/* One-shot timer: drop it now that it has fired. */
	TALLOC_FREE(te);
}
554
555/************************************************************************
556 Called to request everything to stop until all IO is completed.
557***********************************************************************/
558
559static int aio_pthread_suspend(struct vfs_handle_struct *handle,
560 struct files_struct *fsp,
561 const SMB_STRUCT_AIOCB * const aiocb_array[],
562 int n,
563 const struct timespec *timeout)
564{
565 struct event_context *ev = NULL;
566 struct fd_event *sock_event = NULL;
567 int ret = -1;
568 struct suspend_private sp;
569 bool timed_out = false;
570 TALLOC_CTX *frame = talloc_stackframe();
571
572 /* This is a blocking call, and has to use a sub-event loop. */
573 ev = event_context_init(frame);
574 if (ev == NULL) {
575 errno = ENOMEM;
576 goto out;
577 }
578
579 if (timeout) {
580 struct timeval tv = convert_timespec_to_timeval(*timeout);
581 struct tevent_timer *te = tevent_add_timer(ev,
582 frame,
583 timeval_current_ofs(tv.tv_sec,
584 tv.tv_usec),
585 aio_pthread_suspend_timed_out,
586 &timed_out);
587 if (!te) {
588 errno = ENOMEM;
589 goto out;
590 }
591 }
592
593 ZERO_STRUCT(sp);
594 sp.num_entries = n;
595 sp.aiocb_array = aiocb_array;
596 sp.num_finished = 0;
597
598 sock_event = tevent_add_fd(ev,
599 frame,
600 pthreadpool_signal_fd(pool),
601 TEVENT_FD_READ,
602 aio_pthread_handle_suspend_completion,
603 (void *)&sp);
604 if (sock_event == NULL) {
605 pthreadpool_destroy(pool);
606 pool = NULL;
607 goto out;
608 }
609 /*
610 * We're going to cheat here. We know that smbd/aio.c
611 * only calls this when it's waiting for every single
612 * outstanding call to finish on a close, so just wait
613 * individually for each IO to complete. We don't care
614 * what order they finish - only that they all do. JRA.
615 */
616 while (sp.num_entries != sp.num_finished) {
617 if (tevent_loop_once(ev) == -1) {
618 goto out;
619 }
620
621 if (timed_out) {
622 errno = EAGAIN;
623 goto out;
624 }
625 }
626
627 ret = 0;
628
629 out:
630
631 TALLOC_FREE(frame);
632 return ret;
633}
634
635static struct vfs_fn_pointers vfs_aio_pthread_fns = {
636 .aio_read = aio_pthread_read,
637 .aio_write = aio_pthread_write,
638 .aio_return_fn = aio_pthread_return_fn,
639 .aio_cancel = aio_pthread_cancel,
640 .aio_error_fn = aio_pthread_error_fn,
641 .aio_suspend = aio_pthread_suspend,
642};
643
644NTSTATUS vfs_aio_pthread_init(void);
645NTSTATUS vfs_aio_pthread_init(void)
646{
647 return smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
648 "aio_pthread", &vfs_aio_pthread_fns);
649}
Note: See TracBrowser for help on using the repository browser.