1 | /*
|
---|
2 | Unix SMB/CIFS implementation.
|
---|
3 |
|
---|
4 | main select loop and event handling - epoll implementation
|
---|
5 |
|
---|
6 | Copyright (C) Andrew Tridgell 2003-2005
|
---|
7 | Copyright (C) Stefan Metzmacher 2005-2013
|
---|
8 | Copyright (C) Jeremy Allison 2013
|
---|
9 |
|
---|
10 | ** NOTE! The following LGPL license applies to the tevent
|
---|
11 | ** library. This does NOT imply that all of Samba is released
|
---|
12 | ** under the LGPL
|
---|
13 |
|
---|
14 | This library is free software; you can redistribute it and/or
|
---|
15 | modify it under the terms of the GNU Lesser General Public
|
---|
16 | License as published by the Free Software Foundation; either
|
---|
17 | version 3 of the License, or (at your option) any later version.
|
---|
18 |
|
---|
19 | This library is distributed in the hope that it will be useful,
|
---|
20 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
21 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
22 | Lesser General Public License for more details.
|
---|
23 |
|
---|
24 | You should have received a copy of the GNU Lesser General Public
|
---|
25 | License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
---|
26 | */
|
---|
27 |
|
---|
#include "replace.h"
#include "system/filesys.h"
#include "system/select.h"
#include <limits.h>
#include "tevent.h"
#include "tevent_internal.h"
#include "tevent_util.h"
34 |
|
---|
/*
  Private state of the epoll backend. An instance is stored in
  tevent_context->additional_data by epoll_event_context_init().
*/
struct epoll_event_context {
	/* the generic event context this backend state belongs to */
	struct tevent_context *ev;

	/* descriptor obtained from epoll_create(), -1 while not open */
	int epoll_fd;

	/* pid recorded when epoll_fd was created, compared with getpid() */
	pid_t pid;

	/* when true, epoll_panic() always passes replay=true to the fallback */
	bool panic_force_replay;
	/* optional caller-owned flag that epoll_panic() sets to true */
	bool *panic_state;
	/* handler called by epoll_panic(); with NULL epoll_panic() aborts */
	bool (*panic_fallback)(struct tevent_context *ev, bool replay);
};
48 |
|
---|
/*
 * Bits this backend keeps in tevent_fd->additional_flags.
 */
/* an EPOLL_CTL_ADD/MOD succeeded, the fd is registered with epoll */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT	(1<<0)
/* the fde waits for TEVENT_FD_READ, so HUP/ERR is passed to its handler */
#define EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR	(1<<1)
/* EPOLLHUP or EPOLLERR has been seen for this fde */
#define EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR	(1<<2)
/* the fde is paired with a second fde on the same fd (see additional_data) */
#define EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX	(1<<3)
53 |
|
---|
54 | #ifdef TEST_PANIC_FALLBACK
|
---|
55 |
|
---|
56 | static int epoll_create_panic_fallback(struct epoll_event_context *epoll_ev,
|
---|
57 | int size)
|
---|
58 | {
|
---|
59 | if (epoll_ev->panic_fallback == NULL) {
|
---|
60 | return epoll_create(size);
|
---|
61 | }
|
---|
62 |
|
---|
63 | /* 50% of the time, fail... */
|
---|
64 | if ((random() % 2) == 0) {
|
---|
65 | errno = EINVAL;
|
---|
66 | return -1;
|
---|
67 | }
|
---|
68 |
|
---|
69 | return epoll_create(size);
|
---|
70 | }
|
---|
71 |
|
---|
72 | static int epoll_ctl_panic_fallback(struct epoll_event_context *epoll_ev,
|
---|
73 | int epfd, int op, int fd,
|
---|
74 | struct epoll_event *event)
|
---|
75 | {
|
---|
76 | if (epoll_ev->panic_fallback == NULL) {
|
---|
77 | return epoll_ctl(epfd, op, fd, event);
|
---|
78 | }
|
---|
79 |
|
---|
80 | /* 50% of the time, fail... */
|
---|
81 | if ((random() % 2) == 0) {
|
---|
82 | errno = EINVAL;
|
---|
83 | return -1;
|
---|
84 | }
|
---|
85 |
|
---|
86 | return epoll_ctl(epfd, op, fd, event);
|
---|
87 | }
|
---|
88 |
|
---|
89 | static int epoll_wait_panic_fallback(struct epoll_event_context *epoll_ev,
|
---|
90 | int epfd,
|
---|
91 | struct epoll_event *events,
|
---|
92 | int maxevents,
|
---|
93 | int timeout)
|
---|
94 | {
|
---|
95 | if (epoll_ev->panic_fallback == NULL) {
|
---|
96 | return epoll_wait(epfd, events, maxevents, timeout);
|
---|
97 | }
|
---|
98 |
|
---|
99 | /* 50% of the time, fail... */
|
---|
100 | if ((random() % 2) == 0) {
|
---|
101 | errno = EINVAL;
|
---|
102 | return -1;
|
---|
103 | }
|
---|
104 |
|
---|
105 | return epoll_wait(epfd, events, maxevents, timeout);
|
---|
106 | }
|
---|
107 |
|
---|
/*
 * From here on, epoll_create/epoll_ctl/epoll_wait calls in this file are
 * redirected to the *_panic_fallback() wrappers. The expansions use a
 * variable named epoll_ev, which has to be in scope at every call site.
 */
#define epoll_create(_size) \
	epoll_create_panic_fallback(epoll_ev, _size)
#define epoll_ctl(_epfd, _op, _fd, _event) \
	epoll_ctl_panic_fallback(epoll_ev,_epfd, _op, _fd, _event)
#define epoll_wait(_epfd, _events, _maxevents, _timeout) \
	epoll_wait_panic_fallback(epoll_ev, _epfd, _events, _maxevents, _timeout)
114 | #endif
|
---|
115 |
|
---|
116 | /*
|
---|
117 | called to set the panic fallback function.
|
---|
118 | */
|
---|
119 | _PRIVATE_ void tevent_epoll_set_panic_fallback(struct tevent_context *ev,
|
---|
120 | bool (*panic_fallback)(struct tevent_context *ev,
|
---|
121 | bool replay))
|
---|
122 | {
|
---|
123 | struct epoll_event_context *epoll_ev =
|
---|
124 | talloc_get_type_abort(ev->additional_data,
|
---|
125 | struct epoll_event_context);
|
---|
126 |
|
---|
127 | epoll_ev->panic_fallback = panic_fallback;
|
---|
128 | }
|
---|
129 |
|
---|
130 | /*
|
---|
131 | called when a epoll call fails
|
---|
132 | */
|
---|
133 | static void epoll_panic(struct epoll_event_context *epoll_ev,
|
---|
134 | const char *reason, bool replay)
|
---|
135 | {
|
---|
136 | struct tevent_context *ev = epoll_ev->ev;
|
---|
137 | bool (*panic_fallback)(struct tevent_context *ev, bool replay);
|
---|
138 |
|
---|
139 | panic_fallback = epoll_ev->panic_fallback;
|
---|
140 |
|
---|
141 | if (epoll_ev->panic_state != NULL) {
|
---|
142 | *epoll_ev->panic_state = true;
|
---|
143 | }
|
---|
144 |
|
---|
145 | if (epoll_ev->panic_force_replay) {
|
---|
146 | replay = true;
|
---|
147 | }
|
---|
148 |
|
---|
149 | TALLOC_FREE(ev->additional_data);
|
---|
150 |
|
---|
151 | if (panic_fallback == NULL) {
|
---|
152 | tevent_debug(ev, TEVENT_DEBUG_FATAL,
|
---|
153 | "%s (%s) replay[%u] - calling abort()\n",
|
---|
154 | reason, strerror(errno), (unsigned)replay);
|
---|
155 | abort();
|
---|
156 | }
|
---|
157 |
|
---|
158 | tevent_debug(ev, TEVENT_DEBUG_ERROR,
|
---|
159 | "%s (%s) replay[%u] - calling panic_fallback\n",
|
---|
160 | reason, strerror(errno), (unsigned)replay);
|
---|
161 |
|
---|
162 | if (!panic_fallback(ev, replay)) {
|
---|
163 | /* Fallback failed. */
|
---|
164 | tevent_debug(ev, TEVENT_DEBUG_FATAL,
|
---|
165 | "%s (%s) replay[%u] - calling abort()\n",
|
---|
166 | reason, strerror(errno), (unsigned)replay);
|
---|
167 | abort();
|
---|
168 | }
|
---|
169 | }
|
---|
170 |
|
---|
171 | /*
|
---|
172 | map from TEVENT_FD_* to EPOLLIN/EPOLLOUT
|
---|
173 | */
|
---|
174 | static uint32_t epoll_map_flags(uint16_t flags)
|
---|
175 | {
|
---|
176 | uint32_t ret = 0;
|
---|
177 | if (flags & TEVENT_FD_READ) ret |= (EPOLLIN | EPOLLERR | EPOLLHUP);
|
---|
178 | if (flags & TEVENT_FD_WRITE) ret |= (EPOLLOUT | EPOLLERR | EPOLLHUP);
|
---|
179 | return ret;
|
---|
180 | }
|
---|
181 |
|
---|
182 | /*
|
---|
183 | free the epoll fd
|
---|
184 | */
|
---|
185 | static int epoll_ctx_destructor(struct epoll_event_context *epoll_ev)
|
---|
186 | {
|
---|
187 | close(epoll_ev->epoll_fd);
|
---|
188 | epoll_ev->epoll_fd = -1;
|
---|
189 | return 0;
|
---|
190 | }
|
---|
191 |
|
---|
192 | /*
|
---|
193 | init the epoll fd
|
---|
194 | */
|
---|
195 | static int epoll_init_ctx(struct epoll_event_context *epoll_ev)
|
---|
196 | {
|
---|
197 | epoll_ev->epoll_fd = epoll_create(64);
|
---|
198 | if (epoll_ev->epoll_fd == -1) {
|
---|
199 | tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
|
---|
200 | "Failed to create epoll handle.\n");
|
---|
201 | return -1;
|
---|
202 | }
|
---|
203 |
|
---|
204 | if (!ev_set_close_on_exec(epoll_ev->epoll_fd)) {
|
---|
205 | tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
|
---|
206 | "Failed to set close-on-exec, file descriptor may be leaked to children.\n");
|
---|
207 | }
|
---|
208 |
|
---|
209 | epoll_ev->pid = getpid();
|
---|
210 | talloc_set_destructor(epoll_ev, epoll_ctx_destructor);
|
---|
211 |
|
---|
212 | return 0;
|
---|
213 | }
|
---|
214 |
|
---|
/* forward declaration, epoll_check_reopen() needs it before the definition */
static void epoll_update_event(struct epoll_event_context *epoll_ev,
			       struct tevent_fd *fde);
216 |
|
---|
217 | /*
|
---|
218 | reopen the epoll handle when our pid changes
|
---|
219 | see http://junkcode.samba.org/ftp/unpacked/junkcode/epoll_fork.c for an
|
---|
220 | demonstration of why this is needed
|
---|
221 | */
|
---|
222 | static void epoll_check_reopen(struct epoll_event_context *epoll_ev)
|
---|
223 | {
|
---|
224 | struct tevent_fd *fde;
|
---|
225 | bool *caller_panic_state = epoll_ev->panic_state;
|
---|
226 | bool panic_triggered = false;
|
---|
227 |
|
---|
228 | if (epoll_ev->pid == getpid()) {
|
---|
229 | return;
|
---|
230 | }
|
---|
231 |
|
---|
232 | close(epoll_ev->epoll_fd);
|
---|
233 | epoll_ev->epoll_fd = epoll_create(64);
|
---|
234 | if (epoll_ev->epoll_fd == -1) {
|
---|
235 | epoll_panic(epoll_ev, "epoll_create() failed", false);
|
---|
236 | return;
|
---|
237 | }
|
---|
238 |
|
---|
239 | if (!ev_set_close_on_exec(epoll_ev->epoll_fd)) {
|
---|
240 | tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
|
---|
241 | "Failed to set close-on-exec, file descriptor may be leaked to children.\n");
|
---|
242 | }
|
---|
243 |
|
---|
244 | epoll_ev->pid = getpid();
|
---|
245 | epoll_ev->panic_state = &panic_triggered;
|
---|
246 | for (fde=epoll_ev->ev->fd_events;fde;fde=fde->next) {
|
---|
247 | fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
|
---|
248 | epoll_update_event(epoll_ev, fde);
|
---|
249 |
|
---|
250 | if (panic_triggered) {
|
---|
251 | if (caller_panic_state != NULL) {
|
---|
252 | *caller_panic_state = true;
|
---|
253 | }
|
---|
254 | return;
|
---|
255 | }
|
---|
256 | }
|
---|
257 | epoll_ev->panic_state = NULL;
|
---|
258 | }
|
---|
259 |
|
---|
260 | /*
|
---|
261 | epoll cannot add the same file descriptor twice, once
|
---|
262 | with read, once with write which is allowed by the
|
---|
263 | tevent backend. Multiplex the existing fde, flag it
|
---|
264 | as such so we can search for the correct fde on
|
---|
265 | event triggering.
|
---|
266 | */
|
---|
267 |
|
---|
268 | static int epoll_add_multiplex_fd(struct epoll_event_context *epoll_ev,
|
---|
269 | struct tevent_fd *add_fde)
|
---|
270 | {
|
---|
271 | struct epoll_event event;
|
---|
272 | struct tevent_fd *mpx_fde;
|
---|
273 | int ret;
|
---|
274 |
|
---|
275 | /* Find the existing fde that caused the EEXIST error. */
|
---|
276 | for (mpx_fde = epoll_ev->ev->fd_events; mpx_fde; mpx_fde = mpx_fde->next) {
|
---|
277 | if (mpx_fde->fd != add_fde->fd) {
|
---|
278 | continue;
|
---|
279 | }
|
---|
280 |
|
---|
281 | if (mpx_fde == add_fde) {
|
---|
282 | continue;
|
---|
283 | }
|
---|
284 |
|
---|
285 | break;
|
---|
286 | }
|
---|
287 | if (mpx_fde == NULL) {
|
---|
288 | tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
|
---|
289 | "can't find multiplex fde for fd[%d]",
|
---|
290 | add_fde->fd);
|
---|
291 | return -1;
|
---|
292 | }
|
---|
293 |
|
---|
294 | if (mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
|
---|
295 | /* Logic error. Can't have more than 2 multiplexed fde's. */
|
---|
296 | tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
|
---|
297 | "multiplex fde for fd[%d] is already multiplexed\n",
|
---|
298 | mpx_fde->fd);
|
---|
299 | return -1;
|
---|
300 | }
|
---|
301 |
|
---|
302 | /*
|
---|
303 | * The multiplex fde must have the same fd, and also
|
---|
304 | * already have an epoll event attached.
|
---|
305 | */
|
---|
306 | if (!(mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT)) {
|
---|
307 | /* Logic error. Can't have more than 2 multiplexed fde's. */
|
---|
308 | tevent_debug(epoll_ev->ev, TEVENT_DEBUG_FATAL,
|
---|
309 | "multiplex fde for fd[%d] has no event\n",
|
---|
310 | mpx_fde->fd);
|
---|
311 | return -1;
|
---|
312 | }
|
---|
313 |
|
---|
314 | /* Modify the mpx_fde to add in the new flags. */
|
---|
315 | ZERO_STRUCT(event);
|
---|
316 | event.events = epoll_map_flags(mpx_fde->flags);
|
---|
317 | event.events |= epoll_map_flags(add_fde->flags);
|
---|
318 | event.data.ptr = mpx_fde;
|
---|
319 | ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, mpx_fde->fd, &event);
|
---|
320 | if (ret != 0 && errno == EBADF) {
|
---|
321 | tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
|
---|
322 | "EPOLL_CTL_MOD EBADF for "
|
---|
323 | "add_fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
|
---|
324 | add_fde, mpx_fde, add_fde->fd);
|
---|
325 | DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
|
---|
326 | mpx_fde->event_ctx = NULL;
|
---|
327 | DLIST_REMOVE(epoll_ev->ev->fd_events, add_fde);
|
---|
328 | add_fde->event_ctx = NULL;
|
---|
329 | return 0;
|
---|
330 | } else if (ret != 0) {
|
---|
331 | return ret;
|
---|
332 | }
|
---|
333 |
|
---|
334 | /*
|
---|
335 | * Make each fde->additional_data pointers point at each other
|
---|
336 | * so we can look them up from each other. They are now paired.
|
---|
337 | */
|
---|
338 | mpx_fde->additional_data = (struct tevent_fd *)add_fde;
|
---|
339 | add_fde->additional_data = (struct tevent_fd *)mpx_fde;
|
---|
340 |
|
---|
341 | /* Now flag both fde's as being multiplexed. */
|
---|
342 | mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
|
---|
343 | add_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
|
---|
344 |
|
---|
345 | /* we need to keep the GOT_ERROR flag */
|
---|
346 | if (mpx_fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR) {
|
---|
347 | add_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
|
---|
348 | }
|
---|
349 |
|
---|
350 | return 0;
|
---|
351 | }
|
---|
352 |
|
---|
353 | /*
|
---|
354 | add the epoll event to the given fd_event
|
---|
355 | */
|
---|
356 | static void epoll_add_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
|
---|
357 | {
|
---|
358 | struct epoll_event event;
|
---|
359 | int ret;
|
---|
360 | struct tevent_fd *mpx_fde = NULL;
|
---|
361 |
|
---|
362 | fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
|
---|
363 | fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
|
---|
364 |
|
---|
365 | if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
|
---|
366 | /*
|
---|
367 | * This is a multiplexed fde, we need to include both
|
---|
368 | * flags in the modified event.
|
---|
369 | */
|
---|
370 | mpx_fde = talloc_get_type_abort(fde->additional_data,
|
---|
371 | struct tevent_fd);
|
---|
372 |
|
---|
373 | mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
|
---|
374 | mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
|
---|
375 | }
|
---|
376 |
|
---|
377 | ZERO_STRUCT(event);
|
---|
378 | event.events = epoll_map_flags(fde->flags);
|
---|
379 | if (mpx_fde != NULL) {
|
---|
380 | event.events |= epoll_map_flags(mpx_fde->flags);
|
---|
381 | }
|
---|
382 | event.data.ptr = fde;
|
---|
383 | ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_ADD, fde->fd, &event);
|
---|
384 | if (ret != 0 && errno == EBADF) {
|
---|
385 | tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
|
---|
386 | "EPOLL_CTL_ADD EBADF for "
|
---|
387 | "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
|
---|
388 | fde, mpx_fde, fde->fd);
|
---|
389 | DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
|
---|
390 | fde->event_ctx = NULL;
|
---|
391 | if (mpx_fde != NULL) {
|
---|
392 | DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
|
---|
393 | mpx_fde->event_ctx = NULL;
|
---|
394 | }
|
---|
395 | return;
|
---|
396 | } else if (ret != 0 && errno == EEXIST && mpx_fde == NULL) {
|
---|
397 | ret = epoll_add_multiplex_fd(epoll_ev, fde);
|
---|
398 | if (ret != 0) {
|
---|
399 | epoll_panic(epoll_ev, "epoll_add_multiplex_fd failed",
|
---|
400 | false);
|
---|
401 | return;
|
---|
402 | }
|
---|
403 | } else if (ret != 0) {
|
---|
404 | epoll_panic(epoll_ev, "EPOLL_CTL_ADD failed", false);
|
---|
405 | return;
|
---|
406 | }
|
---|
407 |
|
---|
408 | fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
|
---|
409 | /* only if we want to read we want to tell the event handler about errors */
|
---|
410 | if (fde->flags & TEVENT_FD_READ) {
|
---|
411 | fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
|
---|
412 | }
|
---|
413 |
|
---|
414 | if (mpx_fde == NULL) {
|
---|
415 | return;
|
---|
416 | }
|
---|
417 |
|
---|
418 | mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
|
---|
419 | /* only if we want to read we want to tell the event handler about errors */
|
---|
420 | if (mpx_fde->flags & TEVENT_FD_READ) {
|
---|
421 | mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
|
---|
422 | }
|
---|
423 | }
|
---|
424 |
|
---|
425 | /*
|
---|
426 | delete the epoll event for given fd_event
|
---|
427 | */
|
---|
428 | static void epoll_del_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
|
---|
429 | {
|
---|
430 | struct epoll_event event;
|
---|
431 | int ret;
|
---|
432 | struct tevent_fd *mpx_fde = NULL;
|
---|
433 |
|
---|
434 | fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
|
---|
435 | fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
|
---|
436 |
|
---|
437 | if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
|
---|
438 | /*
|
---|
439 | * This is a multiplexed fde, we need to modify both events.
|
---|
440 | */
|
---|
441 | mpx_fde = talloc_get_type_abort(fde->additional_data,
|
---|
442 | struct tevent_fd);
|
---|
443 |
|
---|
444 | mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
|
---|
445 | mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
|
---|
446 | }
|
---|
447 |
|
---|
448 | ZERO_STRUCT(event);
|
---|
449 | ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_DEL, fde->fd, &event);
|
---|
450 | if (ret != 0 && errno == ENOENT) {
|
---|
451 | /*
|
---|
452 | * This can happen after a epoll_check_reopen
|
---|
453 | * within epoll_event_fd_destructor.
|
---|
454 | */
|
---|
455 | tevent_debug(epoll_ev->ev, TEVENT_DEBUG_TRACE,
|
---|
456 | "EPOLL_CTL_DEL ignoring ENOENT for fd[%d]\n",
|
---|
457 | fde->fd);
|
---|
458 | return;
|
---|
459 | } else if (ret != 0 && errno == EBADF) {
|
---|
460 | tevent_debug(epoll_ev->ev, TEVENT_DEBUG_WARNING,
|
---|
461 | "EPOLL_CTL_DEL EBADF for "
|
---|
462 | "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
|
---|
463 | fde, mpx_fde, fde->fd);
|
---|
464 | DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
|
---|
465 | fde->event_ctx = NULL;
|
---|
466 | if (mpx_fde != NULL) {
|
---|
467 | DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
|
---|
468 | mpx_fde->event_ctx = NULL;
|
---|
469 | }
|
---|
470 | return;
|
---|
471 | } else if (ret != 0) {
|
---|
472 | epoll_panic(epoll_ev, "EPOLL_CTL_DEL failed", false);
|
---|
473 | return;
|
---|
474 | }
|
---|
475 | }
|
---|
476 |
|
---|
477 | /*
|
---|
478 | change the epoll event to the given fd_event
|
---|
479 | */
|
---|
480 | static void epoll_mod_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
|
---|
481 | {
|
---|
482 | struct tevent_fd *mpx_fde = NULL;
|
---|
483 | struct epoll_event event;
|
---|
484 | int ret;
|
---|
485 |
|
---|
486 | fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
|
---|
487 | fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
|
---|
488 |
|
---|
489 | if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
|
---|
490 | /*
|
---|
491 | * This is a multiplexed fde, we need to include both
|
---|
492 | * flags in the modified event.
|
---|
493 | */
|
---|
494 | mpx_fde = talloc_get_type_abort(fde->additional_data,
|
---|
495 | struct tevent_fd);
|
---|
496 |
|
---|
497 | mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
|
---|
498 | mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
|
---|
499 | }
|
---|
500 |
|
---|
501 | ZERO_STRUCT(event);
|
---|
502 | event.events = epoll_map_flags(fde->flags);
|
---|
503 | if (mpx_fde != NULL) {
|
---|
504 | event.events |= epoll_map_flags(mpx_fde->flags);
|
---|
505 | }
|
---|
506 | event.data.ptr = fde;
|
---|
507 | ret = epoll_ctl(epoll_ev->epoll_fd, EPOLL_CTL_MOD, fde->fd, &event);
|
---|
508 | if (ret != 0 && errno == EBADF) {
|
---|
509 | tevent_debug(epoll_ev->ev, TEVENT_DEBUG_ERROR,
|
---|
510 | "EPOLL_CTL_MOD EBADF for "
|
---|
511 | "fde[%p] mpx_fde[%p] fd[%d] - disabling\n",
|
---|
512 | fde, mpx_fde, fde->fd);
|
---|
513 | DLIST_REMOVE(epoll_ev->ev->fd_events, fde);
|
---|
514 | fde->event_ctx = NULL;
|
---|
515 | if (mpx_fde != NULL) {
|
---|
516 | DLIST_REMOVE(epoll_ev->ev->fd_events, mpx_fde);
|
---|
517 | mpx_fde->event_ctx = NULL;
|
---|
518 | }
|
---|
519 | return;
|
---|
520 | } else if (ret != 0) {
|
---|
521 | epoll_panic(epoll_ev, "EPOLL_CTL_MOD failed", false);
|
---|
522 | return;
|
---|
523 | }
|
---|
524 |
|
---|
525 | fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
|
---|
526 | /* only if we want to read we want to tell the event handler about errors */
|
---|
527 | if (fde->flags & TEVENT_FD_READ) {
|
---|
528 | fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
|
---|
529 | }
|
---|
530 |
|
---|
531 | if (mpx_fde == NULL) {
|
---|
532 | return;
|
---|
533 | }
|
---|
534 |
|
---|
535 | mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
|
---|
536 | /* only if we want to read we want to tell the event handler about errors */
|
---|
537 | if (mpx_fde->flags & TEVENT_FD_READ) {
|
---|
538 | mpx_fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR;
|
---|
539 | }
|
---|
540 | }
|
---|
541 |
|
---|
542 | static void epoll_update_event(struct epoll_event_context *epoll_ev, struct tevent_fd *fde)
|
---|
543 | {
|
---|
544 | bool got_error = (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR);
|
---|
545 | bool want_read = (fde->flags & TEVENT_FD_READ);
|
---|
546 | bool want_write= (fde->flags & TEVENT_FD_WRITE);
|
---|
547 | struct tevent_fd *mpx_fde = NULL;
|
---|
548 |
|
---|
549 | if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
|
---|
550 | /*
|
---|
551 | * work out what the multiplexed fde wants.
|
---|
552 | */
|
---|
553 | mpx_fde = talloc_get_type_abort(fde->additional_data,
|
---|
554 | struct tevent_fd);
|
---|
555 |
|
---|
556 | if (mpx_fde->flags & TEVENT_FD_READ) {
|
---|
557 | want_read = true;
|
---|
558 | }
|
---|
559 |
|
---|
560 | if (mpx_fde->flags & TEVENT_FD_WRITE) {
|
---|
561 | want_write = true;
|
---|
562 | }
|
---|
563 | }
|
---|
564 |
|
---|
565 | /* there's already an event */
|
---|
566 | if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT) {
|
---|
567 | if (want_read || (want_write && !got_error)) {
|
---|
568 | epoll_mod_event(epoll_ev, fde);
|
---|
569 | return;
|
---|
570 | }
|
---|
571 | /*
|
---|
572 | * if we want to match the select behavior, we need to remove the epoll_event
|
---|
573 | * when the caller isn't interested in events.
|
---|
574 | *
|
---|
575 | * this is because epoll reports EPOLLERR and EPOLLHUP, even without asking for them
|
---|
576 | */
|
---|
577 | epoll_del_event(epoll_ev, fde);
|
---|
578 | return;
|
---|
579 | }
|
---|
580 |
|
---|
581 | /* there's no epoll_event attached to the fde */
|
---|
582 | if (want_read || (want_write && !got_error)) {
|
---|
583 | epoll_add_event(epoll_ev, fde);
|
---|
584 | return;
|
---|
585 | }
|
---|
586 | }
|
---|
587 |
|
---|
588 | /*
|
---|
589 | Cope with epoll returning EPOLLHUP|EPOLLERR on an event.
|
---|
590 | Return true if there's nothing else to do, false if
|
---|
591 | this event needs further handling.
|
---|
592 | */
|
---|
593 | static bool epoll_handle_hup_or_err(struct epoll_event_context *epoll_ev,
|
---|
594 | struct tevent_fd *fde)
|
---|
595 | {
|
---|
596 | if (fde == NULL) {
|
---|
597 | /* Nothing to do if no event. */
|
---|
598 | return true;
|
---|
599 | }
|
---|
600 |
|
---|
601 | fde->additional_flags |= EPOLL_ADDITIONAL_FD_FLAG_GOT_ERROR;
|
---|
602 | /*
|
---|
603 | * if we only wait for TEVENT_FD_WRITE, we should not tell the
|
---|
604 | * event handler about it, and remove the epoll_event,
|
---|
605 | * as we only report errors when waiting for read events,
|
---|
606 | * to match the select() behavior
|
---|
607 | */
|
---|
608 | if (!(fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_REPORT_ERROR)) {
|
---|
609 | /*
|
---|
610 | * Do the same as the poll backend and
|
---|
611 | * remove the writeable flag.
|
---|
612 | */
|
---|
613 | fde->flags &= ~TEVENT_FD_WRITE;
|
---|
614 | return true;
|
---|
615 | }
|
---|
616 | /* This has TEVENT_FD_READ set, we're not finished. */
|
---|
617 | return false;
|
---|
618 | }
|
---|
619 |
|
---|
620 | /*
|
---|
621 | event loop handling using epoll
|
---|
622 | */
|
---|
623 | static int epoll_event_loop(struct epoll_event_context *epoll_ev, struct timeval *tvalp)
|
---|
624 | {
|
---|
625 | int ret, i;
|
---|
626 | #define MAXEVENTS 1
|
---|
627 | struct epoll_event events[MAXEVENTS];
|
---|
628 | int timeout = -1;
|
---|
629 | int wait_errno;
|
---|
630 |
|
---|
631 | if (tvalp) {
|
---|
632 | /* it's better to trigger timed events a bit later than too early */
|
---|
633 | timeout = ((tvalp->tv_usec+999) / 1000) + (tvalp->tv_sec*1000);
|
---|
634 | }
|
---|
635 |
|
---|
636 | if (epoll_ev->ev->signal_events &&
|
---|
637 | tevent_common_check_signal(epoll_ev->ev)) {
|
---|
638 | return 0;
|
---|
639 | }
|
---|
640 |
|
---|
641 | tevent_trace_point_callback(epoll_ev->ev, TEVENT_TRACE_BEFORE_WAIT);
|
---|
642 | ret = epoll_wait(epoll_ev->epoll_fd, events, MAXEVENTS, timeout);
|
---|
643 | wait_errno = errno;
|
---|
644 | tevent_trace_point_callback(epoll_ev->ev, TEVENT_TRACE_AFTER_WAIT);
|
---|
645 |
|
---|
646 | if (ret == -1 && wait_errno == EINTR && epoll_ev->ev->signal_events) {
|
---|
647 | if (tevent_common_check_signal(epoll_ev->ev)) {
|
---|
648 | return 0;
|
---|
649 | }
|
---|
650 | }
|
---|
651 |
|
---|
652 | if (ret == -1 && wait_errno != EINTR) {
|
---|
653 | epoll_panic(epoll_ev, "epoll_wait() failed", true);
|
---|
654 | return -1;
|
---|
655 | }
|
---|
656 |
|
---|
657 | if (ret == 0 && tvalp) {
|
---|
658 | /* we don't care about a possible delay here */
|
---|
659 | tevent_common_loop_timer_delay(epoll_ev->ev);
|
---|
660 | return 0;
|
---|
661 | }
|
---|
662 |
|
---|
663 | for (i=0;i<ret;i++) {
|
---|
664 | struct tevent_fd *fde = talloc_get_type(events[i].data.ptr,
|
---|
665 | struct tevent_fd);
|
---|
666 | uint16_t flags = 0;
|
---|
667 | struct tevent_fd *mpx_fde = NULL;
|
---|
668 |
|
---|
669 | if (fde == NULL) {
|
---|
670 | epoll_panic(epoll_ev, "epoll_wait() gave bad data", true);
|
---|
671 | return -1;
|
---|
672 | }
|
---|
673 | if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
|
---|
674 | /*
|
---|
675 | * Save off the multiplexed event in case we need
|
---|
676 | * to use it to call the handler function.
|
---|
677 | */
|
---|
678 | mpx_fde = talloc_get_type_abort(fde->additional_data,
|
---|
679 | struct tevent_fd);
|
---|
680 | }
|
---|
681 | if (events[i].events & (EPOLLHUP|EPOLLERR)) {
|
---|
682 | bool handled_fde = epoll_handle_hup_or_err(epoll_ev, fde);
|
---|
683 | bool handled_mpx = epoll_handle_hup_or_err(epoll_ev, mpx_fde);
|
---|
684 |
|
---|
685 | if (handled_fde && handled_mpx) {
|
---|
686 | epoll_update_event(epoll_ev, fde);
|
---|
687 | continue;
|
---|
688 | }
|
---|
689 |
|
---|
690 | if (!handled_mpx) {
|
---|
691 | /*
|
---|
692 | * If the mpx event was the one that needs
|
---|
693 | * further handling, it's the TEVENT_FD_READ
|
---|
694 | * event so switch over and call that handler.
|
---|
695 | */
|
---|
696 | fde = mpx_fde;
|
---|
697 | mpx_fde = NULL;
|
---|
698 | }
|
---|
699 | flags |= TEVENT_FD_READ;
|
---|
700 | }
|
---|
701 | if (events[i].events & EPOLLIN) flags |= TEVENT_FD_READ;
|
---|
702 | if (events[i].events & EPOLLOUT) flags |= TEVENT_FD_WRITE;
|
---|
703 |
|
---|
704 | if (flags & TEVENT_FD_WRITE) {
|
---|
705 | if (fde->flags & TEVENT_FD_WRITE) {
|
---|
706 | mpx_fde = NULL;
|
---|
707 | }
|
---|
708 | if (mpx_fde && mpx_fde->flags & TEVENT_FD_WRITE) {
|
---|
709 | fde = mpx_fde;
|
---|
710 | mpx_fde = NULL;
|
---|
711 | }
|
---|
712 | }
|
---|
713 |
|
---|
714 | if (mpx_fde) {
|
---|
715 | /* Ensure we got the right fde. */
|
---|
716 | if ((flags & fde->flags) == 0) {
|
---|
717 | fde = mpx_fde;
|
---|
718 | mpx_fde = NULL;
|
---|
719 | }
|
---|
720 | }
|
---|
721 |
|
---|
722 | /*
|
---|
723 | * make sure we only pass the flags
|
---|
724 | * the handler is expecting.
|
---|
725 | */
|
---|
726 | flags &= fde->flags;
|
---|
727 | if (flags) {
|
---|
728 | fde->handler(epoll_ev->ev, fde, flags, fde->private_data);
|
---|
729 | break;
|
---|
730 | }
|
---|
731 | }
|
---|
732 |
|
---|
733 | return 0;
|
---|
734 | }
|
---|
735 |
|
---|
736 | /*
|
---|
737 | create a epoll_event_context structure.
|
---|
738 | */
|
---|
739 | static int epoll_event_context_init(struct tevent_context *ev)
|
---|
740 | {
|
---|
741 | int ret;
|
---|
742 | struct epoll_event_context *epoll_ev;
|
---|
743 |
|
---|
744 | /*
|
---|
745 | * We might be called during tevent_re_initialise()
|
---|
746 | * which means we need to free our old additional_data.
|
---|
747 | */
|
---|
748 | TALLOC_FREE(ev->additional_data);
|
---|
749 |
|
---|
750 | epoll_ev = talloc_zero(ev, struct epoll_event_context);
|
---|
751 | if (!epoll_ev) return -1;
|
---|
752 | epoll_ev->ev = ev;
|
---|
753 | epoll_ev->epoll_fd = -1;
|
---|
754 |
|
---|
755 | ret = epoll_init_ctx(epoll_ev);
|
---|
756 | if (ret != 0) {
|
---|
757 | talloc_free(epoll_ev);
|
---|
758 | return ret;
|
---|
759 | }
|
---|
760 |
|
---|
761 | ev->additional_data = epoll_ev;
|
---|
762 | return 0;
|
---|
763 | }
|
---|
764 |
|
---|
765 | /*
|
---|
766 | destroy an fd_event
|
---|
767 | */
|
---|
768 | static int epoll_event_fd_destructor(struct tevent_fd *fde)
|
---|
769 | {
|
---|
770 | struct tevent_context *ev = fde->event_ctx;
|
---|
771 | struct epoll_event_context *epoll_ev = NULL;
|
---|
772 | bool panic_triggered = false;
|
---|
773 | struct tevent_fd *mpx_fde = NULL;
|
---|
774 | int flags = fde->flags;
|
---|
775 |
|
---|
776 | if (ev == NULL) {
|
---|
777 | return tevent_common_fd_destructor(fde);
|
---|
778 | }
|
---|
779 |
|
---|
780 | epoll_ev = talloc_get_type_abort(ev->additional_data,
|
---|
781 | struct epoll_event_context);
|
---|
782 |
|
---|
783 | /*
|
---|
784 | * we must remove the event from the list
|
---|
785 | * otherwise a panic fallback handler may
|
---|
786 | * reuse invalid memory
|
---|
787 | */
|
---|
788 | DLIST_REMOVE(ev->fd_events, fde);
|
---|
789 |
|
---|
790 | if (fde->additional_flags & EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX) {
|
---|
791 | mpx_fde = talloc_get_type_abort(fde->additional_data,
|
---|
792 | struct tevent_fd);
|
---|
793 |
|
---|
794 | fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
|
---|
795 | mpx_fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_MPX;
|
---|
796 |
|
---|
797 | fde->additional_data = NULL;
|
---|
798 | mpx_fde->additional_data = NULL;
|
---|
799 |
|
---|
800 | fde->additional_flags &= ~EPOLL_ADDITIONAL_FD_FLAG_HAS_EVENT;
|
---|
801 | }
|
---|
802 |
|
---|
803 | epoll_ev->panic_state = &panic_triggered;
|
---|
804 | epoll_check_reopen(epoll_ev);
|
---|
805 | if (panic_triggered) {
|
---|
806 | return tevent_common_fd_destructor(fde);
|
---|
807 | }
|
---|
808 |
|
---|
809 | if (mpx_fde != NULL) {
|
---|
810 | epoll_update_event(epoll_ev, mpx_fde);
|
---|
811 | if (panic_triggered) {
|
---|
812 | return tevent_common_fd_destructor(fde);
|
---|
813 | }
|
---|
814 | }
|
---|
815 |
|
---|
816 | fde->flags = 0;
|
---|
817 | epoll_update_event(epoll_ev, fde);
|
---|
818 | fde->flags = flags;
|
---|
819 | if (panic_triggered) {
|
---|
820 | return tevent_common_fd_destructor(fde);
|
---|
821 | }
|
---|
822 | epoll_ev->panic_state = NULL;
|
---|
823 |
|
---|
824 | return tevent_common_fd_destructor(fde);
|
---|
825 | }
|
---|
826 |
|
---|
827 | /*
|
---|
828 | add a fd based event
|
---|
829 | return NULL on failure (memory allocation error)
|
---|
830 | */
|
---|
831 | static struct tevent_fd *epoll_event_add_fd(struct tevent_context *ev, TALLOC_CTX *mem_ctx,
|
---|
832 | int fd, uint16_t flags,
|
---|
833 | tevent_fd_handler_t handler,
|
---|
834 | void *private_data,
|
---|
835 | const char *handler_name,
|
---|
836 | const char *location)
|
---|
837 | {
|
---|
838 | struct epoll_event_context *epoll_ev =
|
---|
839 | talloc_get_type_abort(ev->additional_data,
|
---|
840 | struct epoll_event_context);
|
---|
841 | struct tevent_fd *fde;
|
---|
842 | bool panic_triggered = false;
|
---|
843 |
|
---|
844 | fde = tevent_common_add_fd(ev, mem_ctx, fd, flags,
|
---|
845 | handler, private_data,
|
---|
846 | handler_name, location);
|
---|
847 | if (!fde) return NULL;
|
---|
848 |
|
---|
849 | talloc_set_destructor(fde, epoll_event_fd_destructor);
|
---|
850 |
|
---|
851 | epoll_ev->panic_state = &panic_triggered;
|
---|
852 | epoll_check_reopen(epoll_ev);
|
---|
853 | if (panic_triggered) {
|
---|
854 | return fde;
|
---|
855 | }
|
---|
856 | epoll_ev->panic_state = NULL;
|
---|
857 |
|
---|
858 | epoll_update_event(epoll_ev, fde);
|
---|
859 |
|
---|
860 | return fde;
|
---|
861 | }
|
---|
862 |
|
---|
863 | /*
|
---|
864 | set the fd event flags
|
---|
865 | */
|
---|
866 | static void epoll_event_set_fd_flags(struct tevent_fd *fde, uint16_t flags)
|
---|
867 | {
|
---|
868 | struct tevent_context *ev;
|
---|
869 | struct epoll_event_context *epoll_ev;
|
---|
870 | bool panic_triggered = false;
|
---|
871 |
|
---|
872 | if (fde->flags == flags) return;
|
---|
873 |
|
---|
874 | ev = fde->event_ctx;
|
---|
875 | epoll_ev = talloc_get_type_abort(ev->additional_data,
|
---|
876 | struct epoll_event_context);
|
---|
877 |
|
---|
878 | fde->flags = flags;
|
---|
879 |
|
---|
880 | epoll_ev->panic_state = &panic_triggered;
|
---|
881 | epoll_check_reopen(epoll_ev);
|
---|
882 | if (panic_triggered) {
|
---|
883 | return;
|
---|
884 | }
|
---|
885 | epoll_ev->panic_state = NULL;
|
---|
886 |
|
---|
887 | epoll_update_event(epoll_ev, fde);
|
---|
888 | }
|
---|
889 |
|
---|
890 | /*
|
---|
891 | do a single event loop using the events defined in ev
|
---|
892 | */
|
---|
893 | static int epoll_event_loop_once(struct tevent_context *ev, const char *location)
|
---|
894 | {
|
---|
895 | struct epoll_event_context *epoll_ev =
|
---|
896 | talloc_get_type_abort(ev->additional_data,
|
---|
897 | struct epoll_event_context);
|
---|
898 | struct timeval tval;
|
---|
899 | bool panic_triggered = false;
|
---|
900 |
|
---|
901 | if (ev->signal_events &&
|
---|
902 | tevent_common_check_signal(ev)) {
|
---|
903 | return 0;
|
---|
904 | }
|
---|
905 |
|
---|
906 | if (ev->immediate_events &&
|
---|
907 | tevent_common_loop_immediate(ev)) {
|
---|
908 | return 0;
|
---|
909 | }
|
---|
910 |
|
---|
911 | tval = tevent_common_loop_timer_delay(ev);
|
---|
912 | if (tevent_timeval_is_zero(&tval)) {
|
---|
913 | return 0;
|
---|
914 | }
|
---|
915 |
|
---|
916 | epoll_ev->panic_state = &panic_triggered;
|
---|
917 | epoll_ev->panic_force_replay = true;
|
---|
918 | epoll_check_reopen(epoll_ev);
|
---|
919 | if (panic_triggered) {
|
---|
920 | errno = EINVAL;
|
---|
921 | return -1;
|
---|
922 | }
|
---|
923 | epoll_ev->panic_force_replay = false;
|
---|
924 | epoll_ev->panic_state = NULL;
|
---|
925 |
|
---|
926 | return epoll_event_loop(epoll_ev, &tval);
|
---|
927 | }
|
---|
928 |
|
---|
929 | static const struct tevent_ops epoll_event_ops = {
|
---|
930 | .context_init = epoll_event_context_init,
|
---|
931 | .add_fd = epoll_event_add_fd,
|
---|
932 | .set_fd_close_fn = tevent_common_fd_set_close_fn,
|
---|
933 | .get_fd_flags = tevent_common_fd_get_flags,
|
---|
934 | .set_fd_flags = epoll_event_set_fd_flags,
|
---|
935 | .add_timer = tevent_common_add_timer_v2,
|
---|
936 | .schedule_immediate = tevent_common_schedule_immediate,
|
---|
937 | .add_signal = tevent_common_add_signal,
|
---|
938 | .loop_once = epoll_event_loop_once,
|
---|
939 | .loop_wait = tevent_common_loop_wait,
|
---|
940 | };
|
---|
941 |
|
---|
942 | _PRIVATE_ bool tevent_epoll_init(void)
|
---|
943 | {
|
---|
944 | return tevent_register_backend("epoll", &epoll_event_ops);
|
---|
945 | }
|
---|