diff --git a/ev.3 b/ev.3 index f8db65c..f9301a5 100644 --- a/ev.3 +++ b/ev.3 @@ -133,7 +133,7 @@ .\" ======================================================================== .\" .IX Title "LIBEV 3" -.TH LIBEV 3 "2019-06-23" "libev-4.25" "libev - high performance full featured event loop" +.TH LIBEV 3 "2019-06-24" "libev-4.25" "libev - high performance full featured event loop" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l @@ -712,10 +712,9 @@ be detected and this backend will be skipped. .Sp This backend can batch oneshot requests and supports a user-space ring buffer to receive events. It also doesn't suffer from most of the design -problems of epoll (such as not being able to remove event sources from -the epoll set), and generally sounds too good to be true. Because, this -being the linux kernel, of course it suffers from a whole new set of -limitations. +problems of epoll (such as not being able to remove event sources from the +epoll set), and generally sounds too good to be true. Because, this being +the linux kernel, of course it suffers from a whole new set of limitations. .Sp For one, it is not easily embeddable (but probably could be done using an event fd at some extra overhead). It also is subject to a system wide @@ -726,10 +725,14 @@ backend will be skipped during initialisation. Most problematic in practise, however, is that not all file descriptors work with it. For example, in linux 5.1, tcp sockets, pipes, event fds, files, \fI/dev/null\fR and a few others are supported, but ttys do not work -(probably because of a bug), so this is not (yet?) a generic event polling -interface. +properly (a known bug that the kernel developers don't care about, see +<https://lore.kernel.org/patchwork/patch/1047453/>), so this is not +(yet?) a generic event polling interface. 
.Sp -To work around this latter problem, the current version of libev uses +Overall, it seems the linux developers just don't want it to have a +generic event handling mechanism other than \f(CW\*(C`select\*(C'\fR or \f(CW\*(C`poll\*(C'\fR. +.Sp +To work around the fd type problem, the current version of libev uses epoll as a fallback for file deescriptor types that do not work. Epoll is used in, kind of, slow mode that hopefully avoids most of its design problems and requires 1\-3 extra syscalls per active fd every iteration. diff --git a/ev_linuxaio.c b/ev_linuxaio.c index 159fcd9..9b3af38 100644 --- a/ev_linuxaio.c +++ b/ev_linuxaio.c @@ -184,6 +184,58 @@ linuxaio_modify (EV_P_ int fd, int oev, int nev) } } +#if EPOLL_FALLBACK + +static void +linuxaio_rearm_epoll (EV_P_ struct iocb *iocb, int op) +{ + struct epoll_event eev; + + eev.events = EPOLLONESHOT; + if (iocb->aio_buf & POLLIN ) eev.events |= EPOLLIN ; + if (iocb->aio_buf & POLLOUT) eev.events |= EPOLLOUT; + eev.data.fd = iocb->aio_fildes; + + if (epoll_ctl (backend_fd, op, iocb->aio_fildes, &eev) < 0) + ev_syserr ("(libeio) linuxaio epoll_ctl"); +} + +static void +linuxaio_epoll_cb (EV_P_ struct ev_io *w, int revents) +{ + struct epoll_event events[16]; + + for (;;) + { + int idx; + int res = epoll_wait (backend_fd, events, sizeof (events) / sizeof (events [0]), 0); + + if (expect_false (res < 0)) + ev_syserr ("(libev) linuxaio epoll_wait"); + else if (!res) + break; + + for (idx = res; idx--; ) + { + int fd = events [idx].data.fd; + uint32_t ev = events [idx].events; + + assert (("libev: iocb fd must be in-bounds", fd >= 0 && fd < anfdmax)); + + linuxaio_rearm_epoll (EV_A_ &linuxaio_iocbps [fd]->io, EPOLL_CTL_MOD); + + fd_event (EV_A_ fd, + (ev & (EPOLLOUT | EPOLLERR | EPOLLHUP) ? EV_WRITE : 0) + | (ev & (EPOLLIN | EPOLLERR | EPOLLHUP) ? 
EV_READ : 0)); + } + + if (res < sizeof (events) / sizeof (events [0])) + break; + } +} + +#endif + static void linuxaio_parse_events (EV_P_ struct io_event *ev, int nr) { @@ -194,7 +246,7 @@ linuxaio_parse_events (EV_P_ struct io_event *ev, int nr) assert (("libev: iocb fd must be in-bounds", fd >= 0 && fd < anfdmax)); - /* linux aio is oneshot: rearm fd */ + /* linux aio is oneshot: rearm fd. TODO: this does more work than needed */ linuxaio_iocbps [fd]->io.aio_buf = 0; anfds [fd].events = 0; fd_change (EV_A_ fd, 0); @@ -274,11 +326,15 @@ linuxaio_get_events (EV_P_ ev_tstamp timeout) /* this degrades to one event per loop iteration */ /* if the ring buffer changes layout, but so be it */ + EV_RELEASE_CB; + ts.tv_sec = (long)timeout; ts.tv_nsec = (long)((timeout - ts.tv_sec) * 1e9); res = ev_io_getevents (linuxaio_ctx, 1, sizeof (ioev) / sizeof (ioev [0]), ioev, &ts); + EV_ACQUIRE_CB; + if (res < 0) if (errno == EINTR) /* ignored */; @@ -292,22 +348,6 @@ linuxaio_get_events (EV_P_ ev_tstamp timeout) } } -#if EPOLL_FALLBACK -static void -linuxaio_rearm_epoll (EV_P_ struct iocb *iocb, int op) -{ - struct epoll_event eev; - - eev.events = EPOLLONESHOT; - if (iocb->aio_buf & POLLIN ) eev.events |= EPOLLIN ; - if (iocb->aio_buf & POLLOUT) eev.events |= EPOLLOUT; - eev.data.fd = iocb->aio_fildes; - - if (epoll_ctl (backend_fd, op, iocb->aio_fildes, &eev) < 0) - ev_syserr ("(libeio) linuxaio epoll_ctl"); -} -#endif - static void linuxaio_poll (EV_P_ ev_tstamp timeout) { @@ -320,7 +360,15 @@ linuxaio_poll (EV_P_ ev_tstamp timeout) /* which allows us to pinpoint the errornous iocb */ for (submitted = 0; submitted < linuxaio_submitcnt; ) { +#if 0 + int res; + if (linuxaio_submits[submitted]->aio_fildes == backend_fd) + res = ev_io_submit (linuxaio_ctx, 1, linuxaio_submits + submitted); + else + { res = -1; errno = EINVAL; }; +#else int res = ev_io_submit (linuxaio_ctx, linuxaio_submitcnt - submitted, linuxaio_submits + submitted); +#endif if (expect_false (res < 0)) if 
(errno == EAGAIN) @@ -331,9 +379,14 @@ linuxaio_poll (EV_P_ ev_tstamp timeout) * In this case, we just try in next loop iteration. * This should not result in a few fds taking priority, as the interface * is one-shot, and we submit iocb's in a round-robin fashion. + * TODO: maybe make "submitted" persistent, so we don't have to memmove? */ - memmove (linuxaio_submits, linuxaio_submits + submitted, (linuxaio_submitcnt - submitted) * sizeof (*linuxaio_submits)); - linuxaio_submitcnt -= submitted; + if (ecb_expect_false (submitted)) + { + memmove (linuxaio_submits, linuxaio_submits + submitted, (linuxaio_submitcnt - submitted) * sizeof (*linuxaio_submits)); + linuxaio_submitcnt -= submitted; + } + timeout = 0; break; } @@ -343,8 +396,9 @@ linuxaio_poll (EV_P_ ev_tstamp timeout) /* This happens for unsupported fds, officially, but in my testing, * also randomly happens for supported fds. We fall back to good old * poll() here, under the assumption that this is a very rare case. - * See https://lore.kernel.org/patchwork/patch/1047453/ for evidence - * that the problem is known, but ignored. + * See https://lore.kernel.org/patchwork/patch/1047453/ to see + * discussion about such a case (ttys) where polling for POLLIN + * fails but POLLIN|POLLOUT works. 
*/ struct iocb *iocb = linuxaio_submits [submitted]; res = 1; /* skip this iocb */ @@ -366,44 +420,6 @@ linuxaio_poll (EV_P_ ev_tstamp timeout) linuxaio_get_events (EV_A_ timeout); } -#if EPOLL_FALLBACK - -static void -linuxaio_epoll_cb (EV_P_ struct ev_io *w, int revents) -{ - struct epoll_event events[16]; - - for (;;) - { - int idx; - int res = epoll_wait (backend_fd, events, sizeof (events) / sizeof (events [0]), 0); - - if (expect_false (res < 0)) - ev_syserr ("(libev) linuxaio epoll_wait"); - else if (!res) - break; - - for (idx = res; idx--; ) - { - int fd = events [idx].data.fd; - uint32_t ev = events [idx].events; - - assert (("libev: iocb fd must be in-bounds", fd >= 0 && fd < anfdmax)); - - linuxaio_rearm_epoll (EV_A_ &linuxaio_iocbps [fd]->io, EPOLL_CTL_MOD); - - fd_event (EV_A_ fd, - (ev & (EPOLLOUT | EPOLLERR | EPOLLHUP) ? EV_WRITE : 0) - | (ev & (EPOLLIN | EPOLLERR | EPOLLHUP) ? EV_READ : 0)); - } - - if (res < sizeof (events) / sizeof (events [0])) - break; - } -} - -#endif - inline_size int linuxaio_init (EV_P_ int flags) @@ -433,6 +449,7 @@ linuxaio_init (EV_P_ int flags) } ev_io_init (EV_A_ &linuxaio_epoll_w, linuxaio_epoll_cb, backend_fd, EV_READ); + ev_set_priority (&linuxaio_epoll_w, EV_MAXPRI); ev_io_start (EV_A_ &linuxaio_epoll_w); ev_unref (EV_A); /* watcher should not keep loop alive */ #endif