Browse Source

*** empty log message ***

master
Marc Alexander Lehmann 2 years ago
parent
commit
f4db15fc26
  1. 19
      ev.3
  2. 135
      ev_linuxaio.c

19
ev.3

@ -133,7 +133,7 @@
.\" ========================================================================
.\"
.IX Title "LIBEV 3"
.TH LIBEV 3 "2019-06-23" "libev-4.25" "libev - high performance full featured event loop"
.TH LIBEV 3 "2019-06-24" "libev-4.25" "libev - high performance full featured event loop"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
@ -712,10 +712,9 @@ be detected and this backend will be skipped.
.Sp
This backend can batch oneshot requests and supports a user-space ring
buffer to receive events. It also doesn't suffer from most of the design
problems of epoll (such as not being able to remove event sources from
the epoll set), and generally sounds too good to be true. Because, this
being the linux kernel, of course it suffers from a whole new set of
limitations.
problems of epoll (such as not being able to remove event sources from the
epoll set), and generally sounds too good to be true. Because, this being
the linux kernel, of course it suffers from a whole new set of limitations.
.Sp
For one, it is not easily embeddable (but probably could be done using
an event fd at some extra overhead). It also is subject to a system wide
@ -726,10 +725,14 @@ backend will be skipped during initialisation.
Most problematic in practice, however, is that not all file descriptors
work with it. For example, in linux 5.1, tcp sockets, pipes, event fds,
files, \fI/dev/null\fR and a few others are supported, but ttys do not work
(probably because of a bug), so this is not (yet?) a generic event polling
interface.
properly (a known bug that the kernel developers don't care about, see
<https://lore.kernel.org/patchwork/patch/1047453/>), so this is not
(yet?) a generic event polling interface.
.Sp
To work around this latter problem, the current version of libev uses
Overall, it seems the linux developers just don't want linux to have a
generic event handling mechanism other than \f(CW\*(C`select\*(C'\fR or \f(CW\*(C`poll\*(C'\fR.
.Sp
To work around the fd type problem, the current version of libev uses
epoll as a fallback for file descriptor types that do not work. Epoll
is used in a kind of slow mode that hopefully avoids most of its design
problems and requires 1\-3 extra syscalls per active fd every iteration.

135
ev_linuxaio.c

@ -184,6 +184,58 @@ linuxaio_modify (EV_P_ int fd, int oev, int nev)
}
}
#if EPOLL_FALLBACK
/*
 * (Re-)register the fd described by iocb with the epoll fallback set.
 *
 * op is handed unchanged to epoll_ctl. The poll mask kept in iocb->aio_buf
 * (POLLIN/POLLOUT) is translated into the matching EPOLLIN/EPOLLOUT bits.
 * EPOLLONESHOT is always requested, so a registration only reports once
 * and must be rearmed by calling this function again.
 */
static void
linuxaio_rearm_epoll (EV_P_ struct iocb *iocb, int op)
{
  struct epoll_event eev;

  eev.events = EPOLLONESHOT;
  if (iocb->aio_buf & POLLIN ) eev.events |= EPOLLIN ;
  if (iocb->aio_buf & POLLOUT) eev.events |= EPOLLOUT;

  /* the fd doubles as the user data reported back by epoll_wait */
  eev.data.fd = iocb->aio_fildes;

  /* message prefix fixed: this is libev, the old text said "(libeio)" */
  if (epoll_ctl (backend_fd, op, iocb->aio_fildes, &eev) < 0)
    ev_syserr ("(libev) linuxaio epoll_ctl");
}
/*
 * Watcher callback for the epoll fallback fd.
 *
 * Drains the epoll set (backend_fd) without blocking (timeout 0), in
 * batches of up to 16 events. Every reported fd is rearmed with
 * EPOLL_CTL_MOD and the result is passed on via fd_event. EPOLLERR and
 * EPOLLHUP are reported as both EV_READ and EV_WRITE.
 *
 * w and revents are not used.
 */
static void
linuxaio_epoll_cb (EV_P_ struct ev_io *w, int revents)
{
  struct epoll_event events[16];
  /* keep the capacity as int so comparisons with res stay signed */
  const int maxevents = (int)(sizeof (events) / sizeof (events [0]));

  for (;;)
    {
      int idx;
      int res = epoll_wait (backend_fd, events, maxevents, 0);

      if (expect_false (res < 0))
        {
          ev_syserr ("(libev) linuxaio epoll_wait");
          /* NOTE(review): assumes ev_syserr may return (confirm). */
          /* Never walk events[] with a negative count - stop here. */
          break;
        }

      if (!res)
        break;

      for (idx = res; idx--; )
        {
          int fd = events [idx].data.fd;
          uint32_t ev = events [idx].events;

          assert (("libev: iocb fd must be in-bounds", fd >= 0 && fd < anfdmax));

          /* rearm before dispatching the event */
          linuxaio_rearm_epoll (EV_A_ &linuxaio_iocbps [fd]->io, EPOLL_CTL_MOD);

          fd_event (EV_A_ fd,
            (ev & (EPOLLOUT | EPOLLERR | EPOLLHUP) ? EV_WRITE : 0)
            | (ev & (EPOLLIN | EPOLLERR | EPOLLHUP) ? EV_READ : 0));
        }

      /* a short batch means the set has been drained */
      if (res < maxevents)
        break;
    }
}
#endif
static void
linuxaio_parse_events (EV_P_ struct io_event *ev, int nr)
{
@ -194,7 +246,7 @@ linuxaio_parse_events (EV_P_ struct io_event *ev, int nr)
assert (("libev: iocb fd must be in-bounds", fd >= 0 && fd < anfdmax));
/* linux aio is oneshot: rearm fd */
/* linux aio is oneshot: rearm fd. TODO: this does more work than needed */
linuxaio_iocbps [fd]->io.aio_buf = 0;
anfds [fd].events = 0;
fd_change (EV_A_ fd, 0);
@ -274,11 +326,15 @@ linuxaio_get_events (EV_P_ ev_tstamp timeout)
/* this degrades to one event per loop iteration */
/* if the ring buffer changes layout, but so be it */
EV_RELEASE_CB;
ts.tv_sec = (long)timeout;
ts.tv_nsec = (long)((timeout - ts.tv_sec) * 1e9);
res = ev_io_getevents (linuxaio_ctx, 1, sizeof (ioev) / sizeof (ioev [0]), ioev, &ts);
EV_ACQUIRE_CB;
if (res < 0)
if (errno == EINTR)
/* ignored */;
@ -292,22 +348,6 @@ linuxaio_get_events (EV_P_ ev_tstamp timeout)
}
}
#if EPOLL_FALLBACK
/*
 * (Re-)register the fd described by iocb with the epoll fallback set.
 *
 * op is handed unchanged to epoll_ctl. The poll mask kept in iocb->aio_buf
 * (POLLIN/POLLOUT) is translated into the matching EPOLLIN/EPOLLOUT bits.
 * EPOLLONESHOT is always requested, so a registration only reports once
 * and must be rearmed by calling this function again.
 */
static void
linuxaio_rearm_epoll (EV_P_ struct iocb *iocb, int op)
{
  struct epoll_event eev;

  eev.events = EPOLLONESHOT;
  if (iocb->aio_buf & POLLIN ) eev.events |= EPOLLIN ;
  if (iocb->aio_buf & POLLOUT) eev.events |= EPOLLOUT;

  /* the fd doubles as the user data reported back by epoll_wait */
  eev.data.fd = iocb->aio_fildes;

  /* message prefix fixed: this is libev, the old text said "(libeio)" */
  if (epoll_ctl (backend_fd, op, iocb->aio_fildes, &eev) < 0)
    ev_syserr ("(libev) linuxaio epoll_ctl");
}
#endif
static void
linuxaio_poll (EV_P_ ev_tstamp timeout)
{
@ -320,7 +360,15 @@ linuxaio_poll (EV_P_ ev_tstamp timeout)
/* which allows us to pinpoint the erroneous iocb */
for (submitted = 0; submitted < linuxaio_submitcnt; )
{
#if 0
int res;
if (linuxaio_submits[submitted]->aio_fildes == backend_fd)
res = ev_io_submit (linuxaio_ctx, 1, linuxaio_submits + submitted);
else
{ res = -1; errno = EINVAL; };
#else
int res = ev_io_submit (linuxaio_ctx, linuxaio_submitcnt - submitted, linuxaio_submits + submitted);
#endif
if (expect_false (res < 0))
if (errno == EAGAIN)
@ -331,9 +379,14 @@ linuxaio_poll (EV_P_ ev_tstamp timeout)
* In this case, we just try in next loop iteration.
* This should not result in a few fds taking priority, as the interface
* is one-shot, and we submit iocb's in a round-robin fashion.
* TODO: maybe make "submitted" persistent, so we don't have to memmove?
*/
memmove (linuxaio_submits, linuxaio_submits + submitted, (linuxaio_submitcnt - submitted) * sizeof (*linuxaio_submits));
linuxaio_submitcnt -= submitted;
if (ecb_expect_false (submitted))
{
memmove (linuxaio_submits, linuxaio_submits + submitted, (linuxaio_submitcnt - submitted) * sizeof (*linuxaio_submits));
linuxaio_submitcnt -= submitted;
}
timeout = 0;
break;
}
@ -343,8 +396,9 @@ linuxaio_poll (EV_P_ ev_tstamp timeout)
/* This happens for unsupported fds, officially, but in my testing,
* also randomly happens for supported fds. We fall back to good old
* poll() here, under the assumption that this is a very rare case.
* See https://lore.kernel.org/patchwork/patch/1047453/ for evidence
* that the problem is known, but ignored.
* See https://lore.kernel.org/patchwork/patch/1047453/ to see
* discussion about such a case (ttys) where polling for POLLIN
* fails but POLLIN|POLLOUT works.
*/
struct iocb *iocb = linuxaio_submits [submitted];
res = 1; /* skip this iocb */
@ -366,44 +420,6 @@ linuxaio_poll (EV_P_ ev_tstamp timeout)
linuxaio_get_events (EV_A_ timeout);
}
#if EPOLL_FALLBACK
/*
 * Watcher callback for the epoll fallback fd.
 *
 * Drains the epoll set (backend_fd) without blocking (timeout 0), in
 * batches of up to 16 events. Every reported fd is rearmed with
 * EPOLL_CTL_MOD and the result is passed on via fd_event. EPOLLERR and
 * EPOLLHUP are reported as both EV_READ and EV_WRITE.
 *
 * w and revents are not used.
 */
static void
linuxaio_epoll_cb (EV_P_ struct ev_io *w, int revents)
{
  struct epoll_event events[16];
  /* keep the capacity as int so comparisons with res stay signed */
  const int maxevents = (int)(sizeof (events) / sizeof (events [0]));

  for (;;)
    {
      int idx;
      int res = epoll_wait (backend_fd, events, maxevents, 0);

      if (expect_false (res < 0))
        {
          ev_syserr ("(libev) linuxaio epoll_wait");
          /* NOTE(review): assumes ev_syserr may return (confirm). */
          /* Never walk events[] with a negative count - stop here. */
          break;
        }

      if (!res)
        break;

      for (idx = res; idx--; )
        {
          int fd = events [idx].data.fd;
          uint32_t ev = events [idx].events;

          assert (("libev: iocb fd must be in-bounds", fd >= 0 && fd < anfdmax));

          /* rearm before dispatching the event */
          linuxaio_rearm_epoll (EV_A_ &linuxaio_iocbps [fd]->io, EPOLL_CTL_MOD);

          fd_event (EV_A_ fd,
            (ev & (EPOLLOUT | EPOLLERR | EPOLLHUP) ? EV_WRITE : 0)
            | (ev & (EPOLLIN | EPOLLERR | EPOLLHUP) ? EV_READ : 0));
        }

      /* a short batch means the set has been drained */
      if (res < maxevents)
        break;
    }
}
#endif
inline_size
int
linuxaio_init (EV_P_ int flags)
@ -433,6 +449,7 @@ linuxaio_init (EV_P_ int flags)
}
ev_io_init (EV_A_ &linuxaio_epoll_w, linuxaio_epoll_cb, backend_fd, EV_READ);
ev_set_priority (&linuxaio_epoll_w, EV_MAXPRI);
ev_io_start (EV_A_ &linuxaio_epoll_w);
ev_unref (EV_A); /* watcher should not keep loop alive */
#endif

Loading…
Cancel
Save