Browse Source

*** empty log message ***

master
Marc Alexander Lehmann 2 years ago
parent
commit
fdae2327be
  1. 48
      ev.pod
  2. 116
      ev_linuxaio.c
  3. 1
      ev_vars.h
  4. 2
      ev_wrap.h

48
ev.pod

@ -576,33 +576,37 @@ C<EVBACKEND_POLL>.
=item C<EVBACKEND_LINUXAIO> (value 64, Linux)
Use the linux-specific linux aio (I<not> C<< aio(7) >>) event interface
available in post-4.18 kernels.
Use the linux-specific linux aio (I<not> C<< aio(7) >> but C<<
io_submit(2) >>) event interface available in post-4.18 kernels.
If this backend works for you (as of this writing, it was very
experimental and only supports a subset of file types), it is the best
event interface available on linux and might be well worth it enabling it
- if it isn't available in your kernel this will be detected and another
backend will be chosen.
experimental), it is the best event interface available on linux and might
be well worth enabling it - if it isn't available in your kernel this will
be detected and this backend will be skipped.
This backend can batch oneshot requests and uses a user-space ring buffer
to receive events. It also doesn't suffer from most of the design problems
of epoll (such as not being able to remove event sources from the epoll
set), and generally sounds too good to be true. Because, this being the
linux kernel, of course it suffers from a whole new set of limitations.
This backend can batch oneshot requests and supports a user-space ring
buffer to receive events. It also doesn't suffer from most of the design
problems of epoll (such as not being able to remove event sources from
the epoll set), and generally sounds too good to be true. Because, this
being the linux kernel, of course it suffers from a whole new set of
limitations.
For one, it is not easily embeddable (but probably could be done using
an event fd at some extra overhead). It also is subject to various
arbitrary limits that can be configured in F</proc/sys/fs/aio-max-nr>
and F</proc/sys/fs/aio-nr>), which could lead to it being skipped during
initialisation.
Most problematic in practise, however, is that, like kqueue, it requires
special support from drivers, and, not surprisingly, not all drivers
implement it. For example, in linux 4.19, tcp sockets, pipes, event fds,
files, F</dev/null> and a few others are supported, but ttys are not, so
this is not (yet?) a generic event polling interface but is probably still
be very useful in a web server or similar program.
an event fd at some extra overhead). It also is subject to a system wide
limit that can be configured in F</proc/sys/fs/aio-max-nr> - each loop
currently requires C<61> of this number. If no aio requests are left, this
backend will be skipped during initialisation.
Most problematic in practise, however, is that not all file descriptors
work with it. For example, in linux 5.1, tcp sockets, pipes, event fds,
files, F</dev/null> and a few others are supported, but ttys do not work
(probably because of a bug), so this is not (yet?) a generic event polling
interface.
To work around this latter problem, the current version of libev uses
epoll as a fallback for file deescriptor types that do not work. Epoll
is used in, kind of, slow mode that hopefully avoids most of its design
problems.
This backend maps C<EV_READ> and C<EV_WRITE> in the same way as
C<EVBACKEND_POLL>.

116
ev_linuxaio.c

@ -37,10 +37,16 @@
* either the BSD or the GPL.
*/
#define EPOLL_FALLBACK 1
#include <sys/time.h> /* actually linux/time.h, but we must assume they are compatible */
#include <poll.h>
#include <linux/aio_abi.h>
#if EPOLL_FALLBACK
# include <sys/epoll.h>
#endif
/* we try to fill 4kB pages exactly.
* the ring buffer header is 32 bytes, every io event is 32 bytes.
* the kernel takes the io event number, doubles it, adds 2, adds the ring buffer.
@ -151,6 +157,14 @@ linuxaio_modify (EV_P_ int fd, int oev, int nev)
array_needsize (ANIOCBP, linuxaio_iocbps, linuxaio_iocbpmax, fd + 1, linuxaio_array_needsize_iocbp);
struct aniocb *iocb = linuxaio_iocbps [fd];
#if EPOLL_FALLBACK
if (iocb->io.aio_reqprio < 0)
{
epoll_ctl (backend_fd, EPOLL_CTL_DEL, fd, 0);
iocb->io.aio_reqprio = 0;
}
#endif
if (iocb->io.aio_buf)
ev_io_cancel (linuxaio_ctx, &iocb->io, (struct io_event *)0); /* always returns an error relevant kernels */
@ -257,7 +271,10 @@ linuxaio_get_events (EV_P_ ev_tstamp timeout)
res = ev_io_getevents (linuxaio_ctx, 1, 1, &ioev, &ts);
if (res < 0)
ev_syserr ("(libev) linuxaio io_getevents");
if (errno == EINTR)
/* ignored */;
else
ev_syserr ("(libev) linuxaio io_getevents");
else if (res)
{
/* at least one event received, handle it and any remaining ones in the ring buffer */
@ -266,6 +283,22 @@ linuxaio_get_events (EV_P_ ev_tstamp timeout)
}
}
#if EPOLL_FALLBACK
static void
linuxaio_rearm_epoll (EV_P_ struct iocb *iocb, int op)
{
struct epoll_event eev;
eev.events = EPOLLONESHOT;
if (iocb->aio_buf & POLLIN ) eev.events |= EPOLLIN ;
if (iocb->aio_buf & POLLOUT) eev.events |= EPOLLOUT;
eev.data.fd = iocb->aio_fildes;
if (epoll_ctl (backend_fd, op, iocb->aio_fildes, &eev) < 0)
ev_syserr ("(libeio) linuxaio epoll_ctl");
}
#endif
static void
linuxaio_poll (EV_P_ ev_tstamp timeout)
{
@ -285,8 +318,8 @@ linuxaio_poll (EV_P_ ev_tstamp timeout)
{
/* This happens when the ring buffer is full, at least. I assume this means
* that the event was queued synchronously during io_submit, and thus
* the buffer overflowd.
* In this case, we just try next loop iteration.
* the buffer overflowed.
* In this case, we just try in next loop iteration.
* This should not result in a few fds taking priority, as the interface
* is one-shot, and we submit iocb's in a round-robin fashion.
*/
@ -295,6 +328,20 @@ linuxaio_poll (EV_P_ ev_tstamp timeout)
timeout = 0;
break;
}
#if EPOLL_FALLBACK
else if (errno == EINVAL)
{
/* This hapüpens for unsupported fds, officially, but in my testing,
* also randomly happens for supported fds. We fall back to good old
* poll() here, under the assumption that this is a very rare case.
*/
struct iocb *iocb = linuxaio_submits [submitted];
res = 1; /* skip this iocb */
linuxaio_rearm_epoll (EV_A_ iocb, EPOLL_CTL_ADD);
iocb->aio_reqprio = -1; /* mark iocb as epoll */
}
#endif
else
ev_syserr ("(libev) linuxaio io_submit");
@ -308,6 +355,44 @@ linuxaio_poll (EV_P_ ev_tstamp timeout)
linuxaio_get_events (EV_A_ timeout);
}
#if EPOLL_FALLBACK
static void
linuxaio_epoll_cb (EV_P_ struct ev_io *w, int revents)
{
struct epoll_event events[16];
for (;;)
{
int idx;
int res = epoll_wait (backend_fd, events, sizeof (events) / sizeof (events [0]), 0);
if (ecb_expect_false (res < 0))
ev_syserr ("(libev) linuxaio epoll_wait");
else if (!res)
break;
for (idx = res; idx--; )
{
int fd = events [idx].data.fd;
uint32_t ev = events [idx].events;
assert (("libev: iocb fd must be in-bounds", fd >= 0 && fd < anfdmax));
linuxaio_rearm_epoll (EV_A_ &linuxaio_iocbps [fd]->io, EPOLL_CTL_MOD);
fd_event (EV_A_ fd,
(ev & (EPOLLOUT | EPOLLERR | EPOLLHUP) ? EV_WRITE : 0)
| (ev & (EPOLLIN | EPOLLERR | EPOLLHUP) ? EV_READ : 0));
}
if (res < sizeof (events) / sizeof (events [0]))
break;
}
}
#endif
inline_size
int
linuxaio_init (EV_P_ int flags)
@ -321,6 +406,18 @@ linuxaio_init (EV_P_ int flags)
if (ev_io_setup (EV_LINUXAIO_DEPTH, &linuxaio_ctx) < 0)
return 0;
#if EPOLL_FALLBACK
backend_fd = ev_epoll_create ();
if (backend_fd < 0)
{
ev_io_destroy (linuxaio_ctx);
return 0;
}
ev_io_init (EV_A_ &linuxaio_epoll_w, linuxaio_epoll_cb, backend_fd, EV_READ);
ev_io_start (EV_A_ &linuxaio_epoll_w);
#endif
backend_modify = linuxaio_modify;
backend_poll = linuxaio_poll;
@ -338,6 +435,9 @@ inline_size
void
linuxaio_destroy (EV_P)
{
#if EPOLL_FALLBACK
close (backend_fd);
#endif
linuxaio_free_iocbp (EV_A);
ev_io_destroy (linuxaio_ctx);
}
@ -354,6 +454,16 @@ linuxaio_fork (EV_P)
while (ev_io_setup (EV_LINUXAIO_DEPTH, &linuxaio_ctx) < 0)
ev_syserr ("(libev) linuxaio io_setup");
#if EPOLL_FALLBACK
while ((backend_fd = ev_epoll_create ()) < 0)
ev_syserr ("(libev) linuxaio epoll_create");
ev_io_stop (EV_A_ &linuxaio_epoll_w);
ev_io_init (EV_A_ &linuxaio_epoll_w, linuxaio_epoll_cb, backend_fd, EV_READ);
ev_io_start (EV_A_ &linuxaio_epoll_w);
ev_unref (EV_A); /* watcher should not keep loop alive */
#endif
fd_rearm_all (EV_A);
}

1
ev_vars.h

@ -114,6 +114,7 @@ VARx(int, linuxaio_iocbpmax)
VARx(struct iocb **, linuxaio_submits)
VARx(int, linuxaio_submitcnt)
VARx(int, linuxaio_submitmax)
VARx(ev_io, linuxaio_epoll_w)
#endif
#if EV_USE_KQUEUE || EV_GENWRAP

2
ev_wrap.h

@ -51,6 +51,7 @@
#define kqueue_events ((loop)->kqueue_events)
#define kqueue_fd_pid ((loop)->kqueue_fd_pid)
#define linuxaio_ctx ((loop)->linuxaio_ctx)
#define linuxaio_epoll_w ((loop)->linuxaio_epoll_w)
#define linuxaio_iocbpmax ((loop)->linuxaio_iocbpmax)
#define linuxaio_iocbps ((loop)->linuxaio_iocbps)
#define linuxaio_submitcnt ((loop)->linuxaio_submitcnt)
@ -156,6 +157,7 @@
#undef kqueue_events
#undef kqueue_fd_pid
#undef linuxaio_ctx
#undef linuxaio_epoll_w
#undef linuxaio_iocbpmax
#undef linuxaio_iocbps
#undef linuxaio_submitcnt

Loading…
Cancel
Save