*** empty log message ***

master
Marc Alexander Lehmann 2019-06-22 16:25:53 +00:00
parent 9af345119e
commit fe5d7898ac
3 changed files with 75 additions and 23 deletions

6
ev.c
View File

@ -327,7 +327,11 @@
#endif
#ifndef EV_USE_LINUXAIO
# define EV_USE_LINUXAIO 0
# if __linux /* libev currently assumes linux/aio_abi.h is always available on linux */
# define EV_USE_LINUXAIO 1
# else
# define EV_USE_LINUXAIO 0
# endif
#endif
#ifndef EV_USE_INOTIFY

78
ev.pod
View File

@ -107,10 +107,10 @@ watcher.
=head2 FEATURES
Libev supports C<select>, C<poll>, the Linux-specific C<epoll>, the
BSD-specific C<kqueue> and the Solaris-specific event port mechanisms
for file descriptor events (C<ev_io>), the Linux C<inotify> interface
(for C<ev_stat>), Linux eventfd/signalfd (for faster and cleaner
Libev supports C<select>, C<poll>, the Linux-specific aio and C<epoll>
interfaces, the BSD-specific C<kqueue> and the Solaris-specific event port
mechanisms for file descriptor events (C<ev_io>), the Linux C<inotify>
interface (for C<ev_stat>), Linux eventfd/signalfd (for faster and cleaner
inter-thread wakeup (C<ev_async>)/signal handling (C<ev_signal>)) relative
timers (C<ev_timer>), absolute timers with customised rescheduling
(C<ev_periodic>), synchronous signals (C<ev_signal>), process status
@ -569,7 +569,40 @@ faster than epoll for maybe up to a hundred file descriptors, depending on
the usage. So sad.
While nominally embeddable in other event loops, this feature is broken in
all kernel versions tested so far.
a lot of kernel revisions, but probably(!) works in current versions.
This backend maps C<EV_READ> and C<EV_WRITE> in the same way as
C<EVBACKEND_POLL>.
=item C<EVBACKEND_LINUXAIO> (value 64, Linux)
Use the linux-specific linux aio (I<not> C<< aio(7) >>) event interface
available in post-4.18 kernels.
If this backend works for you (as of this writing, it was very
experimental and only supports a subset of file types), it is the best
event interface available on linux and might be well worth enabling
- if it isn't available in your kernel this will be detected and another
backend will be chosen.
This backend can batch oneshot requests and uses a user-space ring buffer
to receive events. It also doesn't suffer from most of the design problems
of epoll (such as not being able to remove event sources from the epoll
set), and generally sounds too good to be true. Because, this being the
linux kernel, of course it suffers from a whole new set of limitations.
For one, it is not easily embeddable (but probably could be done using
an event fd at some extra overhead). It also is subject to various
arbitrary limits that can be configured in F</proc/sys/fs/aio-max-nr>
and F</proc/sys/fs/aio-nr>, which could lead to it being skipped during
initialisation.
Most problematic in practise, however, is that, like kqueue, it requires
special support from drivers, and, not surprisingly, not all drivers
implement it. For example, in linux 4.19, tcp sockets, pipes, event fds,
files, F</dev/null> and a few others are supported, but ttys are not, so
this is not (yet?) a generic event polling interface but is probably still
very useful in a web server or similar program.
This backend maps C<EV_READ> and C<EV_WRITE> in the same way as
C<EVBACKEND_POLL>.
@ -680,6 +713,12 @@ used if available.
struct ev_loop *loop = ev_loop_new (ev_recommended_backends () | EVBACKEND_KQUEUE);
Example: Similarly, on linux, you might want to take advantage of the
linux aio backend if possible, but fall back to something else if that
isn't available.
struct ev_loop *loop = ev_loop_new (ev_recommended_backends () | EVBACKEND_LINUXAIO);
=item ev_loop_destroy (loop)
Destroys an event loop object (frees all memory and kernel state
@ -1632,13 +1671,13 @@ But really, best use non-blocking mode.
=head3 The special problem of disappearing file descriptors
Some backends (e.g. kqueue, epoll) need to be told about closing a file
descriptor (either due to calling C<close> explicitly or any other means,
such as C<dup2>). The reason is that you register interest in some file
descriptor, but when it goes away, the operating system will silently drop
this interest. If another file descriptor with the same number then is
registered with libev, there is no efficient way to see that this is, in
fact, a different file descriptor.
Some backends (e.g. kqueue, epoll, linuxaio) need to be told about closing
a file descriptor (either due to calling C<close> explicitly or any other
means, such as C<dup2>). The reason is that you register interest in some
file descriptor, but when it goes away, the operating system will silently
drop this interest. If another file descriptor with the same number then
is registered with libev, there is no efficient way to see that this is,
in fact, a different file descriptor.
To avoid having to explicitly tell libev about such cases, libev follows
the following policy: Each time C<ev_io_set> is being called, libev
@ -1697,9 +1736,10 @@ reuse the same code path.
=head3 The special problem of fork
Some backends (epoll, kqueue) do not support C<fork ()> at all or exhibit
useless behaviour. Libev fully supports fork, but needs to be told about
it in the child if you want to continue to use it in the child.
Some backends (epoll, kqueue, probably linuxaio) do not support C<fork ()>
at all or exhibit useless behaviour. Libev fully supports fork, but needs
to be told about it in the child if you want to continue to use it in the
child.
To support fork in your child processes, you have to call C<ev_loop_fork
()> after a fork in the child, enable C<EVFLAG_FORKCHECK>, or resort to
@ -4430,6 +4470,7 @@ in your include path (e.g. in libev/ when using -Ilibev):
ev_select.c only when select backend is enabled
ev_poll.c only when poll backend is enabled
ev_epoll.c only when the epoll backend is enabled
ev_linuxaio.c only when the linux aio backend is enabled
ev_kqueue.c only when the kqueue backend is enabled
ev_port.c only when the solaris port backend is enabled
@ -4630,6 +4671,13 @@ otherwise another method will be used as fallback. This is the preferred
backend for GNU/Linux systems. If undefined, it will be enabled if the
headers indicate GNU/Linux + Glibc 2.4 or newer, otherwise disabled.
=item EV_USE_LINUXAIO
If defined to be C<1>, libev will compile in support for the Linux
aio backend. Due to its current limitations it has to be requested
explicitly. If undefined, it will be enabled on linux, otherwise
disabled.
=item EV_USE_KQUEUE
If defined to be C<1>, libev will compile in support for the BSD style

View File

@ -112,6 +112,7 @@ ev_io_getevents (aio_context_t ctx_id, long min_nr, long nr, struct io_event *ev
/*****************************************************************************/
/* actual backend implementation */
/* we use our own wrapper structure in case we ever want to do something "clever" */
typedef struct aniocb
{
struct iocb io;
@ -122,13 +123,13 @@ inline_size
void
linuxaio_array_needsize_iocbp (ANIOCBP *base, int count)
{
/* TODO: quite the overhead to allocate every iocb separately */
/* TODO: quite the overhead to allocate every iocb separately, maybe use our own allocator? */
while (count--)
{
*base = (ANIOCBP)ev_malloc (sizeof (**base));
/* TODO: full zero initialize required? */
memset (*base, 0, sizeof (**base));
/* would be nice to initialize fd/data as well */
/* would be nice to initialize fd/data as well, but array_needsize API doesn't support that */
(*base)->io.aio_lio_opcode = IOCB_CMD_POLL;
++base;
}
@ -214,6 +215,7 @@ linuxaio_get_events_from_ring (EV_P)
if (head == tail)
return 0;
/* bail out if the ring buffer doesn't match the expected layout */
if (ecb_expect_false (ring->magic != AIO_RING_MAGIC)
|| ring->incompat_features != AIO_RING_INCOMPAT_FEATURES
|| ring->header_length != sizeof (struct aio_ring)) /* TODO: or use it to find io_event[0]? */
@ -246,7 +248,7 @@ linuxaio_get_events (EV_P_ ev_tstamp timeout)
return;
/* no events, so wait for at least one, then poll ring buffer again */
/* this degraded to one event per loop iteration */
/* this degrades to one event per loop iteration */
/* if the ring buffer changes layout, but so be it */
ts.tv_sec = (long)timeout;
@ -285,6 +287,8 @@ linuxaio_poll (EV_P_ ev_tstamp timeout)
* that the event was queued synchronously during io_submit, and thus
* the buffer overflowed.
* In this case, we just try next loop iteration.
* This should not result in a few fds taking priority, as the interface
* is one-shot, and we submit iocb's in a round-robin fashion.
*/
memmove (linuxaio_submits, linuxaio_submits + submitted, (linuxaio_submitcnt - submitted) * sizeof (*linuxaio_submits));
linuxaio_submitcnt -= submitted;
@ -292,8 +296,6 @@ linuxaio_poll (EV_P_ ev_tstamp timeout)
break;
}
else
/* TODO: we get EAGAIN when the ring buffer is full for some reason */
/* TODO: should we always just try next time? */
ev_syserr ("(libev) io_submit");
submitted += res;
@ -344,8 +346,6 @@ inline_size
void
linuxaio_fork (EV_P)
{
/* TODO: verify and test */
/* this frees all iocbs, which is very heavy-handed */
linuxaio_destroy (EV_A);
linuxaio_submitcnt = 0; /* all pointers were invalidated */