Browse Source

[core] graceful and immediate restart option

graceful and (nearly) immediate lighttpd restart option

For *some* configurations, it *may* be safe to background the current
lighttpd server (or workers) to continue processing active requests
and, in parallel, to start up a new lighttpd server with a new
configuration.  For other configurations, doing so might not be safe!

Therefore, this option must be explicitly configured to enable:
  server.feature-flags += ("server.graceful-restart-bg" => "enable")
  server.systemd-socket-activation = "enable"

Along with enabling server.feature-flags "server.graceful-restart-bg",
enabling server.systemd-socket-activation allows transfer of open
listening sockets to the new lighttpd server instance, and occurs
without closing the listening sockets and without destroying the
kernel listen backlog queue on the socket.

Safe configurations may include lighttpd.conf files which connect to
standalone backend daemons, e.g. proxying to other servers,
including PHP-FPM backends.

Unsafe configurations include lighttpd.conf files which use the "bin-path"
option in *.server configs, instructing lighttpd to execute the backends.
Using the graceful-and-immediate-restart option is likely *unsafe* if
the backend daemon expects only one instance of itself to run at a time.

The current implementation of the graceful and immediate restart option
keeps the backgrounded lighttpd in the same process group, so that a
subsequent SIGINT or SIGTERM will shut down both the new and the
backgrounded servers.  (An alternative option (commented out in the code)
is to background and detach from the new lighttpd process.)  Regardless,
existing subprocesses, such as CGI, remain in the original process group.
As a result, the new lighttpd server may receive SIGCHLD for unknown
processes inherited from the old server, which the new lighttpd server
will reap and discard.  The original lighttpd server, now a child, will
be unable to detect exit or reap and report status on those pre-existing
subprocesses.

Graceful restart is triggered in lighttpd by sending lighttpd SIGUSR1.
If lighttpd is configured with workers, then SIGINT (not SIGUSR1) is
sent to the process group, including other processes started by
lighttpd, e.g. CGI.  To work well with graceful restart, CGI scripts and
other processes should trap SIGINT (and SIGUSR1 for good measure).
Long-running scripts may want to checkpoint and close, e.g. a CGI script
implementing a long-running websocket connection.
master
Glenn Strauss 1 year ago
parent
commit
352d5d776d
  1. 2
      src/base.h
  2. 41
      src/network.c
  3. 3
      src/network.h
  4. 99
      src/server.c

2
src/base.h

@ -197,6 +197,8 @@ struct server {
gid_t gid;
pid_t pid;
int stdin_fd;
char **argv;
};

41
src/network.c

@ -452,6 +452,47 @@ int network_close(server *srv) {
return 0;
}
/**
 * Prepare the listening sockets for hand-off through the systemd
 * socket-activation environment protocol.
 *
 * Listen fds are packed into a contiguous run beginning at fd 3, with
 * FD_CLOEXEC cleared so they stay open across a subsequent execv().
 * The number of fds in that run is then exported as LISTEN_FDS and
 * srv->pid as LISTEN_PID.  If no fd ends up in the run, the environment
 * is left untouched.
 *
 * @param srv  server whose srv_sockets list is exported; srv->tmp_buf is
 *             used as scratch space for formatting the numbers
 */
void network_socket_activation_to_env (server * const srv) {
    const int first_fd = 3; /* #define SD_LISTEN_FDS_START 3 */
    int next_fd = first_fd; /* next slot to fill in the contiguous run */

    uint32_t idx = 0;
    while (idx < srv->srv_sockets.used) {
        server_socket * const sock = srv->srv_sockets.ptr[idx];

        /* fd below the next slot: nothing to do for this entry */
        if (sock->fd < next_fd) {
            ++idx;
            continue;
        }

        /* fd already sits in the next slot: only clear FD_CLOEXEC */
        if (sock->fd == next_fd) {
            fdevent_clrfd_cloexec(next_fd);
            ++next_fd;
            ++idx;
            continue;
        }

        /* fd is above the next slot.  The list is expected to be ordered,
         * but check whether a later entry already owns the next slot. */
        uint32_t later = idx + 1;
        while (later < srv->srv_sockets.used
               && next_fd != srv->srv_sockets.ptr[later]->fd)
            ++later;

        if (later < srv->srv_sockets.used) {
            /* slot is taken by a later listen socket: keep it in place and
             * re-examine the current entry against the following slot
             * (idx is intentionally not advanced) */
            fdevent_clrfd_cloexec(next_fd);
            ++next_fd;
            continue;
        }

        /* dup2() removes FD_CLOEXEC on newfd.
         * The old fd will be closed upon execv() due to its FD_CLOEXEC flag
         * (if not already closed by another dup2() over it). */
        if (next_fd == dup2(sock->fd, next_fd))
            ++next_fd;
        ++idx;
    }

    const int nfds = next_fd - first_fd;
    if (0 == nfds) return; /*(no active sockets?)*/

    buffer * const tb = srv->tmp_buf;

    buffer_clear(tb);
    buffer_append_int(tb, nfds);
    setenv("LISTEN_FDS", tb->ptr, 1);

    buffer_clear(tb);
    buffer_append_int(tb, srv->pid); /* getpid() */
    setenv("LISTEN_PID", tb->ptr, 1);
}
static int network_socket_activation_nfds(server *srv, network_socket_config *s, int nfds) {
buffer *host = buffer_init();
socklen_t addr_len;

3
src/network.h

@ -20,4 +20,7 @@ int network_register_fdevents(server *srv);
__attribute_cold__
void network_unregister_sock(server *srv, struct server_socket *srv_socket);
/* Arrange listening sockets into consecutive fds starting at fd 3 without
 * FD_CLOEXEC, and export LISTEN_FDS and LISTEN_PID into the environment
 * (systemd socket activation convention) ahead of re-exec'ing the server */
__attribute_cold__
void network_socket_activation_to_env (server *srv);
#endif

99
src/server.c

@ -646,6 +646,101 @@ static void server_sockets_close (server *srv) {
srv->sockets_disabled = 3;
}
/*
 * Optionally background the running server during a graceful restart.
 *
 * When the "server.graceful-restart-bg" feature flag is enabled, fork():
 *   - the original process exports its listen sockets to the environment
 *     and re-exec's argv[0] with the saved argv (it does not return here;
 *     it calls _exit(1) if execv() returns)
 *   - the child closes its listen sockets, lets go of the pid file, and
 *     returns to continue handling connections already in progress
 *
 * Returns 1 in the backgrounded child.
 * Returns 0 when backgrounding was not performed: shutdown is in progress,
 * no feature flags are configured, the flag is not enabled, argv[0] does
 * not meet the path requirement below, or fork() failed.
 */
__attribute_cold__
static int server_graceful_state_bg (server *srv) {
/*assert(graceful_restart);*/
/*(SIGUSR1 set to SIG_IGN in workers, so should not reach here if worker)*/
if (srv_shutdown) return 0;
if (NULL == srv->srvconf.feature_flags) return 0;
/* check if server should fork and background (bg) itself
* to continue processing requests already in progress */
data_unset * const du =
array_get_element_klen(srv->srvconf.feature_flags,
CONST_STR_LEN("server.graceful-restart-bg"));
/* NOTE(review): presumably yields the supplied default (0) when du is NULL,
* i.e. when the flag is absent from the config -- confirm in helper */
if (!config_plugin_value_tobool(du, 0)) return 0;
/*(set flag to false to avoid repeating)*/
/* (flag is cleared before the argv[0] and fork() checks below, so a failure
* there is not retried by a later call either) */
if (du->type == TYPE_STRING)
buffer_copy_string_len(&((data_string *)du)->value,
CONST_STR_LEN("false"));
else /* (du->type == TYPE_INTEGER) */
((data_integer *)du)->value = 0;
/* require exec'd via absolute path or daemon in foreground
* and exec'd with path containing '/' (e.g. "./xxxxx") */
char ** const argv = srv->argv;
if (0 == srv->srvconf.dont_daemonize
? argv[0][0] != '/'
: NULL == strchr(argv[0], '/')) return 0;
#if 0
/* disabled; not fully implemented
* srv->srvconf.systemd_socket_activation might be cleared in network_init()
* leading to issuing a false warning
*/
/* warn if server.systemd-socket-activation not enabled
* (While this warns on existing config rather than new config,
* it is probably a decent predictor for presence in new config) */
if (!srv->srvconf.systemd_socket_activation)
log_error(srv->errh, __FILE__, __LINE__,
"[note] server.systemd-socket-activation not enabled; "
"listen sockets will be closed and reopened");
#endif
/* flush log buffers to avoid potential duplication of entries
* server_handle_sighup(srv) does the following, but skip logging */
plugins_call_handle_sighup(srv);
config_log_error_cycle(srv);
/* backgrounding to continue processing requests in progress */
/* re-exec lighttpd in original process
* Note: using path in re-exec is portable and allows lighttpd upgrade.
* OTOH, getauxval() AT_EXECFD and fexecve() could be used on Linux to
* re-exec without access to original executable on disk, which might be
* desirable in some situations, but is not implemented here.
* Alternatively, if argv[] was not available, could use readlink() on
* /proc/self/exe (Linux-specific), though there are ways on many other
* platforms to achieve the same:
* https://stackoverflow.com/questions/1023306/finding-current-executables-path-without-proc-self-exe
*/
pid_t pid = fork();
if (pid) { /* original process */
/* fork() failed: report "not backgrounded" to the caller */
if (pid < 0) return 0;
/* export listen sockets through the environment before re-exec */
network_socket_activation_to_env(srv);
/*while (waitpid(pid, NULL, 0) < 0 && errno == EINTR) ;*//* detach? */
execv(argv[0], argv);
/* reached only if execv() returned, i.e. re-exec did not happen */
_exit(1);
}
/* else child/grandchild */
/*if (-1 == setsid()) _exit(1);*//* should we detach? */
server_sockets_close(srv); /*(close before parent reaps pid in waitpid)*/
/*if (0 != fork()) _exit(0);*//* should we detach? */
/*(grandchild is now backgrounded and detached from original process)*/
/* XXX: might extend code to have new server.feature-flags param specify
* max lifetime before aborting remaining connections */
/* (reached if lighttpd workers or if sole process w/o workers)
* use same code as comment elsewhere in server.c:
* make sure workers do not muck with pid-file */
if (0 <= pid_fd) {
close(pid_fd);
pid_fd = -1;
}
if (srv->srvconf.pid_file) buffer_clear(srv->srvconf.pid_file);
/* (original process is backgrounded -- even if no active connections --
* to allow graceful shutdown tasks to be run by server and by modules) */
log_error(srv->errh, __FILE__, __LINE__,
"[note] pid %lld continuing to handle %u connection(s) in progress",
(long long)getpid(), srv->conns.used);
/* clear the restart request in the backgrounded child */
graceful_restart = 0;
return 1;
}
__attribute_cold__
static void server_graceful_state (server *srv) {
@ -662,7 +757,8 @@ static void server_graceful_state (server *srv) {
graceful_restart = 0;
if (graceful_restart) {
server_sockets_unregister(srv);
if (!server_graceful_state_bg(srv))
server_sockets_unregister(srv);
if (pid_fd > 0) pid_fd = -pid_fd; /*(flag to skip removing pid file)*/
}
else {
@ -756,6 +852,7 @@ static int server_main_setup (server * const srv, int argc, char **argv) {
/*memset(graceful_sockets, 0, sizeof(graceful_sockets));*/
/*memset(inherited_sockets, 0, sizeof(inherited_sockets));*/
/*pid_fd = -1;*/
srv->argv = argv;
while(-1 != (o = getopt(argc, argv, "f:m:i:hvVD1pt"))) {
switch(o) {

Loading…
Cancel
Save