Forks and watches multiple instances of a program in the same context (environment + fds) https://redmine.lighttpd.net/projects/multiwatch
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

311 lines
7.6 KiB

  1. #include <glib.h>
  2. #include <ev.h>
  3. #include <signal.h>
  4. #include <stdlib.h>
  5. #include <string.h>
  6. #include <sys/types.h>
  7. #include <sys/wait.h>
  8. #include <unistd.h>
  9. #include <errno.h>
  10. #ifdef HAVE_CONFIG_H
  11. #include "config.h"
  12. #endif
/* Evaluates x and discards the result; used in the callbacks below to mark
 * parameters that are intentionally not used. */
#define UNUSED(x) ((void)(x))
/* Human readable program description, used as the --help summary and as the
 * first line of the --version output. PACKAGE_NAME and PACKAGE_VERSION are not
 * defined in this file (presumably they come from config.h). */
#define PACKAGE_DESC (PACKAGE_NAME " v" PACKAGE_VERSION " - forks and watches multiple instances of a program in the same environment")
  15. typedef struct {
  16. gchar **app;
  17. gint forks;
  18. /* how many times we try to spawn a child */
  19. gint retry;
  20. /* time within a dieing child is handled as "spawn failed"
  21. * if it dies after the timeout, the retry counter is reset and
  22. * we try to get it up again
  23. */
  24. gint retry_timeout_ms;
  25. gboolean show_version;
  26. /* terminate signal to kill children */
  27. gint sig_nice_kill;
  28. } options;
  29. struct data;
  30. typedef struct data data;
  31. struct child;
  32. typedef struct child child;
  33. struct child {
  34. data *d;
  35. int id;
  36. pid_t pid;
  37. gint tries;
  38. ev_tstamp last_spawn;
  39. ev_child watcher;
  40. };
  41. struct data {
  42. child *children;
  43. guint running;
  44. gboolean shutdown;
  45. struct ev_loop *loop;
  46. ev_signal sigHUP, sigINT, sigQUIT, sigTERM, sigUSR1, sigUSR2;
  47. gint return_status;
  48. };
  49. static options opts = {
  50. /* app: */ NULL,
  51. /* forks: */ 1,
  52. /* retry: */ 3,
  53. /* timeout: */ 10000,
  54. /* version: */ FALSE,
  55. /* sig: */ SIGUSR1
  56. };
  57. typedef struct signal_action signal_action;
  58. struct signal_action {
  59. const char *signame;
  60. int signum;
  61. gboolean terminate; /* not used yet */
  62. };
  63. static signal_action signal_actions[] = {
  64. { "HUP", SIGHUP, TRUE },
  65. { "INT", SIGINT, TRUE },
  66. { "QUIT", SIGQUIT, TRUE },
  67. { "TERM", SIGTERM, TRUE },
  68. { "USR1", SIGUSR1, TRUE },
  69. { "USR2", SIGUSR2, FALSE },
  70. { NULL, 0, FALSE }
  71. };
  72. static gint signame2num(const char *name) {
  73. gint i;
  74. for (i = 0; signal_actions[i].signame; i++) {
  75. if (0 == strcmp(signal_actions[i].signame, name)) {
  76. return signal_actions[i].signum;
  77. }
  78. }
  79. return -1;
  80. }
  81. static void forward_sig_cb(struct ev_loop *loop, ev_signal *w, int revents) {
  82. data *d = (data*) w->data;
  83. UNUSED(loop);
  84. UNUSED(revents);
  85. for (gint i = 0; i < opts.forks; i++) {
  86. if (d->children[i].pid != -1) {
  87. kill(d->children[i].pid, w->signum);
  88. }
  89. }
  90. }
  91. static void terminate_forward_sig_cb(struct ev_loop *loop, ev_signal *w, int revents) {
  92. data *d = (data*) w->data;
  93. gint signum = opts.sig_nice_kill; /* terminate children with "nice" signal */
  94. UNUSED(loop);
  95. UNUSED(revents);
  96. /* on second signal forward original signal */
  97. if (d->shutdown || signum < 0) {
  98. signum = w->signum;
  99. }
  100. d->shutdown = TRUE;
  101. opts.sig_nice_kill = -1;
  102. for (gint i = 0; i < opts.forks; i++) {
  103. if (d->children[i].pid != -1) {
  104. kill(d->children[i].pid, signum);
  105. }
  106. }
  107. }
  108. static void spawn(child* c) {
  109. pid_t pid;
  110. if (c->tries++ > opts.retry) {
  111. g_printerr("Child[%i] died to often, not forking again\n", c->id);
  112. return;
  113. }
  114. switch (pid = fork()) {
  115. case -1:
  116. g_printerr("Fatal Error: Couldn't fork child[%i]: %s\n", c->id, g_strerror(errno));
  117. if (0 == c->d->running) {
  118. g_printerr("No child running and fork failed -> exit\n");
  119. c->d->return_status = -100;
  120. ev_unloop(c->d->loop, EVUNLOOP_ALL);
  121. }
  122. /* Do not retry... */
  123. break;
  124. case 0:
  125. /* child */
  126. /* Need to reset the signal mask; signal actions don't need to be reset
  127. * according to libev documentation:
  128. * http://pod.tst.eu/http://cvs.schmorp.de/libev/ev.pod#The_special_problem_of_inheritance_o
  129. */
  130. {
  131. sigset_t set;
  132. sigemptyset(&set);
  133. sigprocmask(SIG_SETMASK, &set, NULL);
  134. }
  135. execv(opts.app[0], opts.app);
  136. g_printerr("Exec failed: %s\n", g_strerror(errno));
  137. exit(errno);
  138. break;
  139. default:
  140. c->pid = pid;
  141. c->d->running++;
  142. c->last_spawn = ev_now(c->d->loop);
  143. ev_child_set(&c->watcher, c->pid, 0);
  144. ev_child_start(c->d->loop, &c->watcher);
  145. break;
  146. }
  147. }
  148. static void child_died(struct ev_loop *loop, ev_child *w, int revents) {
  149. child *c = (child*) w->data;
  150. UNUSED(revents);
  151. ev_child_stop(loop, w);
  152. c->d->running--;
  153. c->pid = -1;
  154. if (c->d->shutdown) return;
  155. if (ev_now(c->d->loop) - c->last_spawn > (opts.retry_timeout_ms / (ev_tstamp) 1000)) {
  156. g_printerr("Child[%i] died, respawn\n", c->id);
  157. c->tries = 0;
  158. } else {
  159. g_printerr("Spawning child[%i] failed, next try\n", c->id);
  160. }
  161. spawn(c);
  162. }
  163. static gboolean parse_use_signal_arg(const gchar *option_name, const gchar *value, gpointer d, GError **error) {
  164. gint sig = signame2num(value);
  165. UNUSED(option_name);
  166. UNUSED(d);
  167. if (-1 == sig) {
  168. g_set_error(error, G_OPTION_ERROR, G_OPTION_ERROR_FAILED, "Unknown signal name: '%s'", value);
  169. return FALSE;
  170. }
  171. opts.sig_nice_kill = sig;
  172. return TRUE;
  173. }
/* Command line option table handed to the GLib option parser in main().
 * Most entries store their value directly in the matching field of opts;
 * --signal goes through parse_use_signal_arg to translate the signal name.
 */
static const GOptionEntry entries[] = {
	/* -f / --forks: number of children */
{ "forks", 'f', 0, G_OPTION_ARG_INT, &opts.forks, "Number of children to fork and watch (default 1)", "children" },
	/* -r / --retry: spawn attempts per child */
{ "retry", 'r', 0, G_OPTION_ARG_INT, &opts.retry, "Number of retries to fork a single child (default 3)", "retries" },
	/* -t / --timeout: retry timeout in milliseconds */
{ "timeout", 't', 0, G_OPTION_ARG_INT, &opts.retry_timeout_ms, "Retry timeout in ms; if the child dies after the timeout the retry counter is reset (default 10000)", "ms" },
	/* -v / --version */
{ "version", 'v', 0, G_OPTION_ARG_NONE, &opts.show_version, "Show version", NULL },
	/* -s / --signal: the function pointer is passed through an integer cast to fit the table's data pointer field */
{ "signal", 's', 0, G_OPTION_ARG_CALLBACK, (void*)(intptr_t)parse_use_signal_arg, "Signal to send to children to signal 'graceful' termination (HUP,INT,QUIT,TERM,USR1,USR2)", "signame" },
	/* all remaining arguments: the application and its arguments */
{ G_OPTION_REMAINING, 0, 0, G_OPTION_ARG_STRING_ARRAY, &opts.app, "<application> [app arguments]", NULL },
	/* terminating entry */
{ NULL, 0, 0, 0, NULL, NULL, NULL }
};
  183. int main(int argc, char **argv) {
  184. GOptionContext *context;
  185. GError *error = NULL;
  186. gint res;
  187. context = g_option_context_new("<application> [app arguments]");
  188. g_option_context_add_main_entries(context, entries, NULL);
  189. g_option_context_set_summary(context, PACKAGE_DESC);
  190. if (!g_option_context_parse (context, &argc, &argv, &error)) {
  191. g_printerr("Option parsing failed: %s\n", error->message);
  192. return -1;
  193. }
  194. if (opts.show_version) {
  195. g_printerr(PACKAGE_DESC);
  196. g_printerr("\nBuild-Date: " __DATE__ " " __TIME__ "\n");
  197. return 0;
  198. }
  199. if (!opts.app || !opts.app[0]) {
  200. g_printerr("Missing application\n");
  201. return -2;
  202. }
  203. if (opts.forks < 1) {
  204. g_printerr("Invalid forks argument: %i\n", opts.forks);
  205. return -3;
  206. }
  207. if (opts.retry < 1) {
  208. g_printerr("Invalid retry argument: %i\n", opts.retry);
  209. return -4;
  210. }
  211. if (opts.retry_timeout_ms < 0) {
  212. g_printerr("Invalid timeout argument: %i\n", opts.retry_timeout_ms);
  213. return -5;
  214. }
  215. data *d = g_slice_new0(data);
  216. d->children = (child*) g_slice_alloc0(sizeof(child) * opts.forks);
  217. d->running = 0;
  218. d->shutdown = FALSE;
  219. d->return_status = 0;
  220. d->loop = ev_default_loop(0);
  221. #define WATCH_SIG(x) do { ev_signal_init(&d->sig##x, forward_sig_cb, SIG##x); d->sig##x.data = d; ev_signal_start(d->loop, &d->sig##x); ev_unref(d->loop); } while (0)
  222. #define WATCH_TERM_SIG(x) do { ev_signal_init(&d->sig##x, terminate_forward_sig_cb, SIG##x); d->sig##x.data = d; ev_signal_start(d->loop, &d->sig##x); ev_unref(d->loop); } while (0)
  223. #define UNWATCH_SIG(x) do { ev_ref(d->loop); ev_signal_stop(d->loop, &d->sig##x); } while (0)
  224. WATCH_TERM_SIG(HUP);
  225. WATCH_TERM_SIG(INT);
  226. WATCH_TERM_SIG(QUIT);
  227. WATCH_TERM_SIG(TERM);
  228. WATCH_TERM_SIG(USR1);
  229. WATCH_SIG(USR2);
  230. for (gint i = 0; i < opts.forks; i++) {
  231. d->children[i].d = d;
  232. d->children[i].id = i;
  233. d->children[i].pid = -1;
  234. d->children[i].tries = 0;
  235. d->children[i].watcher.data = &d->children[i];
  236. ev_child_init(&d->children[i].watcher, child_died, -1, 0);
  237. spawn(&d->children[i]);
  238. }
  239. ev_loop(d->loop, 0);
  240. res = d->return_status;
  241. g_slice_free1(sizeof(child) * opts.forks, d->children);
  242. g_slice_free(data, d);
  243. UNWATCH_SIG(HUP);
  244. UNWATCH_SIG(INT);
  245. UNWATCH_SIG(QUIT);
  246. UNWATCH_SIG(TERM);
  247. UNWATCH_SIG(USR1);
  248. UNWATCH_SIG(USR2);
  249. return res;
  250. }