fiss

Friedel's Initialization and Service Supervision
Log | Files | Refs | LICENSE

supervise.c (3658B)


      1 #include "config.h"
      2 #include "service.h"
      3 #include "util.h"
      4 
      5 #include <errno.h>
      6 #include <fcntl.h>
      7 #include <limits.h>
      8 #include <setjmp.h>
      9 #include <stdio.h>
     10 #include <stdlib.h>
     11 #include <string.h>
     12 #include <sys/socket.h>
     13 #include <sys/stat.h>
     14 #include <sys/un.h>
     15 #include <sys/wait.h>
     16 #include <unistd.h>
     17 
     18 
     19 static void signal_child(int unused) {
     20 	(void) unused;
     21 
     22 	int             status;
     23 	pid_t           died_pid;
     24 	struct service* s = NULL;
     25 
     26 	if ((died_pid = wait(&status)) == -1) {
     27 		fprint(1, "error: cannot wait for process: %r\n");
     28 		return;
     29 	}
     30 
     31 	if (!WIFEXITED(status) && !WIFSIGNALED(status))
     32 		return;
     33 
     34 	for (int i = 0; i < services_size; i++) {
     35 		if (services[i].pid == died_pid) {
     36 			s = &services[i];
     37 			break;
     38 		}
     39 	}
     40 	if (s == NULL)
     41 		return;
     42 
     43 	service_handle_exit(s, WIFSIGNALED(status), WIFSIGNALED(status) ? WTERMSIG(status) : WEXITSTATUS(status));
     44 }
     45 
     46 static void update_services(void) {
     47 	struct service* s;
     48 
     49 	for (int i = 0; i < services_size; i++) {
     50 		s = &services[i];
     51 		if (s->state == STATE_INACTIVE || s->state == STATE_ERROR)
     52 			s->stop_timeout = 0;
     53 
     54 		if (s->state == STATE_ERROR)
     55 			continue;
     56 
     57 		if (s->stop_timeout != 0) {
     58 			if (time(NULL) - s->stop_timeout >= SV_STOP_TIMEOUT) {
     59 				print(":: service '%s' doesn't terminate, killing...\n", s->name);
     60 				service_kill(s, SIGKILL);
     61 				s->stop_timeout = 0;
     62 			}
     63 		} else if (s->state == STATE_INACTIVE && service_need_restart(s)) {
     64 			service_start(s);
     65 		}
     66 	}
     67 }
     68 
     69 static void control_sockets(void) {
     70 	struct service* s;
     71 	char            cmd;
     72 
     73 	for (int i = 0; i < services_size; i++) {
     74 		s = &services[i];
     75 		while (read(s->control, &cmd, 1) == 1) {
     76 			print("handling '%c' from %s\n", cmd, s->name);
     77 			service_handle_command(s, cmd);
     78 		}
     79 	}
     80 }
     81 
     82 void stop_dummies(void) {
     83 	for (int i = 0; i < services_size; i++) {
     84 		if (services[i].state != STATE_ACTIVE_DUMMY || services[i].restart == S_RESTART)
     85 			continue;
     86 
     87 		for (int j = 0; j < services[i].children_size; j++) {
     88 			struct service* dep = services[i].children[j];
     89 			if (dep->state != STATE_INACTIVE && dep->state != STATE_ERROR)
     90 				goto dont_stop;
     91 		}
     92 
     93 		service_stop(&services[i]);
     94 
     95 	dont_stop:;
     96 	}
     97 }
     98 
     99 int service_supervise(const char* service_dir_, const char* service, bool once) {
    100 	struct sigaction sigact = { 0 };
    101 	struct service*  s;
    102 
    103 	daemon_running = true;
    104 
    105 	sigact.sa_handler = signal_child;
    106 	sigaction(SIGCHLD, &sigact, NULL);
    107 	sigact.sa_handler = SIG_IGN;
    108 	sigaction(SIGPIPE, &sigact, NULL);
    109 
    110 	service_dir_path = service_dir_;
    111 	if ((service_dir = open(service_dir_, O_DIRECTORY)) == -1) {
    112 		print_errno("error: cannot open directory %s: %s\n", service_dir_);
    113 		return 1;
    114 	}
    115 
    116 	if ((null_fd = open("/dev/null", O_RDWR)) == -1) {
    117 		fprint(1, "error: cannot open /dev/null: %r\n");
    118 		null_fd = 1;
    119 	}
    120 
    121 	print(":: starting services\n");
    122 
    123 	service_refresh_directory();
    124 
    125 	if ((s = service_get(service)) == NULL) {
    126 		fprint(1, "error: cannot start '%s': not found\n", service);
    127 		goto cleanup;
    128 	}
    129 
    130 	s->restart = once ? S_ONCE : S_RESTART;
    131 	service_start(s);
    132 
    133 
    134 	bool cont;
    135 	// accept connections and handle requests
    136 	do {
    137 		if (!daemon_running) {
    138 			for (int i = 0; i < services_size; i++) {
    139 				s = &services[i];
    140 				service_stop(s);
    141 			}
    142 		}
    143 
    144 		service_refresh_directory();
    145 		stop_dummies();
    146 		control_sockets();
    147 		update_services();
    148 
    149 		sleep(SV_CHECK_INTERVAL);
    150 
    151 		cont = false;
    152 		for (int i = 0; i < services_size; i++) {
    153 			if (services[i].state != STATE_INACTIVE && services[i].state != STATE_ERROR)
    154 				cont = true;
    155 		}
    156 	} while (cont);
    157 
    158 	print(":: terminating\n");
    159 
    160 	print(":: all services stopped\n");
    161 
    162 cleanup:
    163 
    164 	close(service_dir);
    165 	close(null_fd);
    166 
    167 	signal(SIGPIPE, SIG_DFL);
    168 	signal(SIGCHLD, SIG_DFL);
    169 	return 0;
    170 }