supervise.c (3658B)
1 #include "config.h" 2 #include "service.h" 3 #include "util.h" 4 5 #include <errno.h> 6 #include <fcntl.h> 7 #include <limits.h> 8 #include <setjmp.h> 9 #include <stdio.h> 10 #include <stdlib.h> 11 #include <string.h> 12 #include <sys/socket.h> 13 #include <sys/stat.h> 14 #include <sys/un.h> 15 #include <sys/wait.h> 16 #include <unistd.h> 17 18 19 static void signal_child(int unused) { 20 (void) unused; 21 22 int status; 23 pid_t died_pid; 24 struct service* s = NULL; 25 26 if ((died_pid = wait(&status)) == -1) { 27 fprint(1, "error: cannot wait for process: %r\n"); 28 return; 29 } 30 31 if (!WIFEXITED(status) && !WIFSIGNALED(status)) 32 return; 33 34 for (int i = 0; i < services_size; i++) { 35 if (services[i].pid == died_pid) { 36 s = &services[i]; 37 break; 38 } 39 } 40 if (s == NULL) 41 return; 42 43 service_handle_exit(s, WIFSIGNALED(status), WIFSIGNALED(status) ? WTERMSIG(status) : WEXITSTATUS(status)); 44 } 45 46 static void update_services(void) { 47 struct service* s; 48 49 for (int i = 0; i < services_size; i++) { 50 s = &services[i]; 51 if (s->state == STATE_INACTIVE || s->state == STATE_ERROR) 52 s->stop_timeout = 0; 53 54 if (s->state == STATE_ERROR) 55 continue; 56 57 if (s->stop_timeout != 0) { 58 if (time(NULL) - s->stop_timeout >= SV_STOP_TIMEOUT) { 59 print(":: service '%s' doesn't terminate, killing...\n", s->name); 60 service_kill(s, SIGKILL); 61 s->stop_timeout = 0; 62 } 63 } else if (s->state == STATE_INACTIVE && service_need_restart(s)) { 64 service_start(s); 65 } 66 } 67 } 68 69 static void control_sockets(void) { 70 struct service* s; 71 char cmd; 72 73 for (int i = 0; i < services_size; i++) { 74 s = &services[i]; 75 while (read(s->control, &cmd, 1) == 1) { 76 print("handling '%c' from %s\n", cmd, s->name); 77 service_handle_command(s, cmd); 78 } 79 } 80 } 81 82 void stop_dummies(void) { 83 for (int i = 0; i < services_size; i++) { 84 if (services[i].state != STATE_ACTIVE_DUMMY || services[i].restart == S_RESTART) 85 continue; 86 87 for (int j = 0; j < services[i].children_size; j++) { 88 struct service* dep = services[i].children[j]; 89 if (dep->state != STATE_INACTIVE && dep->state != STATE_ERROR) 90 goto dont_stop; 91 } 92 93 service_stop(&services[i]); 94 95 dont_stop:; 96 } 97 } 98 99 int service_supervise(const char* service_dir_, const char* service, bool once) { 100 struct sigaction sigact = { 0 }; 101 struct service* s; 102 103 daemon_running = true; 104 105 sigact.sa_handler = signal_child; 106 sigaction(SIGCHLD, &sigact, NULL); 107 sigact.sa_handler = SIG_IGN; 108 sigaction(SIGPIPE, &sigact, NULL); 109 110 service_dir_path = service_dir_; 111 if ((service_dir = open(service_dir_, O_DIRECTORY)) == -1) { 112 print_errno("error: cannot open directory %s: %s\n", service_dir_); 113 return 1; 114 } 115 116 if ((null_fd = open("/dev/null", O_RDWR)) == -1) { 117 fprint(1, "error: cannot open /dev/null: %r\n"); 118 null_fd = 1; 119 } 120 121 print(":: starting services\n"); 122 123 service_refresh_directory(); 124 125 if ((s = service_get(service)) == NULL) { 126 fprint(1, "error: cannot start '%s': not found\n", service); 127 goto cleanup; 128 } 129 130 s->restart = once ? S_ONCE : S_RESTART; 131 service_start(s); 132 133 134 bool cont; 135 // accept connections and handle requests 136 do { 137 if (!daemon_running) { 138 for (int i = 0; i < services_size; i++) { 139 s = &services[i]; 140 service_stop(s); 141 } 142 } 143 144 service_refresh_directory(); 145 stop_dummies(); 146 control_sockets(); 147 update_services(); 148 149 sleep(SV_CHECK_INTERVAL); 150 151 cont = false; 152 for (int i = 0; i < services_size; i++) { 153 if (services[i].state != STATE_INACTIVE && services[i].state != STATE_ERROR) 154 cont = true; 155 } 156 } while (cont); 157 158 print(":: terminating\n"); 159 160 print(":: all services stopped\n"); 161 162 cleanup: 163 164 close(service_dir); 165 close(null_fd); 166 167 signal(SIGPIPE, SIG_DFL); 168 signal(SIGCHLD, SIG_DFL); 169 return 0; 170 }