Fawkes API
Fawkes Development Version
|
00001 00002 /*************************************************************************** 00003 * ffwatchdog.cpp - Fawkes process watchdog 00004 * 00005 * Created: Thu Mar 31 09:53:53 2011 (RoboCup German Open 2011) 00006 * Copyright 2011 Tim Niemueller [www.niemueller.de] 00007 * 00008 ****************************************************************************/ 00009 00010 /* This program is free software; you can redistribute it and/or modify 00011 * it under the terms of the GNU General Public License as published by 00012 * the Free Software Foundation; either version 2 of the License, or 00013 * (at your option) any later version. 00014 * 00015 * This program is distributed in the hope that it will be useful, 00016 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00017 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00018 * GNU Library General Public License for more details. 00019 * 00020 * Read the full text in the LICENSE.GPL file in the doc directory. 00021 */ 00022 00023 #include <core/exception.h> 00024 00025 #include <unistd.h> 00026 #include <sys/wait.h> 00027 #include <cstdio> 00028 #include <cstdlib> 00029 #include <csignal> 00030 #include <cstring> 00031 #include <cerrno> 00032 00033 #ifdef HAVE_LIBDAEMON 00034 # include <cerrno> 00035 # include <cstring> 00036 # include <libdaemon/dfork.h> 00037 # include <libdaemon/dlog.h> 00038 # include <libdaemon/dpid.h> 00039 # include <sys/stat.h> 00040 # include <sys/wait.h> 00041 #endif 00042 00043 int g_quit = 0; 00044 bool g_force_quit = false; 00045 int g_signum = SIGINT; 00046 00047 void 00048 handle_signal(int signum) 00049 { 00050 printf("Received %s signal\n", strsignal(signum)); 00051 g_signum = signum; 00052 switch (signum) { 00053 case SIGINT: g_quit += 1; break; // sigint escalates 00054 case SIGTERM: g_quit = 3; break; 00055 case SIGKILL: g_quit = 4; break; 00056 default: break; 00057 } 00058 } 00059 00060 /** Print usage instructions. 00061 * @param progname program name 00062 */ 00063 void 00064 usage(const char *progname) 00065 { 00066 printf("Usage: %s [options] <progfile> [args...]\n" 00067 "progfile full absolute path to executable\n" 00068 "args any number of arguments, passed to program as-is\n\n" 00069 "where [options] passed in before <progfile> are one or more of:\n" 00070 #ifdef HAVE_LIBDAEMON 00071 " -D[pid file] Run daemonized in the background, pid file is optional,\n" 00072 " defaults to /var/run/ffwatchdog_basename.pid, must be absolute path.\n" 00073 " -D[pid file] -k Kill a daemonized process running in the background,\n" 00074 " pid file is optional as above.\n" 00075 " -D[pid file] -s Check status of daemon.\n" 00076 #endif 00077 " -h Show help instructions.\n\n", 00078 progname); 00079 } 00080 00081 00082 pid_t 00083 fork_and_exec(int argc, char **argv, int prog_start) 00084 { 00085 pid_t pid = fork(); 00086 if (pid == -1) { 00087 // error 00088 printf("Forking for new process failed: %s\n", strerror(errno)); 00089 throw fawkes::Exception(errno, "Forking for new process failed: %s"); 00090 } else if (pid == 0) { 00091 // child 00092 setsid(); 00093 signal(SIGINT, SIG_IGN); 00094 if (execve(argv[prog_start], &argv[prog_start], environ) == -1) { 00095 printf("Failed to execute %s, exited with %i: %s\n", 00096 argv[prog_start], errno, strerror(errno)); 00097 exit(-1); 00098 } 00099 } 00100 00101 return pid; 00102 } 00103 00104 00105 #ifdef HAVE_LIBDAEMON 00106 void 00107 daemonize_cleanup() 00108 { 00109 daemon_retval_send(-1); 00110 daemon_retval_done(); 00111 daemon_pid_file_remove();} 00112 00113 pid_t 00114 daemonize(int argc, char **argv) 00115 { 00116 pid_t pid; 00117 mode_t old_umask = umask(0); 00118 00119 // Prepare for return value passing 00120 daemon_retval_init(); 00121 00122 // Do the fork 00123 if ((pid = daemon_fork()) < 0) { 00124 return -1; 00125 00126 } else if (pid) { // the parent 00127 int ret; 00128 00129 // Wait for 20 seconds for the return value passed from the daemon process 00130 if ((ret = daemon_retval_wait(20)) < 0) { 00131 daemon_log(LOG_ERR, "Could not recieve return value from daemon process."); 00132 return -1; 00133 } 00134 00135 if ( ret != 0 ) { 00136 daemon_log(LOG_ERR, "*** Daemon startup failed, see syslog for details. ***"); 00137 switch (ret) { 00138 case 1: 00139 daemon_log(LOG_ERR, "Daemon failed to close file descriptors"); 00140 break; 00141 case 2: 00142 daemon_log(LOG_ERR, "Daemon failed to create PID file"); 00143 break; 00144 } 00145 return -1; 00146 } else { 00147 return pid; 00148 } 00149 00150 } else { // the daemon 00151 #ifdef DAEMON_CLOSE_ALL_AVAILABLE 00152 if (daemon_close_all(-1) < 0) { 00153 daemon_log(LOG_ERR, "Failed to close all file descriptors: %s", 00154 strerror(errno)); 00155 // Send the error condition to the parent process 00156 daemon_retval_send(1); 00157 return -1; 00158 } 00159 #endif 00160 00161 // Create the PID file 00162 if (daemon_pid_file_create() < 0) { 00163 printf("Could not create PID file (%s).", strerror(errno)); 00164 daemon_log(LOG_ERR, "Could not create PID file (%s).", strerror(errno)); 00165 00166 // Send the error condition to the parent process 00167 daemon_retval_send(2); 00168 return -1; 00169 } 00170 00171 // Send OK to parent process 00172 daemon_retval_send(0); 00173 00174 daemon_log(LOG_INFO, "Sucessfully started"); 00175 00176 umask(old_umask); 00177 return 0; 00178 } 00179 } 00180 00181 /** Global variable containing the path to the PID file. 00182 * unfortunately needed for libdaemon */ 00183 const char *ffwatchdog_pid_file; 00184 00185 /** Function that returns the PID file name. 00186 * @return PID file name 00187 */ 00188 const char * 00189 ffwatchdog_daemon_pid_file_proc() 00190 { 00191 return ffwatchdog_pid_file; 00192 } 00193 #endif // HAVE_LIBDAEMON 00194 00195 00196 00197 /** Watchdog main. 00198 * @param argc argument count 00199 * @param argv arguments 00200 */ 00201 int 00202 main(int argc, char **argv) 00203 { 00204 if (argc < 2) { 00205 usage(argv[0]); 00206 exit(1); 00207 } 00208 00209 bool arg_verbose = false; 00210 bool arg_daemonize = false; 00211 bool arg_daemon_kill = false; 00212 bool arg_daemon_status = false; 00213 const char *daemon_pid_file = NULL; 00214 00215 int prog_start; 00216 for (prog_start = 1; prog_start < argc; ++prog_start) { 00217 if (argv[prog_start][0] == '-') { 00218 // argument starts 00219 char param = argv[prog_start][1]; 00220 if (param == '-') { 00221 ++prog_start; 00222 break; 00223 } else { 00224 if (param == 'D') { 00225 arg_daemonize = true; 00226 daemon_pid_file = NULL; 00227 if (strlen(&argv[prog_start][1]) > 1) { 00228 daemon_pid_file = &argv[prog_start][2]; 00229 } 00230 } else if (param == 'k') { 00231 arg_daemon_kill = true; 00232 } else if (param == 's') { 00233 arg_daemon_status = true; 00234 } else if (param == 'v') { 00235 arg_verbose = true; 00236 } else if (param == 'h') { 00237 usage(argv[0]); 00238 exit(0); 00239 } else { 00240 printf("Unknown argument '%c'\n", param); 00241 usage(argv[0]); 00242 exit(3); 00243 } 00244 } 00245 } else { 00246 break; 00247 } 00248 } 00249 00250 if (prog_start >= argc) { 00251 usage(argv[0]); 00252 exit(1); 00253 } 00254 00255 if (access(argv[prog_start], X_OK) != 0) { 00256 printf("Cannot execute '%s': %s\n\n", argv[1], strerror(errno)); 00257 usage(argv[0]); 00258 exit(2); 00259 } 00260 00261 #ifdef HAVE_LIBDAEMON 00262 pid_t dpid; 00263 int ret; 00264 00265 char *daemon_ident = NULL; 00266 00267 if ( arg_daemonize ) { 00268 // Set identification string for the daemon for both syslog and PID file 00269 00270 char *argv_copy = strdup(argv[prog_start]); 00271 if (asprintf(&daemon_ident, "ffwatchdog_%s", basename(argv_copy)) == -1) { 00272 free(argv_copy); 00273 printf("Failed to create daemon ident, not enough memory\n"); 00274 exit(5); 00275 } 00276 free(argv_copy); 00277 daemon_pid_file_ident = daemon_log_ident = daemon_ident; 00278 if ( daemon_pid_file != NULL ) { 00279 ffwatchdog_pid_file = daemon_pid_file; 00280 daemon_pid_file_proc = ffwatchdog_daemon_pid_file_proc; 00281 } 00282 00283 // We should daemonize, check if we were called to kill a daemonized copy 00284 if (arg_daemon_kill) { 00285 // Check that the daemon is not run twice a the same time 00286 if ((dpid = daemon_pid_file_is_running()) < 0) { 00287 daemon_log(LOG_ERR, "Watchdog daemon for %s not running.", 00288 argv[prog_start]); 00289 return 1; 00290 } 00291 00292 // Kill daemon with SIGINT 00293 if ((ret = daemon_pid_file_kill_wait(SIGINT, 5)) < 0) { 00294 daemon_log(LOG_WARNING, "Failed to kill watchdog daemon for %s", 00295 argv[prog_start]); 00296 } 00297 return (ret < 0) ? 1 : 0; 00298 } 00299 00300 if (arg_daemon_status) { 00301 // Check daemon status 00302 if (daemon_pid_file_is_running() < 0) { 00303 if (arg_verbose) { 00304 printf("Watchdog daemon for %s is not running\n", argv[prog_start]); 00305 } 00306 return 1; 00307 } else { 00308 if (arg_verbose) { 00309 printf("Watchdog daemon for %s is running\n", argv[prog_start]); 00310 } 00311 return 0; 00312 } 00313 } 00314 00315 // Check that the daemon is not run twice a the same time 00316 if ((dpid = daemon_pid_file_is_running()) >= 0) { 00317 daemon_log(LOG_ERR, "Watchdog daemon for %s already running on (PID %u)", 00318 argv[prog_start], dpid); 00319 return 201; 00320 } 00321 00322 dpid = daemonize(argc, argv); 00323 if ( dpid < 0 ) { 00324 daemonize_cleanup(); 00325 return 201; 00326 } else if (dpid) { 00327 // parent 00328 return 0; 00329 } // else child, continue as usual 00330 } 00331 #else 00332 if (daemonize) { 00333 printf("Daemonize support was not available at compile time.\n"); 00334 exit(13); 00335 } 00336 #endif 00337 00338 struct sigaction sa; 00339 sa.sa_handler = handle_signal; 00340 sigemptyset(&sa.sa_mask); 00341 sa.sa_flags = 0; 00342 sigaction(SIGINT, &sa, NULL); 00343 sigaction(SIGKILL, &sa, NULL); 00344 sigaction(SIGTERM, &sa, NULL); 00345 sigaction(SIGUSR1, &sa, NULL); 00346 sigaction(SIGUSR2, &sa, NULL); 00347 00348 pid_t pid = -1; 00349 while (! g_quit) { 00350 pid = fork_and_exec(argc, argv, prog_start); 00351 00352 while (pid != -1 && ! g_quit) { 00353 00354 int status = 0; 00355 pid_t cpid = waitpid(pid, &status, WUNTRACED | WCONTINUED); 00356 printf("Wait returned\n"); 00357 00358 if (cpid == -1) { 00359 printf("Failed to wait for child: %s\n", strerror(errno)); 00360 } else if (WIFEXITED(status)) { 00361 printf("%i|%s exited, status=%d\n", cpid, argv[prog_start], 00362 WEXITSTATUS(status)); 00363 pid = -1; 00364 } else if (WIFSIGNALED(status)) { 00365 printf("%i|%s killed by signal %s\n", cpid, argv[prog_start], 00366 strsignal(WTERMSIG(status))); 00367 pid = -1; 00368 } else if (WIFSTOPPED(status)) { 00369 printf("%i|%s stopped by signal %s\n", cpid, argv[prog_start], 00370 strsignal(WSTOPSIG(status))); 00371 pid = -1; 00372 } else if (WIFCONTINUED(status)) { 00373 printf("%i|%s continued\n", cpid, argv[prog_start]); 00374 } 00375 } 00376 } 00377 00378 if (pid != -1) { 00379 00380 int last_quit = 0; 00381 printf("Stopping child. Press Ctrl-C again to escalate.\n"); 00382 00383 for (unsigned int i = 0; i < 600; ++i) { 00384 if (last_quit != g_quit) { 00385 int signum; 00386 if (g_quit <= 2) { 00387 signum = SIGINT; 00388 } else if (g_quit == 3) { 00389 signum = SIGTERM; 00390 } else { 00391 signum = SIGKILL; 00392 } 00393 00394 printf("Killing %s with signal %s\n", argv[prog_start], 00395 strsignal(signum)); 00396 if (kill(pid, signum) == -1) { 00397 printf("Failed to kill %s: %s\n", argv[prog_start], strerror(errno)); 00398 } 00399 } 00400 last_quit = g_quit; 00401 00402 usleep(10000); 00403 int status; 00404 int rv = waitpid(pid, &status, WNOHANG); 00405 if (rv == -1) { 00406 if (errno == EINTR) continue; 00407 if (errno == ECHILD) { 00408 pid = -1; 00409 break; 00410 } 00411 } else if (rv > 0) { 00412 pid = -1; 00413 break; 00414 } 00415 if (i >= 300) g_quit = 2; 00416 if (i >= 500) g_quit = 3; 00417 } 00418 } 00419 00420 #ifdef HAVE_LIBDAEMON 00421 if (arg_daemonize) { 00422 daemonize_cleanup(); 00423 } 00424 #endif 00425 00426 return 0; 00427 }