Fawkes API  Fawkes Development Version
ffwatchdog.cpp
00001 
00002 /***************************************************************************
00003  *  ffwatchdog.cpp - Fawkes process watchdog
00004  *
00005  *  Created: Thu Mar 31 09:53:53 2011 (RoboCup German Open 2011)
00006  *  Copyright  2011  Tim Niemueller [www.niemueller.de]
00007  *
00008  ****************************************************************************/
00009 
00010 /*  This program is free software; you can redistribute it and/or modify
00011  *  it under the terms of the GNU General Public License as published by
00012  *  the Free Software Foundation; either version 2 of the License, or
00013  *  (at your option) any later version.
00014  *
00015  *  This program is distributed in the hope that it will be useful,
00016  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00017  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018  *  GNU Library General Public License for more details.
00019  *
00020  *  Read the full text in the LICENSE.GPL file in the doc directory.
00021  */
00022 
00023 #include <core/exception.h>
00024 
00025 #include <unistd.h>
00026 #include <sys/wait.h>
00027 #include <cstdio>
00028 #include <cstdlib>
00029 #include <csignal>
00030 #include <cstring>
00031 #include <cerrno>
00032 
00033 #ifdef HAVE_LIBDAEMON
00034 #  include <cerrno>
00035 #  include <cstring>
00036 #  include <libdaemon/dfork.h>
00037 #  include <libdaemon/dlog.h>
00038 #  include <libdaemon/dpid.h>
00039 #  include <sys/stat.h>
00040 #  include <sys/wait.h>
00041 #endif
00042 
/** Requested shutdown escalation level, 0 while the watchdog should keep
 * running. Written by handle_signal(), therefore volatile sig_atomic_t. */
volatile sig_atomic_t g_quit = 0;
bool g_force_quit = false;
/** Number of the most recently received signal, written by handle_signal(). */
volatile sig_atomic_t g_signum = SIGINT;

/** Get a fixed description for the signals the watchdog handles.
 * Used instead of strsignal(), which is not async-signal-safe. The texts
 * are meant to match what glibc's strsignal() reports for these signals.
 * @param signum signal number
 * @return pointer to a string literal describing the signal
 */
static const char *
signal_description(int signum)
{
  switch (signum) {
  case SIGINT:   return "Interrupt";
  case SIGTERM:  return "Terminated";
  case SIGKILL:  return "Killed";
  case SIGUSR1:  return "User defined signal 1";
  case SIGUSR2:  return "User defined signal 2";
  default:       return "Unknown";
  }
}

/** Signal handler.
 * Records the received signal in g_signum and raises the shutdown level in
 * g_quit: each SIGINT raises it by one, SIGTERM sets it to 3, SIGKILL to 4.
 * Any other signal only gets reported and recorded.
 *
 * Only async-signal-safe operations are used: the message is assembled in
 * a local buffer and emitted with write(2) rather than printf(), so it
 * bypasses the stdio buffer of stdout. errno is preserved for the
 * interrupted code.
 * @param signum number of the received signal
 */
void
handle_signal(int signum)
{
  const int saved_errno = errno;

  const char *parts[3];
  parts[0] = "Received ";
  parts[1] = signal_description(signum);
  parts[2] = " signal\n";

  char   msg[64];
  size_t len = 0;
  for (size_t p = 0; p < 3; ++p) {
    for (const char *c = parts[p]; *c != '\0' && len < sizeof(msg); ++c) {
      msg[len++] = *c;
    }
  }
  // Best effort, there is nothing sensible to do here if writing fails
  ssize_t written = write(STDOUT_FILENO, msg, len);
  (void)written;

  g_signum = signum;
  switch (signum) {
  case SIGINT:   g_quit = g_quit + 1; break; // sigint escalates
  case SIGTERM:  g_quit = 3; break;
  // SIGKILL cannot be caught, the case is kept for completeness only
  case SIGKILL:  g_quit = 4; break;
  default: break;
  }

  errno = saved_errno;
}
00059 
/** Print usage instructions to stdout.
 * The daemon related options are only listed if the watchdog has been
 * built with libdaemon support (HAVE_LIBDAEMON).
 * @param progname program name, printed as part of the usage line
 */
void
usage(const char *progname)
{
  // Only the first line needs formatting, the rest is fixed text
  printf("Usage: %s [options] <progfile> [args...]\n", progname);

  fputs("progfile   full absolute path to executable\n"
        "args       any number of arguments, passed to program as-is\n\n"
        "where [options] passed in before <progfile> are one or more of:\n",
        stdout);

#ifdef HAVE_LIBDAEMON
  fputs(" -D[pid file]     Run daemonized in the background, pid file is optional,\n"
        "                  defaults to /var/run/ffwatchdog_basename.pid, must be absolute path.\n"
        " -D[pid file] -k  Kill a daemonized process running in the background,\n"
        "                  pid file is optional as above.\n"
        " -D[pid file] -s  Check status of daemon.\n",
        stdout);
#endif

  fputs(" -h               Show help instructions.\n\n", stdout);
}
00080 
00081 
00082 pid_t
00083 fork_and_exec(int argc, char **argv, int prog_start)
00084 {
00085   pid_t pid = fork();
00086   if (pid == -1) {
00087     // error
00088     printf("Forking for new process failed: %s\n", strerror(errno));
00089     throw fawkes::Exception(errno, "Forking for new process failed: %s");
00090   } else if (pid == 0) {
00091     // child
00092     setsid();
00093     signal(SIGINT, SIG_IGN);    
00094     if (execve(argv[prog_start], &argv[prog_start], environ) == -1) {
00095       printf("Failed to execute %s, exited with %i: %s\n",
00096              argv[prog_start], errno, strerror(errno));
00097       exit(-1);
00098     }
00099   }
00100 
00101   return pid;
00102 }
00103 
00104 
00105 #ifdef HAVE_LIBDAEMON
00106 void
00107 daemonize_cleanup()
00108 {
00109   daemon_retval_send(-1);
00110   daemon_retval_done();
00111   daemon_pid_file_remove();}
00112 
00113 pid_t
00114 daemonize(int argc, char **argv)
00115 {
00116   pid_t pid;
00117   mode_t old_umask = umask(0);
00118 
00119   // Prepare for return value passing
00120   daemon_retval_init();
00121 
00122   // Do the fork
00123   if ((pid = daemon_fork()) < 0) {
00124     return -1;
00125         
00126   } else if (pid) { // the parent
00127     int ret;
00128 
00129     // Wait for 20 seconds for the return value passed from the daemon process
00130     if ((ret = daemon_retval_wait(20)) < 0) {
00131       daemon_log(LOG_ERR, "Could not recieve return value from daemon process.");
00132       return -1;
00133     }
00134 
00135     if ( ret != 0 ) {
00136       daemon_log(LOG_ERR, "*** Daemon startup failed, see syslog for details. ***");
00137       switch (ret) {
00138       case 1:
00139         daemon_log(LOG_ERR, "Daemon failed to close file descriptors");
00140         break;
00141       case 2:
00142         daemon_log(LOG_ERR, "Daemon failed to create PID file");
00143         break;
00144       }
00145       return -1;
00146     } else {
00147       return pid;
00148     }
00149 
00150   } else { // the daemon
00151 #ifdef DAEMON_CLOSE_ALL_AVAILABLE
00152     if (daemon_close_all(-1) < 0) {
00153       daemon_log(LOG_ERR, "Failed to close all file descriptors: %s",
00154                  strerror(errno));
00155       // Send the error condition to the parent process
00156       daemon_retval_send(1);
00157       return -1;
00158     }
00159 #endif
00160 
00161     // Create the PID file
00162     if (daemon_pid_file_create() < 0) {
00163       printf("Could not create PID file (%s).", strerror(errno));
00164       daemon_log(LOG_ERR, "Could not create PID file (%s).", strerror(errno));
00165 
00166       // Send the error condition to the parent process
00167       daemon_retval_send(2);
00168       return -1;
00169     }
00170 
00171     // Send OK to parent process
00172     daemon_retval_send(0);
00173 
00174     daemon_log(LOG_INFO, "Sucessfully started");
00175 
00176     umask(old_umask);
00177     return 0;
00178   }
00179 }
00180 
/** Path to the PID file, NULL until set.
 * Kept in a global because libdaemon obtains the file name through a
 * callback that takes no arguments, see ffwatchdog_daemon_pid_file_proc().
 */
const char *ffwatchdog_pid_file = NULL;

/** PID file name callback for libdaemon.
 * @return current value of ffwatchdog_pid_file
 */
const char *
ffwatchdog_daemon_pid_file_proc()
{
  const char *pid_file_path = ffwatchdog_pid_file;
  return pid_file_path;
}
00193 #endif // HAVE_LIBDAEMON
00194 
00195 
00196 
00197 /** Watchdog main.
00198  * @param argc argument count
00199  * @param argv arguments
00200  */
00201 int
00202 main(int argc, char **argv)
00203 {
00204   if (argc < 2) {
00205     usage(argv[0]);
00206     exit(1);
00207   }
00208 
00209   bool arg_verbose = false;
00210   bool arg_daemonize = false;
00211   bool arg_daemon_kill = false;
00212   bool arg_daemon_status = false;
00213   const char *daemon_pid_file = NULL;
00214 
00215   int prog_start;
00216   for (prog_start = 1; prog_start < argc; ++prog_start) {
00217     if (argv[prog_start][0] == '-') {
00218       // argument starts
00219       char param = argv[prog_start][1];
00220       if (param == '-') {
00221         ++prog_start;
00222         break;
00223       } else {
00224         if (param == 'D') {
00225           arg_daemonize = true;
00226           daemon_pid_file = NULL;
00227           if (strlen(&argv[prog_start][1]) > 1) {
00228             daemon_pid_file = &argv[prog_start][2];
00229           }
00230         } else if (param == 'k') {
00231           arg_daemon_kill = true;
00232         } else if (param == 's') {
00233           arg_daemon_status = true;
00234         } else if (param == 'v') {
00235           arg_verbose = true;
00236         } else if (param == 'h') {
00237           usage(argv[0]);
00238           exit(0);
00239         } else {
00240           printf("Unknown argument '%c'\n", param);
00241           usage(argv[0]);
00242           exit(3);
00243         }
00244       }
00245     } else {
00246       break;
00247     }
00248   }
00249 
00250   if (prog_start >= argc) {
00251     usage(argv[0]);
00252     exit(1);
00253   }
00254 
00255   if (access(argv[prog_start], X_OK) != 0) {
00256     printf("Cannot execute '%s': %s\n\n", argv[1], strerror(errno));
00257     usage(argv[0]);
00258     exit(2);
00259   }
00260 
00261 #ifdef HAVE_LIBDAEMON
00262   pid_t dpid;
00263   int ret;
00264 
00265   char *daemon_ident = NULL;
00266 
00267   if ( arg_daemonize ) {
00268     // Set identification string for the daemon for both syslog and PID file
00269 
00270     char *argv_copy = strdup(argv[prog_start]);
00271     if (asprintf(&daemon_ident, "ffwatchdog_%s", basename(argv_copy)) == -1) {
00272       free(argv_copy);
00273       printf("Failed to create daemon ident, not enough memory\n");
00274       exit(5);
00275     }
00276     free(argv_copy);
00277     daemon_pid_file_ident = daemon_log_ident = daemon_ident;
00278     if ( daemon_pid_file != NULL ) {
00279       ffwatchdog_pid_file  = daemon_pid_file;
00280       daemon_pid_file_proc = ffwatchdog_daemon_pid_file_proc;
00281     }
00282 
00283     // We should daemonize, check if we were called to kill a daemonized copy
00284     if (arg_daemon_kill) {
00285       // Check that the daemon is not run twice a the same time
00286       if ((dpid = daemon_pid_file_is_running()) < 0) {
00287         daemon_log(LOG_ERR, "Watchdog daemon for %s not running.",
00288                    argv[prog_start]);
00289         return 1;
00290       }
00291 
00292       // Kill daemon with SIGINT
00293       if ((ret = daemon_pid_file_kill_wait(SIGINT, 5)) < 0) {
00294         daemon_log(LOG_WARNING, "Failed to kill watchdog daemon for %s",
00295                    argv[prog_start]);
00296       }
00297       return (ret < 0) ? 1 : 0;
00298     }
00299 
00300     if (arg_daemon_status) {
00301       // Check daemon status
00302       if (daemon_pid_file_is_running() < 0) {
00303         if (arg_verbose) {
00304           printf("Watchdog daemon for %s is not running\n", argv[prog_start]);
00305         }
00306         return 1;
00307       } else {
00308         if (arg_verbose) {
00309           printf("Watchdog daemon for %s is running\n", argv[prog_start]);
00310         }
00311         return 0;
00312       }
00313     }
00314 
00315     // Check that the daemon is not run twice a the same time
00316     if ((dpid = daemon_pid_file_is_running()) >= 0) {
00317       daemon_log(LOG_ERR, "Watchdog daemon for %s already running on (PID %u)",
00318                  argv[prog_start], dpid);
00319       return 201;
00320     }
00321 
00322     dpid = daemonize(argc, argv);
00323     if ( dpid < 0 ) {
00324       daemonize_cleanup();
00325       return 201;
00326     } else if (dpid) {
00327       // parent
00328       return 0;
00329     } // else child, continue as usual
00330   }
00331 #else
00332   if (daemonize) {
00333     printf("Daemonize support was not available at compile time.\n");
00334     exit(13);
00335   }
00336 #endif
00337 
00338   struct sigaction sa;
00339   sa.sa_handler = handle_signal;
00340   sigemptyset(&sa.sa_mask);
00341   sa.sa_flags = 0;
00342   sigaction(SIGINT, &sa, NULL);
00343   sigaction(SIGKILL, &sa, NULL);
00344   sigaction(SIGTERM, &sa, NULL);
00345   sigaction(SIGUSR1, &sa, NULL);
00346   sigaction(SIGUSR2, &sa, NULL);
00347 
00348   pid_t pid = -1;
00349   while (! g_quit) {
00350     pid = fork_and_exec(argc, argv, prog_start);
00351 
00352     while (pid != -1 && ! g_quit) {
00353 
00354       int status = 0;
00355       pid_t cpid = waitpid(pid, &status, WUNTRACED | WCONTINUED);
00356       printf("Wait returned\n");
00357 
00358       if (cpid == -1) {
00359         printf("Failed to wait for child: %s\n", strerror(errno));
00360       } else if (WIFEXITED(status)) {
00361         printf("%i|%s exited, status=%d\n", cpid, argv[prog_start],
00362                WEXITSTATUS(status));
00363         pid = -1;
00364       } else if (WIFSIGNALED(status)) {
00365         printf("%i|%s killed by signal %s\n", cpid, argv[prog_start],
00366                strsignal(WTERMSIG(status)));
00367         pid = -1;
00368       } else if (WIFSTOPPED(status)) {
00369         printf("%i|%s stopped by signal %s\n", cpid, argv[prog_start],
00370                strsignal(WSTOPSIG(status)));
00371         pid = -1;
00372       } else if (WIFCONTINUED(status)) {
00373         printf("%i|%s continued\n", cpid, argv[prog_start]);
00374       }
00375     }
00376   }
00377 
00378   if (pid != -1) {
00379 
00380     int last_quit = 0;
00381     printf("Stopping child. Press Ctrl-C again to escalate.\n");
00382 
00383     for (unsigned int i = 0; i < 600; ++i) {
00384       if (last_quit != g_quit) {
00385         int signum;
00386         if (g_quit <= 2) {
00387           signum = SIGINT;
00388         } else if (g_quit == 3) {
00389           signum = SIGTERM;
00390         } else {
00391           signum = SIGKILL;
00392         }
00393 
00394         printf("Killing %s with signal %s\n", argv[prog_start],
00395                strsignal(signum));
00396         if (kill(pid, signum) == -1) {
00397           printf("Failed to kill %s: %s\n", argv[prog_start], strerror(errno));
00398         }
00399       }
00400       last_quit = g_quit;
00401 
00402       usleep(10000);
00403       int status;
00404       int rv = waitpid(pid, &status, WNOHANG);
00405       if (rv == -1) {
00406         if (errno == EINTR)  continue;
00407         if (errno == ECHILD) {
00408           pid = -1;
00409           break;
00410         }
00411       } else if (rv > 0) {
00412         pid = -1;
00413         break;
00414       }
00415       if (i >= 300) g_quit = 2;
00416       if (i >= 500) g_quit = 3;
00417     }
00418   }
00419 
00420 #ifdef HAVE_LIBDAEMON
00421   if (arg_daemonize) {
00422     daemonize_cleanup();
00423   }
00424 #endif
00425 
00426   return 0;
00427 }