Fawkes API  Fawkes Development Version
ffwatchdog.cpp
1 
2 /***************************************************************************
3  * ffwatchdog.cpp - Fawkes process watchdog
4  *
5  * Created: Thu Mar 31 09:53:53 2011 (RoboCup German Open 2011)
6  * Copyright 2011 Tim Niemueller [www.niemueller.de]
7  *
8  ****************************************************************************/
9 
10 /* This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  * GNU Library General Public License for more details.
19  *
20  * Read the full text in the LICENSE.GPL file in the doc directory.
21  */
22 
23 #include <core/exception.h>
24 
25 #include <unistd.h>
26 #include <sys/wait.h>
27 #include <cstdio>
28 #include <cstdlib>
29 #include <csignal>
30 #include <cstring>
31 #include <cerrno>
32 
33 #ifdef HAVE_LIBDAEMON
34 # include <cerrno>
35 # include <cstring>
36 # include <libdaemon/dfork.h>
37 # include <libdaemon/dlog.h>
38 # include <libdaemon/dpid.h>
39 # include <sys/stat.h>
40 # include <sys/wait.h>
41 #endif
42 
/** Shutdown escalation level, written by handle_signal() and read by main().
 * 0 means keep running. main() maps larger values to the signal used to stop
 * the child: up to 2 is SIGINT, 3 is SIGTERM, anything above is SIGKILL.
 * The type is volatile sig_atomic_t because the value is shared between a
 * signal handler and the interrupted main loop. */
volatile sig_atomic_t g_quit = 0;

/** Force-quit flag. Not referenced anywhere in the visible part of this file. */
bool g_force_quit = false;

/** Number of the most recently received signal, written by handle_signal(). */
volatile sig_atomic_t g_signum = SIGINT;
46 
47 void
48 handle_signal(int signum)
49 {
50  printf("Received %s signal\n", strsignal(signum));
51  g_signum = signum;
52  switch (signum) {
53  case SIGINT: g_quit += 1; break; // sigint escalates
54  case SIGTERM: g_quit = 3; break;
55  case SIGKILL: g_quit = 4; break;
56  default: break;
57  }
58 }
59 
/** Print usage instructions.
 * Writes the command line synopsis and the list of supported options to
 * stdout. The daemon related options are only listed if the program was
 * built with libdaemon support (HAVE_LIBDAEMON).
 * @param progname program name
 */
void
usage(const char *progname)
{
  printf("Usage: %s [options] <progfile> [args...]\n"
         "progfile full absolute path to executable\n"
         "args any number of arguments, passed to program as-is\n\n"
         "where [options] passed in before <progfile> are one or more of:\n"
#ifdef HAVE_LIBDAEMON
         " -D[pid file] Run daemonized in the background, pid file is optional,\n"
         " defaults to /var/run/ffwatchdog_basename.pid, must be absolute path.\n"
         " -D[pid file] -k Kill a daemonized process running in the background,\n"
         " pid file is optional as above.\n"
         " -D[pid file] -s Check status of daemon.\n"
         // -v is parsed by main() and only influences the -s status report
         " -v Verbose output, print the result of the status check (-s).\n"
#endif
         // main() stops option parsing at the first argument starting with "--"
         " -- End of options, the next argument is taken as <progfile>.\n"
         " -h Show help instructions.\n\n",
         progname);
}
80 
81 
82 pid_t
83 fork_and_exec(int argc, char **argv, int prog_start)
84 {
85  pid_t pid = fork();
86  if (pid == -1) {
87  // error
88  printf("Forking for new process failed: %s\n", strerror(errno));
89  throw fawkes::Exception(errno, "Forking for new process failed: %s");
90  } else if (pid == 0) {
91  // child
92  setsid();
93  signal(SIGINT, SIG_IGN);
94  if (execve(argv[prog_start], &argv[prog_start], environ) == -1) {
95  printf("Failed to execute %s, exited with %i: %s\n",
96  argv[prog_start], errno, strerror(errno));
97  exit(-1);
98  }
99  }
100 
101  return pid;
102 }
103 
104 
105 #ifdef HAVE_LIBDAEMON
106 void
107 daemonize_cleanup()
108 {
109  daemon_retval_send(-1);
110  daemon_retval_done();
111  daemon_pid_file_remove();}
112 
113 pid_t
114 daemonize(int argc, char **argv)
115 {
116  pid_t pid;
117  mode_t old_umask = umask(0);
118 
119  // Prepare for return value passing
120  daemon_retval_init();
121 
122  // Do the fork
123  if ((pid = daemon_fork()) < 0) {
124  return -1;
125 
126  } else if (pid) { // the parent
127  int ret;
128 
129  // Wait for 20 seconds for the return value passed from the daemon process
130  if ((ret = daemon_retval_wait(20)) < 0) {
131  daemon_log(LOG_ERR, "Could not recieve return value from daemon process.");
132  return -1;
133  }
134 
135  if ( ret != 0 ) {
136  daemon_log(LOG_ERR, "*** Daemon startup failed, see syslog for details. ***");
137  switch (ret) {
138  case 1:
139  daemon_log(LOG_ERR, "Daemon failed to close file descriptors");
140  break;
141  case 2:
142  daemon_log(LOG_ERR, "Daemon failed to create PID file");
143  break;
144  }
145  return -1;
146  } else {
147  return pid;
148  }
149 
150  } else { // the daemon
151 #ifdef DAEMON_CLOSE_ALL_AVAILABLE
152  if (daemon_close_all(-1) < 0) {
153  daemon_log(LOG_ERR, "Failed to close all file descriptors: %s",
154  strerror(errno));
155  // Send the error condition to the parent process
156  daemon_retval_send(1);
157  return -1;
158  }
159 #endif
160 
161  // Create the PID file
162  if (daemon_pid_file_create() < 0) {
163  printf("Could not create PID file (%s).", strerror(errno));
164  daemon_log(LOG_ERR, "Could not create PID file (%s).", strerror(errno));
165 
166  // Send the error condition to the parent process
167  daemon_retval_send(2);
168  return -1;
169  }
170 
171  // Send OK to parent process
172  daemon_retval_send(0);
173 
174  daemon_log(LOG_INFO, "Sucessfully started");
175 
176  umask(old_umask);
177  return 0;
178  }
179 }
180 
/** Path of the PID file to use, or NULL if none was set.
 * Kept as a global because the accessor below takes no arguments and
 * therefore has no other way to get at the path. */
const char *ffwatchdog_pid_file = NULL;

/** Accessor for the PID file path.
 * main() installs this function as libdaemon's daemon_pid_file_proc when a
 * PID file was given on the command line.
 * @return current value of ffwatchdog_pid_file
 */
const char *
ffwatchdog_daemon_pid_file_proc()
{
  const char *const pid_file_path = ffwatchdog_pid_file;
  return pid_file_path;
}
193 #endif // HAVE_LIBDAEMON
194 
195 
196 
197 /** Watchdog main.
198  * @param argc argument count
199  * @param argv arguments
200  */
201 int
202 main(int argc, char **argv)
203 {
204  if (argc < 2) {
205  usage(argv[0]);
206  exit(1);
207  }
208 
209  bool arg_verbose = false;
210  bool arg_daemonize = false;
211  bool arg_daemon_kill = false;
212  bool arg_daemon_status = false;
213  const char *daemon_pid_file = NULL;
214 
215  int prog_start;
216  for (prog_start = 1; prog_start < argc; ++prog_start) {
217  if (argv[prog_start][0] == '-') {
218  // argument starts
219  char param = argv[prog_start][1];
220  if (param == '-') {
221  ++prog_start;
222  break;
223  } else {
224  if (param == 'D') {
225  arg_daemonize = true;
226  daemon_pid_file = NULL;
227  if (strlen(&argv[prog_start][1]) > 1) {
228  daemon_pid_file = &argv[prog_start][2];
229  }
230  } else if (param == 'k') {
231  arg_daemon_kill = true;
232  } else if (param == 's') {
233  arg_daemon_status = true;
234  } else if (param == 'v') {
235  arg_verbose = true;
236  } else if (param == 'h') {
237  usage(argv[0]);
238  exit(0);
239  } else {
240  printf("Unknown argument '%c'\n", param);
241  usage(argv[0]);
242  exit(3);
243  }
244  }
245  } else {
246  break;
247  }
248  }
249 
250  if (prog_start >= argc) {
251  usage(argv[0]);
252  exit(1);
253  }
254 
255  if (access(argv[prog_start], X_OK) != 0) {
256  printf("Cannot execute '%s': %s\n\n", argv[1], strerror(errno));
257  usage(argv[0]);
258  exit(2);
259  }
260 
261 #ifdef HAVE_LIBDAEMON
262  pid_t dpid;
263  int ret;
264 
265  char *daemon_ident = NULL;
266 
267  if ( arg_daemonize ) {
268  // Set identification string for the daemon for both syslog and PID file
269 
270  char *argv_copy = strdup(argv[prog_start]);
271  if (asprintf(&daemon_ident, "ffwatchdog_%s", basename(argv_copy)) == -1) {
272  free(argv_copy);
273  printf("Failed to create daemon ident, not enough memory\n");
274  exit(5);
275  }
276  free(argv_copy);
277  daemon_pid_file_ident = daemon_log_ident = daemon_ident;
278  if ( daemon_pid_file != NULL ) {
279  ffwatchdog_pid_file = daemon_pid_file;
280  daemon_pid_file_proc = ffwatchdog_daemon_pid_file_proc;
281  }
282 
283  // We should daemonize, check if we were called to kill a daemonized copy
284  if (arg_daemon_kill) {
285  // Check that the daemon is not run twice a the same time
286  if ((dpid = daemon_pid_file_is_running()) < 0) {
287  daemon_log(LOG_ERR, "Watchdog daemon for %s not running.",
288  argv[prog_start]);
289  return 1;
290  }
291 
292  // Kill daemon with SIGINT
293  if ((ret = daemon_pid_file_kill_wait(SIGINT, 5)) < 0) {
294  daemon_log(LOG_WARNING, "Failed to kill watchdog daemon for %s",
295  argv[prog_start]);
296  }
297  return (ret < 0) ? 1 : 0;
298  }
299 
300  if (arg_daemon_status) {
301  // Check daemon status
302  if (daemon_pid_file_is_running() < 0) {
303  if (arg_verbose) {
304  printf("Watchdog daemon for %s is not running\n", argv[prog_start]);
305  }
306  return 1;
307  } else {
308  if (arg_verbose) {
309  printf("Watchdog daemon for %s is running\n", argv[prog_start]);
310  }
311  return 0;
312  }
313  }
314 
315  // Check that the daemon is not run twice a the same time
316  if ((dpid = daemon_pid_file_is_running()) >= 0) {
317  daemon_log(LOG_ERR, "Watchdog daemon for %s already running on (PID %u)",
318  argv[prog_start], dpid);
319  return 201;
320  }
321 
322  dpid = daemonize(argc, argv);
323  if ( dpid < 0 ) {
324  daemonize_cleanup();
325  return 201;
326  } else if (dpid) {
327  // parent
328  return 0;
329  } // else child, continue as usual
330  }
331 #else
332  if (daemonize) {
333  printf("Daemonize support was not available at compile time.\n");
334  exit(13);
335  }
336 #endif
337 
338  struct sigaction sa;
339  sa.sa_handler = handle_signal;
340  sigemptyset(&sa.sa_mask);
341  sa.sa_flags = 0;
342  sigaction(SIGINT, &sa, NULL);
343  sigaction(SIGKILL, &sa, NULL);
344  sigaction(SIGTERM, &sa, NULL);
345  sigaction(SIGUSR1, &sa, NULL);
346  sigaction(SIGUSR2, &sa, NULL);
347 
348  pid_t pid = -1;
349  while (! g_quit) {
350  pid = fork_and_exec(argc, argv, prog_start);
351 
352  while (pid != -1 && ! g_quit) {
353 
354  int status = 0;
355  pid_t cpid = waitpid(pid, &status, WUNTRACED | WCONTINUED);
356  printf("Wait returned\n");
357 
358  if (cpid == -1) {
359  printf("Failed to wait for child: %s\n", strerror(errno));
360  } else if (WIFEXITED(status)) {
361  printf("%i|%s exited, status=%d\n", cpid, argv[prog_start],
362  WEXITSTATUS(status));
363  pid = -1;
364  } else if (WIFSIGNALED(status)) {
365  printf("%i|%s killed by signal %s\n", cpid, argv[prog_start],
366  strsignal(WTERMSIG(status)));
367  pid = -1;
368  } else if (WIFSTOPPED(status)) {
369  printf("%i|%s stopped by signal %s\n", cpid, argv[prog_start],
370  strsignal(WSTOPSIG(status)));
371  pid = -1;
372  } else if (WIFCONTINUED(status)) {
373  printf("%i|%s continued\n", cpid, argv[prog_start]);
374  }
375  }
376  }
377 
378  if (pid != -1) {
379 
380  int last_quit = 0;
381  printf("Stopping child. Press Ctrl-C again to escalate.\n");
382 
383  for (unsigned int i = 0; i < 600; ++i) {
384  if (last_quit != g_quit) {
385  int signum;
386  if (g_quit <= 2) {
387  signum = SIGINT;
388  } else if (g_quit == 3) {
389  signum = SIGTERM;
390  } else {
391  signum = SIGKILL;
392  }
393 
394  printf("Killing %s with signal %s\n", argv[prog_start],
395  strsignal(signum));
396  if (kill(pid, signum) == -1) {
397  printf("Failed to kill %s: %s\n", argv[prog_start], strerror(errno));
398  }
399  }
400  last_quit = g_quit;
401 
402  usleep(10000);
403  int status;
404  int rv = waitpid(pid, &status, WNOHANG);
405  if (rv == -1) {
406  if (errno == EINTR) continue;
407  if (errno == ECHILD) {
408  pid = -1;
409  break;
410  }
411  } else if (rv > 0) {
412  pid = -1;
413  break;
414  }
415  if (i >= 300) g_quit = 2;
416  if (i >= 500) g_quit = 3;
417  }
418  }
419 
420 #ifdef HAVE_LIBDAEMON
421  if (arg_daemonize) {
422  daemonize_cleanup();
423  }
424 #endif
425 
426  return 0;
427 }
Base class for exceptions in Fawkes.
Definition: exception.h:36