mirror of
https://git.FreeBSD.org/src.git
synced 2025-01-17 15:27:36 +00:00
watchdogd(8) and watchdog(4) enhancements.
The following support was added to watchdog(4): - Support to query the outstanding timeout. - Support to set a software pre-timeout function watchdog with an 'action' - Support to set a software only watchdog with a configurable 'action' 'action' can be a mask specifying a single operation or a combination of: log(9), printf(9), panic(9) and/or kdb_enter(9). Support the following in watchdogged: - Support to utilize the new additions to watchdog(4). - Support to warn if a watchdog script runs for too long. - Support for "dry run" where we do not actually arm the watchdog, but only report on our timing. Sponsored by: iXsystems, Inc. MFC after: 1 month
This commit is contained in:
parent
dc1558d1cd
commit
4b9b732ac0
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=247405
@ -1,5 +1,8 @@
|
||||
/*-
|
||||
* Copyright (c) 2004 Poul-Henning Kamp
|
||||
* Copyright (c) 2013 iXsystems.com,
|
||||
* author: Alfred Perlstein <alfred@freebsd.org>
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -29,21 +32,40 @@
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/conf.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/module.h>
|
||||
#include <sys/syslog.h>
|
||||
#include <sys/watchdog.h>
|
||||
#include <sys/bus.h>
|
||||
#include <machine/bus.h>
|
||||
|
||||
static struct cdev *wd_dev;
|
||||
static volatile u_int wd_last_u;
|
||||
#include <sys/syscallsubr.h> /* kern_clock_gettime() */
|
||||
|
||||
static int
|
||||
kern_do_pat(u_int utim)
|
||||
static int wd_set_pretimeout(int newtimeout, int disableiftoolong);
|
||||
static void wd_timeout_cb(void *arg);
|
||||
|
||||
static struct callout wd_pretimeo_handle;
|
||||
static int wd_pretimeout;
|
||||
static int wd_pretimeout_act = WD_SOFT_LOG;
|
||||
|
||||
static struct callout wd_softtimeo_handle;
|
||||
static int wd_softtimer; /* true = use softtimer instead of hardware
|
||||
watchdog */
|
||||
static int wd_softtimeout_act = WD_SOFT_LOG; /* action for the software timeout */
|
||||
|
||||
static struct cdev *wd_dev;
|
||||
static volatile u_int wd_last_u; /* last timeout value set by kern_do_pat */
|
||||
|
||||
static int wd_lastpat_valid = 0;
|
||||
static time_t wd_lastpat = 0; /* when the watchdog was last patted */
|
||||
|
||||
int
|
||||
wdog_kern_pat(u_int utim)
|
||||
{
|
||||
int error;
|
||||
|
||||
@ -51,11 +73,20 @@ kern_do_pat(u_int utim)
|
||||
return (EINVAL);
|
||||
|
||||
if ((utim & WD_LASTVAL) != 0) {
|
||||
/*
|
||||
* if WD_LASTVAL is set, fill in the bits for timeout
|
||||
* from the saved value in wd_last_u.
|
||||
*/
|
||||
MPASS((wd_last_u & ~WD_INTERVAL) == 0);
|
||||
utim &= ~WD_LASTVAL;
|
||||
utim |= wd_last_u;
|
||||
} else
|
||||
} else {
|
||||
/*
|
||||
* Otherwise save the new interval.
|
||||
* This can be zero (to disable the watchdog)
|
||||
*/
|
||||
wd_last_u = (utim & WD_INTERVAL);
|
||||
}
|
||||
if ((utim & WD_INTERVAL) == WD_TO_NEVER) {
|
||||
utim = 0;
|
||||
|
||||
@ -65,18 +96,49 @@ kern_do_pat(u_int utim)
|
||||
/* Assume no watchdog available; watchdog flags success */
|
||||
error = EOPNOTSUPP;
|
||||
}
|
||||
EVENTHANDLER_INVOKE(watchdog_list, utim, &error);
|
||||
if (wd_softtimer) {
|
||||
if (utim == 0) {
|
||||
callout_stop(&wd_softtimeo_handle);
|
||||
} else {
|
||||
(void) callout_reset(&wd_softtimeo_handle,
|
||||
hz*utim, wd_timeout_cb, "soft");
|
||||
}
|
||||
error = 0;
|
||||
} else {
|
||||
EVENTHANDLER_INVOKE(watchdog_list, utim, &error);
|
||||
}
|
||||
wd_set_pretimeout(wd_pretimeout, true);
|
||||
/*
|
||||
* If we were able to arm/strobe the watchdog, then
|
||||
* update the last time it was strobed for WDIOC_GETTIMELEFT
|
||||
*/
|
||||
if (!error) {
|
||||
struct timespec ts;
|
||||
|
||||
error = kern_clock_gettime(curthread /* XXX */,
|
||||
CLOCK_MONOTONIC_FAST, &ts);
|
||||
if (!error) {
|
||||
wd_lastpat = ts.tv_sec;
|
||||
wd_lastpat_valid = 1;
|
||||
}
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
wd_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t data,
|
||||
int flags __unused, struct thread *td)
|
||||
wd_valid_act(int act)
|
||||
{
|
||||
|
||||
if ((act & ~(WD_SOFT_MASK)) != 0)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
static int
|
||||
wd_ioctl_patpat(caddr_t data)
|
||||
{
|
||||
u_int u;
|
||||
|
||||
if (cmd != WDIOCPATPAT)
|
||||
return (ENOIOCTL);
|
||||
u = *(u_int *)data;
|
||||
if (u & ~(WD_ACTIVE | WD_PASSIVE | WD_LASTVAL | WD_INTERVAL))
|
||||
return (EINVAL);
|
||||
@ -89,9 +151,157 @@ wd_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t data,
|
||||
return (ENOSYS); /* XXX Not implemented yet */
|
||||
u &= ~(WD_ACTIVE | WD_PASSIVE);
|
||||
|
||||
return (kern_do_pat(u));
|
||||
return (wdog_kern_pat(u));
|
||||
}
|
||||
|
||||
static int
|
||||
wd_get_time_left(struct thread *td, time_t *remainp)
|
||||
{
|
||||
struct timespec ts;
|
||||
int error;
|
||||
|
||||
error = kern_clock_gettime(td, CLOCK_MONOTONIC_FAST, &ts);
|
||||
if (error)
|
||||
return (error);
|
||||
if (!wd_lastpat_valid)
|
||||
return (ENOENT);
|
||||
*remainp = ts.tv_sec - wd_lastpat;
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
wd_timeout_cb(void *arg)
|
||||
{
|
||||
const char *type = arg;
|
||||
|
||||
#ifdef DDB
|
||||
if ((wd_pretimeout_act & WD_SOFT_DDB)) {
|
||||
char kdb_why[80];
|
||||
snprintf(kdb_why, sizeof(buf), "watchdog %s timeout", type);
|
||||
kdb_backtrace();
|
||||
kdb_enter(KDB_WHY_WATCHDOG, kdb_why);
|
||||
}
|
||||
#endif
|
||||
if ((wd_pretimeout_act & WD_SOFT_LOG))
|
||||
log(LOG_EMERG, "watchdog %s-timeout, WD_SOFT_LOG", type);
|
||||
if ((wd_pretimeout_act & WD_SOFT_PRINTF))
|
||||
printf("watchdog %s-timeout, WD_SOFT_PRINTF\n", type);
|
||||
if ((wd_pretimeout_act & WD_SOFT_PANIC))
|
||||
panic("watchdog %s-timeout, WD_SOFT_PANIC set", type);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called to manage timeouts.
|
||||
* newtimeout needs to be in the range of 0 to actual watchdog timeout.
|
||||
* if 0, we disable the pre-timeout.
|
||||
* otherwise we set the pre-timeout provided it's not greater than the
|
||||
* current actual watchdog timeout.
|
||||
*/
|
||||
static int
|
||||
wd_set_pretimeout(int newtimeout, int disableiftoolong)
|
||||
{
|
||||
u_int utime;
|
||||
|
||||
utime = wdog_kern_last_timeout();
|
||||
/* do not permit a pre-timeout >= than the timeout. */
|
||||
if (newtimeout >= utime) {
|
||||
/*
|
||||
* If 'disableiftoolong' then just fall through
|
||||
* so as to disable the pre-watchdog
|
||||
*/
|
||||
if (disableiftoolong)
|
||||
newtimeout = 0;
|
||||
else
|
||||
return EINVAL;
|
||||
}
|
||||
|
||||
/* disable the pre-timeout */
|
||||
if (newtimeout == 0) {
|
||||
wd_pretimeout = 0;
|
||||
callout_stop(&wd_pretimeo_handle);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* We determined the value is sane, so reset the callout */
|
||||
(void) callout_reset(&wd_pretimeo_handle, hz*(utime - newtimeout),
|
||||
wd_timeout_cb, "pre-timeout");
|
||||
wd_pretimeout = newtimeout;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
wd_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t data,
|
||||
int flags __unused, struct thread *td)
|
||||
{
|
||||
u_int u;
|
||||
time_t timeleft;
|
||||
int error;
|
||||
|
||||
error = 0;
|
||||
|
||||
switch (cmd) {
|
||||
case WDIOC_SETSOFT:
|
||||
u = *(int *)data;
|
||||
/* do nothing? */
|
||||
if (u == wd_softtimer)
|
||||
break;
|
||||
/* If there is a pending timeout disallow this ioctl */
|
||||
if (wd_last_u != 0) {
|
||||
error = EINVAL;
|
||||
break;
|
||||
}
|
||||
wd_softtimer = u;
|
||||
break;
|
||||
case WDIOC_SETSOFTTIMEOUTACT:
|
||||
u = *(int *)data;
|
||||
if (wd_valid_act(u)) {
|
||||
wd_softtimeout_act = u;
|
||||
} else {
|
||||
error = EINVAL;
|
||||
}
|
||||
break;
|
||||
case WDIOC_SETPRETIMEOUTACT:
|
||||
u = *(int *)data;
|
||||
if (wd_valid_act(u)) {
|
||||
wd_pretimeout_act = u;
|
||||
} else {
|
||||
error = EINVAL;
|
||||
}
|
||||
break;
|
||||
case WDIOC_GETPRETIMEOUT:
|
||||
*(int *)data = (int)wd_pretimeout;
|
||||
break;
|
||||
case WDIOC_SETPRETIMEOUT:
|
||||
error = wd_set_pretimeout(*(int *)data, false);
|
||||
break;
|
||||
case WDIOC_GETTIMELEFT:
|
||||
error = wd_get_time_left(td, &timeleft);
|
||||
if (error)
|
||||
break;
|
||||
*(int *)data = (int)timeleft;
|
||||
break;
|
||||
case WDIOC_SETTIMEOUT:
|
||||
u = *(u_int *)data;
|
||||
error = wdog_kern_pat(u);
|
||||
break;
|
||||
case WDIOC_GETTIMEOUT:
|
||||
u = wdog_kern_last_timeout();
|
||||
*(u_int *)data = u;
|
||||
break;
|
||||
case WDIOCPATPAT:
|
||||
error = wd_ioctl_patpat(data);
|
||||
break;
|
||||
default:
|
||||
error = ENOIOCTL;
|
||||
break;
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the last timeout set, this is NOT the seconds from NOW until timeout,
|
||||
* rather it is the amount of seconds passed to WDIOCPATPAT/WDIOC_SETTIMEOUT.
|
||||
*/
|
||||
u_int
|
||||
wdog_kern_last_timeout(void)
|
||||
{
|
||||
@ -99,16 +309,6 @@ wdog_kern_last_timeout(void)
|
||||
return (wd_last_u);
|
||||
}
|
||||
|
||||
int
|
||||
wdog_kern_pat(u_int utim)
|
||||
{
|
||||
|
||||
if (utim & ~(WD_LASTVAL | WD_INTERVAL))
|
||||
return (EINVAL);
|
||||
|
||||
return (kern_do_pat(utim));
|
||||
}
|
||||
|
||||
static struct cdevsw wd_cdevsw = {
|
||||
.d_version = D_VERSION,
|
||||
.d_ioctl = wd_ioctl,
|
||||
@ -120,10 +320,16 @@ watchdog_modevent(module_t mod __unused, int type, void *data __unused)
|
||||
{
|
||||
switch(type) {
|
||||
case MOD_LOAD:
|
||||
callout_init(&wd_pretimeo_handle, true);
|
||||
callout_init(&wd_softtimeo_handle, true);
|
||||
wd_dev = make_dev(&wd_cdevsw, 0,
|
||||
UID_ROOT, GID_WHEEL, 0600, _PATH_WATCHDOG);
|
||||
return 0;
|
||||
case MOD_UNLOAD:
|
||||
callout_stop(&wd_pretimeo_handle);
|
||||
callout_stop(&wd_softtimeo_handle);
|
||||
callout_drain(&wd_pretimeo_handle);
|
||||
callout_drain(&wd_softtimeo_handle);
|
||||
destroy_dev(wd_dev);
|
||||
return 0;
|
||||
case MOD_SHUTDOWN:
|
||||
|
@ -1,5 +1,8 @@
|
||||
/*-
|
||||
* Copyright (c) 2003 Poul-Henning Kamp
|
||||
* Copyright (c) 2013 iXsystems.com,
|
||||
* author: Alfred Perlstein <alfred@freebsd.org>
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -32,7 +35,18 @@
|
||||
|
||||
#define _PATH_WATCHDOG "fido"
|
||||
|
||||
#define WDIOCPATPAT _IOW('W', 42, u_int)
|
||||
#define WDIOCPATPAT _IOW('W', 42, u_int) /* pat the watchdog */
|
||||
#define WDIOC_SETTIMEOUT _IOW('W', 43, int) /* set/reset the timer */
|
||||
#define WDIOC_GETTIMEOUT _IOR('W', 44, int) /* get total timeout */
|
||||
#define WDIOC_GETTIMELEFT _IOR('W', 45, int) /* get time left */
|
||||
#define WDIOC_GETPRETIMEOUT _IOR('W', 46, int) /* get the pre-timeout */
|
||||
#define WDIOC_SETPRETIMEOUT _IOW('W', 47, int) /* set the pre-timeout */
|
||||
/* set the action when a pre-timeout occurs see: WD_SOFT_* */
|
||||
#define WDIOC_SETPRETIMEOUTACT _IOW('W', 48, int)
|
||||
|
||||
/* use software watchdog instead of hardware */
|
||||
#define WDIOC_SETSOFT _IOW('W', 49, int)
|
||||
#define WDIOC_SETSOFTTIMEOUTACT _IOW('W', 50, int)
|
||||
|
||||
#define WD_ACTIVE 0x8000000
|
||||
/*
|
||||
@ -76,6 +90,15 @@
|
||||
#define WD_TO_8SEC 33
|
||||
#define WD_TO_16SEC 34
|
||||
#define WD_TO_32SEC 35
|
||||
#define WD_TO_64SEC 36
|
||||
#define WD_TO_128SEC 37
|
||||
|
||||
/* action on pre-timeout trigger */
|
||||
#define WD_SOFT_PANIC 0x01 /* panic */
|
||||
#define WD_SOFT_DDB 0x02 /* enter debugger */
|
||||
#define WD_SOFT_LOG 0x04 /* log(9) */
|
||||
#define WD_SOFT_PRINTF 0x08 /* printf(9) */
|
||||
#define WD_SOFT_MASK 0x0f /* all of the above */
|
||||
|
||||
#ifdef _KERNEL
|
||||
|
||||
|
@ -1,3 +1,5 @@
|
||||
.\" Copyright (c) 2013 iXsystems.com,
|
||||
.\" author: Alfred Perlstein <alfred@freebsd.org>
|
||||
.\" Copyright (c) 2004 Poul-Henning Kamp <phk@FreeBSD.org>
|
||||
.\" Copyright (c) 2003 Sean M. Kelly <smkelly@FreeBSD.org>
|
||||
.\" All rights reserved.
|
||||
@ -25,7 +27,7 @@
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd September 2, 2006
|
||||
.Dd September 2, 2013
|
||||
.Dt WATCHDOGD 8
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -33,11 +35,17 @@
|
||||
.Nd watchdog daemon
|
||||
.Sh SYNOPSIS
|
||||
.Nm
|
||||
.Op Fl d
|
||||
.Op Fl dnw
|
||||
.Op Fl -debug
|
||||
.Op Fl -softtimeout
|
||||
.Op Fl -softtimeout-action Ar action
|
||||
.Op Fl -pretimeout Ar timeout
|
||||
.Op Fl -pretimeout-action Ar action
|
||||
.Op Fl e Ar cmd
|
||||
.Op Fl I Ar file
|
||||
.Op Fl s Ar sleep
|
||||
.Op Fl t Ar timeout
|
||||
.Op Fl T Ar script_timeout
|
||||
.Sh DESCRIPTION
|
||||
The
|
||||
.Nm
|
||||
@ -62,6 +70,13 @@ is not specified, the daemon will perform a trivial file system
|
||||
check instead.
|
||||
.Pp
|
||||
The
|
||||
.Fl n
|
||||
argument 'dry-run' will cause watchdog not to arm the system watchdog and
|
||||
instead only run the watchdog function and report on failures.
|
||||
This is useful for developing new watchdogd scripts as the system will not
|
||||
reboot if there are problems with the script.
|
||||
.Pp
|
||||
The
|
||||
.Fl s Ar sleep
|
||||
argument can be used to control the sleep period between each execution
|
||||
of the check and defaults to one second.
|
||||
@ -78,6 +93,16 @@ If this occurs,
|
||||
will no longer execute and thus the kernel's watchdog routines will take
|
||||
action after a configurable timeout.
|
||||
.Pp
|
||||
The
|
||||
.Fl T Ar script_timeout
|
||||
specifies the threshold (in seconds) at which the watchdogd will complain
|
||||
that its script has run for too long.
|
||||
If unset
|
||||
.Ar script_timeout
|
||||
defaults to the value specified by the
|
||||
.Fl s Ar sleep
|
||||
option.
|
||||
.Pp
|
||||
Upon receiving the
|
||||
.Dv SIGTERM
|
||||
or
|
||||
@ -90,17 +115,85 @@ will terminate.
|
||||
The
|
||||
.Nm
|
||||
utility recognizes the following runtime options:
|
||||
.Bl -tag -width ".Fl I Ar file"
|
||||
.Bl -tag -width ".Fl -softtimeout-action Ar action "
|
||||
.It Fl I Ar file
|
||||
Write the process ID of the
|
||||
.Nm
|
||||
utility in the specified file.
|
||||
.It Fl d
|
||||
.It Fl d Fl -debug
|
||||
Do not fork.
|
||||
When this option is specified,
|
||||
.Nm
|
||||
will not fork into the background at startup.
|
||||
.Pp
|
||||
.It Fl w
|
||||
Complain when the watchdog script takes too long.
|
||||
This flag will cause watchdogd to complain when the amount of time to
|
||||
execute the watchdog script exceeds the script_timeout threshold (see -T).
|
||||
.Pp
|
||||
.It Fl -pretimeout Ar timeout
|
||||
Set a "pretimeout" watchdog. At "timeout" seconds before the watchdog
|
||||
will fire, attempt an action. The action is set by the --pretimeout-action
|
||||
flag. The default is just to log a message (WD_SOFT_LOG) via
|
||||
.Xr log 9 .
|
||||
.Pp
|
||||
.It Fl -pretimeout-action Ar action
|
||||
Set the timeout action for the pretimeout. See the section
|
||||
.Sx Timeout Actions .
|
||||
.Pp
|
||||
.It Fl -softtimeout
|
||||
Instead of arming the various hardware watchdogs, only use a basic software
|
||||
watchdog. The default action is just to
|
||||
.Xr log 9
|
||||
a message (WD_SOFT_LOG).
|
||||
.Pp
|
||||
.It Fl -softtimeout-action Ar action
|
||||
Set the timeout action for the softtimeout. See the section
|
||||
.Sx Timeout Actions .
|
||||
.Pp
|
||||
.El
|
||||
.Sh Timeout Actions
|
||||
The following timeout actions are available via the
|
||||
.Fl -pretimeout-action
|
||||
and
|
||||
.Fl -softtimeout-action
|
||||
flags:
|
||||
.Bl -tag -width ".Ar printf "
|
||||
.It Ar panic
|
||||
Call
|
||||
.Xr panic 9
|
||||
when the timeout is reached.
|
||||
.Pp
|
||||
.It Ar ddb
|
||||
Enter the kernel debugger via
|
||||
.Xr kdb_enter 9
|
||||
when the timeout is reached.
|
||||
.Pp
|
||||
.It Ar log
|
||||
Log a message using
|
||||
.Xr log 9
|
||||
when the timeout is reached.
|
||||
.Pp
|
||||
.It Ar printf
|
||||
Call the kernel
|
||||
.Xr printf 9
|
||||
to display a message to the console and
|
||||
.Xr dmesg 8
|
||||
buffer.
|
||||
.Pp
|
||||
.El
|
||||
Actions can be combined in a comma separated list as so:
|
||||
.Ar log,printf
|
||||
which would call both
|
||||
.Xr printf 9
|
||||
and
|
||||
.Xr log 9
|
||||
sending messages both to
|
||||
.Xr dmesg 8
|
||||
and the kernel
|
||||
.Xr log 4
|
||||
device for
|
||||
.Xr syslog 8 .
|
||||
.Sh FILES
|
||||
.Bl -tag -width ".Pa /var/run/watchdogd.pid" -compact
|
||||
.It Pa /var/run/watchdogd.pid
|
||||
@ -125,3 +218,6 @@ and
|
||||
.Pp
|
||||
Some contributions made by
|
||||
.An Jeff Roberson Aq jeff@FreeBSD.org .
|
||||
.Pp
|
||||
The pretimeout and softtimeout action system was added by
|
||||
.An Alfred Perlstein Aq alfred@freebsd.org .
|
||||
|
@ -1,5 +1,8 @@
|
||||
/*-
|
||||
* Copyright (c) 2003-2004 Sean M. Kelly <smkelly@FreeBSD.org>
|
||||
* Copyright (c) 2013 iXsystems.com,
|
||||
* author: Alfred Perlstein <alfred@freebsd.org>
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -50,8 +53,11 @@ __FBSDID("$FreeBSD$");
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
#include <sysexits.h>
|
||||
#include <syslog.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <getopt.h>
|
||||
|
||||
static void parseargs(int, char *[]);
|
||||
static void sighandler(int);
|
||||
static void watchdog_loop(void);
|
||||
@ -63,13 +69,39 @@ static void usage(void);
|
||||
static int debugging = 0;
|
||||
static int end_program = 0;
|
||||
static const char *pidfile = _PATH_VARRUN "watchdogd.pid";
|
||||
static u_int timeout = WD_TO_16SEC;
|
||||
static u_int timeout = WD_TO_128SEC;
|
||||
static u_int pretimeout = 0;
|
||||
static u_int passive = 0;
|
||||
static int is_daemon = 0;
|
||||
static int is_dry_run = 0; /* do not arm the watchdog, only
|
||||
report on timing of the watch
|
||||
program */
|
||||
static int do_timedog = 0;
|
||||
static int do_syslog = 0;
|
||||
static int fd = -1;
|
||||
static int nap = 1;
|
||||
static int carp_thresh_seconds = -1;
|
||||
static char *test_cmd = NULL;
|
||||
|
||||
static const char *getopt_shortopts;
|
||||
|
||||
static int pretimeout_set;
|
||||
static int pretimeout_act;
|
||||
static int pretimeout_act_set;
|
||||
|
||||
static int softtimeout_set;
|
||||
static int softtimeout_act;
|
||||
static int softtimeout_act_set;
|
||||
|
||||
static struct option longopts[] = {
|
||||
{ "debug", no_argument, &debugging, 1 },
|
||||
{ "pretimeout", required_argument, &pretimeout_set, 1 },
|
||||
{ "pretimeout-action", required_argument, &pretimeout_act_set, 1 },
|
||||
{ "softtimeout", no_argument, &softtimeout_set, 1 },
|
||||
{ "softtimeout-action", required_argument, &softtimeout_act_set, 1 },
|
||||
{ NULL, 0, NULL, 0}
|
||||
};
|
||||
|
||||
/*
|
||||
* Ask malloc() to map minimum-sized chunks of virtual address space at a time,
|
||||
* so that mlockall() won't needlessly wire megabytes of unused memory into the
|
||||
@ -93,12 +125,18 @@ main(int argc, char *argv[])
|
||||
|
||||
parseargs(argc, argv);
|
||||
|
||||
if (do_syslog) {
|
||||
openlog("watchdogd", LOG_CONS|LOG_NDELAY|LOG_PERROR,
|
||||
LOG_DAEMON);
|
||||
|
||||
}
|
||||
|
||||
rtp.type = RTP_PRIO_REALTIME;
|
||||
rtp.prio = 0;
|
||||
if (rtprio(RTP_SET, 0, &rtp) == -1)
|
||||
err(EX_OSERR, "rtprio");
|
||||
|
||||
if (watchdog_init() == -1)
|
||||
if (!is_dry_run && watchdog_init() == -1)
|
||||
errx(EX_SOFTWARE, "unable to initialize watchdog");
|
||||
|
||||
if (is_daemon) {
|
||||
@ -108,6 +146,7 @@ main(int argc, char *argv[])
|
||||
pfh = pidfile_open(pidfile, 0600, &otherpid);
|
||||
if (pfh == NULL) {
|
||||
if (errno == EEXIST) {
|
||||
watchdog_onoff(0);
|
||||
errx(EX_SOFTWARE, "%s already running, pid: %d",
|
||||
getprogname(), otherpid);
|
||||
}
|
||||
@ -164,6 +203,9 @@ static int
|
||||
watchdog_init(void)
|
||||
{
|
||||
|
||||
if (is_dry_run)
|
||||
return 0;
|
||||
|
||||
fd = open("/dev/" _PATH_WATCHDOG, O_RDWR);
|
||||
if (fd >= 0)
|
||||
return (0);
|
||||
@ -171,27 +213,99 @@ watchdog_init(void)
|
||||
return (-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we are doing timing, then get the time.
|
||||
*/
|
||||
static int
|
||||
watchdog_getuptime(struct timespec *tp)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (!do_timedog)
|
||||
return 0;
|
||||
|
||||
error = clock_gettime(CLOCK_UPTIME_FAST, tp);
|
||||
if (error)
|
||||
warn("clock_gettime");
|
||||
return (error);
|
||||
}
|
||||
|
||||
static long
|
||||
watchdog_check_dogfunction_time(struct timespec *tp_start,
|
||||
struct timespec *tp_end)
|
||||
{
|
||||
struct timeval tv_start, tv_end, tv;
|
||||
const char *cmd_prefix, *cmd;
|
||||
int sec;
|
||||
|
||||
if (!do_timedog)
|
||||
return (0);
|
||||
|
||||
TIMESPEC_TO_TIMEVAL(&tv_start, tp_start);
|
||||
TIMESPEC_TO_TIMEVAL(&tv_end, tp_end);
|
||||
timersub(&tv_end, &tv_start, &tv);
|
||||
sec = tv.tv_sec;
|
||||
if (sec < carp_thresh_seconds)
|
||||
return (sec);
|
||||
|
||||
if (test_cmd) {
|
||||
cmd_prefix = "Watchdog program";
|
||||
cmd = test_cmd;
|
||||
} else {
|
||||
cmd_prefix = "Watchdog operation";
|
||||
cmd = "stat(\"/etc\", &sb)";
|
||||
}
|
||||
if (do_syslog)
|
||||
syslog(LOG_CRIT, "%s: '%s' took too long: "
|
||||
"%d.%06ld seconds >= %d seconds threshhold",
|
||||
cmd_prefix, cmd, sec, (long)tv.tv_usec,
|
||||
carp_thresh_seconds);
|
||||
warnx("%s: '%s' took too long: "
|
||||
"%d.%06ld seconds >= %d seconds threshhold",
|
||||
cmd_prefix, cmd, sec, (long)tv.tv_usec, carp_thresh_seconds);
|
||||
return (sec);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Main program loop which is iterated every second.
|
||||
*/
|
||||
static void
|
||||
watchdog_loop(void)
|
||||
{
|
||||
struct timespec ts_start, ts_end;
|
||||
struct stat sb;
|
||||
int failed;
|
||||
long waited;
|
||||
int error, failed;
|
||||
|
||||
while (end_program != 2) {
|
||||
failed = 0;
|
||||
|
||||
error = watchdog_getuptime(&ts_start);
|
||||
if (error) {
|
||||
end_program = 1;
|
||||
goto try_end;
|
||||
}
|
||||
|
||||
if (test_cmd != NULL)
|
||||
failed = system(test_cmd);
|
||||
else
|
||||
failed = stat("/etc", &sb);
|
||||
|
||||
error = watchdog_getuptime(&ts_end);
|
||||
if (error) {
|
||||
end_program = 1;
|
||||
goto try_end;
|
||||
}
|
||||
|
||||
waited = watchdog_check_dogfunction_time(&ts_start, &ts_end);
|
||||
|
||||
if (failed == 0)
|
||||
watchdog_patpat(timeout|WD_ACTIVE);
|
||||
sleep(nap);
|
||||
if (nap - waited > 0)
|
||||
sleep(nap - waited);
|
||||
|
||||
try_end:
|
||||
if (end_program != 0) {
|
||||
if (watchdog_onoff(0) == 0) {
|
||||
end_program = 2;
|
||||
@ -211,6 +325,9 @@ static int
|
||||
watchdog_patpat(u_int t)
|
||||
{
|
||||
|
||||
if (is_dry_run)
|
||||
return 0;
|
||||
|
||||
return ioctl(fd, WDIOCPATPAT, &t);
|
||||
}
|
||||
|
||||
@ -221,11 +338,62 @@ watchdog_patpat(u_int t)
|
||||
static int
|
||||
watchdog_onoff(int onoff)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (onoff)
|
||||
/* fake successful watchdog op if a dry run */
|
||||
if (is_dry_run)
|
||||
return 0;
|
||||
|
||||
if (onoff) {
|
||||
/*
|
||||
* Call the WDIOC_SETSOFT regardless of softtimeout_set
|
||||
* because we'll need to turn it off if someone had turned
|
||||
* it on.
|
||||
*/
|
||||
error = ioctl(fd, WDIOC_SETSOFT, &softtimeout_set);
|
||||
if (error) {
|
||||
warn("setting WDIOC_SETSOFT %d", softtimeout_set);
|
||||
return (error);
|
||||
}
|
||||
error = watchdog_patpat((timeout|WD_ACTIVE));
|
||||
if (error) {
|
||||
warn("watchdog_patpat failed");
|
||||
goto failsafe;
|
||||
}
|
||||
if (softtimeout_act_set) {
|
||||
error = ioctl(fd, WDIOC_SETSOFTTIMEOUTACT,
|
||||
&softtimeout_act);
|
||||
if (error) {
|
||||
warn("setting WDIOC_SETSOFTTIMEOUTACT %d",
|
||||
softtimeout_act);
|
||||
goto failsafe;
|
||||
}
|
||||
}
|
||||
if (pretimeout_set) {
|
||||
error = ioctl(fd, WDIOC_SETPRETIMEOUT, &pretimeout);
|
||||
if (error) {
|
||||
warn("setting WDIOC_SETPRETIMEOUT %d",
|
||||
pretimeout);
|
||||
goto failsafe;
|
||||
}
|
||||
}
|
||||
if (pretimeout_act_set) {
|
||||
error = ioctl(fd, WDIOC_SETPRETIMEOUTACT,
|
||||
&pretimeout_act);
|
||||
if (error) {
|
||||
warn("setting WDIOC_SETPRETIMEOUTACT %d",
|
||||
pretimeout_act);
|
||||
goto failsafe;
|
||||
}
|
||||
}
|
||||
/* pat one more time for good measure */
|
||||
return watchdog_patpat((timeout|WD_ACTIVE));
|
||||
else
|
||||
} else {
|
||||
return watchdog_patpat(0);
|
||||
}
|
||||
failsafe:
|
||||
watchdog_patpat(0);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -235,27 +403,132 @@ static void
|
||||
usage(void)
|
||||
{
|
||||
if (is_daemon)
|
||||
fprintf(stderr, "usage: watchdogd [-d] [-e cmd] [-I file] [-s sleep] [-t timeout]\n");
|
||||
fprintf(stderr, "usage:\n"
|
||||
" watchdogd [-dnw] [-e cmd] [-I file] [-s sleep] [-t timeout]\n"
|
||||
" [-T script_timeout]\n"
|
||||
" [--debug]\n"
|
||||
" [--pretimeout seconds] [-pretimeout-action action]\n"
|
||||
" [--softtimeout] [-softtimeout-action action]\n"
|
||||
);
|
||||
else
|
||||
fprintf(stderr, "usage: watchdog [-d] [-t timeout]\n");
|
||||
exit(EX_USAGE);
|
||||
}
|
||||
|
||||
/*
 * Parse a strictly positive integer option argument.
 *
 * 'myoptarg' is converted with strtol() using base 0, so decimal, octal
 * (leading 0) and hexadecimal (leading 0x) notations are accepted.
 * On any problem the program exits with EX_USAGE via errx(); the message
 * names the option as "--<longopt>" when 'longopt' is non-NULL and as
 * "-<opt>" otherwise.
 *
 * Returns the parsed value, which is always greater than zero.
 */
static long
fetchtimeout(int opt, const char *longopt, const char *myoptarg)
{
	const char *errstr;
	char *p;
	long rv;

	errstr = NULL;
	p = NULL;
	errno = 0;
	rv = strtol(myoptarg, &p, 0);
	/*
	 * "No digits consumed" (p == myoptarg), trailing junk and range
	 * errors are all reported as "not a number".  The sign check is
	 * only made for input that parsed cleanly, so it cannot replace
	 * the more specific diagnostic.
	 */
	if (p == myoptarg || *p != '\0' || errno != 0)
		errstr = "is not a number";
	else if (rv <= 0)
		errstr = "must be greater than zero";
	if (errstr) {
		if (longopt)
			errx(EX_USAGE, "--%s argument %s", longopt, errstr);
		else
			errx(EX_USAGE, "-%c argument %s", opt, errstr);
	}
	return (rv);
}
|
||||
|
||||
struct act_tbl {
|
||||
const char *at_act;
|
||||
int at_value;
|
||||
};
|
||||
|
||||
struct act_tbl act_tbl[] = {
|
||||
{ "panic", WD_SOFT_PANIC },
|
||||
{ "ddb", WD_SOFT_DDB },
|
||||
{ "log", WD_SOFT_LOG },
|
||||
{ "printf", WD_SOFT_PRINTF },
|
||||
{ NULL, 0 }
|
||||
};
|
||||
|
||||
static void
|
||||
timeout_act_error(const char *lopt, const char *badact)
|
||||
{
|
||||
char *opts, *oldopts;
|
||||
int i;
|
||||
|
||||
opts = NULL;
|
||||
for (i = 0; act_tbl[i].at_act != NULL; i++) {
|
||||
oldopts = opts;
|
||||
if (asprintf(&opts, "%s%s%s",
|
||||
oldopts == NULL ? "" : oldopts,
|
||||
oldopts == NULL ? "" : ", ",
|
||||
act_tbl[i].at_act) == -1)
|
||||
err(EX_OSERR, "malloc");
|
||||
free(oldopts);
|
||||
}
|
||||
warnx("bad --%s argument '%s' must be one of (%s).",
|
||||
lopt, badact, opts);
|
||||
usage();
|
||||
}
|
||||
|
||||
/*
|
||||
* Take a comma separated list of actions and or the flags
|
||||
* together for the ioctl.
|
||||
*/
|
||||
static int
|
||||
timeout_act_str2int(const char *lopt, const char *acts)
|
||||
{
|
||||
int i;
|
||||
char *dupacts, *tofree;
|
||||
char *o;
|
||||
int rv = 0;
|
||||
|
||||
tofree = dupacts = strdup(acts);
|
||||
if (!tofree)
|
||||
err(EX_OSERR, "malloc");
|
||||
while ((o = strsep(&dupacts, ",")) != NULL) {
|
||||
for (i = 0; act_tbl[i].at_act != NULL; i++) {
|
||||
if (!strcmp(o, act_tbl[i].at_act)) {
|
||||
rv |= act_tbl[i].at_value;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (act_tbl[i].at_act == NULL)
|
||||
timeout_act_error(lopt, o);
|
||||
}
|
||||
free(tofree);
|
||||
return rv;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle the few command line arguments supported.
|
||||
*/
|
||||
static void
|
||||
parseargs(int argc, char *argv[])
|
||||
{
|
||||
int longindex;
|
||||
int c;
|
||||
char *p;
|
||||
const char *lopt;
|
||||
double a;
|
||||
|
||||
/*
|
||||
* if we end with a 'd' aka 'watchdogd' then we are the daemon program,
|
||||
* otherwise run as a command line utility.
|
||||
*/
|
||||
c = strlen(argv[0]);
|
||||
if (argv[0][c - 1] == 'd')
|
||||
is_daemon = 1;
|
||||
while ((c = getopt(argc, argv,
|
||||
is_daemon ? "I:de:s:t:?" : "dt:?")) != -1) {
|
||||
|
||||
if (is_daemon)
|
||||
getopt_shortopts = "I:de:ns:t:ST:w?";
|
||||
else
|
||||
getopt_shortopts = "dt:?";
|
||||
|
||||
while ((c = getopt_long(argc, argv, getopt_shortopts, longopts,
|
||||
&longindex)) != -1) {
|
||||
switch (c) {
|
||||
case 'I':
|
||||
pidfile = optarg;
|
||||
@ -266,17 +539,19 @@ parseargs(int argc, char *argv[])
|
||||
case 'e':
|
||||
test_cmd = strdup(optarg);
|
||||
break;
|
||||
case 'n':
|
||||
is_dry_run = 1;
|
||||
break;
|
||||
#ifdef notyet
|
||||
case 'p':
|
||||
passive = 1;
|
||||
break;
|
||||
#endif
|
||||
case 's':
|
||||
p = NULL;
|
||||
errno = 0;
|
||||
nap = strtol(optarg, &p, 0);
|
||||
if ((p != NULL && *p != '\0') || errno != 0)
|
||||
errx(EX_USAGE, "-s argument is not a number");
|
||||
nap = fetchtimeout(c, NULL, optarg);
|
||||
break;
|
||||
case 'S':
|
||||
do_syslog = 1;
|
||||
break;
|
||||
case 't':
|
||||
p = NULL;
|
||||
@ -286,6 +561,7 @@ parseargs(int argc, char *argv[])
|
||||
errx(EX_USAGE, "-t argument is not a number");
|
||||
if (a < 0)
|
||||
errx(EX_USAGE, "-t argument must be positive");
|
||||
|
||||
if (a == 0)
|
||||
timeout = WD_TO_NEVER;
|
||||
else
|
||||
@ -294,12 +570,39 @@ parseargs(int argc, char *argv[])
|
||||
printf("Timeout is 2^%d nanoseconds\n",
|
||||
timeout);
|
||||
break;
|
||||
case 'T':
|
||||
carp_thresh_seconds = fetchtimeout(c, "NULL", optarg);
|
||||
break;
|
||||
case 'w':
|
||||
do_timedog = 1;
|
||||
break;
|
||||
case 0:
|
||||
lopt = longopts[longindex].name;
|
||||
if (!strcmp(lopt, "pretimeout")) {
|
||||
pretimeout = fetchtimeout(0, lopt, optarg);
|
||||
} else if (!strcmp(lopt, "pretimeout-action")) {
|
||||
pretimeout_act = timeout_act_str2int(lopt,
|
||||
optarg);
|
||||
} else if (!strcmp(lopt, "softtimeout-action")) {
|
||||
softtimeout_act = timeout_act_str2int(lopt,
|
||||
optarg);
|
||||
} else {
|
||||
/* warnx("bad option at index %d: %s", optind,
|
||||
argv[optind]);
|
||||
usage();
|
||||
*/
|
||||
}
|
||||
break;
|
||||
case '?':
|
||||
default:
|
||||
usage();
|
||||
/* NOTREACHED */
|
||||
}
|
||||
}
|
||||
|
||||
if (carp_thresh_seconds == -1)
|
||||
carp_thresh_seconds = nap;
|
||||
|
||||
if (argc != optind)
|
||||
errx(EX_USAGE, "extra arguments.");
|
||||
if (is_daemon && timeout < WD_TO_1SEC)
|
||||
|
Loading…
Reference in New Issue
Block a user