-
-
Notifications
You must be signed in to change notification settings - Fork 40
/
Copy pathwatchdogd.conf
133 lines (123 loc) · 4.42 KB
/
watchdogd.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# /etc/watchdogd.conf sample
# Commented out values are program defaults.
#
# The checker/monitor `warning` and `critical` levels are 0.00-1.00,
# i.e. 0-100%, except for load average which can vary a lot between
# systems and use-cases, not just because of the number of CPU cores.
# Use the `script = ...` setting to call script when `warning` and
# `critical` are reached for a monitor. In `critical` the monitor
# otherwise triggers an unconditional reboot.
#
# NOTE: `critical` is optional, omitting it disables the reboot action.
#
###
# Do not set WDT timeout and kick interval too low, the daemon runs at
# SCHED_OTHER level with all other tasks, unless the process supervisor
# is enabled. The monitor plugins (below) need CPU time as well.
#timeout = 20
#interval = 10
###
# With safe-exit enabled (true) the daemon will ask the driver disable
# the WDT before exiting (SIGINT). However, some WDT drivers (or HW)
# may not support this.
#safe-exit = false
### Supervisor
# Instrumented processes can have their main loop supervised. Processes
# subscribe to this service using the libwdog API, see the docs for more
# on this. When the supervisor is enabled and the priority is set to a
# value > 0, watchdogd runs as a SCHED_RR process with elevated realtime
# priority. When disabled, or the priority is set to zero (0), it runs
# as a regular SCHED_OTHER process, this is the default.
#
# When a supervised process fails to meet its deadline, the daemon will
# perform an unconditional reset having saved the reset reason. If a
# script is provided in this section it will be called instead. The
# script is called as:
#
# script.sh supervisor CODE PID LABEL
#
# Availabel CODEs for the reset reason are avilable in wdog.h
#
supervisor {
# !!!REMEMBER TO ENABLE reset-reason (below) AS WELL!!!
# enabled = true
# priority = 98
script = "/path/to/supervisor-script.sh"
}
### Reset reason
# The following section controls if/how the reset reason & reset counter
# is tracked. By default this is disabled, since not all systems allow
# writing to disk, e.g. embedded systems using MTD devices with limited
# number of write cycles.
#
# The default file setting is a non-volatile path, according to the FHS.
# It can be changed to another location, but make sure that location is
# writable first.
reset-reason {
# enabled = true
file = "/var/lib/misc/watchdogd.state"
}
### Checkers/Monitors ##################################################
#
# Script or command to run instead of reboot when a monitor plugin
# reaches any of its critical or warning level. Setting this will
# disable the built-in reboot on critical, it is therefore up to the
# script to perform reboot, if needed. The script is called as:
#
# script.sh {filenr, fsmon, loadavg, meminfo} {crit, warn} VALUE
#
#script = "/path/to/reboot-action.sh"
# Monitors file descriptor leaks based on /proc/sys/fs/file-nr
filenr {
# enabled = true
interval = 300
logmark = false
warning = 0.9
critical = 1.0
# script = "/path/to/alt-reboot-action.sh"
}
# Monitors file system, blocks and inode usage against watermarks
# The script is called with fsmon as the first argument and there
# are two environment variables FSMON_NAME, for the monitored path,
# and FSMON_TYPE indicating either 'blocks' or 'inodes'.
#fsmon /var {
# enabled = true
# interval = 300
# logmark = false
# warning = 0.95
# critical = 1.0
# script = "/path/to/alt-reboot-action.sh"
#}
# Monitors load average based on sysinfo() from /proc/loadavg
# The level is composed from the average of the 1 and 5 min marks.
loadavg {
# enabled = true
interval = 300
logmark = false
warning = 1.0
critical = 2.0
# script = "/path/to/alt-reboot-action.sh"
}
# Monitors free RAM based on data from /proc/meminfo
meminfo {
# enabled = true
interval = 300
logmark = false
warning = 0.9
critical = 0.95
# script = "/path/to/alt-reboot-action.sh"
}
# Monitor a generic script, executes 'monitor-script' every 'interval'
# seconds, with a max runtime of 'timeout' seconds. When the exit code
# of the monitor script is above the critical level watchdogd either
# starts the reboot, or calls the alternate 'script' to determin the
# next cause of action.
generic {
# enabled = true
interval = 300
timeout = 60
warning = 1
critical = 10
# monitor-script = "/path/to/monitor-script.sh"
# script = "/path/to/alt-reboot-action.sh"
}