-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck_load
131 lines (122 loc) · 3.84 KB
/
check_load
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#! /usr/bin/python
#import modules
import sys, psutil, getopt
from datetime import timedelta
# Nagios plugin exit codes.  The Nagios plugin API defines exactly four
# states: 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN.  UNKNOWN must be 3;
# a value of -1 would surface as exit status 255, which is outside the range
# Nagios recognises.
OK = 0
WARNING = 1
CRITICAL = 2
UNKNOWN = 3
# One-line usage hint printed when the command line is invalid.
usage = 'usage: ./check_load.py -w/--warn <integer> -c/--crit <integer>'
# get uptime
def uptime():
    """Return the system uptime formatted as a ``timedelta`` string.

    Reads the first whitespace-separated field of the first line of
    ``/proc/uptime``, interprets it as a number of seconds, and returns
    ``str(timedelta(seconds=...))``.
    """
    with open('/proc/uptime', 'r') as proc_file:
        first_field = proc_file.readline().split()[0]
    elapsed = timedelta(seconds=float(first_field))
    return str(elapsed)
# get overall cpu use
def total_cpu():
    """Return the overall CPU utilisation percentage.

    Delegates to ``psutil.cpu_percent`` with a one-second sampling interval
    and ``percpu=False`` (a single figure rather than one per core).
    """
    return psutil.cpu_percent(percpu=False, interval=1)
# get per core cpu use
def percore_cpu():
    """Return the CPU utilisation of every core.

    Samples ``psutil.cpu_percent(percpu=True)`` over a one-second interval.

    Returns:
        A list of ``(core_id, percent)`` tuples in the order psutil reports
        them, where ``core_id`` is the zero-based position rendered as a
        string.
    """
    samples = psutil.cpu_percent(interval=1, percpu=True)
    return [(str(index), percent) for index, percent in enumerate(samples)]
#Nagios performance data
def performance_data(total_cpu_use, percore_cpu_use, warn, crit):
    """Build the Nagios performance-data string for this check.

    Every metric is rendered as ``label=value%;warn;crit;min;max`` and the
    metrics are joined with single spaces, which is the separator the Nagios
    plugin guidelines require.  Concatenating them with no separator (e.g.
    ``...;0;100CPU-0=...``) produces output perfdata parsers cannot split.

    Args:
        total_cpu_use: overall CPU percentage.
        percore_cpu_use: iterable of sequences whose item 0 is the core id
            and item 1 is that core's CPU percentage.
        warn: warning threshold reported for the ``Total`` metric.
        crit: critical threshold reported for the ``Total`` metric.

    Returns:
        The perfdata string: the ``Total`` metric first, followed by one
        ``CPU-<id>`` metric per core in input order.
    """
    metrics = ['Total=%s%%;%s;%s;0;100' % (total_cpu_use, warn, crit)]
    # Per-core metrics carry a fixed warning level of 99, no critical level,
    # a minimum of 0 and no maximum.
    for core in percore_cpu_use:
        metrics.append('CPU-%s=%s%%;99;;0;' % (core[0], core[1]))
    return ' '.join(metrics)
#check total CPU
def total_cpu_check(total_cpu_use, percore_cpu_use, warn, crit, perf_data):
    """Compare CPU usage against the thresholds; exit on any violation.

    The value checked is the ``total_cpu_use`` argument itself, so the
    decision is made on the same sample that the caller used to build
    ``perf_data`` (re-sampling here would check a different measurement and
    cost an extra sampling interval).

    Args:
        total_cpu_use: overall CPU percentage to evaluate.
        percore_cpu_use: iterable of sequences whose item 0 is the core id
            and item 1 is that core's CPU percentage.
        warn: warning threshold for the total (violated when exceeded).
        crit: critical threshold for the total (violated when exceeded).
        perf_data: perfdata string appended after ``|`` in the status line.

    Returns:
        ``None`` when no threshold is exceeded.

    Raises:
        SystemExit: with ``CRITICAL`` when the total exceeds ``crit``; with
            ``WARNING`` when the total exceeds ``warn`` or any single core
            exceeds 99.  A status line is printed before exiting.
    """
    if total_cpu_use > crit:
        print('CRITICAL - Total CPU use is %s %% | %s'
              % (total_cpu_use, perf_data))
        sys.exit(CRITICAL)
    if total_cpu_use > warn:
        # Uptime is only reported in this message, so read it only here.
        print('WARNING - Total CPU use is %s %% - uptime: %s | %s'
              % (total_cpu_use, uptime(), perf_data))
        sys.exit(WARNING)
    # Total is within limits: still warn if any one core is saturated.
    for core in percore_cpu_use:
        if core[1] > 99:
            print('WARNING - CPU Core %s is at %s %% | %s'
                  % (core[0], core[1], perf_data))
            sys.exit(WARNING)
    return None
# Define the command-line options and validate their values. Show usage or report which required option is missing.
def command_line_validate(argv):
    """Parse and validate the warning and critical thresholds.

    Recognised options are ``-w``/``--warn`` and ``-c``/``--crit``, each
    taking an integer.  Both are required.

    Args:
        argv: the argument list to parse (without the program name).

    Returns:
        A ``(warn, crit)`` tuple of integers.

    Raises:
        SystemExit: with ``CRITICAL`` after printing an explanatory message
            when the options cannot be parsed, a value is not an integer, a
            required option is missing, or ``warn`` is greater than ``crit``.
    """
    try:
        opts, _positional = getopt.getopt(argv, 'w:c:o:', ['warn=', 'crit='])
    except getopt.GetoptError:
        # Exit explicitly instead of carrying on with no parsed options.
        print(usage)
        sys.exit(CRITICAL)

    warn = None
    crit = None
    for opt, arg in opts:
        if opt in ('-w', '--warn'):
            try:
                warn = int(arg)
            except ValueError:
                print('***warn value must be an integer***')
                sys.exit(CRITICAL)
        elif opt in ('-c', '--crit'):
            try:
                crit = int(arg)
            except ValueError:
                print('***crit value must be an integer***')
                sys.exit(CRITICAL)
        else:
            # The option string also accepts '-o <value>', which has no
            # meaning here: show the usage hint and keep parsing.
            print(usage)

    if warn is None:
        print('***warn level is required***')
        print(usage)
        sys.exit(CRITICAL)
    if crit is None:
        print('***crit level is required***')
        print(usage)
        sys.exit(CRITICAL)

    # Reject a warning level above the critical level; equal levels are
    # accepted.
    if warn > crit:
        print('***warning level must be less than critical level***')
        sys.exit(CRITICAL)
    return warn, crit
# main function
def main():
    """Run the CPU check and print the OK status line when nothing is wrong.

    Parses the thresholds from ``sys.argv``, samples total CPU use, uptime
    and per-core CPU use (in that order), builds the perfdata string and
    hands everything to ``total_cpu_check``.  The OK line is printed only
    when that check returns ``None``.
    """
    warn, crit = command_line_validate(sys.argv[1:])
    total_cpu_use = total_cpu()
    uptime_now = uptime()
    percore_cpu_use = percore_cpu()
    perf_data = performance_data(total_cpu_use, percore_cpu_use, warn, crit)
    outcome = total_cpu_check(
        total_cpu_use, percore_cpu_use, warn, crit, perf_data)
    if outcome is not None:
        return
    # The two spaces after '-' are intentional: they keep the status line
    # identical to what this check has always emitted.
    print('OK - uptime: %s -  %s %% | %s'
          % (uptime_now, total_cpu_use, perf_data))
# Run the check only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()