Commit a1eeb872bfc97d8d856c164b2f79e6a7a83d47c6
1 parent
4a52b302
added check_temperature plugin to monitor temperatures on a RaspberryPi
Showing
2 changed files
with
331 additions
and
0 deletions
README.md
@@ -24,6 +24,8 @@ This repository contains the following scripts: | @@ -24,6 +24,8 @@ This repository contains the following scripts: | ||
24 | monitor multi-home and dual-stack (i.e. ipv4 and ipv6) servers. | 24 | monitor multi-home and dual-stack (i.e. ipv4 and ipv6) servers. |
25 | * [check_otp](#check_otp) | 25 | * [check_otp](#check_otp) |
26 | plugin to monitor PrivacyIDEA (and LinOTP) OTP validation | 26 | plugin to monitor PrivacyIDEA (and LinOTP) OTP validation |
27 | + * [check_temperature](#check_temperature) | ||
28 | + plugin to monitor the temperature of a 1-wire sensor on a RaspberryPi | ||
27 | * [nagiosstatus](#nagiosstatus) | 29 | * [nagiosstatus](#nagiosstatus) |
28 | CGI-BIN script to report the status of nagios (to monitor nagios itself) | 30 | CGI-BIN script to report the status of nagios (to monitor nagios itself) |
29 | 31 | ||
@@ -233,6 +235,63 @@ define service { | @@ -233,6 +235,63 @@ define service { | ||
233 | ~~~ | 235 | ~~~ |
234 | 236 | ||
235 | 237 | ||
238 | +<a name=check_temperature>plugins/check_temperature</a> | ||
239 | +------------------------------------------------------- | ||
240 | +Plugin (check) to monitor the temperature using a sensor connected to a | |
241 | +RaspberryPi. This implementation is specifically for the DS18B20 1-wire | ||
242 | +temperature sensor. Other methods and interfaces can be plugged in easily (just | ||
243 | +raise a request or provide a patch). For information on how to connect the sensor | |
244 | +to the RaspberryPi and to get it working please see [this Adafruit tutorial]( | ||
245 | +https://learn.adafruit.com/adafruits-raspberry-pi-lesson-11-ds18b20-temperature-sensing). | ||
246 | + | ||
247 | +To enable the 1-wire interface support on the RaspberryPi one can use the | ||
248 | +command: | ||
249 | +~~~ | ||
250 | + sudo raspi-config nonint do_onewire 0 | ||
251 | +~~~ | ||
252 | +or use `raspi-config` in interactive mode (9. Advanced Options --> A9. 1-Wire). | ||
253 | +Please note that changing this requires a reboot. | ||
254 | + | ||
255 | +Installation is straightforward: after installing the script on the server, | |
256 | +add the following to your Nagios `commands.cmd` configuration file: | ||
257 | + | ||
258 | +~~~ | ||
259 | +# 'check_temperature' command definition to monitor a single temperature in C | ||
260 | +# parameters: warning (ARG1) and critical (ARG2) temperature in Celsius | |
261 | +define command { | ||
262 | + command_name check_temperature | ||
263 | + command_line [install_path]/plugins/check_temperature -w $ARG1$ -c $ARG2$ | ||
264 | +} | ||
265 | + | ||
266 | +# 'check_ftemperature' command definition to monitor a single temperature in F | ||
267 | +# parameters: warning (ARG1) and critical (ARG2) temperature in Fahrenheit | |
268 | +define command { | ||
269 | + command_name check_ftemperature | ||
270 | + command_line [install_path]/plugins/check_temperature -F -w $ARG1$ -c $ARG2$ | ||
271 | +} | ||
272 | + | ||
273 | +# 'check_temperature_sensor' command definition to monitor a single temperature in C | ||
274 | +# parameters: sensor serial (ARG1), warning (ARG2) and critical (ARG3) temperature in Celsius | |
275 | +define command { | ||
276 | + command_name check_temperature_sensor | ||
277 | + command_line [install_path]/plugins/check_temperature -s $ARG1$ -w $ARG2$ -c $ARG3$ | ||
278 | +} | ||
279 | + | ||
280 | +# 'check_ftemperature_sensor' command definition to monitor a single temperature in F | ||
281 | +# parameters: sensor serial (ARG1), warning (ARG2) and critical (ARG3) temperature in Fahrenheit | |
282 | +define command { | ||
283 | + command_name check_ftemperature_sensor | ||
284 | + command_line [install_path]/plugins/check_temperature -F -s $ARG1$ -w $ARG2$ -c $ARG3$ | ||
285 | +} | ||
286 | + | ||
287 | +~~~ | ||
288 | + | ||
289 | +Make sure to replace `[install_path]/plugins` with the location of the script. | ||
290 | + | ||
291 | +Please run `check_temperature -h` after installation for an overview of the | ||
292 | +available command line options (e.g. to enable logging to a file). | ||
293 | + | ||
294 | + | ||
236 | <a name=nagiosstatus>cgi-bin/nagiosstatus.sh</a> | 295 | <a name=nagiosstatus>cgi-bin/nagiosstatus.sh</a> |
237 | ------------------------------------------------ | 296 | ------------------------------------------------ |
238 | Very simplistic CGI-BIN script that checks whether nagios is still running and | 297 | Very simplistic CGI-BIN script that checks whether nagios is still running and |
plugins/check_temperature
0 → 100755
1 | +#! /usr/bin/env python | ||
2 | +# | ||
3 | +# check_temperature - Nagios temperature check for DS18B20 sensor on RaspberryPi | ||
4 | +# | ||
5 | +# Version 1.0, latest version, documentation and bugtracker available at: | ||
6 | +# https://gitlab.lindenaar.net/scripts/nagios-plugins | ||
7 | +# | ||
8 | +# Copyright (c) 2016 Frederik Lindenaar | ||
9 | +# | ||
10 | +# This script is free software: you can redistribute and/or modify it under the | ||
11 | +# terms of version 3 of the GNU General Public License as published by the Free | ||
12 | +# Software Foundation, or (at your option) any later version of the license. | ||
13 | +# | ||
14 | +# This script is distributed in the hope that it will be useful but WITHOUT ANY | ||
15 | +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR | ||
16 | +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. | ||
17 | +# | ||
18 | +# You should have received a copy of the GNU General Public License along with | ||
19 | +# this program. If not, visit <http://www.gnu.org/licenses/> to download it. | ||
20 | + | ||
21 | +from sys import exit | ||
22 | +from os.path import basename, splitext | ||
23 | +from glob import glob | ||
24 | +from time import time, sleep | ||
25 | +from argparse import ArgumentParser as StandardArgumentParser, FileType, \ | ||
26 | + _StoreAction as StoreAction, _StoreConstAction as StoreConstAction | ||
27 | +import logging | ||
28 | + | ||
# Constants (no need to change but allows for easy customization)
VERSION = "1.0"
PROG_NAME = splitext(basename(__file__))[0]
PROG_VERSION = '%s %s' % (PROG_NAME, VERSION)

# Raw sensor readings are divided by this factor to obtain degrees
SENSOR_SCALE = 1000
# Where sensor device folders are looked up and how their names are built
SENSOR_DEV_DIR = '/sys/bus/w1/devices/'
SENSOR_DEV_PREFIX = '28-'
SENSOR_DEV_SUFFIX = '/w1_slave'
# Default number of extra reads when the sensor data is not stable
SENSOR_READ_RETRIES = 10

# Log line layouts; the file variant is prefixed with a timestamp
LOG_FORMAT = '%(levelname)s - %(message)s'
LOG_FORMAT_FILE = ' - '.join(('%(asctime)s', LOG_FORMAT))
# Level above CRITICAL, used to silence all output
LOGGING_NONE = logging.CRITICAL + 10

# Nagios results as (status label, process exit code)
NAGIOS_OK = ('OK', 0)
NAGIOS_WARN = ('WARNING', 1)
NAGIOS_CRITICAL = ('CRITICAL', 2)
NAGIOS_UNKNOWN = ('UNKNOWN', 3)

# Setup logging
logging.basicConfig(format=LOG_FORMAT)
logging.addLevelName(LOGGING_NONE, 'NONE')
logger = logging.getLogger(PROG_NAME)
logger.setLevel(logging.CRITICAL)
52 | + | ||
################[ wrapper to stop ArgumentParser from exiting ]################
# based on http://stackoverflow.com/questions/14728376/i-want-python-argparse-to-throw-an-exception-rather-than-usage/14728477#14728477
# the only way to do this is overriding the error method and throw an Exception
class ArgumentParserError(Exception):
    """Signals a command line parsing problem reported by ArgumentParser"""


class ArgumentParser(StandardArgumentParser):
    """ArgumentParser that raises on errors so the caller decides the output"""

    def error(self, message):
        """Raise ArgumentParserError carrying the parser's error message"""
        raise ArgumentParserError(message)
62 | + | ||
##################[ Action to immediately set the log level ]##################
class SetLogLevel(StoreConstAction):
    """ArgumentParser action applying its const as the program's log level"""

    def __call__(self, parser, namespace, values, option_string=None):
        """Set the level of the logger named PROG_NAME to self.const

        The namespace is left untouched; only the logger is changed.
        """
        target = logging.getLogger(PROG_NAME)
        target.setLevel(self.const)
68 | + | ||
####################[ Action to immediately log to a file ]####################
class SetLogFile(StoreAction):
    """ArgumentParser action that redirects logging to the file provided"""

    def __call__(self, parser, namespace, values, option_string=None):
        """Store the file name and attach a FileHandler for it to the logger"""
        # let the standard store action record the value in the namespace
        StoreAction.__call__(self, parser, namespace, values, option_string)

        file_format = logging.Formatter(LOG_FORMAT_FILE)
        file_handler = logging.FileHandler(values)
        file_handler.setFormatter(file_format)

        target = logging.getLogger(PROG_NAME)
        # stop records from also reaching the handlers of parent loggers
        target.propagate = False
        target.addHandler(file_handler)
80 | + | ||
81 | +############################################################################### | ||
82 | + | ||
def convert_celcius(temp_read):
    """Return the raw sensor value divided by SENSOR_SCALE, as a float"""
    raw_value = float(temp_read)
    scale = float(SENSOR_SCALE)
    return raw_value / scale


# converter tuple: (conversion function, unit symbol, unit name)
CONVERT_CELCIUS = (convert_celcius, 'C', 'Celcius')
87 | + | ||
88 | + | ||
def convert_farenheit(temp_read):
    """Return the raw sensor value scaled by 9/5 and offset by 32, as a float"""
    numerator = float(temp_read * 9)
    denominator = float(5 * SENSOR_SCALE)
    return numerator / denominator + 32.0


# converter tuple: (conversion function, unit symbol, unit name)
CONVERT_FARENHEIT = (convert_farenheit, 'F', 'Farenheit')
93 | + | ||
94 | + | ||
def isempty(string):
    """Return True when 'string' is None or has a length of zero"""
    if string is None:
        return True
    return not len(string)
98 | + | ||
99 | + | ||
def parse_args():
    """Build the command line parser and return the parsed options

    Parsing errors surface as ArgumentParserError (raised by the parser's
    error method). The logging options take effect while parsing, through
    the SetLogLevel and SetLogFile actions.
    """
    cli = ArgumentParser(
        description='Nagios check plugin for 1-wire temp. sensor on RaspberryPi',
        epilog=('(*) by default the script will look for the first device that '
                'matches %s* in %s, if multiple entries are found -s or -f must '
                'be used to specify which sensor to read.'
                % (SENSOR_DEV_PREFIX, SENSOR_DEV_DIR))
    )
    cli.add_argument('-V', '--version', action='version', version=PROG_VERSION)

    # temperature unit: both options write to 'converter', Celcius is default
    unit_group = cli.add_mutually_exclusive_group(required=False)
    unit_group.add_argument(
        '-C', '--celcius', dest='converter', action='store_const',
        const=CONVERT_CELCIUS, default=CONVERT_CELCIUS,
        help='measure, critical and warn values in Celcius (default)')
    unit_group.add_argument(
        '-F', '--farenheit', dest='converter', action='store_const',
        const=CONVERT_FARENHEIT,
        help='measure, critical and warn values in Farenheit')

    # thresholds (both optional)
    cli.add_argument('-w', '--warn', type=float,
                     help='temperature for warning status')
    cli.add_argument('-c', '--critical', type=float,
                     help='temperature for critical status')

    cli.add_argument(
        '-r', '--retries', type=int, default=SENSOR_READ_RETRIES,
        help='number of times to retry reading sensor data when unstable '
             '(defaults to %d)' % SENSOR_READ_RETRIES)

    # sensor selection: by serial or by explicit data file, not both
    source_group = cli.add_mutually_exclusive_group(required=False)
    source_group.add_argument(
        '-s', '--serial',
        help='(unique part of) temperature sensor serial (*)')
    source_group.add_argument(
        '-f', '--file',
        help='input file (or device) to obtain data from (*)')

    # output verbosity, applied to the logger as soon as the option is parsed
    verbosity_group = cli.add_mutually_exclusive_group(required=False)
    verbosity_group.add_argument(
        '-q', '--quiet', action=SetLogLevel, const=LOGGING_NONE,
        default=logging.CRITICAL,
        help='quiet (no output, only exit with exit code)')
    verbosity_group.add_argument(
        '-v', '--verbose', action=SetLogLevel, const=logging.INFO,
        help='more verbose output')
    verbosity_group.add_argument(
        '-d', '--debug', action=SetLogLevel, const=logging.DEBUG,
        help='debug output (more verbose)')

    cli.add_argument('-l', '--logfile', action=SetLogFile,
                     help='send logging output to logfile')

    return cli.parse_args()
154 | + | ||
155 | + | ||
def get_sensor_device_filename(args, dev_dir=SENSOR_DEV_DIR,
                    prefix=SENSOR_DEV_PREFIX, suffix=SENSOR_DEV_SUFFIX):
    """Return the sensor data file to read, searching for it when needed

    When args.file is set it is returned as-is. Otherwise dev_dir is searched
    for entries matching prefix + '*' (or '*' + args.serial when a serial is
    given) and suffix is appended to the single match.

    Raises ValueError when the search yields no match or more than one.
    """
    if not isempty(args.file):
        logger.debug('using temperature sensor at %s', args.file)
        return args.file

    separator = '' if dev_dir[-1] == '/' else '/'
    if isempty(args.serial):
        wildcard = prefix + '*'
    else:
        wildcard = '*' + args.serial
    search_pat = dev_dir + separator + wildcard
    logger.debug('looking for sensors with search pattern %s', search_pat)

    matches = glob(search_pat)
    if len(matches) == 1:
        filename = matches[0] + suffix
        logger.debug('using temperature sensor at %s', filename)
        return filename

    if not matches:
        errmsg = 'no supported temperature sensors in %s' % dev_dir
    else:
        # list the candidates, without the prefix where the path contains it
        serials = [basename(match) if match.find(prefix) < 0
                   else basename(match)[len(prefix):] for match in matches]
        errmsg = ('found multiple temperature sensors (%s), please '
                  'specify which one to use' % ', '.join(serials))
    logger.critical(errmsg)
    raise ValueError(errmsg)
181 | + | ||
182 | + | ||
def read_sensor_raw(device_file):
    """Return the content of the sensor device file as a list of lines"""
    with open(device_file, 'r') as sensor:
        content = list(sensor)
    logger.debug('Temperature sensor data read from %s: %s',
                 sensor.name, content)
    return content
189 | + | ||
190 | + | ||
def read_temp(device_file, converter=CONVERT_CELCIUS, maxretries=10):
    """Read the sensor and return the temperature in the requested unit

    device_file -- sensor data file, handed to read_sensor_raw()
    converter   -- (function, unit symbol, unit name) tuple; the function
                   turns the raw integer reading into the target unit
    maxretries  -- maximum number of extra reads while the first line of the
                   data does not end in 'YES'

    Returns a (temperature, tries) tuple, tries being the number of reads.
    Raises ValueError when no stable data was obtained, when the second line
    is missing or has no 't=' value, or when that value is not an integer.
    Errors raised while reading the file propagate unchanged.
    """
    def is_stable(data):
        # an empty read counts as unstable so it is retried instead of
        # failing with an IndexError the caller does not handle
        return len(data) > 0 and data[0].strip().endswith('YES')

    lines = read_sensor_raw(device_file)
    tries = 1
    while not is_stable(lines) and tries <= maxretries:
        tries += 1
        sleep(0.2)
        logger.warning('Temperature sensor data not stable, reading once more')
        # the original called the undefined name read_temp_raw here, so any
        # unstable first read ended in a NameError instead of a retry
        lines = read_sensor_raw(device_file)

    if not is_stable(lines):
        errmsg = 'no stable temperature sensor data after %d tries' % tries
    elif len(lines) < 2 or lines[1].find('t=') == -1:
        errmsg = 'temperature sensor data format is not supported'
    else:
        value_pos = lines[1].find('t=') + 2
        temp_read = int(lines[1][value_pos:])
        temp = converter[0](temp_read)
        logger.debug('Temperature sensor value %d is %.2f%s', temp_read,
                     temp, converter[1])
        return temp, tries

    logger.critical(errmsg)
    raise ValueError(errmsg)
216 | + | ||
217 | + | ||
def nagios_exit(status, message, data=None):
    """Exit 'nagios-style': print status, message and perf. data, then exit

    status  -- (label, exit code) tuple, e.g. one of the NAGIOS_* constants
    message -- text placed after the status label
    data    -- optional dict of performance data; a value is either a scalar
               or a list of fields that are joined with ';', with None
               rendered as an empty field

    Nothing is printed when the logger is not enabled for CRITICAL; the
    process always exits with status[1]. This function does not return.
    """
    if logger.isEnabledFor(logging.CRITICAL):
        perfstr = ''
        if data:
            perfdata = []
            # items() with explicit unpacking replaces iteritems() and the
            # tuple-parameter lambda, which only exist in Python 2
            for key, value in data.items():
                if isinstance(value, list):
                    value = ';'.join('' if field is None else str(field)
                                     for field in value)
                perfdata.append("'%s'=%s" % (key, value))
            perfstr = ' | ' + ' '.join(perfdata)
        # single-argument call form prints the same under Python 2 and 3
        print('Temperature %s: %s%s' % (status[0], message, perfstr))
    exit(status[1])
230 | + | ||
231 | + | ||
# Script entry point: parse the options, read the sensor once and report the
# result in Nagios format. Every failure path ends in nagios_exit() so that the
# exit code is a deliberate Nagios status rather than that of a traceback.
if __name__ == '__main__':
    try:
        args = parse_args()
    except ArgumentParserError as e:
        # str(e) rather than e.message, which only exists in Python 2
        nagios_exit(NAGIOS_UNKNOWN, 'error with setup: ' + str(e))
    except IOError as e:
        # raised while parsing when the -l/--logfile file cannot be opened
        nagios_exit(NAGIOS_UNKNOWN, 'error with setup: %s' % e)
    except (KeyboardInterrupt, EOFError):
        nagios_exit(NAGIOS_UNKNOWN, 'initialization aborted')

    try:
        starttime = time()
        devicefile = get_sensor_device_filename(args)
        temperature, tries = read_temp(devicefile, args.converter, args.retries)
        endtime = time()

    except KeyboardInterrupt:
        nagios_exit(NAGIOS_UNKNOWN, 'temperature sensor read aborted by user')

    except (IOError, ValueError, IndexError) as e:
        # IndexError covers sensor data with fewer lines than expected
        nagios_exit(NAGIOS_UNKNOWN, 'temperature sensor read failed: %s' % e)

    elapse = endtime - starttime
    logger.info('Got temperature reading of %.2f degrees %s in %fs',
                temperature, args.converter[2], elapse)

    # compare against the (optional) thresholds, critical taking precedence
    temp_unit = args.converter[1]
    message = 'current temperature is %.2f%s' % (temperature, temp_unit)
    if args.critical is not None and temperature > args.critical:
        nagiosresult = NAGIOS_CRITICAL
        message += ', above critical threshold %.2f%s' % (args.critical,
                                                          temp_unit)
    elif args.warn is not None and temperature > args.warn:
        nagiosresult = NAGIOS_WARN
        message += ', above warning threshold %.2f%s' % (args.warn, temp_unit)
    else:
        nagiosresult = NAGIOS_OK

    # each perf. data list is joined with ';' by nagios_exit, None being empty
    nagios_exit(nagiosresult, message, {
        'temperature': [temperature, args.warn, args.critical, None, None],
        'retries': [tries - 1, None, args.retries, 0, None],
        'time': ['%f' % elapse, None, None, 0, None]
    })