Commit a1eeb872bfc97d8d856c164b2f79e6a7a83d47c6
1 parent
4a52b302
added check_temperature plugin to monitor temperatures on a RaspberryPi
Showing
2 changed files
with
331 additions
and
0 deletions
README.md
... | ... | @@ -24,6 +24,8 @@ This repository contains the following scripts: |
24 | 24 | monitor multi-home and dual-stack (i.e. ipv4 and ipv6) servers. |
25 | 25 | * [check_otp](#check_otp) |
26 | 26 | plugin to monitor PrivacyIDEA (and LinOTP) OTP validation |
27 | + * [check_temperature](#check_temperature) | |
28 | + plugin to monitor the temperature of a 1-wire sensor on a RaspberryPi | |
27 | 29 | * [nagiosstatus](#nagiosstatus) |
28 | 30 | CGI-BIN script to report the status of nagios (to monitor nagios itself) |
29 | 31 | |
... | ... | @@ -233,6 +235,63 @@ define service { |
233 | 235 | ~~~ |
234 | 236 | |
235 | 237 | |
238 | +<a name=check_temperature>plugins/check_temperature</a> | |
239 | +------------------------------------------------------- | |
240 | +Plugin (check) to monitor the temperature using a sensor connected to a | |
241 | +RaspberryPi. This implementation is specifically for the DS18B20 1-wire | |
242 | +temperature sensor. Other methods and interfaces can be plugged in easily (just | |
243 | +raise a request or provide a patch). For information on how to connect the sensor | |
244 | +to the RaspberryPi and to get it working please see [this Adafruit tutorial]( | |
245 | +https://learn.adafruit.com/adafruits-raspberry-pi-lesson-11-ds18b20-temperature-sensing). | |
246 | + | |
247 | +To enable the 1-wire interface support on the RaspberryPi one can use the | |
248 | +command: | |
249 | +~~~ | |
250 | + sudo raspi-config nonint do_onewire 0 | |
251 | +~~~ | |
252 | +or use `raspi-config` in interactive mode (9. Advanced Options --> A9. 1-Wire). | |
253 | +Please note that changing this requires a reboot. | |
254 | + | |
255 | +Installation is straightforward: after installing the script on the server, | |
256 | +add the following to your Nagios `commands.cmd` configuration file: | |
257 | + | |
258 | +~~~ | |
259 | +# 'check_temperature' command definition to monitor a single temperature in C | |
260 | +# parameters: warning (ARG1) and critical (ARG2) temperature in Celsius | |
261 | +define command { | |
262 | + command_name check_temperature | |
263 | + command_line [install_path]/plugins/check_temperature -w $ARG1$ -c $ARG2$ | |
264 | +} | |
265 | + | |
266 | +# 'check_ftemperature' command definition to monitor a single temperature in F | |
267 | +# parameters: warning (ARG1) and critical (ARG2) temperature in Fahrenheit | |
268 | +define command { | |
269 | + command_name check_ftemperature | |
270 | + command_line [install_path]/plugins/check_temperature -F -w $ARG1$ -c $ARG2$ | |
271 | +} | |
272 | + | |
273 | +# 'check_temperature_sensor' command definition to monitor a single temperature in C | |
274 | +# parameters: sensor serial (ARG1), warning (ARG2) and critical (ARG3) temperature in Celsius | |
275 | +define command { | |
276 | + command_name check_temperature_sensor | |
277 | + command_line [install_path]/plugins/check_temperature -s $ARG1$ -w $ARG2$ -c $ARG3$ | |
278 | +} | |
279 | + | |
280 | +# 'check_ftemperature_sensor' command definition to monitor a single temperature in F | |
281 | +# parameters: sensor serial (ARG1), warning (ARG2) and critical (ARG3) temperature in Fahrenheit | |
282 | +define command { | |
283 | + command_name check_ftemperature_sensor | |
284 | + command_line [install_path]/plugins/check_temperature -F -s $ARG1$ -w $ARG2$ -c $ARG3$ | |
285 | +} | |
286 | + | |
287 | +~~~ | |
288 | + | |
289 | +Make sure to replace `[install_path]/plugins` with the location of the script. | |
290 | + | |
291 | +Please run `check_temperature -h` after installation for an overview of the | |
292 | +available command line options (e.g. to enable logging to a file). | |
293 | + | |
294 | + | |
236 | 295 | <a name=nagiosstatus>cgi-bin/nagiosstatus.sh</a> |
237 | 296 | ------------------------------------------------ |
238 | 297 | Very simplistic CGI-BIN script that checks whether nagios is still running and |
... | ... |
plugins/check_temperature
0 → 100755
1 | +#! /usr/bin/env python | |
2 | +# | |
3 | +# check_temperature - Nagios temperature check for DS18B20 sensor on RaspberryPi | |
4 | +# | |
5 | +# Version 1.0, latest version, documentation and bugtracker available at: | |
6 | +# https://gitlab.lindenaar.net/scripts/nagios-plugins | |
7 | +# | |
8 | +# Copyright (c) 2016 Frederik Lindenaar | |
9 | +# | |
10 | +# This script is free software: you can redistribute and/or modify it under the | |
11 | +# terms of version 3 of the GNU General Public License as published by the Free | |
12 | +# Software Foundation, or (at your option) any later version of the license. | |
13 | +# | |
14 | +# This script is distributed in the hope that it will be useful but WITHOUT ANY | |
15 | +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR | |
16 | +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. | |
17 | +# | |
18 | +# You should have received a copy of the GNU General Public License along with | |
19 | +# this program. If not, visit <http://www.gnu.org/licenses/> to download it. | |
20 | + | |
21 | +from sys import exit | |
22 | +from os.path import basename, splitext | |
23 | +from glob import glob | |
24 | +from time import time, sleep | |
25 | +from argparse import ArgumentParser as StandardArgumentParser, FileType, \ | |
26 | + _StoreAction as StoreAction, _StoreConstAction as StoreConstAction | |
27 | +import logging | |
28 | + | |
29 | +# Constants (no need to change but allows for easy customization) | |
# Program identification
VERSION = '1.0'
PROG_NAME = splitext(basename(__file__))[0]
PROG_VERSION = '%s %s' % (PROG_NAME, VERSION)

# Sensor access: raw readings are divided by SENSOR_SCALE by the converters,
# the device data file is SENSOR_DEV_DIR + <device folder> + SENSOR_DEV_SUFFIX
SENSOR_SCALE = 1000
SENSOR_DEV_DIR = '/sys/bus/w1/devices/'
SENSOR_DEV_PREFIX = '28-'
SENSOR_DEV_SUFFIX = '/w1_slave'
SENSOR_READ_RETRIES = 10

# Logging formats and an extra level above CRITICAL used to silence output
LOG_FORMAT = '%(levelname)s - %(message)s'
LOG_FORMAT_FILE = '%(asctime)s - ' + LOG_FORMAT
LOGGING_NONE = logging.CRITICAL + 10

# Nagios results as (status label, process exit code) pairs
NAGIOS_OK = ('OK', 0)
NAGIOS_WARN = ('WARNING', 1)
NAGIOS_CRITICAL = ('CRITICAL', 2)
NAGIOS_UNKNOWN = ('UNKNOWN', 3)

# Setup logging
logging.basicConfig(format=LOG_FORMAT)
logging.addLevelName(LOGGING_NONE, 'NONE')
logger = logging.getLogger(PROG_NAME)
logger.setLevel(logging.CRITICAL)
52 | + | |
53 | +################[ wrapper to stop ArgumentParser from exiting ]################ | |
54 | +# based on http://stackoverflow.com/questions/14728376/i-want-python-argparse-to-throw-an-exception-rather-than-usage/14728477#14728477 | |
55 | +# the only way to do this is overriding the error method and throwing an Exception | |
class ArgumentParserError(Exception):
    """Signals a command line problem reported by ArgumentParser.error()"""


class ArgumentParser(StandardArgumentParser):
    """ArgumentParser not exiting with non-Nagios format message upon errors"""

    def error(self, message):
        """Raise ArgumentParserError carrying the parser's error message

        Overrides the standard error() hook so the caller decides how the
        problem is reported.
        """
        failure = ArgumentParserError(message)
        raise failure
62 | + | |
63 | +##################[ Action to immediately set the log level ]################## | |
class SetLogLevel(StoreConstAction):
    """ArgumentParser action to set log level to provided const value

    The level is applied to the PROG_NAME logger at the moment the option is
    parsed; this method does not store anything in the namespace itself.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        target = logging.getLogger(PROG_NAME)
        target.setLevel(self.const)
68 | + | |
69 | +####################[ Action to immediately log to a file ]#################### | |
class SetLogFile(StoreAction):
    """ArgumentParser action to log to file (sets up FileHandler accordingly)

    The file name is stored in the namespace as a regular option value and a
    FileHandler using LOG_FORMAT_FILE is attached to the PROG_NAME logger.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        # store the file name like a normal option first
        super(SetLogFile, self).__call__(parser, namespace, values,
                                         option_string)
        file_format = logging.Formatter(LOG_FORMAT_FILE)
        file_handler = logging.FileHandler(values)
        file_handler.setFormatter(file_format)

        prog_logger = logging.getLogger(PROG_NAME)
        # stop records from also reaching the root logger's handlers
        prog_logger.propagate = False
        prog_logger.addHandler(file_handler)
80 | + | |
81 | +############################################################################### | |
82 | + | |
def convert_celcius(temp_read):
    """Converts raw temperature sensor value to degrees Celcius

    The raw value is divided by SENSOR_SCALE; the result is a float.
    """
    raw_value = float(temp_read)
    return raw_value / float(SENSOR_SCALE)


# converter descriptor: (conversion function, unit symbol, unit name)
CONVERT_CELCIUS = (convert_celcius, 'C', 'Celcius')
87 | + | |
88 | + | |
def convert_farenheit(temp_read):
    """Converts raw temperature sensor value to degrees Farenheit

    The raw value is scaled by SENSOR_SCALE to degrees Celcius and then
    converted with F = C * 9 / 5 + 32; the result is a float.

    The raw value is converted to float BEFORE multiplying so that numeric
    strings are handled the same way convert_celcius() handles them (a string
    multiplied by 9 would be repeated nine times rather than scaled).
    """
    return float(temp_read) * 9.0 / (5.0 * float(SENSOR_SCALE)) + 32.0


# converter descriptor: (conversion function, unit symbol, unit name)
CONVERT_FARENHEIT = (convert_farenheit, 'F', 'Farenheit')
93 | + | |
94 | + | |
def isempty(string):
    """Checks whether the provided 'string' is unset (None) or has length 0"""
    if string is None:
        return True
    return len(string) == 0
98 | + | |
99 | + | |
def parse_args():
    """Build the command line parser and return the parsed options

    Returns the argparse namespace; the rest of the script reads its
    converter, warn, critical, retries, serial and file attributes. The
    -q/-v/-d and -l options act on the logger while the command line is
    being parsed (see the SetLogLevel and SetLogFile actions).
    """
    epilog_text = (
        '(*) by default the script will look for the first device that '
        'matches %s* in %s, if multiple entries are found -s or -f must '
        'be used to specify which sensor to read.'
    ) % (SENSOR_DEV_PREFIX, SENSOR_DEV_DIR)

    # Setup argument parser, the workhorse gluing it all together
    parser = ArgumentParser(
        description='Nagios check plugin for 1-wire temp. sensor on RaspberryPi',
        epilog=epilog_text,
    )
    parser.add_argument('-V', '--version', action='version',
                        version=PROG_VERSION)

    # temperature unit, both options store into args.converter
    unit_group = parser.add_mutually_exclusive_group(required=False)
    unit_group.add_argument(
        '-C', '--celcius', dest='converter', action='store_const',
        const=CONVERT_CELCIUS, default=CONVERT_CELCIUS,
        help='measure, critical and warn values in Celcius (default)')
    unit_group.add_argument(
        '-F', '--farenheit', dest='converter', action='store_const',
        const=CONVERT_FARENHEIT,
        help='measure, critical and warn values in Farenheit')

    # thresholds (both optional)
    parser.add_argument('-w', '--warn', type=float,
                        help='temperature for warning status')
    parser.add_argument('-c', '--critical', type=float,
                        help='temperature for critical status')

    retries_help = ('number of times to retry reading sensor data when'
                    ' unstable (defaults to %d)' % SENSOR_READ_RETRIES)
    parser.add_argument('-r', '--retries', type=int,
                        default=SENSOR_READ_RETRIES, help=retries_help)

    # where to read from: a sensor serial or an explicit file, not both
    source_group = parser.add_mutually_exclusive_group(required=False)
    source_group.add_argument(
        '-s', '--serial',
        help='(unique part of) temperature sensor serial (*)')
    source_group.add_argument(
        '-f', '--file',
        help='input file (or device) to obtain data from (*)')

    # verbosity, applied to the logger as soon as the option is parsed
    verbosity_group = parser.add_mutually_exclusive_group(required=False)
    verbosity_group.add_argument(
        '-q', '--quiet', action=SetLogLevel, const=LOGGING_NONE,
        default=logging.CRITICAL,
        help='quiet (no output, only exit with exit code)')
    verbosity_group.add_argument(
        '-v', '--verbose', action=SetLogLevel, const=logging.INFO,
        help='more verbose output')
    verbosity_group.add_argument(
        '-d', '--debug', action=SetLogLevel, const=logging.DEBUG,
        help='debug output (more verbose)')

    parser.add_argument('-l', '--logfile', action=SetLogFile,
                        help='send logging output to logfile')

    # parse the command line; if we get past this all seems OK
    return parser.parse_args()
154 | + | |
155 | + | |
def get_sensor_device_filename(args, dev_dir=SENSOR_DEV_DIR,
                               prefix=SENSOR_DEV_PREFIX,
                               suffix=SENSOR_DEV_SUFFIX):
    """Auto-determine sensor datafile name (unless args.file is set)

    Parameters:
      args     parsed options, only args.file and args.serial are used
      dev_dir  directory that is searched for sensor device folders
      prefix   name prefix of supported device folders
      suffix   path appended to the device folder to get the data file

    Returns args.file when set, otherwise the data file of the single device
    folder in dev_dir matching prefix* (or *serial when args.serial is set).

    Raises ValueError when no or more than one device folder matches; the
    message lists the serials found in the latter case.
    """
    if not isempty(args.file):
        filename = args.file
    else:
        # endswith() also copes with an empty dev_dir (indexing [-1] did not)
        search_pat = dev_dir if dev_dir.endswith('/') else dev_dir + '/'
        if isempty(args.serial):
            search_pat += prefix + '*'
        else:
            search_pat += '*' + args.serial
        logger.debug('looking for sensors with search pattern %s', search_pat)

        device_folders = glob(search_pat)
        if len(device_folders) != 1:
            if not device_folders:
                errmsg = 'no supported temperature sensors in %s' % dev_dir
            else:
                # strip the prefix from the folder NAME only; testing the
                # whole path would also hit a prefix inside a directory name
                names = sorted(basename(folder) for folder in device_folders)
                serials = [name[len(prefix):] if name.startswith(prefix)
                           else name for name in names]
                errmsg = 'found multiple temperature sensors (%s), please '\
                         'specify which one to use' % ', '.join(serials)
            logger.critical(errmsg)
            raise ValueError(errmsg)
        filename = device_folders[0] + suffix

    logger.debug('using temperature sensor at %s', filename)
    return filename
181 | + | |
182 | + | |
def read_sensor_raw(device_file):
    """Reads the raw data from the sensor device file, returns list of lines

    Lines keep their line endings; errors from opening or reading the file
    are not handled here.
    """
    with open(device_file, 'r') as sensor:
        raw_lines = sensor.readlines()
        logger.debug('Temperature sensor data read from %s: %s', sensor.name,
                     raw_lines)
    return raw_lines
189 | + | |
190 | + | |
def read_temp(device_file, converter=CONVERT_CELCIUS, maxretries=10):
    """Reads sensor data and converts it to desired unit, returns temperature

    Parameters:
      device_file  path of the sensor data file to read
      converter    (function, unit symbol, unit name) tuple, the function is
                   applied to the raw integer value read from the sensor
      maxretries   number of additional reads attempted while the data is
                   not stable

    Returns a tuple (temperature, tries) where tries is the number of reads
    that were needed.

    Raises ValueError when no stable data was obtained, when the data does
    not contain a 't=' value on its second line or when that value is not an
    integer. Errors raised by read_sensor_raw() are passed on.
    """
    def is_stable(data):
        # a reading is accepted when its first line ends in 'YES'; empty data
        # counts as unstable instead of failing with an IndexError
        return len(data) > 0 and data[0].strip()[-3:] == 'YES'

    lines = read_sensor_raw(device_file)
    tries = 1
    while not is_stable(lines) and tries <= maxretries:
        tries += 1
        sleep(0.2)
        logger.warning('Temperature sensor data not stable, reading once more')
        # re-read with the same reader as the initial read (the original
        # called a non-existing read_temp_raw here)
        lines = read_sensor_raw(device_file)

    if not is_stable(lines):
        errmsg = 'no stable temperature sensor data after %d tries' % tries
    else:
        # the value is expected as t=<integer> on the second line
        equals_pos = lines[1].find('t=') if len(lines) > 1 else -1
        if equals_pos == -1:
            errmsg = 'temperature sensor data format is not supported'
        else:
            temp_read = int(lines[1][equals_pos + 2:])
            temp = converter[0](temp_read)
            logger.debug('Temperature sensor value %d is %.2f%s', temp_read,
                         temp, converter[1])
            return temp, tries

    logger.critical(errmsg)
    raise ValueError(errmsg)
216 | + | |
217 | + | |
def nagios_exit(status, message, data=None):
    """exit 'nagios-style', print status and message followed by perf. data

    Parameters:
      status   (label, exit code) tuple, e.g. one of the NAGIOS_* constants
      message  text printed after the status label
      data     optional dict of performance data; a list value is joined
               with ';' (None entries become empty fields), any other value
               is printed as is

    Nothing is printed when the logger is not enabled for CRITICAL (quiet
    mode). Always terminates the process with the exit code from status.

    Written to run on both Python 2 and Python 3: no tuple-parameter lambda,
    no print statement and no dict.iteritems().
    """
    if logger.isEnabledFor(logging.CRITICAL):
        if data is not None and len(data) > 0:
            perfdata = []
            for label, value in data.items():
                if isinstance(value, list):
                    value = ';'.join('' if item is None else str(item)
                                     for item in value)
                perfdata.append("'%s'=%s" % (label, value))
            perfstr = ' | ' + ' '.join(perfdata)
        else:
            perfstr = ''
        # single parenthesized argument: same output on Python 2 and 3
        print('Temperature %s: %s%s' % (status[0], message, perfstr))
    exit(status[1])
230 | + | |
231 | + | |
# Script entry point: parse the options, read the sensor once and report the
# result in Nagios format (status line, performance data and exit code).
if __name__ == '__main__':
    try:
        args = parse_args()
    except ArgumentParserError as e:
        # format the exception itself, e.message does not exist on Python 3
        nagios_exit(NAGIOS_UNKNOWN, 'error with setup: %s' % e)
    except (KeyboardInterrupt, EOFError):
        nagios_exit(NAGIOS_UNKNOWN, 'initialization aborted')

    try:
        starttime = time()
        devicefile = get_sensor_device_filename(args)
        temperature, tries = read_temp(devicefile, args.converter,
                                       args.retries)
        endtime = time()

    except KeyboardInterrupt:
        nagios_exit(NAGIOS_UNKNOWN, 'temperature sensor read aborted by user')

    except (IOError, ValueError) as e:
        nagios_exit(NAGIOS_UNKNOWN, 'temperature sensor read failed: %s' % e)

    elapse = endtime - starttime
    logger.info('Got temperature reading of %.2f degrees %s in %fs',
                temperature, args.converter[2], elapse)

    # critical is checked before warning so the most severe status wins
    temp_unit = args.converter[1]
    message = 'current temperature is %.2f%s' % (temperature, temp_unit)
    if args.critical is not None and temperature > args.critical:
        nagiosresult = NAGIOS_CRITICAL
        message += ', above critical threshold %.2f%s' % (args.critical,
                                                          temp_unit)
    elif args.warn is not None and temperature > args.warn:
        nagiosresult = NAGIOS_WARN
        message += ', above warning threshold %.2f%s' % (args.warn, temp_unit)
    else:
        nagiosresult = NAGIOS_OK

    # performance data lists are passed to nagios_exit(), which joins them
    # with ';' and turns None entries into empty fields
    nagios_exit(nagiosresult, message, {
        'temperature': [temperature, args.warn, args.critical, None, None],
        'retries': [tries - 1, None, args.retries, 0, None],
        'time': ['%f' % elapse, None, None, 0, None],
    })
... | ... |