Commit a1eeb872bfc97d8d856c164b2f79e6a7a83d47c6

Authored by Frederik Lindenaar
1 parent 4a52b302

added check_temperature plugin to monitor temperatures on a RaspberryPi

README.md
... ... @@ -24,6 +24,8 @@ This repository contains the following scripts:
24 24 monitor multi-home and dual-stack (i.e. ipv4 and ipv6) servers.
25 25 * [check_otp](#check_otp)
26 26 plugin to monitor PrivacyIDEA (and LinOTP) OTP validation
  27 + * [check_temperature](#check_temperature)
  28 + plugin to monitor the temperature of a 1-wire sensor on a RaspberryPi
27 29 * [nagiosstatus](#nagiosstatus)
28 30 CGI-BIN script to report the status of nagios (to monitor nagios itself)
29 31  
... ... @@ -233,6 +235,63 @@ define service {
233 235 ~~~
234 236  
235 237  
  238 +<a name=check_temperature>plugins/check_temperature</a>
  239 +-------------------------------------------------------
  240 +Plugin (check) to monitor the temperature using a sensor connected to a
  241 +RaspberryPi. This implementation is specifically for the DS18B20 1-wire
  242 +temperature sensor. Other methods and interfaces can be plugged in easily (just
  243 +raise a request or provide a patch). For information on how to connect the sensor
  244 +to the RaspberryPi and to get it working please see [this Adafruit tutorial](
  245 +https://learn.adafruit.com/adafruits-raspberry-pi-lesson-11-ds18b20-temperature-sensing).
  246 +
  247 +To enable the 1-wire interface support on the RaspberryPi one can use the
  248 +command:
  249 +~~~
  250 + sudo raspi-config nonint do_onewire 0
  251 +~~~
  252 +or use `raspi-config` in interactive mode (9. Advanced Options --> A9. 1-Wire).
  253 +Please note that changing this requires a reboot.
  254 +
  255 +Installation is straightforward: after installing the script on the server,
  256 +add the following to your Nagios `commands.cmd` configuration file:
  257 +
  258 +~~~
  259 +# 'check_temperature' command definition to monitor a single temperature in C
  260 +# parameters: warning (ARG1) and critical (ARG2) temperature in Celsius
  261 +define command {
  262 + command_name check_temperature
  263 + command_line [install_path]/plugins/check_temperature -w $ARG1$ -c $ARG2$
  264 +}
  265 +
  266 +# 'check_ftemperature' command definition to monitor a single temperature in F
  267 +# parameters: warning (ARG1) and critical (ARG2) temperature in Fahrenheit
  268 +define command {
  269 + command_name check_ftemperature
  270 + command_line [install_path]/plugins/check_temperature -F -w $ARG1$ -c $ARG2$
  271 +}
  272 +
  273 +# 'check_temperature_sensor' command definition to monitor a single temperature in C
  274 +# parameters: sensor serial (ARG1), warning (ARG2) and critical (ARG3) temperature in Celsius
  275 +define command {
  276 + command_name check_temperature_sensor
  277 + command_line [install_path]/plugins/check_temperature -s $ARG1$ -w $ARG2$ -c $ARG3$
  278 +}
  279 +
  280 +# 'check_ftemperature_sensor' command definition to monitor a single temperature in F
  281 +# parameters: sensor serial (ARG1), warning (ARG2) and critical (ARG3) temperature in Fahrenheit
  282 +define command {
  283 + command_name check_ftemperature_sensor
  284 + command_line [install_path]/plugins/check_temperature -F -s $ARG1$ -w $ARG2$ -c $ARG3$
  285 +}
  286 +
  287 +~~~
  288 +
  289 +Make sure to replace `[install_path]/plugins` with the location of the script.
  290 +
  291 +Please run `check_temperature -h` after installation for an overview of the
  292 +available command line options (e.g. to enable logging to a file).
  293 +
  294 +
236 295 <a name=nagiosstatus>cgi-bin/nagiosstatus.sh</a>
237 296 ------------------------------------------------
238 297 Very simplistic CGI-BIN script that checks whether nagios is still running and
... ...
plugins/check_temperature 0 → 100755
  1 +#! /usr/bin/env python
  2 +#
  3 +# check_temperature - Nagios temperature check for DS18B20 sensor on RaspberryPi
  4 +#
  5 +# Version 1.0, latest version, documentation and bugtracker available at:
  6 +# https://gitlab.lindenaar.net/scripts/nagios-plugins
  7 +#
  8 +# Copyright (c) 2016 Frederik Lindenaar
  9 +#
  10 +# This script is free software: you can redistribute and/or modify it under the
  11 +# terms of version 3 of the GNU General Public License as published by the Free
  12 +# Software Foundation, or (at your option) any later version of the license.
  13 +#
  14 +# This script is distributed in the hope that it will be useful but WITHOUT ANY
  15 +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  16 +# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  17 +#
  18 +# You should have received a copy of the GNU General Public License along with
  19 +# this program. If not, visit <http://www.gnu.org/licenses/> to download it.
  20 +
  21 +from sys import exit
  22 +from os.path import basename, splitext
  23 +from glob import glob
  24 +from time import time, sleep
  25 +from argparse import ArgumentParser as StandardArgumentParser, FileType, \
  26 + _StoreAction as StoreAction, _StoreConstAction as StoreConstAction
  27 +import logging
  28 +
# Tunable constants: plugin identity, sensor sysfs layout and retry budget
VERSION = "1.0"
PROG_NAME = splitext(basename(__file__))[0]
PROG_VERSION = '%s %s' % (PROG_NAME, VERSION)
SENSOR_SCALE = 1000
SENSOR_DEV_DIR = '/sys/bus/w1/devices/'
SENSOR_DEV_PREFIX = '28-'
SENSOR_DEV_SUFFIX = '/w1_slave'
SENSOR_READ_RETRIES = 10

# Log layouts; the file variant additionally starts with a timestamp
LOG_FORMAT = '%(levelname)s - %(message)s'
LOG_FORMAT_FILE = ' - '.join(('%(asctime)s', LOG_FORMAT))

# Pseudo level above CRITICAL, registered below under the name 'NONE'
LOGGING_NONE = logging.CRITICAL + 10

# (status label, process exit code) pairs for the Nagios result states
NAGIOS_OK = ('OK', 0)
NAGIOS_WARN = ('WARNING', 1)
NAGIOS_CRITICAL = ('CRITICAL', 2)
NAGIOS_UNKNOWN = ('UNKNOWN', 3)

# Logging bootstrap: root handler with the console format and a logger named
# after the program that initially only lets CRITICAL records through
logging.basicConfig(format=LOG_FORMAT)
logging.addLevelName(LOGGING_NONE, 'NONE')
logger = logging.getLogger(PROG_NAME)
logger.setLevel(logging.CRITICAL)
  52 +
  53 +################[ wrapper to stop ArgumentParser from exiting ]################
  54 +# based on http://stackoverflow.com/questions/14728376/i-want-python-argparse-to-throw-an-exception-rather-than-usage/14728477#14728477
  55 +# the only way to do this is to override the error method and raise an Exception
class ArgumentParserError(Exception):
    """Signals a command line problem reported by the argument parser"""


class ArgumentParser(StandardArgumentParser):
    """Argument parser that reports errors by raising instead of exiting"""

    def error(self, message):
        """Raise ArgumentParserError carrying the parser's error message"""
        failure = ArgumentParserError(message)
        raise failure
  62 +
  63 +##################[ Action to immediately set the log level ]##################
class SetLogLevel(StoreConstAction):
    """argparse action applying its const as the program logger's level"""

    def __call__(self, parser, namespace, values, option_string=None):
        """Set the level as soon as the option is seen on the command line"""
        # Only the logger is changed here; this action does not write the
        # const to the namespace itself.
        program_logger = logging.getLogger(PROG_NAME)
        program_logger.setLevel(self.const)
  68 +
  69 +####################[ Action to immediately log to a file ]####################
class SetLogFile(StoreAction):
    """argparse action that stores the path and starts logging to that file"""

    def __call__(self, parser, namespace, values, option_string=None):
        """Store the option value, then attach a FileHandler for it"""
        super(SetLogFile, self).__call__(parser, namespace, values,
                                         option_string)
        file_handler = logging.FileHandler(values)
        file_handler.setFormatter(logging.Formatter(LOG_FORMAT_FILE))

        program_logger = logging.getLogger(PROG_NAME)
        # Stop records from also reaching the handlers of ancestor loggers
        program_logger.propagate = False
        program_logger.addHandler(file_handler)
  80 +
  81 +###############################################################################
  82 +
def convert_celcius(temp_read):
    """Scale a raw sensor reading down to degrees Celsius"""
    divisor = float(SENSOR_SCALE)
    reading = float(temp_read)
    return reading / divisor


# (conversion function, unit symbol, unit name)
CONVERT_CELCIUS = (convert_celcius, 'C', 'Celcius')
  87 +
  88 +
def convert_farenheit(temp_read):
    """Convert a raw sensor reading to degrees Fahrenheit

    temp_read is the raw value in 1/SENSOR_SCALE degrees Celsius; like
    convert_celcius it may be a number or a numeric string.
    """
    # Convert to float BEFORE multiplying: multiplying a str by 9 would repeat
    # the text nine times instead of scaling the value.
    return float(temp_read) * 9 / float(5 * SENSOR_SCALE) + 32.0


# (conversion function, unit symbol, unit name)
CONVERT_FARENHEIT = (convert_farenheit, 'F', 'Farenheit')
  93 +
  94 +
def isempty(string):
    """Tell whether 'string' is None or has zero length"""
    if string is None:
        return True
    return len(string) == 0
  98 +
  99 +
def parse_args():
    """Build the command line parser and return the parsed arguments

    Problems on the command line surface as ArgumentParserError (raised by
    the ArgumentParser wrapper) instead of terminating the process.
    """
    epilog_text = ('(*) by default the script will look for the first device '
                   'that matches %s* in %s, if multiple entries are found -s '
                   'or -f must be used to specify which sensor to read.'
                   % (SENSOR_DEV_PREFIX, SENSOR_DEV_DIR))
    retries_help = ('number of times to retry reading sensor data when '
                    'unstable (defaults to %d)' % SENSOR_READ_RETRIES)

    parser = ArgumentParser(
        description='Nagios check plugin for 1-wire temp. sensor on RaspberryPi',
        epilog=epilog_text,
    )
    parser.add_argument('-V', '--version', action="version",
                        version=PROG_VERSION)

    # Temperature unit: both options write to 'converter', Celsius is default
    unit_group = parser.add_mutually_exclusive_group(required=False)
    unit_group.add_argument(
        '-C', '--celcius', dest='converter', action='store_const',
        const=CONVERT_CELCIUS, default=CONVERT_CELCIUS,
        help='measure, critical and warn values in Celcius (default)')
    unit_group.add_argument(
        '-F', '--farenheit', dest='converter', action='store_const',
        const=CONVERT_FARENHEIT,
        help='measure, critical and warn values in Farenheit')

    # Thresholds and retry budget
    parser.add_argument('-w', '--warn', type=float,
                        help='temperature for warning status')
    parser.add_argument('-c', '--critical', type=float,
                        help='temperature for critical status')
    parser.add_argument('-r', '--retries', type=int,
                        default=SENSOR_READ_RETRIES, help=retries_help)

    # Sensor selection: by serial or by explicit file, not both
    source_group = parser.add_mutually_exclusive_group(required=False)
    source_group.add_argument(
        '-s', '--serial',
        help='(unique part of) temperature sensor serial (*)')
    source_group.add_argument(
        '-f', '--file',
        help='input file (or device) to obtain data from (*)')

    # Verbosity: the SetLogLevel actions take effect while parsing
    verbosity_group = parser.add_mutually_exclusive_group(required=False)
    verbosity_group.add_argument(
        '-q', '--quiet', action=SetLogLevel, const=LOGGING_NONE,
        default=logging.CRITICAL,
        help='quiet (no output, only exit with exit code)')
    verbosity_group.add_argument(
        '-v', '--verbose', action=SetLogLevel, const=logging.INFO,
        help='more verbose output')
    verbosity_group.add_argument(
        '-d', '--debug', action=SetLogLevel, const=logging.DEBUG,
        help='debug output (more verbose)')

    parser.add_argument('-l', '--logfile', action=SetLogFile,
                        help='send logging output to logfile')

    return parser.parse_args()
  154 +
  155 +
def get_sensor_device_filename(args, dev_dir=SENSOR_DEV_DIR,
                               prefix=SENSOR_DEV_PREFIX,
                               suffix=SENSOR_DEV_SUFFIX):
    """Return the path of the sensor data file to read

    args.file wins when set. Otherwise dev_dir is globbed for 'prefix*', or
    for '*<args.serial>' when a serial was given, and 'suffix' is appended to
    the single match. Raises ValueError when the glob matches no entry or
    more than one.
    """
    if not isempty(args.file):
        filename = args.file
    else:
        base = dev_dir if dev_dir[-1] == '/' else dev_dir + '/'
        if isempty(args.serial):
            search_pat = base + prefix + '*'
        else:
            search_pat = base + '*' + args.serial
        logger.debug('looking for sensors with search pattern %s', search_pat)

        matches = glob(search_pat)
        if len(matches) != 1:
            if not matches:
                errmsg = 'no supported temperature sensors in %s' % dev_dir
            else:
                # List the candidates, without the prefix where present
                serials = [basename(path)[len(prefix):] if prefix in path
                           else basename(path) for path in matches]
                errmsg = ('found multiple temperature sensors (%s), please '
                          'specify which one to use' % ', '.join(serials))
            logger.critical(errmsg)
            raise ValueError(errmsg)
        filename = matches[0] + suffix

    logger.debug('using temperature sensor at %s', filename)
    return filename
  181 +
  182 +
def read_sensor_raw(device_file):
    """Return the content of the sensor device file as a list of lines"""
    with open(device_file, 'r') as sensor:
        content = list(sensor)
        logger.debug('Temperature sensor data read from %s: %s', sensor.name,
                     content)
    return content
  189 +
  190 +
def _sensor_data_stable(lines):
    """Tell whether the first line of the sensor data ends in 'YES'"""
    # An empty read counts as unstable instead of raising IndexError
    return bool(lines) and lines[0].strip().endswith('YES')


def read_temp(device_file, converter=CONVERT_CELCIUS, maxretries=10):
    """Read the sensor and return the temperature in the requested unit

    device_file -- path of the sensor data file to read
    converter   -- (conversion function, unit symbol, unit name) tuple
    maxretries  -- number of additional reads attempted while the data is
                   not flagged 'YES' on its first line

    Returns a (temperature, tries) tuple, where tries is the number of reads
    that were performed. Raises ValueError when no stable data was obtained
    or the data lacks a 't=<value>' field; errors from opening or reading
    the file propagate from read_sensor_raw.
    """
    lines = read_sensor_raw(device_file)
    tries = 1
    while not _sensor_data_stable(lines) and tries <= maxretries:
        tries += 1
        sleep(0.2)
        logger.warning('Temperature sensor data not stable, reading once more')
        lines = read_sensor_raw(device_file)

    if not _sensor_data_stable(lines):
        errmsg = 'no stable temperature sensor data after %d tries' % tries
    else:
        # The reading is expected as 't=<integer>' on the second line
        equals_pos = lines[1].find('t=') if len(lines) > 1 else -1
        if equals_pos == -1:
            errmsg = 'temperature sensor data format is not supported'
        else:
            temp_read = int(lines[1][equals_pos+2:])
            temp = converter[0](temp_read)
            logger.debug('Temperature sensor value %d is %.2f%s', temp_read,
                         temp, converter[1])
            return temp, tries

    logger.critical(errmsg)
    raise ValueError(errmsg)
  216 +
  217 +
def _format_perfvalue(value):
    """Render one perf. data value; sequences become ';'-joined fields"""
    if isinstance(value, (list, tuple)):
        # None marks a field that is left empty between the separators
        return ';'.join('' if item is None else str(item) for item in value)
    return value


def nagios_exit(status, message, data=None):
    """Exit 'nagios-style': print the status line and exit with its code

    status  -- (label, exit code) tuple, e.g. one of the NAGIOS_* constants
    message -- human readable text printed after the status label
    data    -- optional dict of performance data; each entry is printed as
               'label'=value after a ' | ' separator

    The line is only printed when the logger is enabled for CRITICAL; the
    process always exits with status[1].
    """
    if logger.isEnabledFor(logging.CRITICAL):
        perfstr = ''
        if data:
            perfdata = ["'%s'=%s" % (label, _format_perfvalue(value))
                        for label, value in data.items()]
            perfstr = ' | ' + ' '.join(perfdata)
        # Single parenthesized argument: prints the same on Python 2 and 3
        print('Temperature %s: %s%s' % (status[0], message, perfstr))
    exit(status[1])
  230 +
  231 +
if __name__ == '__main__':
    # Step 1: parse the command line; any problem ends as Nagios UNKNOWN
    try:
        args = parse_args()
    except ArgumentParserError as e:
        # '%s' % e instead of e.message, which Python 3 exceptions lack
        nagios_exit(NAGIOS_UNKNOWN, 'error with setup: %s' % e)
    except (KeyboardInterrupt, EOFError) as e:
        print('')
        nagios_exit(NAGIOS_UNKNOWN, 'initialization aborted')

    # Step 2: locate the sensor and read it, timing the whole operation
    try:
        starttime = time()
        devicefile = get_sensor_device_filename(args)
        temperature, tries = read_temp(devicefile, args.converter, args.retries)
        endtime = time()

    except (KeyboardInterrupt) as e:
        nagios_exit(NAGIOS_UNKNOWN, 'temperature sensor read aborted by user')

    except (IOError, ValueError, IndexError) as e:
        # IndexError: sensor file with fewer lines than expected; report it
        # as UNKNOWN as well instead of dying with a traceback
        nagios_exit(NAGIOS_UNKNOWN, 'temperature sensor read failed: %s' % e)

    elapse = endtime - starttime
    logger.info('Got temperature reading of %.2f degrees %s in %fs',
                temperature, args.converter[2], elapse)

    # Step 3: compare against the thresholds; critical wins over warning and
    # a threshold that was not given is never exceeded
    temp_unit = args.converter[1]
    message = 'current temperature is %.2f%s' % (temperature, temp_unit)
    if args.critical is not None and temperature > args.critical:
        nagiosresult = NAGIOS_CRITICAL
        message += ', above critical threshold %.2f%s' % (args.critical,
                                                          temp_unit)
    elif args.warn is not None and temperature > args.warn:
        nagiosresult = NAGIOS_WARN
        message += ', above warning threshold %.2f%s' % (args.warn, temp_unit)
    else:
        nagiosresult = NAGIOS_OK

    # Step 4: report; each perf. data list is value, warn, crit, min, max
    nagios_exit(nagiosresult, message, {
        'temperature': [temperature, args.warn, args.critical, None, None],
        'retries': [tries - 1, None, args.retries, 0, None],
        'time': ['%f' % elapse, None, None, 0, None],
    })
... ...