From a1eeb872bfc97d8d856c164b2f79e6a7a83d47c6 Mon Sep 17 00:00:00 2001 From: Frederik Lindenaar <frederik@lindenaar.nl> Date: Mon, 17 Oct 2016 18:44:33 +0200 Subject: [PATCH] added check_temperature plugin to monitor temperatures on a RaspberryPi --- README.md | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ plugins/check_temperature | 272 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 331 insertions(+), 0 deletions(-) create mode 100755 plugins/check_temperature diff --git a/README.md b/README.md index 0f5e5c8..46e93a9 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,8 @@ This repository contains the following scripts: monitor multi-home and dual-stack (i.e. ipv4 and ipv6) servers. * [check_otp](#check_otp) plugin to monitor PrivacyIDEA (and LinOTP) OTP validation + * [check_temperature](#check_temperature) + plugin to monitor the temperature of a 1-wire sensor on a RaspberryPi * [nagiosstatus](#nagiosstatus) CGI-BIN script to report the status of nagios (to monitor nagios itself) @@ -233,6 +235,63 @@ define service { ~~~ +<a name=check_temperature>plugins/check_temperature</a> +------------------------------------------------------- +Plugin (check) to monitor monitor the temperature using a sensor connected to a +RaspberryPi. This implementation is specifically for the DS18B20 1-wire +temperature sensor. Other methods and interfaces can be plugged in easily (just +raise a request or provide a patch). For information on how to connect sensor +to the RaspberryPi and to get it working please see [this Adafruit tutorial]( +https://learn.adafruit.com/adafruits-raspberry-pi-lesson-11-ds18b20-temperature-sensing). 
+ +To enable the 1-wire interface support on the RaspberryPi one can use the +command: +~~~ + sudo raspi-config nonint do_onewire 0 +~~~ +or use `raspi-config` in interactive mode (9. Advanced Options --> A9. 1-Wire). +Please note that changing this requires a reboot. + +Installation is straightforward: after installing the script on the server +add the following to your Nagios `commands.cmd` configuration file: + +~~~ +# 'check_temperature' command definition to monitor a single temperature in C +# parameters: warning (ARG1) and critical (ARG2) temperature in Celsius +define command { + command_name check_temperature + command_line [install_path]/plugins/check_temperature -w $ARG1$ -c $ARG2$ +} + +# 'check_ftemperature' command definition to monitor a single temperature in F +# parameters: warning (ARG1) and critical (ARG2) temperature in Fahrenheit +define command { + command_name check_ftemperature + command_line [install_path]/plugins/check_temperature -F -w $ARG1$ -c $ARG2$ +} + +# 'check_temperature_sensor' command definition to monitor a single temperature in C +# parameters: sensor serial (ARG1), warning (ARG2) and critical (ARG3) temperature in Celsius +define command { + command_name check_temperature_sensor + command_line [install_path]/plugins/check_temperature -s $ARG1$ -w $ARG2$ -c $ARG3$ +} + +# 'check_ftemperature_sensor' command definition to monitor a single temperature in F +# parameters: sensor serial (ARG1), warning (ARG2) and critical (ARG3) temperature in Fahrenheit +define command { + command_name check_ftemperature_sensor + command_line [install_path]/plugins/check_temperature -F -s $ARG1$ -w $ARG2$ -c $ARG3$ +} + +~~~ + +Make sure to replace `[install_path]/plugins` with the location of the script. + +Please run `check_temperature -h` after installation for an overview of the +available command line options (e.g. to enable logging to a file). 
+ + <a name=nagiosstatus>cgi-bin/nagiosstatus.sh</a> ------------------------------------------------ Very simplistic CGI-BIN script that checkes whether nagios is still running and diff --git a/plugins/check_temperature b/plugins/check_temperature new file mode 100755 index 0000000..6d87bba --- /dev/null +++ b/plugins/check_temperature @@ -0,0 +1,272 @@ +#! /usr/bin/env python +# +# check_temperature - Nagios temperature check for DS18B20 sensor on RaspberryPi +# +# Version 1.0, latest version, documentation and bugtracker available at: +# https://gitlab.lindenaar.net/scripts/nagios-plugins +# +# Copyright (c) 2016 Frederik Lindenaar +# +# This script is free software: you can redistribute and/or modify it under the +# terms of version 3 of the GNU General Public License as published by the Free +# Software Foundation, or (at your option) any later version of the license. +# +# This script is distributed in the hope that it will be useful but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along with +# this program. If not, visit <http://www.gnu.org/licenses/> to download it. 
from sys import exit
from os.path import basename, splitext
from glob import glob
from time import time, sleep
from argparse import ArgumentParser as StandardArgumentParser, FileType, \
    _StoreAction as StoreAction, _StoreConstAction as StoreConstAction
import logging

# Constants (no need to change but allows for easy customization)
VERSION = "1.0"
PROG_NAME = splitext(basename(__file__))[0]
PROG_VERSION = PROG_NAME + ' ' + VERSION
SENSOR_SCALE = 1000                 # raw reading divided by this gives Celcius
SENSOR_DEV_DIR = '/sys/bus/w1/devices/'
SENSOR_DEV_PREFIX = '28-'
SENSOR_DEV_SUFFIX = '/w1_slave'
SENSOR_READ_RETRIES = 10
SENSOR_RETRY_DELAY = 0.2            # seconds to wait before re-reading sensor

LOG_FORMAT = '%(levelname)s - %(message)s'
LOG_FORMAT_FILE = '%(asctime)s - ' + LOG_FORMAT
LOGGING_NONE = logging.CRITICAL + 10    # level above CRITICAL: mutes everything
NAGIOS_OK = ('OK', 0)                   # ( status label, process exit code )
NAGIOS_WARN = ('WARNING', 1)
NAGIOS_CRITICAL = ('CRITICAL', 2)
NAGIOS_UNKNOWN = ('UNKNOWN', 3)

# Setup logging
logging.basicConfig(format=LOG_FORMAT)
logging.addLevelName(LOGGING_NONE, 'NONE')
logger = logging.getLogger(PROG_NAME)
logger.setLevel(logging.CRITICAL)


################[ wrapper to stop ArgumentParser from exiting ]################
# based on http://stackoverflow.com/questions/14728376/i-want-python-argparse-to-throw-an-exception-rather-than-usage/14728477#14728477
# the only way to do this is overriding the error method and throw an Exception
class ArgumentParserError(Exception):
    """Raised instead of exiting when the command line cannot be parsed"""
    pass


class ArgumentParser(StandardArgumentParser):
    """ArgumentParser not exiting with non-Nagios format message upon errors"""
    def error(self, message):
        raise ArgumentParserError(message)


##################[ Action to immediately set the log level ]##################
class SetLogLevel(StoreConstAction):
    """ArgumentParser action to set log level to provided const value"""
    def __call__(self, parser, namespace, values, option_string=None):
        # applied while parsing so later options are already logged at the
        # requested level; the namespace itself is deliberately left untouched
        logging.getLogger(PROG_NAME).setLevel(self.const)


####################[ Action to immediately log to a file ]####################
class SetLogFile(StoreAction):
    """ArgumentParser action to log to file (sets up FileHandler accordingly)"""
    def __call__(self, parser, namespace, values, option_string=None):
        super(SetLogFile, self).__call__(parser, namespace, values,
                                         option_string)
        handler = logging.FileHandler(values)
        handler.setFormatter(logging.Formatter(LOG_FORMAT_FILE))
        file_logger = logging.getLogger(PROG_NAME)
        # stop propagation so records go to the file only, not to the root
        # handler installed by basicConfig() as well
        file_logger.propagate = False
        file_logger.addHandler(handler)


###############################################################################

def convert_celcius(temp_read):
    """Converts raw temperature sensor value to degrees Celcius"""
    return float(temp_read) / float(SENSOR_SCALE)
CONVERT_CELCIUS = (convert_celcius, 'C', 'Celcius')


def convert_farenheit(temp_read):
    """Converts raw temperature sensor value to degrees Farenheit"""
    return float(temp_read * 9) / float(5 * SENSOR_SCALE) + 32.0
CONVERT_FARENHEIT = (convert_farenheit, 'F', 'Farenheit')


def isempty(string):
    """Checks whether the string provided is unset (None) or empty"""
    return string is None or len(string) == 0


def parse_args(argv=None):
    """Parse the command line and return the resulting argparse namespace

    argv is the list of arguments to parse, None (the default) makes argparse
    use the arguments of the running process. The logging options (-q, -v, -d
    and -l) take effect immediately while parsing.

    Raises ArgumentParserError when the command line is invalid.
    """
    # Setup argument parser, the workhorse gluing it all together
    parser = ArgumentParser(
        epilog='(*) by default the script will look for the first device that '
               'matches %s* in %s, if multiple entries are found -s or -f must '
               'be used to specify which sensor to read.' %
               (SENSOR_DEV_PREFIX, SENSOR_DEV_DIR),
        description='Nagios check plugin for 1-wire temp. sensor on RaspberryPi'
    )
    parser.add_argument('-V', '--version', action="version",
                        version=PROG_VERSION)

    pgroup = parser.add_mutually_exclusive_group(required=False)
    pgroup.add_argument('-C', '--celcius', action='store_const',
                        dest='converter', const=CONVERT_CELCIUS,
                        help='measure, critical and warn values in Celcius '
                             '(default)', default=CONVERT_CELCIUS)
    pgroup.add_argument('-F', '--farenheit', action='store_const',
                        dest='converter', const=CONVERT_FARENHEIT,
                        help='measure, critical and warn values in Farenheit')

    parser.add_argument('-w', '--warn', type=float,
                        help='temperature for warning status')
    parser.add_argument('-c', '--critical', type=float,
                        help='temperature for critical status')

    parser.add_argument('-r', '--retries', type=int,
                        default=SENSOR_READ_RETRIES,
                        help='number of times to retry reading sensor data when'
                             ' unstable (defaults to %d)' % SENSOR_READ_RETRIES)

    pgroup = parser.add_mutually_exclusive_group(required=False)
    pgroup.add_argument('-s', '--serial',
                        help='(unique part of) temperature sensor serial (*)')
    pgroup.add_argument('-f', '--file',
                        help='input file (or device) to obtain data from (*)')

    pgroup = parser.add_mutually_exclusive_group(required=False)
    pgroup.add_argument('-q', '--quiet', default=logging.CRITICAL,
                        action=SetLogLevel, const=LOGGING_NONE,
                        help='quiet (no output, only exit with exit code)')
    pgroup.add_argument('-v', '--verbose', help='more verbose output',
                        action=SetLogLevel, const=logging.INFO)
    pgroup.add_argument('-d', '--debug', help='debug output (more verbose)',
                        action=SetLogLevel, const=logging.DEBUG)

    parser.add_argument('-l', '--logfile', action=SetLogFile,
                        help='send logging output to logfile')

    # parse arguments, if we get past this all seems OK
    return parser.parse_args(argv)


def _sensor_serial(device_folder, prefix):
    """Returns the folder's base name with the sensor prefix (if any) removed"""
    name = basename(device_folder)
    return name[len(prefix):] if name.startswith(prefix) else name


def get_sensor_device_filename(args, dev_dir=SENSOR_DEV_DIR,
                               prefix=SENSOR_DEV_PREFIX,
                               suffix=SENSOR_DEV_SUFFIX):
    """Auto-determine sensor datafile name (unless args.file is set)

    Without args.file the folders in dev_dir are searched: for <prefix>* when
    no args.serial is given, otherwise for entries ending in args.serial.
    Exactly one folder must match, its name with suffix appended is returned.

    Raises ValueError when no sensor or more than one sensor is found.
    """
    if not isempty(args.file):
        filename = args.file
    else:
        search_pat = dev_dir
        if dev_dir and not dev_dir.endswith('/'):
            search_pat += '/'
        if isempty(args.serial):
            search_pat += prefix + '*'
        else:
            search_pat += '*' + args.serial
        logger.debug('looking for sensors with search pattern %s', search_pat)

        device_folders = glob(search_pat)
        if len(device_folders) != 1:
            if not device_folders:
                errmsg = 'no supported temperature sensors in %s' % dev_dir
            else:
                # sorted so the message does not depend on directory order
                serials = sorted(_sensor_serial(folder, prefix)
                                 for folder in device_folders)
                errmsg = 'found multiple temperature sensors (%s), please '\
                         'specify which one to use' % ', '.join(serials)
            logger.critical(errmsg)
            raise ValueError(errmsg)
        filename = device_folders[0] + suffix

    logger.debug('using temperature sensor at %s', filename)
    return filename


def read_sensor_raw(device_file):
    """Reads the raw data from the sensor device file, returns array of lines"""
    with open(device_file, 'r') as f:
        lines = f.readlines()
        logger.debug('Temperature sensor data read from %s: %s', f.name, lines)
    return lines


def _is_stable(lines):
    """Checks that data was read and that its first line ends with 'YES'"""
    # NOTE(review): 'YES' is presumably the 1-wire driver's CRC verdict
    return bool(lines) and lines[0].strip().endswith('YES')


def read_temp(device_file, converter=CONVERT_CELCIUS,
              maxretries=SENSOR_READ_RETRIES, retry_delay=SENSOR_RETRY_DELAY):
    """Reads sensor data and converts it to desired unit, returns temperature

    The sensor is read again (at most maxretries times, waiting retry_delay
    seconds before each attempt) for as long as the data is not stable.

    Returns the tuple ( temperature, number of reads needed ).
    Raises ValueError when no stable data was obtained or when the data is not
    in the expected format, IOError when the device file cannot be read.
    """
    lines = read_sensor_raw(device_file)
    tries = 1
    while not _is_stable(lines) and tries <= maxretries:
        tries += 1
        sleep(retry_delay)
        logger.warning('Temperature sensor data not stable, reading once more')
        lines = read_sensor_raw(device_file)

    if not _is_stable(lines):
        errmsg = 'no stable temperature sensor data after %d tries' % tries
    elif len(lines) < 2 or lines[1].find('t=') == -1:
        errmsg = 'temperature sensor data format is not supported'
    else:
        # the raw reading is everything following 't=' on the second line
        temp_read = int(lines[1][lines[1].find('t=') + 2:])
        temp = converter[0](temp_read)
        logger.debug('Temperature sensor value %d is %.2f%s', temp_read,
                     temp, converter[1])
        return temp, tries

    logger.critical(errmsg)
    raise ValueError(errmsg)


def _format_perfvalue(value):
    """Formats a perf. data value, lists become ';'-separated (None is empty)"""
    if isinstance(value, list):
        return ';'.join('' if item is None else str(item) for item in value)
    return value


def nagios_exit(status, message, data=None):
    """exit 'nagios-style', print status and message followed by perf. data

    status is one of the NAGIOS_* tuples ( label, exit code ), data an optional
    dict of performance data: label -> value or list of values. Nothing is
    printed when logging of CRITICAL messages is disabled (quiet mode).
    """
    if logger.isEnabledFor(logging.CRITICAL):
        perfstr = ''
        if data:
            perfstr = ' | ' + ' '.join("'%s'=%s" % (label,
                                                    _format_perfvalue(value))
                                       for label, value in data.items())
        # single-argument form, prints the same under Python 2 and Python 3
        print('Temperature %s: %s%s' % (status[0], message, perfstr))
    exit(status[1])


def main():
    """Reads the sensor, compares with the thresholds and reports to Nagios"""
    try:
        args = parse_args()
    except ArgumentParserError as e:
        nagios_exit(NAGIOS_UNKNOWN, 'error with setup: ' + str(e))
    except (KeyboardInterrupt, EOFError):
        print('')
        nagios_exit(NAGIOS_UNKNOWN, 'initialization aborted')

    try:
        starttime = time()
        devicefile = get_sensor_device_filename(args)
        temperature, tries = read_temp(devicefile, args.converter, args.retries)
        endtime = time()

    except KeyboardInterrupt:
        nagios_exit(NAGIOS_UNKNOWN, 'temperature sensor read aborted by user')

    except (IOError, ValueError) as e:
        nagios_exit(NAGIOS_UNKNOWN, 'temperature sensor read failed: %s' % e)

    elapse = endtime - starttime
    logger.info('Got temperature reading of %.2f degrees %s in %fs',
                temperature, args.converter[2], elapse)

    temp_unit = args.converter[1]
    message = 'current temperature is %.2f%s' % (temperature, temp_unit)
    if args.critical is not None and temperature > args.critical:
        nagiosresult = NAGIOS_CRITICAL
        message += ', above critical threshold %.2f%s' % (args.critical,
                                                          temp_unit)
    elif args.warn is not None and temperature > args.warn:
        nagiosresult = NAGIOS_WARN
        message += ', above warning threshold %.2f%s' % (args.warn, temp_unit)
    else:
        nagiosresult = NAGIOS_OK

    nagios_exit(nagiosresult, message, {
        'temperature': [temperature, args.warn, args.critical, None, None],
        'retries': [tries - 1, None, args.retries, 0, None],
        'time': ['%f' % elapse, None, None, 0, None]
    })


if __name__ == '__main__':
    main()

# -- libgit2 0.22.2 (signature trailer of the patch that carried this script)