You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

165 lines
5.7 KiB
Bash

#!/bin/bash
# --- Nagios installation layout -------------------------------------------
nagBase=/usr/local/nagios
nagLog="${nagBase}/var/nagios.log"
archives="${nagBase}/var/archives"

# Log line layout this script filters on:
#   [timestamp] aSource aType: host;service;state;severity;num;text

# File(s) to scan; the live Nagios log unless --file overrides it.
files="${nagLog}"

# Option defaults. An empty value means "not restricted / switched off".
tailMode=
fromTime= toTime= noTime=
host= service=
state= severity= num=
raw=
verbose=

# These two need to be regexp alternations (not empty) so that they still
# match every source/type when nothing is specified on the command line.
aSource='(HOST|SERVICE)'
aType='(ALERT|NOTIFICATION|EVENT HANDLER)'
# Print a debug line to stdout, but only when verbose mode is on.
# Globals:   verbose (read) - any non-empty value enables output
# Arguments: the message words; they are joined into one line
# Returns:   non-zero when verbose mode is off, otherwise echo's status
do_debug() {
  if [ -z "$verbose" ]; then
    return 1
  fi
  echo "### DEBUG: $*"
}
# Print the usage summary to stdout and terminate the script with status 0.
# Globals:   files (read) - expanded into the --file description, so the
#            help shows the file list that is in effect when it is printed
# Arguments: none
# Note: the here-document delimiter is unquoted on purpose so that $files
#       is expanded; keep any literal '$' or backtick out of the text.
print_help() {
  cat << HELP_EOF
Usage:
  --help                      show this help and exit (-h means --host)
  -d|-v                       turn on verbose/debug mode
  -h|--host <host>
  -s|--service <service>
  --current                   show CURRENT HOST|SERVICE STATE information
  --state <state (OK, WARNING, CRITICAL, UNKNOWN)>, no default
  --ok|--warn|--warning|--crit|--critical
                              only print things that match
  --up|--down|--un            only print things that match (un=unreachable)
  --hard|--soft               only print things that match
  --sev|--severity <type (HARD, SOFT)>, default=all
  -Q                          shortcut for HARD CRITICAL
  -n|--num <#>                selects the alert number (of max_retries), no default
  --type <alert type (ALERT, EVENT, NOTIFICATION)>, default=all
  --event                     set alert type to EVENT HANDLER
  --src|--source <alert source (HOST, SERVICE)>, default=all
  --from <from time>          default=no lower limit
  --to <to time>              default=no upper limit
  --notime                    don't convert timestamp to human time
  -c|--constant               the equivalent of a tail -f on the Nagios log file
  --file <file[s]>            scan <file[s]> instead of $files; use "..." if wildcards
  --raw                       just print the raw file but convert time stamps to
                              human readable form

All input is evaluated as a case-insensitive regexp surrounded by wildcards.

Time values can be in the following formats:
  HH:MM[:SS]    YYYY-MM-DD    YYYY-MM-DD HH:MM[:SS]*

This program does not directly scan log files other than the current one. If you want
to scan older files, use the --file option. Note that this can be combined with the
--from and --to options, but keep in mind that file names are named based on the date they
were rotated, not the date of their contents.

Some options set others, so options earlier on the command line can be overwritten by later ones.

*Log entries and time values before 2001-09-09 (Unix timestamps shorter than
 10 digits) are not supported.
HELP_EOF
  exit 0
}
# Abort with a usage error when an option that takes a value is the last
# word on the command line.
# Arguments: $1 - option name, $2 - number of words left (option included)
_require_value() {
  if [ "$2" -lt 2 ]; then
    echo "Error: option $1 requires an argument" >&2
    exit 2
  fi
}

# Parse the command line into the global option variables.
# Globals (written): files host service state severity raw num aType aSource
#                    fromTime toTime noTime tailMode verbose
# Arguments: the script's command-line words
# Notes:
#   - The loop runs on the argument count, so an empty-string argument
#     (e.g. --host "") no longer ends parsing early.
#   - Options taking a value are checked first: a failing "shift 2" does not
#     shift at all, which previously made the loop spin forever.
#   - Later options overwrite earlier ones; unknown words are skipped.
parse_args() {
  while [ "$#" -gt 0 ]; do
    case "$1" in
      --help) print_help ;;
      --file) _require_value "$1" "$#"; files="$2"; shift 2 ;;
      -h|--host) _require_value "$1" "$#"; host="$2"; shift 2 ;;
      -s|--service) _require_value "$1" "$#"; service="$2"; shift 2 ;;
      --state) _require_value "$1" "$#"; state="$2"; shift 2 ;;
      --ok) state="OK"; shift ;;
      --up) state="UP"; shift ;;
      --down) state="DOWN"; shift ;;
      --un) state="UNREACHABLE"; shift ;;
      --current) aSource="(HOST|SERVICE)"; aType="STATE"; shift ;;
      --warn|--warning) state="WARNING"; shift ;;
      --crit|--critical) state="CRITICAL"; shift ;;
      --hard) severity="HARD"; shift ;;
      --soft) severity="SOFT"; shift ;;
      --raw) raw="true"; shift ;;
      --sev|--severity) _require_value "$1" "$#"; severity="$2"; shift 2 ;;
      -Q) severity="HARD"; state="CRITICAL"; shift ;;
      -n|--num) _require_value "$1" "$#"; num="$2"; shift 2 ;;
      --type) _require_value "$1" "$#"; aType="$2"; shift 2 ;;
      --event) aType="EVENT HANDLER"; shift ;;
      --src|--source) _require_value "$1" "$#"; aSource="$2"; shift 2 ;;
      --from) _require_value "$1" "$#"; fromTime="$2"; shift 2 ;;
      --to) _require_value "$1" "$#"; toTime="$2"; shift 2 ;;
      --notime) noTime="true"; shift ;;
      -c|--constant) tailMode="--follow=name"; shift ;;
      # -d is what the usage text advertises for debug mode; accept both.
      -d|-v) verbose="true"; shift ;;
      # Unknown words are ignored, as before.
      *) shift ;;
    esac
  done
}
parse_args "$@"
# $1 $2 $3 $4 $5 $6
# [fromTime] aSource aType: host;service;state;severity;num;text
# $1 $2 $3 $4 $5 $NF
# [1723708041] SERVICE ALERT: localhost; URL Status: frommyhive.com; CRITICAL; SOFT; 1; CRITICAL - Socket timeout
# [1723722735] HOST ALERT: Security Cameras; DOWN; SOFT; 1; CRITICAL - 192.168.1.88: rta nan, lost 100%
# (NF-3)(NF-2)(NF-1)
# [1723694400] CURRENT HOST STATE: localhost; UP;HARD;1;OK - 127.0.0.1 rta 0.067ms lost 0%
# (HOST|SERVICE) (DOWNTIME|FLAPPING)? (ALERT|NOTIFICATION)
# ---------------------------------------------------------------------------
# Build the gawk program from the parsed options and run it over the log(s).
#
# The program is assembled as text: $awkString selects the line type, host
# and service; $timeSel optionally restricts the time window; $timeSub
# optionally rewrites the leading [epoch] stamp into a readable time.
# User-supplied values are spliced into the program as regexps, so a value
# containing "/" or a double quote will break the generated program.
# ---------------------------------------------------------------------------

# Optional time window. The epoch is the 10 characters after the leading "[".
timeSel=""
if [ -n "$fromTime" ]; then
  # Stop on an unparsable time: an empty bound would otherwise end up in
  # the gawk program and produce a syntax error there.
  if ! fromTimeUnix=$(date -d "$fromTime" +"%s"); then
    echo "Error: cannot parse --from time: $fromTime" >&2
    exit 1
  fi
  # "+ 0" forces a numeric comparison; substr() returns a string, and awk
  # compares a string with a number as strings.
  timeSel="&& substr (\$1, 2, 10) + 0 >= $fromTimeUnix"
fi
if [ -n "$toTime" ]; then
  if ! toTimeUnix=$(date -d "$toTime" +"%s"); then
    echo "Error: cannot parse --to time: $toTime" >&2
    exit 1
  fi
  timeSel="$timeSel && substr (\$1, 2, 10) + 0 <= $toTimeUnix"
fi

# Unless --notime was given, replace the "[epoch]" prefix of $1 with a
# formatted local time. sub() is part of the pattern, so a line on which the
# substitution does not apply is not printed.
timeSub=""
[ -z "$noTime" ] && timeSub="&& sub (/^\[[0-9]{10}]/, strftime (\"%Y-%m-%d %H:%M:%S\", substr (\$1, 2, 10)), \$1)"

# Line selector: timestamp, optional GLOBAL/CURRENT, source, optional
# DOWNTIME/FLAPPING, type, then host and service somewhere after the colon.
awkString="/^\[[0-9]{10}] (GLOBAL |CURRENT )?$aSource[^ ]* (DOWNTIME |FLAPPING )?$aType[^:]*: .*$host.*$service/"
# --raw selects every non-empty line instead.
[ -n "$raw" ] && awkString="/./"

do_debug "host=$host"
do_debug "service=$service"
do_debug "awk=$awkString"
do_debug "state=$state"
do_debug "severity=$severity"
do_debug "num=$num"

# Fields are split on ";" and addressed from the end of the record:
# $(NF) = the text of the output
# $(NF-1) = check number (of max_retries)
# $(NF-2) = HARD/SOFT
# $(NF-3) = UP/DOWN/UNREACHABLE/OK/WARNING/CRITICAL/etc
#
# $tailMode and $files are left unquoted on purpose: tailMode may be empty
# and files may hold several names or wildcards.
if [ -n "$raw" ]; then
  # OFS=";" keeps the separators intact: changing \$1 makes gawk rebuild the
  # record with OFS, which by default would turn every ";" into a space.
  # shellcheck disable=SC2086
  tail -n +1 $tailMode $files | gawk -F\; "\
BEGIN {OFS=\";\"} \
$awkString \
$timeSel \
$timeSub \
{
print
}"
else
  # The state and severity fields are wrapped in ANSI colour sequences
  # before printing.
  # NOTE(review): WARNING/UNREACHABLE use 31 (red) and CRITICAL/DOWN use 33
  # (yellow) - confirm this mapping is intended.
  # shellcheck disable=SC2086
  tail -n +1 $tailMode $files | gawk -F\; "\
BEGIN {IGNORECASE=1} \
$awkString \
&& \$(NF-3)~/$state/ \
&& \$(NF-2)~/$severity/ \
&& \$(NF-1)~/$num/ \
$timeSel \
$timeSub \
{
sub(/OK/, \"\x1b[32mOK\x1b[0m\", \$(NF-3))
sub(/WARNING/, \"\x1b[31mWARNING\x1b[0m\", \$(NF-3))
sub(/CRITICAL/, \"\x1b[33mCRITICAL\x1b[0m\", \$(NF-3))
sub(/UP/, \"\x1b[32mUP\x1b[0m\", \$(NF-3))
sub(/DOWN/, \"\x1b[33mDOWN\x1b[0m\", \$(NF-3))
sub(/UNREACHABLE/, \"\x1b[31mUNREACHABLE\x1b[0m\", \$(NF-3))
sub(/HARD/, \"\x1b[35mHARD\x1b[0m\", \$(NF-2))
sub(/SOFT/, \"\x1b[36mSOFT\x1b[0m\", \$(NF-2))
printf \"%s: %s;%s;%s;%s;%s\n\", \$1, \$2, \$3, \$4, \$5, \$6
}"
fi