#!/bin/bash
#
# nagios_hpeilo_traps -- The script aims to collect the SNMP traps
#                        received from HP ProLiant Server and to
#                        update the corresponding status in real-time
#
# (C) Copyright [2015] Hewlett-Packard Enterprise Development Company, L.P.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to:
#   Free Software Foundation, Inc.
#   51 Franklin Street, Fifth Floor
#   Boston, MA 02110-1301, USA.
#
# Written by Adrian Huang <adrian.huang@hp.com>.

# Directory this script lives in; the engine binary is invoked from here
# (see NagiosLibexec / NagiosHpiloEngine below).
prefix=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )

NagiosLibexec="${prefix}"
NagiosHpiloEngine="nagios_hpeilo_engine"

# Only written to when verbose mode (-v) is enabled.
HpiloLogFile="/var/log/nagios_hpeilo_traps.log"
HpiloSnmpComm="public"    # hpeilo ilo community string

# Nagios service names. The array position + 1 is the index handed to the
# engine through its -o option.
HpiloServices=("System Status" "Fans" "Memory" "Network" "Power Supplies" "Processors" "Storage" "Temperatures")

# Name of the trap-OID varbind in the trap text read from stdin.
SnmpTrapOid="iso.3.6.1.6.3.1.1.4.1.0"

# Sentinel returned by get_hlth_service_type for trap 11020. The spelling is
# kept as-is: the value is only compared against itself and shown in the log.
HlthStatusChgTrap="HlthStuatsChangeTrap"

# Map an iLO SNMP trap number to the Nagios service it affects.
#
# argument 1 ($1): the trap number
# output (stdout): the matching entry of HpiloServices, the value of
#                  $HlthStatusChgTrap for trap 11020, or "None" when the
#                  trap number is not handled (including an empty argument)
function get_hlth_service_type () {
    case "$1" in
        603[5-9] | 6055 )    # Fan Group Trap
            echo "${HpiloServices[1]}"
            ;;
        # 6055 is not repeated here: the Fan arm above already claims it and
        # the first matching arm wins, so listing it again was dead code.
        603[0-4] | 604[8-9] | 6050 | 6054 )    # Power Supply Group Trap
            echo "${HpiloServices[4]}"
            ;;
        604[0-2] )    # Temperature Group Trap
            echo "${HpiloServices[7]}"
            ;;
        602[3-4] )    # Processors Group Trap
            echo "${HpiloServices[5]}"
            ;;
        11020 )    # Health Status Array Change Trap
            echo "$HlthStatusChgTrap"
            ;;
        *)
            echo "None"
            ;;
    esac
}

# Submit a passive service check result by writing a
# PROCESS_SERVICE_CHECK_RESULT line to the file named by $NagiosCmdFile.
#
# argument 1 ($1): the iLO IP which generated the trap this time
# argument 2 ($2): service type
# argument 3 ($3): service status (NAGIOS_OK or NAGIOS_WARNING or ...)
# argument 4 ($4): status information
# globals read:    NagiosCmdFile (set by the update_hlth_status* functions)
# returns:         1 when NagiosCmdFile is empty, otherwise the status of
#                  the write
function nagios_passive_check () {
    local now

    write_log "PROCESS_SERVICE_CHECK_RESULT;$1;$2;$3;$4"

    if [ -z "$NagiosCmdFile" ]; then
        echo "nagios_passive_check: Nagios command file is not set" >&2
        return 1
    fi

    now=$(date +%s)

    # The payload is passed as printf arguments, never as part of the format
    # string: status text such as "100% failed" would otherwise be parsed as
    # conversion specifications and corrupt the command.
    printf '[%lu] PROCESS_SERVICE_CHECK_RESULT;%s;%s;%s;%s\n' \
        "$now" "$1" "$2" "$3" "$4" > "$NagiosCmdFile"
}

# Append a timestamped line to $HpiloLogFile, but only in verbose mode.
#
# argument 1 ($1): The data to be logged (backslash escapes are interpreted
#                  because of echo -e)
# globals read:    VerboseMode ("1" enables logging), HpiloLogFile
function write_log () {
    if [ "$VerboseMode" = "1" ]; then
        echo -e "[$(date "+%b %d %T")] $1" >> "$HpiloLogFile"
    fi
}

# Locate the main Nagios configuration file by scanning the Nagios init
# script(s) for the assignment that names it.
#
# arguments ($@):  optional list of init scripts to scan; defaults to
#                  /etc/init.d/nagios* when none are given
# output (stdout): the path of nagios.cfg, or an empty line when no
#                  assignment was found
function get_nagios_cfg_file() {
    local -a init_scripts=("$@")
    local nagios_cfg=
    local nagios_prefix=

    if [ "${#init_scripts[@]}" -eq 0 ]; then
        init_scripts=(/etc/init.d/nagios*)
    fi

    # The format might be like:
    # 1) NagiosIloCfgFile=${prefix}/etc/nagios.cfg
    # 2) NAGIOSCFG="/etc/nagios3/nagios.cfg"
    # Only the first match is used; several matches would otherwise be
    # glued together into one unusable path.
    nagios_cfg=$(grep -hE "=.*etc.*nagios.cfg" "${init_scripts[@]}" \
        | head -n 1 \
        | cut -d '=' -f 2)

    # trim double quotes and surrounding whitespace
    nagios_cfg=${nagios_cfg//\"/}
    read -r nagios_cfg <<< "$nagios_cfg"

    if [[ $nagios_cfg = *prefix* ]]; then
        # The init script has the format like this:
        #   NagiosIloCfgFile=${prefix}/etc/nagios.cfg
        # So, we need to replace ${prefix} with the actual value

        # Find the nagios prefix path
        nagios_prefix=$(grep -hi "^prefix=" "${init_scripts[@]}" \
            | head -n 1 \
            | cut -d '=' -f 2)
        nagios_prefix=${nagios_prefix//\"/}

        # ${prefix}/etc/nagios.cfg -> will be etc/nagios.cfg
        nagios_cfg="${nagios_prefix}/${nagios_cfg#*/}"
    fi

    echo "$nagios_cfg"
}

# Derive the path of the iLO host configuration file, which is expected at
# <directory of nagios.cfg>/ilo/ilo.cfg.
#
# output (stdout): the ilo.cfg path. When nagios.cfg cannot be located the
#                  directory part is empty, i.e. "/ilo/ilo.cfg" is printed,
#                  which is what callers received before as well.
function get_nagios_ilo_cfg_path() {
    local nagios_cfg
    local nagios_path=

    nagios_cfg=$(get_nagios_cfg_file)

    # dirname is only run on a non-empty path (it errors out without an
    # operand); quoting keeps directories containing spaces intact.
    if [ -n "$nagios_cfg" ]; then
        nagios_path=$(dirname -- "$nagios_cfg")
    fi

    echo "$nagios_path/ilo/ilo.cfg"
}

# Query the engine for one service of one iLO and submit the result to
# Nagios as a passive check.
#
# argument 1 ($1): the iLO IP which generated the trap this time
# argument 2 ($2): the iLO IP trap number (currently unused)
# argument 3 ($3): service type; must be one of the HpiloServices entries
# globals read:    HpiloServices, HpiloSnmpComm, NagiosLibexec,
#                  NagiosHpiloEngine
# globals written: NagiosCmdFile (consumed by nagios_passive_check)
# returns:         1 when $3 is not a known service, otherwise the status of
#                  nagios_passive_check
function update_hlth_status () {
    local nagios_cfg_file
    local oid_idx=
    local idx
    local engine_output
    local nagios_service_status=

    nagios_cfg_file=$(get_nagios_cfg_file)
    write_log "$nagios_cfg_file"

    # Read the command_file setting from nagios.cfg. The file test guards the
    # case where nagios.cfg could not be located, and the anchored pattern
    # skips commented-out settings; only the first setting is used.
    NagiosCmdFile=
    if [ -f "$nagios_cfg_file" ]; then
        NagiosCmdFile=$(awk -F= '
            /^[ \t]*command_file[ \t]*=/ {
                v = $2
                gsub(/^[ \t]+|[ \t]+$/, "", v)
                print v
                exit
            }' "$nagios_cfg_file")
    fi

    for ((idx = 0; idx < ${#HpiloServices[@]}; idx++)); do
        if [ "$3" = "${HpiloServices[$idx]}" ]; then
            oid_idx=$idx
            break
        fi
    done

    # Without this check an unknown service silently became index 1 and was
    # reported with the result of the first service.
    if [ -z "$oid_idx" ]; then
        write_log "update_hlth_status: unknown service type '$3'"
        return 1
    fi

    # The oid_idx must be increased by 1 since the oid index of the
    # NagiosHpiloEngine is started from 1 rather than 0.
    write_log "$NagiosLibexec/$NagiosHpiloEngine -H $1 -C $HpiloSnmpComm -o $((oid_idx+1))"
    engine_output=$("$NagiosLibexec/$NagiosHpiloEngine" -H "$1" -C "$HpiloSnmpComm" -o "$((oid_idx+1))")

    # The engine's exit status is forwarded as the Nagios service state.
    nagios_service_status=$?

    nagios_passive_check "$1" "$3" "$nagios_service_status" "$engine_output"
}

# Active (scheduled) mode: fetch the health status array of one iLO with a
# single engine call, submit every service except the one being checked as a
# passive result, and leave the checked service's result in globals.
#
# argument 1 ($1): the iLO IP
# argument 2 ($2): the iLO IP RO community string
# argument 3 ($3): 1-based index (into HpiloServices) of the service that is
#                  being actively checked
# globals read:    HpiloServices, NagiosLibexec, NagiosHpiloEngine
# globals written: NagiosCmdFile (consumed by nagios_passive_check),
#                  output and nagios_service_status (text and state of the
#                  service selected by $3; state is 3 when $3 selects none)
function update_hlth_status_from_status_array () {
    local nagios_cfg_file
    local curr_oid_idx=$(( ${3:-0} - 1 ))
    local curr_oid_status=3
    local curr_oid_desc="UNKNOWN: invalid service index '$3'"
    local hlth_array_count=0
    local -a hlth_status_array=()
    local svc_idx
    local service_index=0
    local status_key
    local status_value

    nagios_service_status=0

    nagios_cfg_file=$(get_nagios_cfg_file)
    write_log "$nagios_cfg_file"

    # Read the command_file setting from nagios.cfg. The file test guards the
    # case where nagios.cfg could not be located, and the anchored pattern
    # skips commented-out settings; only the first setting is used.
    NagiosCmdFile=
    if [ -f "$nagios_cfg_file" ]; then
        NagiosCmdFile=$(awk -F= '
            /^[ \t]*command_file[ \t]*=/ {
                v = $2
                gsub(/^[ \t]+|[ \t]+$/, "", v)
                print v
                exit
            }' "$nagios_cfg_file")
    fi

    # Index 0 is passed to the engine here; its reply is parsed below as a
    # ';'-separated list of per-component states.
    write_log "$NagiosLibexec/$NagiosHpiloEngine -H $1 -C $2 -o 0"
    output=$("$NagiosLibexec/$NagiosHpiloEngine" -H "$1" -C "$2" -o 0)
    nagios_service_status=$?
    write_log "${HpiloServices[0]} nagios_service_status : $nagios_service_status - $output"

    IFS=';' read -r -a hlth_status_array <<< "$output"
    hlth_array_count=${#hlth_status_array[@]}

    for ((svc_idx = 0; svc_idx < ${#HpiloServices[@]}; svc_idx++)); do
        # Position of each service inside the status array.
        case "${HpiloServices[$svc_idx]}" in
            "System Status")  service_index=0 ;;
            "Fans")           service_index=4 ;;
            "Memory")         service_index=3 ;;
            "Network")        service_index=14 ;;
            "Power Supplies") service_index=6 ;;
            "Processors")     service_index=2 ;;
            "Storage")        service_index=9 ;;
            "Temperatures")   service_index=5 ;;
            *) ;;
        esac

        write_log "hlth_array_count : $hlth_array_count"
        if [ "$hlth_array_count" = 1 ]; then
            # A single field is not a status array; report its text with
            # state 3 for every service.
            status_key=${hlth_status_array[0]}
            status_value=3
            write_log "status_key : $status_key"
        else
            status_key=${hlth_status_array[$service_index]}
            case "$status_key" in
                "OK")           status_value=0 ;;
                "Degraded")     status_value=1 ;;
                "Failed")       status_value=2 ;;
                "NA" | "Other") status_value=3 ;;
                # Unrecognised or missing text: report state 3 instead of
                # silently reusing the previous service's state.
                *)              status_value=3 ;;
            esac
        fi

        if [ "$status_value" = "1" ] || [ "$status_value" = "2" ]; then
            case "$svc_idx" in
                1 | 4 | 6)
                    # Fans, Power Supplies and Storage: ask the engine for
                    # the per-service text. Its exit status is not used; the
                    # state taken from the array is what gets reported.
                    status_key=$("$NagiosLibexec/$NagiosHpiloEngine" -H "$1" -C "$2" -o "$((svc_idx+1))")
                    ;;
                *)
                    ;;
            esac
        elif [ "$status_value" = "3" ]; then
            case "$svc_idx" in
                3)
                    status_key="Other: Probable Cause might be AMS service is not installed or started on host OS"
                    ;;
                4)
                    status_key="Other: Verify status on OA for Power Supplies of blade server"
                    ;;
                *)
                    ;;
            esac
        fi

        # The service being actively checked is returned through the
        # globals below instead of being submitted passively.
        if [ "$curr_oid_idx" = "$svc_idx" ]; then
            curr_oid_status=$status_value
            curr_oid_desc="$status_key"
            continue
        fi

        nagios_passive_check "$1" "${HpiloServices[$svc_idx]}" "$status_value" "$status_key"
        sleep 0.001
    done

    output=$curr_oid_desc
    nagios_service_status=$curr_oid_status
    write_log "5. $1 ${HpiloServices[0]} $nagios_service_status $output "
}

# Look up a literal string in a file and print each matching line together
# with the line that follows it, flattened onto a single line.
#
# $1: file name
# $2: search string; matched literally and as a whole word, so that looking
#     up 10.0.0.1 does not also hit 10.0.0.10 and "." is not a wildcard
# output (stdout): the matching lines (plus one line of trailing context
#     each) joined with single spaces; an empty line when the file does not
#     exist, the search string is empty, or nothing matches
function search_string() {
    local str=
    local -a words=()

    # An empty search string would match every line of the file.
    if [ -n "$2" ] && [ -f "$1" ]; then
        str=$(grep -F -w -A 1 -- "$2" "$1")
    fi

    # Collapse all whitespace (including newlines) to single spaces without
    # exposing the text to pathname expansion.
    read -r -d '' -a words <<< "$str"
    printf '%s\n' "${words[*]}"
}

#start here
#
# Two modes:
#   passive (default): the trap text is read from stdin; the trap number is
#                      mapped to a service and that service plus the overall
#                      system status are refreshed through passive checks.
#   active (-A):       the status array of the iLO given by -H/-C is
#                      fetched, the service selected by -o is printed and
#                      its state becomes the exit status.
#
# options: -H <ip>  -C <community>  -o <service index>  -v (verbose)  -A

VerboseMode=0
ActiveMode=0

# parse the arguments
while [ "x$1" != "x" ]; do
    option=$1

    case "$option" in
        -H)
            # Get the Host IP
            shift
            host_ip=$1
            ;;
        -C)
            # Get the community string
            shift
            comm_str=$1
            ;;
        -o)
            # Get the OID
            shift
            oid_id=$1
            ;;
        -v)
            # enable verbose mode
            VerboseMode=1
            ;;
        -A)
            # Active mode: Get Health Status Array and Update to web page
            ActiveMode=1
            ;;
        *)
            echo "ERROR: unknown option $option"
            exit 1
            ;;
    esac
    shift
done

if [ "$ActiveMode" = "0" ]; then

    # The first two input lines are consumed as the iLO address and the
    # transport description; the remaining lines are "<oid> <value>" pairs.
    read -r IloIP
    read -r protocol

    # Get the iLO trap ID
    IloTrapNumber=
    while read -r oid val; do
        if [ "$oid" = "$SnmpTrapOid" ]; then
            # the trap ID is the last dot-separated component of the value
            IloTrapNumber=${val##*.}
            break
        fi
    done

    ServiceType=$(get_hlth_service_type "$IloTrapNumber")

    write_log "ilo_ip: $IloIP trap_number: $IloTrapNumber, type: $ServiceType"

    # Use the community string configured for this iLO, if any. The lookup
    # result is split into words and the 4th word is taken as the community
    # string (presumably "<key> <ip> <community key> <community>" in ilo.cfg
    # -- TODO confirm against the file format).
    NagiosIloCfgFile=$(get_nagios_ilo_cfg_path)
    has_configured=$(search_string "$NagiosIloCfgFile" "$IloIP")
    if [ -n "$has_configured" ]; then
        cfg_words=()
        read -r -d '' -a cfg_words <<< "$has_configured"
        # Only override the default when a value is actually present, so the
        # community string can never become empty.
        if [ -n "${cfg_words[3]}" ]; then
            HpiloSnmpComm=${cfg_words[3]}
        fi
    fi

    if [ "$ServiceType" != "None" ]; then

        if [ "$ServiceType" != "$HlthStatusChgTrap" ]; then
            update_hlth_status "$IloIP" "$IloTrapNumber" "$ServiceType"
        fi

        # update system health status
        update_hlth_status "$IloIP" "$IloTrapNumber" "${HpiloServices[0]}"
    fi
else

    write_log "host_ip : $host_ip comm_str: $comm_str oid_idx: $oid_id"
    #if [ "$oid_id" = "1" ]; then
    write_log "Active Schedule Mode"
    update_hlth_status_from_status_array "$host_ip" "$comm_str" "$oid_id"
    # The plugin text is data, not a format string: a '%' in it must be
    # printed verbatim.
    printf '%s\n' "$output"
    # Fall back to 3 when no state was produced, rather than exiting with
    # whatever the previous command returned.
    exit "${nagios_service_status:-3}"
    #fi

fi