You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Princeton/pu/libexec/eventhandlers/hub_recovery.bak

88 lines
2.1 KiB
Bash

#!/bin/bash
# Event handler: bounces the Selenium Hub service and the Gearmand service on
# the slave to kick the connections back to the master.
# It needs to run on master recovery ONLY.
#
# Usage: <this script> "$HOSTNAME$" $SERVICESTATE$ $SERVICEATTEMPT$
#   $1  master host name (mon204l or mon302l; any other value exits 1)
#   $2  service state: OK, WARNING, CRITICAL or UNKNOWN
#   $3  current check attempt number (non-negative integer)
#
# State handling:
#   WARNING   with >= 3 attempts: log the event and create the marker file.
#   CRITICAL  with >= 3 attempts: log the event only (marker creation is
#             commented out below).
#   OK        only if the marker file exists: restart the services on the
#             slave over ssh, remove the marker, log and mail the output.
#   UNKNOWN / anything else: print a message and exit 1.
#
# Exit status: 0 when there is nothing to do or the state was recorded,
# 1 on bad usage / host / state / attempt count, otherwise the exit status of
# the ssh command.
#
# Messages for the caller are written to stdout, as before.
# NOTE(review): presumably the monitoring daemon captures stdout - confirm
# before moving them to stderr.

if [[ "$#" -ne 3 ]]; then
  echo "Usage: $0 "'"$HOSTNAME$" $SERVICESTATE$ $SERVICEATTEMPT$'
  exit 1
fi

# Pull in the saved ssh-agent settings before anything else.
# NOTE(review): presumably this exports SSH_AUTH_SOCK so the ssh call below
# can authenticate without a prompt - confirm against the file's contents.
agent_env="/home/nagios/.ssh/.ssh-agent"
if [[ -r "$agent_env" ]]; then
  # shellcheck source=/dev/null
  . "$agent_env"
else
  # Keep going (as the original did when the file was missing); ssh will
  # report its own failure if authentication is not possible.
  echo "Warning: cannot read $agent_env" >&2
fi

hname="$1"
sstate="$2"
satt="$3"
logfile="/usr/local/monitoring/log/hub_recovery"

# Append one line (empty when called without an argument) to the log file.
log() {
  printf '%s\n' "${1-}" >> "$logfile"
}

# Exit 0 unless the service has reached at least 3 check attempts.
# The count is validated first: feeding an arbitrary string to (( )) would
# evaluate it as an arithmetic expression, and 10# keeps values such as "08"
# from being parsed as invalid octal.
require_min_attempts() {
  if [[ ! "$satt" =~ ^[0-9]+$ ]]; then
    log "### Invalid attempt count '$satt'"
    echo "Invalid service attempt passed"
    exit 1
  fi
  if (( 10#$satt < 3 )); then
    exit 0
  fi
}

log
log "### Running event handler..."
log "###$hname###$sstate###$satt###"

# Map the master to the slave whose services get bounced.
case "$hname" in
  mon204l) slave="mon205l" ;;
  mon302l) slave="mon303l" ;;
  *)
    log "### Unknown master host '$hname', nothing to do"
    exit 1
    ;;
esac

# Marker file: created on WARNING, consumed on OK. hname is one of the two
# known hosts at this point, so the path is safe to build from it.
marker="/usr/local/monitoring/tmp/${hname}_tmp"

# Commands executed on the slave. The script ends with "exit 0", so a failing
# systemctl call does not change the status reported back through ssh.
remote_cmds='
echo "Killing Selenium Hub"
/usr/bin/sudo /bin/systemctl stop selenium
echo "Stopping Gearman"
/usr/bin/sudo /bin/systemctl stop gearmand
echo "Giving it 20 seconds..."
sleep 20
echo "Starting Selenium"
/usr/bin/sudo /bin/systemctl start selenium
echo "Starting Gearman"
/usr/bin/sudo /bin/systemctl start gearmand
exit 0
'

case "$sstate" in
  OK) # Recovery
    # Without the marker there was no recorded problem, so nothing to bounce.
    if [[ ! -f "$marker" ]]; then
      exit 0
    fi
    log "*****************"
    log "$(date): "
    res=$(/usr/bin/ssh "$slave" "$remote_cmds" 2>&1)
    stat=$?
    # The marker is removed whether or not ssh succeeded, so the bounce is
    # attempted once per recorded problem.
    rm -f -- "$marker"
    log "$res"
    log "******************"
    log
    printf '%s\n' "$res" | /bin/mail -s "RECOVERY: Master $hname has recovered. Bounced slave processes on $slave" igubenko@Princeton.EDU
    exit "$stat"
    ;;
  WARNING)
    require_min_attempts
    log
    log "### WARNING state with at least 3 attempts"
    log "###$(date)####$hname###$satt###"
    touch -- "$marker"
    ;;
  CRITICAL)
    require_min_attempts
    log
    log "### CRITICAL state with at least 3 attempts"
    log "###$(date)####$hname###$satt###"
    # NOTE(review): marker creation was already disabled here in the original,
    # so a recovery that follows only CRITICAL states does not bounce the
    # slave - confirm this is intended.
    # touch -- "$marker"
    ;;
  UNKNOWN)
    echo "Service state is unknown"
    exit 1
    ;;
  *)
    echo "Invalid service state passed"
    exit 1
    ;;
esac
exit 0