You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
88 lines
2.1 KiB
Bash
88 lines
2.1 KiB
Bash
#!/bin/bash
#
# This script bounces the Selenium Hub service and the Gearmand service on the
# slave to kick the connections back to the master.
# It needs to run on master recovery ONLY.
#
# Usage: hub_recovery "$HOSTNAME$" $SERVICESTATE$ $SERVICEATTEMPT$
#   $1  host name of the master whose service changed state
#   $2  service state: OK | WARNING | CRITICAL | UNKNOWN
#   $3  current check attempt number (non-negative integer)
#   (the argument names look like Nagios macros -- presumably this is wired up
#   as a Nagios event handler; confirm against the monitoring config)
#
# Flow:
#   WARNING  (>= 3 attempts)  -> drop a flag file for the host
#   CRITICAL (>= 3 attempts)  -> log only (flag creation is disabled below)
#   OK with a flag file       -> restart selenium + gearmand on the slave over
#                                ssh, remove the flag, log and mail the output
#
# Exit status:
#   0  nothing to do, or the state was handled
#   1  bad usage, unmapped host, non-numeric attempt count, UNKNOWN/invalid state
#   *  in the OK branch: whatever ssh returned (the remote script itself always
#      ends with "exit 0", so non-zero means the ssh call itself failed)

if [[ "$#" -ne 3 ]]; then
  echo "Usage: $0 "'"$HOSTNAME$" $SERVICESTATE$ $SERVICEATTEMPT$' >&2
  exit 1
fi

# Sourced before anything else, as in the original ordering; the ssh call in
# the OK branch presumably relies on the agent environment this file sets up.
# shellcheck source=/dev/null
. /home/nagios/.ssh/.ssh-agent

hname="$1"
sstate="$2"
satt="$3"

logfile="/usr/local/monitoring/log/hub_recovery"
min_attempts=3

# Append each argument to the log file as its own line.
log() {
  printf '%s\n' "$@" >> "$logfile"
}

# Exit 0 (nothing to do yet) while the attempt count is below the threshold.
# The count is validated first: inside (( )) a non-numeric string would be
# evaluated as an expression (variable name, array subscript, ...), and a
# zero-padded value such as "08" would be rejected as invalid octal, hence
# the explicit base-10 prefix.
require_min_attempts() {
  if [[ ! "$satt" =~ ^[0-9]+$ ]]; then
    echo "Invalid service attempt count: $satt" >&2
    exit 1
  fi
  if (( 10#$satt < min_attempts )); then
    exit 0
  fi
}

log "" \
    "### Running event handler..." \
    "###$hname###$sstate###$satt###"

# Map the master to the slave whose services get bounced.
case "$hname" in
  mon204l) slave="mon205l" ;;
  mon302l) slave="mon303l" ;;
  *)
    log "### No slave mapping for host '$hname'; nothing to do"
    exit 1
    ;;
esac

# Safe to build a path from $hname here: it is one of the two literals above.
flagfile="/usr/local/monitoring/tmp/${hname}_tmp"

case "$sstate" in
  OK) # Recovery
    # Only bounce the slave if an earlier WARNING left the flag file behind.
    if [[ ! -f "$flagfile" ]]; then
      exit 0
    fi

    log "*****************" \
        "$(date): "

    # The remote command is passed as a single literal string; nothing in it
    # is expanded locally.
    res=$(/usr/bin/ssh "$slave" '
      echo "Killing Selenium Hub"
      /usr/bin/sudo /bin/systemctl stop selenium
      echo "Stopping Gearman"
      /usr/bin/sudo /bin/systemctl stop gearmand
      echo "Giving it 20 seconds..."
      sleep 20
      echo "Starting Selenium"
      /usr/bin/sudo /bin/systemctl start selenium
      echo "Starting Gearman"
      /usr/bin/sudo /bin/systemctl start gearmand
      exit 0
    ' 2>&1)
    stat=$?

    # The flag is cleared regardless of the ssh outcome, so a failed bounce
    # is not retried on the next OK notification.
    rm -f -- "$flagfile"

    log "$res"
    if (( stat != 0 )); then
      log "### ssh to $slave exited with status $stat"
    fi
    log "******************" \
        ""

    printf '%s\n' "$res" \
      | /bin/mail -s "RECOVERY: Master $hname has recovered. Bounced slave processes on $slave" igubenko@Princeton.EDU
    exit "$stat"
    ;;

  WARNING)
    require_min_attempts
    log "" \
        "### WARNING state with at least 3 attempts" \
        "###$(date)####$hname###$satt###"
    # Arm the recovery action for the next OK state.
    touch -- "$flagfile"
    ;;

  CRITICAL)
    require_min_attempts
    log "" \
        "### CRITICAL state with at least 3 attempts" \
        "###$(date)####$hname###$satt###"
    # Flag creation is intentionally disabled for CRITICAL:
    # touch /usr/local/monitoring/tmp/${hname}_tmp
    ;;

  UNKNOWN)
    echo "Service state is unknown" >&2
    exit 1
    ;;

  *)
    echo "Invalid service state passed" >&2
    exit 1
    ;;
esac

exit 0