#!/bin/bash
#
# Event handler: bounces the Selenium Hub service and the Gearmand service on
# the slave to kick the connections back to the master.
# It needs to run on master recovery ONLY.
#
# Usage:
#   <this script> "$HOSTNAME$" $SERVICESTATE$ $SERVICEATTEMPT$
#
# Arguments:
#   $1 - host name of the master (only mon204l and mon302l are handled)
#   $2 - service state: OK, WARNING, CRITICAL or UNKNOWN
#   $3 - service check attempt number (decimal integer)
#
# Behaviour:
#   WARNING with >= 3 attempts  -> logs and creates a per-host marker file.
#   CRITICAL with >= 3 attempts -> logs only (marker creation is disabled).
#   OK with marker file present -> restarts selenium and gearmand on the slave
#                                  over ssh, removes the marker, logs and mails
#                                  the captured output.
#
# Exit status:
#   0 when nothing had to be done or the state was recorded; the ssh exit
#   status after a recovery bounce; 1 on bad arguments, an unhandled host or
#   an UNKNOWN/invalid state.

if [[ "$#" -ne 3 ]]; then
  echo "Usage: $0 "'"$HOSTNAME$" $SERVICESTATE$ $SERVICEATTEMPT$'
  exit 1
fi

# The sourced file is not visible from here; presumably it exports the
# ssh-agent environment that the ssh call below relies on -- TODO confirm.
# shellcheck source=/dev/null
. /home/nagios/.ssh/.ssh-agent

hname="$1"
sstate="$2"
satt="$3"
logfile="/usr/local/monitoring/log/hub_recovery"

# Record every invocation, including ones that end up doing nothing.
{
  echo
  echo "### Running event handler..."
  echo "###$hname###$sstate###$satt###"
} >> "$logfile"

# Map the master to the slave whose services get bounced. Any other host is
# rejected, which also keeps the marker path below limited to known names.
case "$hname" in
  mon204l) slave="mon205l" ;;
  mon302l) slave="mon303l" ;;
  *) exit 1 ;;
esac

# Marker left by a problem state; its presence is what arms the recovery.
marker="/usr/local/monitoring/tmp/${hname}_tmp"

#######################################
# Log a problem state once it has reached at least 3 check attempts.
# Exits the script with 0 when fewer than 3 attempts have been made, and with
# 1 when the attempt count is not a decimal integer.
# Globals:   satt, hname, logfile (read)
# Arguments: $1 - state label written to the log (WARNING or CRITICAL)
# Outputs:   appends three lines to $logfile
#######################################
log_problem_state() {
  local state="$1"

  # Validate before doing arithmetic: (( )) would otherwise evaluate the
  # argument as an expression instead of comparing it as a number.
  if [[ ! "$satt" =~ ^[0-9]+$ ]]; then
    echo "Invalid service attempt passed"
    exit 1
  fi

  # 10# forces base 10 so values with a leading zero are not read as octal.
  if (( 10#$satt < 3 )); then
    exit 0
  fi

  {
    echo
    echo "### ${state} state with at least 3 attempts"
    echo "###$(date)####$hname###$satt###"
  } >> "$logfile"
}

case "$sstate" in
  OK)
    # Recovery: only act when an earlier problem state left the marker.
    if [[ ! -f "$marker" ]]; then
      exit 0
    fi

    {
      echo "*****************"
      echo "$(date): "
    } >> "$logfile"

    # The remote script always ends with "exit 0", so a non-zero status here
    # normally reflects a failure of ssh itself rather than of systemctl.
    res=$(/usr/bin/ssh "$slave" '
      echo "Killing Selenium Hub"
      /usr/bin/sudo /bin/systemctl stop selenium
      echo "Stopping Gearman"
      /usr/bin/sudo /bin/systemctl stop gearmand
      echo "Giving it 20 seconds..."
      sleep 20
      echo "Starting Selenium"
      /usr/bin/sudo /bin/systemctl start selenium
      echo "Starting Gearman"
      /usr/bin/sudo /bin/systemctl start gearmand
      exit 0
    ' 2>&1)
    stat="$?"

    # The marker is removed whether or not the bounce succeeded.
    rm -f -- "$marker"

    {
      printf '%s\n' "$res"
      echo "******************"
      echo
    } >> "$logfile"

    printf '%s\n' "$res" \
      | /bin/mail -s "RECOVERY: Master $hname has recovered. Bounced slave processes on $slave" igubenko@Princeton.EDU
    exit "$stat"
    ;;
  WARNING)
    log_problem_state "WARNING"
    touch -- "$marker"
    ;;
  CRITICAL)
    log_problem_state "CRITICAL"
    # NOTE(review): marker creation is disabled here, so unless the marker is
    # created elsewhere, a recovery that only went through CRITICAL will not
    # bounce the slave -- confirm this is intended.
    # touch /usr/local/monitoring/tmp/${hname}_tmp
    ;;
  UNKNOWN)
    echo "Service state is unknown"
    exit 1
    ;;
  *)
    echo "Invalid service state passed"
    exit 1
    ;;
esac

exit 0