You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
124 lines
3.4 KiB
Bash
124 lines
3.4 KiB
Bash
#!/bin/bash
#
# Nagios event handler for "Gearman worker <zone>" services.
#
# Usage:
#   <script> "$SERVICENAME$" $SERVICESTATE$ $SERVICESTATETYPE$ $SERVICEATTEMPT$
#
# <zone> is the service name with the leading "Gearman worker " removed. It is
# looked up in two tables:
#   cluster[<zone>]  -> host on which redundant workers for the zone are started
#   worker[<zone>]   -> "<host> <worker id>" of the redundant worker that
#                       stands in for that individual worker
#
# Actions:
#   OK                  zone is a worker key  -> stop its redundant worker
#   WARNING             nothing
#   CRITICAL attempt 2  zone is a worker key  -> restart that worker in place
#   CRITICAL attempt 3  zone is a cluster key -> start the redundant workers
#   CRITICAL attempt 4  zone is a cluster key -> restart gearmand servers
#   CRITICAL (other)    nothing
#   UNKNOWN / other     print a message and exit 1
#
# Output of every remote command is mailed to the recipients listed below.
# Exit status: 1 on bad usage or bad state; otherwise 0 when nothing had to be
# done, or the status of the remote action (ssh failure takes precedence over
# a mail failure).

if (( $# != 4 )); then
  # shellcheck disable=SC2016 # the Nagios macros are meant to be printed literally
  echo "Usage: $0 "'"$SERVICENAME$" $SERVICESTATE$ $SERVICESTATETYPE$ $SERVICEATTEMPT$'
  exit 1
fi

# Presumably exports the ssh-agent environment for the ssh calls below
# (the file's contents are not visible from this script).
# shellcheck source=/dev/null
. /home/nagios/.ssh/.ssh-agent

sname="$1"
sstate="$2"
# $3 ($SERVICESTATETYPE$) is required by the interface but is not used.
sattempt="$4"

recipients=(epm-list@Princeton.EDU igubenko@Princeton.EDU)

# Failover host per cluster zone.
declare -A cluster=(
  ["NS Untrust"]="ims318"
  ["151 Untrust"]="ims319"
  ["NS PublicDMZ"]="ims314"
  ["151 PublicDMZ"]="ims315"
  ["NS PrivateDMZ"]="ims320"
  ["151 PrivateDMZ"]="ims321"
  ["NS Trust"]="ims320"
  ["151 Trust"]="ims321"
)

# "<host> <zone> <worker id>" -> "<failover host> <redundant worker id>".
# NOTE(review): the ims321 rows use hei/vav for PrivateDMZ and alef/bet for
# Trust, the opposite of the ims320 rows -- possibly swapped; confirm against
# the real worker layout before changing.
declare -A worker=(
  ["ims318 151 Untrust alef"]="ims319 gimel"
  ["ims318 151 Untrust bet"]="ims319 dalet"
  ["ims319 NS Untrust alef"]="ims318 gimel"
  ["ims319 NS Untrust bet"]="ims318 dalet"
  ["ims314 151 PublicDMZ alef"]="ims315 gimel"
  ["ims314 151 PublicDMZ bet"]="ims315 dalet"
  ["ims315 NS PublicDMZ alef"]="ims314 gimel"
  ["ims315 NS PublicDMZ bet"]="ims314 dalet"
  ["ims320 151 PrivateDMZ alef"]="ims321 gimel"
  ["ims320 151 PrivateDMZ bet"]="ims321 dalet"
  ["ims321 NS PrivateDMZ hei"]="ims320 gimel"
  ["ims321 NS PrivateDMZ vav"]="ims320 dalet"
  ["ims320 151 Trust hei"]="ims321 zain"
  ["ims320 151 Trust vav"]="ims321 het"
  ["ims321 NS Trust alef"]="ims320 zain"
  ["ims321 NS Trust bet"]="ims320 het"
)

#######################################
# Run a command on a remote host and mail its output (stdout and stderr).
# Globals:   recipients (read)
# Arguments: $1 - mail subject
#            $2 - host to ssh to
#            $3 - command line to run on that host
# Returns:   ssh's exit status when it is non-zero, otherwise mail's.
#######################################
ssh_and_mail() {
  local subject=$1 host=$2 remote_cmd=$3
  local -a stage_rc
  /usr/bin/ssh "$host" "$remote_cmd" 2>&1 \
    | mail -s "$subject" "${recipients[@]}"
  # Copy immediately: PIPESTATUS is overwritten by the next command.
  stage_rc=("${PIPESTATUS[@]}")
  if (( stage_rc[0] != 0 )); then
    return "${stage_rc[0]}"
  fi
  return "${stage_rc[1]}"
}

# Collapse runs of whitespace in the service name (without glob expansion),
# then strip the "Gearman worker " prefix. zone stays empty when the prefix
# is absent.
read -r -a name_words <<<"$sname"
normalized_name="${name_words[*]}"
zone=""
if [[ $normalized_name == "Gearman worker "* ]]; then
  zone=${normalized_name#"Gearman worker "}
fi

# Look the zone up once. An empty subscript is an error for associative
# arrays, so an empty zone simply matches nothing.
worker_entry=""
cluster_host=""
if [[ -n $zone ]]; then
  worker_entry=${worker[$zone]:-}
  cluster_host=${cluster[$zone]:-}
fi

case "$sstate" in
  OK) # Recovery: an individual worker is back, stop its redundant stand-in
    if [[ -z $worker_entry ]]; then # cluster service or unknown zone
      exit 0
    fi

    read -r standby_host standby_wid _ <<<"$worker_entry"
    ssh_and_mail \
      "RECOVERY: Gearman worker $standby_wid stop on $standby_host" \
      "$standby_host" \
      "/usr/bin/sudo /sbin/service mod_gearman_worker stop $standby_wid force"
    exit $?
    ;;

  WARNING) # Don't care for cluster - one worker is still good to go
    ;;

  CRITICAL)
    case "$sattempt" in
      2) # Attempt to restart a failed worker only
        if [[ -z $worker_entry ]]; then # not a worker
          exit 0
        fi

        # A worker zone is "<host> <zone words...> <worker id>".
        mach=${zone%% *}
        wid=${zone##* }
        ssh_and_mail \
          "Gearman worker $wid restart on $mach" \
          "$mach" \
          "/usr/bin/sudo /sbin/service mod_gearman_worker restart $wid"
        exit $?
        ;;

      3) # Start redundant workers
        if [[ -z $cluster_host ]]; then # not a cluster
          exit 0
        fi

        status=0
        for worker_key in "${!worker[@]}"; do
          # Literal substring match: workers that belong to this zone.
          [[ $worker_key == *"$zone"* ]] || continue
          read -r _ standby_wid _ <<<"${worker[$worker_key]}"
          ssh_and_mail \
            "Starting redundant Gearman worker $standby_wid on $cluster_host" \
            "$cluster_host" \
            "/usr/bin/sudo /sbin/service mod_gearman_worker start $standby_wid force" \
            || status=$?
        done
        # Non-zero if any of the starts failed.
        exit "$status"
        ;;

      4) # Last resort - attempt to restart Gearman server
        if [[ -z $cluster_host ]]; then # not a cluster
          exit 0
        fi

        status=0
        for gearman_server in mon302l mon303l; do
          ssh_and_mail \
            "LAST RESORT: Gearman server restart" \
            "$gearman_server" \
            'sudo systemctl restart gearmand 2>&1' \
            || status=$?
        done
        # Non-zero if either restart failed.
        exit "$status"
        ;;

      *)
        # Do nothing - too early to do anything
        ;;
    esac
    ;;

  UNKNOWN)
    echo "Service state is unknown"
    exit 1
    ;;

  *)
    echo "Invalid service state passed"
    exit 1
    ;;
esac

exit 0