You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
137 lines
3.8 KiB
Bash
137 lines
3.8 KiB
Bash
#!/bin/bash
#
# Reacts to state changes of "Gearman worker ..." services by restarting or
# failing over mod_gearman workers over ssh.  The usage text names the four
# arguments after Nagios-style macros, so this is presumably installed as a
# Nagios event handler -- confirm against the monitoring configuration.

# Exactly four arguments are required; anything else is a usage error.
if (( $# != 4 )); then
  # The format string is single-quoted on purpose so that the macro names
  # are printed literally instead of being expanded by the shell.
  # shellcheck disable=SC2016
  printf 'Usage: %s "$SERVICENAME$" $SERVICESTATE$ $SERVICESTATETYPE$ $SERVICEATTEMPT$\n' "$0" >&2
  exit 1
fi

# Pull the saved ssh-agent settings into this shell.  Presumably the file
# exports SSH_AUTH_SOCK / SSH_AGENT_PID so the ssh calls further down can
# authenticate without a passphrase -- confirm against the file's contents.
agent_env=/home/nagios/.ssh/.ssh-agent
if [[ -r "$agent_env" ]]; then
  # shellcheck source=/dev/null
  . "$agent_env"
else
  # Keep going (as the unguarded source did), but say why ssh may fail later.
  printf '%s: cannot read %s\n' "$0" "$agent_env" >&2
fi

# Give the four positional arguments readable names.
sname=$1     # service name; a leading "Gearman worker " is stripped later
sstate=$2    # service state: OK, WARNING, CRITICAL or UNKNOWN
stype=$3     # state type; not read anywhere in the visible part of the script
sattempt=$4  # check attempt number; selects the escalation step on CRITICAL

# Failover cluster hosts ("clusterines").
# Maps a zone name (the service name with "Gearman worker " removed) to the
# host on which the redundant workers are started once that zone has been
# CRITICAL for three attempts.  A zone without an entry here is treated as
# "not a cluster" by the CRITICAL handling.
# NOTE(review): the worker table also carries PrivateDMZ entries for
# ims218/ims219, but each zone maps to a single host here -- confirm that
# ims220/ims221 are the intended targets.
declare -A cluster=(
  ["NS Untrust"]="ims204"
  ["151 Untrust"]="ims205"
  ["NS PrivateDMZ"]="ims220"
  ["151 PrivateDMZ"]="ims221"
  ["NS Trust"]="ims222"
  ["151 Trust"]="ims223"
  ["NS PublicDMZ"]="ims224"
  ["151 PublicDMZ"]="ims225"
)

# Disabled entries, kept as found.
# NOTE(review): their "<host> <zone> <id>" keys look like worker-table
# entries rather than cluster entries -- confirm before re-enabling.
# ["ims205 151 Cross hei"]="ims205 hei" \
# ["ims204 NS Cross hei"]="ims204 hei" \

# Stop failover.
# Key:   "<host> <zone> <worker id>" -- the service name of a single worker
#        with "Gearman worker " removed.
# Value: "<peer host> <redundant worker id>" -- the redundant worker that is
#        stopped on the peer host when the keyed worker recovers (OK).
# The keys are also scanned for a zone name to find which redundant workers
# to start when a whole zone is CRITICAL.
declare -A worker=(
  ["ims204 151 Untrust alef"]="ims205 gimel"
  ["ims204 151 Untrust bet"]="ims205 dalet"
  ["ims205 NS Untrust alef"]="ims204 gimel"
  ["ims205 NS Untrust bet"]="ims204 dalet"
  ["ims218 151 PrivateDMZ alef"]="ims219 gimel"
  ["ims218 151 PrivateDMZ bet"]="ims219 dalet"
  ["ims219 NS PrivateDMZ alef"]="ims218 gimel"
  ["ims219 NS PrivateDMZ bet"]="ims218 dalet"
  ["ims220 151 PrivateDMZ alef"]="ims221 gimel"
  ["ims220 151 PrivateDMZ bet"]="ims221 dalet"
  ["ims221 NS PrivateDMZ alef"]="ims220 gimel"
  ["ims221 NS PrivateDMZ bet"]="ims220 dalet"
  ["ims222 151 Trust alef"]="ims223 gimel"
  ["ims222 151 Trust bet"]="ims223 dalet"
  ["ims223 NS Trust alef"]="ims222 gimel"
  ["ims223 NS Trust bet"]="ims222 dalet"
  ["ims224 151 PublicDMZ alef"]="ims225 gimel"
  ["ims224 151 PublicDMZ bet"]="ims225 dalet"
  ["ims225 NS PublicDMZ alef"]="ims224 gimel"
  ["ims225 NS PublicDMZ bet"]="ims224 dalet"
  ["ims316 EBT alef"]="ims317 gimel"
  ["ims316 EBT bet"]="ims317 dalet"
  ["ims317 EBT alef"]="ims316 gimel"
  ["ims317 EBT bet"]="ims316 dalet"
)

#######################################
# Print what follows the literal prefix "Gearman worker " in a service name.
# Arguments: $1 - service name
# Outputs:   the remainder on stdout; nothing at all when the prefix is
#            missing (so callers see an empty string for unrelated services)
#######################################
zone_from_service() {
  local prefix='Gearman worker '
  if [[ "$1" == "$prefix"* ]]; then
    printf '%s\n' "${1#"$prefix"}"
  fi
}

# Lookup key for the cluster/worker tables; empty for non-Gearman services.
zone=$(zone_from_service "$sname")

#######################################
# Run a command line on a remote host over ssh and mail its combined
# stdout/stderr to the networking list.
# Arguments: $1 - remote host
#            $2 - command line to run on that host
#            $3 - mail subject
# Returns:   the exit status of mail (the last pipeline stage), not of ssh
#######################################
remote_and_mail() {
  local host=$1 remote_cmd=$2 subject=$3
  /usr/bin/ssh "$host" "$remote_cmd" 2>&1 \
    | mail -s "$subject" networking@Princeton.EDU
}

# Dispatch on the service state.  $zone, $sstate, $sattempt and the
# cluster/worker tables are set earlier in the script.  An empty $zone is
# checked before every table lookup because an empty associative-array
# subscript is an error in bash.
case "$sstate" in
  OK) # Recovery
    # Only a single worker (an entry in the worker table) has a redundant
    # peer to stop; for a cluster-level service there is nothing to do.
    if [[ -z "$zone" || -z "${worker[$zone]:-}" ]]; then # cluster
      exit 0
    fi

    # Table value is "<peer host> <redundant worker id>".
    read -r peer_host peer_id _ <<<"${worker[$zone]}"

    remote_and_mail "$peer_host" \
      "/usr/bin/sudo /sbin/service mod_gearman_worker stop $peer_id force" \
      "RECOVERY: Gearman worker $peer_id stop on $peer_host"
    exit $?
    ;;

  WARNING) # Don't care for cluster - one worker is still good to go
    ;;

  CRITICAL)
    case "$sattempt" in
      2) # Attempt to restart a failed worker only
        if [[ -z "$zone" || -z "${worker[$zone]:-}" ]]; then # not a worker
          exit 0
        fi

        # Worker key is "<host> <zone> <worker id>": first and last word.
        mach=${zone%% *}
        wid=${zone##* }

        remote_and_mail "$mach" \
          "/usr/bin/sudo /sbin/service mod_gearman_worker restart $wid" \
          "Gearman worker $wid restart on $mach"
        exit $?
        ;;

      3) # Start redundant workers
        if [[ -z "$zone" || -z "${cluster[$zone]:-}" ]]; then # not a cluster
          exit 0
        fi

        # Every worker whose key contains the zone name contributes the
        # redundant worker id from its table value.  The match is a literal
        # substring test, so the zone is never interpreted as a pattern.
        rc=0
        for wrkr in "${!worker[@]}"; do
          [[ "$wrkr" == *"$zone"* ]] || continue
          read -r _ wid _ <<<"${worker[$wrkr]}"
          remote_and_mail "${cluster[$zone]}" \
            "/usr/bin/sudo /sbin/service mod_gearman_worker start $wid force" \
            "Starting redundant Gearman worker $wid on ${cluster[$zone]}" \
            || rc=$?
        done
        # Iteration order of an associative array is unspecified, so report
        # a failure from any iteration rather than only the last one.
        exit "$rc"
        ;;

      4) # Last resort - attempt to restart Gearman server
        if [[ -z "$zone" || -z "${cluster[$zone]:-}" ]]; then # not a cluster
          exit 0
        fi

        # systemctl takes the verb before the unit name.
        # NOTE(review): if sudoers on mon204l/mon205l whitelists an exact
        # command line, it must allow "systemctl restart gearmand".
        remote_and_mail mon204l \
          '/usr/bin/sudo systemctl restart gearmand' \
          "LAST RESORT: Gearman server restart on mon204l"
        # Stagger the two restarts by ten seconds.
        sleep 10
        remote_and_mail mon205l \
          '/usr/bin/sudo systemctl restart gearmand' \
          "LAST RESORT: Gearman server restart on mon205l"
        exit $?
        ;;

      *)
        # Do nothing - too early to do anything
        ;;
    esac
    ;;

  UNKNOWN)
    echo "Service state is unknown"
    exit 1
    ;;

  *)
    echo "Invalid service state passed"
    exit 1
    ;;
esac

exit 0