From 7527af17cfc3fd43bf30e57b45790f8e378782ea Mon Sep 17 00:00:00 2001 From: Eric Loyd Date: Sun, 22 Sep 2019 12:44:25 -0400 Subject: [PATCH] Scan nagios.cfg for non-standard directories and include them. Properly deal with retention.dat. Confirmed working to failover to newly installed box. --- failover/Makefile | 3 +++ failover/README.md | 9 +++++++++ failover/failover.sh | 2 +- failover/restore_xi.sh | 2 ++ failover/rsync_xi.sh | 30 +++++++++++++++++++----------- 5 files changed, 34 insertions(+), 12 deletions(-) diff --git a/failover/Makefile b/failover/Makefile index 71bf5b3..5d1c136 100644 --- a/failover/Makefile +++ b/failover/Makefile @@ -6,3 +6,6 @@ secondary: sync: rsync -av /home/nagios/bin/ nagiosxi-f.princeton.edu:/home/nagios/bin/ + +copy: + rsync -av /root/princeton/failover/ /home/nagios/bin; chown -R nagios. /home/nagios/bin diff --git a/failover/README.md b/failover/README.md index b01b5bf..af848bd 100644 --- a/failover/README.md +++ b/failover/README.md @@ -13,3 +13,12 @@ Failover from primary Nagios XI to secondary Nagios XI is a Disaster Recovery ef * The root user has the ability to SSH from the primary to the secondary without entering a passphrase. This is how the rsync and database copies are performed * root on primary (and root on secondary) have crontab requirements that will be detailed separately. +* nagios on both primary and secondary needs to be able to sudo to root without a password and execute the rsync command: + + NAGIOSXI ALL = NOPASSWD:/usr/bin/rsync * + +* nagios user on primary needs to be able to SSH to nagios on secondary without a passphrase (thus, an SSH key and .ssh directory need to be set up) + +* Make sure any ramdisk (such as /ramdisk) is copied if it exists + +* Note that any gearman addons such as /etc/mod_gearman or whatever are NOT copied as part of this procedure. These types of things need to be set up on both boxes the same way before this process is set up. 
diff --git a/failover/failover.sh b/failover/failover.sh index f44163f..f4e17e9 100755 --- a/failover/failover.sh +++ b/failover/failover.sh @@ -47,7 +47,7 @@ do_secondary() { [ -z "$syncfile" ] && error "No sync file ($syncfile) found." && exit 2 ./restore_xi.sh /store/backups/nagiosxi/$syncfile if [ $? -eq 0 ]; then - verbose "Stopping Nagios services" + verbose "Stopping Nagios services because we are the secondary" sleep 2 ./nagios_startstop.sh stop cp /usr/local/nagiosxi/html/includes/components/custom-includes/css/header-gradient.css.secondary /usr/local/nagiosxi/html/includes/components/custom-includes/css/header-gradient.css diff --git a/failover/restore_xi.sh b/failover/restore_xi.sh index 85ca429..f76ea39 100755 --- a/failover/restore_xi.sh +++ b/failover/restore_xi.sh @@ -72,9 +72,11 @@ backupdir=`pwd` # SHUTDOWN SERVICES ############################## verbose "Shutting down services..." +cp /usr/local/nagios/var/retention.dat $backupdir sudo $BASEDIR/manage_services.sh stop nagios sudo $BASEDIR/manage_services.sh stop ndo2db sudo $BASEDIR/manage_services.sh stop npcd +cp $backupdir/retention.dat /usr/local/nagios/var/retention.dat rootdir=/ diff --git a/failover/rsync_xi.sh b/failover/rsync_xi.sh index 51043c4..d9cd68a 100755 --- a/failover/rsync_xi.sh +++ b/failover/rsync_xi.sh @@ -63,8 +63,10 @@ done # Restart nagios to forcibly update retention.dat echo "Restart nagios to forcibly update retention.dat" -sudo $BASEDIR/manage_services.sh restart nagios -sleep 10 +sudo $BASEDIR/manage_services.sh stop nagios +sleep 5 +sudo $BASEDIR/manage_services.sh start nagios + if [ -z $rootdir ]; then rootdir="/store/backups/nagiosxi" @@ -140,7 +142,13 @@ do_rsync $httpdconfdir/nagiosmobile.conf do_rsync $httpdconfdir/nagvis.conf do_rsync $httpdconfdir/nrdp.conf -echo "Backing up MySQL databases..." +verbose "Checking nagios.cfg for nonstandard directories..." 
+for dir in `egrep "[^#].*=" /usr/local/nagios/etc/nagios.cfg | awk -F= '/\// {print $2}' | egrep -v "(usr/lib|/usr/local/nagios|/tmp|/var/run)" | rev | cut -d "/" -f 2- | rev | sort -u`; do + echo "Directory $dir needs to be copied" + do_rsync "${dir}/" +done + +verbose "Backing up MySQL databases..." mkdir -p $mydir/mysql if [[ "$cfg__db_info__ndoutils__dbserver" == *":"* ]]; then ndoutils_dbport=`echo "$cfg__db_info__ndoutils__dbserver" | cut -f2 -d":"` @@ -152,7 +160,7 @@ fi mysqldump -h "$ndoutils_dbserver" --port="$ndoutils_dbport" -u $cfg__db_info__ndoutils__user --password="$cfg__db_info__ndoutils__pwd" --add-drop-database -B $cfg__db_info__ndoutils__db > $mydir/mysql/nagios.sql res=$? if [ $res != 0 ]; then - echo "Error backing up MySQL database 'nagios' - check the password in this script!" | tee -a $SBLOG + error "Error backing up MySQL database 'nagios' - check the password in this script!" | tee -a $SBLOG rm -rf $mydir exit $res; fi @@ -166,13 +174,13 @@ fi mysqldump -h "$nagiosql_dbserver" --port="$nagiosql_dbport" -u $cfg__db_info__nagiosql__user --password="$cfg__db_info__nagiosql__pwd" --add-drop-database -B $cfg__db_info__nagiosql__db > $mydir/mysql/nagiosql.sql res=$? if [ $res != 0 ]; then - echo "Error backing up MySQL database 'nagiosql' - check the password in this script!" | tee -a $SBLOG + error "Error backing up MySQL database 'nagiosql' - check the password in this script!" | tee -a $SBLOG rm -rf $mydir exit $res; fi # Only backup PostgresQL if we are still using it if [ $cfg__db_info__nagiosxi__dbtype == "pgsql" ]; then - echo "Backing up PostgresQL databases..." + verbose "Backing up PostgresQL databases..." 
mkdir -p $mydir/pgsql if [ -z $cfg__db_info__nagiosxi__dbserver ]; then cfg__db_info__nagiosxi__dbserver="localhost" @@ -180,7 +188,7 @@ if [ $cfg__db_info__nagiosxi__dbtype == "pgsql" ]; then pg_dump -h $cfg__db_info__nagiosxi__dbserver -c -U $cfg__db_info__nagiosxi__user $cfg__db_info__nagiosxi__db > $mydir/pgsql/nagiosxi.sql res=$? if [ $res != 0 ]; then - echo "Error backing up PostgresQL database 'nagiosxi' !" | tee -a $SBLOG + error "Error backing up PostgresQL database 'nagiosxi' !" | tee -a $SBLOG rm -rf $mydir exit $res; fi @@ -195,7 +203,7 @@ else mysqldump -h "$nagiosxi_dbserver" --port="$nagiosxi_dbport" -u $cfg__db_info__nagiosxi__user --password="$cfg__db_info__nagiosxi__pwd" --add-drop-database -B $cfg__db_info__nagiosxi__db > $mydir/mysql/nagiosxi.sql res=$? if [ $res != 0 ]; then - echo "Error backing up MySQL database 'nagiosxi' - check the password in this script!" | tee -a $SBLOG + error "Error backing up MySQL database 'nagiosxi' - check the password in this script!" | tee -a $SBLOG rm -rf $mydir exit $res; fi @@ -204,7 +212,7 @@ fi ############################## # COMPRESS BACKUP ############################## -echo "Compressing backup..." +verbose "Compressing backup..." tar czfp $name.tar.gz $name rm -rf $name @@ -216,13 +224,13 @@ if [ -s $name.tar.gz ];then echo "===============" echo "BACKUP COMPLETE" echo "===============" - echo "Backup stored in $rootdir/$name.tar.gz" + verbose "Backup stored in $rootdir/$name.tar.gz" else echo " " echo "===============" echo "BACKUP FAILED" echo "===============" - echo "File was not created at $rootdir/$name.tar.gz" + error "File was not created at $rootdir/$name.tar.gz" rm -rf $mydir exit 1; fi