You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Princeton/pu/libexec/check_snmp_cpu.pl

365 lines
9.4 KiB
Perl

This file contains invisible Unicode characters!

This file contains invisible Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.

#!/usr/local/bin/perl -w
#
# $Id$
#
# check_snmp_cpu.pl checks CPU values through SNMP. Copied from check_cpu.pl
#
# Copyright 2007 GroundWork Open Source, Inc. (“GroundWork”)
# All rights reserved. This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License version 2 as published
# by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with this
# program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street,
# Fifth Floor, Boston, MA 02110-1301, USA.
#
# Change Log
#----------------
# 4-Nov-2005 - Harper Mann
# Initial revision
#
use strict;
use warnings;

use SNMP;
use Getopt::Long;

# utils.pm is loaded from the Nagios libexec directory; it supplies $TIMEOUT,
# %ERRORS and the print_revision/support/usage helpers used further down.
use lib "/usr/local/monitoring/nagios/libexec";
use utils qw($TIMEOUT %ERRORS &print_revision &support &usage);

# Command-line option storage. These are package variables (declared with
# "our" instead of the obsolete "use vars") that GetOptions fills in.
our ($opt_V, $opt_c, $opt_s, $opt_n, $opt_u, $opt_i, $opt_D, $opt_p, $opt_h);
our ($opt_H, $opt_m, $opt_v, $opt_o);
our $PROGNAME;

# Forward declarations; the subs themselves are defined at the end of the file.
sub print_help ();
sub print_usage ();

# Scratch arrays. They are declared empty: "my @x = undef" would create a
# one-element list holding undef rather than an empty array.
my @sar_vals = ();
my @lines    = ();
my @res      = ();

# CPU selector; a negative value is reported as "ALL" in the output.
my $cpu = -1;

# Warning/critical thresholds per CPU state. -1 means "no threshold given":
# the state is then always reported as OK and no limits are put in perfdata.
my $userwarn = -1;
my $usercrit = -1;
my $nicewarn = -1;
my $nicecrit = -1;
my $syswarn  = -1;
my $syscrit  = -1;
my $idlewarn = -1;
my $idlecrit = -1;

# Behaviour switches, set from -D and -p after option parsing.
my $debug = 0;
my $perf  = 0;

# Option defaults.
$opt_c = -1;
$opt_m = "public";    # SNMP community
$opt_o = 161;         # SNMP port
$opt_v = "2c";        # SNMP version

# Watch out for this: snmpd updates every 5 secs by default
my $sleeptime = 6;    # seconds between the two counter samples

$PROGNAME = "check_cpu";
# Parse the command line. With 'bundling', single-letter options may be
# combined (e.g. -Dp) and are matched case-sensitively, which is what keeps
# -V/-v and -H/-h apart.
Getopt::Long::Configure('bundling');

# Options that carry a value must say so in their spec ("=s" string, "=i"
# integer). A bare name is a boolean flag and would only ever store 1, which
# is why -t, -S, -v and -o are declared with a value type here.
my $status = GetOptions(
    "V"   => \$opt_V,     "progVersion" => \$opt_V,
    "H=s" => \$opt_H,     "host=s"      => \$opt_H,
    "C=s" => \$opt_m,     "Community=s" => \$opt_m,
    "t=i" => \$TIMEOUT,   "timeout=i"   => \$TIMEOUT,
    "S=i" => \$sleeptime, "sleeptime=i" => \$sleeptime,
    "v=s" => \$opt_v,     "version=s"   => \$opt_v,
    "u=s" => \$opt_u,     "user=s"      => \$opt_u,
    "n=s" => \$opt_n,     "nice=s"      => \$opt_n,
    "s=s" => \$opt_s,     "system=s"    => \$opt_s,
    "i=s" => \$opt_i,     "idle=s"      => \$opt_i,
    "D"   => \$opt_D,     "debug"       => \$opt_D,
    "o=i" => \$opt_o,     "port=i"      => \$opt_o,
    "p"   => \$opt_p,     "performance" => \$opt_p,
    "h"   => \$opt_h,     "help"        => \$opt_h
);

# GetOptions returns false when it met an unknown or malformed option.
if (!$status) {
    print_usage();
    exit $ERRORS{'UNKNOWN'};
}

# Version and help are answered first so that they work without -H.
if ($opt_V) {
    print_revision($PROGNAME, '$Revision$');
    exit $ERRORS{'OK'};
}
if ($opt_h) {
    print_help();
    exit $ERRORS{'UNKNOWN'};
}

# Need host name. The message goes to stdout with the UNKNOWN status so the
# monitoring system sees a plugin-style result instead of die's exit code.
if (!$opt_H) {
    print "-H <hostname> is required\n";
    exit $ERRORS{'UNKNOWN'};
}

# Check snmp version. The pattern is anchored so that only exactly "1" or
# "2c" is accepted (an unanchored /1|2c/ also matches e.g. "11" or "12c").
if ($opt_v && $opt_v !~ /\A(?:1|2c)\z/) {
    print "SNMP V1 or V2c only\n";
    exit $ERRORS{'UNKNOWN'};
}

# Debug switch: also turns on the SNMP module's own debugging output.
if ($opt_D) {
    $SNMP::debugging = 1;
    $debug           = 1;
}

# Cpu switch
if ($opt_c >= 0) {
    $cpu = $opt_c;
}

# Performance switch
if ($opt_p) {
    $perf = 1;
}
# Options checking

# _parse_threshold_pair($value, $flag, $label, $crit_below_warn)
#
# Splits a "<warn>:<crit>" option value and validates it.
#   $value           - raw option string, e.g. "70:90"
#   $flag            - option letter used in messages, e.g. "u"
#   $label           - metric name used in messages, e.g. "user"
#   $crit_below_warn - true when critical must be LOWER than warning (idle),
#                      false when it must be HIGHER (user, nice, system)
# Both numbers must be integers in 1..100. Every violation is reported through
# usage(). Returns the list ($warn, $crit).
sub _parse_threshold_pair {
    my ($value, $flag, $label, $crit_below_warn) = @_;

    my ($warn, $crit) = split /:/, $value;

    # An absent half (or a literal 0) counts as missing.
    ($warn && $crit) || usage("missing value -$flag <warn:crit>\n");

    ($warn =~ /\A\d{1,3}\z/ && $warn > 0 && $warn <= 100) &&
    ($crit =~ /\A\d{1,3}\z/ && $crit > 0 && $crit <= 100) ||
        usage("Invalid value: -$flag <warn:crit> ($label percent): $value\n");

    # The message states the relation that is actually enforced.
    if ($crit_below_warn) {
        ($crit < $warn) ||
            usage("$label critical (-$flag $value <warn:crit>) must be < warning\n");
    }
    else {
        ($crit > $warn) ||
            usage("$label critical (-$flag $value <warn:crit>) must be > warning\n");
    }

    return ($warn, $crit);
}

# Percent CPU system utilization
if ($opt_s) {
    ($syswarn, $syscrit) = _parse_threshold_pair($opt_s, 's', 'system', 0);
}

# Percent CPU nice utilization
if ($opt_n) {
    ($nicewarn, $nicecrit) = _parse_threshold_pair($opt_n, 'n', 'nice', 0);
}

# Percent CPU user utilization
if ($opt_u) {
    ($userwarn, $usercrit) = _parse_threshold_pair($opt_u, 'u', 'user', 0);
}

# Percent CPU idle: the alert fires when idle drops BELOW the threshold, so
# critical has to be the smaller number.
if ($opt_i) {
    ($idlewarn, $idlecrit) = _parse_threshold_pair($opt_i, 'i', 'idle', 1);
}
# Sample the raw CPU tick counters (ssCpuRawUser/System/Nice/Idle) over SNMP
# twice, $sleeptime seconds apart, and turn the differences into percentages
# of the total ticks seen in that interval.
my ($lbl, $user, $nice, $sys, $idle, $total);
my ($tmp_user, $tmp_nice, $tmp_sys, $tmp_idle);

# _sample_cpu_counters($session)
#
# Runs one bulkwalk for the four raw CPU counters and returns their first
# values as the list (user, system, nice, idle). Exits with UNKNOWN when the
# session reports an error or when a counter is missing or not a number.
sub _sample_cpu_counters {
    my ($session) = @_;

    my @oids = qw(ssCpuRawUser ssCpuRawSystem ssCpuRawNice ssCpuRawIdle);

    # A fresh request list is built for every call, as the original code did.
    my @varlists = $session->bulkwalk(0, 4, [ map { [$_] } @oids ]);
    check_for_errors();

    my @ticks;
    for my $index (0 .. $#oids) {
        my $varlist = $varlists[$index];
        my $varbind = ref $varlist      ? $varlist->[0] : undef;
        my $value   = defined $varbind  ? $varbind->val : undef;

        if (!defined $value || $value !~ /\A\d+\z/) {
            print "UNKNOWN - no usable value returned for $oids[$index]\n";
            exit $ERRORS{'UNKNOWN'};
        }
        push @ticks, $value;
    }
    return @ticks;
}

# Get the kernel/system statistic values from SNMP.
# Don't hang Nagios: the alarm covers both queries and the sleep in between.
# This script installs no ALRM handler of its own.
alarm($TIMEOUT);

my $snmp_session = SNMP::Session->new(
    DestHost   => $opt_H,
    Community  => $opt_m,
    RemotePort => $opt_o,
    Version    => $opt_v
);
if (!defined $snmp_session) {
    print "UNKNOWN - could not create SNMP session to $opt_H\n";
    exit $ERRORS{'UNKNOWN'};
}

# First sample
($tmp_user, $tmp_sys, $tmp_nice, $tmp_idle) = _sample_cpu_counters($snmp_session);

# need to sleep to get delta
sleep $sleeptime;

# Second sample
($user, $sys, $nice, $idle) = _sample_cpu_counters($snmp_session);

alarm(0);    # Done with network

# The query returns values from uptime, we want over the last sleeptime.
$user -= $tmp_user;
$sys  -= $tmp_sys;
$nice -= $tmp_nice;
$idle -= $tmp_idle;

print "SNMP raw: user: $user sys: $sys nice: $nice idle: $idle\n" if $debug;

# A counter that went backwards would produce nonsense percentages.
if (grep { $_ < 0 } $user, $sys, $nice, $idle) {
    print "UNKNOWN - CPU counters went backwards between samples\n";
    exit $ERRORS{'UNKNOWN'};
}

# Here we convert to percents. If nothing advanced between the two samples
# there is nothing to divide by, so report UNKNOWN instead of dying with
# "Illegal division by zero".
$total = $user + $sys + $nice + $idle;
if ($total <= 0) {
    print "UNKNOWN - CPU counters did not change between samples\n";
    exit $ERRORS{'UNKNOWN'};
}

$user = $user / $total * 100;
$sys  = $sys  / $total * 100;
$idle = $idle / $total * 100;
$nice = $nice / $total * 100;
# Threshold checks

# _threshold_state($value, $warn, $crit, $low_is_bad)
#
# Classifies one percentage against its thresholds and returns the word used
# in the plugin output: "OK", "Warning" or "Critical".
#   $crit <= 0   - no threshold configured, always "OK"
#   $low_is_bad  - true for idle (alert when the value falls BELOW a limit),
#                  false for user/nice/sys (alert when it rises ABOVE a limit)
sub _threshold_state {
    my ($value, $warn, $crit, $low_is_bad) = @_;

    return 'OK' unless $crit > 0;

    if ($low_is_bad) {
        return 'Critical' if $value < $crit;
        return 'Warning'  if $value < $warn;
    }
    else {
        return 'Critical' if $value > $crit;
        return 'Warning'  if $value > $warn;
    }
    return 'OK';
}

my $c = ($cpu < 0) ? "ALL" : $cpu;

# Start from a defined string so the first append does not trigger an
# "uninitialized value" warning.
my $out = "(cpu: $c) ";

# One row per reported CPU state, in output order:
#   label, percentage, warning limit, critical limit, low-is-bad flag
my @metrics = (
    [ 'user', $user, $userwarn, $usercrit, 0 ],
    [ 'nice', $nice, $nicewarn, $nicecrit, 0 ],
    [ 'sys',  $sys,  $syswarn,  $syscrit,  0 ],
    [ 'idle', $idle, $idlewarn, $idlecrit, 1 ],
);

# Remember which states occurred; this decides the exit status below.
my %seen_state;
for my $metric (@metrics) {
    my ($label, $value, $warn, $crit, $low_is_bad) = @{$metric};
    my $state = _threshold_state($value, $warn, $crit, $low_is_bad);
    $seen_state{$state} = 1;
    $out .= sprintf("%s: %.2f%% (%s) ", $label, $value, $state);
}

# Main output
print $out;

# Performance output: "label=value%;warn;crit;;", with the limits left empty
# when no threshold was given for that metric.
if ($perf) {
    print " |";
    for my $metric (@metrics) {
        my ($label, $value, $warn, $crit) = @{$metric};
        if ($crit < 0) {
            printf(" %s=%.2f%%;;;;", $label, $value);
        }
        else {
            printf(" %s=%.2f%%;%d;%d;;", $label, $value, $warn, $crit);
        }
    }
}
print "\n";

# Plugin exit status: the worst state seen wins.
if ($seen_state{'Critical'}) { exit $ERRORS{'CRITICAL'} }
if ($seen_state{'Warning'})  { exit $ERRORS{'WARNING'} }
exit $ERRORS{'OK'};
# Usage sub
sub print_usage () {
    # Short option summary, one entry per output line; the first line names
    # the program via $PROGNAME.
    my @summary = (
        "Usage: $PROGNAME",
        '[-C], --Community <community>',
        '[-h], --help',
        '[-H], --host',
        '[-i], --idle <warn:crit> percent (NOTE: idle less than x)',
        '[-n], --nice <warn:crit> percent',
        '[-o], --port <SNMP port>',
        '[-p] (output Nagios performance data)',
        '[-s], --system <warn:crit> percent',
        '[-t], --timeout',
        '[-u], --user <warn:crit> percent',
        '[-D] (debug) [-h] (help) [-V] (Version)',
    );

    # Emit everything in a single print, newline-terminated.
    print join("\n", @summary) . "\n";
}
# Help sub
sub print_help () {
    # Full help: revision banner, the short usage summary, then one
    # description per option.
    print_revision($PROGNAME, '$Revision$');

    # Perl device CPU check plugin for Nagios
    print_usage();

    # Non-interpolating heredoc; the leading blank line separates the option
    # descriptions from the usage summary printed above.
    print <<'END_HELP';

-C, --Community
SNMP Community string
-D, --debug
Debug output
-h, --help
Print help
-H, --host
Hostname of the target system
-i, --idle
If less than Percent CPU idle
-n, --nice
Percent CPU nice
-o, --port
SNMP port to use
-p, --performance
Report Nagios performance data after the output string
-s, --system=STRING
Percent CPU system
-t, --timeout
Plugin timeout
-u, --user
Percent CPU user
-v, --version
SNMP version
-V, --progVersion
Print version of plugin
END_HELP
}
# Inspects the shared $snmp_session after a request. When the session carries
# a non-zero ErrorNum, prints an UNKNOWN line containing ErrorStr and exits
# with the UNKNOWN status; otherwise returns to the caller.
sub check_for_errors {
    my $error_number = $snmp_session->{ErrorNum};

    # Nothing went wrong: hand control back.
    return $error_number unless $error_number;

    print "UNKNOWN - error retrieving SNMP data: $snmp_session->{ErrorStr}\n";
    exit $ERRORS{UNKNOWN};
}