#!/usr/bin/perl -w
#=============================================================================
#
# process_rrd.pl, threshold monitoring of rrd file data sets
#
# Copyright 2006 The University of Wisconsin Board of Regents
# Licensed and distributed under the terms of the Perl Artistic
# License, see http://net.doit.wisc.edu/~dwcarder/scripts/ for
# details.
#
# Written by: Dale W. Carder, dwcarder@doit.wisc.edu
# Network Services Group
# Division of Information Technology
# University of Wisconsin at Madison
#
# For more about this and other tools, see
# http://net.doit.wisc.edu/~dwcarder/scripts/
#
#=============================================================================
#
# Overview: builds a list of rrd files (from an MRTG config file or by
# scanning a directory), fetches the MAX consolidated values for the last
# N whole hours from each file, and reports every file in which at least
# one datapoint exceeds the max threshold AND the average of the defined
# datapoints exceeds the average threshold.  Reports go to stdout, or to
# sendmail when an email address is given.
#
#=============================================================================

use strict;
use warnings;

my $ver = '$Revision: 1.19 $ $Date: 2006/04/06 00:50:34 $';
$ver =~ s/\$//g;

#=================================================================
# U S E  /  R E Q U I R E S
#=================================================================
use RRDs;
use Getopt::Long;

#=================================================================
# C O N F I G
#=================================================================

# Note, both thresholds need to be crossed to be considered bad

my $Allcnf  = "/var/local/mrtg/XXI/all.cfg";
my $Dir     = "/var/local/mrtg/XXI";      # where the rrd files live
my $BaseURL = "https://stats.net.wisc.edu/cgi-bin/14all_search.cgi";
my $Mailcmd = "/usr/sbin/sendmail -t";    # path to sendmail to read from stdin

# NOTE(review): the sender and reply-to addresses of the report mail were
# not recoverable from the reviewed text.  Fill these in if explicit
# headers are wanted; an empty value means the header line is not written.
my $MailFrom    = '';
my $MailReplyTo = '';

#=================================================================
# M A I N
#=================================================================

my $opt_a;
my $opt_b;
my $opt_d;
my $opt_e;
my $opt_i;
my $opt_h;
my $opt_m;
my $opt_p;
my $opt_r;
my $opt_v;
my $opt_rrdpath;
my $opt_mrtgcfg;
my $opt_nomrtg;
my $opt_url;
my $opt_title;
my $opt_guessmax;

# GetOptions returns false on unknown or malformed options; stop instead of
# running with a half-understood command line.
my $options_ok = GetOptions(
    "average=i"  => \$opt_a,
    "bytes"      => \$opt_b,
    "debug"      => \$opt_d,
    "email=s"    => \$opt_e,
    "interval=i" => \$opt_i,
    "help"       => \$opt_h,
    "m=i"        => \$opt_m,
    "percentage" => \$opt_p,
    "r=s"        => \$opt_r,
    "verbose"    => \$opt_v,
    "rrdpath=s"  => \$opt_rrdpath,
    "mrtgcfg=s"  => \$opt_mrtgcfg,
    "nomrtg"     => \$opt_nomrtg,
    "url=s"      => \$opt_url,
    "title=s"    => \$opt_title,
    "guessmax"   => \$opt_guessmax,
);
if (!$options_ok) { usage(); exit(1); }

if ($opt_h) { usage(); exit; }

# how many hours of data do we look at
my $Interval = 1;
if (defined($opt_i)) {
    # "=i" also accepts negative numbers, so insist on plain digits
    if ($opt_i !~ m/^\d+$/) { usage(); exit(1); }
    $Interval = $opt_i;
}

# the files we will parse
my $DeviceRegex = '.errs$';
if (defined($opt_r) && length($opt_r)) { $DeviceRegex = $opt_r; }

# 1, send email.  0 use stdout
my $send_mail = 0;
my $emailaddr = '';
if ($opt_e) {
    $send_mail = 1;
    $emailaddr = $opt_e;
    $emailaddr =~ s/[\r\n]+/ /g;    # the address ends up in a mail header
}

# datapoint errors > than this number is bad
my $Threshold = 1;
if (defined($opt_m)) {
    if ($opt_m !~ m/^\d+$/) { usage(); exit(1); }
    $Threshold = $opt_m;
}

my $Percentage = $opt_p ? 1 : 0;

# avg errors > than this number is bad
my $Avg_threshold = 1;
if (defined($opt_a)) {
    if ($opt_a !~ m/^\d+$/) { usage(); exit(1); }
    $Avg_threshold = $opt_a;
}

my $Verbose = $opt_v ? $opt_v : 0;
my $Debug   = $opt_d ? $opt_d : 0;
my $Bits    = $opt_b ? 1 : 0;

if ($opt_rrdpath) { $Dir     = $opt_rrdpath; }
if ($opt_mrtgcfg) { $Allcnf  = $opt_mrtgcfg; }
if ($opt_url)     { $BaseURL = $opt_url; }

# Ok, now calculate the start and end times we are looking at.
# The end time is the current time rounded down to the top of the hour.
my $now = time();
my ($sec, $min) = localtime($now);
my $EndTime = $now - ($min * 60) - $sec;

# subtract the interval (in hours) to get the start time.
my $StartTime = $EndTime - ($Interval * 3600);

# keep track of what we find
my $ErrorsFound         = 0;
my $TotalDPs            = 0;
my $MissingDPs          = 0;
my $Have_printed_header = 0;
my $MailFH;                     # pipe to sendmail, opened on first report

# get a list of rrd files to process
my @files;

# strip off rrd file suffix, if given
$DeviceRegex =~ s/\.rrd$//;

# compile once so that a bad pattern is reported up front
my $DeviceRe = eval { qr/$DeviceRegex/ };
if (!defined($DeviceRe)) {
    die "Invalid filename pattern '$DeviceRegex': $@";
}

if (defined($opt_nomrtg)) {
    # old dumb way to do this, or we're not using a mrtg config file
    if ($Debug) { print "Path to rrd's: $Dir\n" }
    opendir(my $dh, $Dir) or die "Cannot open directory $Dir: $!\n";
    # The pattern has had ".rrd" stripped, so compare it against the file
    # name without its ".rrd" suffix (as is done for MRTG target names).
    foreach my $entry (readdir($dh)) {
        my $base = $entry;
        next unless $base =~ s/\.rrd$//;
        push(@files, $entry) if $base =~ $DeviceRe;
    }
    closedir($dh);
}
else {
    # way to only get files active in the mrtg configuration
    open(my $cfg, '<', $Allcnf) or die "Cannot open MRTG config $Allcnf: $!\n";
    if ($Verbose || $Debug) {
        print "Reading in MRTG Configuration... (this may take a minute)\n";
    }
    while (my $cfgline = <$cfg>) {
        if ($cfgline =~ m/^Target\[(.+)\]:/) {
            my $target = $1;
            if ($target =~ $DeviceRe) {
                push(@files, lc("$target.rrd"));
            }
        }
    }
    close($cfg);
}

my $filemax = scalar(@files);
my $filenum = 0;

if ($Verbose || $Debug) {
    if (defined($opt_title)) { print "$opt_title \n"; }
    print "$filemax files to scan \n";
    print "Start time: " . localtime($StartTime) . ", End time: " . localtime($EndTime) . "\n";
}

foreach my $file (@files) {

    my $is_bytes = ($file =~ m/bytes\.rrd/) ? 1 : 0;
    my $is_errs  = ($file =~ m/errs\.rrd/)  ? 1 : 0;

    my $theoretical_max = 0;
    my %packetcounter;    # we store the packet counters in here for later use

    if ($Percentage) {
        if ($is_bytes) {
            # This approach only works for byte counters:
            # get the maximum value available overall so we can compute
            # percentages.
            if (defined($opt_guessmax)) {
                $theoretical_max = guess_max_rate($file);
            }
            else {
                $theoretical_max = rrd_max_rate("$Dir/$file");
            }
            if ($Debug) { print "Interface Max Val is $theoretical_max bytes/sec\n"; }
            if (!($theoretical_max > 0)) {
                warn "$file: no usable maximum rate, percentage threshold cannot be evaluated\n";
            }
        }
        else {
            # For error counters, we look at the error counter and use the
            # *packet* counter for the same time period to determine the
            # percentage of errored packets.
            %packetcounter = load_packet_counters($file);
        }
    }

    if ($Verbose || $Debug) {
        # progress line for the first file and then every 500th file
        my $shown = ($filenum == 0) ? 1 : $filenum;
        if (($filenum % 500) == 0) {
            print "processing file $shown of $filemax\n";
        }
        $filenum++;
        if ($Debug) { print "Threshold $Threshold, Avg_threshold $Avg_threshold\n"; }
    }

    my ($start, $step, $names, $data) =
        RRDs::fetch("$Dir/$file", "MAX", "-s", $StartTime, "-e", $EndTime);

    # if there was a problem opening the file:
    if (!defined($start)) {
        if ($Debug) {
            my $err = RRDs::error;
            print "skipping $file: $err\n" if $err;
        }
        next;
    }

    if ($Debug) {
        print "file $file\n";
        print "Start: ", scalar localtime($start), " ($start)\n";
        print "Step size: $step seconds\n";
        print "DS names: ", join(", ", @$names) . "\n";
        print "Data points: ", $#$data + 1, "\n";
        print "Data:\n";
    }

    my $maxval             = 0;
    my $averageval         = 0;    # running sum, divided by $datapoints below
    my $datapoints         = 0;
    my $threshhold_trigger = 0;

    foreach my $line (@$data) {
        foreach my $val (@$line) {
            $TotalDPs++;

            if (!defined($val)) {
                # datapoint is missing
                $MissingDPs++;
                if ($Debug) {
                    print " ", scalar localtime($start);
                    print " $file ";
                    print " missing\n";
                }
                next;
            }

            # update max value seen and the running sum for the average
            if ($val > $maxval) { $maxval = $val; }
            $averageval += $val;
            $datapoints++;

            if ($Debug) {
                print " ", scalar localtime($start);
                print " $file ";
                printf "%12.1f ", $val;
            }

            # does the value exceed our threshold?
            if ($Percentage && $is_bytes) {
                # without a positive maximum there is nothing to divide by
                if ($theoretical_max > 0) {
                    my $perc = ($val / $theoretical_max * 100);
                    if ($perc > $Threshold) { $threshhold_trigger = 1; }
                    if ($Debug) { printf(" %2d", $perc); }
                }
            }
            elsif ($Percentage && $is_errs) {
                my $packets = $packetcounter{$start};
                if (defined($packets) && $packets > 0) {
                    my $perc = ($val / $packets * 100);
                    if ($perc > $Threshold) { $threshhold_trigger = 1; }
                    if ($Debug) { printf(" %2d", $perc); }
                }
            }
            elsif ($val > $Threshold) {
                $threshhold_trigger = 1;
            }

            if ($Debug) { print "\n" }
        }
        $start += $step;
    }

    next unless $threshhold_trigger;

    # A trigger implies at least one defined datapoint, so this is safe.
    # NOTE(review): in percentage mode this average is still a raw rate,
    # while the mail header prints the average threshold with a '%' suffix
    # -- confirm which comparison is intended.
    $averageval = $averageval / $datapoints;
    next unless $averageval > $Avg_threshold;

    $ErrorsFound++;

    my $target = $file;
    $target =~ s/\.rrd$//;

    if ($Bits) {
        $maxval     = $maxval * 8;
        $averageval = $averageval * 8;
    }

    # Open the mail pipe and print the email header, if we haven't already
    if ($send_mail && !$Have_printed_header) {
        open($MailFH, '|-', $Mailcmd) or die "Cannot run '$Mailcmd': $!\n";
        printHeader();
        $Have_printed_header = 1;
    }

    # The stdout report separates entries with a leading blank line; the
    # mail report does not.
    my $out  = $send_mail ? $MailFH : \*STDOUT;
    my $lead = $send_mail ? ""      : "\n";
    my $query = $is_bytes ? "?dsType=bytes&log=" : "?log=";

    printf {$out} "%s%-45s %10d/sec max, %10d/sec avg\n",
        $lead, $file, $maxval, $averageval;
    print {$out} $BaseURL . $query . $target . " \n \n";
}

# The summary line is only appended to a mailed report.
if ($ErrorsFound && $send_mail) {
    if ($TotalDPs != 0) {
        my $dropped = $MissingDPs / $TotalDPs * 100;
        printf {$MailFH}
            "\nLooked at %s files containing %s datapoints of which %s (%.1f%%) datapoints were missing\n",
            $filemax, $TotalDPs, $MissingDPs, $dropped;
    }
    close($MailFH) or warn "'$Mailcmd' did not finish cleanly (status $?)\n";
}

exit(0);

#=================================================================
# S U B R O U T I N E S
#=================================================================

# Guess an interface's maximum rate in bytes/sec from the interface type
# embedded in the rrd file name (_fa, _gi, _te, _vl, _se followed by a
# digit).  Unrecognised names are treated as 1 Gbit/s.
sub guess_max_rate {
    my ($file) = @_;

    # checked in order, first match wins; values are bits/sec
    my @speed_for = (
        [ qr/_fa\d/i, 100000000 ],
        [ qr/_gi\d/i, 1000000000 ],
        [ qr/_te\d/i, 10000000000 ],
        [ qr/_vl\d/i, 10000000000 ],
        [ qr/_se\d/i, 1540000 ],
    );

    my $bits = 1000000000;
    foreach my $entry (@speed_for) {
        if ($file =~ $entry->[0]) {
            $bits = $entry->[1];
            last;
        }
    }
    return $bits / 8;
}

# Return the "max" field of data source ds0 as reported by RRDs::info for
# the given rrd file, or 0 when the file cannot be read or has no such
# field.
sub rrd_max_rate {
    my ($path) = @_;

    my $info = RRDs::info($path);
    return 0 unless ref($info) eq 'HASH';

    my $max = $info->{"ds[ds0].max"};
    return defined($max) ? $max : 0;
}

# Read the packet counters that belong to an error counter file.
#
# The packet file name is derived by replacing the last "_<word>.rrd"
# component of $file with "_pkts.rrd".  Returns a hash mapping the
# timestamp of each row to the value of its first data source; later data
# sources are only counted.  Returns an empty hash when no name can be
# derived or the packet file cannot be fetched.  Updates the global
# $TotalDPs / $MissingDPs counters.
sub load_packet_counters {
    my ($file) = @_;
    my %counter_at;

    # generate the name of the packet counter file
    my $packetfile = $file;
    my $renamed = ($packetfile =~ s/_[a-zA-Z0-9]+\.rrd$/_pkts.rrd/);
    if (!$renamed) {
        if ($Debug) { print "file: $file, no packet file name could be derived\n"; }
        return %counter_at;
    }
    if ($Debug) { print "file: $file, packet file: $packetfile\n"; }

    my ($start, $step, $names, $data) =
        RRDs::fetch("$Dir/$packetfile", "MAX", "-s", $StartTime, "-e", $EndTime);
    return %counter_at unless defined($start);

    foreach my $row (@$data) {
        foreach my $ds (0 .. $#$row) {
            my $val = $row->[$ds];
            $TotalDPs++;

            if (defined($val)) {
                if ($Debug) {
                    print " ", scalar localtime($start);
                    print " $packetfile ";
                    printf("%12.1f %d\n", $val, $ds);
                }
                # we only want to record DS 0, which is inbound, because we
                # only record inbound errors.
                if ($ds == 0) { $counter_at{$start} = $val; }
            }
            else {
                # datapoint is missing
                $MissingDPs++;
                if ($Debug) {
                    print " ", scalar localtime($start);
                    print " $packetfile ";
                    print " missing\n";
                }
            }
        }
        $start += $step;
    }

    return %counter_at;
}

# Write the mail headers and the report preamble to the open sendmail pipe
# ($MailFH).  Must only be called after the pipe has been opened.
sub printHeader {
    my $subject = 'Results from process_rrd';
    my $timestr = "Start time: " . localtime($StartTime) . ", End time: " . localtime($EndTime);

    if (defined($opt_title)) {
        $timestr = $opt_title . "\n$timestr";
        $subject = $opt_title;
    }
    $subject =~ s/[\r\n]+/ /g;    # a header must stay on one line

    my $ttext = $Threshold;
    my $atext = $Avg_threshold;
    if ($Percentage) {
        $ttext = "$Threshold%";
        $atext = "$Avg_threshold%";
    }

    my $bittext = "\n";
    if ($Bits) { $bittext = "Rates are in bits/sec \n"; }

    my @headers;
    push(@headers, "From: $MailFrom")        if length($MailFrom);
    push(@headers, "Reply-to: $MailReplyTo") if length($MailReplyTo);
    push(@headers, "Subject: $subject");
    push(@headers, "To: $emailaddr");
    my $headerlines = join("\n", @headers);

    # the blank line after the headers starts the message body
    my $HeaderText = <<EOM;
$headerlines

Results for running process_rrd.pl on $DeviceRegex with options:
max threshold: $ttext, avg threshold: $atext
$bittext
$timestr
--------------------------------------------------------------------------------------
EOM

    print {$MailFH} $HeaderText;
}

# Print the command line help to stdout.
sub usage {
    my $text = <<EOM;
process_rrd.pl $ver

Usage: process_rrd.pl [options]

  -i <hours>        number of hours in history to search (default 1)
  -r <pattern>      pattern of filenames to match (default '.errs\$'), .rrd is assumed
  -m <n>            max threshold (default 1)
  -a <n>            average threshold (default 1)
  -p                thresholds specified as percentages instead of raw values
  -b                report rates in bits/sec (values are multiplied by 8)
  -e <address>      email output to this address
  -v                verbose output
  -d                debug output
  -h                print this help text
  --rrdpath <path>  path to the rrd files (default: $Dir)
  --mrtgcfg <file>  mrtg's config file (default: $Allcnf)
  --nomrtg          don't use mrtg conf file, just scan the directory for rrd files
  --title <string>  title string to put on reports
  --guessmax        guess max rate from filename instead of rrd file's max value field
  --url <url>       URL of 14all.cgi (default: $BaseURL)
EOM
    print $text;
}