summary refs log tree commit diff
path: root/check_postgres.pl
diff options
context:
space:
mode:
author Greg Sabino Mullane 2009-07-10 15:58:58 +0000
committer Greg Sabino Mullane 2009-07-10 15:58:58 +0000
commit 85e4d6872f9f0d43b6a512737cfc424705b2fe2f (patch)
tree d7c117623a47a6358c5200cfd69e65807c6ca096 /check_postgres.pl
parent 5b8e2de510962e6b42c941cea96138cd15534b59 (diff)
Allow the check_backends check to detect 'too many clients' message and return a critical.
Thanks to Jürgen Schulz-Brüssel for the idea. Add 'fatalregex' to run_command to allow known error messages through. Add 'quickreturn' for piling up database handles.
Diffstat (limited to 'check_postgres.pl')
-rwxr-xr-x check_postgres.pl 25
1 files changed, 20 insertions, 5 deletions
diff --git a/check_postgres.pl b/check_postgres.pl
index d96f88558..00a69ca77 100755
--- a/check_postgres.pl
+++ b/check_postgres.pl
@@ -84,6 +84,7 @@ our @get_methods = (
## no critic (RequireInterpolationOfMetachars)
our %msg = (
'en' => {
+ 'backends-fatal' => q{Could not connect: too many connections},
'backends-mrtg' => q{DB=$1 Max connections=$2},
'backends-msg' => q{$1 of $2 connections ($3%)},
'backends-nomax' => q{Could not determine max_connections},
@@ -277,14 +278,15 @@ our %msg = (
'version-ok' => q{version $1},
},
'fr' => {
+'backends-fatal' => q{Could not connect: too many connections},
'backends-mrtg' => q{DB=$1 Connexions maximum=$2},
'backends-msg' => q{$1 connexions sur $2 ($3%)},
'backends-nomax' => q{N'a pas pu déterminer max_connections},
'backends-oknone' => q{Aucune connexion},
'backends-users' => q{$1 pour le nombre d'utilisateurs doit être un nombre ou un pourcentage},
-'bloat-index' => q{(db $1) index $2 lignes:$3 pages:$4 devrait être:$5 ($6X) octets perdus:$7 ($8)},
+ 'bloat-index' => q{(db $1) index $2 lignes:$3 pages:$4 devrait être:$5 ($6X) octets perdus:$7 ($8)},
'bloat-nomin' => q{aucune relation n'atteint le critère minimum de fragmentation},
-'bloat-table' => q{(db $1) table $2.$3 lignes:$4 pages:$5 devrait être:$6 ($7X) place perdue:$8 ($9)},
+ 'bloat-table' => q{(db $1) table $2.$3 lignes:$4 pages:$5 devrait être:$6 ($7X) place perdue:$8 ($9)},
'checkpoint-baddir' => q{data_directory invalide : "$1"},
'checkpoint-baddir2' => q{pg_controldata could not read the given data directory: "$1"},
'checkpoint-badver' => q{Failed to run pg_controldata - probably the wrong version},
@@ -1396,6 +1398,7 @@ sub run_command {
## Run a command string against each of our databases using psql
## Optional args in a hashref:
## "failok" - don't report if we failed
+ ## "fatalregex" - allow this FATAL regex through
## "target" - use this targetlist instead of generating one
## "timeout" - change the timeout from the default of $opt{timeout}
## "regex" - the query must match this or we throw an error
@@ -1627,7 +1630,7 @@ sub run_command {
if ($err =~ /Timed out/) {
ndie msg('runcommand-timeout', $timeout);
}
- else {ndie $res;
+ else {
ndie msg('runcommand-err');
}
}
@@ -1647,7 +1650,13 @@ sub run_command {
}
if ($db->{error} =~ /FATAL/) {
- ndie "$db->{error}";
+ if ($db->{error} =~ /$arg->{fatalregex}/) {
+ $info->{fatalregex} = $db->{error};
+ next;
+ }
+ else {
+ ndie "$db->{error}";
+ }
}
elsif ($db->{error} =~ /statement timeout/) {
@@ -2237,7 +2246,13 @@ sub check_backends {
my $GROUPBY = q{GROUP BY 2,3};
$SQL = "SELECT COUNT(datid), ($MAXSQL), d.datname FROM pg_database d ".
"LEFT JOIN pg_stat_activity s ON (s.datid = d.oid) $NOIDLE $GROUPBY ORDER BY datname";
- my $info = run_command($SQL, {regex => qr[\s*\d+ \| \d+\s+\|] } );
+ my $info = run_command($SQL, {regex => qr[\s*\d+ \| \d+\s+\|], fatalregex => 'too many clients' } );
+
+ ## If we cannot connect because of too many clients, we treat as a critical error
+ if (exists $info->{fatalregex} and $info->{fatalregex} =~ /too many clients/) {
+ add_critical msg('backends-fatal');
+ return;
+ }
## There may be no entries returned if we catch pg_stat_activity at the right
## moment in older versions of Postgres