Log more info when wait-for-catchup tests time out.
author: Tom Lane <[email protected]>
Mon, 12 Aug 2024 17:18:36 +0000 (13:18 -0400)
committer: Tom Lane <[email protected]>
Mon, 12 Aug 2024 17:18:36 +0000 (13:18 -0400)
Cluster.pm's wait_for_catchup and allied subroutines don't provide
enough information to diagnose the problem when a wait times out.
In hopes of debugging some intermittent buildfarm failures, let's
dump the ending state of the relevant system view when that happens.

Add this to v17 too, but not stable branches.

Discussion: https://postgr.es/m/352068.1723422725@sss.pgh.pa.us

src/test/perl/PostgreSQL/Test/Cluster.pm

index 32ee98aebccd1700d2341d5ca59d8f73ada449a5..fe6ebf10f76cfe002859f8eebe14377c89dfcd72 100644 (file)
@@ -2948,6 +2948,11 @@ sub wait_for_catchup
        }
        else
        {
+           # Fetch additional detail for debugging purposes
+           $query = qq[SELECT * FROM pg_catalog.pg_stat_replication];
+           my $details = $self->safe_psql('postgres', $query);
+           diag qq(Last pg_stat_replication contents:
+${details});
            croak "timed out waiting for catchup";
        }
    }
@@ -3015,8 +3020,15 @@ sub wait_for_slot_catchup
      . $self->name . "\n";
    my $query =
      qq[SELECT '$target_lsn' <= ${mode}_lsn FROM pg_catalog.pg_replication_slots WHERE slot_name = '$slot_name';];
-   $self->poll_query_until('postgres', $query)
-     or croak "timed out waiting for catchup";
+   if (!$self->poll_query_until('postgres', $query))
+   {
+       # Fetch additional detail for debugging purposes
+       $query = qq[SELECT * FROM pg_catalog.pg_replication_slots];
+       my $details = $self->safe_psql('postgres', $query);
+       diag qq(Last pg_replication_slots contents:
+${details});
+       croak "timed out waiting for catchup";
+   }
    print "done\n";
    return;
 }
@@ -3051,8 +3063,15 @@ sub wait_for_subscription_sync
    print "Waiting for all subscriptions in \"$name\" to synchronize data\n";
    my $query =
      qq[SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');];
-   $self->poll_query_until($dbname, $query)
-     or croak "timed out waiting for subscriber to synchronize data";
+   if (!$self->poll_query_until($dbname, $query))
+   {
+       # Fetch additional detail for debugging purposes
+       $query = qq[SELECT * FROM pg_subscription_rel];
+       my $details = $self->safe_psql($dbname, $query);
+       diag qq(Last pg_subscription_rel contents:
+${details});
+       croak "timed out waiting for subscriber to synchronize data";
+   }
 
    # Then, wait for the replication to catchup if required.
    if (defined($publisher))