summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Michael Paquier 2025-03-05 01:17:39 +0000
committer Michael Paquier 2025-03-05 01:17:39 +0000
commit f4694e0f35b218238cbc87bcf8f8f5c6639bb1d4 (patch)
tree 58fc05ad2d329321698ade9f0360faaffecb4589
parent 54d23601b978d2552696fb7fe35ae5d6102ea2cb (diff)
Fix some gaps in pg_stat_io with WAL receiver and WAL summarizer
The WAL receiver and WAL summarizer processes each gain a call to pgstat_report_wal(), to make sure that they report their WAL statistics to pgstats, gathering data for pg_stat_io. In the WAL receiver, the stats reports are timed with the status updates sent to the primary, which depend on wal_receiver_status_interval and wal_receiver_timeout. This is a conservative choice, but perhaps we could be more aggressive with the frequency of the stats reports. An interesting historical fact is that the WAL receiver does writes and syncs of WAL, but it has never reported its statistics to pgstats in pg_stat_wal. In the WAL summarizer, the stats reports are done each time the process waits for WAL. While at it, pg_stat_io is adjusted so that these two processes do not report any rows when IOObject is not WAL, making the view easier to use with fewer rows. Two TAP tests are added, checking statistics for the WAL summarizer and the WAL receiver. Status updates in the WAL receiver are currently possible in the recovery test 001_stream_rep.pl. Reviewed-by: Bertrand Drouvot <[email protected]> Discussion: https://postgr.es/m/[email protected]
-rw-r--r-- src/backend/postmaster/walsummarizer.c 4
-rw-r--r-- src/backend/replication/walreceiver.c 10
-rw-r--r-- src/backend/utils/activity/pgstat_io.c 12
-rw-r--r-- src/bin/pg_walsummary/t/002_blocks.pl 7
-rw-r--r-- src/test/recovery/t/001_stream_rep.pl 7
5 files changed, 39 insertions, 1 deletions
diff --git a/src/backend/postmaster/walsummarizer.c b/src/backend/postmaster/walsummarizer.c
index f4d61c1f3bb..4b95f6a5213 100644
--- a/src/backend/postmaster/walsummarizer.c
+++ b/src/backend/postmaster/walsummarizer.c
@@ -33,6 +33,7 @@
#include "common/blkreftable.h"
#include "libpq/pqsignal.h"
#include "miscadmin.h"
+#include "pgstat.h"
#include "postmaster/auxprocess.h"
#include "postmaster/interrupt.h"
#include "postmaster/walsummarizer.h"
@@ -1636,6 +1637,9 @@ summarizer_wait_for_wal(void)
sleep_quanta -= pages_read_since_last_sleep;
}
+ /* Report pending statistics to the cumulative stats system. */
+ pgstat_report_wal(false);
+
/* OK, now sleep. */
(void) WaitLatch(MyLatch,
WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 82f7302ff9f..83129cb92af 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -584,6 +584,16 @@ WalReceiverMain(const void *startup_data, size_t startup_data_len)
bool requestReply = false;
/*
+ * Report pending statistics to the cumulative stats
+ * system. This location is useful for the report as it
+ * is not within a tight loop in the WAL receiver, to
+ * avoid bloating pgstats with requests, while also making
+ * sure that the reports happen each time a status update
+ * is sent.
+ */
+ pgstat_report_wal(false);
+
+ /*
* Check if time since last receive from primary has
* reached the configured limit.
*/
diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c
index ba11545a17f..eb575025596 100644
--- a/src/backend/utils/activity/pgstat_io.c
+++ b/src/backend/utils/activity/pgstat_io.c
@@ -435,13 +435,23 @@ pgstat_tracks_io_object(BackendType bktype, IOObject io_object,
*/
no_temp_rel = bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
bktype == B_CHECKPOINTER || bktype == B_AUTOVAC_WORKER ||
- bktype == B_STANDALONE_BACKEND || bktype == B_STARTUP;
+ bktype == B_STANDALONE_BACKEND || bktype == B_STARTUP ||
+ bktype == B_WAL_SUMMARIZER || bktype == B_WAL_WRITER ||
+ bktype == B_WAL_RECEIVER;
if (no_temp_rel && io_context == IOCONTEXT_NORMAL &&
io_object == IOOBJECT_TEMP_RELATION)
return false;
/*
+ * Some BackendTypes only perform IO under IOOBJECT_WAL, hence exclude all
+ * rows for all the other objects for these.
+ */
+ if ((bktype == B_WAL_SUMMARIZER || bktype == B_WAL_RECEIVER ||
+ bktype == B_WAL_WRITER) && io_object != IOOBJECT_WAL)
+ return false;
+
+ /*
* Some BackendTypes do not currently perform any IO in certain
* IOContexts, and, while it may not be inherently incorrect for them to
* do so, excluding those rows from the view makes the view easier to use.
diff --git a/src/bin/pg_walsummary/t/002_blocks.pl b/src/bin/pg_walsummary/t/002_blocks.pl
index 27f29a3b0c6..270332780a4 100644
--- a/src/bin/pg_walsummary/t/002_blocks.pl
+++ b/src/bin/pg_walsummary/t/002_blocks.pl
@@ -46,6 +46,13 @@ SELECT EXISTS (
EOM
ok($result, "WAL summarization caught up after insert");
+# The WAL summarizer should have generated some IO statistics.
+my $stats_reads = $node1->safe_psql(
+ 'postgres',
+ qq{SELECT sum(reads) > 0 FROM pg_stat_io
+ WHERE backend_type = 'walsummarizer' AND object = 'wal'});
+is($stats_reads, 't', "WAL summarizer generates statistics for WAL reads");
+
# Find the highest LSN that is summarized on disk.
my $summarized_lsn = $node1->safe_psql('postgres', <<EOM);
SELECT MAX(end_lsn) AS summarized_lsn FROM pg_available_wal_summaries()
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index ee57d234c86..3945f00ab88 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -506,6 +506,13 @@ $node_standby_2->append_conf('postgresql.conf', "primary_slot_name = ''");
$node_standby_2->enable_streaming($node_primary);
$node_standby_2->reload;
+# The WAL receiver should have generated some IO statistics.
+my $stats_reads = $node_standby_1->safe_psql(
+ 'postgres',
+ qq{SELECT sum(writes) > 0 FROM pg_stat_io
+ WHERE backend_type = 'walreceiver' AND object = 'wal'});
+is($stats_reads, 't', "WAL receiver generates statistics for WAL writes");
+
# be sure do not streaming from cascade
$node_standby_1->stop;