Skip to content

Commit 5891c7a

Browse files
committed
pgstat: store statistics in shared memory.
Previously the statistics collector received statistics updates via UDP and shared statistics data by writing them out to temporary files regularly. These files can reach tens of megabytes and are written out up to twice a second. This has repeatedly prevented us from adding additional useful statistics. Now statistics are stored in shared memory. Statistics for variable-numbered objects are stored in a dshash hashtable (backed by dynamic shared memory). Fixed-numbered stats are stored in plain shared memory. The header for pgstat.c contains an overview of the architecture. The stats collector is not needed anymore, remove it. By utilizing the transactional statistics drop infrastructure introduced in a prior commit statistics entries cannot "leak" anymore. Previously leaked statistics were dropped by pgstat_vacuum_stat(), called from [auto-]vacuum. On systems with many small relations pgstat_vacuum_stat() could be quite expensive. Now that replicas drop statistics entries for dropped objects, it is not necessary anymore to reset stats when starting from a cleanly shut down replica. Subsequent commits will perform some further code cleanup, adapt docs and add tests. Bumps PGSTAT_FILE_FORMAT_ID. Author: Kyotaro Horiguchi <[email protected]> Author: Andres Freund <[email protected]> Author: Melanie Plageman <[email protected]> Reviewed-By: Andres Freund <[email protected]> Reviewed-By: Thomas Munro <[email protected]> Reviewed-By: Justin Pryzby <[email protected]> Reviewed-By: "David G. Johnston" <[email protected]> Reviewed-By: Tomas Vondra <[email protected]> (in a much earlier version) Reviewed-By: Arthur Zakirov <[email protected]> (in a much earlier version) Reviewed-By: Antonin Houska <[email protected]> (in a much earlier version) Discussion: https://postgr.es/m/[email protected] Discussion: https://postgr.es/m/[email protected] Discussion: https://postgr.es/m/[email protected]
1 parent be902e2 commit 5891c7a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+4253
-5343
lines changed

doc/src/sgml/monitoring.sgml

+14-5
Original file line numberDiff line numberDiff line change
@@ -1110,10 +1110,6 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
11101110
<entry><literal>LogicalLauncherMain</literal></entry>
11111111
<entry>Waiting in main loop of logical replication launcher process.</entry>
11121112
</row>
1113-
<row>
1114-
<entry><literal>PgStatMain</literal></entry>
1115-
<entry>Waiting in main loop of statistics collector process.</entry>
1116-
</row>
11171113
<row>
11181114
<entry><literal>RecoveryWalStream</literal></entry>
11191115
<entry>Waiting in main loop of startup process for WAL to arrive, during
@@ -2115,6 +2111,18 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
21152111
<entry>Waiting to access the list of predicate locks held by
21162112
serializable transactions.</entry>
21172113
</row>
2114+
<row>
2115+
<entry><literal>PgStatsDSA</literal></entry>
2116+
<entry>Waiting for stats dynamic shared memory allocator access.</entry>
2117+
</row>
2118+
<row>
2119+
<entry><literal>PgStatsHash</literal></entry>
2120+
<entry>Waiting for stats shared memory hash table access.</entry>
2121+
</row>
2122+
<row>
2123+
<entry><literal>PgStatsData</literal></entry>
2124+
<entry>Waiting for shared memory stats data access.</entry>
2125+
</row>
21182126
<row>
21192127
<entry><literal>SerializableXactHash</literal></entry>
21202128
<entry>Waiting to read or update information about serializable
@@ -5142,7 +5150,8 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
51425150
<returnvalue>timestamp with time zone</returnvalue>
51435151
</para>
51445152
<para>
5145-
Returns the timestamp of the current statistics snapshot.
5153+
Returns the timestamp of the current statistics snapshot, or NULL if
5154+
no statistics snapshot has been taken.
51465155
</para></entry>
51475156
</row>
51485157

src/backend/access/transam/xlog.c

+27-12
Original file line numberDiff line numberDiff line change
@@ -1842,7 +1842,7 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
18421842
WriteRqst.Flush = 0;
18431843
XLogWrite(WriteRqst, tli, false);
18441844
LWLockRelease(WALWriteLock);
1845-
WalStats.m_wal_buffers_full++;
1845+
PendingWalStats.wal_buffers_full++;
18461846
TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
18471847
}
18481848
/* Re-acquire WALBufMappingLock and retry */
@@ -2200,10 +2200,10 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
22002200

22012201
INSTR_TIME_SET_CURRENT(duration);
22022202
INSTR_TIME_SUBTRACT(duration, start);
2203-
WalStats.m_wal_write_time += INSTR_TIME_GET_MICROSEC(duration);
2203+
PendingWalStats.wal_write_time += INSTR_TIME_GET_MICROSEC(duration);
22042204
}
22052205

2206-
WalStats.m_wal_write++;
2206+
PendingWalStats.wal_write++;
22072207

22082208
if (written <= 0)
22092209
{
@@ -4877,6 +4877,7 @@ StartupXLOG(void)
48774877
XLogCtlInsert *Insert;
48784878
CheckPoint checkPoint;
48794879
bool wasShutdown;
4880+
bool didCrash;
48804881
bool haveTblspcMap;
48814882
bool haveBackupLabel;
48824883
XLogRecPtr EndOfLog;
@@ -4994,7 +4995,10 @@ StartupXLOG(void)
49944995
{
49954996
RemoveTempXlogFiles();
49964997
SyncDataDirectory();
4998+
didCrash = true;
49974999
}
5000+
else
5001+
didCrash = false;
49985002

49995003
/*
50005004
* Prepare for WAL recovery if needed.
@@ -5106,6 +5110,22 @@ StartupXLOG(void)
51065110
*/
51075111
restoreTwoPhaseData();
51085112

5113+
/*
5114+
* When starting with crash recovery, reset pgstat data - it might not be
5115+
* valid. Otherwise restore pgstat data. It's safe to do this here,
5116+
* because postmaster will not yet have started any other processes.
5117+
*
5118+
* NB: Restoring replication slot stats relies on slot state having
5119+
* already been restored from disk.
5120+
*
5121+
* TODO: With a bit of extra work we could just start with a pgstat file
5122+
* associated with the checkpoint redo location we're starting from.
5123+
*/
5124+
if (didCrash)
5125+
pgstat_discard_stats();
5126+
else
5127+
pgstat_restore_stats();
5128+
51095129
lastFullPageWrites = checkPoint.fullPageWrites;
51105130

51115131
RedoRecPtr = XLogCtl->RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
@@ -5180,11 +5200,6 @@ StartupXLOG(void)
51805200
LocalMinRecoveryPointTLI = 0;
51815201
}
51825202

5183-
/*
5184-
* Reset pgstat data, because it may be invalid after recovery.
5185-
*/
5186-
pgstat_reset_all();
5187-
51885203
/* Check that the GUCs used to generate the WAL allow recovery */
51895204
CheckRequiredParameterValues();
51905205

@@ -6081,8 +6096,8 @@ LogCheckpointEnd(bool restartpoint)
60816096
CheckpointStats.ckpt_sync_end_t);
60826097

60836098
/* Accumulate checkpoint timing summary data, in milliseconds. */
6084-
PendingCheckpointerStats.m_checkpoint_write_time += write_msecs;
6085-
PendingCheckpointerStats.m_checkpoint_sync_time += sync_msecs;
6099+
PendingCheckpointerStats.checkpoint_write_time += write_msecs;
6100+
PendingCheckpointerStats.checkpoint_sync_time += sync_msecs;
60866101

60876102
/*
60886103
* All of the published timing statistics are accounted for. Only
@@ -8009,10 +8024,10 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
80098024

80108025
INSTR_TIME_SET_CURRENT(duration);
80118026
INSTR_TIME_SUBTRACT(duration, start);
8012-
WalStats.m_wal_sync_time += INSTR_TIME_GET_MICROSEC(duration);
8027+
PendingWalStats.wal_sync_time += INSTR_TIME_GET_MICROSEC(duration);
80138028
}
80148029

8015-
WalStats.m_wal_sync++;
8030+
PendingWalStats.wal_sync++;
80168031
}
80178032

80188033
/*

src/backend/commands/vacuum.c

-7
Original file line numberDiff line numberDiff line change
@@ -351,13 +351,6 @@ vacuum(List *relations, VacuumParams *params,
351351
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
352352
errmsg("PROCESS_TOAST required with VACUUM FULL")));
353353

354-
/*
355-
* Send info about dead objects to the cumulative stats system, unless
356-
* we are in autovacuum --- autovacuum.c does this for itself.
357-
*/
358-
if ((params->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess())
359-
pgstat_vacuum_stat();
360-
361354
/*
362355
* Create special memory context for cross-transaction storage.
363356
*

src/backend/commands/vacuumparallel.c

+2
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,15 @@
2828

2929
#include "access/amapi.h"
3030
#include "access/table.h"
31+
#include "access/xact.h"
3132
#include "catalog/index.h"
3233
#include "commands/vacuum.h"
3334
#include "optimizer/paths.h"
3435
#include "pgstat.h"
3536
#include "storage/bufmgr.h"
3637
#include "tcop/tcopprot.h"
3738
#include "utils/lsyscache.h"
39+
#include "utils/rel.h"
3840

3941
/*
4042
* DSM keys for parallel vacuum. Unlike other parallel execution code, since

0 commit comments

Comments
 (0)