Skip to content

Commit 623a9ba

Browse files
committed
snapshot scalability: cache snapshots using a xact completion counter.
Previous commits made it faster/more scalable to compute snapshots. But not building a snapshot is still faster. Now that GetSnapshotData() does not maintain RecentGlobal* anymore, that is actually not too hard: This commit introduces xactCompletionCount, which tracks the number of top-level transactions with xids (i.e. which may have modified the database) that completed in some form since the start of the server. We can avoid rebuilding the snapshot's contents whenever the current xactCompletionCount is the same as it was when the snapshot was originally built. Currently this check happens while holding ProcArrayLock. While it's likely possible to perform the check without acquiring ProcArrayLock, it seems better to do that separately / later, as some careful analysis is required. Even with the lock this is a significant win on its own. On a smaller two socket machine this gains another ~1.03x, on a larger machine the effect is roughly double (earlier patch version tested though). If we were able to safely avoid the lock there'd be another significant gain on top of that. Author: Andres Freund <[email protected]> Reviewed-By: Robert Haas <[email protected]> Reviewed-By: Thomas Munro <[email protected]> Reviewed-By: David Rowley <[email protected]> Discussion: https://postgr.es/m/[email protected]
1 parent 51300b4 commit 623a9ba

File tree

5 files changed

+126
-20
lines changed

5 files changed

+126
-20
lines changed

src/backend/replication/logical/snapbuild.c

+1
Original file line numberDiff line numberDiff line change
@@ -524,6 +524,7 @@ SnapBuildBuildSnapshot(SnapBuild *builder)
524524
snapshot->curcid = FirstCommandId;
525525
snapshot->active_count = 0;
526526
snapshot->regd_count = 0;
527+
snapshot->snapXactCompletionCount = 0;
527528

528529
return snapshot;
529530
}

src/backend/storage/ipc/procarray.c

+105-20
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,7 @@ CreateSharedProcArray(void)
407407
procArray->lastOverflowedXid = InvalidTransactionId;
408408
procArray->replication_slot_xmin = InvalidTransactionId;
409409
procArray->replication_slot_catalog_xmin = InvalidTransactionId;
410+
ShmemVariableCache->xactCompletionCount = 1;
410411
}
411412

412413
allProcs = ProcGlobal->allProcs;
@@ -534,6 +535,9 @@ ProcArrayRemove(PGPROC *proc, TransactionId latestXid)
534535
/* Advance global latestCompletedXid while holding the lock */
535536
MaintainLatestCompletedXid(latestXid);
536537

538+
/* Same with xactCompletionCount */
539+
ShmemVariableCache->xactCompletionCount++;
540+
537541
ProcGlobal->xids[proc->pgxactoff] = 0;
538542
ProcGlobal->subxidStates[proc->pgxactoff].overflowed = false;
539543
ProcGlobal->subxidStates[proc->pgxactoff].count = 0;
@@ -667,6 +671,7 @@ ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid)
667671
{
668672
size_t pgxactoff = proc->pgxactoff;
669673

674+
Assert(LWLockHeldByMe(ProcArrayLock));
670675
Assert(TransactionIdIsValid(ProcGlobal->xids[pgxactoff]));
671676
Assert(ProcGlobal->xids[pgxactoff] == proc->xid);
672677

@@ -698,6 +703,9 @@ ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid)
698703

699704
/* Also advance global latestCompletedXid while holding the lock */
700705
MaintainLatestCompletedXid(latestXid);
706+
707+
/* Same with xactCompletionCount */
708+
ShmemVariableCache->xactCompletionCount++;
701709
}
702710

703711
/*
@@ -1916,6 +1924,93 @@ GetMaxSnapshotSubxidCount(void)
19161924
return TOTAL_MAX_CACHED_SUBXIDS;
19171925
}
19181926

1927+
/*
1928+
* Initialize old_snapshot_threshold specific parts of a newly built snapshot.
1929+
*/
1930+
static void
1931+
GetSnapshotDataInitOldSnapshot(Snapshot snapshot)
1932+
{
1933+
if (!OldSnapshotThresholdActive())
1934+
{
1935+
/*
1936+
* If not using "snapshot too old" feature, fill related fields with
1937+
* dummy values that don't require any locking.
1938+
*/
1939+
snapshot->lsn = InvalidXLogRecPtr;
1940+
snapshot->whenTaken = 0;
1941+
}
1942+
else
1943+
{
1944+
/*
1945+
* Capture the current time and WAL stream location in case this
1946+
* snapshot becomes old enough to need to fall back on the special
1947+
* "old snapshot" logic.
1948+
*/
1949+
snapshot->lsn = GetXLogInsertRecPtr();
1950+
snapshot->whenTaken = GetSnapshotCurrentTimestamp();
1951+
MaintainOldSnapshotTimeMapping(snapshot->whenTaken, snapshot->xmin);
1952+
}
1953+
}
1954+
1955+
/*
1956+
* Helper function for GetSnapshotData() that checks if the bulk of the
1957+
* visibility information in the snapshot is still valid. If so, it updates
1958+
* the fields that need to change and returns true. Otherwise it returns
1959+
* false.
1960+
*
1961+
* This very likely can be evolved to not need ProcArrayLock held (at very
1962+
* least in the case we already hold a snapshot), but that's for another day.
1963+
*/
1964+
static bool
1965+
GetSnapshotDataReuse(Snapshot snapshot)
1966+
{
1967+
uint64 curXactCompletionCount;
1968+
1969+
Assert(LWLockHeldByMe(ProcArrayLock));
1970+
1971+
if (unlikely(snapshot->snapXactCompletionCount == 0))
1972+
return false;
1973+
1974+
curXactCompletionCount = ShmemVariableCache->xactCompletionCount;
1975+
if (curXactCompletionCount != snapshot->snapXactCompletionCount)
1976+
return false;
1977+
1978+
/*
1979+
* If the current xactCompletionCount is still the same as it was at the
1980+
* time the snapshot was built, we can be sure that rebuilding the
1981+
* contents of the snapshot the hard way would result in the same snapshot
1982+
* contents:
1983+
*
1984+
* As explained in transam/README, the set of xids considered running by
1985+
* GetSnapshotData() cannot change while ProcArrayLock is held. Snapshot
1986+
* contents only depend on transactions with xids and xactCompletionCount
1987+
* is incremented whenever a transaction with an xid finishes (while
1988+
* holding ProcArrayLock exclusively). Thus the xactCompletionCount check
1989+
* ensures we would detect if the snapshot would have changed.
1990+
*
1991+
* As the snapshot contents are the same as they were before, it is safe
1992+
* to re-enter the snapshot's xmin into the PGPROC array. None of the rows
1993+
* visible under the snapshot could already have been removed (that'd
1994+
* require the set of running transactions to change) and it fulfills the
1995+
* requirement that concurrent GetSnapshotData() calls yield the same
1996+
* xmin.
1997+
*/
1998+
if (!TransactionIdIsValid(MyProc->xmin))
1999+
MyProc->xmin = TransactionXmin = snapshot->xmin;
2000+
2001+
RecentXmin = snapshot->xmin;
2002+
Assert(TransactionIdPrecedesOrEquals(TransactionXmin, RecentXmin));
2003+
2004+
snapshot->curcid = GetCurrentCommandId(false);
2005+
snapshot->active_count = 0;
2006+
snapshot->regd_count = 0;
2007+
snapshot->copied = false;
2008+
2009+
GetSnapshotDataInitOldSnapshot(snapshot);
2010+
2011+
return true;
2012+
}
2013+
19192014
/*
19202015
* GetSnapshotData -- returns information about running transactions.
19212016
*
@@ -1963,6 +2058,7 @@ GetSnapshotData(Snapshot snapshot)
19632058
TransactionId oldestxid;
19642059
int mypgxactoff;
19652060
TransactionId myxid;
2061+
uint64 curXactCompletionCount;
19662062

19672063
TransactionId replication_slot_xmin = InvalidTransactionId;
19682064
TransactionId replication_slot_catalog_xmin = InvalidTransactionId;
@@ -2007,12 +2103,19 @@ GetSnapshotData(Snapshot snapshot)
20072103
*/
20082104
LWLockAcquire(ProcArrayLock, LW_SHARED);
20092105

2106+
if (GetSnapshotDataReuse(snapshot))
2107+
{
2108+
LWLockRelease(ProcArrayLock);
2109+
return snapshot;
2110+
}
2111+
20102112
latest_completed = ShmemVariableCache->latestCompletedXid;
20112113
mypgxactoff = MyProc->pgxactoff;
20122114
myxid = other_xids[mypgxactoff];
20132115
Assert(myxid == MyProc->xid);
20142116

20152117
oldestxid = ShmemVariableCache->oldestXid;
2118+
curXactCompletionCount = ShmemVariableCache->xactCompletionCount;
20162119

20172120
/* xmax is always latestCompletedXid + 1 */
20182121
xmax = XidFromFullTransactionId(latest_completed);
@@ -2266,6 +2369,7 @@ GetSnapshotData(Snapshot snapshot)
22662369
snapshot->xcnt = count;
22672370
snapshot->subxcnt = subcount;
22682371
snapshot->suboverflowed = suboverflowed;
2372+
snapshot->snapXactCompletionCount = curXactCompletionCount;
22692373

22702374
snapshot->curcid = GetCurrentCommandId(false);
22712375

@@ -2277,26 +2381,7 @@ GetSnapshotData(Snapshot snapshot)
22772381
snapshot->regd_count = 0;
22782382
snapshot->copied = false;
22792383

2280-
if (old_snapshot_threshold < 0)
2281-
{
2282-
/*
2283-
* If not using "snapshot too old" feature, fill related fields with
2284-
* dummy values that don't require any locking.
2285-
*/
2286-
snapshot->lsn = InvalidXLogRecPtr;
2287-
snapshot->whenTaken = 0;
2288-
}
2289-
else
2290-
{
2291-
/*
2292-
* Capture the current time and WAL stream location in case this
2293-
* snapshot becomes old enough to need to fall back on the special
2294-
* "old snapshot" logic.
2295-
*/
2296-
snapshot->lsn = GetXLogInsertRecPtr();
2297-
snapshot->whenTaken = GetSnapshotCurrentTimestamp();
2298-
MaintainOldSnapshotTimeMapping(snapshot->whenTaken, xmin);
2299-
}
2384+
GetSnapshotDataInitOldSnapshot(snapshot);
23002385

23012386
return snapshot;
23022387
}

src/backend/utils/time/snapmgr.c

+4
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,8 @@ SetTransactionSnapshot(Snapshot sourcesnap, VirtualTransactionId *sourcevxid,
597597
CurrentSnapshot->takenDuringRecovery = sourcesnap->takenDuringRecovery;
598598
/* NB: curcid should NOT be copied, it's a local matter */
599599

600+
CurrentSnapshot->snapXactCompletionCount = 0;
601+
600602
/*
601603
* Now we have to fix what GetSnapshotData did with MyProc->xmin and
602604
* TransactionXmin. There is a race condition: to make sure we are not
@@ -672,6 +674,7 @@ CopySnapshot(Snapshot snapshot)
672674
newsnap->regd_count = 0;
673675
newsnap->active_count = 0;
674676
newsnap->copied = true;
677+
newsnap->snapXactCompletionCount = 0;
675678

676679
/* setup XID array */
677680
if (snapshot->xcnt > 0)
@@ -2209,6 +2212,7 @@ RestoreSnapshot(char *start_address)
22092212
snapshot->curcid = serialized_snapshot.curcid;
22102213
snapshot->whenTaken = serialized_snapshot.whenTaken;
22112214
snapshot->lsn = serialized_snapshot.lsn;
2215+
snapshot->snapXactCompletionCount = 0;
22122216

22132217
/* Copy XIDs, if present. */
22142218
if (serialized_snapshot.xcnt > 0)

src/include/access/transam.h

+9
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,15 @@ typedef struct VariableCacheData
231231
FullTransactionId latestCompletedXid; /* newest full XID that has
232232
* committed or aborted */
233233

234+
/*
235+
* Number of top-level transactions with xids (i.e. which may have
236+
* modified the database) that completed in some form since the start of
237+
* the server. This currently is solely used to check whether
238+
* GetSnapshotData() needs to recompute the contents of the snapshot, or
239+
* not. There are likely other users of this. Always at least 1.
240+
*/
241+
uint64 xactCompletionCount;
242+
234243
/*
235244
* These fields are protected by XactTruncationLock
236245
*/

src/include/utils/snapshot.h

+7
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,13 @@ typedef struct SnapshotData
207207

208208
TimestampTz whenTaken; /* timestamp when snapshot was taken */
209209
XLogRecPtr lsn; /* position in the WAL stream when taken */
210+
211+
/*
212+
* The transaction completion count at the time GetSnapshotData() built
213+
* this snapshot. Allows us to avoid re-computing static snapshots when no
214+
* transactions completed since the last GetSnapshotData().
215+
*/
216+
uint64 snapXactCompletionCount;
210217
} SnapshotData;
211218

212219
#endif /* SNAPSHOT_H */

0 commit comments

Comments
 (0)