4545 * anything we saw during replay.
4646 *
4747 * We are able to remove segments no longer necessary by carefully tracking
48- * each table's used values: during vacuum, any multixact older than a
49- * certain value is removed; the cutoff value is stored in pg_class.
50- * The minimum value in each database is stored in pg_database, and the
51- * global minimum is part of pg_control. Any vacuum that is able to
52- * advance its database's minimum value also computes a new global minimum,
53- * and uses this value to truncate older segments. When new multixactid
54- * values are to be created, care is taken that the counter does not
55- * fall within the wraparound horizon considering the global minimum value.
48+ * each table's used values: during vacuum, any multixact older than a certain
49+ * value is removed; the cutoff value is stored in pg_class. The minimum value
50+ * across all tables in each database is stored in pg_database, and the global
51+ * minimum across all databases is part of pg_control and is kept in shared
52+ * memory. At checkpoint time, after the value is known to be flushed in
53+ * WAL, any files that correspond to multixacts older than that value are
54+ * removed. (These files are also removed when a restartpoint is executed.)
55+ *
56+ * When new multixactid values are to be created, care is taken that the
57+ * counter does not fall within the wraparound horizon considering the global
58+ * minimum value.
5659 *
5760 * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
5861 * Portions Copyright (c) 1994, Regents of the University of California
9194 * Note: because MultiXactOffsets are 32 bits and wrap around at 0xFFFFFFFF,
9295 * MultiXact page numbering also wraps around at
9396 * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE, and segment numbering at
94- * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE/SLRU_SEGMENTS_PER_PAGE. We need
97+ * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need
9598 * take no explicit notice of that fact in this module, except when comparing
9699 * segment and page numbers in TruncateMultiXact (see
97100 * MultiXactOffsetPagePrecedes).
@@ -188,16 +191,20 @@ typedef struct MultiXactStateData
188191 /* next-to-be-assigned offset */
189192 MultiXactOffset nextOffset ;
190193
191- /* the Offset SLRU area was last truncated at this MultiXactId */
192- MultiXactId lastTruncationPoint ;
193-
194194 /*
195- * oldest multixact that is still on disk. Anything older than this
196- * should not be consulted.
195+ * Oldest multixact that is still on disk. Anything older than this
196+ * should not be consulted. These values are updated by vacuum.
197197 */
198198 MultiXactId oldestMultiXactId ;
199199 Oid oldestMultiXactDB ;
200200
201+ /*
202+ * This is what the previous checkpoint stored as the truncate position.
203+ * This value is the oldestMultiXactId that was valid when a checkpoint
204+ * was last executed.
205+ */
206+ MultiXactId lastCheckpointedOldest ;
207+
201208 /* support for anti-wraparound measures */
202209 MultiXactId multiVacLimit ;
203210 MultiXactId multiWarnLimit ;
@@ -234,12 +241,20 @@ typedef struct MultiXactStateData
234241 * than its own OldestVisibleMXactId[] setting; this is necessary because
235242 * the checkpointer could truncate away such data at any instant.
236243 *
237- * The checkpointer can compute the safe truncation point as the oldest
238- * valid value among all the OldestMemberMXactId[] and
239- * OldestVisibleMXactId[] entries, or nextMXact if none are valid.
240- * Clearly, it is not possible for any later-computed OldestVisibleMXactId
241- * value to be older than this, and so there is no risk of truncating data
242- * that is still needed.
244+ * The oldest valid value among all of the OldestMemberMXactId[] and
245+ * OldestVisibleMXactId[] entries is considered by vacuum as the earliest
246+ * possible value still having any live member transaction. Subtracting
247+ * vacuum_multixact_freeze_min_age from that value we obtain the freezing
248+ * point for multixacts for that table. Any value older than that is
249+ * removed from tuple headers (or "frozen"; see FreezeMultiXactId. Note
250+ * that multis that have member xids that are older than the cutoff point
251+ * for xids must also be frozen, even if the multis themselves are newer
252+ * than the multixid cutoff point). Whenever a full table vacuum happens,
253+ * the freezing point so computed is used as the new pg_class.relminmxid
254+ * value. The minimum of all those values in a database is stored as
255+ * pg_database.datminmxid. In turn, the minimum of all of those values is
256+ * stored in pg_control and used as truncation point for pg_multixact. At
257+ * checkpoint or restartpoint, unneeded segments are removed.
243258 */
244259 MultiXactId perBackendXactIds [1 ]; /* VARIABLE LENGTH ARRAY */
245260} MultiXactStateData ;
@@ -1121,8 +1136,8 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
11211136 * We check known limits on MultiXact before resorting to the SLRU area.
11221137 *
11231138 * An ID older than MultiXactState->oldestMultiXactId cannot possibly be
1124- * useful; it should have already been removed by vacuum. We've truncated
1125- * the on-disk structures anyway . Returning the wrong values could lead
1139+ * useful; it has already been removed, or will be removed shortly, by
1140+ * truncation. Returning the wrong values could lead
11261141 * to an incorrect visibility result. However, to support pg_upgrade we
11271142 * need to allow an empty set to be returned regardless, if the caller is
11281143 * willing to accept it; the caller is expected to check that it's an
@@ -1932,14 +1947,14 @@ TrimMultiXact(void)
19321947 LWLockAcquire (MultiXactOffsetControlLock , LW_EXCLUSIVE );
19331948
19341949 /*
1935- * (Re-)Initialize our idea of the latest page number.
1950+ * (Re-)Initialize our idea of the latest page number for offsets.
19361951 */
19371952 pageno = MultiXactIdToOffsetPage (multi );
19381953 MultiXactOffsetCtl -> shared -> latest_page_number = pageno ;
19391954
19401955 /*
19411956 * Zero out the remainder of the current offsets page. See notes in
1942- * StartupCLOG () for motivation.
1957+ * TrimCLOG() for motivation.
19431958 */
19441959 entryno = MultiXactIdToOffsetEntry (multi );
19451960 if (entryno != 0 )
@@ -1962,7 +1977,7 @@ TrimMultiXact(void)
19621977 LWLockAcquire (MultiXactMemberControlLock , LW_EXCLUSIVE );
19631978
19641979 /*
1965- * (Re-)Initialize our idea of the latest page number.
1980+ * (Re-)Initialize our idea of the latest page number for members.
19661981 */
19671982 pageno = MXOffsetToMemberPage (offset );
19681983 MultiXactMemberCtl -> shared -> latest_page_number = pageno ;
@@ -2240,6 +2255,18 @@ MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
22402255 SetMultiXactIdLimit (oldestMulti , oldestMultiDB );
22412256}
22422257
2258+ /*
2259+ * Record the "safe truncation point" in MultiXactState: the newest oldestMulti
2260+ * value that is known to be flushed as part of a checkpoint record.
2261+ */
2262+ void
2263+ MultiXactSetSafeTruncate (MultiXactId safeTruncateMulti )
2264+ {
2265+ LWLockAcquire (MultiXactGenLock , LW_EXCLUSIVE ); /* exclusive: we write the field */
2266+ MultiXactState -> lastCheckpointedOldest = safeTruncateMulti ; /* read back by TruncateMultiXact() as its cutoff */
2267+ LWLockRelease (MultiXactGenLock );
2268+ }
2269+
22432270/*
22442271 * Make sure that MultiXactOffset has room for a newly-allocated MultiXactId.
22452272 *
@@ -2478,25 +2505,31 @@ SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int segpage, void *data)
24782505 * Remove all MultiXactOffset and MultiXactMember segments before the oldest
24792506 * ones still of interest.
24802507 *
2481- * On a primary, this is called by vacuum after it has successfully advanced a
2482- * database's datminmxid value; the cutoff value we're passed is the minimum of
2483- * all databases' datminmxid values.
2484- *
2485- * During crash recovery, it's called from CreateRestartPoint() instead. We
2486- * rely on the fact that xlog_redo() will already have called
2487- * MultiXactAdvanceOldest(). Our latest_page_number will already have been
2488- * initialized by StartupMultiXact() and kept up to date as new pages are
2489- * zeroed.
2508+ * On a primary, this is called by the checkpointer process after a checkpoint
2509+ * has been flushed; during crash recovery, it's called from
2510+ * CreateRestartPoint(). In the latter case, we rely on the fact that
2511+ * xlog_redo() will already have called MultiXactAdvanceOldest(). Our
2512+ * latest_page_number will already have been initialized by StartupMultiXact()
2513+ * and kept up to date as new pages are zeroed.
24902514 */
24912515void
2492- TruncateMultiXact (MultiXactId oldestMXact )
2516+ TruncateMultiXact (void )
24932517{
2518+ MultiXactId oldestMXact ;
24942519 MultiXactOffset oldestOffset ;
24952520 MultiXactOffset nextOffset ;
24962521 mxtruncinfo trunc ;
24972522 MultiXactId earliest ;
24982523 MembersLiveRange range ;
24992524
2525+ Assert (AmCheckpointerProcess () || AmStartupProcess () ||
2526+ !IsPostmasterEnvironment );
2527+
2528+ LWLockAcquire (MultiXactGenLock , LW_SHARED );
2529+ oldestMXact = MultiXactState -> lastCheckpointedOldest ;
2530+ LWLockRelease (MultiXactGenLock );
2531+ Assert (MultiXactIdIsValid (oldestMXact ));
2532+
25002533 /*
25012534 * Note we can't just plow ahead with the truncation; it's possible that
25022535 * there are no segments to truncate, which is a problem because we are
@@ -2507,15 +2540,16 @@ TruncateMultiXact(MultiXactId oldestMXact)
25072540 trunc .earliestExistingPage = -1 ;
25082541 SlruScanDirectory (MultiXactOffsetCtl , SlruScanDirCbFindEarliest , & trunc );
25092542 earliest = trunc .earliestExistingPage * MULTIXACT_OFFSETS_PER_PAGE ;
2543+ if (earliest < FirstMultiXactId )
2544+ earliest = FirstMultiXactId ;
25102545
25112546 /* nothing to do */
25122547 if (MultiXactIdPrecedes (oldestMXact , earliest ))
25132548 return ;
25142549
25152550 /*
25162551 * First, compute the safe truncation point for MultiXactMember. This is
2517- * the starting offset of the multixact we were passed as MultiXactOffset
2518- * cutoff.
2552+ * the starting offset of the oldest multixact.
25192553 */
25202554 {
25212555 int pageno ;
@@ -2538,10 +2572,6 @@ TruncateMultiXact(MultiXactId oldestMXact)
25382572 LWLockRelease (MultiXactOffsetControlLock );
25392573 }
25402574
2541- /* truncate MultiXactOffset */
2542- SimpleLruTruncate (MultiXactOffsetCtl ,
2543- MultiXactIdToOffsetPage (oldestMXact ));
2544-
25452575 /*
25462576 * To truncate MultiXactMembers, we need to figure out the active page
25472577 * range and delete all files outside that range. The start point is the
@@ -2559,6 +2589,11 @@ TruncateMultiXact(MultiXactId oldestMXact)
25592589 range .rangeEnd = MXOffsetToMemberPage (nextOffset );
25602590
25612591 SlruScanDirectory (MultiXactMemberCtl , SlruScanDirCbRemoveMembers , & range );
2592+
2593+ /* Now we can truncate MultiXactOffset */
2594+ SimpleLruTruncate (MultiXactOffsetCtl ,
2595+ MultiXactIdToOffsetPage (oldestMXact ));
2596+
25622597}
25632598
25642599/*
0 commit comments