From ff55cd9e244a9a48b9b61e733dc2cbdfd2a9ee49 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Fri, 31 Jan 2025 08:45:10 +0900 Subject: [PATCH 1/2] Integrate more FullTransactionIds into 2PC code This refactoring will help in fixing a follow-up issue. Backpatch-through: 17 (?) --- src/backend/access/transam/multixact.c | 16 +- src/backend/access/transam/twophase.c | 242 +++++++++++-------- src/backend/access/transam/xact.c | 13 +- src/backend/storage/lmgr/lock.c | 20 +- src/backend/storage/lmgr/predicate.c | 11 +- src/backend/utils/activity/pgstat_relation.c | 4 +- src/include/access/multixact.h | 9 +- src/include/access/twophase.h | 12 +- src/include/access/twophase_rmgr.h | 4 +- src/include/pgstat.h | 4 +- src/include/storage/lock.h | 11 +- src/include/storage/predicate.h | 7 +- 12 files changed, 199 insertions(+), 154 deletions(-) diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 9d25a7df0d32..e3d3bfc051c9 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -1847,7 +1847,7 @@ AtPrepare_MultiXact(void) * Clean up after successful PREPARE TRANSACTION */ void -PostPrepare_MultiXact(TransactionId xid) +PostPrepare_MultiXact(FullTransactionId fxid) { MultiXactId myOldestMember; @@ -1858,7 +1858,7 @@ PostPrepare_MultiXact(TransactionId xid) myOldestMember = OldestMemberMXactId[MyProcNumber]; if (MultiXactIdIsValid(myOldestMember)) { - ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(xid, false); + ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, false); /* * Even though storing MultiXactId is atomic, acquire lock to make @@ -1896,10 +1896,10 @@ PostPrepare_MultiXact(TransactionId xid) * Recover the state of a prepared transaction at startup */ void -multixact_twophase_recover(TransactionId xid, uint16 info, +multixact_twophase_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { - ProcNumber dummyProcNumber = 
TwoPhaseGetDummyProcNumber(xid, false); + ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, false); MultiXactId oldestMember; /* @@ -1917,10 +1917,10 @@ multixact_twophase_recover(TransactionId xid, uint16 info, * Similar to AtEOXact_MultiXact but for COMMIT PREPARED */ void -multixact_twophase_postcommit(TransactionId xid, uint16 info, +multixact_twophase_postcommit(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { - ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(xid, true); + ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, true); Assert(len == sizeof(MultiXactId)); @@ -1932,10 +1932,10 @@ multixact_twophase_postcommit(TransactionId xid, uint16 info, * This is actually just the same as the COMMIT case. */ void -multixact_twophase_postabort(TransactionId xid, uint16 info, +multixact_twophase_postabort(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { - multixact_twophase_postcommit(xid, info, recdata, len); + multixact_twophase_postcommit(fxid, info, recdata, len); } /* diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 73a80559194e..4f5b45426620 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -159,7 +159,7 @@ typedef struct GlobalTransactionData */ XLogRecPtr prepare_start_lsn; /* XLOG offset of prepare record start */ XLogRecPtr prepare_end_lsn; /* XLOG offset of prepare record end */ - TransactionId xid; /* The GXACT id */ + FullTransactionId fxid; /* The GXACT full xid */ Oid owner; /* ID of user that executed the xact */ ProcNumber locking_backend; /* backend currently working on the xact */ @@ -197,6 +197,7 @@ static GlobalTransaction MyLockedGxact = NULL; static bool twophaseExitRegistered = false; +static void PrepareRedoRemoveFull(FullTransactionId fxid, bool giveWarning); static void RecordTransactionCommitPrepared(TransactionId xid, int nchildren, TransactionId *children, @@ -216,19 +217,19 @@ 
static void RecordTransactionAbortPrepared(TransactionId xid, int nstats, xl_xact_stats_item *stats, const char *gid); -static void ProcessRecords(char *bufptr, TransactionId xid, +static void ProcessRecords(char *bufptr, FullTransactionId fxid, const TwoPhaseCallback callbacks[]); static void RemoveGXact(GlobalTransaction gxact); static void XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len); -static char *ProcessTwoPhaseBuffer(TransactionId xid, +static char *ProcessTwoPhaseBuffer(FullTransactionId fxid, XLogRecPtr prepare_start_lsn, bool fromdisk, bool setParent, bool setNextXid); -static void MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, +static void MarkAsPreparingGuts(GlobalTransaction gxact, FullTransactionId fxid, const char *gid, TimestampTz prepared_at, Oid owner, Oid databaseid); -static void RemoveTwoPhaseFile(TransactionId xid, bool giveWarning); -static void RecreateTwoPhaseFile(TransactionId xid, void *content, int len); +static void RemoveTwoPhaseFile(FullTransactionId fxid, bool giveWarning); +static void RecreateTwoPhaseFile(FullTransactionId fxid, void *content, int len); /* * Initialization of shared memory @@ -356,7 +357,7 @@ PostPrepare_Twophase(void) * Reserve the GID for the given transaction. */ GlobalTransaction -MarkAsPreparing(TransactionId xid, const char *gid, +MarkAsPreparing(FullTransactionId fxid, const char *gid, TimestampTz prepared_at, Oid owner, Oid databaseid) { GlobalTransaction gxact; @@ -407,7 +408,7 @@ MarkAsPreparing(TransactionId xid, const char *gid, gxact = TwoPhaseState->freeGXacts; TwoPhaseState->freeGXacts = gxact->next; - MarkAsPreparingGuts(gxact, xid, gid, prepared_at, owner, databaseid); + MarkAsPreparingGuts(gxact, fxid, gid, prepared_at, owner, databaseid); gxact->ondisk = false; @@ -430,11 +431,13 @@ MarkAsPreparing(TransactionId xid, const char *gid, * Note: This function should be called with appropriate locks held. 
*/ static void -MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid, - TimestampTz prepared_at, Oid owner, Oid databaseid) +MarkAsPreparingGuts(GlobalTransaction gxact, FullTransactionId fxid, + const char *gid, TimestampTz prepared_at, Oid owner, + Oid databaseid) { PGPROC *proc; int i; + TransactionId xid = XidFromFullTransactionId(fxid); Assert(LWLockHeldByMeInMode(TwoPhaseStateLock, LW_EXCLUSIVE)); @@ -479,7 +482,7 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid, proc->subxidStatus.count = 0; gxact->prepared_at = prepared_at; - gxact->xid = xid; + gxact->fxid = fxid; gxact->owner = owner; gxact->locking_backend = MyProcNumber; gxact->valid = false; @@ -797,12 +800,12 @@ pg_prepared_xact(PG_FUNCTION_ARGS) * caller had better hold it. */ static GlobalTransaction -TwoPhaseGetGXact(TransactionId xid, bool lock_held) +TwoPhaseGetGXact(FullTransactionId fxid, bool lock_held) { GlobalTransaction result = NULL; int i; - static TransactionId cached_xid = InvalidTransactionId; + static FullTransactionId cached_fxid = {0}; static GlobalTransaction cached_gxact = NULL; Assert(!lock_held || LWLockHeldByMe(TwoPhaseStateLock)); @@ -811,7 +814,7 @@ TwoPhaseGetGXact(TransactionId xid, bool lock_held) * During a recovery, COMMIT PREPARED, or ABORT PREPARED, we'll be called * repeatedly for the same XID. We can save work with a simple cache. 
*/ - if (xid == cached_xid) + if (FullTransactionIdEquals(fxid, cached_fxid)) return cached_gxact; if (!lock_held) @@ -821,7 +824,7 @@ TwoPhaseGetGXact(TransactionId xid, bool lock_held) { GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; - if (gxact->xid == xid) + if (FullTransactionIdEquals(gxact->fxid, fxid)) { result = gxact; break; @@ -832,9 +835,10 @@ TwoPhaseGetGXact(TransactionId xid, bool lock_held) LWLockRelease(TwoPhaseStateLock); if (result == NULL) /* should not happen */ - elog(ERROR, "failed to find GlobalTransaction for xid %u", xid); + elog(ERROR, "failed to find GlobalTransaction for xid %u", + XidFromFullTransactionId(fxid)); - cached_xid = xid; + cached_fxid = fxid; cached_gxact = result; return result; @@ -881,7 +885,7 @@ TwoPhaseGetXidByVirtualXID(VirtualTransactionId vxid, *have_more = true; break; } - result = gxact->xid; + result = XidFromFullTransactionId(gxact->fxid); } } @@ -892,7 +896,7 @@ TwoPhaseGetXidByVirtualXID(VirtualTransactionId vxid, /* * TwoPhaseGetDummyProcNumber - * Get the dummy proc number for prepared transaction specified by XID + * Get the dummy proc number for prepared transaction * * Dummy proc numbers are similar to proc numbers of real backends. They * start at MaxBackends, and are unique across all currently active real @@ -900,24 +904,24 @@ TwoPhaseGetXidByVirtualXID(VirtualTransactionId vxid, * TwoPhaseStateLock will not be taken, so the caller had better hold it. 
*/ ProcNumber -TwoPhaseGetDummyProcNumber(TransactionId xid, bool lock_held) +TwoPhaseGetDummyProcNumber(FullTransactionId fxid, bool lock_held) { - GlobalTransaction gxact = TwoPhaseGetGXact(xid, lock_held); + GlobalTransaction gxact = TwoPhaseGetGXact(fxid, lock_held); return gxact->pgprocno; } /* * TwoPhaseGetDummyProc - * Get the PGPROC that represents a prepared transaction specified by XID + * Get the PGPROC that represents a prepared transaction * * If lock_held is set to true, TwoPhaseStateLock will not be taken, so the * caller had better hold it. */ PGPROC * -TwoPhaseGetDummyProc(TransactionId xid, bool lock_held) +TwoPhaseGetDummyProc(FullTransactionId fxid, bool lock_held) { - GlobalTransaction gxact = TwoPhaseGetGXact(xid, lock_held); + GlobalTransaction gxact = TwoPhaseGetGXact(fxid, lock_held); return GetPGProcByNumber(gxact->pgprocno); } @@ -942,10 +946,8 @@ AdjustToFullTransactionId(TransactionId xid) } static inline int -TwoPhaseFilePath(char *path, TransactionId xid) +TwoPhaseFilePath(char *path, FullTransactionId fxid) { - FullTransactionId fxid = AdjustToFullTransactionId(xid); - return snprintf(path, MAXPGPATH, TWOPHASE_DIR "/%08X%08X", EpochFromFullTransactionId(fxid), XidFromFullTransactionId(fxid)); @@ -1049,7 +1051,7 @@ void StartPrepare(GlobalTransaction gxact) { PGPROC *proc = GetPGProcByNumber(gxact->pgprocno); - TransactionId xid = gxact->xid; + TransactionId xid = XidFromFullTransactionId(gxact->fxid); TwoPhaseFileHeader hdr; TransactionId *children; RelFileLocator *commitrels; @@ -1281,10 +1283,11 @@ RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info, * If it looks OK (has a valid magic number and CRC), return the palloc'd * contents of the file, issuing an error when finding corrupted data. If * missing_ok is true, which indicates that missing files can be safely - * ignored, then return NULL. This state can be reached when doing recovery. + * ignored, then return NULL. 
This state can be reached when doing recovery + * after discarding two-phase files from frozen epochs. */ static char * -ReadTwoPhaseFile(TransactionId xid, bool missing_ok) +ReadTwoPhaseFile(FullTransactionId fxid, bool missing_ok) { char path[MAXPGPATH]; char *buf; @@ -1296,7 +1299,7 @@ ReadTwoPhaseFile(TransactionId xid, bool missing_ok) file_crc; int r; - TwoPhaseFilePath(path, xid); + TwoPhaseFilePath(path, fxid); fd = OpenTransientFile(path, O_RDONLY | PG_BINARY); if (fd < 0) @@ -1461,6 +1464,7 @@ StandbyTransactionIdIsPrepared(TransactionId xid) char *buf; TwoPhaseFileHeader *hdr; bool result; + FullTransactionId fxid; Assert(TransactionIdIsValid(xid)); @@ -1468,7 +1472,8 @@ StandbyTransactionIdIsPrepared(TransactionId xid) return false; /* nothing to do */ /* Read and validate file */ - buf = ReadTwoPhaseFile(xid, true); + fxid = FullTransactionIdFromAllowableAt(TransamVariables->nextXid, xid); + buf = ReadTwoPhaseFile(fxid, true); if (buf == NULL) return false; @@ -1488,6 +1493,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit) { GlobalTransaction gxact; PGPROC *proc; + FullTransactionId fxid; TransactionId xid; bool ondisk; char *buf; @@ -1509,7 +1515,8 @@ FinishPreparedTransaction(const char *gid, bool isCommit) */ gxact = LockGXact(gid, GetUserId()); proc = GetPGProcByNumber(gxact->pgprocno); - xid = gxact->xid; + fxid = gxact->fxid; + xid = XidFromFullTransactionId(fxid); /* * Read and validate 2PC state data. State data will typically be stored @@ -1517,7 +1524,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit) * to disk if for some reason they have lived for a long time. 
*/ if (gxact->ondisk) - buf = ReadTwoPhaseFile(xid, false); + buf = ReadTwoPhaseFile(fxid, false); else XlogReadTwoPhaseData(gxact->prepare_start_lsn, &buf, NULL); @@ -1636,11 +1643,11 @@ FinishPreparedTransaction(const char *gid, bool isCommit) /* And now do the callbacks */ if (isCommit) - ProcessRecords(bufptr, xid, twophase_postcommit_callbacks); + ProcessRecords(bufptr, fxid, twophase_postcommit_callbacks); else - ProcessRecords(bufptr, xid, twophase_postabort_callbacks); + ProcessRecords(bufptr, fxid, twophase_postabort_callbacks); - PredicateLockTwoPhaseFinish(xid, isCommit); + PredicateLockTwoPhaseFinish(fxid, isCommit); /* * Read this value while holding the two-phase lock, as the on-disk 2PC @@ -1664,7 +1671,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit) * And now we can clean up any files we may have left. */ if (ondisk) - RemoveTwoPhaseFile(xid, true); + RemoveTwoPhaseFile(fxid, true); MyLockedGxact = NULL; @@ -1677,7 +1684,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit) * Scan 2PC state data in memory and call the indicated callbacks for each 2PC record. */ static void -ProcessRecords(char *bufptr, TransactionId xid, +ProcessRecords(char *bufptr, FullTransactionId fxid, const TwoPhaseCallback callbacks[]) { for (;;) @@ -1691,24 +1698,28 @@ ProcessRecords(char *bufptr, TransactionId xid, bufptr += MAXALIGN(sizeof(TwoPhaseRecordOnDisk)); if (callbacks[record->rmid] != NULL) - callbacks[record->rmid] (xid, record->info, bufptr, record->len); + callbacks[record->rmid] (fxid, record->info, bufptr, record->len); bufptr += MAXALIGN(record->len); } } /* - * Remove the 2PC file for the specified XID. + * Remove the 2PC file. * * If giveWarning is false, do not complain about file-not-present; * this is an expected case during WAL replay. + * + * This routine is used at early stages of recovery where future and + * past orphaned files are checked, hence the FullTransactionId to build + * a complete file name fit for the removal.
*/ static void -RemoveTwoPhaseFile(TransactionId xid, bool giveWarning) +RemoveTwoPhaseFile(FullTransactionId fxid, bool giveWarning) { char path[MAXPGPATH]; - TwoPhaseFilePath(path, xid); + TwoPhaseFilePath(path, fxid); if (unlink(path)) if (errno != ENOENT || giveWarning) ereport(WARNING, @@ -1723,7 +1734,7 @@ RemoveTwoPhaseFile(TransactionId xid, bool giveWarning) * Note: content and len don't include CRC. */ static void -RecreateTwoPhaseFile(TransactionId xid, void *content, int len) +RecreateTwoPhaseFile(FullTransactionId fxid, void *content, int len) { char path[MAXPGPATH]; pg_crc32c statefile_crc; @@ -1734,7 +1745,7 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len) COMP_CRC32C(statefile_crc, content, len); FIN_CRC32C(statefile_crc); - TwoPhaseFilePath(path, xid); + TwoPhaseFilePath(path, fxid); fd = OpenTransientFile(path, O_CREAT | O_TRUNC | O_WRONLY | PG_BINARY); @@ -1846,7 +1857,7 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon) int len; XlogReadTwoPhaseData(gxact->prepare_start_lsn, &buf, &len); - RecreateTwoPhaseFile(gxact->xid, buf, len); + RecreateTwoPhaseFile(gxact->fxid, buf, len); gxact->ondisk = true; gxact->prepare_start_lsn = InvalidXLogRecPtr; gxact->prepare_end_lsn = InvalidXLogRecPtr; @@ -1897,19 +1908,17 @@ restoreTwoPhaseData(void) if (strlen(clde->d_name) == 16 && strspn(clde->d_name, "0123456789ABCDEF") == 16) { - TransactionId xid; FullTransactionId fxid; char *buf; fxid = FullTransactionIdFromU64(strtou64(clde->d_name, NULL, 16)); - xid = XidFromFullTransactionId(fxid); - buf = ProcessTwoPhaseBuffer(xid, InvalidXLogRecPtr, + buf = ProcessTwoPhaseBuffer(fxid, InvalidXLogRecPtr, true, false, false); if (buf == NULL) continue; - PrepareRedoAdd(buf, InvalidXLogRecPtr, + PrepareRedoAdd(fxid, buf, InvalidXLogRecPtr, InvalidXLogRecPtr, InvalidRepOriginId); } } @@ -1968,9 +1977,8 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p) Assert(gxact->inredo); - xid = gxact->xid; - - buf = ProcessTwoPhaseBuffer(xid, + 
xid = XidFromFullTransactionId(gxact->fxid); + buf = ProcessTwoPhaseBuffer(gxact->fxid, gxact->prepare_start_lsn, gxact->ondisk, false, true); @@ -2036,15 +2044,12 @@ StandbyRecoverPreparedTransactions(void) LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); for (i = 0; i < TwoPhaseState->numPrepXacts; i++) { - TransactionId xid; char *buf; GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; Assert(gxact->inredo); - xid = gxact->xid; - - buf = ProcessTwoPhaseBuffer(xid, + buf = ProcessTwoPhaseBuffer(gxact->fxid, gxact->prepare_start_lsn, gxact->ondisk, true, false); if (buf != NULL) @@ -2077,16 +2082,14 @@ RecoverPreparedTransactions(void) LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); for (i = 0; i < TwoPhaseState->numPrepXacts; i++) { - TransactionId xid; char *buf; GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; + FullTransactionId fxid = gxact->fxid; char *bufptr; TwoPhaseFileHeader *hdr; TransactionId *subxids; const char *gid; - xid = gxact->xid; - /* * Reconstruct subtrans state for the transaction --- needed because * pg_subtrans is not preserved over a restart. Note that we are @@ -2096,17 +2099,20 @@ RecoverPreparedTransactions(void) * SubTransSetParent has been set before, if the prepared transaction * generated xid assignment records. 
*/ - buf = ProcessTwoPhaseBuffer(xid, + buf = ProcessTwoPhaseBuffer(gxact->fxid, gxact->prepare_start_lsn, gxact->ondisk, true, false); if (buf == NULL) continue; ereport(LOG, - (errmsg("recovering prepared transaction %u from shared memory", xid))); + (errmsg("recovering prepared transaction %u of epoch %u from shared memory", + XidFromFullTransactionId(gxact->fxid), + EpochFromFullTransactionId(gxact->fxid)))); hdr = (TwoPhaseFileHeader *) buf; - Assert(TransactionIdEquals(hdr->xid, xid)); + Assert(TransactionIdEquals(hdr->xid, + XidFromFullTransactionId(gxact->fxid))); bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader)); gid = (const char *) bufptr; bufptr += MAXALIGN(hdr->gidlen); @@ -2122,7 +2128,7 @@ RecoverPreparedTransactions(void) * Recreate its GXACT and dummy PGPROC. But, check whether it was * added in redo and already has a shmem entry for it. */ - MarkAsPreparingGuts(gxact, xid, gid, + MarkAsPreparingGuts(gxact, gxact->fxid, gid, hdr->prepared_at, hdr->owner, hdr->database); @@ -2137,7 +2143,7 @@ RecoverPreparedTransactions(void) /* * Recover other state (notably locks) using resource managers. */ - ProcessRecords(bufptr, xid, twophase_recover_callbacks); + ProcessRecords(bufptr, fxid, twophase_recover_callbacks); /* * Release locks held by the standby process after we process each @@ -2145,7 +2151,7 @@ RecoverPreparedTransactions(void) * additional locks at any one time. */ if (InHotStandby) - StandbyReleaseLockTree(xid, hdr->nsubxacts, subxids); + StandbyReleaseLockTree(hdr->xid, hdr->nsubxacts, subxids); /* * We're done with recovering this transaction. Clear MyLockedGxact, @@ -2164,7 +2170,7 @@ RecoverPreparedTransactions(void) /* * ProcessTwoPhaseBuffer * - * Given a transaction id, read it either from disk or read it directly + * Given a FullTransactionId, read it either from disk or read it directly * via shmem xlog record pointer using the provided "prepare_start_lsn". * * If setParent is true, set up subtransaction parent linkages. 
@@ -2173,13 +2179,12 @@ RecoverPreparedTransactions(void) * value scanned. */ static char * -ProcessTwoPhaseBuffer(TransactionId xid, +ProcessTwoPhaseBuffer(FullTransactionId fxid, XLogRecPtr prepare_start_lsn, bool fromdisk, bool setParent, bool setNextXid) { FullTransactionId nextXid = TransamVariables->nextXid; - TransactionId origNextXid = XidFromFullTransactionId(nextXid); TransactionId *subxids; char *buf; TwoPhaseFileHeader *hdr; @@ -2191,41 +2196,46 @@ ProcessTwoPhaseBuffer(TransactionId xid, Assert(prepare_start_lsn != InvalidXLogRecPtr); /* Already processed? */ - if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid)) + if (TransactionIdDidCommit(XidFromFullTransactionId(fxid)) || + TransactionIdDidAbort(XidFromFullTransactionId(fxid))) { if (fromdisk) { ereport(WARNING, - (errmsg("removing stale two-phase state file for transaction %u", - xid))); - RemoveTwoPhaseFile(xid, true); + (errmsg("removing stale two-phase state file for transaction %u of epoch %u", + XidFromFullTransactionId(fxid), + EpochFromFullTransactionId(fxid)))); + RemoveTwoPhaseFile(fxid, true); } else { ereport(WARNING, - (errmsg("removing stale two-phase state from memory for transaction %u", - xid))); - PrepareRedoRemove(xid, true); + (errmsg("removing stale two-phase state from memory for transaction %u of epoch %u", + XidFromFullTransactionId(fxid), + EpochFromFullTransactionId(fxid)))); + PrepareRedoRemoveFull(fxid, true); } return NULL; } /* Reject XID if too new */ - if (TransactionIdFollowsOrEquals(xid, origNextXid)) + if (FullTransactionIdFollowsOrEquals(fxid, nextXid)) { if (fromdisk) { ereport(WARNING, - (errmsg("removing future two-phase state file for transaction %u", - xid))); - RemoveTwoPhaseFile(xid, true); + (errmsg("removing future two-phase state file for transaction %u of epoch %u", + XidFromFullTransactionId(fxid), + EpochFromFullTransactionId(fxid)))); + RemoveTwoPhaseFile(fxid, true); } else { ereport(WARNING, - (errmsg("removing future two-phase state 
from memory for transaction %u", - xid))); - PrepareRedoRemove(xid, true); + (errmsg("removing future two-phase state from memory for transaction %u of epoch %u", + XidFromFullTransactionId(fxid), + EpochFromFullTransactionId(fxid)))); + PrepareRedoRemoveFull(fxid, true); } return NULL; } @@ -2233,7 +2243,7 @@ ProcessTwoPhaseBuffer(TransactionId xid, if (fromdisk) { /* Read and validate file */ - buf = ReadTwoPhaseFile(xid, false); + buf = ReadTwoPhaseFile(fxid, false); } else { @@ -2243,18 +2253,20 @@ ProcessTwoPhaseBuffer(TransactionId xid, /* Deconstruct header */ hdr = (TwoPhaseFileHeader *) buf; - if (!TransactionIdEquals(hdr->xid, xid)) + if (!TransactionIdEquals(hdr->xid, XidFromFullTransactionId(fxid))) { if (fromdisk) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("corrupted two-phase state file for transaction %u", - xid))); + errmsg("corrupted two-phase state file for transaction %u of epoch %u", + XidFromFullTransactionId(fxid), + EpochFromFullTransactionId(fxid)))); else ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("corrupted two-phase state in memory for transaction %u", - xid))); + errmsg("corrupted two-phase state in memory for transaction %u of epoch %u", + XidFromFullTransactionId(fxid), + EpochFromFullTransactionId(fxid)))); } /* @@ -2268,14 +2280,14 @@ ProcessTwoPhaseBuffer(TransactionId xid, { TransactionId subxid = subxids[i]; - Assert(TransactionIdFollows(subxid, xid)); + Assert(TransactionIdFollows(subxid, XidFromFullTransactionId(fxid))); /* update nextXid if needed */ if (setNextXid) AdvanceNextFullTransactionIdPastXid(subxid); if (setParent) - SubTransSetParent(subxid, xid); + SubTransSetParent(subxid, XidFromFullTransactionId(fxid)); } return buf; @@ -2466,8 +2478,9 @@ RecordTransactionAbortPrepared(TransactionId xid, * data, the entry is marked as located on disk. 
*/ void -PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, - XLogRecPtr end_lsn, RepOriginId origin_id) +PrepareRedoAdd(FullTransactionId fxid, char *buf, + XLogRecPtr start_lsn, XLogRecPtr end_lsn, + RepOriginId origin_id) { TwoPhaseFileHeader *hdr = (TwoPhaseFileHeader *) buf; char *bufptr; @@ -2477,6 +2490,10 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, Assert(LWLockHeldByMeInMode(TwoPhaseStateLock, LW_EXCLUSIVE)); Assert(RecoveryInProgress()); + if (!FullTransactionIdIsValid(fxid)) + fxid = FullTransactionIdFromAllowableAt(TransamVariables->nextXid, + hdr->xid); + bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader)); gid = (const char *) bufptr; @@ -2505,7 +2522,8 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, { char path[MAXPGPATH]; - TwoPhaseFilePath(path, hdr->xid); + Assert(InRecovery); + TwoPhaseFilePath(path, fxid); if (access(path, F_OK) == 0) { @@ -2536,7 +2554,7 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, gxact->prepared_at = hdr->prepared_at; gxact->prepare_start_lsn = start_lsn; gxact->prepare_end_lsn = end_lsn; - gxact->xid = hdr->xid; + gxact->fxid = fxid; gxact->owner = hdr->owner; gxact->locking_backend = INVALID_PROC_NUMBER; gxact->valid = false; @@ -2555,11 +2573,13 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, false /* backward */ , false /* WAL */ ); } - elog(DEBUG2, "added 2PC data in shared memory for transaction %u", gxact->xid); + elog(DEBUG2, "added 2PC data in shared memory for transaction %u of epoch %u", + XidFromFullTransactionId(gxact->fxid), + EpochFromFullTransactionId(gxact->fxid)); } /* - * PrepareRedoRemove + * PrepareRedoRemoveFull * * Remove the corresponding gxact entry from TwoPhaseState. Also remove * the 2PC file if a prepared transaction was saved via an earlier checkpoint. @@ -2567,8 +2587,8 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, * Caller must hold TwoPhaseStateLock in exclusive mode, because TwoPhaseState * is updated. 
*/ -void -PrepareRedoRemove(TransactionId xid, bool giveWarning) +static void +PrepareRedoRemoveFull(FullTransactionId fxid, bool giveWarning) { GlobalTransaction gxact = NULL; int i; @@ -2581,7 +2601,7 @@ PrepareRedoRemove(TransactionId xid, bool giveWarning) { gxact = TwoPhaseState->prepXacts[i]; - if (gxact->xid == xid) + if (FullTransactionIdEquals(gxact->fxid, fxid)) { Assert(gxact->inredo); found = true; @@ -2598,12 +2618,28 @@ PrepareRedoRemove(TransactionId xid, bool giveWarning) /* * And now we can clean up any files we may have left. */ - elog(DEBUG2, "removing 2PC data for transaction %u", xid); + elog(DEBUG2, "removing 2PC data for transaction %u of epoch %u ", + XidFromFullTransactionId(fxid), + EpochFromFullTransactionId(fxid)); + if (gxact->ondisk) - RemoveTwoPhaseFile(xid, giveWarning); + RemoveTwoPhaseFile(fxid, giveWarning); + RemoveGXact(gxact); } +/* + * Wrapper of PrepareRedoRemoveFull(), for TransactionIds. + */ +void +PrepareRedoRemove(TransactionId xid, bool giveWarning) +{ + FullTransactionId fxid = + FullTransactionIdFromAllowableAt(TransamVariables->nextXid, xid); + + PrepareRedoRemoveFull(fxid, giveWarning); +} + /* * LookupGXact * Check if the prepared transaction with the given GID, lsn and timestamp @@ -2648,7 +2684,7 @@ LookupGXact(const char *gid, XLogRecPtr prepare_end_lsn, * between publisher and subscriber. 
*/ if (gxact->ondisk) - buf = ReadTwoPhaseFile(gxact->xid, false); + buf = ReadTwoPhaseFile(gxact->fxid, false); else { Assert(gxact->prepare_start_lsn); diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index b885513f7654..41601fcb2803 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -2515,7 +2515,7 @@ static void PrepareTransaction(void) { TransactionState s = CurrentTransactionState; - TransactionId xid = GetCurrentTransactionId(); + FullTransactionId fxid = GetCurrentFullTransactionId(); GlobalTransaction gxact; TimestampTz prepared_at; @@ -2644,7 +2644,7 @@ PrepareTransaction(void) * Reserve the GID for this transaction. This could fail if the requested * GID is invalid or already in use. */ - gxact = MarkAsPreparing(xid, prepareGID, prepared_at, + gxact = MarkAsPreparing(fxid, prepareGID, prepared_at, GetUserId(), MyDatabaseId); prepareGID = NULL; @@ -2694,7 +2694,7 @@ PrepareTransaction(void) * ProcArrayClearTransaction(). Otherwise, a GetLockConflicts() would * conclude "xact already committed or aborted" for our locks. */ - PostPrepare_Locks(xid); + PostPrepare_Locks(fxid); /* * Let others know about no transaction in progress by me. This has to be @@ -2738,9 +2738,9 @@ PrepareTransaction(void) PostPrepare_smgr(); - PostPrepare_MultiXact(xid); + PostPrepare_MultiXact(fxid); - PostPrepare_PredicateLocks(xid); + PostPrepare_PredicateLocks(fxid); ResourceOwnerRelease(TopTransactionResourceOwner, RESOURCE_RELEASE_LOCKS, @@ -6420,7 +6420,8 @@ xact_redo(XLogReaderState *record) * gxact entry. 
*/ LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); - PrepareRedoAdd(XLogRecGetData(record), + PrepareRedoAdd(InvalidFullTransactionId, + XLogRecGetData(record), record->ReadRecPtr, record->EndRecPtr, XLogRecGetOrigin(record)); diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 002303664aa5..6d99e55f9962 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -3536,9 +3536,9 @@ AtPrepare_Locks(void) * but that probably costs more cycles. */ void -PostPrepare_Locks(TransactionId xid) +PostPrepare_Locks(FullTransactionId fxid) { - PGPROC *newproc = TwoPhaseGetDummyProc(xid, false); + PGPROC *newproc = TwoPhaseGetDummyProc(fxid, false); HASH_SEQ_STATUS status; LOCALLOCK *locallock; LOCK *lock; @@ -4321,11 +4321,11 @@ DumpAllLocks(void) * and PANIC anyway. */ void -lock_twophase_recover(TransactionId xid, uint16 info, +lock_twophase_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { TwoPhaseLockRecord *rec = (TwoPhaseLockRecord *) recdata; - PGPROC *proc = TwoPhaseGetDummyProc(xid, false); + PGPROC *proc = TwoPhaseGetDummyProc(fxid, false); LOCKTAG *locktag; LOCKMODE lockmode; LOCKMETHODID lockmethodid; @@ -4502,7 +4502,7 @@ lock_twophase_recover(TransactionId xid, uint16 info, * starting up into hot standby mode. 
*/ void -lock_twophase_standby_recover(TransactionId xid, uint16 info, +lock_twophase_standby_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { TwoPhaseLockRecord *rec = (TwoPhaseLockRecord *) recdata; @@ -4521,7 +4521,7 @@ lock_twophase_standby_recover(TransactionId xid, uint16 info, if (lockmode == AccessExclusiveLock && locktag->locktag_type == LOCKTAG_RELATION) { - StandbyAcquireAccessExclusiveLock(xid, + StandbyAcquireAccessExclusiveLock(XidFromFullTransactionId(fxid), locktag->locktag_field1 /* dboid */ , locktag->locktag_field2 /* reloid */ ); } @@ -4534,11 +4534,11 @@ lock_twophase_standby_recover(TransactionId xid, uint16 info, * Find and release the lock indicated by the 2PC record. */ void -lock_twophase_postcommit(TransactionId xid, uint16 info, +lock_twophase_postcommit(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { TwoPhaseLockRecord *rec = (TwoPhaseLockRecord *) recdata; - PGPROC *proc = TwoPhaseGetDummyProc(xid, true); + PGPROC *proc = TwoPhaseGetDummyProc(fxid, true); LOCKTAG *locktag; LOCKMETHODID lockmethodid; LockMethod lockMethodTable; @@ -4560,10 +4560,10 @@ lock_twophase_postcommit(TransactionId xid, uint16 info, * This is actually just the same as the COMMIT case. 
*/ void -lock_twophase_postabort(TransactionId xid, uint16 info, +lock_twophase_postabort(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { - lock_twophase_postcommit(xid, info, recdata, len); + lock_twophase_postcommit(fxid, info, recdata, len); } /* diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index d82114ffca16..c07fb5883555 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -191,7 +191,7 @@ * AtPrepare_PredicateLocks(void); * PostPrepare_PredicateLocks(TransactionId xid); * PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit); - * predicatelock_twophase_recover(TransactionId xid, uint16 info, + * predicatelock_twophase_recover(FullTransactionId fxid, uint16 info, * void *recdata, uint32 len); */ @@ -4856,7 +4856,7 @@ AtPrepare_PredicateLocks(void) * anyway. We only need to clean up our local state. */ void -PostPrepare_PredicateLocks(TransactionId xid) +PostPrepare_PredicateLocks(FullTransactionId fxid) { if (MySerializableXact == InvalidSerializableXact) return; @@ -4879,12 +4879,12 @@ PostPrepare_PredicateLocks(TransactionId xid) * commits or aborts. */ void -PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit) +PredicateLockTwoPhaseFinish(FullTransactionId fxid, bool isCommit) { SERIALIZABLEXID *sxid; SERIALIZABLEXIDTAG sxidtag; - sxidtag.xid = xid; + sxidtag.xid = XidFromFullTransactionId(fxid); LWLockAcquire(SerializableXactHashLock, LW_SHARED); sxid = (SERIALIZABLEXID *) @@ -4906,10 +4906,11 @@ PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit) * Re-acquire a predicate lock belonging to a transaction that was prepared. 
*/ void -predicatelock_twophase_recover(TransactionId xid, uint16 info, +predicatelock_twophase_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { TwoPhasePredicateRecord *record; + TransactionId xid = XidFromFullTransactionId(fxid); Assert(len == sizeof(TwoPhasePredicateRecord)); diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c index eeb2d43cb10f..9cd426889730 100644 --- a/src/backend/utils/activity/pgstat_relation.c +++ b/src/backend/utils/activity/pgstat_relation.c @@ -744,7 +744,7 @@ PostPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state) * Load the saved counts into our local pgstats state. */ void -pgstat_twophase_postcommit(TransactionId xid, uint16 info, +pgstat_twophase_postcommit(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata; @@ -780,7 +780,7 @@ pgstat_twophase_postcommit(TransactionId xid, uint16 info, * as aborted. 
*/ void -pgstat_twophase_postabort(TransactionId xid, uint16 info, +pgstat_twophase_postabort(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata; diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h index 4e6b0eec2ff4..b876e98f46ed 100644 --- a/src/include/access/multixact.h +++ b/src/include/access/multixact.h @@ -11,6 +11,7 @@ #ifndef MULTIXACT_H #define MULTIXACT_H +#include "access/transam.h" #include "access/xlogreader.h" #include "lib/stringinfo.h" #include "storage/sync.h" @@ -119,7 +120,7 @@ extern int multixactmemberssyncfiletag(const FileTag *ftag, char *path); extern void AtEOXact_MultiXact(void); extern void AtPrepare_MultiXact(void); -extern void PostPrepare_MultiXact(TransactionId xid); +extern void PostPrepare_MultiXact(FullTransactionId fxid); extern Size MultiXactShmemSize(void); extern void MultiXactShmemInit(void); @@ -145,11 +146,11 @@ extern void MultiXactAdvanceNextMXact(MultiXactId minMulti, extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB); extern int MultiXactMemberFreezeThreshold(void); -extern void multixact_twophase_recover(TransactionId xid, uint16 info, +extern void multixact_twophase_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); -extern void multixact_twophase_postcommit(TransactionId xid, uint16 info, +extern void multixact_twophase_postcommit(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); -extern void multixact_twophase_postabort(TransactionId xid, uint16 info, +extern void multixact_twophase_postabort(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); extern void multixact_redo(XLogReaderState *record); diff --git a/src/include/access/twophase.h b/src/include/access/twophase.h index 9fa823550337..0ab8b3e64a71 100644 --- a/src/include/access/twophase.h +++ b/src/include/access/twophase.h @@ -14,6 +14,7 @@ #ifndef TWOPHASE_H #define TWOPHASE_H 
+#include "access/transam.h" #include "access/xact.h" #include "access/xlogdefs.h" #include "datatype/timestamp.h" @@ -36,10 +37,10 @@ extern void PostPrepare_Twophase(void); extern TransactionId TwoPhaseGetXidByVirtualXID(VirtualTransactionId vxid, bool *have_more); -extern PGPROC *TwoPhaseGetDummyProc(TransactionId xid, bool lock_held); -extern int TwoPhaseGetDummyProcNumber(TransactionId xid, bool lock_held); +extern PGPROC *TwoPhaseGetDummyProc(FullTransactionId fxid, bool lock_held); +extern int TwoPhaseGetDummyProcNumber(FullTransactionId fxid, bool lock_held); -extern GlobalTransaction MarkAsPreparing(TransactionId xid, const char *gid, +extern GlobalTransaction MarkAsPreparing(FullTransactionId fxid, const char *gid, TimestampTz prepared_at, Oid owner, Oid databaseid); @@ -56,8 +57,9 @@ extern void CheckPointTwoPhase(XLogRecPtr redo_horizon); extern void FinishPreparedTransaction(const char *gid, bool isCommit); -extern void PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, - XLogRecPtr end_lsn, RepOriginId origin_id); +extern void PrepareRedoAdd(FullTransactionId fxid, char *buf, + XLogRecPtr start_lsn, XLogRecPtr end_lsn, + RepOriginId origin_id); extern void PrepareRedoRemove(TransactionId xid, bool giveWarning); extern void restoreTwoPhaseData(void); extern bool LookupGXact(const char *gid, XLogRecPtr prepare_end_lsn, diff --git a/src/include/access/twophase_rmgr.h b/src/include/access/twophase_rmgr.h index 3ed154bb2312..8f576402e360 100644 --- a/src/include/access/twophase_rmgr.h +++ b/src/include/access/twophase_rmgr.h @@ -14,7 +14,9 @@ #ifndef TWOPHASE_RMGR_H #define TWOPHASE_RMGR_H -typedef void (*TwoPhaseCallback) (TransactionId xid, uint16 info, +#include "access/transam.h" + +typedef void (*TwoPhaseCallback) (FullTransactionId fxid, uint16 info, void *recdata, uint32 len); typedef uint8 TwoPhaseRmgrId; diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 378f2f2c2ba2..202bd2d5aced 100644 --- a/src/include/pgstat.h +++ 
b/src/include/pgstat.h @@ -718,9 +718,9 @@ extern void pgstat_count_heap_delete(Relation rel); extern void pgstat_count_truncate(Relation rel); extern void pgstat_update_heap_dead_tuples(Relation rel, int delta); -extern void pgstat_twophase_postcommit(TransactionId xid, uint16 info, +extern void pgstat_twophase_postcommit(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); -extern void pgstat_twophase_postabort(TransactionId xid, uint16 info, +extern void pgstat_twophase_postabort(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); extern PgStat_StatTabEntry *pgstat_fetch_stat_tabentry(Oid relid); diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index 6f2108a44e8f..0f0fc446a197 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -18,6 +18,7 @@ #error "lock.h may not be included from frontend code" #endif +#include "access/transam.h" #include "lib/ilist.h" #include "storage/lockdefs.h" #include "storage/lwlock.h" @@ -581,7 +582,7 @@ extern bool LockHasWaiters(const LOCKTAG *locktag, extern VirtualTransactionId *GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode, int *countp); extern void AtPrepare_Locks(void); -extern void PostPrepare_Locks(TransactionId xid); +extern void PostPrepare_Locks(FullTransactionId fxid); extern bool LockCheckConflicts(LockMethod lockMethodTable, LOCKMODE lockmode, LOCK *lock, PROCLOCK *proclock); @@ -597,13 +598,13 @@ extern BlockedProcsData *GetBlockerStatusData(int blocked_pid); extern xl_standby_lock *GetRunningTransactionLocks(int *nlocks); extern const char *GetLockmodeName(LOCKMETHODID lockmethodid, LOCKMODE mode); -extern void lock_twophase_recover(TransactionId xid, uint16 info, +extern void lock_twophase_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); -extern void lock_twophase_postcommit(TransactionId xid, uint16 info, +extern void lock_twophase_postcommit(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); 
-extern void lock_twophase_postabort(TransactionId xid, uint16 info, +extern void lock_twophase_postabort(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); -extern void lock_twophase_standby_recover(TransactionId xid, uint16 info, +extern void lock_twophase_standby_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); extern DeadLockState DeadLockCheck(PGPROC *proc); diff --git a/src/include/storage/predicate.h b/src/include/storage/predicate.h index 267d5d90e948..4d3f218f93bc 100644 --- a/src/include/storage/predicate.h +++ b/src/include/storage/predicate.h @@ -14,6 +14,7 @@ #ifndef PREDICATE_H #define PREDICATE_H +#include "access/transam.h" #include "storage/itemptr.h" #include "storage/lock.h" #include "utils/relcache.h" @@ -72,9 +73,9 @@ extern void PreCommit_CheckForSerializationFailure(void); /* two-phase commit support */ extern void AtPrepare_PredicateLocks(void); -extern void PostPrepare_PredicateLocks(TransactionId xid); -extern void PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit); -extern void predicatelock_twophase_recover(TransactionId xid, uint16 info, +extern void PostPrepare_PredicateLocks(FullTransactionId fxid); +extern void PredicateLockTwoPhaseFinish(FullTransactionId xid, bool isCommit); +extern void predicatelock_twophase_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); /* parallel query support */ From 67206ab01ea87fa4bbb4fbe7e52c0526e3c8145f Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Fri, 31 Jan 2025 08:47:24 +0900 Subject: [PATCH 2/2] Fix issues with 2PC file handling at recovery This addresses two issues: - Avoid CLOG file lookups until we are sure that this is safe. This is now done at the end of recovery. - Avoid mishandling of 2PC shmem state data. Tests are added to show the problems possible. 
Backpatch-through: 13 --- src/backend/access/transam/twophase.c | 125 ++++++++++++----------- src/test/recovery/t/009_twophase.pl | 140 ++++++++++++++++++++++++++ 2 files changed, 209 insertions(+), 56 deletions(-) diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 4f5b45426620..c8a8d774b107 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -1893,13 +1893,16 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon) * Scan pg_twophase and fill TwoPhaseState depending on the on-disk data. * This is called once at the beginning of recovery, saving any extra * lookups in the future. Two-phase files that are newer than the - * minimum XID horizon are discarded on the way. + * minimum XID horizon are discarded on the way, as well as files that + * are older than the oldest XID horizon. */ void restoreTwoPhaseData(void) { DIR *cldir; struct dirent *clde; + FullTransactionId nextXid = TransamVariables->nextXid; + FullTransactionId oldestXid = AdjustToFullTransactionId(TransamVariables->oldestXid); LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); cldir = AllocateDir(TWOPHASE_DIR); @@ -1913,10 +1916,26 @@ restoreTwoPhaseData(void) fxid = FullTransactionIdFromU64(strtou64(clde->d_name, NULL, 16)); + /* Reject XID if too new or too old */ + if (FullTransactionIdFollowsOrEquals(fxid, nextXid) || + FullTransactionIdPrecedes(fxid, oldestXid)) + { + if (FullTransactionIdFollowsOrEquals(fxid, nextXid)) + ereport(WARNING, + (errmsg("removing future two-phase state file for transaction %u of epoch %u", + XidFromFullTransactionId(fxid), + EpochFromFullTransactionId(fxid)))); + else + ereport(WARNING, + (errmsg("removing past two-phase state file for transaction %u of epoch %u", + XidFromFullTransactionId(fxid), + EpochFromFullTransactionId(fxid)))); + RemoveTwoPhaseFile(fxid, true); + continue; + } + buf = ProcessTwoPhaseBuffer(fxid, InvalidXLogRecPtr, true, false, false); - if (buf == NULL) - continue;
PrepareRedoAdd(fxid, buf, InvalidXLogRecPtr, InvalidXLogRecPtr, InvalidRepOriginId); @@ -1982,9 +2001,6 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p) gxact->prepare_start_lsn, gxact->ondisk, false, true); - if (buf == NULL) - continue; - /* * OK, we think this file is valid. Incorporate xid into the * running-minimum result. @@ -2052,8 +2068,7 @@ StandbyRecoverPreparedTransactions(void) buf = ProcessTwoPhaseBuffer(gxact->fxid, gxact->prepare_start_lsn, gxact->ondisk, true, false); - if (buf != NULL) - pfree(buf); + pfree(buf); } LWLockRelease(TwoPhaseStateLock); } @@ -2078,8 +2093,21 @@ void RecoverPreparedTransactions(void) { int i; + FullTransactionId *remove_fxids; + int remove_fxids_cnt; LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); + + /* + * Track XIDs candidate for removal if found as already committed or + * aborted, once the first scan through TwoPhaseState is done. This + * cannot happen while going through the entries in TwoPhaseState as + * PrepareRedoRemove() manipulates it. + */ + remove_fxids_cnt = 0; + remove_fxids = (FullTransactionId *) palloc(TwoPhaseState->numPrepXacts * + sizeof(FullTransactionId)); + for (i = 0; i < TwoPhaseState->numPrepXacts; i++) { char *buf; @@ -2090,6 +2118,27 @@ RecoverPreparedTransactions(void) TransactionId *subxids; const char *gid; + /* + * Is this transaction already aborted or committed? If yes, mark it + * for removal. + * + * Checking CLOGs if these transactions have been already aborted or + * committed is safe at this stage; we are at the end of recovery and + * all WAL has been replayed, all 2PC transactions are reinstated and + * should be tracked in TwoPhaseState. + */ + if (TransactionIdDidCommit(XidFromFullTransactionId(fxid)) || + TransactionIdDidAbort(XidFromFullTransactionId(fxid))) + { + /* + * Track this transaction ID for its removal from the shared + * memory state at the end. 
+ */ + remove_fxids[remove_fxids_cnt] = fxid; + remove_fxids_cnt++; + continue; + } + /* * Reconstruct subtrans state for the transaction --- needed because * pg_subtrans is not preserved over a restart. Note that we are @@ -2102,8 +2151,6 @@ RecoverPreparedTransactions(void) buf = ProcessTwoPhaseBuffer(gxact->fxid, gxact->prepare_start_lsn, gxact->ondisk, true, false); - if (buf == NULL) - continue; ereport(LOG, (errmsg("recovering prepared transaction %u of epoch %u from shared memory", @@ -2164,6 +2211,18 @@ RecoverPreparedTransactions(void) LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); } + for (i = 0; i < remove_fxids_cnt; i++) + { + FullTransactionId fxid = remove_fxids[i]; + + ereport(WARNING, + (errmsg("removing stale two-phase state from memory for transaction %u of epoch %u", + XidFromFullTransactionId(fxid), + EpochFromFullTransactionId(fxid)))); + + PrepareRedoRemoveFull(fxid, true); + } + LWLockRelease(TwoPhaseStateLock); } @@ -2184,7 +2243,6 @@ ProcessTwoPhaseBuffer(FullTransactionId fxid, bool fromdisk, bool setParent, bool setNextXid) { - FullTransactionId nextXid = TransamVariables->nextXid; TransactionId *subxids; char *buf; TwoPhaseFileHeader *hdr; @@ -2195,51 +2253,6 @@ ProcessTwoPhaseBuffer(FullTransactionId fxid, if (!fromdisk) Assert(prepare_start_lsn != InvalidXLogRecPtr); - /* Already processed? 
*/ - if (TransactionIdDidCommit(XidFromFullTransactionId(fxid)) || - TransactionIdDidAbort(XidFromFullTransactionId(fxid))) - { - if (fromdisk) - { - ereport(WARNING, - (errmsg("removing stale two-phase state file for transaction %u of epoch %u", - XidFromFullTransactionId(fxid), - EpochFromFullTransactionId(fxid)))); - RemoveTwoPhaseFile(fxid, true); - } - else - { - ereport(WARNING, - (errmsg("removing stale two-phase state from memory for transaction %u of epoch %u", - XidFromFullTransactionId(fxid), - EpochFromFullTransactionId(fxid)))); - PrepareRedoRemoveFull(fxid, true); - } - return NULL; - } - - /* Reject XID if too new */ - if (FullTransactionIdFollowsOrEquals(fxid, nextXid)) - { - if (fromdisk) - { - ereport(WARNING, - (errmsg("removing future two-phase state file for transaction %u of epoch %u", - XidFromFullTransactionId(fxid), - EpochFromFullTransactionId(fxid)))); - RemoveTwoPhaseFile(fxid, true); - } - else - { - ereport(WARNING, - (errmsg("removing future two-phase state from memory for transaction %u of epoch %u", - XidFromFullTransactionId(fxid), - EpochFromFullTransactionId(fxid)))); - PrepareRedoRemoveFull(fxid, true); - } - return NULL; - } - if (fromdisk) { /* Read and validate file */ diff --git a/src/test/recovery/t/009_twophase.pl b/src/test/recovery/t/009_twophase.pl index 1a662ebe499d..3a3714a2d8b0 100644 --- a/src/test/recovery/t/009_twophase.pl +++ b/src/test/recovery/t/009_twophase.pl @@ -5,6 +5,7 @@ use strict; use warnings FATAL => 'all'; +use File::Copy; use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; use Test::More; @@ -28,6 +29,15 @@ sub configure_and_reload return; } +sub twophase_file_name +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my $epoch = shift; + my $xid = shift; + return sprintf("%08X%08X", $epoch, $xid); +} + # Set up two nodes, which will alternately be primary and replication standby. 
# Setup london node @@ -572,4 +582,134 @@ sub configure_and_reload ); isnt($osubtrans, $nsubtrans, "contents of pg_subtrans/ have changed"); +############################################################################### +# Check handling of already committed or aborted 2PC files at recovery. +# This test does a manual copy of 2PC files created in a running server, +# to cheaply emulate situations that could be found in base backups. +############################################################################### + +# Issue a set of transactions that will be used for this portion of the test: +# - One transaction to hold the minimum xid horizon at bay. +# - One transaction that will be found as already committed at recovery. +# - One transaction that will be found as already rolled back at recovery. +$cur_primary->psql( + 'postgres', " + BEGIN; + INSERT INTO t_009_tbl VALUES (40, 'transaction: xid horizon'); + PREPARE TRANSACTION 'xact_009_40'; + BEGIN; + INSERT INTO t_009_tbl VALUES (41, 'transaction: commit-prepared'); + PREPARE TRANSACTION 'xact_009_41'; + BEGIN; + INSERT INTO t_009_tbl VALUES (42, 'transaction: rollback-prepared'); + PREPARE TRANSACTION 'xact_009_42';"); + +# Issue a checkpoint, fixing the XID horizon based on the first transaction, +# flushing to disk the two files to use. +$cur_primary->psql('postgres', "CHECKPOINT"); + +# Get the transaction IDs of the 2PC files to manipulate. +my $commit_prepared_xid = int( + $cur_primary->safe_psql( + 'postgres', + "SELECT transaction FROM pg_prepared_xacts WHERE gid = 'xact_009_41'") +); +my $abort_prepared_xid = int( + $cur_primary->safe_psql( + 'postgres', + "SELECT transaction FROM pg_prepared_xacts WHERE gid = 'xact_009_42'") +); + +# Copy the two-phase files that will be put back later. Assume an +# epoch of 0.
+my $commit_prepared_name = twophase_file_name(0, $commit_prepared_xid); +my $abort_prepared_name = twophase_file_name(0, $abort_prepared_xid); + +my $twophase_tmpdir = $PostgreSQL::Test::Utils::tmp_check . '/' . "2pc_files"; +mkdir($twophase_tmpdir); +my $primary_twophase_folder = $cur_primary->data_dir . '/pg_twophase/'; +copy("$primary_twophase_folder/$commit_prepared_name", $twophase_tmpdir); +copy("$primary_twophase_folder/$abort_prepared_name", $twophase_tmpdir); + +# Issue abort/commit prepared. +$cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_41'"); +$cur_primary->psql('postgres', "ROLLBACK PREPARED 'xact_009_42'"); + +# Again checkpoint, to advance the LSN past the point where the two previous +# transaction records would be replayed. +$cur_primary->psql('postgres', "CHECKPOINT"); + +# Take down node. +$cur_primary->teardown_node; + +# Move back the two twophase files. +copy("$twophase_tmpdir/$commit_prepared_name", $primary_twophase_folder); +copy("$twophase_tmpdir/$abort_prepared_name", $primary_twophase_folder); + +# Grab location in logs of primary +my $log_offset = -s $cur_primary->logfile; + +# Start node and check that the two previous files are removed by checking the +# server logs, following the CLOG lookup done at the end of recovery. +$cur_primary->start; + +$cur_primary->log_check( + "two-phase files of committed transactions removed at recovery", + $log_offset, + log_like => [ + qr/removing stale two-phase state from memory for transaction $commit_prepared_xid of epoch 0/, + qr/removing stale two-phase state from memory for transaction $abort_prepared_xid of epoch 0/ + ]); + +# Commit the first transaction. +$cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_40'"); +# After replay, there should be no 2PC transactions. 
+$cur_primary->psql( + 'postgres', + "SELECT * FROM pg_prepared_xacts", + stdout => \$psql_out); +is($psql_out, qq{}, "Check expected pg_prepared_xact data on primary"); +# Data of committed transactions (40, 41) should be around, not 42. +$cur_primary->psql( + 'postgres', + "SELECT * FROM t_009_tbl WHERE id IN (40, 41, 42);", + stdout => \$psql_out); +is( $psql_out, qq{40|transaction: xid horizon +41|transaction: commit-prepared}, + "Check expected table data on primary"); + +############################################################################### +# Check handling of orphaned 2PC files at recovery. +############################################################################### + +$cur_standby->teardown_node; +$cur_primary->teardown_node; + +# Grab location in logs of primary +$log_offset = -s $cur_primary->logfile; + +# Create fake files with a transaction ID large or low enough to be in the +# future or the past, in different epochs, then check that the primary is able +# to start and remove these files at recovery. + +# First bump the epoch with pg_resetwal. +$cur_primary->command_ok( + [ 'pg_resetwal', '-e', 256, '-f', $cur_primary->data_dir ], + 'bump epoch of primary'); + +my $future_2pc_file = + $cur_primary->data_dir . '/pg_twophase/000001FF00000FFF'; +append_to_file $future_2pc_file, ""; +my $past_2pc_file = $cur_primary->data_dir . '/pg_twophase/000000EE00000FFF'; +append_to_file $past_2pc_file, ""; + +$cur_primary->start; +$cur_primary->log_check( + "two-phase files removed at recovery", + $log_offset, + log_like => [ + qr/removing past two-phase state file for transaction 4095 of epoch 238/, + qr/removing future two-phase state file for transaction 4095 of epoch 511/ + ]); + done_testing();