summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPavan Deolasee2016-05-04 12:17:02 +0000
committerPavan Deolasee2016-10-18 10:05:06 +0000
commitcc7086e1cc49d98fe387bf752eb922ee932d1e7d (patch)
treec3e059eaf214e928cbf90491d0c1770df50b9b45
parent1934215f39853fd6e67618106f0ca97b3e016a6f (diff)
Fix a nasty bug that was zeroing out clog and subtrans pages, thus causing
various sorts of data corruption. The bug dates back to the XC days, but probably became prominent in XL because of certain recent changes. In XC/XL, a node may not see all the XIDs and hence the clog/subtrans log must be extended whenever a new XID crosses the previously seen page boundary. We do this by comparing the pageno to which the new XID maps with the latest_page_no as stored in the shared SLRU data structure. But to handle XID wrap-arounds, we added a check that the difference in the number of pages is less than CLOG_WRAP_CHECK_DELTA, which was incorrectly defined as (2^30 / CLOG_XACTS_PER_PAGE). Note that "^" is the bitwise XOR operator in C, not exponentiation, and hence this evaluated to a very small number of 28, thus causing incorrect zeroing of pages if ExtendCLOG is called with an XID which is older than what 28 clog pages can hold. All such transactions would suddenly be marked as aborted, resulting in removal of perfectly valid tuples. This patch fixes the mess by just relying on built-in routines for checking XID wrap-arounds. I also found another issue while working on this. We must not only zero the page at hand, but also all intermediate pages, because we won't get this opportunity later if an intermediate XID is seen. In our test setup, this seems to help some of the recent reports of data corruption, including "missing attributes" errors.
-rw-r--r--src/backend/access/transam/clog.c45
-rw-r--r--src/backend/access/transam/subtrans.c40
2 files changed, 56 insertions, 29 deletions
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index 543cb69d88..fbb8b0fda7 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -73,12 +73,6 @@
#define GetLSNIndex(slotno, xid) ((slotno) * CLOG_LSNS_PER_PAGE + \
((xid) % (TransactionId) CLOG_XACTS_PER_PAGE) / CLOG_XACTS_PER_LSN_GROUP)
-#ifdef PGXC
-/* Check if there is about a 1 billion XID difference for XID wraparound */
-#define CLOG_WRAP_CHECK_DELTA (2^30 / CLOG_XACTS_PER_PAGE)
-#endif
-
-
/*
* Link to shared-memory data structures for CLOG control
*/
@@ -627,6 +621,7 @@ void
ExtendCLOG(TransactionId newestXact)
{
int pageno;
+ TransactionId latestXid;
/*
* No work except at first XID of a page. But beware: just after
@@ -638,20 +633,22 @@ ExtendCLOG(TransactionId newestXact)
* and therefore will be skipped, so we need to detect this by using
* the latest_page_number instead of the pg index.
*
- * Also, there is a special case of when transactions wrap-around that
- * we need to detect.
+ * latest_page_number always points to the last page of CLOG. We don't need
+ * to do anything for an XID that maps to a page that precedes or equals
+ * the latest_page_number. To handle wrap-around correctly, we just compute
+ * the last XID mapped to latest_page_number and compare that against the
+ * passed in XID.
*/
pageno = TransactionIdToPage(newestXact);
/*
- * The first condition makes sure we did not wrap around
- * The second checks if we are still using the same page
* Note that this value can change and we are not holding a lock,
* so we repeat the check below. We do it this way instead of
* grabbing the lock to avoid lock contention.
*/
- if (ClogCtl->shared->latest_page_number - pageno <= CLOG_WRAP_CHECK_DELTA
- && pageno <= ClogCtl->shared->latest_page_number)
+ latestXid = (ClogCtl->shared->latest_page_number * CLOG_XACTS_PER_PAGE)
+ + CLOG_XACTS_PER_PAGE - 1;
+ if (TransactionIdPrecedesOrEquals(newestXact, latestXid))
return;
#else
if (TransactionIdToPgIndex(newestXact) != 0 &&
@@ -669,17 +666,31 @@ ExtendCLOG(TransactionId newestXact)
* out the page already and advanced the latest_page_number
* while we were waiting for the lock.
*/
- if (ClogCtl->shared->latest_page_number - pageno <= CLOG_WRAP_CHECK_DELTA
- && pageno <= ClogCtl->shared->latest_page_number)
+ latestXid = (ClogCtl->shared->latest_page_number * CLOG_XACTS_PER_PAGE)
+ + CLOG_XACTS_PER_PAGE - 1;
+ if (TransactionIdPrecedesOrEquals(newestXact, latestXid))
{
LWLockRelease(CLogControlLock);
return;
}
-#endif
- /* Zero the page and make an XLOG entry about it */
+ /*
+ * We must initialise all pages between latest_page_number and pageno,
+ * taking into consideration XID wraparound
+ */
+ for (;;)
+ {
+ /* Zero the page and make an XLOG entry about it */
+ int target_pageno = ClogCtl->shared->latest_page_number + 1;
+ if (target_pageno > TransactionIdToPage(MaxTransactionId))
+ target_pageno = 0;
+ ZeroCLOGPage(target_pageno, true);
+ if (target_pageno == pageno)
+ break;
+ }
+#else
ZeroCLOGPage(pageno, true);
-
+#endif
LWLockRelease(CLogControlLock);
}
diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c
index 84b83ecf46..557c70055f 100644
--- a/src/backend/access/transam/subtrans.c
+++ b/src/backend/access/transam/subtrans.c
@@ -35,11 +35,6 @@
#include "pg_trace.h"
#include "utils/snapmgr.h"
-#ifdef PGXC
-/* Check if there is about a 1 billion XID difference for XID wraparound */
-#define SUBTRANS_WRAP_CHECK_DELTA (2^30 / SUBTRANS_XACTS_PER_PAGE)
-#endif
-
/*
* Defines for SubTrans page sizes. A page is the same BLCKSZ as is used
* everywhere else in Postgres.
@@ -311,6 +306,7 @@ void
ExtendSUBTRANS(TransactionId newestXact)
{
int pageno;
+ TransactionId latestSubXid;
/*
* No work except at first XID of a page. But beware: just after
@@ -322,8 +318,11 @@ ExtendSUBTRANS(TransactionId newestXact)
* and therefore will be skipped, so we need to detect this by using
* the latest_page_number instead of the pg index.
*
- * Also, there is a special case of when transactions wrap-around that
- * we need to detect.
+ * latest_page_number always points to the last page of SubtransLog. We
+ * don't need to do anything for an XID that maps to a page that precedes
+ * or equals the latest_page_number. To handle wrap-around correctly, we
+ * just compute the last XID mapped to latest_page_number and compare that
+ * against the passed in XID.
*/
pageno = TransactionIdToPage(newestXact);
@@ -334,8 +333,9 @@ ExtendSUBTRANS(TransactionId newestXact)
* so we repeat the check below. We do it this way instead of
* grabbing the lock to avoid lock contention.
*/
- if (SubTransCtl->shared->latest_page_number - pageno <= SUBTRANS_WRAP_CHECK_DELTA
- && pageno <= SubTransCtl->shared->latest_page_number)
+ latestSubXid = (SubTransCtl->shared->latest_page_number *
+ SUBTRANS_XACTS_PER_PAGE) + SUBTRANS_XACTS_PER_PAGE - 1;
+ if (TransactionIdPrecedesOrEquals(newestXact, latestSubXid))
return;
#else
if (TransactionIdToEntry(newestXact) != 0 &&
@@ -353,16 +353,32 @@ ExtendSUBTRANS(TransactionId newestXact)
* out the page already and advanced the latest_page_number
* while we were waiting for the lock.
*/
- if (SubTransCtl->shared->latest_page_number - pageno <= SUBTRANS_WRAP_CHECK_DELTA
- && pageno <= SubTransCtl->shared->latest_page_number)
+ latestSubXid = (SubTransCtl->shared->latest_page_number *
+ SUBTRANS_XACTS_PER_PAGE) + SUBTRANS_XACTS_PER_PAGE - 1;
+ if (TransactionIdPrecedesOrEquals(newestXact, latestSubXid))
{
LWLockRelease(SubtransControlLock);
return;
}
-#endif
+ /*
+ * We must initialise all pages between latest_page_number and pageno,
+ * taking into consideration XID wraparound
+ */
+ for (;;)
+ {
+ /* Zero the page and make an XLOG entry about it */
+ int target_pageno = SubTransCtl->shared->latest_page_number + 1;
+ if (target_pageno > TransactionIdToPage(MaxTransactionId))
+ target_pageno = 0;
+ ZeroSUBTRANSPage(target_pageno);
+ if (target_pageno == pageno)
+ break;
+ }
+#else
/* Zero the page */
ZeroSUBTRANSPage(pageno);
+#endif
LWLockRelease(SubtransControlLock);
}