summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/access/transam/gtm.c8
-rw-r--r--src/backend/postmaster/clustermon.c25
-rw-r--r--src/backend/storage/ipc/procarray.c5
-rw-r--r--src/gtm/main/gtm_txn.c6
-rw-r--r--src/gtm/recovery/register_common.c72
-rw-r--r--src/include/gtm/register.h1
6 files changed, 82 insertions, 35 deletions
diff --git a/src/backend/access/transam/gtm.c b/src/backend/access/transam/gtm.c
index 26defd944e..64f39d19ad 100644
--- a/src/backend/access/transam/gtm.c
+++ b/src/backend/access/transam/gtm.c
@@ -704,10 +704,8 @@ ReportGlobalXmin(GlobalTransactionId gxid, GlobalTransactionId *global_xmin,
if (!conn)
return EOF;
- if (report_global_xmin(conn, PGXCNodeName,
+ report_global_xmin(conn, PGXCNodeName,
IS_PGXC_COORDINATOR ? GTM_NODE_COORDINATOR : GTM_NODE_DATANODE,
- gxid, global_xmin, latest_completed_xid, &errcode))
- return errcode;
- else
- return 0;
+ gxid, global_xmin, latest_completed_xid, &errcode);
+ return errcode;
}
diff --git a/src/backend/postmaster/clustermon.c b/src/backend/postmaster/clustermon.c
index f2148667e2..2ff09787ed 100644
--- a/src/backend/postmaster/clustermon.c
+++ b/src/backend/postmaster/clustermon.c
@@ -212,8 +212,11 @@ ClusterMonitorInit(void)
if ((status = ReportGlobalXmin(oldestXmin, &newOldestXmin,
&latestCompletedXid)))
{
- elog(DEBUG2, "Failed to report RecentGlobalXmin to GTM - %d:%d",
- status, newOldestXmin);
+ elog(DEBUG1, "Failed (status %d) to report RecentGlobalXmin "
+ "- reported RecentGlobalXmin %d, received "
+ "RecentGlobalXmin %d, " "received latestCompletedXid",
+ status, oldestXmin, newOldestXmin,
+ latestCompletedXid);
if (status == GTM_ERRCODE_TOO_OLD_XMIN ||
status == GTM_ERRCODE_NODE_EXCLUDED)
{
@@ -238,14 +241,17 @@ ClusterMonitorInit(void)
SetLatestCompletedXid(latestCompletedXid);
continue;
}
- elog(PANIC, "Global xmin computation mismatch");
}
}
else
{
+ elog(DEBUG1, "Successfully reported xmin to GTM - reported_xmin %d,"
+ "received RecentGlobalXmin %d, "
+ "received latestCompletedXid %d", oldestXmin,
+ newOldestXmin, latestCompletedXid);
+
SetLatestCompletedXid(latestCompletedXid);
ClusterMonitorSetReportedGlobalXmin(oldestXmin);
- elog(DEBUG2, "Updating global_xmin to %d", newOldestXmin);
if (GlobalTransactionIdIsValid(newOldestXmin))
ClusterMonitorSetGlobalXmin(newOldestXmin);
}
@@ -373,15 +379,20 @@ void
ClusterMonitorSetGlobalXmin(GlobalTransactionId xmin)
{
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+
+ /*
+ * Do a consistency check to ensure that we NEVER have running transactions
+ * with xmin less than what the GTM has already computed. This should never
+ * happen during normal execution, but if we have ever been excluded
+ * from the xmin calculation by the GTM while we are still running old
+ * transactions, PANIC is our best bet to avoid corruption.
+ */
ProcArrayCheckXminConsistency(xmin);
SpinLockAcquire(&ClusterMonitorCtl->mutex);
ClusterMonitorCtl->gtm_recent_global_xmin = xmin;
SpinLockRelease(&ClusterMonitorCtl->mutex);
- if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, xmin))
- ShmemVariableCache->latestCompletedXid = xmin;
-
LWLockRelease(ProcArrayLock);
}
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 55456cd3be..216891485b 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -1302,6 +1302,9 @@ GetOldestXminInternal(Relation rel, bool ignoreVacuum, bool computeLocal,
TransactionIdAdvance(result);
#endif
+ elog(DEBUG1, "GetOldestXminInternal - Starting computation with"
+ "latestCompletedXid %d + 1", result);
+
for (index = 0; index < arrayP->numProcs; index++)
{
int pgprocno = arrayP->pgprocnos[index];
@@ -1342,7 +1345,7 @@ GetOldestXminInternal(Relation rel, bool ignoreVacuum, bool computeLocal,
*/
#ifdef XCP
- elog(DEBUG3, "proc: pid:%d, xmin: %d, xid: %d", proc->pid,
+ elog(DEBUG1, "proc: pid:%d, xmin: %d, xid: %d", proc->pid,
xmin, xid);
if (TransactionIdIsNormal(xmin) &&
diff --git a/src/gtm/main/gtm_txn.c b/src/gtm/main/gtm_txn.c
index f6453532a6..bc678f9770 100644
--- a/src/gtm/main/gtm_txn.c
+++ b/src/gtm/main/gtm_txn.c
@@ -565,7 +565,7 @@ GTM_GetGlobalTransactionIdMulti(GTM_TransactionHandle handle[], int txn_count,
if (GlobalTransactionIdIsValid(gtm_txninfo->gti_gxid))
{
gxid[ii] = gtm_txninfo->gti_gxid;
- elog(DEBUG2, "GTM_TransactionInfo has XID already assgined - %s:%d",
+ elog(DEBUG1, "GTM_TransactionInfo has XID already assgined - %s:%d",
gtm_txninfo->gti_global_session_id, gxid[ii]);
continue;
}
@@ -604,7 +604,7 @@ GTM_GetGlobalTransactionIdMulti(GTM_TransactionHandle handle[], int txn_count,
GlobalTransactionIdAdvance(GTMTransactions.gt_nextXid);
- elog(DEBUG2, "Assigning new transaction ID = %s:%d",
+ elog(DEBUG1, "Assigning new transaction ID = %s:%d",
gtm_txninfo->gti_global_session_id, xid);
gxid[ii] = gtm_txninfo->gti_gxid = xid;
new_handle[*new_txn_count] = gtm_txninfo->gti_handle;
@@ -725,7 +725,7 @@ GTM_BeginTransactionMulti(GTM_IsolationLevel isolevel[],
if (txn != InvalidTransactionHandle)
{
gtm_txninfo[kk] = GTM_HandleToTransactionInfo(txn);
- elog(DEBUG2, "Existing transaction found: %s:%d",
+ elog(DEBUG1, "Existing transaction found: %s:%d",
gtm_txninfo[kk]->gti_global_session_id,
gtm_txninfo[kk]->gti_gxid);
txns[kk] = txn;
diff --git a/src/gtm/recovery/register_common.c b/src/gtm/recovery/register_common.c
index f8d2748a77..ebca82b828 100644
--- a/src/gtm/recovery/register_common.c
+++ b/src/gtm/recovery/register_common.c
@@ -941,9 +941,9 @@ GTM_InitNodeManager(void)
}
/*
- * Set to 120 seconds, but should be a few multiple for cluster monitor naptime
+ * Set to 600 seconds, but should be a few multiples of the cluster monitor naptime
*/
-#define GTM_REPORT_XMIN_DELAY_THRESHOLD (120 * 1000)
+#define GTM_REPORT_XMIN_DELAY_THRESHOLD (600 * 1000)
GlobalTransactionId
GTM_HandleGlobalXmin(GTM_PGXCNodeType type, char *node_name,
@@ -988,20 +988,31 @@ GTM_HandleGlobalXmin(GTM_PGXCNodeType type, char *node_name,
* get opportunity to report xmin in a timely fashion, we shouldn't get
* into this situation often.
*
- * The exception to this rule is that if the remote node is idle, then we
- * actually ignore the xmin reported by it and instead calculate a new xmin
- * for it and send it back in respone. The remote node will still done
- * final sanity check and either accept that xmin or kill itself via PANIC
- * mechanism.
*/
if ((mynodeinfo->excluded) &&
- GlobalTransactionIdPrecedes(mynodeinfo->reported_xmin,
- GTM_GlobalXmin))
+ GlobalTransactionIdPrecedes(reported_xmin, GTM_GlobalXmin))
{
*errcode = GTM_ERRCODE_NODE_EXCLUDED;
+
+ /*
+ * This node is rejoining the cluster after being excluded from the
+ * GTM_GlobalXmin calculation because of timeout, disconnection or node
+ * failure. In such cases, we send an appropriate error back to the node
+ * and let it handle the situation. To ensure that our GTM_GlobalXmin
+ * does not keep advancing while the node is trying to rejoin the
+ * cluster, we temporarily set reported_xmin to the current
+ * GTM_GlobalXmin and wait to see if the node finally catches up.
+ *
+ * Note: If the node had old transactions running while it was excluded
+ * by the GTM, it will fail the consistency checks and restart itself.
+ */
+ mynodeinfo->joining = true;
+ mynodeinfo->reported_xmin_time = GTM_TimestampGetCurrent();
+ mynodeinfo->reported_xmin = GTM_GlobalXmin;
+
GTM_RWLockRelease(&mynodeinfo->node_lock);
elog(LOG, "GTM_ERRCODE_NODE_EXCLUDED - node_name %s, reported_xmin %d "
- "previously reported_xmin, GTM_GlobalXmin %d", node_name,
+ "previously reported_xmin %d, GTM_GlobalXmin %d", node_name,
reported_xmin,
mynodeinfo->reported_xmin,
GTM_GlobalXmin);
@@ -1009,13 +1020,18 @@ GTM_HandleGlobalXmin(GTM_PGXCNodeType type, char *node_name,
}
/*
- * The remote node must not report a xmin which precedes the xmin it had
- * reported in the past. If it ever happens, send an error back and let the
- * remote node restart itself
+ * The remote node must not report a xmin which precedes the GTM_GlobalXmin
+ * we have already computed. If it ever happens, send an error back and let
+ * the remote node handle it, possibly restarting itself
*/
- if (GlobalTransactionIdPrecedes(reported_xmin, mynodeinfo->reported_xmin))
+ if (GlobalTransactionIdPrecedes(reported_xmin, GTM_GlobalXmin))
{
*errcode = GTM_ERRCODE_TOO_OLD_XMIN;
+
+ mynodeinfo->joining = true;
+ mynodeinfo->reported_xmin_time = GTM_TimestampGetCurrent();
+ mynodeinfo->reported_xmin = GTM_GlobalXmin;
+
GTM_RWLockRelease(&mynodeinfo->node_lock);
/*
@@ -1028,8 +1044,8 @@ GTM_HandleGlobalXmin(GTM_PGXCNodeType type, char *node_name,
*/
if (mynodeinfo->reported_xmin_time)
elog(LOG, "GTM_ERRCODE_TOO_OLD_XMIN - node_name %s, reported_xmin %d, "
- "previously reported_xmin %d", node_name,
- reported_xmin, mynodeinfo->reported_xmin);
+ "previously reported_xmin %d, GTM_GlobalXmin %d", node_name,
+ reported_xmin, mynodeinfo->reported_xmin, GTM_GlobalXmin);
return InvalidGlobalTransactionId;
}
@@ -1038,7 +1054,15 @@ GTM_HandleGlobalXmin(GTM_PGXCNodeType type, char *node_name,
mynodeinfo->reported_xmin);
mynodeinfo->reported_xmin = reported_xmin;
- mynodeinfo->excluded = false;
+
+ /*
+ * Node joined back, set both excluded and joining to false
+ */
+ if (mynodeinfo->excluded)
+ {
+ mynodeinfo->excluded = false;
+ mynodeinfo->joining = false;
+ }
mynodeinfo->reported_xmin_time = current_time = GTM_TimestampGetCurrent();
GTM_RWLockRelease(&mynodeinfo->node_lock);
@@ -1060,13 +1084,22 @@ GTM_HandleGlobalXmin(GTM_PGXCNodeType type, char *node_name,
{
GTM_PGXCNodeInfo *nodeinfo = all_nodes[ii];
- elog(DEBUG1, "nodeinfo %p, type: %d, exclude %c, xmin %d, time %lld",
+ elog(DEBUG1, "nodeinfo %p, type: %d, exclude %c, xmin %d, time %ld",
nodeinfo, nodeinfo->type, nodeinfo->excluded ? 'T' : 'F',
nodeinfo->reported_xmin, nodeinfo->reported_xmin_time);
- if (nodeinfo->excluded)
+ /*
+ * If a node has not reported its status for
+ * GTM_REPORT_XMIN_DELAY_THRESHOLD and is not in the process of
+ * rejoining the cluster, don't include it in the GTM_GlobalXmin
+ * calculation
+ */
+ if (nodeinfo->excluded && !nodeinfo->joining)
continue;
+ /*
+ * Care only for datanodes and coordinators
+ */
if (nodeinfo->type != GTM_NODE_COORDINATOR && nodeinfo->type !=
GTM_NODE_DATANODE)
continue;
@@ -1087,6 +1120,7 @@ GTM_HandleGlobalXmin(GTM_PGXCNodeType type, char *node_name,
current_time, GTM_REPORT_XMIN_DELAY_THRESHOLD))
{
nodeinfo->excluded = true;
+ nodeinfo->joining = false;
GTM_RWLockRelease(&nodeinfo->node_lock);
continue;
}
diff --git a/src/include/gtm/register.h b/src/include/gtm/register.h
index 0212a9ecf8..a50ffe0c21 100644
--- a/src/include/gtm/register.h
+++ b/src/include/gtm/register.h
@@ -62,6 +62,7 @@ typedef struct GTM_PGXCNodeInfo
* Has the node timed out and be
* excluded from xmin computation?
*/
+ bool joining; /* Is the node joining back */
bool idle; /* Has the node been idle since
* last report
*/