diff options
-rw-r--r-- | src/backend/access/transam/gtm.c | 8 | ||||
-rw-r--r-- | src/backend/postmaster/clustermon.c | 25 | ||||
-rw-r--r-- | src/backend/storage/ipc/procarray.c | 5 | ||||
-rw-r--r-- | src/gtm/main/gtm_txn.c | 6 | ||||
-rw-r--r-- | src/gtm/recovery/register_common.c | 72 | ||||
-rw-r--r-- | src/include/gtm/register.h | 1 |
6 files changed, 82 insertions, 35 deletions
diff --git a/src/backend/access/transam/gtm.c b/src/backend/access/transam/gtm.c index 26defd944e..64f39d19ad 100644 --- a/src/backend/access/transam/gtm.c +++ b/src/backend/access/transam/gtm.c @@ -704,10 +704,8 @@ ReportGlobalXmin(GlobalTransactionId gxid, GlobalTransactionId *global_xmin, if (!conn) return EOF; - if (report_global_xmin(conn, PGXCNodeName, + report_global_xmin(conn, PGXCNodeName, IS_PGXC_COORDINATOR ? GTM_NODE_COORDINATOR : GTM_NODE_DATANODE, - gxid, global_xmin, latest_completed_xid, &errcode)) - return errcode; - else - return 0; + gxid, global_xmin, latest_completed_xid, &errcode); + return errcode; } diff --git a/src/backend/postmaster/clustermon.c b/src/backend/postmaster/clustermon.c index f2148667e2..2ff09787ed 100644 --- a/src/backend/postmaster/clustermon.c +++ b/src/backend/postmaster/clustermon.c @@ -212,8 +212,11 @@ ClusterMonitorInit(void) if ((status = ReportGlobalXmin(oldestXmin, &newOldestXmin, &latestCompletedXid))) { - elog(DEBUG2, "Failed to report RecentGlobalXmin to GTM - %d:%d", - status, newOldestXmin); + elog(DEBUG1, "Failed (status %d) to report RecentGlobalXmin " + "- reported RecentGlobalXmin %d, received " + "RecentGlobalXmin %d, " "received latestCompletedXid", + status, oldestXmin, newOldestXmin, + latestCompletedXid); if (status == GTM_ERRCODE_TOO_OLD_XMIN || status == GTM_ERRCODE_NODE_EXCLUDED) { @@ -238,14 +241,17 @@ ClusterMonitorInit(void) SetLatestCompletedXid(latestCompletedXid); continue; } - elog(PANIC, "Global xmin computation mismatch"); } } else { + elog(DEBUG1, "Successfully reported xmin to GTM - reported_xmin %d," + "received RecentGlobalXmin %d, " + "received latestCompletedXid %d", oldestXmin, + newOldestXmin, latestCompletedXid); + SetLatestCompletedXid(latestCompletedXid); ClusterMonitorSetReportedGlobalXmin(oldestXmin); - elog(DEBUG2, "Updating global_xmin to %d", newOldestXmin); if (GlobalTransactionIdIsValid(newOldestXmin)) ClusterMonitorSetGlobalXmin(newOldestXmin); } @@ -373,15 +379,20 @@ void ClusterMonitorSetGlobalXmin(GlobalTransactionId xmin) { LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + + /* + * Do a consistency check to ensure that we NEVER have running transactions + * with xmin less than what the GTM has already computed. While during + * normal execution, this should never happen, if we ever been excluded + * from the xmin calculation by the GTM while we are still running old + * transactions, PANIC is our best bet to avoid corruption + */ ProcArrayCheckXminConsistency(xmin); SpinLockAcquire(&ClusterMonitorCtl->mutex); ClusterMonitorCtl->gtm_recent_global_xmin = xmin; SpinLockRelease(&ClusterMonitorCtl->mutex); - if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, xmin)) - ShmemVariableCache->latestCompletedXid = xmin; - LWLockRelease(ProcArrayLock); } diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 55456cd3be..216891485b 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -1302,6 +1302,9 @@ GetOldestXminInternal(Relation rel, bool ignoreVacuum, bool computeLocal, TransactionIdAdvance(result); #endif + elog(DEBUG1, "GetOldestXminInternal - Starting computation with" + "latestCompletedXid %d + 1", result); + for (index = 0; index < arrayP->numProcs; index++) { int pgprocno = arrayP->pgprocnos[index]; @@ -1342,7 +1345,7 @@ GetOldestXminInternal(Relation rel, bool ignoreVacuum, bool computeLocal, */ #ifdef XCP - elog(DEBUG3, "proc: pid:%d, xmin: %d, xid: %d", proc->pid, + elog(DEBUG1, "proc: pid:%d, xmin: %d, xid: %d", proc->pid, xmin, xid); if (TransactionIdIsNormal(xmin) && diff --git a/src/gtm/main/gtm_txn.c b/src/gtm/main/gtm_txn.c index f6453532a6..bc678f9770 100644 --- a/src/gtm/main/gtm_txn.c +++ b/src/gtm/main/gtm_txn.c @@ -565,7 +565,7 @@ GTM_GetGlobalTransactionIdMulti(GTM_TransactionHandle handle[], int txn_count, if (GlobalTransactionIdIsValid(gtm_txninfo->gti_gxid)) { gxid[ii] = gtm_txninfo->gti_gxid; - elog(DEBUG2, "GTM_TransactionInfo has XID already assgined - %s:%d", + elog(DEBUG1, "GTM_TransactionInfo has XID already assgined - %s:%d", gtm_txninfo->gti_global_session_id, gxid[ii]); continue; } @@ -604,7 +604,7 @@ GTM_GetGlobalTransactionIdMulti(GTM_TransactionHandle handle[], int txn_count, GlobalTransactionIdAdvance(GTMTransactions.gt_nextXid); - elog(DEBUG2, "Assigning new transaction ID = %s:%d", + elog(DEBUG1, "Assigning new transaction ID = %s:%d", gtm_txninfo->gti_global_session_id, xid); gxid[ii] = gtm_txninfo->gti_gxid = xid; new_handle[*new_txn_count] = gtm_txninfo->gti_handle; @@ -725,7 +725,7 @@ GTM_BeginTransactionMulti(GTM_IsolationLevel isolevel[], if (txn != InvalidTransactionHandle) { gtm_txninfo[kk] = GTM_HandleToTransactionInfo(txn); - elog(DEBUG2, "Existing transaction found: %s:%d", + elog(DEBUG1, "Existing transaction found: %s:%d", gtm_txninfo[kk]->gti_global_session_id, gtm_txninfo[kk]->gti_gxid); txns[kk] = txn; diff --git a/src/gtm/recovery/register_common.c b/src/gtm/recovery/register_common.c index f8d2748a77..ebca82b828 100644 --- a/src/gtm/recovery/register_common.c +++ b/src/gtm/recovery/register_common.c @@ -941,9 +941,9 @@ GTM_InitNodeManager(void) } /* - * Set to 120 seconds, but should be a few multiple for cluster monitor naptime + * Set to 600 seconds, but should be a few multiple for cluster monitor naptime */ -#define GTM_REPORT_XMIN_DELAY_THRESHOLD (120 * 1000) +#define GTM_REPORT_XMIN_DELAY_THRESHOLD (600 * 1000) GlobalTransactionId GTM_HandleGlobalXmin(GTM_PGXCNodeType type, char *node_name, @@ -988,20 +988,31 @@ GTM_HandleGlobalXmin(GTM_PGXCNodeType type, char *node_name, * get opportunity to report xmin in a timely fashion, we shouldn't get * into this situation often. * - * The exception to this rule is that if the remote node is idle, then we - * actually ignore the xmin reported by it and instead calculate a new xmin - * for it and send it back in respone. The remote node will still done - * final sanity check and either accept that xmin or kill itself via PANIC - * mechanism. */ if ((mynodeinfo->excluded) && - GlobalTransactionIdPrecedes(mynodeinfo->reported_xmin, - GTM_GlobalXmin)) + GlobalTransactionIdPrecedes(reported_xmin, GTM_GlobalXmin)) { *errcode = GTM_ERRCODE_NODE_EXCLUDED; + + /* + * This node is joining back the cluster after being excluded from the + * GTM_GlobalXmin calculation because of timeout, disconnection or node + * failure. In such cases, we send appropriate error back to the node + * and let it handle the situation. To ensure that our GTM_GlobalXmin + * does not keep advancing while the node is trying to join back the + * cluster, we temporarily set reported_xmin to the current + * GTM_GlobalXmin and wait to see if the node finally catches up. + * + * Note: If the node had old transaction running while it was excluded + * by the GTM, it will fail the consistency checks and restart itself. + */ + mynodeinfo->joining = true; + mynodeinfo->reported_xmin_time = GTM_TimestampGetCurrent(); + mynodeinfo->reported_xmin = GTM_GlobalXmin; + GTM_RWLockRelease(&mynodeinfo->node_lock); elog(LOG, "GTM_ERRCODE_NODE_EXCLUDED - node_name %s, reported_xmin %d " - "previously reported_xmin, GTM_GlobalXmin %d", node_name, + "previously reported_xmin %d, GTM_GlobalXmin %d", node_name, reported_xmin, mynodeinfo->reported_xmin, GTM_GlobalXmin); @@ -1009,13 +1020,18 @@ GTM_HandleGlobalXmin(GTM_PGXCNodeType type, char *node_name, } /* - * The remote node must not report a xmin which precedes the xmin it had - * reported in the past. If it ever happens, send an error back and let the - * remote node restart itself + * The remote node must not report a xmin which precedes the GTM_GlobalXmin + * we have already computed. If it ever happens, send an error back and let + * the remote node handle it, possibly restarting itself */ - if (GlobalTransactionIdPrecedes(reported_xmin, mynodeinfo->reported_xmin)) + if (GlobalTransactionIdPrecedes(reported_xmin, GTM_GlobalXmin)) { *errcode = GTM_ERRCODE_TOO_OLD_XMIN; + + mynodeinfo->joining = true; + mynodeinfo->reported_xmin_time = GTM_TimestampGetCurrent(); + mynodeinfo->reported_xmin = GTM_GlobalXmin; + GTM_RWLockRelease(&mynodeinfo->node_lock); /* @@ -1028,8 +1044,8 @@ GTM_HandleGlobalXmin(GTM_PGXCNodeType type, char *node_name, */ if (mynodeinfo->reported_xmin_time) elog(LOG, "GTM_ERRCODE_TOO_OLD_XMIN - node_name %s, reported_xmin %d, " - "previously reported_xmin %d", node_name, - reported_xmin, mynodeinfo->reported_xmin); + "previously reported_xmin %d, GTM_GlobalXmin %d", node_name, + reported_xmin, mynodeinfo->reported_xmin, GTM_GlobalXmin); return InvalidGlobalTransactionId; } @@ -1038,7 +1054,15 @@ GTM_HandleGlobalXmin(GTM_PGXCNodeType type, char *node_name, mynodeinfo->reported_xmin); mynodeinfo->reported_xmin = reported_xmin; - mynodeinfo->excluded = false; + + /* + * Node joined back, set both excluded and joining to false + */ + if (mynodeinfo->excluded) + { + mynodeinfo->excluded = false; + mynodeinfo->joining = false; + } mynodeinfo->reported_xmin_time = current_time = GTM_TimestampGetCurrent(); GTM_RWLockRelease(&mynodeinfo->node_lock); @@ -1060,13 +1084,22 @@ GTM_HandleGlobalXmin(GTM_PGXCNodeType type, char *node_name, { GTM_PGXCNodeInfo *nodeinfo = all_nodes[ii]; - elog(DEBUG1, "nodeinfo %p, type: %d, exclude %c, xmin %d, time %lld", + elog(DEBUG1, "nodeinfo %p, type: %d, exclude %c, xmin %d, time %ld", nodeinfo, nodeinfo->type, nodeinfo->excluded ? 'T' : 'F', nodeinfo->reported_xmin, nodeinfo->reported_xmin_time); - if (nodeinfo->excluded) + /* + * If a node has not reported its status for + * GTM_REPORT_XMIN_DELAY_THRESHOLD and neither in the process of + * rejoining the cluster, don't include it in the GTM_GlobalXmin + * calculation + */ + if (nodeinfo->excluded && !nodeinfo->joining) continue; + /* + * Care only for datanodes and coordinators + */ if (nodeinfo->type != GTM_NODE_COORDINATOR && nodeinfo->type != GTM_NODE_DATANODE) continue; @@ -1087,6 +1120,7 @@ GTM_HandleGlobalXmin(GTM_PGXCNodeType type, char *node_name, current_time, GTM_REPORT_XMIN_DELAY_THRESHOLD)) { nodeinfo->excluded = true; + nodeinfo->joining = false; GTM_RWLockRelease(&nodeinfo->node_lock); continue; } diff --git a/src/include/gtm/register.h b/src/include/gtm/register.h index 0212a9ecf8..a50ffe0c21 100644 --- a/src/include/gtm/register.h +++ b/src/include/gtm/register.h @@ -62,6 +62,7 @@ typedef struct GTM_PGXCNodeInfo * Has the node timed out and be * excluded from xmin computation? */ + bool joining; /* Is the node joining back */ bool idle; /* Has the node been idle since * last report */ |