summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPavan Deolasee2015-12-08 08:21:57 +0000
committerPavan Deolasee2015-12-08 08:21:57 +0000
commitda8eb3b85b7bd560ab728534dc4dd3f670ddd42a (patch)
tree4668489350972a0b7816faefce41910e6b247076
parent5f5ef0503fca3d0dbc04f0c5adbbb3448dd57821 (diff)
Rework handling of idle nodes for xmin reporting and calculation
The local value of latestCompletedXid caps the xmin computation on an idle cluster. So the GTM now sends back an updated value of latestCompletedXid as part of the report-xmin response. Once latestCompletedXid is updated on the node, the next iteration will ensure that the local xmin is advanced on an idle server.
-rw-r--r--src/backend/access/transam/gtm.c13
-rw-r--r--src/backend/postmaster/clustermon.c55
-rw-r--r--src/backend/storage/ipc/procarray.c63
-rw-r--r--src/gtm/client/fe-protocol.c8
-rw-r--r--src/gtm/client/gtm_client.c37
-rw-r--r--src/gtm/main/gtm_standby.c5
-rw-r--r--src/gtm/main/gtm_txn.c9
-rw-r--r--src/gtm/recovery/register_common.c114
-rw-r--r--src/gtm/recovery/register_gtm.c14
-rw-r--r--src/include/access/gtm.h7
-rw-r--r--src/include/gtm/gtm_client.h18
-rw-r--r--src/include/gtm/register.h3
-rw-r--r--src/include/storage/procarray.h6
13 files changed, 124 insertions, 228 deletions
diff --git a/src/backend/access/transam/gtm.c b/src/backend/access/transam/gtm.c
index 50b6646c72..29e4dc3dbb 100644
--- a/src/backend/access/transam/gtm.c
+++ b/src/backend/access/transam/gtm.c
@@ -612,7 +612,7 @@ RenameSequenceGTM(char *seqname, const char *newseqname)
* Connection for registering is just used once then closed
*/
int
-RegisterGTM(GTM_PGXCNodeType type, GlobalTransactionId *xmin)
+RegisterGTM(GTM_PGXCNodeType type)
{
int ret;
@@ -621,7 +621,7 @@ RegisterGTM(GTM_PGXCNodeType type, GlobalTransactionId *xmin)
if (!conn)
return EOF;
- ret = node_register(conn, type, 0, PGXCNodeName, "", xmin);
+ ret = node_register(conn, type, 0, PGXCNodeName, "");
/* If something went wrong, retry once */
if (ret < 0)
@@ -629,8 +629,7 @@ RegisterGTM(GTM_PGXCNodeType type, GlobalTransactionId *xmin)
CloseGTM();
InitGTM();
if (conn)
- ret = node_register(conn, type, 0, PGXCNodeName, "",
- xmin);
+ ret = node_register(conn, type, 0, PGXCNodeName, "");
}
return ret;
@@ -688,8 +687,8 @@ ReportBarrierGTM(char *barrier_id)
}
int
-ReportGlobalXmin(GlobalTransactionId *gxid, GlobalTransactionId *global_xmin,
- bool isIdle)
+ReportGlobalXmin(GlobalTransactionId gxid, GlobalTransactionId *global_xmin,
+ GlobalTransactionId *latest_completed_xid)
{
int errcode = GTM_ERRCODE_UNKNOWN;
@@ -699,7 +698,7 @@ ReportGlobalXmin(GlobalTransactionId *gxid, GlobalTransactionId *global_xmin,
if (report_global_xmin(conn, PGXCNodeName,
IS_PGXC_COORDINATOR ? GTM_NODE_COORDINATOR : GTM_NODE_DATANODE,
- gxid, global_xmin, isIdle, &errcode))
+ gxid, global_xmin, latest_completed_xid, &errcode))
return errcode;
else
return 0;
diff --git a/src/backend/postmaster/clustermon.c b/src/backend/postmaster/clustermon.c
index 40377b2abe..d79aae8e1e 100644
--- a/src/backend/postmaster/clustermon.c
+++ b/src/backend/postmaster/clustermon.c
@@ -78,8 +78,8 @@ ClusterMonitorInit(void)
GTM_NODE_COORDINATOR;
GlobalTransactionId oldestXmin;
GlobalTransactionId newOldestXmin;
- GlobalTransactionId reportedXmin;
GlobalTransactionId lastGlobalXmin;
+ GlobalTransactionId latestCompletedXid;
int status;
am_clustermon = true;
@@ -124,15 +124,10 @@ ClusterMonitorInit(void)
SetProcessingMode(NormalProcessing);
- /*
- * Register this node with the GTM
- */
- oldestXmin = InvalidGlobalTransactionId;
- if (RegisterGTM(nodetype, &oldestXmin) < 0)
+ if (RegisterGTM(nodetype) < 0)
{
UnregisterGTM(nodetype);
- oldestXmin = InvalidGlobalTransactionId;
- if (RegisterGTM(nodetype, &oldestXmin) < 0)
+ if (RegisterGTM(nodetype) < 0)
{
ereport(LOG,
(errcode(ERRCODE_IO_ERROR),
@@ -141,12 +136,6 @@ ClusterMonitorInit(void)
}
/*
- * If the registration is successful, GTM would send us back current
- * GlobalXmin. Initialise our local state to the same value
- */
- ClusterMonitorSetReportedGlobalXmin(oldestXmin);
-
- /*
* If an exception is encountered, processing resumes here.
*
* This code is a stripped down version of PostgresMain error recovery.
@@ -208,31 +197,45 @@ ClusterMonitorInit(void)
{
struct timeval nap;
int rc;
- bool isIdle;
/*
* Compute RecentGlobalXmin, report it to the GTM and sleep for the set
* interval. Keep doing this forever
*/
- isIdle = false;
- reportedXmin = ClusterMonitorGetReportedGlobalXmin();
lastGlobalXmin = ClusterMonitorGetGlobalXmin();
- oldestXmin = GetOldestXminInternal(NULL, false, true, &isIdle,
- lastGlobalXmin, reportedXmin);
-
- if (GlobalTransactionIdPrecedes(oldestXmin, reportedXmin))
- oldestXmin = reportedXmin;
-
- if (GlobalTransactionIdPrecedes(oldestXmin, lastGlobalXmin))
- oldestXmin = lastGlobalXmin;
+ oldestXmin = GetOldestXminInternal(NULL, false, true, lastGlobalXmin);
- if ((status = ReportGlobalXmin(&oldestXmin, &newOldestXmin, isIdle)))
+ if ((status = ReportGlobalXmin(oldestXmin, &newOldestXmin,
+ &latestCompletedXid)))
{
elog(DEBUG2, "Failed to report RecentGlobalXmin to GTM - %d:%d",
status, newOldestXmin);
if (status == GTM_ERRCODE_TOO_OLD_XMIN ||
status == GTM_ERRCODE_NODE_EXCLUDED)
+ {
+ /*
+ * If we haven't seen a new transaction for a very long time, or
+ * were disconnected for a while, or were excluded from the xmin
+ * computation for any reason, our xmin calculation could be
+ * well in the past, especially because it's capped by the
+ * latestCompletedXid, which may not advance on an idle server.
+ * In such cases, use the value of latestCompletedXid as
+ * returned by GTM and then recompute local xmin.
+ *
+ * If the GTM's global xmin advances even further while we are
+ * ready with a new xmin, just repeat the entire exercise as
+ * long as GTM keeps returning us a more current value of
+ * latestCompletedXid and thus pushing forward our local xmin
+ * calculation
+ */
+ if (GlobalTransactionIdIsValid(latestCompletedXid) &&
+ TransactionIdPrecedes(oldestXmin, latestCompletedXid))
+ {
+ SetLatestCompletedXid(latestCompletedXid);
+ continue;
+ }
elog(PANIC, "Global xmin computation mismatch");
+ }
}
else
{
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 48840c24bb..420f27d17d 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -1220,8 +1220,8 @@ TransactionIdIsActive(TransactionId xid)
TransactionId
GetOldestXmin(Relation rel, bool ignoreVacuum)
{
- return GetOldestXminInternal(rel, ignoreVacuum, false, NULL,
- InvalidTransactionId, InvalidTransactionId);
+ return GetOldestXminInternal(rel, ignoreVacuum, false,
+ InvalidTransactionId);
}
/*
@@ -1245,8 +1245,7 @@ GetOldestXmin(Relation rel, bool ignoreVacuum)
*/
TransactionId
GetOldestXminInternal(Relation rel, bool ignoreVacuum, bool computeLocal,
- bool *isIdle, TransactionId lastGlobalXmin,
- TransactionId lastReportedXmin)
+ TransactionId lastGlobalXmin)
{
ProcArrayStruct *arrayP = procArray;
TransactionId result;
@@ -1292,8 +1291,6 @@ GetOldestXminInternal(Relation rel, bool ignoreVacuum, bool computeLocal,
else
TransactionIdAdvance(result);
- if (isIdle)
- *isIdle = true;
#else
Assert(TransactionIdIsNormal(result));
TransactionIdAdvance(result);
@@ -1344,28 +1341,10 @@ GetOldestXminInternal(Relation rel, bool ignoreVacuum, bool computeLocal,
result = xmin;
/*
- * If we found a normal xid or a transaction running with xmin set,
- * we are not idle
- */
- if (isIdle &&
- (TransactionIdIsNormal(xmin) || TransactionIdIsNormal(xid)))
- *isIdle = false;
-
- /*
- * If we see an xid or an xmin which precedes either the last
- * reported xmin or the GlobalXmin calculated by the
+ * If we see an xid or an xmin which precedes the GlobalXmin calculated by the
* Cluster Monitor process then it signals bad things and we must
* abort and restart the database server
*/
- if (TransactionIdIsValid(lastReportedXmin))
- {
- if ((TransactionIdIsValid(xmin) && TransactionIdPrecedes(xmin, lastReportedXmin)) ||
- (TransactionIdIsValid(xid) && TransactionIdPrecedes(xid,
- lastReportedXmin)))
- elog(PANIC, "Found xid (%d) or xmin (%d) precedes last "
- "reported xmin (%d)", xid, xmin, lastReportedXmin);
- }
-
if (TransactionIdIsValid(lastGlobalXmin))
{
if ((TransactionIdIsValid(xmin) && TransactionIdPrecedes(xmin, lastGlobalXmin)) ||
@@ -4386,3 +4365,37 @@ ProcArrayCheckXminConsistency(TransactionId global_xmin)
global_xmin);
}
}
+
+/*
+ * SetLatestCompletedXid
+ *		Advance ShmemVariableCache->latestCompletedXid to the given value.
+ *
+ * An invalid latestCompletedXid, or one that precedes the current shared
+ * value, is ignored, so this function never moves the shared value
+ * backwards.
+ *
+ * PANICs if any process in the proc array is found running with an xid that
+ * precedes or equals the new value.
+ */
+void
+SetLatestCompletedXid(TransactionId latestCompletedXid)
+{
+	int			index;
+	ProcArrayStruct *arrayP = procArray;
+
+	if (!TransactionIdIsValid(latestCompletedXid))
+		return;
+
+	/*
+	 * Take the same lock that every exit path below releases.  The proc
+	 * array is scanned and latestCompletedXid is written while it is held.
+	 */
+	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+
+	if (TransactionIdPrecedes(latestCompletedXid,
+							  ShmemVariableCache->latestCompletedXid))
+	{
+		LWLockRelease(ProcArrayLock);
+		return;
+	}
+
+	for (index = 0; index < arrayP->numProcs; index++)
+	{
+		int			pgprocno = arrayP->pgprocnos[index];
+		volatile PGXACT *pgxact = &allPgXact[pgprocno];
+		TransactionId pxid = pgxact->xid;
+
+		/*
+		 * Only a normal xid denotes a running transaction.
+		 * NOTE(review): TransactionIdPrecedesOrEquals is believed to compare
+		 * special xids as plain integers, so a process with no xid assigned
+		 * would otherwise always trip the PANIC below - confirm against
+		 * transam.c.
+		 */
+		if (!TransactionIdIsNormal(pxid))
+			continue;
+
+		if (TransactionIdPrecedesOrEquals(pxid, latestCompletedXid))
+			elog(PANIC, "Cannot set latestCompletedXid to %u while another "
+				 "process is running with an older xid %u",
+				 latestCompletedXid, pxid);
+	}
+
+	ShmemVariableCache->latestCompletedXid = latestCompletedXid;
+	LWLockRelease(ProcArrayLock);
+}
diff --git a/src/gtm/client/fe-protocol.c b/src/gtm/client/fe-protocol.c
index bc0a72ea93..abce635ed7 100644
--- a/src/gtm/client/fe-protocol.c
+++ b/src/gtm/client/fe-protocol.c
@@ -732,12 +732,6 @@ gtmpqParseSuccess(GTM_Conn *conn, GTM_Result *result)
}
result->gr_resdata.grd_node.node_name[result->gr_resdata.grd_node.len] = '\0';
- if (result->gr_type == NODE_UNREGISTER_RESULT)
- break;
-
- if (gtmpqGetInt((char *)&result->gr_resdata.grd_node.xmin, sizeof
- (GlobalTransactionId), conn))
- result->gr_status = GTM_RESULT_ERROR;
break;
case NODE_LIST_RESULT:
@@ -790,7 +784,7 @@ gtmpqParseSuccess(GTM_Conn *conn, GTM_Result *result)
break;
case REPORT_XMIN_RESULT:
- if (gtmpqGetnchar((char *)&result->gr_resdata.grd_report_xmin.reported_xmin,
+ if (gtmpqGetnchar((char *)&result->gr_resdata.grd_report_xmin.latest_completed_xid,
sizeof (GlobalTransactionId), conn))
{
result->gr_status = GTM_RESULT_ERROR;
diff --git a/src/gtm/client/gtm_client.c b/src/gtm/client/gtm_client.c
index 1f1f6f50fd..8910e68320 100644
--- a/src/gtm/client/gtm_client.c
+++ b/src/gtm/client/gtm_client.c
@@ -78,8 +78,7 @@ static int alter_sequence_internal(GTM_Conn *conn, GTM_SequenceKey key, GTM_Sequ
GTM_Sequence startval, GTM_Sequence lastval, bool cycle, bool is_restart, bool is_backup);
static int node_register_worker(GTM_Conn *conn, GTM_PGXCNodeType type, const char *host, GTM_PGXCNodePort port,
char *node_name, char *datafolder,
- GTM_PGXCNodeStatus status, bool is_backup,
- GlobalTransactionId *xmin);
+ GTM_PGXCNodeStatus status, bool is_backup);
static int node_unregister_worker(GTM_Conn *conn, GTM_PGXCNodeType type, const char * node_name, bool is_backup);
static int report_barrier_internal(GTM_Conn *conn, char *barrier_id, bool is_backup);
/*
@@ -1656,8 +1655,7 @@ int node_register(GTM_Conn *conn,
GTM_PGXCNodeType type,
GTM_PGXCNodePort port,
char *node_name,
- char *datafolder,
- GlobalTransactionId *xmin)
+ char *datafolder)
{
char host[1024];
int rc;
@@ -1669,7 +1667,7 @@ int node_register(GTM_Conn *conn,
}
return node_register_worker(conn, type, host, port, node_name, datafolder,
- NODE_CONNECTED, false, xmin);
+ NODE_CONNECTED, false);
}
int node_register_internal(GTM_Conn *conn,
@@ -1678,11 +1676,10 @@ int node_register_internal(GTM_Conn *conn,
GTM_PGXCNodePort port,
char *node_name,
char *datafolder,
- GTM_PGXCNodeStatus status,
- GlobalTransactionId *xmin)
+ GTM_PGXCNodeStatus status)
{
return node_register_worker(conn, type, host, port, node_name, datafolder,
- status, false, xmin);
+ status, false);
}
int bkup_node_register_internal(GTM_Conn *conn,
@@ -1691,11 +1688,10 @@ int bkup_node_register_internal(GTM_Conn *conn,
GTM_PGXCNodePort port,
char *node_name,
char *datafolder,
- GTM_PGXCNodeStatus status,
- GlobalTransactionId xmin)
+ GTM_PGXCNodeStatus status)
{
return node_register_worker(conn, type, host, port, node_name, datafolder,
- status, true, &xmin);
+ status, true);
}
static int node_register_worker(GTM_Conn *conn,
@@ -1705,8 +1701,7 @@ static int node_register_worker(GTM_Conn *conn,
char *node_name,
char *datafolder,
GTM_PGXCNodeStatus status,
- bool is_backup,
- GlobalTransactionId *xmin)
+ bool is_backup)
{
GTM_Result *res = NULL;
time_t finish_time;
@@ -1745,9 +1740,7 @@ static int node_register_worker(GTM_Conn *conn,
/* Data Folder (var-len) */
gtmpqPutnchar(datafolder, strlen(datafolder), conn) ||
/* Node Status */
- gtmpqPutInt(status, sizeof(GTM_PGXCNodeStatus), conn) ||
- /* Recent Xmin */
- gtmpqPutnchar((char *)xmin, sizeof (GlobalTransactionId), conn))
+ gtmpqPutInt(status, sizeof(GTM_PGXCNodeStatus), conn))
{
goto send_failed;
}
@@ -1783,8 +1776,6 @@ static int node_register_worker(GTM_Conn *conn,
{
Assert(res->gr_resdata.grd_node.type == type);
Assert((strcmp(res->gr_resdata.grd_node.node_name,node_name) == 0));
- if (xmin)
- *xmin = res->gr_resdata.grd_node.xmin;
}
return res->gr_status;
@@ -2427,17 +2418,17 @@ send_failed:
int
report_global_xmin(GTM_Conn *conn, const char *node_name,
- GTM_PGXCNodeType type, GlobalTransactionId *gxid,
+ GTM_PGXCNodeType type, GlobalTransactionId gxid,
GlobalTransactionId *global_xmin,
- bool isIdle, int *errcode)
+ GlobalTransactionId *latest_completed_xid,
+ int *errcode)
{
GTM_Result *res = NULL;
time_t finish_time;
if (gtmpqPutMsgStart('C', true, conn) ||
gtmpqPutInt(MSG_REPORT_XMIN, sizeof (GTM_MessageType), conn) ||
- gtmpqPutnchar((char *)gxid, sizeof(GlobalTransactionId), conn) ||
- gtmpqPutc(isIdle, conn) ||
+ gtmpqPutnchar((char *)&gxid, sizeof(GlobalTransactionId), conn) ||
gtmpqPutInt(type, sizeof (GTM_PGXCNodeType), conn) ||
gtmpqPutInt(strlen(node_name), sizeof (GTM_StrLen), conn) ||
gtmpqPutnchar(node_name, strlen(node_name), conn))
@@ -2471,7 +2462,7 @@ report_global_xmin(GTM_Conn *conn, const char *node_name,
if (res->gr_status == GTM_RESULT_OK)
{
- *gxid = res->gr_resdata.grd_report_xmin.reported_xmin;
+ *latest_completed_xid = res->gr_resdata.grd_report_xmin.latest_completed_xid;
*global_xmin = res->gr_resdata.grd_report_xmin.global_xmin;
*errcode = res->gr_resdata.grd_report_xmin.errcode;
}
diff --git a/src/gtm/main/gtm_standby.c b/src/gtm/main/gtm_standby.c
index 45b545b107..bc96c32793 100644
--- a/src/gtm/main/gtm_standby.c
+++ b/src/gtm/main/gtm_standby.c
@@ -220,7 +220,6 @@ gtm_standby_restore_node(void)
data[i].type, data[i].nodename, data[i].datafolder);
if (Recovery_PGXCNodeRegister(data[i].type, data[i].nodename, data[i].port,
data[i].proxyname, data[i].status,
- &data[i].reported_xmin,
data[i].ipaddress, data[i].datafolder, true,
-1 /* dummy socket */) != 0)
{
@@ -262,7 +261,7 @@ gtm_standby_register_self(const char *node_name, int port, const char *datadir)
rc = node_register_internal(GTM_ActiveConn, GTM_NODE_GTM, standbyHostName, standbyPortNumber,
standbyNodeName, standbyDataDir,
- NODE_DISCONNECTED, InvalidGlobalTransactionId);
+ NODE_DISCONNECTED);
if (rc < 0)
{
elog(DEBUG1, "Failed to register a standby-GTM status.");
@@ -295,7 +294,7 @@ gtm_standby_activate_self(void)
rc = node_register_internal(GTM_ActiveConn, GTM_NODE_GTM, standbyHostName, standbyPortNumber,
standbyNodeName, standbyDataDir,
- NODE_CONNECTED, InvalidGlobalTransactionId);
+ NODE_CONNECTED);
if (rc < 0)
{
diff --git a/src/gtm/main/gtm_txn.c b/src/gtm/main/gtm_txn.c
index 7514f95359..0185e4d151 100644
--- a/src/gtm/main/gtm_txn.c
+++ b/src/gtm/main/gtm_txn.c
@@ -2630,7 +2630,6 @@ ProcessReportXminCommand(Port *myport, StringInfo message, bool is_backup)
GTM_PGXCNodeType type;
GlobalTransactionId global_xmin;
int errcode;
- bool remoteIdle;
const char *data = pq_getmsgbytes(message, sizeof (gxid));
@@ -2640,9 +2639,6 @@ ProcessReportXminCommand(Port *myport, StringInfo message, bool is_backup)
errmsg("Message does not contain valid GXID")));
memcpy(&gxid, data, sizeof (gxid));
- /* Read number of running transactions */
- remoteIdle = pq_getmsgbyte(message);
-
/* Read Node Type */
type = pq_getmsgint(message, sizeof (GTM_PGXCNodeType));
@@ -2652,8 +2648,7 @@ ProcessReportXminCommand(Port *myport, StringInfo message, bool is_backup)
node_name[nodelen] = '\0';
pq_getmsgend(message);
- global_xmin = GTM_HandleGlobalXmin(type, node_name, &gxid, remoteIdle,
- &errcode);
+ global_xmin = GTM_HandleGlobalXmin(type, node_name, gxid, &errcode);
{
/*
@@ -2667,7 +2662,7 @@ ProcessReportXminCommand(Port *myport, StringInfo message, bool is_backup)
proxyhdr.ph_conid = myport->conn_id;
pq_sendbytes(&buf, (char *)&proxyhdr, sizeof (GTM_ProxyMsgHeader));
}
- pq_sendbytes(&buf, (char *)&gxid, sizeof (GlobalTransactionId));
+ pq_sendbytes(&buf, (char *)&GTMTransactions.gt_latestCompletedXid, sizeof (GlobalTransactionId));
pq_sendbytes(&buf, (char *)&global_xmin, sizeof (GlobalTransactionId));
pq_sendbytes(&buf, (char *)&errcode, sizeof (errcode));
pq_endmessage(myport, &buf);
diff --git a/src/gtm/recovery/register_common.c b/src/gtm/recovery/register_common.c
index 802bf902f3..ef6c0e5574 100644
--- a/src/gtm/recovery/register_common.c
+++ b/src/gtm/recovery/register_common.c
@@ -388,7 +388,6 @@ Recovery_PGXCNodeRegister(GTM_PGXCNodeType type,
GTM_PGXCNodePort port,
char *proxyname,
GTM_PGXCNodeStatus status,
- GlobalTransactionId *xmin,
char *ipaddress,
char *datafolder,
bool in_recovery,
@@ -417,7 +416,7 @@ Recovery_PGXCNodeRegister(GTM_PGXCNodeType type,
nodeinfo->ipaddress = pgxcnode_copy_char(ipaddress);
nodeinfo->status = status;
nodeinfo->socket = socket;
- nodeinfo->reported_xmin = *xmin;
+ nodeinfo->reported_xmin = InvalidGlobalTransactionId;
nodeinfo->reported_xmin_time = GTM_TimestampGetCurrent();
elog(DEBUG1, "Recovery_PGXCNodeRegister Request info: type=%d, nodename=%s, port=%d," \
@@ -438,9 +437,6 @@ Recovery_PGXCNodeRegister(GTM_PGXCNodeType type,
if (!in_recovery && errcode == 0)
Recovery_RecordRegisterInfo(nodeinfo, true);
- if (xmin)
- *xmin = nodeinfo->reported_xmin;
-
return errcode;
}
@@ -759,10 +755,8 @@ Recovery_PGXCNodeRegisterCoordProcess(char *coord_node, int coord_procid,
while (nodeinfo == NULL)
{
- GlobalTransactionId xmin = InvalidGlobalTransactionId;
int errcode = Recovery_PGXCNodeRegister(GTM_NODE_COORDINATOR, coord_node, 0, NULL,
NODE_CONNECTED,
- &xmin,
NULL, NULL, false, 0);
/*
@@ -930,7 +924,7 @@ GTM_InitNodeManager(void)
GlobalTransactionId
GTM_HandleGlobalXmin(GTM_PGXCNodeType type, char *node_name,
- GlobalTransactionId *reported_xmin, bool remoteIdle, int *errcode)
+ GlobalTransactionId reported_xmin, int *errcode)
{
GTM_PGXCNodeInfo *all_nodes[MAX_NODES];
int num_nodes;
@@ -938,15 +932,13 @@ GTM_HandleGlobalXmin(GTM_PGXCNodeType type, char *node_name,
GTM_PGXCNodeInfo *mynodeinfo;
int ii;
GlobalTransactionId global_xmin;
- GlobalTransactionId non_idle_global_xmin;
- GlobalTransactionId idle_global_xmin;
bool excludeSelf = false;
gtm_ListCell *elem;
*errcode = 0;
- elog(DEBUG1, "node_name: %s, remoteIdle: %d, reported_xmin: %d, global_xmin: %d",
- node_name, remoteIdle, *reported_xmin,
+ elog(DEBUG1, "node_name: %s, reported_xmin: %d, global_xmin: %d",
+ node_name, reported_xmin,
GTM_GlobalXmin);
mynodeinfo = pgxcnode_find_info(type, node_name);
@@ -981,7 +973,7 @@ GTM_HandleGlobalXmin(GTM_PGXCNodeType type, char *node_name,
*/
if ((mynodeinfo->excluded) &&
GlobalTransactionIdPrecedes(mynodeinfo->reported_xmin,
- GTM_GlobalXmin) && !remoteIdle)
+ GTM_GlobalXmin))
{
*errcode = GTM_ERRCODE_NODE_EXCLUDED;
GTM_RWLockRelease(&mynodeinfo->node_lock);
@@ -994,7 +986,7 @@ GTM_HandleGlobalXmin(GTM_PGXCNodeType type, char *node_name,
* reported in the past. If it ever happens, send an error back and let the
* remote node restart itself
*/
- if (!remoteIdle && GlobalTransactionIdPrecedes(*reported_xmin, mynodeinfo->reported_xmin))
+ if (GlobalTransactionIdPrecedes(reported_xmin, mynodeinfo->reported_xmin))
{
*errcode = GTM_ERRCODE_TOO_OLD_XMIN;
GTM_RWLockRelease(&mynodeinfo->node_lock);
@@ -1002,52 +994,18 @@ GTM_HandleGlobalXmin(GTM_PGXCNodeType type, char *node_name,
return InvalidGlobalTransactionId;
}
- elog(DEBUG1, "node_name: %s, remoteIdle: %d, reported_xmin: %d, nodeinfo->reported_xmin: %d",
- mynodeinfo->nodename, remoteIdle, *reported_xmin,
+ elog(DEBUG1, "node_name: %s, reported_xmin: %d, nodeinfo->reported_xmin: %d",
+ mynodeinfo->nodename, reported_xmin,
mynodeinfo->reported_xmin);
- /*
- * If the remote node is idle, there is a danger that it may keep reporting
- * a very old xmin (usually capped by latestCompletedXid). To handle such
- * cases, which can be quite common in a large cluster, we check if the
- * remote node has reported idle status and the reported xmin is same as
- * what it reported in the last cycle and mark such node as "idle".
- * Xmin reported by such a node is ignored and we compute xmin for it
- * locally, here on the GTM.
- *
- * There are two strategies we follow:
- *
- * 1. We compute the lower bound of xmins reported by all non-idle remote
- * nodes and assign that to this guy. This assumes that there is zero
- * chance that a currently active (non-idle) node will send something to
- * this guy which is older than the xmin computed
- *
- * 2. If all nodes are reporting their status as idle, we compute the lower
- * bound of xmins reported by all idle nodes. This guarantees that the
- * GlobalXmin can advance to a reasonable point even when all nodes have
- * turned idle.
- *
- * In any case, the remote node will do its own sanity check before
- * accepting the xmin computed by us and bail out if it doesn't agree with
- * that.
- */
- if (remoteIdle &&
- GlobalTransactionIdEquals(mynodeinfo->reported_xmin,
- *reported_xmin))
- mynodeinfo->idle = true;
- else
- {
- mynodeinfo->idle = false;
- mynodeinfo->reported_xmin = *reported_xmin;
- }
+ mynodeinfo->reported_xmin = reported_xmin;
mynodeinfo->excluded = false;
mynodeinfo->reported_xmin_time = current_time = GTM_TimestampGetCurrent();
GTM_RWLockRelease(&mynodeinfo->node_lock);
- /* Compute both, idle as well as non-idle xmin */
- non_idle_global_xmin = InvalidGlobalTransactionId;
- idle_global_xmin = InvalidGlobalTransactionId;
+ /* Compute global xmin */
+ global_xmin = InvalidGlobalTransactionId;
/*
* Hold the PGXCNodesLock in READ mode until we are done with the
@@ -1063,12 +1021,10 @@ GTM_HandleGlobalXmin(GTM_PGXCNodeType type, char *node_name,
for (ii = 0; ii < num_nodes; ii++)
{
- GlobalTransactionId xid;
GTM_PGXCNodeInfo *nodeinfo = all_nodes[ii];
- elog(DEBUG1, "nodeinfo %p, type: %d, exclude %c, idle %c, xmin %d, time %lld",
+ elog(DEBUG1, "nodeinfo %p, type: %d, exclude %c, xmin %d, time %lld",
nodeinfo, nodeinfo->type, nodeinfo->excluded ? 'T' : 'F',
- nodeinfo->idle ? 'T' : 'F',
nodeinfo->reported_xmin, nodeinfo->reported_xmin_time);
if (nodeinfo->excluded)
@@ -1097,13 +1053,6 @@ GTM_HandleGlobalXmin(GTM_PGXCNodeType type, char *node_name,
}
/*
- * If the remote node is idle, don't include its reported xmin in the
- * calculation which could be quite stale
- */
- if (mynodeinfo->idle && (nodeinfo == mynodeinfo))
- continue;
-
- /*
* Now grab the lock on the nodeinfo so that no further changes are
* possible to its state.
*/
@@ -1119,49 +1068,10 @@ GTM_HandleGlobalXmin(GTM_PGXCNodeType type, char *node_name,
continue;
}
- xid = nodeinfo->reported_xmin;
- if (!nodeinfo->idle)
- {
- if (!GlobalTransactionIdIsValid(non_idle_global_xmin))
- non_idle_global_xmin = xid;
- else if (GlobalTransactionIdPrecedes(xid, non_idle_global_xmin))
- non_idle_global_xmin = xid;
- }
- else
- {
- if (!GlobalTransactionIdIsValid(idle_global_xmin))
- idle_global_xmin = xid;
- else if (GlobalTransactionIdPrecedes(xid, idle_global_xmin))
- idle_global_xmin = xid;
- }
GTM_RWLockRelease(&nodeinfo->node_lock);
}
/*
- * If the remote node is idle, a new xmin might have been computed for it
- * by us. We first try for non_idle_global_xmin, but if all nodes are idle,
- * we use the idle_global_xmin
- */
- if (mynodeinfo && mynodeinfo->idle)
- {
- GTM_RWLockAcquire(&mynodeinfo->node_lock, GTM_LOCKMODE_WRITE);
- if (GlobalTransactionIdIsValid(non_idle_global_xmin))
- {
- if (GlobalTransactionIdFollows(non_idle_global_xmin,
- mynodeinfo->reported_xmin))
- *reported_xmin = mynodeinfo->reported_xmin = non_idle_global_xmin;
- }
- else if (GlobalTransactionIdIsValid(idle_global_xmin))
- {
- if (GlobalTransactionIdFollows(non_idle_global_xmin,
- mynodeinfo->reported_xmin))
- *reported_xmin = mynodeinfo->reported_xmin = idle_global_xmin;
- }
- mynodeinfo->reported_xmin_time = current_time;
- GTM_RWLockRelease(&mynodeinfo->node_lock);
- }
-
- /*
* Now all nodes that must be excluded from GlobalXmin computation have
* been marked correctly and xmin computed and set for an idle remote node,
* if so. Lets compute the GlobalXmin
diff --git a/src/gtm/recovery/register_gtm.c b/src/gtm/recovery/register_gtm.c
index dd9707c1f5..67ae760bc1 100644
--- a/src/gtm/recovery/register_gtm.c
+++ b/src/gtm/recovery/register_gtm.c
@@ -63,7 +63,6 @@ ProcessPGXCNodeRegister(Port *myport, StringInfo message, bool is_backup)
int len;
StringInfoData buf;
GTM_PGXCNodeStatus status;
- GlobalTransactionId xmin;
/* Read Node Type */
memcpy(&type, pq_getmsgbytes(message, sizeof (GTM_PGXCNodeType)),
@@ -108,13 +107,10 @@ ProcessPGXCNodeRegister(Port *myport, StringInfo message, bool is_backup)
status = pq_getmsgint(message, sizeof (GTM_PGXCNodeStatus));
- memcpy(&xmin, pq_getmsgbytes(message, sizeof (GlobalTransactionId)),
- sizeof (GlobalTransactionId));
-
elog(DEBUG1,
"ProcessPGXCNodeRegister: ipaddress = \"%s\", node name = \"%s\", proxy name = \"%s\", "
- "datafolder \"%s\", status = %d, xmin = %d",
- ipaddress, node_name, proxyname, datafolder, status, xmin);
+ "datafolder \"%s\", status = %d",
+ ipaddress, node_name, proxyname, datafolder, status);
if ((type!=GTM_NODE_GTM_PROXY) &&
(type!=GTM_NODE_GTM_PROXY_POSTMASTER) &&
@@ -166,7 +162,7 @@ ProcessPGXCNodeRegister(Port *myport, StringInfo message, bool is_backup)
}
if (Recovery_PGXCNodeRegister(type, node_name, port,
- proxyname, status, &xmin,
+ proxyname, status,
ipaddress, datafolder, false, myport->sock))
{
ereport(ERROR,
@@ -207,8 +203,7 @@ ProcessPGXCNodeRegister(Port *myport, StringInfo message, bool is_backup)
port,
node_name,
datafolder,
- status,
- xmin);
+ status);
elog(DEBUG1, "node_register_internal() returns rc %d.", _rc);
@@ -240,7 +235,6 @@ ProcessPGXCNodeRegister(Port *myport, StringInfo message, bool is_backup)
pq_sendint(&buf, strlen(node_name), 4);
/* Node name (var-len) */
pq_sendbytes(&buf, node_name, strlen(node_name));
- pq_sendint(&buf, xmin, sizeof (GlobalTransactionId));
pq_endmessage(myport, &buf);
if (myport->remote_type != GTM_NODE_GTM_PROXY)
diff --git a/src/include/access/gtm.h b/src/include/access/gtm.h
index 27383d8cac..1e948b6b02 100644
--- a/src/include/access/gtm.h
+++ b/src/include/access/gtm.h
@@ -44,7 +44,7 @@ extern int CommitPreparedTranGTM(GlobalTransactionId gxid,
extern GTM_Snapshot GetSnapshotGTM(GlobalTransactionId gxid, bool canbe_grouped);
/* Node registration APIs with GTM */
-extern int RegisterGTM(GTM_PGXCNodeType type, GlobalTransactionId *xmin);
+extern int RegisterGTM(GTM_PGXCNodeType type);
extern int UnregisterGTM(GTM_PGXCNodeType type);
/* Sequence interface APIs with GTM */
@@ -62,6 +62,7 @@ extern int DropSequenceGTM(char *name, GTM_SequenceKeyType type);
extern int RenameSequenceGTM(char *seqname, const char *newseqname);
/* Barrier */
extern int ReportBarrierGTM(char *barrier_id);
-extern int ReportGlobalXmin(GlobalTransactionId *gxid,
- GlobalTransactionId *global_xmin, bool isIdle);
+extern int ReportGlobalXmin(GlobalTransactionId gxid,
+ GlobalTransactionId *global_xmin,
+ GlobalTransactionId *latest_completed_xid);
#endif /* ACCESS_GTM_H */
diff --git a/src/include/gtm/gtm_client.h b/src/include/gtm/gtm_client.h
index d21ba44297..ade4b682f3 100644
--- a/src/include/gtm/gtm_client.h
+++ b/src/include/gtm/gtm_client.h
@@ -123,7 +123,7 @@ typedef union GTM_ResultData
struct
{
- GlobalTransactionId reported_xmin;
+ GlobalTransactionId latest_completed_xid;
GlobalTransactionId global_xmin;
int errcode;
} grd_report_xmin; /* REPORT_XMIN */
@@ -260,17 +260,14 @@ int node_register(GTM_Conn *conn,
GTM_PGXCNodeType type,
GTM_PGXCNodePort port,
char *node_name,
- char *datafolder,
- GlobalTransactionId *xmin);
+ char *datafolder);
int node_register(GTM_Conn *conn, GTM_PGXCNodeType type, GTM_PGXCNodePort port,
- char *node_name, char *datafolder, GlobalTransactionId *xmin);
+ char *node_name, char *datafolder);
int node_register_internal(GTM_Conn *conn, GTM_PGXCNodeType type, const char *host, GTM_PGXCNodePort port, char *node_name,
- char *datafolder, GTM_PGXCNodeStatus status,
- GlobalTransactionId *xmin);
+ char *datafolder, GTM_PGXCNodeStatus status);
int bkup_node_register_internal(GTM_Conn *conn, GTM_PGXCNodeType type, const char *host, GTM_PGXCNodePort port,
char *node_name, char *datafolder,
- GTM_PGXCNodeStatus status,
- GlobalTransactionId xmin);
+ GTM_PGXCNodeStatus status);
int node_unregister(GTM_Conn *conn, GTM_PGXCNodeType type, const char *node_name);
int bkup_node_unregister(GTM_Conn *conn, GTM_PGXCNodeType type, const char * node_name);
@@ -279,9 +276,10 @@ char *node_get_local_addr(GTM_Conn *conn, char *buf, size_t buflen, int *rc);
int register_session(GTM_Conn *conn, const char *coord_name, int coord_procid,
int coord_backendid);
int report_global_xmin(GTM_Conn *conn, const char *node_name,
- GTM_PGXCNodeType type, GlobalTransactionId *gxid,
+ GTM_PGXCNodeType type, GlobalTransactionId gxid,
GlobalTransactionId *global_xmin,
- bool isIdle, int *errcode);
+ GlobalTransactionId *latest_completed_xid,
+ int *errcode);
/*
* Sequence Management API
diff --git a/src/include/gtm/register.h b/src/include/gtm/register.h
index 98c1dc4a2d..16e46e35b6 100644
--- a/src/include/gtm/register.h
+++ b/src/include/gtm/register.h
@@ -87,7 +87,6 @@ int Recovery_PGXCNodeRegister(GTM_PGXCNodeType type,
GTM_PGXCNodePort port,
char *proxyname,
GTM_PGXCNodeStatus status,
- GlobalTransactionId *xmin,
char *ipaddress,
char *datafolder,
bool in_recovery,
@@ -116,5 +115,5 @@ void ProcessGTMEndBackup(Port *myport, StringInfo message);
void GTM_InitNodeManager(void);
GlobalTransactionId GTM_HandleGlobalXmin(GTM_PGXCNodeType type, char *node_name,
- GlobalTransactionId *reported_xmin, bool remoteIdle, int *errcode);
+ GlobalTransactionId reported_xmin, int *errcode);
#endif /* GTM_NODE_H */
diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h
index a63cb067aa..f17fc22e65 100644
--- a/src/include/storage/procarray.h
+++ b/src/include/storage/procarray.h
@@ -68,7 +68,8 @@ extern Snapshot GetSnapshotData(Snapshot snapshot);
extern bool ProcArrayInstallImportedXmin(TransactionId xmin,
TransactionId sourcexid);
extern bool ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc);
-void ProcArrayCheckXminConsistency(TransactionId global_xmin);
+extern void ProcArrayCheckXminConsistency(TransactionId global_xmin);
+extern void SetLatestCompletedXid(TransactionId latestCompletedXid);
extern RunningTransactions GetRunningTransactionData(void);
@@ -76,8 +77,7 @@ extern bool TransactionIdIsInProgress(TransactionId xid);
extern bool TransactionIdIsActive(TransactionId xid);
extern TransactionId GetOldestXmin(Relation rel, bool ignoreVacuum);
extern TransactionId GetOldestXminInternal(Relation rel, bool ignoreVacuum,
- bool computeLocal, bool *isIdle, TransactionId lastGlobalXmin,
- TransactionId lastReportedXmin);
+ bool computeLocal, TransactionId lastGlobalXmin);
extern TransactionId GetOldestActiveTransactionId(void);
extern TransactionId GetOldestSafeDecodingTransactionId(void);