summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPavan Deolasee2014-09-19 12:14:57 +0000
committerPavan Deolasee2014-09-23 07:28:51 +0000
commit0aab895a0b583de3b3ee2763c993f41e8368ca59 (patch)
tree0b7fd0cb0028b664bc653f5934e3b47b1e72755d
parent3516333bbc4c300397b0fd6bc4e87c9cd7b89e44 (diff)
Fix assorted bugs in GTM standby and failover.
1. GTM standby should insert the transaction info structure at the specified "handle" in the array. GTM relies on that index to fetch the right transaction info given the handle. 2. GTM master should not try to connect to itself. We assume GTM master and standby to have different node names. 3. Fix a bug in tracking the next GXID to be assigned by the standby after failover. Accidentally, we were starting from the last XID assigned by the old master. 4. Fix a bug in the GTM standby connection and synchronization logic. We must not release the locks on the GTM threads until the backup is complete and the standby is ready to accept connections. Failing to do so correctly can result in certain messages getting lost while standby is connecting and catching up. 5. The GTM master thread which is serving backup to a joining standby must not try to connect to the standby before the backup is complete. This results in a deadlock. 6. Do not remove transaction info structures from GTM standby if the GTM master disconnects from it. That defeats the purpose of failover. 7. Add a missing "break" after handling MSG_END_BACKUP message. Surprised that this was not caught earlier because the subsequent statement should have aborted the thread. 8. Finally, stop MSG_BACKEND_DISCONNECT handling for now. This is leading to several issues, but must be looked at carefully again.
-rw-r--r--src/gtm/common/gtm_utils.c3
-rw-r--r--src/gtm/main/gtm_standby.c68
-rw-r--r--src/gtm/main/gtm_txn.c22
-rw-r--r--src/gtm/main/main.c61
-rw-r--r--src/gtm/recovery/register_common.c4
-rw-r--r--src/gtm/recovery/register_gtm.c14
6 files changed, 119 insertions, 53 deletions
diff --git a/src/gtm/common/gtm_utils.c b/src/gtm/common/gtm_utils.c
index 081660ec57..a9c79cf301 100644
--- a/src/gtm/common/gtm_utils.c
+++ b/src/gtm/common/gtm_utils.c
@@ -36,6 +36,9 @@ static struct enum_name message_name_tab[] =
{MSG_BKUP_NODE_REGISTER, "MSG_BKUP_NODE_REGISTER"},
{MSG_NODE_UNREGISTER, "MSG_NODE_UNREGISTER"},
{MSG_BKUP_NODE_UNREGISTER, "MSG_BKUP_NODE_UNREGISTER"},
+#ifdef XCP
+ {MSG_REGISTER_SESSION, "MSG_REGISTER_SESSION"},
+#endif
{MSG_NODE_LIST, "MSG_NODE_LIST"},
{MSG_NODE_BEGIN_REPLICATION_INIT, "MSG_NODE_BEGIN_REPLICATION_INIT"},
{MSG_NODE_END_REPLICATION_INIT, "MSG_NODE_END_REPLICATION_INIT"},
diff --git a/src/gtm/main/gtm_standby.c b/src/gtm/main/gtm_standby.c
index 741d5eaad2..b4701a42e5 100644
--- a/src/gtm/main/gtm_standby.c
+++ b/src/gtm/main/gtm_standby.c
@@ -135,54 +135,66 @@ gtm_standby_restore_gxid(void)
for (i = 0; i < num_txn; i++)
{
- GTMTransactions.gt_transactions_array[i].gti_handle = txn.gt_transactions_array[i].gti_handle;
- GTMTransactions.gt_transactions_array[i].gti_thread_id = txn.gt_transactions_array[i].gti_thread_id;
- GTMTransactions.gt_transactions_array[i].gti_in_use = txn.gt_transactions_array[i].gti_in_use;
- GTMTransactions.gt_transactions_array[i].gti_gxid = txn.gt_transactions_array[i].gti_gxid;
- GTMTransactions.gt_transactions_array[i].gti_state = txn.gt_transactions_array[i].gti_state;
- GTMTransactions.gt_transactions_array[i].gti_coordname = txn.gt_transactions_array[i].gti_coordname;
- GTMTransactions.gt_transactions_array[i].gti_xmin = txn.gt_transactions_array[i].gti_xmin;
- GTMTransactions.gt_transactions_array[i].gti_isolevel = txn.gt_transactions_array[i].gti_isolevel;
- GTMTransactions.gt_transactions_array[i].gti_readonly = txn.gt_transactions_array[i].gti_readonly;
- GTMTransactions.gt_transactions_array[i].gti_backend_id = txn.gt_transactions_array[i].gti_backend_id;
+ int handle = txn.gt_transactions_array[i].gti_handle;
+
+ GTMTransactions.gt_transactions_array[handle].gti_handle = txn.gt_transactions_array[i].gti_handle;
+
+ /*
+ * Don't copy the gti_thread_id. It's the thread id of the thread
+ * running on the GTM master and makes no sense on the standby.
+ *
+ * XXX How do we clean up these transaction info structures if the
+ * connection goes away after standby is promoted? We need some
+ * mechanism to set ownership of the in-progress transactions once a
+ * standby is promoted
+ */
+ GTMTransactions.gt_transactions_array[handle].gti_thread_id = -1;
+ GTMTransactions.gt_transactions_array[handle].gti_in_use = txn.gt_transactions_array[i].gti_in_use;
+ GTMTransactions.gt_transactions_array[handle].gti_gxid = txn.gt_transactions_array[i].gti_gxid;
+ GTMTransactions.gt_transactions_array[handle].gti_state = txn.gt_transactions_array[i].gti_state;
+ GTMTransactions.gt_transactions_array[handle].gti_coordname = txn.gt_transactions_array[i].gti_coordname;
+ GTMTransactions.gt_transactions_array[handle].gti_xmin = txn.gt_transactions_array[i].gti_xmin;
+ GTMTransactions.gt_transactions_array[handle].gti_isolevel = txn.gt_transactions_array[i].gti_isolevel;
+ GTMTransactions.gt_transactions_array[handle].gti_readonly = txn.gt_transactions_array[i].gti_readonly;
+ GTMTransactions.gt_transactions_array[handle].gti_backend_id = txn.gt_transactions_array[i].gti_backend_id;
if (txn.gt_transactions_array[i].nodestring == NULL )
- GTMTransactions.gt_transactions_array[i].nodestring = NULL;
+ GTMTransactions.gt_transactions_array[handle].nodestring = NULL;
else
- GTMTransactions.gt_transactions_array[i].nodestring = txn.gt_transactions_array[i].nodestring;
+ GTMTransactions.gt_transactions_array[handle].nodestring = txn.gt_transactions_array[i].nodestring;
/* GID */
if (txn.gt_transactions_array[i].gti_gid == NULL )
- GTMTransactions.gt_transactions_array[i].gti_gid = NULL;
+ GTMTransactions.gt_transactions_array[handle].gti_gid = NULL;
else
- GTMTransactions.gt_transactions_array[i].gti_gid = txn.gt_transactions_array[i].gti_gid;
+ GTMTransactions.gt_transactions_array[handle].gti_gid = txn.gt_transactions_array[i].gti_gid;
/* copy GTM_SnapshotData */
- GTMTransactions.gt_transactions_array[i].gti_current_snapshot.sn_xmin =
+ GTMTransactions.gt_transactions_array[handle].gti_current_snapshot.sn_xmin =
txn.gt_transactions_array[i].gti_current_snapshot.sn_xmin;
- GTMTransactions.gt_transactions_array[i].gti_current_snapshot.sn_xmax =
+ GTMTransactions.gt_transactions_array[handle].gti_current_snapshot.sn_xmax =
txn.gt_transactions_array[i].gti_current_snapshot.sn_xmax;
- GTMTransactions.gt_transactions_array[i].gti_current_snapshot.sn_recent_global_xmin =
+ GTMTransactions.gt_transactions_array[handle].gti_current_snapshot.sn_recent_global_xmin =
txn.gt_transactions_array[i].gti_current_snapshot.sn_recent_global_xmin;
- GTMTransactions.gt_transactions_array[i].gti_current_snapshot.sn_xcnt =
+ GTMTransactions.gt_transactions_array[handle].gti_current_snapshot.sn_xcnt =
txn.gt_transactions_array[i].gti_current_snapshot.sn_xcnt;
- GTMTransactions.gt_transactions_array[i].gti_current_snapshot.sn_xip =
+ GTMTransactions.gt_transactions_array[handle].gti_current_snapshot.sn_xip =
txn.gt_transactions_array[i].gti_current_snapshot.sn_xip;
/* end of copying GTM_SnapshotData */
- GTMTransactions.gt_transactions_array[i].gti_snapshot_set =
+ GTMTransactions.gt_transactions_array[handle].gti_snapshot_set =
txn.gt_transactions_array[i].gti_snapshot_set;
- GTMTransactions.gt_transactions_array[i].gti_vacuum =
+ GTMTransactions.gt_transactions_array[handle].gti_vacuum =
txn.gt_transactions_array[i].gti_vacuum;
/*
* Is this correct? Is GTM_TXN_COMMITTED transaction categorized as "open"?
*/
- if (GTMTransactions.gt_transactions_array[i].gti_state != GTM_TXN_ABORTED)
+ if (GTMTransactions.gt_transactions_array[handle].gti_state != GTM_TXN_ABORTED)
{
GTMTransactions.gt_open_transactions =
gtm_lappend(GTMTransactions.gt_open_transactions,
- &GTMTransactions.gt_transactions_array[i]);
+ &GTMTransactions.gt_transactions_array[handle]);
}
}
@@ -333,7 +345,15 @@ find_standby_node_info(void)
node[i]->port,
node[i]->status);
- if ( (strcmp(standbyNodeName, node[i]->nodename) != 0) &&
+ /*
+ * Must not try and connect to ourself. That will lead to a deadlock
+ *
+ * !!TODO Ideally we should not be registered on the GTM, but when a
+ * failover happens, the standby may carry forward the node
+ * registration information previously sent by the original master as a
+ * backup. This needs to be studied further
+ */
+ if (strcmp(node[i]->nodename, NodeName) &&
node[i]->status == NODE_CONNECTED)
return node[i];
}
diff --git a/src/gtm/main/gtm_txn.c b/src/gtm/main/gtm_txn.c
index 6fe3370ba9..f55a3f16d3 100644
--- a/src/gtm/main/gtm_txn.c
+++ b/src/gtm/main/gtm_txn.c
@@ -231,7 +231,8 @@ GTM_HandleToTransactionInfo(GTM_TransactionHandle handle)
if (!gtm_txninfo->gti_in_use)
{
ereport(WARNING,
- (ERANGE, errmsg("Invalid transaction handle, txn_info not in use")));
+ (ERANGE, errmsg("Invalid transaction handle (%d), txn_info not in use",
+ handle)));
return NULL;
}
@@ -272,8 +273,9 @@ GTM_RemoveTransInfoMulti(GTM_TransactionInfo *gtm_txninfo[], int txn_count)
GTMTransactions.gt_latestCompletedXid))
GTMTransactions.gt_latestCompletedXid = gtm_txninfo[ii]->gti_gxid;
- elog(DEBUG1, "GTM_RemoveTransInfoMulti: removing transaction id %u, %lu",
- gtm_txninfo[ii]->gti_gxid, gtm_txninfo[ii]->gti_thread_id);
+ elog(DEBUG1, "GTM_RemoveTransInfoMulti: removing transaction id %u, %lu, handle (%d)",
+ gtm_txninfo[ii]->gti_gxid, gtm_txninfo[ii]->gti_thread_id,
+ gtm_txninfo[ii]->gti_handle);
/*
* Now mark the transaction as aborted and mark the structure as not-in-use
@@ -328,8 +330,9 @@ GTM_RemoveAllTransInfos(int backend_id)
GTMTransactions.gt_latestCompletedXid))
GTMTransactions.gt_latestCompletedXid = gtm_txninfo->gti_gxid;
- elog(DEBUG1, "GTM_RemoveAllTransInfos: removing transaction id %u, %lu:%lu",
- gtm_txninfo->gti_gxid, gtm_txninfo->gti_thread_id, thread_id);
+ elog(DEBUG1, "GTM_RemoveAllTransInfos: removing transaction id %u, %lu:%lu %d:%d",
+ gtm_txninfo->gti_gxid, gtm_txninfo->gti_thread_id,
+ thread_id, gtm_txninfo->gti_backend_id, backend_id);
/*
* Now mark the transaction as aborted and mark the structure as not-in-use
*/
@@ -823,6 +826,9 @@ GTM_BkupBeginTransactionMulti(char *coord_name,
txn[kk]);
return;
}
+
+ elog(DEBUG1, "GTM_BkupBeginTransactionMulti: handle(%u)", txn[kk]);
+
init_GTM_TransactionInfo(gtm_txninfo, coord_name, txn[kk], isolevel[kk], connid[kk], readonly[kk]);
GTMTransactions.gt_lastslot = txn[kk];
GTMTransactions.gt_open_transactions = gtm_lappend(GTMTransactions.gt_open_transactions, gtm_txninfo);
@@ -1295,10 +1301,14 @@ GTM_BkupBeginTransactionGetGXIDMulti(char *coord_name,
init_GTM_TransactionInfo(gtm_txninfo, coord_name, txn[ii], isolevel[ii], connid[ii], readonly[ii]);
GTMTransactions.gt_lastslot = txn[ii];
gtm_txninfo->gti_gxid = gxid[ii];
+
+ elog(DEBUG1, "GTM_BkupBeginTransactionGetGXIDMulti: xid(%u), handle(%u)",
+ gxid[ii], txn[ii]);
+
/*
* Advance next gxid
*/
- if (GlobalTransactionIdPrecedes(GTMTransactions.gt_nextXid, gxid[ii]))
+ if (GlobalTransactionIdPrecedesOrEquals(GTMTransactions.gt_nextXid, gxid[ii]))
GTMTransactions.gt_nextXid = gxid[ii] + 1;
if (!GlobalTransactionIdIsValid(GTMTransactions.gt_nextXid)) /* Handle wrap around too */
GTMTransactions.gt_nextXid = FirstNormalGlobalTransactionId;
diff --git a/src/gtm/main/main.c b/src/gtm/main/main.c
index 8cbddd392b..9864e66a3d 100644
--- a/src/gtm/main/main.c
+++ b/src/gtm/main/main.c
@@ -679,13 +679,6 @@ main(int argc, char *argv[])
exit(1);
}
elog(LOG, "Restoring node information from the active-GTM succeeded.");
-
- if (!gtm_standby_end_backup())
- {
- elog(ERROR, "Failed to setup normal standby mode to the active-GTM.");
- exit(1);
- }
- elog(LOG, "Started to run as GTM-Standby.");
}
else
{
@@ -749,12 +742,32 @@ main(int argc, char *argv[])
*/
if (Recovery_IsStandby())
{
+ /*
+ * Before ending the backup, inform the GTM master that we are now
+ * ready to accept connections and mark ourselves as CONNECTED. All GTM
+ * threads are still blocked at this point and when they are unlocked,
+ * we will be ready to accept new connections
+ */
if (!gtm_standby_activate_self())
{
elog(ERROR, "Failed to update the standby-GTM status as \"CONNECTED\".");
exit(1);
}
elog(DEBUG1, "Updating the standby-GTM status as \"CONNECTED\" succeeded.");
+
+ /*
+ * GTM master can now start serving incoming requests. Before it serves
+ * any request, it will open a connection with us and start copying all
+ * those messages. So we are guaranteed to see each operation, either
+ * in the backup we took or as GTM master copies those messages
+ */
+ if (!gtm_standby_end_backup())
+ {
+ elog(ERROR, "Failed to setup normal standby mode to the active-GTM.");
+ exit(1);
+ }
+ elog(LOG, "Started to run as GTM-Standby.");
+
if (!gtm_standby_finish_startup())
{
elog(ERROR, "Failed to close the initial connection to the active-GTM.");
@@ -1174,8 +1187,14 @@ GTM_ThreadMain(void *argp)
* Maybe the following lines can be a separate function. At present, this is done only here so
* I'll leave them here. K.Suzuki, Nov.29, 2011
* Please note that we don't check if it is not in the standby mode to allow cascased standby.
+ *
+ * Also ensure that we don't try to connect just yet if we are
+ * responsible for serving the BACKUP request from the standby.
+ * Otherwise, this will lead to a deadlock
*/
- if (GTMThreads->gt_standby_ready && thrinfo->thr_conn->standby == NULL)
+ if (GTMThreads->gt_standby_ready &&
+ thrinfo->thr_conn->standby == NULL &&
+ thrinfo->thr_status != GTM_THREAD_BACKUP)
{
/* Connect to GTM-Standby */
thrinfo->thr_conn->standby = gtm_standby_connect_to_standby();
@@ -1197,12 +1216,18 @@ GTM_ThreadMain(void *argp)
break;
case 'X':
+ elog(DEBUG1, "Removing all transaction infos - qtype:X");
case EOF:
/*
* Connection termination request
- * Remove all transactions opened within the thread
+ * Remove all transactions opened within the thread. Note that
+ * we don't remove transaction infos if we are a standby and
+ * the transaction infos actually correspond to in-progress
+ * transactions on the master
*/
- GTM_RemoveAllTransInfos(-1);
+ elog(DEBUG1, "Removing all transaction infos - qtype:EOF");
+ if (!Recovery_IsStandby())
+ GTM_RemoveAllTransInfos(-1);
/* Disconnect node if necessary */
Recovery_PGXCNodeDisconnect(thrinfo->thr_conn->con_port);
@@ -1270,9 +1295,7 @@ ProcessCommand(Port *myport, StringInfo input_message)
* The next line will have some overhead. Better to be in
* compile option.
*/
-#ifdef GTM_DEBUG
- elog(DEBUG3, "mtype = %s (%d).", gtm_util_message_name(mtype), (int)mtype);
-#endif
+ elog(DEBUG1, "mtype = %s (%d).", gtm_util_message_name(mtype), (int)mtype);
switch (mtype)
{
@@ -1294,6 +1317,7 @@ ProcessCommand(Port *myport, StringInfo input_message)
break;
case MSG_END_BACKUP:
ProcessGTMEndBackup(myport, input_message);
+ break;
case MSG_NODE_BEGIN_REPLICATION_INIT:
case MSG_NODE_END_REPLICATION_INIT:
case MSG_TXN_BEGIN:
@@ -1358,8 +1382,17 @@ ProcessCommand(Port *myport, StringInfo input_message)
break;
case MSG_BACKEND_DISCONNECT:
+ elog(DEBUG1, "MSG_BACKEND_DISCONNECT received - removing all txn infos");
+ /*
+ * !!TODO The original code used to remove all transaction info
+ * structures with the given ph_conid stored in gti_backend_id. But
+ * we are seeing several issues because of that during GTM
+ * failover. So disable the code for now as we further investigate
+ * the code
+ */
+#ifdef NOT_USED
GTM_RemoveAllTransInfos(proxyhdr.ph_conid);
-
+#endif
/* Mark PGXC Node as disconnected if backend disconnected is postmaster */
ProcessPGXCNodeBackendDisconnect(myport, input_message);
break;
diff --git a/src/gtm/recovery/register_common.c b/src/gtm/recovery/register_common.c
index 9f579a8c58..8f4f05042b 100644
--- a/src/gtm/recovery/register_common.c
+++ b/src/gtm/recovery/register_common.c
@@ -252,7 +252,9 @@ pgxcnode_add_info(GTM_PGXCNodeInfo *nodeinfo)
GTM_RWLockRelease(&bucket->nhb_lock);
ereport(LOG,
(EEXIST,
- errmsg("Node with the given ID number already exists")));
+ errmsg("Node with the given ID number already exists - %s %d:%d",
+ nodeinfo->nodename, nodeinfo->status,
+ nodeinfo->type )));
return EEXIST;
}
else
diff --git a/src/gtm/recovery/register_gtm.c b/src/gtm/recovery/register_gtm.c
index bb7f433404..72ebf92e9a 100644
--- a/src/gtm/recovery/register_gtm.c
+++ b/src/gtm/recovery/register_gtm.c
@@ -105,12 +105,12 @@ ProcessPGXCNodeRegister(Port *myport, StringInfo message, bool is_backup)
memcpy(datafolder, (char *)pq_getmsgbytes(message, len), len);
datafolder[len] = '\0';
+ status = pq_getmsgint(message, sizeof (GTM_PGXCNodeStatus));
+
elog(DEBUG1,
"ProcessPGXCNodeRegister: ipaddress = \"%s\", node name = \"%s\", proxy name = \"%s\", "
- "datafolder \"%s\"",
- ipaddress, node_name, proxyname, datafolder);
-
- status = pq_getmsgint(message, sizeof (GTM_PGXCNodeStatus));
+ "datafolder \"%s\", status = %d",
+ ipaddress, node_name, proxyname, datafolder, status);
if ((type!=GTM_NODE_GTM_PROXY) &&
(type!=GTM_NODE_GTM_PROXY_POSTMASTER) &&
@@ -159,12 +159,10 @@ ProcessPGXCNodeRegister(Port *myport, StringInfo message, bool is_backup)
* Cascade standby may be allowed.
*/
GTM_DoForAllOtherThreads(finishStandbyConn);
-
- GTMThreads->gt_standby_ready = true;
}
if (Recovery_PGXCNodeRegister(type, node_name, port,
- proxyname, NODE_CONNECTED,
+ proxyname, status,
ipaddress, datafolder, false, myport->sock))
{
ereport(ERROR,
@@ -176,7 +174,7 @@ ProcessPGXCNodeRegister(Port *myport, StringInfo message, bool is_backup)
* We don't check if the this is not in standby mode to allow
* cascaded standby.
*/
- if (type == GTM_NODE_GTM)
+ if ((type == GTM_NODE_GTM) && (status == NODE_CONNECTED))
GTMThreads->gt_standby_ready = true;
MemoryContextSwitchTo(oldContext);