Introduce new GTM thread status managing thread backup

If a GTM standby crashed during the initial backup, the master GTM remained locked. The problem originated in the GTM backup process, in which GTM locks all its threads one by one, performs the backup, and then releases everything. This commit adds an additional control layer able to detect the thread status during backup. The new status GTM_THREAD_BACKUP is introduced for this purpose. Once the backup has been performed, the thread status is changed back to GTM_THREAD_RUNNING. Hence, if the thread cleanup handler on GTM is invoked because a backend (here GTM-Standby) suddenly drops its connection, and it sees that the current thread has status GTM_THREAD_BACKUP, it goes ahead and releases the locks. This commit also includes some whitespace fixes. Patch by Andrei Martsinchyk
author: Michael Paquier 2012-07-11 00:46:31 +0000
committer: Michael Paquier 2012-07-11 00:46:31 +0000
commit: 28f921a1f9fed08e305756a9d327e82a110ce840 (patch)
tree: b66795be7ffe1a248051edc341c55e9c1b7cce51
parent: a5dddc37b885fc2cb6bf14040c7b323d83766a6a (diff)
3 files changed, 32 insertions, 20 deletions
diff --git a/src/gtm/main/gtm_thread.c b/src/gtm/main/gtm_thread.c
index 03c3228267..045b317ce7 100644
--- a/src/gtm/main/gtm_thread.c
+++ b/src/gtm/main/gtm_thread.c
@@ -27,7 +27,7 @@ GTM_Threads *GTMThreads = &GTMThreadsData;
 
 #define GTM_MIN_THREADS 32			/* Provision for minimum threads */
 #define GTM_MAX_THREADS 1024		/* Max threads allowed in the GTM */
-#define GTMThreadsFull	(GTMThreads->gt_thread_count == GTMThreads->gt_array_size)	
+#define GTMThreadsFull	(GTMThreads->gt_thread_count == GTMThreads->gt_array_size)
 
 /*
  * Add the given thrinfo structure to the global array, expanding it if
@@ -43,10 +43,10 @@ GTM_ThreadAdd(GTM_ThreadInfo *thrinfo)
 	if (GTMThreadsFull)
 	{
 		uint32 newsize;
-	   
+
 		/*
 		 * TODO Optimize lock management by not holding any locks during memory
-		 * allocation 
+		 * allocation.
 		 */
 		if (GTMThreads->gt_array_size == GTM_MAX_THREADS)
 			elog(ERROR, "Too many threads active");
@@ -99,7 +99,7 @@ GTM_ThreadAdd(GTM_ThreadInfo *thrinfo)
 	}
 	GTM_RWLockRelease(&GTMThreads->gt_lock);
 
-	/* 
+	/*
 	 * Track the slot information in the thrinfo. This is useful to quickly
 	 * find the slot given the thrinfo structure.
 	 */
@@ -264,6 +264,17 @@ GTM_ThreadCleanup(void *argp)
 
 	elog(LOG, "Cleaning up thread state");
 
+	if (thrinfo->thr_status == GTM_THREAD_BACKUP)
+	{
+		int 			ii;
+
+		for (ii = 0; ii < GTMThreads->gt_array_size; ii++)
+		{
+			if (GTMThreads->gt_threads[ii] && GTMThreads->gt_threads[ii] != thrinfo)
+				GTM_RWLockRelease(&GTMThreads->gt_threads[ii]->thr_lock);
+		}
+	}
+
 	/*
 	 * Close a connection to GTM standby.
 	 */
@@ -293,7 +304,7 @@ GTM_ThreadCleanup(void *argp)
 	 * our memory contextes easily.
 	 *
 	 * XXX We don't setup cleanup handlers for the main process. So this
-	 * routine would never be called for the main process/thread
+	 * routine would never be called for the main process/thread.
 	 */
 	MemoryContextSwitchTo(thrinfo->thr_parent_context);
 
@@ -314,20 +325,20 @@ GTM_ThreadCleanup(void *argp)
 
 	/*
 	 * Reset the thread-specific information. This should be done only after we
-	 * are sure that memory contextes are not required 
+	 * are sure that memory contextes are not required.
 	 *
 	 * Note: elog calls need memory contextes, so no elog calls beyond this
 	 * point.
 	 */
 	SetMyThreadInfo(NULL);
-	
+
 	return;
 }
 
 /*
  * A wrapper around the start routine of the thread. This helps us doing any
  * initialization and setting up cleanup handlers before the main routine is
- * started
+ * started.
  */
 void *
 GTM_ThreadMainWrapper(void *argp)
@@ -338,7 +349,7 @@ GTM_ThreadMainWrapper(void *argp)
 
 	SetMyThreadInfo(thrinfo);
 	MemoryContextSwitchTo(TopMemoryContext);
-	
+
 	pthread_cleanup_push(GTM_ThreadCleanup, thrinfo);
 	thrinfo->thr_startroutine(thrinfo);
 	pthread_cleanup_pop(1);
@@ -370,7 +381,7 @@ GTM_UnlockAllOtherThreads(void)
 		if (GTMThreads->gt_threads[ii] && GTMThreads->gt_threads[ii] != my_threadinfo)
 			GTM_RWLockRelease(&GTMThreads->gt_threads[ii]->thr_lock);
 	}
-}	
+}
 
 void
 GTM_DoForAllOtherThreads(void (* process_routine)(GTM_ThreadInfo *))
@@ -384,4 +395,4 @@ GTM_DoForAllOtherThreads(void (* process_routine)(GTM_ThreadInfo *))
 			(process_routine)(GTMThreads->gt_threads[ii]);
 	}
 }
-	
+
diff --git a/src/gtm/recovery/register_gtm.c b/src/gtm/recovery/register_gtm.c
index 3c0e116489..a7423f8a39 100644
--- a/src/gtm/recovery/register_gtm.c
+++ b/src/gtm/recovery/register_gtm.c
@@ -96,7 +96,7 @@ ProcessPGXCNodeRegister(Port *myport, StringInfo message, bool is_backup)
 	memcpy(datafolder, (char *)pq_getmsgbytes(message, len), len);
 	datafolder[len] = '\0';
 
-	elog(LOG, 
+	elog(LOG,
 		 "ProcessPGXCNodeRegister: ipaddress = \"%s\", node name = \"%s\", proxy name = \"%s\", "
 		 "datafolder \"%s\"",
 		 ipaddress, node_name, proxyname, datafolder);
@@ -131,7 +131,7 @@ ProcessPGXCNodeRegister(Port *myport, StringInfo message, bool is_backup)
 		elog(DEBUG1, "Registering GTM (Standby).  Unregister this first.");
 		/*
 		 * There's another standby.   May be failed one.
-		 * Clean this up.  This means that we allow 
+		 * Clean this up.  This means that we allow
 		 * only one standby at the same time.
 		 *
 		 * This helps to give up failed standby and connect
@@ -139,8 +139,7 @@ ProcessPGXCNodeRegister(Port *myport, StringInfo message, bool is_backup)
 		 *
 		 * Be sure that all ther threads are locked by other
 		 * means, typically by receiving MSG_BEGIN_BACKUP.
-		 */
-		/*
+		 *
 		 * First try to unregister GTM which is now connected.  We don't care
 		 * if it failed.
 		 */
@@ -300,13 +299,13 @@ ProcessPGXCNodeUnregister(Port *myport, StringInfo message, bool is_backup)
 
 			elog(LOG, "calling node_unregister() for standby GTM %p.",
 				 GetMyThreadInfo->thr_conn->standby);
-		
+
 		retry:
 			_rc = bkup_node_unregister(GetMyThreadInfo->thr_conn->standby,
 									   type,
 									   node_name);
 
-		
+
 			if (gtm_standby_check_communication_error(&count, oldconn))
 				goto retry;
 
@@ -445,6 +444,7 @@ ProcessGTMBeginBackup(Port *myport, StringInfo message)
 		if (GTMThreads->gt_threads[ii] && GTMThreads->gt_threads[ii] != my_threadinfo)
 			GTM_RWLockAcquire(&GTMThreads->gt_threads[ii]->thr_lock, GTM_LOCKMODE_WRITE);
 	}
+	my_threadinfo->thr_status = GTM_THREAD_BACKUP;
 	pq_beginmessage(&buf, 'S');
 	pq_sendint(&buf, BEGIN_BACKUP_RESULT, 4);
 	pq_endmessage(myport, &buf);
@@ -466,6 +466,7 @@ ProcessGTMEndBackup(Port *myport, StringInfo message)
 		if (GTMThreads->gt_threads[ii] && GTMThreads->gt_threads[ii] != my_threadinfo)
 			GTM_RWLockRelease(&GTMThreads->gt_threads[ii]->thr_lock);
 	}
+	my_threadinfo->thr_status = GTM_THREAD_RUNNING;
 	pq_beginmessage(&buf, 'S');
 	pq_sendint(&buf, END_BACKUP_RESULT, 4);
 	pq_endmessage(myport, &buf);
diff --git a/src/include/gtm/gtm.h b/src/include/gtm/gtm.h
index 3955cee6f4..293d67119b 100644
--- a/src/include/gtm/gtm.h
+++ b/src/include/gtm/gtm.h
@@ -30,6 +30,7 @@ typedef enum GTM_ThreadStatus
 	GTM_THREAD_STARTING,
 	GTM_THREAD_RUNNING,
 	GTM_THREAD_EXITING,
+	GTM_THREAD_BACKUP, 		/* Backup to standby is in progress */
 	/* Must be the last */
 	GTM_THREAD_INVALID
 } GTM_ThreadStatus;
@@ -47,9 +48,9 @@ typedef struct GTM_ThreadInfo
 	uint32				thr_localid;
 	bool				is_main_thread;
 	void * (* thr_startroutine)(void *);
-	
+
 	MemoryContext	thr_thread_context;
-	MemoryContext	thr_message_context;	
+	MemoryContext	thr_message_context;
 	MemoryContext	thr_current_context;
 	MemoryContext	thr_error_context;
 	MemoryContext	thr_parent_context;
@@ -66,7 +67,6 @@ typedef struct GTM_ThreadInfo
 
 	GTM_RWLock			thr_lock;
 	gtm_List				*thr_cached_txninfo;
-
 } GTM_ThreadInfo;
 
 typedef struct GTM_Threads
author	Michael Paquier	2012-07-11 00:46:31 +0000
committer	Michael Paquier	2012-07-11 00:46:31 +0000
commit	28f921a1f9fed08e305756a9d327e82a110ce840 (patch)
tree	b66795be7ffe1a248051edc341c55e9c1b7cce51
parent	a5dddc37b885fc2cb6bf14040c7b323d83766a6a (diff)