diff options
author | Michael P | 2011-06-29 23:41:18 +0000 |
---|---|---|
committer | Michael P | 2011-06-29 23:41:18 +0000 |
commit | 091b0e828cf0fd5bbd1f9ae58ab96fc983e55d77 (patch) | |
tree | 30e2748bd04e6b07fdd043a72c434e01c3f7d632 | |
parent | 9cf414760f30f6fb377f47ced0d0315460bf85a7 (diff) |
Support for GTM-Proxy reconnection
This commit adds support for GTM-Proxy reconnection.
After a GTM-Standby instance has been promoted, the Proxy worker
threads can be redirected to the newly promoted GTM instance.
Reconnection is initiated from gtm_ctl and uses the SIGUSR1/SIGUSR2 signals
to notify the GTM-Proxy main thread and worker threads.
If a worker thread receives SIGUSR1, it forwards the signal to the main thread.
Once the main thread has received SIGUSR1, it sends SIGUSR2 to the worker threads.
Signal handling is implemented with sigsetjmp()/siglongjmp().
A new command, "reconnect", has been added to gtm_ctl.
Ex:
gtm_ctl reconnect -S gtm_proxy -D $PROXY_DATA -o '-s $STANDBY_HOST -t $STANDBY_PORT'
The connection parameters for the new GTM (-s host, -t port) must be given in the -o option string.
Implementation by Koichi Suzuki
-rw-r--r-- | src/gtm/gtm_ctl/gtm_ctl.c | 102 | ||||
-rw-r--r-- | src/gtm/proxy/proxy_main.c | 698 | ||||
-rw-r--r-- | src/gtm/proxy/proxy_thread.c | 17 | ||||
-rw-r--r-- | src/include/gtm/gtm_proxy.h | 28 |
4 files changed, 770 insertions, 75 deletions
diff --git a/src/gtm/gtm_ctl/gtm_ctl.c b/src/gtm/gtm_ctl/gtm_ctl.c index 18fc3f9240..d37c009f37 100644 --- a/src/gtm/gtm_ctl/gtm_ctl.c +++ b/src/gtm/gtm_ctl/gtm_ctl.c @@ -44,7 +44,8 @@ typedef enum STOP_COMMAND, PROMOTE_COMMAND, RESTART_COMMAND, - STATUS_COMMAND + STATUS_COMMAND, + RECONNECT_COMMAND /* gtm_ctl -S gtm_proxy reconnect */ } CtlCommand; #define DEFAULT_WAIT 60 @@ -64,6 +65,8 @@ static char *log_file = NULL; static char *gtm_path = NULL; static char *gtm_app = NULL; static char *argv0 = NULL; +static char *reconnect_host = NULL; +static char *reconnect_port = NULL; static void write_stderr(const char *fmt,...) @@ -78,6 +81,7 @@ static void set_mode(char *modeopt); static void do_start(void); static void do_stop(void); static void do_restart(void); +static void do_reconnect(void); static void print_msg(const char *msg); static pgpid_t get_pgpid(void); @@ -571,6 +575,92 @@ do_promote(void) } } +/* + * At least we expect the following argument + * + * 1) -D datadir + * 2) -o options: we expect that -t and -s options are specified here. + * Check will be done in GTM-Proxy. If there's an error, it will be + * logged. In this case, GTM-Proxy won't terminate. It will continue + * to read/write with old GTM. + * + * Because they are not passed to gtm directly, they should appear in + * gtm_ctl argument, not in -o options. They're specific to gtm_ctl + * reconnect. + */ +static void +do_reconnect(void) +{ + pgpid_t pid; + char *reconnect_point_file_nam; + FILE *reconnect_point_file; + +#ifdef GTM_SBY_DEBUG + write_stderr("Reconnecting to new GTM ... 
DEBUG MODE."); +#endif + + /* + * Target must be "gtm_proxy" + */ + if (strcmp(gtm_app, "gtm_proxy") != 0) + { + write_stderr(_("%s: only gtm_proxy can accept reconnect command\n"), progname); + exit(1); + } + pid = get_pgpid(); + + if (pid == 0) /* no pid file */ + { + write_stderr(_("%s: PID file \"%s\" does not exist\n"), progname, pid_file); + write_stderr(_("Is server running?\n")); + exit(1); + } + else if (pid < 0) /* standalone backend, not gtm */ + { + pid = -pid; + write_stderr(_("%s: cannot promote server; " + "single-user server is running (PID: %ld)\n"), + progname, pid); + exit(1); + } + read_gtm_opts(); + /* + * Pass reconnect info to GTM-Proxy. + * + * Option arguments are written to new gtm file under -D directory. + */ + reconnect_point_file_nam = malloc(strlen(gtm_data) + 9); + if (reconnect_point_file_nam == NULL) + { + write_stderr(_("%s: No memory available.\n"), progname); + exit(1); + } + + snprintf(reconnect_point_file_nam, strlen(gtm_data) + 8, "%s/newgtm", gtm_data); + reconnect_point_file = fopen(reconnect_point_file_nam, "w"); + + if (reconnect_point_file == NULL) + { + write_stderr(_("%s: Cannot open reconnect point file %s\n"), progname, reconnect_point_file_nam); + exit(1); + } + + fprintf(reconnect_point_file, "%s\n", gtm_opts); + fclose(reconnect_point_file); + free(reconnect_point_file_nam); +#if 0 /* GTM_SBY_DEBUG */ + write_stderr("Now about to send SIGUSR1 to pid %ld.\n", pid); + write_stderr("Returning. This is the debug. 
Don't send signal actually.\n"); + return; +#endif + if (kill((pid_t) pid, SIGUSR1) != 0) + { + write_stderr(_("%s: could not send promote signal (PID: %ld): %s\n"), progname, pid, + strerror(errno)); + exit(1); + } +} + /* * restart/reload routines @@ -790,6 +880,7 @@ do_help(void) printf(_(" %s restart -S STARTUP_MODE [-w] [-t SECS] [-D DATADIR] [-m SHUTDOWN-MODE]\n" " [-o \"OPTIONS\"]\n"), progname); printf(_(" %s status -S STARTUP_MODE [-w] [-t SECS] [-D DATADIR]\n"), progname); + printf(_(" %s reconnect [-D DATADIR] -o \"OPTIONS\"]\n"), progname); printf(_("\nCommon options:\n")); printf(_(" -D DATADIR location of the database storage area\n")); @@ -810,6 +901,10 @@ do_help(void) printf(_("\nOptions for stop or restart:\n")); printf(_(" -m SHUTDOWN-MODE can be \"smart\", \"fast\", or \"immediate\"\n")); + printf(_("\nOptions for reconnect:\n")); + printf(_(" -t NewGTMPORT Port number of new GTM.\n")); + printf(_(" -s NewGTMHost Host Name of new GTM.\n")); + printf(_("\nShutdown modes are:\n")); printf(_(" smart quit after all clients have disconnected\n")); printf(_(" fast quit directly, with proper shutdown\n")); @@ -985,6 +1080,8 @@ main(int argc, char **argv) ctl_command = RESTART_COMMAND; else if (strcmp(argv[optind], "status") == 0) ctl_command = STATUS_COMMAND; + else if (strcmp(argv[optind], "reconnect") == 0) + ctl_command = RECONNECT_COMMAND; else { write_stderr(_("%s: unrecognized operation mode \"%s\"\n"), @@ -1136,6 +1233,9 @@ main(int argc, char **argv) case STATUS_COMMAND: do_status(); break; + case RECONNECT_COMMAND: + do_reconnect(); + break; default: break; } diff --git a/src/gtm/proxy/proxy_main.c b/src/gtm/proxy/proxy_main.c index 9ad7551759..1dc17e0d86 100644 --- a/src/gtm/proxy/proxy_main.c +++ b/src/gtm/proxy/proxy_main.c @@ -42,6 +42,8 @@ #include "gtm/libpq-int.h" #include "gtm/gtm_ip.h" #include "gtm/gtm_standby.h" +/* For reconnect control lock */ +#include "gtm/gtm_lock.h" extern int optind; extern char *optarg; @@ -60,12 +62,28 @@ 
int GTMProxyPortNumber; int GTMProxyWorkerThreads; char *GTMProxyDataDir; +/* GTM communication error handling options */ +int GTMErrorWaitOpt = FALSE; /* Wait and assume XCM if TRUE */ +int GTMErrorWaitSecs = 0; /* Duration of each wait */ +int GTMErrorWaitCount = 0; /* How many durations to wait */ + char *GTMServerHost; int GTMServerPortNumber; GTM_PGXCNodeId GTMProxyID = 0; GTM_ThreadID TopMostThreadID; +/* Communication area with SIGUSR2 signal handler */ +GTMProxy_ThreadInfo **Proxy_ThreadInfo; +short ReadyToReconnect = FALSE; +char *NewGTMServerHost; +int NewGTMServerPortNumber; + +/* Reconnect Control Lock */ +GTM_RWLock ReconnectControlLock; +jmp_buf mainThreadSIGUSR1_buf; +int SIGUSR1Accepted = FALSE; + /* The socket(s) we're listening to. */ #define MAXLISTEN 64 static int ListenSocket[MAXLISTEN]; @@ -119,9 +137,11 @@ static void SetDataDir(void); static void ChangeToDataDir(void); static void checkDataDir(void); static void DeleteLockFile(const char *filename); -static void RegisterProxy(void); +static void RegisterProxy(bool is_reconnect); static void UnregisterProxy(void); static GTM_Conn *ConnectGTM(void); +static void ReleaseCmdBackup(GTMProxy_CommandInfo *cmdinfo); +static void workerThreadReconnectToGTMstandby(void); /* * One-time initialization. 
It's called immediately after the main process @@ -188,11 +208,15 @@ BaseInit() sprintf(GTMLogFile, "%s/%s", GTMProxyDataDir, GTM_LOG_FILE); } + /* Initialize reconnect control lock */ + + GTM_RWLockInit(&ReconnectControlLock); + /* Save Node Register File in register.c */ Recovery_SaveRegisterFileName(GTMProxyDataDir); /* Register Proxy on GTM */ - RegisterProxy(); + RegisterProxy(false); DebugFileOpen(); @@ -208,10 +232,125 @@ BaseInit() } } +static char * +read_token(char *line, char **next) +{ + char *tok; + char *next_token; + + if (line == NULL) + { + *next = NULL; + return(NULL); + } + for (tok = line;; tok++) + { + if (*tok == 0 || *tok == '\n') + return(NULL); + if (*tok == ' ' || *tok == '\t') + continue; + else + break; + } + for (next_token = tok;; next_token++) + { + if (*next_token == 0 || *next_token == '\n') + { + *next_token = 0; + *next = NULL; + return(tok); + } + if (*next_token == ' ' || *next_token == '\t') + { + *next_token = 0; + *next = next_token + 1; + return(tok); + } + else + continue; + } + Assert(0); /* Never comes here. Keep compiler quiet. */ +} + +/* + * Returns non-zero if failed. + * We assume that current working directory is that specified by -D option. 
+ */ +#define MAXLINE 1024 +#define INVALID_RECONNECT_OPTION_MSG() \ + do{ \ + ereport(ERROR, (0, errmsg("Invalid Reconnect Option"))); \ + } while(0) + +static int +GTMProxy_ReadReconnectInfo(void) +{ + + char optstr[MAXLINE]; + char *line; + FILE *optarg_file; + char *optValue; + char *option; + char *next_token; + + optarg_file = fopen("newgtm", "r"); + if (optarg_file == NULL) + { + INVALID_RECONNECT_OPTION_MSG(); + return(-1); + } + line = fgets(optstr, MAXLINE, optarg_file); + if (line == NULL) + { + INVALID_RECONNECT_OPTION_MSG(); + return(-1); + } + fclose(optarg_file); +#ifdef GTM_SBY_DEBUG + elog(LOG, "reconnect option = \"%s\"\n", optstr); +#endif + next_token = optstr; + while ((option = read_token(next_token, &next_token))) + { + if (strcmp(option, "-t") == 0) /* New GTM port */ + { + optValue = read_token(next_token, &next_token); + if (optValue == NULL) + { + INVALID_RECONNECT_OPTION_MSG(); + return(-1); + } + NewGTMServerPortNumber = atoi(optValue); + continue; + } + else if (strcmp(option, "-s") == 0) + { + optValue = read_token(next_token, &next_token); + if (optValue == NULL) + { + INVALID_RECONNECT_OPTION_MSG(); + return(-1); + } + if (NewGTMServerHost) + free(NewGTMServerHost); + NewGTMServerHost = strdup(optValue); + continue; + } + else + { + INVALID_RECONNECT_OPTION_MSG(); + return(-1); + } + } + return(0); +} + static void GTMProxy_SigleHandler(int signal) { - fprintf(stderr, "Received signal %d", signal); + int ii; + + elog(LOG, "Received signal %d", signal); switch (signal) { @@ -221,6 +360,111 @@ GTMProxy_SigleHandler(int signal) case SIGINT: case SIGHUP: break; + case SIGUSR1: /* Reconnect from gtm_ctl */ + /* + * Only the main thread can distribute SIGUSR2 to avoid lock contention + * of the thread info. If an other thread receives SIGUSR1, it will proxy + * SIGUSR1 to the main thread. + * + * The mask is set to block signals. They're blocked until all the + * threads reconnect to the new GTM. 
+ */ +#ifdef GTM_SBY_DEBUG + elog(LOG, "Accepted SIGUSR1\n"); +#endif + if (MyThreadID != TopMostThreadID) + { +#ifdef GTM_SBY_DEBUG + elog(LOG, "Not on main thread, proxy the signal to the main thread."); +#endif + pthread_kill(TopMostThreadID, SIGUSR1); + return; + } + /* + * Then this is the main thread. + */ + PG_SETMASK(&BlockSig); +#ifdef GTM_SBY_DEBUG + elog(LOG, "I'm the main thread. Accepted SIGUSR1."); +#endif + /* + * Set Reconnect Info + */ + if (!ReadyToReconnect) + { + elog(LOG, "SIGUSR1 detected, but not ready to handle this. Ignored"); + PG_SETMASK(&UnBlockSig); + return; + } + elog(LOG, "SIGUSR1 detected. Set reconnect info for each worker thread"); + if (GTMProxy_ReadReconnectInfo() != 0) + { + /* Failed to read reconnect information from reconnect data file */ + PG_SETMASK(&UnBlockSig); + return; + } + /* + * Send SIGUSR2 to all worker threads. + * Check if all the worker threads can accept SIGUSR2 + */ + for (ii = 0; ii < GTMProxyWorkerThreads; ii++) + { + if ((Proxy_ThreadInfo[ii] == NULL) || + (Proxy_ThreadInfo[ii]->can_accept_SIGUSR2 == FALSE)) + { + elog(NOTICE, "Some worker thread is not ready to handle this. Retry reconnection later.\n"); + PG_SETMASK(&UnBlockSig); + return; + } + } + /* + * Before send SIGUSR2 to worker threads, acquire reconnect control lock in write mode + * so that worker threads wait until main thread reconnects to new GTM and register + * itself. + */ + GTM_RWLockAcquire(&ReconnectControlLock, GTM_LOCKMODE_WRITE); + + /* We cannot accept the next SIGUSR1 until all the reconnect is finished. */ + ReadyToReconnect = false; + + /* + * Issue SIGUSR2 to all the worker threads. + * It will not be issued to the main thread. 
+ */ + for (ii = 0; ii < GTMProxyWorkerThreads; ii++) + pthread_kill(Proxy_ThreadInfo[ii]->thr_id, SIGUSR2); + + elog(LOG, "SIGUSR2 issued to all the worker threads."); + PG_SETMASK(&UnBlockSig); + + /* + * Note that during connection handling with backends, signals are blocked + * so it is safe to longjump here. + */ + siglongjmp(mainThreadSIGUSR1_buf, 1); + + case SIGUSR2: /* Reconnect from the main thread */ + /* Main thread has nothing to do twith this signal and should not receive this. */ + PG_SETMASK(&BlockSig); +#ifdef GTM_SBY_DEBUG + elog(LOG, "Detected SIGUSR2, thread:%ld", MyThreadID); +#endif + if (MyThreadID == TopMostThreadID) + { + /* This should not be reached. Just in case. */ +#ifdef GTM_SBY_DEBUG + elog(LOG, "SIGUSR2 received by the main thread. Ignoring."); +#endif + PG_SETMASK(&UnBlockSig); + return; + } + GetMyThreadInfo->reconnect_issued = TRUE; + if (GetMyThreadInfo->can_longjmp) + { + siglongjmp(GetMyThreadInfo->longjmp_env, 1); + } + PG_SETMASK(&UnBlockSig); + return; default: fprintf(stderr, "Unknown signal %d\n", signal); @@ -289,10 +533,12 @@ main(int argc, char *argv[]) GTMProxyPortNumber = GTM_PROXY_DEFAULT_PORT; GTMProxyWorkerThreads = GTM_PROXY_DEFAULT_WORKERS; + NewGTMServerHost = NULL; + /* * Parse the command like options and set variables */ - while ((opt = getopt(argc, argv, "h:i:p:n:D:l:s:t:")) != -1) + while ((opt = getopt(argc, argv, "h:i:p:n:D:l:s:t:w:z:")) != -1) { switch (opt) { @@ -336,6 +582,16 @@ main(int argc, char *argv[]) GTMServerPortNumber = atoi(optarg); break; + case 'w': + /* Duration to wait at GTM communication error */ + GTMErrorWaitSecs = atoi(optarg); + break; + + case 'z': + /* How many durations to wait */ + GTMErrorWaitCount = atoi(optarg); + break; + default: write_stderr("Try \"%s --help\" for more information.\n", progname); @@ -358,6 +614,19 @@ main(int argc, char *argv[]) } /* + * Validate GTM communication error handling option + */ + if (GTMErrorWaitSecs > 0 && GTMErrorWaitCount > 0) + { + 
GTMErrorWaitOpt = TRUE; + } + else + { + GTMErrorWaitOpt = FALSE; + GTMErrorWaitSecs = 0; + GTMErrorWaitCount = 0; + } + /* * GTM accepts no non-option switch arguments. */ if (optind < argc) @@ -420,19 +689,25 @@ main(int argc, char *argv[]) pqsignal(SIGQUIT, GTMProxy_SigleHandler); pqsignal(SIGTERM, GTMProxy_SigleHandler); pqsignal(SIGINT, GTMProxy_SigleHandler); + pqsignal(SIGUSR1, GTMProxy_SigleHandler); + pqsignal(SIGUSR2, GTMProxy_SigleHandler); pqinitmask(); /* - * Pre-fork so many worker threads + * Initialize SIGUSR2 interface area (Thread info) */ + Proxy_ThreadInfo = palloc0(sizeof(GTMProxy_ThreadInfo *) * GTMProxyWorkerThreads); + /* + * Pre-fork so many worker threads + */ for (i = 0; i < GTMProxyWorkerThreads; i++) { /* * XXX Start the worker thread */ - if (GTMProxy_ThreadCreate(GTMProxy_ThreadMain) == NULL) + if (GTMProxy_ThreadCreate(GTMProxy_ThreadMain, i) == NULL) { elog(ERROR, "failed to create a new thread"); return STATUS_ERROR; @@ -507,10 +782,35 @@ ServerLoop(void) fd_set rmask; int selres; + if (sigsetjmp(mainThreadSIGUSR1_buf, 1) != 0) + { + /* + * Reconnect! + * Use RegisterProxy() call. Before this, change connection information + * of GTM to the new one. + * Because this is done while ReconnectControlLock is acquired, + * worker threads can use this change and they don't have to worry about + * new connection point. + * + * Because we leave the old socket as is, there could be some waste of + * the resource but this may not happen so many times. + */ + + RegisterProxy(TRUE); + + /* If it is done, then release the lock for worker threads. */ + GTM_RWLockRelease(&ReconnectControlLock); + } + /* + * Delay the point to accept reconnect until here because + * longjmp buffer has not been prepared. + */ + ReadyToReconnect = TRUE; + /* * Wait for a connection request to arrive. 
* - * We wait at most one minute, to ensure that the other background + * Wait at most one minute, to ensure that the other background * tasks handled below get done even when no requests are arriving. */ memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set)); @@ -626,6 +926,7 @@ GTMProxy_ThreadMain(void *argp) int32 saved_seqno = -1; int ii, nrfds; char gtm_connect_string[1024]; + int first_turn = TRUE; /* Used only to set longjmp target at the first turn of thread loop */ elog(DEBUG3, "Starting the connection helper thread"); @@ -664,6 +965,25 @@ GTMProxy_ThreadMain(void *argp) initStringInfo(&input_message); /* + * Set GTM communication error handling options. + */ + thrinfo->thr_gtm_conn->gtmErrorWaitOpt = GTMErrorWaitOpt; + thrinfo->thr_gtm_conn->gtmErrorWaitSecs = GTMErrorWaitSecs; + thrinfo->thr_gtm_conn->gtmErrorWaitCount = GTMErrorWaitCount; + + thrinfo->reconnect_issued = FALSE; + + /* + * Initialize comand backup area + */ + for (ii = 0; ii < GTM_PROXY_MAX_CONNECTIONS; ii++) + { + thrinfo->thr_any_backup[ii] = FALSE; + thrinfo->thr_qtype[ii] = 0; + initStringInfo(&(thrinfo->thr_inBufData[ii])); + } + + /* * If an exception is encountered, processing resumes here so we abort the * current transaction and start a new one. * @@ -730,6 +1050,18 @@ GTMProxy_ThreadMain(void *argp) /* We can now handle ereport(ERROR) */ PG_exception_stack = &local_sigjmp_buf; + /* + * Now we're entering thread loop. The last work is to initialize SIGUSR2 control. 
+ */ + Disable_Longjmp(); + GetMyThreadInfo->can_accept_SIGUSR2 = TRUE; + GetMyThreadInfo->reconnect_issued = FALSE; + GetMyThreadInfo->can_longjmp = FALSE; + + /*-------------------------------------------------------------- + * Thread Loop + *------------------------------------------------------------- + */ for (;;) { gtm_ListCell *elem = NULL; @@ -743,86 +1075,142 @@ GTMProxy_ThreadMain(void *argp) MemoryContextResetAndDeleteChildren(MessageContext); /* - * Just reset the input buffer to avoid repeated palloc/pfrees - * - * XXX We should consider resetting the MessageContext periodically to - * handle any memory leaks - */ - resetStringInfo(&input_message); - - /* - * Check if there are any changes to the connection array assigned to - * this thread. If so, we need to rebuild the fd array. + * The following block should be skipped at the first turn. */ - GTM_MutexLockAcquire(&thrinfo->thr_lock); - if (saved_seqno != thrinfo->thr_seqno) + if (!first_turn) { - saved_seqno = thrinfo->thr_seqno; - - while (thrinfo->thr_conn_count <= 0) - { - /* - * No connections assigned to the thread. Wait for at least one - * connection to be assgined to us - */ - GTM_CVWait(&thrinfo->thr_cv, &thrinfo->thr_lock); - } - - memset(thrinfo->thr_poll_fds, 0, sizeof (thrinfo->thr_poll_fds)); - /* - * Now grab all the open connections. We are holding the lock so no - * new connections can be added. + * Check if there are any changes to the connection array assigned to + * this thread. If so, we need to rebuild the fd array. */ - for (ii = 0; ii < thrinfo->thr_conn_count; ii++) + GTM_MutexLockAcquire(&thrinfo->thr_lock); + if (saved_seqno != thrinfo->thr_seqno) { - GTMProxy_ConnectionInfo *conninfo = thrinfo->thr_all_conns[ii]; + saved_seqno = thrinfo->thr_seqno; - /* We detect if the connection has been dropped to avoid - * a segmentation fault. 
- */ - if (conninfo->con_port == NULL) + while (thrinfo->thr_conn_count <= 0) { - conninfo->con_disconnected = true; - continue; - } + /* + * No connections assigned to the thread. Wait for at least one + * connection to be assigned to us + */ + if (sigsetjmp(GetMyThreadInfo->longjmp_env, 1) == 0) + { + Enable_Longjmp(); + GTM_CVWait(&thrinfo->thr_cv, &thrinfo->thr_lock); + Disable_Longjmp(); + } + else + { + /* SIGUSR2 here */ + workerThreadReconnectToGTMstandby(); + } + } + + memset(thrinfo->thr_poll_fds, 0, sizeof (thrinfo->thr_poll_fds)); /* - * If this is a newly added connection, complete the handshake + * Now grab all the open connections. A lock is being hold so no + * new connections can be added. */ - if (!conninfo->con_authenticated) - GTMProxy_HandshakeConnection(conninfo); + for (ii = 0; ii < thrinfo->thr_conn_count; ii++) + { + GTMProxy_ConnectionInfo *conninfo = thrinfo->thr_all_conns[ii]; - thrinfo->thr_poll_fds[ii].fd = conninfo->con_port->sock; - thrinfo->thr_poll_fds[ii].events = POLLIN; - thrinfo->thr_poll_fds[ii].revents = 0; + /* + * Detect if the connection has been dropped to avoid + * a segmentation fault. 
+ */ + if (conninfo->con_port == NULL) + { + conninfo->con_disconnected = true; + continue; + } + + /* + * If this is a newly added connection, complete the handshake + */ + if (!conninfo->con_authenticated) + GTMProxy_HandshakeConnection(conninfo); + + thrinfo->thr_poll_fds[ii].fd = conninfo->con_port->sock; + thrinfo->thr_poll_fds[ii].events = POLLIN; + thrinfo->thr_poll_fds[ii].revents = 0; + } + } + GTM_MutexLockRelease(&thrinfo->thr_lock); + + while (true) + { + Enable_Longjmp(); + nrfds = poll(thrinfo->thr_poll_fds, thrinfo->thr_conn_count, 1000); + Disable_Longjmp(); + + if (nrfds < 0) + { + if (errno == EINTR) + continue; + elog(FATAL, "poll returned with error %d", nrfds); + } + else + break; } + + if (nrfds == 0) + continue; + + /* + * Initialize the lists + */ + thrinfo->thr_processed_commands = gtm_NIL; + memset(thrinfo->thr_pending_commands, 0, sizeof (thrinfo->thr_pending_commands)); } - GTM_MutexLockRelease(&thrinfo->thr_lock); - while (true) + /* + * Each SIGUSR2 should return here and please note that from the beginning + * of the outer loop, longjmp is disabled and signal handler will simply return + * so that we don't have to be botherd with the memory context. We should be + * sure to be in MemoryContext where siglongjmp() is issued. + */ +setjmp_again: + if (sigsetjmp(thrinfo->longjmp_env, 1) == 0) { - nrfds = poll(thrinfo->thr_poll_fds, thrinfo->thr_conn_count, 1000); + Disable_Longjmp(); + } + else + { + /* + * SIGUSR2 is detected and jumped here + * Reconnection phase + */ + workerThreadReconnectToGTMstandby(); - if (nrfds < 0) + /* + * Correction of pending works. 
+ */ + thrinfo->thr_processed_commands = gtm_NIL; + for (ii = 0; ii < MSG_TYPE_COUNT; ii++) { - if (errno == EINTR) - continue; - elog(FATAL, "poll returned with error %d", nrfds); + thrinfo->thr_pending_commands[ii] = gtm_NIL; } - else - break; + gtm_list_free_deep(thrinfo->thr_processed_commands); + thrinfo->thr_processed_commands = gtm_NIL; + goto setjmp_again; /* Get ready for another SIGUSR2 */ } - - if (nrfds == 0) + if (first_turn) + { + first_turn = FALSE; continue; + } /* - * Initialize the lists + * Just reset the input buffer to avoid repeated palloc/pfrees + * + * XXX We should consider resetting the MessageContext periodically to + * handle any memory leaks */ - thrinfo->thr_processed_commands = gtm_NIL; - memset(thrinfo->thr_pending_commands, 0, sizeof (thrinfo->thr_pending_commands)); - + resetStringInfo(&input_message); + /* * Now, read command from each of the connections that has some data to * be read. @@ -843,13 +1231,16 @@ GTMProxy_ThreadMain(void *argp) continue; } - if (thrinfo->thr_poll_fds[ii].revents & POLLIN) + if ((thrinfo->thr_any_backup[ii]) || + (thrinfo->thr_poll_fds[ii].revents & POLLIN)) { /* * (3) read a command (loop blocks here) */ qtype = ReadCommand(thrinfo->thr_conn, &input_message); + thrinfo->thr_poll_fds[ii].revents = 0; + switch(qtype) { case 'C': @@ -903,7 +1294,9 @@ GTMProxy_ThreadMain(void *argp) /* * Make sure everything is on wire now */ + Enable_Longjmp(); gtmpqFlush(thrinfo->thr_gtm_conn); + Disable_Longjmp(); /* * Read back the responses and put them on to the right backend @@ -920,8 +1313,10 @@ GTMProxy_ThreadMain(void *argp) */ if (cmdinfo->ci_res_index == 0) { + Enable_Longjmp(); if ((res = GTMPQgetResult(thrinfo->thr_gtm_conn)) == NULL) elog(ERROR, "GTMPQgetResult failed"); + Disable_Longjmp(); } ProcessResponse(thrinfo, cmdinfo, res); @@ -1055,9 +1450,15 @@ ProcessResponse(GTMProxy_ThreadInfo *thrinfo, GTMProxy_CommandInfo *cmdinfo, if (res->gr_status == GTM_RESULT_OK) { if (res->gr_type != 
TXN_BEGIN_GETGXID_MULTI_RESULT) + { + ReleaseCmdBackup(cmdinfo); elog(ERROR, "Wrong result"); + } if (cmdinfo->ci_res_index >= res->gr_resdata.grd_txn_get_multi.txn_count) + { + ReleaseCmdBackup(cmdinfo); elog(ERROR, "Too few GXIDs"); + } gxid = res->gr_resdata.grd_txn_get_multi.start_gxid + cmdinfo->ci_res_index; @@ -1083,18 +1484,25 @@ ProcessResponse(GTMProxy_ThreadInfo *thrinfo, GTMProxy_CommandInfo *cmdinfo, pq_flush(cmdinfo->ci_conn->con_port); } cmdinfo->ci_conn->con_pending_msg = MSG_TYPE_INVALID; + ReleaseCmdBackup(cmdinfo); break; case MSG_TXN_COMMIT: if (res->gr_type != TXN_COMMIT_MULTI_RESULT) + { + ReleaseCmdBackup(cmdinfo); elog(ERROR, "Wrong result"); + } /* * These are grouped messages. We send an array of GXIDs to commit * or rollback and the server sends us back an array of status * codes. */ if (cmdinfo->ci_res_index >= res->gr_resdata.grd_txn_rc_multi.txn_count) + { + ReleaseCmdBackup(cmdinfo); elog(ERROR, "Too few GXIDs"); + } if (res->gr_resdata.grd_txn_rc_multi.status[cmdinfo->ci_res_index] == STATUS_OK) { @@ -1105,20 +1513,30 @@ ProcessResponse(GTMProxy_ThreadInfo *thrinfo, GTMProxy_CommandInfo *cmdinfo, pq_flush(cmdinfo->ci_conn->con_port); } else + { + ReleaseCmdBackup(cmdinfo); ereport(ERROR2, (EINVAL, errmsg("Transaction commit failed"))); + } cmdinfo->ci_conn->con_pending_msg = MSG_TYPE_INVALID; + ReleaseCmdBackup(cmdinfo); break; case MSG_TXN_ROLLBACK: if (res->gr_type != TXN_ROLLBACK_MULTI_RESULT) + { + ReleaseCmdBackup(cmdinfo); elog(ERROR, "Wrong result"); + } /* * These are grouped messages. We send an array of GXIDs to commit * or rollback and the server sends us back an array of status * codes. 
*/ if (cmdinfo->ci_res_index >= res->gr_resdata.grd_txn_rc_multi.txn_count) + { + ReleaseCmdBackup(cmdinfo); elog(ERROR, "Too few GXIDs"); + } if (res->gr_resdata.grd_txn_rc_multi.status[cmdinfo->ci_res_index] == STATUS_OK) { @@ -1129,17 +1547,27 @@ ProcessResponse(GTMProxy_ThreadInfo *thrinfo, GTMProxy_CommandInfo *cmdinfo, pq_flush(cmdinfo->ci_conn->con_port); } else + { + ReleaseCmdBackup(cmdinfo); ereport(ERROR2, (EINVAL, errmsg("Transaction commit failed"))); + } cmdinfo->ci_conn->con_pending_msg = MSG_TYPE_INVALID; + ReleaseCmdBackup(cmdinfo); break; case MSG_SNAPSHOT_GET: if ((res->gr_type != SNAPSHOT_GET_RESULT) && (res->gr_type != SNAPSHOT_GET_MULTI_RESULT)) + { + ReleaseCmdBackup(cmdinfo); elog(ERROR, "Wrong result"); + } if (cmdinfo->ci_res_index >= res->gr_resdata.grd_txn_snap_multi.txn_count) + { + ReleaseCmdBackup(cmdinfo); elog(ERROR, "Too few GXIDs"); + } if (res->gr_resdata.grd_txn_snap_multi.status[cmdinfo->ci_res_index] == STATUS_OK) { @@ -1161,8 +1589,12 @@ ProcessResponse(GTMProxy_ThreadInfo *thrinfo, GTMProxy_CommandInfo *cmdinfo, pq_flush(cmdinfo->ci_conn->con_port); } else + { + ReleaseCmdBackup(cmdinfo); ereport(ERROR2, (EINVAL, errmsg("snapshot request failed"))); + } cmdinfo->ci_conn->con_pending_msg = MSG_TYPE_INVALID; + ReleaseCmdBackup(cmdinfo); break; case MSG_TXN_BEGIN: @@ -1188,7 +1620,10 @@ ProcessResponse(GTMProxy_ThreadInfo *thrinfo, GTMProxy_CommandInfo *cmdinfo, if ((res->gr_proxyhdr.ph_conid == InvalidGTMProxyConnID) || (res->gr_proxyhdr.ph_conid >= GTM_PROXY_MAX_CONNECTIONS) || (thrinfo->thr_all_conns[res->gr_proxyhdr.ph_conid] != cmdinfo->ci_conn)) + { + ReleaseCmdBackup(cmdinfo); elog(PANIC, "Invalid response or synchronization loss"); + } /* * These are just proxied messages.. 
so just forward the response @@ -1216,9 +1651,11 @@ ProcessResponse(GTMProxy_ThreadInfo *thrinfo, GTMProxy_CommandInfo *cmdinfo, break; } cmdinfo->ci_conn->con_pending_msg = MSG_TYPE_INVALID; + ReleaseCmdBackup(cmdinfo); break; default: + ReleaseCmdBackup(cmdinfo); ereport(FATAL, (EPROTO, errmsg("invalid frontend message type %d", @@ -1237,11 +1674,32 @@ static int ReadCommand(GTMProxy_ConnectionInfo *conninfo, StringInfo inBuf) { int qtype; + int rv; + int connIdx = conninfo->con_id; + int anyBackup; + int myLocalId; + myLocalId = GetMyThreadInfo->thr_localid; + anyBackup = (GetMyThreadInfo->thr_any_backup[connIdx] ? TRUE : FALSE); + + /* * Get message type code from the frontend. */ - qtype = pq_getbyte(conninfo->con_port); + if (!anyBackup) + { + qtype = pq_getbyte(conninfo->con_port); + GetMyThreadInfo->thr_qtype[connIdx] = qtype; + /* + * We should not update thr_any_backup here. This should be + * updated when the backup is consumed or command processing + * is done. + */ + } + else + { + qtype = GetMyThreadInfo->thr_qtype[connIdx]; + } if (qtype == EOF) /* frontend disconnected */ { @@ -1286,9 +1744,23 @@ ReadCommand(GTMProxy_ConnectionInfo *conninfo, StringInfo inBuf) * after the type code; we can read the message contents independently of * the type. */ - if (pq_getmessage(conninfo->con_port, inBuf, 0)) - return EOF; /* suitable message already logged */ + if (!anyBackup) + { + if (pq_getmessage(conninfo->con_port, inBuf, 0)) + return EOF; /* suitable message already logged */ + copyStringInfo(&(GetMyThreadInfo->thr_inBufData[connIdx]), inBuf); + + /* The next line should be added when we add the code to clear backup + * when the response is processed. 
*/ +#if 0 + GetMyThreadInfo->thr_any_backup[connIdx] = TRUE; +#endif + } + else + { + copyStringInfo(inBuf, &(GetMyThreadInfo->thr_inBufData[connIdx])); + } return qtype; } @@ -1343,6 +1815,7 @@ ProcessPGXCNodeCommand(GTMProxy_ConnectionInfo *conninfo, GTM_Conn *gtm_conn, len = pq_getmsgint(message, sizeof(GTM_StrLen)); pq_getmsgbytes(message, len); + /* Then the next is the port number */ memcpy(&cmd_data.cd_reg.port, pq_getmsgbytes(message, sizeof (GTM_PGXCNodePort)), sizeof (GTM_PGXCNodePort)); memcpy(&cmd_data.cd_reg.proxynum, pq_getmsgbytes(message, sizeof (GTM_PGXCNodeId)), @@ -1575,8 +2048,10 @@ GTMProxy_ProxyCommand(GTMProxy_ConnectionInfo *conninfo, GTM_Conn *gtm_conn, thrinfo->thr_processed_commands = gtm_lappend(thrinfo->thr_processed_commands, cmdinfo); /* Finish the message. */ + Enable_Longjmp(); if (gtmpqPutMsgEnd(gtm_conn)) elog(ERROR, "Error finishing the message"); + Disable_Longjmp(); return; } @@ -1621,6 +2096,7 @@ static void GTMProxy_ProxyPGXCNodeCommand(GTMProxy_ConnectionInfo *conninfo,GTM_ gtmpqPutnchar(cmd_data.cd_reg.datafolder, strlen(cmd_data.cd_reg.datafolder), gtm_conn) || /* Node Status */ gtmpqPutInt(cmd_data.cd_reg.status, sizeof(GTM_PGXCNodeStatus), gtm_conn)) + elog(ERROR, "Error proxing data"); break; @@ -1841,8 +2317,10 @@ GTMProxy_ProcessPendingCommands(GTMProxy_ThreadInfo *thrinfo) } /* Finish the message. */ + Enable_Longjmp(); if (gtmpqPutMsgEnd(gtm_conn)) elog(ERROR, "Error finishing the message"); + Disable_Longjmp(); /* * Move the entire list to the processed command @@ -1879,8 +2357,10 @@ GTMProxy_ProcessPendingCommands(GTMProxy_ThreadInfo *thrinfo) } /* Finish the message. */ + Enable_Longjmp(); if (gtmpqPutMsgEnd(gtm_conn)) elog(ERROR, "Error finishing the message"); + Disable_Longjmp(); /* * Move the entire list to the processed command @@ -1919,8 +2399,10 @@ GTMProxy_ProcessPendingCommands(GTMProxy_ThreadInfo *thrinfo) } /* Finish the message. 
*/ + Enable_Longjmp(); if (gtmpqPutMsgEnd(gtm_conn)) elog(ERROR, "Error finishing the message"); + Disable_Longjmp(); /* @@ -1958,8 +2440,10 @@ GTMProxy_ProcessPendingCommands(GTMProxy_ThreadInfo *thrinfo) } /* Finish the message. */ + Enable_Longjmp(); if (gtmpqPutMsgEnd(gtm_conn)) elog(ERROR, "Error finishing the message"); + Disable_Longjmp(); /* * Move the entire list to the processed command @@ -2047,9 +2531,9 @@ SetDataDir(void) new = make_absolute_path(GTMProxyDataDir); if (!new) ereport(FATAL, - (errno, - errmsg("failed to set the data directory \"%s\"", - GTMProxyDataDir))); + (errno, + errmsg("failed to set the data directory \"%s\"", + GTMProxyDataDir))); if (GTMProxyDataDir) free(GTMProxyDataDir); @@ -2360,9 +2844,13 @@ failed: /* * Register Proxy on GTM + * + * If reconnect is specified, then existing connection is closed + * and the target GTM is taken from NewGTMServerHost and + * NewGTMServerPortNumber. */ static void -RegisterProxy(void) +RegisterProxy(bool is_reconnect) { GTM_PGXCNodeType type = PGXC_NODE_GTM_PROXY; GTM_PGXCNodePort port = (GTM_PGXCNodePort) GTMProxyPortNumber; @@ -2370,6 +2858,26 @@ RegisterProxy(void) GTM_PGXCNodeId proxynum = 0; time_t finish_time; + MemoryContext old_mcxt; + + if (is_reconnect) + { + elog(NOTICE, + "Reconnect to new GTM, hostname=%s, port=%d", + NewGTMServerHost, NewGTMServerPortNumber); + /* + * Now reconnect. Close the exising connection + * and update the target host and port. 
+ * First, change the memory context to TopMemoryContext + */ + old_mcxt = MemoryContextSwitchTo(TopMemoryContext); + + /* Change the target to new GTM */ + GTMPQfinish(master_conn); + GTMServerHost = NewGTMServerHost; + GTMServerPortNumber = NewGTMServerPortNumber; + } + master_conn = ConnectGTM(); if (!master_conn) goto failed; @@ -2413,6 +2921,9 @@ RegisterProxy(void) Assert(res->gr_resdata.grd_node.nodenum == GTMProxyID); } + /* If reconnect, restore the old memory context */ + if (is_reconnect) + MemoryContextSwitchTo(old_mcxt); return; failed: @@ -2443,3 +2954,46 @@ ConnectGTM(void) return conn; } + +/* + * Release backup command data + */ +static void ReleaseCmdBackup(GTMProxy_CommandInfo *cmdinfo) +{ + GTMProxy_ConnID connIdx = cmdinfo->ci_conn->con_id; + + GetMyThreadInfo->thr_any_backup[connIdx] = FALSE; + GetMyThreadInfo->thr_qtype[connIdx] = 0; + resetStringInfo(&(GetMyThreadInfo->thr_inBufData[connIdx])); +} + +static void +workerThreadReconnectToGTMstandby(void) +{ + char gtm_connect_string[1024]; + + /* + * First of all, we should acquire reconnect control lock in READ mode + * to wait for the main thread to finish reconnect. 
+ */ + GTM_RWLockAcquire(&ReconnectControlLock, GTM_LOCKMODE_READ); + GTM_RWLockRelease(&ReconnectControlLock); /* The lock not needed any longer */ + PG_SETMASK(&UnBlockSig); + + /* Disconnect the current connection and re-connect to the new GTM */ + GTMPQfinish(GetMyThreadInfo->thr_gtm_conn); + sprintf(gtm_connect_string, "host=%s port=%d pgxc_node_id=%d remote_type=%d", + NewGTMServerHost, NewGTMServerPortNumber, GTMProxyID, PGXC_NODE_GTM_PROXY); + GetMyThreadInfo->thr_gtm_conn = PQconnectGTM(gtm_connect_string); + + if (GetMyThreadInfo->thr_gtm_conn == NULL) + elog(FATAL, "GTM connection failed."); + + /* Set GTM communication error handling option */ + GetMyThreadInfo->thr_gtm_conn->gtmErrorWaitOpt = GTMErrorWaitOpt; + GetMyThreadInfo->thr_gtm_conn->gtmErrorWaitSecs = GTMErrorWaitSecs; + GetMyThreadInfo->thr_gtm_conn->gtmErrorWaitCount = GTMErrorWaitCount; + + /* Initialize the command processing */ + GetMyThreadInfo->reconnect_issued = FALSE; +} diff --git a/src/gtm/proxy/proxy_thread.c b/src/gtm/proxy/proxy_thread.c index 4139936104..6aca454a83 100644 --- a/src/gtm/proxy/proxy_thread.c +++ b/src/gtm/proxy/proxy_thread.c @@ -27,6 +27,9 @@ GTMProxy_Threads *GTMProxyThreads = >MProxyThreadsData; #define GTM_PROXY_MAX_THREADS 1024 /* Max threads allowed in the GTMProxy */ #define GTMProxyThreadsFull (GTMProxyThreads->gt_thread_count == GTMProxyThreads->gt_array_size) +extern int GTMProxyWorkerThreads; +extern GTMProxy_ThreadInfo **Proxy_ThreadInfo; + /* * Add the given thrinfo structure to the global array, expanding it if * necessary @@ -126,7 +129,7 @@ GTMProxy_ThreadRemove(GTMProxy_ThreadInfo *thrinfo) * "startroutine". The thread information is returned to the calling process. 
*/ GTMProxy_ThreadInfo * -GTMProxy_ThreadCreate(void *(* startroutine)(void *)) +GTMProxy_ThreadCreate(void *(* startroutine)(void *), int idx) { GTMProxy_ThreadInfo *thrinfo; int err; @@ -142,6 +145,11 @@ GTMProxy_ThreadCreate(void *(* startroutine)(void *)) GTM_CVInit(&thrinfo->thr_cv); /* + * Initialize communication area with SIGUSR2 signal handler (reconnect) + */ + Proxy_ThreadInfo[idx] = thrinfo; + + /* * The thread status is set to GTM_PROXY_THREAD_STARTING and will be changed by * the thread itself when it actually starts executing */ @@ -418,6 +426,13 @@ GTMProxy_ThreadRemoveConnection(GTMProxy_ThreadInfo *thrinfo, GTMProxy_Connectio } /* + * Reset command backup info + */ + thrinfo->thr_any_backup[ii] = FALSE; + thrinfo->thr_qtype[ii] = 0; + resetStringInfo(&(thrinfo->thr_inBufData[ii])); + + /* * If this is the last entry in the array ? If not, then copy the last * entry in this slot and mark the last slot an empty */ diff --git a/src/include/gtm/gtm_proxy.h b/src/include/gtm/gtm_proxy.h index ead825320b..d7db891c6a 100644 --- a/src/include/gtm/gtm_proxy.h +++ b/src/include/gtm/gtm_proxy.h @@ -120,6 +120,13 @@ typedef struct GTMProxy_ThreadInfo gtm_List *thr_pending_commands[MSG_TYPE_COUNT]; GTM_Conn *thr_gtm_conn; + + /* Reconnect Info */ + int can_accept_SIGUSR2; + int reconnect_issued; + int can_longjmp; + sigjmp_buf longjmp_env; + } GTMProxy_ThreadInfo; typedef struct GTMProxy_Threads @@ -138,7 +145,7 @@ int GTMProxy_ThreadRemove(GTMProxy_ThreadInfo *thrinfo); int GTMProxy_ThreadJoin(GTMProxy_ThreadInfo *thrinfo); void GTMProxy_ThreadExit(void); -extern GTMProxy_ThreadInfo *GTMProxy_ThreadCreate(void *(* startroutine)(void *)); +extern GTMProxy_ThreadInfo *GTMProxy_ThreadCreate(void *(* startroutine)(void *), int idx); extern GTMProxy_ThreadInfo * GTMProxy_GetThreadInfo(GTM_ThreadID thrid); extern GTMProxy_ThreadInfo *GTMProxy_ThreadAddConnection(GTMProxy_ConnectionInfo *conninfo); extern int GTMProxy_ThreadRemoveConnection(GTMProxy_ThreadInfo 
*thrinfo, @@ -236,4 +243,23 @@ extern GTM_ThreadID TopMostThreadID; CritSectionCount--; \ } while(0) +/* Signal Handler controller */ +#define SIGUSR2DETECTED() (GetMyThreadInfo->reconnect_issued == TRUE) +#define RECONNECT_LONGJMP() do{longjmp(GetMyThreadInfo->longjmp_env, 1);}while(0) +#if 1 +#define Disable_Longjmp() do{GetMyThreadInfo->can_longjmp = FALSE;}while(0) +#define Enable_Longjmp() \ + do{ \ + if (SIGUSR2DETECTED()) { \ + RECONNECT_LONGJMP(); \ + } \ + else { \ + GetMyThreadInfo->can_longjmp = TRUE; \ + } \ + } while(0) +#else +#define Disable_Longjmp() +#define Enable_Longjmp() +#endif + #endif |