summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael P2011-01-13 02:09:41 +0000
committerPavan Deolasee2011-05-19 17:46:47 +0000
commit7d3c6424afe048548cb232f0b08492e1612553af (patch)
treeb147c9d34a3a4c493fcb5f154275015f7aa4a5d0
parent4dad125a5f6a825bc20fba63fc660721b74b832c (diff)
Node Registering feature
Feature to register a Proxy, Coordinator or Datanode in a Postgres-XC cluster.
When a PGXC node connects to the cluster, it connects to GTM to register the following information:
- Node ID
- Node type
- Data folder
- Port number
- Socket IP (taken directly from the remote socket when registering)
- Number of the Proxy the node goes through (0 if no Proxy is used)
When a node is shut down, it automatically unregisters itself. If a node is shut down quickly, it only disconnects and can reconnect later; in this case, the node ID stays reserved for this node. When a node tries to register with an ID that is already in use, it cannot start up. The socket IP is forwarded to GTM if the node goes through a Proxy. As the Proxy also registers itself, a start-up option "-i" has been added to set the Proxy ID.
-rw-r--r--src/backend/access/transam/gtm.c62
-rw-r--r--src/backend/postmaster/postmaster.c30
-rw-r--r--src/gtm/Makefile2
-rw-r--r--src/gtm/client/fe-protocol.c15
-rw-r--r--src/gtm/client/gtm_client.c94
-rw-r--r--src/gtm/main/Makefile2
-rw-r--r--src/gtm/main/main.c45
-rw-r--r--src/gtm/proxy/Makefile2
-rw-r--r--src/gtm/proxy/proxy_main.c433
-rw-r--r--src/gtm/recovery/Makefile24
-rw-r--r--src/gtm/recovery/register.c779
-rw-r--r--src/include/access/gtm.h4
-rw-r--r--src/include/gtm/gtm.h4
-rw-r--r--src/include/gtm/gtm_c.h2
-rw-r--r--src/include/gtm/gtm_client.h13
-rw-r--r--src/include/gtm/gtm_msg.h6
-rw-r--r--src/include/gtm/gtm_proxy.h11
-rw-r--r--src/include/gtm/gtm_txn.h4
-rw-r--r--src/include/gtm/libpq-be.h2
-rw-r--r--src/include/gtm/libpq-int.h4
-rw-r--r--src/include/gtm/register.h74
21 files changed, 1545 insertions, 67 deletions
diff --git a/src/backend/access/transam/gtm.c b/src/backend/access/transam/gtm.c
index a4645780b7..a9bf1d6ea1 100644
--- a/src/backend/access/transam/gtm.c
+++ b/src/backend/access/transam/gtm.c
@@ -426,3 +426,65 @@ RenameSequenceGTM(char *seqname, const char *newseqname)
return conn ? rename_sequence(conn, &seqkey, &newseqkey) : -1;
}
+
+/*
+ * RegisterGTM
+ *
+ * Register this node (identified by PGXCNodeId) on GTM with the given node
+ * type, port and data folder.
+ *
+ * Returns EOF when no GTM connection is available after CheckConnection(),
+ * otherwise the status of the last node_register() attempt (negative on
+ * failure).  A failed first attempt is retried exactly once on a freshly
+ * re-initialized connection.  The connection is left open on return.
+ */
+int
+RegisterGTM(GTM_PGXCNodeType type, GTM_PGXCNodePort port, char *datafolder)
+{
+	int			status;
+
+	CheckConnection();
+	if (conn == NULL)
+		return EOF;
+
+	status = node_register(conn, type, port, PGXCNodeId, datafolder);
+	if (status >= 0)
+		return status;
+
+	/* First attempt failed: rebuild the connection and try one more time */
+	CloseGTM();
+	InitGTM();
+	if (conn != NULL)
+		status = node_register(conn, type, port, PGXCNodeId, datafolder);
+
+	return status;
+}
+
+/*
+ * UnregisterGTM
+ *
+ * Unregister this node (identified by PGXCNodeId) of the given type on GTM.
+ *
+ * Returns EOF when no GTM connection is available after CheckConnection(),
+ * otherwise the status of the last node_unregister() attempt (negative on
+ * failure).  A failed first attempt is retried exactly once on a freshly
+ * re-initialized connection.  Once an attempt has been made, the GTM
+ * connection is closed whatever the outcome, as the node is shutting down.
+ */
+int
+UnregisterGTM(GTM_PGXCNodeType type)
+{
+	int			status;
+
+	CheckConnection();
+	if (conn == NULL)
+		return EOF;
+
+	status = node_unregister(conn, type, PGXCNodeId);
+	if (status < 0)
+	{
+		/* First attempt failed: rebuild the connection and try once more */
+		CloseGTM();
+		InitGTM();
+		if (conn != NULL)
+			status = node_unregister(conn, type, PGXCNodeId);
+	}
+
+	/* Cut the connection so that the node can shut down smoothly */
+	CloseGTM();
+
+	return status;
+}
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index a1382eab81..5a428cf337 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -1056,6 +1056,24 @@ PostmasterMain(int argc, char *argv[])
*/
whereToSendOutput = DestNone;
+#ifdef PGXC
+ /* Register node on GTM during Postmaster Startup. */
+ if (IS_PGXC_COORDINATOR)
+ {
+ if (RegisterGTM(PGXC_NODE_COORDINATOR, PostPortNumber, userDoption) < 0)
+ ereport(FATAL,
+ (errcode(ERRCODE_IO_ERROR),
+ errmsg("Can not register Coordinator on GTM")));
+ }
+ if (IS_PGXC_DATANODE)
+ {
+ if (RegisterGTM(PGXC_NODE_DATANODE, PostPortNumber, userDoption) < 0)
+ ereport(FATAL,
+ (errcode(ERRCODE_IO_ERROR),
+ errmsg("Can not register Datanode on GTM")));
+ }
+#endif
+
/*
* Initialize stats collection subsystem (this does NOT start the
* collector process!)
@@ -2133,6 +2151,12 @@ pmdie(SIGNAL_ARGS)
/* and the pool manager too */
if (IS_PGXC_COORDINATOR && PgPoolerPID != 0)
signal_child(PgPoolerPID, SIGTERM);
+
+ /* Unregister Node on GTM */
+ if (IS_PGXC_COORDINATOR)
+ UnregisterGTM(PGXC_NODE_COORDINATOR);
+ else if (IS_PGXC_DATANODE)
+ UnregisterGTM(PGXC_NODE_DATANODE);
#endif
pmState = PM_WAIT_BACKUP;
}
@@ -2184,6 +2208,12 @@ pmdie(SIGNAL_ARGS)
/* and the pool manager too */
if (IS_PGXC_COORDINATOR && PgPoolerPID != 0)
signal_child(PgPoolerPID, SIGTERM);
+
+ /* Unregister Node on GTM */
+ if (IS_PGXC_COORDINATOR)
+ UnregisterGTM(PGXC_NODE_COORDINATOR);
+ else if (IS_PGXC_DATANODE)
+ UnregisterGTM(PGXC_NODE_DATANODE);
#endif
pmState = PM_WAIT_BACKENDS;
}
diff --git a/src/gtm/Makefile b/src/gtm/Makefile
index 51c55e0dd5..52149363d9 100644
--- a/src/gtm/Makefile
+++ b/src/gtm/Makefile
@@ -11,7 +11,7 @@ subdir = src/gtm
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
-WANTED_DIRS=common path libpq client main proxy gtm_ctl
+WANTED_DIRS=common path libpq client recovery main proxy gtm_ctl
all:
@for dir in $(WANTED_DIRS); do \
diff --git a/src/gtm/client/fe-protocol.c b/src/gtm/client/fe-protocol.c
index 117f89fa0a..a70be31f4b 100644
--- a/src/gtm/client/fe-protocol.c
+++ b/src/gtm/client/fe-protocol.c
@@ -587,6 +587,21 @@ gtmpqParseSuccess(GTM_Conn *conn, GTM_Result *result)
}
break;
+ case NODE_UNREGISTER_RESULT:
+ case NODE_REGISTER_RESULT:
+ if (gtmpqGetnchar((char *)&result->gr_resdata.grd_node.type,
+ sizeof (GTM_PGXCNodeType), conn))
+ {
+ result->gr_status = -1;
+ break;
+ }
+ if (gtmpqGetnchar((char *)&result->gr_resdata.grd_node.nodenum,
+ sizeof (GTM_PGXCNodeId), conn))
+ {
+ result->gr_status = -1;
+ }
+ break;
+
default:
printfGTMPQExpBuffer(&conn->errorMessage,
"unexpected result type from server; result typr was \"%d\"\n",
diff --git a/src/gtm/client/gtm_client.c b/src/gtm/client/gtm_client.c
index 08911d89e2..9501478363 100644
--- a/src/gtm/client/gtm_client.c
+++ b/src/gtm/client/gtm_client.c
@@ -791,6 +791,100 @@ send_failed:
return -1;
}
+/*
+ * Register a Node on GTM
+ * Seen from a Node viewpoint, we do not know if we are directly connected to GTM
+ * or go through a proxy, so register 0 as proxy number.
+ * This number is modified at proxy level automatically.
+ *
+ * The request carries, in order: node type, node number, port, proxy number
+ * (always 0 here), the data folder length and the data folder bytes (no
+ * terminating NUL is sent).
+ *
+ * Returns the gr_status of the GTM result, or -1 if datafolder is NULL or if
+ * the request could not be sent or the answer could not be received.
+ */
+int node_register(GTM_Conn *conn, GTM_PGXCNodeType type, GTM_PGXCNodePort port, GTM_PGXCNodeId nodenum,
+	char *datafolder)
+{
+	GTM_Result *res = NULL;
+	time_t finish_time;
+	GTM_PGXCNodeId proxynum = 0;
+	size_t datafolder_len;
+
+	/* strlen() on a NULL pointer is undefined behavior: fail cleanly instead */
+	if (datafolder == NULL)
+		return -1;
+
+	/* Compute the length once; it is sent both as a field and as byte count */
+	datafolder_len = strlen(datafolder);
+
+	if (gtmpqPutMsgStart('C', true, conn) ||
+		gtmpqPutInt(MSG_NODE_REGISTER, sizeof (GTM_MessageType), conn) ||
+		gtmpqPutnchar((char *)&type, sizeof(GTM_PGXCNodeType), conn) ||
+		gtmpqPutnchar((char *)&nodenum, sizeof(GTM_PGXCNodeId), conn) ||
+		gtmpqPutnchar((char *)&port, sizeof(GTM_PGXCNodePort), conn) ||
+		gtmpqPutnchar((char *)&proxynum, sizeof(GTM_PGXCNodeId), conn) ||
+		gtmpqPutInt(datafolder_len, sizeof (GTM_StrLen), conn) ||
+		gtmpqPutnchar(datafolder, datafolder_len, conn))
+		goto send_failed;
+
+	/* Finish the message. */
+	if (gtmpqPutMsgEnd(conn))
+		goto send_failed;
+
+	/* Flush to ensure backend gets it. */
+	if (gtmpqFlush(conn))
+		goto send_failed;
+
+	finish_time = time(NULL) + CLIENT_GTM_TIMEOUT;
+	if (gtmpqWaitTimed(true, false, conn, finish_time) ||
+		gtmpqReadData(conn) < 0)
+		goto receive_failed;
+
+	if ((res = GTMPQgetResult(conn)) == NULL)
+		goto receive_failed;
+
+	/* Check on node type and node number */
+	if (res->gr_status == 0)
+	{
+		Assert(res->gr_resdata.grd_node.type == type);
+		Assert(res->gr_resdata.grd_node.nodenum == nodenum);
+	}
+
+	return res->gr_status;
+
+receive_failed:
+send_failed:
+	return -1;
+}
+
+/*
+ * Unregister a Node on GTM
+ *
+ * Sends a MSG_NODE_UNREGISTER request carrying the node type and node number,
+ * then waits up to CLIENT_GTM_TIMEOUT for the answer.
+ *
+ * Returns the gr_status of the GTM result, or -1 if the request could not be
+ * sent or the answer could not be received.
+ */
+int node_unregister(GTM_Conn *conn, GTM_PGXCNodeType type, GTM_PGXCNodeId nodenum)
+{
+	GTM_Result *reply;
+	time_t		deadline;
+
+	/* Build the request, finish it and flush it so that GTM gets it */
+	if (gtmpqPutMsgStart('C', true, conn) ||
+		gtmpqPutInt(MSG_NODE_UNREGISTER, sizeof (GTM_MessageType), conn) ||
+		gtmpqPutnchar((char *)&type, sizeof(GTM_PGXCNodeType), conn) ||
+		gtmpqPutnchar((char *)&nodenum, sizeof(GTM_PGXCNodeId), conn) ||
+		gtmpqPutMsgEnd(conn) ||
+		gtmpqFlush(conn))
+		return -1;
+
+	/* Wait for the answer, but no longer than the client timeout */
+	deadline = time(NULL) + CLIENT_GTM_TIMEOUT;
+	if (gtmpqWaitTimed(true, false, conn, deadline) ||
+		gtmpqReadData(conn) < 0)
+		return -1;
+
+	reply = GTMPQgetResult(conn);
+	if (reply == NULL)
+		return -1;
+
+	/* On success GTM echoes back the node type and node number */
+	if (reply->gr_status == 0)
+	{
+		Assert(reply->gr_resdata.grd_node.type == type);
+		Assert(reply->gr_resdata.grd_node.nodenum == nodenum);
+	}
+
+	return reply->gr_status;
+}
+
void
GTM_FreeResult(GTM_Result *result, GTM_PGXCNodeType remote_type)
{
diff --git a/src/gtm/main/Makefile b/src/gtm/main/Makefile
index 5d8aaea7d8..3ed2c78b12 100644
--- a/src/gtm/main/Makefile
+++ b/src/gtm/main/Makefile
@@ -3,7 +3,7 @@
top_build_dir=../..
include $(top_build_dir)/gtm/Makefile.global
-OBJS=main.o gtm_thread.o gtm_txn.o gtm_seq.o gtm_snap.o gtm_time.o ../common/libgtm.a ../libpq/libpqcomm.a ../path/libgtmpath.a
+OBJS=main.o gtm_thread.o gtm_txn.o gtm_seq.o gtm_snap.o gtm_time.o ../common/libgtm.a ../libpq/libpqcomm.a ../path/libgtmpath.a ../recovery/libgtmrecovery.a ../client/libgtmclient.a
LDFLAGS=-L$(top_build_dir)/common -L$(top_build_dir)/libpq
LIBS=-lpthread
diff --git a/src/gtm/main/main.c b/src/gtm/main/main.c
index 118faabd53..8c1f4ca080 100644
--- a/src/gtm/main/main.c
+++ b/src/gtm/main/main.c
@@ -34,6 +34,7 @@
#include "gtm/pqsignal.h"
#include "gtm/pqformat.h"
#include "gtm/assert.h"
+#include "gtm/register.h"
#include "gtm/gtm_txn.h"
#include "gtm/gtm_seq.h"
#include "gtm/gtm_msg.h"
@@ -69,7 +70,7 @@ static int GTMAddConnection(Port *port);
static int ReadCommand(Port *myport, StringInfo inBuf);
static void ProcessCommand(Port *myport, StringInfo input_message);
-static void ProcessCoordinatorCommand(Port *myport, GTM_MessageType mtype, StringInfo message);
+static void ProcessPGXCNodeCommand(Port *myport, GTM_MessageType mtype, StringInfo message);
static void ProcessTransactionCommand(Port *myport, GTM_MessageType mtype, StringInfo message);
static void ProcessSnapshotCommand(Port *myport, GTM_MessageType mtype, StringInfo message);
static void ProcessSequenceCommand(Port *myport, GTM_MessageType mtype, StringInfo message);
@@ -77,7 +78,7 @@ static void ProcessQueryCommand(Port *myport, GTM_MessageType mtype, StringInfo
static void GTM_RegisterPGXCNode(Port *myport, GTM_PGXCNodeId pgxc_node_id);
static void GTM_UnregisterPGXCNode(Port *myport, GTM_PGXCNodeId pgxc_node_id);
-
+
static bool CreateOptsFile(int argc, char *argv[]);
static void CreateDataDirLockFile(void);
static void CreateLockFile(const char *filename, const char *refName);
@@ -146,6 +147,9 @@ BaseInit()
sprintf(GTMLogFile, "%s/%s", GTMDataDir, GTM_LOG_FILE);
}
+ /* Save Node Register File in register.c */
+ Recovery_SaveRegisterFileName(GTMDataDir);
+
DebugFileOpen();
GTM_InitTxnManager();
@@ -185,6 +189,10 @@ GTM_SigleHandler(int signal)
/*
* XXX We should do a clean shutdown here.
*/
+
+ /* Rewrite Register Information (clean up unregister records) */
+ Recovery_SaveRegisterInfo();
+
/* Delete pid file before shutting down */
DeleteLockFile(GTM_PID_FILE);
@@ -306,6 +314,10 @@ main(int argc, char *argv[])
GTM_RestoreSeqInfo(ctlfd);
close(ctlfd);
+
+ /* Recover Data of Registered nodes. */
+ Recovery_RestoreRegisterInfo();
+
/*
* Establish input sockets.
*/
@@ -712,6 +724,9 @@ GTM_ThreadMain(void *argp)
* Remove all transactions opened within the thread
*/
GTM_RemoveAllTransInfos(-1);
+
+ /* Disconnect node if necessary */
+ Recovery_PGXCNodeDisconnect(thrinfo->thr_conn->con_port);
pthread_exit(thrinfo);
break;
@@ -731,6 +746,9 @@ GTM_ThreadMain(void *argp)
*/
GTM_RemoveAllTransInfos(-1);
+ /* Disconnect node if necessary */
+ Recovery_PGXCNodeDisconnect(thrinfo->thr_conn->con_port);
+
ereport(FATAL,
(EPROTO,
errmsg("invalid frontend message type %d",
@@ -762,8 +780,9 @@ ProcessCommand(Port *myport, StringInfo input_message)
switch (mtype)
{
- case MSG_UNREGISTER_COORD:
- ProcessCoordinatorCommand(myport, mtype, input_message);
+ case MSG_NODE_REGISTER:
+ case MSG_NODE_UNREGISTER:
+ ProcessPGXCNodeCommand(myport, mtype, input_message);
break;
case MSG_TXN_BEGIN:
@@ -807,6 +826,9 @@ ProcessCommand(Port *myport, StringInfo input_message)
case MSG_BACKEND_DISCONNECT:
GTM_RemoveAllTransInfos(proxyhdr.ph_conid);
+
+ /* Mark PGXC Node as disconnected if backend disconnected is postmaster */
+ ProcessPGXCNodeBackendDisconnect(myport, input_message);
break;
default:
@@ -917,22 +939,21 @@ ReadCommand(Port *myport, StringInfo inBuf)
}
+/*
+ * Dispatch a PGXC node command received by GTM:
+ * MSG_NODE_REGISTER goes to ProcessPGXCNodeRegister() and
+ * MSG_NODE_UNREGISTER goes to ProcessPGXCNodeUnregister().
+ * Any other message type is a caller error and trips Assert(0).
+ *
+ * The message body is no longer parsed or terminated (pq_getmsgend) here;
+ * presumably each handler consumes the whole message itself -- TODO confirm
+ * against register.c.
+ */
static void
-ProcessCoordinatorCommand(Port *myport, GTM_MessageType mtype, StringInfo message)
+ProcessPGXCNodeCommand(Port *myport, GTM_MessageType mtype, StringInfo message)
{
- GTM_PGXCNodeId cid;
-
- cid = pq_getmsgint(message, sizeof (GTM_PGXCNodeId));
-
switch (mtype)
{
- case MSG_UNREGISTER_COORD:
- GTM_UnregisterPGXCNode(myport, cid);
+ case MSG_NODE_REGISTER:
+ ProcessPGXCNodeRegister(myport, message);
+ break;
+
+ case MSG_NODE_UNREGISTER:
+ ProcessPGXCNodeUnregister(myport, message);
break;
default:
Assert(0); /* Shouldn't come here.. keep compiler quite */
}
- pq_getmsgend(message);
}
static void
diff --git a/src/gtm/proxy/Makefile b/src/gtm/proxy/Makefile
index 3ed6ccce13..d2e6623d6a 100644
--- a/src/gtm/proxy/Makefile
+++ b/src/gtm/proxy/Makefile
@@ -3,7 +3,7 @@
top_build_dir=../..
include $(top_build_dir)/gtm/Makefile.global
-OBJS=proxy_main.o proxy_thread.o ../common/libgtm.a ../libpq/libpqcomm.a ../client/libgtmclient.a ../path/libgtmpath.a
+OBJS=proxy_main.o proxy_thread.o ../common/libgtm.a ../libpq/libpqcomm.a ../client/libgtmclient.a ../path/libgtmpath.a ../recovery/libgtmrecovery.a ../client/libgtmclient.a
LDFLAGS=-L$(top_build_dir)/common -L$(top_build_dir)/libpq
LIBS=-lpthread
diff --git a/src/gtm/proxy/proxy_main.c b/src/gtm/proxy/proxy_main.c
index 4275d91b02..4950e0480c 100644
--- a/src/gtm/proxy/proxy_main.c
+++ b/src/gtm/proxy/proxy_main.c
@@ -25,6 +25,7 @@
#include "gtm/gtm_c.h"
#include "gtm/gtm_proxy.h"
+#include "gtm/register.h"
#include "gtm/elog.h"
#include "gtm/memutils.h"
#include "gtm/gtm_list.h"
@@ -48,6 +49,7 @@ extern char *optarg;
#define GTM_PROXY_DEFAULT_WORKERS 2
#define GTM_PID_FILE "gtm_proxy.pid"
#define GTM_LOG_FILE "gtm_proxy.log"
+#define PROXY_CLIENT_TIMEOUT 20
static char *progname = "gtm_proxy";
char *ListenAddresses;
@@ -58,12 +60,15 @@ char *GTMProxyDataDir;
char *GTMServerHost;
int GTMServerPortNumber;
+GTM_PGXCNodeId GTMProxyID = 0;
+
/* The socket(s) we're listening to. */
#define MAXLISTEN 64
static int ListenSocket[MAXLISTEN];
pthread_key_t threadinfo_key;
static bool GTMProxyAbortPending = false;
+static GTM_Conn *master_conn;
static Port *ConnCreate(int serverFd);
static void ConnFree(Port *conn);
@@ -77,10 +82,12 @@ static void GTMProxy_HandleDisconnect(GTMProxy_ConnectionInfo *conninfo, GTM_Con
static void GTMProxy_ProxyCommand(GTMProxy_ConnectionInfo *conninfo,
GTM_Conn *gtm_conn, GTM_MessageType mtype, StringInfo message);
+static void GTMProxy_ProxyPGXCNodeCommand(GTMProxy_ConnectionInfo *conninfo,
+ GTM_Conn *gtm_conn, GTM_MessageType mtype, GTMProxy_CommandData cmd_data);
static void ProcessCommand(GTMProxy_ConnectionInfo *conninfo,
GTM_Conn *gtm_conn, StringInfo input_message);
-static void ProcessCoordinatorCommand(GTMProxy_ConnectionInfo *conninfo,
+static void ProcessPGXCNodeCommand(GTMProxy_ConnectionInfo *conninfo,
GTM_Conn *gtm_conn, GTM_MessageType mtype, StringInfo message);
static void ProcessTransactionCommand(GTMProxy_ConnectionInfo *conninfo,
GTM_Conn *gtm_conn, GTM_MessageType mtype, StringInfo message);
@@ -92,9 +99,7 @@ static void ProcessSequenceCommand(GTMProxy_ConnectionInfo *conninfo,
static void GTMProxy_RegisterPGXCNode(GTMProxy_ConnectionInfo *conninfo,
GTM_PGXCNodeId cid,
GTM_PGXCNodeType remote_type,
- bool is_postmaster);
-static void GTMProxy_UnregisterPGXCNode(GTMProxy_ConnectionInfo *conninfo,
- GTM_PGXCNodeId pgxc_node_id);
+ bool is_postmaster);
static void ProcessResponse(GTMProxy_ThreadInfo *thrinfo,
GTMProxy_CommandInfo *cmdinfo, GTM_Result *res);
@@ -109,6 +114,9 @@ static void CreateLockFile(const char *filename, const char *refName);
static void ChangeToDataDir(void);
static void checkDataDir(void);
static void DeleteLockFile(const char *filename);
+static void RegisterProxy(void);
+static void UnregisterProxy(void);
+static GTM_Conn *ConnectGTM(void);
/*
* One-time initialization. It's called immediately after the main process
@@ -170,6 +178,12 @@ BaseInit()
sprintf(GTMLogFile, "%s/%s", GTMProxyDataDir, GTM_LOG_FILE);
}
+ /* Save Node Register File in register.c */
+ Recovery_SaveRegisterFileName(GTMProxyDataDir);
+
+ /* Register Proxy on GTM */
+ RegisterProxy();
+
DebugFileOpen();
/*
@@ -203,6 +217,12 @@ GTMProxy_SigleHandler(int signal)
return;
}
+ /* Unregister Proxy on GTM */
+ UnregisterProxy();
+
+ /* Rewrite Register Information (clean up unregister records) */
+ Recovery_SaveRegisterInfo();
+
/*
* XXX We should do a clean shutdown here.
*/
@@ -224,10 +244,11 @@ help(const char *progname)
printf(_("This is the GTM proxy.\n\n"));
printf(_("Usage:\n %s [OPTION]...\n\n"), progname);
printf(_("Options:\n"));
- printf(_(" -h hostname GTM proxy hostname/IP\n"));
+ printf(_(" -h hostname GTM proxy hostname/IP\n"));
printf(_(" -p port GTM proxy port number\n"));
printf(_(" -s hostname GTM server hostname/IP \n"));
printf(_(" -t port GTM server port number\n"));
+ printf(_(" -i ID number GTM proxy ID number\n"));
printf(_(" -n count Number of worker threads\n"));
printf(_(" -D directory GTM proxy working directory\n"));
printf(_(" -l filename GTM proxy log file name \n"));
@@ -257,11 +278,11 @@ main(int argc, char *argv[])
ListenAddresses = GTM_PROXY_DEFAULT_HOSTNAME;
GTMProxyPortNumber = GTM_PROXY_DEFAULT_PORT;
GTMProxyWorkerThreads = GTM_PROXY_DEFAULT_WORKERS;
-
+
/*
* Parse the command like options and set variables
*/
- while ((opt = getopt(argc, argv, "h:p:n:D:l:s:t:")) != -1)
+ while ((opt = getopt(argc, argv, "h:i:p:n:D:l:s:t:")) != -1)
{
switch (opt)
{
@@ -270,6 +291,11 @@ main(int argc, char *argv[])
ListenAddresses = strdup(optarg);
break;
+ case 'i':
+ /* GTM Proxy identification number */
+ GTMProxyID = (GTM_PGXCNodeId) atoi(optarg);
+ break;
+
case 'p':
/* Port number for the proxy to listen on */
GTMProxyPortNumber = atoi(optarg);
@@ -313,6 +339,14 @@ main(int argc, char *argv[])
progname);
exit(1);
}
+ if (GTMProxyID == 0)
+ {
+ write_stderr("GTM Proxy ID must be specified\n");
+ write_stderr("Try \"%s --help\" for more information.\n",
+ progname);
+ exit(1);
+ }
+
/*
* GTM accepts no non-option switch arguments.
*/
@@ -333,6 +367,9 @@ main(int argc, char *argv[])
elog(DEBUG3, "Starting GTM proxy at (%s:%d)", ListenAddresses, GTMProxyPortNumber);
+ /* Recover Data of Registered nodes. */
+ Recovery_RestoreRegisterInfo();
+
/*
* Establish input sockets.
*/
@@ -600,8 +637,8 @@ GTMProxy_ThreadMain(void *argp)
/*
* Set up connection with the GTM server
*/
- sprintf(gtm_connect_string, "host=%s port=%d pgxc_node_id=1 remote_type=%d",
- GTMServerHost, GTMServerPortNumber, PGXC_NODE_GTM_PROXY);
+ sprintf(gtm_connect_string, "host=%s port=%d pgxc_node_id=%d remote_type=%d",
+ GTMServerHost, GTMServerPortNumber, GTMProxyID, PGXC_NODE_GTM_PROXY);
thrinfo->thr_gtm_conn = PQconnectGTM(gtm_connect_string);
@@ -943,8 +980,9 @@ ProcessCommand(GTMProxy_ConnectionInfo *conninfo, GTM_Conn *gtm_conn,
switch (mtype)
{
- case MSG_UNREGISTER_COORD:
- ProcessCoordinatorCommand(conninfo, gtm_conn, mtype, input_message);
+ case MSG_NODE_REGISTER:
+ case MSG_NODE_UNREGISTER:
+ ProcessPGXCNodeCommand(conninfo, gtm_conn, mtype, input_message);
break;
case MSG_TXN_BEGIN:
@@ -1125,6 +1163,8 @@ ProcessResponse(GTMProxy_ThreadInfo *thrinfo, GTMProxy_CommandInfo *cmdinfo,
case MSG_TXN_COMMIT_PREPARED:
case MSG_TXN_GET_GXID:
case MSG_TXN_GET_GID_DATA:
+ case MSG_NODE_REGISTER:
+ case MSG_NODE_UNREGISTER:
case MSG_SNAPSHOT_GXID_GET:
case MSG_SEQUENCE_INIT:
case MSG_SEQUENCE_GET_CURRENT:
@@ -1243,23 +1283,115 @@ ReadCommand(GTMProxy_ConnectionInfo *conninfo, StringInfo inBuf)
}
static void
-ProcessCoordinatorCommand(GTMProxy_ConnectionInfo *conninfo, GTM_Conn *gtm_conn,
+ProcessPGXCNodeCommand(GTMProxy_ConnectionInfo *conninfo, GTM_Conn *gtm_conn,
GTM_MessageType mtype, StringInfo message)
{
- GTM_PGXCNodeId cid;
+ GTMProxy_CommandData cmd_data;
- cid = pq_getmsgint(message, sizeof (GTM_PGXCNodeId));
-
- switch (mtype)
+ /*
+ * Handle a node register/unregister request received by the proxy: parse
+ * the message, record the change in the proxy's own node registry, then
+ * forward the request to GTM.
+ *
+ * For Node registering, proxy number is also saved and registered on GTM with node.
+ * So get and modify the register message in consequence.
+ */
+ switch(mtype)
{
- case MSG_UNREGISTER_COORD:
- GTMProxy_UnregisterPGXCNode(conninfo, cid);
+ case MSG_NODE_REGISTER:
+ {
+ int len;
+ size_t host_len;
+ MemoryContext oldContext;
+ char remote_host[NI_MAXHOST];
+ char remote_port[NI_MAXSERV];
+
+ /* Get Remote IP and port from Conn structure to register */
+ remote_host[0] = '\0';
+ remote_port[0] = '\0';
+
+ if (gtm_getnameinfo_all(&conninfo->con_port->raddr.addr,
+ conninfo->con_port->raddr.salen,
+ remote_host, sizeof(remote_host),
+ remote_port, sizeof(remote_port),
+ NI_NUMERICSERV))
+ {
+ /* Name lookup failed: fall back to the numeric host form */
+ int ret = gtm_getnameinfo_all(&conninfo->con_port->raddr.addr,
+ conninfo->con_port->raddr.salen,
+ remote_host, sizeof(remote_host),
+ remote_port, sizeof(remote_port),
+ NI_NUMERICHOST | NI_NUMERICSERV);
+
+ if (ret)
+ ereport(WARNING,
+ (errmsg_internal("gtm_getnameinfo_all() failed")));
+ }
+
+ memcpy(&cmd_data.cd_reg.type, pq_getmsgbytes(message, sizeof (GTM_PGXCNodeType)),
+ sizeof (GTM_PGXCNodeType));
+ memcpy(&cmd_data.cd_reg.nodenum, pq_getmsgbytes(message, sizeof (GTM_PGXCNodeId)),
+ sizeof (GTM_PGXCNodeId));
+ memcpy(&cmd_data.cd_reg.port, pq_getmsgbytes(message, sizeof (GTM_PGXCNodePort)),
+ sizeof (GTM_PGXCNodePort));
+ memcpy(&cmd_data.cd_reg.proxynum, pq_getmsgbytes(message, sizeof (GTM_PGXCNodeId)),
+ sizeof (GTM_PGXCNodeId));
+
+ /* The length field is written by the client as a GTM_StrLen */
+ len = pq_getmsgint(message, sizeof (GTM_StrLen));
+ cmd_data.cd_reg.datafolder = (char *)pq_getmsgbytes(message, len);
+ pq_getmsgend(message);
+
+ /*
+ * Copy also remote host address in data to be proxied.
+ * The terminating NUL is copied too, because the forwarding code
+ * calls strlen() on this string.
+ */
+ host_len = strlen(remote_host);
+ cmd_data.cd_reg.ipaddress = (char *) palloc(host_len + 1);
+ memcpy(cmd_data.cd_reg.ipaddress, remote_host, host_len + 1);
+
+ /* Registering has to be saved where it can be seen by all the threads */
+ oldContext = MemoryContextSwitchTo(TopMostMemoryContext);
+
+ /* Register Node also on Proxy */
+ if (Recovery_PGXCNodeRegister(cmd_data.cd_reg.type,
+ cmd_data.cd_reg.nodenum,
+ cmd_data.cd_reg.port,
+ GTMProxyID,
+ NODE_CONNECTED,
+ remote_host,
+ cmd_data.cd_reg.datafolder,
+ false))
+ {
+ ereport(ERROR,
+ (EINVAL,
+ errmsg("Failed to Register node")));
+ }
+
+ MemoryContextSwitchTo(oldContext);
+
+ GTMProxy_ProxyPGXCNodeCommand(conninfo, gtm_conn, mtype, cmd_data);
break;
+ }
+ case MSG_NODE_UNREGISTER:
+ {
+ MemoryContext oldContext;
+
+ memcpy(&cmd_data.cd_reg.type, pq_getmsgbytes(message, sizeof (GTM_PGXCNodeType)),
+ sizeof (GTM_PGXCNodeType));
+ memcpy(&cmd_data.cd_reg.nodenum, pq_getmsgbytes(message, sizeof (GTM_PGXCNodeId)),
+ sizeof (GTM_PGXCNodeId));
+ pq_getmsgend(message);
+
+ /* Unregistering has to be saved in a place where it can be seen by all the threads */
+ oldContext = MemoryContextSwitchTo(TopMostMemoryContext);
+
+ /* Unregister Node also on Proxy */
+ if (Recovery_PGXCNodeUnregister(cmd_data.cd_reg.type, cmd_data.cd_reg.nodenum, false))
+ {
+ ereport(ERROR,
+ (EINVAL,
+ errmsg("Failed to Unregister node")));
+ }
+
+ MemoryContextSwitchTo(oldContext);
+ GTMProxy_ProxyPGXCNodeCommand(conninfo, gtm_conn, mtype, cmd_data);
+ break;
+ }
default:
- Assert(0); /* Shouldn't come here.. keep compiler quite */
+ Assert(0); /* Shouldn't come here.. Keep compiler quiet */
}
- pq_getmsgend(message);
+ return;
}
static void
@@ -1317,7 +1449,7 @@ ProcessTransactionCommand(GTMProxy_ConnectionInfo *conninfo, GTM_Conn *gtm_conn,
break;
default:
- Assert(0); /* Shouldn't come here.. keep compiler quite */
+ Assert(0); /* Shouldn't come here.. keep compiler quiet */
}
}
@@ -1367,7 +1499,7 @@ ProcessSnapshotCommand(GTMProxy_ConnectionInfo *conninfo, GTM_Conn *gtm_conn,
break;
default:
- Assert(0); /* Shouldn't come here.. keep compiler quite */
+ Assert(0); /* Shouldn't come here.. keep compiler quiet */
}
}
@@ -1427,6 +1559,66 @@ GTMProxy_ProxyCommand(GTMProxy_ConnectionInfo *conninfo, GTM_Conn *gtm_conn,
/*
+ * Proxy the incoming message to the GTM server after adding our own identifier
+ * to it. Add also in the registration message the GTM Proxy number and rebuilt message.
+ */
+static void GTMProxy_ProxyPGXCNodeCommand(GTMProxy_ConnectionInfo *conninfo,GTM_Conn *gtm_conn, GTM_MessageType mtype, GTMProxy_CommandData cmd_data)
+{
+	GTMProxy_CommandInfo *cmdinfo;
+	GTMProxy_ThreadInfo *thrinfo = GetMyThreadInfo;
+	GTM_ProxyMsgHeader proxyhdr;
+
+	/* Tag the forwarded message with the id of the originating connection */
+	proxyhdr.ph_conid = conninfo->con_id;
+
+	switch(mtype)
+	{
+		case MSG_NODE_REGISTER:
+			/*
+			 * Rebuild the message: the proxy number sent by the node is
+			 * replaced by GTMProxyID and the remote host address is inserted
+			 * before the data folder.  Both strings are sent as a
+			 * GTM_StrLen-sized length followed by the bytes, without NUL.
+			 */
+			if (gtmpqPutMsgStart('C', true, gtm_conn) ||
+				gtmpqPutnchar((char *)&proxyhdr, sizeof (GTM_ProxyMsgHeader), gtm_conn) ||
+				gtmpqPutInt(MSG_NODE_REGISTER, sizeof (GTM_MessageType), gtm_conn) ||
+				gtmpqPutnchar((char *)&cmd_data.cd_reg.type, sizeof(GTM_PGXCNodeType), gtm_conn) ||
+				gtmpqPutnchar((char *)&cmd_data.cd_reg.nodenum, sizeof(GTM_PGXCNodeId), gtm_conn) ||
+				gtmpqPutnchar((char *)&cmd_data.cd_reg.port, sizeof(GTM_PGXCNodePort), gtm_conn) ||
+				gtmpqPutnchar((char *)&GTMProxyID, sizeof(GTM_PGXCNodeId), gtm_conn) ||
+				gtmpqPutInt(strlen(cmd_data.cd_reg.ipaddress), sizeof (GTM_StrLen), gtm_conn) ||
+				gtmpqPutnchar(cmd_data.cd_reg.ipaddress, strlen(cmd_data.cd_reg.ipaddress), gtm_conn) ||
+				gtmpqPutInt(strlen(cmd_data.cd_reg.datafolder), sizeof (GTM_StrLen), gtm_conn) ||
+				gtmpqPutnchar(cmd_data.cd_reg.datafolder, strlen(cmd_data.cd_reg.datafolder), gtm_conn))
+				elog(ERROR, "Error proxing data");
+			break;
+
+		case MSG_NODE_UNREGISTER:
+			/* Forwarded unchanged apart from the prepended proxy header */
+			if (gtmpqPutMsgStart('C', true, gtm_conn) ||
+				gtmpqPutnchar((char *)&proxyhdr, sizeof (GTM_ProxyMsgHeader), gtm_conn) ||
+				gtmpqPutInt(MSG_NODE_UNREGISTER, sizeof (GTM_MessageType), gtm_conn) ||
+				gtmpqPutnchar((char *)&cmd_data.cd_reg.type, sizeof(GTM_PGXCNodeType), gtm_conn) ||
+				gtmpqPutnchar((char *)&cmd_data.cd_reg.nodenum, sizeof(GTM_PGXCNodeId), gtm_conn))
+				elog(ERROR, "Error proxing data");
+			break;
+
+		default:
+			Assert(0);	/* Shouldn't come here.. Keep compiler quiet */
+	}
+
+	/*
+	 * Add the message to the pending command list
+	 */
+	cmdinfo = palloc0(sizeof (GTMProxy_CommandInfo));
+	cmdinfo->ci_mtype = mtype;
+	cmdinfo->ci_conn = conninfo;
+	cmdinfo->ci_res_index = 0;
+	thrinfo->thr_processed_commands = lappend(thrinfo->thr_processed_commands, cmdinfo);
+
+	/* Finish the message. */
+	if (gtmpqPutMsgEnd(gtm_conn))
+		elog(ERROR, "Error finishing the message");
+
+	return;
+}
+
+
+/*
* Record the incoming message as per its type. After all messages of this type
* are collected, they will be sent in a single message to the GTM server.
*/
@@ -1449,32 +1641,29 @@ GTMProxy_CommandPending(GTMProxy_ConnectionInfo *conninfo, GTM_MessageType mtype
return;
}
+
+/*
+ * Register PGXC Node Connection in Proxy
+ * Registration on GTM is made with MSG_NODE_REGISTER message type when node is launched.
+ *
+ * This only records the node id, the remote node type and the postmaster
+ * flag on the Port of the given connection; nothing is sent to GTM here.
+ */
static void
-GTMProxy_RegisterPGXCNode(GTMProxy_ConnectionInfo *conninfo, GTM_PGXCNodeId cid, GTM_PGXCNodeType remote_type, bool is_postmaster)
+GTMProxy_RegisterPGXCNode(GTMProxy_ConnectionInfo *conninfo,
+ GTM_PGXCNodeId cid,
+ GTM_PGXCNodeType remote_type,
+ bool is_postmaster)
{
- elog(DEBUG3, "Registering PGXC Node with cid %d", cid);
+ elog(DEBUG3, "Registering PGXC Node with id %d", cid);
conninfo->con_port->pgxc_node_id = cid;
conninfo->con_port->remote_type = remote_type;
conninfo->con_port->is_postmaster = is_postmaster;
}
-
-static void
-GTMProxy_UnregisterPGXCNode(GTMProxy_ConnectionInfo *conninfo, GTM_PGXCNodeId cid)
-{
- /*
- * Do a clean shutdown
- */
- return;
-}
-
-
static void
GTMProxy_HandshakeConnection(GTMProxy_ConnectionInfo *conninfo)
{
/*
* We expect a startup message at the very start. The message type is
- * REGISTER_COORD, followed by the 4 byte coordinator ID
+ * REGISTER_COORD, followed by the 4 byte PGXC node ID
*/
char startup_type;
GTM_StartupPacket sp;
@@ -1499,7 +1688,7 @@ GTMProxy_HandshakeConnection(GTMProxy_ConnectionInfo *conninfo)
if (pq_getmessage(conninfo->con_port, &inBuf, 0))
ereport(ERROR,
(EPROTO,
- errmsg("Expecting coordinator ID, but received EOF")));
+ errmsg("Expecting PGXC Node ID, but received EOF")));
memcpy(&sp,
pq_getmsgbytes(&inBuf, sizeof (GTM_StartupPacket)),
@@ -1526,29 +1715,47 @@ GTMProxy_HandleDisconnect(GTMProxy_ConnectionInfo *conninfo, GTM_Conn *gtm_conn)
{
GTM_ProxyMsgHeader proxyhdr;
- conninfo->con_disconnected = true;
- if (conninfo->con_port->sock > 0)
- StreamClose(conninfo->con_port->sock);
- ConnFree(conninfo->con_port);
- conninfo->con_port = NULL;
- proxyhdr.ph_conid = conninfo->con_id;
+ /* Mark node as disconnected if it is a postmaster backend */
+ Recovery_PGXCNodeDisconnect(conninfo->con_port);
+
+ /*
+ * Fill in the proxy header before it is copied into the message below,
+ * otherwise GTM receives an uninitialized connection id.
+ */
+ proxyhdr.ph_conid = conninfo->con_id;
+
/* Start the message. */
if (gtmpqPutMsgStart('C', true, gtm_conn) ||
gtmpqPutnchar((char *)&proxyhdr, sizeof (GTM_ProxyMsgHeader), gtm_conn) ||
- gtmpqPutInt(MSG_BACKEND_DISCONNECT, sizeof (GTM_MessageType), gtm_conn))
+ gtmpqPutInt(MSG_BACKEND_DISCONNECT, sizeof (GTM_MessageType), gtm_conn) ||
+ gtmpqPutc(conninfo->con_port->is_postmaster, gtm_conn))
elog(ERROR, "Error proxing data");
+ /*
+ * Then send node type and node number if backend is a postmaster to
+ * disconnect the correct node.
+ */
+ if (conninfo->con_port->is_postmaster)
+ {
+ if (gtmpqPutnchar((char *)&conninfo->con_port->remote_type,
+ sizeof(GTM_PGXCNodeType), gtm_conn) ||
+ gtmpqPutnchar((char *)&conninfo->con_port->pgxc_node_id,
+ sizeof(GTM_PGXCNodeId), gtm_conn))
+ elog(ERROR, "Error proxing data");
+ }
+
/* Finish the message. */
if (gtmpqPutMsgEnd(gtm_conn))
elog(ERROR, "Error finishing the message");
+ /*
+ * Release the port only now: its fields were still needed to build the
+ * disconnect message above.
+ */
+ conninfo->con_disconnected = true;
+ if (conninfo->con_port->sock > 0)
+ StreamClose(conninfo->con_port->sock);
+ ConnFree(conninfo->con_port);
+ conninfo->con_port = NULL;
+
return;
}
/*
- * Proces all the pending messages now.
+ * Process all the pending messages now.
*/
static void
GTMProxy_ProcessPendingCommands(GTMProxy_ThreadInfo *thrinfo)
@@ -2038,3 +2245,139 @@ DeleteLockFile(const char *filename)
"it could not be removed. Please remove the file "
"by hand and try again.")));
}
+
+/*
+ * Unregister Proxy on GTM
+ *
+ * Sends MSG_NODE_UNREGISTER for this proxy (type PGXC_NODE_GTM_PROXY, id
+ * GTMProxyID) over master_conn, reconnecting first when the connection is
+ * missing or not in CONNECTION_OK state.  It then waits until
+ * time(NULL) + PROXY_CLIENT_TIMEOUT for the answer and finally closes the
+ * connection.  Every failure ends in elog(ERROR).
+ */
+static void
+UnregisterProxy(void)
+{
+	GTM_PGXCNodeType type = PGXC_NODE_GTM_PROXY;
+	GTM_Result *res = NULL;
+	time_t		finish_time;
+
+	/* Re-open the master connection if it is missing or unusable */
+	if (!master_conn || GTMPQstatus(master_conn) != CONNECTION_OK)
+		master_conn = ConnectGTM();
+	if (!master_conn)
+		goto failed;
+
+	/* Message body: message type, node type, node number */
+	if (gtmpqPutMsgStart('C', true, master_conn) ||
+		gtmpqPutInt(MSG_NODE_UNREGISTER, sizeof (GTM_MessageType), master_conn) ||
+		gtmpqPutnchar((char *)&type, sizeof(GTM_PGXCNodeType), master_conn) ||
+		gtmpqPutnchar((char *)&GTMProxyID, sizeof(GTM_PGXCNodeId), master_conn))
+		goto failed;
+
+	/* Finish the message. */
+	if (gtmpqPutMsgEnd(master_conn))
+		goto failed;
+
+	/* Flush to ensure backend gets it. */
+	if (gtmpqFlush(master_conn))
+		goto failed;
+
+	/* Wait for the answer, but not forever */
+	finish_time = time(NULL) + PROXY_CLIENT_TIMEOUT;
+	if (gtmpqWaitTimed(true, false, master_conn, finish_time) ||
+		gtmpqReadData(master_conn) < 0)
+		goto failed;
+
+	if ((res = GTMPQgetResult(master_conn)) == NULL)
+		goto failed;
+
+	/* Check on node type and node number */
+	if (res->gr_status == 0)
+	{
+		Assert(res->gr_resdata.grd_node.type == type);
+		Assert(res->gr_resdata.grd_node.nodenum == GTMProxyID);
+	}
+
+	/*
+	 * Disconnect cleanly as Proxy is shutting down.  The pointer is cleared
+	 * so that nothing can reach the released connection afterwards.
+	 */
+	GTMPQfinish(master_conn);
+	master_conn = NULL;
+
+	return;
+
+failed:
+	/* A void function cannot "return" the elog() expression; just report */
+	elog(ERROR, "can not Unregister Proxy on GTM");
+}
+
+/*
+ * Register Proxy on GTM
+ *
+ * Opens the master connection with ConnectGTM() and sends MSG_NODE_REGISTER
+ * carrying, in this order: node type, node number, port, proxy number,
+ * data directory length and data directory bytes.  The answer is awaited
+ * until time(NULL) + PROXY_CLIENT_TIMEOUT.  Any failure along the way is
+ * reported with elog(ERROR).
+ */
+static void
+RegisterProxy(void)
+{
+	GTM_PGXCNodeType	node_type = PGXC_NODE_GTM_PROXY;
+	GTM_PGXCNodePort	node_port = (GTM_PGXCNodePort) GTMProxyPortNumber;
+	GTM_PGXCNodeId		via_proxy = 0;
+	GTM_Result		   *answer = NULL;
+	time_t				deadline;
+
+	master_conn = ConnectGTM();
+	if (master_conn == NULL)
+		goto failed;
+
+	/* Header: start of message and message type */
+	if (gtmpqPutMsgStart('C', true, master_conn))
+		goto failed;
+	if (gtmpqPutInt(MSG_NODE_REGISTER, sizeof (GTM_MessageType), master_conn))
+		goto failed;
+
+	/* Identity of this node: type and number */
+	if (gtmpqPutnchar((char *)&node_type, sizeof(GTM_PGXCNodeType), master_conn))
+		goto failed;
+	if (gtmpqPutnchar((char *)&GTMProxyID, sizeof(GTM_PGXCNodeId), master_conn))
+		goto failed;
+
+	/* Port this proxy listens on */
+	if (gtmpqPutnchar((char *)&node_port, sizeof(GTM_PGXCNodePort), master_conn))
+		goto failed;
+
+	/*
+	 * A Proxy does not go through any other proxy, so the proxy number it
+	 * registers for itself is 0.
+	 */
+	if (gtmpqPutnchar((char *)&via_proxy, sizeof(GTM_PGXCNodeId), master_conn))
+		goto failed;
+
+	/* Data directory, sent as length followed by the bytes */
+	if (gtmpqPutInt(strlen(GTMProxyDataDir), 4, master_conn))
+		goto failed;
+	if (gtmpqPutnchar(GTMProxyDataDir, strlen(GTMProxyDataDir), master_conn))
+		goto failed;
+
+	/* Close the message and push it out to GTM */
+	if (gtmpqPutMsgEnd(master_conn))
+		goto failed;
+	if (gtmpqFlush(master_conn))
+		goto failed;
+
+	/* Wait for the answer until the deadline and read it */
+	deadline = time(NULL) + PROXY_CLIENT_TIMEOUT;
+	if (gtmpqWaitTimed(true, false, master_conn, deadline))
+		goto failed;
+	if (gtmpqReadData(master_conn) < 0)
+		goto failed;
+
+	answer = GTMPQgetResult(master_conn);
+	if (answer == NULL)
+		goto failed;
+
+	/* On success GTM echoes back the type and number that were registered */
+	if (answer->gr_status == 0)
+	{
+		Assert(answer->gr_resdata.grd_node.type == node_type);
+		Assert(answer->gr_resdata.grd_node.nodenum == GTMProxyID);
+	}
+
+	return;
+
+failed:
+	elog(ERROR, "can not register Proxy on GTM");
+}
+
+/*
+ * Open the proxy's own connection to the GTM server.
+ *
+ * The connection string is built from GTMServerHost, GTMServerPortNumber and
+ * GTMProxyID, with remote_type PGXC_NODE_GTM_PROXY_POSTMASTER and
+ * postmaster=1.  Returns the connection, or NULL when it is not in
+ * CONNECTION_OK state (the failure is also reported through elog).
+ */
+static GTM_Conn*
+ConnectGTM(void)
+{
+	char		conn_str[256];
+	GTM_Conn   *conn;
+	int			len;
+
+	/* Bounded formatting: a long host name must not overrun conn_str */
+	len = snprintf(conn_str, sizeof(conn_str),
+				   "host=%s port=%d pgxc_node_id=%d remote_type=%d postmaster=1",
+				   GTMServerHost, GTMServerPortNumber, GTMProxyID,
+				   PGXC_NODE_GTM_PROXY_POSTMASTER);
+	if (len < 0 || (size_t) len >= sizeof(conn_str))
+	{
+		elog(ERROR, "GTM connection string is too long");
+		return NULL;
+	}
+
+	conn = PQconnectGTM(conn_str);
+	if (GTMPQstatus(conn) != CONNECTION_OK)
+	{
+		/* Keep the errno of the failed connection across the report */
+		int			save_errno = errno;
+
+		elog(ERROR, "can not connect to GTM");
+
+		errno = save_errno;
+
+		GTMPQfinish(conn);
+		conn = NULL;
+	}
+
+	return conn;
+}
diff --git a/src/gtm/recovery/Makefile b/src/gtm/recovery/Makefile
new file mode 100644
index 0000000000..5092a56581
--- /dev/null
+++ b/src/gtm/recovery/Makefile
@@ -0,0 +1,24 @@
+# Copyright (c) 2010 Nippon Telegraph and Telephone Corporation
+
+# Build rules for the gtmrecovery library (node registration code, register.c).
+top_build_dir=../..
+include $(top_build_dir)/gtm/Makefile.global
+
+
+# Library name and version; presumably consumed by Makefile.shlib included
+# below -- TODO confirm.
+NAME=gtmrecovery
+SO_MAJOR_VERSION= 1
+SO_MINOR_VERSION= 0
+
+# NOTE(review): OBJS also lists ../client/libgtmclient.a, and the "clean"
+# target below runs "rm -f $(OBJS)", so cleaning this directory removes the
+# client library archive as well -- confirm this is intended.
+OBJS=register.o ../client/libgtmclient.a
+
+all:all-lib
+
+include $(top_build_dir)/Makefile.shlib
+
+# Remove the objects and every form of the library named above.
+clean:
+ rm -f $(OBJS)
+ rm -f libgtmrecovery.a libgtmrecovery.so libgtmrecovery.so.1 libgtmrecovery.so.1.0
+
+distclean: clean
+
+maintainer-clean: distclean
+
diff --git a/src/gtm/recovery/register.c b/src/gtm/recovery/register.c
new file mode 100644
index 0000000000..bb5cfc3d9a
--- /dev/null
+++ b/src/gtm/recovery/register.c
@@ -0,0 +1,779 @@
+/*-------------------------------------------------------------------------
+ *
+ * register.c
+ * PGXC Node Register on GTM and GTM Proxy, node registering functions
+ *
+ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation
+ *
+ *
+ * IDENTIFICATION
+ * $PostgreSQL$
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "gtm/gtm_c.h"
+#include "gtm/gtm.h"
+#include "gtm/register.h"
+#include "gtm/assert.h"
+#include <stdio.h>
+#include "gtm/libpq.h"
+#include "gtm/pqformat.h"
+#include "gtm/gtm_msg.h"
+#include "gtm/stringinfo.h"
+#include "gtm/gtm_ip.h"
+
+/* Base name of the on-disk register file (see Recovery_SaveRegisterFileName) */
+#define GTM_NODE_FILE "register.node"
+/* Number of buckets in the node hash table */
+#define NODE_HASH_TABLE_SIZE 16
+/* Size of the buffers holding the register file path */
+#define GTM_NODE_FILE_MAX_PATH 1024
+
+/* One hash bucket: the list of nodes hashed to it and the lock guarding it */
+typedef struct GTM_NodeInfoHashBucket
+{
+	List	   *nhb_list;
+	GTM_RWLock	nhb_lock;
+} GTM_PGXCNodeInfoHashBucket;
+
+/* Full path of the register file */
+static char GTMPGXCNodeFile[GTM_NODE_FILE_MAX_PATH];
+
+/* Lock access of record file when necessary */
+static GTM_RWLock RegisterFileLock;
+
+/*
+ * Markers framing each record of the register file: a record starts with
+ * the register or the unregister magic and ends with the end magic.
+ */
+static int NodeRegisterMagic = 0xeaeaeaea;
+static int NodeUnregisterMagic = 0xebebebeb;
+static int NodeEndMagic = 0xefefefef;
+
+/* Table of known nodes, indexed by pgxcnode_gethash() */
+static GTM_PGXCNodeInfoHashBucket GTM_PGXCNodes[NODE_HASH_TABLE_SIZE];
+
+static GTM_PGXCNodeInfo *pgxcnode_find_info(GTM_PGXCNodeType type,
+											GTM_PGXCNodeId nodenum);
+static uint32 pgxcnode_gethash(GTM_PGXCNodeId nodenum);
+static int pgxcnode_remove_info(GTM_PGXCNodeInfo *node);
+static int pgxcnode_add_info(GTM_PGXCNodeInfo *node);
+static char *pgxcnode_copy_char(const char *str);
+
+/*
+ * Equality tests on node attributes.  Arguments are parenthesized so that
+ * an expression passed as argument is compared as a whole.
+ */
+#define pgxcnode_type_equal(type1,type2) ((type1) == (type2))
+#define pgxcnode_nodenum_equal(num1,num2) ((num1) == (num2))
+#define pgxcnode_port_equal(port1,port2) ((port1) == (port2))
+
+/*
+ * Look up the node registered under the given type and number.
+ *
+ * Only the bucket selected by pgxcnode_gethash(nodenum) is scanned, under
+ * its read lock.  Returns the matching entry, or NULL when there is none.
+ * The bucket lock is released before returning.
+ */
+static GTM_PGXCNodeInfo *
+pgxcnode_find_info(GTM_PGXCNodeType type,
+				   GTM_PGXCNodeId nodenum)
+{
+	GTM_PGXCNodeInfoHashBucket *slot = &GTM_PGXCNodes[pgxcnode_gethash(nodenum)];
+	GTM_PGXCNodeInfo *match = NULL;
+	ListCell   *cell;
+
+	GTM_RWLockAcquire(&slot->nhb_lock, GTM_LOCKMODE_READ);
+
+	foreach(cell, slot->nhb_list)
+	{
+		GTM_PGXCNodeInfo *candidate = (GTM_PGXCNodeInfo *) lfirst(cell);
+
+		if (pgxcnode_type_equal(candidate->type, type) &&
+			pgxcnode_nodenum_equal(candidate->nodenum, nodenum))
+		{
+			match = candidate;
+			break;
+		}
+	}
+
+	GTM_RWLockRelease(&slot->nhb_lock);
+
+	return match;
+}
+
+/*
+ * Map a node number to its bucket index in GTM_PGXCNodes.
+ *
+ * The index is simply the node number modulo NODE_HASH_TABLE_SIZE.  We do
+ * not expect to have hundreds of nodes yet; this could be replaced by a
+ * better function, such as a double hash indexed on type and node number.
+ */
+static uint32
+pgxcnode_gethash(GTM_PGXCNodeId nodenum)
+{
+	return ((uint32) nodenum) % NODE_HASH_TABLE_SIZE;
+}
+
+/*
+ * Take a node entry out of the global hash table.
+ *
+ * The entry is unlinked from its bucket while both the bucket lock and the
+ * node's own lock are held in write mode.  The entry itself is not freed.
+ * Always returns 0.
+ */
+static int
+pgxcnode_remove_info(GTM_PGXCNodeInfo *nodeinfo)
+{
+	GTM_PGXCNodeInfoHashBucket *slot;
+
+	slot = &GTM_PGXCNodes[pgxcnode_gethash(nodeinfo->nodenum)];
+
+	/* Bucket first, then node */
+	GTM_RWLockAcquire(&slot->nhb_lock, GTM_LOCKMODE_WRITE);
+	GTM_RWLockAcquire(&nodeinfo->node_lock, GTM_LOCKMODE_WRITE);
+
+	slot->nhb_list = list_delete(slot->nhb_list, nodeinfo);
+
+	/* Released in the reverse order */
+	GTM_RWLockRelease(&nodeinfo->node_lock);
+	GTM_RWLockRelease(&slot->nhb_lock);
+
+	return 0;
+}
+
+/*
+ * Add a PGXC Node info structure to the global hash table
+ *
+ * Three outcomes are possible:
+ * - no entry with the same type and number exists: nodeinfo itself is
+ *   appended to the bucket and 0 is returned;
+ * - an entry exists and is NODE_CONNECTED: nothing is changed, the conflict
+ *   is logged and EEXIST is returned;
+ * - an entry exists and is not connected: it is refreshed from nodeinfo
+ *   (port, IP address, data folder), marked NODE_CONNECTED, and 0 is
+ *   returned.  The table keeps its own entry with its own copies of the
+ *   strings; nodeinfo is left untouched and is not referenced by the table.
+ *
+ * NOTE(review): the proxy number of a reconnecting node is not refreshed
+ * here -- confirm whether a node may come back through a different proxy.
+ */
+static int
+pgxcnode_add_info(GTM_PGXCNodeInfo *nodeinfo)
+{
+	uint32		hash = pgxcnode_gethash(nodeinfo->nodenum);
+	GTM_PGXCNodeInfoHashBucket *bucket;
+	ListCell   *elem;
+
+	bucket = &GTM_PGXCNodes[hash];
+
+	GTM_RWLockAcquire(&bucket->nhb_lock, GTM_LOCKMODE_WRITE);
+
+	foreach(elem, bucket->nhb_list)
+	{
+		GTM_PGXCNodeInfo *curr_nodeinfo = (GTM_PGXCNodeInfo *) lfirst(elem);
+
+		if (!pgxcnode_type_equal(curr_nodeinfo->type, nodeinfo->type) ||
+			!pgxcnode_nodenum_equal(curr_nodeinfo->nodenum, nodeinfo->nodenum))
+			continue;
+
+		/*
+		 * The status is also written under node_lock by the disconnect
+		 * functions, so take it here too (bucket lock first, then node lock,
+		 * the same order as in pgxcnode_remove_info).
+		 */
+		GTM_RWLockAcquire(&curr_nodeinfo->node_lock, GTM_LOCKMODE_WRITE);
+
+		if (curr_nodeinfo->status == NODE_CONNECTED)
+		{
+			GTM_RWLockRelease(&curr_nodeinfo->node_lock);
+			GTM_RWLockRelease(&bucket->nhb_lock);
+			ereport(LOG,
+					(EEXIST,
+					 errmsg("Node with the given ID number already exists")));
+			return EEXIST;
+		}
+
+		/*
+		 * Reconnection of a known node: check if its data (port, remote IP
+		 * and datafolder) has changed and modify it.  Strings are compared
+		 * by content, and each field is replaced by a copy of its own new
+		 * value.
+		 */
+		if (!pgxcnode_port_equal(curr_nodeinfo->port, nodeinfo->port))
+			curr_nodeinfo->port = nodeinfo->port;
+
+		if (strcmp(curr_nodeinfo->ipaddress, nodeinfo->ipaddress) != 0)
+		{
+			char	   *new_ipaddress = pgxcnode_copy_char(nodeinfo->ipaddress);
+
+			pfree(curr_nodeinfo->ipaddress);
+			curr_nodeinfo->ipaddress = new_ipaddress;
+		}
+
+		if (strcmp(curr_nodeinfo->datafolder, nodeinfo->datafolder) != 0)
+		{
+			char	   *new_datafolder = pgxcnode_copy_char(nodeinfo->datafolder);
+
+			pfree(curr_nodeinfo->datafolder);
+			curr_nodeinfo->datafolder = new_datafolder;
+		}
+
+		/* Reconnect a disconnected node */
+		curr_nodeinfo->status = NODE_CONNECTED;
+
+		GTM_RWLockRelease(&curr_nodeinfo->node_lock);
+		GTM_RWLockRelease(&bucket->nhb_lock);
+		return 0;
+	}
+
+	/*
+	 * Safe to add the structure to the list
+	 */
+	bucket->nhb_list = lappend(bucket->nhb_list, nodeinfo);
+	GTM_RWLockRelease(&bucket->nhb_lock);
+
+	return 0;
+}
+
+/*
+ * Makes a NUL-terminated copy of given string in TopMostMemoryContext
+ *
+ * The copy includes the terminating NUL: the rest of this file calls
+ * strlen() on the stored strings, so they must be proper C strings.
+ */
+static char *
+pgxcnode_copy_char(const char *str)
+{
+	size_t		len = strlen(str);
+	char	   *retstr = NULL;
+
+	/*
+	 * We must use the TopMostMemoryContext because the node information is
+	 * not bound to a thread and can outlive any of the thread specific
+	 * contexts.
+	 */
+	retstr = (char *) MemoryContextAlloc(TopMostMemoryContext, len + 1);
+
+	if (retstr == NULL)
+		ereport(ERROR, (ENOMEM, errmsg("Out of memory")));
+
+	/* len + 1 so that the terminator is copied along with the text */
+	memcpy(retstr, str, len + 1);
+
+	return retstr;
+}
+
+/*
+ * Unregister the node identified by type and number.
+ *
+ * Returns EINVAL when no such node is registered.  Otherwise the entry is
+ * removed from the hash table, an unregister record is appended to the
+ * register file (unless in_recovery is set), the entry and its strings are
+ * freed, and 0 is returned.
+ */
+int
+Recovery_PGXCNodeUnregister(GTM_PGXCNodeType type, GTM_PGXCNodeId nodenum, bool in_recovery)
+{
+	GTM_PGXCNodeInfo *victim = pgxcnode_find_info(type, nodenum);
+
+	if (victim == NULL)
+		return EINVAL;
+
+	pgxcnode_remove_info(victim);
+
+	/* Add a record to file on disk saying that this node has been unregistered correctly */
+	if (!in_recovery)
+		Recovery_RecordRegisterInfo(victim, false);
+
+	pfree(victim->ipaddress);
+	pfree(victim->datafolder);
+	pfree(victim);
+
+	return 0;
+}
+
+/*
+ * Register a node with the given attributes.
+ *
+ * A new GTM_PGXCNodeInfo is built with private copies of ipaddress and
+ * datafolder (the caller keeps ownership of its arguments) and handed to
+ * pgxcnode_add_info().  On success a register record is appended to the
+ * register file, unless in_recovery is set.
+ *
+ * Returns 0 on success, or the error code of pgxcnode_add_info().
+ */
+int
+Recovery_PGXCNodeRegister(GTM_PGXCNodeType type,
+						  GTM_PGXCNodeId nodenum,
+						  GTM_PGXCNodePort port,
+						  GTM_PGXCNodeId proxynum,
+						  GTM_PGXCNodeStatus status,
+						  char *ipaddress,
+						  char *datafolder,
+						  bool in_recovery)
+{
+	GTM_PGXCNodeInfo *nodeinfo = NULL;
+	int			errcode = 0;
+
+	/*
+	 * Like its strings, the structure goes to TopMostMemoryContext whatever
+	 * context the caller happens to be in, because the node information is
+	 * not bound to a thread.
+	 */
+	nodeinfo = (GTM_PGXCNodeInfo *) MemoryContextAlloc(TopMostMemoryContext,
+													   sizeof (GTM_PGXCNodeInfo));
+
+	if (nodeinfo == NULL)
+		ereport(ERROR, (ENOMEM, errmsg("Out of memory")));
+
+	GTM_RWLockInit(&nodeinfo->node_lock);
+
+	/* Fill in structure */
+	nodeinfo->type = type;
+	nodeinfo->nodenum = nodenum;
+	nodeinfo->port = port;
+	nodeinfo->proxynum = proxynum;
+	nodeinfo->datafolder = pgxcnode_copy_char(datafolder);
+	nodeinfo->ipaddress = pgxcnode_copy_char(ipaddress);
+	nodeinfo->status = status;
+
+	/* Add PGXC Node Info to the global hash table */
+	errcode = pgxcnode_add_info(nodeinfo);
+
+	if (errcode != 0)
+	{
+		/*
+		 * The table refused the node and holds no reference to it, so
+		 * release what was allocated above instead of leaking it.
+		 */
+		pfree(nodeinfo->ipaddress);
+		pfree(nodeinfo->datafolder);
+		pfree(nodeinfo);
+		return errcode;
+	}
+
+	/*
+	 * Add a Record to file disk saying that this node
+	 * with given data has been correctly registered
+	 *
+	 * NOTE(review): pgxcnode_add_info() also returns 0 when it refreshed an
+	 * existing disconnected entry.  From here that case cannot be told apart
+	 * from an insertion, so nodeinfo cannot be freed safely on this path.
+	 */
+	if (!in_recovery)
+		Recovery_RecordRegisterInfo(nodeinfo, true);
+
+	return errcode;
+}
+
+
+/*
+ * Read a length-prefixed string (a GTM_StrLen sized length, then that many
+ * bytes) from the message and return it as a NUL-terminated palloc'd copy.
+ *
+ * The sender transmits only strlen() bytes, so the bytes in the message
+ * have no terminator of their own, while the registration code calls
+ * strlen() on these strings.
+ */
+static char *
+pgxcnode_getmsgstring(StringInfo message)
+{
+	int			len = pq_getmsgint(message, sizeof (GTM_StrLen));
+	const char *raw;
+	char	   *copy;
+
+	if (len < 0)
+		ereport(ERROR,
+				(EINVAL,
+				 errmsg("Invalid string length in message")));
+
+	raw = pq_getmsgbytes(message, len);
+
+	copy = (char *) palloc(len + 1);
+	memcpy(copy, raw, len);
+	copy[len] = '\0';
+
+	return copy;
+}
+
+/*
+ * Process MSG_NODE_REGISTER
+ *
+ * Message layout, as read below: node type, node number, port, proxy
+ * number, then (only when the message is relayed by a proxy for another
+ * node) the length and bytes of the node's IP address, and finally the
+ * length and bytes of the data folder.  In the other cases the address is
+ * resolved from the connection's own socket.
+ *
+ * The node is registered with status NODE_CONNECTED and a
+ * NODE_REGISTER_RESULT answer echoing type and number is sent back.  The
+ * answer is flushed immediately unless the peer is a proxy.  A failed
+ * registration is reported with ereport(ERROR).
+ */
+void
+ProcessPGXCNodeRegister(Port *myport, StringInfo message)
+{
+	GTM_PGXCNodeType type;
+	GTM_PGXCNodeId nodenum, proxynum;
+	GTM_PGXCNodePort port;
+	char		remote_host[NI_MAXHOST];
+	char		remote_port[NI_MAXSERV];
+	char	   *datafolder;
+	char	   *ipaddress;
+	char	   *ipaddress_copy = NULL;	/* set only when read from the message */
+	MemoryContext oldContext;
+	int			errcode;
+	StringInfoData buf;
+
+	/* Get the Remote node IP and port to register it */
+	remote_host[0] = '\0';
+	remote_port[0] = '\0';
+
+	if (myport->remote_type != PGXC_NODE_GTM_PROXY)
+	{
+		/* Try with host name resolution first, then fall back to numeric */
+		if (gtm_getnameinfo_all(&myport->raddr.addr, myport->raddr.salen,
+								remote_host, sizeof(remote_host),
+								remote_port, sizeof(remote_port),
+								NI_NUMERICSERV))
+		{
+			int			ret = gtm_getnameinfo_all(&myport->raddr.addr, myport->raddr.salen,
+												  remote_host, sizeof(remote_host),
+												  remote_port, sizeof(remote_port),
+												  NI_NUMERICHOST | NI_NUMERICSERV);
+
+			if (ret)
+				ereport(WARNING,
+						(errmsg_internal("gtm_getnameinfo_all() failed")));
+		}
+	}
+
+	/* Read Node Type and number */
+	memcpy(&type, pq_getmsgbytes(message, sizeof (GTM_PGXCNodeType)),
+		   sizeof (GTM_PGXCNodeType));
+	memcpy(&nodenum, pq_getmsgbytes(message, sizeof (GTM_PGXCNodeId)),
+		   sizeof (GTM_PGXCNodeId));
+
+	/* Read Port Number */
+	memcpy(&port, pq_getmsgbytes(message, sizeof (GTM_PGXCNodePort)),
+		   sizeof (GTM_PGXCNodePort));
+
+	/* Read Proxy ID number (0 if no proxy used) */
+	memcpy(&proxynum, pq_getmsgbytes(message, sizeof (GTM_PGXCNodeId)),
+		   sizeof (GTM_PGXCNodeId));
+
+	/*
+	 * Message is received from a proxy, get also the remote node address
+	 * In the case a proxy registering itself, the remote address
+	 * is directly taken from socket.
+	 */
+	if (myport->remote_type == PGXC_NODE_GTM_PROXY &&
+		!myport->is_postmaster)
+	{
+		ipaddress_copy = pgxcnode_getmsgstring(message);
+		ipaddress = ipaddress_copy;
+	}
+	else
+		ipaddress = remote_host;
+
+	/*
+	 * Finish by reading Data Folder (length and then string)
+	 */
+	datafolder = pgxcnode_getmsgstring(message);
+
+	/*
+	 * We must use the TopMostMemoryContext because the Node ID information is
+	 * not bound to a thread and can outlive any of the thread specific
+	 * contexts.
+	 */
+	oldContext = MemoryContextSwitchTo(TopMostMemoryContext);
+
+	errcode = Recovery_PGXCNodeRegister(type, nodenum, port,
+										proxynum, NODE_CONNECTED,
+										ipaddress, datafolder, false);
+
+	MemoryContextSwitchTo(oldContext);
+
+	/* Recovery_PGXCNodeRegister keeps its own copies of both strings */
+	pfree(datafolder);
+	if (ipaddress_copy != NULL)
+		pfree(ipaddress_copy);
+
+	if (errcode != 0)
+	{
+		ereport(ERROR,
+				(EINVAL,
+				 errmsg("Failed to Register node")));
+	}
+
+	pq_getmsgend(message);
+
+	/*
+	 * Send a SUCCESS message back to the client
+	 */
+	pq_beginmessage(&buf, 'S');
+	pq_sendint(&buf, NODE_REGISTER_RESULT, 4);
+	if (myport->remote_type == PGXC_NODE_GTM_PROXY)
+	{
+		/* A proxy needs the connection id to route the answer */
+		GTM_ProxyMsgHeader proxyhdr;
+		proxyhdr.ph_conid = myport->conn_id;
+		pq_sendbytes(&buf, (char *)&proxyhdr, sizeof (GTM_ProxyMsgHeader));
+	}
+	pq_sendbytes(&buf, (char *)&type, sizeof(GTM_PGXCNodeType));
+	pq_sendbytes(&buf, (char *)&nodenum, sizeof(GTM_PGXCNodeId));
+	pq_endmessage(myport, &buf);
+
+	if (myport->remote_type != PGXC_NODE_GTM_PROXY)
+		pq_flush(myport);
+}
+
+/*
+ * Process MSG_NODE_UNREGISTER
+ *
+ * The message carries the node type followed by the node number.  The node
+ * is unregistered and a NODE_UNREGISTER_RESULT answer echoing both values
+ * is sent back; a failed unregistration is reported with ereport(ERROR).
+ * The answer is flushed immediately unless the peer is a proxy.
+ */
+void
+ProcessPGXCNodeUnregister(Port *myport, StringInfo message)
+{
+	GTM_PGXCNodeType type;
+	GTM_PGXCNodeId nodenum;
+	MemoryContext saved_context;
+	StringInfoData reply;
+	const char *raw;
+
+	/* Read Node Type and number */
+	raw = pq_getmsgbytes(message, sizeof (GTM_PGXCNodeType));
+	memcpy(&type, raw, sizeof (GTM_PGXCNodeType));
+	raw = pq_getmsgbytes(message, sizeof (GTM_PGXCNodeId));
+	memcpy(&nodenum, raw, sizeof (GTM_PGXCNodeId));
+
+	/*
+	 * The node table is not bound to a thread and can outlive any of the
+	 * thread specific contexts, hence the TopMostMemoryContext.
+	 */
+	saved_context = MemoryContextSwitchTo(TopMostMemoryContext);
+
+	if (Recovery_PGXCNodeUnregister(type, nodenum, false) != 0)
+		ereport(ERROR,
+				(EINVAL,
+				 errmsg("Failed to Unregister node")));
+
+	MemoryContextSwitchTo(saved_context);
+
+	pq_getmsgend(message);
+
+	/*
+	 * Send a SUCCESS message back to the client
+	 */
+	pq_beginmessage(&reply, 'S');
+	pq_sendint(&reply, NODE_UNREGISTER_RESULT, 4);
+
+	/* A proxy gets the connection id in front of the payload */
+	if (myport->remote_type == PGXC_NODE_GTM_PROXY)
+	{
+		GTM_ProxyMsgHeader hdr;
+
+		hdr.ph_conid = myport->conn_id;
+		pq_sendbytes(&reply, (char *)&hdr, sizeof (GTM_ProxyMsgHeader));
+	}
+
+	pq_sendbytes(&reply, (char *)&type, sizeof(GTM_PGXCNodeType));
+	pq_sendbytes(&reply, (char *)&nodenum, sizeof(GTM_PGXCNodeId));
+	pq_endmessage(myport, &reply);
+
+	if (myport->remote_type != PGXC_NODE_GTM_PROXY)
+		pq_flush(myport);
+}
+
+/*
+ * Write exactly "count" bytes from "data" to "fd".
+ * Returns false as soon as write() fails or makes no progress.
+ */
+static bool
+pgxcnode_write_all(int fd, const void *data, size_t count)
+{
+	const char *cursor = (const char *) data;
+
+	while (count > 0)
+	{
+		ssize_t		written = write(fd, cursor, count);
+
+		if (written <= 0)
+			return false;
+
+		cursor += written;
+		count -= (size_t) written;
+	}
+
+	return true;
+}
+
+/*
+ * Write a string as a uint32 length followed by its bytes (no terminator).
+ */
+static bool
+pgxcnode_write_string(int fd, const char *str)
+{
+	uint32		len = (uint32) strlen(str);
+
+	return pgxcnode_write_all(fd, &len, sizeof (uint32)) &&
+		pgxcnode_write_all(fd, str, len);
+}
+
+/*
+ * Write one record of the register file for the given node, holding the
+ * node's lock in read mode meanwhile.
+ *
+ * Layout: start magic (register or unregister), type, node number, then for
+ * a register record only: port, proxy number, status, IP address and data
+ * folder; finally the end magic.  Returns false if any write failed.
+ */
+static bool
+pgxcnode_write_record(int fd, GTM_PGXCNodeInfo *nodeinfo, bool is_register)
+{
+	int			start_magic = is_register ? NodeRegisterMagic : NodeUnregisterMagic;
+	bool		ok;
+
+	GTM_RWLockAcquire(&nodeinfo->node_lock, GTM_LOCKMODE_READ);
+
+	ok = pgxcnode_write_all(fd, &start_magic, sizeof (start_magic)) &&
+		pgxcnode_write_all(fd, &nodeinfo->type, sizeof (GTM_PGXCNodeType)) &&
+		pgxcnode_write_all(fd, &nodeinfo->nodenum, sizeof (GTM_PGXCNodeId));
+
+	if (ok && is_register)
+		ok = pgxcnode_write_all(fd, &nodeinfo->port, sizeof (GTM_PGXCNodePort)) &&
+			pgxcnode_write_all(fd, &nodeinfo->proxynum, sizeof (GTM_PGXCNodeId)) &&
+			pgxcnode_write_all(fd, &nodeinfo->status, sizeof (GTM_PGXCNodeStatus)) &&
+			pgxcnode_write_string(fd, nodeinfo->ipaddress) &&
+			pgxcnode_write_string(fd, nodeinfo->datafolder);
+
+	ok = ok && pgxcnode_write_all(fd, &NodeEndMagic, sizeof (NodeEndMagic));
+
+	GTM_RWLockRelease(&nodeinfo->node_lock);
+
+	return ok;
+}
+
+/*
+ * Called at GTM shutdown, rewrite on disk register information
+ * and write only data of nodes currently registered.
+ *
+ * The table is first dumped to "<register file>.bkp", which then replaces
+ * the register file through rename().  If the backup cannot be written
+ * completely it is removed and the existing register file is left as is.
+ */
+void
+Recovery_SaveRegisterInfo(void)
+{
+	GTM_PGXCNodeInfoHashBucket *bucket;
+	ListCell   *elem;
+	GTM_PGXCNodeInfo *nodeinfo = NULL;
+	int			hash, ctlfd, pathlen;
+	char		filebkp[GTM_NODE_FILE_MAX_PATH];
+	bool		ok = true;
+
+	GTM_RWLockAcquire(&RegisterFileLock, GTM_LOCKMODE_WRITE);
+
+	/* Create a backup file in case there is a problem during file writing */
+	pathlen = snprintf(filebkp, sizeof (filebkp), "%s.bkp", GTMPGXCNodeFile);
+	if (pathlen < 0 || (size_t) pathlen >= sizeof (filebkp))
+	{
+		/* The ".bkp" suffix does not fit: do not write to a truncated path */
+		GTM_RWLockRelease(&RegisterFileLock);
+		elog(LOG, "Cannot save register file");
+		return;
+	}
+
+	ctlfd = open(filebkp, O_WRONLY | O_CREAT | O_TRUNC,
+				 S_IRUSR | S_IWUSR);
+
+	if (ctlfd < 0)
+	{
+		GTM_RWLockRelease(&RegisterFileLock);
+		elog(LOG, "Cannot save register file");
+		return;
+	}
+
+	for (hash = 0; ok && hash < NODE_HASH_TABLE_SIZE; hash++)
+	{
+		bucket = &GTM_PGXCNodes[hash];
+
+		GTM_RWLockAcquire(&bucket->nhb_lock, GTM_LOCKMODE_READ);
+
+		/* Write one by one information about registered nodes */
+		foreach(elem, bucket->nhb_list)
+		{
+			nodeinfo = (GTM_PGXCNodeInfo *) lfirst(elem);
+			if (nodeinfo == NULL)
+				break;
+
+			if (!pgxcnode_write_record(ctlfd, nodeinfo, true))
+			{
+				ok = false;
+				break;
+			}
+		}
+
+		GTM_RWLockRelease(&bucket->nhb_lock);
+	}
+
+	if (close(ctlfd) != 0)
+		ok = false;
+
+	if (!ok)
+	{
+		/* Never let an incomplete backup replace the current file */
+		(void) unlink(filebkp);
+	}
+	else if (rename(filebkp, GTMPGXCNodeFile) < 0)
+	{
+		/* Replace former file by backup file failed */
+		ok = false;
+	}
+
+	GTM_RWLockRelease(&RegisterFileLock);
+
+	if (!ok)
+		elog(LOG, "Cannot save register file");
+}
+
+/*
+ * Add a Register or Unregister record on PGXC Node file on disk.
+ *
+ * The record is appended to the register file under RegisterFileLock.
+ * Nothing is done when nodeinfo is NULL or when the file cannot be opened;
+ * a failed write is logged.
+ */
+void
+Recovery_RecordRegisterInfo(GTM_PGXCNodeInfo *nodeinfo, bool is_register)
+{
+	int			ctlfd;
+	bool		ok;
+
+	/* Checked before opening anything, so no descriptor can be left open */
+	if (nodeinfo == NULL)
+		return;
+
+	GTM_RWLockAcquire(&RegisterFileLock, GTM_LOCKMODE_WRITE);
+
+	ctlfd = open(GTMPGXCNodeFile, O_WRONLY | O_CREAT | O_APPEND,
+				 S_IRUSR | S_IWUSR);
+
+	if (ctlfd == -1)
+	{
+		GTM_RWLockRelease(&RegisterFileLock);
+		return;
+	}
+
+	ok = pgxcnode_write_record(ctlfd, nodeinfo, is_register);
+
+	if (close(ctlfd) != 0)
+		ok = false;
+
+	GTM_RWLockRelease(&RegisterFileLock);
+
+	if (!ok)
+		elog(LOG, "Cannot write record to register file");
+}
+
+/*
+ * Sanity bound for a string length read back from the register file, so
+ * that a damaged length field cannot trigger a huge allocation.
+ */
+#define GTM_NODE_MAX_STRING_LEN (64 * 1024)
+
+/*
+ * Read exactly "count" bytes from "fd" into "data".
+ * Returns false on error or when the file ends first.
+ */
+static bool
+pgxcnode_read_all(int fd, void *data, size_t count)
+{
+	char	   *cursor = (char *) data;
+
+	while (count > 0)
+	{
+		ssize_t		got = read(fd, cursor, count);
+
+		if (got <= 0)
+			return false;
+
+		cursor += got;
+		count -= (size_t) got;
+	}
+
+	return true;
+}
+
+/*
+ * Read a string stored as a uint32 length followed by its bytes and return
+ * it as a NUL-terminated palloc'd string, or NULL if it cannot be read.
+ */
+static char *
+pgxcnode_read_string(int fd)
+{
+	uint32		len;
+	char	   *str;
+
+	if (!pgxcnode_read_all(fd, &len, sizeof (uint32)) ||
+		len > GTM_NODE_MAX_STRING_LEN)
+		return NULL;
+
+	str = (char *) palloc(len + 1);
+	if (!pgxcnode_read_all(fd, str, len))
+	{
+		pfree(str);
+		return NULL;
+	}
+	str[len] = '\0';		/* the file stores the bytes without terminator */
+
+	return str;
+}
+
+/*
+ * Rebuild the node table from the register file.
+ *
+ * Records are replayed in file order: a register record registers the node
+ * with the recorded data, an unregister record removes it.  Replay stops
+ * with a warning at the first record that is damaged or incomplete; such a
+ * record is not applied.  Nothing is done if the file cannot be opened.
+ */
+void
+Recovery_RestoreRegisterInfo(void)
+{
+	int			magic;
+	int			ctlfd;
+
+	/* This is made when GTM/Proxy restarts, so it is not necessary to take a lock */
+	ctlfd = open(GTMPGXCNodeFile, O_RDONLY);
+
+	if (ctlfd == -1)
+		return;
+
+	while (pgxcnode_read_all(ctlfd, &magic, sizeof (NodeRegisterMagic)))
+	{
+		GTM_PGXCNodeType type;
+		GTM_PGXCNodeId nodenum, proxynum = 0;
+		GTM_PGXCNodePort port = 0;
+		GTM_PGXCNodeStatus status = NODE_DISCONNECTED;
+		char	   *ipaddress = NULL;
+		char	   *datafolder = NULL;
+		int			end_magic = 0;
+		bool		is_register;
+		bool		ok;
+
+		if (magic != NodeRegisterMagic && magic != NodeUnregisterMagic)
+		{
+			elog(WARNING, "Start magic mismatch %x", magic);
+			break;
+		}
+		is_register = (magic == NodeRegisterMagic);
+
+		ok = pgxcnode_read_all(ctlfd, &type, sizeof (GTM_PGXCNodeType)) &&
+			pgxcnode_read_all(ctlfd, &nodenum, sizeof (GTM_PGXCNodeId));
+
+		if (ok && is_register)
+		{
+			ok = pgxcnode_read_all(ctlfd, &port, sizeof (GTM_PGXCNodePort)) &&
+				pgxcnode_read_all(ctlfd, &proxynum, sizeof (GTM_PGXCNodeId)) &&
+				pgxcnode_read_all(ctlfd, &status, sizeof (GTM_PGXCNodeStatus));
+
+			/* IP address first, then data folder, as they were written */
+			if (ok)
+				ok = (ipaddress = pgxcnode_read_string(ctlfd)) != NULL;
+			if (ok)
+				ok = (datafolder = pgxcnode_read_string(ctlfd)) != NULL;
+		}
+
+		/* The record counts only if it is closed by the end magic */
+		ok = ok &&
+			pgxcnode_read_all(ctlfd, &end_magic, sizeof (NodeEndMagic)) &&
+			end_magic == NodeEndMagic;
+
+		/* Rebuild based on the records */
+		if (ok)
+		{
+			if (is_register)
+				(void) Recovery_PGXCNodeRegister(type, nodenum, port, proxynum, status,
+												 ipaddress, datafolder, true);
+			else
+				(void) Recovery_PGXCNodeUnregister(type, nodenum, true);
+		}
+
+		/* Recovery_PGXCNodeRegister stores copies, so ours can go */
+		if (ipaddress != NULL)
+			pfree(ipaddress);
+		if (datafolder != NULL)
+			pfree(datafolder);
+
+		if (!ok)
+		{
+			elog(WARNING, "Corrupted control file");
+			break;
+		}
+	}
+
+	/* Reached on every path out of the loop, so the file is always closed */
+	close(ctlfd);
+}
+
+/*
+ * Remember where the register file lives: "<dir>/register.node".
+ *
+ * Does nothing when dir is NULL.  The path is stored in a buffer of
+ * GTM_NODE_FILE_MAX_PATH bytes; a longer path is truncated to fit and a
+ * warning is emitted.
+ */
+void
+Recovery_SaveRegisterFileName(char *dir)
+{
+	int			len;
+
+	if (!dir)
+		return;
+
+	/* Bounded formatting: dir comes from outside and may be long */
+	len = snprintf(GTMPGXCNodeFile, sizeof (GTMPGXCNodeFile),
+				   "%s/%s", dir, GTM_NODE_FILE);
+
+	if (len < 0 || (size_t) len >= sizeof (GTMPGXCNodeFile))
+		elog(WARNING, "Register file path is too long");
+}
+
+/*
+ * Mark as disconnected the node whose master connection to GTM was cut.
+ *
+ * Nothing happens unless the port belongs to a postmaster connection, as
+ * only a master connection can disconnect a node.  The node is looked up
+ * by the port's remote type and node id; when found, its status becomes
+ * NODE_DISCONNECTED under the node's write lock.  The entry stays in the
+ * table.
+ */
+void
+Recovery_PGXCNodeDisconnect(Port *myport)
+{
+	GTM_PGXCNodeInfo *entry;
+	MemoryContext saved_context;
+
+	if (!myport->is_postmaster)
+		return;
+
+	/*
+	 * The node table is not bound to a thread and can outlive any of the
+	 * thread specific contexts, hence the TopMostMemoryContext.
+	 */
+	saved_context = MemoryContextSwitchTo(TopMostMemoryContext);
+
+	entry = pgxcnode_find_info(myport->remote_type, myport->pgxc_node_id);
+	if (entry != NULL)
+	{
+		GTM_RWLockAcquire(&entry->node_lock, GTM_LOCKMODE_WRITE);
+		entry->status = NODE_DISCONNECTED;
+		GTM_RWLockRelease(&entry->node_lock);
+	}
+
+	MemoryContextSwitchTo(saved_context);
+}
+
+/*
+ * Mark the node with the given type and number as disconnected.
+ *
+ * Returns 0 when the node was found and its status set to
+ * NODE_DISCONNECTED (under the node's write lock), -1 when no such node is
+ * registered.
+ */
+int
+Recovery_PGXCNodeBackendDisconnect(GTM_PGXCNodeType type, GTM_PGXCNodeId nodenum)
+{
+	GTM_PGXCNodeInfo *entry = pgxcnode_find_info(type, nodenum);
+
+	if (entry == NULL)
+		return -1;
+
+	GTM_RWLockAcquire(&entry->node_lock, GTM_LOCKMODE_WRITE);
+	entry->status = NODE_DISCONNECTED;
+	GTM_RWLockRelease(&entry->node_lock);
+
+	return 0;
+}
+
+/*
+ * Process MSG_BACKEND_DISCONNECT
+ *
+ * A backend has disconnected on a Proxy.  The message starts with one byte
+ * telling whether that backend was a postmaster; only in that case it goes
+ * on with the node type and node number, and the referenced node is marked
+ * as disconnected.  An unknown node is only logged.
+ */
+void
+ProcessPGXCNodeBackendDisconnect(Port *myport, StringInfo message)
+{
+	MemoryContext saved_context;
+	GTM_PGXCNodeId nodenum;
+	GTM_PGXCNodeType type;
+	bool		from_postmaster;
+
+	from_postmaster = pq_getmsgbyte(message);
+
+	if (!from_postmaster)
+	{
+		/* Ordinary backend: the message ends here and there is nothing to do */
+		pq_getmsgend(message);
+		return;
+	}
+
+	/* Read Node Type and number */
+	memcpy(&type, pq_getmsgbytes(message, sizeof (GTM_PGXCNodeType)),
+		   sizeof (GTM_PGXCNodeType));
+	memcpy(&nodenum, pq_getmsgbytes(message, sizeof (GTM_PGXCNodeId)),
+		   sizeof (GTM_PGXCNodeId));
+
+	pq_getmsgend(message);
+
+	/*
+	 * The node table is not bound to a thread and can outlive any of the
+	 * thread specific contexts, hence the TopMostMemoryContext.
+	 */
+	saved_context = MemoryContextSwitchTo(TopMostMemoryContext);
+
+	if (Recovery_PGXCNodeBackendDisconnect(type, nodenum) < 0)
+		elog(LOG, "Cannot disconnect Unregistered node");
+
+	MemoryContextSwitchTo(saved_context);
+}
diff --git a/src/include/access/gtm.h b/src/include/access/gtm.h
index 6687474a2a..3a03d3c631 100644
--- a/src/include/access/gtm.h
+++ b/src/include/access/gtm.h
@@ -43,6 +43,10 @@ extern int CommitPreparedTranGTM(GlobalTransactionId gxid,
extern GTM_Snapshot GetSnapshotGTM(GlobalTransactionId gxid, bool canbe_grouped);
+/* Node registration APIs with GTM */
+extern int RegisterGTM(GTM_PGXCNodeType type, GTM_PGXCNodePort port, char *datafolder);
+extern int UnregisterGTM(GTM_PGXCNodeType type);
+
/* Sequence interface APIs with GTM */
extern GTM_Sequence GetCurrentValGTM(char *seqname);
extern GTM_Sequence GetNextValGTM(char *seqname);
diff --git a/src/include/gtm/gtm.h b/src/include/gtm/gtm.h
index 77522b249a..90754338a0 100644
--- a/src/include/gtm/gtm.h
+++ b/src/include/gtm/gtm.h
@@ -131,6 +131,10 @@ extern MemoryContext TopMostMemoryContext;
#if 0
+/* Coordinator registration */
+int GTM_RegisterCoordinator(GTM_CoordInfo *cinfo);
+int GTM_UnregisterCoordinator(GTM_PGXCNodeId cid);
+
#endif
#endif
diff --git a/src/include/gtm/gtm_c.h b/src/include/gtm/gtm_c.h
index f918592101..e1fc4bde62 100644
--- a/src/include/gtm/gtm_c.h
+++ b/src/include/gtm/gtm_c.h
@@ -54,7 +54,7 @@ typedef enum GTM_PGXCNodeType
PGXC_NODE_COORDINATOR,
PGXC_NODE_DATANODE,
PGXC_NODE_DEFAULT /* In case nothing is associated to connection */
-} GTM_PGXCNodeType;
+} GTM_PGXCNodeType;
/*
* A unique handle to identify transaction at the GTM. It could just be
diff --git a/src/include/gtm/gtm_client.h b/src/include/gtm/gtm_client.h
index ff1befdced..b8b6af0143 100644
--- a/src/include/gtm/gtm_client.h
+++ b/src/include/gtm/gtm_client.h
@@ -82,6 +82,12 @@ typedef union GTM_ResultData
PGXC_NodeId *coordinators;
} grd_txn_get_gid_data; /* TXN_GET_GID_DATA_RESULT */
+ struct
+ {
+ GTM_PGXCNodeType type; /* NODE_REGISTER */
+ GTM_PGXCNodeId nodenum; /* NODE_UNREGISTER */
+ } grd_node;
+
/*
* TODO
* TXN_GET_STATUS
@@ -141,6 +147,13 @@ GTM_SnapshotData *get_snapshot(GTM_Conn *conn, GlobalTransactionId gxid,
bool canbe_grouped);
/*
+ * Node Registering management API
+ */
+int node_register(GTM_Conn *conn, GTM_PGXCNodeType type, GTM_PGXCNodeId nodenum,
+ GTM_PGXCNodePort port, char *datafolder);
+int node_unregister(GTM_Conn *conn, GTM_PGXCNodeType type, GTM_PGXCNodeId nodenum);
+
+/*
* Sequence Management API
*/
int open_sequence(GTM_Conn *conn, GTM_SequenceKey key, GTM_Sequence increment,
diff --git a/src/include/gtm/gtm_msg.h b/src/include/gtm/gtm_msg.h
index 77c97ac1fd..16dfaac8ff 100644
--- a/src/include/gtm/gtm_msg.h
+++ b/src/include/gtm/gtm_msg.h
@@ -17,8 +17,8 @@
typedef enum GTM_MessageType
{
MSG_TYPE_INVALID,
- MSG_REGISTER_COORD, /* Register a Coordinator with GTM */
- MSG_UNREGISTER_COORD, /* Unregister a Coordinator with GTM */
+ MSG_NODE_REGISTER, /* Register a PGXC Node with GTM */
+ MSG_NODE_UNREGISTER, /* Unregister a PGXC Node with GTM */
MSG_TXN_BEGIN, /* Start a new transaction */
MSG_TXN_BEGIN_GETGXID, /* Start a new transaction and get GXID */
MSG_TXN_BEGIN_GETGXID_MULTI, /* Start multiple new transactions and get GXIDs */
@@ -58,6 +58,8 @@ typedef enum GTM_MessageType
typedef enum GTM_ResultType
{
+ NODE_REGISTER_RESULT,
+ NODE_UNREGISTER_RESULT,
TXN_BEGIN_RESULT,
TXN_BEGIN_GETGXID_RESULT,
TXN_BEGIN_GETGXID_MULTI_RESULT,
diff --git a/src/include/gtm/gtm_proxy.h b/src/include/gtm/gtm_proxy.h
index 8dc16bca0e..4c55639790 100644
--- a/src/include/gtm/gtm_proxy.h
+++ b/src/include/gtm/gtm_proxy.h
@@ -141,6 +141,7 @@ extern int GTMProxy_ThreadRemoveConnection(GTMProxy_ThreadInfo *thrinfo,
/*
* Command data - the only relevant information right now is the XID
+ * and data necessary for registering (modification of Proxy number registered)
*/
typedef union GTMProxy_CommandData
{
@@ -163,6 +164,16 @@ typedef union GTMProxy_CommandData
GlobalTransactionId gxid;
GTM_TransactionHandle handle;
} cd_snap;
+
+ struct
+ {
+ GTM_PGXCNodeType type;
+ GTM_PGXCNodeId nodenum;
+ GTM_PGXCNodePort port;
+ GTM_PGXCNodeId proxynum;
+ char *datafolder;
+ char *ipaddress;
+ } cd_reg;
} GTMProxy_CommandData;
/*
diff --git a/src/include/gtm/gtm_txn.h b/src/include/gtm/gtm_txn.h
index 47444c6d4c..be6852fb95 100644
--- a/src/include/gtm/gtm_txn.h
+++ b/src/include/gtm/gtm_txn.h
@@ -183,10 +183,10 @@ GTM_TransactionHandle GTM_GIDToHandle(char *gid);
/* Transaction Control */
void GTM_InitTxnManager(void);
-GTM_TransactionHandle GTM_BeginTransaction(GTM_PGXCNodeId pgxc_node_id,
+GTM_TransactionHandle GTM_BeginTransaction(GTM_PGXCNodeId coord_id,
GTM_IsolationLevel isolevel,
bool readonly);
-int GTM_BeginTransactionMulti(GTM_PGXCNodeId pgxc_node_id,
+int GTM_BeginTransactionMulti(GTM_PGXCNodeId coord_id,
GTM_IsolationLevel isolevel[],
bool readonly[],
GTMProxy_ConnID connid[],
diff --git a/src/include/gtm/libpq-be.h b/src/include/gtm/libpq-be.h
index f8036fe7bc..a157a7349d 100644
--- a/src/include/gtm/libpq-be.h
+++ b/src/include/gtm/libpq-be.h
@@ -49,7 +49,7 @@ typedef struct Port
GTM_PGXCNodeType remote_type; /* Type of remote connection */
GTM_PGXCNodeId pgxc_node_id; /* Coordinator ID */
- bool is_postmaster; /* Is remote a node postmaster? */
+ bool is_postmaster; /* Is remote a node postmaster? */
#define PQ_BUFFER_SIZE 8192
char PqSendBuffer[PQ_BUFFER_SIZE];
diff --git a/src/include/gtm/libpq-int.h b/src/include/gtm/libpq-int.h
index 557a441d0f..43fa3ea3f6 100644
--- a/src/include/gtm/libpq-int.h
+++ b/src/include/gtm/libpq-int.h
@@ -27,6 +27,7 @@
#include "gtm/pqcomm.h"
#include "gtm/pqexpbuffer.h"
#include "gtm/gtm_client.h"
+#include "gtm/gtm_c.h"
/*
* GTM_Conn stores all the state data associated with a single connection
@@ -43,8 +44,9 @@ struct gtm_conn
char *pgport; /* the server's communication port */
char *connect_timeout; /* connection timeout (numeric string) */
char *pgxc_node_id; /* PGXC Node id */
- int remote_type; /* is this a connection to/from a proxy ? */
+ int remote_type; /* is this a connection to/from a proxy ? */
int is_postmaster; /* is this connection to/from a postmaster instance */
+
/* Optional file to write trace info to */
FILE *Pfdebug;
diff --git a/src/include/gtm/register.h b/src/include/gtm/register.h
new file mode 100644
index 0000000000..573a17b829
--- /dev/null
+++ b/src/include/gtm/register.h
@@ -0,0 +1,74 @@
+/*-------------------------------------------------------------------------
+ *
+ * register.h
+ *
+ *
+ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation
+ *
+ * $PostgreSQL$
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef _REGISTER_H
+#define _REGISTER_H
+
+#include "gtm/gtm_c.h"
+#include "gtm/gtm_lock.h"
+#include "gtm/gtm_list.h"
+#include "gtm/stringinfo.h"
+
+/*
+ * GTM_PGXCNodeInfo (defined below) holds the data that is saved each time a
+ * Postgres-XC node registers on GTM.
+ * It contains:
+ * - Type of the Node: Proxy, Coordinator, Datanode
+ * - Node number
+ * - Proxy number: This ID number is set to 0 if the node does not go through a
+ * Proxy or if the Node Type is Proxy
+ * - PostgreSQL port the node uses to communicate
+ * - IP visible to GTM
+ * - Data folder of the node
+ */
+
+typedef enum GTM_PGXCNodeStatus /* Status stored in GTM_PGXCNodeInfo.status */
+{
+ NODE_CONNECTED, /* first enumerator, value 0 */
+ NODE_DISCONNECTED /* value 1; NOTE(review): presumably a node that disconnected but keeps its ID reserved - confirm in register.c */
+} GTM_PGXCNodeStatus;
+
+typedef struct GTM_PGXCNodeInfo /* Data saved for one registered Postgres-XC node */
+{
+ GTM_PGXCNodeType type; /* Node type: Proxy, Coordinator or Datanode */
+ GTM_PGXCNodeId nodenum; /* Node number */
+ GTM_PGXCNodeId proxynum; /* Proxy number; 0 if the node uses no Proxy or is itself a Proxy */
+ GTM_PGXCNodePort port; /* PostgreSQL port the node uses to communicate */
+ char *ipaddress; /* IP visible to GTM */
+ char *datafolder; /* Data folder of the node */
+ GTM_PGXCNodeStatus status; /* NODE_CONNECTED or NODE_DISCONNECTED */
+ GTM_RWLock node_lock; /* NOTE(review): presumably guards access to this entry - confirm in register.c */
+} GTM_PGXCNodeInfo;
+
+int Recovery_PGXCNodeRegister(GTM_PGXCNodeType type, /* arguments match the GTM_PGXCNodeInfo fields, plus in_recovery */
+ GTM_PGXCNodeId nodenum,
+ GTM_PGXCNodePort port,
+ GTM_PGXCNodeId proxynum,
+ GTM_PGXCNodeStatus status,
+ char *ipaddress,
+ char *datafolder,
+ bool in_recovery);
+int Recovery_PGXCNodeUnregister(GTM_PGXCNodeType type, GTM_PGXCNodeId nodenum, bool in_recovery);
+int Recovery_PGXCNodeBackendDisconnect(GTM_PGXCNodeType type, GTM_PGXCNodeId nodenum);
+
+void Recovery_RecordRegisterInfo(GTM_PGXCNodeInfo *nodeinfo, bool is_register);
+void Recovery_RestoreRegisterInfo(void);
+void Recovery_SaveRegisterInfo(void);
+void Recovery_PGXCNodeDisconnect(Port *myport); /* NOTE(review): Port comes from gtm/libpq-be.h, which is not included directly here - confirm it is reachable */
+void Recovery_SaveRegisterFileName(char *dir);
+
+void ProcessPGXCNodeRegister(Port *myport, StringInfo message); /* the Process* entry points all take (Port *, StringInfo) */
+void ProcessPGXCNodeUnregister(Port *myport, StringInfo message);
+void ProcessPGXCNodeBackendDisconnect(Port *myport, StringInfo message);
+
+#endif /* _REGISTER_H */