summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPavan Deolasee2016-03-25 11:23:31 +0000
committerPavan Deolasee2016-10-18 10:04:33 +0000
commit21b27a46f5e75d10b332fd6b97f0c64cb7eabd43 (patch)
tree16252fe77a132722832a19b5886d8529997d5bc6
parent03ee8e89befc1bccf7206a6fd61355f4fc0b2657 (diff)
Do not turn on hot_standby in coordinator/datanode slaves since it's not supported.
We'd earlier turned that on so that PQping() could check the status of standbys. But that clearly creates bigger trouble, and standbys may just stop working. So add a new mechanism to ping slave nodes by using pg_ctl.
-rw-r--r--contrib/pgxc_ctl/coord_cmd.c14
-rw-r--r--contrib/pgxc_ctl/datanode_cmd.c16
-rw-r--r--contrib/pgxc_ctl/monitor.c9
-rw-r--r--contrib/pgxc_ctl/utils.c29
-rw-r--r--contrib/pgxc_ctl/utils.h1
5 files changed, 52 insertions, 17 deletions
diff --git a/contrib/pgxc_ctl/coord_cmd.c b/contrib/pgxc_ctl/coord_cmd.c
index 78af7fb3cf..7e59070e95 100644
--- a/contrib/pgxc_ctl/coord_cmd.c
+++ b/contrib/pgxc_ctl/coord_cmd.c
@@ -158,7 +158,7 @@ cmd_t *prepare_initCoordinatorMaster(char *nodeName)
fprintf(f,
"#========================================\n"
"# Addition for log shipping, %s\n"
- "wal_level = hot_standby\n"
+ "wal_level = archive\n"
"archive_mode = on\n"
"archive_command = 'rsync %%p %s@%s:%s/%%f'\n"
"max_wal_senders = %s\n"
@@ -325,10 +325,10 @@ cmd_t *prepare_initCoordinatorSlave(char *nodeName)
fprintf(f,
"#==========================================\n"
"# Added to initialize the slave, %s\n"
- "hot_standby = on\n"
+ "hot_standby = off\n"
"port = %s\n"
"pooler_port = %s\n"
- "wal_level = minimal\n"
+ "wal_level = archive\n"
"archive_mode = off\n"
"archive_command = ''\n"
"max_wal_senders = 0\n"
@@ -1305,7 +1305,7 @@ int add_coordinatorSlave(char *name, char *host, int port, int pooler_port, char
fprintf(f,
"#========================================\n"
"# Addition for log shipping, %s\n"
- "wal_level = hot_standby\n"
+ "wal_level = archive\n"
"archive_mode = on\n"
"archive_command = 'rsync %%p %s@%s:%s/%%f'\n"
"max_wal_senders = %d\n"
@@ -1405,10 +1405,10 @@ int add_coordinatorSlave(char *name, char *host, int port, int pooler_port, char
fprintf(f,
"#==========================================\n"
"# Added to initialize the slave, %s\n"
- "hot_standby = on\n"
+ "hot_standby = off\n"
"port = %d\n"
"pooler_port = %d\n"
- "wal_level = minimal\n" /* WAL level --- minimal. No cascade slave so far. */
+ "wal_level = archive\n"
"archive_mode = off\n" /* No archive mode */
"archive_command = ''\n" /* No archive mode */
"max_wal_senders = 0\n" /* Minimum WAL senders */
@@ -1641,7 +1641,7 @@ int remove_coordinatorSlave(char *name, int clean_opt)
return 1;
}
AddMember(nodelist, name);
- if (pingNode(aval(VAR_coordSlaveServers)[idx], aval(VAR_coordSlavePorts)[idx]) == 0)
+ if (pingNodeSlave(aval(VAR_coordSlaveServers)[idx], aval(VAR_coordSlaveDirs)[idx]) == 0)
stop_coordinator_slave(nodelist, "immediate");
{
FILE *f;
diff --git a/contrib/pgxc_ctl/datanode_cmd.c b/contrib/pgxc_ctl/datanode_cmd.c
index 18180bd896..c9d4731ba2 100644
--- a/contrib/pgxc_ctl/datanode_cmd.c
+++ b/contrib/pgxc_ctl/datanode_cmd.c
@@ -175,7 +175,7 @@ cmd_t *prepare_initDatanodeMaster(char *nodeName)
return(NULL);
}
fprintf(f,
- "wal_level = hot_standby\n"
+ "wal_level = archive\n"
"archive_mode = on\n"
"archive_command = 'rsync %%p %s@%s:%s/%%f'\n"
"max_wal_senders = %s\n"
@@ -349,7 +349,7 @@ cmd_t *prepare_initDatanodeSlave(char *nodeName)
fprintf(f,
"#==========================================\n"
"# Added to startup the slave, %s\n"
- "hot_standby = on\n"
+ "hot_standby = off\n"
"port = %s\n"
"pooler_port = %s\n"
"# End of addition\n",
@@ -646,7 +646,8 @@ cmd_t *prepare_stopDatanodeSlave(char *nodeName, char *immediate)
/* The next step might need improvement. When GTM is dead, the following may
* fail even though the master is running.
*/
- if (pingNode(aval(VAR_datanodeSlaveServers)[idx], aval(VAR_datanodeSlavePorts)[idx]) == 0)
+ if (pingNodeSlave(aval(VAR_datanodeSlaveServers)[idx],
+ aval(VAR_datanodeSlaveDirs)[idx]) == 0)
{
cmd_t *cmdReloadMaster;
@@ -1360,7 +1361,7 @@ int add_datanodeSlave(char *name, char *host, int port, int pooler, char *dir,
fprintf(f,
"#========================================\n"
"# Addition for log shipping, %s\n"
- "wal_level = hot_standby\n"
+ "wal_level = archive\n"
"archive_mode = on\n"
"archive_command = 'rsync %%p %s@%s:%s/%%f'\n"
"max_wal_senders = %d\n"
@@ -1472,10 +1473,10 @@ int add_datanodeSlave(char *name, char *host, int port, int pooler, char *dir,
fprintf(f,
"#==========================================\n"
"# Added to initialize the slave, %s\n"
- "hot_standby = on\n"
+ "hot_standby = off\n"
"port = %s\n"
"pooler_port = %s\n"
- "wal_level = minimal\n" /* WAL level --- minimal. No cascade slave so far. */
+ "wal_level = archive\n"
"archive_mode = off\n" /* No archive mode */
"archive_command = ''\n" /* No archive mode */
"max_wal_senders = 0\n" /* Minimum WAL senders */
@@ -1728,7 +1729,8 @@ int remove_datanodeSlave(char *name, int clean_opt)
return 1;
}
AddMember(nodelist, name);
- if (pingNode(aval(VAR_datanodeSlaveServers)[idx], aval(VAR_datanodeSlavePorts)[idx]) == 0)
+ if (pingNodeSlave(aval(VAR_datanodeSlaveServers)[idx],
+ aval(VAR_datanodeSlaveDirs)[idx]) == 0)
stop_datanode_slave(nodelist, "immediate");
{
FILE *f;
diff --git a/contrib/pgxc_ctl/monitor.c b/contrib/pgxc_ctl/monitor.c
index 1483504269..f14c24bb47 100644
--- a/contrib/pgxc_ctl/monitor.c
+++ b/contrib/pgxc_ctl/monitor.c
@@ -155,7 +155,8 @@ static void monitor_coordinator(char **nodeList)
printResult(pingNode(aval(VAR_coordMasterServers)[idx], aval(VAR_coordPorts)[idx]),
"coordinator master", actualNodeList[ii]);
if (doesExist(VAR_coordSlaveServers, idx) && !is_none(aval(VAR_coordSlaveServers)[idx]))
- printResult(pingNode(aval(VAR_coordSlaveServers)[idx], aval(VAR_coordSlavePorts)[idx]),
+ printResult(pingNodeSlave(aval(VAR_coordSlaveServers)[idx],
+ aval(VAR_coordSlaveDirs)[idx]),
"coordinator slave", actualNodeList[ii]);
}
}
@@ -198,7 +199,8 @@ static void monitor_datanode_slave(char **nodeList)
continue;
}
if (doesExist(VAR_datanodeSlaveServers, idx) && !is_none(aval(VAR_datanodeSlaveServers)[idx]))
- printResult(pingNode(aval(VAR_datanodeSlaveServers)[idx], aval(VAR_datanodeSlavePorts)[idx]),
+ printResult(pingNodeSlave(aval(VAR_datanodeSlaveServers)[idx],
+ aval(VAR_datanodeSlaveDirs)[idx]),
"datanode slave", actualNodeList[ii]);
else
elog(ERROR, "ERROR: datanode slave %s is not configured.\n", actualNodeList[ii]);
@@ -222,7 +224,8 @@ static void monitor_datanode(char **nodeList)
printResult(pingNode(aval(VAR_datanodeMasterServers)[idx], aval(VAR_datanodePorts)[idx]),
"datanode master", actualNodeList[ii]);
if (doesExist(VAR_datanodeSlaveServers, idx) && !is_none(aval(VAR_datanodeSlaveServers)[idx]))
- printResult(pingNode(aval(VAR_datanodeSlaveServers)[idx], aval(VAR_datanodeSlavePorts)[idx]),
+ printResult(pingNodeSlave(aval(VAR_datanodeSlaveServers)[idx],
+ aval(VAR_datanodeSlaveDirs)[idx]),
"datanode slave", actualNodeList[ii]);
}
}
diff --git a/contrib/pgxc_ctl/utils.c b/contrib/pgxc_ctl/utils.c
index dd79fa10dc..96c487f20c 100644
--- a/contrib/pgxc_ctl/utils.c
+++ b/contrib/pgxc_ctl/utils.c
@@ -324,6 +324,35 @@ int pingNode(char *host, char *port)
return -1;
}
+/*
+ * A different mechanism to ping datanode and coordinator slaves since these
+ * nodes currently do not accept connections and hence won't respond to PQping
+ * requests. Instead we rely on "pg_ctl status", which must be run via ssh on
+ * the remote machine.
+ *
+ * Parameters:
+ *   host    - machine on which the slave runs; used as the ssh target
+ *             together with the user name taken from VAR_pgxcUser.
+ *   datadir - data directory of the slave, passed to "pg_ctl -D".
+ *
+ * The command's own output is discarded; only the exit status printed by
+ * "echo $?" is read back through the pipe and converted with atoi().
+ *
+ * Returns that exit status as an int. The callers added by this change
+ * treat 0 as "slave is running". Returns -1 when popen() fails or when no
+ * line can be read from the pipe.
+ *
+ * NOTE(review): presumably a non-zero value is either pg_ctl's status code
+ * or an ssh failure code; callers here only distinguish 0 from non-zero.
+ * NOTE(review): host and datadir are placed in the shell command without
+ * quoting, so values containing spaces or shell metacharacters would break
+ * the command -- confirm that configuration values are trusted.
+ * NOTE(review): the snprintf() result is not checked, so an over-long
+ * command would be silently truncated; atoi() also cannot tell unparsable
+ * output apart from a genuine 0.
+ */
+int pingNodeSlave(char *host, char *datadir)
+{
+ FILE *wkf;
+ char cmd[MAXLINE+1];
+ char line[MAXLINE+1];
+ int rv;
+
+ snprintf(cmd, MAXLINE, "ssh %s@%s pg_ctl -D %s status > /dev/null 2>&1; echo $?",
+ sval(VAR_pgxcUser), host, datadir);
+ wkf = popen(cmd, "r");
+ if (wkf == NULL)
+ return -1;
+ if (fgets(line, MAXLINE, wkf))
+ {
+ trimNl(line);
+ rv = atoi(line);
+ }
+ else
+ rv = -1;
+ pclose(wkf);
+ return rv;
+}
+
void trimNl(char *s)
{
for (;*s && *s != '\n'; s++);
diff --git a/contrib/pgxc_ctl/utils.h b/contrib/pgxc_ctl/utils.h
index 6d0fb9d691..18cc0ec9fe 100644
--- a/contrib/pgxc_ctl/utils.h
+++ b/contrib/pgxc_ctl/utils.h
@@ -30,6 +30,7 @@ extern int datanodeIdx(char *datanodeName);
extern int getEffectiveGtmProxyIdxFromServerName(char *serverName);
extern pid_t get_prog_pid(char *host, char *pidfile, char *dir);
extern int pingNode(char *host, char *port);
+extern int pingNodeSlave(char *host, char *datadir);
extern void trimNl(char *s);
extern char *getChPidList(char *host, pid_t ppid);
extern char *getIpAddress(char *hostName);