author	Pavan Deolasee	2016-02-10 10:14:00 +0000
committer	Pavan Deolasee	2016-10-18 09:57:45 +0000
commit	b45a5101a229b1051edee028b22047d6a489c5f7 (patch)
tree	e739580dd20d22188f6951b1c6ca7ea377fa25ff
parent	50ede31b3c6068c89274cc04d1c2984114d617d5 (diff)
Some misc fixes and documentation updates for pgxc_ctl
-rw-r--r--	contrib/pgxc_ctl/config.c	3
-rw-r--r--	contrib/pgxc_ctl/datanode_cmd.c	2
-rw-r--r--	contrib/pgxc_ctl/do_shell.c	2
-rw-r--r--	contrib/pgxc_ctl/gtm_cmd.c	15
-rw-r--r--	contrib/pgxc_ctl/pgxc_ctl.c	2
-rw-r--r--	doc/src/sgml/pgxc_ctl-ref.sgml	83
-rw-r--r--	doc/src/sgml/start.sgml	598
-rw-r--r--	src/backend/pgxc/pool/poolutils.c	2
8 files changed, 678 insertions, 29 deletions
diff --git a/contrib/pgxc_ctl/config.c b/contrib/pgxc_ctl/config.c
index 2098fd786d..b36fa2bc82 100644
--- a/contrib/pgxc_ctl/config.c
+++ b/contrib/pgxc_ctl/config.c
@@ -1208,5 +1208,6 @@ int getDefaultWalSender(int isCoord)
if (doesExist(names, ii) && !is_none(aval(names)[ii]) && (atoi(aval(walSender)[ii]) >= 0))
return atoi(aval(walSender)[ii]);
}
- return 0;
+	/* If none found, return 5 as the default value. */
+ return 5;
}
diff --git a/contrib/pgxc_ctl/datanode_cmd.c b/contrib/pgxc_ctl/datanode_cmd.c
index a60f53c68a..18180bd896 100644
--- a/contrib/pgxc_ctl/datanode_cmd.c
+++ b/contrib/pgxc_ctl/datanode_cmd.c
@@ -1772,7 +1772,7 @@ int remove_datanodeSlave(char *name, int clean_opt)
}
fprintf(f,
"#================================================================\n"
- "# pgxc configuration file updated due to coodinator slave removal\n"
+ "# pgxc configuration file updated due to datanode slave removal\n"
"# %s\n",
timeStampString(date, MAXTOKEN+1));
fprintSval(f, VAR_datanodeSlave);
diff --git a/contrib/pgxc_ctl/do_shell.c b/contrib/pgxc_ctl/do_shell.c
index 7dd126c851..8c112651fc 100644
--- a/contrib/pgxc_ctl/do_shell.c
+++ b/contrib/pgxc_ctl/do_shell.c
@@ -715,7 +715,7 @@ void dump_cmdList(cmdList_t *cmdList)
"allocated = %d, used = %d\n", cmdList->allocated, cmdList->used);
if (cmdList->cmds == NULL)
{
- elog(DEBUG1, "=== No command dfined. ===\n");
+ elog(DEBUG1, "=== No command defined. ===\n");
return;
}
for (ii = 0; cmdList->cmds[ii]; ii++)
diff --git a/contrib/pgxc_ctl/gtm_cmd.c b/contrib/pgxc_ctl/gtm_cmd.c
index 5f92fb7965..7fc77ba28c 100644
--- a/contrib/pgxc_ctl/gtm_cmd.c
+++ b/contrib/pgxc_ctl/gtm_cmd.c
@@ -124,11 +124,17 @@ cmd_t *prepare_initGtmMaster(bool stop)
int init_gtm_master(bool stop)
{
- int rc;
+ int rc = 0;
cmdList_t *cmdList;
cmd_t *cmd;
elog(INFO, "Initialize GTM master\n");
+ if (is_none(sval(VAR_gtmMasterServer)))
+ {
+ elog(INFO, "No GTM master specified, exiting!\n");
+ return rc;
+ }
+
cmdList = initCmdList();
/* Kill current gtm, build work directory and run initgtm */
@@ -482,9 +488,14 @@ cmd_t *prepare_startGtmMaster(void)
int start_gtm_master(void)
{
cmdList_t *cmdList;
- int rc;
+ int rc = 0;
elog(INFO, "Start GTM master\n");
+ if (is_none(sval(VAR_gtmMasterServer)))
+ {
+ elog(INFO, "No GTM master specified, cannot start. Exiting!\n");
+ return rc;
+ }
cmdList = initCmdList();
addCmd(cmdList, prepare_startGtmMaster());
rc = doCmdList(cmdList);
diff --git a/contrib/pgxc_ctl/pgxc_ctl.c b/contrib/pgxc_ctl/pgxc_ctl.c
index eaad8a0b8a..cb59eed1ad 100644
--- a/contrib/pgxc_ctl/pgxc_ctl.c
+++ b/contrib/pgxc_ctl/pgxc_ctl.c
@@ -266,7 +266,7 @@ static void read_configuration(void)
read_vars(conf);
pclose(conf);
uninstall_pgxc_ctl_bash(pgxc_ctl_bash_path);
- elog(INFO, "Finished to read configuration.\n");
+ elog(INFO, "Finished reading configuration.\n");
}
static void prepare_pgxc_ctl_bash(char *path)
diff --git a/doc/src/sgml/pgxc_ctl-ref.sgml b/doc/src/sgml/pgxc_ctl-ref.sgml
index 50c094c060..9405a3086f 100644
--- a/doc/src/sgml/pgxc_ctl-ref.sgml
+++ b/doc/src/sgml/pgxc_ctl-ref.sgml
@@ -214,6 +214,14 @@ PGXC$ prepare config minimal
PGXC$ prepare config minimal my_minimal_pgxc.conf
</programlisting>
+You may instead want to start off with a completely empty cluster and
+add all the nodes one-by-one. Use the <literal>empty</literal> option to generate
+an empty template configuration file.
+<programlisting>
+PGXC$ prepare config empty
+PGXC$ prepare config empty my_empty_pgxc.conf
+</programlisting>
+
A more detailed syntax of the command will be described in a later section.
</para>
@@ -223,6 +231,16 @@ PGXC$ prepare config minimal my_minimal_pgxc.conf
<title>Make your configuration</title>
<para>
+  If you are starting with an <literal>empty</literal> configuration file, then
+  there is no real need to provide values for most of the variables, and you
+  can skip the rest of this section. However, if you want to provide custom
+  values for <filename>pg_hba.conf</filename> entries or additional parameters
+  to be added to your <filename>postgresql.conf</filename> file, then you will
+  need to do so before going ahead with your cluster creation, as sketched
+  below.
+ </para>
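+
+ <para>
+  For illustration only (the variable names below follow the template's
+  conventions, but check your generated configuration file for the exact
+  names and defaults), such customizations might look like this:
+<programlisting>
+# Allow connections from the local network on every Coordinator
+coordPgHbaEntries=(192.168.1.0/24)
+# File whose contents are appended to each Coordinator's postgresql.conf
+coordExtraConfig=$HOME/pgxc_ctl/coordExtraConfig
+</programlisting>
+ </para>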
+
+ <para>
Please take a look at the template of the configuration file you
created in the previous section.
This file is actually a bash script file to setup various bash
@@ -726,36 +744,45 @@ $
As described in the previous section, you can configure your
<application>Postgres-XL</application> cluster by editing
<filename>pgxc_ctl.conf</filename> or other configuration files manually.
- But editing the file from the scratch can be a mess. It is much better to
- have a separate configuration file. You can create a configuration file
- template by typing
+ But editing the file from scratch can be a bit of work. A better way is to
+ start off with an empty configuration file. The <application>pgxc_ctl</application> utility supports
+ three types of templates, as shown below.
<programlisting>
+PGXC$ prepare config empty
+</programlisting>
+or
+<programlisting>
+PGXC$ prepare config minimal
+</programlisting>
+or
+<programlisting>
PGXC$ prepare config
-PGXC$
</programlisting>
</para>
<para>
- You have your <filename>pgxc_ctl.conf</filename> file at
- <filename>$HOME/pgxc_ctl</filename>.
+ The default <filename>pgxc_ctl.conf</filename> file can be found in the
+ <filename>$HOME/pgxc_ctl</filename> directory.
You can edit it to configure your
- <application>Postgres-XL</application> cluster.
- When it messes up, you can again create the template with
+ <application>Postgres-XL</application> cluster or you can choose
+ to start with an empty cluster and add components one-by-one.
+ If the configuration gets messed up, you can recreate a template
+ of your choice with the appropriate
<command>prepare config</command> command.
- If you want to use other file name, specify the names
- <command>prepare config</command> command option like:
+ You can specify your own custom name for the configuration file
+ as shown below:
<programlisting>
-PGXC$ prepare config my_config.conf
+PGXC$ prepare config empty my_config.conf
</programlisting>
</para>
<para>
- Then you can edit this file to configure your
- <application>postgres-XL</application> cluster. This file is actually a
- bash script file defining many variables to define the cluster
- configuration. With template values and comments, it will be easy to
- understand what they mean.
+ Then you can edit this file to configure and customize your
+ <application>Postgres-XL</application> cluster. This configuration file is basically a
+ bash script that declares many variables to define the cluster
+ configuration. Although it might seem confusing at first, the template
+ values and comments make it easy to understand what each of these variables means.
</para>
<para>
You can also generate a minimal configuration file, good enough to test
@@ -766,8 +793,8 @@ PGXC$ prepare config minimal
PGXC$ prepare config minimal my_minimal_config.conf
</programlisting>
- The following describes each variable in the order you find in the
- configuration template.
+ Given below is the description of the various variables in the order that they
+ appear in the configuration file.
</para>
<sect3>
@@ -1431,7 +1458,7 @@ PGXC$ prepare config minimal my_minimal_config.conf
<term><option>datanodeNames</option></term>
<listitem>
<para>
- Array to specify Coordinator names.
+ Array to specify Datanode names.
</para>
</listitem>
</varlistentry>
@@ -1594,16 +1621,17 @@ PGXC$ prepare config minimal my_minimal_config.conf
<variablelist>
<varlistentry>
+ <term><literal>add gtm master <replaceable class="parameter">name</replaceable> <replaceable class="parameter">host</replaceable> <replaceable class="parameter">port</replaceable> <replaceable class="parameter">dir</replaceable></literal></term>
<term><literal>add gtm slave <replaceable class="parameter">name</replaceable> <replaceable class="parameter">host</replaceable> <replaceable class="parameter">port</replaceable> <replaceable class="parameter">dir</replaceable></literal></term>
<term><literal>add gtm_proxy <replaceable class="parameter">name</replaceable> <replaceable class="parameter">host</replaceable> <replaceable class="parameter">port</replaceable> <replaceable class="parameter">dir</replaceable></literal></term>
<term><literal>add coordinator master <replaceable class="parameter">name</replaceable> <replaceable class="parameter">host</replaceable> <replaceable class="parameter">port</replaceable> <replaceable class="parameter">pooler</replaceable> <replaceable class="parameter">dir</replaceable> <replaceable class="parameter">extraServerConf</replaceable> <replaceable class="parameter">extraPgHbaConf</replaceable></literal></term>
<term><literal>add coordinator slave <replaceable class="parameter">name</replaceable> <replaceable class="parameter">host</replaceable> <replaceable class="parameter">port</replaceable> <replaceable class="parameter">pooler</replaceable> <replaceable class="parameter">dir</replaceable> <replaceable class="parameter">archDir</replaceable></literal></term>
- <term><literal>add datanode master <replaceable class="parameter">name</replaceable> <replaceable class="parameter">host</replaceable> <replaceable class="parameter">port</replaceable> <replaceable class="parameter">pooler</replaceable> <replaceable class="parameter">dir</replaceable> <replaceable class="parameter">xlogdir</replaceable> <replaceable class="parameter">restoreDatanode</replaceable> <replaceable class="parameter">extraServerConf</replaceable> <replaceable class="parameter">extraPgHbaConf</replaceable></literal></term>
+ <term><literal>add datanode master <replaceable class="parameter">name</replaceable> <replaceable class="parameter">host</replaceable> <replaceable class="parameter">port</replaceable> <replaceable class="parameter">pooler</replaceable> <replaceable class="parameter">dir</replaceable> <replaceable class="parameter">xlogdir</replaceable> <replaceable class="parameter">extraServerConf</replaceable> <replaceable class="parameter">extraPgHbaConf</replaceable></literal></term>
<term><literal>add datanode slave <replaceable class="parameter">name</replaceable> <replaceable class="parameter">host</replaceable> <replaceable class="parameter">port</replaceable> <replaceable class="parameter">pooler</replaceable> <replaceable class="parameter">dir</replaceable> <replaceable class="parameter">xlogdir</replaceable> <replaceable class="parameter">archDir</replaceable></literal></term>
<listitem>
<para>
Add the specified node to your Postgres-XL cluster. Each node needs a
- host name and its work directory. GTM slave, GTM proxy, Coordinator
+ host name and its work directory. GTM master, GTM slave, GTM proxy, Coordinator
master/slave and Datanode master/slave need its own port to listen to.
Coordinators and Datanodes also need a pooler port to pool connections to
Datanodes. Coordinator and Datanode slaves need a directory to receive
@@ -1620,6 +1648,16 @@ PGXC$ prepare config minimal my_minimal_config.conf
<para>
You cannot add slaves without master.
</para>
+ <para>
+ Typically, when you start with an empty configuration file, first you will add your
+ GTM node. Then you will add your first Coordinator master and then the first Datanode master.
+ When you add a Coordinator master and it is the first Coordinator in the cluster, then it
+ starts up on its own with empty node metadata. Otherwise the new Coordinator master connects to any
+ existing Coordinator and gets the existing node metadata of the cluster.
+ When you add a Datanode master and it is the first Datanode, then it connects
+ to any existing Coordinator to get the node metadata. Otherwise the Datanode master
+ connects to any existing Datanode and gets the current metadata from it.
+ A minimal sequence of commands is sketched below.
+ </para>
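+ <para>
+ As an illustrative sketch (the names, ports, and directories are arbitrary
+ example values), the typical sequence for an empty configuration could be:
+<programlisting>
+PGXC$ add gtm master gtm localhost 20001 $dataDirRoot/gtm
+PGXC$ add coordinator master coord1 localhost 30001 30011 $dataDirRoot/coord_master.1 none none
+PGXC$ add datanode master dn1 localhost 40001 40011 $dataDirRoot/dn_master.1 none none none
+</programlisting>
+ </para>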
</listitem>
</varlistentry>
@@ -1789,7 +1827,8 @@ PGXC$ prepare config minimal my_minimal_config.conf
</varlistentry>
<varlistentry>
- <term><literal>remove gtm slave</literal></term>
+ <term><literal>remove gtm master [ clean ]</literal></term>
+ <term><literal>remove gtm slave [ clean ]</literal></term>
<term><literal>remove gtm_proxy <replaceable class="parameter">nodename</replaceable> [ clean ]</literal></term>
<term><literal>remove coordinator [ master| slave ] <replaceable class="parameter">nodename</replaceable> [ clean ]</literal></term>
<term><literal>remove datanode [ master| slave ] <replaceable class="parameter">nodename</replaceable> [ clean ]</literal></term>
diff --git a/doc/src/sgml/start.sgml b/doc/src/sgml/start.sgml
index cf285f8777..1bd9c47885 100644
--- a/doc/src/sgml/start.sgml
+++ b/doc/src/sgml/start.sgml
@@ -187,6 +187,604 @@
</para>
</sect1>
+ <sect1 id="tutorial-createcluster">
+ <title>Creating a Postgres-XL cluster</title>
+
+ <para>
+ As mentioned in the architectural fundamentals, <productname>Postgres-XL</productname>
+ is a collection of multiple components. It can be a bit of work to come up with your
+ initial working setup. In this tutorial, we will show how to start with
+ an <literal>empty</literal> configuration file and use the <application>pgxc_ctl</application>
+ utility to create a <productname>Postgres-XL</productname> cluster from scratch.
+ </para>
+
+ <para>
+ A few prerequisites must be satisfied on each node that is going to be a part of the
+ <productname>Postgres-XL</productname> setup.
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ Password-less ssh access is required from the node that is going to run the
+ <application>pgxc_ctl</application> utility (see the sketch just after this list).
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ The PATH environment variable should include the correct <productname>Postgres-XL</productname>
+ binaries on all nodes, including for non-interactive commands run via ssh.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ The <filename>pg_hba.conf</filename> entries must be updated to allow remote access. Variables
+ like <option>coordPgHbaEntries</option> and <option>datanodePgHbaEntries</option>
+ in the <filename>pgxc_ctl.conf</filename> configuration file may need appropriate changes.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Firewalls and iptables may need to be updated to allow access to ports.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </para>
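+
+ <para>
+ For illustration, assuming OpenSSH, the ssh and PATH prerequisites might be
+ arranged and checked like this (the user and host names are placeholders):
+<screen>
+<prompt>$</prompt> <userinput>ssh-keygen -t rsa</userinput>
+<prompt>$</prompt> <userinput>ssh-copy-id postgres@node2</userinput>
+<prompt>$</prompt> <userinput>ssh postgres@node2 pg_ctl --version</userinput>
+</screen>
+ The last command verifies both password-less access and that the
+ <productname>Postgres-XL</productname> binaries are on the PATH for
+ non-interactive shells.
+ </para>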
+
+ <para>
+ The <application>pgxc_ctl</application> utility should be present in your PATH. If it is
+ not there, it can be compiled from source.
+<screen>
+<prompt>$</prompt> <userinput>cd $XLSRC/contrib/pgxc_ctl</userinput>
+<prompt>$</prompt> <userinput>make install</userinput>
+</screen>
+
+ We are now ready to prepare our template configuration file. The <application>pgxc_ctl</application>
+ utility allows you to create three types of configuration files. We will choose the <literal>empty</literal>
+ configuration which will allow us to create our <productname>Postgres-XL</productname> setup from
+ scratch. Note that we also need to set up the <option>dataDirRoot</option> environment
+ variable properly for all future invocations of <application>pgxc_ctl</application>.
+<screen>
+<prompt>$</prompt> <userinput>export dataDirRoot=$HOME/DATA/pgxl/nodes</userinput>
+<prompt>$</prompt> <userinput>mkdir $HOME/pgxc_ctl</userinput>
+<prompt>$</prompt> <userinput>pgxc_ctl</userinput>
+Installing pgxc_ctl_bash script as /Users/nikhils/pgxc_ctl/pgxc_ctl_bash.
+Installing pgxc_ctl_bash script as /Users/nikhils/pgxc_ctl/pgxc_ctl_bash.
+Reading configuration using /Users/nikhils/pgxc_ctl/pgxc_ctl_bash --home
+/Users/nikhils/pgxc_ctl --configuration
+/Users/nikhils/pgxc_ctl/pgxc_ctl.conf
+Finished reading configuration.
+ ******** PGXC_CTL START ***************
+
+ Current directory: /Users/nikhils/pgxc_ctl
+<prompt>PGXC$ </prompt> <userinput>prepare config empty</userinput>
+<prompt>PGXC$ </prompt> <userinput>exit</userinput>
+</screen>
+
+ The <literal>empty</literal> configuration file is now ready. You should now make changes
+ to <filename>pgxc_ctl.conf</filename>. At a minimum, <option>pgxcOwner</option>
+ should be set correctly. The configuration file uses the <envar>USER</> and <envar>HOME</>
+ environment variables to provide easy defaults for the current user.
+ </para>
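+
+ <para>
+ A minimal sketch of such an edit (the value shown is only an example):
+<programlisting>
+pgxcOwner=postgres    # operating system user that owns the cluster
+</programlisting>
+ </para>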
+
+ <para>
+ The next step is to add the GTM master to the setup.
+<screen>
+<prompt>$</prompt> <userinput>pgxc_ctl</userinput>
+<prompt>PGXC$ </prompt> <userinput>add gtm master gtm localhost 20001 $dataDirRoot/gtm</userinput>
+</screen>
+
+ Use the "monitor" command to check the status of the cluster.
+<screen>
+<prompt>$</prompt> <userinput>pgxc_ctl</userinput>
+<prompt>PGXC$ </prompt> <userinput>monitor all</userinput>
+Running: gtm master
+</screen>
+ </para>
+
+
+ <para>
+ Let us now add a couple of coordinators. When the first coordinator is added, it just
+starts up. When another coordinator is added, it connects to any existing coordinator node
+to fetch the metadata about objects.
+<screen>
+<prompt>PGXC$ </prompt> <userinput>add coordinator master coord1 localhost 30001 30011 $dataDirRoot/coord_master.1 none none</userinput>
+<prompt>PGXC$ </prompt> <userinput>monitor all</userinput>
+Running: gtm master
+Running: coordinator master coord1
+<prompt>PGXC$ </prompt> <userinput>add coordinator master coord2 localhost 30002 30012 $dataDirRoot/coord_master.2 none none</userinput>
+<prompt>PGXC$ </prompt> <userinput>monitor all</userinput>
+Running: gtm master
+Running: coordinator master coord1
+Running: coordinator master coord2
+</screen>
+ </para>
+
+ <para>
+ Let us now move on to adding a couple of datanodes. When the first datanode is added,
+it connects to any existing coordinator node to fetch global metadata. When a subsequent
+datanode is added, it connects to any existing datanode for the metadata.
+<screen>
+<prompt>PGXC$ </prompt> <userinput>add datanode master dn1 localhost 40001 40011 $dataDirRoot/dn_master.1 none none none</userinput>
+<prompt>PGXC$ </prompt> <userinput>monitor all</userinput>
+Running: gtm master
+Running: coordinator master coord1
+Running: coordinator master coord2
+Running: datanode master dn1
+<prompt>PGXC$ </prompt> <userinput>add datanode master dn2 localhost 40002 40012 $dataDirRoot/dn_master.2 none none none</userinput>
+<prompt>PGXC$ </prompt> <userinput>monitor all</userinput>
+Running: gtm master
+Running: coordinator master coord1
+Running: coordinator master coord2
+Running: datanode master dn1
+Running: datanode master dn2
+</screen>
+</para>
+
+<para>
+ Your <productname>Postgres-XL</productname> setup is now ready, and you can move on to the next
+ "Getting Started" topic.
+ </para>
+ <para>
+ Read on only if you want a quick crash course on the various commands you can
+ try out with <productname>Postgres-XL</productname>. It is strongly recommended that you go
+ through the rest of the documentation for more details on each command that we touch upon
+ below.
+ </para>
+
+ <para>
+ Connect to one of the coordinators and create a test database.
+<screen>
+<prompt>$ </prompt> <userinput>psql -p 30001 postgres</userinput>
+postgres=# CREATE DATABASE testdb;
+CREATE DATABASE
+postgres=# \q
+</screen>
+
+Look at the pgxc_node catalog. It should show all the configured nodes. It is normal to have
+negative node_id values. This will be fixed soon.
+<screen>
+<prompt>$ </prompt> <userinput>psql -p 30001 testdb</userinput>
+testdb=# SELECT * FROM pgxc_node;
+ node_name | node_type | node_port | node_host | nodeis_primary | nodeis_preferred | node_id
+-----------+-----------+-----------+-----------+----------------+------------------+-------------
+ coord1 | C | 30001 | localhost | f | f | 1885696643
+ coord2 | C | 30002 | localhost | f | f | -1197102633
+ dn1 | D | 40001 | localhost | t | t | -560021589
+ dn2 | D | 40002 | localhost | f | t | 352366662
+(4 rows)
+
+</screen>
+
+Let us now create a distributed table, distributed by HASH on its first column.
+
+<screen>
+testdb=# CREATE TABLE disttab(col1 int, col2 int, col3 text) DISTRIBUTE BY HASH(col1);
+CREATE TABLE
+testdb=# \d+ disttab
+ Table "public.disttab"
+ Column | Type | Modifiers | Storage | Stats target | Description
+--------+---------+-----------+----------+--------------+-------------
+ col1 | integer | | plain | |
+ col2 | integer | | plain | |
+ col3 | text | | extended | |
+Has OIDs: no
+Distribute By: HASH(col1)
+Location Nodes: ALL DATANODES
+
+</screen>
+
+Also create a replicated table.
+
+<screen>
+testdb=# CREATE TABLE repltab (col1 int, col2 int) DISTRIBUTE BY
+REPLICATION;
+CREATE TABLE
+testdb=# \d+ repltab
+ Table "public.repltab"
+ Column | Type | Modifiers | Storage | Stats target | Description
+--------+---------+-----------+---------+--------------+-------------
+ col1 | integer | | plain | |
+ col2 | integer | | plain | |
+Has OIDs: no
+Distribute By: REPLICATION
+Location Nodes: ALL DATANODES
+
+</screen>
+
+Now insert some sample data into these tables.
+<screen>
+testdb=# INSERT INTO disttab VALUES (generate_series(1,100), generate_series(101, 200), 'foo');
+INSERT 0 100
+testdb=# INSERT INTO repltab VALUES (generate_series(1,100), generate_series(101, 200));
+INSERT 0 100
+
+</screen>
+OK, so the distributed table should now have 100 rows:
+
+<screen>
+testdb=# SELECT count(*) FROM disttab;
+ count
+-------
+ 100
+(1 row)
+
+
+</screen>
+
+And the rows should not all be on the same node. <literal>xc_node_id</> is a system
+column that shows the originating datanode for each row.
+
+Note that the distribution can be slightly uneven because of the HASH
+function:
+
+<screen>
+testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id;
+ xc_node_id | count
+------------+-------
+ -560021589 | 42
+ 352366662 | 58
+(2 rows)
+
+
+</screen>
+For replicated tables, we expect all rows to come from a single
+datanode (even though the other node has a copy too).
+
+<screen>
+testdb=# SELECT count(*) FROM repltab;
+ count
+-------
+ 100
+(1 row)
+
+testdb=# SELECT xc_node_id, count(*) FROM repltab GROUP BY xc_node_id;
+ xc_node_id | count
+------------+-------
+ -560021589 | 100
+(1 row)
+
+</screen>
+
+Now add a new datanode to the cluster.
+
+<screen>
+<prompt>PGXC$ </prompt> <userinput>add datanode master dn3 localhost 40003 40013 $dataDirRoot/dn_master.3 none none none</userinput>
+<prompt>PGXC$ </prompt> <userinput>monitor all</userinput>
+Running: gtm master
+Running: coordinator master coord1
+Running: coordinator master coord2
+Running: datanode master dn1
+Running: datanode master dn2
+Running: datanode master dn3
+</screen>
+
+
+Note that during cluster reconfiguration, all outstanding transactions
+are aborted and sessions are reset. So you would typically see errors
+like these on open sessions:
+
+<screen>
+testdb=# SELECT * FROM pgxc_node;
+ERROR: canceling statement due to user request <==== pgxc_pool_reload() resets all sessions and aborts all open transactions
+
+testdb=# SELECT * FROM pgxc_node;
+ node_name | node_type | node_port | node_host | nodeis_primary | nodeis_preferred | node_id
+-----------+-----------+-----------+-----------+----------------+------------------+-------------
+ coord1 | C | 30001 | localhost | f | f | 1885696643
+ coord2 | C | 30002 | localhost | f | f | -1197102633
+ dn1 | D | 40001 | localhost | t | t | -560021589
+ dn2 | D | 40002 | localhost | f | t | 352366662
+ dn3 | D | 40003 | localhost | f | f | -700122826
+(5 rows)
+</screen>
+
+Note that existing tables are not affected by the addition of a new datanode. Their
+distribution information now explicitly shows only the older datanodes:
+<screen>
+testdb=# \d+ disttab
+ Table "public.disttab"
+ Column | Type | Modifiers | Storage | Stats target | Description
+--------+---------+-----------+----------+--------------+-------------
+ col1 | integer | | plain | |
+ col2 | integer | | plain | |
+ col3 | text | | extended | |
+Has OIDs: no
+Distribute By: HASH(col1)
+Location Nodes: dn1, dn2
+
+testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id;
+ xc_node_id | count
+------------+-------
+ -560021589 | 42
+ 352366662 | 58
+(2 rows)
+
+testdb=# \d+ repltab
+ Table "public.repltab"
+ Column | Type | Modifiers | Storage | Stats target | Description
+--------+---------+-----------+---------+--------------+-------------
+ col1 | integer | | plain | |
+ col2 | integer | | plain | |
+Has OIDs: no
+Distribute By: REPLICATION
+Location Nodes: dn1, dn2
+</screen>
+
+Let us now redistribute the tables so that they can take advantage
+of the new datanode:
+
+<screen>
+testdb=# ALTER TABLE disttab ADD NODE (dn3);
+ALTER TABLE
+testdb=# \d+ disttab
+ Table "public.disttab"
+ Column | Type | Modifiers | Storage | Stats target | Description
+--------+---------+-----------+----------+--------------+-------------
+ col1 | integer | | plain | |
+ col2 | integer | | plain | |
+ col3 | text | | extended | |
+Has OIDs: no
+Distribute By: HASH(col1)
+Location Nodes: ALL DATANODES
+
+testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id;
+ xc_node_id | count
+------------+-------
+ -700122826 | 32
+ 352366662 | 32
+ -560021589 | 36
+(3 rows)
+
+</screen>
+
+Let us now add a third coordinator.
+<screen>
+<prompt>PGXC$ </prompt> <userinput>add coordinator master coord3 localhost 30003 30013 $dataDirRoot/coord_master.3 none none</userinput>
+<prompt>PGXC$ </prompt> <userinput>monitor all</userinput>
+Running: gtm master
+Running: coordinator master coord1
+Running: coordinator master coord2
+Running: coordinator master coord3
+Running: datanode master dn1
+Running: datanode master dn2
+Running: datanode master dn3
+
+testdb=# SELECT * FROM pgxc_node;
+ERROR: canceling statement due to user request
+testdb=# SELECT * FROM pgxc_node;
+ node_name | node_type | node_port | node_host | nodeis_primary | nodeis_preferred | node_id
+-----------+-----------+-----------+-----------+----------------+------------------+-------------
+ coord1 | C | 30001 | localhost | f | f | 1885696643
+ coord2 | C | 30002 | localhost | f | f | -1197102633
+ dn1 | D | 40001 | localhost | t | t | -560021589
+ dn2 | D | 40002 | localhost | f | t | 352366662
+ dn3 | D | 40003 | localhost | f | f | -700122826
+ coord3 | C | 30003 | localhost | f | f | 1638403545
+(6 rows)
+
+</screen>
+
+We can try a few more ALTER TABLE commands to delete a node from a table's
+distribution and add it back:
+
+<screen>
+testdb=# ALTER TABLE disttab DELETE NODE (dn1);
+ALTER TABLE
+testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id;
+ xc_node_id | count
+------------+-------
+ 352366662 | 42
+ -700122826 | 58
+(2 rows)
+
+testdb=# ALTER TABLE disttab ADD NODE (dn1);
+ALTER TABLE
+testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id;
+ xc_node_id | count
+------------+-------
+ -700122826 | 32
+ 352366662 | 32
+ -560021589 | 36
+(3 rows)
+</screen>
+
+
+You could also alter a replicated table to make it a distributed table.
+Note that even though the cluster now has 3 datanodes, the table will continue
+to use only the 2 datanodes on which it was originally replicated.
+
+<screen>
+testdb=# ALTER TABLE repltab DISTRIBUTE BY HASH(col1);
+ALTER TABLE
+testdb=# SELECT xc_node_id, count(*) FROM repltab GROUP BY xc_node_id;
+ xc_node_id | count
+------------+-------
+ -560021589 | 42
+ 352366662 | 58
+(2 rows)
+
+testdb=# ALTER TABLE repltab DISTRIBUTE BY REPLICATION;
+ALTER TABLE
+testdb=# SELECT xc_node_id, count(*) FROM repltab GROUP BY xc_node_id;
+ xc_node_id | count
+------------+-------
+ -560021589 | 100
+(1 row)
+</screen>
+
+Now remove the coordinator added previously. You can use the "clean" option
+to remove the corresponding data directory as well.
+
+<screen>
+<prompt>PGXC$ </prompt> <userinput>remove coordinator master coord3 clean</userinput>
+<prompt>PGXC$ </prompt> <userinput>monitor all</userinput>
+Running: gtm master
+Running: coordinator master coord1
+Running: coordinator master coord2
+Running: datanode master dn1
+Running: datanode master dn2
+Running: datanode master dn3
+
+testdb=# SELECT oid, * FROM pgxc_node;
+ERROR: canceling statement due to user request
+testdb=# SELECT oid, * FROM pgxc_node;
+ oid | node_name | node_type | node_port | node_host | nodeis_primary | nodeis_preferred | node_id
+-------+-----------+-----------+-----------+-----------+----------------+------------------+-------------
+ 11197 | coord1 | C | 30001 | localhost | f | f | 1885696643
+ 16384 | coord2 | C | 30002 | localhost | f | f | -1197102633
+ 16385 | dn1 | D | 40001 | localhost | t | t | -560021589
+ 16386 | dn2 | D | 40002 | localhost | f | t | 352366662
+ 16397 | dn3 | D | 40003 | localhost | f | f | -700122826
+(5 rows)
+
+</screen>
+
+Let us try to remove a datanode now. NOTE: <productname>Postgres-XL</productname> does not
+employ any additional checks to ascertain whether the datanode being dropped has data from tables
+that are replicated or distributed. It is the user's responsibility to ensure that it is
+safe to remove a datanode.
+
+You can use the query below, with the OID of the datanode that is to be removed, to find
+out whether that datanode holds any table data. Note that this only shows tables from the
+current database, so you might want to run the same check on all databases before going
+ahead with the removal:
+
+<screen>
+testdb=# SELECT * FROM pgxc_class WHERE nodeoids::integer[] @> ARRAY[16397];
+ pcrelid | pclocatortype | pcattnum | pchashalgorithm | pchashbuckets | nodeoids
+---------+---------------+----------+-----------------+---------------+-------------------
+ 16388 | H | 1 | 1 | 4096 | 16385 16386 16397
+(1 row)
+
+
+testdb=# ALTER TABLE disttab DELETE NODE (dn3);
+ALTER TABLE
+testdb=# SELECT * FROM pgxc_class WHERE nodeoids::integer[] @> ARRAY[16397];
+ pcrelid | pclocatortype | pcattnum | pchashalgorithm | pchashbuckets | nodeoids
+---------+---------------+----------+-----------------+---------------+----------
+(0 rows)
+</screen>
+
+OK, it is now safe to remove datanode "dn3".
+<screen>
+<prompt>PGXC$ </prompt> <userinput>remove datanode master dn3 clean</userinput>
+<prompt>PGXC$ </prompt> <userinput>monitor all</userinput>
+Running: gtm master
+Running: coordinator master coord1
+Running: coordinator master coord2
+Running: datanode master dn1
+Running: datanode master dn2
+
+testdb=# SELECT oid, * FROM pgxc_node;
+ERROR: canceling statement due to user request
+testdb=# SELECT oid, * FROM pgxc_node;
+ oid | node_name | node_type | node_port | node_host | nodeis_primary | nodeis_preferred | node_id
+-------+-----------+-----------+-----------+-----------+----------------+------------------+-------------
+ 11197 | coord1 | C | 30001 | localhost | f | f | 1885696643
+ 16384 | coord2 | C | 30002 | localhost | f | f | -1197102633
+ 16385 | dn1 | D | 40001 | localhost | t | t | -560021589
+ 16386 | dn2 | D | 40002 | localhost | f | t | 352366662
+(4 rows)
+
+</screen>
+
+The <application>pgxc_ctl</application> utility can also help in setting up slaves for
+datanodes and coordinators. Let us set up a slave for a datanode and see how failover can
+be performed if the master datanode goes down.
+<screen>
+<prompt>PGXC$ </prompt> <userinput>add datanode slave dn1 localhost 40101 40111 $dataDirRoot/dn_slave.1 none $dataDirRoot/datanode_archlog.1</userinput>
+<prompt>PGXC$ </prompt> <userinput>monitor all</userinput>
+Running: gtm master
+Running: coordinator master coord1
+Running: coordinator master coord2
+Running: datanode master dn1
+Running: datanode slave dn1
+Running: datanode master dn2
+
+testdb=# EXECUTE DIRECT ON(dn1) 'SELECT client_hostname, state, sync_state FROM pg_stat_replication';
+ client_hostname | state | sync_state
+-----------------+-----------+------------
+ | streaming | async
+(1 row)
+</screen>
+
+Add some more rows to test failover now.
+
+<screen>
+testdb=# INSERT INTO disttab VALUES (generate_series(1001,1100), generate_series(1101, 1200), 'foo');
+INSERT 0 100
+testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id;
+ xc_node_id | count
+------------+-------
+ -560021589 | 94
+ 352366662 | 106
+(2 rows)
+</screen>
+
+Let us simulate datanode failover now. We will first stop the datanode master "dn1" for
+which we configured a slave above. Note that since the slave is connected to the master,
+we will use the "immediate" mode to stop it.
+<screen>
+<prompt>PGXC$ </prompt> <userinput>stop -m immediate datanode master dn1</userinput>
+</screen>
+
+Since a datanode is down, most queries will fail. A few queries may still work if
+the failed node is not required to run them, which is determined by the
+distribution of the data and the WHERE clause being used.
+
+<screen>
+testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id;
+ERROR: Failed to get pooled connections
+
+testdb=# SELECT xc_node_id, * FROM disttab WHERE col1 = 3;
+ xc_node_id | col1 | col2 | col3
+------------+------+------+------
+ 352366662 | 3 | 103 | foo
+(1 row)
+</screen>
+
+We will now perform the failover and check that everything works fine afterwards.
+<screen>
+<prompt>PGXC$ </prompt> <userinput>failover datanode dn1</userinput>
+
+testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id;
+ERROR: canceling statement due to user request
+testdb=# SELECT xc_node_id, count(*) FROM disttab GROUP BY xc_node_id;
+ xc_node_id | count
+------------+-------
+ -560021589 | 94
+ 352366662 | 106
+(2 rows)
+</screen>
+
+
+The pgxc_node catalog should now have updated entries. In particular, the
+failed-over datanode's node_host and node_port should have been replaced
+with the slave's host and port values.
+
+<screen>
+testdb=# SELECT oid, * FROM pgxc_node;
+ oid | node_name | node_type | node_port | node_host | nodeis_primary | nodeis_preferred | node_id
+-------+-----------+-----------+-----------+-----------+----------------+------------------+-------------
+ 11197 | coord1 | C | 30001 | localhost | f | f | 1885696643
+ 16384 | coord2 | C | 30002 | localhost | f | f | -1197102633
+ 16386 | dn2 | D | 40002 | localhost | f | t | 352366662
+ 16385 | dn1 | D | 40101 | localhost | t | t | -560021589
+(4 rows)
+
+<prompt>PGXC$ </prompt> <userinput>monitor all</userinput>
+Running: gtm master
+Running: coordinator master coord1
+Running: coordinator master coord2
+Running: datanode master dn1
+Running: datanode master dn2
+</screen>
+</para>
+
+ </sect1>
<sect1 id="tutorial-createdb">
<title>Creating a Database</title>
diff --git a/src/backend/pgxc/pool/poolutils.c b/src/backend/pgxc/pool/poolutils.c
index fad39cda9e..9dbf9d81cb 100644
--- a/src/backend/pgxc/pool/poolutils.c
+++ b/src/backend/pgxc/pool/poolutils.c
@@ -90,7 +90,7 @@ pgxc_pool_check(PG_FUNCTION_ARGS)
* to remote nodes. This results in losing prepared and temporary objects
* in all the sessions of server. All the existing transactions are aborted
* and a WARNING message is sent back to client.
- * Session that invocated the reload does the same process, but no WARNING
+ * Session that invoked the reload does the same process, but no WARNING
* message is sent back to client.
*/
Datum