summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc-xc/src/sgml/ref/alter_table.sgmlin224
-rw-r--r--src/backend/access/hash/hashfunc.c81
-rw-r--r--src/backend/catalog/heap.c28
-rw-r--r--src/backend/catalog/pgxc_class.c106
-rw-r--r--src/backend/commands/copy.c22
-rw-r--r--src/backend/commands/tablecmds.c531
-rw-r--r--src/backend/parser/gram.y34
-rw-r--r--src/backend/parser/parse_utilcmd.c4
-rw-r--r--src/backend/pgxc/copy/Makefile2
-rw-r--r--src/backend/pgxc/copy/copyops.c496
-rw-r--r--src/backend/pgxc/copy/remotecopy.c22
-rw-r--r--src/backend/pgxc/locator/Makefile4
-rw-r--r--src/backend/pgxc/locator/locator.c37
-rw-r--r--src/backend/pgxc/locator/redistrib.c871
-rw-r--r--src/backend/pgxc/nodemgr/nodemgr.c3
-rw-r--r--src/backend/pgxc/pool/execRemote.c133
-rw-r--r--src/include/access/hash.h1
-rw-r--r--src/include/catalog/pgxc_class.h25
-rw-r--r--src/include/nodes/parsenodes.h6
-rw-r--r--src/include/pgxc/copyops.h27
-rw-r--r--src/include/pgxc/execRemote.h17
-rw-r--r--src/include/pgxc/locator.h1
-rw-r--r--src/include/pgxc/redistrib.h80
-rw-r--r--src/include/pgxc/remotecopy.h1
-rw-r--r--src/include/utils/rel.h8
-rw-r--r--src/test/regress/expected/xc_alter_table.out408
-rw-r--r--src/test/regress/expected/xc_create_function.out84
-rw-r--r--src/test/regress/sql/xc_alter_table.sql133
-rw-r--r--src/test/regress/sql/xc_create_function.sql85
29 files changed, 3416 insertions, 58 deletions
diff --git a/doc-xc/src/sgml/ref/alter_table.sgmlin b/doc-xc/src/sgml/ref/alter_table.sgmlin
index 3a1f095e15..9116c8313e 100644
--- a/doc-xc/src/sgml/ref/alter_table.sgmlin
+++ b/doc-xc/src/sgml/ref/alter_table.sgmlin
@@ -67,6 +67,10 @@ ALTER TABLE <replaceable class="PARAMETER">name</replaceable>
NOT OF
OWNER TO <replaceable class="PARAMETER">new_owner</replaceable>
SET TABLESPACE <replaceable class="PARAMETER">new_tablespace</replaceable>
+ DISTRIBUTE BY { REPLICATION | ROUND ROBIN | { [HASH | MODULO ] ( <replaceable class="PARAMETER">column_name</replaceable> ) } }
+ TO { GROUP <replaceable class="PARAMETER">groupname</replaceable> | NODE ( <replaceable class="PARAMETER">nodename</replaceable> [, ... ] ) }
+ ADD NODE ( <replaceable class="PARAMETER">nodename</replaceable> [, ... ] )
+ DELETE NODE ( <replaceable class="PARAMETER">nodename</replaceable> [, ... ] )
<phrase>and <replaceable class="PARAMETER">table_constraint_using_index</replaceable> is:</phrase>
@@ -573,6 +577,111 @@ ALTER TABLE <replaceable class="PARAMETER">name</replaceable>
</listitem>
</varlistentry>
+<!## XC>
+ <varlistentry>
+ <term><literal>DISTRIBUTE BY</literal></term>
+ <listitem>
+&xconly;
+ <para>
+ This clause specifies how the table is distributed or replicated among Datanodes.
+ </para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term><literal>REPLICATION</literal></term>
+ <listitem>
+ <para>
+ Each row of the table will be replicated into all the
+ Datanode of the <productname>Postgres-XC</> database
+ cluster.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>ROUND ROBIN</literal></term>
+ <listitem>
+ <para>
+ Each row of the table will be placed in one of the Datanodes
+ by round-robin manner. The value of the row will not be
+ needed to determine what Datanode to go.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>HASH ( <replaceable class="PARAMETER">column_name</> )</literal></term>
+ <listitem>
+ <para>
+ Each row of the table will be placed based on the hash value
+ of the specified column. Following type is allowed as
+ distribution column: INT8, INT2, OID, INT4, BOOL, INT2VECTOR,
+ OIDVECTOR, CHAR, NAME, TEXT, BPCHAR, BYTEA, VARCHAR, FLOAT4,
+ FLOAT8, NUMERIC, CASH, ABSTIME, RELTIME, DATE, TIME,
+ TIMESTAMP, TIMESTAMPTZ, INTERVAL, and TIMETZ.
+ </para>
+ <para>
+ Please note that floating point is not allowed as a basis of
+ the distribution column.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>MODULO ( <replaceable class="PARAMETER">column_name</> )</literal></term>
+ <listitem>
+ <para>
+ Each row of the table will be placed based on the modulo
+ of the specified column. Following type is allowed as
+ distribution column: INT8, INT2, OID, INT4, BOOL, INT2VECTOR,
+ OIDVECTOR, CHAR, NAME, TEXT, BPCHAR, BYTEA, VARCHAR, FLOAT4,
+ FLOAT8, NUMERIC, CASH, ABSTIME, RELTIME, DATE, TIME,
+ TIMESTAMP, TIMESTAMPTZ, INTERVAL, and TIMETZ.
+ </para>
+ <para>
+ Please note that floating point is not allowed as a basis of
+ the distribution column.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>TO GROUP</literal></term>
+ <term><literal>TO NODE</literal></term>
+ <listitem>
+ <para>
+ This defines the list of nodes on which table data exists.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>ADD NODE</literal></term>
+ <listitem>
+ <para>
+ This adds a list of nodes where data of table is distributed
+ to the existing list. If the list of nodes added contains nodes
+ already used by table, an error is returned.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>DELETE NODE</literal></term>
+ <listitem>
+ <para>
+ This removes a list of nodes where data of table is distributed
+ from the existing list. If the list of nodes deleted contains nodes
+ not used by table, an error is returned.
+ </para>
+ </listitem>
+ </varlistentry>
+<!## end>
+
</variablelist>
</para>
@@ -789,7 +898,26 @@ ALTER TABLE <replaceable class="PARAMETER">name</replaceable>
</listitem>
</varlistentry>
+<!## XC>
+ <varlistentry>
+ <term><replaceable class="PARAMETER">nodename</replaceable></term>
+ <listitem>
+ <para>
+ It defines a <productname>Postgres-XC</productname> node of catalog pgxc_node.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable class="PARAMETER">groupname</replaceable></term>
+ <listitem>
+ <para>
+ It defines a <productname>Postgres-XC</productname> node group in catalog pgxc_group.
+ </para>
+ </listitem>
+ </varlistentry>
</variablelist>
+<!## end>
</refsect1>
<refsect1>
@@ -904,10 +1032,74 @@ ALTER TABLE <replaceable class="PARAMETER">name</replaceable>
<!## XC>
&xconly;
<para>
- Please note that except for the column name, you cannot alter
- attribute of table distribution as specified
- with <literal>DISTRIBUTE BY</> clause in <literal>CREATE TABLE</>
- statement.
+ <command>ALTER TABLE</> with clauses <literal>DISTRIBUTE BY</>, <literal>ADD NODE</>,
+ <literal>DELETE NODE</>, <literal>TO NODE</> or <literal>TO GROUP</> is used for data
+ redistribution among nodes specific to <productname>Postgres-XC</>. Those clauses cannot be
+ used with other commands.
+ </para>
+
+ <para>
+ Multiple redistribution scenarios are possible depending on modifications done:
+ <variablelist>
+ <varlistentry>
+ <term>Default redistribution:</term>
+ <listitem>
+ <para>
+ This is the slowest scenario possible. It is done in 3 or 4 steps. Data is firstly
+ saved on Coordinator by fetching all the data with <command>COPY TO</> command. At
+ this point all the tuples are saved using tuple store. The amount of cache allowed for
+ tuple store operation can be controlled with <varname>work_mem</>. Then the table is
+ truncated on all the nodes. Then catalogs are updated. Finally data inside tuple store
+ is redistributed using an internal <command>COPY FROM</> mechanism. <command>REINDEX</>
+ is issued if necessary. The overall performance of this scenario is close to the
+ time necessary to run consecutively <command>COPY TO</> and <command>COPY FROM</>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>Redistribution from replicated to replicated table:</term>
+ <listitem>
+ <para>
+ The node list of a table can have new nodes as well as removed nodes.
+ If nodes are only removed, <command>TRUNCATE</> is launched to remote nodes that are
+ removed. If new nodes are added, then table data is fetched on Coordinator with <command>
+ COPY TO</> and stored inside a tuplestore controlled with <varname>work_mem</>, then
+ data stored is only sent to the new nodes using <command>COPY FROM</> with data stored
+ inside the tuplestore. <command>REINDEX</> is issued if necessary.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>Redistribution from replicated to distributed table:</term>
+ <listitem>
+ <para>
+ If the relation node list contains new nodes, the default redistribution
+ mechanism is used. However, if the node list of relation after redistribution is
+ included in the node list of relation before redistribution, as all the tuples are already
+ located on remote nodes, it is not necessary to fetch any data on Coordinator. Hence,
+ <command>DELETE</> is used to remove on remote nodes only the necessary tuples. This
+ query selects tuples to remove with conditions based on the number of nodes in node
+ list of relation after redistribution, the <literal>HASH</> or <literal>MODULO</> value
+ used for new distribution and the remote node itself where <command>DELETE</> is launched.
+ <command>REINDEX</> is issued if necessary.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>Redistribution from distributed to replicated table:</term>
+ <listitem>
+ <para>
+ In this case the default redistribution mechanism is used.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </para>
+
+ <para>
</para>
<!## end>
</refsect1>
@@ -1055,6 +1247,30 @@ ALTER TABLE distributors DROP CONSTRAINT distributors_pkey,
</programlisting>
</para>
+<!## XC>
+ <para>
+ To change the distribution type and the list of nodes where table data
+ is located:
+<programlisting>
+ALTER TABLE distributors TO NODE (dn1, dn7), DISTRIBUTE BY HASH(dist_id);
+</programlisting>
+ </para>
+
+ <para>
+ To add a node where data of table is distributed:
+<programlisting>
+ALTER TABLE distributors ADD NODE (dn9, dn14);
+</programlisting>
+ </para>
+
+ <para>
+ To remove a node where data of table is distributed:
+<programlisting>
+ALTER TABLE distributors DELETE NODE (dn4, dn0);
+</programlisting>
+ </para>
+<!## end>
+
</refsect1>
<refsect1>
diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c
index 9cb17eb4f7..f4a14e3229 100644
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -531,8 +531,8 @@ hash_uint32(uint32 k)
#ifdef PGXC
/*
- * compute_hash() -- Generaic hash function for all datatypes
- *
+ * compute_hash()
+ * Generic hash function for all datatypes
*/
Datum
compute_hash(Oid type, Datum value, char locator)
@@ -637,4 +637,81 @@ compute_hash(Oid type, Datum value, char locator)
return (Datum)0;
}
+
+/*
+ * get_compute_hash_function
+ * Get hash function name depending on the hash type.
+ * For some cases of hash or modulo distribution, a function might
+ * be required or not.
+ */
+char *
+get_compute_hash_function(Oid type, char locator)
+{
+ switch (type)
+ {
+ case INT8OID:
+ if (locator == LOCATOR_TYPE_HASH)
+ return "hashint8";
+ return NULL;
+ case INT2OID:
+ if (locator == LOCATOR_TYPE_HASH)
+ return "hashint2";
+ return NULL;
+ case OIDOID:
+ if (locator == LOCATOR_TYPE_HASH)
+ return "hashoid";
+ return NULL;
+ case DATEOID:
+ case INT4OID:
+ if (locator == LOCATOR_TYPE_HASH)
+ return "hashint4";
+ return NULL;
+ case BOOLOID:
+ if (locator == LOCATOR_TYPE_HASH)
+ return "hashchar";
+ return NULL;
+ case CHAROID:
+ return "hashchar";
+ case NAMEOID:
+ return "hashname";
+ case INT2VECTOROID:
+ return "hashint2vector";
+ case VARCHAROID:
+ case TEXTOID:
+ return "hashtext";
+ case OIDVECTOROID:
+ return "hashoidvector";
+ case FLOAT4OID:
+ return "hashfloat4";
+ case FLOAT8OID:
+ return "hashfloat8";
+ case RELTIMEOID:
+ case ABSTIMEOID:
+ if (locator == LOCATOR_TYPE_HASH)
+ return "hashint4";
+ return NULL;
+ case CASHOID:
+ return "hashint8";
+ case BPCHAROID:
+ return "hashbpchar";
+ case BYTEAOID:
+ return "hashvarlena";
+ case TIMEOID:
+ return "time_hash";
+ case TIMESTAMPOID:
+ case TIMESTAMPTZOID:
+ return "timestamp_hash";
+ case INTERVALOID:
+ return "interval_hash";
+ case TIMETZOID:
+ return "timetz_hash";
+ case NUMERICOID:
+ return "hash_numeric";
+ default:
+ ereport(ERROR,(errmsg("Unhandled datatype for modulo or hash distribution\n")));
+ }
+
+ /* Keep compiler quiet */
+ return NULL;
+}
#endif
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 18248f4193..f797a0b75f 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -937,13 +937,13 @@ cmp_nodes(const void *p1, const void *p2)
}
/* --------------------------------
- * AddRelationDistribution
+ * AddRelationDistribution
*
* Add to pgxc_class table
* --------------------------------
*/
-void
-AddRelationDistribution(Oid relid,
+void
+AddRelationDistribution(Oid relid,
DistributeBy *distributeby,
PGXCSubCluster *subcluster,
List *parentOids,
@@ -1007,7 +1007,7 @@ GetRelationDistributionItems(Oid relid,
if (!distributeby)
{
- /*
+ /*
* If no distribution was specified, and we have not chosen
* one based on primary key or foreign key, use first column with
* a supported data type.
@@ -1032,9 +1032,9 @@ GetRelationDistributionItems(Oid relid,
if (local_attnum == 0)
local_locatortype = LOCATOR_TYPE_RROBIN;
}
- else
+ else
{
- /*
+ /*
* User specified distribution type
*/
switch (distributeby->disttype)
@@ -1051,12 +1051,12 @@ GetRelationDistributionItems(Oid relid,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("Invalid distribution column specified")));
}
-
+
if (!IsTypeHashDistributable(descriptor->attrs[local_attnum - 1]->atttypid))
{
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("Column %s is not a hash distributable data type",
+ errmsg("Column %s is not a hash distributable data type",
distributeby->colname)));
}
local_locatortype = LOCATOR_TYPE_HASH;
@@ -1108,10 +1108,14 @@ GetRelationDistributionItems(Oid relid,
}
/* Save results */
- *attnum = local_attnum;
- *hashalgorithm = local_hashalgorithm;
- *hashbuckets = local_hashbuckets;
- *locatortype = local_locatortype;
+ if (attnum)
+ *attnum = local_attnum;
+ if (hashalgorithm)
+ *hashalgorithm = local_hashalgorithm;
+ if (hashbuckets)
+ *hashbuckets = local_hashbuckets;
+ if (locatortype)
+ *locatortype = local_locatortype;
}
diff --git a/src/backend/catalog/pgxc_class.c b/src/backend/catalog/pgxc_class.c
index 6d1cf0ed2a..1543a45342 100644
--- a/src/backend/catalog/pgxc_class.c
+++ b/src/backend/catalog/pgxc_class.c
@@ -23,9 +23,13 @@
#include "pgxc/locator.h"
#include "utils/array.h"
+/*
+ * PgxcClassCreate
+ * Create a pgxc_class entry
+ */
void
PgxcClassCreate(Oid pcrelid,
- char pclocatortype,
+ char pclocatortype,
int pcattnum,
int pchashalgorithm,
int pchashbuckets,
@@ -42,7 +46,7 @@ PgxcClassCreate(Oid pcrelid,
/* Build array of Oids to be inserted */
nodes_array = buildoidvector(nodes, numnodes);
- /* Iterate through edb_linkauth attributes initializing nulls and values */
+ /* Iterate through attributes initializing nulls and values */
for (i = 0; i < Natts_pgxc_class; i++)
{
nulls[i] = false;
@@ -81,6 +85,102 @@ PgxcClassCreate(Oid pcrelid,
heap_close(pgxcclassrel, RowExclusiveLock);
}
+
+/*
+ * PgxcClassAlter
+ * Modify a pgxc_class entry with given data
+ */
+void
+PgxcClassAlter(Oid pcrelid,
+ char pclocatortype,
+ int pcattnum,
+ int pchashalgorithm,
+ int pchashbuckets,
+ int numnodes,
+ Oid *nodes,
+ PgxcClassAlterType type)
+{
+ Relation rel;
+ HeapTuple oldtup, newtup;
+ oidvector *nodes_array;
+ Datum new_record[Natts_pgxc_class];
+ bool new_record_nulls[Natts_pgxc_class];
+ bool new_record_repl[Natts_pgxc_class];
+
+ Assert(OidIsValid(pcrelid));
+
+ rel = heap_open(PgxcClassRelationId, RowExclusiveLock);
+ oldtup = SearchSysCacheCopy1(PGXCCLASSRELID,
+ ObjectIdGetDatum(pcrelid));
+
+ if (!HeapTupleIsValid(oldtup)) /* should not happen */
+ elog(ERROR, "cache lookup failed for pgxc_class %u", pcrelid);
+
+ /* Build array of Oids to be inserted */
+ nodes_array = buildoidvector(nodes, numnodes);
+
+ /* Initialize fields */
+ MemSet(new_record, 0, sizeof(new_record));
+ MemSet(new_record_nulls, false, sizeof(new_record_nulls));
+ MemSet(new_record_repl, false, sizeof(new_record_repl));
+
+ /* Fields are updated depending on operation type */
+ switch (type)
+ {
+ case PGXC_CLASS_ALTER_DISTRIBUTION:
+ new_record_repl[Anum_pgxc_class_pclocatortype - 1] = true;
+ new_record_repl[Anum_pgxc_class_pcattnum - 1] = true;
+ new_record_repl[Anum_pgxc_class_pchashalgorithm - 1] = true;
+ new_record_repl[Anum_pgxc_class_pchashbuckets - 1] = true;
+ break;
+ case PGXC_CLASS_ALTER_NODES:
+ new_record_repl[Anum_pgxc_class_nodes - 1] = true;
+ break;
+ case PGXC_CLASS_ALTER_ALL:
+ default:
+ new_record_repl[Anum_pgxc_class_pcrelid - 1] = true;
+ new_record_repl[Anum_pgxc_class_pclocatortype - 1] = true;
+ new_record_repl[Anum_pgxc_class_pcattnum - 1] = true;
+ new_record_repl[Anum_pgxc_class_pchashalgorithm - 1] = true;
+ new_record_repl[Anum_pgxc_class_pchashbuckets - 1] = true;
+ new_record_repl[Anum_pgxc_class_nodes - 1] = true;
+ }
+
+ /* Set up new fields */
+ /* Relation Oid */
+ if (new_record_repl[Anum_pgxc_class_pcrelid - 1])
+ new_record[Anum_pgxc_class_pcrelid - 1] = ObjectIdGetDatum(pcrelid);
+
+ /* Locator type */
+ if (new_record_repl[Anum_pgxc_class_pclocatortype - 1])
+ new_record[Anum_pgxc_class_pclocatortype - 1] = CharGetDatum(pclocatortype);
+
+ /* Attribute number of distribution column */
+ if (new_record_repl[Anum_pgxc_class_pcattnum - 1])
+ new_record[Anum_pgxc_class_pcattnum - 1] = UInt16GetDatum(pcattnum);
+
+ /* Hash algorithm type */
+ if (new_record_repl[Anum_pgxc_class_pchashalgorithm - 1])
+ new_record[Anum_pgxc_class_pchashalgorithm - 1] = UInt16GetDatum(pchashalgorithm);
+
+ /* Hash buckets */
+ if (new_record_repl[Anum_pgxc_class_pchashbuckets - 1])
+ new_record[Anum_pgxc_class_pchashbuckets - 1] = UInt16GetDatum(pchashbuckets);
+
+ /* Node information */
+ if (new_record_repl[Anum_pgxc_class_nodes - 1])
+ new_record[Anum_pgxc_class_nodes - 1] = PointerGetDatum(nodes_array);
+
+ /* Update relation */
+ newtup = heap_modify_tuple(oldtup, RelationGetDescr(rel),
+ new_record,
+ new_record_nulls, new_record_repl);
+ simple_heap_update(rel, &oldtup->t_self, newtup);
+ CatalogUpdateIndexes(rel, newtup);
+
+ heap_close(rel, RowExclusiveLock);
+}
+
/*
* RemovePGXCClass():
* Remove extended PGXC information
@@ -108,5 +208,3 @@ RemovePgxcClass(Oid pcrelid)
heap_close(relation, RowExclusiveLock);
}
-
-
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 074bf09b39..41e77bc39c 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -1700,15 +1700,26 @@ CopyTo(CopyState cstate)
cstate->remoteCopyState->rel_loc)
{
RemoteCopyData *remoteCopyState = cstate->remoteCopyState;
+ RemoteCopyType remoteCopyType;
+
+ /* Set up remote COPY to correct operation */
+ if (cstate->copy_dest == COPY_FILE)
+ remoteCopyType = REMOTE_COPY_FILE;
+ else
+ remoteCopyType = REMOTE_COPY_STDOUT;
/*
* We don't know the value of the distribution column value, so need to
* read from all nodes. Hence indicate that the value is NULL.
*/
- processed = DataNodeCopyOut(
- GetRelationNodes(remoteCopyState->rel_loc, 0, true, UNKNOWNOID, RELATION_ACCESS_READ),
- remoteCopyState->connections,
- cstate->copy_file);
+ processed = DataNodeCopyOut(GetRelationNodes(remoteCopyState->rel_loc, 0,
+ true, UNKNOWNOID,
+ RELATION_ACCESS_READ),
+ remoteCopyState->connections,
+ NULL,
+ cstate->copy_file,
+ NULL,
+ remoteCopyType);
}
else
{
@@ -4289,9 +4300,8 @@ CreateCopyDestReceiver(void)
static RemoteCopyOptions *
GetRemoteCopyOptions(CopyState cstate)
{
- RemoteCopyOptions *res;
+ RemoteCopyOptions *res = makeRemoteCopyOptions();
Assert(cstate);
- res = (RemoteCopyOptions *) palloc0(sizeof(RemoteCopyOptions));
/* Then fill in structure */
res->rco_binary = cstate->binary;
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index b3aaa88541..2cf1ec71b2 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -89,8 +89,11 @@
#ifdef PGXC
#include "pgxc/pgxc.h"
#include "access/gtm.h"
+#include "catalog/pgxc_class.h"
+#include "catalog/pgxc_node.h"
#include "commands/sequence.h"
#include "pgxc/execRemote.h"
+#include "pgxc/redistrib.h"
#endif
/*
@@ -139,7 +142,12 @@ static List *on_commits = NIL;
#define AT_PASS_ADD_INDEX 6 /* ADD indexes */
#define AT_PASS_ADD_CONSTR 7 /* ADD constraints, defaults */
#define AT_PASS_MISC 8 /* other stuff */
+#ifdef PGXC
+#define AT_PASS_DISTRIB 9 /* Redistribution pass */
+#define AT_NUM_PASSES 10
+#else
#define AT_NUM_PASSES 9
+#endif
typedef struct AlteredTableInfo
{
@@ -375,7 +383,14 @@ static void ATExecAddOf(Relation rel, const TypeName *ofTypename, LOCKMODE lockm
static void ATExecDropOf(Relation rel, LOCKMODE lockmode);
static void ATExecGenericOptions(Relation rel, List *options);
#ifdef PGXC
+static void AtExecDistributeBy(Relation rel, DistributeBy *options);
+static void AtExecSubCluster(Relation rel, PGXCSubCluster *options);
+static void AtExecAddNode(Relation rel, List *options);
+static void AtExecDeleteNode(Relation rel, List *options);
static void ATCheckCmd(Relation rel, AlterTableCmd *cmd);
+static RedistribState *BuildRedistribCommands(Oid relid, List *subCmds);
+static Oid *delete_node_list(Oid *old_oids, int old_num, Oid *del_oids, int del_num, int *new_num);
+static Oid *add_node_list(Oid *old_oids, int old_num, Oid *add_oids, int add_num, int *new_num);
#endif
static void copy_relation_data(SMgrRelation rel, SMgrRelation dst,
@@ -620,7 +635,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId)
#ifdef PGXC
/*
* Add to pgxc_class.
- * we need to do this after CommandCounterIncrement
+ * we need to do this after CommandCounterIncrement
*/
if (IS_PGXC_COORDINATOR && relkind == RELKIND_RELATION)
{
@@ -2509,7 +2524,17 @@ CheckTableNotInUse(Relation rel, const char *stmt)
* lock level we want as we recurse may well be higher than required for
* that specific subcommand. So we pass down the overall lock requirement,
* rather than reassess it at lower levels.
+ *
+ */
+#ifdef PGXC
+/*
+ * In Postgres-XC, an extension is added to ALTER TABLE for modification
+ * of the data distribution. Depending on the old and new distribution type
+ * of the relation redistributed, a list of redistribution subcommands is built.
+ * Data redistribution cannot be done in parallel of operations that need
+ * the table to be rewritten like column addition/deletion.
*/
+#endif
void
AlterTable(AlterTableStmt *stmt)
{
@@ -2696,6 +2721,15 @@ AlterTableGetLockLevel(List *cmds)
cmd_lockmode = AccessExclusiveLock;
break;
+#ifdef PGXC
+ case AT_DistributeBy: /* Changes table distribution type */
+ case AT_SubCluster: /* Changes node list of distribution */
+ case AT_AddNodeList: /* Adds nodes in distribution */
+ case AT_DeleteNodeList: /* Deletes nodes in distribution */
+ cmd_lockmode = ExclusiveLock;
+ break;
+#endif
+
/*
* These subcommands affect write operations only.
*/
@@ -2819,6 +2853,9 @@ ATController(Relation rel, List *cmds, bool recurse, LOCKMODE lockmode)
{
List *wqueue = NIL;
ListCell *lcmd;
+#ifdef PGXC
+ RedistribState *redistribState = NULL;
+#endif
/* Phase 1: preliminary examination of commands, create work queue */
foreach(lcmd, cmds)
@@ -2833,12 +2870,82 @@ ATController(Relation rel, List *cmds, bool recurse, LOCKMODE lockmode)
ATPrepCmd(&wqueue, rel, cmd, recurse, false, lockmode);
}
+#ifdef PGXC
+ /* Only check that on local Coordinator */
+ if (IS_PGXC_COORDINATOR && !IsConnFromCoord())
+ {
+ ListCell *ltab;
+
+ /*
+ * Redistribution is only applied to the parent table and not subsequent
+ * children. It is also not applied in recursion. This needs to be done
+ * once all the commands have been treated.
+ */
+ foreach(ltab, wqueue)
+ {
+ AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab);
+
+ if (RelationGetRelid(rel) == tab->relid &&
+ list_length(tab->subcmds[AT_PASS_DISTRIB]) > 0)
+ {
+ /*
+ * Check if there are any commands incompatible
+ * with redistribution. For the time being no other commands
+ * are authorized.
+ */
+ if (list_length(tab->subcmds[AT_PASS_ADD_COL]) > 0 ||
+ list_length(tab->subcmds[AT_PASS_DROP]) > 0 ||
+ list_length(tab->subcmds[AT_PASS_ALTER_TYPE]) > 0 ||
+ list_length(tab->subcmds[AT_PASS_OLD_CONSTR]) > 0 ||
+ list_length(tab->subcmds[AT_PASS_COL_ATTRS]) > 0 ||
+ list_length(tab->subcmds[AT_PASS_ADD_COL]) > 0 ||
+ list_length(tab->subcmds[AT_PASS_ADD_INDEX]) > 0 ||
+ list_length(tab->subcmds[AT_PASS_ADD_CONSTR]) > 0 ||
+ list_length(tab->subcmds[AT_PASS_MISC]) > 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_STATEMENT_TOO_COMPLEX),
+ errmsg("Incompatible operation with data redistribution")));
+
+
+ /* Scan redistribution commands and improve operation */
+ redistribState = BuildRedistribCommands(RelationGetRelid(rel),
+ tab->subcmds[AT_PASS_DISTRIB]);
+ break;
+ }
+ }
+ }
+#endif
+
/* Close the relation, but keep lock until commit */
relation_close(rel, NoLock);
+#ifdef PGXC
+ /* Perform pre-catalog-update redistribution operations */
+ PGXCRedistribTable(redistribState, CATALOG_UPDATE_BEFORE);
+#endif
+
/* Phase 2: update system catalogs */
ATRewriteCatalogs(&wqueue, lockmode);
+#ifdef PGXC
+ /* Invalidate cache for redistributed relation */
+ if (redistribState)
+ {
+ Relation rel2 = relation_open(redistribState->relid, NoLock);
+
+ /* Invalidate all entries related to this relation */
+ CacheInvalidateRelcache(rel2);
+
+ /* Make sure locator info is rebuilt */
+ RelationCacheInvalidateEntry(redistribState->relid);
+ relation_close(rel2, NoLock);
+ }
+
+ /* Perform post-catalog-update redistribution operations */
+ PGXCRedistribTable(redistribState, CATALOG_UPDATE_AFTER);
+ FreeRedistribState(redistribState);
+#endif
+
/* Phase 3: scan/rewrite tables as needed */
ATRewriteTables(&wqueue, lockmode);
}
@@ -3060,6 +3167,16 @@ ATPrepCmd(List **wqueue, Relation rel, AlterTableCmd *cmd,
/* No command-specific prep needed */
pass = AT_PASS_MISC;
break;
+#ifdef PGXC
+ case AT_DistributeBy:
+ case AT_SubCluster:
+ case AT_AddNodeList:
+ case AT_DeleteNodeList:
+ ATSimplePermissions(rel, ATT_TABLE);
+ /* No command-specific prep needed */
+ pass = AT_PASS_DISTRIB;
+ break;
+#endif
default: /* oops */
elog(ERROR, "unrecognized alter table type: %d",
(int) cmd->subtype);
@@ -3327,6 +3444,20 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab, Relation rel,
case AT_GenericOptions:
ATExecGenericOptions(rel, (List *) cmd->def);
break;
+#ifdef PGXC
+ case AT_DistributeBy:
+ AtExecDistributeBy(rel, (DistributeBy *) cmd->def);
+ break;
+ case AT_SubCluster:
+ AtExecSubCluster(rel, (PGXCSubCluster *) cmd->def);
+ break;
+ case AT_AddNodeList:
+ AtExecAddNode(rel, (List *) cmd->def);
+ break;
+ case AT_DeleteNodeList:
+ AtExecDeleteNode(rel, (List *) cmd->def);
+ break;
+#endif
default: /* oops */
elog(ERROR, "unrecognized alter table type: %d",
(int) cmd->subtype);
@@ -3353,6 +3484,17 @@ ATRewriteTables(List **wqueue, LOCKMODE lockmode)
{
AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab);
+#ifdef PGXC
+ /* Forbid table rewrite operations with online data redistribution */
+ if (tab->rewrite &&
+ list_length(tab->subcmds[AT_PASS_DISTRIB]) > 0 &&
+ IS_PGXC_COORDINATOR &&
+ !IsConnFromCoord())
+ ereport(ERROR,
+ (errcode(ERRCODE_STATEMENT_TOO_COMPLEX),
+ errmsg("Incompatible operation with data redistribution")));
+#endif
+
/* Foreign tables have no storage. */
if (tab->relkind == RELKIND_FOREIGN_TABLE)
continue;
@@ -3464,7 +3606,7 @@ ATRewriteTables(List **wqueue, LOCKMODE lockmode)
}
#ifdef PGXC
- /*
+ /*
* In PGXC, do not check the FK constraints on the Coordinator, and just return
* That is because a SELECT is generated whose plan will try and use
* the Datanodes. We (currently) do not want to do that on the Coordinator,
@@ -9180,8 +9322,179 @@ ATExecGenericOptions(Relation rel, List *options)
#ifdef PGXC
/*
+ * ALTER TABLE <name> DISTRIBUTE BY ...
+ */
+static void
+AtExecDistributeBy(Relation rel, DistributeBy *options)
+{
+ Oid relid;
+ char locatortype;
+ int hashalgorithm, hashbuckets;
+ AttrNumber attnum;
+
+ /* Nothing to do on Datanodes */
+ if (IS_PGXC_DATANODE || options == NULL)
+ return;
+
+ relid = RelationGetRelid(rel);
+
+ /* Get necessary distribution information */
+ GetRelationDistributionItems(relid,
+ options,
+ RelationGetDescr(rel),
+ &locatortype,
+ &hashalgorithm,
+ &hashbuckets,
+ &attnum);
+
+ /*
+ * It is not checked if the distribution type list is the same as the old one,
+ * user might define a different sub-cluster at the same time.
+ */
+
+ /* Update pgxc_class entry */
+ PgxcClassAlter(relid,
+ locatortype,
+ (int) attnum,
+ hashalgorithm,
+ hashbuckets,
+ 0,
+ NULL,
+ PGXC_CLASS_ALTER_DISTRIBUTION);
+
+ /* Make the additional catalog changes visible */
+ CommandCounterIncrement();
+}
+
+
+/*
+ * ALTER TABLE <name> TO [ NODE nodelist | GROUP groupname ]
+ */
+static void
+AtExecSubCluster(Relation rel, PGXCSubCluster *options)
+{
+ Oid *nodeoids;
+ int numnodes;
+
+ /* Nothing to do on Datanodes */
+ if (IS_PGXC_DATANODE || options == NULL)
+ return;
+
+ /*
+ * It is not checked if the new subcluster list is the same as the old one,
+ * user might define a different distribution type.
+ */
+
+ /* Obtain new node information */
+ nodeoids = GetRelationDistributionNodes(options, &numnodes);
+
+ /* Update pgxc_class entry */
+ PgxcClassAlter(RelationGetRelid(rel),
+ '\0',
+ 0,
+ 0,
+ 0,
+ numnodes,
+ nodeoids,
+ PGXC_CLASS_ALTER_NODES);
+
+ /* Make the additional catalog changes visible */
+ CommandCounterIncrement();
+}
+
+
+/*
+ * ALTER TABLE <name> ADD NODE nodelist
+ */
+static void
+AtExecAddNode(Relation rel, List *options)
+{
+ Oid *add_oids, *old_oids;
+ int add_num, old_num;
+
+ /* Nothing to do on Datanodes */
+ if (IS_PGXC_DATANODE || options == NIL)
+ return;
+
+ /*
+ * Build a new array of sorted node Oids given the list of name nodes
+ * to be added.
+ */
+ add_oids = BuildRelationDistributionNodes(options, &add_num);
+
+ /*
+ * Then check if nodes to be added are not in existing node
+ * list and build updated list of nodes.
+ */
+ old_num = get_pgxc_classnodes(RelationGetRelid(rel), &old_oids);
+
+ /* Add elements to array */
+ old_oids = add_node_list(old_oids, old_num, add_oids, add_num, &old_num);
+
+ /* Sort once again the newly-created array of node Oids to maintain consistency */
+ old_oids = SortRelationDistributionNodes(old_oids, old_num);
+
+ /* Update pgxc_class entry */
+ PgxcClassAlter(RelationGetRelid(rel),
+ '\0',
+ 0,
+ 0,
+ 0,
+ old_num,
+ old_oids,
+ PGXC_CLASS_ALTER_NODES);
+
+ /* Make the additional catalog changes visible */
+ CommandCounterIncrement();
+}
+
+
+/*
+ * ALTER TABLE <name> DELETE NODE nodelist
+ */
+static void
+AtExecDeleteNode(Relation rel, List *options)
+{
+ Oid *del_oids, *old_oids;
+ int del_num, old_num;
+
+ /* Nothing to do on Datanodes */
+ if (IS_PGXC_DATANODE || options == NIL)
+ return;
+
+ /*
+ * Build a new array of sorted node Oids given the list of name nodes
+ * to be deleted.
+ */
+ del_oids = BuildRelationDistributionNodes(options, &del_num);
+
+ /*
+ * Check if nodes to be deleted are really included in existing
+ * node list and get updated list of nodes.
+ */
+ old_num = get_pgxc_classnodes(RelationGetRelid(rel), &old_oids);
+
+ /* Delete elements on array */
+ old_oids = delete_node_list(old_oids, old_num, del_oids, del_num, &old_num);
+
+ /* Update pgxc_class entry */
+ PgxcClassAlter(RelationGetRelid(rel),
+ '\0',
+ 0,
+ 0,
+ 0,
+ old_num,
+ old_oids,
+ PGXC_CLASS_ALTER_NODES);
+
+ /* Make the additional catalog changes visible */
+ CommandCounterIncrement();
+}
+
+
+/*
* ATCheckCmd
- *
+ *
* Check ALTER TABLE restrictions in Postgres-XC
*/
static void
@@ -9205,6 +9518,218 @@ ATCheckCmd(Relation rel, AlterTableCmd *cmd)
break;
}
}
+
+
+/*
+ * BuildRedistribCommands
+ *    Evaluate new and old distribution and build the list of operations
+ *    necessary to perform table redistribution.
+ *
+ * The subcommands are replayed against a copy of the relation's locator
+ * information; the resulting new locator info is then compared with the
+ * current one to derive the redistribution command tree.
+ * NOTE(review): relation is opened with NoLock — presumably the caller
+ * already holds a sufficient lock; confirm at call sites.
+ */
+static RedistribState *
+BuildRedistribCommands(Oid relid, List *subCmds)
+{
+    RedistribState *redistribState = makeRedistribState(relid);
+    RelationLocInfo *oldLocInfo, *newLocInfo;    /* Former locator info */
+    Relation rel;
+    Oid *new_oid_array;    /* Modified list of Oids */
+    int new_num, i;        /* Modified number of Oids */
+    ListCell *item;
+
+    /* Get necessary information about relation */
+    rel = relation_open(redistribState->relid, NoLock);
+    oldLocInfo = RelationGetLocInfo(rel);
+    Assert(oldLocInfo);
+
+    /*
+     * Get a copy of the locator information that will be modified by
+     * successive ALTER TABLE commands.
+     */
+    newLocInfo = CopyRelationLocInfo(oldLocInfo);
+    /* The node list of this locator information will be rebuilt after command scan */
+    list_free(newLocInfo->nodeList);
+    newLocInfo->nodeList = NULL;
+
+    /* Get the list to be modified */
+    new_num = get_pgxc_classnodes(RelationGetRelid(rel), &new_oid_array);
+
+    /* Apply each subcommand in order to the copied locator information */
+    foreach(item, subCmds)
+    {
+        AlterTableCmd *cmd = (AlterTableCmd *) lfirst(item);
+        switch (cmd->subtype)
+        {
+            case AT_DistributeBy:
+                /*
+                 * Get necessary distribution information and update to new
+                 * distribution type.
+                 */
+                GetRelationDistributionItems(redistribState->relid,
+                                             (DistributeBy *) cmd->def,
+                                             RelationGetDescr(rel),
+                                             &(newLocInfo->locatorType),
+                                             NULL,
+                                             NULL,
+                                             (AttrNumber *)&(newLocInfo->partAttrNum));
+                break;
+            case AT_SubCluster:
+                /* Update new list of nodes */
+                new_oid_array = GetRelationDistributionNodes((PGXCSubCluster *) cmd->def, &new_num);
+                break;
+            case AT_AddNodeList:
+                {
+                    Oid *add_oids;
+                    int add_num;
+                    add_oids = BuildRelationDistributionNodes((List *) cmd->def, &add_num);
+                    /* Add elements to array */
+                    new_oid_array = add_node_list(new_oid_array, new_num, add_oids, add_num, &new_num);
+                }
+                break;
+            case AT_DeleteNodeList:
+                {
+                    Oid *del_oids;
+                    int del_num;
+                    del_oids = BuildRelationDistributionNodes((List *) cmd->def, &del_num);
+                    /* Delete elements from array */
+                    new_oid_array = delete_node_list(new_oid_array, new_num, del_oids, del_num, &new_num);
+                }
+                break;
+            default:
+                Assert(0); /* Should not happen */
+        }
+    }
+
+    /* Build relation node list for new locator info */
+    for (i = 0; i < new_num; i++)
+        newLocInfo->nodeList = lappend_int(newLocInfo->nodeList,
+                                           PGXCNodeGetNodeId(new_oid_array[i],
+                                                             PGXC_NODE_DATANODE));
+
+    /* Build the command tree for table redistribution */
+    PGXCRedistribCreateCommandList(redistribState, newLocInfo);
+
+    /* Clean up */
+    FreeRelationLocInfo(newLocInfo);
+    pfree(new_oid_array);
+    relation_close(rel, NoLock);
+
+    return redistribState;
+}
+
+
+/*
+ * delete_node_list
+ *    Remove from Oid array old_oids (old_num elements) every Oid listed
+ *    in del_oids (del_num elements) and return the resulting array, with
+ *    its length stored in *new_num.
+ *
+ * An error is raised if an element to be deleted is not in the existing
+ * array (ERRCODE_UNDEFINED_OBJECT) or if the deletion would leave the
+ * node list empty.  The result keeps the relative order of the input
+ * array, so no re-sort is needed after deletion.  The input array is
+ * modified in place and may be reallocated.
+ */
+Oid *
+delete_node_list(Oid *old_oids, int old_num, Oid *del_oids, int del_num, int *new_num)
+{
+    Oid *new_oids = old_oids;
+    int loc_new_num = old_num;
+    int i;
+
+    for (i = 0; i < del_num; i++)
+    {
+        Oid nodeoid = del_oids[i];
+        int j, position = -1;
+
+        /* Locate the element to be removed */
+        for (j = 0; j < loc_new_num; j++)
+        {
+            if (nodeoid == new_oids[j])
+            {
+                position = j;
+                break;        /* node Oids are unique in the array */
+            }
+        }
+
+        /* Element to delete has to be in the list */
+        if (position < 0)
+            ereport(ERROR,
+                    (errcode(ERRCODE_UNDEFINED_OBJECT),
+                     errmsg("PGXC Node %s: object not in relation node list",
+                            get_pgxc_nodename(nodeoid))));
+
+        /* Move all the elements from [position+1, n-1] to [position, n-2] */
+        for (j = position + 1; j < loc_new_num; j++)
+            new_oids[j - 1] = new_oids[j];
+
+        loc_new_num--;
+
+        /* Not possible to have an empty list */
+        if (loc_new_num == 0)
+            ereport(ERROR,
+                    (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+                     errmsg("Node list is empty: one node at least is mandatory")));
+    }
+
+    /* Shrink the array once, after all the deletions are done */
+    new_oids = (Oid *) repalloc(new_oids, loc_new_num * sizeof(Oid));
+
+    /* Save new number of nodes */
+    *new_num = loc_new_num;
+    return new_oids;
+}
+
+
+/*
+ * add_node_list
+ *    Add to Oid array old_oids (old_num elements) all the Oids listed in
+ *    add_oids (add_num elements) and return the resulting array, with its
+ *    length stored in *new_num.
+ *
+ * An error (ERRCODE_DUPLICATE_OBJECT) is raised if an element to be added
+ * already exists in the relation node array.  The O(n^2) duplicate scan is
+ * deliberate: it avoids depending on the way Oids are sorted by heap APIs.
+ * The result is sorted once at the end to maintain consistency.
+ * The input array is reallocated in place.
+ */
+Oid *
+add_node_list(Oid *old_oids, int old_num, Oid *add_oids, int add_num, int *new_num)
+{
+    Oid *new_oids;
+    int loc_new_num = old_num;
+    int i;
+
+    /* Final size is known up-front, so grow the array only once */
+    new_oids = (Oid *) repalloc(old_oids, (old_num + add_num) * sizeof(Oid));
+
+    for (i = 0; i < add_num; i++)
+    {
+        Oid nodeoid = add_oids[i];
+        int j;
+
+        /* Check if element is already a part of array */
+        for (j = 0; j < loc_new_num; j++)
+        {
+            /* Item is already in node list */
+            if (nodeoid == new_oids[j])
+                ereport(ERROR,
+                        (errcode(ERRCODE_DUPLICATE_OBJECT),
+                         errmsg("PGXC Node %s: object already in relation node list",
+                                get_pgxc_nodename(nodeoid))));
+        }
+
+        /* If we are here, element can be added safely in node array */
+        new_oids[loc_new_num] = nodeoid;
+        loc_new_num++;
+    }
+
+    /* Sort once again the newly-created array of node Oids to maintain consistency */
+    new_oids = SortRelationDistributionNodes(new_oids, loc_new_num);
+
+    /* Save new number of nodes */
+    *new_num = loc_new_num;
+    return new_oids;
+}
#endif
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index c87fdbf3d9..7b6050e4f4 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -2038,6 +2038,40 @@ alter_table_cmd:
n->def = (Node *)$1;
$$ = (Node *) n;
}
+/* PGXC_BEGIN */
+ /* ALTER TABLE <name> DISTRIBUTE BY ... */
+ | OptDistributeByInternal
+ {
+ AlterTableCmd *n = makeNode(AlterTableCmd);
+ n->subtype = AT_DistributeBy;
+ n->def = (Node *)$1;
+ $$ = (Node *)n;
+ }
+ /* ALTER TABLE <name> TO [ NODE (nodelist) | GROUP groupname ] */
+ | OptSubClusterInternal
+ {
+ AlterTableCmd *n = makeNode(AlterTableCmd);
+ n->subtype = AT_SubCluster;
+ n->def = (Node *)$1;
+ $$ = (Node *)n;
+ }
+ /* ALTER TABLE <name> ADD NODE (nodelist) */
+ | ADD_P NODE pgxcnodes
+ {
+ AlterTableCmd *n = makeNode(AlterTableCmd);
+ n->subtype = AT_AddNodeList;
+ n->def = (Node *)$3;
+ $$ = (Node *)n;
+ }
+ /* ALTER TABLE <name> DELETE NODE (nodelist) */
+ | DELETE_P NODE pgxcnodes
+ {
+ AlterTableCmd *n = makeNode(AlterTableCmd);
+ n->subtype = AT_DeleteNodeList;
+ n->def = (Node *)$3;
+ $$ = (Node *)n;
+ }
+/* PGXC_END */
;
alter_column_default:
diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c
index edd4a104e0..f98e6ea59d 100644
--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -93,7 +93,8 @@ typedef struct
IndexStmt *pkey; /* PRIMARY KEY index, if any */
#ifdef PGXC
char *fallback_dist_col; /* suggested column to distribute on */
- DistributeBy *distributeby; /* original distribute by column in create table */
+ DistributeBy *distributeby; /* original distribute by column of CREATE TABLE */
+ PGXCSubCluster *subcluster; /* original subcluster option of CREATE TABLE */
#endif
} CreateStmtContext;
@@ -2415,6 +2416,7 @@ transformAlterTableStmt(AlterTableStmt *stmt, const char *queryString)
#ifdef PGXC
cxt.fallback_dist_col = NULL;
cxt.distributeby = NULL;
+ cxt.subcluster = NULL;
#endif
/*
diff --git a/src/backend/pgxc/copy/Makefile b/src/backend/pgxc/copy/Makefile
index a8cfbd86da..2ddcc904b3 100644
--- a/src/backend/pgxc/copy/Makefile
+++ b/src/backend/pgxc/copy/Makefile
@@ -14,6 +14,6 @@ subdir = src/backend/pgxc/copy
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
-OBJS = remotecopy.o
+OBJS = copyops.o remotecopy.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/pgxc/copy/copyops.c b/src/backend/pgxc/copy/copyops.c
new file mode 100644
index 0000000000..a85a06cc09
--- /dev/null
+++ b/src/backend/pgxc/copy/copyops.c
@@ -0,0 +1,496 @@
+/*-------------------------------------------------------------------------
+ *
+ * copyops.c
+ * Functions related to remote COPY data manipulation and materialization
+ * of data redistribution
+ *
+ * Copyright (c) 2010-2012 Postgres-XC Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/pgxc/copy/copyops.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+#include "miscadmin.h"
+#include "fmgr.h"
+#include "lib/stringinfo.h"
+#include "mb/pg_wchar.h"
+#include "pgxc/copyops.h"
+#include "utils/lsyscache.h"
+
+/* NULL print marker */
+#define COPYOPS_NULL_PRINT "\\N"
+
+/* Some octal operations */
+#define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
+#define OCTVALUE(c) ((c) - '0')
+/* Send text representation of one attribute, with conversion and escaping */
+#define DUMPSOFAR() \
+ do { \
+ if (ptr > start) \
+ appendBinaryStringInfo(buf, (char *) start, ptr - start); \
+ } while (0)
+
+
+static int get_decimal_from_hex(char hex);
+static void attribute_out_text(StringInfo buf, char *string);
+
+/*
+ * get_decimal_from_hex
+ *    Convert a single hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F')
+ *    to its decimal value.
+ */
+static int
+get_decimal_from_hex(char hex)
+{
+    unsigned char c = (unsigned char) hex;
+
+    return isdigit(c) ? (c - '0') : (tolower(c) - 'a' + 10);
+}
+
+
+/*
+ * attribute_out_text
+ *    Append the text representation of one attribute value to buf,
+ *    escaping control characters, backslash and the COPY delimiter.
+ *    This takes portions of the code of CopyAttributeOutText.
+ */
+static void
+attribute_out_text(StringInfo buf, char *string)
+{
+    char *ptr;
+    char c;
+    char *start;
+    char delimc = COPYOPS_DELIMITER;
+    bool need_transcoding, encoding_embeds_ascii;
+    int file_encoding = pg_get_client_encoding();
+
+    /* Transcoding is needed when client and server encodings differ */
+    need_transcoding = (file_encoding != GetDatabaseEncoding() ||
+                        pg_database_encoding_max_length() > 1);
+    encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(file_encoding);
+
+    if (need_transcoding)
+        ptr = pg_server_to_any(string, strlen(string), file_encoding);
+    else
+        ptr = string;
+
+    /*
+     * We have to grovel through the string searching for control characters
+     * and instances of the delimiter character. In most cases, though, these
+     * are infrequent. To avoid overhead from appending to the buffer once
+     * per character, we dump out all characters between escaped characters
+     * in a single call (DUMPSOFAR). The loop invariant is that the data from
+     * "start" to "ptr" can be sent literally, but hasn't yet been.
+     *
+     * We can skip pg_encoding_mblen() overhead when encoding is safe, because
+     * in valid backend encodings, extra bytes of a multibyte character never
+     * look like ASCII. This loop is sufficiently performance-critical that
+     * it's worth making two copies of it to get the IS_HIGHBIT_SET() test out
+     * of the normal safe-encoding path.
+     */
+    if (encoding_embeds_ascii)
+    {
+        /* Slow path: multibyte characters may embed ASCII-looking bytes */
+        start = ptr;
+        while ((c = *ptr) != '\0')
+        {
+            if ((unsigned char) c < (unsigned char) 0x20)
+            {
+                /*
+                 * \r and \n must be escaped, the others are traditional. We
+                 * prefer to dump these using the C-like notation, rather than
+                 * a backslash and the literal character, because it makes the
+                 * dump file a bit more proof against Microsoftish data
+                 * mangling.
+                 */
+                switch (c)
+                {
+                    case '\b':
+                        c = 'b';
+                        break;
+                    case '\f':
+                        c = 'f';
+                        break;
+                    case '\n':
+                        c = 'n';
+                        break;
+                    case '\r':
+                        c = 'r';
+                        break;
+                    case '\t':
+                        c = 't';
+                        break;
+                    case '\v':
+                        c = 'v';
+                        break;
+                    default:
+                        /* If it's the delimiter, must backslash it */
+                        if (c == delimc)
+                            break;
+                        /* All ASCII control chars are length 1 */
+                        ptr++;
+                        continue; /* fall to end of loop */
+                }
+
+                /* if we get here, we need to convert the control char */
+                DUMPSOFAR();
+                appendStringInfoCharMacro(buf, '\\');
+                appendStringInfoCharMacro(buf, c);
+                start = ++ptr;
+            }
+            else if (c == '\\' || c == delimc)
+            {
+                DUMPSOFAR();
+                appendStringInfoCharMacro(buf, '\\');
+                start = ++ptr;
+            }
+            else if (IS_HIGHBIT_SET(c))
+                ptr += pg_encoding_mblen(file_encoding, ptr);
+            else
+                ptr++;
+        }
+    }
+    else
+    {
+        /* Fast path: no per-character multibyte handling needed */
+        start = ptr;
+        while ((c = *ptr) != '\0')
+        {
+            if ((unsigned char) c < (unsigned char) 0x20)
+            {
+                /*
+                 * \r and \n must be escaped, the others are traditional. We
+                 * prefer to dump these using the C-like notation, rather than
+                 * a backslash and the literal character, because it makes the
+                 * dump file a bit more proof against Microsoftish data
+                 * mangling.
+                 */
+                switch (c)
+                {
+                    case '\b':
+                        c = 'b';
+                        break;
+                    case '\f':
+                        c = 'f';
+                        break;
+                    case '\n':
+                        c = 'n';
+                        break;
+                    case '\r':
+                        c = 'r';
+                        break;
+                    case '\t':
+                        c = 't';
+                        break;
+                    case '\v':
+                        c = 'v';
+                        break;
+                    default:
+                        /* If it's the delimiter, must backslash it */
+                        if (c == delimc)
+                            break;
+                        /* All ASCII control chars are length 1 */
+                        ptr++;
+                        continue; /* fall to end of loop */
+                }
+                /* if we get here, we need to convert the control char */
+                DUMPSOFAR();
+                appendStringInfoCharMacro(buf, '\\');
+                appendStringInfoCharMacro(buf, c);
+                start = ++ptr;
+            }
+            else if (c == '\\' || c == delimc)
+            {
+                DUMPSOFAR();
+                appendStringInfoCharMacro(buf, '\\');
+                start = ++ptr;
+            }
+            else
+                ptr++;
+        }
+    }
+
+    /* Flush the trailing literal run */
+    DUMPSOFAR();
+}
+
+
+/*
+ * CopyOps_RawDataToArrayField
+ *    Convert the raw output of COPY TO to an array of fields.
+ *    This is a simplified version of CopyReadAttributesText used for data
+ *    redistribution and storage of tuple data into a tuple store.
+ *
+ * message is a buffer of len bytes holding one COPY text row (without
+ * trailing newline).  The returned array holds one palloc'd de-escaped
+ * string per field; fields matching the NULL marker are returned as NULL.
+ */
+char **
+CopyOps_RawDataToArrayField(TupleDesc tupdesc, char *message, int len)
+{
+    char delimc = COPYOPS_DELIMITER;
+    int fieldno;
+    int null_print_len = strlen(COPYOPS_NULL_PRINT);
+    char *origin_ptr;
+    char *output_ptr;
+    char *cur_ptr;
+    char *line_end_ptr;
+    int fields = tupdesc->natts;
+    char **raw_fields;
+    Form_pg_attribute *attr = tupdesc->attrs;
+
+    /* Adjust number of fields depending on dropped attributes */
+    for (fieldno = 0; fieldno < tupdesc->natts; fieldno++)
+    {
+        if (attr[fieldno]->attisdropped)
+            fields--;
+    }
+
+    /* Then alloc necessary space */
+    raw_fields = (char **) palloc(fields * sizeof(char *));
+
+    /*
+     * Take a copy of message to manipulate.  De-escaping can only shrink the
+     * data, so len + 1 bytes is always enough for the output strings.
+     * Note: copy only len bytes from message; the input buffer is not
+     * guaranteed to be NUL-terminated, so reading len + 1 bytes would
+     * overrun it.
+     */
+    origin_ptr = (char *) palloc0(sizeof(char) * (len + 1));
+    memcpy(origin_ptr, message, len);
+
+    /* Add clean separator '\0' at the end of message */
+    origin_ptr[len] = '\0';
+
+    /* Keep track of original pointer */
+    output_ptr = origin_ptr;
+
+    /* set pointer variables for loop */
+    cur_ptr = message;
+    line_end_ptr = message + len;
+
+    /* Outer loop iterates over fields */
+    fieldno = 0;
+    for (;;)
+    {
+        char *start_ptr;
+        char *end_ptr;
+        int input_len;
+        bool found_delim = false;
+        bool saw_non_ascii = false;
+
+        /* Make sure there is enough space for the next value */
+        if (fieldno >= fields)
+        {
+            fields *= 2;
+            raw_fields = repalloc(raw_fields, fields * sizeof(char *));
+        }
+
+        /* Remember start of field on output side */
+        start_ptr = cur_ptr;
+        raw_fields[fieldno] = output_ptr;
+
+        /* Scan data for field, de-escaping into the output area */
+        for (;;)
+        {
+            char c;
+
+            end_ptr = cur_ptr;
+            if (cur_ptr >= line_end_ptr)
+                break;
+            c = *cur_ptr++;
+            if (c == delimc)
+            {
+                found_delim = true;
+                break;
+            }
+            if (c == '\\')
+            {
+                if (cur_ptr >= line_end_ptr)
+                    break;
+                c = *cur_ptr++;
+                switch (c)
+                {
+                    case '0':
+                    case '1':
+                    case '2':
+                    case '3':
+                    case '4':
+                    case '5':
+                    case '6':
+                    case '7':
+                        {
+                            /* handle \013 */
+                            int val;
+
+                            val = OCTVALUE(c);
+                            if (cur_ptr < line_end_ptr)
+                            {
+                                c = *cur_ptr;
+                                if (ISOCTAL(c))
+                                {
+                                    cur_ptr++;
+                                    val = (val << 3) + OCTVALUE(c);
+                                    if (cur_ptr < line_end_ptr)
+                                    {
+                                        c = *cur_ptr;
+                                        if (ISOCTAL(c))
+                                        {
+                                            cur_ptr++;
+                                            val = (val << 3) + OCTVALUE(c);
+                                        }
+                                    }
+                                }
+                            }
+                            c = val & 0377;
+                            if (c == '\0' || IS_HIGHBIT_SET(c))
+                                saw_non_ascii = true;
+                        }
+                        break;
+                    case 'x':
+                        /* Handle \x3F */
+                        if (cur_ptr < line_end_ptr)
+                        {
+                            char hexchar = *cur_ptr;
+
+                            if (isxdigit((unsigned char) hexchar))
+                            {
+                                int val = get_decimal_from_hex(hexchar);
+
+                                cur_ptr++;
+                                if (cur_ptr < line_end_ptr)
+                                {
+                                    hexchar = *cur_ptr;
+                                    if (isxdigit((unsigned char) hexchar))
+                                    {
+                                        cur_ptr++;
+                                        val = (val << 4) + get_decimal_from_hex(hexchar);
+                                    }
+                                }
+                                c = val & 0xff;
+                                if (c == '\0' || IS_HIGHBIT_SET(c))
+                                    saw_non_ascii = true;
+                            }
+                        }
+                        break;
+                    case 'b':
+                        c = '\b';
+                        break;
+                    case 'f':
+                        c = '\f';
+                        break;
+                    case 'n':
+                        c = '\n';
+                        break;
+                    case 'r':
+                        c = '\r';
+                        break;
+                    case 't':
+                        c = '\t';
+                        break;
+                    case 'v':
+                        c = '\v';
+                        break;
+
+                    /*
+                     * in all other cases, take the char after '\'
+                     * literally
+                     */
+                }
+            }
+
+            /* Add c to output string */
+            *output_ptr++ = c;
+        }
+
+        /* Terminate attribute value in output area */
+        *output_ptr++ = '\0';
+
+        /*
+         * If we de-escaped a non-7-bit-ASCII char, make sure we still have
+         * valid data for the db encoding. Avoid calling strlen here for the
+         * sake of efficiency.
+         */
+        if (saw_non_ascii)
+        {
+            char *fld = raw_fields[fieldno];
+
+            pg_verifymbstr(fld, output_ptr - (fld + 1), false);
+        }
+
+        /* Check whether raw input matched null marker */
+        input_len = end_ptr - start_ptr;
+        if (input_len == null_print_len &&
+            strncmp(start_ptr, COPYOPS_NULL_PRINT, input_len) == 0)
+            raw_fields[fieldno] = NULL;
+
+        fieldno++;
+        /* Done if we hit EOL instead of a delim */
+        if (!found_delim)
+            break;
+    }
+
+    /* Clean up state of attribute_buf */
+    output_ptr--;
+    Assert(*output_ptr == '\0');
+
+    return raw_fields;
+}
+
+/*
+ * CopyOps_BuildOneRowTo
+ *    Build one row message to be sent to remote nodes through COPY protocol.
+ *
+ * values/nulls are the tuple's datums and null flags, one per attribute of
+ * tupdesc; dropped attributes are skipped.  The message length is returned
+ * in *len and the palloc'd message itself as the function result.
+ */
+char *
+CopyOps_BuildOneRowTo(TupleDesc tupdesc, Datum *values, bool *nulls, int *len)
+{
+    bool need_delim = false;
+    char *res;
+    int i;
+    FmgrInfo *out_functions;
+    Form_pg_attribute *attr = tupdesc->attrs;
+    StringInfo buf;
+
+    /* Get info about the columns we need to process. */
+    out_functions = (FmgrInfo *) palloc(tupdesc->natts * sizeof(FmgrInfo));
+    for (i = 0; i < tupdesc->natts; i++)
+    {
+        Oid out_func_oid;
+        bool isvarlena;
+
+        /* Do not need any information for dropped attributes */
+        if (attr[i]->attisdropped)
+            continue;
+
+        getTypeOutputInfo(attr[i]->atttypid,
+                          &out_func_oid,
+                          &isvarlena);
+        fmgr_info(out_func_oid, &out_functions[i]);
+    }
+
+    /* Initialize output buffer */
+    buf = makeStringInfo();
+
+    for (i = 0; i < tupdesc->natts; i++)
+    {
+        Datum value = values[i];
+        bool isnull = nulls[i];
+
+        /* Do not need any information for dropped attributes */
+        if (attr[i]->attisdropped)
+            continue;
+
+        /* Delimiter goes between fields, not before the first one */
+        if (need_delim)
+            appendStringInfoCharMacro(buf, COPYOPS_DELIMITER);
+        need_delim = true;
+
+        if (isnull)
+        {
+            /*
+             * Use the module's NULL marker so that output stays consistent
+             * with what CopyOps_RawDataToArrayField parses back.
+             */
+            appendBinaryStringInfo(buf, COPYOPS_NULL_PRINT,
+                                   strlen(COPYOPS_NULL_PRINT));
+        }
+        else
+        {
+            char *string;
+            string = OutputFunctionCall(&out_functions[i],
+                                        value);
+            attribute_out_text(buf, string);
+            pfree(string);
+        }
+    }
+
+    /* Record length of message */
+    *len = buf->len;
+    res = pstrdup(buf->data);
+    pfree(out_functions);
+    pfree(buf->data);
+    pfree(buf);
+    return res;
+}
diff --git a/src/backend/pgxc/copy/remotecopy.c b/src/backend/pgxc/copy/remotecopy.c
index 8c3eba0bff..5c0299dc64 100644
--- a/src/backend/pgxc/copy/remotecopy.c
+++ b/src/backend/pgxc/copy/remotecopy.c
@@ -167,7 +167,6 @@ RemoteCopy_BuildStatement(RemoteCopyData *state,
else
appendStringInfoString(&state->query_buf, " TO STDOUT");
-
if (options->rco_binary)
appendStringInfoString(&state->query_buf, " BINARY");
@@ -201,7 +200,6 @@ RemoteCopy_BuildStatement(RemoteCopyData *state,
* It is not necessary to send the HEADER part to Datanodes.
* Sending data is sufficient.
*/
-
if (options->rco_quote && options->rco_quote[0] != '"')
{
appendStringInfoString(&state->query_buf, " QUOTE AS ");
@@ -245,6 +243,26 @@ RemoteCopy_BuildStatement(RemoteCopyData *state,
/*
+ * makeRemoteCopyOptions
+ *    Build a RemoteCopyOptions structure with all fields set to their
+ *    defaults (false for booleans, NULL for strings, NIL for lists).
+ */
+RemoteCopyOptions *
+makeRemoteCopyOptions(void)
+{
+    /*
+     * palloc0 zeroes the chunk, which already yields false booleans,
+     * NULL string pointers and NIL lists.
+     */
+    return (RemoteCopyOptions *) palloc0(sizeof(RemoteCopyOptions));
+}
+
+
+/*
* FreeRemoteCopyOptions
* Free remote COPY options structure
*/
diff --git a/src/backend/pgxc/locator/Makefile b/src/backend/pgxc/locator/Makefile
index 107fe0f601..66c4c50d2d 100644
--- a/src/backend/pgxc/locator/Makefile
+++ b/src/backend/pgxc/locator/Makefile
@@ -1,7 +1,7 @@
#-------------------------------------------------------------------------
#
# Makefile--
-# Makefile for locator
+# Makefile for locator and data distribution
#
# Copyright(C) 2010-2012 Postgres-XC Development Group
#
@@ -14,6 +14,6 @@ subdir = src/backend/pgxc/locator
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
-OBJS = locator.o
+OBJS = locator.o redistrib.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/pgxc/locator/locator.c b/src/backend/pgxc/locator/locator.c
index feab0a1f9e..b5b920a443 100644
--- a/src/backend/pgxc/locator/locator.c
+++ b/src/backend/pgxc/locator/locator.c
@@ -440,7 +440,6 @@ IsModuloColumn(RelationLocInfo *rel_loc_info, char *part_col_name)
/*
* IsModuloColumnForRelId - return whether or not column for relation is used for modulo distribution.
- *
*/
bool
IsModuloColumnForRelId(Oid relid, char *part_col_name)
@@ -502,6 +501,42 @@ IsTableDistOnPrimary(RelationLocInfo *rel_loc_info)
return false;
}
+
+/*
+ * IsLocatorInfoEqual
+ *    Check equality of given locator information.
+ *    Two locators are equal when relation, locator type, distribution
+ *    attribute and node lists (compared as sets) all match.
+ */
+bool
+IsLocatorInfoEqual(RelationLocInfo *rel_loc_info1, RelationLocInfo *rel_loc_info2)
+{
+    List *list1, *list2;
+
+    Assert(rel_loc_info1 && rel_loc_info2);
+
+    list1 = rel_loc_info1->nodeList;
+    list2 = rel_loc_info2->nodeList;
+
+    /*
+     * Node lists are compared with a symmetric difference so that ordering
+     * differences do not matter.
+     */
+    return rel_loc_info1->relid == rel_loc_info2->relid &&
+           rel_loc_info1->locatorType == rel_loc_info2->locatorType &&
+           rel_loc_info1->partAttrNum == rel_loc_info2->partAttrNum &&
+           list_difference_int(list1, list2) == NIL &&
+           list_difference_int(list2, list1) == NIL;
+}
+
+
/*
* GetRelationNodes
*
diff --git a/src/backend/pgxc/locator/redistrib.c b/src/backend/pgxc/locator/redistrib.c
new file mode 100644
index 0000000000..264f01b1d1
--- /dev/null
+++ b/src/backend/pgxc/locator/redistrib.c
@@ -0,0 +1,871 @@
+/*-------------------------------------------------------------------------
+ *
+ * redistrib.c
+ * Routines related to online data redistribution
+ *
+ * Copyright (c) 2010-2012 Postgres-XC Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/pgxc/locator/redistrib.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+#include "miscadmin.h"
+
+#include "access/hash.h"
+#include "access/htup.h"
+#include "access/xact.h"
+#include "catalog/pg_type.h"
+#include "catalog/pgxc_node.h"
+#include "commands/tablecmds.h"
+#include "pgxc/copyops.h"
+#include "pgxc/execRemote.h"
+#include "pgxc/pgxc.h"
+#include "pgxc/redistrib.h"
+#include "pgxc/remotecopy.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
+
+#define IsCommandTypePreUpdate(x) (x == CATALOG_UPDATE_BEFORE || \
+ x == CATALOG_UPDATE_BOTH)
+#define IsCommandTypePostUpdate(x) (x == CATALOG_UPDATE_AFTER || \
+ x == CATALOG_UPDATE_BOTH)
+
+/* Functions used for the execution of redistribution commands */
+static void distrib_execute_query(char *sql, bool is_temp, ExecNodes *exec_nodes);
+static void distrib_execute_command(RedistribState *distribState, RedistribCommand *command);
+static void distrib_copy_to(RedistribState *distribState);
+static void distrib_copy_from(RedistribState *distribState, ExecNodes *exec_nodes);
+static void distrib_truncate(RedistribState *distribState, ExecNodes *exec_nodes);
+static void distrib_reindex(RedistribState *distribState, ExecNodes *exec_nodes);
+static void distrib_delete_hash(RedistribState *distribState, ExecNodes *exec_nodes);
+
+/* Functions used to build the command list */
+static void pgxc_redist_build_entry(RedistribState *distribState,
+ RelationLocInfo *oldLocInfo,
+ RelationLocInfo *newLocInfo);
+static void pgxc_redist_build_replicate(RedistribState *distribState,
+ RelationLocInfo *oldLocInfo,
+ RelationLocInfo *newLocInfo);
+static void pgxc_redist_build_replicate_to_distrib(RedistribState *distribState,
+ RelationLocInfo *oldLocInfo,
+ RelationLocInfo *newLocInfo);
+
+static void pgxc_redist_build_default(RedistribState *distribState);
+static void pgxc_redist_add_reindex(RedistribState *distribState);
+
+
+/*
+ * PGXCRedistribTable
+ *    Execute the redistribution operations of distribState that match the
+ *    given catalog-update phase (before and/or after catalog update).
+ *    Runs only on the local Coordinator.
+ */
+void
+PGXCRedistribTable(RedistribState *distribState, RedistribCatalog type)
+{
+    ListCell *item;
+
+    /* Nothing to do if no redistribution operation */
+    if (!distribState)
+        return;
+
+    /* Nothing to do if on remote node */
+    if (IS_PGXC_DATANODE || IsConnFromCoord())
+        return;
+
+    /* Execute each command if necessary */
+    foreach(item, distribState->commands)
+    {
+        RedistribCommand *command = (RedistribCommand *)lfirst(item);
+
+        /* Check if command can be run; skip commands of the other phase */
+        if (!IsCommandTypePostUpdate(type) &&
+            IsCommandTypePostUpdate(command->updateState))
+            continue;
+        if (!IsCommandTypePreUpdate(type) &&
+            IsCommandTypePreUpdate(command->updateState))
+            continue;
+
+        /* Now enter in execution list */
+        distrib_execute_command(distribState, command);
+    }
+}
+
+
+/*
+ * PGXCRedistribCreateCommandList
+ *    Look for the list of necessary commands to perform table redistribution
+ *    and store it in distribState, comparing the relation's current locator
+ *    information against newLocInfo.
+ */
+void
+PGXCRedistribCreateCommandList(RedistribState *distribState, RelationLocInfo *newLocInfo)
+{
+    Relation rel = relation_open(distribState->relid, NoLock);
+    RelationLocInfo *oldLocInfo = RelationGetLocInfo(rel);
+
+    /* Build redistribution command list */
+    pgxc_redist_build_entry(distribState, oldLocInfo, newLocInfo);
+
+    relation_close(rel, NoLock);
+}
+
+
+/*
+ * pgxc_redist_build_entry
+ *    Entry point for command list building.
+ *    Specialized builders are tried in order; each one does nothing if a
+ *    previous builder already filled the command list, and the generic
+ *    COPY TO/TRUNCATE/COPY FROM plan is used as last resort.
+ */
+static void
+pgxc_redist_build_entry(RedistribState *distribState,
+                        RelationLocInfo *oldLocInfo,
+                        RelationLocInfo *newLocInfo)
+{
+    /* If distribution has not changed at all, nothing to do */
+    if (IsLocatorInfoEqual(oldLocInfo, newLocInfo))
+        return;
+
+    /* Evaluate cases for replicated tables */
+    pgxc_redist_build_replicate(distribState, oldLocInfo, newLocInfo);
+
+    /* Evaluate cases for replicated to distributed tables */
+    pgxc_redist_build_replicate_to_distrib(distribState, oldLocInfo, newLocInfo);
+
+    /* PGXCTODO: perform more complex builds of command list */
+
+    /* Fallback to default */
+    pgxc_redist_build_default(distribState);
+}
+
+
+/*
+ * pgxc_redist_build_replicate_to_distrib
+ *    Build redistribution command list from replicated to distributed
+ *    table.
+ */
+static void
+pgxc_redist_build_replicate_to_distrib(RedistribState *distribState,
+                                       RelationLocInfo *oldLocInfo,
+                                       RelationLocInfo *newLocInfo)
+{
+    List *removedNodes;
+    List *newNodes;
+
+    /* If a command list has already been built, nothing to do */
+    if (list_length(distribState->commands) != 0)
+        return;
+
+    /* Redistribution is done from replication to distributed (with value) */
+    if (!IsLocatorReplicated(oldLocInfo->locatorType) ||
+        !IsLocatorDistributedByValue(newLocInfo->locatorType))
+        return;
+
+    /* Get the list of nodes that are removed from the relation */
+    removedNodes = list_difference_int(oldLocInfo->nodeList, newLocInfo->nodeList);
+
+    /* Get the list of nodes that are added to the relation */
+    newNodes = list_difference_int(newLocInfo->nodeList, oldLocInfo->nodeList);
+
+    /*
+     * If some nodes are added, turn back to default, we need to fetch data
+     * and then redistribute it properly.
+     */
+    if (newNodes != NIL)
+        return;
+
+    /* Nodes removed have to be truncated, so add a TRUNCATE command on removed nodes */
+    if (removedNodes != NIL)
+    {
+        ExecNodes *execNodes = makeNode(ExecNodes);
+        execNodes->nodeList = removedNodes;
+        /* Add TRUNCATE command */
+        distribState->commands = lappend(distribState->commands,
+                 makeRedistribCommand(DISTRIB_TRUNCATE, CATALOG_UPDATE_BEFORE, execNodes));
+    }
+
+    /*
+     * If the table is redistributed to a single node, a TRUNCATE on removed nodes
+     * is sufficient so leave here.
+     */
+    if (list_length(newLocInfo->nodeList) == 1)
+    {
+        /* Add REINDEX command if necessary */
+        pgxc_redist_add_reindex(distribState);
+        return;
+    }
+
+    /*
+     * If we are here we are sure that redistribution only requires to delete data on remote
+     * nodes on the new subset of nodes. So launch to remote nodes a DELETE command that only
+     * eliminates the data not verifying the new hashing condition.
+     */
+    if (newLocInfo->locatorType == LOCATOR_TYPE_HASH)
+    {
+        ExecNodes *execNodes = makeNode(ExecNodes);
+        execNodes->nodeList = newLocInfo->nodeList;
+        distribState->commands = lappend(distribState->commands,
+                 makeRedistribCommand(DISTRIB_DELETE_HASH, CATALOG_UPDATE_AFTER, execNodes));
+    }
+    else if (newLocInfo->locatorType == LOCATOR_TYPE_MODULO)
+    {
+        ExecNodes *execNodes = makeNode(ExecNodes);
+        execNodes->nodeList = newLocInfo->nodeList;
+        distribState->commands = lappend(distribState->commands,
+                 makeRedistribCommand(DISTRIB_DELETE_MODULO, CATALOG_UPDATE_AFTER, execNodes));
+    }
+    else
+        ereport(ERROR,
+                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                 errmsg("Incorrect redistribution operation")));
+
+    /* Add REINDEX command if necessary */
+    pgxc_redist_add_reindex(distribState);
+}
+
+
+/*
+ * pgxc_redist_build_replicate
+ *    Build redistribution command list for replicated tables whose set of
+ *    nodes changes while staying replicated.
+ */
+static void
+pgxc_redist_build_replicate(RedistribState *distribState,
+                            RelationLocInfo *oldLocInfo,
+                            RelationLocInfo *newLocInfo)
+{
+    List *removedNodes;
+    List *newNodes;
+
+    /* If a command list has already been built, nothing to do */
+    if (list_length(distribState->commands) != 0)
+        return;
+
+    /* Case of a replicated table whose set of nodes is changed */
+    if (!IsLocatorReplicated(newLocInfo->locatorType) ||
+        !IsLocatorReplicated(oldLocInfo->locatorType))
+        return;
+
+    /* Get the list of nodes that are removed from the relation */
+    removedNodes = list_difference_int(oldLocInfo->nodeList, newLocInfo->nodeList);
+
+    /* Get the list of nodes that are added to the relation */
+    newNodes = list_difference_int(newLocInfo->nodeList, oldLocInfo->nodeList);
+
+    /*
+     * If nodes have to be added, we need to fetch data for redistribution first.
+     * So add a COPY TO command to fetch data.
+     */
+    if (newNodes != NIL)
+    {
+        /* Add COPY TO command */
+        distribState->commands = lappend(distribState->commands,
+                 makeRedistribCommand(DISTRIB_COPY_TO, CATALOG_UPDATE_BEFORE, NULL));
+    }
+
+    /* Nodes removed have to be truncated, so add a TRUNCATE command on removed nodes */
+    if (removedNodes != NIL)
+    {
+        ExecNodes *execNodes = makeNode(ExecNodes);
+        execNodes->nodeList = removedNodes;
+        /* Add TRUNCATE command */
+        distribState->commands = lappend(distribState->commands,
+                 makeRedistribCommand(DISTRIB_TRUNCATE, CATALOG_UPDATE_BEFORE, execNodes));
+    }
+
+    /* If necessary, COPY the data obtained at first step to the new nodes. */
+    if (newNodes != NIL)
+    {
+        ExecNodes *execNodes = makeNode(ExecNodes);
+        execNodes->nodeList = newNodes;
+        /* Add COPY FROM command */
+        distribState->commands = lappend(distribState->commands,
+                 makeRedistribCommand(DISTRIB_COPY_FROM, CATALOG_UPDATE_AFTER, execNodes));
+    }
+
+    /* Add REINDEX command if necessary */
+    pgxc_redist_add_reindex(distribState);
+}
+
+
+/*
+ * pgxc_redist_build_default
+ *    Build a default list consisting of
+ *    COPY TO -> TRUNCATE -> COPY FROM ( -> REINDEX )
+ *    This is the generic fallback plan used when no specialized builder
+ *    produced a command list.
+ */
+static void
+pgxc_redist_build_default(RedistribState *distribState)
+{
+    /* If a command list has already been built, nothing to do */
+    if (list_length(distribState->commands) != 0)
+        return;
+
+    /* COPY TO command: fetch all data before catalog change */
+    distribState->commands = lappend(distribState->commands,
+                 makeRedistribCommand(DISTRIB_COPY_TO, CATALOG_UPDATE_BEFORE, NULL));
+    /* TRUNCATE command */
+    distribState->commands = lappend(distribState->commands,
+                 makeRedistribCommand(DISTRIB_TRUNCATE, CATALOG_UPDATE_BEFORE, NULL));
+    /* COPY FROM command: redistribute data after catalog change */
+    distribState->commands = lappend(distribState->commands,
+                 makeRedistribCommand(DISTRIB_COPY_FROM, CATALOG_UPDATE_AFTER, NULL));
+
+    /* REINDEX command */
+    pgxc_redist_add_reindex(distribState);
+}
+
+
+/*
+ * pgxc_redist_add_reindex
+ * Add a reindex command if necessary
+ */
+static void
+pgxc_redist_add_reindex(RedistribState *distribState)
+{
+ Relation rel;
+
+ rel = relation_open(distribState->relid, NoLock);
+
+ /* Build REINDEX command if necessary */
+ if (RelationGetIndexList(rel) != NIL)
+ {
+ distribState->commands = lappend(distribState->commands,
+ makeRedistribCommand(DISTRIB_REINDEX, CATALOG_UPDATE_AFTER, NULL));
+ }
+
+ relation_close(rel, NoLock);
+}
+
+
+/*
+ * distrib_execute_command
+ * Execute a redistribution operation
+ */
+static void
+distrib_execute_command(RedistribState *distribState, RedistribCommand *command)
+{
+ /* Execute redistribution command */
+ switch (command->type)
+ {
+ case DISTRIB_COPY_TO:
+ distrib_copy_to(distribState);
+ break;
+ case DISTRIB_COPY_FROM:
+ distrib_copy_from(distribState, command->execNodes);
+ break;
+ case DISTRIB_TRUNCATE:
+ distrib_truncate(distribState, command->execNodes);
+ break;
+ case DISTRIB_REINDEX:
+ distrib_reindex(distribState, command->execNodes);
+ break;
+ case DISTRIB_DELETE_HASH:
+ case DISTRIB_DELETE_MODULO:
+ distrib_delete_hash(distribState, command->execNodes);
+ break;
+ case DISTRIB_NONE:
+ default:
+ Assert(0); /* Should not happen */
+ }
+}
+
+
+/*
+ * distrib_copy_to
+ * Copy all the data of table to be distributed.
+ * This data is saved in a tuplestore saved in distribution state.
+ * A COPY TO operation is always done on nodes determined by the locator data
+ * in catalogs, explaining why this cannot be done on a subset of nodes. It also
+ * ensures that no read operations are done on nodes where data is not yet located.
+ */
+static void
+distrib_copy_to(RedistribState *distribState)
+{
+ Oid relOid = distribState->relid;
+ Relation rel;
+ RemoteCopyOptions *options;
+ RemoteCopyData *copyState;
+ Tuplestorestate *store; /* Storage of redistributed data */
+
+ /* Fetch necessary data to prepare for the table data acquisition */
+ options = makeRemoteCopyOptions();
+
+ /* All the fields are separated by tabs in redistribution */
+ options->rco_delim = palloc(2);
+ options->rco_delim[0] = COPYOPS_DELIMITER;
+ options->rco_delim[1] = '\0';
+
+ copyState = (RemoteCopyData *) palloc0(sizeof(RemoteCopyData));
+ copyState->is_from = false;
+
+ /* A sufficient lock level needs to be taken at a higher level */
+ rel = relation_open(relOid, NoLock);
+ RemoteCopy_GetRelationLoc(copyState, rel, NIL);
+ RemoteCopy_BuildStatement(copyState, rel, options, NIL, NIL);
+
+ /* Inform client of operation being done */
+ ereport(DEBUG1,
+ (errmsg("Copying data for relation \"%s.%s\"",
+ get_namespace_name(RelationGetNamespace(rel)),
+ RelationGetRelationName(rel))));
+
+ /* Begin the COPY process */
+ copyState->connections = DataNodeCopyBegin(copyState->query_buf.data,
+ copyState->exec_nodes->nodeList,
+ GetActiveSnapshot());
+
+ /* Create tuplestore storage */
+ store = tuplestore_begin_heap(true, false, work_mem);
+
+ /* Then get rows and copy them to the tuplestore used for redistribution */
+ DataNodeCopyOut(copyState->exec_nodes,
+ copyState->connections,
+ RelationGetDescr(rel), /* Need also to set up the tuple descriptor */
+ NULL,
+ store, /* Tuplestore used for redistribution */
+ REMOTE_COPY_TUPLESTORE);
+
+ /* Do necessary clean-up */
+ FreeRemoteCopyOptions(options);
+
+ /* Lock is maintained until transaction commits */
+ relation_close(rel, NoLock);
+
+ /* Save results */
+ distribState->store = store;
+}
+
+
+/*
+ * distrib_copy_from
+ * Execute commands related to COPY FROM
+ * Redistribute all the data of table with a COPY FROM from given tuplestore.
+ */
+static void
+distrib_copy_from(RedistribState *distribState, ExecNodes *exec_nodes)
+{
+ Oid relOid = distribState->relid;
+ Tuplestorestate *store = distribState->store;
+ Relation rel;
+ RemoteCopyOptions *options;
+ RemoteCopyData *copyState;
+ bool replicated, contains_tuple = true;
+ TupleDesc tupdesc;
+
+ /* Nothing to do if on remote node */
+ if (IS_PGXC_DATANODE || IsConnFromCoord())
+ return;
+
+ /* Fetch necessary data to prepare for the table data acquisition */
+ options = makeRemoteCopyOptions();
+ /* All the fields are separated by tabs in redistribution */
+ options->rco_delim = palloc(2);
+ options->rco_delim[0] = COPYOPS_DELIMITER;
+ options->rco_delim[1] = '\0';
+
+ copyState = (RemoteCopyData *) palloc0(sizeof(RemoteCopyData));
+ copyState->is_from = true;
+
+ /* A sufficient lock level needs to be taken at a higher level */
+ rel = relation_open(relOid, NoLock);
+ RemoteCopy_GetRelationLoc(copyState, rel, NIL);
+ RemoteCopy_BuildStatement(copyState, rel, options, NIL, NIL);
+
+ /*
+ * When building COPY FROM command in redistribution list,
+ * use the list of nodes that has been calculated there.
+ * It might be possible that this COPY is done only on a portion of nodes.
+ */
+ if (exec_nodes && exec_nodes->nodeList != NIL)
+ {
+ copyState->exec_nodes->nodeList = exec_nodes->nodeList;
+ copyState->rel_loc->nodeList = exec_nodes->nodeList;
+ }
+
+ tupdesc = RelationGetDescr(rel);
+
+ /* Inform client of operation being done */
+ ereport(DEBUG1,
+ (errmsg("Redistributing data for relation \"%s.%s\"",
+ get_namespace_name(RelationGetNamespace(rel)),
+ RelationGetRelationName(rel))));
+
+ /* Begin redistribution on remote nodes */
+ copyState->connections = DataNodeCopyBegin(copyState->query_buf.data,
+ copyState->exec_nodes->nodeList,
+ GetActiveSnapshot());
+
+ /* Transform each tuple stored into a COPY message and send it to remote nodes */
+ while (contains_tuple)
+ {
+ char *data;
+ int len;
+ Form_pg_attribute *attr = tupdesc->attrs;
+ Datum dist_col_value = (Datum) 0;
+ bool dist_col_is_null = true;
+ Oid dist_col_type = UNKNOWNOID;
+ TupleTableSlot *slot;
+ ExecNodes *local_execnodes;
+
+ /* Build table slot for this relation */
+ slot = MakeSingleTupleTableSlot(tupdesc);
+
+ /* Get tuple slot from the tuplestore */
+ contains_tuple = tuplestore_gettupleslot(store, true, false, slot);
+ if (!contains_tuple)
+ {
+ ExecDropSingleTupleTableSlot(slot);
+ break;
+ }
+
+ /* Make sure the tuple is fully deconstructed */
+ slot_getallattrs(slot);
+
+ /* Find value of distribution column if necessary */
+ if (copyState->idx_dist_by_col >= 0)
+ {
+ dist_col_value = slot->tts_values[copyState->idx_dist_by_col];
+ dist_col_is_null = slot->tts_isnull[copyState->idx_dist_by_col];
+ dist_col_type = attr[copyState->idx_dist_by_col]->atttypid;
+ }
+
+ /* Build message to be sent to Datanodes */
+ data = CopyOps_BuildOneRowTo(tupdesc, slot->tts_values, slot->tts_isnull, &len);
+
+ /* Build relation node list */
+ local_execnodes = GetRelationNodes(copyState->rel_loc,
+ dist_col_value,
+ dist_col_is_null,
+ dist_col_type,
+ RELATION_ACCESS_INSERT);
+ /* Take a copy of the node lists so as not to interfere with locator info */
+ local_execnodes->primarynodelist = list_copy(local_execnodes->primarynodelist);
+ local_execnodes->nodeList = list_copy(local_execnodes->nodeList);
+
+ /* Process data to Datanodes */
+ DataNodeCopyIn(data,
+ len,
+ local_execnodes,
+ copyState->connections);
+
+ /* Clean up */
+ pfree(data);
+ FreeExecNodes(&local_execnodes);
+ ExecClearTuple(slot);
+ ExecDropSingleTupleTableSlot(slot);
+ }
+
+ /* Finish the redistribution process */
+ replicated = copyState->rel_loc->locatorType == LOCATOR_TYPE_REPLICATED;
+ DataNodeCopyFinish(copyState->connections,
+ replicated ? PGXCNodeGetNodeId(primary_data_node, PGXC_NODE_DATANODE) : -1,
+ replicated ? COMBINE_TYPE_SAME : COMBINE_TYPE_SUM);
+
+ /* Lock is maintained until transaction commits */
+ relation_close(rel, NoLock);
+}
+
+
+/*
+ * distrib_truncate
+ * Truncate all the data of specified table.
+ * This is used as a second step of online data redistribution.
+ */
+static void
+distrib_truncate(RedistribState *distribState, ExecNodes *exec_nodes)
+{
+ Relation rel;
+ StringInfo buf;
+ Oid relOid = distribState->relid;
+
+ /* Nothing to do if on remote node */
+ if (IS_PGXC_DATANODE || IsConnFromCoord())
+ return;
+
+ /* A sufficient lock level needs to be taken at a higher level */
+ rel = relation_open(relOid, NoLock);
+
+ /* Inform client of operation being done */
+ ereport(DEBUG1,
+ (errmsg("Truncating data for relation \"%s.%s\"",
+ get_namespace_name(RelationGetNamespace(rel)),
+ RelationGetRelationName(rel))));
+
+ /* Initialize buffer */
+ buf = makeStringInfo();
+
+ /* Build query to clean up table before redistribution */
+ appendStringInfo(buf, "TRUNCATE %s.%s",
+ get_namespace_name(RelationGetNamespace(rel)),
+ RelationGetRelationName(rel));
+
+ /*
+ * Lock is maintained until transaction commits,
+ * relation needs also to be closed before effectively launching the query.
+ */
+ relation_close(rel, NoLock);
+
+ /* Execute the query */
+ distrib_execute_query(buf->data, IsTempTable(relOid), exec_nodes);
+
+ /* Clean buffers */
+ pfree(buf->data);
+ pfree(buf);
+}
+
+
+/*
+ * distrib_reindex
+ * Reindex the table that has been redistributed
+ */
+static void
+distrib_reindex(RedistribState *distribState, ExecNodes *exec_nodes)
+{
+ Relation rel;
+ StringInfo buf;
+ Oid relOid = distribState->relid;
+
+ /* Nothing to do if on remote node */
+ if (IS_PGXC_DATANODE || IsConnFromCoord())
+ return;
+
+ /* A sufficient lock level needs to be taken at a higher level */
+ rel = relation_open(relOid, NoLock);
+
+ /* Inform client of operation being done */
+ ereport(DEBUG1,
+ (errmsg("Reindexing relation \"%s.%s\"",
+ get_namespace_name(RelationGetNamespace(rel)),
+ RelationGetRelationName(rel))));
+
+ /* Initialize buffer */
+ buf = makeStringInfo();
+
+ /* Generate the query */
+ appendStringInfo(buf, "REINDEX TABLE %s.%s",
+ get_namespace_name(RelationGetNamespace(rel)),
+ RelationGetRelationName(rel));
+
+ /* Execute the query */
+ distrib_execute_query(buf->data, IsTempTable(relOid), exec_nodes);
+
+ /* Clean buffers */
+ pfree(buf->data);
+ pfree(buf);
+
+ /* Lock is maintained until transaction commits */
+ relation_close(rel, NoLock);
+}
+
+
+/*
+ * distrib_delete_hash
+ *		Perform a partial deletion of remote tuples that do not satisfy the new
+ *		hash condition. The new distribution condition is set up in exec_nodes
+ *		when building the command list.
+ */
+static void
+distrib_delete_hash(RedistribState *distribState, ExecNodes *exec_nodes)
+{
+ Relation rel;
+ StringInfo buf;
+ Oid relOid = distribState->relid;
+ ListCell *item;
+
+ /* Nothing to do if on remote node */
+ if (IS_PGXC_DATANODE || IsConnFromCoord())
+ return;
+
+ /* A sufficient lock level needs to be taken at a higher level */
+ rel = relation_open(relOid, NoLock);
+
+ /* Inform client of operation being done */
+ ereport(DEBUG1,
+ (errmsg("Deleting necessary tuples \"%s.%s\"",
+ get_namespace_name(RelationGetNamespace(rel)),
+ RelationGetRelationName(rel))));
+
+ /* Initialize buffer */
+ buf = makeStringInfo();
+
+ /* Build query to clean up table before redistribution */
+ appendStringInfo(buf, "DELETE FROM %s.%s",
+ get_namespace_name(RelationGetNamespace(rel)),
+ RelationGetRelationName(rel));
+
+ /*
+ * Launch the DELETE query to each node as the DELETE depends on
+ * local conditions for each node.
+ */
+ foreach(item, exec_nodes->nodeList)
+ {
+ StringInfo buf2;
+ char *hashfuncname, *colname;
+ Oid hashtype;
+ RelationLocInfo *locinfo = RelationGetLocInfo(rel);
+ int nodenum = lfirst_int(item);
+ int nodepos = 0;
+ ExecNodes *local_exec_nodes = makeNode(ExecNodes);
+ TupleDesc tupDesc = RelationGetDescr(rel);
+ Form_pg_attribute *attr = tupDesc->attrs;
+ ListCell *item2;
+
+ /* Here the query is launched to a unique node */
+ local_exec_nodes->nodeList = lappend_int(NIL, nodenum);
+
+ /* Get the hash type of relation */
+ hashtype = attr[locinfo->partAttrNum - 1]->atttypid;
+
+ /* Get function hash name */
+ hashfuncname = get_compute_hash_function(hashtype, locinfo->locatorType);
+
+ /* Get distribution column name */
+ if (locinfo->locatorType == LOCATOR_TYPE_HASH)
+ colname = GetRelationHashColumn(locinfo);
+ else if (locinfo->locatorType == LOCATOR_TYPE_MODULO)
+ colname = GetRelationModuloColumn(locinfo);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("Incorrect redistribution operation")));
+
+ /*
+ * Find the correct node position in node list of locator information.
+ * So scan the node list and fetch the position of node.
+ */
+ foreach(item2, locinfo->nodeList)
+ {
+ int loc = lfirst_int(item2);
+ if (loc == nodenum)
+ break;
+ nodepos++;
+ }
+
+ /*
+ * Then build the WHERE clause for deletion.
+ * The condition that allows to keep the tuples on remote nodes
+		 * is of the type "RemoteNodeNumber != abs(hash_func(dist_col)) % NumDatanodes".
+		 * The remote Datanode has no knowledge of its position in the cluster so this
+		 * number needs to be compiled locally on the Coordinator.
+		 * Taking the absolute value is necessary as hash may return a negative value.
+		 * For hash distributions a condition with the correct hash function is used.
+		 * For modulo distribution, well we might need a hash function call but not
+		 * all the time, this is determined implicitly by get_compute_hash_function.
+ */
+ buf2 = makeStringInfo();
+ if (hashfuncname)
+ appendStringInfo(buf2, "%s WHERE abs(%s(%s)) %% %d != %d",
+ buf->data, hashfuncname, colname,
+ list_length(locinfo->nodeList), nodepos);
+ else
+ appendStringInfo(buf2, "%s WHERE abs(%s) %% %d != %d", buf->data, colname,
+ list_length(locinfo->nodeList), nodepos);
+
+ /* Then launch this single query */
+ distrib_execute_query(buf2->data, IsTempTable(relOid), local_exec_nodes);
+
+ FreeExecNodes(&local_exec_nodes);
+ pfree(buf2->data);
+ pfree(buf2);
+ }
+
+ relation_close(rel, NoLock);
+
+ /* Clean buffers */
+ pfree(buf->data);
+ pfree(buf);
+}
+
+
+/*
+ * makeRedistribState
+ * Build a distribution state operator
+ */
+RedistribState *
+makeRedistribState(Oid relOid)
+{
+ RedistribState *res = (RedistribState *) palloc(sizeof(RedistribState));
+ res->relid = relOid;
+ res->commands = NIL;
+ res->store = NULL;
+ return res;
+}
+
+
+/*
+ * FreeRedistribState
+ * Free given distribution state
+ */
+void
+FreeRedistribState(RedistribState *state)
+{
+ ListCell *item;
+
+ /* Leave if nothing to do */
+ if (!state)
+ return;
+
+ foreach(item, state->commands)
+ FreeRedistribCommand((RedistribCommand *) lfirst(item));
+ if (list_length(state->commands) > 0)
+ list_free(state->commands);
+ if (state->store)
+ tuplestore_clear(state->store);
+}
+
+/*
+ * makeRedistribCommand
+ * Build a distribution command
+ */
+RedistribCommand *
+makeRedistribCommand(RedistribOperation type, RedistribCatalog updateState, ExecNodes *nodes)
+{
+ RedistribCommand *res = (RedistribCommand *) palloc0(sizeof(RedistribCommand));
+ res->type = type;
+ res->updateState = updateState;
+ res->execNodes = nodes;
+ return res;
+}
+
+/*
+ * FreeRedistribCommand
+ * Free given distribution command
+ */
+void
+FreeRedistribCommand(RedistribCommand *command)
+{
+ ExecNodes *nodes;
+ /* Leave if nothing to do */
+ if (!command)
+ return;
+ nodes = command->execNodes;
+
+ if (nodes)
+ FreeExecNodes(&nodes);
+ pfree(command);
+}
+
+/*
+ * distrib_execute_query
+ * Execute single raw query on given list of nodes
+ */
+static void
+distrib_execute_query(char *sql, bool is_temp, ExecNodes *exec_nodes)
+{
+ RemoteQuery *step = makeNode(RemoteQuery);
+ step->combine_type = COMBINE_TYPE_SAME;
+ step->exec_nodes = exec_nodes;
+ step->sql_statement = pstrdup(sql);
+ step->force_autocommit = false;
+
+ /* Redistribution operations only concern Datanodes */
+ step->exec_type = EXEC_ON_DATANODES;
+ step->is_temp = is_temp;
+ ExecRemoteUtility(step);
+ pfree(step->sql_statement);
+ pfree(step);
+
+ /* Be sure to advance the command counter after the last command */
+ CommandCounterIncrement();
+}
diff --git a/src/backend/pgxc/nodemgr/nodemgr.c b/src/backend/pgxc/nodemgr/nodemgr.c
index 8a28486b5c..68b3e91500 100644
--- a/src/backend/pgxc/nodemgr/nodemgr.c
+++ b/src/backend/pgxc/nodemgr/nodemgr.c
@@ -4,8 +4,7 @@
* Routines to support manipulation of the pgxc_node catalog
* Support concerns CREATE/ALTER/DROP on NODE object.
*
- * Copyright (c) 1996-2010, PostgreSQL Global Development Group
- * Portions Copyright (c) 2010-2012 Postgres-XC Development Group
+ * Copyright (c) 2010-2012 Postgres-XC Development Group
*
*-------------------------------------------------------------------------
*/
diff --git a/src/backend/pgxc/pool/execRemote.c b/src/backend/pgxc/pool/execRemote.c
index 9284d9c99f..7fcbebc30d 100644
--- a/src/backend/pgxc/pool/execRemote.c
+++ b/src/backend/pgxc/pool/execRemote.c
@@ -34,6 +34,7 @@
#include "nodes/nodes.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/var.h"
+#include "pgxc/copyops.h"
#include "pgxc/nodemgr.h"
#include "pgxc/poolmgr.h"
#include "storage/ipc.h"
@@ -60,7 +61,7 @@ typedef enum RemoteXactNodeStatus
RXACT_NODE_NONE, /* Initial state */
RXACT_NODE_PREPARE_SENT, /* PREPARE request sent */
RXACT_NODE_PREPARE_FAILED, /* PREPARE failed on the node */
- RXACT_NODE_PREPARED, /* PREARED successfully on the node */
+ RXACT_NODE_PREPARED, /* PREPARED successfully on the node */
RXACT_NODE_COMMIT_SENT, /* COMMIT sent successfully */
RXACT_NODE_COMMIT_FAILED, /* failed to COMMIT on the node */
RXACT_NODE_COMMITTED, /* COMMITTed successfully on the node */
@@ -293,6 +294,7 @@ CreateResponseCombiner(int node_count, CombineType combine_type)
combiner->rowBuffer = NIL;
combiner->tapenodes = NULL;
combiner->initAggregates = true;
+ combiner->remoteCopyType = REMOTE_COPY_NONE;
combiner->copy_file = NULL;
combiner->rqs_cmd_id = FirstCommandId;
@@ -576,12 +578,98 @@ HandleCopyDataRow(RemoteQueryState *combiner, char *msg_body, size_t len)
/* count the row */
combiner->processed++;
- /* If there is a copy file, data has to be sent to the local file */
- if (combiner->copy_file)
- /* write data to the copy file */
- fwrite(msg_body, 1, len, combiner->copy_file);
- else
- pq_putmessage('d', msg_body, len);
+ /* Output remote COPY operation to correct location */
+ switch (combiner->remoteCopyType)
+ {
+ case REMOTE_COPY_FILE:
+ /* Write data directly to file */
+ fwrite(msg_body, 1, len, combiner->copy_file);
+ break;
+ case REMOTE_COPY_STDOUT:
+ /* Send back data to client */
+ pq_putmessage('d', msg_body, len);
+ break;
+ case REMOTE_COPY_TUPLESTORE:
+ {
+ Datum *values;
+ bool *nulls;
+ TupleDesc tupdesc = combiner->tuple_desc;
+ int i, dropped;
+ Form_pg_attribute *attr = tupdesc->attrs;
+ FmgrInfo *in_functions;
+ Oid *typioparams;
+ char **fields;
+
+ values = (Datum *) palloc(tupdesc->natts * sizeof(Datum));
+ nulls = (bool *) palloc(tupdesc->natts * sizeof(bool));
+ in_functions = (FmgrInfo *) palloc(tupdesc->natts * sizeof(FmgrInfo));
+ typioparams = (Oid *) palloc(tupdesc->natts * sizeof(Oid));
+
+ /* Calculate the Oids of input functions */
+ for (i = 0; i < tupdesc->natts; i++)
+ {
+ Oid in_func_oid;
+
+ /* Do not need any information for dropped attributes */
+ if (attr[i]->attisdropped)
+ continue;
+
+ getTypeInputInfo(attr[i]->atttypid,
+ &in_func_oid, &typioparams[i]);
+ fmgr_info(in_func_oid, &in_functions[i]);
+ }
+
+ /*
+ * Convert message into an array of fields.
+ * Last \n is not included in converted message.
+ */
+ fields = CopyOps_RawDataToArrayField(tupdesc, msg_body, len - 1);
+
+ /* Fill in the array values */
+ dropped = 0;
+ for (i = 0; i < tupdesc->natts; i++)
+ {
+ char *string = fields[i - dropped];
+ /* Do not need any information for dropped attributes */
+ if (attr[i]->attisdropped)
+ {
+ dropped++;
+ nulls[i] = true; /* Consider dropped parameter as NULL */
+ continue;
+ }
+
+ /* Find value */
+ values[i] = InputFunctionCall(&in_functions[i],
+ string,
+ typioparams[i],
+ attr[i]->atttypmod);
+ /* Setup value with NULL flag if necessary */
+ if (string == NULL)
+ nulls[i] = true;
+ else
+ nulls[i] = false;
+ }
+
+ /* Then insert the values into tuplestore */
+ tuplestore_putvalues(combiner->tuplestorestate,
+ combiner->tuple_desc,
+ values,
+ nulls);
+
+ /* Clean up everything */
+ if (*fields)
+ pfree(*fields);
+ pfree(fields);
+ pfree(values);
+ pfree(nulls);
+ pfree(in_functions);
+ pfree(typioparams);
+ }
+ break;
+ case REMOTE_COPY_NONE:
+ default:
+ Assert(0); /* Should not happen */
+ }
}
/*
@@ -852,7 +940,15 @@ CloseCombiner(RemoteQueryState *combiner)
if (combiner->connections)
pfree(combiner->connections);
if (combiner->tuple_desc)
- FreeTupleDesc(combiner->tuple_desc);
+ {
+ /*
+ * In the case of a remote COPY with tuplestore, combiner is not
+ * responsible from freeing the tuple store. This is done at an upper
+ * level once data redistribution is completed.
+ */
+ if (combiner->remoteCopyType != REMOTE_COPY_TUPLESTORE)
+ FreeTupleDesc(combiner->tuple_desc);
+ }
if (combiner->errorMessage)
pfree(combiner->errorMessage);
if (combiner->errorDetail)
@@ -2343,7 +2439,12 @@ DataNodeCopyIn(char *data_row, int len, ExecNodes *exec_nodes, PGXCNodeHandle**
}
uint64
-DataNodeCopyOut(ExecNodes *exec_nodes, PGXCNodeHandle** copy_connections, FILE* copy_file)
+DataNodeCopyOut(ExecNodes *exec_nodes,
+ PGXCNodeHandle** copy_connections,
+ TupleDesc tupleDesc,
+ FILE* copy_file,
+ Tuplestorestate *store,
+ RemoteCopyType remoteCopyType)
{
RemoteQueryState *combiner;
int conn_count = list_length(exec_nodes->nodeList) == 0 ? NumDataNodes : list_length(exec_nodes->nodeList);
@@ -2352,9 +2453,19 @@ DataNodeCopyOut(ExecNodes *exec_nodes, PGXCNodeHandle** copy_connections, FILE*
combiner = CreateResponseCombiner(conn_count, COMBINE_TYPE_SUM);
combiner->processed = 0;
- /* If there is an existing file where to copy data, pass it to combiner */
- if (copy_file)
+ combiner->remoteCopyType = remoteCopyType;
+
+ /*
+ * If there is an existing file where to copy data,
+ * pass it to combiner when remote COPY output is sent back to file.
+ */
+ if (copy_file && remoteCopyType == REMOTE_COPY_FILE)
combiner->copy_file = copy_file;
+ if (store && remoteCopyType == REMOTE_COPY_TUPLESTORE)
+ {
+ combiner->tuplestorestate = store;
+ combiner->tuple_desc = tupleDesc;
+ }
foreach(nodeitem, exec_nodes->nodeList)
{
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index bc7006dfb3..777a9369aa 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -358,6 +358,7 @@ extern void hash_desc(StringInfo buf, uint8 xl_info, char *rec);
#ifdef PGXC
extern Datum compute_hash(Oid type, Datum value, char locator);
+extern char *get_compute_hash_function(Oid type, char locator);
#endif
#endif /* HASH_H */
diff --git a/src/include/catalog/pgxc_class.h b/src/include/catalog/pgxc_class.h
index 5a0cd597d3..cb540bf584 100644
--- a/src/include/catalog/pgxc_class.h
+++ b/src/include/catalog/pgxc_class.h
@@ -22,22 +22,37 @@ CATALOG(pgxc_class,9001) BKI_WITHOUT_OIDS
typedef FormData_pgxc_class *Form_pgxc_class;
-#define Natts_pgxc_class 6
+#define Natts_pgxc_class 6
-#define Anum_pgxc_class_pcrelid 1
+#define Anum_pgxc_class_pcrelid 1
#define Anum_pgxc_class_pclocatortype 2
-#define Anum_pgxc_class_pcattnum 3
+#define Anum_pgxc_class_pcattnum 3
#define Anum_pgxc_class_pchashalgorithm 4
#define Anum_pgxc_class_pchashbuckets 5
-#define Anum_pgxc_class_nodes 6
+#define Anum_pgxc_class_nodes 6
+
+typedef enum PgxcClassAlterType
+{
+ PGXC_CLASS_ALTER_DISTRIBUTION,
+ PGXC_CLASS_ALTER_NODES,
+ PGXC_CLASS_ALTER_ALL
+} PgxcClassAlterType;
extern void PgxcClassCreate(Oid pcrelid,
- char pclocatortype,
+ char pclocatortype,
int pcattnum,
int pchashalgorithm,
int pchashbuckets,
int numnodes,
Oid *nodes);
+extern void PgxcClassAlter(Oid pcrelid,
+ char pclocatortype,
+ int pcattnum,
+ int pchashalgorithm,
+ int pchashbuckets,
+ int numnodes,
+ Oid *nodes,
+ PgxcClassAlterType type);
extern void RemovePgxcClass(Oid pcrelid);
#endif /* PGXC_CLASS_H */
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index e8f2317c1b..8a837b39d5 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -1244,6 +1244,12 @@ typedef enum AlterTableType
AT_DropInherit, /* NO INHERIT parent */
AT_AddOf, /* OF <type_name> */
AT_DropOf, /* NOT OF */
+#ifdef PGXC
+ AT_DistributeBy, /* DISTRIBUTE BY ... */
+ AT_SubCluster, /* TO [ NODE nodelist | GROUP groupname ] */
+ AT_AddNodeList, /* ADD NODE nodelist */
+ AT_DeleteNodeList, /* DELETE NODE nodelist */
+#endif
AT_GenericOptions /* OPTIONS (...) */
} AlterTableType;
diff --git a/src/include/pgxc/copyops.h b/src/include/pgxc/copyops.h
new file mode 100644
index 0000000000..862dbbd299
--- /dev/null
+++ b/src/include/pgxc/copyops.h
@@ -0,0 +1,27 @@
+/*--------------------------------------------------------------------------
+ *
+ * copyops.h
+ * Routines for manipulation of remote COPY data
+ *
+ *
+ * Copyright (c) 2010-2012 Postgres-XC Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/include/pgxc/copyops.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef COPYOPS_H
+#define COPYOPS_H
+
+#include "access/tupdesc.h"
+
+/* Type of data delimiter used for data redistribution using remote COPY */
+#define COPYOPS_DELIMITER '\t'
+
+extern char **CopyOps_RawDataToArrayField(TupleDesc tupdesc, char *message, int len);
+extern char *CopyOps_BuildOneRowTo(TupleDesc tupdesc, Datum *values, bool *nulls, int *len);
+
+#endif
diff --git a/src/include/pgxc/execRemote.h b/src/include/pgxc/execRemote.h
index 32a88ecca4..5e26850d1c 100644
--- a/src/include/pgxc/execRemote.h
+++ b/src/include/pgxc/execRemote.h
@@ -48,6 +48,17 @@ typedef enum
REQUEST_TYPE_COPY_OUT /* Copy Out response */
} RequestType;
+/*
+ * Type of requests associated to a remote COPY OUT
+ */
+typedef enum
+{
+ REMOTE_COPY_NONE, /* Not defined yet */
+ REMOTE_COPY_STDOUT, /* Send back to client */
+ REMOTE_COPY_FILE, /* Write in file */
+ REMOTE_COPY_TUPLESTORE /* Store data in tuplestore */
+} RemoteCopyType;
+
/* Combines results of INSERT statements using multiple values */
typedef struct CombineTag
{
@@ -107,7 +118,8 @@ typedef struct RemoteQueryState
/* Simple DISTINCT support */
FmgrInfo *eqfunctions; /* functions to compare tuples */
MemoryContext tmp_ctx; /* separate context is needed to compare tuples */
- FILE *copy_file; /* used if copy_dest == COPY_FILE */
+ RemoteCopyType remoteCopyType; /* Type of remote COPY operation */
+ FILE *copy_file; /* used if remoteCopyType == REMOTE_COPY_FILE */
uint64 processed; /* count of data rows when running CopyOut */
/* cursor support */
char *cursor; /* cursor name */
@@ -136,7 +148,8 @@ extern void PGXCNodeCommitPrepared(char *gid);
/* Copy command just involves Datanodes */
extern PGXCNodeHandle** DataNodeCopyBegin(const char *query, List *nodelist, Snapshot snapshot);
extern int DataNodeCopyIn(char *data_row, int len, ExecNodes *exec_nodes, PGXCNodeHandle** copy_connections);
-extern uint64 DataNodeCopyOut(ExecNodes *exec_nodes, PGXCNodeHandle** copy_connections, FILE* copy_file);
+extern uint64 DataNodeCopyOut(ExecNodes *exec_nodes, PGXCNodeHandle** copy_connections, TupleDesc tupleDesc,
+ FILE* copy_file, Tuplestorestate *store, RemoteCopyType remoteCopyType);
extern void DataNodeCopyFinish(PGXCNodeHandle** copy_connections, int primary_dn_index, CombineType combine_type);
extern bool DataNodeCopyEnd(PGXCNodeHandle *handle, bool is_error);
extern int DataNodeCopyInBinaryForAll(char *msg_buf, int len, PGXCNodeHandle** copy_connections);
diff --git a/src/include/pgxc/locator.h b/src/include/pgxc/locator.h
index bd719911ea..78ce3cff00 100644
--- a/src/include/pgxc/locator.h
+++ b/src/include/pgxc/locator.h
@@ -99,6 +99,7 @@ extern RelationLocInfo *GetRelationLocInfo(Oid relid);
extern RelationLocInfo *CopyRelationLocInfo(RelationLocInfo *src_info);
extern char GetRelationLocType(Oid relid);
extern bool IsTableDistOnPrimary(RelationLocInfo *rel_loc_info);
+extern bool IsLocatorInfoEqual(RelationLocInfo *rel_loc_info1, RelationLocInfo *rel_loc_info2);
extern ExecNodes *GetRelationNodes(RelationLocInfo *rel_loc_info, Datum valueForDistCol,
bool isValueNull, Oid typeOfValueForDistCol,
RelationAccessType accessType);
diff --git a/src/include/pgxc/redistrib.h b/src/include/pgxc/redistrib.h
new file mode 100644
index 0000000000..ee94523dbb
--- /dev/null
+++ b/src/include/pgxc/redistrib.h
@@ -0,0 +1,80 @@
+/*-------------------------------------------------------------------------
+ *
+ * redistrib.h
+ * Routines related to online data redistribution
+ *
+ * Copyright (c) 2010-2012 Postgres-XC Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/include/pgxc/redistrib.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef REDISTRIB_H
+#define REDISTRIB_H
+
+#include "nodes/parsenodes.h"
+#include "utils/tuplestore.h"
+
+/*
+ * Type of data redistribution operations.
+ * Online data redistribution is made of one or more of those operations.
+ */
+typedef enum RedistribOperation {
+ DISTRIB_NONE, /* Default operation */
+ DISTRIB_DELETE_HASH, /* Perform a DELETE with hash value check */
+ DISTRIB_DELETE_MODULO, /* Perform a DELETE with modulo value check */
+ DISTRIB_COPY_TO, /* Perform a COPY TO */
+ DISTRIB_COPY_FROM, /* Perform a COPY FROM */
+ DISTRIB_TRUNCATE, /* Truncate relation */
+ DISTRIB_REINDEX /* Reindex relation */
+} RedistribOperation;
+
+/*
+ * Determine if operation can be done before or after
+ * catalog update on local node.
+ */
+typedef enum RedistribCatalog {
+ CATALOG_UPDATE_NONE, /* Default state */
+ CATALOG_UPDATE_AFTER, /* After catalog update */
+ CATALOG_UPDATE_BEFORE, /* Before catalog update */
+ CATALOG_UPDATE_BOTH /* Before and after catalog update */
+} RedistribCatalog;
+
+/*
+ * Redistribution command
+ * This contains the tools necessary to perform a redistribution operation.
+ */
+typedef struct RedistribCommand {
+ RedistribOperation type; /* Operation type */
+ ExecNodes *execNodes; /* List of nodes where to perform operation */
+ RedistribCatalog updateState; /* Flag to determine if operation can be done
+ * before or after catalog update */
+} RedistribCommand;
+
+/*
+ * Redistribution operation state
+ * Maintainer of redistribution state having the list of commands
+ * to be performed during redistribution.
+ * For the list of commands, we use an array and not a simple list as operations
+ * might need to be done in a certain order.
+ */
+typedef struct RedistribState {
+ Oid relid; /* Oid of relation redistributed */
+ List *commands; /* List of commands */
+ Tuplestorestate *store; /* Tuple store used for temporary data storage */
+} RedistribState;
+
+extern void PGXCRedistribTable(RedistribState *distribState, RedistribCatalog type);
+extern void PGXCRedistribCreateCommandList(RedistribState *distribState,
+ RelationLocInfo *newLocInfo);
+extern RedistribCommand *makeRedistribCommand(RedistribOperation type,
+ RedistribCatalog updateState,
+ ExecNodes *nodes);
+extern RedistribState *makeRedistribState(Oid relOid);
+extern void FreeRedistribState(RedistribState *state);
+extern void FreeRedistribCommand(RedistribCommand *command);
+
+#endif /* REDISTRIB_H */
diff --git a/src/include/pgxc/remotecopy.h b/src/include/pgxc/remotecopy.h
index 77134e71f9..93368c0ada 100644
--- a/src/include/pgxc/remotecopy.h
+++ b/src/include/pgxc/remotecopy.h
@@ -70,6 +70,7 @@ extern void RemoteCopy_BuildStatement(RemoteCopyData *state,
extern void RemoteCopy_GetRelationLoc(RemoteCopyData *state,
Relation rel,
List *attnums);
+extern RemoteCopyOptions *makeRemoteCopyOptions(void);
extern void FreeRemoteCopyData(RemoteCopyData *state);
extern void FreeRemoteCopyOptions(RemoteCopyOptions *options);
#endif
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index fde1467185..4eaabe6592 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -365,6 +365,14 @@ typedef struct StdRdOptions
#define RelationUsesTempNamespace(relation) \
((relation)->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
+#ifdef PGXC
+/*
+ * RelationGetLocInfo
+ * Return the location info of relation
+ */
+#define RelationGetLocInfo(relation) ((relation)->rd_locator_info)
+#endif
+
/*
* RELATION_IS_LOCAL
* If a rel is either temp or newly created in the current transaction,
diff --git a/src/test/regress/expected/xc_alter_table.out b/src/test/regress/expected/xc_alter_table.out
index a798e2f8a8..ca50a710bc 100644
--- a/src/test/regress/expected/xc_alter_table.out
+++ b/src/test/regress/expected/xc_alter_table.out
@@ -211,3 +211,411 @@ SELECT a, a2, b, c FROM xc_alter_table_2 ORDER BY b;
(5 rows)
DROP TABLE xc_alter_table_2;
+-- Tests for ALTER TABLE redistribution
+-- In the following test, a table is redistributed in all the ways possible
+-- and effects of redistribution is checked on all the dependent objects
+-- Table with integers
+CREATE TABLE xc_alter_table_3 (a int, b varchar(10)) DISTRIBUTE BY HASH(a);
+INSERT INTO xc_alter_table_3 VALUES (0, NULL);
+INSERT INTO xc_alter_table_3 VALUES (1, 'a');
+INSERT INTO xc_alter_table_3 VALUES (2, 'aa');
+INSERT INTO xc_alter_table_3 VALUES (3, 'aaa');
+INSERT INTO xc_alter_table_3 VALUES (4, 'aaaa');
+INSERT INTO xc_alter_table_3 VALUES (5, 'aaaaa');
+INSERT INTO xc_alter_table_3 VALUES (6, 'aaaaaa');
+INSERT INTO xc_alter_table_3 VALUES (7, 'aaaaaaa');
+INSERT INTO xc_alter_table_3 VALUES (8, 'aaaaaaaa');
+INSERT INTO xc_alter_table_3 VALUES (9, 'aaaaaaaaa');
+INSERT INTO xc_alter_table_3 VALUES (10, 'aaaaaaaaaa');
+-- Create some objects to check the effect of redistribution
+CREATE VIEW xc_alter_table_3_v AS SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3;
+CREATE RULE xc_alter_table_3_insert AS ON UPDATE TO xc_alter_table_3 WHERE OLD.a = 11 DO INSERT INTO xc_alter_table_3 VALUES (OLD.a + 1, 'nnn');
+PREPARE xc_alter_table_insert AS INSERT INTO xc_alter_table_3 VALUES ($1, $2);
+PREPARE xc_alter_table_delete AS DELETE FROM xc_alter_table_3 WHERE a = $1;
+PREPARE xc_alter_table_update AS UPDATE xc_alter_table_3 SET b = $2 WHERE a = $1;
+-- Now begin the tests
+ALTER TABLE xc_alter_table_3 DISTRIBUTE BY HASH(a);
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check on tuple presence
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+SELECT * FROM xc_alter_table_3_v;
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+EXECUTE xc_alter_table_insert(11, 'b');
+SELECT b FROM xc_alter_table_3 WHERE a = 11;
+ b
+---
+ b
+(1 row)
+
+EXECUTE xc_alter_table_update(11, 'bb');
+SELECT b FROM xc_alter_table_3 WHERE a = 11;
+ b
+----
+ bb
+(1 row)
+
+EXECUTE xc_alter_table_delete(11);
+SELECT b FROM xc_alter_table_3 WHERE a = 11 or a = 12;
+ b
+-----
+ nnn
+(1 row)
+
+EXECUTE xc_alter_table_delete(12);
+ALTER TABLE xc_alter_table_3 DISTRIBUTE BY HASH(b);
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check on tuple presence
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+SELECT * FROM xc_alter_table_3_v;
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+EXECUTE xc_alter_table_insert(11, 'b');
+SELECT b FROM xc_alter_table_3 WHERE a = 11;
+ b
+---
+ b
+(1 row)
+
+EXECUTE xc_alter_table_update(11, 'bb');
+ERROR: Partition column can't be updated in current version
+SELECT b FROM xc_alter_table_3 WHERE a = 11;
+ b
+---
+ b
+(1 row)
+
+EXECUTE xc_alter_table_delete(11);
+SELECT b FROM xc_alter_table_3 WHERE a = 11 or a = 12;
+ b
+---
+(0 rows)
+
+EXECUTE xc_alter_table_delete(12);
+ALTER TABLE xc_alter_table_3 DISTRIBUTE BY ROUND ROBIN;
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check on tuple presence
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+SELECT * FROM xc_alter_table_3_v;
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+EXECUTE xc_alter_table_insert(11, 'b');
+SELECT b FROM xc_alter_table_3 WHERE a = 11;
+ b
+---
+ b
+(1 row)
+
+EXECUTE xc_alter_table_update(11, 'bb');
+SELECT b FROM xc_alter_table_3 WHERE a = 11;
+ b
+----
+ bb
+(1 row)
+
+EXECUTE xc_alter_table_delete(11);
+SELECT b FROM xc_alter_table_3 WHERE a = 11 or a = 12;
+ b
+-----
+ nnn
+(1 row)
+
+EXECUTE xc_alter_table_delete(12);
+ALTER TABLE xc_alter_table_3 DISTRIBUTE BY MODULO(a);
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check on tuple presence
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+SELECT * FROM xc_alter_table_3_v;
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+EXECUTE xc_alter_table_insert(11, 'b');
+SELECT b FROM xc_alter_table_3 WHERE a = 11;
+ b
+---
+ b
+(1 row)
+
+EXECUTE xc_alter_table_update(11, 'bb');
+SELECT b FROM xc_alter_table_3 WHERE a = 11;
+ b
+----
+ bb
+(1 row)
+
+EXECUTE xc_alter_table_delete(11);
+SELECT b FROM xc_alter_table_3 WHERE a = 11 or a = 12;
+ b
+-----
+ nnn
+(1 row)
+
+EXECUTE xc_alter_table_delete(12);
+ALTER TABLE xc_alter_table_3 DISTRIBUTE BY MODULO(b);
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check on tuple presence
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+SELECT * FROM xc_alter_table_3_v;
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+EXECUTE xc_alter_table_insert(11, 'b');
+SELECT b FROM xc_alter_table_3 WHERE a = 11;
+ b
+---
+ b
+(1 row)
+
+EXECUTE xc_alter_table_update(11, 'bb');
+ERROR: Partition column can't be updated in current version
+SELECT b FROM xc_alter_table_3 WHERE a = 11;
+ b
+---
+ b
+(1 row)
+
+EXECUTE xc_alter_table_delete(11);
+SELECT b FROM xc_alter_table_3 WHERE a = 11 or a = 12;
+ b
+---
+(0 rows)
+
+EXECUTE xc_alter_table_delete(12);
+-- Index and redistribution
+CREATE INDEX xc_alter_table_3_index ON xc_alter_table_3(a);
+ALTER TABLE xc_alter_table_3 DISTRIBUTE BY HASH(a);
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check on tuple presence
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+SELECT * FROM xc_alter_table_3_v;
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+EXECUTE xc_alter_table_insert(11, 'b');
+SELECT b FROM xc_alter_table_3 WHERE a = 11;
+ b
+---
+ b
+(1 row)
+
+EXECUTE xc_alter_table_update(11, 'bb');
+SELECT b FROM xc_alter_table_3 WHERE a = 11;
+ b
+----
+ bb
+(1 row)
+
+EXECUTE xc_alter_table_delete(11);
+SELECT b FROM xc_alter_table_3 WHERE a = 11 or a = 12;
+ b
+-----
+ nnn
+(1 row)
+
+EXECUTE xc_alter_table_delete(12);
+-- Add column on table
+ALTER TABLE xc_alter_table_3 ADD COLUMN c int DEFAULT 4;
+ALTER TABLE xc_alter_table_3 DISTRIBUTE BY REPLICATION;
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3;
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+SELECT * FROM xc_alter_table_3_v;
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+-- Drop column on table
+ALTER TABLE xc_alter_table_3 DROP COLUMN b;
+ALTER TABLE xc_alter_table_3 DISTRIBUTE BY HASH(a);
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3;
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+SELECT * FROM xc_alter_table_3_v;
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+-- Remanipulate table once again and distribute on old column
+ALTER TABLE xc_alter_table_3 DROP COLUMN c;
+ALTER TABLE xc_alter_table_3 ADD COLUMN b varchar(3) default 'aaa';
+ALTER TABLE xc_alter_table_3 DISTRIBUTE BY HASH(a);
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check on tuple presence
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+SELECT * FROM xc_alter_table_3_v;
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+-- Change the node list
+SELECT alter_table_change_nodes('xc_alter_table_3', '{1}', 'to', NULL);
+ alter_table_change_nodes
+--------------------------
+ t
+(1 row)
+
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check on tuple presence
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+SELECT * FROM xc_alter_table_3_v;
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+-- Add some nodes on it
+SELECT alter_table_change_nodes('xc_alter_table_3', '{2,4,5}', 'add', NULL);
+ alter_table_change_nodes
+--------------------------
+ t
+(1 row)
+
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check in tuple presence
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+SELECT * FROM xc_alter_table_3_v;
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+-- Remove some nodes on it
+SELECT alter_table_change_nodes('xc_alter_table_3', '{3}', 'add', NULL);
+ alter_table_change_nodes
+--------------------------
+ t
+(1 row)
+
+SELECT alter_table_change_nodes('xc_alter_table_3', '{2,3,5}', 'delete', NULL);
+ alter_table_change_nodes
+--------------------------
+ t
+(1 row)
+
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check on tuple presence
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+SELECT * FROM xc_alter_table_3_v;
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+-- Multiple operations with replication
+SELECT alter_table_change_nodes('xc_alter_table_3', '{1,3,4,5}', 'to', 'replication');
+ alter_table_change_nodes
+--------------------------
+ t
+(1 row)
+
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check on tuple presence
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+SELECT * FROM xc_alter_table_3_v;
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+-- Manipulate number of nodes to include and remove nodes on a replicated table
+-- On removed nodes data is deleted and on new nodes data is added
+SELECT alter_table_change_nodes('xc_alter_table_3', '{2,3,5}', 'to', NULL);
+ alter_table_change_nodes
+--------------------------
+ t
+(1 row)
+
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check on tuple presence
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+SELECT * FROM xc_alter_table_3_v;
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+-- Re-do a double operation with hash this time
+SELECT alter_table_change_nodes('xc_alter_table_3', '{2}', 'delete', 'hash(a)');
+ alter_table_change_nodes
+--------------------------
+ t
+(1 row)
+
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check on tuple presence
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+SELECT * FROM xc_alter_table_3_v;
+ count | sum | avg
+-------+-----+--------------------
+ 11 | 55 | 5.0000000000000000
+(1 row)
+
+-- Error checks
+ALTER TABLE xc_alter_table_3 ADD COLUMN b int, DISTRIBUTE BY HASH(a);
+ERROR: Incompatible operation with data redistribution
+-- Clean up
+DROP TABLE xc_alter_table_3 CASCADE;
+NOTICE: drop cascades to view xc_alter_table_3_v
diff --git a/src/test/regress/expected/xc_create_function.out b/src/test/regress/expected/xc_create_function.out
index 41bbcdd256..64d7198513 100644
--- a/src/test/regress/expected/xc_create_function.out
+++ b/src/test/regress/expected/xc_create_function.out
@@ -43,6 +43,90 @@ begin
execute cr_command;
end;
$$;
+-- Add/Delete/change node list of a table
+CREATE OR REPLACE FUNCTION alter_table_change_nodes(tab_schema varchar, nodenums int[], command varchar, distribution varchar)
+RETURNS BOOLEAN LANGUAGE plpgsql as $$
+declare
+ cr_command varchar;
+ nodes varchar[];
+ nodename varchar;
+ nodenames_query varchar;
+ nodenames varchar;
+ sep varchar;
+ nodenum_new int[];
+ nodenum_res int[];
+ tmp_node int;
+ num_nodes int;
+ node int;
+ check_num boolean;
+ enforce_to boolean;
+BEGIN
+ -- Check the command type, only delete/add/to are allowed
+ IF command != 'delete' AND command != 'add' AND command != 'to' THEN
+ RETURN FALSE;
+ END IF;
+ nodenames_query := 'SELECT node_name FROM pgxc_node WHERE node_type = ''D''';
+ FOR nodename IN EXECUTE nodenames_query LOOP
+ nodes := array_append(nodes, nodename);
+ END LOOP;
+ nodenames := '(';
+ sep := '';
+ num_nodes := array_length(nodes, 1);
+ enforce_to := FALSE;
+
+ -- Adjust node array according to total number of nodes
+ FOREACH node IN ARRAY nodenums LOOP
+ tmp_node := node;
+ IF (node < 1 OR node > num_nodes) THEN
+ -- Enforce the usage of TO here, only safe method
+ enforce_to := TRUE;
+ tmp_node := node % num_nodes;
+ nodenum_new := array_append(nodenum_new, tmp_node);
+ END IF;
+ nodenum_new := array_append(nodenum_new, tmp_node);
+ END LOOP;
+ -- Eliminate duplicates
+ nodenum_res := array_append(nodenum_res, nodenum_new[1]);
+ FOREACH node IN ARRAY nodenum_new LOOP
+ check_num := TRUE;
+ FOREACH tmp_node IN ARRAY nodenum_res LOOP
+ IF (tmp_node = node) THEN
+ check_num := FALSE;
+ END IF;
+ END LOOP;
+ -- Fill in result array only if not replicated
+ IF check_num THEN
+ nodenum_res := array_append(nodenum_res, node);
+ END IF;
+ END LOOP;
+
+ -- If there is a unique Datanode in cluster, enforce the use of 'TO NODE'
+ -- This will avoid any consistency problems
+ IF (num_nodes = 1 OR enforce_to) THEN
+ command := 'TO';
+ END IF;
+
+ -- Finally build query
+ cr_command := 'ALTER TABLE ' || tab_schema || ' ' || command || ' NODE ';
+ FOREACH node IN ARRAY nodenum_res LOOP
+ IF (node > 0 AND node <= num_nodes) THEN
+ nodenames := nodenames || sep || nodes[node];
+ sep := ', ';
+ END IF;
+ END LOOP;
+ nodenames := nodenames || ')';
+ cr_command := cr_command || nodenames;
+
+ -- Add distribution if necessary
+ IF (distribution IS NOT NULL) then
+ cr_command := cr_command || ', DISTRIBUTE BY ' || distribution;
+ END IF;
+
+ -- Launch it
+ EXECUTE cr_command;
+ RETURN TRUE;
+END;
+$$;
-- A function to return data node name given a node number
CREATE OR REPLACE FUNCTION get_xc_node_name(node_num int) RETURNS varchar LANGUAGE plpgsql AS $$
DECLARE
diff --git a/src/test/regress/sql/xc_alter_table.sql b/src/test/regress/sql/xc_alter_table.sql
index bfa76fc848..5f78deba77 100644
--- a/src/test/regress/sql/xc_alter_table.sql
+++ b/src/test/regress/sql/xc_alter_table.sql
@@ -57,3 +57,136 @@ EXPLAIN (VERBOSE true, COSTS false, NODES false) UPDATE xc_alter_table_2 SET a =
UPDATE xc_alter_table_2 SET a = 200, a2 = 'CTO' WHERE b = 'John';
SELECT a, a2, b, c FROM xc_alter_table_2 ORDER BY b;
DROP TABLE xc_alter_table_2;
+
+-- Tests for ALTER TABLE redistribution
+-- In the following test, a table is redistributed in all the ways possible
+-- and effects of redistribution is checked on all the dependent objects
+-- Table with integers
+CREATE TABLE xc_alter_table_3 (a int, b varchar(10)) DISTRIBUTE BY HASH(a);
+INSERT INTO xc_alter_table_3 VALUES (0, NULL);
+INSERT INTO xc_alter_table_3 VALUES (1, 'a');
+INSERT INTO xc_alter_table_3 VALUES (2, 'aa');
+INSERT INTO xc_alter_table_3 VALUES (3, 'aaa');
+INSERT INTO xc_alter_table_3 VALUES (4, 'aaaa');
+INSERT INTO xc_alter_table_3 VALUES (5, 'aaaaa');
+INSERT INTO xc_alter_table_3 VALUES (6, 'aaaaaa');
+INSERT INTO xc_alter_table_3 VALUES (7, 'aaaaaaa');
+INSERT INTO xc_alter_table_3 VALUES (8, 'aaaaaaaa');
+INSERT INTO xc_alter_table_3 VALUES (9, 'aaaaaaaaa');
+INSERT INTO xc_alter_table_3 VALUES (10, 'aaaaaaaaaa');
+-- Create some objects to check the effect of redistribution
+CREATE VIEW xc_alter_table_3_v AS SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3;
+CREATE RULE xc_alter_table_3_insert AS ON UPDATE TO xc_alter_table_3 WHERE OLD.a = 11 DO INSERT INTO xc_alter_table_3 VALUES (OLD.a + 1, 'nnn');
+PREPARE xc_alter_table_insert AS INSERT INTO xc_alter_table_3 VALUES ($1, $2);
+PREPARE xc_alter_table_delete AS DELETE FROM xc_alter_table_3 WHERE a = $1;
+PREPARE xc_alter_table_update AS UPDATE xc_alter_table_3 SET b = $2 WHERE a = $1;
+
+-- Now begin the tests
+ALTER TABLE xc_alter_table_3 DISTRIBUTE BY HASH(a);
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check on tuple presence
+SELECT * FROM xc_alter_table_3_v;
+EXECUTE xc_alter_table_insert(11, 'b');
+SELECT b FROM xc_alter_table_3 WHERE a = 11;
+EXECUTE xc_alter_table_update(11, 'bb');
+SELECT b FROM xc_alter_table_3 WHERE a = 11;
+EXECUTE xc_alter_table_delete(11);
+SELECT b FROM xc_alter_table_3 WHERE a = 11 or a = 12;
+EXECUTE xc_alter_table_delete(12);
+ALTER TABLE xc_alter_table_3 DISTRIBUTE BY HASH(b);
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check on tuple presence
+SELECT * FROM xc_alter_table_3_v;
+EXECUTE xc_alter_table_insert(11, 'b');
+SELECT b FROM xc_alter_table_3 WHERE a = 11;
+EXECUTE xc_alter_table_update(11, 'bb');
+SELECT b FROM xc_alter_table_3 WHERE a = 11;
+EXECUTE xc_alter_table_delete(11);
+SELECT b FROM xc_alter_table_3 WHERE a = 11 or a = 12;
+EXECUTE xc_alter_table_delete(12);
+ALTER TABLE xc_alter_table_3 DISTRIBUTE BY ROUND ROBIN;
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check on tuple presence
+SELECT * FROM xc_alter_table_3_v;
+EXECUTE xc_alter_table_insert(11, 'b');
+SELECT b FROM xc_alter_table_3 WHERE a = 11;
+EXECUTE xc_alter_table_update(11, 'bb');
+SELECT b FROM xc_alter_table_3 WHERE a = 11;
+EXECUTE xc_alter_table_delete(11);
+SELECT b FROM xc_alter_table_3 WHERE a = 11 or a = 12;
+EXECUTE xc_alter_table_delete(12);
+ALTER TABLE xc_alter_table_3 DISTRIBUTE BY MODULO(a);
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check on tuple presence
+SELECT * FROM xc_alter_table_3_v;
+EXECUTE xc_alter_table_insert(11, 'b');
+SELECT b FROM xc_alter_table_3 WHERE a = 11;
+EXECUTE xc_alter_table_update(11, 'bb');
+SELECT b FROM xc_alter_table_3 WHERE a = 11;
+EXECUTE xc_alter_table_delete(11);
+SELECT b FROM xc_alter_table_3 WHERE a = 11 or a = 12;
+EXECUTE xc_alter_table_delete(12);
+ALTER TABLE xc_alter_table_3 DISTRIBUTE BY MODULO(b);
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check on tuple presence
+SELECT * FROM xc_alter_table_3_v;
+EXECUTE xc_alter_table_insert(11, 'b');
+SELECT b FROM xc_alter_table_3 WHERE a = 11;
+EXECUTE xc_alter_table_update(11, 'bb');
+SELECT b FROM xc_alter_table_3 WHERE a = 11;
+EXECUTE xc_alter_table_delete(11);
+SELECT b FROM xc_alter_table_3 WHERE a = 11 or a = 12;
+EXECUTE xc_alter_table_delete(12);
+-- Index and redistribution
+CREATE INDEX xc_alter_table_3_index ON xc_alter_table_3(a);
+ALTER TABLE xc_alter_table_3 DISTRIBUTE BY HASH(a);
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check on tuple presence
+SELECT * FROM xc_alter_table_3_v;
+EXECUTE xc_alter_table_insert(11, 'b');
+SELECT b FROM xc_alter_table_3 WHERE a = 11;
+EXECUTE xc_alter_table_update(11, 'bb');
+SELECT b FROM xc_alter_table_3 WHERE a = 11;
+EXECUTE xc_alter_table_delete(11);
+SELECT b FROM xc_alter_table_3 WHERE a = 11 or a = 12;
+EXECUTE xc_alter_table_delete(12);
+-- Add column on table
+ALTER TABLE xc_alter_table_3 ADD COLUMN c int DEFAULT 4;
+ALTER TABLE xc_alter_table_3 DISTRIBUTE BY REPLICATION;
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3;
+SELECT * FROM xc_alter_table_3_v;
+-- Drop column on table
+ALTER TABLE xc_alter_table_3 DROP COLUMN b;
+ALTER TABLE xc_alter_table_3 DISTRIBUTE BY HASH(a);
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3;
+SELECT * FROM xc_alter_table_3_v;
+-- Remanipulate table once again and distribute on old column
+ALTER TABLE xc_alter_table_3 DROP COLUMN c;
+ALTER TABLE xc_alter_table_3 ADD COLUMN b varchar(3) default 'aaa';
+ALTER TABLE xc_alter_table_3 DISTRIBUTE BY HASH(a);
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check on tuple presence
+SELECT * FROM xc_alter_table_3_v;
+-- Change the node list
+SELECT alter_table_change_nodes('xc_alter_table_3', '{1}', 'to', NULL);
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check on tuple presence
+SELECT * FROM xc_alter_table_3_v;
+-- Add some nodes on it
+SELECT alter_table_change_nodes('xc_alter_table_3', '{2,4,5}', 'add', NULL);
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check in tuple presence
+SELECT * FROM xc_alter_table_3_v;
+-- Remove some nodes on it
+SELECT alter_table_change_nodes('xc_alter_table_3', '{3}', 'add', NULL);
+SELECT alter_table_change_nodes('xc_alter_table_3', '{2,3,5}', 'delete', NULL);
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check on tuple presence
+SELECT * FROM xc_alter_table_3_v;
+-- Multiple operations with replication
+SELECT alter_table_change_nodes('xc_alter_table_3', '{1,3,4,5}', 'to', 'replication');
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check on tuple presence
+SELECT * FROM xc_alter_table_3_v;
+-- Manipulate number of nodes to include and remove nodes on a replicated table
+-- On removed nodes data is deleted and on new nodes data is added
+SELECT alter_table_change_nodes('xc_alter_table_3', '{2,3,5}', 'to', NULL);
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check on tuple presence
+SELECT * FROM xc_alter_table_3_v;
+-- Re-do a double operation with hash this time
+SELECT alter_table_change_nodes('xc_alter_table_3', '{2}', 'delete', 'hash(a)');
+SELECT count(*), sum(a), avg(a) FROM xc_alter_table_3; -- Check on tuple presence
+SELECT * FROM xc_alter_table_3_v;
+-- Error checks
+ALTER TABLE xc_alter_table_3 ADD COLUMN b int, DISTRIBUTE BY HASH(a);
+-- Clean up
+DROP TABLE xc_alter_table_3 CASCADE;
diff --git a/src/test/regress/sql/xc_create_function.sql b/src/test/regress/sql/xc_create_function.sql
index 1c8e2350eb..bd7ad3c8b8 100644
--- a/src/test/regress/sql/xc_create_function.sql
+++ b/src/test/regress/sql/xc_create_function.sql
@@ -45,6 +45,91 @@ begin
end;
$$;
+-- Add/Delete/change node list of a table
+CREATE OR REPLACE FUNCTION alter_table_change_nodes(tab_schema varchar, nodenums int[], command varchar, distribution varchar)
+RETURNS BOOLEAN LANGUAGE plpgsql as $$
+declare
+ cr_command varchar;
+ nodes varchar[];
+ nodename varchar;
+ nodenames_query varchar;
+ nodenames varchar;
+ sep varchar;
+ nodenum_new int[];
+ nodenum_res int[];
+ tmp_node int;
+ num_nodes int;
+ node int;
+ check_num boolean;
+ enforce_to boolean;
+BEGIN
+ -- Check the command type, only delete/add/to are allowed
+ IF command != 'delete' AND command != 'add' AND command != 'to' THEN
+ RETURN FALSE;
+ END IF;
+ nodenames_query := 'SELECT node_name FROM pgxc_node WHERE node_type = ''D''';
+ FOR nodename IN EXECUTE nodenames_query LOOP
+ nodes := array_append(nodes, nodename);
+ END LOOP;
+ nodenames := '(';
+ sep := '';
+ num_nodes := array_length(nodes, 1);
+ enforce_to := FALSE;
+
+ -- Adjust node array according to total number of nodes
+ FOREACH node IN ARRAY nodenums LOOP
+ tmp_node := node;
+ IF (node < 1 OR node > num_nodes) THEN
+ -- Enforce the usage of TO here, only safe method
+ enforce_to := TRUE;
+ tmp_node := node % num_nodes;
+ nodenum_new := array_append(nodenum_new, tmp_node);
+ END IF;
+ nodenum_new := array_append(nodenum_new, tmp_node);
+ END LOOP;
+ -- Eliminate duplicates
+ nodenum_res := array_append(nodenum_res, nodenum_new[1]);
+ FOREACH node IN ARRAY nodenum_new LOOP
+ check_num := TRUE;
+ FOREACH tmp_node IN ARRAY nodenum_res LOOP
+ IF (tmp_node = node) THEN
+ check_num := FALSE;
+ END IF;
+ END LOOP;
+ -- Fill in result array only if not replicated
+ IF check_num THEN
+ nodenum_res := array_append(nodenum_res, node);
+ END IF;
+ END LOOP;
+
+ -- If there is a unique Datanode in cluster, enforce the use of 'TO NODE'
+ -- This will avoid any consistency problems
+ IF (num_nodes = 1 OR enforce_to) THEN
+ command := 'TO';
+ END IF;
+
+ -- Finally build query
+ cr_command := 'ALTER TABLE ' || tab_schema || ' ' || command || ' NODE ';
+ FOREACH node IN ARRAY nodenum_res LOOP
+ IF (node > 0 AND node <= num_nodes) THEN
+ nodenames := nodenames || sep || nodes[node];
+ sep := ', ';
+ END IF;
+ END LOOP;
+ nodenames := nodenames || ')';
+ cr_command := cr_command || nodenames;
+
+ -- Add distribution if necessary
+ IF (distribution IS NOT NULL) then
+ cr_command := cr_command || ', DISTRIBUTE BY ' || distribution;
+ END IF;
+
+ -- Launch it
+ EXECUTE cr_command;
+ RETURN TRUE;
+END;
+$$;
+
-- A function to return data node name given a node number
CREATE OR REPLACE FUNCTION get_xc_node_name(node_num int) RETURNS varchar LANGUAGE plpgsql AS $$
DECLARE