summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael P2012-04-10 01:06:29 +0000
committerMichael P2012-04-10 01:07:03 +0000
commit79efd8faf958947b1509e814b4dc87faae28706e (patch)
tree83934d0f096b774c6dd978991a98a0f8105a2b8d
parentce001733b43f9e76e6554ffea21fb35385e34db4 (diff)
Addition of integer node identifier as part of tuple information
This mechanism makes it possible to uniquely identify, on a Coordinator, a tuple fetched from a Datanode by adding to the tuple information a field called xc_node_id, which holds a unique integer node identifier. This node ID is calculated at node creation and added to the tuple information as a kind of extension of ctid. It can be used in internal XC mechanisms. This commit also includes a fix for round robin tables using remote update and delete plans. This was an old issue that remained in the test xc_remote, because until now there was no tuple-based method to identify the node of origin of a tuple. The original feature is from Abbas. The fix regarding round robin tables is from me. I also did some cleanup and added documentation for the new column of pgxc_node.
-rw-r--r--contrib/hstore/hstore_io.c6
-rw-r--r--doc-xc/src/sgml/catalogs.sgmlin9
-rw-r--r--src/backend/access/common/heaptuple.c23
-rw-r--r--src/backend/access/heap/heapam.c25
-rw-r--r--src/backend/access/heap/tuptoaster.c7
-rw-r--r--src/backend/access/index/indexam.c8
-rw-r--r--src/backend/catalog/catalog.c1
-rw-r--r--src/backend/catalog/genbki.pl3
-rw-r--r--src/backend/catalog/heap.c17
-rw-r--r--src/backend/commands/trigger.c6
-rw-r--r--src/backend/executor/execMain.c3
-rw-r--r--src/backend/executor/execQual.c9
-rw-r--r--src/backend/executor/nodeForeignscan.c8
-rw-r--r--src/backend/executor/nodeModifyTable.c9
-rw-r--r--src/backend/executor/spi.c4
-rw-r--r--src/backend/optimizer/plan/createplan.c23
-rw-r--r--src/backend/pgxc/locator/locator.c14
-rw-r--r--src/backend/pgxc/nodemgr/nodemgr.c82
-rw-r--r--src/backend/postmaster/postmaster.c6
-rw-r--r--src/backend/rewrite/rewriteHandler.c38
-rw-r--r--src/backend/utils/adt/rowtypes.c18
-rw-r--r--src/backend/utils/adt/selfuncs.c5
-rw-r--r--src/backend/utils/cache/lsyscache.c28
-rw-r--r--src/backend/utils/cache/syscache.c11
-rw-r--r--src/backend/utils/mmgr/portalmem.c18
-rw-r--r--src/backend/utils/sort/tuplesort.c4
-rw-r--r--src/bin/pg_dump/pg_dump.c4
-rw-r--r--src/include/access/htup.h3
-rw-r--r--src/include/access/sysattr.h6
-rw-r--r--src/include/catalog/indexing.h5
-rw-r--r--src/include/catalog/pgxc_node.h8
-rw-r--r--src/include/pgxc/locator.h1
-rw-r--r--src/include/pgxc/pgxc.h1
-rw-r--r--src/include/utils/lsyscache.h1
-rw-r--r--src/include/utils/syscache.h1
-rw-r--r--src/pl/plpgsql/src/pl_exec.c9
-rw-r--r--src/test/regress/expected/xc_misc.out103
-rw-r--r--src/test/regress/expected/xc_node.out9
-rw-r--r--src/test/regress/expected/xc_remote.out63
-rw-r--r--src/test/regress/parallel_schedule1
-rw-r--r--src/test/regress/serial_schedule2
-rw-r--r--src/test/regress/sql/xc_misc.sql74
-rw-r--r--src/test/regress/sql/xc_node.sql3
-rw-r--r--src/test/regress/sql/xc_remote.sql31
44 files changed, 656 insertions, 54 deletions
diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c
index 0d6f0b6b13..73312ab293 100644
--- a/contrib/hstore/hstore_io.c
+++ b/contrib/hstore/hstore_io.c
@@ -814,6 +814,9 @@ hstore_from_record(PG_FUNCTION_ARGS)
tuple.t_len = HeapTupleHeaderGetDatumLength(rec);
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
+#ifdef PGXC
+ tuple.t_xc_node_id = 0;
+#endif
tuple.t_data = rec;
values = (Datum *) palloc(ncolumns * sizeof(Datum));
@@ -960,6 +963,9 @@ hstore_populate_record(PG_FUNCTION_ARGS)
tuple.t_len = HeapTupleHeaderGetDatumLength(rec);
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
+#ifdef PGXC
+ tuple.t_xc_node_id = 0;
+#endif
tuple.t_data = rec;
}
diff --git a/doc-xc/src/sgml/catalogs.sgmlin b/doc-xc/src/sgml/catalogs.sgmlin
index 129188fe1f..cccddb4ece 100644
--- a/doc-xc/src/sgml/catalogs.sgmlin
+++ b/doc-xc/src/sgml/catalogs.sgmlin
@@ -6573,6 +6573,15 @@
Only a Datanode can be a preferred node.
</entry>
</row>
+
+ <row>
+ <entry><structfield>node_id</structfield></entry>
+ <entry><type>int4</type></entry>
+ <entry></entry>
+ <entry>Integer identifier of the node.
+ It is generated when the node is created.
+ </entry>
+ </row>
</tbody>
</tgroup>
</table>
diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c
index 5caae51be1..47f6a5a352 100644
--- a/src/backend/access/common/heaptuple.c
+++ b/src/backend/access/common/heaptuple.c
@@ -290,6 +290,9 @@ heap_attisnull(HeapTuple tup, int attnum)
case MinCommandIdAttributeNumber:
case MaxTransactionIdAttributeNumber:
case MaxCommandIdAttributeNumber:
+#ifdef PGXC
+ case XC_NodeIdAttributeNumber:
+#endif
/* these are never null */
break;
@@ -562,6 +565,11 @@ heap_getsysattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull)
case TableOidAttributeNumber:
result = ObjectIdGetDatum(tup->t_tableOid);
break;
+#ifdef PGXC
+ case XC_NodeIdAttributeNumber:
+ result = UInt32GetDatum(tup->t_xc_node_id);
+ break;
+#endif
default:
elog(ERROR, "invalid attnum: %d", attnum);
result = 0; /* keep compiler quiet */
@@ -591,6 +599,9 @@ heap_copytuple(HeapTuple tuple)
newTuple->t_len = tuple->t_len;
newTuple->t_self = tuple->t_self;
newTuple->t_tableOid = tuple->t_tableOid;
+#ifdef PGXC
+ newTuple->t_xc_node_id = tuple->t_xc_node_id;
+#endif
newTuple->t_data = (HeapTupleHeader) ((char *) newTuple + HEAPTUPLESIZE);
memcpy((char *) newTuple->t_data, (char *) tuple->t_data, tuple->t_len);
return newTuple;
@@ -617,6 +628,9 @@ heap_copytuple_with_tuple(HeapTuple src, HeapTuple dest)
dest->t_len = src->t_len;
dest->t_self = src->t_self;
dest->t_tableOid = src->t_tableOid;
+#ifdef PGXC
+ dest->t_xc_node_id = src->t_xc_node_id;
+#endif
dest->t_data = (HeapTupleHeader) palloc(src->t_len);
memcpy((char *) dest->t_data, (char *) src->t_data, src->t_len);
}
@@ -706,6 +720,9 @@ heap_form_tuple(TupleDesc tupleDescriptor,
tuple->t_len = len;
ItemPointerSetInvalid(&(tuple->t_self));
tuple->t_tableOid = InvalidOid;
+#ifdef PGXC
+ tuple->t_xc_node_id = 0;
+#endif
HeapTupleHeaderSetDatumLength(td, len);
HeapTupleHeaderSetTypeId(td, tupleDescriptor->tdtypeid);
@@ -825,6 +842,9 @@ heap_modify_tuple(HeapTuple tuple,
newTuple->t_data->t_ctid = tuple->t_data->t_ctid;
newTuple->t_self = tuple->t_self;
newTuple->t_tableOid = tuple->t_tableOid;
+#ifdef PGXC
+ newTuple->t_xc_node_id = tuple->t_xc_node_id;
+#endif
if (tupleDesc->tdhasoid)
HeapTupleSetOid(newTuple, HeapTupleGetOid(tuple));
@@ -1638,6 +1658,9 @@ heap_tuple_from_minimal_tuple(MinimalTuple mtup)
result->t_len = len;
ItemPointerSetInvalid(&(result->t_self));
result->t_tableOid = InvalidOid;
+#ifdef PGXC
+ result->t_xc_node_id = 0;
+#endif
result->t_data = (HeapTupleHeader) ((char *) result + HEAPTUPLESIZE);
memcpy((char *) result->t_data + MINIMAL_TUPLE_OFFSET, mtup, mtup->t_len);
memset(result->t_data, 0, offsetof(HeapTupleHeaderData, t_infomask2));
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 01a492e496..e32805716a 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -68,6 +68,9 @@
#include "utils/snapmgr.h"
#include "utils/syscache.h"
#include "utils/tqual.h"
+#ifdef PGXC
+#include "pgxc/pgxc.h"
+#endif
/* GUC variable */
@@ -1232,6 +1235,10 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot,
/* we only need to set this up once */
scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
+#ifdef PGXC
+ scan->rs_ctup.t_xc_node_id = PGXCNodeIdentifier;
+#endif
+
/*
* we do this here instead of in initscan() because heap_rescan also calls
* initscan() and we don't want to allocate memory again
@@ -1463,6 +1470,9 @@ heap_fetch(Relation relation,
tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
tuple->t_len = ItemIdGetLength(lp);
tuple->t_tableOid = RelationGetRelid(relation);
+#ifdef PGXC
+ tuple->t_xc_node_id = PGXCNodeIdentifier;
+#endif
/*
* check time qualification of tuple, then release lock
@@ -1569,6 +1579,9 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
heapTuple.t_data = (HeapTupleHeader) PageGetItem(dp, lp);
heapTuple.t_len = ItemIdGetLength(lp);
heapTuple.t_tableOid = relation->rd_id;
+#ifdef PGXC
+ heapTuple.t_xc_node_id = PGXCNodeIdentifier;
+#endif
heapTuple.t_self = *tid;
/*
@@ -1892,6 +1905,9 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
HeapTupleHeaderSetCmin(tup->t_data, cid);
HeapTupleHeaderSetXmax(tup->t_data, 0); /* for cleanliness */
tup->t_tableOid = RelationGetRelid(relation);
+#ifdef PGXC
+ tup->t_xc_node_id = PGXCNodeIdentifier;
+#endif
/*
* If the new tuple is too big for storage or contains already toasted
@@ -2609,6 +2625,9 @@ l2:
HeapTupleHeaderSetCmin(newtup->t_data, cid);
HeapTupleHeaderSetXmax(newtup->t_data, 0); /* for cleanliness */
newtup->t_tableOid = RelationGetRelid(relation);
+#ifdef PGXC
+ newtup->t_xc_node_id = PGXCNodeIdentifier;
+#endif
/*
* Replace cid with a combo cid if necessary. Note that we already put
@@ -2933,6 +2952,9 @@ heap_tuple_attr_equals(TupleDesc tupdesc, int attrnum,
if (attrnum < 0)
{
if (attrnum != ObjectIdAttributeNumber &&
+#ifdef PGXC
+ attrnum != XC_NodeIdAttributeNumber &&
+#endif
attrnum != TableOidAttributeNumber)
return false;
}
@@ -3143,6 +3165,9 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, Buffer *buffer,
tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp);
tuple->t_len = ItemIdGetLength(lp);
tuple->t_tableOid = RelationGetRelid(relation);
+#ifdef PGXC
+ tuple->t_xc_node_id = PGXCNodeIdentifier;
+#endif
l3:
result = HeapTupleSatisfiesUpdate(tuple->t_data, cid, *buffer);
diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index 4f4dd69291..c028e2fd88 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -881,6 +881,10 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
result_tuple->t_len = new_len;
result_tuple->t_self = newtup->t_self;
result_tuple->t_tableOid = newtup->t_tableOid;
+#ifdef PGXC
+ result_tuple->t_xc_node_id = newtup->t_xc_node_id;
+#endif
+
new_data = (HeapTupleHeader) ((char *) result_tuple + HEAPTUPLESIZE);
result_tuple->t_data = new_data;
@@ -971,6 +975,9 @@ toast_flatten_tuple_attribute(Datum value,
tmptup.t_len = HeapTupleHeaderGetDatumLength(olddata);
ItemPointerSetInvalid(&(tmptup.t_self));
tmptup.t_tableOid = InvalidOid;
+#ifdef PGXC
+ tmptup.t_xc_node_id = 0;
+#endif
tmptup.t_data = olddata;
Assert(numAttrs <= MaxTupleAttributeNumber);
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c
index 0208765964..badff24541 100644
--- a/src/backend/access/index/indexam.c
+++ b/src/backend/access/index/indexam.c
@@ -73,6 +73,10 @@
#include "utils/relcache.h"
#include "utils/snapmgr.h"
#include "utils/tqual.h"
+#ifdef PGXC
+#include "utils/lsyscache.h"
+#include "pgxc/pgxc.h"
+#endif
/* ----------------------------------------------------------------
@@ -575,6 +579,10 @@ index_getnext(IndexScanDesc scan, ScanDirection direction)
heapTuple->t_len = ItemIdGetLength(lp);
ItemPointerSetOffsetNumber(tid, offnum);
heapTuple->t_tableOid = RelationGetRelid(scan->heapRelation);
+#ifdef PGXC
+ heapTuple->t_xc_node_id = PGXCNodeIdentifier;
+#endif
+
ctid = &heapTuple->t_data->t_ctid;
/*
diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c
index 02335b0750..8fceb06ca7 100644
--- a/src/backend/catalog/catalog.c
+++ b/src/backend/catalog/catalog.c
@@ -451,6 +451,7 @@ IsSharedRelation(Oid relationId)
relationId == TablespaceNameIndexId ||
#ifdef PGXC
relationId == PgxcNodeNodeNameIndexId ||
+ relationId == PgxcNodeNodeIdIndexId ||
relationId == PgxcNodeOidIndexId ||
relationId == PgxcGroupGroupNameIndexId ||
relationId == PgxcGroupOidIndexId ||
diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl
index 0aeaf5bfd7..3cedcbeb51 100644
--- a/src/backend/catalog/genbki.pl
+++ b/src/backend/catalog/genbki.pl
@@ -219,6 +219,9 @@ foreach my $catname ( @{ $catalogs->{names} } )
{xmax => 'xid'},
{cmax => 'cid'},
{tableoid => 'oid'}
+#PGXC_BEGIN
+ ,{xc_node_id => 'int4'}
+#PGXC_END
);
foreach my $attr (@SYS_ATTRS)
{
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 6c6b95aa3a..dc50c6b330 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -193,7 +193,24 @@ static FormData_pg_attribute a7 = {
true, 'p', 'i', true, false, false, true, 0
};
+#ifdef PGXC
+/*
+ * In XC we need some sort of node identification for each tuple.
+ * We are adding another system column that serves as the node identifier.
+ * This is not only required by WHERE CURRENT OF, but can also be used
+ * anywhere we want to know the originating Datanode of a tuple received
+ * at the Coordinator.
+ */
+static FormData_pg_attribute a8 = {
+ 0, {"xc_node_id"}, INT4OID, 0, sizeof(int4),
+ XC_NodeIdAttributeNumber, 0, -1, -1,
+ true, 'p', 'i', true, false, false, true, 0
+};
+
+static const Form_pg_attribute SysAtt[] = {&a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8};
+#else
static const Form_pg_attribute SysAtt[] = {&a1, &a2, &a3, &a4, &a5, &a6, &a7};
+#endif
/*
* This function returns a Form_pg_attribute pointer for a system attribute.
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 7e30442580..012107a3da 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -54,6 +54,9 @@
#include "utils/snapmgr.h"
#include "utils/syscache.h"
#include "utils/tqual.h"
+#ifdef PGXC
+#include "pgxc/pgxc.h"
+#endif
/* GUC variables */
@@ -2645,6 +2648,9 @@ ltrmark:;
tuple.t_len = ItemIdGetLength(lp);
tuple.t_self = *tid;
tuple.t_tableOid = RelationGetRelid(relation);
+#ifdef PGXC
+ tuple.t_xc_node_id = PGXCNodeIdentifier;
+#endif
}
result = heap_copytuple(&tuple);
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index d351ed98e7..0f7566164f 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -2111,6 +2111,9 @@ EvalPlanQualFetchRowMarks(EPQState *epqstate)
tuple.t_len = HeapTupleHeaderGetDatumLength(td);
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
+#ifdef PGXC
+ tuple.t_xc_node_id = 0;
+#endif
tuple.t_data = td;
/* copy and store tuple */
diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c
index 80f08d8b92..7edc63e275 100644
--- a/src/backend/executor/execQual.c
+++ b/src/backend/executor/execQual.c
@@ -1099,6 +1099,9 @@ GetAttributeByNum(HeapTupleHeader tuple,
tmptup.t_len = HeapTupleHeaderGetDatumLength(tuple);
ItemPointerSetInvalid(&(tmptup.t_self));
tmptup.t_tableOid = InvalidOid;
+#ifdef PGXC
+ tmptup.t_xc_node_id = 0;
+#endif
tmptup.t_data = tuple;
result = heap_getattr(&tmptup,
@@ -1160,6 +1163,9 @@ GetAttributeByName(HeapTupleHeader tuple, const char *attname, bool *isNull)
tmptup.t_len = HeapTupleHeaderGetDatumLength(tuple);
ItemPointerSetInvalid(&(tmptup.t_self));
tmptup.t_tableOid = InvalidOid;
+#ifdef PGXC
+ tmptup.t_xc_node_id = 0;
+#endif
tmptup.t_data = tuple;
result = heap_getattr(&tmptup,
@@ -3967,6 +3973,9 @@ ExecEvalFieldStore(FieldStoreState *fstate,
tmptup.t_len = HeapTupleHeaderGetDatumLength(tuphdr);
ItemPointerSetInvalid(&(tmptup.t_self));
tmptup.t_tableOid = InvalidOid;
+#ifdef PGXC
+ tmptup.t_xc_node_id = 0;
+#endif
tmptup.t_data = tuphdr;
heap_deform_tuple(&tmptup, tupDesc, values, isnull);
diff --git a/src/backend/executor/nodeForeignscan.c b/src/backend/executor/nodeForeignscan.c
index d50489c7f4..622e6d427e 100644
--- a/src/backend/executor/nodeForeignscan.c
+++ b/src/backend/executor/nodeForeignscan.c
@@ -26,6 +26,11 @@
#include "executor/nodeForeignscan.h"
#include "foreign/fdwapi.h"
+#ifdef PGXC
+#include "utils/lsyscache.h"
+#include "pgxc/pgxc.h"
+#endif
+
static TupleTableSlot *ForeignNext(ForeignScanState *node);
static bool ForeignRecheck(ForeignScanState *node, TupleTableSlot *slot);
@@ -60,6 +65,9 @@ ForeignNext(ForeignScanState *node)
HeapTuple tup = ExecMaterializeSlot(slot);
tup->t_tableOid = RelationGetRelid(node->ss.ss_currentRelation);
+#ifdef PGXC
+ tup->t_xc_node_id = PGXCNodeIdentifier;
+#endif
}
return slot;
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 0cf693aec4..002e1d2991 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -351,6 +351,9 @@ ExecDelete(ItemPointer tupleid,
tuple.t_len = HeapTupleHeaderGetDatumLength(oldtuple);
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
+#ifdef PGXC
+ tuple.t_xc_node_id = 0;
+#endif
dodelete = ExecIRDeleteTriggers(estate, resultRelInfo, &tuple);
@@ -464,6 +467,9 @@ ldelete:;
deltuple.t_len = HeapTupleHeaderGetDatumLength(oldtuple);
ItemPointerSetInvalid(&(deltuple.t_self));
deltuple.t_tableOid = InvalidOid;
+#ifdef PGXC
+ deltuple.t_xc_node_id = 0;
+#endif
delbuffer = InvalidBuffer;
}
else
@@ -575,6 +581,9 @@ ExecUpdate(ItemPointer tupleid,
oldtup.t_len = HeapTupleHeaderGetDatumLength(oldtuple);
ItemPointerSetInvalid(&(oldtup.t_self));
oldtup.t_tableOid = InvalidOid;
+#ifdef PGXC
+ oldtup.t_xc_node_id = 0;
+#endif
slot = ExecIRUpdateTriggers(estate, resultRelInfo,
&oldtup, slot);
diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c
index b5213aa5f9..4bfc6bc3c3 100644
--- a/src/backend/executor/spi.c
+++ b/src/backend/executor/spi.c
@@ -765,6 +765,10 @@ SPI_modifytuple(Relation rel, HeapTuple tuple, int natts, int *attnum,
mtuple->t_data->t_ctid = tuple->t_data->t_ctid;
mtuple->t_self = tuple->t_self;
mtuple->t_tableOid = tuple->t_tableOid;
+#ifdef PGXC
+ mtuple->t_xc_node_id = tuple->t_xc_node_id;
+#endif
+
if (rel->rd_att->tdhasoid)
HeapTupleSetOid(mtuple, HeapTupleGetOid(tuple));
}
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index e68d7cf270..5ab5c554a8 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -5798,16 +5798,21 @@ create_remotedelete_plan(PlannerInfo *root, Plan *topplan)
else
appendStringInfoString(buf, "AND ");
- /* Nullify TLEs that are not from this relation */
- if (tle->resorigtbl != ttab->relid)
- appendStringInfo(buf, "$%d = $%d ",
- count,
- count);
- else
- appendStringInfo(buf, "%s = $%d ",
- quote_identifier(tle->resname),
- count);
+ Assert(IsA((Node *)tle->expr, Var));
+ if (IsA((Node *)tle->expr, Var))
+ {
+ Var *var = (Var *) tle->expr;
+ /* Nullify TLEs that are not from this relation */
+ if (var->varno != resultRelationIndex)
+ appendStringInfo(buf, "$%d = $%d ",
+ count,
+ count);
+ else
+ appendStringInfo(buf, "%s = $%d ",
+ quote_identifier(tle->resname),
+ count);
+ }
/* Associate type of parameter */
param_types[count - 1] = exprType((Node *) tle->expr);
diff --git a/src/backend/pgxc/locator/locator.c b/src/backend/pgxc/locator/locator.c
index 92f6200df9..629b970fc7 100644
--- a/src/backend/pgxc/locator/locator.c
+++ b/src/backend/pgxc/locator/locator.c
@@ -837,7 +837,6 @@ RelationBuildLocator(Relation rel)
/*
* GetLocatorRelationInfo - Returns the locator information for relation,
* in a copy of the RelationLocatorInfo struct in relcache
- *
*/
RelationLocInfo *
GetRelationLocInfo(Oid relid)
@@ -855,6 +854,19 @@ GetRelationLocInfo(Oid relid)
}
/*
+ * Get the distribution type of a relation (LOCATOR_TYPE_NONE if none found).
+ */
+char
+GetRelationLocType(Oid relid)
+{
+ RelationLocInfo *locinfo = GetRelationLocInfo(relid);
+ if (!locinfo)
+ return LOCATOR_TYPE_NONE;
+
+ return locinfo->locatorType;
+}
+
+/*
* Copy the RelationLocInfo struct
*/
RelationLocInfo *
diff --git a/src/backend/pgxc/nodemgr/nodemgr.c b/src/backend/pgxc/nodemgr/nodemgr.c
index e6f83c3ef2..c46d0c60f1 100644
--- a/src/backend/pgxc/nodemgr/nodemgr.c
+++ b/src/backend/pgxc/nodemgr/nodemgr.c
@@ -13,6 +13,7 @@
#include "postgres.h"
#include "miscadmin.h"
+#include "access/hash.h"
#include "access/heapam.h"
#include "catalog/catalog.h"
#include "catalog/indexing.h"
@@ -28,6 +29,14 @@
#include "pgxc/nodemgr.h"
#include "pgxc/pgxc.h"
+/*
+ * How many times should we try to find a unique identifier
+ * in case the hash of the node name turns out to be a duplicate
+ */
+
+#define MAX_TRIES_FOR_NID 200
+
+static Datum generate_node_id(const char *node_name);
/*
* GUC parameters.
@@ -202,6 +211,70 @@ check_node_options(const char *node_name, List *options, char **node_host,
node_name)));
}
+/*
+ * generate_node_id
+ *
+ * Hash the given node name to produce the node identifier.
+ * If that value is already in use, try adjacent values instead.
+ * Raise an ERROR after MAX_TRIES_FOR_NID unsuccessful lookups.
+ */
+static Datum
+generate_node_id(const char *node_name)
+{
+ Datum node_id;
+ uint32 n;
+ bool inc;
+ int i;
+
+ /* Compute node identifier by computing hash of node name */
+ node_id = hash_any((unsigned char *)node_name, strlen(node_name));
+
+ /*
+ * If the hash is close to the uint32 upper limit, probe downwards
+ * so that retries cannot wrap around; otherwise probe upwards.
+ */
+ inc = true;
+ n = DatumGetUInt32(node_id);
+ if (n >= UINT_MAX - MAX_TRIES_FOR_NID)
+ inc = false;
+
+ /*
+ * Check whether the identifier clashes with an existing one,
+ * and if it does, move on to the next candidate value.
+ */
+ for (i = 0; i < MAX_TRIES_FOR_NID; i++)
+ {
+ HeapTuple tup;
+
+ tup = SearchSysCache1(PGXCNODEIDENTIFIER, node_id);
+ if (tup == NULL)
+ break;
+
+ ReleaseSysCache(tup);
+
+ n = DatumGetUInt32(node_id);
+ if (inc)
+ n++;
+ else
+ n--;
+
+ node_id = UInt32GetDatum(n);
+ }
+
+ /*
+ * This is very unlikely to happen, but if every candidate was taken,
+ * report that the node cannot be registered under this name.
+ */
+ if (i >= MAX_TRIES_FOR_NID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("Please choose different node name."),
+ errdetail("Name \"%s\" produces a duplicate node identifier.",
+ node_name)));
+
+ return node_id;
+}
+
/* --------------------------------
* cmp_nodes
*
@@ -422,6 +495,7 @@ PgxcNodeCreate(CreateNodeStmt *stmt)
int node_port = 0;
bool is_primary = false;
bool is_preferred = false;
+ Datum node_id;
/* Only a DB administrator can add nodes */
if (!superuser())
@@ -448,6 +522,9 @@ PgxcNodeCreate(CreateNodeStmt *stmt)
&node_port, &node_type,
&is_primary, &is_preferred);
+ /* Compute node identifier */
+ node_id = generate_node_id(node_name);
+
/*
* Then assign default values if necessary
* First for port.
@@ -489,6 +566,7 @@ PgxcNodeCreate(CreateNodeStmt *stmt)
values[Anum_pgxc_node_host - 1] = DirectFunctionCall1(namein, CStringGetDatum(node_host));
values[Anum_pgxc_node_is_primary - 1] = BoolGetDatum(is_primary);
values[Anum_pgxc_node_is_preferred - 1] = BoolGetDatum(is_preferred);
+ values[Anum_pgxc_node_id - 1] = node_id;
htup = heap_form_tuple(pgxcnodesrel->rd_att, values, nulls);
@@ -520,6 +598,7 @@ PgxcNodeAlter(AlterNodeStmt *stmt)
Datum new_record[Natts_pgxc_node];
bool new_record_nulls[Natts_pgxc_node];
bool new_record_repl[Natts_pgxc_node];
+ uint32 node_id;
/* Only a DB administrator can alter cluster nodes */
if (!superuser())
@@ -552,6 +631,7 @@ PgxcNodeAlter(AlterNodeStmt *stmt)
is_primary = is_pgxc_nodeprimary(nodeOid);
node_type = get_pgxc_nodetype(nodeOid);
node_type_old = node_type;
+ node_id = get_pgxc_node_id(nodeOid);
/* Filter options */
check_node_options(node_name, stmt->options, &node_host,
@@ -587,6 +667,8 @@ PgxcNodeAlter(AlterNodeStmt *stmt)
new_record_repl[Anum_pgxc_node_is_primary - 1] = true;
new_record[Anum_pgxc_node_is_preferred - 1] = BoolGetDatum(is_preferred);
new_record_repl[Anum_pgxc_node_is_preferred - 1] = true;
+ new_record[Anum_pgxc_node_id - 1] = UInt32GetDatum(node_id);
+ new_record_repl[Anum_pgxc_node_id - 1] = true;
/* Update relation */
newtup = heap_modify_tuple(oldtup, RelationGetDescr(rel),
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index d6ddbe1981..1fcb2c05ac 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -339,6 +339,12 @@ static DNSServiceRef bonjour_sdref = NULL;
#ifdef PGXC
char *PGXCNodeName = NULL;
int PGXCNodeId = -1;
+/*
+ * When a particular node starts up, store the node identifier in this variable
+ * so that we don't have to calculate it or search the cache anywhere else.
+ * This will have minimal impact on performance.
+ */
+uint32 PGXCNodeIdentifier = 0;
#endif
/*
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index 5be26fe5e7..1ce3063bc1 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -29,6 +29,7 @@
#include "commands/trigger.h"
#ifdef PGXC
+#include "pgxc/locator.h"
#include "pgxc/nodemgr.h"
#include "pgxc/pgxc.h"
#include "pgxc/postgresql_fdw.h"
@@ -1204,15 +1205,16 @@ rewriteTargetListUD(Query *parsetree, RangeTblEntry *target_rte,
if (var->varattno < 1 || var->varattno > numattrs)
continue;
+ /* Bypass if this var does not use this relation */
+ if (var->varno != parsetree->resultRelation)
+ continue;
+
att_tup = target_relation->rd_att->attrs[var->varattno - 1];
tle = makeTargetEntry((Expr *) var,
list_length(parsetree->targetList) + 1,
pstrdup(NameStr(att_tup->attname)),
true);
- /* This is needed in remote planning to confirm that TLE is for this relation */
- tle->resorigtbl = RelationGetRelid(target_relation);
-
parsetree->targetList = lappend(parsetree->targetList, tle);
}
#endif
@@ -1249,12 +1251,34 @@ rewriteTargetListUD(Query *parsetree, RangeTblEntry *target_rte,
pstrdup(attrname),
true);
+ parsetree->targetList = lappend(parsetree->targetList, tle);
+
#ifdef PGXC
- /* This is needed in remote planning to confirm that TLE is for this relation */
- tle->resorigtbl = RelationGetRelid(target_relation);
-#endif
+ /*
+ * If the relation is non-replicated, we also need to identify the Datanode
+ * from which the tuple is fetched.
+ */
+ if (IS_PGXC_COORDINATOR &&
+ !IsConnFromCoord() &&
+ !IsLocatorReplicated(GetRelationLocType(RelationGetRelid(target_relation))))
+ {
+ var = makeVar(parsetree->resultRelation,
+ XC_NodeIdAttributeNumber,
+ INT4OID,
+ -1,
+ InvalidOid,
+ 0);
- parsetree->targetList = lappend(parsetree->targetList, tle);
+ attrname = "xc_node_id";
+
+ tle = makeTargetEntry((Expr *) var,
+ list_length(parsetree->targetList) + 1,
+ pstrdup(attrname),
+ true);
+
+ parsetree->targetList = lappend(parsetree->targetList, tle);
+ }
+#endif
}
diff --git a/src/backend/utils/adt/rowtypes.c b/src/backend/utils/adt/rowtypes.c
index 919ed9582a..acfd063f37 100644
--- a/src/backend/utils/adt/rowtypes.c
+++ b/src/backend/utils/adt/rowtypes.c
@@ -318,6 +318,9 @@ record_out(PG_FUNCTION_ARGS)
tuple.t_len = HeapTupleHeaderGetDatumLength(rec);
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
+#ifdef PGXC
+ tuple.t_xc_node_id = 0;
+#endif
tuple.t_data = rec;
/*
@@ -661,6 +664,9 @@ record_send(PG_FUNCTION_ARGS)
tuple.t_len = HeapTupleHeaderGetDatumLength(rec);
ItemPointerSetInvalid(&(tuple.t_self));
tuple.t_tableOid = InvalidOid;
+#ifdef PGXC
+ tuple.t_xc_node_id = 0;
+#endif
tuple.t_data = rec;
/*
@@ -811,10 +817,16 @@ record_cmp(FunctionCallInfo fcinfo)
tuple1.t_len = HeapTupleHeaderGetDatumLength(record1);
ItemPointerSetInvalid(&(tuple1.t_self));
tuple1.t_tableOid = InvalidOid;
+#ifdef PGXC
+ tuple1.t_xc_node_id = 0;
+#endif
tuple1.t_data = record1;
tuple2.t_len = HeapTupleHeaderGetDatumLength(record2);
ItemPointerSetInvalid(&(tuple2.t_self));
tuple2.t_tableOid = InvalidOid;
+#ifdef PGXC
+ tuple2.t_xc_node_id = 0;
+#endif
tuple2.t_data = record2;
/*
@@ -1046,10 +1058,16 @@ record_eq(PG_FUNCTION_ARGS)
tuple1.t_len = HeapTupleHeaderGetDatumLength(record1);
ItemPointerSetInvalid(&(tuple1.t_self));
tuple1.t_tableOid = InvalidOid;
+#ifdef PGXC
+ tuple1.t_xc_node_id = 0;
+#endif
tuple1.t_data = record1;
tuple2.t_len = HeapTupleHeaderGetDatumLength(record2);
ItemPointerSetInvalid(&(tuple2.t_self));
tuple2.t_tableOid = InvalidOid;
+#ifdef PGXC
+ tuple2.t_xc_node_id = 0;
+#endif
tuple2.t_data = record2;
/*
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 00ba19ec6c..9c55f296d1 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -4360,6 +4360,11 @@ get_variable_numdistinct(VariableStatData *vardata)
case TableOidAttributeNumber:
stadistinct = 1.0; /* only 1 value */
break;
+#ifdef PGXC
+ case XC_NodeIdAttributeNumber:
+ stadistinct = 1.0; /* only 1 value */
+ break;
+#endif
default:
stadistinct = 0.0; /* means "unknown" */
break;
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index fa179c29c1..589c9ffe1d 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -2171,7 +2171,7 @@ get_pgxc_nodeoid(const char *nodename)
/*
* get_pgxc_nodename
- * Get node type for given Oid
+ * Get node name for given Oid
*/
char *
get_pgxc_nodename(Oid nodeid)
@@ -2192,6 +2192,32 @@ get_pgxc_nodename(Oid nodeid)
return result;
}
+ /*
+ * get_pgxc_node_id
+ * Get node identifier for a given node Oid (0 if the Oid is invalid)
+ */
+uint32
+get_pgxc_node_id(Oid nodeid)
+{
+ HeapTuple tuple;
+ Form_pgxc_node nodeForm;
+ uint32 result;
+
+ if (nodeid == InvalidOid)
+ return 0;
+
+ tuple = SearchSysCache1(PGXCNODEOID, ObjectIdGetDatum(nodeid));
+
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for node %u", nodeid);
+
+ nodeForm = (Form_pgxc_node) GETSTRUCT(tuple);
+ result = nodeForm->node_id;
+ ReleaseSysCache(tuple);
+
+ return result;
+}
+
/*
* get_pgxc_nodetype
* Get node type for given Oid
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index 0abdb30977..31b98ff8bb 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -594,6 +594,17 @@ static const struct cachedesc cacheinfo[] = {
},
256
},
+ {PgxcNodeRelationId, /* PGXCNODEIDENTIFIER */
+ PgxcNodeNodeIdIndexId,
+ 1,
+ {
+ Anum_pgxc_node_id,
+ 0,
+ 0,
+ 0
+ },
+ 256
+ },
#endif
{ProcedureRelationId, /* PROCNAMEARGSNSP */
ProcedureNameArgsNspIndexId,
diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c
index f3814c0186..0bae872589 100644
--- a/src/backend/utils/mmgr/portalmem.c
+++ b/src/backend/utils/mmgr/portalmem.c
@@ -25,6 +25,14 @@
#include "utils/builtins.h"
#include "utils/memutils.h"
+#ifdef PGXC
+#include "pgxc/pgxc.h"
+#include "access/hash.h"
+#include "catalog/pg_collation.h"
+#include "utils/formatting.h"
+#include "utils/lsyscache.h"
+#endif
+
/*
* Estimate of the maximum number of open portals a user would have,
* used in initially sizing the PortalHashTable in EnablePortalManager().
@@ -241,6 +249,16 @@ CreatePortal(const char *name, bool allowDup, bool dupSilent)
/* put portal in table (sets portal->name) */
PortalHashTableInsert(portal, name);
+#ifdef PGXC
+ if (PGXCNodeIdentifier == 0)
+ {
+ char *node_name;
+ node_name = str_tolower(PGXCNodeName, strlen(PGXCNodeName), DEFAULT_COLLATION_OID);
+ PGXCNodeIdentifier = get_pgxc_node_id(get_pgxc_nodeoid(node_name));
+ pfree(node_name);
+ }
+#endif
+
return portal;
}
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 646f0d05d1..b82dd5daae 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -3342,6 +3342,10 @@ readtup_cluster(Tuplesortstate *state, SortTuple *stup,
&tuple->t_self, sizeof(ItemPointerData));
/* We don't currently bother to reconstruct t_tableOid */
tuple->t_tableOid = InvalidOid;
+#ifdef PGXC
+ tuple->t_xc_node_id = 0;
+#endif
+
/* Read in the tuple body */
LogicalTapeReadExact(state->tapeset, tapenum,
tuple->t_data, tuple->t_len);
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index e9cbf80b05..db7da049e4 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -12628,6 +12628,10 @@ getAttrName(int attrnum, TableInfo *tblInfo)
return "cmax";
case TableOidAttributeNumber:
return "tableoid";
+#ifdef PGXC
+ case XC_NodeIdAttributeNumber:
+ return "xc_node_id";
+#endif
}
write_msg(NULL, "invalid column number %d for table \"%s\"\n",
attrnum, tblInfo->dobj.name);
diff --git a/src/include/access/htup.h b/src/include/access/htup.h
index c147707169..4d2586b304 100644
--- a/src/include/access/htup.h
+++ b/src/include/access/htup.h
@@ -518,6 +518,9 @@ typedef struct HeapTupleData
uint32 t_len; /* length of *t_data */
ItemPointerData t_self; /* SelfItemPointer */
Oid t_tableOid; /* table the tuple came from */
+#ifdef PGXC
+ uint32 t_xc_node_id; /* Data node the tuple came from */
+#endif
HeapTupleHeader t_data; /* -> tuple header and data */
} HeapTupleData;
diff --git a/src/include/access/sysattr.h b/src/include/access/sysattr.h
index 1b3e64aaf8..e4b007b082 100644
--- a/src/include/access/sysattr.h
+++ b/src/include/access/sysattr.h
@@ -25,7 +25,13 @@
#define MaxTransactionIdAttributeNumber (-5)
#define MaxCommandIdAttributeNumber (-6)
#define TableOidAttributeNumber (-7)
+#ifdef PGXC
+#define XC_NodeIdAttributeNumber (-8)
+#define FirstLowInvalidHeapAttributeNumber (-9)
+#else
#define FirstLowInvalidHeapAttributeNumber (-8)
+#endif
+
#endif /* SYSATTR_H */
diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h
index aa8157d513..98dda2c4e4 100644
--- a/src/include/catalog/indexing.h
+++ b/src/include/catalog/indexing.h
@@ -285,7 +285,7 @@ DECLARE_UNIQUE_INDEX(pg_user_mapping_user_server_index, 175, on pg_user_mapping
DECLARE_UNIQUE_INDEX(pgxc_class_pcrelid_index, 9002, on pgxc_class using btree(pcrelid oid_ops));
#define PgxcClassPgxcRelIdIndexId 9002
-DECLARE_UNIQUE_INDEX(pgxc_node_id_index, 9010, on pgxc_node using btree(oid oid_ops));
+DECLARE_UNIQUE_INDEX(pgxc_node_oid_index, 9010, on pgxc_node using btree(oid oid_ops));
#define PgxcNodeOidIndexId 9010
DECLARE_UNIQUE_INDEX(pgxc_node_name_index, 9011, on pgxc_node using btree(node_name name_ops));
@@ -297,6 +297,9 @@ DECLARE_UNIQUE_INDEX(pgxc_group_name_index, 9012, on pgxc_group using btree(grou
DECLARE_UNIQUE_INDEX(pgxc_group_oid, 9013, on pgxc_group using btree(oid oid_ops));
#define PgxcGroupOidIndexId 9013
+DECLARE_UNIQUE_INDEX(pgxc_node_id_index, 9003, on pgxc_node using btree(node_id int4_ops));
+#define PgxcNodeNodeIdIndexId 9003
+
#endif
DECLARE_UNIQUE_INDEX(pg_foreign_table_relid_index, 3119, on pg_foreign_table using btree(ftrelid oid_ops));
diff --git a/src/include/catalog/pgxc_node.h b/src/include/catalog/pgxc_node.h
index a8bdc7e989..4696afc2a9 100644
--- a/src/include/catalog/pgxc_node.h
+++ b/src/include/catalog/pgxc_node.h
@@ -52,11 +52,16 @@ CATALOG(pgxc_node,9015) BKI_SHARED_RELATION
* Is this node preferred
*/
bool nodeis_preferred;
+
+ /*
+ * Node identifier to be used at places where a fixed length node identification is required
+ */
+ int4 node_id;
} FormData_pgxc_node;
typedef FormData_pgxc_node *Form_pgxc_node;
-#define Natts_pgxc_node 6
+#define Natts_pgxc_node 7
#define Anum_pgxc_node_name 1
#define Anum_pgxc_node_type 2
@@ -64,6 +69,7 @@ typedef FormData_pgxc_node *Form_pgxc_node;
#define Anum_pgxc_node_host 4
#define Anum_pgxc_node_is_primary 5
#define Anum_pgxc_node_is_preferred 6
+#define Anum_pgxc_node_id 7
/* Possible types of nodes */
#define PGXC_NODE_COORDINATOR 'C'
diff --git a/src/include/pgxc/locator.h b/src/include/pgxc/locator.h
index cd8ee7d1df..d35d24f205 100644
--- a/src/include/pgxc/locator.h
+++ b/src/include/pgxc/locator.h
@@ -100,6 +100,7 @@ extern char ConvertToLocatorType(int disttype);
extern char *GetRelationHashColumn(RelationLocInfo *rel_loc_info);
extern RelationLocInfo *GetRelationLocInfo(Oid relid);
extern RelationLocInfo *CopyRelationLocInfo(RelationLocInfo *src_info);
+extern char GetRelationLocType(Oid relid);
extern bool IsTableDistOnPrimary(RelationLocInfo *rel_loc_info);
extern ExecNodes *GetRelationNodes(RelationLocInfo *rel_loc_info, Datum valueForDistCol,
bool isValueNull, Oid typeOfValueForDistCol,
diff --git a/src/include/pgxc/pgxc.h b/src/include/pgxc/pgxc.h
index 3413653a61..1d1821c789 100644
--- a/src/include/pgxc/pgxc.h
+++ b/src/include/pgxc/pgxc.h
@@ -34,6 +34,7 @@ extern int remoteConnType;
/* Local node name and number */
extern char *PGXCNodeName;
extern int PGXCNodeId;
+extern uint32 PGXCNodeIdentifier;
#define IS_PGXC_COORDINATOR isPGXCCoordinator
#define IS_PGXC_DATANODE isPGXCDataNode
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h
index 3f70c4cd9b..aab8e0e4fc 100644
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -136,6 +136,7 @@ extern Oid getBaseTypeAndTypmod(Oid typid, int32 *typmod);
extern char *get_typename(Oid typid);
extern char *get_pgxc_nodename(Oid nodeoid);
extern Oid get_pgxc_nodeoid(const char *nodename);
+extern uint32 get_pgxc_node_id(Oid nodeid);
extern char get_pgxc_nodetype(Oid nodeid);
extern int get_pgxc_nodeport(Oid nodeid);
extern char *get_pgxc_nodehost(Oid nodeid);
diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h
index 23fdf050cc..44f3ff0790 100644
--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -75,6 +75,7 @@ enum SysCacheIdentifier
PGXCGROUPOID,
PGXCNODENAME,
PGXCNODEOID,
+ PGXCNODEIDENTIFIER,
#endif
PROCNAMEARGSNSP,
PROCOID,
diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c
index 906a485853..a3db99e92c 100644
--- a/src/pl/plpgsql/src/pl_exec.c
+++ b/src/pl/plpgsql/src/pl_exec.c
@@ -281,6 +281,9 @@ plpgsql_exec_function(PLpgSQL_function *func, FunctionCallInfo fcinfo)
tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
ItemPointerSetInvalid(&(tmptup.t_self));
tmptup.t_tableOid = InvalidOid;
+#ifdef PGXC
+ tmptup.t_xc_node_id = 0;
+#endif
tmptup.t_data = td;
exec_move_row(&estate, NULL, row, &tmptup, tupdesc);
ReleaseTupleDesc(tupdesc);
@@ -3768,6 +3771,9 @@ exec_assign_value(PLpgSQL_execstate *estate,
tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
ItemPointerSetInvalid(&(tmptup.t_self));
tmptup.t_tableOid = InvalidOid;
+#ifdef PGXC
+ tmptup.t_xc_node_id = 0;
+#endif
tmptup.t_data = td;
exec_move_row(estate, NULL, row, &tmptup, tupdesc);
ReleaseTupleDesc(tupdesc);
@@ -3811,6 +3817,9 @@ exec_assign_value(PLpgSQL_execstate *estate,
tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
ItemPointerSetInvalid(&(tmptup.t_self));
tmptup.t_tableOid = InvalidOid;
+#ifdef PGXC
+ tmptup.t_xc_node_id = 0;
+#endif
tmptup.t_data = td;
exec_move_row(estate, rec, NULL, &tmptup, tupdesc);
ReleaseTupleDesc(tupdesc);
diff --git a/src/test/regress/expected/xc_misc.out b/src/test/regress/expected/xc_misc.out
new file mode 100644
index 0000000000..3e56f3913b
--- /dev/null
+++ b/src/test/regress/expected/xc_misc.out
@@ -0,0 +1,103 @@
+-- A function to create table on specified nodes
+-- Arguments:
+--   tab_schema   - table name followed by its column list, e.g. 't1(a int, b int)'
+--   nodenums     - 1-based positions into the array of Datanode names read from pgxc_node
+--   distribution - text placed after DISTRIBUTE BY, e.g. 'modulo(a)'
+create or replace function cr_table(tab_schema varchar, nodenums int[], distribution varchar) returns void language plpgsql as $$
+declare
+ cr_command varchar;
+ nodes varchar[];
+ nodename varchar;
+ nodenames_query varchar;
+ nodenames varchar;
+ node int;
+ sep varchar;
+ tmp_node int;
+ num_nodes int;
+begin
+ -- Names of all nodes of type 'D'; the query has no ORDER BY, so the
+ -- position of a node in the array is whatever order the query returns
+ nodenames_query := 'SELECT node_name FROM pgxc_node WHERE node_type = ''D''';
+ cr_command := 'CREATE TABLE ' || tab_schema || ' DISTRIBUTE BY ' || distribution || ' TO NODE ';
+ for nodename in execute nodenames_query loop
+ nodes := array_append(nodes, nodename);
+ end loop;
+ nodenames := '';
+ sep := '';
+ num_nodes := array_length(nodes, 1);
+ foreach node in array nodenums loop
+ tmp_node := node;
+ -- Positions outside 1..num_nodes are wrapped with %; a wrapped value
+ -- below 1 is mapped to the last node
+ if (tmp_node < 1 or tmp_node > num_nodes) then
+ tmp_node := tmp_node % num_nodes;
+ if (tmp_node < 1) then
+ tmp_node := num_nodes;
+ end if;
+ end if;
+ nodenames := nodenames || sep || nodes[tmp_node];
+ sep := ', ';
+ end loop;
+ cr_command := cr_command || nodenames;
+ -- tab_schema and distribution are concatenated into the statement
+ -- unquoted, so callers must pass trusted text only
+ execute cr_command;
+end;
+$$;
+-- A function to return a unified data node name given a node identifier
+-- Nodes of type 'D' are ranked by their pgxc_node.node_id value; the node
+-- whose node_id equals node_ident is reported as 'NODE_<rank>' (rank starts
+-- at 1), and 'NODE_?' is returned when no such node matches.
+create or replace function get_unified_node_name(node_ident int) returns varchar language plpgsql as $$
+declare
+ r pgxc_node%rowtype;
+ node int;
+ nodenames_query varchar;
+begin
+ -- Rank by the catalog column node_id, which is the value compared below.
+ -- NOTE(review): the former ORDER BY xc_node_id sorted on the tuple-origin
+ -- system column, presumably the same for every row of this catalog, so
+ -- the NODE_n numbering was not tied to the identifier - confirm.
+ nodenames_query := 'SELECT * FROM pgxc_node WHERE node_type = ''D'' ORDER BY node_id';
+
+ node := 1;
+ for r in execute nodenames_query loop
+ if r.node_id = node_ident THEN
+ RETURN 'NODE_' || node;
+ end if;
+ node := node + 1;
+ end loop;
+ RETURN 'NODE_?';
+end;
+$$;
+-- Test the system column added by XC called xc_node_id, used to find which tuples belong to which data node
+select cr_table('t1(a int, b int)', '{1, 2}'::int[], 'modulo(a)');
+ cr_table
+----------
+
+(1 row)
+
+insert into t1 values(1,11),(2,11),(3,11),(4,22),(5,22),(6,33),(7,44),(8,44);
+select get_unified_node_name(xc_node_id),* from t1 order by a;
+ get_unified_node_name | a | b
+-----------------------+---+----
+ NODE_2 | 1 | 11
+ NODE_1 | 2 | 11
+ NODE_2 | 3 | 11
+ NODE_1 | 4 | 22
+ NODE_2 | 5 | 22
+ NODE_1 | 6 | 33
+ NODE_2 | 7 | 44
+ NODE_1 | 8 | 44
+(8 rows)
+
+select get_unified_node_name(xc_node_id),* from t1 where xc_node_id > 0 order by a;
+ get_unified_node_name | a | b
+-----------------------+---+----
+ NODE_2 | 1 | 11
+ NODE_2 | 3 | 11
+ NODE_2 | 5 | 22
+ NODE_2 | 7 | 44
+(4 rows)
+
+select get_unified_node_name(xc_node_id),* from t1 order by xc_node_id;
+ get_unified_node_name | a | b
+-----------------------+---+----
+ NODE_1 | 2 | 11
+ NODE_1 | 4 | 22
+ NODE_1 | 6 | 33
+ NODE_1 | 8 | 44
+ NODE_2 | 1 | 11
+ NODE_2 | 3 | 11
+ NODE_2 | 5 | 22
+ NODE_2 | 7 | 44
+(8 rows)
+
+create table t2(a int , xc_node_id int) distribute by modulo(a);
+ERROR: column name "xc_node_id" conflicts with a system column name
+create table t2(a int , b int) distribute by modulo(xc_node_id);
+ERROR: Column xc_node_id is not modulo distributable data type
+drop table t1;
diff --git a/src/test/regress/expected/xc_node.out b/src/test/regress/expected/xc_node.out
index a239cf7f24..351fa00be9 100644
--- a/src/test/regress/expected/xc_node.out
+++ b/src/test/regress/expected/xc_node.out
@@ -14,6 +14,15 @@ ORDER BY 1;
dummy_node_datanode | D | 5432 | localhost
(2 rows)
+-- test to make sure that node_id is generated correctly for the added nodes
+select hashname(node_name) = node_id from pgxc_node
+WHERE node_name IN ('dummy_node_coordinator', 'dummy_node_datanode');
+ ?column?
+----------
+ t
+ t
+(2 rows)
+
-- Some modifications
ALTER NODE dummy_node_coordinator WITH (PORT = 5466, HOST = 'target_host_1');
ALTER NODE dummy_node_datanode WITH (PORT = 5689, HOST = 'target_host_2', PREFERRED);
diff --git a/src/test/regress/expected/xc_remote.out b/src/test/regress/expected/xc_remote.out
index 9335d06548..481d89dfe4 100644
--- a/src/test/regress/expected/xc_remote.out
+++ b/src/test/regress/expected/xc_remote.out
@@ -204,28 +204,57 @@ SELECT a FROM rel_rep WHERE c = true ORDER BY 1;
DROP SEQUENCE seqtest3;
-- UPDATE cases for round robin table
-- Plain cases change it completely
--- PGXCTODO: For Round robin tuple selection is still incorrect
--- It is necessary to incorporate all the columns in qual list
--- to insure SQL consistency
CREATE TABLE rel_rr (a int, b timestamp DEFAULT NULL, c boolean DEFAULT NULL) DISTRIBUTE BY ROUND ROBIN;
CREATE SEQUENCE seqtest4 START 1;
INSERT INTO rel_rr VALUES (1),(2),(3),(4),(5);
--- UPDATE rel_rr SET a = nextval('seqtest4'), b = now(), c = false;
--- SELECT a FROM rel_rr ORDER BY 1,2;
+UPDATE rel_rr SET a = nextval('seqtest4'), b = now(), c = false;
+SELECT a FROM rel_rr ORDER BY 1;
+ a
+---
+ 1
+ 2
+ 3
+ 4
+ 5
+(5 rows)
+
-- Non-Coordinator quals
--- UPDATE rel_rr SET b = now(), c = true WHERE a < func_volatile(2);
--- SELECT a FROM rel_rr WHERE c = true ORDER BY 1;
--- UPDATE rel_rr SET c = false;
--- UPDATE rel_rr SET b = now(), c = true WHERE a < func_stable(3);
--- SELECT a FROM rel_rr WHERE c = true ORDER BY 1;
--- UPDATE rel_rr SET c = false WHERE c = true;
--- UPDATE rel_rr SET b = now(), c = true WHERE a < func_immutable(4);
--- SELECT a FROM rel_rr WHERE c = true ORDER BY 1;
--- UPDATE rel_rr SET c = false;
+UPDATE rel_rr SET b = now(), c = true WHERE a < func_volatile(2);
+SELECT a FROM rel_rr WHERE c = true ORDER BY 1;
+ a
+---
+ 1
+(1 row)
+
+UPDATE rel_rr SET c = false;
+UPDATE rel_rr SET b = now(), c = true WHERE a < func_stable(3);
+SELECT a FROM rel_rr WHERE c = true ORDER BY 1;
+ a
+---
+ 1
+ 2
+(2 rows)
+
+UPDATE rel_rr SET c = false WHERE c = true;
+UPDATE rel_rr SET b = now(), c = true WHERE a < func_immutable(4);
+SELECT a FROM rel_rr WHERE c = true ORDER BY 1;
+ a
+---
+ 1
+ 2
+ 3
+(3 rows)
+
+UPDATE rel_rr SET c = false;
-- Coordinator qual
--- UPDATE rel_rr SET b = now(), c = true WHERE a < currval('seqtest4') - 3 AND b < now();
--- SELECT a FROM rel_rr WHERE c = true ORDER BY 1;
--- DROP SEQUENCE seqtest4;
+UPDATE rel_rr SET b = now(), c = true WHERE a < currval('seqtest4') - 3 AND b < now();
+SELECT a FROM rel_rr WHERE c = true ORDER BY 1;
+ a
+---
+ 1
+(1 row)
+
+DROP SEQUENCE seqtest4;
-- UPDATE cases for hash table
-- Hash tables cannot be updated on distribution keys so insert fresh rows
CREATE TABLE rel_hash (a int, b timestamp DEFAULT now(), c boolean DEFAULT false) DISTRIBUTE BY HASH(a);
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 50a912f3d1..79fdaa1693 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -108,3 +108,4 @@ test: xc_groupby xc_distkey xc_having xc_temp xc_remote xc_FQS xc_FQS_join
#Cluster setting related test is independent
test: xc_node
+test: xc_misc
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index 6d9695341a..a909ab72a9 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -137,3 +137,5 @@ test: xc_remote
test: xc_node
test: xc_FQS
test: xc_FQS_join
+test: xc_misc
+
diff --git a/src/test/regress/sql/xc_misc.sql b/src/test/regress/sql/xc_misc.sql
new file mode 100644
index 0000000000..84f97d569d
--- /dev/null
+++ b/src/test/regress/sql/xc_misc.sql
@@ -0,0 +1,74 @@
+-- A function to create table on specified nodes
+-- Arguments:
+--   tab_schema   - table name followed by its column list, e.g. 't1(a int, b int)'
+--   nodenums     - 1-based positions into the array of Datanode names read from pgxc_node
+--   distribution - text placed after DISTRIBUTE BY, e.g. 'modulo(a)'
+create or replace function cr_table(tab_schema varchar, nodenums int[], distribution varchar) returns void language plpgsql as $$
+declare
+ cr_command varchar;
+ nodes varchar[];
+ nodename varchar;
+ nodenames_query varchar;
+ nodenames varchar;
+ node int;
+ sep varchar;
+ tmp_node int;
+ num_nodes int;
+begin
+ -- Names of all nodes of type 'D'; the query has no ORDER BY, so the
+ -- position of a node in the array is whatever order the query returns
+ nodenames_query := 'SELECT node_name FROM pgxc_node WHERE node_type = ''D''';
+ cr_command := 'CREATE TABLE ' || tab_schema || ' DISTRIBUTE BY ' || distribution || ' TO NODE ';
+ for nodename in execute nodenames_query loop
+ nodes := array_append(nodes, nodename);
+ end loop;
+ nodenames := '';
+ sep := '';
+ num_nodes := array_length(nodes, 1);
+ foreach node in array nodenums loop
+ tmp_node := node;
+ -- Positions outside 1..num_nodes are wrapped with %; a wrapped value
+ -- below 1 is mapped to the last node
+ if (tmp_node < 1 or tmp_node > num_nodes) then
+ tmp_node := tmp_node % num_nodes;
+ if (tmp_node < 1) then
+ tmp_node := num_nodes;
+ end if;
+ end if;
+ nodenames := nodenames || sep || nodes[tmp_node];
+ sep := ', ';
+ end loop;
+ cr_command := cr_command || nodenames;
+ -- tab_schema and distribution are concatenated into the statement
+ -- unquoted, so callers must pass trusted text only
+ execute cr_command;
+end;
+$$;
+
+-- A function to return a unified data node name given a node identifier
+-- Nodes of type 'D' are ranked by their pgxc_node.node_id value; the node
+-- whose node_id equals node_ident is reported as 'NODE_<rank>' (rank starts
+-- at 1), and 'NODE_?' is returned when no such node matches.
+create or replace function get_unified_node_name(node_ident int) returns varchar language plpgsql as $$
+declare
+ r pgxc_node%rowtype;
+ node int;
+ nodenames_query varchar;
+begin
+ -- Rank by the catalog column node_id, which is the value compared below.
+ -- NOTE(review): the former ORDER BY xc_node_id sorted on the tuple-origin
+ -- system column, presumably the same for every row of this catalog, so
+ -- the NODE_n numbering was not tied to the identifier - confirm.
+ nodenames_query := 'SELECT * FROM pgxc_node WHERE node_type = ''D'' ORDER BY node_id';
+
+ node := 1;
+ for r in execute nodenames_query loop
+ if r.node_id = node_ident THEN
+ RETURN 'NODE_' || node;
+ end if;
+ node := node + 1;
+ end loop;
+ RETURN 'NODE_?';
+end;
+$$;
+
+-- Test the system column added by XC called xc_node_id, used to find which tuples belong to which data node
+
+select cr_table('t1(a int, b int)', '{1, 2}'::int[], 'modulo(a)');
+insert into t1 values(1,11),(2,11),(3,11),(4,22),(5,22),(6,33),(7,44),(8,44);
+
+select get_unified_node_name(xc_node_id),* from t1 order by a;
+
+select get_unified_node_name(xc_node_id),* from t1 where xc_node_id > 0 order by a;
+
+select get_unified_node_name(xc_node_id),* from t1 order by xc_node_id;
+
+create table t2(a int , xc_node_id int) distribute by modulo(a);
+
+create table t2(a int , b int) distribute by modulo(xc_node_id);
+
+drop table t1;
+
diff --git a/src/test/regress/sql/xc_node.sql b/src/test/regress/sql/xc_node.sql
index c51a7e9753..18d377008f 100644
--- a/src/test/regress/sql/xc_node.sql
+++ b/src/test/regress/sql/xc_node.sql
@@ -10,6 +10,9 @@ CREATE NODE dummy_node_datanode WITH (TYPE = 'datanode');
SELECT node_name, node_type, node_port, node_host FROM pgxc_node
WHERE node_name IN ('dummy_node_coordinator', 'dummy_node_datanode')
ORDER BY 1;
+-- test to make sure that node_id is generated correctly for the added nodes
+select hashname(node_name) = node_id from pgxc_node
+WHERE node_name IN ('dummy_node_coordinator', 'dummy_node_datanode');
-- Some modifications
ALTER NODE dummy_node_coordinator WITH (PORT = 5466, HOST = 'target_host_1');
ALTER NODE dummy_node_datanode WITH (PORT = 5689, HOST = 'target_host_2', PREFERRED);
diff --git a/src/test/regress/sql/xc_remote.sql b/src/test/regress/sql/xc_remote.sql
index b7a27ad3b2..0e02d98f81 100644
--- a/src/test/regress/sql/xc_remote.sql
+++ b/src/test/regress/sql/xc_remote.sql
@@ -83,28 +83,25 @@ DROP SEQUENCE seqtest3;
-- UPDATE cases for round robin table
-- Plain cases change it completely
--- PGXCTODO: For Round robin tuple selection is still incorrect
--- It is necessary to incorporate all the columns in qual list
--- to insure SQL consistency
CREATE TABLE rel_rr (a int, b timestamp DEFAULT NULL, c boolean DEFAULT NULL) DISTRIBUTE BY ROUND ROBIN;
CREATE SEQUENCE seqtest4 START 1;
INSERT INTO rel_rr VALUES (1),(2),(3),(4),(5);
--- UPDATE rel_rr SET a = nextval('seqtest4'), b = now(), c = false;
--- SELECT a FROM rel_rr ORDER BY 1,2;
+UPDATE rel_rr SET a = nextval('seqtest4'), b = now(), c = false;
+SELECT a FROM rel_rr ORDER BY 1;
-- Non-Coordinator quals
--- UPDATE rel_rr SET b = now(), c = true WHERE a < func_volatile(2);
--- SELECT a FROM rel_rr WHERE c = true ORDER BY 1;
--- UPDATE rel_rr SET c = false;
--- UPDATE rel_rr SET b = now(), c = true WHERE a < func_stable(3);
--- SELECT a FROM rel_rr WHERE c = true ORDER BY 1;
--- UPDATE rel_rr SET c = false WHERE c = true;
--- UPDATE rel_rr SET b = now(), c = true WHERE a < func_immutable(4);
--- SELECT a FROM rel_rr WHERE c = true ORDER BY 1;
--- UPDATE rel_rr SET c = false;
+UPDATE rel_rr SET b = now(), c = true WHERE a < func_volatile(2);
+SELECT a FROM rel_rr WHERE c = true ORDER BY 1;
+UPDATE rel_rr SET c = false;
+UPDATE rel_rr SET b = now(), c = true WHERE a < func_stable(3);
+SELECT a FROM rel_rr WHERE c = true ORDER BY 1;
+UPDATE rel_rr SET c = false WHERE c = true;
+UPDATE rel_rr SET b = now(), c = true WHERE a < func_immutable(4);
+SELECT a FROM rel_rr WHERE c = true ORDER BY 1;
+UPDATE rel_rr SET c = false;
-- Coordinator qual
--- UPDATE rel_rr SET b = now(), c = true WHERE a < currval('seqtest4') - 3 AND b < now();
--- SELECT a FROM rel_rr WHERE c = true ORDER BY 1;
--- DROP SEQUENCE seqtest4;
+UPDATE rel_rr SET b = now(), c = true WHERE a < currval('seqtest4') - 3 AND b < now();
+SELECT a FROM rel_rr WHERE c = true ORDER BY 1;
+DROP SEQUENCE seqtest4;
-- UPDATE cases for hash table
-- Hash tables cannot be updated on distribution keys so insert fresh rows