diff options
author | Michael P | 2012-04-10 01:06:29 +0000 |
---|---|---|
committer | Michael P | 2012-04-10 01:07:03 +0000 |
commit | 79efd8faf958947b1509e814b4dc87faae28706e (patch) | |
tree | 83934d0f096b774c6dd978991a98a0f8105a2b8d | |
parent | ce001733b43f9e76e6554ffea21fb35385e34db4 (diff) |
Addition of integer node identifier as part of tuple information
This mechanism makes it possible to uniquely identify, on a Coordinator,
a tuple fetched from a Datanode by adding to the tuple information a field
called xc_node_id, which is used as a unique integer node identifier.
This node ID is calculated at node creation and added to the tuple information
as a kind of extension of ctid. It can be used by internal XC mechanisms.
This commit also includes a fix for round robin tables using remote update
and delete plans. This was a long-standing issue that remained in the test
xc_remote, but until now there was no tuple-based way to identify the node
of origin of a tuple.
The original feature is from Abbas.
The fix regarding round robin tables is from me. I also did some cleanup and
added documentation for the new column of pgxc_node.
44 files changed, 656 insertions, 54 deletions
diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c index 0d6f0b6b13..73312ab293 100644 --- a/contrib/hstore/hstore_io.c +++ b/contrib/hstore/hstore_io.c @@ -814,6 +814,9 @@ hstore_from_record(PG_FUNCTION_ARGS) tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; +#ifdef PGXC + tuple.t_xc_node_id = 0; +#endif tuple.t_data = rec; values = (Datum *) palloc(ncolumns * sizeof(Datum)); @@ -960,6 +963,9 @@ hstore_populate_record(PG_FUNCTION_ARGS) tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; +#ifdef PGXC + tuple.t_xc_node_id = 0; +#endif tuple.t_data = rec; } diff --git a/doc-xc/src/sgml/catalogs.sgmlin b/doc-xc/src/sgml/catalogs.sgmlin index 129188fe1f..cccddb4ece 100644 --- a/doc-xc/src/sgml/catalogs.sgmlin +++ b/doc-xc/src/sgml/catalogs.sgmlin @@ -6573,6 +6573,15 @@ Only a Datanode can be a preferred node. </entry> </row> + + <row> + <entry><structfield>node_id</structfield></entry> + <entry><type>int4</type></entry> + <entry></entry> + <entry>Integer node identifier of node. + It is generated when node is created. 
+ </entry> + </row> </tbody> </tgroup> </table> diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c index 5caae51be1..47f6a5a352 100644 --- a/src/backend/access/common/heaptuple.c +++ b/src/backend/access/common/heaptuple.c @@ -290,6 +290,9 @@ heap_attisnull(HeapTuple tup, int attnum) case MinCommandIdAttributeNumber: case MaxTransactionIdAttributeNumber: case MaxCommandIdAttributeNumber: +#ifdef PGXC + case XC_NodeIdAttributeNumber: +#endif /* these are never null */ break; @@ -562,6 +565,11 @@ heap_getsysattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull) case TableOidAttributeNumber: result = ObjectIdGetDatum(tup->t_tableOid); break; +#ifdef PGXC + case XC_NodeIdAttributeNumber: + result = UInt32GetDatum(tup->t_xc_node_id); + break; +#endif default: elog(ERROR, "invalid attnum: %d", attnum); result = 0; /* keep compiler quiet */ @@ -591,6 +599,9 @@ heap_copytuple(HeapTuple tuple) newTuple->t_len = tuple->t_len; newTuple->t_self = tuple->t_self; newTuple->t_tableOid = tuple->t_tableOid; +#ifdef PGXC + newTuple->t_xc_node_id = tuple->t_xc_node_id; +#endif newTuple->t_data = (HeapTupleHeader) ((char *) newTuple + HEAPTUPLESIZE); memcpy((char *) newTuple->t_data, (char *) tuple->t_data, tuple->t_len); return newTuple; @@ -617,6 +628,9 @@ heap_copytuple_with_tuple(HeapTuple src, HeapTuple dest) dest->t_len = src->t_len; dest->t_self = src->t_self; dest->t_tableOid = src->t_tableOid; +#ifdef PGXC + dest->t_xc_node_id = src->t_xc_node_id; +#endif dest->t_data = (HeapTupleHeader) palloc(src->t_len); memcpy((char *) dest->t_data, (char *) src->t_data, src->t_len); } @@ -706,6 +720,9 @@ heap_form_tuple(TupleDesc tupleDescriptor, tuple->t_len = len; ItemPointerSetInvalid(&(tuple->t_self)); tuple->t_tableOid = InvalidOid; +#ifdef PGXC + tuple->t_xc_node_id = 0; +#endif HeapTupleHeaderSetDatumLength(td, len); HeapTupleHeaderSetTypeId(td, tupleDescriptor->tdtypeid); @@ -825,6 +842,9 @@ heap_modify_tuple(HeapTuple tuple, 
newTuple->t_data->t_ctid = tuple->t_data->t_ctid; newTuple->t_self = tuple->t_self; newTuple->t_tableOid = tuple->t_tableOid; +#ifdef PGXC + newTuple->t_xc_node_id = tuple->t_xc_node_id; +#endif if (tupleDesc->tdhasoid) HeapTupleSetOid(newTuple, HeapTupleGetOid(tuple)); @@ -1638,6 +1658,9 @@ heap_tuple_from_minimal_tuple(MinimalTuple mtup) result->t_len = len; ItemPointerSetInvalid(&(result->t_self)); result->t_tableOid = InvalidOid; +#ifdef PGXC + result->t_xc_node_id = 0; +#endif result->t_data = (HeapTupleHeader) ((char *) result + HEAPTUPLESIZE); memcpy((char *) result->t_data + MINIMAL_TUPLE_OFFSET, mtup, mtup->t_len); memset(result->t_data, 0, offsetof(HeapTupleHeaderData, t_infomask2)); diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 01a492e496..e32805716a 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -68,6 +68,9 @@ #include "utils/snapmgr.h" #include "utils/syscache.h" #include "utils/tqual.h" +#ifdef PGXC +#include "pgxc/pgxc.h" +#endif /* GUC variable */ @@ -1232,6 +1235,10 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot, /* we only need to set this up once */ scan->rs_ctup.t_tableOid = RelationGetRelid(relation); +#ifdef PGXC + scan->rs_ctup.t_xc_node_id = PGXCNodeIdentifier; +#endif + /* * we do this here instead of in initscan() because heap_rescan also calls * initscan() and we don't want to allocate memory again @@ -1463,6 +1470,9 @@ heap_fetch(Relation relation, tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); tuple->t_len = ItemIdGetLength(lp); tuple->t_tableOid = RelationGetRelid(relation); +#ifdef PGXC + tuple->t_xc_node_id = PGXCNodeIdentifier; +#endif /* * check time qualification of tuple, then release lock @@ -1569,6 +1579,9 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, heapTuple.t_data = (HeapTupleHeader) PageGetItem(dp, lp); heapTuple.t_len = ItemIdGetLength(lp); heapTuple.t_tableOid = relation->rd_id; 
+#ifdef PGXC + heapTuple.t_xc_node_id = PGXCNodeIdentifier; +#endif heapTuple.t_self = *tid; /* @@ -1892,6 +1905,9 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, HeapTupleHeaderSetCmin(tup->t_data, cid); HeapTupleHeaderSetXmax(tup->t_data, 0); /* for cleanliness */ tup->t_tableOid = RelationGetRelid(relation); +#ifdef PGXC + tup->t_xc_node_id = PGXCNodeIdentifier; +#endif /* * If the new tuple is too big for storage or contains already toasted @@ -2609,6 +2625,9 @@ l2: HeapTupleHeaderSetCmin(newtup->t_data, cid); HeapTupleHeaderSetXmax(newtup->t_data, 0); /* for cleanliness */ newtup->t_tableOid = RelationGetRelid(relation); +#ifdef PGXC + newtup->t_xc_node_id = PGXCNodeIdentifier; +#endif /* * Replace cid with a combo cid if necessary. Note that we already put @@ -2933,6 +2952,9 @@ heap_tuple_attr_equals(TupleDesc tupdesc, int attrnum, if (attrnum < 0) { if (attrnum != ObjectIdAttributeNumber && +#ifdef PGXC + attrnum != XC_NodeIdAttributeNumber && +#endif attrnum != TableOidAttributeNumber) return false; } @@ -3143,6 +3165,9 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, Buffer *buffer, tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); tuple->t_len = ItemIdGetLength(lp); tuple->t_tableOid = RelationGetRelid(relation); +#ifdef PGXC + tuple->t_xc_node_id = PGXCNodeIdentifier; +#endif l3: result = HeapTupleSatisfiesUpdate(tuple->t_data, cid, *buffer); diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c index 4f4dd69291..c028e2fd88 100644 --- a/src/backend/access/heap/tuptoaster.c +++ b/src/backend/access/heap/tuptoaster.c @@ -881,6 +881,10 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, result_tuple->t_len = new_len; result_tuple->t_self = newtup->t_self; result_tuple->t_tableOid = newtup->t_tableOid; +#ifdef PGXC + result_tuple->t_xc_node_id = newtup->t_xc_node_id; +#endif + new_data = (HeapTupleHeader) ((char *) result_tuple + HEAPTUPLESIZE); result_tuple->t_data = 
new_data; @@ -971,6 +975,9 @@ toast_flatten_tuple_attribute(Datum value, tmptup.t_len = HeapTupleHeaderGetDatumLength(olddata); ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; +#ifdef PGXC + tmptup.t_xc_node_id = 0; +#endif tmptup.t_data = olddata; Assert(numAttrs <= MaxTupleAttributeNumber); diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 0208765964..badff24541 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -73,6 +73,10 @@ #include "utils/relcache.h" #include "utils/snapmgr.h" #include "utils/tqual.h" +#ifdef PGXC +#include "utils/lsyscache.h" +#include "pgxc/pgxc.h" +#endif /* ---------------------------------------------------------------- @@ -575,6 +579,10 @@ index_getnext(IndexScanDesc scan, ScanDirection direction) heapTuple->t_len = ItemIdGetLength(lp); ItemPointerSetOffsetNumber(tid, offnum); heapTuple->t_tableOid = RelationGetRelid(scan->heapRelation); +#ifdef PGXC + heapTuple->t_xc_node_id = PGXCNodeIdentifier; +#endif + ctid = &heapTuple->t_data->t_ctid; /* diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c index 02335b0750..8fceb06ca7 100644 --- a/src/backend/catalog/catalog.c +++ b/src/backend/catalog/catalog.c @@ -451,6 +451,7 @@ IsSharedRelation(Oid relationId) relationId == TablespaceNameIndexId || #ifdef PGXC relationId == PgxcNodeNodeNameIndexId || + relationId == PgxcNodeNodeIdIndexId || relationId == PgxcNodeOidIndexId || relationId == PgxcGroupGroupNameIndexId || relationId == PgxcGroupOidIndexId || diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl index 0aeaf5bfd7..3cedcbeb51 100644 --- a/src/backend/catalog/genbki.pl +++ b/src/backend/catalog/genbki.pl @@ -219,6 +219,9 @@ foreach my $catname ( @{ $catalogs->{names} } ) {xmax => 'xid'}, {cmax => 'cid'}, {tableoid => 'oid'} +#PGXC_BEGIN + ,{xc_node_id => 'int4'} +#PGXC_END ); foreach my $attr (@SYS_ATTRS) { diff --git 
a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 6c6b95aa3a..dc50c6b330 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -193,7 +193,24 @@ static FormData_pg_attribute a7 = { true, 'p', 'i', true, false, false, true, 0 }; +#ifdef PGXC +/* + * In XC we need some sort of node identification for each tuple + * We are adding another system column that would serve as node identifier. + * This is not only required by WHERE CURRENT OF but it can be used any + * where we want to know the originating Datanode of a tuple received + * at the Coordinator + */ +static FormData_pg_attribute a8 = { + 0, {"xc_node_id"}, INT4OID, 0, sizeof(int4), + XC_NodeIdAttributeNumber, 0, -1, -1, + true, 'p', 'i', true, false, false, true, 0 +}; + +static const Form_pg_attribute SysAtt[] = {&a1, &a2, &a3, &a4, &a5, &a6, &a7, &a8}; +#else static const Form_pg_attribute SysAtt[] = {&a1, &a2, &a3, &a4, &a5, &a6, &a7}; +#endif /* * This function returns a Form_pg_attribute pointer for a system attribute. 
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 7e30442580..012107a3da 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -54,6 +54,9 @@ #include "utils/snapmgr.h" #include "utils/syscache.h" #include "utils/tqual.h" +#ifdef PGXC +#include "pgxc/pgxc.h" +#endif /* GUC variables */ @@ -2645,6 +2648,9 @@ ltrmark:; tuple.t_len = ItemIdGetLength(lp); tuple.t_self = *tid; tuple.t_tableOid = RelationGetRelid(relation); +#ifdef PGXC + tuple.t_xc_node_id = PGXCNodeIdentifier; +#endif } result = heap_copytuple(&tuple); diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index d351ed98e7..0f7566164f 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -2111,6 +2111,9 @@ EvalPlanQualFetchRowMarks(EPQState *epqstate) tuple.t_len = HeapTupleHeaderGetDatumLength(td); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; +#ifdef PGXC + tuple.t_xc_node_id = 0; +#endif tuple.t_data = td; /* copy and store tuple */ diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c index 80f08d8b92..7edc63e275 100644 --- a/src/backend/executor/execQual.c +++ b/src/backend/executor/execQual.c @@ -1099,6 +1099,9 @@ GetAttributeByNum(HeapTupleHeader tuple, tmptup.t_len = HeapTupleHeaderGetDatumLength(tuple); ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; +#ifdef PGXC + tmptup.t_xc_node_id = 0; +#endif tmptup.t_data = tuple; result = heap_getattr(&tmptup, @@ -1160,6 +1163,9 @@ GetAttributeByName(HeapTupleHeader tuple, const char *attname, bool *isNull) tmptup.t_len = HeapTupleHeaderGetDatumLength(tuple); ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; +#ifdef PGXC + tmptup.t_xc_node_id = 0; +#endif tmptup.t_data = tuple; result = heap_getattr(&tmptup, @@ -3967,6 +3973,9 @@ ExecEvalFieldStore(FieldStoreState *fstate, tmptup.t_len = HeapTupleHeaderGetDatumLength(tuphdr); 
ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; +#ifdef PGXC + tmptup.t_xc_node_id = 0; +#endif tmptup.t_data = tuphdr; heap_deform_tuple(&tmptup, tupDesc, values, isnull); diff --git a/src/backend/executor/nodeForeignscan.c b/src/backend/executor/nodeForeignscan.c index d50489c7f4..622e6d427e 100644 --- a/src/backend/executor/nodeForeignscan.c +++ b/src/backend/executor/nodeForeignscan.c @@ -26,6 +26,11 @@ #include "executor/nodeForeignscan.h" #include "foreign/fdwapi.h" +#ifdef PGXC +#include "utils/lsyscache.h" +#include "pgxc/pgxc.h" +#endif + static TupleTableSlot *ForeignNext(ForeignScanState *node); static bool ForeignRecheck(ForeignScanState *node, TupleTableSlot *slot); @@ -60,6 +65,9 @@ ForeignNext(ForeignScanState *node) HeapTuple tup = ExecMaterializeSlot(slot); tup->t_tableOid = RelationGetRelid(node->ss.ss_currentRelation); +#ifdef PGXC + tup->t_xc_node_id = PGXCNodeIdentifier; +#endif } return slot; diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 0cf693aec4..002e1d2991 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -351,6 +351,9 @@ ExecDelete(ItemPointer tupleid, tuple.t_len = HeapTupleHeaderGetDatumLength(oldtuple); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; +#ifdef PGXC + tuple.t_xc_node_id = 0; +#endif dodelete = ExecIRDeleteTriggers(estate, resultRelInfo, &tuple); @@ -464,6 +467,9 @@ ldelete:; deltuple.t_len = HeapTupleHeaderGetDatumLength(oldtuple); ItemPointerSetInvalid(&(deltuple.t_self)); deltuple.t_tableOid = InvalidOid; +#ifdef PGXC + deltuple.t_xc_node_id = 0; +#endif delbuffer = InvalidBuffer; } else @@ -575,6 +581,9 @@ ExecUpdate(ItemPointer tupleid, oldtup.t_len = HeapTupleHeaderGetDatumLength(oldtuple); ItemPointerSetInvalid(&(oldtup.t_self)); oldtup.t_tableOid = InvalidOid; +#ifdef PGXC + oldtup.t_xc_node_id = 0; +#endif slot = ExecIRUpdateTriggers(estate, resultRelInfo, &oldtup, 
slot); diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c index b5213aa5f9..4bfc6bc3c3 100644 --- a/src/backend/executor/spi.c +++ b/src/backend/executor/spi.c @@ -765,6 +765,10 @@ SPI_modifytuple(Relation rel, HeapTuple tuple, int natts, int *attnum, mtuple->t_data->t_ctid = tuple->t_data->t_ctid; mtuple->t_self = tuple->t_self; mtuple->t_tableOid = tuple->t_tableOid; +#ifdef PGXC + mtuple->t_xc_node_id = tuple->t_xc_node_id; +#endif + if (rel->rd_att->tdhasoid) HeapTupleSetOid(mtuple, HeapTupleGetOid(tuple)); } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index e68d7cf270..5ab5c554a8 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -5798,16 +5798,21 @@ create_remotedelete_plan(PlannerInfo *root, Plan *topplan) else appendStringInfoString(buf, "AND "); - /* Nullify TLEs that are not from this relation */ - if (tle->resorigtbl != ttab->relid) - appendStringInfo(buf, "$%d = $%d ", - count, - count); - else - appendStringInfo(buf, "%s = $%d ", - quote_identifier(tle->resname), - count); + Assert(IsA((Node *)tle->expr, Var)); + if (IsA((Node *)tle->expr, Var)) + { + Var *var = (Var *) tle->expr; + /* Nullify TLEs that are not from this relation */ + if (var->varno != resultRelationIndex) + appendStringInfo(buf, "$%d = $%d ", + count, + count); + else + appendStringInfo(buf, "%s = $%d ", + quote_identifier(tle->resname), + count); + } /* Associate type of parameter */ param_types[count - 1] = exprType((Node *) tle->expr); diff --git a/src/backend/pgxc/locator/locator.c b/src/backend/pgxc/locator/locator.c index 92f6200df9..629b970fc7 100644 --- a/src/backend/pgxc/locator/locator.c +++ b/src/backend/pgxc/locator/locator.c @@ -837,7 +837,6 @@ RelationBuildLocator(Relation rel) /* * GetLocatorRelationInfo - Returns the locator information for relation, * in a copy of the RelationLocatorInfo struct in relcache - * */ RelationLocInfo * 
GetRelationLocInfo(Oid relid) @@ -855,6 +854,19 @@ GetRelationLocInfo(Oid relid) } /* + * Get the distribution type of relation. + */ +char +GetRelationLocType(Oid relid) +{ + RelationLocInfo *locinfo = GetRelationLocInfo(relid); + if (!locinfo) + return LOCATOR_TYPE_NONE; + + return locinfo->locatorType; +} + +/* * Copy the RelationLocInfo struct */ RelationLocInfo * diff --git a/src/backend/pgxc/nodemgr/nodemgr.c b/src/backend/pgxc/nodemgr/nodemgr.c index e6f83c3ef2..c46d0c60f1 100644 --- a/src/backend/pgxc/nodemgr/nodemgr.c +++ b/src/backend/pgxc/nodemgr/nodemgr.c @@ -13,6 +13,7 @@ #include "postgres.h" #include "miscadmin.h" +#include "access/hash.h" #include "access/heapam.h" #include "catalog/catalog.h" #include "catalog/indexing.h" @@ -28,6 +29,14 @@ #include "pgxc/nodemgr.h" #include "pgxc/pgxc.h" +/* + * How many times should we try to find a unique indetifier + * in case hash of the node name comes out to be duplicate + */ + +#define MAX_TRIES_FOR_NID 200 + +static Datum generate_node_id(const char *node_name); /* * GUC parameters. 
@@ -202,6 +211,70 @@ check_node_options(const char *node_name, List *options, char **node_host, node_name))); } +/* + * generate_node_id + * + * Given a node name compute its hash to generate the identifier + * If the hash comes out to be duplicate , try some other values + * Give up after a few tries + */ +static Datum +generate_node_id(const char *node_name) +{ + Datum node_id; + uint32 n; + bool inc; + int i; + + /* Compute node identifier by computing hash of node name */ + node_id = hash_any((unsigned char *)node_name, strlen(node_name)); + + /* + * Check if the hash is near the overflow limit, then we will + * decrement it , otherwise we will increment + */ + inc = true; + n = DatumGetUInt32(node_id); + if (n >= UINT_MAX - MAX_TRIES_FOR_NID) + inc = false; + + /* + * Check if the identifier is clashing with an existing one, + * and if it is try some other + */ + for (i = 0; i < MAX_TRIES_FOR_NID; i++) + { + HeapTuple tup; + + tup = SearchSysCache1(PGXCNODEIDENTIFIER, node_id); + if (tup == NULL) + break; + + ReleaseSysCache(tup); + + n = DatumGetUInt32(node_id); + if (inc) + n++; + else + n--; + + node_id = UInt32GetDatum(n); + } + + /* + * This has really few chances to happen, but inform backend that node + * has not been registered correctly in this case. 
+ */ + if (i >= MAX_TRIES_FOR_NID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("Please choose different node name."), + errdetail("Name \"%s\" produces a duplicate identifier node_name", + node_name))); + + return node_id; +} + /* -------------------------------- * cmp_nodes * @@ -422,6 +495,7 @@ PgxcNodeCreate(CreateNodeStmt *stmt) int node_port = 0; bool is_primary = false; bool is_preferred = false; + Datum node_id; /* Only a DB administrator can add nodes */ if (!superuser()) @@ -448,6 +522,9 @@ PgxcNodeCreate(CreateNodeStmt *stmt) &node_port, &node_type, &is_primary, &is_preferred); + /* Compute node identifier */ + node_id = generate_node_id(node_name); + /* * Then assign default values if necessary * First for port. @@ -489,6 +566,7 @@ PgxcNodeCreate(CreateNodeStmt *stmt) values[Anum_pgxc_node_host - 1] = DirectFunctionCall1(namein, CStringGetDatum(node_host)); values[Anum_pgxc_node_is_primary - 1] = BoolGetDatum(is_primary); values[Anum_pgxc_node_is_preferred - 1] = BoolGetDatum(is_preferred); + values[Anum_pgxc_node_id - 1] = node_id; htup = heap_form_tuple(pgxcnodesrel->rd_att, values, nulls); @@ -520,6 +598,7 @@ PgxcNodeAlter(AlterNodeStmt *stmt) Datum new_record[Natts_pgxc_node]; bool new_record_nulls[Natts_pgxc_node]; bool new_record_repl[Natts_pgxc_node]; + uint32 node_id; /* Only a DB administrator can alter cluster nodes */ if (!superuser()) @@ -552,6 +631,7 @@ PgxcNodeAlter(AlterNodeStmt *stmt) is_primary = is_pgxc_nodeprimary(nodeOid); node_type = get_pgxc_nodetype(nodeOid); node_type_old = node_type; + node_id = get_pgxc_node_id(nodeOid); /* Filter options */ check_node_options(node_name, stmt->options, &node_host, @@ -587,6 +667,8 @@ PgxcNodeAlter(AlterNodeStmt *stmt) new_record_repl[Anum_pgxc_node_is_primary - 1] = true; new_record[Anum_pgxc_node_is_preferred - 1] = BoolGetDatum(is_preferred); new_record_repl[Anum_pgxc_node_is_preferred - 1] = true; + new_record[Anum_pgxc_node_id - 1] = UInt32GetDatum(node_id); + 
new_record_repl[Anum_pgxc_node_id - 1] = true; /* Update relation */ newtup = heap_modify_tuple(oldtup, RelationGetDescr(rel), diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index d6ddbe1981..1fcb2c05ac 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -339,6 +339,12 @@ static DNSServiceRef bonjour_sdref = NULL; #ifdef PGXC char *PGXCNodeName = NULL; int PGXCNodeId = -1; +/* + * When a particular node starts up, store the node identifier in this variable + * so that we dont have to calculate it OR do a search in cache any where else + * This will have minimal impact on performance + */ +uint32 PGXCNodeIdentifier = 0; #endif /* diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index 5be26fe5e7..1ce3063bc1 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -29,6 +29,7 @@ #include "commands/trigger.h" #ifdef PGXC +#include "pgxc/locator.h" #include "pgxc/nodemgr.h" #include "pgxc/pgxc.h" #include "pgxc/postgresql_fdw.h" @@ -1204,15 +1205,16 @@ rewriteTargetListUD(Query *parsetree, RangeTblEntry *target_rte, if (var->varattno < 1 || var->varattno > numattrs) continue; + /* Bypass if this var does not use this relation */ + if (var->varno != parsetree->resultRelation) + continue; + att_tup = target_relation->rd_att->attrs[var->varattno - 1]; tle = makeTargetEntry((Expr *) var, list_length(parsetree->targetList) + 1, pstrdup(NameStr(att_tup->attname)), true); - /* This is needed in remote planning to confirm that TLE is for this relation */ - tle->resorigtbl = RelationGetRelid(target_relation); - parsetree->targetList = lappend(parsetree->targetList, tle); } #endif @@ -1249,12 +1251,34 @@ rewriteTargetListUD(Query *parsetree, RangeTblEntry *target_rte, pstrdup(attrname), true); + parsetree->targetList = lappend(parsetree->targetList, tle); + #ifdef PGXC - /* This is needed in remote planning to confirm 
that TLE is for this relation */ - tle->resorigtbl = RelationGetRelid(target_relation); -#endif + /* + * If relation is non-replicated, we need also to identify the Datanode + * from where tuple is fetched. + */ + if (IS_PGXC_COORDINATOR && + !IsConnFromCoord() && + !IsLocatorReplicated(GetRelationLocType(RelationGetRelid(target_relation)))) + { + var = makeVar(parsetree->resultRelation, + XC_NodeIdAttributeNumber, + INT4OID, + -1, + InvalidOid, + 0); - parsetree->targetList = lappend(parsetree->targetList, tle); + attrname = "xc_node_id"; + + tle = makeTargetEntry((Expr *) var, + list_length(parsetree->targetList) + 1, + pstrdup(attrname), + true); + + parsetree->targetList = lappend(parsetree->targetList, tle); + } +#endif } diff --git a/src/backend/utils/adt/rowtypes.c b/src/backend/utils/adt/rowtypes.c index 919ed9582a..acfd063f37 100644 --- a/src/backend/utils/adt/rowtypes.c +++ b/src/backend/utils/adt/rowtypes.c @@ -318,6 +318,9 @@ record_out(PG_FUNCTION_ARGS) tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; +#ifdef PGXC + tuple.t_xc_node_id = 0; +#endif tuple.t_data = rec; /* @@ -661,6 +664,9 @@ record_send(PG_FUNCTION_ARGS) tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; +#ifdef PGXC + tuple.t_xc_node_id = 0; +#endif tuple.t_data = rec; /* @@ -811,10 +817,16 @@ record_cmp(FunctionCallInfo fcinfo) tuple1.t_len = HeapTupleHeaderGetDatumLength(record1); ItemPointerSetInvalid(&(tuple1.t_self)); tuple1.t_tableOid = InvalidOid; +#ifdef PGXC + tuple1.t_xc_node_id = 0; +#endif tuple1.t_data = record1; tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); ItemPointerSetInvalid(&(tuple2.t_self)); tuple2.t_tableOid = InvalidOid; +#ifdef PGXC + tuple2.t_xc_node_id = 0; +#endif tuple2.t_data = record2; /* @@ -1046,10 +1058,16 @@ record_eq(PG_FUNCTION_ARGS) tuple1.t_len = HeapTupleHeaderGetDatumLength(record1); 
ItemPointerSetInvalid(&(tuple1.t_self)); tuple1.t_tableOid = InvalidOid; +#ifdef PGXC + tuple1.t_xc_node_id = 0; +#endif tuple1.t_data = record1; tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); ItemPointerSetInvalid(&(tuple2.t_self)); tuple2.t_tableOid = InvalidOid; +#ifdef PGXC + tuple2.t_xc_node_id = 0; +#endif tuple2.t_data = record2; /* diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 00ba19ec6c..9c55f296d1 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -4360,6 +4360,11 @@ get_variable_numdistinct(VariableStatData *vardata) case TableOidAttributeNumber: stadistinct = 1.0; /* only 1 value */ break; +#ifdef PGXC + case XC_NodeIdAttributeNumber: + stadistinct = 1.0; /* only 1 value */ + break; +#endif default: stadistinct = 0.0; /* means "unknown" */ break; diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index fa179c29c1..589c9ffe1d 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -2171,7 +2171,7 @@ get_pgxc_nodeoid(const char *nodename) /* * get_pgxc_nodename - * Get node type for given Oid + * Get node name for given Oid */ char * get_pgxc_nodename(Oid nodeid) @@ -2192,6 +2192,32 @@ get_pgxc_nodename(Oid nodeid) return result; } + /* + * get_pgxc_node_id + * Get node identifier for a given Oid + */ +uint32 +get_pgxc_node_id(Oid nodeid) +{ + HeapTuple tuple; + Form_pgxc_node nodeForm; + uint32 result; + + if (nodeid == InvalidOid) + return 0; + + tuple = SearchSysCache1(PGXCNODEOID, ObjectIdGetDatum(nodeid)); + + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for node %u", nodeid); + + nodeForm = (Form_pgxc_node) GETSTRUCT(tuple); + result = nodeForm->node_id; + ReleaseSysCache(tuple); + + return result; +} + /* * get_pgxc_nodetype * Get node type for given Oid diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index 0abdb30977..31b98ff8bb 
100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -594,6 +594,17 @@ static const struct cachedesc cacheinfo[] = { }, 256 }, + {PgxcNodeRelationId, /* PGXCNODEIDENTIFIER */ + PgxcNodeNodeIdIndexId, + 1, + { + Anum_pgxc_node_id, + 0, + 0, + 0 + }, + 256 + }, #endif {ProcedureRelationId, /* PROCNAMEARGSNSP */ ProcedureNameArgsNspIndexId, diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c index f3814c0186..0bae872589 100644 --- a/src/backend/utils/mmgr/portalmem.c +++ b/src/backend/utils/mmgr/portalmem.c @@ -25,6 +25,14 @@ #include "utils/builtins.h" #include "utils/memutils.h" +#ifdef PGXC +#include "pgxc/pgxc.h" +#include "access/hash.h" +#include "catalog/pg_collation.h" +#include "utils/formatting.h" +#include "utils/lsyscache.h" +#endif + /* * Estimate of the maximum number of open portals a user would have, * used in initially sizing the PortalHashTable in EnablePortalManager(). @@ -241,6 +249,16 @@ CreatePortal(const char *name, bool allowDup, bool dupSilent) /* put portal in table (sets portal->name) */ PortalHashTableInsert(portal, name); +#ifdef PGXC + if (PGXCNodeIdentifier == 0) + { + char *node_name; + node_name = str_tolower(PGXCNodeName, strlen(PGXCNodeName), DEFAULT_COLLATION_OID); + PGXCNodeIdentifier = get_pgxc_node_id(get_pgxc_nodeoid(node_name)); + pfree(node_name); + } +#endif + return portal; } diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index 646f0d05d1..b82dd5daae 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -3342,6 +3342,10 @@ readtup_cluster(Tuplesortstate *state, SortTuple *stup, &tuple->t_self, sizeof(ItemPointerData)); /* We don't currently bother to reconstruct t_tableOid */ tuple->t_tableOid = InvalidOid; +#ifdef PGXC + tuple->t_xc_node_id = 0; +#endif + /* Read in the tuple body */ LogicalTapeReadExact(state->tapeset, tapenum, tuple->t_data, tuple->t_len); diff --git 
a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index e9cbf80b05..db7da049e4 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -12628,6 +12628,10 @@ getAttrName(int attrnum, TableInfo *tblInfo) return "cmax"; case TableOidAttributeNumber: return "tableoid"; +#ifdef PGXC + case XC_NodeIdAttributeNumber: + return "xc_node_id"; +#endif } write_msg(NULL, "invalid column number %d for table \"%s\"\n", attrnum, tblInfo->dobj.name); diff --git a/src/include/access/htup.h b/src/include/access/htup.h index c147707169..4d2586b304 100644 --- a/src/include/access/htup.h +++ b/src/include/access/htup.h @@ -518,6 +518,9 @@ typedef struct HeapTupleData uint32 t_len; /* length of *t_data */ ItemPointerData t_self; /* SelfItemPointer */ Oid t_tableOid; /* table the tuple came from */ +#ifdef PGXC + uint32 t_xc_node_id; /* Data node the tuple came from */ +#endif HeapTupleHeader t_data; /* -> tuple header and data */ } HeapTupleData; diff --git a/src/include/access/sysattr.h b/src/include/access/sysattr.h index 1b3e64aaf8..e4b007b082 100644 --- a/src/include/access/sysattr.h +++ b/src/include/access/sysattr.h @@ -25,7 +25,13 @@ #define MaxTransactionIdAttributeNumber (-5) #define MaxCommandIdAttributeNumber (-6) #define TableOidAttributeNumber (-7) +#ifdef PGXC +#define XC_NodeIdAttributeNumber (-8) +#define FirstLowInvalidHeapAttributeNumber (-9) +#else #define FirstLowInvalidHeapAttributeNumber (-8) +#endif + #endif /* SYSATTR_H */ diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h index aa8157d513..98dda2c4e4 100644 --- a/src/include/catalog/indexing.h +++ b/src/include/catalog/indexing.h @@ -285,7 +285,7 @@ DECLARE_UNIQUE_INDEX(pg_user_mapping_user_server_index, 175, on pg_user_mapping DECLARE_UNIQUE_INDEX(pgxc_class_pcrelid_index, 9002, on pgxc_class using btree(pcrelid oid_ops)); #define PgxcClassPgxcRelIdIndexId 9002 -DECLARE_UNIQUE_INDEX(pgxc_node_id_index, 9010, on pgxc_node using btree(oid oid_ops)); 
+DECLARE_UNIQUE_INDEX(pgxc_node_oid_index, 9010, on pgxc_node using btree(oid oid_ops)); #define PgxcNodeOidIndexId 9010 DECLARE_UNIQUE_INDEX(pgxc_node_name_index, 9011, on pgxc_node using btree(node_name name_ops)); @@ -297,6 +297,9 @@ DECLARE_UNIQUE_INDEX(pgxc_group_name_index, 9012, on pgxc_group using btree(grou DECLARE_UNIQUE_INDEX(pgxc_group_oid, 9013, on pgxc_group using btree(oid oid_ops)); #define PgxcGroupOidIndexId 9013 +DECLARE_UNIQUE_INDEX(pgxc_node_id_index, 9003, on pgxc_node using btree(node_id int4_ops)); +#define PgxcNodeNodeIdIndexId 9003 + #endif DECLARE_UNIQUE_INDEX(pg_foreign_table_relid_index, 3119, on pg_foreign_table using btree(ftrelid oid_ops)); diff --git a/src/include/catalog/pgxc_node.h b/src/include/catalog/pgxc_node.h index a8bdc7e989..4696afc2a9 100644 --- a/src/include/catalog/pgxc_node.h +++ b/src/include/catalog/pgxc_node.h @@ -52,11 +52,16 @@ CATALOG(pgxc_node,9015) BKI_SHARED_RELATION * Is this node preferred */ bool nodeis_preferred; + + /* + * Node identifier to be used at places where a fixed length node identification is required + */ + int4 node_id; } FormData_pgxc_node; typedef FormData_pgxc_node *Form_pgxc_node; -#define Natts_pgxc_node 6 +#define Natts_pgxc_node 7 #define Anum_pgxc_node_name 1 #define Anum_pgxc_node_type 2 @@ -64,6 +69,7 @@ typedef FormData_pgxc_node *Form_pgxc_node; #define Anum_pgxc_node_host 4 #define Anum_pgxc_node_is_primary 5 #define Anum_pgxc_node_is_preferred 6 +#define Anum_pgxc_node_id 7 /* Possible types of nodes */ #define PGXC_NODE_COORDINATOR 'C' diff --git a/src/include/pgxc/locator.h b/src/include/pgxc/locator.h index cd8ee7d1df..d35d24f205 100644 --- a/src/include/pgxc/locator.h +++ b/src/include/pgxc/locator.h @@ -100,6 +100,7 @@ extern char ConvertToLocatorType(int disttype); extern char *GetRelationHashColumn(RelationLocInfo *rel_loc_info); extern RelationLocInfo *GetRelationLocInfo(Oid relid); extern RelationLocInfo *CopyRelationLocInfo(RelationLocInfo *src_info); +extern char 
GetRelationLocType(Oid relid); extern bool IsTableDistOnPrimary(RelationLocInfo *rel_loc_info); extern ExecNodes *GetRelationNodes(RelationLocInfo *rel_loc_info, Datum valueForDistCol, bool isValueNull, Oid typeOfValueForDistCol, diff --git a/src/include/pgxc/pgxc.h b/src/include/pgxc/pgxc.h index 3413653a61..1d1821c789 100644 --- a/src/include/pgxc/pgxc.h +++ b/src/include/pgxc/pgxc.h @@ -34,6 +34,7 @@ extern int remoteConnType; /* Local node name and numer */ extern char *PGXCNodeName; extern int PGXCNodeId; +extern uint32 PGXCNodeIdentifier; #define IS_PGXC_COORDINATOR isPGXCCoordinator #define IS_PGXC_DATANODE isPGXCDataNode diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index 3f70c4cd9b..aab8e0e4fc 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -136,6 +136,7 @@ extern Oid getBaseTypeAndTypmod(Oid typid, int32 *typmod); extern char *get_typename(Oid typid); extern char *get_pgxc_nodename(Oid nodeoid); extern Oid get_pgxc_nodeoid(const char *nodename); +extern uint32 get_pgxc_node_id(Oid nodeid); extern char get_pgxc_nodetype(Oid nodeid); extern int get_pgxc_nodeport(Oid nodeid); extern char *get_pgxc_nodehost(Oid nodeid); diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h index 23fdf050cc..44f3ff0790 100644 --- a/src/include/utils/syscache.h +++ b/src/include/utils/syscache.h @@ -75,6 +75,7 @@ enum SysCacheIdentifier PGXCGROUPOID, PGXCNODENAME, PGXCNODEOID, + PGXCNODEIDENTIFIER, #endif PROCNAMEARGSNSP, PROCOID, diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c index 906a485853..a3db99e92c 100644 --- a/src/pl/plpgsql/src/pl_exec.c +++ b/src/pl/plpgsql/src/pl_exec.c @@ -281,6 +281,9 @@ plpgsql_exec_function(PLpgSQL_function *func, FunctionCallInfo fcinfo) tmptup.t_len = HeapTupleHeaderGetDatumLength(td); ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; +#ifdef PGXC + tmptup.t_xc_node_id = 0; +#endif tmptup.t_data = td; 
exec_move_row(&estate, NULL, row, &tmptup, tupdesc); ReleaseTupleDesc(tupdesc); @@ -3768,6 +3771,9 @@ exec_assign_value(PLpgSQL_execstate *estate, tmptup.t_len = HeapTupleHeaderGetDatumLength(td); ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; +#ifdef PGXC + tmptup.t_xc_node_id = 0; +#endif tmptup.t_data = td; exec_move_row(estate, NULL, row, &tmptup, tupdesc); ReleaseTupleDesc(tupdesc); @@ -3811,6 +3817,9 @@ exec_assign_value(PLpgSQL_execstate *estate, tmptup.t_len = HeapTupleHeaderGetDatumLength(td); ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; +#ifdef PGXC + tmptup.t_xc_node_id = 0; +#endif tmptup.t_data = td; exec_move_row(estate, rec, NULL, &tmptup, tupdesc); ReleaseTupleDesc(tupdesc); diff --git a/src/test/regress/expected/xc_misc.out b/src/test/regress/expected/xc_misc.out new file mode 100644 index 0000000000..3e56f3913b --- /dev/null +++ b/src/test/regress/expected/xc_misc.out @@ -0,0 +1,103 @@ +-- A function to create table on specified nodes +create or replace function cr_table(tab_schema varchar, nodenums int[], distribution varchar) returns void language plpgsql as $$ +declare + cr_command varchar; + nodes varchar[]; + nodename varchar; + nodenames_query varchar; + nodenames varchar; + node int; + sep varchar; + tmp_node int; + num_nodes int; +begin + nodenames_query := 'SELECT node_name FROM pgxc_node WHERE node_type = ''D'''; + cr_command := 'CREATE TABLE ' || tab_schema || ' DISTRIBUTE BY ' || distribution || ' TO NODE '; + for nodename in execute nodenames_query loop + nodes := array_append(nodes, nodename); + end loop; + nodenames := ''; + sep := ''; + num_nodes := array_length(nodes, 1); + foreach node in array nodenums loop + tmp_node := node; + if (tmp_node < 1 or tmp_node > num_nodes) then + tmp_node := tmp_node % num_nodes; + if (tmp_node < 1) then + tmp_node := num_nodes; + end if; + end if; + nodenames := nodenames || sep || nodes[tmp_node]; + sep := ', '; + end loop; + cr_command := 
cr_command || nodenames; + execute cr_command; +end; +$$; +-- A function to return a unified data node name given a node identifer +create or replace function get_unified_node_name(node_ident int) returns varchar language plpgsql as $$ +declare + r pgxc_node%rowtype; + node int; + nodenames_query varchar; +begin + nodenames_query := 'SELECT * FROM pgxc_node WHERE node_type = ''D'' ORDER BY xc_node_id'; + + node := 1; + for r in execute nodenames_query loop + if r.node_id = node_ident THEN + RETURN 'NODE_' || node; + end if; + node := node + 1; + end loop; + RETURN 'NODE_?'; +end; +$$; +-- Test the system column added by XC called xc_node_id, used to find which tuples belong to which data node +select cr_table('t1(a int, b int)', '{1, 2}'::int[], 'modulo(a)'); + cr_table +---------- + +(1 row) + +insert into t1 values(1,11),(2,11),(3,11),(4,22),(5,22),(6,33),(7,44),(8,44); +select get_unified_node_name(xc_node_id),* from t1 order by a; + get_unified_node_name | a | b +-----------------------+---+---- + NODE_2 | 1 | 11 + NODE_1 | 2 | 11 + NODE_2 | 3 | 11 + NODE_1 | 4 | 22 + NODE_2 | 5 | 22 + NODE_1 | 6 | 33 + NODE_2 | 7 | 44 + NODE_1 | 8 | 44 +(8 rows) + +select get_unified_node_name(xc_node_id),* from t1 where xc_node_id > 0 order by a; + get_unified_node_name | a | b +-----------------------+---+---- + NODE_2 | 1 | 11 + NODE_2 | 3 | 11 + NODE_2 | 5 | 22 + NODE_2 | 7 | 44 +(4 rows) + +select get_unified_node_name(xc_node_id),* from t1 order by xc_node_id; + get_unified_node_name | a | b +-----------------------+---+---- + NODE_1 | 2 | 11 + NODE_1 | 4 | 22 + NODE_1 | 6 | 33 + NODE_1 | 8 | 44 + NODE_2 | 1 | 11 + NODE_2 | 3 | 11 + NODE_2 | 5 | 22 + NODE_2 | 7 | 44 +(8 rows) + +create table t2(a int , xc_node_id int) distribute by modulo(a); +ERROR: column name "xc_node_id" conflicts with a system column name +create table t2(a int , b int) distribute by modulo(xc_node_id); +ERROR: Column xc_node_id is not modulo distributable data type +drop table t1; diff --git 
a/src/test/regress/expected/xc_node.out b/src/test/regress/expected/xc_node.out index a239cf7f24..351fa00be9 100644 --- a/src/test/regress/expected/xc_node.out +++ b/src/test/regress/expected/xc_node.out @@ -14,6 +14,15 @@ ORDER BY 1; dummy_node_datanode | D | 5432 | localhost (2 rows) +-- test to make sure that node_id is generated correctly for the added nodes +select hashname(node_name) = node_id from pgxc_node +WHERE node_name IN ('dummy_node_coordinator', 'dummy_node_datanode'); + ?column? +---------- + t + t +(2 rows) + -- Some modifications ALTER NODE dummy_node_coordinator WITH (PORT = 5466, HOST = 'target_host_1'); ALTER NODE dummy_node_datanode WITH (PORT = 5689, HOST = 'target_host_2', PREFERRED); diff --git a/src/test/regress/expected/xc_remote.out b/src/test/regress/expected/xc_remote.out index 9335d06548..481d89dfe4 100644 --- a/src/test/regress/expected/xc_remote.out +++ b/src/test/regress/expected/xc_remote.out @@ -204,28 +204,57 @@ SELECT a FROM rel_rep WHERE c = true ORDER BY 1; DROP SEQUENCE seqtest3; -- UPDATE cases for round robin table -- Plain cases change it completely --- PGXCTODO: For Round robin tuple selection is still incorrect --- It is necessary to incorporate all the columns in qual list --- to insure SQL consistency CREATE TABLE rel_rr (a int, b timestamp DEFAULT NULL, c boolean DEFAULT NULL) DISTRIBUTE BY ROUND ROBIN; CREATE SEQUENCE seqtest4 START 1; INSERT INTO rel_rr VALUES (1),(2),(3),(4),(5); --- UPDATE rel_rr SET a = nextval('seqtest4'), b = now(), c = false; --- SELECT a FROM rel_rr ORDER BY 1,2; +UPDATE rel_rr SET a = nextval('seqtest4'), b = now(), c = false; +SELECT a FROM rel_rr ORDER BY 1; + a +--- + 1 + 2 + 3 + 4 + 5 +(5 rows) + -- Non-Coordinator quals --- UPDATE rel_rr SET b = now(), c = true WHERE a < func_volatile(2); --- SELECT a FROM rel_rr WHERE c = true ORDER BY 1; --- UPDATE rel_rr SET c = false; --- UPDATE rel_rr SET b = now(), c = true WHERE a < func_stable(3); --- SELECT a FROM rel_rr WHERE c = true ORDER 
BY 1; --- UPDATE rel_rr SET c = false WHERE c = true; --- UPDATE rel_rr SET b = now(), c = true WHERE a < func_immutable(4); --- SELECT a FROM rel_rr WHERE c = true ORDER BY 1; --- UPDATE rel_rr SET c = false; +UPDATE rel_rr SET b = now(), c = true WHERE a < func_volatile(2); +SELECT a FROM rel_rr WHERE c = true ORDER BY 1; + a +--- + 1 +(1 row) + +UPDATE rel_rr SET c = false; +UPDATE rel_rr SET b = now(), c = true WHERE a < func_stable(3); +SELECT a FROM rel_rr WHERE c = true ORDER BY 1; + a +--- + 1 + 2 +(2 rows) + +UPDATE rel_rr SET c = false WHERE c = true; +UPDATE rel_rr SET b = now(), c = true WHERE a < func_immutable(4); +SELECT a FROM rel_rr WHERE c = true ORDER BY 1; + a +--- + 1 + 2 + 3 +(3 rows) + +UPDATE rel_rr SET c = false; -- Coordinator qual --- UPDATE rel_rr SET b = now(), c = true WHERE a < currval('seqtest4') - 3 AND b < now(); --- SELECT a FROM rel_rr WHERE c = true ORDER BY 1; --- DROP SEQUENCE seqtest4; +UPDATE rel_rr SET b = now(), c = true WHERE a < currval('seqtest4') - 3 AND b < now(); +SELECT a FROM rel_rr WHERE c = true ORDER BY 1; + a +--- + 1 +(1 row) + +DROP SEQUENCE seqtest4; -- UPDATE cases for hash table -- Hash tables cannot be updated on distribution keys so insert fresh rows CREATE TABLE rel_hash (a int, b timestamp DEFAULT now(), c boolean DEFAULT false) DISTRIBUTE BY HASH(a); diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 50a912f3d1..79fdaa1693 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -108,3 +108,4 @@ test: xc_groupby xc_distkey xc_having xc_temp xc_remote xc_FQS xc_FQS_join #Cluster setting related test is independant test: xc_node +test: xc_misc diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index 6d9695341a..a909ab72a9 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -137,3 +137,5 @@ test: xc_remote test: xc_node test: xc_FQS test: xc_FQS_join +test: xc_misc 
+ diff --git a/src/test/regress/sql/xc_misc.sql b/src/test/regress/sql/xc_misc.sql new file mode 100644 index 0000000000..84f97d569d --- /dev/null +++ b/src/test/regress/sql/xc_misc.sql @@ -0,0 +1,74 @@ +-- A function to create table on specified nodes +create or replace function cr_table(tab_schema varchar, nodenums int[], distribution varchar) returns void language plpgsql as $$ +declare + cr_command varchar; + nodes varchar[]; + nodename varchar; + nodenames_query varchar; + nodenames varchar; + node int; + sep varchar; + tmp_node int; + num_nodes int; +begin + nodenames_query := 'SELECT node_name FROM pgxc_node WHERE node_type = ''D'''; + cr_command := 'CREATE TABLE ' || tab_schema || ' DISTRIBUTE BY ' || distribution || ' TO NODE '; + for nodename in execute nodenames_query loop + nodes := array_append(nodes, nodename); + end loop; + nodenames := ''; + sep := ''; + num_nodes := array_length(nodes, 1); + foreach node in array nodenums loop + tmp_node := node; + if (tmp_node < 1 or tmp_node > num_nodes) then + tmp_node := tmp_node % num_nodes; + if (tmp_node < 1) then + tmp_node := num_nodes; + end if; + end if; + nodenames := nodenames || sep || nodes[tmp_node]; + sep := ', '; + end loop; + cr_command := cr_command || nodenames; + execute cr_command; +end; +$$; + +-- A function to return a unified data node name given a node identifer +create or replace function get_unified_node_name(node_ident int) returns varchar language plpgsql as $$ +declare + r pgxc_node%rowtype; + node int; + nodenames_query varchar; +begin + nodenames_query := 'SELECT * FROM pgxc_node WHERE node_type = ''D'' ORDER BY xc_node_id'; + + node := 1; + for r in execute nodenames_query loop + if r.node_id = node_ident THEN + RETURN 'NODE_' || node; + end if; + node := node + 1; + end loop; + RETURN 'NODE_?'; +end; +$$; + +-- Test the system column added by XC called xc_node_id, used to find which tuples belong to which data node + +select cr_table('t1(a int, b int)', '{1, 2}'::int[], 
'modulo(a)'); +insert into t1 values(1,11),(2,11),(3,11),(4,22),(5,22),(6,33),(7,44),(8,44); + +select get_unified_node_name(xc_node_id),* from t1 order by a; + +select get_unified_node_name(xc_node_id),* from t1 where xc_node_id > 0 order by a; + +select get_unified_node_name(xc_node_id),* from t1 order by xc_node_id; + +create table t2(a int , xc_node_id int) distribute by modulo(a); + +create table t2(a int , b int) distribute by modulo(xc_node_id); + +drop table t1; + diff --git a/src/test/regress/sql/xc_node.sql b/src/test/regress/sql/xc_node.sql index c51a7e9753..18d377008f 100644 --- a/src/test/regress/sql/xc_node.sql +++ b/src/test/regress/sql/xc_node.sql @@ -10,6 +10,9 @@ CREATE NODE dummy_node_datanode WITH (TYPE = 'datanode'); SELECT node_name, node_type, node_port, node_host FROM pgxc_node WHERE node_name IN ('dummy_node_coordinator', 'dummy_node_datanode') ORDER BY 1; +-- test to make sure that node_id is generated correctly for the added nodes +select hashname(node_name) = node_id from pgxc_node +WHERE node_name IN ('dummy_node_coordinator', 'dummy_node_datanode'); -- Some modifications ALTER NODE dummy_node_coordinator WITH (PORT = 5466, HOST = 'target_host_1'); ALTER NODE dummy_node_datanode WITH (PORT = 5689, HOST = 'target_host_2', PREFERRED); diff --git a/src/test/regress/sql/xc_remote.sql b/src/test/regress/sql/xc_remote.sql index b7a27ad3b2..0e02d98f81 100644 --- a/src/test/regress/sql/xc_remote.sql +++ b/src/test/regress/sql/xc_remote.sql @@ -83,28 +83,25 @@ DROP SEQUENCE seqtest3; -- UPDATE cases for round robin table -- Plain cases change it completely --- PGXCTODO: For Round robin tuple selection is still incorrect --- It is necessary to incorporate all the columns in qual list --- to insure SQL consistency CREATE TABLE rel_rr (a int, b timestamp DEFAULT NULL, c boolean DEFAULT NULL) DISTRIBUTE BY ROUND ROBIN; CREATE SEQUENCE seqtest4 START 1; INSERT INTO rel_rr VALUES (1),(2),(3),(4),(5); --- UPDATE rel_rr SET a = nextval('seqtest4'), b = 
now(), c = false; --- SELECT a FROM rel_rr ORDER BY 1,2; +UPDATE rel_rr SET a = nextval('seqtest4'), b = now(), c = false; +SELECT a FROM rel_rr ORDER BY 1; -- Non-Coordinator quals --- UPDATE rel_rr SET b = now(), c = true WHERE a < func_volatile(2); --- SELECT a FROM rel_rr WHERE c = true ORDER BY 1; --- UPDATE rel_rr SET c = false; --- UPDATE rel_rr SET b = now(), c = true WHERE a < func_stable(3); --- SELECT a FROM rel_rr WHERE c = true ORDER BY 1; --- UPDATE rel_rr SET c = false WHERE c = true; --- UPDATE rel_rr SET b = now(), c = true WHERE a < func_immutable(4); --- SELECT a FROM rel_rr WHERE c = true ORDER BY 1; --- UPDATE rel_rr SET c = false; +UPDATE rel_rr SET b = now(), c = true WHERE a < func_volatile(2); +SELECT a FROM rel_rr WHERE c = true ORDER BY 1; +UPDATE rel_rr SET c = false; +UPDATE rel_rr SET b = now(), c = true WHERE a < func_stable(3); +SELECT a FROM rel_rr WHERE c = true ORDER BY 1; +UPDATE rel_rr SET c = false WHERE c = true; +UPDATE rel_rr SET b = now(), c = true WHERE a < func_immutable(4); +SELECT a FROM rel_rr WHERE c = true ORDER BY 1; +UPDATE rel_rr SET c = false; -- Coordinator qual --- UPDATE rel_rr SET b = now(), c = true WHERE a < currval('seqtest4') - 3 AND b < now(); --- SELECT a FROM rel_rr WHERE c = true ORDER BY 1; --- DROP SEQUENCE seqtest4; +UPDATE rel_rr SET b = now(), c = true WHERE a < currval('seqtest4') - 3 AND b < now(); +SELECT a FROM rel_rr WHERE c = true ORDER BY 1; +DROP SEQUENCE seqtest4; -- UPDATE cases for hash table -- Hash tables cannot be updated on distribution keys so insert fresh rows |