summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Paquier2012-07-11 06:29:35 +0000
committerMichael Paquier2012-07-11 06:29:35 +0000
commited3516bfc3d6dd540877f2bc4bd71a4990b65763 (patch)
tree9b3b36b441bb3a0ac88f52ba62476d93955b72cd
parent700db54f04412e0fc91557d46da2b5a236b264a8 (diff)
Refactor functions for management and interpretation of distribution clauses
This commit externalizes all the features used for the interpretation of SQL clauses TO NODE, TO GROUP and DISTRIBUTE BY. This reduces the footprint of XC code in heap.c. It is important to note here that this code cannot be completely removed from heap.c as there is a necessary dependency regarding system columns that cannot be used as distribution columns for HASH and MODULO tables.
-rw-r--r--src/backend/catalog/heap.c269
-rw-r--r--src/backend/pgxc/locator/locator.c4
-rw-r--r--src/include/catalog/heap.h14
-rw-r--r--src/include/pgxc/locator.h4
4 files changed, 178 insertions, 113 deletions
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index ba4d250dbe..18248f4193 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -118,9 +118,6 @@ static Node *cookConstraint(ParseState *pstate,
Node *raw_constraint,
char *relname);
static List *insert_ordered_unique_oid(List *list, Oid datum);
-#ifdef PGXC
-static Oid *build_subcluster_data(PGXCSubCluster *subcluster, int *numnodes);
-#endif
/* ----------------------------------------------------------------
@@ -960,6 +957,54 @@ AddRelationDistribution(Oid relid,
int numnodes;
Oid *nodeoids;
+ /* Obtain details of distribution information */
+ GetRelationDistributionItems(relid,
+ distributeby,
+ descriptor,
+ &locatortype,
+ &hashalgorithm,
+ &hashbuckets,
+ &attnum);
+
+ /* Obtain details of nodes and classify them */
+ nodeoids = GetRelationDistributionNodes(subcluster, &numnodes);
+
+ /* Now OK to insert data in catalog */
+ PgxcClassCreate(relid, locatortype, attnum, hashalgorithm,
+ hashbuckets, numnodes, nodeoids);
+
+ /* Make dependency entries */
+ myself.classId = PgxcClassRelationId;
+ myself.objectId = relid;
+ myself.objectSubId = 0;
+
+ /* Dependency on relation */
+ referenced.classId = RelationRelationId;
+ referenced.objectId = relid;
+ referenced.objectSubId = 0;
+ recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
+}
+
+/*
+ * GetRelationDistributionItems
+ * Obtain distribution type and related items based on deparsed information
+ * of clause DISTRIBUTE BY.
+ * Depending on the column types given a fallback to a safe distribution can be done.
+ */
+void
+GetRelationDistributionItems(Oid relid,
+ DistributeBy *distributeby,
+ TupleDesc descriptor,
+ char *locatortype,
+ int *hashalgorithm,
+ int *hashbuckets,
+ AttrNumber *attnum)
+{
+ int local_hashalgorithm = 0;
+ int local_hashbuckets = 0;
+ char local_locatortype = '\0';
+ AttrNumber local_attnum = 0;
+
if (!distributeby)
{
/*
@@ -970,22 +1015,22 @@ AddRelationDistribution(Oid relid,
Form_pg_attribute attr;
int i;
- locatortype = LOCATOR_TYPE_HASH;
+ local_locatortype = LOCATOR_TYPE_HASH;
for (i = 0; i < descriptor->natts; i++)
{
attr = descriptor->attrs[i];
- if (IsHashDistributable(attr->atttypid))
+ if (IsTypeHashDistributable(attr->atttypid))
{
/* distribute on this column */
- attnum = i + 1;
+ local_attnum = i + 1;
break;
}
}
/* If we did not find a usable type, fall back to round robin */
- if (attnum == 0)
- locatortype = LOCATOR_TYPE_RROBIN;
+ if (local_attnum == 0)
+ local_locatortype = LOCATOR_TYPE_RROBIN;
}
else
{
@@ -999,22 +1044,22 @@ AddRelationDistribution(Oid relid,
* Validate user-specified hash column.
* System columns cannot be used.
*/
- attnum = get_attnum(relid, distributeby->colname);
- if (attnum <= 0 && attnum >= -(int) lengthof(SysAtt))
+ local_attnum = get_attnum(relid, distributeby->colname);
+ if (local_attnum <= 0 && local_attnum >= -(int) lengthof(SysAtt))
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("Invalid distribution column specified")));
}
- if (!IsHashDistributable(descriptor->attrs[attnum-1]->atttypid))
+ if (!IsTypeHashDistributable(descriptor->attrs[local_attnum - 1]->atttypid))
{
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("Column %s is not a hash distributable data type",
distributeby->colname)));
}
- locatortype = LOCATOR_TYPE_HASH;
+ local_locatortype = LOCATOR_TYPE_HASH;
break;
case DISTTYPE_MODULO:
@@ -1022,30 +1067,30 @@ AddRelationDistribution(Oid relid,
* Validate user specified modulo column.
* System columns cannot be used.
*/
- attnum = get_attnum(relid, distributeby->colname);
- if (attnum <= 0 && attnum >= -(int) lengthof(SysAtt))
+ local_attnum = get_attnum(relid, distributeby->colname);
+ if (local_attnum <= 0 && local_attnum >= -(int) lengthof(SysAtt))
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
errmsg("Invalid distribution column specified")));
}
- if (!IsModuloDistributable(descriptor->attrs[attnum-1]->atttypid))
+ if (!IsTypeModuloDistributable(descriptor->attrs[local_attnum - 1]->atttypid))
{
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("Column %s is not modulo distributable data type",
distributeby->colname)));
}
- locatortype = LOCATOR_TYPE_MODULO;
+ local_locatortype = LOCATOR_TYPE_MODULO;
break;
case DISTTYPE_REPLICATION:
- locatortype = LOCATOR_TYPE_REPLICATED;
+ local_locatortype = LOCATOR_TYPE_REPLICATED;
break;
case DISTTYPE_ROUNDROBIN:
- locatortype = LOCATOR_TYPE_RROBIN;
+ local_locatortype = LOCATOR_TYPE_RROBIN;
break;
default:
@@ -1055,51 +1100,95 @@ AddRelationDistribution(Oid relid,
}
}
- switch (locatortype)
+ /* Use default hash values */
+ if (local_locatortype == LOCATOR_TYPE_HASH)
{
- case LOCATOR_TYPE_HASH:
- /* PGXCTODO */
- /* Use these for now until we make allowing different algorithms more flexible */
- hashalgorithm = 1;
- hashbuckets = HASH_SIZE;
- break;
-
- case LOCATOR_TYPE_MODULO:
- break;
+ local_hashalgorithm = 1;
+ local_hashbuckets = HASH_SIZE;
}
- /* Check and build list of nodes related to table */
- nodeoids = build_subcluster_data(subcluster, &numnodes);
+ /* Save results */
+ *attnum = local_attnum;
+ *hashalgorithm = local_hashalgorithm;
+ *hashbuckets = local_hashbuckets;
+ *locatortype = local_locatortype;
+}
- /*
- * Sort the list of nodes in ascending order before storing them
- * This is required so that indices are stored in ascending order
- * and later when node number is found by modulo, it points to the right node
- */
- qsort(nodeoids, numnodes, sizeof(Oid), cmp_nodes);
- /* Now OK to insert data in catalog */
- PgxcClassCreate(relid, locatortype, attnum, hashalgorithm,
- hashbuckets, numnodes, nodeoids);
+/*
+ * BuildRelationDistributionNodes
+ * Build an unsorted node Oid array based on a node name list.
+ */
+Oid *
+BuildRelationDistributionNodes(List *nodes, int *numnodes)
+{
+ Oid *nodeoids;
+ ListCell *item;
- /* Make dependency entries */
- myself.classId = PgxcClassRelationId;
- myself.objectId = relid;
- myself.objectSubId = 0;
+ *numnodes = 0;
- /* Dependency on relation */
- referenced.classId = RelationRelationId;
- referenced.objectId = relid;
- referenced.objectSubId = 0;
- recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
+ /* Allocate once enough space for OID array */
+ nodeoids = (Oid *) palloc0(NumDataNodes * sizeof(Oid));
+
+ /* Do process for each node name */
+ foreach(item, nodes)
+ {
+ char *node_name = strVal(lfirst(item));
+ Oid noid = get_pgxc_nodeoid(node_name);
+
+ /* Check existence of node */
+ if (!OidIsValid(noid))
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("PGXC Node %s: object not defined",
+ node_name)));
+
+ if (get_pgxc_nodetype(noid) != PGXC_NODE_DATANODE)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("PGXC node %s: not a Datanode",
+ node_name)));
+
+ /* Can be added if necessary */
+ if (*numnodes != 0)
+ {
+ bool is_listed = false;
+ int i;
+
+ /* Id Oid already listed? */
+ for (i = 0; i < *numnodes; i++)
+ {
+ if (nodeoids[i] == noid)
+ {
+ is_listed = true;
+ break;
+ }
+ }
+
+ if (!is_listed)
+ {
+ (*numnodes)++;
+ nodeoids[*numnodes - 1] = noid;
+ }
+ }
+ else
+ {
+ (*numnodes)++;
+ nodeoids[*numnodes - 1] = noid;
+ }
+ }
+
+ return nodeoids;
}
+
/*
- * Build list of node Oids for subcluster.
- * In case pgxc_node is empty return an error
+ * GetRelationDistributionNodes
+ * Transform subcluster information generated by query deparsing of TO NODE or
+ * TO GROUP clause into a sorted array of nodes OIDs.
*/
-static Oid *
-build_subcluster_data(PGXCSubCluster *subcluster, int *numnodes)
+Oid *
+GetRelationDistributionNodes(PGXCSubCluster *subcluster, int *numnodes)
{
ListCell *lc;
Oid *nodes = NULL;
@@ -1115,8 +1204,6 @@ build_subcluster_data(PGXCSubCluster *subcluster, int *numnodes)
* There could be a difference between the content of pgxc_node catalog
* table and current session, because someone may change nodes and not
* yet update session data.
- * We should use session data because Executor uses it as well to run
- * commands on nodes.
*/
*numnodes = NumDataNodes;
@@ -1129,12 +1216,10 @@ build_subcluster_data(PGXCSubCluster *subcluster, int *numnodes)
nodes = (Oid *) palloc(NumDataNodes * sizeof(Oid));
for (i = 0; i < NumDataNodes; i++)
nodes[i] = PGXCNodeGetNodeOid(i, PGXC_NODE_DATANODE);
-
- return nodes;
}
/* Build list of nodes from given group */
- if (subcluster->clustertype == SUBCLUSTER_GROUP)
+ if (!nodes && subcluster->clustertype == SUBCLUSTER_GROUP)
{
Assert(list_length(subcluster->members) == 1);
@@ -1152,60 +1237,28 @@ build_subcluster_data(PGXCSubCluster *subcluster, int *numnodes)
*numnodes = get_pgxc_groupmembers(group_oid, &nodes);
}
}
- else
+ else if (!nodes)
{
- /* This is the case of a list of nodes */
- foreach(lc, subcluster->members)
- {
- char *node_name = strVal(lfirst(lc));
- Oid noid = get_pgxc_nodeoid(node_name);
-
- /* Check existence of node */
- if (!OidIsValid(noid))
- ereport(ERROR,
- (errcode(ERRCODE_DUPLICATE_OBJECT),
- errmsg("PGXC Node %s: object not defined",
- node_name)));
-
- if (get_pgxc_nodetype(noid) != PGXC_NODE_DATANODE)
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("PGXC node %s: not a Datanode",
- node_name)));
-
- /* Can be added if necessary */
- if (*numnodes != 0)
- {
- bool is_listed = false;
- int i;
-
- /* Id Oid already listed? */
- for (i = 0; i < *numnodes; i++)
- {
- if (nodes[i] == noid)
- {
- is_listed = true;
- break;
- }
- }
-
- if (!is_listed)
- {
- (*numnodes)++;
- nodes = (Oid *) repalloc(nodes, *numnodes * sizeof(Oid));
- nodes[*numnodes - 1] = noid;
- }
- }
- else
- {
- (*numnodes)++;
- nodes = (Oid *) palloc(*numnodes * sizeof(Oid));
- nodes[*numnodes - 1] = noid;
- }
- }
+ /*
+ * This is the case of a list of nodes names.
+ * Here the result is a sorted array of node Oids
+ */
+ nodes = BuildRelationDistributionNodes(subcluster->members, numnodes);
}
- return nodes;
+ /* Return a sorted array of node OIDs */
+ return SortRelationDistributionNodes(nodes, *numnodes);
+}
+
+/*
+ * SortRelationDistributionNodes
+ * Sort elements in a node array.
+ */
+Oid *
+SortRelationDistributionNodes(Oid *nodeoids, int numnodes)
+{
+ qsort(nodeoids, numnodes, sizeof(Oid), cmp_nodes);
+ return nodeoids;
}
#endif
diff --git a/src/backend/pgxc/locator/locator.c b/src/backend/pgxc/locator/locator.c
index 68174c1298..0652b88725 100644
--- a/src/backend/pgxc/locator/locator.c
+++ b/src/backend/pgxc/locator/locator.c
@@ -245,7 +245,7 @@ char *pColName;
* PGXCTODO - expand support for other data types!
*/
bool
-IsHashDistributable(Oid col_type)
+IsTypeHashDistributable(Oid col_type)
{
if(col_type == INT8OID
|| col_type == INT2OID
@@ -360,7 +360,7 @@ IsDistColumnForRelId(Oid relid, char *part_col_name)
* PGXCTODO - expand support for other data types!
*/
bool
-IsModuloDistributable(Oid col_type)
+IsTypeModuloDistributable(Oid col_type)
{
if(col_type == INT8OID
|| col_type == INT2OID
diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h
index 9ead52af49..879e448772 100644
--- a/src/include/catalog/heap.h
+++ b/src/include/catalog/heap.h
@@ -124,11 +124,23 @@ extern void CheckAttributeType(const char *attname,
bool allow_system_table_mods);
#ifdef PGXC
-extern void AddRelationDistribution(Oid relid,
+/* Functions related to distribution data of relations */
+extern void AddRelationDistribution(Oid relid,
DistributeBy *distributeby,
PGXCSubCluster *subcluster,
List *parentOids,
TupleDesc descriptor);
+extern void GetRelationDistributionItems(Oid relid,
+ DistributeBy *distributeby,
+ TupleDesc descriptor,
+ char *locatortype,
+ int *hashalgorithm,
+ int *hashbuckets,
+ AttrNumber *attnum);
+extern Oid *GetRelationDistributionNodes(PGXCSubCluster *subcluster,
+ int *numnodes);
+extern Oid *BuildRelationDistributionNodes(List *nodes, int *numnodes);
+extern Oid *SortRelationDistributionNodes(Oid *nodeoids, int numnodes);
#endif
#endif /* HEAP_H */
diff --git a/src/include/pgxc/locator.h b/src/include/pgxc/locator.h
index 6e4bca32ef..555be50f5f 100644
--- a/src/include/pgxc/locator.h
+++ b/src/include/pgxc/locator.h
@@ -108,14 +108,14 @@ extern bool IsHashColumn(RelationLocInfo *rel_loc_info, char *part_col_name);
extern bool IsHashColumnForRelId(Oid relid, char *part_col_name);
extern int GetRoundRobinNode(Oid relid);
-extern bool IsHashDistributable(Oid col_type);
+extern bool IsTypeHashDistributable(Oid col_type);
extern List *GetAllDataNodes(void);
extern List *GetAllCoordNodes(void);
extern List *GetAnyDataNode(List *relNodes);
extern void RelationBuildLocator(Relation rel);
extern void FreeRelationLocInfo(RelationLocInfo *relationLocInfo);
-extern bool IsModuloDistributable(Oid col_type);
+extern bool IsTypeModuloDistributable(Oid col_type);
extern char *GetRelationModuloColumn(RelationLocInfo * rel_loc_info);
extern bool IsModuloColumn(RelationLocInfo *rel_loc_info, char *part_col_name);
extern bool IsModuloColumnForRelId(Oid relid, char *part_col_name);