diff options
author | Pavan Deolasee | 2017-07-11 08:34:11 +0000 |
---|---|---|
committer | Pavan Deolasee | 2017-07-11 08:34:11 +0000 |
commit | 93cbab90b0c6fc3fc4aa515b93057127c0ee8a1b (patch) | |
tree | dec7870dad1f426e2c85e05c8b288d42d185f3f7 | |
parent | 7ba7029dab2d1f347475cb0cc670442e413b0590 (diff) |
Ensure all partitions of a partitioned table have the same distribution.
To optimise and simplify XL's distributed query planning, we enforce that all
partitions of a partitioned table use the same distribution strategy. We also
impose the further restriction that all columns in the partitions and the
partitioned table have matching positions. This can cause some problems when tables have
dropped columns etc., but we think it's far better to optimise XL's plans than
to support all corner cases. We can look at removing some of these
restrictions later once the more usual queries run faster.
These restrictions allow us to unconditionally push down Append and MergeAppend
nodes to datanodes when these nodes are processing partitioned tables.
Some regression tests currently fail because of these added restrictions. We
will look at them in due course.
-rw-r--r-- | src/backend/commands/tablecmds.c | 89 | ||||
-rw-r--r-- | src/backend/optimizer/util/pathnode.c | 129 |
2 files changed, 165 insertions, 53 deletions
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 6d6d6b5a8b..d20670b065 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -314,6 +314,7 @@ static List *MergeAttributes(List *schema, List *supers, char relpersistence, static bool MergeCheckConstraint(List *constraints, char *name, Node *expr); static void MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel); static void MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel); +static void MergeDistributionIntoExisting(Relation child_rel, Relation parent_rel); static void StoreCatalogInheritance(Oid relationId, List *supers, bool child_is_partition); static void StoreCatalogInheritance1(Oid relationId, Oid parentOid, @@ -11322,6 +11323,22 @@ CreateInheritance(Relation child_rel, Relation parent_rel) /* Match up the constraints and bump coninhcount as needed */ MergeConstraintsIntoExisting(child_rel, parent_rel); + if (IS_PGXC_COORDINATOR) + { + /* + * Match up the distribution mechanism. + * + * If do the check only on the coordinator since the distribution + * information is not available on the datanodes. This should not cause + * any problem since if the check fails on the coordinator, the entire + * transaction will be aborted and changes will be rolled back on the + * datanodes too. In fact, since we first run the command on the + * coordinator, the error will be caught even before any changes are + * made on the datanodes. + */ + MergeDistributionIntoExisting(child_rel, parent_rel); + } + /* * OK, it looks valid. Make the catalog entries that show inheritance. */ @@ -11456,6 +11473,30 @@ MergeAttributesIntoExisting(Relation child_rel, Relation parent_rel) attributeName))); /* + * In Postgres-XL, we demand that the attribute positions of the + * child and the parent table must match too. 
This seems overly + * restrictive and may have other side-effects when one of the + * tables have dropped columns, thus impacting the attribute + * numbering. But having this restriction helps us generate far + * more efficient plans without worrying too much about attribute + * number mismatch. + * + * In common cases of partitioning, the parent table and the + * partition tables will be created at the very beginning and if + * altered, they will be altered together. + */ + if (attribute->attnum != childatt->attnum) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("table \"%s\" contains column \"%s\" at " + "position %d, but parent \"%s\" has it at position %d", + RelationGetRelationName(child_rel), + attributeName, childatt->attnum, + RelationGetRelationName(parent_rel), + attribute->attnum), + errhint("Check for column ordering and dropped columns, if any"), + errdetail("Postgres-XL requires attribute positions to match"))); + /* * OK, bump the child column's inheritance count. (If we fail * later on, this change will just roll back.) */ @@ -11668,6 +11709,54 @@ MergeConstraintsIntoExisting(Relation child_rel, Relation parent_rel) heap_close(catalog_relation, RowExclusiveLock); } +static void +MergeDistributionIntoExisting(Relation child_rel, Relation parent_rel) +{ + RelationLocInfo *parent_locinfo = RelationGetLocInfo(parent_rel); + RelationLocInfo *child_locinfo = RelationGetLocInfo(child_rel); + List *nodeList1, *nodeList2; + + + nodeList1 = parent_locinfo->rl_nodeList; + nodeList2 = child_locinfo->rl_nodeList; + + /* Same locator type? 
*/ + if (parent_locinfo->locatorType != child_locinfo->locatorType) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("table \"%s\" is using distribution type %c, but the " + "parent table \"%s\" is using distribution type %c", + RelationGetRelationName(child_rel), + child_locinfo->locatorType, + RelationGetRelationName(parent_rel), + parent_locinfo->locatorType), + errdetail("Distribution type for the child must be same as the parent"))); + + + /* Same attribute number? */ + if (parent_locinfo->partAttrNum != child_locinfo->partAttrNum) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("table \"%s\" is distributed on column \"%s\", but the " + "parent table \"%s\" is distributed on column \"%s\"", + RelationGetRelationName(child_rel), + child_locinfo->partAttrName, + RelationGetRelationName(parent_rel), + parent_locinfo->partAttrName), + errdetail("Distribution column for the child must be same as the parent"))); + + /* Same node list? */ + if (list_difference_int(nodeList1, nodeList2) != NIL || + list_difference_int(nodeList2, nodeList1) != NIL) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("child table \"%s\" and the parent table \"%s\" " + "are distributed on different sets of nodes", + RelationGetRelationName(child_rel), + RelationGetRelationName(parent_rel)), + errdetail("Distribution nodes for the child must be same as the parent"))); +} + /* * ALTER TABLE NO INHERIT * diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index d5f964419b..4c10883dc1 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -2432,15 +2432,27 @@ create_append_path(RelOptInfo *rel, List *subpaths, Relids required_outer, pathnode->path.pathkeys = NIL; /* result is always considered unsorted */ #ifdef XCP /* - * Append path is used to implement scans of inherited tables and some - * "set" operations, like UNION ALL. 
While all inherited tables should - * have the same distribution, UNION'ed queries may have different. - * When paths being appended have the same distribution it is OK to push - * Append down to the data nodes. If not, perform "coordinator" Append. + * Append path is used to implement scans of partitioned tables, inherited + * tables and some "set" operations, like UNION ALL. While all partitioned + * and inherited tables should have the same distribution, UNION'ed queries + * may have different. When paths being appended have the same + * distribution it is OK to push Append down to the data nodes. If not, + * perform "coordinator" Append. + * + * Since we ensure that all partitions of a partitioned table are always + * distributed by the same strategy on the same set of nodes, we can push + * down MergeAppend of partitions of the table. */ - + if (partitioned_rels && subpaths) + { + /* Take distribution of the first node */ + l = list_head(subpaths); + subpath = (Path *) lfirst(l); + distribution = copyObject(subpath->distribution); + pathnode->path.distribution = distribution; + } /* Special case of the dummy relation, if the subpaths list is empty */ - if (subpaths) + else if (subpaths) { /* Take distribution of the first node */ l = list_head(subpaths); @@ -2558,70 +2570,81 @@ create_merge_append_path(PlannerInfo *root, pathnode->path.parent = rel; #ifdef XCP /* - * It is safe to push down MergeAppend if all subpath distributions - * are the same and these distributions are Replicated or distribution key - * is the expression of the first pathkey. + * Since we ensure that all partitions of a partitioned table are always + * distributed by the same strategy on the same set of nodes, we can push + * down MergeAppend of partitions of the table. 
+ * + * For MergeAppend of non-partitions, it is safe to push down MergeAppend + * if all subpath distributions are the same and these distributions are + * Replicated or distribution key is the expression of the first pathkey. */ - /* Take distribution of the first node */ l = list_head(subpaths); subpath = (Path *) lfirst(l); distribution = copyObject(subpath->distribution); - /* - * Verify if it is safe to push down MergeAppend with this distribution. - * TODO implement check of the second condition (distribution key is the - * first pathkey) - */ - if (distribution == NULL || IsLocatorReplicated(distribution->distributionType)) + + if (partitioned_rels) + { + pathnode->path.distribution = distribution; + } + else { /* - * Check remaining subpaths, if all distributions equal to the first set - * it as a distribution of the Append path; otherwise make up coordinator - * Append + * Verify if it is safe to push down MergeAppend with this distribution. + * TODO implement check of the second condition (distribution key is the + * first pathkey) */ - while ((l = lnext(l))) + if (distribution == NULL || IsLocatorReplicated(distribution->distributionType)) { - subpath = (Path *) lfirst(l); - /* - * See comments in Append path + * Check remaining subpaths, if all distributions equal to the first set + * it as a distribution of the Append path; otherwise make up coordinator + * Append */ - if (distribution && equalDistribution(distribution, subpath->distribution)) - { - if (subpath->distribution->restrictNodes) - distribution->restrictNodes = bms_union( - distribution->restrictNodes, - subpath->distribution->restrictNodes); - } - else + while ((l = lnext(l))) { - break; + subpath = (Path *) lfirst(l); + + /* + * See comments in Append path + */ + if (distribution && equalDistribution(distribution, subpath->distribution)) + { + if (subpath->distribution->restrictNodes) + distribution->restrictNodes = bms_union( + distribution->restrictNodes, + 
subpath->distribution->restrictNodes); + } + else + { + break; + } } } - } - if (l) - { - List *newsubpaths = NIL; - foreach(l, subpaths) + if (l) { - subpath = (Path *) lfirst(l); - if (subpath->distribution) + List *newsubpaths = NIL; + foreach(l, subpaths) { - /* - * If an explicit sort is necessary, make sure it's pushed - * down to the remote node (i.e. add it before the remote - * subplan). - */ - subpath = redistribute_path(root, subpath, pathkeys, - LOCATOR_TYPE_NONE, NULL, - NULL, NULL); + subpath = (Path *) lfirst(l); + if (subpath->distribution) + { + /* + * If an explicit sort is necessary, make sure it's pushed + * down to the remote node (i.e. add it before the remote + * subplan). + */ + subpath = redistribute_path(root, subpath, pathkeys, + LOCATOR_TYPE_NONE, NULL, + NULL, NULL); + } + newsubpaths = lappend(newsubpaths, subpath); } - newsubpaths = lappend(newsubpaths, subpath); + subpaths = newsubpaths; + pathnode->path.distribution = NULL; } - subpaths = newsubpaths; - pathnode->path.distribution = NULL; + else + pathnode->path.distribution = distribution; } - else - pathnode->path.distribution = distribution; #endif pathnode->path.pathtarget = rel->reltarget; |