summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tomas Vondra 2017-07-09 20:02:21 +0000
committer Tomas Vondra 2017-07-09 20:02:21 +0000
commit 7ba7029dab2d1f347475cb0cc670442e413b0590 (patch)
tree 02bf38562561fa4162e0f1eb0b5451518f77022d
parent e5944ebcc3f6ac09f55762df77540e68a634761f (diff)
Properly redistribute results of Gather Merge nodes
The optimizer was not generating correct distributed paths with Gather Merge nodes, because those nodes always looked as if the data was not distributed at all. There were two bugs causing this: 1) Gather Merge did not copy the distribution from the subpath, leaving it NULL (as if running on the coordinator), so it appeared that no Remote Subquery was needed. 2) create_grouping_paths() did not check whether a Remote Subquery is needed on top of Gather Merge anyway. After fixing these two issues, we're now generating correct plans (at least judging by the select_parallel regression suite).
-rw-r--r-- src/backend/optimizer/plan/planner.c 8
-rw-r--r-- src/backend/optimizer/util/pathnode.c 3
-rw-r--r-- src/test/regress/expected/select_parallel.out 21
3 files changed, 22 insertions, 10 deletions
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index a95572b87b..0fe31f2952 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -4312,6 +4312,14 @@ create_grouping_paths(PlannerInfo *root,
NULL,
&total_groups);
+ /*
+ * If the grouping can't be fully pushed down, we'll push down the
+ * first phase of the aggregate, and redistribute only the partial
+ * results.
+ */
+ if (! can_push_down_grouping(root, parse, gmpath))
+ gmpath = create_remotesubplan_path(root, gmpath, NULL);
+
if (parse->hasAggs)
add_path(grouped_rel, (Path *)
create_agg_path(root,
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 8d99cf9b34..d5f964419b 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -3018,6 +3018,9 @@ create_gather_merge_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
required_outer);
pathnode->path.parallel_aware = false;
+ /* distribution is the same as in the subpath */
+ pathnode->path.distribution = (Distribution *) copyObject(subpath->distribution);
+
pathnode->subpath = subpath;
pathnode->num_workers = subpath->parallel_workers;
pathnode->path.pathkeys = pathkeys;
diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out
index 079dca310a..4392a3ada3 100644
--- a/src/test/regress/expected/select_parallel.out
+++ b/src/test/regress/expected/select_parallel.out
@@ -278,18 +278,19 @@ reset enable_nestloop;
set enable_hashagg to off;
explain (costs off)
select string4, count((unique2)) from tenk1 group by string4 order by string4;
- QUERY PLAN
-----------------------------------------------------
+ QUERY PLAN
+-----------------------------------------------------------
Finalize GroupAggregate
Group Key: string4
- -> Gather Merge
- Workers Planned: 4
- -> Partial GroupAggregate
- Group Key: string4
- -> Sort
- Sort Key: string4
- -> Parallel Seq Scan on tenk1
-(9 rows)
+ -> Remote Subquery Scan on all (datanode_1,datanode_2)
+ -> Gather Merge
+ Workers Planned: 4
+ -> Partial GroupAggregate
+ Group Key: string4
+ -> Sort
+ Sort Key: string4
+ -> Parallel Seq Scan on tenk1
+(10 rows)
select string4, count((unique2)) from tenk1 group by string4 order by string4;
string4 | count