diff options
author | Tomas Vondra | 2017-07-15 15:39:17 +0000 |
---|---|---|
committer | Tomas Vondra | 2017-07-15 15:39:17 +0000 |
commit | 9f4a54d1c433f95a91a29dc417b52d563ee851b5 (patch) | |
tree | 7bc0729ce7e1542267a1ef12cb1c70c7ed0245c3 | |
parent | 62731781efed4824e890d6caf50681bbe9028927 (diff) |
Ensure grouping sets get properly distributed data
Grouping sets are stricter about distribution of input data, as all the
execution happens on the coordinator - there is no support for partial
grouping sets yet, so we can either push all the grouping set work to
the remote node (if all the sets include the distribution key), or make
sure that there is a Remote Subquery on the input path.
This is what Postgres-XL 9.6 was doing, but it got lost during merge
with PostgreSQL 10 which significantly reworked this part of the code.
Two queries still produce incorrect result, but those are not actually
using the grouping sets paths because
GROUP BY GROUPING SETS (a, b)
gets transformed into simple
GROUP BY a, b
and ends up using parallel aggregation. The bug seems to be that the
sort orders mismatch for some reason - the remote part produces data
sorted by "a" but the "Finalize GroupAggregate" expects input sorted
by "a, b" leading to duplicate groups in the result.
-rw-r--r-- | src/backend/optimizer/plan/planner.c | 26 | ||||
-rw-r--r-- | src/test/regress/expected/groupingsets.out | 109 |
2 files changed, 86 insertions, 49 deletions
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 0fe31f2952..842945d84c 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -4917,6 +4917,14 @@ consider_groupingsets_paths(PlannerInfo *root, strat = AGG_MIXED; } + /* + * If the grouping can't be fully pushed down, redistribute the + * path on top of the (sorted) path. If if can be pushed down, + * disable construction of complex distributed paths. + */ + if (! can_push_down_grouping(root, parse, path)) + path = create_remotesubplan_path(root, path, NULL); + add_path(grouped_rel, (Path *) create_groupingsets_path(root, grouped_rel, @@ -5075,6 +5083,14 @@ consider_groupingsets_paths(PlannerInfo *root, if (rollups) { + /* + * If the grouping can't be fully pushed down, redistribute the + * path on top of the (sorted) path. If if can be pushed down, + * disable construction of complex distributed paths. + */ + if (! can_push_down_grouping(root, parse, path)) + path = create_remotesubplan_path(root, path, NULL); + add_path(grouped_rel, (Path *) create_groupingsets_path(root, grouped_rel, @@ -5092,6 +5108,15 @@ consider_groupingsets_paths(PlannerInfo *root, * Now try the simple sorted case. */ if (!gd->unsortable_sets) + { + /* + * If the grouping can't be fully pushed down, redistribute the + * path on top of the (sorted) path. If if can be pushed down, + * disable construction of complex distributed paths. + */ + if (! can_push_down_grouping(root, parse, path)) + path = create_remotesubplan_path(root, path, NULL); + add_path(grouped_rel, (Path *) create_groupingsets_path(root, grouped_rel, @@ -5102,6 +5127,7 @@ consider_groupingsets_paths(PlannerInfo *root, gd->rollups, agg_costs, dNumGroups)); + } } /* diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out index 8cfdb1fba3..032ef9c2cf 100644 --- a/src/test/regress/expected/groupingsets.out +++ b/src/test/regress/expected/groupingsets.out @@ -977,10 +977,11 @@ explain (costs off) -> MixedAggregate Hash Key: unsortable_col Group Key: unhashable_col - -> Sort - Sort Key: unhashable_col - -> Seq Scan on gstest4 -(8 rows) + -> Remote Subquery Scan on all (datanode_1,datanode_2) + -> Sort + Sort Key: unhashable_col + -> Seq Scan on gstest4 +(9 rows) select unhashable_col, unsortable_col, grouping(unhashable_col, unsortable_col), @@ -1020,10 +1021,11 @@ explain (costs off) -> MixedAggregate Hash Key: v, unsortable_col Group Key: v, unhashable_col - -> Sort - Sort Key: v, unhashable_col - -> Seq Scan on gstest4 -(8 rows) + -> Remote Subquery Scan on all (datanode_1,datanode_2) + -> Sort + Sort Key: v, unhashable_col + -> Seq Scan on gstest4 +(9 rows) -- empty input: first is 0 rows, second 1, third 3 etc. select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a); @@ -1033,13 +1035,14 @@ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a) explain (costs off) select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),a); - QUERY PLAN --------------------------------- + QUERY PLAN +----------------------------------------------------------- HashAggregate Hash Key: a, b Hash Key: a - -> Seq Scan on gstest_empty -(4 rows) + -> Remote Subquery Scan on all (datanode_1,datanode_2) + -> Seq Scan on gstest_empty +(5 rows) select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),()); a | b | sum | count @@ -1057,15 +1060,16 @@ select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),() explain (costs off) select a, b, sum(v), count(*) from gstest_empty group by grouping sets ((a,b),(),(),()); - QUERY PLAN --------------------------------- + QUERY PLAN +----------------------------------------------------------- MixedAggregate Hash Key: a, b Group Key: () Group Key: () Group Key: () - -> Seq Scan on gstest_empty -(6 rows) + -> Remote Subquery Scan on all (datanode_1,datanode_2) + -> Seq Scan on gstest_empty +(7 rows) select sum(v), count(*) from gstest_empty group by grouping sets ((),(),()); sum | count @@ -1077,14 +1081,15 @@ select sum(v), count(*) from gstest_empty group by grouping sets ((),(),()); explain (costs off) select sum(v), count(*) from gstest_empty group by grouping sets ((),(),()); - QUERY PLAN --------------------------------- + QUERY PLAN +----------------------------------------------------------- Aggregate Group Key: () Group Key: () Group Key: () - -> Seq Scan on gstest_empty -(5 rows) + -> Remote Subquery Scan on all (datanode_1,datanode_2) + -> Seq Scan on gstest_empty +(6 rows) -- check that functionally dependent cols are not nulled select a, d, grouping(a,b,c) @@ -1102,13 +1107,15 @@ explain (costs off) select a, d, grouping(a,b,c) from gstest3 group by grouping sets ((a,b), (a,c)); - QUERY PLAN ---------------------------- - HashAggregate - Hash Key: a, b - Hash Key: a, c - -> Seq Scan on gstest3 -(4 rows) + QUERY PLAN +---------------------------------- + Remote Fast Query Execution + Node/s: datanode_1, datanode_2 + -> HashAggregate + Hash Key: a, b + Hash Key: a, c + -> Seq Scan on gstest3 +(6 rows) -- simple rescan tests select a, b, sum(v.x) @@ -1209,8 +1216,8 @@ select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum explain (costs off) select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum from gstest2 group by cube (a,b) order by rsum, a, b; - QUERY PLAN ---------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------- Sort Sort Key: (sum((sum(c))) OVER (?)), a, b -> WindowAgg @@ -1221,8 +1228,9 @@ explain (costs off) Hash Key: a Hash Key: b Group Key: () - -> Seq Scan on gstest2 -(11 rows) + -> Remote Subquery Scan on all (datanode_1,datanode_2) + -> Seq Scan on gstest2 +(12 rows) select a, b, sum(v.x) from (values (1),(2)) v(x), gstest_data(v.x) @@ -1395,8 +1403,8 @@ explain (costs off) count(hundred), count(thousand), count(twothousand), count(*) from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two); - QUERY PLAN -------------------------------- + QUERY PLAN +----------------------------------------------------------- MixedAggregate Hash Key: two Hash Key: four @@ -1407,10 +1415,11 @@ explain (costs off) Group Key: twothousand Sort Key: thousand Group Key: thousand - -> Sort - Sort Key: unique1 - -> Seq Scan on tenk1 -(13 rows) + -> Remote Subquery Scan on all (datanode_1,datanode_2) + -> Sort + Sort Key: unique1 + -> Seq Scan on tenk1 +(14 rows) explain (costs off) select unique1, @@ -1418,18 +1427,19 @@ explain (costs off) count(hundred), count(thousand), count(twothousand), count(*) from tenk1 group by grouping sets (unique1,hundred,ten,four,two); - QUERY PLAN -------------------------------- + QUERY PLAN +----------------------------------------------------------- MixedAggregate Hash Key: two Hash Key: four Hash Key: ten Hash Key: hundred Group Key: unique1 - -> Sort - Sort Key: unique1 - -> Seq Scan on tenk1 -(9 rows) + -> Remote Subquery Scan on all (datanode_1,datanode_2) + -> Sort + Sort Key: unique1 + -> Seq Scan on tenk1 +(10 rows) set work_mem = '384kB'; explain (costs off) @@ -1438,8 +1448,8 @@ explain (costs off) count(hundred), count(thousand), count(twothousand), count(*) from tenk1 group by grouping sets (unique1,twothousand,thousand,hundred,ten,four,two); - QUERY PLAN -------------------------------- + QUERY PLAN +----------------------------------------------------------- MixedAggregate Hash Key: two Hash Key: four @@ -1449,9 +1459,10 @@ explain (costs off) Group Key: unique1 Sort Key: twothousand Group Key: twothousand - -> Sort - Sort Key: unique1 - -> Seq Scan on tenk1 -(12 rows) + -> Remote Subquery Scan on all (datanode_1,datanode_2) + -> Sort + Sort Key: unique1 + -> Seq Scan on tenk1 +(13 rows) -- end |