Skip to content

Commit ca976eb

Browse files
committed
Introduce a new GROUP BY strategy: put the column with the most distinct values at the head of the GROUP BY list.
This allows GROUP BY to take advantage of the cost_sort feature, which can differentiate between orderings of a pathkeys list according to the ndistinct of the first column.
1 parent 8bbfdc6 commit ca976eb

File tree

2 files changed

+76
-7
lines changed

2 files changed

+76
-7
lines changed

src/backend/optimizer/path/pathkeys.c

Lines changed: 70 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626
#include "optimizer/paths.h"
2727
#include "partitioning/partbounds.h"
2828
#include "utils/lsyscache.h"
29+
#include "utils/selfuncs.h"
2930

3031
/* Consider reordering of GROUP BY keys? */
3132
bool enable_group_by_reordering = true;
@@ -472,6 +473,10 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
472473
List *pathkeys = root->group_pathkeys;
473474
List *clauses = root->processed_groupClause;
474475

476+
double nd_max = 0.0;
477+
PathKey *pk_opt;
478+
ListCell *lc1, *lc2;
479+
475480
/* always return at least the original pathkeys/clauses */
476481
info = makeNode(GroupByOrdering);
477482
info->pathkeys = pathkeys;
@@ -517,6 +522,71 @@ get_useful_group_keys_orderings(PlannerInfo *root, Path *path)
517522
}
518523
}
519524

525+
526+
/*
527+
* Try an ordering that puts the column with the largest ndistinct value first.
528+
*/
529+
530+
forboth(lc1, root->group_pathkeys, lc2, root->processed_groupClause)
531+
{
532+
PathKey *pkey = lfirst_node(PathKey, lc1);
533+
SortGroupClause *gc = (SortGroupClause *) lfirst(lc2);
534+
Node *node;
535+
Bitmapset *relids;
536+
VariableStatData vardata;
537+
double nd = -1;
538+
bool isdefault;
539+
540+
if (foreach_current_index(lc1) >= root->num_groupby_pathkeys)
541+
break;
542+
543+
node = get_sortgroupclause_expr(gc, root->parse->targetList);
544+
relids = pull_varnos(root, node);
545+
546+
if (bms_num_members(relids) != 1 && bms_is_member(0, relids))
547+
/*
548+
* Although a functional index could estimate the number of distinct values here, the chance
549+
* is too low.
550+
*/
551+
continue;
552+
553+
examine_variable(root, node, 0, &vardata);
554+
if (!HeapTupleIsValid(vardata.statsTuple))
555+
continue;
556+
nd = get_variable_numdistinct(&vardata, &isdefault);
557+
ReleaseVariableStats(vardata);
558+
if (isdefault)
559+
continue;
560+
561+
Assert(nd >= 0);
562+
if (nd > nd_max)
563+
{
564+
nd_max = nd;
565+
pk_opt = pkey;
566+
}
567+
}
568+
569+
if (pk_opt != NULL)
570+
{
571+
List *new_pathkeys = list_make1(pk_opt);
572+
int n;
573+
574+
new_pathkeys = list_concat_unique_ptr(new_pathkeys, root->group_pathkeys);
575+
n = group_keys_reorder_by_pathkeys(new_pathkeys, &pathkeys, &clauses,
576+
root->num_groupby_pathkeys);
577+
578+
if (n > 0 &&
579+
(enable_incremental_sort || n == root->num_groupby_pathkeys) &&
580+
compare_pathkeys(pathkeys, root->group_pathkeys) != PATHKEYS_EQUAL)
581+
{
582+
info = makeNode(GroupByOrdering);
583+
info->pathkeys = pathkeys;
584+
info->clauses = clauses;
585+
586+
infos = lappend(infos, info);
587+
}
588+
}
589+
520590
#ifdef USE_ASSERT_CHECKING
521591
{
522592
GroupByOrdering *pinfo = linitial_node(GroupByOrdering, infos);

src/test/regress/expected/aggregates.out

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2811,12 +2811,11 @@ SELECT count(*) FROM btg GROUP BY z, y, w, x;
28112811
QUERY PLAN
28122812
-------------------------------------------------
28132813
GroupAggregate
2814-
Group Key: x, y, z, w
2815-
-> Incremental Sort
2816-
Sort Key: x, y, z, w
2817-
Presorted Key: x, y
2814+
Group Key: w, z, y, x
2815+
-> Sort
2816+
Sort Key: w, z, y, x
28182817
-> Index Scan using btg_x_y_idx on btg
2819-
(6 rows)
2818+
(5 rows)
28202819

28212820
-- Utilize the ordering of subquery scan to avoid a Sort operation
28222821
EXPLAIN (COSTS OFF) SELECT count(*)
@@ -2918,9 +2917,9 @@ GROUP BY c1.w, c1.z;
29182917
QUERY PLAN
29192918
-----------------------------------------------------
29202919
GroupAggregate
2921-
Group Key: c1.w, c1.z
2920+
Group Key: c1.z, c1.w
29222921
-> Sort
2923-
Sort Key: c1.w, c1.z, c1.x, c1.y
2922+
Sort Key: c1.z, c1.w, c1.x, c1.y
29242923
-> Merge Join
29252924
Merge Cond: (c1.x = c2.x)
29262925
-> Sort

0 commit comments

Comments
 (0)