Skip to content

Commit ba3e76c

Browse files
committed
Consider Incremental Sort paths at additional places
Commit d2d8a22 introduced Incremental Sort, but it was considered only in create_ordered_paths() as an alternative to regular Sort. There are many other places that require sorted input and might benefit from considering Incremental Sort too. This patch modifies a number of those places, but not all. The concern is that just adding Incremental Sort to any place that already adds Sort may increase the number of paths considered, negatively affecting planning time, without any benefit. So we've taken a more conservative approach, based on analysis of which places do affect a set of queries that did seem practical. This means some less common queries may not benefit from Incremental Sort yet. Author: Tomas Vondra Reviewed-by: James Coleman Discussion: https://postgr.es/m/CAPpHfds1waRZ=NOmueYq0sx1ZSCnt+5QJvizT8ndT2=etZEeAQ@mail.gmail.com
1 parent c7654f6 commit ba3e76c

File tree

7 files changed

+620
-49
lines changed

7 files changed

+620
-49
lines changed

contrib/postgres_fdw/postgres_fdw.c

-29
Original file line numberDiff line numberDiff line change
@@ -6523,35 +6523,6 @@ conversion_error_callback(void *arg)
65236523
}
65246524
}
65256525

6526-
/*
6527-
* Find an equivalence class member expression, all of whose Vars, come from
6528-
* the indicated relation.
6529-
*/
6530-
Expr *
6531-
find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
6532-
{
6533-
ListCell *lc_em;
6534-
6535-
foreach(lc_em, ec->ec_members)
6536-
{
6537-
EquivalenceMember *em = lfirst(lc_em);
6538-
6539-
if (bms_is_subset(em->em_relids, rel->relids) &&
6540-
!bms_is_empty(em->em_relids))
6541-
{
6542-
/*
6543-
* If there is more than one equivalence member whose Vars are
6544-
* taken entirely from this relation, we'll be content to choose
6545-
* any one of those.
6546-
*/
6547-
return em->em_expr;
6548-
}
6549-
}
6550-
6551-
/* We didn't find any suitable equivalence class expression */
6552-
return NULL;
6553-
}
6554-
65556526
/*
65566527
* Find an equivalence class member expression to be computed as a sort column
65576528
* in the given target.

src/backend/optimizer/geqo/geqo_eval.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, int num_gene,
274274
* grouping_planner).
275275
*/
276276
if (old_clump->size + new_clump->size < num_gene)
277-
generate_gather_paths(root, joinrel, false);
277+
generate_useful_gather_paths(root, joinrel, false);
278278

279279
/* Find and save the cheapest paths for this joinrel */
280280
set_cheapest(joinrel);

src/backend/optimizer/path/allpaths.c

+215-2
Original file line numberDiff line numberDiff line change
@@ -556,7 +556,7 @@ set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
556556
*/
557557
if (rel->reloptkind == RELOPT_BASEREL &&
558558
bms_membership(root->all_baserels) != BMS_SINGLETON)
559-
generate_gather_paths(root, rel, false);
559+
generate_useful_gather_paths(root, rel, false);
560560

561561
/* Now find the cheapest of the paths for this rel */
562562
set_cheapest(rel);
@@ -2727,6 +2727,219 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
27272727
}
27282728
}
27292729

2730+
/*
2731+
* get_useful_pathkeys_for_relation
2732+
* Determine which orderings of a relation might be useful.
2733+
*
2734+
* Getting data in sorted order can be useful either because the requested
2735+
* order matches the final output ordering for the overall query we're
2736+
* planning, or because it enables an efficient merge join. Here, we try
2737+
* to figure out which pathkeys to consider.
2738+
*
2739+
* This allows us to do incremental sort on top of an index scan under a gather
2740+
* merge node, i.e. parallelized.
2741+
*
2742+
* XXX At the moment this can only ever return a list with a single element,
2743+
* because it looks at query_pathkeys only. So we might return the pathkeys
2744+
* directly, but it seems plausible we'll want to consider other orderings
2745+
* in the future. For example, we might want to consider pathkeys useful for
2746+
* merge joins.
2747+
*/
2748+
static List *
2749+
get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
2750+
{
2751+
List *useful_pathkeys_list = NIL;
2752+
2753+
/*
2754+
* Considering query_pathkeys is always worth it, because it might allow us
2755+
* to avoid a total sort when we have a partially presorted path available.
2756+
*/
2757+
if (root->query_pathkeys)
2758+
{
2759+
ListCell *lc;
2760+
int npathkeys = 0; /* useful pathkeys */
2761+
2762+
foreach(lc, root->query_pathkeys)
2763+
{
2764+
PathKey *pathkey = (PathKey *) lfirst(lc);
2765+
EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
2766+
2767+
/*
2768+
* We can only build an Incremental Sort for pathkeys which contain
2769+
* an EC member in the current relation, so ignore any suffix of the
2770+
* list as soon as we find a pathkey without an EC member the
2771+
* relation.
2772+
*
2773+
* By still returning the prefix of the pathkeys list that does meet
2774+
* criteria of EC membership in the current relation, we enable not
2775+
* just an incremental sort on the entirety of query_pathkeys but
2776+
* also incremental sort below a JOIN.
2777+
*/
2778+
if (!find_em_expr_for_rel(pathkey_ec, rel))
2779+
break;
2780+
2781+
npathkeys++;
2782+
}
2783+
2784+
/*
2785+
* The whole query_pathkeys list matches, so append it directly, to allow
2786+
* comparing pathkeys easily by comparing list pointer. If we have to truncate
2787+
* the pathkeys, we gotta do a copy though.
2788+
*/
2789+
if (npathkeys == list_length(root->query_pathkeys))
2790+
useful_pathkeys_list = lappend(useful_pathkeys_list,
2791+
root->query_pathkeys);
2792+
else if (npathkeys > 0)
2793+
useful_pathkeys_list = lappend(useful_pathkeys_list,
2794+
list_truncate(list_copy(root->query_pathkeys),
2795+
npathkeys));
2796+
}
2797+
2798+
return useful_pathkeys_list;
2799+
}
2800+
2801+
/*
2802+
* generate_useful_gather_paths
2803+
* Generate parallel access paths for a relation by pushing a Gather or
2804+
* Gather Merge on top of a partial path.
2805+
*
2806+
* Unlike plain generate_gather_paths, this looks both at pathkeys of input
2807+
* paths (aiming to preserve the ordering), but also considers ordering that
2808+
* might be useful for nodes above the gather merge node, and tries to add
2809+
* a sort (regular or incremental) to provide that.
2810+
*/
2811+
void
2812+
generate_useful_gather_paths(PlannerInfo *root, RelOptInfo *rel, bool override_rows)
2813+
{
2814+
ListCell *lc;
2815+
double rows;
2816+
double *rowsp = NULL;
2817+
List *useful_pathkeys_list = NIL;
2818+
Path *cheapest_partial_path = NULL;
2819+
2820+
/* If there are no partial paths, there's nothing to do here. */
2821+
if (rel->partial_pathlist == NIL)
2822+
return;
2823+
2824+
/* Should we override the rel's rowcount estimate? */
2825+
if (override_rows)
2826+
rowsp = &rows;
2827+
2828+
/* generate the regular gather (merge) paths */
2829+
generate_gather_paths(root, rel, override_rows);
2830+
2831+
/* consider incremental sort for interesting orderings */
2832+
useful_pathkeys_list = get_useful_pathkeys_for_relation(root, rel);
2833+
2834+
/* used for explicit (full) sort paths */
2835+
cheapest_partial_path = linitial(rel->partial_pathlist);
2836+
2837+
/*
2838+
* Consider incremental sort paths for each interesting ordering.
2839+
*/
2840+
foreach(lc, useful_pathkeys_list)
2841+
{
2842+
List *useful_pathkeys = lfirst(lc);
2843+
ListCell *lc2;
2844+
bool is_sorted;
2845+
int presorted_keys;
2846+
2847+
foreach(lc2, rel->partial_pathlist)
2848+
{
2849+
Path *subpath = (Path *) lfirst(lc2);
2850+
GatherMergePath *path;
2851+
2852+
/*
2853+
* If the path has no ordering at all, then we can't use either
2854+
* incremental sort or rely on implict sorting with a gather merge.
2855+
*/
2856+
if (subpath->pathkeys == NIL)
2857+
continue;
2858+
2859+
is_sorted = pathkeys_count_contained_in(useful_pathkeys,
2860+
subpath->pathkeys,
2861+
&presorted_keys);
2862+
2863+
/*
2864+
* We don't need to consider the case where a subpath is already
2865+
* fully sorted because generate_gather_paths already creates a
2866+
* gather merge path for every subpath that has pathkeys present.
2867+
*
2868+
* But since the subpath is already sorted, we know we don't need
2869+
* to consider adding a sort (other either kind) on top of it, so
2870+
* we can continue here.
2871+
*/
2872+
if (is_sorted)
2873+
continue;
2874+
2875+
/*
2876+
* Consider regular sort for the cheapest partial path (for each
2877+
* useful pathkeys). We know the path is not sorted, because we'd
2878+
* not get here otherwise.
2879+
*
2880+
* This is not redundant with the gather paths created in
2881+
* generate_gather_paths, because that doesn't generate ordered
2882+
* output. Here we add an explicit sort to match the useful
2883+
* ordering.
2884+
*/
2885+
if (cheapest_partial_path == subpath)
2886+
{
2887+
Path *tmp;
2888+
2889+
tmp = (Path *) create_sort_path(root,
2890+
rel,
2891+
subpath,
2892+
useful_pathkeys,
2893+
-1.0);
2894+
2895+
rows = tmp->rows * tmp->parallel_workers;
2896+
2897+
path = create_gather_merge_path(root, rel,
2898+
tmp,
2899+
rel->reltarget,
2900+
tmp->pathkeys,
2901+
NULL,
2902+
rowsp);
2903+
2904+
add_path(rel, &path->path);
2905+
2906+
/* Fall through */
2907+
}
2908+
2909+
/*
2910+
* Consider incremental sort, but only when the subpath is already
2911+
* partially sorted on a pathkey prefix.
2912+
*/
2913+
if (enable_incrementalsort && presorted_keys > 0)
2914+
{
2915+
Path *tmp;
2916+
2917+
/*
2918+
* We should have already excluded pathkeys of length 1 because
2919+
* then presorted_keys > 0 would imply is_sorted was true.
2920+
*/
2921+
Assert(list_length(useful_pathkeys) != 1);
2922+
2923+
tmp = (Path *) create_incremental_sort_path(root,
2924+
rel,
2925+
subpath,
2926+
useful_pathkeys,
2927+
presorted_keys,
2928+
-1);
2929+
2930+
path = create_gather_merge_path(root, rel,
2931+
tmp,
2932+
rel->reltarget,
2933+
tmp->pathkeys,
2934+
NULL,
2935+
rowsp);
2936+
2937+
add_path(rel, &path->path);
2938+
}
2939+
}
2940+
}
2941+
}
2942+
27302943
/*
27312944
* make_rel_from_joinlist
27322945
* Build access paths using a "joinlist" to guide the join path search.
@@ -2899,7 +3112,7 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
28993112
* once we know the final targetlist (see grouping_planner).
29003113
*/
29013114
if (lev < levels_needed)
2902-
generate_gather_paths(root, rel, false);
3115+
generate_useful_gather_paths(root, rel, false);
29033116

29043117
/* Find and save the cheapest paths for this rel */
29053118
set_cheapest(rel);

src/backend/optimizer/path/equivclass.c

+28
Original file line numberDiff line numberDiff line change
@@ -774,6 +774,34 @@ get_eclass_for_sort_expr(PlannerInfo *root,
774774
return newec;
775775
}
776776

777+
/*
778+
* Find an equivalence class member expression, all of whose Vars, come from
779+
* the indicated relation.
780+
*/
781+
Expr *
782+
find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
783+
{
784+
ListCell *lc_em;
785+
786+
foreach(lc_em, ec->ec_members)
787+
{
788+
EquivalenceMember *em = lfirst(lc_em);
789+
790+
if (bms_is_subset(em->em_relids, rel->relids) &&
791+
!bms_is_empty(em->em_relids))
792+
{
793+
/*
794+
* If there is more than one equivalence member whose Vars are
795+
* taken entirely from this relation, we'll be content to choose
796+
* any one of those.
797+
*/
798+
return em->em_expr;
799+
}
800+
}
801+
802+
/* We didn't find any suitable equivalence class expression */
803+
return NULL;
804+
}
777805

778806
/*
779807
* generate_base_implied_equalities

0 commit comments

Comments
 (0)