summaryrefslogtreecommitdiff
path: root/src/backend/optimizer/path/clausesel.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/optimizer/path/clausesel.c')
-rw-r--r--src/backend/optimizer/path/clausesel.c137
1 files changed, 113 insertions, 24 deletions
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index 02660c2ba5..758ddea4a5 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -3,7 +3,7 @@
* clausesel.c
* Routines to compute clause selectivities
*
- * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
@@ -22,6 +22,7 @@
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
+#include "statistics/statistics.h"
/*
@@ -40,7 +41,8 @@ typedef struct RangeQueryClause
static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
bool varonleft, bool isLTsel, Selectivity s2);
-
+static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
+ List *clauses);
/****************************************************************************
* ROUTINES TO COMPUTE SELECTIVITIES
@@ -60,23 +62,28 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
* subclauses. However, that's only right if the subclauses have independent
* probabilities, and in reality they are often NOT independent. So,
* we want to be smarter where we can.
-
- * Currently, the only extra smarts we have is to recognize "range queries",
- * such as "x > 34 AND x < 42". Clauses are recognized as possible range
- * query components if they are restriction opclauses whose operators have
- * scalarltsel() or scalargtsel() as their restriction selectivity estimator.
- * We pair up clauses of this form that refer to the same variable. An
- * unpairable clause of this kind is simply multiplied into the selectivity
- * product in the normal way. But when we find a pair, we know that the
- * selectivities represent the relative positions of the low and high bounds
- * within the column's range, so instead of figuring the selectivity as
- * hisel * losel, we can figure it as hisel + losel - 1. (To visualize this,
- * see that hisel is the fraction of the range below the high bound, while
- * losel is the fraction above the low bound; so hisel can be interpreted
- * directly as a 0..1 value but we need to convert losel to 1-losel before
- * interpreting it as a value. Then the available range is 1-losel to hisel.
- * However, this calculation double-excludes nulls, so really we need
- * hisel + losel + null_frac - 1.)
+ *
+ * If the clauses taken together refer to just one relation, we'll try to
+ * apply selectivity estimates using any extended statistics for that rel.
+ * Currently we only have (soft) functional dependencies, so apply these in as
+ * many cases as possible, and fall back on normal estimates for remaining
+ * clauses.
+ *
+ * We also recognize "range queries", such as "x > 34 AND x < 42". Clauses
+ * are recognized as possible range query components if they are restriction
+ * opclauses whose operators have scalarltsel() or scalargtsel() as their
+ * restriction selectivity estimator. We pair up clauses of this form that
+ * refer to the same variable. An unpairable clause of this kind is simply
+ * multiplied into the selectivity product in the normal way. But when we
+ * find a pair, we know that the selectivities represent the relative
+ * positions of the low and high bounds within the column's range, so instead
+ * of figuring the selectivity as hisel * losel, we can figure it as hisel +
+ * losel - 1. (To visualize this, see that hisel is the fraction of the range
+ * below the high bound, while losel is the fraction above the low bound; so
+ * hisel can be interpreted directly as a 0..1 value but we need to convert
+ * losel to 1-losel before interpreting it as a value. Then the available
+ * range is 1-losel to hisel. However, this calculation double-excludes
+ * nulls, so really we need hisel + losel + null_frac - 1.)
*
* If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
* and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
@@ -96,28 +103,67 @@ clauselist_selectivity(PlannerInfo *root,
SpecialJoinInfo *sjinfo)
{
Selectivity s1 = 1.0;
+ RelOptInfo *rel;
+ Bitmapset *estimatedclauses = NULL;
RangeQueryClause *rqlist = NULL;
ListCell *l;
+ int listidx;
/*
- * If there's exactly one clause, then no use in trying to match up pairs,
- * so just go directly to clause_selectivity().
+ * If there's exactly one clause, just go directly to
+ * clause_selectivity(). None of what we might do below is relevant.
*/
if (list_length(clauses) == 1)
return clause_selectivity(root, (Node *) linitial(clauses),
varRelid, jointype, sjinfo);
/*
- * Initial scan over clauses. Anything that doesn't look like a potential
- * rangequery clause gets multiplied into s1 and forgotten. Anything that
- * does gets inserted into an rqlist entry.
+ * Determine if these clauses reference a single relation. If so, and if
+ * it has extended statistics, try to apply those.
*/
+ rel = find_single_rel_for_clauses(root, clauses);
+ if (rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
+ {
+ /*
+ * Perform selectivity estimations on any clauses found applicable by
+ * dependencies_clauselist_selectivity. 'estimatedclauses' will be
+ * filled with the 0-based list positions of clauses used that way, so
+ * that we can ignore them below.
+ */
+ s1 *= dependencies_clauselist_selectivity(root, clauses, varRelid,
+ jointype, sjinfo, rel,
+ &estimatedclauses);
+
+ /*
+ * This would be the place to apply any other types of extended
+ * statistics selectivity estimations for remaining clauses.
+ */
+ }
+
+ /*
+ * Apply normal selectivity estimates for remaining clauses. We'll be
+ * careful to skip any clauses which were already estimated above.
+ *
+ * Anything that doesn't look like a potential rangequery clause gets
+ * multiplied into s1 and forgotten. Anything that does gets inserted into
+ * an rqlist entry.
+ */
+ listidx = -1;
foreach(l, clauses)
{
Node *clause = (Node *) lfirst(l);
RestrictInfo *rinfo;
Selectivity s2;
+ listidx++;
+
+ /*
+ * Skip this clause if it's already been estimated by some other
+ * statistics above.
+ */
+ if (bms_is_member(listidx, estimatedclauses))
+ continue;
+
/* Always compute the selectivity using clause_selectivity */
s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo);
@@ -373,6 +419,49 @@ addRangeClause(RangeQueryClause **rqlist, Node *clause,
}
/*
+ * find_single_rel_for_clauses
+ * Examine each clause in 'clauses' and determine if all clauses
+ * reference only a single relation. If so return that relation,
+ * otherwise return NULL.
+ */
+static RelOptInfo *
+find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
+{
+ int lastrelid = 0;
+ ListCell *l;
+
+ foreach(l, clauses)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
+ int relid;
+
+ /*
+ * If we have a list of bare clauses rather than RestrictInfos, we
+ * could pull out their relids the hard way with pull_varnos().
+ * However, currently the extended-stats machinery won't do anything
+ * with non-RestrictInfo clauses anyway, so there's no point in
+ * spending extra cycles; just fail if that's what we have.
+ */
+ if (!IsA(rinfo, RestrictInfo))
+ return NULL;
+
+ if (bms_is_empty(rinfo->clause_relids))
+ continue; /* we can ignore variable-free clauses */
+ if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
+ return NULL; /* multiple relations in this clause */
+ if (lastrelid == 0)
+ lastrelid = relid; /* first clause referencing a relation */
+ else if (relid != lastrelid)
+ return NULL; /* relation not same as last one */
+ }
+
+ if (lastrelid != 0)
+ return find_base_rel(root, lastrelid);
+
+ return NULL; /* no clauses */
+}
+
+/*
* bms_is_subset_singleton
*
* Same result as bms_is_subset(s, bms_make_singleton(x)),