2222 */
2323#include "postgres.h"
2424
25+ #include "nodes/nodeFuncs.h"
26+ #include "optimizer/clauses.h"
2527#include "optimizer/joininfo.h"
2628#include "optimizer/pathnode.h"
2729#include "optimizer/paths.h"
2830#include "optimizer/planmain.h"
29- #include "optimizer/var.h"
31+ #include "optimizer/tlist.h"
32+ #include "utils/lsyscache.h"
3033
3134/* local functions */
3235static bool join_is_removable (PlannerInfo * root , SpecialJoinInfo * sjinfo );
3336static void remove_rel_from_query (PlannerInfo * root , int relid ,
3437 Relids joinrelids );
3538static List * remove_rel_from_joinlist (List * joinlist , int relid , int * nremoved );
39+ static Oid distinct_col_search (int colno , List * colnos , List * opids );
3640
3741
3842/*
@@ -147,18 +151,15 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
147151{
148152 int innerrelid ;
149153 RelOptInfo * innerrel ;
154+ Query * subquery = NULL ;
150155 Relids joinrelids ;
151156 List * clause_list = NIL ;
152157 ListCell * l ;
153158 int attroff ;
154159
155160 /*
156- * Currently, we only know how to remove left joins to a baserel with
157- * unique indexes. We can check most of these criteria pretty trivially
158- * to avoid doing useless extra work. But checking whether any of the
159- * indexes are unique would require iterating over the indexlist, so for
160- * now we just make sure there are indexes of some sort or other. If none
161- * of them are unique, join removal will still fail, just slightly later.
161+ * Must be a non-delaying left join to a single baserel, else we aren't
162+ * going to be able to do anything with it.
162163 */
163164 if (sjinfo -> jointype != JOIN_LEFT ||
164165 sjinfo -> delay_upper_joins ||
@@ -168,11 +169,39 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
168169 innerrelid = bms_singleton_member (sjinfo -> min_righthand );
169170 innerrel = find_base_rel (root , innerrelid );
170171
171- if (innerrel -> reloptkind != RELOPT_BASEREL ||
172- innerrel -> rtekind != RTE_RELATION ||
173- innerrel -> indexlist == NIL )
172+ if (innerrel -> reloptkind != RELOPT_BASEREL )
174173 return false;
175174
175+ /*
176+ * Before we go to the effort of checking whether any innerrel variables
177+ * are needed above the join, make a quick check to eliminate cases in
178+ * which we will surely be unable to prove uniqueness of the innerrel.
179+ */
180+ if (innerrel -> rtekind == RTE_RELATION )
181+ {
182+ /*
183+ * For a plain-relation innerrel, we only know how to prove uniqueness
184+ * by reference to unique indexes. If there are no indexes then
185+ * there are certainly no unique indexes, so there's no point in going
186+ * further.
187+ */
188+ if (innerrel -> indexlist == NIL )
189+ return false;
190+ }
191+ else if (innerrel -> rtekind == RTE_SUBQUERY )
192+ {
193+ subquery = root -> simple_rte_array [innerrelid ]-> subquery ;
194+
195+ /*
196+ * If the subquery has no qualities that support distinctness proofs
197+ * then there's no point in going further.
198+ */
199+ if (!query_supports_distinctness (subquery ))
200+ return false;
201+ }
202+ else
203+ return false; /* unsupported rtekind */
204+
176205 /* Compute the relid set for the join we are considering */
177206 joinrelids = bms_union (sjinfo -> min_lefthand , sjinfo -> min_righthand );
178207
@@ -272,12 +301,64 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
272301
273302 /*
274303 * relation_has_unique_index_for automatically adds any usable restriction
275- * clauses for the innerrel, so we needn't do that here.
304+ * clauses for the innerrel, so we needn't do that here. (XXX we are not
305+ * considering restriction clauses for subqueries; is that worth doing?)
276306 */
277307
278- /* Now examine the indexes to see if we have a matching unique index */
279- if (relation_has_unique_index_for (root , innerrel , clause_list , NIL , NIL ))
280- return true;
308+ if (innerrel -> rtekind == RTE_RELATION )
309+ {
310+ /* Now examine the indexes to see if we have a matching unique index */
311+ if (relation_has_unique_index_for (root , innerrel , clause_list , NIL , NIL ))
312+ return true;
313+ }
314+ else /* innerrel->rtekind == RTE_SUBQUERY */
315+ {
316+ List * colnos = NIL ;
317+ List * opids = NIL ;
318+
319+ /*
320+ * Build the argument lists for query_is_distinct_for: a list of
321+ * output column numbers that the query needs to be distinct over, and
322+ * a list of equality operators that the output columns need to be
323+ * distinct according to.
324+ */
325+ foreach (l , clause_list )
326+ {
327+ RestrictInfo * rinfo = (RestrictInfo * ) lfirst (l );
328+ Oid op ;
329+ Var * var ;
330+
331+ /*
332+ * Get the equality operator we need uniqueness according to.
333+ * (This might be a cross-type operator and thus not exactly the
334+ * same operator the subquery would consider; that's all right
335+ * since query_is_distinct_for can resolve such cases.) The
336+ * mergejoinability test above should have selected only OpExprs.
337+ */
338+ Assert (IsA (rinfo -> clause , OpExpr ));
339+ op = ((OpExpr * ) rinfo -> clause )-> opno ;
340+
341+ /* clause_sides_match_join identified the inner side for us */
342+ if (rinfo -> outer_is_left )
343+ var = (Var * ) get_rightop (rinfo -> clause );
344+ else
345+ var = (Var * ) get_leftop (rinfo -> clause );
346+
347+ /*
348+ * If inner side isn't a Var referencing a subquery output column,
349+ * this clause doesn't help us.
350+ */
351+ if (!var || !IsA (var , Var ) ||
352+ var -> varno != innerrelid || var -> varlevelsup != 0 )
353+ continue ;
354+
355+ colnos = lappend_int (colnos , var -> varattno );
356+ opids = lappend_oid (opids , op );
357+ }
358+
359+ if (query_is_distinct_for (subquery , colnos , opids ))
360+ return true;
361+ }
281362
282363 /*
283364 * Some day it would be nice to check for other methods of establishing
@@ -481,3 +562,189 @@ remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved)
481562
482563 return result ;
483564}
565+
566+
567+ /*
568+ * query_supports_distinctness - could the query possibly be proven distinct
569+ * on some set of output columns?
570+ *
571+ * This is effectively a pre-checking function for query_is_distinct_for().
572+ * It must return TRUE if query_is_distinct_for() could possibly return TRUE
573+ * with this query, but it should not expend a lot of cycles. The idea is
574+ * that callers can avoid doing possibly-expensive processing to compute
575+ * query_is_distinct_for()'s argument lists if the call could not possibly
576+ * succeed.
577+ */
578+ bool
579+ query_supports_distinctness (Query * query )
580+ {
581+ if (query -> distinctClause != NIL ||
582+ query -> groupClause != NIL ||
583+ query -> hasAggs ||
584+ query -> havingQual ||
585+ query -> setOperations )
586+ return true;
587+
588+ return false;
589+ }
590+
591+ /*
592+ * query_is_distinct_for - does query never return duplicates of the
593+ * specified columns?
594+ *
595+ * query is a not-yet-planned subquery (in current usage, it's always from
596+ * a subquery RTE, which the planner avoids scribbling on).
597+ *
598+ * colnos is an integer list of output column numbers (resno's). We are
599+ * interested in whether rows consisting of just these columns are certain
600+ * to be distinct. "Distinctness" is defined according to whether the
601+ * corresponding upper-level equality operators listed in opids would think
602+ * the values are distinct. (Note: the opids entries could be cross-type
603+ * operators, and thus not exactly the equality operators that the subquery
604+ * would use itself. We use equality_ops_are_compatible() to check
605+ * compatibility. That looks at btree or hash opfamily membership, and so
606+ * should give trustworthy answers for all operators that we might need
607+ * to deal with here.)
608+ */
609+ bool
610+ query_is_distinct_for (Query * query , List * colnos , List * opids )
611+ {
612+ ListCell * l ;
613+ Oid opid ;
614+
615+ Assert (list_length (colnos ) == list_length (opids ));
616+
617+ /*
618+ * A set-returning function in the query's targetlist can result in
619+ * returning duplicate rows, if the SRF is evaluated after the
620+ * de-duplication step; so we play it safe and say "no" if there are any
621+ * SRFs. (We could be certain that it's okay if SRFs appear only in the
622+ * specified columns, since those must be evaluated before de-duplication;
623+ * but it doesn't presently seem worth the complication to check that.)
624+ */
625+ if (expression_returns_set ((Node * ) query -> targetList ))
626+ return false;
627+
628+ /*
629+ * DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
630+ * columns in the DISTINCT clause appear in colnos and operator semantics
631+ * match.
632+ */
633+ if (query -> distinctClause )
634+ {
635+ foreach (l , query -> distinctClause )
636+ {
637+ SortGroupClause * sgc = (SortGroupClause * ) lfirst (l );
638+ TargetEntry * tle = get_sortgroupclause_tle (sgc ,
639+ query -> targetList );
640+
641+ opid = distinct_col_search (tle -> resno , colnos , opids );
642+ if (!OidIsValid (opid ) ||
643+ !equality_ops_are_compatible (opid , sgc -> eqop ))
644+ break ; /* exit early if no match */
645+ }
646+ if (l == NULL ) /* had matches for all? */
647+ return true;
648+ }
649+
650+ /*
651+ * Similarly, GROUP BY guarantees uniqueness if all the grouped columns
652+ * appear in colnos and operator semantics match.
653+ */
654+ if (query -> groupClause )
655+ {
656+ foreach (l , query -> groupClause )
657+ {
658+ SortGroupClause * sgc = (SortGroupClause * ) lfirst (l );
659+ TargetEntry * tle = get_sortgroupclause_tle (sgc ,
660+ query -> targetList );
661+
662+ opid = distinct_col_search (tle -> resno , colnos , opids );
663+ if (!OidIsValid (opid ) ||
664+ !equality_ops_are_compatible (opid , sgc -> eqop ))
665+ break ; /* exit early if no match */
666+ }
667+ if (l == NULL ) /* had matches for all? */
668+ return true;
669+ }
670+ else
671+ {
672+ /*
673+ * If we have no GROUP BY, but do have aggregates or HAVING, then the
674+ * result is at most one row so it's surely unique, for any operators.
675+ */
676+ if (query -> hasAggs || query -> havingQual )
677+ return true;
678+ }
679+
680+ /*
681+ * UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
682+ * except with ALL.
683+ */
684+ if (query -> setOperations )
685+ {
686+ SetOperationStmt * topop = (SetOperationStmt * ) query -> setOperations ;
687+
688+ Assert (IsA (topop , SetOperationStmt ));
689+ Assert (topop -> op != SETOP_NONE );
690+
691+ if (!topop -> all )
692+ {
693+ ListCell * lg ;
694+
695+ /* We're good if all the nonjunk output columns are in colnos */
696+ lg = list_head (topop -> groupClauses );
697+ foreach (l , query -> targetList )
698+ {
699+ TargetEntry * tle = (TargetEntry * ) lfirst (l );
700+ SortGroupClause * sgc ;
701+
702+ if (tle -> resjunk )
703+ continue ; /* ignore resjunk columns */
704+
705+ /* non-resjunk columns should have grouping clauses */
706+ Assert (lg != NULL );
707+ sgc = (SortGroupClause * ) lfirst (lg );
708+ lg = lnext (lg );
709+
710+ opid = distinct_col_search (tle -> resno , colnos , opids );
711+ if (!OidIsValid (opid ) ||
712+ !equality_ops_are_compatible (opid , sgc -> eqop ))
713+ break ; /* exit early if no match */
714+ }
715+ if (l == NULL ) /* had matches for all? */
716+ return true;
717+ }
718+ }
719+
720+ /*
721+ * XXX Are there any other cases in which we can easily see the result
722+ * must be distinct?
723+ *
724+ * If you do add more smarts to this function, be sure to update
725+ * query_supports_distinctness() to match.
726+ */
727+
728+ return false;
729+ }
730+
731+ /*
732+ * distinct_col_search - subroutine for query_is_distinct_for
733+ *
734+ * If colno is in colnos, return the corresponding element of opids,
735+ * else return InvalidOid. (Ordinarily colnos would not contain duplicates,
736+ * but if it does, we arbitrarily select the first match.)
737+ */
738+ static Oid
739+ distinct_col_search (int colno , List * colnos , List * opids )
740+ {
741+ ListCell * lc1 ,
742+ * lc2 ;
743+
744+ forboth (lc1 , colnos , lc2 , opids )
745+ {
746+ if (colno == lfirst_int (lc1 ))
747+ return lfirst_oid (lc2 );
748+ }
749+ return InvalidOid ;
750+ }
0 commit comments