Teach UNION planner to remove dummy inputs

author David Rowley <[email protected]>

Sat, 4 Oct 2025 01:30:03 +0000 (14:30 +1300)

committer David Rowley <[email protected]>

Sat, 4 Oct 2025 01:30:03 +0000 (14:30 +1300)
author David Rowley <[email protected]>
Sat, 4 Oct 2025 01:30:03 +0000 (14:30 +1300)
committer David Rowley <[email protected]>
Sat, 4 Oct 2025 01:30:03 +0000 (14:30 +1300)
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c

index 6c0e2383af94cc7991226d21cd0b57e055843470..547dbd5354047dc5b32f25110b16cdb9a3aa1aad 100644 (file)
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -523,6 +523,13 @@ build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
         bool        is_sorted;
         int         presorted_keys;
  
+       /* If the input rel is dummy, propagate that to this query level */
+       if (is_dummy_rel(final_rel))
+       {
+           mark_dummy_rel(rel);
+           continue;
+       }
+
         /*
          * Include the cheapest path as-is so that the set operation can be
          * cheaply implemented using a method which does not require the input
@@ -763,6 +770,10 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
         RelOptInfo *rel = lfirst(lc);
         Path       *ordered_path;
  
+       /* Skip any UNION children that are proven not to yield any rows */
+       if (is_dummy_rel(rel))
+           continue;
+
         cheapest_pathlist = lappend(cheapest_pathlist,
                                     rel->cheapest_total_path);
  
@@ -812,6 +823,15 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
     result_rel->consider_parallel = consider_parallel;
     result_rel->consider_startup = (root->tuple_fraction > 0);
  
+   /* If all UNION children were dummy rels, make the resulting rel dummy */
+   if (cheapest_pathlist == NIL)
+   {
+       result_rel->reltarget = create_pathtarget(root, list_nth(tlist_list, 0));
+       mark_dummy_rel(result_rel);
+
+       return result_rel;
+   }
+
     /*
      * Append the child results together using the cheapest paths from each
      * union child.
@@ -876,15 +896,33 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
         bool        can_sort = grouping_is_sortable(groupList);
         bool        can_hash = grouping_is_hashable(groupList);
  
-       /*
-        * XXX for the moment, take the number of distinct groups as equal to
-        * the total input size, i.e., the worst case.  This is too
-        * conservative, but it's not clear how to get a decent estimate of
-        * the true size.  One should note as well the propensity of novices
-        * to write UNION rather than UNION ALL even when they don't expect
-        * any duplicates...
-        */
-       dNumGroups = apath->rows;
+       if (list_length(cheapest_pathlist) == 1)
+       {
+           Path       *path = linitial(cheapest_pathlist);
+
+           /*
+            * In the case where only one union child remains due to the
+            * detection of one or more dummy union children, obtain an
+            * estimate on the surviving child directly.
+            */
+           dNumGroups = estimate_num_groups(root,
+                                            path->pathtarget->exprs,
+                                            path->rows,
+                                            NULL,
+                                            NULL);
+       }
+       else
+       {
+           /*
+            * Otherwise, for the moment, take the number of distinct groups
+            * as equal to the total input size, i.e., the worst case.  This
+            * is too conservative, but it's not clear how to get a decent
+            * estimate of the true size.  One should note as well the
+            * propensity of novices to write UNION rather than UNION ALL even
+            * when they don't expect any duplicates...
+            */
+           dNumGroups = apath->rows;
+       }
  
         if (can_hash)
         {
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out

index d3ea433db1577ee1d3aa29e049a219c853649aa5..7c089e0d598807c269a4686464b4f1fc16f13c02 100644 (file)
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -1216,6 +1216,57 @@ select event_id
  
  drop table events_child, events, other_events;
  reset enable_indexonlyscan;
+--
+-- Test handling of UNION with provably empty inputs
+--
+-- Ensure the empty UNION input is pruned and de-duplication is done for the
+-- remaining relation.
+EXPLAIN (COSTS OFF, VERBOSE)
+SELECT two FROM tenk1 WHERE 1=2
+UNION
+SELECT four FROM tenk1
+ORDER BY 1;
+              QUERY PLAN              
+--------------------------------------
+ Sort
+   Output: tenk1.four
+   Sort Key: tenk1.four
+   ->  HashAggregate
+         Output: tenk1.four
+         Group Key: tenk1.four
+         ->  Seq Scan on public.tenk1
+               Output: tenk1.four
+(8 rows)
+
+-- Validate that the results of the above are correct
+SELECT two FROM tenk1 WHERE 1=2
+UNION
+SELECT four FROM tenk1
+ORDER BY 1;
+ two 
+-----
+   0
+   1
+   2
+   3
+(4 rows)
+
+-- All UNION inputs are proven empty.  Ensure the planner provides a
+-- const-false Result node
+EXPLAIN (COSTS OFF, VERBOSE)
+SELECT two FROM tenk1 WHERE 1=2
+UNION
+SELECT four FROM tenk1 WHERE 1=2
+UNION
+SELECT ten FROM tenk1 WHERE 1=2;
+           QUERY PLAN           
+--------------------------------
+ Result
+   Output: unnamed_subquery.two
+   Replaces: Aggregate
+   One-Time Filter: false
+(4 rows)
+
  -- Test constraint exclusion of UNION ALL subqueries
  explain (costs off)
   SELECT * FROM
diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql

index 13700a6bfc4e918b951d6829e36b970146e62dca..56bd20e741cc6ecea31382e18aaaa93e03d11f23 100644 (file)
--- a/src/test/regress/sql/union.sql
+++ b/src/test/regress/sql/union.sql
@@ -459,6 +459,33 @@ drop table events_child, events, other_events;
  
  reset enable_indexonlyscan;
  
+--
+-- Test handling of UNION with provably empty inputs
+--
+
+-- Ensure the empty UNION input is pruned and de-duplication is done for the
+-- remaining relation.
+EXPLAIN (COSTS OFF, VERBOSE)
+SELECT two FROM tenk1 WHERE 1=2
+UNION
+SELECT four FROM tenk1
+ORDER BY 1;
+
+-- Validate that the results of the above are correct
+SELECT two FROM tenk1 WHERE 1=2
+UNION
+SELECT four FROM tenk1
+ORDER BY 1;
+
+-- All UNION inputs are proven empty.  Ensure the planner provides a
+-- const-false Result node
+EXPLAIN (COSTS OFF, VERBOSE)
+SELECT two FROM tenk1 WHERE 1=2
+UNION
+SELECT four FROM tenk1 WHERE 1=2
+UNION
+SELECT ten FROM tenk1 WHERE 1=2;
+
  -- Test constraint exclusion of UNION ALL subqueries
  explain (costs off)
   SELECT * FROM
author	David Rowley <[email protected]>
	Sat, 4 Oct 2025 01:30:03 +0000 (14:30 +1300)
committer	David Rowley <[email protected]>
	Sat, 4 Oct 2025 01:30:03 +0000 (14:30 +1300)
src/backend/optimizer/prep/prepunion.c		patch \| blob \| blame \| history
src/test/regress/expected/union.out		patch \| blob \| blame \| history
src/test/regress/sql/union.sql		patch \| blob \| blame \| history