Disable anonymous record hash support except in special cases

author Peter Eisentraut <[email protected]>

Wed, 8 Sep 2021 07:25:46 +0000 (09:25 +0200)

committer Peter Eisentraut <[email protected]>

Wed, 8 Sep 2021 07:55:04 +0000 (09:55 +0200)
author Peter Eisentraut <[email protected]>
Wed, 8 Sep 2021 07:25:46 +0000 (09:25 +0200)
committer Peter Eisentraut <[email protected]>
Wed, 8 Sep 2021 07:55:04 +0000 (09:55 +0200)
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c

index 15669c82386a0f579fe694059e82eac98e3e57ef..146ee8dd1eae7c927a0fc1f4d78136d03edfeb78 100644 (file)
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -1852,9 +1852,12 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt)
  
  /*
   * Make a SortGroupClause node for a SetOperationStmt's groupClauses
+ *
+ * If require_hash is true, the caller is indicating that they need hash
+ * support or they will fail.  So look extra hard for hash support.
   */
  SortGroupClause *
-makeSortGroupClauseForSetOp(Oid rescoltype)
+makeSortGroupClauseForSetOp(Oid rescoltype, bool require_hash)
  {
         SortGroupClause *grpcl = makeNode(SortGroupClause);
         Oid                     sortop;
@@ -1867,6 +1870,15 @@ makeSortGroupClauseForSetOp(Oid rescoltype)
                                                          &sortop, &eqop, NULL,
                                                          &hashable);
  
+       /*
+        * The type cache doesn't believe that record is hashable (see
+        * cache_record_field_properties()), but if the caller really needs hash
+        * support, we can assume it does.  Worst case, if any components of the
+        * record don't support hashing, we will fail at execution.
+        */
+       if (require_hash && (rescoltype == RECORDOID || rescoltype == RECORDARRAYOID))
+               hashable = true;
+
         /* we don't have a tlist yet, so can't assign sortgrouprefs */
         grpcl->tleSortGroupRef = 0;
         grpcl->eqop = eqop;
@@ -2027,6 +2039,8 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
                 ListCell   *ltl;
                 ListCell   *rtl;
                 const char *context;
+               bool recursive = (pstate->p_parent_cte &&
+                                                 pstate->p_parent_cte->cterecursive);
  
                 context = (stmt->op == SETOP_UNION ? "UNION" :
                                    (stmt->op == SETOP_INTERSECT ? "INTERSECT" :
@@ -2048,9 +2062,7 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
                  * containing CTE as having those result columns.  We should do this
                  * only at the topmost setop of the CTE, of course.
                  */
-               if (isTopLevel &&
-                       pstate->p_parent_cte &&
-                       pstate->p_parent_cte->cterecursive)
+               if (isTopLevel && recursive)
                         determineRecursiveColTypes(pstate, op->larg, ltargetlist);
  
                 /*
@@ -2182,8 +2194,9 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
                                 setup_parser_errposition_callback(&pcbstate, pstate,
                                                                                                   bestlocation);
  
+                               /* If it's a recursive union, we need to require hashing support. */
                                 op->groupClauses = lappend(op->groupClauses,
-                                                                                  makeSortGroupClauseForSetOp(rescoltype));
+                                                                                  makeSortGroupClauseForSetOp(rescoltype, recursive));
  
                                 cancel_parser_errposition_callback(&pcbstate);
                         }
diff --git a/src/backend/rewrite/rewriteSearchCycle.c b/src/backend/rewrite/rewriteSearchCycle.c

index c50ebdba2470cf0bb7e700ff836b369de56b8d88..ef38f4025a571e28eefb87ac789374a6b5b9d19b 100644 (file)
--- a/src/backend/rewrite/rewriteSearchCycle.c
+++ b/src/backend/rewrite/rewriteSearchCycle.c
@@ -594,7 +594,7 @@ rewriteSearchAndCycle(CommonTableExpr *cte)
                 sos->colCollations = lappend_oid(sos->colCollations, InvalidOid);
                 if (!sos->all)
                         sos->groupClauses = lappend(sos->groupClauses,
-                                                                               makeSortGroupClauseForSetOp(search_seq_type));
+                                                                               makeSortGroupClauseForSetOp(search_seq_type, true));
         }
         if (cte->cycle_clause)
         {
@@ -603,14 +603,14 @@ rewriteSearchAndCycle(CommonTableExpr *cte)
                 sos->colCollations = lappend_oid(sos->colCollations, cte->cycle_clause->cycle_mark_collation);
                 if (!sos->all)
                         sos->groupClauses = lappend(sos->groupClauses,
-                                                                               makeSortGroupClauseForSetOp(cte->cycle_clause->cycle_mark_type));
+                                                                               makeSortGroupClauseForSetOp(cte->cycle_clause->cycle_mark_type, true));
  
                 sos->colTypes = lappend_oid(sos->colTypes, RECORDARRAYOID);
                 sos->colTypmods = lappend_int(sos->colTypmods, -1);
                 sos->colCollations = lappend_oid(sos->colCollations, InvalidOid);
                 if (!sos->all)
                         sos->groupClauses = lappend(sos->groupClauses,
-                                                                               makeSortGroupClauseForSetOp(RECORDARRAYOID));
+                                                                               makeSortGroupClauseForSetOp(RECORDARRAYOID, true));
         }
  
         /*
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c

index 04d487c544232b4f1a5098976464d4ccc0769b70..bc2f30bae15d8cede8bac717097499e36fee197d 100644 (file)
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -29,6 +29,7 @@
  #include "utils/arrayaccess.h"
  #include "utils/builtins.h"
  #include "utils/datum.h"
+#include "utils/fmgroids.h"
  #include "utils/lsyscache.h"
  #include "utils/memutils.h"
  #include "utils/selfuncs.h"
@@ -3973,13 +3974,46 @@ hash_array(PG_FUNCTION_ARGS)
         {
                 typentry = lookup_type_cache(element_type,
                                                                          TYPECACHE_HASH_PROC_FINFO);
-               if (!OidIsValid(typentry->hash_proc_finfo.fn_oid))
+               if (!OidIsValid(typentry->hash_proc_finfo.fn_oid) && element_type != RECORDOID)
                         ereport(ERROR,
                                         (errcode(ERRCODE_UNDEFINED_FUNCTION),
                                          errmsg("could not identify a hash function for type %s",
                                                         format_type_be(element_type))));
+
+               /*
+                * The type cache doesn't believe that record is hashable (see
+                * cache_record_field_properties()), but since we're here, we're
+                * committed to hashing, so we can assume it does.  Worst case, if any
+                * components of the record don't support hashing, we will fail at
+                * execution.
+                */
+               if (element_type == RECORDOID)
+               {
+                       MemoryContext oldcontext;
+                       TypeCacheEntry *record_typentry;
+
+                       oldcontext = MemoryContextSwitchTo(CacheMemoryContext);
+
+                       /*
+                        * Make fake type cache entry structure.  Note that we can't just
+                        * modify typentry, since that points directly into the type cache.
+                        */
+                       record_typentry = palloc(sizeof(*record_typentry));
+
+                       /* fill in what we need below */
+                       record_typentry->typlen = typentry->typlen;
+                       record_typentry->typbyval = typentry->typbyval;
+                       record_typentry->typalign = typentry->typalign;
+                       fmgr_info(F_HASH_RECORD, &record_typentry->hash_proc_finfo);
+
+                       MemoryContextSwitchTo(oldcontext);
+
+                       typentry = record_typentry;
+               }
+
                 fcinfo->flinfo->fn_extra = (void *) typentry;
         }
+
         typlen = typentry->typlen;
         typbyval = typentry->typbyval;
         typalign = typentry->typalign;
diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c

index 326fae62e22b11239f64a94f14c3eea6e65bdb58..70e5c51297d962bb82fd6d6c655caabf4a7527f0 100644 (file)
--- a/src/backend/utils/cache/typcache.c
+++ b/src/backend/utils/cache/typcache.c
@@ -1515,14 +1515,17 @@ cache_record_field_properties(TypeCacheEntry *typentry)
         /*
          * For type RECORD, we can't really tell what will work, since we don't
          * have access here to the specific anonymous type.  Just assume that
-        * everything will (we may get a failure at runtime ...)
+        * equality and comparison will (we may get a failure at runtime).  We
+        * could also claim that hashing works, but then if code that has the
+        * option between a comparison-based (sort-based) and a hash-based plan
+        * chooses hashing, stuff could fail that would otherwise work if it chose
+        * a comparison-based plan.  In practice more types support comparison
+        * than hashing.
          */
         if (typentry->type_id == RECORDOID)
         {
                 typentry->flags |= (TCFLAGS_HAVE_FIELD_EQUALITY |
-                                                       TCFLAGS_HAVE_FIELD_COMPARE |
-                                                       TCFLAGS_HAVE_FIELD_HASHING |
-                                                       TCFLAGS_HAVE_FIELD_EXTENDED_HASHING);
+                                                       TCFLAGS_HAVE_FIELD_COMPARE);
         }
         else if (typentry->typtype == TYPTYPE_COMPOSITE)
         {
diff --git a/src/include/parser/analyze.h b/src/include/parser/analyze.h

index 6716db6c1320e7bbebacffbcf067dd65563816a4..a0f0bd38d7d9d92dd46891c9a6d809abe1ca6b81 100644 (file)
--- a/src/include/parser/analyze.h
+++ b/src/include/parser/analyze.h
@@ -48,6 +48,6 @@ extern void applyLockingClause(Query *qry, Index rtindex,
  extern List *BuildOnConflictExcludedTargetlist(Relation targetrel,
                                                                                            Index exclRelIndex);
  
-extern SortGroupClause *makeSortGroupClauseForSetOp(Oid rescoltype);
+extern SortGroupClause *makeSortGroupClauseForSetOp(Oid rescoltype, bool require_hash);
  
  #endif                                                 /* ANALYZE_H */
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out

index 75f78db8f5816b146a30c1c981d9216bdb114eb3..dece7310cfef8aed3aff28ca9ae7e121f52bb27c 100644 (file)
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -648,34 +648,37 @@ reset enable_hashagg;
  set enable_hashagg to on;
  explain (costs off)
  select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
-               QUERY PLAN                
------------------------------------------
- HashAggregate
-   Group Key: "*VALUES*".column1
-   ->  Append
-         ->  Values Scan on "*VALUES*"
-         ->  Values Scan on "*VALUES*_1"
-(5 rows)
+                  QUERY PLAN                   
+-----------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: "*VALUES*".column1
+         ->  Append
+               ->  Values Scan on "*VALUES*"
+               ->  Values Scan on "*VALUES*_1"
+(6 rows)
  
  select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
     x   
  -------
- (1,4)
- (1,3)
   (1,2)
+ (1,3)
+ (1,4)
  (3 rows)
  
  explain (costs off)
  select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
-                  QUERY PLAN                   
------------------------------------------------
- HashSetOp Intersect
-   ->  Append
-         ->  Subquery Scan on "*SELECT* 1"
-               ->  Values Scan on "*VALUES*"
-         ->  Subquery Scan on "*SELECT* 2"
-               ->  Values Scan on "*VALUES*_1"
-(6 rows)
+                     QUERY PLAN                      
+-----------------------------------------------------
+ SetOp Intersect
+   ->  Sort
+         Sort Key: "*SELECT* 1".x
+         ->  Append
+               ->  Subquery Scan on "*SELECT* 1"
+                     ->  Values Scan on "*VALUES*"
+               ->  Subquery Scan on "*SELECT* 2"
+                     ->  Values Scan on "*VALUES*_1"
+(8 rows)
  
  select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
     x   
@@ -685,15 +688,17 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (va
  
  explain (costs off)
  select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
-                  QUERY PLAN                   
------------------------------------------------
- HashSetOp Except
-   ->  Append
-         ->  Subquery Scan on "*SELECT* 1"
-               ->  Values Scan on "*VALUES*"
-         ->  Subquery Scan on "*SELECT* 2"
-               ->  Values Scan on "*VALUES*_1"
-(6 rows)
+                     QUERY PLAN                      
+-----------------------------------------------------
+ SetOp Except
+   ->  Sort
+         Sort Key: "*SELECT* 1".x
+         ->  Append
+               ->  Subquery Scan on "*SELECT* 1"
+                     ->  Values Scan on "*VALUES*"
+               ->  Subquery Scan on "*SELECT* 2"
+                     ->  Values Scan on "*VALUES*_1"
+(8 rows)
  
  select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
     x   
@@ -702,21 +707,28 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (value
  (1 row)
  
  -- non-hashable type
--- With an anonymous row type, the typcache reports that the type is
--- hashable, but then it will fail at run time.
+-- With an anonymous row type, the typcache does not report that the
+-- type is hashable.  (Otherwise, this would fail at execution time.)
  explain (costs off)
  select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
-               QUERY PLAN                
------------------------------------------
- HashAggregate
-   Group Key: "*VALUES*".column1
-   ->  Append
-         ->  Values Scan on "*VALUES*"
-         ->  Values Scan on "*VALUES*_1"
-(5 rows)
+                  QUERY PLAN                   
+-----------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: "*VALUES*".column1
+         ->  Append
+               ->  Values Scan on "*VALUES*"
+               ->  Values Scan on "*VALUES*_1"
+(6 rows)
  
  select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
-ERROR:  could not identify a hash function for type money
+     x     
+-----------
+ ($100.00)
+ ($200.00)
+ ($300.00)
+(3 rows)
+
  -- With a defined row type, the typcache can inspect the type's fields
  -- for hashability.
  create type ct1 as (f1 money);
diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql

index ce22f34c719e40f536dcab351825d7095ce918a3..ca8c9b4d1288e1693541c044edcbb7c0307ca463 100644 (file)
--- a/src/test/regress/sql/union.sql
+++ b/src/test/regress/sql/union.sql
@@ -218,8 +218,8 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (value
  
  -- non-hashable type
  
--- With an anonymous row type, the typcache reports that the type is
--- hashable, but then it will fail at run time.
+-- With an anonymous row type, the typcache does not report that the
+-- type is hashable.  (Otherwise, this would fail at execution time.)
  explain (costs off)
  select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
  select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
author	Peter Eisentraut <[email protected]>
	Wed, 8 Sep 2021 07:25:46 +0000 (09:25 +0200)
committer	Peter Eisentraut <[email protected]>
	Wed, 8 Sep 2021 07:55:04 +0000 (09:55 +0200)
src/backend/parser/analyze.c		patch | blob | blame | history
src/backend/rewrite/rewriteSearchCycle.c		patch | blob | blame | history
src/backend/utils/adt/arrayfuncs.c		patch | blob | blame | history
src/backend/utils/cache/typcache.c		patch | blob | blame | history
src/include/parser/analyze.h		patch | blob | blame | history
src/test/regress/expected/union.out		patch | blob | blame | history
src/test/regress/sql/union.sql		patch | blob | blame | history