summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Guo2024-12-09 11:36:23 +0000
committerRichard Guo2024-12-09 11:36:23 +0000
commit5668a857de4f3f12066b2bbc626b77be4fc95ee5 (patch)
treeccc5d2fb4f3d5b217f7e1e57338c87aa81b212ac
parentf0c569d7151532fbc9c016b01af49f04a9fb5278 (diff)
Fix right-semi-joins in HashJoin rescans
When resetting a HashJoin node for rescans, if it is a single-batch join and there are no parameter changes for the inner subnode, we can just reuse the existing hash table without rebuilding it. However, for join types that depend on the inner-tuple match flags in the hash table, we need to reset these match flags to avoid incorrect results. This applies to right, right-anti, right-semi, and full joins. When I introduced "Right Semi Join" plan shapes in aa86129e1, I failed to reset the match flags in the hash table for right-semi joins in rescans. This oversight has been shown to produce incorrect results. This patch fixes it. Author: Richard Guo Discussion: https://postgr.es/m/CAMbWs4-nQF9io2WL2SkD0eXvfPdyBc9Q=hRwfQHCGV2usa0jyA@mail.gmail.com
-rw-r--r--src/backend/executor/nodeHashjoin.c7
-rw-r--r--src/test/regress/expected/join.out63
-rw-r--r--src/test/regress/sql/join.sql30
3 files changed, 97 insertions, 3 deletions
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 6c3009fba0f..ea0045bc0f3 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -1511,10 +1511,11 @@ ExecReScanHashJoin(HashJoinState *node)
/*
* Okay to reuse the hash table; needn't rescan inner, either.
*
- * However, if it's a right/right-anti/full join, we'd better
- * reset the inner-tuple match flags contained in the table.
+ * However, if it's a right/right-anti/right-semi/full join, we'd
+ * better reset the inner-tuple match flags contained in the
+ * table.
*/
- if (HJ_FILL_INNER(node))
+ if (HJ_FILL_INNER(node) || node->js.jointype == JOIN_RIGHT_SEMI)
ExecHashTableResetMatchFlags(node->hj_HashTable);
/*
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index ebf2e3f851a..51aeb1dae66 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3036,6 +3036,69 @@ where not exists (select 1 from tbl_ra t2 where t2.b = t1.a) and t1.b < 2;
reset enable_hashjoin;
reset enable_nestloop;
--
+-- regression test for bug with hash-right-semi join
+--
+create temp table tbl_rs(a int, b int);
+insert into tbl_rs select i, i from generate_series(1,10)i;
+analyze tbl_rs;
+set enable_nestloop to off;
+set enable_hashagg to off;
+-- ensure we get a hash right semi join with SubPlan in hash clauses
+explain (costs off)
+select * from tbl_rs t1
+where (select a from tbl_rs t2
+ where exists (select 1 from
+ (select (b in (select b from tbl_rs t3)) as c from tbl_rs t4 where t4.a = 1) s
+ where c in (select t1.a = 1 from tbl_rs t5 union all select true))
+ order by a limit 1) >= 0;
+ QUERY PLAN
+--------------------------------------------------------------------------------------
+ Seq Scan on tbl_rs t1
+ Filter: ((SubPlan 3) >= 0)
+ SubPlan 3
+ -> Limit
+ InitPlan 2
+ -> Hash Right Semi Join
+ Hash Cond: (((t1.a = 1)) = (ANY (t4.b = (hashed SubPlan 1).col1)))
+ -> Append
+ -> Seq Scan on tbl_rs t5
+ -> Result
+ -> Hash
+ -> Seq Scan on tbl_rs t4
+ Filter: (a = 1)
+ SubPlan 1
+ -> Seq Scan on tbl_rs t3
+ -> Sort
+ Sort Key: t2.a
+ -> Result
+ One-Time Filter: (InitPlan 2).col1
+ -> Seq Scan on tbl_rs t2
+(20 rows)
+
+-- and check we get the expected results
+select * from tbl_rs t1
+where (select a from tbl_rs t2
+ where exists (select 1 from
+ (select (b in (select b from tbl_rs t3)) as c from tbl_rs t4 where t4.a = 1) s
+ where c in (select t1.a = 1 from tbl_rs t5 union all select true))
+ order by a limit 1) >= 0;
+ a | b
+----+----
+ 1 | 1
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 6
+ 7 | 7
+ 8 | 8
+ 9 | 9
+ 10 | 10
+(10 rows)
+
+reset enable_nestloop;
+reset enable_hashagg;
+--
-- regression test for bug #13908 (hash join with skew tuples & nbatch increase)
--
set work_mem to '64kB';
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 1004fc03551..1e9dafca573 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -739,6 +739,36 @@ reset enable_hashjoin;
reset enable_nestloop;
--
+-- regression test for bug with hash-right-semi join
+--
+create temp table tbl_rs(a int, b int);
+insert into tbl_rs select i, i from generate_series(1,10)i;
+analyze tbl_rs;
+
+set enable_nestloop to off;
+set enable_hashagg to off;
+
+-- ensure we get a hash right semi join with SubPlan in hash clauses
+explain (costs off)
+select * from tbl_rs t1
+where (select a from tbl_rs t2
+ where exists (select 1 from
+ (select (b in (select b from tbl_rs t3)) as c from tbl_rs t4 where t4.a = 1) s
+ where c in (select t1.a = 1 from tbl_rs t5 union all select true))
+ order by a limit 1) >= 0;
+
+-- and check we get the expected results
+select * from tbl_rs t1
+where (select a from tbl_rs t2
+ where exists (select 1 from
+ (select (b in (select b from tbl_rs t3)) as c from tbl_rs t4 where t4.a = 1) s
+ where c in (select t1.a = 1 from tbl_rs t5 union all select true))
+ order by a limit 1) >= 0;
+
+reset enable_nestloop;
+reset enable_hashagg;
+
+--
-- regression test for bug #13908 (hash join with skew tuples & nbatch increase)
--