Skip to content

Commit 16dc270

Browse files
committed
Support "Right Anti Join" plan shapes.
Merge and hash joins can support antijoin with the non-nullable input on the right, using very simple combinations of their existing logic for right join and anti join. This gives the planner more freedom about how to order the join. It's particularly useful for hash join, since we may now have the option to hash the smaller table instead of the larger. Richard Guo, reviewed by Ronan Dunklau and myself Discussion: https://postgr.es/m/CAMbWs48xh9hMzXzSy3VaPzGAz+fkxXXTUbCLohX1_L8THFRm2Q@mail.gmail.com
1 parent dad50f6 commit 16dc270

File tree

12 files changed

+244
-194
lines changed

12 files changed

+244
-194
lines changed

src/backend/commands/explain.c

+3
Original file line numberDiff line numberDiff line change
@@ -1561,6 +1561,9 @@ ExplainNode(PlanState *planstate, List *ancestors,
15611561
case JOIN_ANTI:
15621562
jointype = "Anti";
15631563
break;
1564+
case JOIN_RIGHT_ANTI:
1565+
jointype = "Right Anti";
1566+
break;
15641567
default:
15651568
jointype = "???";
15661569
break;

src/backend/executor/nodeHashjoin.c

+23-12
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@
8686
* PHJ_BATCH_ALLOCATE* -- one allocates buckets
8787
* PHJ_BATCH_LOAD -- all load the hash table from disk
8888
* PHJ_BATCH_PROBE -- all probe
89-
* PHJ_BATCH_SCAN* -- one does full/right unmatched scan
89+
* PHJ_BATCH_SCAN* -- one does right/right-anti/full unmatched scan
9090
* PHJ_BATCH_FREE* -- one frees memory
9191
*
9292
* Batch 0 is a special case, because it starts out in phase
@@ -228,10 +228,10 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
228228

229229
/*
230230
* If the outer relation is completely empty, and it's not
231-
* right/full join, we can quit without building the hash
232-
* table. However, for an inner join it is only a win to
233-
* check this when the outer relation's startup cost is less
234-
* than the projected cost of building the hash table.
231+
* right/right-anti/full join, we can quit without building
232+
* the hash table. However, for an inner join it is only a
233+
* win to check this when the outer relation's startup cost is
234+
* less than the projected cost of building the hash table.
235235
* Otherwise it's best to build the hash table first and see
236236
* if the inner relation is empty. (When it's a left join, we
237237
* should always make this check, since we aren't going to be
@@ -519,6 +519,14 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
519519
continue;
520520
}
521521

522+
/*
523+
* In a right-antijoin, we never return a matched tuple.
524+
* And we need to stay on the current outer tuple to
525+
* continue scanning the inner side for matches.
526+
*/
527+
if (node->js.jointype == JOIN_RIGHT_ANTI)
528+
continue;
529+
522530
/*
523531
* If we only need to join to the first matching inner
524532
* tuple, then consider returning this one, but after that
@@ -564,9 +572,10 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
564572
case HJ_FILL_INNER_TUPLES:
565573

566574
/*
567-
* We have finished a batch, but we are doing right/full join,
568-
* so any unmatched inner tuples in the hashtable have to be
569-
* emitted before we continue to the next batch.
575+
* We have finished a batch, but we are doing
576+
* right/right-anti/full join, so any unmatched inner tuples
577+
* in the hashtable have to be emitted before we continue to
578+
* the next batch.
570579
*/
571580
if (!(parallel ? ExecParallelScanHashTableForUnmatched(node, econtext)
572581
: ExecScanHashTableForUnmatched(node, econtext)))
@@ -732,6 +741,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
732741
ExecInitNullTupleSlot(estate, innerDesc, &TTSOpsVirtual);
733742
break;
734743
case JOIN_RIGHT:
744+
case JOIN_RIGHT_ANTI:
735745
hjstate->hj_NullOuterTupleSlot =
736746
ExecInitNullTupleSlot(estate, outerDesc, &TTSOpsVirtual);
737747
break;
@@ -1027,8 +1037,9 @@ ExecHashJoinNewBatch(HashJoinState *hjstate)
10271037
* side, but there are exceptions:
10281038
*
10291039
* 1. In a left/full outer join, we have to process outer batches even if
1030-
* the inner batch is empty. Similarly, in a right/full outer join, we
1031-
* have to process inner batches even if the outer batch is empty.
1040+
* the inner batch is empty. Similarly, in a right/right-anti/full outer
1041+
* join, we have to process inner batches even if the outer batch is
1042+
* empty.
10321043
*
10331044
* 2. If we have increased nbatch since the initial estimate, we have to
10341045
* scan inner batches since they might contain tuples that need to be
@@ -1349,8 +1360,8 @@ ExecReScanHashJoin(HashJoinState *node)
13491360
/*
13501361
* Okay to reuse the hash table; needn't rescan inner, either.
13511362
*
1352-
* However, if it's a right/full join, we'd better reset the
1353-
* inner-tuple match flags contained in the table.
1363+
* However, if it's a right/right-anti/full join, we'd better
1364+
* reset the inner-tuple match flags contained in the table.
13541365
*/
13551366
if (HJ_FILL_INNER(node))
13561367
ExecHashTableResetMatchFlags(node->hj_HashTable);

src/backend/executor/nodeMergejoin.c

+21-12
Original file line numberDiff line numberDiff line change
@@ -805,6 +805,14 @@ ExecMergeJoin(PlanState *pstate)
805805
break;
806806
}
807807

808+
/*
809+
* In a right-antijoin, we never return a matched tuple.
810+
* And we need to stay on the current outer tuple to
811+
* continue scanning the inner side for matches.
812+
*/
813+
if (node->js.jointype == JOIN_RIGHT_ANTI)
814+
break;
815+
808816
/*
809817
* If we only need to join to the first matching inner
810818
* tuple, then consider returning this one, but after that
@@ -1063,12 +1071,12 @@ ExecMergeJoin(PlanState *pstate)
10631071
* them will match this new outer tuple and therefore
10641072
* won't be emitted as fill tuples. This works *only*
10651073
* because we require the extra joinquals to be constant
1066-
* when doing a right or full join --- otherwise some of
1067-
* the rescanned tuples might fail the extra joinquals.
1068-
* This obviously won't happen for a constant-true extra
1069-
* joinqual, while the constant-false case is handled by
1070-
* forcing the merge clause to never match, so we never
1071-
* get here.
1074+
* when doing a right, right-anti or full join ---
1075+
* otherwise some of the rescanned tuples might fail the
1076+
* extra joinquals. This obviously won't happen for a
1077+
* constant-true extra joinqual, while the constant-false
1078+
* case is handled by forcing the merge clause to never
1079+
* match, so we never get here.
10721080
*/
10731081
if (!node->mj_SkipMarkRestore)
10741082
{
@@ -1332,8 +1340,8 @@ ExecMergeJoin(PlanState *pstate)
13321340

13331341
/*
13341342
* EXEC_MJ_ENDOUTER means we have run out of outer tuples, but
1335-
* are doing a right/full join and therefore must null-fill
1336-
* any remaining unmatched inner tuples.
1343+
* are doing a right/right-anti/full join and therefore must
1344+
* null-fill any remaining unmatched inner tuples.
13371345
*/
13381346
case EXEC_MJ_ENDOUTER:
13391347
MJ_printf("ExecMergeJoin: EXEC_MJ_ENDOUTER\n");
@@ -1554,14 +1562,15 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
15541562
ExecInitNullTupleSlot(estate, innerDesc, &TTSOpsVirtual);
15551563
break;
15561564
case JOIN_RIGHT:
1565+
case JOIN_RIGHT_ANTI:
15571566
mergestate->mj_FillOuter = false;
15581567
mergestate->mj_FillInner = true;
15591568
mergestate->mj_NullOuterTupleSlot =
15601569
ExecInitNullTupleSlot(estate, outerDesc, &TTSOpsVirtual);
15611570

15621571
/*
1563-
* Can't handle right or full join with non-constant extra
1564-
* joinclauses. This should have been caught by planner.
1572+
* Can't handle right, right-anti or full join with non-constant
1573+
* extra joinclauses. This should have been caught by planner.
15651574
*/
15661575
if (!check_constant_qual(node->join.joinqual,
15671576
&mergestate->mj_ConstFalseJoin))
@@ -1578,8 +1587,8 @@ ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
15781587
ExecInitNullTupleSlot(estate, innerDesc, &TTSOpsVirtual);
15791588

15801589
/*
1581-
* Can't handle right or full join with non-constant extra
1582-
* joinclauses. This should have been caught by planner.
1590+
* Can't handle right, right-anti or full join with non-constant
1591+
* extra joinclauses. This should have been caught by planner.
15831592
*/
15841593
if (!check_constant_qual(node->join.joinqual,
15851594
&mergestate->mj_ConstFalseJoin))

src/backend/optimizer/path/costsize.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -3330,7 +3330,8 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
33303330
outerstartsel = 0.0;
33313331
outerendsel = 1.0;
33323332
}
3333-
else if (jointype == JOIN_RIGHT)
3333+
else if (jointype == JOIN_RIGHT ||
3334+
jointype == JOIN_RIGHT_ANTI)
33343335
{
33353336
innerstartsel = 0.0;
33363337
innerendsel = 1.0;

src/backend/optimizer/path/joinpath.c

+28-21
Original file line numberDiff line numberDiff line change
@@ -286,8 +286,9 @@ add_paths_to_joinrel(PlannerInfo *root,
286286
* 2. Consider paths where the outer relation need not be explicitly
287287
* sorted. This includes both nestloops and mergejoins where the outer
288288
* path is already ordered. Again, skip this if we can't mergejoin.
289-
* (That's okay because we know that nestloop can't handle right/full
290-
* joins at all, so it wouldn't work in the prohibited cases either.)
289+
* (That's okay because we know that nestloop can't handle
290+
* right/right-anti/full joins at all, so it wouldn't work in the
291+
* prohibited cases either.)
291292
*/
292293
if (mergejoin_allowed)
293294
match_unsorted_outer(root, joinrel, outerrel, innerrel,
@@ -1261,14 +1262,15 @@ sort_inner_and_outer(PlannerInfo *root,
12611262
* If the joinrel is parallel-safe, we may be able to consider a partial
12621263
* merge join. However, we can't handle JOIN_UNIQUE_OUTER, because the
12631264
* outer path will be partial, and therefore we won't be able to properly
1264-
* guarantee uniqueness. Similarly, we can't handle JOIN_FULL and
1265-
* JOIN_RIGHT, because they can produce false null extended rows. Also,
1266-
* the resulting path must not be parameterized.
1265+
* guarantee uniqueness. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT
1266+
* and JOIN_RIGHT_ANTI, because they can produce false null extended rows.
1267+
* Also, the resulting path must not be parameterized.
12671268
*/
12681269
if (joinrel->consider_parallel &&
12691270
save_jointype != JOIN_UNIQUE_OUTER &&
12701271
save_jointype != JOIN_FULL &&
12711272
save_jointype != JOIN_RIGHT &&
1273+
save_jointype != JOIN_RIGHT_ANTI &&
12721274
outerrel->partial_pathlist != NIL &&
12731275
bms_is_empty(joinrel->lateral_relids))
12741276
{
@@ -1663,10 +1665,10 @@ match_unsorted_outer(PlannerInfo *root,
16631665

16641666
/*
16651667
* Nestloop only supports inner, left, semi, and anti joins. Also, if we
1666-
* are doing a right or full mergejoin, we must use *all* the mergeclauses
1667-
* as join clauses, else we will not have a valid plan. (Although these
1668-
* two flags are currently inverses, keep them separate for clarity and
1669-
* possible future changes.)
1668+
* are doing a right, right-anti or full mergejoin, we must use *all* the
1669+
* mergeclauses as join clauses, else we will not have a valid plan.
1670+
* (Although these two flags are currently inverses, keep them separate
1671+
* for clarity and possible future changes.)
16701672
*/
16711673
switch (jointype)
16721674
{
@@ -1678,6 +1680,7 @@ match_unsorted_outer(PlannerInfo *root,
16781680
useallclauses = false;
16791681
break;
16801682
case JOIN_RIGHT:
1683+
case JOIN_RIGHT_ANTI:
16811684
case JOIN_FULL:
16821685
nestjoinOK = false;
16831686
useallclauses = true;
@@ -1849,13 +1852,14 @@ match_unsorted_outer(PlannerInfo *root,
18491852
* handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
18501853
* therefore we won't be able to properly guarantee uniqueness. Nor can
18511854
* we handle joins needing lateral rels, since partial paths must not be
1852-
* parameterized. Similarly, we can't handle JOIN_FULL and JOIN_RIGHT,
1853-
* because they can produce false null extended rows.
1855+
* parameterized. Similarly, we can't handle JOIN_FULL, JOIN_RIGHT and
1856+
* JOIN_RIGHT_ANTI, because they can produce false null extended rows.
18541857
*/
18551858
if (joinrel->consider_parallel &&
18561859
save_jointype != JOIN_UNIQUE_OUTER &&
18571860
save_jointype != JOIN_FULL &&
18581861
save_jointype != JOIN_RIGHT &&
1862+
save_jointype != JOIN_RIGHT_ANTI &&
18591863
outerrel->partial_pathlist != NIL &&
18601864
bms_is_empty(joinrel->lateral_relids))
18611865
{
@@ -2228,11 +2232,13 @@ hash_inner_and_outer(PlannerInfo *root,
22282232
* total inner path will also be parallel-safe, but if not, we'll
22292233
* have to search for the cheapest safe, unparameterized inner
22302234
* path. If doing JOIN_UNIQUE_INNER, we can't use any alternative
2231-
* inner path. If full or right join, we can't use parallelism
2232-
* (building the hash table in each backend) because no one
2233-
* process has all the match bits.
2235+
* inner path. If full, right, or right-anti join, we can't use
2236+
* parallelism (building the hash table in each backend) because
2237+
* no one process has all the match bits.
22342238
*/
2235-
if (save_jointype == JOIN_FULL || save_jointype == JOIN_RIGHT)
2239+
if (save_jointype == JOIN_FULL ||
2240+
save_jointype == JOIN_RIGHT ||
2241+
save_jointype == JOIN_RIGHT_ANTI)
22362242
cheapest_safe_inner = NULL;
22372243
else if (cheapest_total_inner->parallel_safe)
22382244
cheapest_safe_inner = cheapest_total_inner;
@@ -2256,10 +2262,10 @@ hash_inner_and_outer(PlannerInfo *root,
22562262
* Returns a list of RestrictInfo nodes for those clauses.
22572263
*
22582264
* *mergejoin_allowed is normally set to true, but it is set to false if
2259-
* this is a right/full join and there are nonmergejoinable join clauses.
2260-
* The executor's mergejoin machinery cannot handle such cases, so we have
2261-
* to avoid generating a mergejoin plan. (Note that this flag does NOT
2262-
* consider whether there are actually any mergejoinable clauses. This is
2265+
* this is a right/right-anti/full join and there are nonmergejoinable join
2266+
* clauses. The executor's mergejoin machinery cannot handle such cases, so
2267+
* we have to avoid generating a mergejoin plan. (Note that this flag does
2268+
* NOT consider whether there are actually any mergejoinable clauses. This is
22632269
* correct because in some cases we need to build a clauseless mergejoin.
22642270
* Simply returning NIL is therefore not enough to distinguish safe from
22652271
* unsafe cases.)
@@ -2305,8 +2311,8 @@ select_mergejoin_clauses(PlannerInfo *root,
23052311
{
23062312
/*
23072313
* The executor can handle extra joinquals that are constants, but
2308-
* not anything else, when doing right/full merge join. (The
2309-
* reason to support constants is so we can do FULL JOIN ON
2314+
* not anything else, when doing right/right-anti/full merge join.
2315+
* (The reason to support constants is so we can do FULL JOIN ON
23102316
* FALSE.)
23112317
*/
23122318
if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
@@ -2349,6 +2355,7 @@ select_mergejoin_clauses(PlannerInfo *root,
23492355
switch (jointype)
23502356
{
23512357
case JOIN_RIGHT:
2358+
case JOIN_RIGHT_ANTI:
23522359
case JOIN_FULL:
23532360
*mergejoin_allowed = !have_nonmergeable_joinclause;
23542361
break;

src/backend/optimizer/path/joinrels.c

+3
Original file line numberDiff line numberDiff line change
@@ -925,6 +925,9 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
925925
add_paths_to_joinrel(root, joinrel, rel1, rel2,
926926
JOIN_ANTI, sjinfo,
927927
restrictlist);
928+
add_paths_to_joinrel(root, joinrel, rel2, rel1,
929+
JOIN_RIGHT_ANTI, sjinfo,
930+
restrictlist);
928931
break;
929932
default:
930933
/* other values not expected here */

src/backend/optimizer/path/pathkeys.c

+6-4
Original file line numberDiff line numberDiff line change
@@ -1077,9 +1077,9 @@ find_var_for_subquery_tle(RelOptInfo *rel, TargetEntry *tle)
10771077
* Build the path keys for a join relation constructed by mergejoin or
10781078
* nestloop join. This is normally the same as the outer path's keys.
10791079
*
1080-
* EXCEPTION: in a FULL or RIGHT join, we cannot treat the result as
1081-
* having the outer path's path keys, because null lefthand rows may be
1082-
* inserted at random points. It must be treated as unsorted.
1080+
* EXCEPTION: in a FULL, RIGHT or RIGHT_ANTI join, we cannot treat the
1081+
* result as having the outer path's path keys, because null lefthand rows
1082+
* may be inserted at random points. It must be treated as unsorted.
10831083
*
10841084
* We truncate away any pathkeys that are uninteresting for higher joins.
10851085
*
@@ -1095,7 +1095,9 @@ build_join_pathkeys(PlannerInfo *root,
10951095
JoinType jointype,
10961096
List *outer_pathkeys)
10971097
{
1098-
if (jointype == JOIN_FULL || jointype == JOIN_RIGHT)
1098+
if (jointype == JOIN_FULL ||
1099+
jointype == JOIN_RIGHT ||
1100+
jointype == JOIN_RIGHT_ANTI)
10991101
return NIL;
11001102

11011103
/*

src/backend/optimizer/prep/prepjointree.c

+8-6
Original file line numberDiff line numberDiff line change
@@ -406,8 +406,8 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
406406
* point of the available_rels machinations is to ensure that we only
407407
* pull up quals for which that's okay.
408408
*
409-
* We don't expect to see any pre-existing JOIN_SEMI or JOIN_ANTI
410-
* nodes here.
409+
* We don't expect to see any pre-existing JOIN_SEMI, JOIN_ANTI, or
410+
* JOIN_RIGHT_ANTI jointypes here.
411411
*/
412412
switch (j->jointype)
413413
{
@@ -2640,9 +2640,10 @@ flatten_simple_union_all(PlannerInfo *root)
26402640
* distribute_qual_to_rels to get rid of such clauses.
26412641
*
26422642
* Also, we get rid of JOIN_RIGHT cases by flipping them around to become
2643-
* JOIN_LEFT. This saves some code here and in some later planner routines,
2644-
* but the main reason to do it is to not need to invent a JOIN_REVERSE_ANTI
2645-
* join type.
2643+
* JOIN_LEFT. This saves some code here and in some later planner routines;
2644+
* the main benefit is to reduce the number of jointypes that can appear in
2645+
* SpecialJoinInfo nodes. Note that we can still generate Paths and Plans
2646+
* that use JOIN_RIGHT (or JOIN_RIGHT_ANTI) by switching the inputs again.
26462647
*
26472648
* To ease recognition of strict qual clauses, we require this routine to be
26482649
* run after expression preprocessing (i.e., qual canonicalization and JOIN
@@ -2896,7 +2897,8 @@ reduce_outer_joins_pass2(Node *jtnode,
28962897
/*
28972898
* These could only have been introduced by pull_up_sublinks,
28982899
* so there's no way that upper quals could refer to their
2899-
* righthand sides, and no point in checking.
2900+
* righthand sides, and no point in checking. We don't expect
2901+
* to see JOIN_RIGHT_ANTI yet.
29002902
*/
29012903
break;
29022904
default:

src/include/nodes/execnodes.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -2073,7 +2073,8 @@ typedef struct MergeJoinState
20732073
* OuterTupleSlot is empty!)
20742074
* hj_OuterTupleSlot tuple slot for outer tuples
20752075
* hj_HashTupleSlot tuple slot for inner (hashed) tuples
2076-
* hj_NullOuterTupleSlot prepared null tuple for right/full outer joins
2076+
* hj_NullOuterTupleSlot prepared null tuple for right/right-anti/full
2077+
* outer joins
20772078
* hj_NullInnerTupleSlot prepared null tuple for left/full outer joins
20782079
* hj_FirstOuterTupleSlot first tuple retrieved from outer plan
20792080
* hj_JoinState current state of ExecHashJoin state machine

src/include/nodes/nodes.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,7 @@ typedef enum JoinType
317317
*/
318318
JOIN_SEMI, /* 1 copy of each LHS row that has match(es) */
319319
JOIN_ANTI, /* 1 copy of each LHS row that has no match */
320+
JOIN_RIGHT_ANTI, /* 1 copy of each RHS row that has no match */
320321

321322
/*
322323
* These codes are used internally in the planner, but are not supported
@@ -349,7 +350,8 @@ typedef enum JoinType
349350
((1 << JOIN_LEFT) | \
350351
(1 << JOIN_FULL) | \
351352
(1 << JOIN_RIGHT) | \
352-
(1 << JOIN_ANTI))) != 0)
353+
(1 << JOIN_ANTI) | \
354+
(1 << JOIN_RIGHT_ANTI))) != 0)
353355

354356
/*
355357
* AggStrategy -

0 commit comments

Comments
 (0)