diff options
author | Ashutosh Bapat | 2013-03-19 10:23:21 +0000 |
---|---|---|
committer | Ashutosh Bapat | 2013-03-19 10:23:21 +0000 |
commit | 4634c0bf36fa7e54d16ffaf2a6c0085d4f9a3afc (patch) | |
tree | c64a5f9ffdb2c46f6bb02a256b5330de8646b708 | |
parent | 15f9527dc8ca91866e9b6595a3a46b2dcd822ccd (diff) |
Ship OUTER JOINs to datanodes in the following cases
1. An outer equi-join on the distribution columns of two distributed tables is
shippable to the datanodes where those tables are distributed, provided that
the tables are distributed on the same set of nodes, use the same distribution
strategy, and have distribution columns of the same datatype.
2. An outer-join between a distributed and a replicated relation is
shippable to the datanodes where the distributed relation is distributed, if
the replicated relation is replicated on all of those nodes and the outer side
of the join is the distributed relation.
3. An outer-join between two replicated relations is shippable to a datanode
where both relations are available.
Justification for shippability can be found in the mail thread with subject
"shipping outer joins" on the pgxc-hackers mailing list.
-rw-r--r-- | src/backend/optimizer/path/pgxcpath.c | 75 | ||||
-rw-r--r-- | src/backend/optimizer/util/pgxcship.c | 209 | ||||
-rw-r--r-- | src/include/optimizer/pgxcplan.h | 2 | ||||
-rw-r--r-- | src/include/optimizer/pgxcship.h | 7 | ||||
-rw-r--r-- | src/test/regress/expected/join.out | 41 | ||||
-rw-r--r-- | src/test/regress/expected/xc_FQS_join.out | 223 | ||||
-rw-r--r-- | src/test/regress/sql/xc_FQS_join.sql | 72 |
7 files changed, 474 insertions, 155 deletions
diff --git a/src/backend/optimizer/path/pgxcpath.c b/src/backend/optimizer/path/pgxcpath.c index 00d90a5751..7f62d6d64a 100644 --- a/src/backend/optimizer/path/pgxcpath.c +++ b/src/backend/optimizer/path/pgxcpath.c @@ -155,79 +155,6 @@ pgxc_find_remotequery_path(RelOptInfo *rel) } /* - * See if the nodelists corresponding to the RemoteQuery paths being joined can - * be merged. - */ -ExecNodes * -pgxc_is_join_reducible(ExecNodes *inner_en, ExecNodes *outer_en, Relids in_relids, - Relids out_relids, JoinType jointype, List *join_quals, - List *rtables) -{ - ExecNodes *join_exec_nodes; - bool merge_dist_equijoin = false; - bool merge_replicated_only; - ListCell *cell; - - /* - * If either of inner_en or outer_en is NULL, return NULL. We can't ship the - * join when either of the sides do not have datanodes to ship to. - */ - if (!outer_en || !inner_en) - return NULL; - /* - * We only support reduction of INNER, LEFT [OUTER] and FULL [OUTER] joins. - * RIGHT [OUTER] join is converted to LEFT [OUTER] join during join tree - * deconstruction. - */ - if (jointype != JOIN_INNER && jointype != JOIN_LEFT && jointype != JOIN_FULL) - return NULL; - /* - * When join type is other than INNER, we will get the unmatched rows on - * either side. The result will be correct only in case both the sides of - * join are replicated. In case one of the sides is replicated, and the - * unmatched results are not coming from that side, it might be possible to - * ship such join, but this needs to be validated from correctness - * perspective. - */ - merge_replicated_only = (jointype != JOIN_INNER); - - /* - * If both the relations are distributed with similar distribution strategy - * walk through the restriction info for this JOIN to find if there is an - * equality condition on the distributed columns of both the relations. In - * such case, we can reduce the JOIN if the distribution nodelist is also - * same. 
- */ - if (IsExecNodesDistributedByValue(inner_en) && - inner_en->baselocatortype == outer_en->baselocatortype && - !merge_replicated_only) - { - foreach(cell, join_quals) - { - Node *qual = (Node *)lfirst(cell); - if (pgxc_qual_has_dist_equijoin(in_relids, - out_relids, InvalidOid, - qual, rtables) && - pgxc_is_expr_shippable((Expr *)qual, NULL)) - { - merge_dist_equijoin = true; - break; - } - } - } - /* - * If the ExecNodes of inner and outer nodes can be merged, the JOIN is - * shippable - * PGXCTODO: Can we take into consideration the JOIN conditions to optimize - * further? - */ - join_exec_nodes = pgxc_merge_exec_nodes(inner_en, outer_en, - merge_dist_equijoin, - merge_replicated_only); - return join_exec_nodes; -} - -/* * pgxc_ship_remotejoin * If there are RemoteQuery paths for the rels being joined, check if the join * is shippable to the datanodes, and if so, create a remotequery path for this @@ -296,7 +223,7 @@ create_joinrel_rqpath(PlannerInfo *root, RelOptInfo *joinrel, * If the nodelists on both the sides of JOIN can be merged, the JOIN is * shippable. 
*/ - join_en = pgxc_is_join_reducible(inner_en, outer_en, + join_en = pgxc_is_join_shippable(inner_en, outer_en, innerrel->relids, outerrel->relids, jointype, join_quals, root->parse->rtable); if (join_en) diff --git a/src/backend/optimizer/util/pgxcship.c b/src/backend/optimizer/util/pgxcship.c index f83c0be16b..28e20b1922 100644 --- a/src/backend/optimizer/util/pgxcship.c +++ b/src/backend/optimizer/util/pgxcship.c @@ -300,7 +300,7 @@ pgxc_FQS_find_datanodes_recurse(Node *node, Query *query, Bitmapset **relids) * Check whether the JOIN is pushable to the datanodes and * find the datanodes where the JOIN can be pushed to */ - result_en = pgxc_is_join_reducible(result_en, en, from_relids, + result_en = pgxc_is_join_shippable(result_en, en, from_relids, fle_relids, JOIN_INNER, make_ands_implicit((Expr *)from_expr->quals), query_rtable); @@ -349,7 +349,7 @@ pgxc_FQS_find_datanodes_recurse(Node *node, Query *query, Bitmapset **relids) * Check whether the JOIN is pushable or not, and find the datanodes * where the JOIN can be pushed to. */ - result_en = pgxc_is_join_reducible(ren, len, r_relids, l_relids, + result_en = pgxc_is_join_shippable(ren, len, r_relids, l_relids, join_expr->jointype, make_ands_implicit((Expr *)join_expr->quals), query_rtable); @@ -1074,14 +1074,36 @@ pgxc_shippability_walker(Node *node, Shippability_context *sc_context) */ if (!pgxc_test_shippability_reason(sc_context, SS_NO_NODES)) { - sc_context->sc_subquery_en = pgxc_merge_exec_nodes(sublink_en, - sc_context->sc_subquery_en, - false, - true); + /* + * If this is the first time we are finding out the nodes for + * SubLink, we don't have anything to merge, just assign. + */ + if (!sc_context->sc_subquery_en) + sc_context->sc_subquery_en = sublink_en; + /* + * Merge if only the accumulated SubLink ExecNodes and the + * ExecNodes for this subquery are both replicated. 
+ */ + else if (sublink_en && IsExecNodesReplicated(sublink_en) && + IsExecNodesReplicated(sc_context->sc_subquery_en)) + { + sc_context->sc_subquery_en = pgxc_merge_exec_nodes(sublink_en, + sc_context->sc_subquery_en); + } + else + sc_context->sc_subquery_en = NULL; + + /* + * If we didn't find a cumulative ExecNodes, set shippability + * reason, so that we don't bother merging future sublinks. + */ if (!sc_context->sc_subquery_en) pgxc_set_shippability_reason(sc_context, SS_NO_NODES); } + else + Assert(!sc_context->sc_subquery_en); + /* Check if the type of sublink result is shippable */ pgxc_set_exprtype_shippability(exprType(node), sc_context); /* Wipe out subselect as explained above and walk the copied tree */ @@ -1226,20 +1248,29 @@ pgxc_is_query_shippable(Query *query, int query_level) * shipped. */ pgxc_shippability_walker((Node *)query, &sc_context); + + exec_nodes = sc_context.sc_exec_nodes; /* - * We have merged the nodelists and distributions of all subqueries seen in - * the query tree, merge it with the same obtained for the relations - * involved in the query. + * The shippability context contains two ExecNodes, one for the subLinks + * involved in the Query and other for the relation involved in FromClause. + * They are computed at different times while scanning the query. Merge both + * of them if they are both replicated. If query doesn't have SubLinks, we + * don't need to consider corresponding ExecNodes. * PGXC_FQS_TODO: * Merge the subquery ExecNodes if both of them are replicated. * The logic to merge node lists with other distribution * strategy is not clear yet. 
*/ - exec_nodes = sc_context.sc_exec_nodes; - if (exec_nodes) - exec_nodes = pgxc_merge_exec_nodes(exec_nodes, - sc_context.sc_subquery_en, false, - true); + if (query->hasSubLinks) + { + if (exec_nodes && IsExecNodesReplicated(exec_nodes) && + sc_context.sc_subquery_en && + IsExecNodesReplicated(sc_context.sc_subquery_en)) + exec_nodes = pgxc_merge_exec_nodes(exec_nodes, + sc_context.sc_subquery_en); + else + exec_nodes = NULL; + } /* * Look at the information gathered by the walker in Shippability_context and that @@ -1366,11 +1397,11 @@ pgxc_is_func_shippable(Oid funcid) /* - * pgxc_qual_has_dist_equijoin + * pgxc_find_dist_equijoin_qual * Check equijoin conditions on given relations */ -bool -pgxc_qual_has_dist_equijoin(Relids varnos_1, +Expr * +pgxc_find_dist_equijoin_qual(Relids varnos_1, Relids varnos_2, Oid distcol_type, Node *quals, List *rtable) { List *lquals; @@ -1446,9 +1477,9 @@ pgxc_qual_has_dist_equijoin(Relids varnos_1, !op_hashjoinable(op->opno, exprType((Node *)lvar))) continue; /* Found equi-join condition on distribution columns */ - return true; + return qual_expr; } - return false; + return NULL; } @@ -1459,8 +1490,7 @@ pgxc_qual_has_dist_equijoin(Relids varnos_1, * If both exec_nodes can not be merged, it returns NULL. */ ExecNodes * -pgxc_merge_exec_nodes(ExecNodes *en1, ExecNodes *en2, bool merge_dist_equijoin, - bool merge_replicated_only) +pgxc_merge_exec_nodes(ExecNodes *en1, ExecNodes *en2) { ExecNodes *merged_en = makeNode(ExecNodes); ExecNodes *tmp_en; @@ -1499,22 +1529,11 @@ pgxc_merge_exec_nodes(ExecNodes *en1, ExecNodes *en2, bool merge_dist_equijoin, merged_en->nodeList = list_intersection_int(en1->nodeList, en2->nodeList); merged_en->baselocatortype = LOCATOR_TYPE_REPLICATED; - /* No intersection, so has to go though standard planner... */ if (!merged_en->nodeList) FreeExecNodes(&merged_en); return merged_en; } - /* - * We are told to merge the nodelists if both the distributions are - * replicated. 
We checked that above, so bail out - */ - if (merge_replicated_only) - { - FreeExecNodes(&merged_en); - return merged_en; - } - if (IsExecNodesReplicated(en1) && IsExecNodesColumnDistributed(en2)) { @@ -1572,29 +1591,19 @@ pgxc_merge_exec_nodes(ExecNodes *en1, ExecNodes *en2, bool merge_dist_equijoin, /* * Distributed/distributed case * If the caller has suggested that this is an equi-join between two - * distributed results, check if both are distributed by the same - * distribution strategy, and have the same nodes in the distribution - * node list. The caller should have made sure that distribution column - * type is same. + * distributed results, check that they have the same nodes in the distribution + * node list. The caller is expected to fully decide whether to merge + * the nodes or not. */ - if (merge_dist_equijoin && - en1->baselocatortype == en2->baselocatortype && - !list_difference_int(en1->nodeList, en2->nodeList) && + if (!list_difference_int(en1->nodeList, en2->nodeList) && !list_difference_int(en2->nodeList, en1->nodeList)) { merged_en->nodeList = list_copy(en1->nodeList); - merged_en->baselocatortype = en1->baselocatortype; + if (en1->baselocatortype == en2->baselocatortype) + merged_en->baselocatortype = en1->baselocatortype; + else + merged_en->baselocatortype = LOCATOR_TYPE_DISTRIBUTED; } - /* - * If both the relations are distributed but have only one node in the - * node list, the JOIN can be pushed down if the single node is same for - * both the relations. - * PGXCTODO: Should we set the locatortype as REPLICATED for such - * relation/s in first place? 
- */ - else if (list_length(en1->nodeList) == 1 && list_length(en2->nodeList) == 1 && - (merged_en->nodeList = list_intersection_int(en1->nodeList, en2->nodeList))) - merged_en->baselocatortype = LOCATOR_TYPE_DISTRIBUTED; else FreeExecNodes(&merged_en); return merged_en; @@ -1894,3 +1903,103 @@ pgxc_check_fk_shippability(RelationLocInfo *parentLocInfo, return result; } + +/* + * pgxc_is_join_reducible + * The shippability of JOIN is decided in following steps + * 1. Are the JOIN conditions shippable? + * For INNER JOIN it's possible to apply some of the conditions at the + * Datanodes and others at coordinator. But for other JOINs, JOIN conditions + * decide which tuples on the OUTER side are appended with NULL columns from + * INNER side, we need all the join conditions to be shippable for the join to + * be shippable. + * 2. Do the JOIN conditions have quals that will make it shippable? + * When both sides of JOIN are replicated, irrespective of the quals the JOIN + * is shippable. + * INNER joins between replicated and distributed relation are shippable + * irrespective of the quals. OUTER join between replicated and distributed + * relation is shippable if distributed relation is the outer relation. + * All joins between hash/modulo distributed relations are shippable if they + * have equi-join on the distributed column, such that distribution columns + * have same datatype and same distribution strategy. + * 3. Are datanodes where the joining relations exist, compatible? + * Joins between replicated relations are shippable if both relations share a + * datanode. Joins between distributed relations are shippable if both + * relations are distributed on same set of Datanodes. Join between replicated + * and distributed relations is shippable is replicated relation is replicated + * on all nodes where distributed relation is distributed. + * + * The first step is to be applied by the caller of this function. 
+ */ +ExecNodes * +pgxc_is_join_shippable(ExecNodes *inner_en, ExecNodes *outer_en, Relids in_relids, + Relids out_relids, JoinType jointype, List *join_quals, + List *rtables) +{ + bool merge_nodes = false; + + /* + * If either of inner_en or outer_en is NULL, return NULL. We can't ship the + * join when either of the sides do not have datanodes to ship to. + */ + if (!outer_en || !inner_en) + return NULL; + /* + * We only support reduction of INNER, LEFT [OUTER] and FULL [OUTER] joins. + * RIGHT [OUTER] join is converted to LEFT [OUTER] join during join tree + * deconstruction. + */ + if (jointype != JOIN_INNER && jointype != JOIN_LEFT && jointype != JOIN_FULL) + return NULL; + + /* If both sides are replicated or have single node each, we ship any kind of JOIN */ + if ((IsExecNodesReplicated(inner_en) && IsExecNodesReplicated(outer_en)) || + (list_length(inner_en->nodeList) == 1 && + list_length(outer_en->nodeList) == 1)) + merge_nodes = true; + + /* If both sides are distributed, ... */ + else if (IsExecNodesColumnDistributed(inner_en) && + IsExecNodesColumnDistributed(outer_en)) + { + /* + * If two sides are distributed in the same manner by a value, with an + * equi-join on the distribution column and that condition + * is shippable, ship the join if node lists from both sides can be + * merged. + */ + if (inner_en->baselocatortype == outer_en->baselocatortype && + IsExecNodesDistributedByValue(inner_en)) + { + Expr *equi_join_expr = pgxc_find_dist_equijoin_qual(in_relids, + out_relids, InvalidOid, + (Node *)join_quals, rtables); + if (equi_join_expr && pgxc_is_expr_shippable(equi_join_expr, NULL)) + merge_nodes = true; + } + } + /* + * If outer side is distributed and inner side is replicated, we can ship + * LEFT OUTER and INNER join. 
+ */ + else if (IsExecNodesColumnDistributed(outer_en) && + IsExecNodesReplicated(inner_en) && + (jointype == JOIN_INNER || jointype == JOIN_LEFT)) + merge_nodes = true; + /* + * If outer side is replicated and inner side is distributed, we can ship + * only for INNER join. + */ + else if (IsExecNodesReplicated(outer_en) && + IsExecNodesColumnDistributed(inner_en) && + jointype == JOIN_INNER) + merge_nodes = true; + /* + * If the ExecNodes of inner and outer nodes can be merged, the JOIN is + * shippable + */ + if (merge_nodes) + return pgxc_merge_exec_nodes(inner_en, outer_en); + else + return NULL; +} diff --git a/src/include/optimizer/pgxcplan.h b/src/include/optimizer/pgxcplan.h index eb923b3494..7c7f76642f 100644 --- a/src/include/optimizer/pgxcplan.h +++ b/src/include/optimizer/pgxcplan.h @@ -132,7 +132,7 @@ extern bool pgxc_query_contains_utility(List *queries); extern void pgxc_rqplan_adjust_tlist(RemoteQuery *rqplan); extern Plan *pgxc_make_modifytable(PlannerInfo *root, Plan *topplan); -extern ExecNodes *pgxc_is_join_reducible(ExecNodes *inner_en, ExecNodes *outer_en, +extern ExecNodes *pgxc_is_join_shippable(ExecNodes *inner_en, ExecNodes *outer_en, Relids in_relids, Relids out_relids, JoinType jointype, List *join_quals, List *rtables); diff --git a/src/include/optimizer/pgxcship.h b/src/include/optimizer/pgxcship.h index 7317617751..ad27744d8f 100644 --- a/src/include/optimizer/pgxcship.h +++ b/src/include/optimizer/pgxcship.h @@ -28,11 +28,10 @@ extern bool pgxc_is_expr_shippable(Expr *node, bool *has_aggs); /* Determine if given function is shippable */ extern bool pgxc_is_func_shippable(Oid funcid); /* Check equijoin conditions on given relations */ -extern bool pgxc_qual_has_dist_equijoin(Relids varnos_1, - Relids varnos_2, Oid distcol_type, Node *quals, List *rtable); +extern Expr *pgxc_find_dist_equijoin_qual(Relids varnos_1, Relids varnos_2, + Oid distcol_type, Node *quals, List *rtable); /* Merge given execution nodes based on join shippability 
conditions */ -extern ExecNodes *pgxc_merge_exec_nodes(ExecNodes *en1, - ExecNodes *en2, bool merge_dist_equijoin, bool merge_replicated_only); +extern ExecNodes *pgxc_merge_exec_nodes(ExecNodes *en1, ExecNodes *en2); /* Check if given Query includes distribution column */ extern bool pgxc_query_has_distcolgrouping(Query *query); /* Check the shippability of an index */ diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 5c570c1e5c..0e973fe425 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2977,23 +2977,15 @@ explain (verbose true, costs false, nodes false) select p.*, linked from parent p left join (select c.*, true as linked from child c) as ss on (p.k = ss.k) order by p.k; - QUERY PLAN -------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Sort Output: p.k, p.pd, (true) Sort Key: p.k - -> Hash Left Join - Output: p.k, p.pd, (true) - Hash Cond: (p.k = c.k) - -> Data Node Scan on parent "_REMOTE_TABLE_QUERY_" - Output: p.k, p.pd - Remote query: SELECT k, pd FROM ONLY parent p WHERE true - -> Hash - Output: c.k, (true) - -> Data Node Scan on child "_REMOTE_TABLE_QUERY_" - Output: c.k, true - Remote query: SELECT k FROM ONLY child c WHERE true -(14 rows) + -> Data Node Scan on "__REMOTE_SORT_QUERY__" + Output: p.k, p.pd, true + Remote query: SELECT l.a_1, l.a_2 FROM ((SELECT p.k, p.pd FROM ONLY parent p WHERE true) l(a_1, a_2) LEFT JOIN (SELECT c.k FROM ONLY child c WHERE true) r(a_1) ON ((l.a_1 = r.a_1))) WHERE true ORDER BY 1 +(6 rows) -- check for a 9.0rc1 bug: join removal breaks pseudoconstant qual handling select p.* from @@ -3007,15 +2999,12 @@ explain (verbose true, costs false, nodes false) select p.* from parent p left join child 
c on (p.k = c.k) where p.k = 1 and p.k = 2; - QUERY PLAN ---------------------------------------------------------------------------------- - Result + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------- + Data Node Scan on "__REMOTE_FQS_QUERY__" Output: p.k, p.pd - One-Time Filter: false - -> Data Node Scan on parent "_REMOTE_TABLE_QUERY_" - Output: p.k, p.pd - Remote query: SELECT k, pd FROM ONLY parent p WHERE ((k = 1) AND false) -(6 rows) + Remote query: SELECT p.k, p.pd FROM (parent p LEFT JOIN child c ON ((p.k = c.k))) WHERE ((p.k = 1) AND (p.k = 2)) +(3 rows) select p.* from (parent p left join child c on (p.k = c.k)) join parent x on p.k = x.k @@ -3028,11 +3017,11 @@ explain (verbose true, costs false, nodes false) select p.* from (parent p left join child c on (p.k = c.k)) join parent x on p.k = x.k where p.k = 1 and p.k = 2; - QUERY PLAN --------------------------- - Result + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------ + Data Node Scan on "__REMOTE_FQS_QUERY__" Output: p.k, p.pd - One-Time Filter: false + Remote query: SELECT p.k, p.pd FROM ((parent p LEFT JOIN child c ON ((p.k = c.k))) JOIN parent x ON ((p.k = x.k))) WHERE ((p.k = 1) AND (p.k = 2)) (3 rows) -- bug 5255: this is not optimizable by join removal diff --git a/src/test/regress/expected/xc_FQS_join.out b/src/test/regress/expected/xc_FQS_join.out index 7da1f6d33c..8a9d616728 100644 --- a/src/test/regress/expected/xc_FQS_join.out +++ b/src/test/regress/expected/xc_FQS_join.out @@ -46,6 +46,21 @@ select create_table_nodes('tab3_mod', '{1, 2, 3}'::int[], 'modulo(val)', 'as sel (1 row) +select create_table_nodes('single_node_rep_tab', '{1}'::int[], 'replication', 'as select * from tab1_rep limit 0'); + create_table_nodes +-------------------- + +(1 row) + +select create_table_nodes('single_node_mod_tab', 
'{1}'::int[], 'modulo(val)', 'as select * from tab1_rep limit 0'); + create_table_nodes +-------------------- + +(1 row) + +-- populate single node tables specially +insert into single_node_rep_tab values (1, 2), (3, 4); +insert into single_node_mod_tab values (1, 2), (5, 6); -- Join involving replicated tables only, all of them should be shippable select * from tab1_rep, tab2_rep where tab1_rep.val = tab2_rep.val and tab1_rep.val2 = tab2_rep.val2 and @@ -431,6 +446,211 @@ explain (costs off, verbose on, nodes off) select * from tab1_mod, tab3_mod Remote query: SELECT tab1_mod.val, tab1_mod.val2, tab3_mod.val, tab3_mod.val2 FROM tab1_mod, tab3_mod WHERE ((tab1_mod.val = tab3_mod.val) AND (tab1_mod.val = 1)) (3 rows) +-- OUTER joins, we insert some data in existing tables for testing OUTER join +-- OUTER join between two replicated tables is shippable if they have a common +-- datanode. +insert into tab1_rep values (100, 200); +insert into tab2_rep values (3000, 4000); +select * from tab1_rep left join tab2_rep on (tab1_rep.val = tab2_rep.val and tab1_rep.val2 = tab2_rep.val2) + where tab2_rep.val = tab2_rep.val2 or tab2_rep.val is null + order by tab1_rep.val, tab1_rep.val2; + val | val2 | val | val2 +-----+------+-----+------ + 1 | 1 | 1 | 1 + 2 | 2 | 2 | 2 + 3 | 3 | 3 | 3 + 4 | 4 | 4 | 4 + 5 | 5 | 5 | 5 + 100 | 200 | | +(6 rows) + +explain (costs off, verbose on, nodes off) +select * from tab1_rep left join tab2_rep on (tab1_rep.val = tab2_rep.val and tab1_rep.val2 = tab2_rep.val2) + where tab1_rep.val = tab1_rep.val2 or tab2_rep.val is null + order by tab1_rep.val, tab1_rep.val2; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Data Node Scan on "__REMOTE_FQS_QUERY__" + Output: tab1_rep.val, tab1_rep.val2, 
tab2_rep.val, tab2_rep.val2, tab1_rep.val, tab1_rep.val2 + Remote query: SELECT tab1_rep.val, tab1_rep.val2, tab2_rep.val, tab2_rep.val2 FROM (tab1_rep LEFT JOIN tab2_rep ON (((tab1_rep.val = tab2_rep.val) AND (tab1_rep.val2 = tab2_rep.val2)))) WHERE ((tab1_rep.val = tab1_rep.val2) OR (tab2_rep.val IS NULL)) ORDER BY tab1_rep.val, tab1_rep.val2 +(3 rows) + +-- FULL OUTER join +select * from tab1_rep full join tab2_rep on (tab1_rep.val < tab2_rep.val and tab1_rep.val2 = tab2_rep.val2) + where tab1_rep.val > 5 or tab2_rep.val > 5 + order by tab1_rep.val, tab2_rep.val, tab1_rep.val2, tab2_rep.val2; + val | val2 | val | val2 +-----+------+------+------ + 100 | 200 | | + | | 3000 | 4000 +(2 rows) + +explain (costs off, verbose on, nodes off) +select * from tab1_rep full join tab2_rep on (tab1_rep.val < tab2_rep.val and tab1_rep.val2 = tab2_rep.val2) + where tab1_rep.val > 5 or tab2_rep.val > 5 + order by tab1_rep.val, tab2_rep.val, tab1_rep.val2, tab2_rep.val2; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Data Node Scan on "__REMOTE_FQS_QUERY__" + Output: tab1_rep.val, tab1_rep.val2, tab2_rep.val, tab2_rep.val2, tab1_rep.val, tab2_rep.val, tab1_rep.val2, tab2_rep.val2 + Remote query: SELECT tab1_rep.val, tab1_rep.val2, tab2_rep.val, tab2_rep.val2 FROM (tab1_rep FULL JOIN tab2_rep ON (((tab1_rep.val < tab2_rep.val) AND (tab1_rep.val2 = tab2_rep.val2)))) WHERE ((tab1_rep.val > 5) OR (tab2_rep.val > 5)) ORDER BY tab1_rep.val, tab2_rep.val, tab1_rep.val2, tab2_rep.val2 +(3 rows) + +-- OUTER join between two distributed tables is shippable if it's an equi-join +-- on the distribution columns, such that distribution columns are of same type +-- and the relations are distributed on same set of nodes 
+insert into tab1_mod values (100, 200); +insert into tab3_mod values (3000, 4000); +select * from tab1_mod left join tab3_mod on (tab1_mod.val = tab3_mod.val and tab1_mod.val2 = tab3_mod.val2) + where tab3_mod.val = tab3_mod.val2 or tab3_mod.val is null + order by tab1_mod.val, tab1_mod.val2; + val | val2 | val | val2 +-----+------+-----+------ + 1 | 1 | 1 | 1 + 2 | 2 | 2 | 2 + 3 | 3 | 3 | 3 + 4 | 4 | 4 | 4 + 5 | 5 | 5 | 5 + 100 | 200 | | +(6 rows) + +explain (costs off, verbose on, nodes off) +select * from tab1_mod left join tab3_mod on (tab1_mod.val = tab3_mod.val and tab1_mod.val2 = tab3_mod.val2) + where tab3_mod.val = tab3_mod.val2 or tab3_mod.val is null + order by tab1_mod.val, tab1_mod.val2; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Sort + Output: tab1_mod.val, tab1_mod.val2, tab3_mod.val, tab3_mod.val2, tab1_mod.val, tab1_mod.val2 + Sort Key: tab1_mod.val, tab1_mod.val2 + -> Data Node Scan on "__REMOTE_SORT_QUERY__" + Output: tab1_mod.val, tab1_mod.val2, tab3_mod.val, tab3_mod.val2, tab1_mod.val, tab1_mod.val2 + Remote query: SELECT l.a_1, l.a_2, r.a_1, r.a_2 FROM ((SELECT tab1_mod.val, tab1_mod.val2 FROM ONLY tab1_mod WHERE true) l(a_1, a_2) LEFT JOIN (SELECT tab3_mod.val, tab3_mod.val2 FROM ONLY tab3_mod WHERE true) r(a_1, a_2) ON (((l.a_1 = r.a_1) AND (l.a_2 = r.a_2)))) WHERE ((r.a_1 = r.a_2) OR (r.a_1 IS NULL)) ORDER BY 1, 2 +(6 rows) + +-- JOIN condition is not equi-join on distribution column, join is not shippable +select * from tab1_mod left join tab3_mod using (val2) + where (tab1_mod.val = tab1_mod.val2 and tab3_mod.val = tab3_mod.val2) or tab3_mod.val is null + order by tab1_mod.val, tab1_mod.val2, tab3_mod.val2; + val2 | val | val 
+------+-----+----- + 1 | 1 | 1 + 2 | 2 | 2 + 3 | 3 | 3 + 4 | 4 | 4 + 5 | 5 | 5 + 200 | 100 | +(6 rows) + +explain (costs off, verbose on, nodes off) +select * from tab1_mod left join tab3_mod using (val2) + where (tab1_mod.val = tab1_mod.val2 and tab3_mod.val = tab3_mod.val2) or tab3_mod.val is null + order by tab1_mod.val, tab1_mod.val2, tab3_mod.val2; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------- + Sort + Output: tab1_mod.val2, tab1_mod.val, tab3_mod.val, tab1_mod.val, tab1_mod.val2, tab3_mod.val2 + Sort Key: tab1_mod.val, tab1_mod.val2, tab3_mod.val2 + -> Hash Left Join + Output: tab1_mod.val2, tab1_mod.val, tab3_mod.val, tab1_mod.val, tab1_mod.val2, tab3_mod.val2 + Hash Cond: (tab1_mod.val2 = tab3_mod.val2) + Filter: (((tab1_mod.val = tab1_mod.val2) AND (tab3_mod.val = tab3_mod.val2)) OR (tab3_mod.val IS NULL)) + -> Data Node Scan on tab1_mod "_REMOTE_TABLE_QUERY_" + Output: tab1_mod.val2, tab1_mod.val + Remote query: SELECT val2, val FROM ONLY tab1_mod WHERE true + -> Hash + Output: tab3_mod.val, tab3_mod.val2 + -> Data Node Scan on tab3_mod "_REMOTE_TABLE_QUERY_" + Output: tab3_mod.val, tab3_mod.val2 + Remote query: SELECT val, val2 FROM ONLY tab3_mod WHERE true +(15 rows) + +-- OUTER join between replicated and distributed tables is shippable if the +-- the replicated table is available on all the datanodes where outer side is +-- distributed +select * from tab1_mod left join tab1_rep on (tab1_mod.val < tab1_rep.val and tab1_mod.val2 = tab1_rep.val2) + where tab1_mod.val >= 5 + order by tab1_mod.val, tab1_mod.val2, tab1_rep.val, tab1_rep.val2; + val | val2 | val | val2 +-----+------+-----+------ + 5 | 1 | | + 5 | 2 | | + 5 | 3 | | + 5 | 4 | | + 5 | 5 | | + 100 | 200 | | +(6 rows) + +explain (costs off, verbose on, nodes off) +select * from tab1_mod left join tab1_rep on (tab1_mod.val < tab1_rep.val and tab1_mod.val2 = tab1_rep.val2) + where tab1_mod.val >= 5 + order by 
tab1_mod.val, tab1_mod.val2, tab1_rep.val, tab1_rep.val2; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Sort + Output: tab1_mod.val, tab1_mod.val2, tab1_rep.val, tab1_rep.val2, tab1_mod.val, tab1_mod.val2, tab1_rep.val, tab1_rep.val2 + Sort Key: tab1_mod.val, tab1_mod.val2, tab1_rep.val, tab1_rep.val2 + -> Data Node Scan on "__REMOTE_SORT_QUERY__" + Output: tab1_mod.val, tab1_mod.val2, tab1_rep.val, tab1_rep.val2, tab1_mod.val, tab1_mod.val2, tab1_rep.val, tab1_rep.val2 + Remote query: SELECT l.a_1, l.a_2, r.a_1, r.a_2 FROM ((SELECT tab1_mod.val, tab1_mod.val2 FROM ONLY tab1_mod WHERE (tab1_mod.val >= 5)) l(a_1, a_2) LEFT JOIN (SELECT tab1_rep.val, tab1_rep.val2 FROM ONLY tab1_rep WHERE true) r(a_1, a_2) ON (((l.a_1 < r.a_1) AND (l.a_2 = r.a_2)))) WHERE true ORDER BY 1, 2, 3, 4 +(6 rows) + +-- OUTER side is replicated and inner is distributed, join is not shippable +select * from tab1_mod right join tab1_rep on (tab1_mod.val > tab1_rep.val and tab1_mod.val2 = tab1_rep.val2) + where tab1_rep.val >= 5 + order by tab1_mod.val, tab1_mod.val2, tab1_rep.val, tab1_rep.val2; + val | val2 | val | val2 +-----+------+-----+------ + | | 5 | 1 + | | 5 | 2 + | | 5 | 3 + | | 5 | 4 + | | 5 | 5 + | | 100 | 200 +(6 rows) + +explain (costs off, verbose on, nodes off) +select * from tab1_mod right join tab1_rep on (tab1_mod.val > tab1_rep.val and tab1_mod.val2 = tab1_rep.val2) + where tab1_rep.val >= 5 + order by tab1_mod.val, tab1_mod.val2, tab1_rep.val, tab1_rep.val2; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------ + Sort + Output: tab1_mod.val, tab1_mod.val2, tab1_rep.val, tab1_rep.val2, 
tab1_mod.val, tab1_mod.val2, tab1_rep.val, tab1_rep.val2 + Sort Key: tab1_mod.val, tab1_mod.val2, tab1_rep.val, tab1_rep.val2 + -> Hash Right Join + Output: tab1_mod.val, tab1_mod.val2, tab1_rep.val, tab1_rep.val2, tab1_mod.val, tab1_mod.val2, tab1_rep.val, tab1_rep.val2 + Hash Cond: (tab1_mod.val2 = tab1_rep.val2) + Join Filter: (tab1_mod.val > tab1_rep.val) + -> Data Node Scan on tab1_mod "_REMOTE_TABLE_QUERY_" + Output: tab1_mod.val, tab1_mod.val2 + Remote query: SELECT val, val2 FROM ONLY tab1_mod WHERE true + -> Hash + Output: tab1_rep.val, tab1_rep.val2 + -> Data Node Scan on tab1_rep "_REMOTE_TABLE_QUERY_" + Output: tab1_rep.val, tab1_rep.val2 + Remote query: SELECT val, val2 FROM ONLY tab1_rep WHERE (val >= 5) +(15 rows) + +-- Any join involving a distributed and replicated node each located on a single +-- and same node should be shippable +select * from single_node_rep_tab natural full outer join single_node_mod_tab order by val, val2; + val | val2 +-----+------ + 1 | 2 + 3 | 4 + 5 | 6 +(3 rows) + +explain (costs off, verbose on, nodes off) +select * from single_node_rep_tab natural full outer join single_node_mod_tab order by val, val2; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------- + Data Node Scan on "__REMOTE_FQS_QUERY__" + Output: val, val2 + Remote query: SELECT val, val2 FROM (single_node_rep_tab NATURAL FULL JOIN single_node_mod_tab) ORDER BY val, val2 +(3 rows) + -- DMLs involving JOINs are not FQSed -- We need to just make sure that FQS is not kicking in. But the JOINs can still -- be reduced by JOIN reduction optimization. 
Turn this optimization off so as @@ -520,3 +740,6 @@ drop table tab3_rep; drop table tab4_rep; drop table tab1_mod; drop table tab2_mod; +drop table tab3_mod; +drop table single_node_mod_tab; +drop table single_node_rep_tab; diff --git a/src/test/regress/sql/xc_FQS_join.sql b/src/test/regress/sql/xc_FQS_join.sql index 9419c720a9..9a6fe33944 100644 --- a/src/test/regress/sql/xc_FQS_join.sql +++ b/src/test/regress/sql/xc_FQS_join.sql @@ -13,6 +13,11 @@ select create_table_nodes('tab4_rep', '{2, 4}'::int[], 'replication', 'as select select create_table_nodes('tab1_mod', '{1, 2, 3}'::int[], 'modulo(val)', 'as select * from tab1_rep'); select create_table_nodes('tab2_mod', '{2, 4}'::int[], 'modulo(val)', 'as select * from tab1_rep'); select create_table_nodes('tab3_mod', '{1, 2, 3}'::int[], 'modulo(val)', 'as select * from tab1_rep'); +select create_table_nodes('single_node_rep_tab', '{1}'::int[], 'replication', 'as select * from tab1_rep limit 0'); +select create_table_nodes('single_node_mod_tab', '{1}'::int[], 'modulo(val)', 'as select * from tab1_rep limit 0'); +-- populate single node tables specially +insert into single_node_rep_tab values (1, 2), (3, 4); +insert into single_node_mod_tab values (1, 2), (5, 6); -- Join involving replicated tables only, all of them should be shippable select * from tab1_rep, tab2_rep where tab1_rep.val = tab2_rep.val and @@ -100,6 +105,70 @@ explain (costs off, verbose on, nodes off, num_nodes on) select * from tab1_mod select * from tab1_mod, tab3_mod where tab1_mod.val = tab3_mod.val and tab1_mod.val = 1; explain (costs off, verbose on, nodes off) select * from tab1_mod, tab3_mod where tab1_mod.val = tab3_mod.val and tab1_mod.val = 1; +-- OUTER joins, we insert some data in existing tables for testing OUTER join +-- OUTER join between two replicated tables is shippable if they have a common +-- datanode. 
+insert into tab1_rep values (100, 200); +insert into tab2_rep values (3000, 4000); +select * from tab1_rep left join tab2_rep on (tab1_rep.val = tab2_rep.val and tab1_rep.val2 = tab2_rep.val2) + where tab2_rep.val = tab2_rep.val2 or tab2_rep.val is null + order by tab1_rep.val, tab1_rep.val2; +explain (costs off, verbose on, nodes off) +select * from tab1_rep left join tab2_rep on (tab1_rep.val = tab2_rep.val and tab1_rep.val2 = tab2_rep.val2) + where tab1_rep.val = tab1_rep.val2 or tab2_rep.val is null + order by tab1_rep.val, tab1_rep.val2; +-- FULL OUTER join +select * from tab1_rep full join tab2_rep on (tab1_rep.val < tab2_rep.val and tab1_rep.val2 = tab2_rep.val2) + where tab1_rep.val > 5 or tab2_rep.val > 5 + order by tab1_rep.val, tab2_rep.val, tab1_rep.val2, tab2_rep.val2; +explain (costs off, verbose on, nodes off) +select * from tab1_rep full join tab2_rep on (tab1_rep.val < tab2_rep.val and tab1_rep.val2 = tab2_rep.val2) + where tab1_rep.val > 5 or tab2_rep.val > 5 + order by tab1_rep.val, tab2_rep.val, tab1_rep.val2, tab2_rep.val2; +-- OUTER join between two distributed tables is shippable if it's an equi-join +-- on the distribution columns, such that distribution columns are of same type +-- and the relations are distributed on same set of nodes +insert into tab1_mod values (100, 200); +insert into tab3_mod values (3000, 4000); +select * from tab1_mod left join tab3_mod on (tab1_mod.val = tab3_mod.val and tab1_mod.val2 = tab3_mod.val2) + where tab3_mod.val = tab3_mod.val2 or tab3_mod.val is null + order by tab1_mod.val, tab1_mod.val2; +explain (costs off, verbose on, nodes off) +select * from tab1_mod left join tab3_mod on (tab1_mod.val = tab3_mod.val and tab1_mod.val2 = tab3_mod.val2) + where tab3_mod.val = tab3_mod.val2 or tab3_mod.val is null + order by tab1_mod.val, tab1_mod.val2; +-- JOIN condition is not equi-join on distribution column, join is not shippable +select * from tab1_mod left join tab3_mod using (val2) + where (tab1_mod.val = 
tab1_mod.val2 and tab3_mod.val = tab3_mod.val2) or tab3_mod.val is null + order by tab1_mod.val, tab1_mod.val2, tab3_mod.val2; +explain (costs off, verbose on, nodes off) +select * from tab1_mod left join tab3_mod using (val2) + where (tab1_mod.val = tab1_mod.val2 and tab3_mod.val = tab3_mod.val2) or tab3_mod.val is null + order by tab1_mod.val, tab1_mod.val2, tab3_mod.val2; +-- OUTER join between replicated and distributed tables is shippable if the +-- the replicated table is available on all the datanodes where outer side is +-- distributed +select * from tab1_mod left join tab1_rep on (tab1_mod.val < tab1_rep.val and tab1_mod.val2 = tab1_rep.val2) + where tab1_mod.val >= 5 + order by tab1_mod.val, tab1_mod.val2, tab1_rep.val, tab1_rep.val2; +explain (costs off, verbose on, nodes off) +select * from tab1_mod left join tab1_rep on (tab1_mod.val < tab1_rep.val and tab1_mod.val2 = tab1_rep.val2) + where tab1_mod.val >= 5 + order by tab1_mod.val, tab1_mod.val2, tab1_rep.val, tab1_rep.val2; +-- OUTER side is replicated and inner is distributed, join is not shippable +select * from tab1_mod right join tab1_rep on (tab1_mod.val > tab1_rep.val and tab1_mod.val2 = tab1_rep.val2) + where tab1_rep.val >= 5 + order by tab1_mod.val, tab1_mod.val2, tab1_rep.val, tab1_rep.val2; +explain (costs off, verbose on, nodes off) +select * from tab1_mod right join tab1_rep on (tab1_mod.val > tab1_rep.val and tab1_mod.val2 = tab1_rep.val2) + where tab1_rep.val >= 5 + order by tab1_mod.val, tab1_mod.val2, tab1_rep.val, tab1_rep.val2; +-- Any join involving a distributed and replicated node each located on a single +-- and same node should be shippable +select * from single_node_rep_tab natural full outer join single_node_mod_tab order by val, val2; +explain (costs off, verbose on, nodes off) +select * from single_node_rep_tab natural full outer join single_node_mod_tab order by val, val2; + -- DMLs involving JOINs are not FQSed -- We need to just make sure that FQS is not kicking in. 
But the JOINs can still -- be reduced by JOIN reduction optimization. Turn this optimization off so as @@ -121,4 +190,7 @@ drop table tab3_rep; drop table tab4_rep; drop table tab1_mod; drop table tab2_mod; +drop table tab3_mod; +drop table single_node_mod_tab; +drop table single_node_rep_tab; |