diff options
author | Michael Paquier | 2012-08-29 06:40:23 +0000 |
---|---|---|
committer | Michael Paquier | 2012-08-29 06:49:22 +0000 |
commit | b0b514f887b0760bc3c9d0bab0d8efb612c98a29 (patch) | |
tree | c7d2073d4d8e1978fa313d489e8d8e4106db720a | |
parent | aa400e384223727ea76fbd547446b69d10c95748 (diff) |
Reorganize XC planner APIs for shippability evaluation
In order to improve Postgres-XC planner API visibility and transparency, all the
functions that are related to the evaluation of expression shippability to remote
nodes are now gathered into a single file called pgxcship.c, located in the PostgreSQL
optimizer.
Most of those functionalities were originally located in file src/backend/pgxc/plan/planner.c.
This clean-up keeps the XC planning functions separated according to their role
and facilitates code maintenance.
In order to evaluate if an expression or a query is shippable to remote nodes, a simple call
to the new APIs of pgxcship.h is enough.
The file called postgresql_fdw.c is removed, made unnecessary by the exposure of the
shippability evaluation functions in the new interface.
-rw-r--r-- | src/backend/catalog/heap.c | 1 | ||||
-rw-r--r-- | src/backend/commands/copy.c | 2 | ||||
-rw-r--r-- | src/backend/optimizer/path/pgxcpath.c | 25 | ||||
-rw-r--r-- | src/backend/optimizer/plan/pgxcplan.c | 54 | ||||
-rw-r--r-- | src/backend/optimizer/util/Makefile | 2 | ||||
-rw-r--r-- | src/backend/optimizer/util/pathnode.c | 1 | ||||
-rw-r--r-- | src/backend/optimizer/util/pgxcship.c | 1447 | ||||
-rw-r--r-- | src/backend/pgxc/copy/remotecopy.c | 2 | ||||
-rw-r--r-- | src/backend/pgxc/plan/planner.c | 1269 | ||||
-rw-r--r-- | src/backend/pgxc/pool/Makefile | 2 | ||||
-rw-r--r-- | src/backend/pgxc/pool/postgresql_fdw.c | 118 | ||||
-rw-r--r-- | src/backend/rewrite/rewriteHandler.c | 1 | ||||
-rw-r--r-- | src/backend/utils/misc/guc.c | 1 | ||||
-rw-r--r-- | src/include/optimizer/pgxcship.h | 39 | ||||
-rw-r--r-- | src/include/pgxc/planner.h | 72 | ||||
-rw-r--r-- | src/include/pgxc/postgresql_fdw.h | 23 |
16 files changed, 1536 insertions, 1523 deletions
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 6741f90a3f..9264829f47 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -77,7 +77,6 @@ #include "pgxc/nodemgr.h" #include "pgxc/pgxc.h" #include "pgxc/pgxcnode.h" -#include "pgxc/postgresql_fdw.h" #endif diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 331e21d056..0944131313 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -37,13 +37,13 @@ #include "optimizer/planner.h" #include "parser/parse_relation.h" #ifdef PGXC +#include "optimizer/pgxcship.h" #include "pgxc/pgxc.h" #include "pgxc/execRemote.h" #include "pgxc/locator.h" #include "pgxc/remotecopy.h" #include "nodes/nodes.h" #include "pgxc/poolmgr.h" -#include "pgxc/postgresql_fdw.h" #include "catalog/pgxc_node.h" #endif #include "rewrite/rewriteHandler.h" diff --git a/src/backend/optimizer/path/pgxcpath.c b/src/backend/optimizer/path/pgxcpath.c index 0ecbcb3d7d..9118315e75 100644 --- a/src/backend/optimizer/path/pgxcpath.c +++ b/src/backend/optimizer/path/pgxcpath.c @@ -16,11 +16,11 @@ #include "optimizer/cost.h" #include "optimizer/paths.h" #include "optimizer/pathnode.h" +#include "optimizer/pgxcship.h" #include "optimizer/restrictinfo.h" #include "parser/parsetree.h" #include "pgxc/pgxc.h" #include "pgxc/planner.h" -#include "pgxc/postgresql_fdw.h" static RemoteQueryPath *pgxc_find_remotequery_path(RelOptInfo *rel); static ExecNodes *pgxc_is_join_reducible(ExecNodes *inner_en, ExecNodes *outer_en, @@ -110,7 +110,7 @@ create_remotequery_path(PlannerInfo *root, RelOptInfo *rel, ExecNodes *exec_node * The caller can decide whether to add the scan paths depending upon the return * value. 
*/ -extern bool +extern bool create_plainrel_rqpath(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { RelationLocInfo *rel_loc_info; @@ -127,7 +127,7 @@ create_plainrel_rqpath(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) rel_loc_info = GetRelationLocInfo(rte->relid); quals = extract_actual_clauses(rel->baserestrictinfo, false); - exec_nodes = GetRelationNodesByQuals(rte->relid, rel->relid, + exec_nodes = GetRelationNodesByQuals(rte->relid, rel->relid, (Node *)quals, RELATION_ACCESS_READ); if (!exec_nodes) @@ -147,7 +147,7 @@ create_plainrel_rqpath(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) } /* We don't have subpaths for a plain base relation */ - add_path(rel, (Path *)create_remotequery_path(root, rel, exec_nodes, + add_path(rel, (Path *)create_remotequery_path(root, rel, exec_nodes, NULL, NULL, 0, NULL)); return true; } @@ -217,8 +217,9 @@ pgxc_is_join_reducible(ExecNodes *inner_en, ExecNodes *outer_en, Relids in_relid foreach(cell, join_quals) { Node *qual = (Node *)lfirst(cell); - if (pgxc_qual_hash_dist_equijoin(in_relids, out_relids, InvalidOid, - qual, rtables) && + if (pgxc_qual_has_dist_equijoin(in_relids, + out_relids, InvalidOid, + qual, rtables) && pgxc_is_expr_shippable((Expr *)qual, NULL)) { merge_dist_equijoin = true; @@ -238,7 +239,7 @@ pgxc_is_join_reducible(ExecNodes *inner_en, ExecNodes *outer_en, Relids in_relid return join_exec_nodes; } -/* +/* * pgxc_ship_remotejoin * If there are RemoteQuery paths for the rels being joined, check if the join * is shippable to the datanodes, and if so, create a remotequery path for this @@ -279,10 +280,10 @@ create_joinrel_rqpath(PlannerInfo *root, RelOptInfo *joinrel, inner_en = innerpath->rqpath_en; outer_en = outerpath->rqpath_en; - + if (!inner_en || !outer_en) - elog(ERROR, "No node list provided for remote query path"); - /* + elog(ERROR, "No node list provided for remote query path"); + /* * Collect quals from restrictions so as to check the shippability of a JOIN * 
between distributed relations. */ @@ -291,7 +292,7 @@ create_joinrel_rqpath(PlannerInfo *root, RelOptInfo *joinrel, * If the joining qual is not shippable and it's an OUTER JOIN, we can not * ship the JOIN, since that would impact JOIN result. */ - if (jointype != JOIN_INNER && + if (jointype != JOIN_INNER && !pgxc_is_expr_shippable((Expr *)join_quals, NULL)) return; /* @@ -314,5 +315,5 @@ create_joinrel_rqpath(PlannerInfo *root, RelOptInfo *joinrel, add_path(joinrel, (Path *)create_remotequery_path(root, joinrel, join_en, outerpath, innerpath, jointype, restrictlist)); - return; + return; } diff --git a/src/backend/optimizer/plan/pgxcplan.c b/src/backend/optimizer/plan/pgxcplan.c index 36dc10632b..9811ecdc75 100644 --- a/src/backend/optimizer/plan/pgxcplan.c +++ b/src/backend/optimizer/plan/pgxcplan.c @@ -24,6 +24,7 @@ #include "nodes/nodeFuncs.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" +#include "optimizer/pgxcship.h" #include "optimizer/planmain.h" #include "optimizer/restrictinfo.h" #include "optimizer/tlist.h" @@ -33,7 +34,6 @@ #include "parser/parsetree.h" #include "pgxc/pgxc.h" #include "pgxc/planner.h" -#include "pgxc/postgresql_fdw.h" #include "rewrite/rewriteManip.h" #include "utils/builtins.h" #include "utils/rel.h" @@ -124,7 +124,7 @@ pgxc_build_shippable_tlist(List *tlist, List *unshippabl_quals) if (pgxc_is_expr_shippable((Expr *)tle, NULL)) tmp_rtlist = lappend(tmp_rtlist, expr); else - tmp_rtlist = list_concat(tmp_rtlist, pull_var_clause((Node *)expr, + tmp_rtlist = list_concat(tmp_rtlist, pull_var_clause((Node *)expr, PVC_REJECT_AGGREGATES, PVC_RECURSE_PLACEHOLDERS)); } @@ -137,7 +137,7 @@ pgxc_build_shippable_tlist(List *tlist, List *unshippabl_quals) remote_tlist = add_to_flat_tlist(remote_tlist, pull_var_clause((Node *)unshippabl_quals, PVC_RECURSE_AGGREGATES, PVC_RECURSE_PLACEHOLDERS)); - + return remote_tlist; } @@ -161,11 +161,11 @@ pgxc_build_shippable_query_baserel(PlannerInfo *root, RemoteQueryPath *rqpath, ListCell 
*lcell; RangeTblRef *rtr; - if ((baserel->reloptkind != RELOPT_BASEREL && + if ((baserel->reloptkind != RELOPT_BASEREL && baserel->reloptkind != RELOPT_OTHER_MEMBER_REL) || baserel->rtekind != RTE_RELATION) elog(ERROR, "can not generate shippable query for base relations of type other than plain tables"); - + *rep_tlist = NIL; *unshippable_quals = NIL; /* @@ -199,7 +199,7 @@ pgxc_build_shippable_query_baserel(PlannerInfo *root, RemoteQueryPath *rqpath, /* * The target list that we built just now represents the result of the - * query being built. This serves as a reference for building the + * query being built. This serves as a reference for building the * encapsulating queries. So, copy it. We then modify the Vars to change * their varno with 1 for the query being built */ @@ -209,7 +209,7 @@ pgxc_build_shippable_query_baserel(PlannerInfo *root, RemoteQueryPath *rqpath, query = makeNode(Query); query->commandType = CMD_SELECT; query->rtable = list_make1(rte); - query->targetList = copyObject(*rep_tlist); + query->targetList = copyObject(*rep_tlist); query->jointree = (FromExpr *)makeNode(FromExpr); rtr = makeNode(RangeTblRef); @@ -303,24 +303,24 @@ pgxc_build_shippable_query_jointree(PlannerInfo *root, RemoteQueryPath *rqpath, RangeTblRef *right_rtr; /* Miscellaneous variables */ ListCell *lcell; - + if (!rqpath->leftpath || !rqpath->rightpath) elog(ERROR, "a join relation path should have both left and right paths"); - /* + /* * Build the query representing the left side of JOIN and add corresponding * RTE with proper aliases */ left_query = pgxc_build_shippable_query_recurse(root, rqpath->leftpath, &left_us_quals, &left_rep_tlist); - left_colnames = pgxc_generate_colnames("a", list_length(left_rep_tlist)); + left_colnames = pgxc_generate_colnames("a", list_length(left_rep_tlist)); left_alias = makeAlias(left_aname, left_colnames); left_rte = addRangeTableEntryForSubquery(NULL, left_query, left_alias, true); - rtable = lappend(rtable, left_rte); + rtable = 
lappend(rtable, left_rte); left_rtr = makeNode(RangeTblRef); left_rtr->rtindex = list_length(rtable); - /* + /* * Build the query representing the right side of JOIN and add corresponding * RTE with proper aliases */ @@ -331,7 +331,7 @@ pgxc_build_shippable_query_jointree(PlannerInfo *root, RemoteQueryPath *rqpath, right_alias = makeAlias(right_aname, right_colnames); right_rte = addRangeTableEntryForSubquery(NULL, right_query, right_alias, true); - rtable = lappend(rtable, right_rte); + rtable = lappend(rtable, right_rte); right_rtr = makeNode(RangeTblRef); right_rtr->rtindex = list_length(rtable); @@ -354,7 +354,7 @@ pgxc_build_shippable_query_jointree(PlannerInfo *root, RemoteQueryPath *rqpath, other_clauses = pgxc_separate_quals(other_clauses, unshippable_quals); other_clauses = copyObject(other_clauses); - /* + /* * Build the targetlist for this relation and also the targetlist * representing the query targetlist. The representative target list is in * the form that rest of the plan can understand. The Vars in the JOIN Query @@ -487,8 +487,8 @@ pgxc_rqplan_adjust_vars(RemoteQuery *rqplan, Node *node) TargetEntry *qry_tle; Var *var = (Var *)lfirst(lcell_var); - ref_tle = tlist_member((Node *)var, rqplan->coord_var_tlist); - qry_tle = get_tle_by_resno(rqplan->query_var_tlist, ref_tle->resno); + ref_tle = tlist_member((Node *)var, rqplan->coord_var_tlist); + qry_tle = get_tle_by_resno(rqplan->query_var_tlist, ref_tle->resno); if (!IsA(qry_tle->expr, Var)) elog(ERROR, "expected a VAR node but got node of type %d", nodeTag(qry_tle->expr)); *var = *(Var *)(qry_tle->expr); @@ -515,7 +515,7 @@ pgxc_rqplan_build_statement(RemoteQuery *rqplan) * pgxc_rqplan_adjust_tlist * The function adjusts the targetlist of remote_query in RemoteQuery node * according to the plan's targetlist. This function should be - * called whenever we modify or set plan's targetlist (plan->targetlist). + * called whenever we modify or set plan's targetlist (plan->targetlist). 
*/ extern void pgxc_rqplan_adjust_tlist(RemoteQuery *rqplan) @@ -550,10 +550,10 @@ static void pgxc_build_shippable_query(PlannerInfo *root, RemoteQueryPath *covering_path, RemoteQuery *result_node) { - Query *query; + Query *query; List *rep_tlist; List *unshippable_quals; - + /* * Build Query representing the result of the JOIN tree. During the process * we also get the set of unshippable quals to be applied after getting the @@ -583,7 +583,7 @@ pgxc_build_shippable_query(PlannerInfo *root, RemoteQueryPath *covering_path, Plan * create_remotequery_plan(PlannerInfo *root, RemoteQueryPath *best_path) { - RelOptInfo *rel = best_path->path.parent; /* relation for which plan is + RelOptInfo *rel = best_path->path.parent; /* relation for which plan is * being built */ RemoteQuery *result_node; /* the built plan */ @@ -594,12 +594,12 @@ create_remotequery_plan(PlannerInfo *root, RemoteQueryPath *best_path) Index dummy_rtindex; char *rte_name; - /* Get the target list required from this plan */ + /* Get the target list required from this plan */ tlist = pgxc_build_relation_tlist(rel); result_node = makeNode(RemoteQuery); result_node->scan.plan.targetlist = tlist; pgxc_build_shippable_query(root, best_path, result_node); - + /* * Create and append the dummy range table entry to the range table. 
* Note that this modifies the master copy the caller passed us, otherwise @@ -621,9 +621,9 @@ create_remotequery_plan(PlannerInfo *root, RemoteQueryPath *best_path) makeAlias("_REMOTE_TABLE_QUERY_", NIL)); root->parse->rtable = lappend(root->parse->rtable, dummy_rte); dummy_rtindex = list_length(root->parse->rtable); - + result_node->scan.scanrelid = dummy_rtindex; - result_node->read_only = true; + result_node->read_only = true; /* result_node->read_only = (query->commandType == CMD_SELECT && !query->hasForUpdate); */ /* result_node->has_row_marks = query->hasForUpdate; */ result_node->exec_nodes = best_path->rqpath_en; @@ -632,7 +632,7 @@ create_remotequery_plan(PlannerInfo *root, RemoteQueryPath *best_path) * many of them. */ if (IsLocatorReplicated(result_node->exec_nodes->baselocatortype)) - result_node->exec_nodes->nodeList = + result_node->exec_nodes->nodeList = GetPreferredReplicationNode(result_node->exec_nodes->nodeList); result_node->is_temp = best_path->rqhas_temp_rel; @@ -1419,13 +1419,13 @@ create_remotegrouping_plan(PlannerInfo *root, Plan *local_plan) SortGroupClause *sortClauseItem = makeNode(SortGroupClause); TargetEntry *sc_tle = get_tle_by_resno(base_tlist, grpColIdx[cntCols]); - + Assert(sc_tle->ressortgroupref > 0); sortClauseItem->tleSortGroupRef = sc_tle->ressortgroupref; sortClauseItem->sortop = remote_sort->sortOperators[cntCols]; sortClauseItem->nulls_first = remote_sort->nullsFirst[cntCols]; sortClause = lappend(sortClause, sortClauseItem); - + /* set the sorting column index in the Sort node in RemoteQuery */ remote_sort->sortColIdx[cntCols] = grpColIdx[cntCols]; } diff --git a/src/backend/optimizer/util/Makefile b/src/backend/optimizer/util/Makefile index 3b2d16b635..37244ad0be 100644 --- a/src/backend/optimizer/util/Makefile +++ b/src/backend/optimizer/util/Makefile @@ -13,6 +13,6 @@ top_builddir = ../../../.. 
include $(top_builddir)/src/Makefile.global OBJS = clauses.o joininfo.o pathnode.o placeholder.o plancat.o predtest.o \ - relnode.o restrictinfo.o tlist.o var.o + relnode.o restrictinfo.o tlist.o var.o pgxcship.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 609310bd0c..ef3a50d82f 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -30,7 +30,6 @@ #include "utils/selfuncs.h" #ifdef PGXC #include "commands/tablecmds.h" -#include "pgxc/postgresql_fdw.h" #include "optimizer/restrictinfo.h" #endif /* PGXC */ diff --git a/src/backend/optimizer/util/pgxcship.c b/src/backend/optimizer/util/pgxcship.c new file mode 100644 index 0000000000..56d5bb43ae --- /dev/null +++ b/src/backend/optimizer/util/pgxcship.c @@ -0,0 +1,1447 @@ +/*------------------------------------------------------------------------- + * + * pgxcship.c + * Routines to evaluate expression shippability to remote nodes + * + * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group + * Portions Copyright (c) 2010-2012, Postgres-XC Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/optimizer/util/pgxcship.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "catalog/pg_class.h" +#include "catalog/pg_inherits_fn.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_type.h" +#include "catalog/pgxc_node.h" +#include "nodes/nodeFuncs.h" +#include "nodes/relation.h" +#include "optimizer/clauses.h" +#include "optimizer/pgxcship.h" +#include "parser/parsetree.h" +#include "parser/parse_coerce.h" +#include "parser/parse_type.h" +#include "pgxc/pgxcnode.h" +#include "utils/lsyscache.h" + +/* Forbid unsafe SQL statements */ +bool StrictStatementChecking = true; + +/* + * 
Shippability_context + * This context structure is used by the Fast Query Shipping walker, to gather + * information during analysing query for Fast Query Shipping. + */ +typedef struct +{ + bool sc_for_expr; /* if false, the we are checking shippability + * of the Query, otherwise, we are checking + * shippability of a stand-alone expression. + */ + Bitmapset *sc_shippability; /* The conditions for (un)shippability of the + * query. + */ + Query *sc_query; /* the query being analysed for FQS */ + int sc_query_level; /* level of the query */ + int sc_max_varlevelsup; /* maximum upper level referred to by any + * variable reference in the query. If this + * value is greater than 0, the query is not + * shippable, if shipped alone. + */ + ExecNodes *sc_exec_nodes; /* nodes where the query should be executed */ + ExecNodes *sc_subquery_en; /* ExecNodes produced by merging the ExecNodes + * for individual subqueries. This gets + * ultimately merged with sc_exec_nodes. + */ +} Shippability_context; + +/* + * ShippabilityStat + * List of reasons why a query/expression is not shippable to remote nodes. + */ +typedef enum +{ + SS_UNSHIPPABLE_EXPR = 0, /* it has unshippable expression */ + SS_NEED_SINGLENODE, /* Has expressions which can be evaluated when + * there is only a single node involved. + * Athought aggregates too fit in this class, we + * have a separate status to report aggregates, + * see below. 
+ */ + SS_NEEDS_COORD, /* the query needs Coordinator */ + SS_VARLEVEL, /* one of its subqueries has a VAR + * referencing an upper level query + * relation + */ + SS_NO_NODES, /* no suitable nodes can be found to ship + * the query + */ + SS_UNSUPPORTED_EXPR, /* it has expressions currently unsupported + * by FQS, but such expressions might be + * supported by FQS in future + */ + SS_HAS_AGG_EXPR, /* it has aggregate expressions */ + SS_UNSHIPPABLE_TYPE /* the type of expression is unshippable */ +} ShippabilityStat; + +/* Manipulation of shippability reason */ +static bool pgxc_test_shippability_reason(Shippability_context *context, + ShippabilityStat reason); +static void pgxc_set_shippability_reason(Shippability_context *context, + ShippabilityStat reason); +static void pgxc_reset_shippability_reason(Shippability_context *context, + ShippabilityStat reason); + +/* Evaluation of shippability */ +static bool pgxc_shippability_walker(Node *node, Shippability_context *sc_context); +static void pgxc_set_exprtype_shippability(Oid exprtype, Shippability_context *sc_context); + +/* Fast-query shipping (FQS) functions */ +static ExecNodes *pgxc_FQS_get_relation_nodes(RangeTblEntry *rte, + Index varno, + Query *query); +static void pgxc_FQS_find_datanodes(Shippability_context *sc_context); +static bool pgxc_query_needs_coord(Query *query); +static bool pgxc_query_contains_only_pg_catalog(List *rtable); +static bool pgxc_is_var_distrib_column(Var *var, List *rtable); + + +/* + * Set the given reason in Shippability_context indicating why the query can not be + * shipped directly to remote nodes. 
+ */ +static void +pgxc_set_shippability_reason(Shippability_context *context, ShippabilityStat reason) +{ + context->sc_shippability = bms_add_member(context->sc_shippability, reason); +} + +/* + * pgxc_reset_shippability_reason + * Reset reason why the query cannot be shipped to remote nodes + */ +static void +pgxc_reset_shippability_reason(Shippability_context *context, ShippabilityStat reason) +{ + context->sc_shippability = bms_del_member(context->sc_shippability, reason); + return; +} + + +/* + * See if a given reason is why the query can not be shipped directly + * to the remote nodes. + */ +static bool +pgxc_test_shippability_reason(Shippability_context *context, ShippabilityStat reason) +{ + return bms_is_member(reason, context->sc_shippability); +} + + +/* + * pgxc_set_exprtype_shippability + * Set the expression type shippability. For now composite types + * derived from view definitions are not shippable. + */ +static void +pgxc_set_exprtype_shippability(Oid exprtype, Shippability_context *sc_context) +{ + char typerelkind; + + typerelkind = get_rel_relkind(typeidTypeRelid(exprtype)); + + if (typerelkind == RELKIND_SEQUENCE || + typerelkind == RELKIND_VIEW || + typerelkind == RELKIND_FOREIGN_TABLE) + pgxc_set_shippability_reason(sc_context, SS_UNSHIPPABLE_TYPE); +} + + +/* + * pgxc_FQS_find_datanodes + * Find the list of nodes where to ship query. + */ +static void +pgxc_FQS_find_datanodes(Shippability_context *sc_context) +{ + Query *query = sc_context->sc_query; + ListCell *rt; + ExecNodes *exec_nodes = NULL; + bool canShip = true; + Index varno = 0; + + /* No query, no nodes to execute! */ + if (!query) + { + sc_context->sc_exec_nodes = NULL; + return; + } + + /* + * For every range table entry, + * 1. Find out the Datanodes needed for that range table + * 2. Merge these Datanodes with the already available Datanodes + * 3. 
If the merge is unsuccessful, we can not ship this query directly to + * the Datanode/s + */ + foreach(rt, query->rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(rt); + Oid distcol_type; /* TODO mostly this is not needed */ + Relids dist_varnos; + + varno++; + switch (rte->rtekind) + { + case RTE_RELATION: + { + ExecNodes *rel_exec_nodes; + ExecNodes *tmp_en; + bool merge_dist_equijoin = false; + /* + * In case of inheritance, child tables can have completely different + * Datanode distribution than parent. To handle inheritance we need + * to merge the Datanodes of the children table as well. The inheritance + * is resolved during planning(?), so we may not have the RTEs of the + * children here. Also, the exact method of merging Datanodes of the + * children is not known yet. So, when inheritance is requested, query + * can not be shipped. + */ + if (rte->inh) + { + /* + * See prologue of has_subclass, we might miss on the + * optimization because has_subclass can return true + * even if there aren't any subclasses, but it's ok + */ + if (has_subclass(rte->relid)) + { + canShip = false; + break; + } + } + + if (rte->relkind != RELKIND_RELATION) + { + canShip = false; + break; + } + rel_exec_nodes = pgxc_FQS_get_relation_nodes(rte,varno, query); + if (!rel_exec_nodes) + { + /* + * No information about the location of relation in XC, + * a local table OR system catalog. The query can not be + * pushed. 
+ */ + canShip = false; + break; + } + if (varno == 1) + { + if (IsLocatorColumnDistributed(rel_exec_nodes->baselocatortype)) + { + RelationLocInfo *rel_loc_info = GetRelationLocInfo(rte->relid); + distcol_type = get_atttype(rte->relid, + rel_loc_info->partAttrNum); + dist_varnos = bms_make_singleton(varno); + } + else + { + distcol_type = InvalidOid; + dist_varnos = NULL; + } + } + if (exec_nodes && + IsLocatorDistributedByValue(exec_nodes->baselocatortype) && + OidIsValid(distcol_type) && bms_num_members(dist_varnos) > 0 && + exec_nodes->baselocatortype == rel_exec_nodes->baselocatortype) + { + /* + * If the already reduced JOINs is distributed the same way + * as the current relation, check if there exists an + * equi-join condition between the relations and the data type + * of distribution column involved is same for both the + * relations + */ + if (pgxc_qual_has_dist_equijoin(dist_varnos, + bms_make_singleton(varno), + distcol_type, + query->jointree->quals, + query->rtable)) + merge_dist_equijoin = true; + } + + /* Save the current exec_nodes to be freed later */ + tmp_en = exec_nodes; + exec_nodes = pgxc_merge_exec_nodes(exec_nodes, rel_exec_nodes, + merge_dist_equijoin, + false); + /* + * The JOIN is equijoin between distributed tables, and we could + * obtain the nodelist for pushing this JOIN, so add the current + * relation to the list of relations already JOINed in the same + * fashion. + */ + if (exec_nodes && merge_dist_equijoin) + dist_varnos = bms_add_member(dist_varnos, varno); + FreeExecNodes(&tmp_en); + } + break; + + case RTE_JOIN: + /* Is information here useful in some or other way? 
*/ + break; + case RTE_CTE: + case RTE_SUBQUERY: + case RTE_FUNCTION: + case RTE_VALUES: + default: + canShip = false; + } + + if (!canShip || !exec_nodes) + break; + } + + /* + * If we didn't find the Datanodes to ship the query to, we shouldn't ship + * the query :) + */ + if (!exec_nodes || !(exec_nodes->nodeList || exec_nodes->en_expr)) + canShip = false; + + if (canShip) + { + /* + * If relations involved in the query are such that ultimate JOIN is + * replicated JOIN, choose only one of them. If one of them is a + * preferred node choose that one, otherwise choose the first one. + */ + if (IsLocatorReplicated(exec_nodes->baselocatortype) && + exec_nodes->accesstype == RELATION_ACCESS_READ) + { + List *tmp_list = exec_nodes->nodeList; + ListCell *item; + int nodeid = -1; + foreach(item, exec_nodes->nodeList) + { + int cnt_nodes; + for (cnt_nodes = 0; + cnt_nodes < num_preferred_data_nodes && nodeid < 0; + cnt_nodes++) + { + if (PGXCNodeGetNodeId(preferred_data_node[cnt_nodes], + PGXC_NODE_DATANODE) == lfirst_int(item)) + nodeid = lfirst_int(item); + } + if (nodeid >= 0) + break; + } + if (nodeid < 0) + exec_nodes->nodeList = list_make1_int(linitial_int(exec_nodes->nodeList)); + else + exec_nodes->nodeList = list_make1_int(nodeid); + list_free(tmp_list); + } + sc_context->sc_exec_nodes = exec_nodes; + } + else if (exec_nodes) + { + FreeExecNodes(&exec_nodes); + } + return; +} + + +/* + * pgxc_FQS_get_relation_nodes + * Return ExecNodes structure so as to decide which node the query should + * execute on. If it is possible to set the node list directly, set it. + * Otherwise set the appropriate distribution column expression or relid in + * ExecNodes structure. + */ +static ExecNodes * +pgxc_FQS_get_relation_nodes(RangeTblEntry *rte, Index varno, Query *query) +{ + CmdType command_type = query->commandType; + bool for_update = query->rowMarks ? 
true : false; + ExecNodes *rel_exec_nodes; + RelationAccessType rel_access = RELATION_ACCESS_READ; + RelationLocInfo *rel_loc_info; + + Assert(rte == rt_fetch(varno, (query->rtable))); + + switch (command_type) + { + case CMD_SELECT: + if (for_update) + rel_access = RELATION_ACCESS_READ_FOR_UPDATE; + else + rel_access = RELATION_ACCESS_READ; + break; + + case CMD_UPDATE: + case CMD_DELETE: + rel_access = RELATION_ACCESS_UPDATE; + break; + + case CMD_INSERT: + rel_access = RELATION_ACCESS_INSERT; + break; + + default: + /* should not happen, but */ + elog(ERROR, "Unrecognised command type %d", command_type); + break; + } + + rel_loc_info = GetRelationLocInfo(rte->relid); + /* If we don't know about the distribution of relation, bail out */ + if (!rel_loc_info) + return NULL; + + /* + * Find out the datanodes to execute this query on. + * PGXC_FQS_TODO: for now, we apply node reduction only when there is only + * one relation involved in the query. If there are multiple distributed + * tables in the query and we apply node reduction here, we may fail to ship + * the entire join. We should apply node reduction transitively. + */ + if (list_length(query->rtable) == 1) + rel_exec_nodes = GetRelationNodesByQuals(rte->relid, varno, + query->jointree->quals, rel_access); + else + rel_exec_nodes = GetRelationNodes(rel_loc_info, (Datum) 0, + true, InvalidOid, rel_access); + + if (!rel_exec_nodes) + return NULL; + rel_exec_nodes->accesstype = rel_access; + /* + * If we are reading a replicated table, pick all the nodes where it + * resides. If the query has JOIN, it helps picking up a matching set of + * Datanodes for that JOIN. FQS planner will ultimately pick up one node if + * the JOIN is replicated. 
+ */ + if (rel_access == RELATION_ACCESS_READ && + IsLocatorReplicated(rel_loc_info->locatorType)) + { + list_free(rel_exec_nodes->nodeList); + rel_exec_nodes->nodeList = list_copy(rel_loc_info->nodeList); + } + else if (rel_access == RELATION_ACCESS_INSERT && + IsLocatorDistributedByValue(rel_loc_info->locatorType)) + { + ListCell *lc; + TargetEntry *tle; + /* + * If the INSERT is happening on a table distributed by value of a + * column, find out the + * expression for distribution column in the targetlist, and stick in + * in ExecNodes, and clear the nodelist. Execution will find + * out where to insert the row. + */ + /* It is a partitioned table, get value by looking in targetList */ + foreach(lc, query->targetList) + { + tle = (TargetEntry *) lfirst(lc); + + if (tle->resjunk) + continue; + if (strcmp(tle->resname, rel_loc_info->partAttrName) == 0) + break; + } + /* Not found, bail out */ + if (!lc) + return NULL; + + Assert(tle); + /* We found the TargetEntry for the partition column */ + list_free(rel_exec_nodes->primarynodelist); + rel_exec_nodes->primarynodelist = NULL; + list_free(rel_exec_nodes->nodeList); + rel_exec_nodes->nodeList = NULL; + rel_exec_nodes->en_expr = tle->expr; + rel_exec_nodes->en_relid = rel_loc_info->relid; + } + return rel_exec_nodes; +} + + +/* + * pgxc_shippability_walker + * walks the query/expression tree routed at the node passed in, gathering + * information which will help decide whether the query to which this node + * belongs is shippable to the Datanodes. + * + * The function should try to walk the entire tree analysing each subquery for + * shippability. If a subquery is shippable but not the whole query, we would be + * able to create a RemoteQuery node for that subquery, shipping it to the + * Datanode. + * + * Return value of this function is governed by the same rules as + * expression_tree_walker(), see prologue of that function for details. 
+ */ +static bool +pgxc_shippability_walker(Node *node, Shippability_context *sc_context) +{ + if (node == NULL) + return false; + + /* Below is the list of nodes that can appear in a query, examine each + * kind of node and find out under what conditions query with this node can + * be shippable. For each node, update the context (add fields if + * necessary) so that decision whether to FQS the query or not can be made. + * Every node which has a result is checked to see if the result type of that + * expression is shippable. + */ + switch(nodeTag(node)) + { + /* Constants are always shippable */ + case T_Const: + pgxc_set_exprtype_shippability(exprType(node), sc_context); + break; + + /* + * For placeholder nodes the shippability of the node, depends upon the + * expression which they refer to. It will be checked separately, when + * that expression is encountered. + */ + case T_CaseTestExpr: + pgxc_set_exprtype_shippability(exprType(node), sc_context); + break; + + /* + * record_in() function throws error, thus requesting a result in the + * form of anonymous record from datanode gets into error. Hence, if the + * top expression of a target entry is ROW(), it's not shippable. + */ + case T_TargetEntry: + { + TargetEntry *tle = (TargetEntry *)node; + if (tle->expr) + { + char typtype = get_typtype(exprType((Node *)tle->expr)); + if (!typtype || typtype == TYPTYPE_PSEUDO) + pgxc_set_shippability_reason(sc_context, SS_UNSHIPPABLE_EXPR); + } + } + break; + + case T_SortGroupClause: + if (sc_context->sc_for_expr) + pgxc_set_shippability_reason(sc_context, SS_UNSUPPORTED_EXPR); + break; + + case T_CoerceViaIO: + { + CoerceViaIO *cvio = (CoerceViaIO *)node; + Oid input_type = exprType((Node *)cvio->arg); + Oid output_type = cvio->resulttype; + CoercionContext cc; + + cc = cvio->coerceformat == COERCE_IMPLICIT_CAST ? COERCION_IMPLICIT : + COERCION_EXPLICIT; + /* + * Internally we use IO coercion for types which do not have casting + * defined for them e.g. 
cstring::date. If such casts are sent to + * the datanode, those won't be accepted. Hence such casts are + * unshippable. Since it will be shown as an explicit cast. + */ + if (!can_coerce_type(1, &input_type, &output_type, cc)) + pgxc_set_shippability_reason(sc_context, SS_UNSHIPPABLE_EXPR); + pgxc_set_exprtype_shippability(exprType(node), sc_context); + } + break; + /* + * Nodes, which are shippable if the tree rooted under these nodes is + * shippable + */ + case T_CoerceToDomainValue: + /* + * PGXCTODO: mostly, CoerceToDomainValue node appears in DDLs, + * do we handle DDLs here? + */ + case T_FieldSelect: + case T_NamedArgExpr: + case T_RelabelType: + case T_BoolExpr: + /* + * PGXCTODO: we might need to take into account the kind of boolean + * operator we have in the quals and see if the corresponding + * function is immutable. + */ + case T_ArrayCoerceExpr: + case T_ConvertRowtypeExpr: + case T_CaseExpr: + case T_ArrayExpr: + case T_RowExpr: + case T_CollateExpr: + case T_CoalesceExpr: + case T_XmlExpr: + case T_NullTest: + case T_BooleanTest: + case T_CoerceToDomain: + pgxc_set_exprtype_shippability(exprType(node), sc_context); + break; + + case T_List: + case T_RangeTblRef: + break; + + case T_ArrayRef: + /* + * When multiple values of of an array are updated at once + * FQS planner cannot yet handle SQL representation correctly. + * So disable FQS in this case and let standard planner manage it. + */ + case T_FieldStore: + /* + * PostgreSQL deparsing logic does not handle the FieldStore + * for more than one fields (see processIndirection()). So, let's + * handle it through standard planner, where whole row will be + * constructed. + */ + case T_SetToDefault: + /* + * PGXCTODO: we should actually check whether the default value to + * be substituted is shippable to the Datanode. 
Some cases like + * nextval() of a sequence can not be shipped to the Datanode, hence + * for now default values can not be shipped to the Datanodes + */ + pgxc_set_shippability_reason(sc_context, SS_UNSUPPORTED_EXPR); + pgxc_set_exprtype_shippability(exprType(node), sc_context); + break; + + case T_Var: + { + Var *var = (Var *)node; + /* + * if a subquery references an upper level variable, that query is + * not shippable, if shipped alone. + */ + if (var->varlevelsup > sc_context->sc_max_varlevelsup) + sc_context->sc_max_varlevelsup = var->varlevelsup; + pgxc_set_exprtype_shippability(exprType(node), sc_context); + } + break; + + case T_Param: + { + Param *param = (Param *)node; + /* PGXCTODO: Can we handle internally generated parameters? */ + if (param->paramkind != PARAM_EXTERN) + pgxc_set_shippability_reason(sc_context, SS_UNSUPPORTED_EXPR); + pgxc_set_exprtype_shippability(exprType(node), sc_context); + } + break; + + case T_CurrentOfExpr: + { + /* + * Ideally we should not see CurrentOf expression here, it + * should have been replaced by the CTID = ? expression. But + * still, no harm in shipping it as is. + */ + pgxc_set_exprtype_shippability(exprType(node), sc_context); + } + break; + + case T_Aggref: + { + Aggref *aggref = (Aggref *)node; + /* + * An aggregate is completely shippable to the Datanode, if the + * whole group resides on that Datanode. This will be clear when + * we see the GROUP BY clause. + * agglevelsup is minimum of variable's varlevelsup, so we will + * set the sc_max_varlevelsup when we reach the appropriate + * VARs in the tree. + */ + pgxc_set_shippability_reason(sc_context, SS_HAS_AGG_EXPR); + /* + * If a stand-alone expression to be shipped, is an + * 1. aggregate with ORDER BY, DISTINCT directives, it needs all + * the qualifying rows + * 2. aggregate without collection function + * 3. 
(PGXCTODO:)aggregate with polymorphic transition type, the + * the transition type needs to be resolved to correctly interpret + * the transition results from Datanodes. + * Hence, such an expression can not be shipped to the datanodes. + */ + if (aggref->aggorder || + aggref->aggdistinct || + aggref->agglevelsup || + !aggref->agghas_collectfn || + IsPolymorphicType(aggref->aggtrantype)) + pgxc_set_shippability_reason(sc_context, SS_NEED_SINGLENODE); + + pgxc_set_exprtype_shippability(exprType(node), sc_context); + } + break; + + case T_FuncExpr: + { + FuncExpr *funcexpr = (FuncExpr *)node; + /* + * PGXC_FQS_TODO: it's too restrictive not to ship non-immutable + * functions to the Datanode. We need a better way to see what + * can be shipped to the Datanode and what can not be. + */ + if (!pgxc_is_func_shippable(funcexpr->funcid)) + pgxc_set_shippability_reason(sc_context, SS_UNSHIPPABLE_EXPR); + + pgxc_set_exprtype_shippability(exprType(node), sc_context); + } + break; + + case T_OpExpr: + case T_DistinctExpr: /* struct-equivalent to OpExpr */ + case T_NullIfExpr: /* struct-equivalent to OpExpr */ + { + /* + * All of these three are structurally equivalent to OpExpr, so + * cast the node to OpExpr and check if the operator function is + * immutable. See PGXC_FQS_TODO item for FuncExpr. + */ + OpExpr *op_expr = (OpExpr *)node; + Oid opfuncid = OidIsValid(op_expr->opfuncid) ? + op_expr->opfuncid : get_opcode(op_expr->opno); + if (!OidIsValid(opfuncid) || + !pgxc_is_func_shippable(opfuncid)) + pgxc_set_shippability_reason(sc_context, SS_UNSHIPPABLE_EXPR); + + pgxc_set_exprtype_shippability(exprType(node), sc_context); + } + break; + + case T_ScalarArrayOpExpr: + { + /* + * Check if the operator function is shippable to the Datanode + * PGXC_FQS_TODO: see immutability note for FuncExpr above + */ + ScalarArrayOpExpr *sao_expr = (ScalarArrayOpExpr *)node; + Oid opfuncid = OidIsValid(sao_expr->opfuncid) ? 
+ sao_expr->opfuncid : get_opcode(sao_expr->opno); + if (!OidIsValid(opfuncid) || + !pgxc_is_func_shippable(opfuncid)) + pgxc_set_shippability_reason(sc_context, SS_UNSHIPPABLE_EXPR); + } + break; + + case T_RowCompareExpr: + case T_MinMaxExpr: + { + /* + * PGXCTODO should we be checking the comparision operator + * functions as well, as we did for OpExpr OR that check is + * unnecessary. Operator functions are always shippable? + * Otherwise this node should be treated similar to other + * "shell" nodes. + */ + pgxc_set_exprtype_shippability(exprType(node), sc_context); + } + break; + + case T_Query: + { + Query *query = (Query *)node; + + /* A stand-alone expression containing Query is not shippable */ + if (sc_context->sc_for_expr) + { + pgxc_set_shippability_reason(sc_context, SS_UNSHIPPABLE_EXPR); + break; + } + /* We are checking shippability of whole query, go ahead */ + + /* CREATE TABLE AS is not supported in FQS */ + if (query->commandType == CMD_UTILITY && + IsA(query->utilityStmt, CreateTableAsStmt)) + pgxc_set_shippability_reason(sc_context, SS_UNSUPPORTED_EXPR); + + if (query->hasRecursive) + pgxc_set_shippability_reason(sc_context, SS_UNSUPPORTED_EXPR); + /* + * If the query needs Coordinator for evaluation or the query can be + * completed on Coordinator itself, we don't ship it to the Datanode + */ + if (pgxc_query_needs_coord(query)) + pgxc_set_shippability_reason(sc_context, SS_NEEDS_COORD); + + /* PGXCTODO: It should be possible to look at the Query and find out + * whether it can be completely evaluated on the Datanode just like SELECT + * queries. But we need to be careful while finding out the Datanodes to + * execute the query on, esp. for the result relations. If one happens to + * remove/change this restriction, make sure you change + * pgxc_FQS_get_relation_nodes appropriately. 
+ * For now DMLs with single rtable entry are candidates for FQS + */ + if (query->commandType != CMD_SELECT && list_length(query->rtable) > 1) + pgxc_set_shippability_reason(sc_context, SS_UNSUPPORTED_EXPR); + + /* + * In following conditions query is shippable when there is only one + * Datanode involved + * 1. the query has aggregagtes + * 2. the query has window functions + * 3. the query has ORDER BY clause + * 4. the query has Distinct clause + * 5. the query has limit and offset clause + * + * PGXC_FQS_TODO: Condition 1 above is really dependent upon the GROUP BY clause. If + * all rows in each group reside on the same Datanode, aggregates can be + * evaluated on that Datanode, thus condition 1 is has aggregates & the rows + * in any group reside on multiple Datanodes. + * PGXC_FQS_TODO: Condition 2 above is really dependent upon whether the distinct + * clause has distribution column in it. If the distinct clause has + * distribution column in it, we can ship DISTINCT clause to the Datanodes. + */ + if (query->hasAggs || query->hasWindowFuncs || query->sortClause || + query->distinctClause || query->groupClause || query->havingQual || + query->limitOffset || query->limitCount) + pgxc_set_shippability_reason(sc_context, SS_NEED_SINGLENODE); + + /* walk the entire query tree to analyse the query */ + if (query_tree_walker(query, pgxc_shippability_walker, sc_context, 0)) + return true; + + /* + * PGXC_FQS_TODO: + * There is a subquery in this query, which references Vars in the upper + * query. For now stop shipping such queries. We should get rid of this + * condition. 
+ */ + if (sc_context->sc_max_varlevelsup != 0) + pgxc_set_shippability_reason(sc_context, SS_VARLEVEL); + + /* + * Walk the RangeTableEntries of the query and find the + * Datanodes needed for evaluating this query + */ + pgxc_FQS_find_datanodes(sc_context); + } + break; + + case T_FromExpr: + { + /* We don't expect FromExpr in a stand-alone expression */ + if (sc_context->sc_for_expr) + pgxc_set_shippability_reason(sc_context, SS_UNSUPPORTED_EXPR); + + /* + * We will be examining the range table entries separately and + * Join expressions are not candidate for FQS. + * If this is an INSERT query with quals, resulting from say + * conditional rule, we can not handle those in FQS, since there is + * not SQL representation for such quals. + */ + if (sc_context->sc_query->commandType == CMD_INSERT && + ((FromExpr *)node)->quals) + pgxc_set_shippability_reason(sc_context, SS_UNSUPPORTED_EXPR); + + } + break; + + case T_WindowFunc: + { + WindowFunc *winf = (WindowFunc *)node; + /* + * A window function can be evaluated on a Datanode if there is + * only one Datanode involved. + */ + pgxc_set_shippability_reason(sc_context, SS_NEED_SINGLENODE); + + /* + * A window function is not shippable as part of a stand-alone + * expression. If the window function is non-immutable, it can not + * be shipped to the datanodes. + */ + if (sc_context->sc_for_expr || + !pgxc_is_func_shippable(winf->winfnoid)) + pgxc_set_shippability_reason(sc_context, SS_UNSHIPPABLE_EXPR); + + pgxc_set_exprtype_shippability(exprType(node), sc_context); + } + break; + + case T_WindowClause: + { + /* + * A window function can be evaluated on a Datanode if there is + * only one Datanode involved. 
+ */ + pgxc_set_shippability_reason(sc_context, SS_NEED_SINGLENODE); + + /* + * A window function is not shippable as part of a stand-alone + * expression + */ + if (sc_context->sc_for_expr) + pgxc_set_shippability_reason(sc_context, SS_UNSHIPPABLE_EXPR); + } + break; + + case T_JoinExpr: + /* We don't expect JoinExpr in a stand-alone expression */ + if (sc_context->sc_for_expr) + pgxc_set_shippability_reason(sc_context, SS_UNSUPPORTED_EXPR); + + /* + * For JoinExpr in a Query + * The compatibility of joining ranges will be deduced while + * examining the range table of the query. Nothing to do here + */ + break; + + case T_SubLink: + { + SubLink *sublink = (SubLink *)node; + ExecNodes *sublink_en; + /* + * Walk the query and find the nodes where the query should be + * executed and node distribution. Merge this with the existing + * node list obtained for other subqueries. If merging fails, we + * can not ship the whole query. + */ + if (IsA(sublink->subselect, Query)) + sublink_en = pgxc_is_query_shippable((Query *)(sublink->subselect), + sc_context->sc_query_level); + else + sublink_en = NULL; + + /* PGXCTODO free the old sc_subquery_en. */ + /* If we already know that this query does not have a set of nodes + * to evaluate on, don't bother to merge again. 
+ */ + if (!pgxc_test_shippability_reason(sc_context, SS_NO_NODES)) + { + sc_context->sc_subquery_en = pgxc_merge_exec_nodes(sublink_en, + sc_context->sc_subquery_en, + false, + true); + if (!sc_context->sc_subquery_en) + pgxc_set_shippability_reason(sc_context, SS_NO_NODES); + } + + pgxc_set_exprtype_shippability(exprType(node), sc_context); + } + break; + + case T_SubPlan: + case T_AlternativeSubPlan: + case T_CommonTableExpr: + case T_SetOperationStmt: + case T_PlaceHolderVar: + case T_AppendRelInfo: + case T_PlaceHolderInfo: + { + /* PGXCTODO: till we exhaust this list */ + pgxc_set_shippability_reason(sc_context, SS_UNSUPPORTED_EXPR); + } + break; + + default: + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(node)); + break; + } + + return expression_tree_walker(node, pgxc_shippability_walker, (void *)sc_context); +} + + +/* + * pgxc_query_needs_coord + * Check if the query needs Coordinator for evaluation or it can be completely + * evaluated on Coordinator. Return true if so, otherwise return false. + */ +static bool +pgxc_query_needs_coord(Query *query) +{ + /* + * If the query is an EXEC DIRECT on the same Coordinator where it's fired, + * it should not be shipped + */ + if (query->is_local) + return true; + /* + * If the query involves just the catalog tables, and is not an EXEC DIRECT + * statement, it can be evaluated completely on the Coordinator. No need to + * involve Datanodes. + */ + if (pgxc_query_contains_only_pg_catalog(query->rtable)) + return true; + + /* Allow for override */ + if (query->commandType != CMD_SELECT && + query->commandType != CMD_INSERT && + query->commandType != CMD_UPDATE && + query->commandType != CMD_DELETE) + { + if (StrictStatementChecking) + ereport(ERROR, + (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), + (errmsg("This command is not yet supported.")))); + + return true; + } + + return false; +} + + +/* + * pgxc_is_var_distrib_column + * Check if given var is a distribution key. 
+ */ +static +bool pgxc_is_var_distrib_column(Var *var, List *rtable) +{ + RangeTblEntry *rte = rt_fetch(var->varno, rtable); + RelationLocInfo *rel_loc_info; + + /* distribution column only applies to the relations */ + if (rte->rtekind != RTE_RELATION || + rte->relkind != RELKIND_RELATION) + return false; + rel_loc_info = GetRelationLocInfo(rte->relid); + if (!rel_loc_info) + return false; + if (var->varattno == rel_loc_info->partAttrNum) + return true; + return false; +} + + +/* + * Returns whether or not the rtable (and its subqueries) + * only contain pg_catalog entries. + */ +static bool +pgxc_query_contains_only_pg_catalog(List *rtable) +{ + ListCell *item; + + /* May be complicated. Before giving up, just check for pg_catalog usage */ + foreach(item, rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(item); + + if (rte->rtekind == RTE_RELATION) + { + if (get_rel_namespace(rte->relid) != PG_CATALOG_NAMESPACE) + return false; + } + else if (rte->rtekind == RTE_SUBQUERY && + !pgxc_query_contains_only_pg_catalog(rte->subquery->rtable)) + return false; + } + return true; +} + + +/* + * pgxc_is_query_shippable + * This function calls the query walker to analyse the query to gather + * information like Constraints under which the query can be shippable, nodes + * on which the query is going to be executed etc. + * Based on the information gathered, it decides whether the query can be + * executed on Datanodes directly without involving Coordinator. + * If the query is shippable this routine also returns the nodes where the query + * should be shipped. If the query is not shippable, it returns NULL. 
+ */ +ExecNodes * +pgxc_is_query_shippable(Query *query, int query_level) +{ + Shippability_context sc_context; + ExecNodes *exec_nodes; + bool canShip = true; + Bitmapset *shippability; + + memset(&sc_context, 0, sizeof(sc_context)); + /* let's assume that by default query is shippable */ + sc_context.sc_query = query; + sc_context.sc_query_level = query_level; + sc_context.sc_for_expr = false; + + /* + * We might have already decided not to ship the query to the Datanodes, but + * still walk it anyway to find out if there are any subqueries which can be + * shipped. + */ + pgxc_shippability_walker((Node *)query, &sc_context); + /* + * We have merged the nodelists and distributions of all subqueries seen in + * the query tree, merge it with the same obtained for the relations + * involved in the query. + * PGXC_FQS_TODO: + * Merge the subquery ExecNodes if both of them are replicated. + * The logic to merge node lists with other distribution + * strategy is not clear yet. + */ + exec_nodes = sc_context.sc_exec_nodes; + if (exec_nodes) + exec_nodes = pgxc_merge_exec_nodes(exec_nodes, + sc_context.sc_subquery_en, false, + true); + + /* + * Look at the information gathered by the walker in Shippability_context and that + * in the Query structure to decide whether we should ship this query + * directly to the Datanode or not + */ + + /* + * If the planner was not able to find the Datanodes to the execute the + * query, the query is not completely shippable. So, return NULL + */ + if (!exec_nodes) + return NULL; + + /* Copy the shippability reasons. We modify the copy for easier handling. 
+ * The original can be saved away */ + shippability = bms_copy(sc_context.sc_shippability); + + /* + * If the query has an expression which renders the shippability to single + * node, and query needs to be shipped to more than one node, it can not be + * shipped + */ + if (bms_is_member(SS_NEED_SINGLENODE, shippability)) + { + /* We handled the reason here, reset it */ + shippability = bms_del_member(shippability, SS_NEED_SINGLENODE); + /* if nodeList has no nodes, it ExecNodes will have other means to know + * the nodes where to execute like distribution column expression. We + * can't tell how many nodes the query will be executed on, hence treat + * that as multiple nodes. + */ + if (list_length(exec_nodes->nodeList) != 1) + canShip = false; + } + /* We have delt with aggregates as well, delete the Has aggregates status */ + shippability = bms_del_member(shippability, SS_HAS_AGG_EXPR); + + /* Can not ship the query for some reason */ + if (!bms_is_empty(shippability)) + canShip = false; + + /* Always keep this at the end before checking canShip and return */ + if (!canShip && exec_nodes) + FreeExecNodes(&exec_nodes); + /* If query is to be shipped, we should know where to execute the query */ + Assert (!canShip || exec_nodes); + + bms_free(shippability); + shippability = NULL; + + return exec_nodes; +} + + +/* + * pgxc_is_expr_shippable + * Check whether the given expression can be shipped to remote nodes. + * This can be used as an entry point to check the shippability of + * an expression. 
+ */ +bool +pgxc_is_expr_shippable(Expr *node, bool *has_aggs) +{ + Shippability_context sc_context; + + /* Create the FQS context */ + memset(&sc_context, 0, sizeof(sc_context)); + sc_context.sc_query = NULL; + sc_context.sc_query_level = 0; + sc_context.sc_for_expr = true; + + /* Walk the expression to check its shippability */ + pgxc_shippability_walker((Node *)node, &sc_context); + + /* + * If caller is interested in knowing, whether the expression has aggregates + * let the caller know about it. The caller is capable of handling such + * expressions. Otherwise assume such an expression as not shippable. + */ + if (has_aggs) + *has_aggs = pgxc_test_shippability_reason(&sc_context, SS_HAS_AGG_EXPR); + else if (pgxc_test_shippability_reason(&sc_context, SS_HAS_AGG_EXPR)) + return false; + /* Done with aggregate expression shippability. Delete the status */ + pgxc_reset_shippability_reason(&sc_context, SS_HAS_AGG_EXPR); + + /* If there are reasons why the expression is unshippable, return false */ + if (!bms_is_empty(sc_context.sc_shippability)) + return false; + + /* If nothing wrong found, the expression is shippable */ + return true; +} + + +/* + * pgxc_is_func_shippable + * Determine if a function is shippable + */ +bool +pgxc_is_func_shippable(Oid funcid) +{ + /* + * For the time being a function is thought as shippable + * only if it is immutable. + */ + return func_volatile(funcid) == PROVOLATILE_IMMUTABLE; +} + + +/* + * pgxc_qual_has_dist_equijoin + * Check equijoin conditions on given relations + */ +bool +pgxc_qual_has_dist_equijoin(Relids varnos_1, + Relids varnos_2, Oid distcol_type, Node *quals, List *rtable) +{ + List *lquals; + ListCell *qcell; + + /* If no quals, no equijoin */ + if (!quals) + return false; + /* + * Make a copy of the argument bitmaps, it will be modified by + * bms_first_member(). 
+ */ + varnos_1 = bms_copy(varnos_1); + varnos_2 = bms_copy(varnos_2); + + if (!IsA(quals, List)) + lquals = make_ands_implicit((Expr *)quals); + else + lquals = (List *)quals; + + foreach(qcell, lquals) + { + Expr *qual_expr = (Expr *)lfirst(qcell); + OpExpr *op; + Var *lvar; + Var *rvar; + + if (!IsA(qual_expr, OpExpr)) + continue; + op = (OpExpr *)qual_expr; + /* If not a binary operator, it can not be '='. */ + if (list_length(op->args) != 2) + continue; + + /* + * Check if both operands are Vars, if not check next expression */ + if (IsA(linitial(op->args), Var) && IsA(lsecond(op->args), Var)) + { + lvar = (Var *)linitial(op->args); + rvar = (Var *)lsecond(op->args); + } + else + continue; + + /* + * If the data types of both the columns are not same, continue. Hash + * and Modulo of a the same bytes will be same if the data types are + * same. So, only when the data types of the columns are same, we can + * ship a distributed JOIN to the Datanodes + */ + if (exprType((Node *)lvar) != exprType((Node *)rvar)) + continue; + + /* if the vars do not correspond to the required varnos, continue. */ + if ((bms_is_member(lvar->varno, varnos_1) && bms_is_member(rvar->varno, varnos_2)) || + (bms_is_member(lvar->varno, varnos_2) && bms_is_member(rvar->varno, varnos_1))) + { + if (!pgxc_is_var_distrib_column(lvar, rtable) || + !pgxc_is_var_distrib_column(rvar, rtable)) + continue; + } + else + continue; + /* + * If the operator is not an assignment operator, check next + * constraint. An operator is an assignment operator if it's + * mergejoinable or hashjoinable. Beware that not every assignment + * operator is mergejoinable or hashjoinable, so we might leave some + * oportunity. But then we have to rely on the opname which may not + * be something we know to be equality operator as well. 
+ */ + if (!op_mergejoinable(op->opno, exprType((Node *)lvar)) && + !op_hashjoinable(op->opno, exprType((Node *)lvar))) + continue; + /* Found equi-join condition on distribution columns */ + return true; + } + return false; +} + + +/* + * pgxc_merge_exec_nodes + * The routine combines the two exec_nodes passed such that the resultant + * exec_node corresponds to the JOIN of respective relations. + * If both exec_nodes can not be merged, it returns NULL. + */ +ExecNodes * +pgxc_merge_exec_nodes(ExecNodes *en1, ExecNodes *en2, bool merge_dist_equijoin, + bool merge_replicated_only) +{ + ExecNodes *merged_en = makeNode(ExecNodes); + ExecNodes *tmp_en; + + /* If either of exec_nodes are NULL, return the copy of other one */ + if (!en1) + { + tmp_en = copyObject(en2); + return tmp_en; + } + if (!en2) + { + tmp_en = copyObject(en1); + return tmp_en; + } + + /* Following cases are not handled in this routine */ + /* PGXC_FQS_TODO how should we handle table usage type? */ + if (en1->primarynodelist || en2->primarynodelist || + en1->en_expr || en2->en_expr || + OidIsValid(en1->en_relid) || OidIsValid(en2->en_relid) || + en1->accesstype != RELATION_ACCESS_READ || en2->accesstype != RELATION_ACCESS_READ) + return NULL; + + if (IsLocatorReplicated(en1->baselocatortype) && + IsLocatorReplicated(en2->baselocatortype)) + { + /* + * Replicated/replicated join case + * Check that replicated relation is not disjoint + * with initial relation which is also replicated. + * If there is a common portion of the node list between + * the two relations, other rtables have to be checked on + * this restricted list. + */ + merged_en->nodeList = list_intersection_int(en1->nodeList, + en2->nodeList); + merged_en->baselocatortype = LOCATOR_TYPE_REPLICATED; + /* No intersection, so has to go though standard planner... */ + if (!merged_en->nodeList) + FreeExecNodes(&merged_en); + return merged_en; + } + + /* + * We are told to merge the nodelists if both the distributions are + * replicated. 
We checked that above, so bail out + */ + if (merge_replicated_only) + { + FreeExecNodes(&merged_en); + return merged_en; + } + + if (IsLocatorReplicated(en1->baselocatortype) && + IsLocatorColumnDistributed(en2->baselocatortype)) + { + List *diff_nodelist = NULL; + /* + * Replicated/distributed join case. + * Node list of distributed table has to be included + * in node list of replicated table. + */ + diff_nodelist = list_difference_int(en2->nodeList, en1->nodeList); + /* + * If the difference list is not empty, this means that node list of + * distributed table is not completely mapped by node list of replicated + * table, so go through standard planner. + */ + if (diff_nodelist) + FreeExecNodes(&merged_en); + else + { + merged_en->nodeList = list_copy(en2->nodeList); + merged_en->baselocatortype = LOCATOR_TYPE_DISTRIBUTED; + } + return merged_en; + } + + if (IsLocatorColumnDistributed(en1->baselocatortype) && + IsLocatorReplicated(en2->baselocatortype)) + { + List *diff_nodelist = NULL; + /* + * Distributed/replicated join case. + * Node list of distributed table has to be included + * in node list of replicated table. + */ + diff_nodelist = list_difference_int(en1->nodeList, en2->nodeList); + + /* + * If the difference list is not empty, this means that node list of + * distributed table is not completely mapped by node list of replicated + * table, so go through standard planner. + */ + if (diff_nodelist) + FreeExecNodes(&merged_en); + else + { + merged_en->nodeList = list_copy(en1->nodeList); + merged_en->baselocatortype = LOCATOR_TYPE_DISTRIBUTED; + } + return merged_en; + } + + if (IsLocatorColumnDistributed(en1->baselocatortype) && + IsLocatorColumnDistributed(en2->baselocatortype)) + { + /* + * Distributed/distributed case + * If the caller has suggested that this is an equi-join between two + * distributed results, check if both are distributed by the same + * distribution strategy, and have the same nodes in the distribution + * node list. 
The caller should have made sure that distribution column + * type is same. + */ + if (merge_dist_equijoin && + en1->baselocatortype == en2->baselocatortype && + !list_difference_int(en1->nodeList, en2->nodeList) && + !list_difference_int(en2->nodeList, en1->nodeList)) + { + merged_en->nodeList = list_copy(en1->nodeList); + merged_en->baselocatortype = en1->baselocatortype; + } + else if (list_length(en1->nodeList) == 1 && list_length(en2->nodeList) == 1) + { + merged_en->nodeList = list_intersection_int(en1->nodeList, + en2->nodeList); + merged_en->baselocatortype = LOCATOR_TYPE_DISTRIBUTED; + } + else + FreeExecNodes(&merged_en); + return merged_en; + } + + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Postgres-XC does not support this distribution type yet"), + errdetail("The feature is not currently supported"))); + + /* Keep compiler happy */ + return NULL; +} diff --git a/src/backend/pgxc/copy/remotecopy.c b/src/backend/pgxc/copy/remotecopy.c index fb15be662d..9c8c3b0d82 100644 --- a/src/backend/pgxc/copy/remotecopy.c +++ b/src/backend/pgxc/copy/remotecopy.c @@ -16,9 +16,9 @@ #include "postgres.h" #include "miscadmin.h" #include "lib/stringinfo.h" +#include "optimizer/pgxcship.h" #include "optimizer/planner.h" #include "pgxc/pgxcnode.h" -#include "pgxc/postgresql_fdw.h" #include "pgxc/remotecopy.h" #include "rewrite/rewriteHandler.h" #include "utils/builtins.h" diff --git a/src/backend/pgxc/plan/planner.c b/src/backend/pgxc/plan/planner.c index d59f314613..2d6f968603 100644 --- a/src/backend/pgxc/plan/planner.c +++ b/src/backend/pgxc/plan/planner.c @@ -1,15 +1,14 @@ /*------------------------------------------------------------------------- * * planner.c - * - * Functions for generating a PGXC style plan. + * Functions for generating a Postgres-XC plan. 
* * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 2010-2012 Postgres-XC Development Group * * * IDENTIFICATION - * $$ + * src/backend/pgxc/plan/planner.c * *------------------------------------------------------------------------- */ @@ -31,6 +30,7 @@ #include "nodes/nodes.h" #include "nodes/parsenodes.h" #include "optimizer/clauses.h" +#include "optimizer/pgxcship.h" #include "optimizer/planmain.h" #include "optimizer/planner.h" #include "optimizer/tlist.h" @@ -46,7 +46,6 @@ #include "pgxc/locator.h" #include "pgxc/nodemgr.h" #include "pgxc/planner.h" -#include "pgxc/postgresql_fdw.h" #include "tcop/pquery.h" #include "utils/acl.h" #include "utils/builtins.h" @@ -61,33 +60,22 @@ #include "utils/timestamp.h" #include "utils/date.h" -/* Forbid unsafe SQL statements */ -bool StrictStatementChecking = true; /* fast query shipping is enabled by default */ bool enable_fast_query_shipping = true; static RemoteQuery *makeRemoteQuery(void); static void validate_part_col_updatable(const Query *query); static bool contains_temp_tables(List *rtable); -static bool contains_only_pg_catalog(List *rtable); static void pgxc_handle_unsupported_stmts(Query *query); static PlannedStmt *pgxc_FQS_planner(Query *query, int cursorOptions, ParamListInfo boundParams); -static bool pgxc_query_needs_coord(Query *query); -static ExecNodes *pgxc_is_query_shippable(Query *query, int query_level); -static void pgxc_FQS_find_datanodes(Shippability_context *sc_context); static PlannedStmt *pgxc_handle_exec_direct(Query *query, int cursorOptions, ParamListInfo boundParams); static RemoteQuery *pgxc_FQS_create_remote_plan(Query *query, ExecNodes *exec_nodes, bool is_exec_direct); static void pgxc_set_remote_parameters(PlannedStmt *plan, ParamListInfo boundParams); -static ExecNodes *pgxc_FQS_get_relation_nodes(RangeTblEntry *rte, Index varno, - Query *query); -static bool VarAttrIsPartAttr(Var *var, List *rtable); -static void 
pgxc_set_shippability_reason(Shippability_context *context, - ShippabilityStat reason); -static void pgxc_set_exprtype_shippability(Oid exprtype, Shippability_context *sc_context); + /* * make_ctid_col_ref @@ -169,32 +157,6 @@ make_ctid_col_ref(Query *qry) return makeVar(firstTableRTENumber, attnum, vartypeid, type_mod, varcollid, 0); } -/* - * Returns whether or not the rtable (and its subqueries) - * only contain pg_catalog entries. - */ -static bool -contains_only_pg_catalog(List *rtable) -{ - ListCell *item; - - /* May be complicated. Before giving up, just check for pg_catalog usage */ - foreach(item, rtable) - { - RangeTblEntry *rte = (RangeTblEntry *) lfirst(item); - - if (rte->rtekind == RTE_RELATION) - { - if (get_rel_namespace(rte->relid) != PG_CATALOG_NAMESPACE) - return false; - } - else if (rte->rtekind == RTE_SUBQUERY && - !contains_only_pg_catalog(rte->subquery->rtable)) - return false; - } - return true; -} - /* * Returns true if at least one temporary table is in use @@ -652,7 +614,7 @@ pgxc_FQS_create_remote_plan(Query *query, ExecNodes *exec_nodes, bool is_exec_di query->qry_finalise_aggs = true; /* Deparse query tree to get step query. */ - if ( query_step->sql_statement == NULL ) + if (query_step->sql_statement == NULL) { initStringInfo(&buf); deparse_query(query, &buf, NIL); @@ -728,1227 +690,6 @@ pgxc_FQS_create_remote_plan(Query *query, ExecNodes *exec_nodes, bool is_exec_di return query_step; } -/* - * pgxc_query_needs_coord - * Check if the query needs Coordinator for evaluation or it can be completely - * evaluated on Coordinator. Return true if so, otherwise return false. - */ -static bool -pgxc_query_needs_coord(Query *query) -{ - /* - * If the query is an EXEC DIRECT on the same Coordinator where it's fired, - * it should not be shipped - */ - if (query->is_local) - return true; - /* - * If the query involves just the catalog tables, and is not an EXEC DIRECT - * statement, it can be evaluated completely on the Coordinator. 
No need to - * involve Datanodes. - */ - if (contains_only_pg_catalog(query->rtable)) - return true; - - - /* Allow for override */ - if (query->commandType != CMD_SELECT && - query->commandType != CMD_INSERT && - query->commandType != CMD_UPDATE && - query->commandType != CMD_DELETE) - { - if (StrictStatementChecking) - ereport(ERROR, - (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), - (errmsg("This command is not yet supported.")))); - - return true; - } - - return false; -} - -/* - * Set the given reason in Shippability_context indicating why the query can not be - * shipped directly to the Datanodes. - */ -static void -pgxc_set_shippability_reason(Shippability_context *context, ShippabilityStat reason) -{ - context->sc_shippability = bms_add_member(context->sc_shippability, reason); -} - -/* - * See if a given reason is why the query can not be shipped directly - * to the Datanodes. - */ -bool -pgxc_test_shippability_reason(Shippability_context *context, ShippabilityStat reason) -{ - return bms_is_member(reason, context->sc_shippability); -} - -void -pgxc_reset_shippability_reason(Shippability_context *context, ShippabilityStat reason) -{ - context->sc_shippability = bms_del_member(context->sc_shippability, reason); - return; -} - -/* - * pgxc_is_query_shippable - * This function calls the query walker to analyse the query to gather - * information like Constraints under which the query can be shippable, nodes - * on which the query is going to be executed etc. - * Based on the information gathered, it decides whether the query can be - * executed on Datanodes directly without involving Coordinator. - * If the query is shippable this routine also returns the nodes where the query - * should be shipped. If the query is not shippable, it returns NULL. 
- */ -static ExecNodes * -pgxc_is_query_shippable(Query *query, int query_level) -{ - Shippability_context sc_context; - ExecNodes *exec_nodes; - bool canShip = true; - Bitmapset *shippability; - - memset(&sc_context, 0, sizeof(sc_context)); - /* let's assume that by default query is shippable */ - sc_context.sc_query = query; - sc_context.sc_query_level = query_level; - sc_context.sc_for_expr = false; - - /* - * We might have already decided not to ship the query to the Datanodes, but - * still walk it anyway to find out if there are any subqueries which can be - * shipped. - */ - pgxc_shippability_walker((Node *)query, &sc_context); - /* - * We have merged the nodelists and distributions of all subqueries seen in - * the query tree, merge it with the same obtained for the relations - * involved in the query. - * PGXC_FQS_TODO: - * Merge the subquery ExecNodes if both of them are replicated. - * The logic to merge node lists with other distribution - * strategy is not clear yet. - */ - exec_nodes = sc_context.sc_exec_nodes; - if (exec_nodes) - exec_nodes = pgxc_merge_exec_nodes(exec_nodes, - sc_context.sc_subquery_en, false, - true); - - /* - * Look at the information gathered by the walker in Shippability_context and that - * in the Query structure to decide whether we should ship this query - * directly to the Datanode or not - */ - - /* - * If the planner was not able to find the Datanodes to the execute the - * query, the query is not completely shippable. So, return NULL - */ - if (!exec_nodes) - return NULL; - - /* Copy the shippability reasons. We modify the copy for easier handling. 
- * The original can be saved away */ - shippability = bms_copy(sc_context.sc_shippability); - - /* - * If the query has an expression which renders the shippability to single - * node, and query needs to be shipped to more than one node, it can not be - * shipped - */ - if (bms_is_member(SS_NEED_SINGLENODE, shippability)) - { - /* We handled the reason here, reset it */ - shippability = bms_del_member(shippability, SS_NEED_SINGLENODE); - /* if nodeList has no nodes, it ExecNodes will have other means to know - * the nodes where to execute like distribution column expression. We - * can't tell how many nodes the query will be executed on, hence treat - * that as multiple nodes. - */ - if (list_length(exec_nodes->nodeList) != 1) - canShip = false; - } - /* We have delt with aggregates as well, delete the Has aggregates status */ - shippability = bms_del_member(shippability, SS_HAS_AGG_EXPR); - - /* Can not ship the query for some reason */ - if (!bms_is_empty(shippability)) - canShip = false; - - /* Always keep this at the end before checking canShip and return */ - if (!canShip && exec_nodes) - FreeExecNodes(&exec_nodes); - /* If query is to be shipped, we should know where to execute the query */ - Assert (!canShip || exec_nodes); - - bms_free(shippability); - shippability = NULL; - - return exec_nodes; -} - -/* - * pgxc_merge_exec_nodes - * The routine combines the two exec_nodes passed such that the resultant - * exec_node corresponds to the JOIN of respective relations. - * If both exec_nodes can not be merged, it returns NULL. 
- */ -ExecNodes * -pgxc_merge_exec_nodes(ExecNodes *en1, ExecNodes *en2, bool merge_dist_equijoin, - bool merge_replicated_only) -{ - ExecNodes *merged_en = makeNode(ExecNodes); - ExecNodes *tmp_en; - - /* If either of exec_nodes are NULL, return the copy of other one */ - if (!en1) - { - tmp_en = copyObject(en2); - return tmp_en; - } - if (!en2) - { - tmp_en = copyObject(en1); - return tmp_en; - } - - /* Following cases are not handled in this routine */ - /* PGXC_FQS_TODO how should we handle table usage type? */ - if (en1->primarynodelist || en2->primarynodelist || - en1->en_expr || en2->en_expr || - OidIsValid(en1->en_relid) || OidIsValid(en2->en_relid) || - en1->accesstype != RELATION_ACCESS_READ || en2->accesstype != RELATION_ACCESS_READ) - return NULL; - - if (IsLocatorReplicated(en1->baselocatortype) && - IsLocatorReplicated(en2->baselocatortype)) - { - /* - * Replicated/replicated join case - * Check that replicated relation is not disjoint - * with initial relation which is also replicated. - * If there is a common portion of the node list between - * the two relations, other rtables have to be checked on - * this restricted list. - */ - merged_en->nodeList = list_intersection_int(en1->nodeList, - en2->nodeList); - merged_en->baselocatortype = LOCATOR_TYPE_REPLICATED; - /* No intersection, so has to go though standard planner... */ - if (!merged_en->nodeList) - FreeExecNodes(&merged_en); - return merged_en; - } - - /* - * We are told to merge the nodelists if both the distributions are - * replicated. We checked that above, so bail out - */ - if (merge_replicated_only) - { - FreeExecNodes(&merged_en); - return merged_en; - } - - if (IsLocatorReplicated(en1->baselocatortype) && - IsLocatorColumnDistributed(en2->baselocatortype)) - { - List *diff_nodelist = NULL; - /* - * Replicated/distributed join case. - * Node list of distributed table has to be included - * in node list of replicated table. 
- */ - diff_nodelist = list_difference_int(en2->nodeList, en1->nodeList); - /* - * If the difference list is not empty, this means that node list of - * distributed table is not completely mapped by node list of replicated - * table, so go through standard planner. - */ - if (diff_nodelist) - FreeExecNodes(&merged_en); - else - { - merged_en->nodeList = list_copy(en2->nodeList); - merged_en->baselocatortype = LOCATOR_TYPE_DISTRIBUTED; - } - return merged_en; - } - - if (IsLocatorColumnDistributed(en1->baselocatortype) && - IsLocatorReplicated(en2->baselocatortype)) - { - List *diff_nodelist = NULL; - /* - * Distributed/replicated join case. - * Node list of distributed table has to be included - * in node list of replicated table. - */ - diff_nodelist = list_difference_int(en1->nodeList, en2->nodeList); - - /* - * If the difference list is not empty, this means that node list of - * distributed table is not completely mapped by node list of replicated - * table, so go through standard planner. - */ - if (diff_nodelist) - FreeExecNodes(&merged_en); - else - { - merged_en->nodeList = list_copy(en1->nodeList); - merged_en->baselocatortype = LOCATOR_TYPE_DISTRIBUTED; - } - return merged_en; - } - - if (IsLocatorColumnDistributed(en1->baselocatortype) && - IsLocatorColumnDistributed(en2->baselocatortype)) - { - /* - * Distributed/distributed case - * If the caller has suggested that this is an equi-join between two - * distributed results, check if both are distributed by the same - * distribution strategy, and have the same nodes in the distribution - * node list. The caller should have made sure that distribution column - * type is same. 
- */ - if (merge_dist_equijoin && - en1->baselocatortype == en2->baselocatortype && - !list_difference_int(en1->nodeList, en2->nodeList) && - !list_difference_int(en2->nodeList, en1->nodeList)) - { - merged_en->nodeList = list_copy(en1->nodeList); - merged_en->baselocatortype = en1->baselocatortype; - } - else if (list_length(en1->nodeList) == 1 && list_length(en2->nodeList) == 1) - { - merged_en->nodeList = list_intersection_int(en1->nodeList, - en2->nodeList); - merged_en->baselocatortype = LOCATOR_TYPE_DISTRIBUTED; - } - else - FreeExecNodes(&merged_en); - return merged_en; - } - - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("Postgres-XC does not support this distribution type yet"), - errdetail("The feature is not currently supported"))); - - /* Keep compiler happy */ - return NULL; -} - -static void -pgxc_FQS_find_datanodes(Shippability_context *sc_context) -{ - Query *query = sc_context->sc_query; - ListCell *rt; - ExecNodes *exec_nodes = NULL; - bool canShip = true; - Index varno = 0; - - /* No query, no nodes to execute! */ - if (!query) - { - sc_context->sc_exec_nodes = NULL; - return; - } - - /* - * For every range table entry, - * 1. Find out the Datanodes needed for that range table - * 2. Merge these Datanodes with the already available Datanodes - * 3. If the merge is unsuccessful, we can not ship this query directly to - * the Datanode/s - */ - foreach(rt, query->rtable) - { - RangeTblEntry *rte = (RangeTblEntry *) lfirst(rt); - Oid distcol_type; /* TODO mostly this is not needed */ - Relids dist_varnos; - - varno++; - switch (rte->rtekind) - { - case RTE_RELATION: - { - ExecNodes *rel_exec_nodes; - ExecNodes *tmp_en; - bool merge_dist_equijoin = false; - /* - * In case of inheritance, child tables can have completely different - * Datanode distribution than parent. To handle inheritance we need - * to merge the Datanodes of the children table as well. 
The inheritance - * is resolved during planning(?), so we may not have the RTEs of the - * children here. Also, the exact method of merging Datanodes of the - * children is not known yet. So, when inheritance is requested, query - * can not be shipped. - */ - if (rte->inh) - { - /* - * See prologue of has_subclass, we might miss on the - * optimization because has_subclass can return true - * even if there aren't any subclasses, but it's ok - */ - if (has_subclass(rte->relid)) - { - canShip = false; - break; - } - } - - if (rte->relkind != RELKIND_RELATION) - { - canShip = false; - break; - } - rel_exec_nodes = pgxc_FQS_get_relation_nodes(rte,varno, query); - if (!rel_exec_nodes) - { - /* - * No information about the location of relation in XC, - * a local table OR system catalog. The query can not be - * pushed. - */ - canShip = false; - break; - } - if (varno == 1) - { - if (IsLocatorColumnDistributed(rel_exec_nodes->baselocatortype)) - { - RelationLocInfo *rel_loc_info = GetRelationLocInfo(rte->relid); - distcol_type = get_atttype(rte->relid, - rel_loc_info->partAttrNum); - dist_varnos = bms_make_singleton(varno); - } - else - { - distcol_type = InvalidOid; - dist_varnos = NULL; - } - } - if (exec_nodes && - IsLocatorDistributedByValue(exec_nodes->baselocatortype) && - OidIsValid(distcol_type) && bms_num_members(dist_varnos) > 0 && - exec_nodes->baselocatortype == rel_exec_nodes->baselocatortype) - { - /* - * If the already reduced JOINs is distributed the same way - * as the current relation, check if there exists an - * equi-join condition between the relations and the data type - * of distribution column involved is same for both the - * relations - */ - if (pgxc_qual_hash_dist_equijoin(dist_varnos, - bms_make_singleton(varno), - distcol_type, - query->jointree->quals, - query->rtable)) - merge_dist_equijoin = true; - } - - /* Save the current exec_nodes to be freed later */ - tmp_en = exec_nodes; - exec_nodes = pgxc_merge_exec_nodes(exec_nodes, 
rel_exec_nodes, - merge_dist_equijoin, - false); - /* - * The JOIN is equijoin between distributed tables, and we could - * obtain the nodelist for pushing this JOIN, so add the current - * relation to the list of relations already JOINed in the same - * fashion. - */ - if (exec_nodes && merge_dist_equijoin) - dist_varnos = bms_add_member(dist_varnos, varno); - FreeExecNodes(&tmp_en); - } - break; - - case RTE_JOIN: - /* Is information here useful in some or other way? */ - break; - case RTE_CTE: - case RTE_SUBQUERY: - case RTE_FUNCTION: - case RTE_VALUES: - default: - canShip = false; - } - - if (!canShip || !exec_nodes) - break; - } - - /* - * If we didn't find the Datanodes to ship the query to, we shouldn't ship - * the query :) - */ - if (!exec_nodes || !(exec_nodes->nodeList || exec_nodes->en_expr)) - canShip = false; - - if (canShip) - { - /* - * If relations involved in the query are such that ultimate JOIN is - * replicated JOIN, choose only one of them. If one of them is a - * preferred node choose that one, otherwise choose the first one. 
- */ - if (IsLocatorReplicated(exec_nodes->baselocatortype) && - exec_nodes->accesstype == RELATION_ACCESS_READ) - { - List *tmp_list = exec_nodes->nodeList; - ListCell *item; - int nodeid = -1; - foreach(item, exec_nodes->nodeList) - { - int cnt_nodes; - for (cnt_nodes = 0; - cnt_nodes < num_preferred_data_nodes && nodeid < 0; - cnt_nodes++) - { - if (PGXCNodeGetNodeId(preferred_data_node[cnt_nodes], - PGXC_NODE_DATANODE) == lfirst_int(item)) - nodeid = lfirst_int(item); - } - if (nodeid >= 0) - break; - } - if (nodeid < 0) - exec_nodes->nodeList = list_make1_int(linitial_int(exec_nodes->nodeList)); - else - exec_nodes->nodeList = list_make1_int(nodeid); - list_free(tmp_list); - } - sc_context->sc_exec_nodes = exec_nodes; - } - else if (exec_nodes) - { - FreeExecNodes(&exec_nodes); - } - return; -} - -bool -pgxc_qual_hash_dist_equijoin(Relids varnos_1, Relids varnos_2, Oid distcol_type, - Node *quals, List *rtable) -{ - List *lquals; - ListCell *qcell; - - /* If no quals, no equijoin */ - if (!quals) - return false; - /* - * Make a copy of the argument bitmaps, it will be modified by - * bms_first_member(). - */ - varnos_1 = bms_copy(varnos_1); - varnos_2 = bms_copy(varnos_2); - - if (!IsA(quals, List)) - lquals = make_ands_implicit((Expr *)quals); - else - lquals = (List *)quals; - - foreach(qcell, lquals) - { - Expr *qual_expr = (Expr *)lfirst(qcell); - OpExpr *op; - Var *lvar; - Var *rvar; - - if (!IsA(qual_expr, OpExpr)) - continue; - op = (OpExpr *)qual_expr; - /* If not a binary operator, it can not be '='. */ - if (list_length(op->args) != 2) - continue; - - /* - * Check if both operands are Vars, if not check next expression */ - if (IsA(linitial(op->args), Var) && IsA(lsecond(op->args), Var)) - { - lvar = (Var *)linitial(op->args); - rvar = (Var *)lsecond(op->args); - } - else - continue; - - /* - * If the data types of both the columns are not same, continue. Hash - * and Modulo of a the same bytes will be same if the data types are - * same. 
So, only when the data types of the columns are same, we can - * ship a distributed JOIN to the Datanodes - */ - if (exprType((Node *)lvar) != exprType((Node *)rvar)) - continue; - - /* if the vars do not correspond to the required varnos, continue. */ - if ((bms_is_member(lvar->varno, varnos_1) && bms_is_member(rvar->varno, varnos_2)) || - (bms_is_member(lvar->varno, varnos_2) && bms_is_member(rvar->varno, varnos_1))) - { - if (!VarAttrIsPartAttr(lvar, rtable) || - !VarAttrIsPartAttr(rvar, rtable)) - continue; - } - else - continue; - /* - * If the operator is not an assignment operator, check next - * constraint. An operator is an assignment operator if it's - * mergejoinable or hashjoinable. Beware that not every assignment - * operator is mergejoinable or hashjoinable, so we might leave some - * oportunity. But then we have to rely on the opname which may not - * be something we know to be equality operator as well. - */ - if (!op_mergejoinable(op->opno, exprType((Node *)lvar)) && - !op_hashjoinable(op->opno, exprType((Node *)lvar))) - continue; - /* Found equi-join condition on distribution columns */ - return true; - } - return false; -} - -static bool VarAttrIsPartAttr(Var *var, List *rtable) -{ - RangeTblEntry *rte = rt_fetch(var->varno, rtable); - RelationLocInfo *rel_loc_info; - /* distribution column only applies to the relations */ - if (rte->rtekind != RTE_RELATION || - rte->relkind != RELKIND_RELATION) - return false; - rel_loc_info = GetRelationLocInfo(rte->relid); - if (!rel_loc_info) - return false; - if (var->varattno == rel_loc_info->partAttrNum) - return true; - return false; -} -/* - * pgxc_FQS_get_relation_nodes - * For FQS return ExecNodes structure so as to decide which Datanodes the query - * should execute on. If it is possible to set the node list directly, set it. - * Otherwise set the appropriate distribution column expression or relid in - * ExecNodes structure. 
- */ -static ExecNodes * -pgxc_FQS_get_relation_nodes(RangeTblEntry *rte, Index varno, Query *query) -{ - CmdType command_type = query->commandType; - bool for_update = query->rowMarks ? true : false; - ExecNodes *rel_exec_nodes; - RelationAccessType rel_access = RELATION_ACCESS_READ; - RelationLocInfo *rel_loc_info; - - Assert(rte == rt_fetch(varno, (query->rtable))); - - switch (command_type) - { - case CMD_SELECT: - if (for_update) - rel_access = RELATION_ACCESS_READ_FOR_UPDATE; - else - rel_access = RELATION_ACCESS_READ; - break; - - case CMD_UPDATE: - case CMD_DELETE: - rel_access = RELATION_ACCESS_UPDATE; - break; - - case CMD_INSERT: - rel_access = RELATION_ACCESS_INSERT; - break; - - default: - /* should not happen, but */ - elog(ERROR, "Unrecognised command type %d", command_type); - break; - } - - - rel_loc_info = GetRelationLocInfo(rte->relid); - /* If we don't know about the distribution of relation, bail out */ - if (!rel_loc_info) - return NULL; - - /* - * Find out the datanodes to execute this query on. - * PGXC_FQS_TODO: for now, we apply node reduction only when there is only - * one relation involved in the query. If there are multiple distributed - * tables in the query and we apply node reduction here, we may fail to ship - * the entire join. We should apply node reduction transitively. - */ - if (list_length(query->rtable) == 1) - rel_exec_nodes = GetRelationNodesByQuals(rte->relid, varno, - query->jointree->quals, rel_access); - else - rel_exec_nodes = GetRelationNodes(rel_loc_info, (Datum) 0, - true, InvalidOid, rel_access); - - if (!rel_exec_nodes) - return NULL; - rel_exec_nodes->accesstype = rel_access; - /* - * If we are reading a replicated table, pick all the nodes where it - * resides. If the query has JOIN, it helps picking up a matching set of - * Datanodes for that JOIN. FQS planner will ultimately pick up one node if - * the JOIN is replicated. 
- */ - if (rel_access == RELATION_ACCESS_READ && - IsLocatorReplicated(rel_loc_info->locatorType)) - { - list_free(rel_exec_nodes->nodeList); - rel_exec_nodes->nodeList = list_copy(rel_loc_info->nodeList); - } - else if (rel_access == RELATION_ACCESS_INSERT && - IsLocatorDistributedByValue(rel_loc_info->locatorType)) - { - ListCell *lc; - TargetEntry *tle; - /* - * If the INSERT is happening on a table distributed by value of a - * column, find out the - * expression for distribution column in the targetlist, and stick in - * in ExecNodes, and clear the nodelist. Execution will find - * out where to insert the row. - */ - /* It is a partitioned table, get value by looking in targetList */ - foreach(lc, query->targetList) - { - tle = (TargetEntry *) lfirst(lc); - - if (tle->resjunk) - continue; - if (strcmp(tle->resname, rel_loc_info->partAttrName) == 0) - break; - } - /* Not found, bail out */ - if (!lc) - return NULL; - - Assert(tle); - /* We found the TargetEntry for the partition column */ - list_free(rel_exec_nodes->primarynodelist); - rel_exec_nodes->primarynodelist = NULL; - list_free(rel_exec_nodes->nodeList); - rel_exec_nodes->nodeList = NULL; - rel_exec_nodes->en_expr = tle->expr; - rel_exec_nodes->en_relid = rel_loc_info->relid; - } - return rel_exec_nodes; -} - -/* - * pgxc_is_exprtype_shippable - * Checks if the type of the expression is shippable. For now composite types - * derived from view definitions are not shippable. Also sets the - * (un)shippability reason if the type is not shippable. 
- */ -static void -pgxc_set_exprtype_shippability(Oid exprtype, Shippability_context *sc_context) -{ - char typerelkind; - - typerelkind = get_rel_relkind(typeidTypeRelid(exprtype)); - - if (typerelkind == RELKIND_SEQUENCE || - typerelkind == RELKIND_VIEW || - typerelkind == RELKIND_FOREIGN_TABLE) - pgxc_set_shippability_reason(sc_context, SS_UNSHIPPABLE_TYPE); -} - -/* - * pgxc_shippability_walker - * walks the query/expression tree routed at the node passed in, gathering - * information which will help decide whether the query to which this node - * belongs is shippable to the Datanodes. - * - * The function should try to walk the entire tree analysing each subquery for - * shippability. If a subquery is shippable but not the whole query, we would be - * able to create a RemoteQuery node for that subquery, shipping it to the - * Datanode. - * - * Return value of this function is governed by the same rules as - * expression_tree_walker(), see prologue of that function for details. - */ -bool -pgxc_shippability_walker(Node *node, Shippability_context *sc_context) -{ - if (node == NULL) - return false; - - /* Below is the list of nodes that can appear in a query, examine each - * kind of node and find out under what conditions query with this node can - * be shippable. For each node, update the context (add fields if - * necessary) so that decision whether to FQS the query or not can be made. - * Every node which has a result is checked to see if the result type of that - * expression is shippable. - */ - switch(nodeTag(node)) - { - /* Constants are always shippable */ - case T_Const: - pgxc_set_exprtype_shippability(exprType(node), sc_context); - break; - - /* - * For placeholder nodes the shippability of the node, depends upon the - * expression which they refer to. It will be checked separately, when - * that expression is encountered. 
- */ - case T_CaseTestExpr: - pgxc_set_exprtype_shippability(exprType(node), sc_context); - break; - - /* - * record_in() function throws error, thus requesting a result in the - * form of anonymous record from datanode gets into error. Hence, if the - * top expression of a target entry is ROW(), it's not shippable. - */ - case T_TargetEntry: - { - TargetEntry *tle = (TargetEntry *)node; - if (tle->expr) - { - char typtype = get_typtype(exprType((Node *)tle->expr)); - if (!typtype || typtype == TYPTYPE_PSEUDO) - pgxc_set_shippability_reason(sc_context, SS_UNSHIPPABLE_EXPR); - } - } - break; - - case T_SortGroupClause: - if (sc_context->sc_for_expr) - pgxc_set_shippability_reason(sc_context, SS_UNSUPPORTED_EXPR); - break; - - case T_CoerceViaIO: - { - CoerceViaIO *cvio = (CoerceViaIO *)node; - Oid input_type = exprType((Node *)cvio->arg); - Oid output_type = cvio->resulttype; - CoercionContext cc; - - cc = cvio->coerceformat == COERCE_IMPLICIT_CAST ? COERCION_IMPLICIT : - COERCION_EXPLICIT; - /* - * Internally we use IO coercion for types which do not have casting - * defined for them e.g. cstring::date. If such casts are sent to - * the datanode, those won't be accepted. Hence such casts are - * unshippable. Since it will be shown as an explicit cast. - */ - if (!can_coerce_type(1, &input_type, &output_type, cc)) - pgxc_set_shippability_reason(sc_context, SS_UNSHIPPABLE_EXPR); - pgxc_set_exprtype_shippability(exprType(node), sc_context); - } - break; - /* - * Nodes, which are shippable if the tree rooted under these nodes is - * shippable - */ - case T_CoerceToDomainValue: - /* - * PGXCTODO: mostly, CoerceToDomainValue node appears in DDLs, - * do we handle DDLs here? - */ - case T_FieldSelect: - case T_NamedArgExpr: - case T_RelabelType: - case T_BoolExpr: - /* - * PGXCTODO: we might need to take into account the kind of boolean - * operator we have in the quals and see if the corresponding - * function is immutable. 
- */ - case T_ArrayCoerceExpr: - case T_ConvertRowtypeExpr: - case T_CaseExpr: - case T_ArrayExpr: - case T_RowExpr: - case T_CollateExpr: - case T_CoalesceExpr: - case T_XmlExpr: - case T_NullTest: - case T_BooleanTest: - case T_CoerceToDomain: - pgxc_set_exprtype_shippability(exprType(node), sc_context); - break; - - case T_List: - case T_RangeTblRef: - break; - - case T_ArrayRef: - /* - * When multiple values of of an array are updated at once - * FQS planner cannot yet handle SQL representation correctly. - * So disable FQS in this case and let standard planner manage it. - */ - case T_FieldStore: - /* - * PostgreSQL deparsing logic does not handle the FieldStore - * for more than one fields (see processIndirection()). So, let's - * handle it through standard planner, where whole row will be - * constructed. - */ - case T_SetToDefault: - /* - * PGXCTODO: we should actually check whether the default value to - * be substituted is shippable to the Datanode. Some cases like - * nextval() of a sequence can not be shipped to the Datanode, hence - * for now default values can not be shipped to the Datanodes - */ - pgxc_set_shippability_reason(sc_context, SS_UNSUPPORTED_EXPR); - pgxc_set_exprtype_shippability(exprType(node), sc_context); - break; - - case T_Var: - { - Var *var = (Var *)node; - /* - * if a subquery references an upper level variable, that query is - * not shippable, if shipped alone. - */ - if (var->varlevelsup > sc_context->sc_max_varlevelsup) - sc_context->sc_max_varlevelsup = var->varlevelsup; - pgxc_set_exprtype_shippability(exprType(node), sc_context); - } - break; - - case T_Param: - { - Param *param = (Param *)node; - /* PGXCTODO: Can we handle internally generated parameters? 
*/ - if (param->paramkind != PARAM_EXTERN) - pgxc_set_shippability_reason(sc_context, SS_UNSUPPORTED_EXPR); - pgxc_set_exprtype_shippability(exprType(node), sc_context); - } - break; - - case T_CurrentOfExpr: - { - /* - * Ideally we should not see CurrentOf expression here, it - * should have been replaced by the CTID = ? expression. But - * still, no harm in shipping it as is. - */ - pgxc_set_exprtype_shippability(exprType(node), sc_context); - } - break; - - case T_Aggref: - { - Aggref *aggref = (Aggref *)node; - /* - * An aggregate is completely shippable to the Datanode, if the - * whole group resides on that Datanode. This will be clear when - * we see the GROUP BY clause. - * agglevelsup is minimum of variable's varlevelsup, so we will - * set the sc_max_varlevelsup when we reach the appropriate - * VARs in the tree. - */ - pgxc_set_shippability_reason(sc_context, SS_HAS_AGG_EXPR); - /* - * If a stand-alone expression to be shipped, is an - * 1. aggregate with ORDER BY, DISTINCT directives, it needs all - * the qualifying rows - * 2. aggregate without collection function - * 3. (PGXCTODO:)aggregate with polymorphic transition type, the - * the transition type needs to be resolved to correctly interpret - * the transition results from Datanodes. - * Hence, such an expression can not be shipped to the datanodes. - */ - if (aggref->aggorder || - aggref->aggdistinct || - aggref->agglevelsup || - !aggref->agghas_collectfn || - IsPolymorphicType(aggref->aggtrantype)) - pgxc_set_shippability_reason(sc_context, SS_NEED_SINGLENODE); - - pgxc_set_exprtype_shippability(exprType(node), sc_context); - } - break; - - case T_FuncExpr: - { - FuncExpr *funcexpr = (FuncExpr *)node; - /* - * PGXC_FQS_TODO: it's too restrictive not to ship non-immutable - * functions to the Datanode. We need a better way to see what - * can be shipped to the Datanode and what can not be. 
- */ - if (!is_immutable_func(funcexpr->funcid)) - pgxc_set_shippability_reason(sc_context, SS_UNSHIPPABLE_EXPR); - - pgxc_set_exprtype_shippability(exprType(node), sc_context); - } - break; - - case T_OpExpr: - case T_DistinctExpr: /* struct-equivalent to OpExpr */ - case T_NullIfExpr: /* struct-equivalent to OpExpr */ - { - /* - * All of these three are structurally equivalent to OpExpr, so - * cast the node to OpExpr and check if the operator function is - * immutable. See PGXC_FQS_TODO item for FuncExpr. - */ - OpExpr *op_expr = (OpExpr *)node; - Oid opfuncid = OidIsValid(op_expr->opfuncid) ? - op_expr->opfuncid : get_opcode(op_expr->opno); - if (!OidIsValid(opfuncid) || !is_immutable_func(opfuncid)) - pgxc_set_shippability_reason(sc_context, SS_UNSHIPPABLE_EXPR); - - pgxc_set_exprtype_shippability(exprType(node), sc_context); - } - break; - - case T_ScalarArrayOpExpr: - { - /* - * Check if the operator function is shippable to the Datanode - * PGXC_FQS_TODO: see immutability note for FuncExpr above - */ - ScalarArrayOpExpr *sao_expr = (ScalarArrayOpExpr *)node; - Oid opfuncid = OidIsValid(sao_expr->opfuncid) ? - sao_expr->opfuncid : get_opcode(sao_expr->opno); - if (!OidIsValid(opfuncid) || !is_immutable_func(opfuncid)) - pgxc_set_shippability_reason(sc_context, SS_UNSHIPPABLE_EXPR); - } - break; - - case T_RowCompareExpr: - case T_MinMaxExpr: - { - /* - * PGXCTODO should we be checking the comparision operator - * functions as well, as we did for OpExpr OR that check is - * unnecessary. Operator functions are always shippable? - * Otherwise this node should be treated similar to other - * "shell" nodes. 
- */ - pgxc_set_exprtype_shippability(exprType(node), sc_context); - } - break; - - case T_Query: - { - Query *query = (Query *)node; - - /* A stand-alone expression containing Query is not shippable */ - if (sc_context->sc_for_expr) - { - pgxc_set_shippability_reason(sc_context, SS_UNSHIPPABLE_EXPR); - break; - } - /* We are checking shippability of whole query, go ahead */ - - /* CREATE TABLE AS is not supported in FQS */ - if (query->commandType == CMD_UTILITY && - IsA(query->utilityStmt, CreateTableAsStmt)) - pgxc_set_shippability_reason(sc_context, SS_UNSUPPORTED_EXPR); - - if (query->hasRecursive) - pgxc_set_shippability_reason(sc_context, SS_UNSUPPORTED_EXPR); - /* - * If the query needs Coordinator for evaluation or the query can be - * completed on Coordinator itself, we don't ship it to the Datanode - */ - if (pgxc_query_needs_coord(query)) - pgxc_set_shippability_reason(sc_context, SS_NEEDS_COORD); - - /* PGXC_FQS_TODO: It should be possible to look at the Query and find out - * whether it can be completely evaluated on the Datanode just like SELECT - * queries. But we need to be careful while finding out the Datanodes to - * execute the query on, esp. for the result relations. If one happens to - * remove/change this restriction, make sure you change - * pgxc_FQS_get_relation_nodes appropriately. - * For now DMLs with single rtable entry are candidates for FQS - */ - if (query->commandType != CMD_SELECT && list_length(query->rtable) > 1) - pgxc_set_shippability_reason(sc_context, SS_UNSUPPORTED_EXPR); - - /* - * In following conditions query is shippable when there is only one - * Datanode involved - * 1. the query has aggregagtes - * 2. the query has window functions - * 3. the query has ORDER BY clause - * 4. the query has Distinct clause - * 5. the query has limit and offset clause - * - * PGXC_FQS_TODO: Condition 1 above is really dependent upon the GROUP BY clause. 
If - * all rows in each group reside on the same Datanode, aggregates can be - * evaluated on that Datanode, thus condition 1 is has aggregates & the rows - * in any group reside on multiple Datanodes. - * PGXC_FQS_TODO: Condition 2 above is really dependent upon whether the distinct - * clause has distribution column in it. If the distinct clause has - * distribution column in it, we can ship DISTINCT clause to the Datanodes. - */ - if (query->hasAggs || query->hasWindowFuncs || query->sortClause || - query->distinctClause || query->groupClause || query->havingQual || - query->limitOffset || query->limitCount) - pgxc_set_shippability_reason(sc_context, SS_NEED_SINGLENODE); - - /* walk the entire query tree to analyse the query */ - if (query_tree_walker(query, pgxc_shippability_walker, sc_context, 0)) - return true; - - /* - * PGXC_FQS_TODO: - * There is a subquery in this query, which references Vars in the upper - * query. For now stop shipping such queries. We should get rid of this - * condition. - */ - if (sc_context->sc_max_varlevelsup != 0) - pgxc_set_shippability_reason(sc_context, SS_VARLEVEL); - - /* - * Walk the RangeTableEntries of the query and find the - * Datanodes needed for evaluating this query - */ - pgxc_FQS_find_datanodes(sc_context); - } - break; - - case T_FromExpr: - { - /* We don't expect FromExpr in a stand-alone expression */ - if (sc_context->sc_for_expr) - pgxc_set_shippability_reason(sc_context, SS_UNSUPPORTED_EXPR); - - /* - * We will be examining the range table entries separately and - * Join expressions are not candidate for FQS. - * If this is an INSERT query with quals, resulting from say - * conditional rule, we can not handle those in FQS, since there is - * not SQL representation for such quals. 
- */ - if (sc_context->sc_query->commandType == CMD_INSERT && - ((FromExpr *)node)->quals) - pgxc_set_shippability_reason(sc_context, SS_UNSUPPORTED_EXPR); - - } - break; - - case T_WindowFunc: - { - WindowFunc *winf = (WindowFunc *)node; - /* - * A window function can be evaluated on a Datanode if there is - * only one Datanode involved. - */ - pgxc_set_shippability_reason(sc_context, SS_NEED_SINGLENODE); - - /* - * A window function is not shippable as part of a stand-alone - * expression. If the window function is non-immutable, it can not - * be shipped to the datanodes. - */ - if (sc_context->sc_for_expr || - !is_immutable_func(winf->winfnoid)) - pgxc_set_shippability_reason(sc_context, SS_UNSHIPPABLE_EXPR); - - pgxc_set_exprtype_shippability(exprType(node), sc_context); - } - break; - - case T_WindowClause: - { - /* - * A window function can be evaluated on a Datanode if there is - * only one Datanode involved. - */ - pgxc_set_shippability_reason(sc_context, SS_NEED_SINGLENODE); - - /* - * A window function is not shippable as part of a stand-alone - * expression - */ - if (sc_context->sc_for_expr) - pgxc_set_shippability_reason(sc_context, SS_UNSHIPPABLE_EXPR); - } - break; - - case T_JoinExpr: - /* We don't expect JoinExpr in a stand-alone expression */ - if (sc_context->sc_for_expr) - pgxc_set_shippability_reason(sc_context, SS_UNSUPPORTED_EXPR); - - /* - * For JoinExpr in a Query - * The compatibility of joining ranges will be deduced while - * examining the range table of the query. Nothing to do here - */ - break; - - case T_SubLink: - { - SubLink *sublink = (SubLink *)node; - ExecNodes *sublink_en; - /* - * Walk the query and find the nodes where the query should be - * executed and node distribution. Merge this with the existing - * node list obtained for other subqueries. If merging fails, we - * can not ship the whole query. 
- */ - if (IsA(sublink->subselect, Query)) - sublink_en = pgxc_is_query_shippable((Query *)(sublink->subselect), - sc_context->sc_query_level); - else - sublink_en = NULL; - - /* PGXCTODO free the old sc_subquery_en. */ - /* If we already know that this query does not have a set of nodes - * to evaluate on, don't bother to merge again. - */ - if (!pgxc_test_shippability_reason(sc_context, SS_NO_NODES)) - { - sc_context->sc_subquery_en = pgxc_merge_exec_nodes(sublink_en, - sc_context->sc_subquery_en, - false, - true); - if (!sc_context->sc_subquery_en) - pgxc_set_shippability_reason(sc_context, SS_NO_NODES); - } - - pgxc_set_exprtype_shippability(exprType(node), sc_context); - } - break; - - case T_SubPlan: - case T_AlternativeSubPlan: - case T_CommonTableExpr: - case T_SetOperationStmt: - case T_PlaceHolderVar: - case T_AppendRelInfo: - case T_PlaceHolderInfo: - { - /* PGXCTODO: till we exhaust this list */ - pgxc_set_shippability_reason(sc_context, SS_UNSUPPORTED_EXPR); - } - break; - - default: - elog(ERROR, "unrecognized node type: %d", - (int) nodeTag(node)); - break; - } - - return expression_tree_walker(node, pgxc_shippability_walker, (void *)sc_context); -} /* * validate whether partition column of a table is being updated diff --git a/src/backend/pgxc/pool/Makefile b/src/backend/pgxc/pool/Makefile index f3830be690..019c756735 100644 --- a/src/backend/pgxc/pool/Makefile +++ b/src/backend/pgxc/pool/Makefile @@ -14,6 +14,6 @@ subdir = src/backend/pgxc/pool top_builddir = ../../../.. 
include $(top_builddir)/src/Makefile.global -OBJS = pgxcnode.o execRemote.o poolmgr.o poolcomm.o postgresql_fdw.o poolutils.o +OBJS = pgxcnode.o execRemote.o poolmgr.o poolcomm.o poolutils.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/pgxc/pool/postgresql_fdw.c b/src/backend/pgxc/pool/postgresql_fdw.c deleted file mode 100644 index d355cf376f..0000000000 --- a/src/backend/pgxc/pool/postgresql_fdw.c +++ /dev/null @@ -1,118 +0,0 @@ -/*------------------------------------------------------------------------- - * - * postgresql_fdw.c - * foreign-data wrapper for PostgreSQL - * - * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group - * - * IDENTIFICATION - * $PostgreSQL$ - * - *------------------------------------------------------------------------- - */ -#include "pgxc/postgresql_fdw.h" -#include "catalog/pg_operator.h" -#include "catalog/pg_proc.h" -#include "catalog/pg_type.h" -#include "funcapi.h" -#include "mb/pg_wchar.h" -#include "miscadmin.h" -#include "nodes/nodeFuncs.h" -#include "nodes/makefuncs.h" -#include "optimizer/clauses.h" -#include "optimizer/planmain.h" -#include "parser/scansup.h" -#include "utils/builtins.h" -#include "utils/lsyscache.h" -#include "utils/memutils.h" -#include "utils/rel.h" -#include "utils/syscache.h" - -#define DEBUG_FDW - -/* - * Check whether the function is IMMUTABLE. - */ -bool -is_immutable_func(Oid funcid) -{ - HeapTuple tp; - bool isnull; - Datum datum; - - tp = SearchSysCache(PROCOID, ObjectIdGetDatum(funcid), 0, 0, 0); - if (!HeapTupleIsValid(tp)) - elog(ERROR, "cache lookup failed for function %u", funcid); - -#ifdef DEBUG_FDW - /* print function name and its immutability */ - { - char *proname; - datum = SysCacheGetAttr(PROCOID, tp, Anum_pg_proc_proname, &isnull); - proname = pstrdup(DatumGetName(datum)->data); - elog(DEBUG1, "func %s(%u) is%s immutable", proname, funcid, - (DatumGetChar(datum) == PROVOLATILE_IMMUTABLE) ? 
"" : " not"); - pfree(proname); - } -#endif - - datum = SysCacheGetAttr(PROCOID, tp, Anum_pg_proc_provolatile, &isnull); - ReleaseSysCache(tp); - - return (DatumGetChar(datum) == PROVOLATILE_IMMUTABLE); -} - -/* - * Check whether the ExprState node should be evaluated in foreign server. - * - * An expression which consists of expressions below will be evaluated in - * the foreign server. - * - constant value - * - variable (foreign table column) - * - external parameter (parameter of prepared statement) - * - array - * - bool expression (AND/OR/NOT) - * - NULL test (IS [NOT] NULL) - * - operator - * - IMMUTABLE only - * - It is required that the meaning of the operator be the same as the - * local server in the foreign server. - * - function - * - IMMUTABLE only - * - It is required that the meaning of the operator be the same as the - * local server in the foreign server. - * - scalar array operator (ANY/ALL) - */ -bool -pgxc_is_expr_shippable(Expr *node, bool *has_aggs) -{ - Shippability_context sc_context; - - /* Create the FQS context */ - memset(&sc_context, 0, sizeof(sc_context)); - sc_context.sc_query = NULL; - sc_context.sc_query_level = 0; - sc_context.sc_for_expr = true; - - /* Walk the expression to check its shippability */ - pgxc_shippability_walker((Node *)node, &sc_context); - - /* - * If caller is interested in knowing, whether the expression has aggregets - * let the caller know about it. The caller is capable of handling such - * expressions. Otherwise assume such an expression as unshippable. - */ - if (has_aggs) - *has_aggs = pgxc_test_shippability_reason(&sc_context, SS_HAS_AGG_EXPR); - else if (pgxc_test_shippability_reason(&sc_context, SS_HAS_AGG_EXPR)) - return false; - /* Done with aggregate expression shippability. 
Delete the status */ - pgxc_reset_shippability_reason(&sc_context, SS_HAS_AGG_EXPR); - - /* If there are reasons why the expression is unshippable, return false */ - if (!bms_is_empty(sc_context.sc_shippability)) - return false; - - /* If nothing wrong found, the expression is shippable */ - return true; -} diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index a3d525cdad..d2b106ca8c 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -32,7 +32,6 @@ #include "pgxc/locator.h" #include "pgxc/nodemgr.h" #include "pgxc/pgxc.h" -#include "pgxc/postgresql_fdw.h" #include "nodes/nodes.h" #include "optimizer/planner.h" #include "optimizer/var.h" diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 0a91db9f9f..316804dbc8 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -59,6 +59,7 @@ #ifdef PGXC #include "commands/tablecmds.h" #include "nodes/nodes.h" +#include "optimizer/pgxcship.h" #include "pgxc/execRemote.h" #include "pgxc/locator.h" #include "pgxc/planner.h" diff --git a/src/include/optimizer/pgxcship.h b/src/include/optimizer/pgxcship.h new file mode 100644 index 0000000000..a0f860daf1 --- /dev/null +++ b/src/include/optimizer/pgxcship.h @@ -0,0 +1,39 @@ +/*------------------------------------------------------------------------- + * + * pgxcship.h + * Functionalities for the evaluation of expression shippability + * to remote nodes + * + * + * Portions Copyright (c) 1996-2012 PostgreSQL Global Development Group + * Portions Copyright (c) 2010-2012 Postgres-XC Development Group + * + * src/include/optimizer/pgxcship.h + * + *------------------------------------------------------------------------- + */ + +#ifndef PGXCSHIP_H +#define PGXCSHIP_H + +#include "nodes/parsenodes.h" +#include "nodes/relation.h" +#include "pgxc/locator.h" + +/* Forbid SQL if unsafe, useful to turn off for development */ +extern bool 
StrictStatementChecking; + +/* Determine if query is shippable */ +extern ExecNodes *pgxc_is_query_shippable(Query *query, int query_level); +/* Determine if an expression is shippable */ +extern bool pgxc_is_expr_shippable(Expr *node, bool *has_aggs); +/* Determine if given function is shippable */ +extern bool pgxc_is_func_shippable(Oid funcid); +/* Check equijoin conditions on given relations */ +extern bool pgxc_qual_has_dist_equijoin(Relids varnos_1, + Relids varnos_2, Oid distcol_type, Node *quals, List *rtable); +/* Merge given execution nodes based on join shippability conditions */ +extern ExecNodes *pgxc_merge_exec_nodes(ExecNodes *en1, + ExecNodes *en2, bool merge_dist_equijoin, bool merge_replicated_only); + +#endif diff --git a/src/include/pgxc/planner.h b/src/include/pgxc/planner.h index f289071cff..786a78e702 100644 --- a/src/include/pgxc/planner.h +++ b/src/include/pgxc/planner.h @@ -118,67 +118,8 @@ typedef struct * inserts into child by selecting from its parent */ } RemoteQuery; -/* - * FQS_context - * This context structure is used by the Fast Query Shipping walker, to gather - * information during analysing query for Fast Query Shipping. - */ -typedef struct -{ - bool sc_for_expr; /* if false, the we are checking shippability - * of the Query, otherwise, we are checking - * shippability of a stand-alone expression. - */ - Bitmapset *sc_shippability; /* The conditions for (un)shippability of the - * query. - */ - Query *sc_query; /* the query being analysed for FQS */ - int sc_query_level; /* level of the query */ - int sc_max_varlevelsup; /* maximum upper level referred to by any - * variable reference in the query. If this - * value is greater than 0, the query is not - * shippable, if shipped alone. - */ - ExecNodes *sc_exec_nodes; /* nodes where the query should be executed */ - ExecNodes *sc_subquery_en; /* ExecNodes produced by merging the ExecNodes - * for individual subqueries. This gets - * ultimately merged with sc_exec_nodes. 
- */ -} Shippability_context; - -/* enum for reasons as to why a query/expression is not FQSable */ -typedef enum -{ - SS_UNSHIPPABLE_EXPR = 0, /* it has unshippable expression */ - SS_NEED_SINGLENODE, /* Has expressions which can be evaluated when - * there is only a single node involved. - * Athought aggregates too fit in this class, we - * have a separate status to report aggregates, - * see below. - */ - SS_NEEDS_COORD, /* the query needs Coordinator */ - SS_VARLEVEL, /* one of its subqueries has a VAR - * referencing an upper level query - * relation - */ - SS_NO_NODES, /* no suitable nodes can be found to ship - * the query - */ - SS_UNSUPPORTED_EXPR, /* it has expressions currently unsupported - * by FQS, but such expressions might be - * supported by FQS in future - */ - SS_HAS_AGG_EXPR, /* it has aggregate expressions */ - SS_UNSHIPPABLE_TYPE /* the type of expression is unshippable */ -} ShippabilityStat; - /* global variable corresponding to the GUC with same name */ extern bool enable_fast_query_shipping; -/* forbid SQL if unsafe, useful to turn off for development */ -extern bool StrictStatementChecking; - -/* forbid SELECT even multi-node ORDER BY */ -extern bool StrictSelectChecking; extern PlannedStmt *pgxc_planner(Query *query, int cursorOptions, ParamListInfo boundParams); @@ -192,19 +133,6 @@ extern List *AddRemoteQueryNode(List *stmts, const char *queryString, RemoteQueryExecType remoteExecType, bool is_temp); extern bool pgxc_query_contains_temp_tables(List *queries); extern bool pgxc_query_contains_utility(List *queries); -/* TODO: We need a better place to keep these prototypes */ -extern bool pgxc_qual_hash_dist_equijoin(Relids varnos_1, Relids varnos_2, - Oid distcol_type, Node *quals, - List *rtable); -extern ExecNodes *pgxc_merge_exec_nodes(ExecNodes *exec_nodes1, - ExecNodes *exec_nodes2, - bool merge_dist_equijoin, - bool merge_replicated_only); -extern bool pgxc_shippability_walker(Node *node, Shippability_context *sc_context); -extern 
bool pgxc_test_shippability_reason(Shippability_context *context, - ShippabilityStat reason); -extern void pgxc_reset_shippability_reason(Shippability_context *context, - ShippabilityStat reason); extern void pgxc_rqplan_adjust_tlist(RemoteQuery *rqplan); extern void pgxc_rqplan_adjust_vars(RemoteQuery *rqplan, Node *node); diff --git a/src/include/pgxc/postgresql_fdw.h b/src/include/pgxc/postgresql_fdw.h deleted file mode 100644 index 57ab2b7d1d..0000000000 --- a/src/include/pgxc/postgresql_fdw.h +++ /dev/null @@ -1,23 +0,0 @@ -/*------------------------------------------------------------------------- - * - * postgresql_fdw.h - * - * foreign-data wrapper for PostgreSQL - * - * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group - * Portions Copyright (c) 2010-2012, Postgres-XC Development Group - * - * src/include/pgxc/postgresql_fdw.h - * - *------------------------------------------------------------------------- - */ - -#ifndef POSTGRES_FDW_H -#define POSTGRES_FDW_H - -#include "postgres.h" -#include "pgxc/execRemote.h" - -bool is_immutable_func(Oid funcid); -bool pgxc_is_expr_shippable(Expr *node, bool *has_aggs); -#endif |