You can subscribe to this list here.
2010 |
Jan
|
Feb
|
Mar
|
Apr
(4) |
May
(28) |
Jun
(12) |
Jul
(11) |
Aug
(12) |
Sep
(5) |
Oct
(19) |
Nov
(14) |
Dec
(12) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2011 |
Jan
(18) |
Feb
(30) |
Mar
(115) |
Apr
(89) |
May
(50) |
Jun
(44) |
Jul
(22) |
Aug
(13) |
Sep
(11) |
Oct
(30) |
Nov
(28) |
Dec
(39) |
2012 |
Jan
(38) |
Feb
(18) |
Mar
(43) |
Apr
(91) |
May
(108) |
Jun
(46) |
Jul
(37) |
Aug
(44) |
Sep
(33) |
Oct
(29) |
Nov
(36) |
Dec
(15) |
2013 |
Jan
(35) |
Feb
(611) |
Mar
(5) |
Apr
(55) |
May
(30) |
Jun
(28) |
Jul
(458) |
Aug
(34) |
Sep
(9) |
Oct
(39) |
Nov
(22) |
Dec
(32) |
2014 |
Jan
(16) |
Feb
(16) |
Mar
(42) |
Apr
(179) |
May
(7) |
Jun
(6) |
Jul
(9) |
Aug
|
Sep
(4) |
Oct
|
Nov
(3) |
Dec
|
2015 |
Jan
|
Feb
|
Mar
|
Apr
(2) |
May
(4) |
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
S | M | T | W | T | F | S |
---|---|---|---|---|---|---|
|
|
|
|
|
|
1
|
2
|
3
|
4
|
5
|
6
|
7
|
8
|
9
|
10
|
11
(2) |
12
(5) |
13
(3) |
14
|
15
|
16
|
17
|
18
|
19
|
20
(2) |
21
|
22
|
23
|
24
|
25
(1) |
26
(1) |
27
(2) |
28
(2) |
29
|
30
|
31
|
|
|
|
|
|
From: Michael P. <mic...@us...> - 2011-01-20 08:53:46
|
Project "Postgres-XC". The branch, master has been updated via c19b46835e586023fbaaf93c3c84e4898f4fe6bd (commit) via 05fc3e19fcafeb61b057a88b3c0c1285dff1a7a7 (commit) from efcf364436c54517788299e49d1d755553d854dd (commit) - Log ----------------------------------------------------------------- commit c19b46835e586023fbaaf93c3c84e4898f4fe6bd Author: Michael P <mic...@us...> Date: Thu Jan 20 17:40:49 2011 +0900 Support for correlated DELETE for replicated tables Implementation of correlated delete against replicated tables. written by Andrei Martsinchyk diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index a97b4fa..01c51c6 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -4725,7 +4725,8 @@ create_remotedelete_plan(PlannerInfo *root, Plan *topplan) StringInfo buf; Oid nspid; char *nspname; - Var *ctid; + Var *ctid; + /* Get target table */ ttab = (RangeTblEntry *) list_nth(parse->rtable, parse->resultRelation - 1); @@ -4814,31 +4815,118 @@ create_remotedelete_plan(PlannerInfo *root, Plan *topplan) */ fstep = make_remotequery(NIL, ttab, NIL, ttab->relid); - innerPlan(fstep) = topplan; - /* - * TODO replicated handling: add extra step with step query - * SELECT * FROM ttab WHERE ctid = ? and final step with step query - * DELETE FROM ttab WHERE * = ? 
- */ - appendStringInfoString(buf, " WHERE ctid = $1"); - fstep->sql_statement = pstrdup(buf->data); - fstep->combine_type = COMBINE_TYPE_SUM; - fstep->read_only = false; - fstep->exec_nodes = makeNode(ExecNodes); - fstep->exec_nodes->baselocatortype = rel_loc_info->locatorType; - fstep->exec_nodes->tableusagetype = TABLE_USAGE_TYPE_USER; - fstep->exec_nodes->primarynodelist = NULL; - fstep->exec_nodes->nodelist = NULL; - fstep->exec_nodes->relid = ttab->relid; - fstep->exec_nodes->accesstype = RELATION_ACCESS_UPDATE; - - /* first and only target entry of topplan is ctid, reference it */ - ctid = makeVar(INNER, 1, TIDOID, -1, 0); - fstep->exec_nodes->expr = (Var *) ctid; + if (rel_loc_info->locatorType == LOCATOR_TYPE_REPLICATED) + { + /* + * For replicated case we need two extra steps. One is to determine + * all values by CTID on the node from which the tuple has come, next + * is to remove all rows with these values on all nodes + */ + RemoteQuery *xstep; + List *xtlist = NIL; + StringInfo xbuf = makeStringInfo(); + int natts = get_relnatts(ttab->relid); + int att; + + appendStringInfoString(xbuf, "SELECT "); + appendStringInfoString(buf, " WHERE"); + + /* + * Populate projections of the extra SELECT step and WHERE clause of + * the final DELETE step + */ + for (att = 1; att <= natts; att++) + { + TargetEntry *tle; + Var *expr; + HeapTuple tp; + + tp = SearchSysCache(ATTNUM, + ObjectIdGetDatum(ttab->relid), + Int16GetDatum(att), + 0, 0); + if (HeapTupleIsValid(tp)) + { + Form_pg_attribute att_tup = (Form_pg_attribute) GETSTRUCT(tp); + + /* add comma before all except first attributes */ + if (att > 1) + { + appendStringInfoString(xbuf, ", "); + appendStringInfoString(buf, " AND"); + } + appendStringInfoString(xbuf, NameStr(att_tup->attname)); + appendStringInfo(buf, " %s = $%d", NameStr(att_tup->attname), att); + + expr = makeVar(att, att, att_tup->atttypid, + att_tup->atttypmod, 0); + tle = makeTargetEntry((Expr *) expr, att, + NameStr(att_tup->attname), 
false); + xtlist = lappend(xtlist, tle); + ReleaseSysCache(tp); + } + else + elog(ERROR, "cache lookup failed for attribute %d of relation %u", + att, ttab->relid); + } + + /* complete SELECT command */ + appendStringInfo(xbuf, " FROM %s.%s WHERE ctid = $1", + quote_identifier(nspname), + quote_identifier(ttab->relname)); + + /* build up the extra select step */ + xstep = make_remotequery(xtlist, ttab, NIL, ttab->relid); + innerPlan(xstep) = topplan; + xstep->sql_statement = pstrdup(xbuf->data); + xstep->read_only = true; + xstep->exec_nodes = makeNode(ExecNodes); + xstep->exec_nodes->baselocatortype = rel_loc_info->locatorType; + xstep->exec_nodes->tableusagetype = TABLE_USAGE_TYPE_USER; + xstep->exec_nodes->primarynodelist = NULL; + xstep->exec_nodes->nodelist = NULL; + xstep->exec_nodes->relid = ttab->relid; + xstep->exec_nodes->accesstype = RELATION_ACCESS_READ; + + /* first and only target entry of topplan is ctid, reference it */ + ctid = makeVar(INNER, 1, TIDOID, -1, 0); + xstep->exec_nodes->expr = (Expr *) ctid; + + pfree(xbuf->data); + pfree(xbuf); + + /* build up the final delete step */ + innerPlan(fstep) = (Plan *) xstep; + fstep->sql_statement = pstrdup(buf->data); + fstep->combine_type = COMBINE_TYPE_SAME; + fstep->read_only = false; + fstep->exec_nodes = GetRelationNodes(rel_loc_info, NULL, + RELATION_ACCESS_UPDATE); + } + else + { + /* build up the final delete step */ + innerPlan(fstep) = topplan; + appendStringInfoString(buf, " WHERE ctid = $1"); + fstep->sql_statement = pstrdup(buf->data); + fstep->combine_type = COMBINE_TYPE_SUM; + fstep->read_only = false; + fstep->exec_nodes = makeNode(ExecNodes); + fstep->exec_nodes->baselocatortype = rel_loc_info->locatorType; + fstep->exec_nodes->tableusagetype = TABLE_USAGE_TYPE_USER; + fstep->exec_nodes->primarynodelist = NULL; + fstep->exec_nodes->nodelist = NULL; + fstep->exec_nodes->relid = ttab->relid; + fstep->exec_nodes->accesstype = RELATION_ACCESS_UPDATE; + + /* first and only target entry of 
topplan is ctid, reference it */ + ctid = makeVar(INNER, 1, TIDOID, -1, 0); + fstep->exec_nodes->expr = (Expr *) ctid; + } pfree(buf->data); pfree(buf); - return fstep; + return (Plan *) fstep; } #endif commit 05fc3e19fcafeb61b057a88b3c0c1285dff1a7a7 Author: Michael P <mic...@us...> Date: Thu Jan 20 17:38:39 2011 +0900 Base code to support correlated DELETE and UPDATE Base code to support correlated deletes and updates, implementation of correlated delete without replicated case. depends on executor_bugfixes patch. Patch written by Andrei Martsinchyk diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index ad227f4..40cb4f5 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -855,6 +855,9 @@ _copyRemoteQuery(RemoteQuery *from) COPY_SCALAR_FIELD(inner_reduce_level); COPY_BITMAPSET_FIELD(outer_relids); COPY_BITMAPSET_FIELD(inner_relids); + COPY_STRING_FIELD(inner_statement); + COPY_STRING_FIELD(outer_statement); + COPY_STRING_FIELD(join_condition); return newnode; } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index e91eb57..a97b4fa 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -39,6 +39,7 @@ #include "utils/builtins.h" #include "utils/syscache.h" #include "catalog/pg_proc.h" +#include "catalog/pg_type.h" #include "executor/executor.h" #endif #include "utils/lsyscache.h" @@ -636,6 +637,8 @@ create_remotejoin_plan(PlannerInfo *root, JoinPath *best_path, Plan *parent, Pla { NestLoop *nest_parent; JoinReduceInfo join_info; + RemoteQuery *outer = NULL; + RemoteQuery *inner = NULL; if (!enable_remotejoin) return parent; @@ -658,21 +661,26 @@ create_remotejoin_plan(PlannerInfo *root, JoinPath *best_path, Plan *parent, Pla else nest_parent = (NestLoop *)parent; + /* + * Now RemoteQuery subnode is behind Matherial but this may be changed later + */ + if (IsA(outer_plan, Material) && IsA(outer_plan->lefttree, 
RemoteQuery)) + outer = (RemoteQuery *) outer_plan->lefttree; + else if (IsA(outer_plan, RemoteQuery)) + outer = (RemoteQuery *) outer_plan; + + if (IsA(inner_plan, Material) && IsA(inner_plan->lefttree, RemoteQuery)) + inner = (RemoteQuery *) inner_plan->lefttree; + else if (IsA(inner_plan, RemoteQuery)) + inner = (RemoteQuery *) inner_plan; + + /* check if both the nodes qualify for reduction */ - if (IsA(outer_plan, Material) && - IsA(((Material *) outer_plan)->plan.lefttree, RemoteQuery) && - IsA(inner_plan, Material) && - IsA(((Material *) inner_plan)->plan.lefttree, RemoteQuery)) + if (outer && inner) { int i; List *rtable_list = NIL; - Material *outer_mat = (Material *)outer_plan; - Material *inner_mat = (Material *)inner_plan; - - RemoteQuery *outer = (RemoteQuery *)outer_mat->plan.lefttree; - RemoteQuery *inner = (RemoteQuery *)inner_mat->plan.lefttree; - /* * Check if both these plans are from the same remote node. If yes, * replace this JOIN along with it's two children with one equivalent @@ -697,7 +705,7 @@ create_remotejoin_plan(PlannerInfo *root, JoinPath *best_path, Plan *parent, Pla { RemoteQuery *result; Plan *result_plan; - StringInfoData targets, clauses, scan_clauses, fromlist; + StringInfoData targets, clauses, scan_clauses, fromlist, join_condition; StringInfoData squery; List *parent_vars, *out_tlist = NIL, *in_tlist = NIL, *base_tlist; ListCell *l; @@ -769,13 +777,13 @@ create_remotejoin_plan(PlannerInfo *root, JoinPath *best_path, Plan *parent, Pla } else { - /* + /* * there is no local bound clause, all the clauses are remote * scan clauses */ remote_scan_clauses = nest_parent->join.plan.qual; } - + /* generate the tlist for the new RemoteScan node using out_tlist, in_tlist */ initStringInfo(&targets); create_remote_target_list(root, &targets, out_tlist, in_tlist, @@ -830,6 +838,9 @@ create_remotejoin_plan(PlannerInfo *root, JoinPath *best_path, Plan *parent, Pla result->outer_reduce_level = outer->reduce_level; result->inner_relids = 
in_relids; result->outer_relids = out_relids; + result->inner_statement = pstrdup(inner->sql_statement); + result->outer_statement = pstrdup(outer->sql_statement); + result->join_condition = NULL; result->exec_nodes = copyObject(join_info.exec_nodes); appendStringInfo(&fromlist, " %s (%s) %s", @@ -896,22 +907,27 @@ create_remotejoin_plan(PlannerInfo *root, JoinPath *best_path, Plan *parent, Pla /* generate the squery for this node */ /* NOTE: it's assumed that the remote_paramNums array is - * filled in the same order as we create the query here. + * filled in the same order as we create the query here. * - * TODO: we need some way to ensure that the remote_paramNums - * is filled in the same order as the order in which the clauses + * TODO: we need some way to ensure that the remote_paramNums + * is filled in the same order as the order in which the clauses * are added in the query below. */ initStringInfo(&squery); appendStringInfo(&squery, "SELECT %s FROM %s", targets.data, fromlist.data); + initStringInfo(&join_condition); if (clauses.data[0] != '\0') - appendStringInfo(&squery, " %s %s", use_where? " WHERE " : " ON ", clauses.data); + appendStringInfo(&join_condition, " %s %s", use_where? " WHERE " : " ON ", clauses.data); if (scan_clauses.data[0] != '\0') - appendStringInfo(&squery, " %s %s", use_where? " AND " : " WHERE ", scan_clauses.data); + appendStringInfo(&join_condition, " %s %s", use_where? " AND " : " WHERE ", scan_clauses.data); + + if (join_condition.data[0] != '\0') + appendStringInfoString(&squery, join_condition.data); result->sql_statement = squery.data; + result->join_condition = join_condition.data; /* don't forget to increment the index for the next time around! 
*/ result->reduce_level = root->rs_alias_index++; @@ -939,7 +955,7 @@ create_remotejoin_plan(PlannerInfo *root, JoinPath *best_path, Plan *parent, Pla result_plan->plan_rows = outer_plan->plan_rows; result_plan->plan_width = outer_plan->plan_width; - return (Plan *)make_material(result_plan); + return (Plan *) make_material(result_plan); } } @@ -4656,4 +4672,173 @@ findReferencedVars(List *parent_vars, Plan *plan, List **out_tlist, Relids *out_ *out_tlist = tlist; *out_relids = relids; } + + +/* + * create_remoteinsert_plan() + * + * Dummy + */ +Plan * +create_remoteinsert_plan(PlannerInfo *root, Plan *topplan) +{ + return topplan; +} + + +/* + * create_remoteupdate_plan() + * + * Dummy + */ +Plan * +create_remoteupdate_plan(PlannerInfo *root, Plan *topplan) +{ + return topplan; +} + +/* + * create_remotedelete_plan() + * + * Builds up a final node of the plan executing DELETE command. + * + * If target table is on coordinator (like catalog tables) the plan is left + * unchanged and delete will be handled using standard postgres procedure. + * + * If topmost node of the plan is a RemoteQuery the step query looks like + * SELECT ctid FROM target_table WHERE condition, and we should convert it to + * DELETE FROM target_table WHERE condition. + * + * In correlated case the step query looks like + * SELECT target_table.ctid FROM target_table, other_tables WHERE condition, and + * we should convert it to DELETE FROM target_table USING other_tables WHERE condition. + * + * XXX Is it ever possible if the topmost node is not a RemoteQuery? + */ +Plan * +create_remotedelete_plan(PlannerInfo *root, Plan *topplan) +{ + Query *parse = root->parse; + RangeTblEntry *ttab; + RelationLocInfo *rel_loc_info; + RemoteQuery *fstep; + StringInfo buf; + Oid nspid; + char *nspname; + Var *ctid; + + /* Get target table */ + ttab = (RangeTblEntry *) list_nth(parse->rtable, parse->resultRelation - 1); + /* Bad relation ? 
*/ + if (ttab == NULL || ttab->rtekind != RTE_RELATION) + return topplan; + + /* Get location info of the target table */ + rel_loc_info = GetRelationLocInfo(ttab->relid); + if (rel_loc_info == NULL) + return topplan; + + buf = makeStringInfo(); + + /* Compose DELETE FROM target_table */ + nspid = get_rel_namespace(ttab->relid); + nspname = get_namespace_name(nspid); + + appendStringInfo(buf, "DELETE FROM %s.%s", quote_identifier(nspname), + quote_identifier(ttab->relname)); + + /* See if we can push down DELETE */ + if (IsA(topplan, RemoteQuery)) + { + char *query; + + fstep = (RemoteQuery *) topplan; + query = fstep->sql_statement; + + if (strncmp(query, "SELECT ctid", 11) == 0) + { + /* + * Single table case + * We need to find position of the WHERE keyword in the string and + * append to the buffer part of original string starting from the + * position found. It is possible WHERE clause is absent (DELETE ALL) + * In this case buffer already has new step query + */ + char *where = strstr(query, " WHERE "); + if (where) + appendStringInfoString(buf, where); + } + else + { + /* + * multi-table case + * Assuming the RemoteQuery is created in create_remotejoin_plan(). + * If the final RemoteQuery is for correlated delete outer_statement + * is just a SELECT FROM target_table, outer_statement is correlated + * part and we can put it into USING clause. + * Join type should be plain jon (comma-separated list) and all + * conditions are in WHERE clause. + * No GROUP BY or ORDER BY clauses expected. + * If create_remotejoin_plan is modified the code below should be + * revisited. 
+ */ + /* + * In expressions target table is referenced as outer_alias, append + * alias name before USING clause + */ + appendStringInfo(buf, " %s USING ", fstep->outer_alias); + + /* Make up USING clause */ + appendStringInfo(buf, "(%s) %s ", fstep->inner_statement, fstep->inner_alias); + + /* Append WHERE clause */ + appendStringInfoString(buf, fstep->join_condition); + } + /* replace step query */ + pfree(fstep->sql_statement); + fstep->sql_statement = pstrdup(buf->data); + /* set combine_type, it is COMBINE_TYPE_NONE for SELECT */ + fstep->combine_type = rel_loc_info->locatorType == LOCATOR_TYPE_REPLICATED ? + COMBINE_TYPE_SAME : COMBINE_TYPE_SUM; + fstep->read_only = false; + + pfree(buf->data); + pfree(buf); + + return topplan; + } + + /* + * Top plan will return CTIDs and we should delete tuples with these CTIDs + * on the nodes. To determine target node + */ + fstep = make_remotequery(NIL, ttab, NIL, ttab->relid); + + innerPlan(fstep) = topplan; + /* + * TODO replicated handling: add extra step with step query + * SELECT * FROM ttab WHERE ctid = ? and final step with step query + * DELETE FROM ttab WHERE * = ? 
+ */ + appendStringInfoString(buf, " WHERE ctid = $1"); + fstep->sql_statement = pstrdup(buf->data); + fstep->combine_type = COMBINE_TYPE_SUM; + fstep->read_only = false; + fstep->exec_nodes = makeNode(ExecNodes); + fstep->exec_nodes->baselocatortype = rel_loc_info->locatorType; + fstep->exec_nodes->tableusagetype = TABLE_USAGE_TYPE_USER; + fstep->exec_nodes->primarynodelist = NULL; + fstep->exec_nodes->nodelist = NULL; + fstep->exec_nodes->relid = ttab->relid; + fstep->exec_nodes->accesstype = RELATION_ACCESS_UPDATE; + + /* first and only target entry of topplan is ctid, reference it */ + ctid = makeVar(INNER, 1, TIDOID, -1, 0); + fstep->exec_nodes->expr = (Var *) ctid; + + pfree(buf->data); + pfree(buf); + + return fstep; +} #endif diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index dc6ff35..8ddf5a8 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -226,6 +226,27 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) lfirst(lp) = set_plan_references(glob, subplan, subrtable); } +#ifdef PGXC + /* + * PGXC should apply INSERT/UPDATE/DELETE to a datanode. We are overriding + * normal Postgres behavior by modifying final plan or by adding a node on + * top of it. 
+ */ + if (IS_PGXC_COORDINATOR) + switch (parse->commandType) + { + case CMD_INSERT: + top_plan = create_remoteinsert_plan(root, top_plan); + break; + case CMD_UPDATE: + top_plan = create_remoteupdate_plan(root, top_plan); + break; + case CMD_DELETE: + top_plan = create_remotedelete_plan(root, top_plan); + break; + } +#endif + /* build the PlannedStmt result */ result = makeNode(PlannedStmt); diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index ca2e2a2..5804787 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1328,9 +1328,11 @@ create_remotequery_path(PlannerInfo *root, RelOptInfo *rel) /* PGXCTODO - set cost properly */ cost_seqscan(pathnode, root, rel); - /* - * Insert a materialization plan above this temporarily - * until we better handle multiple steps using the same connection. + /* + * ReScan of RemoteQuery is not implemented so we have to materialize + * results. Anyway, it may be more effective to materialize results then + * execute query against remote query multiple times. 
+ * Subject for future optimization */ pathnode = create_material_path(rel, pathnode); diff --git a/src/backend/pgxc/plan/planner.c b/src/backend/pgxc/plan/planner.c index 1a56b44..ace4635 100644 --- a/src/backend/pgxc/plan/planner.c +++ b/src/backend/pgxc/plan/planner.c @@ -1832,6 +1832,9 @@ makeRemoteQuery(void) result->inner_reduce_level = 0; result->outer_relids = NULL; result->inner_relids = NULL; + result->inner_statement = NULL; + result->outer_statement = NULL; + result->join_condition = NULL; return result; } @@ -2784,16 +2787,16 @@ pgxc_planner(Query *query, int cursorOptions, ParamListInfo boundParams) get_plan_nodes_command(query_step, root); - if (query_step->exec_nodes == NULL) + /* standard planner handles correlated UPDATE or DELETE */ + if ((query->commandType == CMD_UPDATE || query->commandType == CMD_DELETE) + && list_length(query->rtable) > 1) { - /* Do not yet allow multi-node correlated UPDATE or DELETE */ - if (query->commandType == CMD_UPDATE || query->commandType == CMD_DELETE) - { - ereport(ERROR, - (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), - (errmsg("UPDATE and DELETE that are correlated or use non-immutable functions not yet supported")))); - } + result = standard_planner(query, cursorOptions, boundParams); + return result; + } + if (query_step->exec_nodes == NULL) + { /* * Processing guery against catalog tables, or multi-step command. 
* Run through standard planner diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index c1d191e..4fbb11f 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -121,5 +121,8 @@ extern void extract_query_dependencies(List *queries, #ifdef PGXC extern Var *search_tlist_for_var(Var *var, List *jtlist); +extern Plan *create_remoteinsert_plan(PlannerInfo *root, Plan *topplan); +extern Plan *create_remoteupdate_plan(PlannerInfo *root, Plan *topplan); +extern Plan *create_remotedelete_plan(PlannerInfo *root, Plan *topplan); #endif #endif /* PLANMAIN_H */ diff --git a/src/include/pgxc/planner.h b/src/include/pgxc/planner.h index 61cb6d3..42dd2b6 100644 --- a/src/include/pgxc/planner.h +++ b/src/include/pgxc/planner.h @@ -104,6 +104,9 @@ typedef struct int inner_reduce_level; Relids outer_relids; Relids inner_relids; + char *inner_statement; + char *outer_statement; + char *join_condition; } RemoteQuery; ----------------------------------------------------------------------- Summary of changes: src/backend/nodes/copyfuncs.c | 3 + src/backend/optimizer/plan/createplan.c | 311 +++++++++++++++++++++++++++++-- src/backend/optimizer/plan/planner.c | 21 ++ src/backend/optimizer/util/pathnode.c | 8 +- src/backend/pgxc/plan/planner.c | 19 +- src/include/optimizer/planmain.h | 3 + src/include/pgxc/planner.h | 3 + 7 files changed, 338 insertions(+), 30 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-01-20 08:33:26
|
Project "Postgres-XC". The branch, master has been updated via efcf364436c54517788299e49d1d755553d854dd (commit) via 8cf0de56a706ad0da72d3ea889844ae6c9e4a6fb (commit) from 90a3e337ead46e9029e877b6e8d577c26307ebe5 (commit) - Log ----------------------------------------------------------------- commit efcf364436c54517788299e49d1d755553d854dd Author: Michael P <mic...@us...> Date: Thu Jan 20 17:30:57 2011 +0900 Fix and Clean up in Executor Fixes for various bugs found during development and testing. Includes also some clean ups. Patch from Andrei Martsinchyk diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c index 63031a7..c124fc3 100644 --- a/src/backend/access/common/heaptuple.c +++ b/src/backend/access/common/heaptuple.c @@ -1193,18 +1193,15 @@ slot_deform_datarow(TupleTableSlot *slot) (errcode(ERRCODE_DATA_CORRUPTED), errmsg("Tuple does not match the descriptor"))); - if (slot->tts_attinmeta == NULL) - { - /* - * Ensure info about input functions is available as long as slot lives - */ - MemoryContext oldcontext = MemoryContextSwitchTo(slot->tts_mcxt); + /* + * Ensure info about input functions is available as long as slot lives + * as well as deformed values + */ + MemoryContext oldcontext = MemoryContextSwitchTo(slot->tts_mcxt); + if (slot->tts_attinmeta == NULL) slot->tts_attinmeta = TupleDescGetAttInMetadata(slot->tts_tupleDescriptor); - MemoryContextSwitchTo(oldcontext); - } - buffer = makeStringInfo(); for (i = 0; i < attnum; i++) { @@ -1240,6 +1237,8 @@ slot_deform_datarow(TupleTableSlot *slot) pfree(buffer); slot->tts_nvalid = attnum; + + MemoryContextSwitchTo(oldcontext); } #endif @@ -1292,6 +1291,16 @@ slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull) return (Datum) 0; } +#ifdef PGXC + /* If it is a data row tuple extract all and return requested */ + if (slot->tts_dataRow) + { + slot_deform_datarow(slot); + *isnull = slot->tts_isnull[attnum - 1]; + return slot->tts_values[attnum - 1]; + } +#endif + /* * 
otherwise we had better have a physical tuple (tts_nvalid should equal * natts in all virtual-tuple cases) @@ -1336,11 +1345,6 @@ slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull) /* * Extract the attribute, along with any preceding attributes. */ -#ifdef PGXC - if (slot->tts_dataRow) - slot_deform_datarow(slot); - else -#endif slot_deform_tuple(slot, attnum); /* @@ -1495,6 +1499,15 @@ slot_attisnull(TupleTableSlot *slot, int attnum) if (attnum > tupleDesc->natts) return true; +#ifdef PGXC + /* If it is a data row tuple extract all and return requested */ + if (slot->tts_dataRow) + { + slot_deform_datarow(slot); + return slot->tts_isnull[attnum - 1]; + } +#endif + /* * otherwise we had better have a physical tuple (tts_nvalid should equal * natts in all virtual-tuple cases) diff --git a/src/backend/pgxc/pool/execRemote.c b/src/backend/pgxc/pool/execRemote.c index 3ca96c6..3803fa5 100644 --- a/src/backend/pgxc/pool/execRemote.c +++ b/src/backend/pgxc/pool/execRemote.c @@ -19,6 +19,7 @@ #include "postgres.h" #include "access/gtm.h" #include "access/xact.h" +#include "catalog/pg_type.h" #include "commands/prepare.h" #include "executor/executor.h" #include "gtm/gtm_c.h" @@ -135,6 +136,7 @@ stat_transaction(int node_count) } +#ifdef NOT_USED /* * To collect statistics: count a two-phase commit on nodes */ @@ -146,6 +148,7 @@ stat_2pc() else nonautocommit_2pc++; } +#endif /* @@ -586,6 +589,8 @@ HandleRowDescription(RemoteQueryState *combiner, char *msg_body, size_t len) return false; } + +#ifdef NOT_USED /* * Handle ParameterStatus ('S') message from a data node connection (SET command) */ @@ -607,6 +612,7 @@ HandleParameterStatus(RemoteQueryState *combiner, char *msg_body, size_t len) pq_putmessage('S', msg_body, len); } } +#endif /* * Handle CopyInResponse ('G') message from a data node connection @@ -1010,7 +1016,6 @@ BufferConnection(PGXCNodeHandle *conn) conn->state = DN_CONNECTION_STATE_ERROR_FATAL; add_error_message(conn, "Failed to fetch from data 
node"); } - break; } else if (res == RESPONSE_COMPLETE) { @@ -1054,49 +1059,49 @@ FetchTuple(RemoteQueryState *combiner, TupleTableSlot *slot) { bool have_tuple = false; - while (combiner->conn_count > 0) + /* If we have message in the buffer, consume it */ + if (combiner->currentRow.msg) { - PGXCNodeHandle *conn; - int res; + ExecStoreDataRowTuple(combiner->currentRow.msg, + combiner->currentRow.msglen, + combiner->currentRow.msgnode, slot, true); + combiner->currentRow.msg = NULL; + combiner->currentRow.msglen = 0; + combiner->currentRow.msgnode = 0; + have_tuple = true; + } - /* If we have message in the buffer, consume it */ - if (combiner->currentRow.msg) - { - ExecStoreDataRowTuple(combiner->currentRow.msg, - combiner->currentRow.msglen, - combiner->currentRow.msgnode, slot, true); - combiner->currentRow.msg = NULL; - combiner->currentRow.msglen = 0; - combiner->currentRow.msgnode = 0; - have_tuple = true; - } - /* - * If this is ordered fetch we can not know what is the node - * to handle next, so sorter will choose next itself and set it as - * currentRow to have it consumed on the next call to FetchTuple - */ - if (((RemoteQuery *)combiner->ss.ps.plan)->sort) - return have_tuple; + /* + * If this is ordered fetch we can not know what is the node + * to handle next, so sorter will choose next itself and set it as + * currentRow to have it consumed on the next call to FetchTuple. + * Otherwise allow to prefetch next tuple + */ + if (((RemoteQuery *)combiner->ss.ps.plan)->sort) + return have_tuple; - /* - * Note: If we are fetching not sorted results we can not have both - * currentRow and buffered rows. When connection is buffered currentRow - * is moved to buffer, and then it is cleaned after buffering is - * completed. Afterwards rows will be taken from the buffer bypassing - * currentRow until buffer is empty, and only after that data are read - * from a connection. 
- */ - if (list_length(combiner->rowBuffer) > 0) - { - RemoteDataRow dataRow = (RemoteDataRow) linitial(combiner->rowBuffer); - combiner->rowBuffer = list_delete_first(combiner->rowBuffer); - ExecStoreDataRowTuple(dataRow->msg, dataRow->msglen, - dataRow->msgnode, slot, true); - pfree(dataRow); - return true; - } + /* + * Note: If we are fetching not sorted results we can not have both + * currentRow and buffered rows. When connection is buffered currentRow + * is moved to buffer, and then it is cleaned after buffering is + * completed. Afterwards rows will be taken from the buffer bypassing + * currentRow until buffer is empty, and only after that data are read + * from a connection. + */ + if (list_length(combiner->rowBuffer) > 0) + { + RemoteDataRow dataRow = (RemoteDataRow) linitial(combiner->rowBuffer); + combiner->rowBuffer = list_delete_first(combiner->rowBuffer); + ExecStoreDataRowTuple(dataRow->msg, dataRow->msglen, + dataRow->msgnode, slot, true); + pfree(dataRow); + return true; + } - conn = combiner->connections[combiner->current_conn]; + while (combiner->conn_count > 0) + { + int res; + PGXCNodeHandle *conn = combiner->connections[combiner->current_conn]; /* Going to use a connection, buffer it if needed */ if (conn->state == DN_CONNECTION_STATE_QUERY && conn->combiner != NULL @@ -1116,7 +1121,7 @@ FetchTuple(RemoteQueryState *combiner, TupleTableSlot *slot) * connection clean */ if (have_tuple) - return have_tuple; + return true; else { if (pgxc_node_send_execute(conn, combiner->cursor, 1) != 0) @@ -1160,25 +1165,42 @@ FetchTuple(RemoteQueryState *combiner, TupleTableSlot *slot) else combiner->current_conn = 0; } - - /* If we have a tuple we can leave now. 
*/ - if (have_tuple) + else if (res = RESPONSE_DATAROW && have_tuple) + { + /* + * We already have a tuple and received another one, leave it till + * next fetch + */ return true; + } + + /* If we have message in the buffer, consume it */ + if (combiner->currentRow.msg) + { + ExecStoreDataRowTuple(combiner->currentRow.msg, + combiner->currentRow.msglen, + combiner->currentRow.msgnode, slot, true); + combiner->currentRow.msg = NULL; + combiner->currentRow.msglen = 0; + combiner->currentRow.msgnode = 0; + have_tuple = true; + } + + /* + * If this is ordered fetch we can not know what is the node + * to handle next, so sorter will choose next itself and set it as + * currentRow to have it consumed on the next call to FetchTuple. + * Otherwise allow to prefetch next tuple + */ + if (((RemoteQuery *)combiner->ss.ps.plan)->sort) + return have_tuple; } - /* Wrap up last message if exists */ - if (combiner->currentRow.msg) - { - ExecStoreDataRowTuple(combiner->currentRow.msg, - combiner->currentRow.msglen, - combiner->currentRow.msgnode, slot, true); - combiner->currentRow.msg = NULL; - combiner->currentRow.msglen = 0; - combiner->currentRow.msgnode = 0; - return true; - } - /* otherwise report end of data to the caller */ - ExecClearTuple(slot); - return false; + + /* report end of data to the caller */ + if (!have_tuple) + ExecClearTuple(slot); + + return have_tuple; } @@ -2747,8 +2769,31 @@ ExecInitRemoteQuery(RemoteQuery *node, EState *estate, int eflags) /* We need expression context to evaluate */ if (node->exec_nodes && node->exec_nodes->expr) - ExecAssignExprContext(estate, &remotestate->ss.ps); + { + Expr *expr = node->exec_nodes->expr; + if (IsA(expr, Var) && ((Var *) expr)->vartype == TIDOID) + { + /* Special case if expression does not need to be evaluated */ + } + else + { + /* + * Inner plan provides parameter values and may be needed + * to determine target nodes. In this case expression is evaluated + * and we should made values available for evaluator. 
+ * So allocate storage for the values. + */ + if (innerPlan(node)) + { + int nParams = list_length(node->scan.plan.targetlist); + estate->es_param_exec_vals = (ParamExecData *) palloc0( + nParams * sizeof(ParamExecData)); + } + /* prepare expression evaluation */ + ExecAssignExprContext(estate, &remotestate->ss.ps); + } + } if (innerPlan(node)) innerPlanState(remotestate) = ExecInitNode(innerPlan(node), estate, eflags); @@ -2838,27 +2883,70 @@ get_exec_connections(RemoteQueryState *planstate, { if (exec_nodes->expr) { - /* execution time determining of target data nodes */ - bool isnull; - ExprState *estate = ExecInitExpr(exec_nodes->expr, - (PlanState *) planstate); - Datum partvalue = ExecEvalExpr(estate, - planstate->ss.ps.ps_ExprContext, - &isnull, - NULL); - if (!isnull) + /* + * Special case (argh, another one): if expression data type is TID + * the ctid value is specific to the node from which it has been + * returned. + * So try and determine originating node and execute command on + * that node only + */ + if (IsA(exec_nodes->expr, Var) && ((Var *) exec_nodes->expr)->vartype == TIDOID) + { + Var *ctid = (Var *) exec_nodes->expr; + PlanState *source = (PlanState *) planstate; + TupleTableSlot *slot; + + /* Find originating RemoteQueryState */ + if (ctid->varno == INNER) + source = innerPlanState(source); + else if (ctid->varno == OUTER) + source = outerPlanState(source); + + while (!IsA(source, RemoteQueryState)) + { + TargetEntry *tle = list_nth(source->plan->targetlist, + ctid->varattno - 1); + Assert(IsA(tle->expr, Var)); + ctid = (Var *) tle->expr; + if (ctid->varno == INNER) + source = innerPlanState(source); + else if (ctid->varno == OUTER) + source = outerPlanState(source); + else + elog(ERROR, "failed to determine target node"); + } + + slot = source->ps_ResultTupleSlot; + /* The slot should be of type DataRow */ + Assert(!TupIsNull(slot) && slot->tts_dataRow); + + nodelist = list_make1_int(slot->tts_dataNode); + primarynode = NIL; + } + else { - 
RelationLocInfo *rel_loc_info = GetRelationLocInfo(exec_nodes->relid); - ExecNodes *nodes = GetRelationNodes(rel_loc_info, - (long *) &partvalue, - exec_nodes->accesstype); - if (nodes) + /* execution time determining of target data nodes */ + bool isnull; + ExprState *estate = ExecInitExpr(exec_nodes->expr, + (PlanState *) planstate); + Datum partvalue = ExecEvalExpr(estate, + planstate->ss.ps.ps_ExprContext, + &isnull, + NULL); + if (!isnull) { - nodelist = nodes->nodelist; - primarynode = nodes->primarynodelist; - pfree(nodes); + RelationLocInfo *rel_loc_info = GetRelationLocInfo(exec_nodes->relid); + ExecNodes *nodes = GetRelationNodes(rel_loc_info, + (long *) &partvalue, + exec_nodes->accesstype); + if (nodes) + { + nodelist = nodes->nodelist; + primarynode = nodes->primarynodelist; + pfree(nodes); + } + FreeRelationLocInfo(rel_loc_info); } - FreeRelationLocInfo(rel_loc_info); } } else { nodelist = exec_nodes->nodelist; @@ -3134,7 +3222,6 @@ do_query(RemoteQueryState *node) errmsg("Failed to send command to data nodes"))); } } - primaryconnection->combiner = node; Assert(node->combine_type == COMBINE_TYPE_SAME); /* Make sure the command is completed on the primary node */ @@ -3365,6 +3452,7 @@ TupleTableSlot * ExecRemoteQuery(RemoteQueryState *node) { RemoteQuery *step = (RemoteQuery *) node->ss.ps.plan; + EState *estate = node->ss.ps.state; TupleTableSlot *resultslot = node->ss.ps.ps_ResultTupleSlot; TupleTableSlot *scanslot = node->ss.ss_ScanTupleSlot; bool have_tuple = false; @@ -3386,11 +3474,31 @@ ExecRemoteQuery(RemoteQueryState *node) /* * Use data row returned by the previous step as parameters for * the main query. - * Exit if no more slots. 
*/ + if (!TupIsNull(innerSlot)) + { step->paramval_len = ExecCopySlotDatarow(innerSlot, &step->paramval_data); + + /* Needed for expression evaluation */ + if (estate->es_param_exec_vals) + { + int i; + int natts = innerSlot->tts_tupleDescriptor->natts; + + slot_getallattrs(innerSlot); + for (i = 0; i < natts; i++) + estate->es_param_exec_vals[i].value = slot_getattr( + innerSlot, + i+1, + &estate->es_param_exec_vals[i].isnull); + } + } + else + { + /* no parameters, exit */ + return NULL; + } } do_query(node); @@ -3509,6 +3617,28 @@ handle_results: TupleTableSlot *innerSlot = ExecProcNode(innerPlanState(node)); if (!TupIsNull(innerSlot)) { + /* reset the counter */ + node->command_complete_count = 0; + /* + * Use data row returned by the previous step as parameters for + * the main query. + */ + step->paramval_len = ExecCopySlotDatarow(innerSlot, + &step->paramval_data); + + /* Needed for expression evaluation */ + if (estate->es_param_exec_vals) + { + int i; + int natts = innerSlot->tts_tupleDescriptor->natts; + + slot_getallattrs(innerSlot); + for (i = 0; i < natts; i++) + estate->es_param_exec_vals[i].value = slot_getattr( + innerSlot, + i+1, + &estate->es_param_exec_vals[i].isnull); + } do_query(node); goto handle_results; } commit 8cf0de56a706ad0da72d3ea889844ae6c9e4a6fb Author: Michael P <mic...@us...> Date: Thu Jan 20 17:28:45 2011 +0900 Clean up in Materialize code Removes a PGXC added workaround when Materialize fetches all tuples from the subnode to keep connection clean. Now buffering handles this. 
diff --git a/src/backend/executor/nodeMaterial.c b/src/backend/executor/nodeMaterial.c index 2cd3298..446b400 100644 --- a/src/backend/executor/nodeMaterial.c +++ b/src/backend/executor/nodeMaterial.c @@ -24,9 +24,6 @@ #include "executor/executor.h" #include "executor/nodeMaterial.h" #include "miscadmin.h" -#ifdef PGXC -#include "pgxc/pgxc.h" -#endif /* ---------------------------------------------------------------- * ExecMaterial @@ -59,24 +56,9 @@ ExecMaterial(MaterialState *node) /* * If first time through, and we need a tuplestore, initialize it. */ -#ifdef PGXC - /* - * For PGXC, temporarily always create the storage. - * This allows us to easily use the same connection to - * in multiple steps of the plan. - */ - if ((IS_PGXC_COORDINATOR && tuplestorestate == NULL) - || (IS_PGXC_DATANODE && tuplestorestate == NULL && node->eflags != 0)) -#else if (tuplestorestate == NULL && node->eflags != 0) -#endif { tuplestorestate = tuplestore_begin_heap(true, false, work_mem); -#ifdef PGXC - if (IS_PGXC_COORDINATOR) - /* Note that we will rescan these results */ - node->eflags |= EXEC_FLAG_REWIND; -#endif tuplestore_set_eflags(tuplestorestate, node->eflags); if (node->eflags & EXEC_FLAG_MARK) { @@ -91,26 +73,6 @@ ExecMaterial(MaterialState *node) Assert(ptrno == 1); } node->tuplestorestate = tuplestorestate; - -#ifdef PGXC - if (IS_PGXC_COORDINATOR) - { - TupleTableSlot *outerslot; - PlanState *outerNode = outerPlanState(node); - - /* We want to always materialize first temporarily in PG-XC */ - while (!node->eof_underlying) - { - outerslot = ExecProcNode(outerNode); - if (TupIsNull(outerslot)) - node->eof_underlying = true; - else - /* Append a copy of the returned tuple to tuplestore. 
*/ - tuplestore_puttupleslot(tuplestorestate, outerslot); - } - tuplestore_rescan(node->tuplestorestate); - } -#endif } /* ----------------------------------------------------------------------- Summary of changes: src/backend/access/common/heaptuple.c | 41 +++-- src/backend/executor/nodeMaterial.c | 38 ----- src/backend/pgxc/pool/execRemote.c | 288 ++++++++++++++++++++++++--------- 3 files changed, 236 insertions(+), 131 deletions(-) hooks/post-receive -- Postgres-XC |