You can subscribe to this list here.
2010 |
Jan
|
Feb
|
Mar
|
Apr
(4) |
May
(28) |
Jun
(12) |
Jul
(11) |
Aug
(12) |
Sep
(5) |
Oct
(19) |
Nov
(14) |
Dec
(12) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2011 |
Jan
(18) |
Feb
(30) |
Mar
(115) |
Apr
(89) |
May
(50) |
Jun
(44) |
Jul
(22) |
Aug
(13) |
Sep
(11) |
Oct
(30) |
Nov
(28) |
Dec
(39) |
2012 |
Jan
(38) |
Feb
(18) |
Mar
(43) |
Apr
(91) |
May
(108) |
Jun
(46) |
Jul
(37) |
Aug
(44) |
Sep
(33) |
Oct
(29) |
Nov
(36) |
Dec
(15) |
2013 |
Jan
(35) |
Feb
(611) |
Mar
(5) |
Apr
(55) |
May
(30) |
Jun
(28) |
Jul
(458) |
Aug
(34) |
Sep
(9) |
Oct
(39) |
Nov
(22) |
Dec
(32) |
2014 |
Jan
(16) |
Feb
(16) |
Mar
(42) |
Apr
(179) |
May
(7) |
Jun
(6) |
Jul
(9) |
Aug
|
Sep
(4) |
Oct
|
Nov
(3) |
Dec
|
2015 |
Jan
|
Feb
|
Mar
|
Apr
(2) |
May
(4) |
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
From: Michael P. <mic...@us...> - 2011-02-02 07:45:17
|
Project "Postgres-XC". The branch, ha_support has been updated via 63810afccc1ae9ea8ee0831994106adb3087b795 (commit) from 0049fd0c80b376fb653dcffab154aa7daf0fa9a0 (commit) - Log ----------------------------------------------------------------- commit 63810afccc1ae9ea8ee0831994106adb3087b795 Author: Michael P <mic...@us...> Date: Wed Feb 2 16:45:58 2011 +0900 Addition of a stored function pgxc_is_committed This function can be used to look up the status of a transaction depending on its XID number. This feature will be used for 2PC cleanup when a Coordinator crashes. For a given ID, this function returns the following values: - true if transaction is committed - false if transaction is aborted - nothing if transaction is not known Examples: select pgxc_is_committed('1000'); execute direct on node 1 'pgxc_is_committed(''1000'')'; diff --git a/src/backend/access/transam/transam.c b/src/backend/access/transam/transam.c index 2a1eab4..b196ddf 100644 --- a/src/backend/access/transam/transam.c +++ b/src/backend/access/transam/transam.c @@ -24,6 +24,9 @@ #include "access/transam.h" #include "utils/snapmgr.h" +#ifdef PGXC +#include "utils/builtins.h" +#endif /* * Single-item cache for results of TransactionLogFetch. 
It's worth having @@ -41,6 +44,10 @@ static const XLogRecPtr InvalidXLogRecPtr = {0, 0}; /* Local functions */ static XidStatus TransactionLogFetch(TransactionId transactionId); +#ifdef PGXC +/* It is not really necessary to make it appear in header file */ +Datum pgxc_is_committed(PG_FUNCTION_ARGS); +#endif /* ---------------------------------------------------------------- * Postgres log access method interface @@ -97,6 +104,27 @@ TransactionLogFetch(TransactionId transactionId) return xidstatus; } +#ifdef PGXC +/* + * For given Transaction ID, check if transaction is committed or aborted + */ +Datum +pgxc_is_committed(PG_FUNCTION_ARGS) +{ + TransactionId tid = (TransactionId) PG_GETARG_UINT32(0); + XidStatus xidstatus; + + xidstatus = TransactionLogFetch(tid); + + if (xidstatus == TRANSACTION_STATUS_COMMITTED) + PG_RETURN_BOOL(true); + else if (xidstatus == TRANSACTION_STATUS_ABORTED) + PG_RETURN_BOOL(false); + else + PG_RETURN_NULL(); +} +#endif + /* ---------------------------------------------------------------- * Interface functions * diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index bfccbe5..d0dbc3f 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -4715,7 +4715,8 @@ DATA(insert OID = 3113 ( last_value PGNSP PGUID 12 1 0 0 f t f t f i 1 0 2283 " DESCR("fetch the last row value"); DATA(insert OID = 3114 ( nth_value PGNSP PGUID 12 1 0 0 f t f t f i 2 0 2283 "2283 23" _null_ _null_ _null_ _null_ window_nth_value _null_ _null_ _null_ )); DESCR("fetch the Nth row value"); - +DATA(insert OID = 3115 ( pgxc_is_committed PGNSP PGUID 12 1 0 0 f f f t t s 1 0 16 "28" _null_ _null_ _null_ _null_ pgxc_is_committed _null_ _null_ _null_ )); +DESCR("is given GXID committed or aborted?"); /* * Symbolic values for provolatile column: these indicate whether the result diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 30cd971..26e1b2b 100644 --- a/src/include/utils/builtins.h +++ 
b/src/include/utils/builtins.h @@ -1057,4 +1057,9 @@ extern Datum pg_prepared_statement(PG_FUNCTION_ARGS); /* utils/mmgr/portalmem.c */ extern Datum pg_cursor(PG_FUNCTION_ARGS); +/* backend/access/transam/transam.c */ +#ifdef PGXC +extern Datum pgxc_is_committed(PG_FUNCTION_ARGS); +#endif + #endif /* BUILTINS_H */ ----------------------------------------------------------------------- Summary of changes: src/backend/access/transam/transam.c | 28 ++++++++++++++++++++++++++++ src/include/catalog/pg_proc.h | 3 ++- src/include/utils/builtins.h | 5 +++++ 3 files changed, 35 insertions(+), 1 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-02-02 05:21:16
|
Project "Postgres-XC". The branch, ha_support has been created at 0049fd0c80b376fb653dcffab154aa7daf0fa9a0 (commit) - Log ----------------------------------------------------------------- commit 0049fd0c80b376fb653dcffab154aa7daf0fa9a0 Author: Michael P <mic...@us...> Date: Wed Feb 2 14:16:29 2011 +0900 Extension of 2PC catalog tables Basics for 2PC cleanup in Postgres-XC cluster in case of node crash. Following parameters are added in the view pg_prepared_xacts: - isimplicit, Type of 2PC made: implicit or explicit - isddl, flag indicating if transaction prepared used DDL or not - coordnum, number of Coordinator from where 2PC has been issued - nodelist, list of Datanodes where PREPARE has been issued "nodelist" contains only the list of Datanodes prepared. For prepared transaction involving only Coordinators, nodelist is set to 'n'. Note: 2PC can be involved in a transaction COMMIT if multiple Datanodes have been involved in a write operation. This is an implicit 2PC. Explicit 2PC is the case of an application issuing PREPARE and COMMIT/ABORT PREPARED. 
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index af15e79..04c9f00 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -61,7 +61,9 @@ #include "storage/smgr.h" #include "utils/builtins.h" #include "utils/memutils.h" - +#ifdef PGXC +#include "pgxc/execRemote.h" +#endif /* * Directory where Two-phase commit files reside within PGDATA @@ -120,6 +122,12 @@ typedef struct GlobalTransactionData Oid owner; /* ID of user that executed the xact */ TransactionId locking_xid; /* top-level XID of backend working on xact */ bool valid; /* TRUE if fully prepared */ +#ifdef PGXC + bool isimplicit; /* Type of 2PC made: implicit or explicit */ + bool isddl; /* Flag if DDL have been used (Check if Co also are prepared) */ + int32 coordnum; /* Number of Coordinator from where 2PC has been issued */ + char nodelist[NODELISTSIZE]; /* List of Datanodes where PREPARE has been issued */ +#endif char gid[GIDSIZE]; /* The GID assigned to the prepared xact */ } GlobalTransactionData; @@ -144,7 +152,6 @@ typedef struct TwoPhaseStateData static TwoPhaseStateData *TwoPhaseState; - static void RecordTransactionCommitPrepared(TransactionId xid, int nchildren, TransactionId *children, @@ -234,10 +241,27 @@ TwoPhaseShmemInit(void) * Internally, this creates a gxact struct and puts it into the active array. * NOTE: this is also used when reloading a gxact after a crash; so avoid * assuming that we can use very much backend context. + * + * This contains additional information necessary for a PGXC PREPARE: + * - the commit GXID (to be committed at GTM) + * - flag if transaction is a DDL (commit on Coordinators or not) + * - flag if transaction is implicit or explicit (information is on GTM for explicit) + * - list of Datanodes involved + * - Coordinator number from where PREPARE has been issued. 
*/ +#ifdef PGXC +GlobalTransaction +MarkAsPreparing(TransactionId xid, + const char *gid, + TimestampTz prepared_at, + Oid owner, + Oid databaseid, + Remote2PCData *PGXC2PCData) +#else GlobalTransaction MarkAsPreparing(TransactionId xid, const char *gid, TimestampTz prepared_at, Oid owner, Oid databaseid) +#endif { GlobalTransaction gxact; int i; @@ -334,6 +358,13 @@ MarkAsPreparing(TransactionId xid, const char *gid, gxact->locking_xid = xid; gxact->valid = false; strcpy(gxact->gid, gid); +#ifdef PGXC + /* Add also the records associated to a PGXC 2PC */ + memcpy(gxact->nodelist, PGXC2PCData->nodelist, strlen(PGXC2PCData->nodelist)); + gxact->isimplicit = PGXC2PCData->isimplicit; + gxact->isddl = PGXC2PCData->isddl; + gxact->coordnum = PGXC2PCData->coordnum; +#endif /* And insert it into the active array */ Assert(TwoPhaseState->numPrepXacts < max_prepared_xacts); @@ -606,7 +637,11 @@ pg_prepared_xact(PG_FUNCTION_ARGS) /* build tupdesc for result tuples */ /* this had better match pg_prepared_xacts view in system_views.sql */ +#ifdef PGXC + tupdesc = CreateTemplateTupleDesc(9, false); +#else tupde |
From: Michael P. <mic...@us...> - 2011-02-02 05:15:43
|
Project "Postgres-XC". The branch, ha_support has been deleted was 497b27848af35363b745dbce20d2b8cb806c43dc ----------------------------------------------------------------------- 497b27848af35363b745dbce20d2b8cb806c43dc Support for EXECUTE DIRECT ----------------------------------------------------------------------- hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-02-02 05:08:40
|
Project "Postgres-XC". The branch, master has been updated via a1b041e2656b64689cd9719cc5662984a4f5f5a4 (commit) from 10a271b9287eb59b1846f5334e2cb1b35e69b4d6 (commit) - Log ----------------------------------------------------------------- commit a1b041e2656b64689cd9719cc5662984a4f5f5a4 Author: Michael P <mic...@us...> Date: Wed Feb 2 14:13:02 2011 +0900 Support for EXECUTE DIRECT EXECUTE DIRECT is a utility query allowing to launch queries directly on targeted PGXC nodes. EXECUTE DIRECT ON (COORDINATOR num | NODE num) 'query'; This implementation contains the following use restrictions: - only a superuser is allowed to use it - DML queries (DELETE, INSERT, UPDATE) cannot be launched with it (easy to break data consistency) - utilities cannot be launched on local coordinator - utilities cannot be launched inside a transaction block (though SELECT queries in EXECUTE DIRECT keep the same visibility if used in a transaction block). - only one query can be launched at a time - query can be launched on a unique node This feature will be used to have a look at 2PC catalog data when nodes crash and to clean up 2PC transactions on targeted nodes. Ex: EXECUTE DIRECT ON NODE 1 'SELECT * from pg_prepared_xact()'; EXECUTE DIRECT ON COORDINATOR 2 'COMMIT PREPARED ''foo'''; diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 0e9aa43..cb7a1a8 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -264,8 +264,9 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) * If we are on the coordinator, we always want to use * the remote query path unless it is a pg_catalog table. 
*/ - if (IS_PGXC_COORDINATOR - && get_rel_namespace(rte->relid) != PG_CATALOG_NAMESPACE) + if (IS_PGXC_COORDINATOR && + !IsConnFromCoord() && + get_rel_namespace(rte->relid) != PG_CATALOG_NAMESPACE) add_path(rel, create_remotequery_path(root, rel)); else { diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 5b2e03f..40777bf 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -41,8 +41,10 @@ #include "rewrite/rewriteManip.h" #ifdef PGXC #include "pgxc/pgxc.h" +#include "access/gtm.h" #include "pgxc/planner.h" #include "tcop/tcopprot.h" +#include "pgxc/poolmgr.h" #endif #include "utils/rel.h" @@ -2066,9 +2068,160 @@ transformExplainStmt(ParseState *pstate, ExplainStmt *stmt) static Query * transformExecDirectStmt(ParseState *pstate, ExecDirectStmt *stmt) { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("Support for EXECUTE DIRECT is temporary broken"))); + Query *result = makeNode(Query); + bool is_coordinator = stmt->coordinator; + char *query = stmt->query; + List *nodelist = stmt->nodes; + ListCell *nodeitem; + RemoteQuery *step = makeNode(RemoteQuery); + bool is_local = false; + List *raw_parsetree_list; + ListCell *raw_parsetree_item; + + if (list_length(nodelist) > 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Support for EXECUTE DIRECT on multiple nodes is not available yet"))); + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use EXECUTE DIRECT"))); + + /* Check if execute direct is local and if node number is correct*/ + foreach(nodeitem, nodelist) + { + int nodenum = intVal(lfirst(nodeitem)); + + if (nodenum < 1 || + (!is_coordinator && nodenum > NumDataNodes) || + (is_coordinator && nodenum > NumCoords)) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Node Number %d is incorrect", nodenum))); + + if (nodenum == PGXCNodeId && is_coordinator) + is_local = true; + } + + /* 
Transform the query into a raw parse list */ + raw_parsetree_list = pg_parse_query(query); + + /* EXECUTE DIRECT can just be executed with a single query */ + if (list_length(raw_parsetree_list) > 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("EXECUTE DIRECT cannot execute multiple queries"))); + + /* + * Analyze the Raw parse tree + * EXECUTE DIRECT is restricted to one-step usage + */ + foreach(raw_parsetree_item, raw_parsetree_list) + { + Node *parsetree = (Node *) lfirst(raw_parsetree_item); + result = parse_analyze(parsetree, query, NULL, 0); + } + + /* Needed by planner */ + result->sql_statement = pstrdup(query); + + /* Default list of parameters to set */ + step->is_single_step = true; + step->sql_statement = NULL; + step->exec_nodes = NULL; + step->combine_type = COMBINE_TYPE_NONE; + step->simple_aggregates = NIL; + step->sort = NULL; + step->distinct = NULL; + step->read_only = true; + step->force_autocommit = false; + step->cursor = NULL; + step->exec_type = EXEC_ON_DATANODES; + step->paramval_data = NULL; + step->paramval_len = 0; + + step->relname = NULL; + step->remotejoin = false; + step->partitioned_replicated = false; + step->reduce_level = 0; + step->base_tlist = NIL; + step->outer_alias = NULL; + step->inner_alias = NULL; + step->outer_reduce_level = 0; + step->inner_reduce_level = 0; + step->outer_relids = NULL; + step->inner_relids = NULL; + step->inner_statement = NULL; + step->outer_statement = NULL; + step->join_condition = NULL; + + /* Change the list of nodes that will be executed for the query and others */ + step->exec_nodes = (ExecNodes *) palloc(sizeof(ExecNodes)); + step->exec_nodes->primarynodelist = NIL; + step->exec_nodes->nodelist = NIL; + step->exec_nodes->expr = NIL; + step->force_autocommit = false; + step->combine_type = COMBINE_TYPE_SAME; + step->read_only = true; + step->exec_direct_type = EXEC_DIRECT_NONE; + + /* Set up EXECUTE DIRECT flag */ + if (is_local) + { + if (result->commandType == 
CMD_UTILITY) + step->exec_direct_type = EXEC_DIRECT_LOCAL_UTILITY; + else + step->exec_direct_type = EXEC_DIRECT_LOCAL; + } + else + { + if (result->commandType == CMD_UTILITY) + step->exec_direct_type = EXEC_DIRECT_UTILITY; + else if (result->commandType == CMD_SELECT) + step->exec_direct_type = EXEC_DIRECT_SELECT; + else if (result->commandType == CMD_INSERT) + step->exec_direct_type = EXEC_DIRECT_INSERT; + else if (result->commandType == CMD_UPDATE) + step->exec_direct_type = EXEC_DIRECT_UPDATE; + else if (result->commandType == CMD_DELETE) + step->exec_direct_type = EXEC_DIRECT_DELETE; + } + + /* + * Features not yet supported + * DML can be launched without errors but this could compromise data + * consistency, so block it. + */ + if (step->exec_direct_type == EXEC_DIRECT_DELETE + || step->exec_direct_type == EXEC_DIRECT_UPDATE + || step->exec_direct_type == EXEC_DIRECT_INSERT) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("EXECUTE DIRECT cannot execute DML queries"))); + if (step->exec_direct_type == EXEC_DIRECT_LOCAL_UTILITY) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("EXECUTE DIRECT cannot execute locally utility queries"))); + + /* Build Execute Node list */ + foreach(nodeitem, nodelist) + { + int nodenum = intVal(lfirst(nodeitem)); + step->exec_nodes->nodelist = lappend_int(step->exec_nodes->nodelist, nodenum); + } + + step->sql_statement = pstrdup(query); + + if (is_coordinator) + step->exec_type = EXEC_ON_COORDS; + else + step->exec_type = EXEC_ON_DATANODES; + + /* Associate newly-created RemoteQuery node to the returned Query result */ + result->utilityStmt = (Node *) step; + + return result; } #endif diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 37b012b..b0d8c61 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -6534,16 +6534,17 @@ opt_analyze: /***************************************************************************** * * QUERY: - * EXECUTE DIRECT 
ON (COORDINATOR | NODE num, ...) query + * EXECUTE DIRECT ON (COORDINATOR num, ... | NODE num, ...) query * *****************************************************************************/ -ExecDirectStmt: EXECUTE DIRECT ON COORDINATOR DirectStmt +ExecDirectStmt: EXECUTE DIRECT ON COORDINATOR coord_list DirectStmt { ExecDirectStmt *n = makeNode(ExecDirectStmt); n->coordinator = TRUE; n->nodes = NIL; - n->query = $5; + n->nodes = $5; + n->query = $6; $$ = (Node *)n; } | EXECUTE DIRECT ON NODE data_node_list DirectStmt diff --git a/src/backend/pgxc/plan/planner.c b/src/backend/pgxc/plan/planner.c index fbfe733..c8e67a6 100644 --- a/src/backend/pgxc/plan/planner.c +++ b/src/backend/pgxc/plan/planner.c @@ -943,10 +943,22 @@ examine_conditions_walker(Node *expr_node, XCWalkerContext *context) } return false; } - // ??? current plan node is not a remote query - context->query_step->exec_nodes = makeNode(ExecNodes); - context->query_step->exec_nodes->tableusagetype = TABLE_USAGE_TYPE_PGCATALOG; - context->exec_on_coord = true; + + /* Even with a catalog table EXECUTE direct in launched on dedicated nodes */ + if (context->query_step->exec_direct_type == EXEC_DIRECT_LOCAL + || context->query_step->exec_direct_type == EXEC_DIRECT_NONE + || context->query_step->exec_direct_type == EXEC_DIRECT_LOCAL_UTILITY) + { + context->query_step->exec_nodes = makeNode(ExecNodes); + context->query_step->exec_nodes->tableusagetype = TABLE_USAGE_TYPE_PGCATALOG; + context->exec_on_coord = true; + } + else + { + context->query_step->exec_nodes->tableusagetype = TABLE_USAGE_TYPE_USER; + context->exec_on_coord = false; + } + return false; } @@ -1404,6 +1416,10 @@ get_plan_nodes_walker(Node *query_node, XCWalkerContext *context) if (!query_node && !IsA(query_node,Query)) return true; + /* if EXECUTE DIRECT, just return */ + if (context->query_step->exec_direct_type != EXEC_DIRECT_NONE) + return false; + query = (Query *) query_node; /* If no tables, just return */ @@ -1574,7 +1590,7 @@ 
get_plan_nodes_walker(Node *query_node, XCWalkerContext *context) } } - /* If we are just dealing with pg_catalog, just return */ + /* If we are just dealing with pg_catalog, just return. */ if (table_usage_type == TABLE_USAGE_TYPE_PGCATALOG) { context->query_step->exec_nodes = makeNode(ExecNodes); @@ -1816,6 +1832,7 @@ makeRemoteQuery(void) result->exec_type = EXEC_ON_DATANODES; result->paramval_data = NULL; result->paramval_len = 0; + result->exec_direct_type = EXEC_DIRECT_NONE; result->relname = NULL; result->remotejoin = false; @@ -1852,7 +1869,7 @@ get_plan_nodes(PlannerInfo *root, RemoteQuery *step, RelationAccessType accessTy context.rtables = lappend(context.rtables, query->rtable); if ((get_plan_nodes_walker((Node *) query, &context) - || context.exec_on_coord) && context.query_step->exec_nodes) + || context.exec_on_coord) && context.query_step->exec_nodes) { pfree(context.query_step->exec_nodes); context.query_step->exec_nodes = NULL; @@ -2748,13 +2765,33 @@ pgxc_planner(Query *query, int cursorOptions, ParamListInfo boundParams) result->intoClause = query->intoClause; result->rtable = query->rtable; - query_step = makeRemoteQuery(); + /* EXECUTE DIRECT statements have their RemoteQuery node already built when analyzing */ + if (query->utilityStmt + && IsA(query->utilityStmt, RemoteQuery)) + { + RemoteQuery *stmt = (RemoteQuery *) query->utilityStmt; + if (stmt->exec_direct_type != EXEC_DIRECT_NONE) + { + query_step = stmt; + query->utilityStmt = NULL; + result->utilityStmt = NULL; + } + else + { + query_step = makeRemoteQuery(); + query_step->exec_nodes = query->execNodes; + } + } + else + { + query_step = makeRemoteQuery(); + query_step->exec_nodes = query->execNodes; + } - query_step->exec_nodes = query->execNodes; /* Optimize multi-node handling */ query_step->read_only = query->commandType == CMD_SELECT; - if (query->utilityStmt && + if (query->utilityStmt && IsA(query->utilityStmt, DeclareCursorStmt)) cursorOptions |= ((DeclareCursorStmt *) 
query->utilityStmt)->options; diff --git a/src/backend/pgxc/pool/execRemote.c b/src/backend/pgxc/pool/execRemote.c index c5610dc..1d41a5c 100644 --- a/src/backend/pgxc/pool/execRemote.c +++ b/src/backend/pgxc/pool/execRemote.c @@ -3024,12 +3024,19 @@ get_exec_connections(RemoteQueryState *planstate, FreeRelationLocInfo(rel_loc_info); } } - } else { - nodelist = exec_nodes->nodelist; + } + else + { + if (exec_type == EXEC_ON_DATANODES || exec_type == EXEC_ON_ALL_NODES) + nodelist = exec_nodes->nodelist; + else if (exec_type == EXEC_ON_COORDS) + coordlist = exec_nodes->nodelist; + primarynode = exec_nodes->primarynodelist; } } + /* Set node list and DN number */ if (list_length(nodelist) == 0 && (exec_type == EXEC_ON_ALL_NODES || exec_type == EXEC_ON_DATANODES)) @@ -3039,20 +3046,31 @@ get_exec_connections(RemoteQueryState *planstate, } else { - if (primarynode) - dn_conn_count = list_length(nodelist) + 1; + if (exec_type == EXEC_ON_DATANODES || exec_type == EXEC_ON_ALL_NODES) + { + if (primarynode) + dn_conn_count = list_length(nodelist) + 1; + else + dn_conn_count = list_length(nodelist); + } else - dn_conn_count = list_length(nodelist); + dn_conn_count = 0; } - if (exec_type == EXEC_ON_ALL_NODES || - exec_type == EXEC_ON_COORDS) + /* Set Coordinator list and coordinator number */ + if ((list_length(nodelist) == 0 && exec_type == EXEC_ON_ALL_NODES) || + (list_length(coordlist) == 0 && exec_type == EXEC_ON_COORDS)) { co_conn_count = NumCoords; coordlist = GetAllCoordNodes(); } else - co_conn_count = 0; + { + if (exec_type == EXEC_ON_COORDS) + co_conn_count = list_length(coordlist); + else + co_conn_count = 0; + } /* Get other connections (non-primary) */ pgxc_handles = get_handles(nodelist, coordlist, is_query_coord_only); @@ -3138,11 +3156,20 @@ do_query(RemoteQueryState *node) * are launched in ExecRemoteUtility */ pgxc_connections = get_exec_connections(node, step->exec_nodes, - EXEC_ON_DATANODES); + step->exec_type); + + if (step->exec_type == EXEC_ON_DATANODES) 
+ { + connections = pgxc_connections->datanode_handles; + total_conn_count = regular_conn_count = pgxc_connections->dn_conn_count; + } + else if (step->exec_type == EXEC_ON_COORDS) + { + connections = pgxc_connections->coord_handles; + total_conn_count = regular_conn_count = pgxc_connections->co_conn_count; + } - connections = pgxc_connections->datanode_handles; primaryconnection = pgxc_connections->primary_handle; - total_conn_count = regular_conn_count = pgxc_connections->dn_conn_count; /* * Primary connection is counted separately but is included in total_conn_count if used. @@ -4011,6 +4038,7 @@ ExecRemoteUtility(RemoteQuery *node) int co_conn_count; int dn_conn_count; bool need_tran; + ExecDirectType exec_direct_type = node->exec_direct_type; int i; implicit_force_autocommit = force_autocommit; @@ -4020,7 +4048,15 @@ ExecRemoteUtility(RemoteQuery *node) pgxc_connections = get_exec_connections(NULL, node->exec_nodes, exec_type); dn_conn_count = pgxc_connections->dn_conn_count; - co_conn_count = pgxc_connections->co_conn_count; + + /* + * EXECUTE DIRECT can only be launched on a single node + * but we have to count local node also here. 
+ */ + if (exec_direct_type != EXEC_DIRECT_NONE && exec_type == EXEC_ON_COORDS) + co_conn_count = 2; + else + co_conn_count = pgxc_connections->co_conn_count; /* Registering new connections needs the sum of Connections to Datanodes AND to Coordinators */ total_conn_count = dn_conn_count + co_conn_count; @@ -4033,6 +4069,17 @@ ExecRemoteUtility(RemoteQuery *node) else need_tran = !autocommit || total_conn_count > 1; + /* Commands launched through EXECUTE DIRECT do not need start a transaction */ + if (exec_direct_type == EXEC_DIRECT_UTILITY) + { + need_tran = false; + + /* This check is not done when analyzing to limit dependencies */ + if (IsTransactionBlock()) + ereport(ERROR, + (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), + errmsg("cannot run EXECUTE DIRECT with utility inside a transaction block"))); + } if (!is_read_only) { diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index 93bf626..a070db1 100644 --- a/src/backend/tcop/pquery.c +++ b/src/backend/tcop/pquery.c @@ -23,6 +23,7 @@ #include "pg_trace.h" #ifdef PGXC #include "pgxc/pgxc.h" +#include "pgxc/planner.h" #include "pgxc/execRemote.h" #endif #include "tcop/pquery.h" @@ -293,12 +294,26 @@ ChoosePortalStrategy(List *stmts) #ifdef PGXC else if (IsA(stmt, RemoteQuery)) { + RemoteQuery *step = (RemoteQuery *) stmt; /* * Let's choose PORTAL_ONE_SELECT for now * After adding more PGXC functionality we may have more * sophisticated algorithm of determining portal strategy + * + * EXECUTE DIRECT is a utility but depending on its inner query + * it can return tuples or not depending on the query used. 
*/ - return PORTAL_ONE_SELECT; + if (step->exec_direct_type == EXEC_DIRECT_SELECT + || step->exec_direct_type == EXEC_DIRECT_UPDATE + || step->exec_direct_type == EXEC_DIRECT_DELETE + || step->exec_direct_type == EXEC_DIRECT_INSERT + || step->exec_direct_type == EXEC_DIRECT_LOCAL) + return PORTAL_ONE_SELECT; + else if (step->exec_direct_type == EXEC_DIRECT_UTILITY + || step->exec_direct_type == EXEC_DIRECT_LOCAL_UTILITY) + return PORTAL_MULTI_QUERY; + else + return PORTAL_ONE_SELECT; } #endif else if (IsA(stmt, PlannedStmt)) diff --git a/src/include/pgxc/planner.h b/src/include/pgxc/planner.h index ad0f05d..872a2fc 100644 --- a/src/include/pgxc/planner.h +++ b/src/include/pgxc/planner.h @@ -68,6 +68,18 @@ typedef enum EXEC_ON_ALL_NODES } RemoteQueryExecType; +typedef enum +{ + EXEC_DIRECT_NONE, + EXEC_DIRECT_LOCAL, + EXEC_DIRECT_LOCAL_UTILITY, + EXEC_DIRECT_UTILITY, + EXEC_DIRECT_SELECT, + EXEC_DIRECT_INSERT, + EXEC_DIRECT_UPDATE, + EXEC_DIRECT_DELETE +} ExecDirectType; + /* * Contains instructions on processing a step of a query. * In the prototype this will be simple, but it will eventually @@ -77,6 +89,7 @@ typedef struct { Scan scan; bool is_single_step; /* special case, skip extra work */ + ExecDirectType exec_direct_type; /* track if remote query is execute direct and what type it is */ char *sql_statement; ExecNodes *exec_nodes; /* List of Datanodes where to launch query */ CombineType combine_type; ----------------------------------------------------------------------- Summary of changes: src/backend/optimizer/path/allpaths.c | 5 +- src/backend/parser/analyze.c | 159 ++++++++++++++++++++++++++++++++- src/backend/parser/gram.y | 7 +- src/backend/pgxc/plan/planner.c | 55 ++++++++++-- src/backend/pgxc/pool/execRemote.c | 71 ++++++++++++--- src/backend/tcop/pquery.c | 17 ++++- src/include/pgxc/planner.h | 13 +++ 7 files changed, 297 insertions(+), 30 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-01-28 02:02:24
|
Project "Postgres-XC". The branch, master has been updated via 10a271b9287eb59b1846f5334e2cb1b35e69b4d6 (commit) from ed71dbc99039efb837b76cb2f5c998230a7e0f3e (commit) - Log ----------------------------------------------------------------- commit 10a271b9287eb59b1846f5334e2cb1b35e69b4d6 Author: Michael P <mic...@us...> Date: Fri Jan 28 11:05:49 2011 +0900 maintenance for 2011, change header files A little bit late, but change the headers for 2011. diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 2a0f245..2640e45 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -25,7 +25,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL$ * diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c index 35c9d83..3832f82 100644 --- a/src/backend/access/transam/subtrans.c +++ b/src/backend/access/transam/subtrans.c @@ -21,7 +21,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL$ * diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 97f6c76..af15e79 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 
2010-2011 Nippon Telegraph and Telephone Corporation * * IDENTIFICATION * $PostgreSQL$ diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c index d87abed..64acec8 100644 --- a/src/backend/access/transam/varsup.c +++ b/src/backend/access/transam/varsup.c @@ -4,7 +4,7 @@ * postgres OID & XID variables support routines * * Copyright (c) 2000-2009, PostgreSQL Global Development Group - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * IDENTIFICATION * $PostgreSQL$ diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index da9e3b1..0fa83fa 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -7,7 +7,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 19be784..8a5064d 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -6,7 +6,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * IDENTIFICATION * $PostgreSQL$ diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 5333cf0..d0c356b 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -6,7 +6,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of 
the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * IDENTIFICATION * $PostgreSQL$ diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index a8f9d30..75637fc 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/backend/catalog/pgxc_class.c b/src/backend/catalog/pgxc_class.c index bf54ca4..6b897c8 100644 --- a/src/backend/catalog/pgxc_class.c +++ b/src/backend/catalog/pgxc_class.c @@ -4,7 +4,7 @@ * routines to support manipulation of the pgxc_class relation * * Copyright (c) 1996-2010, PostgreSQL Global Development Group - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * *------------------------------------------------------------------------- diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index ed5061a..1b87bcb 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index 7f98b4d..ffe8b63 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ 
-5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 94d820b..53f7c4d 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 86f0fa5..2277c66 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -10,7 +10,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 40cb4f5..13e47e9 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -13,7 +13,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * IDENTIFICATION * $PostgreSQL$ diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 
fc72cf6..32ac010 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -20,7 +20,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * IDENTIFICATION * $PostgreSQL$ diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index c928bf8..e3b3b21 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 0c76d7a..37b012b 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -8,7 +8,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index 7a31dcb..580ed18 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -18,7 +18,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL$ * 
diff --git a/src/backend/pgxc/Makefile b/src/backend/pgxc/Makefile index d978720..eecac20 100644 --- a/src/backend/pgxc/Makefile +++ b/src/backend/pgxc/Makefile @@ -2,7 +2,7 @@ # Makefile for the access methods module # # -# Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation +# Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation # # $PostgreSQL$ # diff --git a/src/backend/pgxc/locator/locator.c b/src/backend/pgxc/locator/locator.c index 4442310..199293f 100644 --- a/src/backend/pgxc/locator/locator.c +++ b/src/backend/pgxc/locator/locator.c @@ -8,7 +8,7 @@ * PGXCTODO - do not use a single mappingTable for all * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/backend/pgxc/plan/planner.c b/src/backend/pgxc/plan/planner.c index ff6ce81..fbfe733 100644 --- a/src/backend/pgxc/plan/planner.c +++ b/src/backend/pgxc/plan/planner.c @@ -5,7 +5,7 @@ * Functions for generating a PGXC style plan. 
* * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/backend/pgxc/pool/Makefile b/src/backend/pgxc/pool/Makefile index 8c2b66a..f0701c5 100644 --- a/src/backend/pgxc/pool/Makefile +++ b/src/backend/pgxc/pool/Makefile @@ -3,7 +3,7 @@ # Makefile-- # Makefile for pool # -# Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation +# Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation # # IDENTIFICATION # $PostgreSQL$ diff --git a/src/backend/pgxc/pool/execRemote.c b/src/backend/pgxc/pool/execRemote.c index 5cd60b7..c5610dc 100644 --- a/src/backend/pgxc/pool/execRemote.c +++ b/src/backend/pgxc/pool/execRemote.c @@ -6,7 +6,7 @@ * * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * IDENTIFICATION * $$ diff --git a/src/backend/pgxc/pool/pgxcnode.c b/src/backend/pgxc/pool/pgxcnode.c index 1e23489..fa4f384 100644 --- a/src/backend/pgxc/pool/pgxcnode.c +++ b/src/backend/pgxc/pool/pgxcnode.c @@ -7,7 +7,7 @@ * * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * IDENTIFICATION * $$ diff --git a/src/backend/pgxc/pool/poolcomm.c b/src/backend/pgxc/pool/poolcomm.c index bdb6cb1..79a3776 100644 --- a/src/backend/pgxc/pool/poolcomm.c +++ b/src/backend/pgxc/pool/poolcomm.c @@ -6,7 +6,7 @@ * * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon 
Telegraph and Telephone Corporation * *------------------------------------------------------------------------- */ diff --git a/src/backend/pgxc/pool/poolmgr.c b/src/backend/pgxc/pool/poolmgr.c index 50e38dc..fb9ca00 100644 --- a/src/backend/pgxc/pool/poolmgr.c +++ b/src/backend/pgxc/pool/poolmgr.c @@ -25,7 +25,7 @@ * * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * IDENTIFICATION * $$ diff --git a/src/backend/pgxc/pool/poolutils.c b/src/backend/pgxc/pool/poolutils.c index e14c936..cccc9de 100644 --- a/src/backend/pgxc/pool/poolutils.c +++ b/src/backend/pgxc/pool/poolutils.c @@ -5,7 +5,7 @@ * Utilities for Postgres-XC pooler * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * IDENTIFICATION * $$ diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index b36347d..07779ae 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -34,7 +34,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 69c25d1..0a4c3a3 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -28,7 +28,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright 
(c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 87ccde5..9615b52 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 05eab29..4b5fb65 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index 9f8d0fc..d8833b3 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 684396c..a148265 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -7,7 +7,7 @@ * * * Copyright (c) 2000-2009, PostgreSQL Global Development Group - * Portions 
Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * Written by Peter Eisentraut <pe...@gm...>. * * IDENTIFICATION diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index b4dd50b..67aa3e5 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -40,7 +40,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * Portions taken from FreeBSD. * * $PostgreSQL$ diff --git a/src/bin/pg_ctl/pg_ctl.c b/src/bin/pg_ctl/pg_ctl.c index 17dd620..baef131 100644 --- a/src/bin/pg_ctl/pg_ctl.c +++ b/src/bin/pg_ctl/pg_ctl.c @@ -3,7 +3,7 @@ * pg_ctl --- start/stops/restarts the PostgreSQL server * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL$ * diff --git a/src/bin/scripts/pgxc_ddl b/src/bin/scripts/pgxc_ddl index 2442595..658a3b3 100644 --- a/src/bin/scripts/pgxc_ddl +++ b/src/bin/scripts/pgxc_ddl @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2010 Nippon Telegraph and Telephone Corporation +# Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation #Scripts to launch DDL in PGXC cluster using a cold_backup method #Be sure to have set a correct ssl environment in all the servers of the cluster diff --git a/src/gtm/Makefile b/src/gtm/Makefile index 5214936..2616cec 100644 --- a/src/gtm/Makefile +++ b/src/gtm/Makefile @@ -3,7 +3,7 @@ # Makefile for src/gtm # GTM and GTM proxy # -# Copyright (c) 2010 Nippon Telegraph and Telephone Corporation +# Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation 
#------------------------------------------------------------------------- PGFILEDESC = "gtm - Global Transaction Manager for Postgres-XC" diff --git a/src/gtm/Makefile.global b/src/gtm/Makefile.global index f130bdb..bfc052f 100644 --- a/src/gtm/Makefile.global +++ b/src/gtm/Makefile.global @@ -2,7 +2,7 @@ ########################################################################## # # Meta configuration -# Copyright (c) 2010 Nippon Telegraph and Telephone Corporation +# Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation .PHONY: all install install-strip installdirs uninstall clean distclean check installcheck .SILENT: installdirs diff --git a/src/gtm/Makefile.port b/src/gtm/Makefile.port index 611c8b7..e616574 100644 --- a/src/gtm/Makefile.port +++ b/src/gtm/Makefile.port @@ -1,4 +1,4 @@ -# Copyright (c) 2010 Nippon Telegraph and Telephone Corporation +# Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation AROPT = crs export_dynamic = -Wl,-E diff --git a/src/gtm/Makefile.shlib b/src/gtm/Makefile.shlib index 83aca38..4f3470e 100644 --- a/src/gtm/Makefile.shlib +++ b/src/gtm/Makefile.shlib @@ -4,7 +4,7 @@ # Common rules for building shared libraries # # Copyright (c) 1998, Regents of the University of California -# Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation +# Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation # # IDENTIFICATION # $PostgreSQL: pgsql/src/Makefile.shlib,v 1.119 2008/12/11 07:34:07 petere Exp $ diff --git a/src/gtm/client/Makefile b/src/gtm/client/Makefile index 216adf2..eff3f9b 100644 --- a/src/gtm/client/Makefile +++ b/src/gtm/client/Makefile @@ -1,4 +1,4 @@ -# Copyright (c) 2010 Nippon Telegraph and Telephone Corporation +# Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation top_build_dir=../.. 
include $(top_build_dir)/gtm/Makefile.global diff --git a/src/gtm/client/fe-connect.c b/src/gtm/client/fe-connect.c index 5a2ad1d..fddad3f 100644 --- a/src/gtm/client/fe-connect.c +++ b/src/gtm/client/fe-connect.c @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/gtm/client/fe-misc.c b/src/gtm/client/fe-misc.c index 89bdd52..5c9e257 100644 --- a/src/gtm/client/fe-misc.c +++ b/src/gtm/client/fe-misc.c @@ -21,7 +21,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * IDENTIFICATION * $PostgreSQL: pgsql/src/interfaces/libpq/fe-misc.c,v 1.137 2008/12/11 07:34:09 petere Exp $ diff --git a/src/gtm/client/fe-protocol.c b/src/gtm/client/fe-protocol.c index a70be31..a873b2a 100644 --- a/src/gtm/client/fe-protocol.c +++ b/src/gtm/client/fe-protocol.c @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/gtm/client/gtm_client.c b/src/gtm/client/gtm_client.c index 9501478..f5cafd9 100644 --- a/src/gtm/client/gtm_client.c +++ b/src/gtm/client/gtm_client.c @@ -4,7 +4,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions 
Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/gtm/client/ip.c b/src/gtm/client/ip.c index b210e20..9d24391 100644 --- a/src/gtm/client/ip.c +++ b/src/gtm/client/ip.c @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/gtm/client/pqexpbuffer.c b/src/gtm/client/pqexpbuffer.c index 95c6ee0..2b852d6 100644 --- a/src/gtm/client/pqexpbuffer.c +++ b/src/gtm/client/pqexpbuffer.c @@ -16,7 +16,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL: pgsql/src/interfaces/libpq/pqexpbuffer.c,v 1.25 2008/11/26 00:26:23 tgl Exp $ * diff --git a/src/gtm/client/strlcpy.c b/src/gtm/client/strlcpy.c index ae031e2..28a3740 100644 --- a/src/gtm/client/strlcpy.c +++ b/src/gtm/client/strlcpy.c @@ -4,7 +4,7 @@ * strncpy done right * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/gtm/client/test/Makefile b/src/gtm/client/test/Makefile index 46ddbe9..f2fe3b7 100644 --- a/src/gtm/client/test/Makefile +++ b/src/gtm/client/test/Makefile @@ -1,4 +1,4 @@ -# Copyright (c) 2010 Nippon Telegraph and Telephone Corporation +# Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation 
top_build_dir=../../../ include $(top_build_dir)/gtm/Makefile.global diff --git a/src/gtm/client/test/test_proxy.sh b/src/gtm/client/test/test_proxy.sh index c0d3cae..5c68745 100644 --- a/src/gtm/client/test/test_proxy.sh +++ b/src/gtm/client/test/test_proxy.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2010 Nippon Telegraph and Telephone Corporation +# Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation GTM_SERVER_HOSTNAME=gtm GTM_SERVER_PORT=16667 diff --git a/src/gtm/client/test/test_seq.c b/src/gtm/client/test/test_seq.c index ba1981e..b1a076d 100644 --- a/src/gtm/client/test/test_seq.c +++ b/src/gtm/client/test/test_seq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation */ #include <sys/types.h> #include <unistd.h> diff --git a/src/gtm/client/test/test_snap.c b/src/gtm/client/test/test_snap.c index 718ad3c..f4b60ff 100644 --- a/src/gtm/client/test/test_snap.c +++ b/src/gtm/client/test/test_snap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation */ #include <sys/types.h> #include <unistd.h> diff --git a/src/gtm/client/test/test_snapperf.c b/src/gtm/client/test/test_snapperf.c index 3218c2a..4f415f8 100644 --- a/src/gtm/client/test/test_snapperf.c +++ b/src/gtm/client/test/test_snapperf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation */ #include <sys/types.h> diff --git a/src/gtm/client/test/test_txn.c b/src/gtm/client/test/test_txn.c index 2c805de..f988923 100644 --- a/src/gtm/client/test/test_txn.c +++ b/src/gtm/client/test/test_txn.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation */ #include <sys/types.h> diff --git 
a/src/gtm/client/test/test_txnperf.c b/src/gtm/client/test/test_txnperf.c index 04b218e..816db34 100644 --- a/src/gtm/client/test/test_txnperf.c +++ b/src/gtm/client/test/test_txnperf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation */ #include <sys/types.h> #include <unistd.h> diff --git a/src/gtm/common/Makefile b/src/gtm/common/Makefile index 104382c..5ee1eb8 100644 --- a/src/gtm/common/Makefile +++ b/src/gtm/common/Makefile @@ -1,4 +1,4 @@ -# Copyright (c) 2010 Nippon Telegraph and Telephone Corporation +# Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation top_build_dir=../.. include $(top_build_dir)/gtm/Makefile.global diff --git a/src/gtm/common/aset.c b/src/gtm/common/aset.c index aa95330..c95f9a1 100644 --- a/src/gtm/common/aset.c +++ b/src/gtm/common/aset.c @@ -9,7 +9,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * IDENTIFICATION * $PostgreSQL: pgsql/src/backend/utils/mmgr/aset.c,v 1.77 2008/04/11 22:54:23 tgl Exp $ diff --git a/src/gtm/common/assert.c b/src/gtm/common/assert.c index 58b9448..ea182d5 100644 --- a/src/gtm/common/assert.c +++ b/src/gtm/common/assert.c @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/gtm/common/elog.c b/src/gtm/common/elog.c index 65f28a2..cc6a9a5 100644 --- a/src/gtm/common/elog.c +++ b/src/gtm/common/elog.c @@ -5,7 +5,7 @@ * * Portions Copyright 
(c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/gtm/common/gtm_list.c b/src/gtm/common/gtm_list.c index 3ea2ce7..234483c 100644 --- a/src/gtm/common/gtm_list.c +++ b/src/gtm/common/gtm_list.c @@ -6,7 +6,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/gtm/common/gtm_lock.c b/src/gtm/common/gtm_lock.c index c919730..560ef9d 100644 --- a/src/gtm/common/gtm_lock.c +++ b/src/gtm/common/gtm_lock.c @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/gtm/common/mcxt.c b/src/gtm/common/mcxt.c index 9325ae3..1cb0079 100644 --- a/src/gtm/common/mcxt.c +++ b/src/gtm/common/mcxt.c @@ -11,7 +11,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/gtm/common/stringinfo.c b/src/gtm/common/stringinfo.c index 5023bd9..35e4cd8 100644 --- a/src/gtm/common/stringinfo.c +++ b/src/gtm/common/stringinfo.c @@ -8,7 +8,7 @@ * * Portions Copyright (c) 
1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL: pgsql/src/backend/lib/stringinfo.c,v 1.49 2008/01/01 19:45:49 momjian Exp $ * diff --git a/src/gtm/gtm_ctl/Makefile b/src/gtm/gtm_ctl/Makefile index eddcc9a..19cc20a 100644 --- a/src/gtm/gtm_ctl/Makefile +++ b/src/gtm/gtm_ctl/Makefile @@ -1,4 +1,4 @@ -# Copyright (c) 2010 Nippon Telegraph and Telephone Corporation +# Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation top_build_dir=../.. include $(top_build_dir)/gtm/Makefile.global diff --git a/src/gtm/gtm_ctl/gtm_ctl.c b/src/gtm/gtm_ctl/gtm_ctl.c index 46d9364..58e1e16 100644 --- a/src/gtm/gtm_ctl/gtm_ctl.c +++ b/src/gtm/gtm_ctl/gtm_ctl.c @@ -3,7 +3,7 @@ * gtm_ctl --- start/stops/restarts the GTM server/proxy * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL$ * diff --git a/src/gtm/libpq/Makefile b/src/gtm/libpq/Makefile index 9036ba8..b02dfbe 100644 --- a/src/gtm/libpq/Makefile +++ b/src/gtm/libpq/Makefile @@ -1,4 +1,4 @@ -# Copyright (c) 2010 Nippon Telegraph and Telephone Corporation +# Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation top_build_dir=../.. 
include $(top_build_dir)/gtm/Makefile.global diff --git a/src/gtm/libpq/ip.c b/src/gtm/libpq/ip.c index 5611614..08929e9 100644 --- a/src/gtm/libpq/ip.c +++ b/src/gtm/libpq/ip.c @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/gtm/libpq/pqcomm.c b/src/gtm/libpq/pqcomm.c index e697a7f..d7c3081 100644 --- a/src/gtm/libpq/pqcomm.c +++ b/src/gtm/libpq/pqcomm.c @@ -29,7 +29,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL: pgsql/src/backend/libpq/pqcomm.c,v 1.198 2008/01/01 19:45:49 momjian Exp $ * diff --git a/src/gtm/libpq/pqformat.c b/src/gtm/libpq/pqformat.c index 41ef105..6d8e5f9 100644 --- a/src/gtm/libpq/pqformat.c +++ b/src/gtm/libpq/pqformat.c @@ -23,7 +23,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL: pgsql/src/backend/libpq/pqformat.c,v 1.48 2009/01/01 17:23:42 momjian Exp $ * diff --git a/src/gtm/libpq/pqsignal.c b/src/gtm/libpq/pqsignal.c index 6bff3d4..0c7914e 100644 --- a/src/gtm/libpq/pqsignal.c +++ b/src/gtm/libpq/pqsignal.c @@ -6,7 +6,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 
Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/gtm/libpq/strlcpy.c b/src/gtm/libpq/strlcpy.c index ae031e2..28a3740 100644 --- a/src/gtm/libpq/strlcpy.c +++ b/src/gtm/libpq/strlcpy.c @@ -4,7 +4,7 @@ * strncpy done right * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/gtm/main/Makefile b/src/gtm/main/Makefile index 3ed2c78..bcdaf89 100644 --- a/src/gtm/main/Makefile +++ b/src/gtm/main/Makefile @@ -1,4 +1,4 @@ -# Copyright (c) 2010 Nippon Telegraph and Telephone Corporation +# Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation top_build_dir=../.. include $(top_build_dir)/gtm/Makefile.global diff --git a/src/gtm/main/gtm_seq.c b/src/gtm/main/gtm_seq.c index 390d0f6..359a959 100644 --- a/src/gtm/main/gtm_seq.c +++ b/src/gtm/main/gtm_seq.c @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/gtm/main/gtm_snap.c b/src/gtm/main/gtm_snap.c index 5c0125e..1308765 100644 --- a/src/gtm/main/gtm_snap.c +++ b/src/gtm/main/gtm_snap.c @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/gtm/main/gtm_stat.c b/src/gtm/main/gtm_stat.c index 
fac6b64..d9fa7aa 100644 --- a/src/gtm/main/gtm_stat.c +++ b/src/gtm/main/gtm_stat.c @@ -4,7 +4,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/gtm/main/gtm_stats.c b/src/gtm/main/gtm_stats.c index aba1a21..2548cd4 100644 --- a/src/gtm/main/gtm_stats.c +++ b/src/gtm/main/gtm_stats.c @@ -4,7 +4,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/gtm/main/gtm_thread.c b/src/gtm/main/gtm_thread.c index 61ea640..fd49e85 100644 --- a/src/gtm/main/gtm_thread.c +++ b/src/gtm/main/gtm_thread.c @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/gtm/main/gtm_time.c b/src/gtm/main/gtm_time.c index ea795af..1e1451e 100644 --- a/src/gtm/main/gtm_time.c +++ b/src/gtm/main/gtm_time.c @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/gtm/main/gtm_txn.c b/src/gtm/main/gtm_txn.c index 27f7372..ab5a09b 100644 --- 
a/src/gtm/main/gtm_txn.c +++ b/src/gtm/main/gtm_txn.c @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/gtm/main/main.c b/src/gtm/main/main.c index 8c1f4ca..ea6d5d2 100644 --- a/src/gtm/main/main.c +++ b/src/gtm/main/main.c @@ -4,7 +4,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/gtm/path/path.c b/src/gtm/path/path.c index ea0eb6d..11c6290 100644 --- a/src/gtm/path/path.c +++ b/src/gtm/path/path.c @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/gtm/proxy/Makefile b/src/gtm/proxy/Makefile index d2e6623..ebc97ea 100644 --- a/src/gtm/proxy/Makefile +++ b/src/gtm/proxy/Makefile @@ -1,4 +1,4 @@ -# Copyright (c) 2010 Nippon Telegraph and Telephone Corporation +# Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation top_build_dir=../.. 
include $(top_build_dir)/gtm/Makefile.global diff --git a/src/gtm/proxy/proxy_main.c b/src/gtm/proxy/proxy_main.c index 4950e04..9a00bee 100644 --- a/src/gtm/proxy/proxy_main.c +++ b/src/gtm/proxy/proxy_main.c @@ -4,7 +4,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/gtm/proxy/proxy_thread.c b/src/gtm/proxy/proxy_thread.c index 844f2f7..4139936 100644 --- a/src/gtm/proxy/proxy_thread.c +++ b/src/gtm/proxy/proxy_thread.c @@ -4,7 +4,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/gtm/recovery/Makefile b/src/gtm/recovery/Makefile index 5092a56..3af7fd7 100644 --- a/src/gtm/recovery/Makefile +++ b/src/gtm/recovery/Makefile @@ -1,4 +1,4 @@ -# Copyright (c) 2010 Nippon Telegraph and Telephone Corporation +# Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation top_build_dir=../.. 
include $(top_build_dir)/gtm/Makefile.global diff --git a/src/gtm/recovery/register.c b/src/gtm/recovery/register.c index bb5cfc3..766072d 100644 --- a/src/gtm/recovery/register.c +++ b/src/gtm/recovery/register.c @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * * IDENTIFICATION diff --git a/src/include/access/transam.h b/src/include/access/transam.h index d7c7b7b..7210189 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -6,7 +6,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL$ * diff --git a/src/include/access/xact.h b/src/include/access/xact.h index 5bf6b6c..9b2be9f 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -6,7 +6,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL$ * diff --git a/src/include/bootstrap/bootstrap.h b/src/include/bootstrap/bootstrap.h index 8cf2839..468bccf 100644 --- a/src/include/bootstrap/bootstrap.h +++ b/src/include/bootstrap/bootstrap.h @@ -6,7 +6,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions 
Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL$ * diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h index 74c6d15..e72b783 100644 --- a/src/include/catalog/dependency.h +++ b/src/include/catalog/dependency.h @@ -6,7 +6,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL$ * diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h index f3c5ab1..4e84562 100644 --- a/src/include/catalog/heap.h +++ b/src/include/catalog/heap.h @@ -6,7 +6,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL$ * diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h index bcb48d2..9bf8143 100644 --- a/src/include/catalog/indexing.h +++ b/src/include/catalog/indexing.h @@ -7,7 +7,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL$ * diff --git a/src/include/gtm/assert.h b/src/include/gtm/assert.h index 5c71363..b46df46 100644 --- a/src/include/gtm/assert.h +++ b/src/include/gtm/assert.h @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and 
Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL$ * diff --git a/src/include/gtm/elog.h b/src/include/gtm/elog.h index 49c463f..115ea1f 100644 --- a/src/include/gtm/elog.h +++ b/src/include/gtm/elog.h @@ -6,7 +6,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL: pgsql/src/include/utils/elog.h,v 1.98 2009/01/01 17:24:02 momjian Exp $ * diff --git a/src/include/gtm/gtm.h b/src/include/gtm/gtm.h index 9075433..5041b0e 100644 --- a/src/include/gtm/gtm.h +++ b/src/include/gtm/gtm.h @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL$ * diff --git a/src/include/gtm/gtm_c.h b/src/include/gtm/gtm_c.h index e1fc4bd..e243317 100644 --- a/src/include/gtm/gtm_c.h +++ b/src/include/gtm/gtm_c.h @@ -11,7 +11,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL: pgsql/src/include/c.h,v 1.234 2009/01/01 17:23:55 momjian Exp $ * diff --git a/src/include/gtm/gtm_client.h b/src/include/gtm/gtm_client.h index b8b6af0..0159212 100644 --- a/src/include/gtm/gtm_client.h +++ b/src/include/gtm/gtm_client.h @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 
1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL$ * diff --git a/src/include/gtm/gtm_conn.h b/src/include/gtm/gtm_conn.h index 911a345..c000497 100644 --- a/src/include/gtm/gtm_conn.h +++ b/src/include/gtm/gtm_conn.h @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL$ * diff --git a/src/include/gtm/gtm_ext.h b/src/include/gtm/gtm_ext.h index b492941..49b0253 100644 --- a/src/include/gtm/gtm_ext.h +++ b/src/include/gtm/gtm_ext.h @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL$ * diff --git a/src/include/gtm/gtm_ip.h b/src/include/gtm/gtm_ip.h index 30da308..2f89418 100644 --- a/src/include/gtm/gtm_ip.h +++ b/src/include/gtm/gtm_ip.h @@ -7,7 +7,7 @@ * what you include here! 
* * Copyright (c) 2003-2009, PostgreSQL Global Development Group - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL: pgsql/src/include/libpq/ip.h,v 1.20 2008/01/01 19:45:58 momjian Exp $ * diff --git a/src/include/gtm/gtm_list.h b/src/include/gtm/gtm_list.h index 6a5727f..c5dbd93 100644 --- a/src/include/gtm/gtm_list.h +++ b/src/include/gtm/gtm_list.h @@ -29,7 +29,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL: pgsql/src/include/nodes/pg_list.h,v 1.59 2008/08/14 18:48:00 tgl Exp $ * diff --git a/src/include/gtm/gtm_lock.h b/src/include/gtm/gtm_lock.h index f4a5e02..3da85c1 100644 --- a/src/include/gtm/gtm_lock.h +++ b/src/include/gtm/gtm_lock.h @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL$ * diff --git a/src/include/gtm/gtm_msg.h b/src/include/gtm/gtm_msg.h index 16dfaac..aeee095 100644 --- a/src/include/gtm/gtm_msg.h +++ b/src/include/gtm/gtm_msg.h @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL$ * diff --git a/src/include/gtm/gtm_proxy.h b/src/include/gtm/gtm_proxy.h index 4c55639..9b721e1 100644 --- 
a/src/include/gtm/gtm_proxy.h +++ b/src/include/gtm/gtm_proxy.h @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL$ * diff --git a/src/include/gtm/gtm_seq.h b/src/include/gtm/gtm_seq.h index 7dc1e3e..6ac8092 100644 --- a/src/include/gtm/gtm_seq.h +++ b/src/include/gtm/gtm_seq.h @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL$ * diff --git a/src/include/gtm/gtm_time.h b/src/include/gtm/gtm_time.h index b3d7005..90f36ea 100644 --- a/src/include/gtm/gtm_time.h +++ b/src/include/gtm/gtm_time.h @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL$ * diff --git a/src/include/gtm/gtm_txn.h b/src/include/gtm/gtm_txn.h index be6852f..9b2fb83 100644 --- a/src/include/gtm/gtm_txn.h +++ b/src/include/gtm/gtm_txn.h @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL$ * diff --git a/src/include/gtm/ip.h b/src/include/gtm/ip.h index c5d9752..fa4e3d3 100644 --- 
a/src/include/gtm/ip.h +++ b/src/include/gtm/ip.h @@ -7,7 +7,7 @@ * what you include here! * * Copyright (c) 2003-2009, PostgreSQL Global Development Group - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL: pgsql/src/include/libpq/ip.h,v 1.20 2008/01/01 19:45:58 momjian Exp $ * diff --git a/src/include/gtm/libpq-be.h b/src/include/gtm/libpq-be.h index a157a73..d5a5df5 100644 --- a/src/include/gtm/libpq-be.h +++ b/src/include/gtm/libpq-be.h @@ -10,7 +10,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL: pgsql/src/include/libpq/libpq-be.h,v 1.69 2009/01/01 17:23:59 momjian Exp $ * diff --git a/src/include/gtm/libpq-fe.h b/src/include/gtm/libpq-fe.h index 2c5c2c4..1ae16c1 100644 --- a/src/include/gtm/libpq-fe.h +++ b/src/include/gtm/libpq-fe.h @@ -6,7 +6,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL: pgsql/src/interfaces/libpq/libpq-fe.h,v 1.145 2009/01/01 17:24:03 momjian Exp $ * diff --git a/src/include/gtm/libpq-int.h b/src/include/gtm/libpq-int.h index 43fa3ea..5c6714b 100644 --- a/src/include/gtm/libpq-int.h +++ b/src/include/gtm/libpq-int.h @@ -11,7 +11,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 
2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL: pgsql/src/interfaces/libpq/libpq-int.h,v 1.139 2009/01/01 17:24:03 momjian Exp $ * diff --git a/src/include/gtm/libpq.h b/src/include/gtm/libpq.h index 29621a4..11c4dda 100644 --- a/src/include/gtm/libpq.h +++ b/src/include/gtm/libpq.h @@ -6,7 +6,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL: pgsql/src/include/libpq/libpq.h,v 1.70 2008/11/20 09:29:36 mha Exp $ * diff --git a/src/include/gtm/memnodes.h b/src/include/gtm/memnodes.h index dea51b2..9c55e46 100644 --- a/src/include/gtm/memnodes.h +++ b/src/include/gtm/memnodes.h @@ -6,7 +6,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL: pgsql/src/include/nodes/memnodes.h,v 1.34 2008/01/01 19:45:58 momjian Exp $ * diff --git a/src/include/gtm/memutils.h b/src/include/gtm/memutils.h index 5d89995..9666c44 100644 --- a/src/include/gtm/memutils.h +++ b/src/include/gtm/memutils.h @@ -9,7 +9,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL: pgsql/src/include/utils/memutils.h,v 1.64 2008/01/01 19:45:59 momjian Exp $ * diff --git a/src/include/gtm/palloc.h b/src/include/gtm/palloc.h index 380e280..e81e9d1 100644 --- a/src/include/gtm/palloc.h +++ 
b/src/include/gtm/palloc.h @@ -20,7 +20,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL: pgsql/src/include/utils/palloc.h,v 1.40 2008/06/28 16:45:22 tgl Exp $ * diff --git a/src/include/gtm/path.h b/src/include/gtm/path.h index 624fd18..e95ca5e 100644 --- a/src/include/gtm/path.h +++ b/src/include/gtm/path.h @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL$ * diff --git a/src/include/gtm/pqcomm.h b/src/include/gtm/pqcomm.h index cdae6ca..df655a5 100644 --- a/src/include/gtm/pqcomm.h +++ b/src/include/gtm/pqcomm.h @@ -8,7 +8,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL: pgsql/src/include/libpq/pqcomm.h,v 1.109 2008/10/28 12:10:44 mha Exp $ * diff --git a/src/include/gtm/pqexpbuffer.h b/src/include/gtm/pqexpbuffer.h index 7ae0411..039a481 100644 --- a/src/include/gtm/pqexpbuffer.h +++ b/src/include/gtm/pqexpbuffer.h @@ -17,7 +17,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation * * $PostgreSQL: 
pgsql/src/interfaces/libpq/pqexpbuffer.h,v 1.21 2008/11/26 16:23:11 tgl Exp $ * diff --git a/src/include/gtm/pqformat.h b/src/include/gtm/pqformat.h index 3febf2c..4a07367 100644 --- a/src/include/gtm/pqformat.h +++ b/src/include/gtm/pqformat.h @@ -5,7 +5,7 @@ * * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California - * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * Port... [truncated message content] |
From: Michael P. <mic...@us...> - 2011-01-28 01:24:36
|
Project "Postgres-XC". The branch, master has been updated via ed71dbc99039efb837b76cb2f5c998230a7e0f3e (commit) from 0230e78ec19bceb2ff5b34ea7a035cc2b0b7321a (commit) - Log ----------------------------------------------------------------- commit ed71dbc99039efb837b76cb2f5c998230a7e0f3e Author: Michael P <mic...@us...> Date: Fri Jan 28 10:21:14 2011 +0900 Fix for replicated tables using multi-INSERT queries. Comments have also been added on functions created for this support. A structure used to combine results has been moved from pquery.c to ExecRemote.c to limit dependencies. Patch by Benny Wang, with some editorialization from me diff --git a/src/backend/pgxc/plan/planner.c b/src/backend/pgxc/plan/planner.c index 1cbf587..ff6ce81 100644 --- a/src/backend/pgxc/plan/planner.c +++ b/src/backend/pgxc/plan/planner.c @@ -488,23 +488,19 @@ get_plan_nodes_insert(PlannerInfo *root, RemoteQuery *step) /* Bad relation type */ return; - /* Get result relation info */ rel_loc_info = GetRelationLocInfo(rte->relid); if (!rel_loc_info) ereport(ERROR, (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), - (errmsg("Could not find relation for oid = %d", rte->relid)))); + (errmsg("Could not find relation for oid = %d", rte->relid)))); - if (query->jointree != NULL && query->jointree->fromlist != NULL) + /* Optimization is only done for distributed tables */ + if (query->jointree != NULL + && query->jointree->fromlist != NULL + && rel_loc_info->locatorType == LOCATOR_TYPE_HASH) { - /* INSERT SELECT suspected */ - - /* We only optimize for when the destination is partitioned */ - if (rel_loc_info->locatorType != LOCATOR_TYPE_HASH) - return; - /* * See if it is "single-step" * Optimize for just known common case with 2 RTE entries @@ -546,7 +542,7 @@ get_plan_nodes_insert(PlannerInfo *root, RemoteQuery *step) if (rel_loc_info->locatorType == LOCATOR_TYPE_HASH && rel_loc_info->partAttrName != NULL) { - Expr *checkexpr; + Expr *checkexpr; TargetEntry *tle = NULL; /* It is a partitioned table, get 
value by looking in targetList */ @@ -602,7 +598,7 @@ get_plan_nodes_insert(PlannerInfo *root, RemoteQuery *step) if (!source_rel_loc_info) ereport(ERROR, (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), - (errmsg("Could not find relation for oid = %d", rte->relid)))); + (errmsg("Could not find relation for oid = %d", rte->relid)))); if (source_rel_loc_info->locatorType == LOCATOR_TYPE_HASH && strcmp(col_base->colname, source_rel_loc_info->partAttrName) == 0) @@ -3247,16 +3243,26 @@ is_pgxc_safe_func(Oid funcid) return ret_val; } -/* code is borrowed from get_plan_nodes_insert */ +/* + * GetHashExecNodes - + * Get hash key of execution nodes according to the expression value + * + * Input parameters: + * rel_loc_info is a locator function. It contains distribution information. + * exec_nodes is the list of nodes to be executed + * expr is the partition column value + * + * code is borrowed from get_plan_nodes_insert + */ void GetHashExecNodes(RelationLocInfo *rel_loc_info, ExecNodes **exec_nodes, const Expr *expr) { /* We may have a cast, try and handle it */ Expr *checkexpr; Expr *eval_expr = NULL; - Const *constant; - long part_value; - long *part_value_ptr = NULL; + Const *constant; + long part_value; + long *part_value_ptr = NULL; eval_expr = (Expr *) eval_const_expressions(NULL, (Node *)expr); checkexpr = get_numeric_constant(eval_expr); @@ -3276,7 +3282,7 @@ GetHashExecNodes(RelationLocInfo *rel_loc_info, ExecNodes **exec_nodes, const Ex /* single call handles both replicated and partitioned types */ *exec_nodes = GetRelationNodes(rel_loc_info, part_value_ptr, - RELATION_ACCESS_INSERT); + RELATION_ACCESS_INSERT); if (eval_expr) pfree(eval_expr); diff --git a/src/backend/pgxc/pool/execRemote.c b/src/backend/pgxc/pool/execRemote.c index 7621c15..5cd60b7 100644 --- a/src/backend/pgxc/pool/execRemote.c +++ b/src/backend/pgxc/pool/execRemote.c @@ -815,9 +815,18 @@ HandleError(RemoteQueryState *combiner, char *msg_body, size_t len) combiner->command_complete_count++; } -/* 
combine deparsed sql statements execution results */ +/* + * HandleCmdComplete - + * combine deparsed sql statements execution results + * + * Input parameters: + * commandType is dml command type + * combineTag is used to combine the completion result + * msg_body is execution result needed to combine + * len is msg_body size + */ void -HandleCmdComplete(CmdType commandType, combineTag *combine, +HandleCmdComplete(CmdType commandType, CombineTag *combine, const char *msg_body, size_t len) { int digits = 0; diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index ca3ec53..d9fc689 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -2156,12 +2156,18 @@ QueryRewrite(Query *parsetree) } #ifdef PGXC +/* + * Part of handling INSERT queries with multiple values + * + * GetRelPartColPos - + * Get the partition column position in targetList + */ static int GetRelPartColPos(const Query *query, const char *partColName) { ListCell *lc; int rescol = -1; - + foreach(lc, query->targetList) { TargetEntry *tle = (TargetEntry *) lfirst(lc); @@ -2187,18 +2193,40 @@ GetRelPartColPos(const Query *query, const char *partColName) return rescol; } +/* + * Part of handling INSERT queries with multiple values + * + * ProcessHashValue - + * associates the inserted row to the specified datanode + * + * Input parameters: + * subList is the inserted row + * node is the node number + */ static void ProcessHashValue(List **valuesList, const List *subList, const int node) { valuesList[node - 1] = lappend(valuesList[node - 1], subList); } +/* + * Part of handling INSERT queries with multiple values + * + * InitValuesList - + * Allocate and initialize the list of values + */ static void InitValuesList(List **valuesList[], int size) { *valuesList = palloc0(size * sizeof(List *)); } +/* + * Part of handling INSERT queries with multiple values + * + * InitValuesList - + * free all the list of values + */ static void 
DestroyValuesList(List **valuesList[]) { @@ -2206,9 +2234,20 @@ DestroyValuesList(List **valuesList[]) *valuesList = NIL; } +/* + * Part of handling INSERT queries with multiple values + * + * ProcessRobinValue - + * assign insert values list to each node averagely + * + * Input parameters: + * valuesList is an array of lists used to assign value list to specified nodes + * size is number of assigned nodes + * values_rte is the values list + */ static void ProcessRobinValue(Oid relid, List **valuesList, - int size, const RangeTblEntry *values_rte) + int size, const RangeTblEntry *values_rte) { List *values = values_rte->values_lists; int length = values->length; @@ -2217,9 +2256,9 @@ ProcessRobinValue(Oid relid, List **valuesList, int processNum = 0; int node; - /* get average insert value number of each node */ - if (length > NumDataNodes) - dist = length/NumDataNodes; + /* Get average insert value number of each node */ + if (length > size) + dist = length/size; else dist = 1; @@ -2227,7 +2266,7 @@ ProcessRobinValue(Oid relid, List **valuesList, { node = GetRoundRobinNode(relid); - /* assign insert value */ + /* Assign insert value */ for(j = 0; j < dist; j++) { processNum += 1; @@ -2235,17 +2274,30 @@ ProcessRobinValue(Oid relid, List **valuesList, list_nth(values, processNum - 1)); } } - - /* assign remained value */ + + /* Assign remained value */ while(processNum < length) { processNum += 1; node = GetRoundRobinNode(relid); valuesList[node - 1] = lappend(valuesList[node - 1], - list_nth(values, processNum - 1)); + list_nth(values, processNum - 1)); } } +/* + * Part of handling INSERT queries with multiple values + * + * RewriteInsertStmt - + * Rewrite INSERT statement. + * Split the INSERT statement with mutiple values into mutiple insert statements + * according to its distribution key. 
Distribution rule is as follows: + * 1.LOCATOR_TYPE_HASH: associates correct node with its distribution key + * 2.LOCATOR_TYPE_RROBIN: assign value lists to each datanodes averagely + * 3.DEFAULT: no need to process (replicate case) + * + * values_rte is the values list range table. + */ static List * RewriteInsertStmt(Query *query, RangeTblEntry *values_rte) { @@ -2260,7 +2312,7 @@ RewriteInsertStmt(Query *query, RangeTblEntry *values_rte) char *partColName; List **valuesList; int i; - + rte = (RangeTblEntry *) list_nth(query->rtable, query->resultRelation - 1); rte_loc_info = GetRelationLocInfo(rte->relid); locatorType = rte_loc_info->locatorType; @@ -2284,44 +2336,45 @@ RewriteInsertStmt(Query *query, RangeTblEntry *values_rte) foreach(values_lc, values_rte->values_lists) { List *sublist = (List *)lfirst(values_lc); - + if (first) { + /* Get the partition column number in the targetList */ partColno = GetRelPartColPos(query, partColName); first = false; } - /* get the exec node according to partition column value */ + /* Get the exec node according to partition column value */ GetHashExecNodes(rte_loc_info, &exec_nodes, (Expr *)list_nth(sublist, partColno)); Assert(exec_nodes->nodelist->length == 1); - /* assign valueList to specified exec node */ + /* Assign valueList to specified execution node */ ProcessHashValue(valuesList, sublist, list_nth_int(exec_nodes->nodelist, 0)); } } - + goto collect; case LOCATOR_TYPE_RROBIN: - + InitValuesList(&valuesList, NumDataNodes); - /* assign valueList to specified exec node */ + /* Assign valueList to specified execution node */ ProcessRobinValue(rte->relid, valuesList, NumDataNodes, values_rte); -collect: - /* produce query for relative datanodes */ - for(i = 0; i < NumDataNodes; i++) +collect: + /* Produce query for relative Datanodes */ + for (i = 0; i < NumDataNodes; i++) { if (valuesList[i] != NIL) { ExecNodes *execNodes = makeNode(ExecNodes); execNodes->baselocatortype = rte_loc_info->locatorType; execNodes->nodelist 
= lappend_int(execNodes->nodelist, i + 1); - + element = copyObject(query); - + rte = (RangeTblEntry *)list_nth(element->rtable, rtr->rtindex - 1); rte->values_lists = valuesList[i]; @@ -2335,16 +2388,15 @@ collect: rwInsertList = lappend(rwInsertList, element); } } - + DestroyValuesList(&valuesList); break; - + default: /* distribute by replication: just do it as usual */ rwInsertList = lappend(rwInsertList, query); break; } - return rwInsertList; } #endif diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index 247ca83..93bf626 100644 --- a/src/backend/tcop/pquery.c +++ b/src/backend/tcop/pquery.c @@ -1241,7 +1241,7 @@ PortalRunMulti(Portal portal, bool isTopLevel, { ListCell *stmtlist_item; #ifdef PGXC - combineTag combine; + CombineTag combine; combine.cmdType = CMD_UNKNOWN; combine.data[0] = '\0'; diff --git a/src/include/pgxc/execRemote.h b/src/include/pgxc/execRemote.h index 8f6c10a..d50b415 100644 --- a/src/include/pgxc/execRemote.h +++ b/src/include/pgxc/execRemote.h @@ -46,6 +46,12 @@ typedef enum REQUEST_TYPE_COPY_OUT /* Copy Out response */ } RequestType; +/* Combines results of INSERT statements using multiple values */ +typedef struct CombineTag +{ + CmdType cmdType; /* DML command type */ + char data[COMPLETION_TAG_BUFSIZE]; /* execution result combination data */ +} CombineTag; /* * Represents a DataRow message received from a remote node. 
@@ -142,7 +148,7 @@ extern void ExecRemoteUtility(RemoteQuery *node); extern int handle_response(PGXCNodeHandle * conn, RemoteQueryState *combiner); #ifdef PGXC -extern void HandleCmdComplete(CmdType commandType, combineTag *combine, const char *msg_body, +extern void HandleCmdComplete(CmdType commandType, CombineTag *combine, const char *msg_body, size_t len); #endif extern bool FetchTuple(RemoteQueryState *combiner, TupleTableSlot *slot); diff --git a/src/include/tcop/pquery.h b/src/include/tcop/pquery.h index e947989..4919dc5 100644 --- a/src/include/tcop/pquery.h +++ b/src/include/tcop/pquery.h @@ -17,13 +17,6 @@ #include "nodes/parsenodes.h" #include "utils/portal.h" -#ifdef PGXC -typedef struct combineTag -{ - CmdType cmdType; - char data[COMPLETION_TAG_BUFSIZE]; -} combineTag; -#endif extern PGDLLIMPORT Portal ActivePortal; ----------------------------------------------------------------------- Summary of changes: src/backend/pgxc/plan/planner.c | 38 ++++++++------ src/backend/pgxc/pool/execRemote.c | 13 ++++- src/backend/rewrite/rewriteHandler.c | 100 ++++++++++++++++++++++++++-------- src/backend/tcop/pquery.c | 2 +- src/include/pgxc/execRemote.h | 8 +++- src/include/tcop/pquery.h | 7 --- 6 files changed, 117 insertions(+), 51 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-01-27 06:44:01
|
Project "Postgres-XC". The branch, master has been updated via 0230e78ec19bceb2ff5b34ea7a035cc2b0b7321a (commit) from f83cf55da8f200a4f89395737245aaddb37c88e0 (commit) - Log ----------------------------------------------------------------- commit 0230e78ec19bceb2ff5b34ea7a035cc2b0b7321a Author: Michael P <mic...@us...> Date: Thu Jan 27 15:40:34 2011 +0900 Fix for bug 3147497 INSERT.. DEFAULT VALUES This fix is linked to tables having no default values defined. When doing an INSERT.. DEFAULT VALUES, XC was returning an error at planner level when deparsing the query. create table aa(a int); INSERT INTO aa DEFAULT VALUES ; XC result: ERROR: syntax error at or near ")" pg_regress results: INSERT 0 1 In this fix, when an INSERT using DEFAULT VALUES is made, query is directly returned. Patch written by sch19831106 with some editorialization by me. diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 507c643..1551b9c 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -55,7 +55,9 @@ #include "utils/syscache.h" #include "utils/typcache.h" #include "utils/xml.h" - +#ifdef PGXC +#include "pgxc/pgxc.h" +#endif /* ---------- * Pretty formatting constants @@ -3223,6 +3225,18 @@ get_insert_query_def(Query *query, deparse_context *context) ListCell *l; List *strippedexprs; +#ifdef PGXC + /* + * In the case of "INSERT ... DEFAULT VALUES" analyzed in pgxc planner, + * return the sql statement directly if the table has no default values. + */ + if (IS_PGXC_COORDINATOR && !IsConnFromCoord() && !query->targetList) + { + appendStringInfo(buf, "%s", query->sql_statement); + return; + } +#endif + /* * If it's an INSERT ... SELECT or VALUES (...), (...), ... there will be * a single RTE for the SELECT or VALUES. 
----------------------------------------------------------------------- Summary of changes: src/backend/utils/adt/ruleutils.c | 16 +++++++++++++++- 1 files changed, 15 insertions(+), 1 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-01-27 05:49:13
|
Project "Postgres-XC". The branch, master has been updated via f83cf55da8f200a4f89395737245aaddb37c88e0 (commit) from fe557aa50bcd7401a1f5cf9acec4285904bb4d24 (commit) - Log ----------------------------------------------------------------- commit f83cf55da8f200a4f89395737245aaddb37c88e0 Author: Michael P <mic...@us...> Date: Thu Jan 27 14:50:40 2011 +0900 Fix for make -j This fixes dependency problems when invocating j option during a make. Patch written by Wang Diancheng diff --git a/src/Makefile b/src/Makefile index 9028d85..7fbbcb3 100644 --- a/src/Makefile +++ b/src/Makefile @@ -18,11 +18,11 @@ all install installdirs uninstall distprep: $(MAKE) -C timezone $@ # GTM should be built before backend because of dependancy $(MAKE) -C gtm $@ + $(MAKE) -C interfaces $@ $(MAKE) -C backend $@ $(MAKE) -C backend/utils/mb/conversion_procs $@ $(MAKE) -C backend/snowball $@ $(MAKE) -C include $@ - $(MAKE) -C interfaces $@ $(MAKE) -C bin $@ $(MAKE) -C pl $@ $(MAKE) -C makefiles $@ diff --git a/src/include/Makefile b/src/include/Makefile index ea0cb1b..153166e 100644 --- a/src/include/Makefile +++ b/src/include/Makefile @@ -18,7 +18,7 @@ all: pg_config.h pg_config_os.h # Subdirectories containing headers for server-side dev SUBDIRS = access bootstrap catalog commands executor foreign lib libpq mb \ - nodes optimizer parser postmaster regex rewrite storage tcop \ + nodes optimizer parser pgxc postmaster regex rewrite storage tcop \ snowball snowball/libstemmer tsearch tsearch/dicts utils \ port port/win32 port/win32_msvc port/win32_msvc/sys \ port/win32/arpa port/win32/netinet port/win32/sys \ ----------------------------------------------------------------------- Summary of changes: src/Makefile | 2 +- src/include/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-01-26 07:07:23
|
Project "Postgres-XC". The branch, master has been updated via fe557aa50bcd7401a1f5cf9acec4285904bb4d24 (commit) from c19b46835e586023fbaaf93c3c84e4898f4fe6bd (commit) - Log ----------------------------------------------------------------- commit fe557aa50bcd7401a1f5cf9acec4285904bb4d24 Author: Michael P <mic...@us...> Date: Wed Jan 26 16:01:00 2011 +0900 The patch implements multiple insert syntax in PGXC. Multiple insert means using a single insert statement to insert multiple rows into a table using the syntax e.g. insert into students(rno, class, pos) values (1, 10, 5), (2, 10, 6), (3, 10, 7), (4, 10, 8); Without the patch statements like these pass, but actually do not insert any thing in the table. The main code changes are in re-writer. The patch checks to see if the insert statement has more than one sets in the provided list of values (FOUR in the above example), and in that case rewrites the insert statement. The insert rewriter separates the sets in the provided list of values into independent lists depending on the distribution of the table, the distribution column and the value provided for the distribution column. Next the main re-writer is separated into two possible paths, one without a for loop and if we have a separated list of insert values, we run a for loop on the list and create an insert statement for each of the data nodes providing it that sub-group of the original list that is supposed to run on this particular data node. Main work is done now, all that is left is to handle multiple command result tags from the data nodes. HandleCmdComplete does this, it simply keeps adding into the insert row count until all data nodes are done. With this patch, multi insert does not work for replicated tables. Additional comments are also necessary. 
diff --git a/src/backend/pgxc/plan/planner.c b/src/backend/pgxc/plan/planner.c index ace4635..1cbf587 100644 --- a/src/backend/pgxc/plan/planner.c +++ b/src/backend/pgxc/plan/planner.c @@ -2754,6 +2754,7 @@ pgxc_planner(Query *query, int cursorOptions, ParamListInfo boundParams) query_step = makeRemoteQuery(); + query_step->exec_nodes = query->execNodes; /* Optimize multi-node handling */ query_step->read_only = query->commandType == CMD_SELECT; @@ -2785,7 +2786,8 @@ pgxc_planner(Query *query, int cursorOptions, ParamListInfo boundParams) if (query->commandType != CMD_SELECT) result->resultRelations = list_make1_int(query->resultRelation); - get_plan_nodes_command(query_step, root); + if (query_step->exec_nodes == NULL) + get_plan_nodes_command(query_step, root); /* standard planner handles correlated UPDATE or DELETE */ if ((query->commandType == CMD_UPDATE || query->commandType == CMD_DELETE) @@ -3244,3 +3246,39 @@ is_pgxc_safe_func(Oid funcid) ReleaseSysCache(tp); return ret_val; } + +/* code is borrowed from get_plan_nodes_insert */ +void +GetHashExecNodes(RelationLocInfo *rel_loc_info, ExecNodes **exec_nodes, const Expr *expr) +{ + /* We may have a cast, try and handle it */ + Expr *checkexpr; + Expr *eval_expr = NULL; + Const *constant; + long part_value; + long *part_value_ptr = NULL; + + eval_expr = (Expr *) eval_const_expressions(NULL, (Node *)expr); + checkexpr = get_numeric_constant(eval_expr); + + if (checkexpr == NULL) + return; + + constant = (Const *) checkexpr; + + if (constant->consttype == INT4OID || + constant->consttype == INT2OID || + constant->consttype == INT8OID) + { + part_value = (long) constant->constvalue; + part_value_ptr = &part_value; + } + + /* single call handles both replicated and partitioned types */ + *exec_nodes = GetRelationNodes(rel_loc_info, part_value_ptr, + RELATION_ACCESS_INSERT); + if (eval_expr) + pfree(eval_expr); + +} + diff --git a/src/backend/pgxc/pool/execRemote.c b/src/backend/pgxc/pool/execRemote.c index 
3803fa5..7621c15 100644 --- a/src/backend/pgxc/pool/execRemote.c +++ b/src/backend/pgxc/pool/execRemote.c @@ -815,6 +815,73 @@ HandleError(RemoteQueryState *combiner, char *msg_body, size_t len) combiner->command_complete_count++; } +/* combine deparsed sql statements execution results */ +void +HandleCmdComplete(CmdType commandType, combineTag *combine, + const char *msg_body, size_t len) +{ + int digits = 0; + uint64 originrowcount = 0; + uint64 rowcount = 0; + uint64 total = 0; + + if (msg_body == NULL) + return; + + /* if there's nothing in combine, just copy the msg_body */ + if (strlen(combine->data) == 0) + { + strcpy(combine->data, msg_body); + combine->cmdType = commandType; + return; + } + else + { + /* commandType is conflict */ + if (combine->cmdType != commandType) + return; + + /* get the processed row number from msg_body */ + digits = parse_row_count(msg_body, len + 1, &rowcount); + elog(DEBUG1, "digits is %d\n", digits); + Assert(digits >= 0); + + /* no need to combine */ + if (digits == 0) + return; + + /* combine the processed row number */ + parse_row_count(combine->data, strlen(combine->data) + 1, &originrowcount); + elog(DEBUG1, "originrowcount is %lu, rowcount is %lu\n", originrowcount, rowcount); + total = originrowcount + rowcount; + + } + + /* output command completion tag */ + switch (commandType) + { + case CMD_SELECT: + strcpy(combine->data, "SELECT"); + break; + case CMD_INSERT: + snprintf(combine->data, COMPLETION_TAG_BUFSIZE, + "INSERT %u %lu", 0, total); + break; + case CMD_UPDATE: + snprintf(combine->data, COMPLETION_TAG_BUFSIZE, + "UPDATE %lu", total); + break; + case CMD_DELETE: + snprintf(combine->data, COMPLETION_TAG_BUFSIZE, + "DELETE %lu", total); + break; + default: + strcpy(combine->data, ""); + break; + } + +} + /* * Examine the specified combiner state and determine if command was completed * successfully diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index 4fa533f..ca3ec53 100644 
--- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -27,6 +27,11 @@ #include "utils/lsyscache.h" #include "commands/trigger.h" +#ifdef PGXC +#include "pgxc/pgxc.h" +#include "pgxc/poolmgr.h" +#endif + /* We use a list of these to detect recursion in RewriteQuery */ typedef struct rewrite_event @@ -56,7 +61,15 @@ static void markQueryForLocking(Query *qry, Node *jtnode, static List *matchLocks(CmdType event, RuleLock *rulelocks, int varno, Query *parsetree); static Query *fireRIRrules(Query *parsetree, List *activeRIRs); - +#ifdef PGXC +static int GetRelPartColPos(const Query *query, const char *partColName); +static void ProcessHashValue(List **valuesList, const List *subList, const int node); +static void InitValuesList(List **valuesList[], int size); +static void DestroyValuesList(List **valuesList[]); +static void ProcessRobinValue(Oid relid, List **valuesList, + int size, const RangeTblEntry *values_rte); +static List *RewriteInsertStmt(Query *parsetree, RangeTblEntry *values_rte); +#endif /* * AcquireRewriteLocks - @@ -1619,6 +1632,12 @@ RewriteQuery(Query *parsetree, List *rewrite_events) Query *qual_product = NULL; List *rewritten = NIL; +#ifdef PGXC + List *parsetree_list = NIL; + List *qual_product_list = NIL; + ListCell *pt_cell = NULL; +#endif + /* * If the statement is an update, insert or delete - fire rules on it. * @@ -1681,6 +1700,11 @@ RewriteQuery(Query *parsetree, List *rewrite_events) rewriteTargetList(parsetree, rt_entry_relation, &attrnos); /* ... 
and the VALUES expression lists */ rewriteValuesRTE(values_rte, rt_entry_relation, attrnos); +#ifdef PGXC + if (IS_PGXC_COORDINATOR && + list_length(values_rte->values_lists) > 1) + parsetree_list = RewriteInsertStmt(parsetree, values_rte); +#endif } else { @@ -1688,7 +1712,11 @@ RewriteQuery(Query *parsetree, List *rewrite_events) rewriteTargetList(parsetree, rt_entry_relation, NULL); } } - + +#ifdef PGXC + if (parsetree_list == NIL) + { +#endif /* * Collect and apply the appropriate rules. */ @@ -1787,8 +1815,142 @@ RewriteQuery(Query *parsetree, List *rewrite_events) } heap_close(rt_entry_relation, NoLock); - } +#ifdef PGXC + } + else + { + foreach(pt_cell, parsetree_list) + { + Query *query; + + query = (Query *)lfirst(pt_cell); + + locks = matchLocks(event, rt_entry_relation->rd_rules, + result_relation, query); + + /* + * Collect and apply the appropriate rules. + */ + locks = matchLocks(event, rt_entry_relation->rd_rules, + result_relation, parsetree); + if (locks != NIL) + { + List *product_queries; + + + product_queries = fireRules(query, + result_relation, + event, + locks, + &instead, + &returning, + &qual_product); + + qual_product_list = lappend(qual_product_list, qual_product); + + /* + * If we got any product queries, recursively rewrite them --- but + * first check for recursion! 
+ */ + if (product_queries != NIL) + { + ListCell *n; + rewrite_event *rev; + + foreach(n, rewrite_events) + { + rev = (rewrite_event *) lfirst(n); + if (rev->relation == RelationGetRelid(rt_entry_relation) && + rev->event == event) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("infinite recursion detected in rules for relation \"%s\"", + RelationGetRelationName(rt_entry_relation)))); + } + + rev = (rewrite_event *) palloc(sizeof(rewrite_event)); + rev->relation = RelationGetRelid(rt_entry_relation); + rev->event = event; + rewrite_events = lcons(rev, rewrite_events); + + foreach(n, product_queries) + { + Query *pt = (Query *) lfirst(n); + List *newstuff; + + newstuff = RewriteQuery(pt, rewrite_events); + rewritten = list_concat(rewritten, newstuff); + } + + rewrite_events = list_delete_first(rewrite_events); + } + } + + /* + * If there is an INSTEAD, and the original query has a RETURNING, we + * have to have found a RETURNING in the rule(s), else fail. (Because + * DefineQueryRewrite only allows RETURNING in unconditional INSTEAD + * rules, there's no need to worry whether the substituted RETURNING + * will actually be executed --- it must be.) 
+ */ + if ((instead || qual_product != NULL) && + query->returningList && + !returning) + { + switch (event) + { + case CMD_INSERT: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot perform INSERT RETURNING on relation \"%s\"", + RelationGetRelationName(rt_entry_relation)), + errhint("You need an unconditional ON INSERT DO INSTEAD rule with a RETURNING clause."))); + break; + case CMD_UPDATE: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot perform UPDATE RETURNING on relation \"%s\"", + RelationGetRelationName(rt_entry_relation)), + errhint("You need an unconditional ON UPDATE DO INSTEAD rule with a RETURNING clause."))); + break; + case CMD_DELETE: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot perform DELETE RETURNING on relation \"%s\"", + RelationGetRelationName(rt_entry_relation)), + errhint("You need an unconditional ON DELETE DO INSTEAD rule with a RETURNING clause."))); + break; + default: + elog(ERROR, "unrecognized commandType: %d", + (int) event); + break; + } + } + } + heap_close(rt_entry_relation, NoLock); + } + } +#endif + + + + /* + * For INSERTs, the original query is done first; for UPDATE/DELETE, it is + * done last. This is needed because update and delete rule actions might + * not do anything if they are invoked after the update or delete is + * performed. The command counter increment between the query executions + * makes the deleted (and maybe the updated) tuples disappear so the scans + * for them in the rule actions cannot find them. + * + * If we found any unqualified INSTEAD, the original query is not done at + * all, in any form. Otherwise, we add the modified form if qualified + * INSTEADs were found, else the unmodified form. + */ +#ifdef PGXC + if (parsetree_list == NIL) + { +#endif /* * For INSERTs, the original query is done first; for UPDATE/DELETE, it is * done last. 
This is needed because update and delete rule actions might @@ -1818,11 +1980,52 @@ RewriteQuery(Query *parsetree, List *rewrite_events) rewritten = lappend(rewritten, parsetree); } } +#ifdef PGXC + } + else + { + int query_no = -1; + + foreach(pt_cell, parsetree_list) + { - return rewritten; -} + Query *query; + Query *qual; + query_no ++; + + query = (Query *)lfirst(pt_cell); + if (!instead) + { + if (query->commandType == CMD_INSERT) + { + if (qual_product_list != NIL) + { + qual = (Query *)list_nth(qual_product_list, + query_no); + rewritten = lcons(qual, rewritten); + } + else + rewritten = lcons(query, rewritten); + } + } + else + { + if (qual_product_list != NIL) + { + qual = (Query *)list_nth(qual_product_list, + query_no); + rewritten = lcons(qual, rewritten); + } + else + rewritten = lappend(rewritten, query); + } + } + } +#endif + return rewritten; +} /* * QueryRewrite - * Primary entry point to the query rewriter. @@ -1928,7 +2131,9 @@ QueryRewrite(Query *parsetree) if (query->querySource == QSRC_ORIGINAL) { Assert(query->canSetTag); +#ifndef PGXC Assert(!foundOriginalQuery); +#endif foundOriginalQuery = true; #ifndef USE_ASSERT_CHECKING break; @@ -1949,3 +2154,198 @@ QueryRewrite(Query *parsetree) return results; } + +#ifdef PGXC +static int +GetRelPartColPos(const Query *query, const char *partColName) +{ + ListCell *lc; + int rescol = -1; + + foreach(lc, query->targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + if (tle->resjunk) + continue; + + rescol += 1; + + /* + * See if we have a constant expression comparing against the + * designated partitioned column + */ + if (strcmp(tle->resname, partColName) == 0) + break; + } + + if (rescol == -1) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("Can't find partition column"))); + + return rescol; +} + +static void +ProcessHashValue(List **valuesList, const List *subList, const int node) +{ + valuesList[node - 1] = lappend(valuesList[node - 1], subList); +} + +static void 
+InitValuesList(List **valuesList[], int size) +{ + *valuesList = palloc0(size * sizeof(List *)); +} + +static void +DestroyValuesList(List **valuesList[]) +{ + pfree(*valuesList); + *valuesList = NIL; +} + +static void +ProcessRobinValue(Oid relid, List **valuesList, + int size, const RangeTblEntry *values_rte) +{ + List *values = values_rte->values_lists; + int length = values->length; + int dist; + int i, j; + int processNum = 0; + int node; + + /* get average insert value number of each node */ + if (length > NumDataNodes) + dist = length/NumDataNodes; + else + dist = 1; + + for(i = 0; i < size && processNum < length; i++) + { + node = GetRoundRobinNode(relid); + + /* assign insert value */ + for(j = 0; j < dist; j++) + { + processNum += 1; + valuesList[node - 1] = lappend(valuesList[node - 1], + list_nth(values, processNum - 1)); + } + } + + /* assign remained value */ + while(processNum < length) + { + processNum += 1; + node = GetRoundRobinNode(relid); + valuesList[node - 1] = lappend(valuesList[node - 1], + list_nth(values, processNum - 1)); + } +} + +static List * +RewriteInsertStmt(Query *query, RangeTblEntry *values_rte) +{ + ListCell *values_lc; + List *rwInsertList = NIL; + Query *element = NULL; + StringInfoData buf; + RangeTblRef *rtr = (RangeTblRef *) linitial(query->jointree->fromlist); + RangeTblEntry *rte; + RelationLocInfo *rte_loc_info; + char locatorType; + char *partColName; + List **valuesList; + int i; + + rte = (RangeTblEntry *) list_nth(query->rtable, query->resultRelation - 1); + rte_loc_info = GetRelationLocInfo(rte->relid); + locatorType = rte_loc_info->locatorType; + partColName = rte_loc_info->partAttrName; + + /* + * Do this first so that string is alloc'd in outer context not SPI's. 
+ */ + initStringInfo(&buf); + + switch(locatorType) + { + case LOCATOR_TYPE_HASH: + { + bool first = true; + int partColno; + ExecNodes *exec_nodes; + + InitValuesList(&valuesList, NumDataNodes); + + foreach(values_lc, values_rte->values_lists) + { + List *sublist = (List *)lfirst(values_lc); + + if (first) + { + partColno = GetRelPartColPos(query, partColName); + first = false; + } + + /* get the exec node according to partition column value */ + GetHashExecNodes(rte_loc_info, &exec_nodes, + (Expr *)list_nth(sublist, partColno)); + + Assert(exec_nodes->nodelist->length == 1); + + /* assign valueList to specified exec node */ + ProcessHashValue(valuesList, sublist, list_nth_int(exec_nodes->nodelist, 0)); + } + } + + goto collect; + + case LOCATOR_TYPE_RROBIN: + + InitValuesList(&valuesList, NumDataNodes); + /* assign valueList to specified exec node */ + ProcessRobinValue(rte->relid, valuesList, NumDataNodes, values_rte); + +collect: + /* produce query for relative datanodes */ + for(i = 0; i < NumDataNodes; i++) + { + if (valuesList[i] != NIL) + { + ExecNodes *execNodes = makeNode(ExecNodes); + execNodes->baselocatortype = rte_loc_info->locatorType; + execNodes->nodelist = lappend_int(execNodes->nodelist, i + 1); + + element = copyObject(query); + + rte = (RangeTblEntry *)list_nth(element->rtable, rtr->rtindex - 1); + rte->values_lists = valuesList[i]; + + get_query_def_from_valuesList(element, &buf); + element->sql_statement = pstrdup(buf.data); + element->execNodes = execNodes; + elog(DEBUG1, "deparsed sql statement is %s\n", element->sql_statement); + + resetStringInfo(&buf); + + rwInsertList = lappend(rwInsertList, element); + } + } + + DestroyValuesList(&valuesList); + break; + + default: /* distribute by replication: just do it as usual */ + rwInsertList = lappend(rwInsertList, query); + break; + } + + + return rwInsertList; +} +#endif + diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index e61d444..87ccde5 100644 --- 
a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -648,16 +648,18 @@ pg_analyze_and_rewrite(Node *parsetree, const char *query_string, querytree_list = pg_rewrite_query(query); #ifdef PGXC - if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) - { - ListCell *lc; - - foreach(lc, querytree_list) - { - Query *query = (Query *) lfirst(lc); - query->sql_statement = pstrdup(query_string); - } - } + if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) + { + ListCell *lc; + + foreach(lc, querytree_list) + { + Query *query = (Query *) lfirst(lc); + + if (query->sql_statement == NULL) + query->sql_statement = pstrdup(query_string); + } + } #endif TRACE_POSTGRESQL_QUERY_REWRITE_DONE(query_string); diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index eb704ce..247ca83 100644 --- a/src/backend/tcop/pquery.c +++ b/src/backend/tcop/pquery.c @@ -23,6 +23,7 @@ #include "pg_trace.h" #ifdef PGXC #include "pgxc/pgxc.h" +#include "pgxc/execRemote.h" #endif #include "tcop/pquery.h" #include "tcop/tcopprot.h" @@ -1239,6 +1240,12 @@ PortalRunMulti(Portal portal, bool isTopLevel, char *completionTag) { ListCell *stmtlist_item; +#ifdef PGXC + combineTag combine; + + combine.cmdType = CMD_UNKNOWN; + combine.data[0] = '\0'; +#endif /* * If the destination is DestRemoteExecute, change to DestNone. The @@ -1288,6 +1295,13 @@ PortalRunMulti(Portal portal, bool isTopLevel, portal->sourceText, portal->portalParams, dest, completionTag); +#ifdef PGXC + /* it's special for INSERT */ + if (IS_PGXC_COORDINATOR && + pstmt->commandType == CMD_INSERT) + HandleCmdComplete(pstmt->commandType, &combine, + completionTag, strlen(completionTag)); +#endif } else { @@ -1340,6 +1354,12 @@ PortalRunMulti(Portal portal, bool isTopLevel, * counts, so fake something up if necessary. (This could happen if the * original query was replaced by a DO INSTEAD rule.) 
*/ + +#ifdef PGXC + if (IS_PGXC_COORDINATOR && combine.data[0] != '\0') + strcpy(completionTag, combine.data); +#endif + if (completionTag && completionTag[0] == '\0') { if (portal->commandTag) @@ -1654,3 +1674,4 @@ DoPortalRewind(Portal portal) portal->portalPos = 0; portal->posOverflow = false; } + diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 5c521c1..507c643 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -2302,8 +2302,160 @@ deparse_query(Query *query, StringInfo buf, List *parentnamespace) { get_query_def(query, buf, parentnamespace, NULL, 0, 0); } -#endif +/* code borrowed from get_insert_query_def */ +void +get_query_def_from_valuesList(Query *query, StringInfo buf) +{ + + RangeTblEntry *select_rte = NULL; + RangeTblEntry *values_rte = NULL; + RangeTblEntry *rte; + char *sep; + ListCell *values_cell; + ListCell *l; + List *strippedexprs; + deparse_context context; + deparse_namespace dpns; + + /* + * Before we begin to examine the query, acquire locks on referenced + * relations, and fix up deleted columns in JOIN RTEs. This ensures + * consistent results. Note we assume it's OK to scribble on the passed + * querytree! + */ + AcquireRewriteLocks(query); + + context.buf = buf; + context.namespaces = NIL; + context.windowClause = NIL; + context.windowTList = NIL; + context.varprefix = (list_length(query->rtable) != 1); + context.prettyFlags = 0; + context.indentLevel = 0; + + dpns.rtable = query->rtable; + dpns.ctes = query->cteList; + dpns.subplans = NIL; + dpns.outer_plan = dpns.inner_plan = NULL; + dpns.remotequery = false; + + /* + * If it's an INSERT ... SELECT or VALUES (...), (...), ... there will be + * a single RTE for the SELECT or VALUES. 
+ */ + foreach(l, query->rtable) + { + rte = (RangeTblEntry *) lfirst(l); + + if (rte->rtekind == RTE_SUBQUERY) + { + if (select_rte) + elog(ERROR, "too many subquery RTEs in INSERT"); + select_rte = rte; + } + + if (rte->rtekind == RTE_VALUES) + { + if (values_rte) + elog(ERROR, "too many values RTEs in INSERT"); + values_rte = rte; + } + } + if (select_rte && values_rte) + elog(ERROR, "both subquery and values RTEs in INSERT"); + + /* + * Start the query with INSERT INTO relname + */ + rte = rt_fetch(query->resultRelation, query->rtable); + Assert(rte->rtekind == RTE_RELATION); + + appendStringInfo(buf, "INSERT INTO %s (", + generate_relation_name(rte->relid, NIL)); + + /* + * Add the insert-column-names list. To handle indirection properly, we + * need to look for indirection nodes in the top targetlist (if it's + * INSERT ... SELECT or INSERT ... single VALUES), or in the first + * expression list of the VALUES RTE (if it's INSERT ... multi VALUES). We + * assume that all the expression lists will have similar indirection in + * the latter case. + */ + if (values_rte) + values_cell = list_head((List *) linitial(values_rte->values_lists)); + else + values_cell = NULL; + strippedexprs = NIL; + sep = ""; + foreach(l, query->targetList) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + + elog(DEBUG1, "targetEntry type is %d\n)", tle->expr->type); + if (tle->resjunk || !IsA(tle->expr, Var)) + continue; /* ignore junk entries */ + + appendStringInfoString(buf, sep); + sep = ", "; + + /* + * Put out name of target column; look in the catalogs, not at + * tle->resname, since resname will fail to track RENAME. + */ + appendStringInfoString(buf,quote_identifier(get_relid_attribute_name(rte->relid, tle->resno))); + + /* + * Print any indirection needed (subfields or subscripts), and strip + * off the top-level nodes representing the indirection assignments. 
+ */ + if (values_cell) + { + /* we discard the stripped expression in this case */ + processIndirection((Node *) lfirst(values_cell), &context, true); + values_cell = lnext(values_cell); + } + else + { + /* we keep a list of the stripped expressions in this case */ + strippedexprs = lappend(strippedexprs, processIndirection((Node *) tle->expr, &context, true)); + } + } + appendStringInfo(buf, ") "); + + if (select_rte) + { + /* Add the SELECT */ + get_query_def(select_rte->subquery, buf, NIL, NULL, + context.prettyFlags, context.indentLevel); + } + else if (values_rte) + { + /* A WITH clause is possible here */ + get_with_clause(query, &context); + /* Add the multi-VALUES expression lists */ + get_values_def(values_rte->values_lists, &context); + } + else + { + /* A WITH clause is possible here */ + get_with_clause(query, &context); + /* Add the single-VALUES expression list */ + appendContextKeyword(&context, "VALUES (", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 2); + get_rule_expr((Node *) strippedexprs, &context, false); + appendStringInfoChar(buf, ')'); + } + + /* Add RETURNING if present */ + if (query->returningList) + { + appendContextKeyword(&context, " RETURNING", + -PRETTYINDENT_STD, PRETTYINDENT_STD, 1); + get_target_list(query->returningList, &context, NULL); + } +} +#endif /* ---------- * get_query_def - Parse back one query parsetree * diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 7f4b20c..e2357b0 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -26,6 +26,7 @@ #include "nodes/value.h" #ifdef PGXC #include "access/tupdesc.h" +#include "pgxc/locator.h" #endif /* Possible sources of a Query */ @@ -152,6 +153,7 @@ typedef struct Query #ifdef PGXC /* need this info for PGXC Planner, may be temporary */ char *sql_statement; /* original query */ + ExecNodes *execNodes; /* execute nodes */ #endif } Query; diff --git a/src/include/optimizer/planner.h b/src/include/optimizer/planner.h index 
182310e..e25ca45 100644 --- a/src/include/optimizer/planner.h +++ b/src/include/optimizer/planner.h @@ -36,5 +36,9 @@ extern Plan *subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo **subroot); extern Expr *expression_planner(Expr *expr); +#ifdef PGXC +extern void GetHashExecNodes(RelationLocInfo *rel_loc_info, + ExecNodes **exec_nodes, const Expr *expr); +#endif #endif /* PLANNER_H */ diff --git a/src/include/pgxc/execRemote.h b/src/include/pgxc/execRemote.h index 8d7a348..8f6c10a 100644 --- a/src/include/pgxc/execRemote.h +++ b/src/include/pgxc/execRemote.h @@ -25,6 +25,9 @@ #include "nodes/pg_list.h" #include "tcop/dest.h" #include "utils/snapshot.h" +#ifdef PGXC +#include "tcop/pquery.h" +#endif /* Outputs of handle_response() */ #define RESPONSE_EOF EOF @@ -138,6 +141,10 @@ extern void ExecEndRemoteQuery(RemoteQueryState *step); extern void ExecRemoteUtility(RemoteQuery *node); extern int handle_response(PGXCNodeHandle * conn, RemoteQueryState *combiner); +#ifdef PGXC +extern void HandleCmdComplete(CmdType commandType, combineTag *combine, const char *msg_body, + size_t len); +#endif extern bool FetchTuple(RemoteQueryState *combiner, TupleTableSlot *slot); extern void BufferConnection(PGXCNodeHandle *conn); diff --git a/src/include/tcop/pquery.h b/src/include/tcop/pquery.h index 4919dc5..e947989 100644 --- a/src/include/tcop/pquery.h +++ b/src/include/tcop/pquery.h @@ -17,6 +17,13 @@ #include "nodes/parsenodes.h" #include "utils/portal.h" +#ifdef PGXC +typedef struct combineTag +{ + CmdType cmdType; + char data[COMPLETION_TAG_BUFSIZE]; +} combineTag; +#endif extern PGDLLIMPORT Portal ActivePortal; diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index b8f8a9d..30cd971 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -17,7 +17,9 @@ #include "fmgr.h" #include "lib/stringinfo.h" #include "nodes/parsenodes.h" - +#ifdef PGXC +#include "lib/stringinfo.h" +#endif /* * Defined in adt/ */ @@ -599,6 
+601,7 @@ extern char *deparse_expression(Node *expr, List *dpcontext, #ifdef PGXC extern List *deparse_context_for_remotequery(const char *aliasname, Oid relid); extern List *deparse_context_for(const char *aliasname, Oid relid); +extern void get_query_def_from_valuesList(Query *query, StringInfo buf); extern void deparse_query(Query *query, StringInfo buf, List *parentnamespace); #endif extern List *deparse_context_for_plan(Node *plan, Node *outer_plan, ----------------------------------------------------------------------- Summary of changes: src/backend/pgxc/plan/planner.c | 40 ++++- src/backend/pgxc/pool/execRemote.c | 67 ++++++ src/backend/rewrite/rewriteHandler.c | 410 +++++++++++++++++++++++++++++++++- src/backend/tcop/postgres.c | 22 +- src/backend/tcop/pquery.c | 21 ++ src/backend/utils/adt/ruleutils.c | 154 +++++++++++++- src/include/nodes/parsenodes.h | 2 + src/include/optimizer/planner.h | 4 + src/include/pgxc/execRemote.h | 7 + src/include/tcop/pquery.h | 7 + src/include/utils/builtins.h | 5 +- 11 files changed, 721 insertions(+), 18 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-01-25 05:29:17
|
Project "Postgres-XC". The branch, ha_support has been created at 497b27848af35363b745dbce20d2b8cb806c43dc (commit) - Log ----------------------------------------------------------------- commit 497b27848af35363b745dbce20d2b8cb806c43dc Author: Michael P <mic...@us...> Date: Tue Jan 25 14:15:25 2011 +0900 Support for EXECUTE DIRECT EXECUTE DIRECT is a utility query allowing to launch queries directly on targetted PGXC nodes. EXECUTE DIRECT ON (COORDINATOR num | NODE num) 'query'; This implementation contains the following use restrictions: - only a superuser is allowed to use it - DML queries (DELETE, INSERT, UPDATE) cannot be launched with it (easy to break data consistency) - utilities cannot be launched launched on local coordinator - utilities cannot be launched inside a transaction block (though SELECT queries in EXECUTE DIRECT keep the same visibility if used in a transaction block). - only one query can be launched at a time - query can be launched on a unique node This feature will be used to have a look at 2PC catalog data when nodes crash and to clean up 2PC transactions on targetted nodes. Ex: EXECUTE DIRECT ON NODE 1 'SELECT * from pg_prepared_xact()'; EXECUTE DIRECT ON COORDINATOR 2 'COMMIT PREPARED ''foo'''; diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 0e9aa43..cb7a1a8 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -264,8 +264,9 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) * If we are on the coordinator, we always want to use * the remote query path unless it is a pg_catalog table. 
*/ - if (IS_PGXC_COORDINATOR - && get_rel_namespace(rte->relid) != PG_CATALOG_NAMESPACE) + if (IS_PGXC_COORDINATOR && + !IsConnFromCoord() && + get_rel_namespace(rte->relid) != PG_CATALOG_NAMESPACE) add_path(rel, create_remotequery_path(root, rel)); else { diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 5b2e03f..40777bf 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -41,8 +41,10 @@ #include "rewrite/rewriteManip.h" #ifdef PGXC #include "pgxc/pgxc.h" +#include "access/gtm.h" #include "pgxc/planner.h" #include "tcop/tcopprot.h" +#include "pgxc/poolmgr.h" #endif #include "utils/rel.h" @@ -2066,9 +2068,160 @@ transformExplainStmt(ParseState *pstate, ExplainStmt *stmt) static Query * transformExecDirectStmt(ParseState *pstate, ExecDirectStmt *stmt) { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("Support for EXECUTE DIRECT is temporary broken"))); + Query *result = makeNode(Query); + bool is_coordinator = stmt->coordinator; + char *query = stmt->query; + List *nodelist = stmt->nodes; + ListCell *nodeitem; + RemoteQuery *step = makeNode(RemoteQuery); + bool is_local = false; + List *raw_parsetree_list; + ListCell *raw_parsetree_item; + + if (list_length(nodelist) > 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("Support for EXECUTE DIRECT on multiple nodes is not available yet"))); + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use EXECUTE DIRECT"))); + + /* Check if execute direct is local and if node number is correct*/ + foreach(nodeitem, nodelist) + { + int nodenum = intVal(lfirst(nodeitem)); + + if (nodenum < 1 || + (!is_coordinator && nodenum > NumDataNodes) || + (is_coordinator && nodenum > NumCoords)) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Node Number %d is incorrect", nodenum))); + + if (nodenum == PGXCNodeId && is_coordinator) + is_local = true; + } + + /* 
Transform the query into a raw parse list */ + raw_parsetree_list = pg_parse_query(query); + + /* EXECUTE DIRECT can just be executed with a single query */ + if (list_length(raw_parsetree_list) > 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("EXECUTE DIRECT cannot execute multiple queries"))); + + /* + * Analyze the Raw parse tree + * EXECUTE DIRECT is restricted to one-step usage + */ + foreach(raw_parsetree_item, raw_parsetree_list) + { + Node *parsetree = (Node *) lfirst(raw_parsetree_item); + result = parse_analyze(parsetree, query, NULL, 0); + } + + /* Needed by planner */ + result->sql_statement = pstrdup(query); + + /* Default list of parameters to set */ + step->is_single_step = true; + step->sql_statement = NULL; + step->exec_nodes = NULL; + step->combine_type = COMBINE_TYPE_NONE; + step->simple_aggregates = NIL; + step->sort = NULL; + step->distinct = NULL; + step->read_only = true; + step->force_autocommit = false; + step->cursor = NULL; + step->exec_type = EXEC_ON_DATANODES; + step->paramval_data = NULL; + step->paramval_len = 0; + + step->relname = NULL; + step->remotejoin = false; + step->partitioned_replicated = false; + step->reduce_level = 0; + step->base_tlist = NIL; + step->outer_alias = NULL; + step->inner_alias = NULL; + step->outer_reduce_level = 0; + step->inner_reduce_level = 0; + step->outer_relids = NULL; + step->inner_relids = NULL; + step->inner_statement = NULL; + step->outer_statement = NULL; + step->join_condition = NULL; + + /* Change the list of nodes that will be executed for the query and others */ + step->exec_nodes = (ExecNodes *) palloc(sizeof(ExecNodes)); + step->exec_nodes->primarynodelist = NIL; + step->exec_nodes->nodelist = NIL; + step->exec_nodes->expr = NIL; + step->force_autocommit = false; + step->combine_type = COMBINE_TYPE_SAME; + step->read_only = true; + step->exec_direct_type = EXEC_DIRECT_NONE; + + /* Set up EXECUTE DIRECT flag */ + if (is_local) + { + if (result->commandType == 
CMD_UTILITY) + step->exec_direct_type = EXEC_DIRECT_LOCAL_UTILITY; + else + step->exec_direct_type = EXEC_DIRECT_LOCAL; + } + else + { + if (result->commandType == CMD_UTILITY) + step->exec_direct_type = EXEC_DIRECT_UTILITY; + else if (result->commandType == CMD_SELECT) + step->exec_direct_type = EXEC_DIRECT_SELECT; + else if (result->commandType == CMD_INSERT) + step->exec_direct_type = EXEC_DIRECT_INSERT; + else if (result->commandType == CMD_UPDATE) + step->exec_direct_type = EXEC_DIRECT_UPDATE; + else if (result->commandType == CMD_DELETE) + step->exec_direct_type = EXEC_DIRECT_DELETE; + } + + /* + * Features not yet supported + * DML can be launched without errors but this could compromise data + * consistency, so block it. + */ + if (step->exec_direct_type == EXEC_DIRECT_DELETE + || step->exec_direct_type == EXEC_DIRECT_UPDATE + || step->exec_direct_type == EXEC_DIRECT_INSERT) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("EXECUTE DIRECT cannot execute DML queries"))); + if (step->exec_direct_type == EXEC_DIRECT_LOCAL_UTILITY) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("EXECUTE DIRECT cannot execute locally utility queries"))); + + /* Build Execute Node list */ + foreach(nodeitem, nodelist) + { + int nodenum = intVal(lfirst(nodeitem)); + step->exec_nodes->nodelist = lappend_int(step->exec_nodes->nodelist, nodenum); + } + + step->sql_statement = pstrdup(query); + + if (is_coordinator) + step->exec_type = EXEC_ON_COORDS; + else + step->exec_type = EXEC_ON_DATANODES; + + /* Associate newly-created RemoteQuery node to the returned Query result */ + result->utilityStmt = (Node *) step; + + return result; } #endif diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 0c76d7a..1ed5686 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -6534,16 +6534,17 @@ opt_analyze: /***************************************************************************** * * QUERY: - * EXECUTE DIRECT 
ON (COORDINATOR | NODE num, ...) query + * EXECUTE DIRECT ON (COORDINATOR num, ... | NODE num, ...) query * *****************************************************************************/ -ExecDirectStmt: EXECUTE DIRECT ON COORDINATOR DirectStmt +ExecDirectStmt: EXECUTE DIRECT ON COORDINATOR coord_list DirectStmt { ExecDirectStmt *n = makeNode(ExecDirectStmt); n->coordinator = TRUE; n->nodes = NIL; - |
From: Michael P. <mic...@us...> - 2011-01-20 08:53:46
|
Project "Postgres-XC". The branch, master has been updated via c19b46835e586023fbaaf93c3c84e4898f4fe6bd (commit) via 05fc3e19fcafeb61b057a88b3c0c1285dff1a7a7 (commit) from efcf364436c54517788299e49d1d755553d854dd (commit) - Log ----------------------------------------------------------------- commit c19b46835e586023fbaaf93c3c84e4898f4fe6bd Author: Michael P <mic...@us...> Date: Thu Jan 20 17:40:49 2011 +0900 Support for correlated DELETE for replicated tables Implementation of correlated delete against replicated tables. written by Andrei Martsinchyk diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index a97b4fa..01c51c6 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -4725,7 +4725,8 @@ create_remotedelete_plan(PlannerInfo *root, Plan *topplan) StringInfo buf; Oid nspid; char *nspname; - Var *ctid; + Var *ctid; + /* Get target table */ ttab = (RangeTblEntry *) list_nth(parse->rtable, parse->resultRelation - 1); @@ -4814,31 +4815,118 @@ create_remotedelete_plan(PlannerInfo *root, Plan *topplan) */ fstep = make_remotequery(NIL, ttab, NIL, ttab->relid); - innerPlan(fstep) = topplan; - /* - * TODO replicated handling: add extra step with step query - * SELECT * FROM ttab WHERE ctid = ? and final step with step query - * DELETE FROM ttab WHERE * = ? 
- */ - appendStringInfoString(buf, " WHERE ctid = $1"); - fstep->sql_statement = pstrdup(buf->data); - fstep->combine_type = COMBINE_TYPE_SUM; - fstep->read_only = false; - fstep->exec_nodes = makeNode(ExecNodes); - fstep->exec_nodes->baselocatortype = rel_loc_info->locatorType; - fstep->exec_nodes->tableusagetype = TABLE_USAGE_TYPE_USER; - fstep->exec_nodes->primarynodelist = NULL; - fstep->exec_nodes->nodelist = NULL; - fstep->exec_nodes->relid = ttab->relid; - fstep->exec_nodes->accesstype = RELATION_ACCESS_UPDATE; - - /* first and only target entry of topplan is ctid, reference it */ - ctid = makeVar(INNER, 1, TIDOID, -1, 0); - fstep->exec_nodes->expr = (Var *) ctid; + if (rel_loc_info->locatorType == LOCATOR_TYPE_REPLICATED) + { + /* + * For replicated case we need two extra steps. One is to determine + * all values by CTID on the node from which the tuple has come, next + * is to remove all rows with these values on all nodes + */ + RemoteQuery *xstep; + List *xtlist = NIL; + StringInfo xbuf = makeStringInfo(); + int natts = get_relnatts(ttab->relid); + int att; + + appendStringInfoString(xbuf, "SELECT "); + appendStringInfoString(buf, " WHERE"); + + /* + * Populate projections of the extra SELECT step and WHERE clause of + * the final DELETE step + */ + for (att = 1; att <= natts; att++) + { + TargetEntry *tle; + Var *expr; + HeapTuple tp; + + tp = SearchSysCache(ATTNUM, + ObjectIdGetDatum(ttab->relid), + Int16GetDatum(att), + 0, 0); + if (HeapTupleIsValid(tp)) + { + Form_pg_attribute att_tup = (Form_pg_attribute) GETSTRUCT(tp); + + /* add comma before all except first attributes */ + if (att > 1) + { + appendStringInfoString(xbuf, ", "); + appendStringInfoString(buf, " AND"); + } + appendStringInfoString(xbuf, NameStr(att_tup->attname)); + appendStringInfo(buf, " %s = $%d", NameStr(att_tup->attname), att); + + expr = makeVar(att, att, att_tup->atttypid, + att_tup->atttypmod, 0); + tle = makeTargetEntry((Expr *) expr, att, + NameStr(att_tup->attname), 
false); + xtlist = lappend(xtlist, tle); + ReleaseSysCache(tp); + } + else + elog(ERROR, "cache lookup failed for attribute %d of relation %u", + att, ttab->relid); + } + + /* complete SELECT command */ + appendStringInfo(xbuf, " FROM %s.%s WHERE ctid = $1", + quote_identifier(nspname), + quote_identifier(ttab->relname)); + + /* build up the extra select step */ + xstep = make_remotequery(xtlist, ttab, NIL, ttab->relid); + innerPlan(xstep) = topplan; + xstep->sql_statement = pstrdup(xbuf->data); + xstep->read_only = true; + xstep->exec_nodes = makeNode(ExecNodes); + xstep->exec_nodes->baselocatortype = rel_loc_info->locatorType; + xstep->exec_nodes->tableusagetype = TABLE_USAGE_TYPE_USER; + xstep->exec_nodes->primarynodelist = NULL; + xstep->exec_nodes->nodelist = NULL; + xstep->exec_nodes->relid = ttab->relid; + xstep->exec_nodes->accesstype = RELATION_ACCESS_READ; + + /* first and only target entry of topplan is ctid, reference it */ + ctid = makeVar(INNER, 1, TIDOID, -1, 0); + xstep->exec_nodes->expr = (Expr *) ctid; + + pfree(xbuf->data); + pfree(xbuf); + + /* build up the final delete step */ + innerPlan(fstep) = (Plan *) xstep; + fstep->sql_statement = pstrdup(buf->data); + fstep->combine_type = COMBINE_TYPE_SAME; + fstep->read_only = false; + fstep->exec_nodes = GetRelationNodes(rel_loc_info, NULL, + RELATION_ACCESS_UPDATE); + } + else + { + /* build up the final delete step */ + innerPlan(fstep) = topplan; + appendStringInfoString(buf, " WHERE ctid = $1"); + fstep->sql_statement = pstrdup(buf->data); + fstep->combine_type = COMBINE_TYPE_SUM; + fstep->read_only = false; + fstep->exec_nodes = makeNode(ExecNodes); + fstep->exec_nodes->baselocatortype = rel_loc_info->locatorType; + fstep->exec_nodes->tableusagetype = TABLE_USAGE_TYPE_USER; + fstep->exec_nodes->primarynodelist = NULL; + fstep->exec_nodes->nodelist = NULL; + fstep->exec_nodes->relid = ttab->relid; + fstep->exec_nodes->accesstype = RELATION_ACCESS_UPDATE; + + /* first and only target entry of 
topplan is ctid, reference it */ + ctid = makeVar(INNER, 1, TIDOID, -1, 0); + fstep->exec_nodes->expr = (Expr *) ctid; + } pfree(buf->data); pfree(buf); - return fstep; + return (Plan *) fstep; } #endif commit 05fc3e19fcafeb61b057a88b3c0c1285dff1a7a7 Author: Michael P <mic...@us...> Date: Thu Jan 20 17:38:39 2011 +0900 Base code to support correlated DELETE and UPDATE Base code to support correlated deletes and updates, implementation of correlated delete without replicated case. depends on executor_bugfixes patch. Patch written by Andrei Martsinchyk diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index ad227f4..40cb4f5 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -855,6 +855,9 @@ _copyRemoteQuery(RemoteQuery *from) COPY_SCALAR_FIELD(inner_reduce_level); COPY_BITMAPSET_FIELD(outer_relids); COPY_BITMAPSET_FIELD(inner_relids); + COPY_STRING_FIELD(inner_statement); + COPY_STRING_FIELD(outer_statement); + COPY_STRING_FIELD(join_condition); return newnode; } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index e91eb57..a97b4fa 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -39,6 +39,7 @@ #include "utils/builtins.h" #include "utils/syscache.h" #include "catalog/pg_proc.h" +#include "catalog/pg_type.h" #include "executor/executor.h" #endif #include "utils/lsyscache.h" @@ -636,6 +637,8 @@ create_remotejoin_plan(PlannerInfo *root, JoinPath *best_path, Plan *parent, Pla { NestLoop *nest_parent; JoinReduceInfo join_info; + RemoteQuery *outer = NULL; + RemoteQuery *inner = NULL; if (!enable_remotejoin) return parent; @@ -658,21 +661,26 @@ create_remotejoin_plan(PlannerInfo *root, JoinPath *best_path, Plan *parent, Pla else nest_parent = (NestLoop *)parent; + /* + * Now RemoteQuery subnode is behind Matherial but this may be changed later + */ + if (IsA(outer_plan, Material) && IsA(outer_plan->lefttree, 
RemoteQuery)) + outer = (RemoteQuery *) outer_plan->lefttree; + else if (IsA(outer_plan, RemoteQuery)) + outer = (RemoteQuery *) outer_plan; + + if (IsA(inner_plan, Material) && IsA(inner_plan->lefttree, RemoteQuery)) + inner = (RemoteQuery *) inner_plan->lefttree; + else if (IsA(inner_plan, RemoteQuery)) + inner = (RemoteQuery *) inner_plan; + + /* check if both the nodes qualify for reduction */ - if (IsA(outer_plan, Material) && - IsA(((Material *) outer_plan)->plan.lefttree, RemoteQuery) && - IsA(inner_plan, Material) && - IsA(((Material *) inner_plan)->plan.lefttree, RemoteQuery)) + if (outer && inner) { int i; List *rtable_list = NIL; - Material *outer_mat = (Material *)outer_plan; - Material *inner_mat = (Material *)inner_plan; - - RemoteQuery *outer = (RemoteQuery *)outer_mat->plan.lefttree; - RemoteQuery *inner = (RemoteQuery *)inner_mat->plan.lefttree; - /* * Check if both these plans are from the same remote node. If yes, * replace this JOIN along with it's two children with one equivalent @@ -697,7 +705,7 @@ create_remotejoin_plan(PlannerInfo *root, JoinPath *best_path, Plan *parent, Pla { RemoteQuery *result; Plan *result_plan; - StringInfoData targets, clauses, scan_clauses, fromlist; + StringInfoData targets, clauses, scan_clauses, fromlist, join_condition; StringInfoData squery; List *parent_vars, *out_tlist = NIL, *in_tlist = NIL, *base_tlist; ListCell *l; @@ -769,13 +777,13 @@ create_remotejoin_plan(PlannerInfo *root, JoinPath *best_path, Plan *parent, Pla } else { - /* + /* * there is no local bound clause, all the clauses are remote * scan clauses */ remote_scan_clauses = nest_parent->join.plan.qual; } - + /* generate the tlist for the new RemoteScan node using out_tlist, in_tlist */ initStringInfo(&targets); create_remote_target_list(root, &targets, out_tlist, in_tlist, @@ -830,6 +838,9 @@ create_remotejoin_plan(PlannerInfo *root, JoinPath *best_path, Plan *parent, Pla result->outer_reduce_level = outer->reduce_level; result->inner_relids = 
in_relids; result->outer_relids = out_relids; + result->inner_statement = pstrdup(inner->sql_statement); + result->outer_statement = pstrdup(outer->sql_statement); + result->join_condition = NULL; result->exec_nodes = copyObject(join_info.exec_nodes); appendStringInfo(&fromlist, " %s (%s) %s", @@ -896,22 +907,27 @@ create_remotejoin_plan(PlannerInfo *root, JoinPath *best_path, Plan *parent, Pla /* generate the squery for this node */ /* NOTE: it's assumed that the remote_paramNums array is - * filled in the same order as we create the query here. + * filled in the same order as we create the query here. * - * TODO: we need some way to ensure that the remote_paramNums - * is filled in the same order as the order in which the clauses + * TODO: we need some way to ensure that the remote_paramNums + * is filled in the same order as the order in which the clauses * are added in the query below. */ initStringInfo(&squery); appendStringInfo(&squery, "SELECT %s FROM %s", targets.data, fromlist.data); + initStringInfo(&join_condition); if (clauses.data[0] != '\0') - appendStringInfo(&squery, " %s %s", use_where? " WHERE " : " ON ", clauses.data); + appendStringInfo(&join_condition, " %s %s", use_where? " WHERE " : " ON ", clauses.data); if (scan_clauses.data[0] != '\0') - appendStringInfo(&squery, " %s %s", use_where? " AND " : " WHERE ", scan_clauses.data); + appendStringInfo(&join_condition, " %s %s", use_where? " AND " : " WHERE ", scan_clauses.data); + + if (join_condition.data[0] != '\0') + appendStringInfoString(&squery, join_condition.data); result->sql_statement = squery.data; + result->join_condition = join_condition.data; /* don't forget to increment the index for the next time around! 
*/ result->reduce_level = root->rs_alias_index++; @@ -939,7 +955,7 @@ create_remotejoin_plan(PlannerInfo *root, JoinPath *best_path, Plan *parent, Pla result_plan->plan_rows = outer_plan->plan_rows; result_plan->plan_width = outer_plan->plan_width; - return (Plan *)make_material(result_plan); + return (Plan *) make_material(result_plan); } } @@ -4656,4 +4672,173 @@ findReferencedVars(List *parent_vars, Plan *plan, List **out_tlist, Relids *out_ *out_tlist = tlist; *out_relids = relids; } + + +/* + * create_remoteinsert_plan() + * + * Dummy + */ +Plan * +create_remoteinsert_plan(PlannerInfo *root, Plan *topplan) +{ + return topplan; +} + + +/* + * create_remoteupdate_plan() + * + * Dummy + */ +Plan * +create_remoteupdate_plan(PlannerInfo *root, Plan *topplan) +{ + return topplan; +} + +/* + * create_remotedelete_plan() + * + * Builds up a final node of the plan executing DELETE command. + * + * If target table is on coordinator (like catalog tables) the plan is left + * unchanged and delete will be handled using standard postgres procedure. + * + * If topmost node of the plan is a RemoteQuery the step query looks like + * SELECT ctid FROM target_table WHERE condition, and we should convert it to + * DELETE FROM target_table WHERE condition. + * + * In correlated case the step query looks like + * SELECT target_table.ctid FROM target_table, other_tables WHERE condition, and + * we should convert it to DELETE FROM target_table USING other_tables WHERE condition. + * + * XXX Is it ever possible if the topmost node is not a RemoteQuery? + */ +Plan * +create_remotedelete_plan(PlannerInfo *root, Plan *topplan) +{ + Query *parse = root->parse; + RangeTblEntry *ttab; + RelationLocInfo *rel_loc_info; + RemoteQuery *fstep; + StringInfo buf; + Oid nspid; + char *nspname; + Var *ctid; + + /* Get target table */ + ttab = (RangeTblEntry *) list_nth(parse->rtable, parse->resultRelation - 1); + /* Bad relation ? 
*/ + if (ttab == NULL || ttab->rtekind != RTE_RELATION) + return topplan; + + /* Get location info of the target table */ + rel_loc_info = GetRelationLocInfo(ttab->relid); + if (rel_loc_info == NULL) + return topplan; + + buf = makeStringInfo(); + + /* Compose DELETE FROM target_table */ + nspid = get_rel_namespace(ttab->relid); + nspname = get_namespace_name(nspid); + + appendStringInfo(buf, "DELETE FROM %s.%s", quote_identifier(nspname), + quote_identifier(ttab->relname)); + + /* See if we can push down DELETE */ + if (IsA(topplan, RemoteQuery)) + { + char *query; + + fstep = (RemoteQuery *) topplan; + query = fstep->sql_statement; + + if (strncmp(query, "SELECT ctid", 11) == 0) + { + /* + * Single table case + * We need to find position of the WHERE keyword in the string and + * append to the buffer part of original string starting from the + * position found. It is possible WHERE clause is absent (DELETE ALL) + * In this case buffer already has new step query + */ + char *where = strstr(query, " WHERE "); + if (where) + appendStringInfoString(buf, where); + } + else + { + /* + * multi-table case + * Assuming the RemoteQuery is created in create_remotejoin_plan(). + * If the final RemoteQuery is for correlated delete outer_statement + * is just a SELECT FROM target_table, outer_statement is correlated + * part and we can put it into USING clause. + * Join type should be plain jon (comma-separated list) and all + * conditions are in WHERE clause. + * No GROUP BY or ORDER BY clauses expected. + * If create_remotejoin_plan is modified the code below should be + * revisited. 
+ */ + /* + * In expressions target table is referenced as outer_alias, append + * alias name before USING clause + */ + appendStringInfo(buf, " %s USING ", fstep->outer_alias); + + /* Make up USING clause */ + appendStringInfo(buf, "(%s) %s ", fstep->inner_statement, fstep->inner_alias); + + /* Append WHERE clause */ + appendStringInfoString(buf, fstep->join_condition); + } + /* replace step query */ + pfree(fstep->sql_statement); + fstep->sql_statement = pstrdup(buf->data); + /* set combine_type, it is COMBINE_TYPE_NONE for SELECT */ + fstep->combine_type = rel_loc_info->locatorType == LOCATOR_TYPE_REPLICATED ? + COMBINE_TYPE_SAME : COMBINE_TYPE_SUM; + fstep->read_only = false; + + pfree(buf->data); + pfree(buf); + + return topplan; + } + + /* + * Top plan will return CTIDs and we should delete tuples with these CTIDs + * on the nodes. To determine target node + */ + fstep = make_remotequery(NIL, ttab, NIL, ttab->relid); + + innerPlan(fstep) = topplan; + /* + * TODO replicated handling: add extra step with step query + * SELECT * FROM ttab WHERE ctid = ? and final step with step query + * DELETE FROM ttab WHERE * = ? 
+ */ + appendStringInfoString(buf, " WHERE ctid = $1"); + fstep->sql_statement = pstrdup(buf->data); + fstep->combine_type = COMBINE_TYPE_SUM; + fstep->read_only = false; + fstep->exec_nodes = makeNode(ExecNodes); + fstep->exec_nodes->baselocatortype = rel_loc_info->locatorType; + fstep->exec_nodes->tableusagetype = TABLE_USAGE_TYPE_USER; + fstep->exec_nodes->primarynodelist = NULL; + fstep->exec_nodes->nodelist = NULL; + fstep->exec_nodes->relid = ttab->relid; + fstep->exec_nodes->accesstype = RELATION_ACCESS_UPDATE; + + /* first and only target entry of topplan is ctid, reference it */ + ctid = makeVar(INNER, 1, TIDOID, -1, 0); + fstep->exec_nodes->expr = (Var *) ctid; + + pfree(buf->data); + pfree(buf); + + return fstep; +} #endif diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index dc6ff35..8ddf5a8 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -226,6 +226,27 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams) lfirst(lp) = set_plan_references(glob, subplan, subrtable); } +#ifdef PGXC + /* + * PGXC should apply INSERT/UPDATE/DELETE to a datanode. We are overriding + * normal Postgres behavior by modifying final plan or by adding a node on + * top of it. 
+ */ + if (IS_PGXC_COORDINATOR) + switch (parse->commandType) + { + case CMD_INSERT: + top_plan = create_remoteinsert_plan(root, top_plan); + break; + case CMD_UPDATE: + top_plan = create_remoteupdate_plan(root, top_plan); + break; + case CMD_DELETE: + top_plan = create_remotedelete_plan(root, top_plan); + break; + } +#endif + /* build the PlannedStmt result */ result = makeNode(PlannedStmt); diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index ca2e2a2..5804787 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1328,9 +1328,11 @@ create_remotequery_path(PlannerInfo *root, RelOptInfo *rel) /* PGXCTODO - set cost properly */ cost_seqscan(pathnode, root, rel); - /* - * Insert a materialization plan above this temporarily - * until we better handle multiple steps using the same connection. + /* + * ReScan of RemoteQuery is not implemented so we have to materialize + * results. Anyway, it may be more effective to materialize results then + * execute query against remote query multiple times. 
+ * Subject for future optimization */ pathnode = create_material_path(rel, pathnode); diff --git a/src/backend/pgxc/plan/planner.c b/src/backend/pgxc/plan/planner.c index 1a56b44..ace4635 100644 --- a/src/backend/pgxc/plan/planner.c +++ b/src/backend/pgxc/plan/planner.c @@ -1832,6 +1832,9 @@ makeRemoteQuery(void) result->inner_reduce_level = 0; result->outer_relids = NULL; result->inner_relids = NULL; + result->inner_statement = NULL; + result->outer_statement = NULL; + result->join_condition = NULL; return result; } @@ -2784,16 +2787,16 @@ pgxc_planner(Query *query, int cursorOptions, ParamListInfo boundParams) get_plan_nodes_command(query_step, root); - if (query_step->exec_nodes == NULL) + /* standard planner handles correlated UPDATE or DELETE */ + if ((query->commandType == CMD_UPDATE || query->commandType == CMD_DELETE) + && list_length(query->rtable) > 1) { - /* Do not yet allow multi-node correlated UPDATE or DELETE */ - if (query->commandType == CMD_UPDATE || query->commandType == CMD_DELETE) - { - ereport(ERROR, - (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), - (errmsg("UPDATE and DELETE that are correlated or use non-immutable functions not yet supported")))); - } + result = standard_planner(query, cursorOptions, boundParams); + return result; + } + if (query_step->exec_nodes == NULL) + { /* * Processing guery against catalog tables, or multi-step command. 
* Run through standard planner diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index c1d191e..4fbb11f 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -121,5 +121,8 @@ extern void extract_query_dependencies(List *queries, #ifdef PGXC extern Var *search_tlist_for_var(Var *var, List *jtlist); +extern Plan *create_remoteinsert_plan(PlannerInfo *root, Plan *topplan); +extern Plan *create_remoteupdate_plan(PlannerInfo *root, Plan *topplan); +extern Plan *create_remotedelete_plan(PlannerInfo *root, Plan *topplan); #endif #endif /* PLANMAIN_H */ diff --git a/src/include/pgxc/planner.h b/src/include/pgxc/planner.h index 61cb6d3..42dd2b6 100644 --- a/src/include/pgxc/planner.h +++ b/src/include/pgxc/planner.h @@ -104,6 +104,9 @@ typedef struct int inner_reduce_level; Relids outer_relids; Relids inner_relids; + char *inner_statement; + char *outer_statement; + char *join_condition; } RemoteQuery; ----------------------------------------------------------------------- Summary of changes: src/backend/nodes/copyfuncs.c | 3 + src/backend/optimizer/plan/createplan.c | 311 +++++++++++++++++++++++++++++-- src/backend/optimizer/plan/planner.c | 21 ++ src/backend/optimizer/util/pathnode.c | 8 +- src/backend/pgxc/plan/planner.c | 19 +- src/include/optimizer/planmain.h | 3 + src/include/pgxc/planner.h | 3 + 7 files changed, 338 insertions(+), 30 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-01-20 08:33:26
|
Project "Postgres-XC". The branch, master has been updated via efcf364436c54517788299e49d1d755553d854dd (commit) via 8cf0de56a706ad0da72d3ea889844ae6c9e4a6fb (commit) from 90a3e337ead46e9029e877b6e8d577c26307ebe5 (commit) - Log ----------------------------------------------------------------- commit efcf364436c54517788299e49d1d755553d854dd Author: Michael P <mic...@us...> Date: Thu Jan 20 17:30:57 2011 +0900 Fix and Clean up in Executor Fixes for various bugs found during development and testing. Includes also some clean ups. Patch from Andrei Martsinchyk diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c index 63031a7..c124fc3 100644 --- a/src/backend/access/common/heaptuple.c +++ b/src/backend/access/common/heaptuple.c @@ -1193,18 +1193,15 @@ slot_deform_datarow(TupleTableSlot *slot) (errcode(ERRCODE_DATA_CORRUPTED), errmsg("Tuple does not match the descriptor"))); - if (slot->tts_attinmeta == NULL) - { - /* - * Ensure info about input functions is available as long as slot lives - */ - MemoryContext oldcontext = MemoryContextSwitchTo(slot->tts_mcxt); + /* + * Ensure info about input functions is available as long as slot lives + * as well as deformed values + */ + MemoryContext oldcontext = MemoryContextSwitchTo(slot->tts_mcxt); + if (slot->tts_attinmeta == NULL) slot->tts_attinmeta = TupleDescGetAttInMetadata(slot->tts_tupleDescriptor); - MemoryContextSwitchTo(oldcontext); - } - buffer = makeStringInfo(); for (i = 0; i < attnum; i++) { @@ -1240,6 +1237,8 @@ slot_deform_datarow(TupleTableSlot *slot) pfree(buffer); slot->tts_nvalid = attnum; + + MemoryContextSwitchTo(oldcontext); } #endif @@ -1292,6 +1291,16 @@ slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull) return (Datum) 0; } +#ifdef PGXC + /* If it is a data row tuple extract all and return requested */ + if (slot->tts_dataRow) + { + slot_deform_datarow(slot); + *isnull = slot->tts_isnull[attnum - 1]; + return slot->tts_values[attnum - 1]; + } +#endif + /* * 
otherwise we had better have a physical tuple (tts_nvalid should equal * natts in all virtual-tuple cases) @@ -1336,11 +1345,6 @@ slot_getattr(TupleTableSlot *slot, int attnum, bool *isnull) /* * Extract the attribute, along with any preceding attributes. */ -#ifdef PGXC - if (slot->tts_dataRow) - slot_deform_datarow(slot); - else -#endif slot_deform_tuple(slot, attnum); /* @@ -1495,6 +1499,15 @@ slot_attisnull(TupleTableSlot *slot, int attnum) if (attnum > tupleDesc->natts) return true; +#ifdef PGXC + /* If it is a data row tuple extract all and return requested */ + if (slot->tts_dataRow) + { + slot_deform_datarow(slot); + return slot->tts_isnull[attnum - 1]; + } +#endif + /* * otherwise we had better have a physical tuple (tts_nvalid should equal * natts in all virtual-tuple cases) diff --git a/src/backend/pgxc/pool/execRemote.c b/src/backend/pgxc/pool/execRemote.c index 3ca96c6..3803fa5 100644 --- a/src/backend/pgxc/pool/execRemote.c +++ b/src/backend/pgxc/pool/execRemote.c @@ -19,6 +19,7 @@ #include "postgres.h" #include "access/gtm.h" #include "access/xact.h" +#include "catalog/pg_type.h" #include "commands/prepare.h" #include "executor/executor.h" #include "gtm/gtm_c.h" @@ -135,6 +136,7 @@ stat_transaction(int node_count) } +#ifdef NOT_USED /* * To collect statistics: count a two-phase commit on nodes */ @@ -146,6 +148,7 @@ stat_2pc() else nonautocommit_2pc++; } +#endif /* @@ -586,6 +589,8 @@ HandleRowDescription(RemoteQueryState *combiner, char *msg_body, size_t len) return false; } + +#ifdef NOT_USED /* * Handle ParameterStatus ('S') message from a data node connection (SET command) */ @@ -607,6 +612,7 @@ HandleParameterStatus(RemoteQueryState *combiner, char *msg_body, size_t len) pq_putmessage('S', msg_body, len); } } +#endif /* * Handle CopyInResponse ('G') message from a data node connection @@ -1010,7 +1016,6 @@ BufferConnection(PGXCNodeHandle *conn) conn->state = DN_CONNECTION_STATE_ERROR_FATAL; add_error_message(conn, "Failed to fetch from data 
node"); } - break; } else if (res == RESPONSE_COMPLETE) { @@ -1054,49 +1059,49 @@ FetchTuple(RemoteQueryState *combiner, TupleTableSlot *slot) { bool have_tuple = false; - while (combiner->conn_count > 0) + /* If we have message in the buffer, consume it */ + if (combiner->currentRow.msg) { - PGXCNodeHandle *conn; - int res; + ExecStoreDataRowTuple(combiner->currentRow.msg, + combiner->currentRow.msglen, + combiner->currentRow.msgnode, slot, true); + combiner->currentRow.msg = NULL; + combiner->currentRow.msglen = 0; + combiner->currentRow.msgnode = 0; + have_tuple = true; + } - /* If we have message in the buffer, consume it */ - if (combiner->currentRow.msg) - { - ExecStoreDataRowTuple(combiner->currentRow.msg, - combiner->currentRow.msglen, - combiner->currentRow.msgnode, slot, true); - combiner->currentRow.msg = NULL; - combiner->currentRow.msglen = 0; - combiner->currentRow.msgnode = 0; - have_tuple = true; - } - /* - * If this is ordered fetch we can not know what is the node - * to handle next, so sorter will choose next itself and set it as - * currentRow to have it consumed on the next call to FetchTuple - */ - if (((RemoteQuery *)combiner->ss.ps.plan)->sort) - return have_tuple; + /* + * If this is ordered fetch we can not know what is the node + * to handle next, so sorter will choose next itself and set it as + * currentRow to have it consumed on the next call to FetchTuple. + * Otherwise allow to prefetch next tuple + */ + if (((RemoteQuery *)combiner->ss.ps.plan)->sort) + return have_tuple; - /* - * Note: If we are fetching not sorted results we can not have both - * currentRow and buffered rows. When connection is buffered currentRow - * is moved to buffer, and then it is cleaned after buffering is - * completed. Afterwards rows will be taken from the buffer bypassing - * currentRow until buffer is empty, and only after that data are read - * from a connection. 
- */ - if (list_length(combiner->rowBuffer) > 0) - { - RemoteDataRow dataRow = (RemoteDataRow) linitial(combiner->rowBuffer); - combiner->rowBuffer = list_delete_first(combiner->rowBuffer); - ExecStoreDataRowTuple(dataRow->msg, dataRow->msglen, - dataRow->msgnode, slot, true); - pfree(dataRow); - return true; - } + /* + * Note: If we are fetching not sorted results we can not have both + * currentRow and buffered rows. When connection is buffered currentRow + * is moved to buffer, and then it is cleaned after buffering is + * completed. Afterwards rows will be taken from the buffer bypassing + * currentRow until buffer is empty, and only after that data are read + * from a connection. + */ + if (list_length(combiner->rowBuffer) > 0) + { + RemoteDataRow dataRow = (RemoteDataRow) linitial(combiner->rowBuffer); + combiner->rowBuffer = list_delete_first(combiner->rowBuffer); + ExecStoreDataRowTuple(dataRow->msg, dataRow->msglen, + dataRow->msgnode, slot, true); + pfree(dataRow); + return true; + } - conn = combiner->connections[combiner->current_conn]; + while (combiner->conn_count > 0) + { + int res; + PGXCNodeHandle *conn = combiner->connections[combiner->current_conn]; /* Going to use a connection, buffer it if needed */ if (conn->state == DN_CONNECTION_STATE_QUERY && conn->combiner != NULL @@ -1116,7 +1121,7 @@ FetchTuple(RemoteQueryState *combiner, TupleTableSlot *slot) * connection clean */ if (have_tuple) - return have_tuple; + return true; else { if (pgxc_node_send_execute(conn, combiner->cursor, 1) != 0) @@ -1160,25 +1165,42 @@ FetchTuple(RemoteQueryState *combiner, TupleTableSlot *slot) else combiner->current_conn = 0; } - - /* If we have a tuple we can leave now. 
*/ - if (have_tuple) + else if (res = RESPONSE_DATAROW && have_tuple) + { + /* + * We already have a tuple and received another one, leave it till + * next fetch + */ return true; + } + + /* If we have message in the buffer, consume it */ + if (combiner->currentRow.msg) + { + ExecStoreDataRowTuple(combiner->currentRow.msg, + combiner->currentRow.msglen, + combiner->currentRow.msgnode, slot, true); + combiner->currentRow.msg = NULL; + combiner->currentRow.msglen = 0; + combiner->currentRow.msgnode = 0; + have_tuple = true; + } + + /* + * If this is ordered fetch we can not know what is the node + * to handle next, so sorter will choose next itself and set it as + * currentRow to have it consumed on the next call to FetchTuple. + * Otherwise allow to prefetch next tuple + */ + if (((RemoteQuery *)combiner->ss.ps.plan)->sort) + return have_tuple; } - /* Wrap up last message if exists */ - if (combiner->currentRow.msg) - { - ExecStoreDataRowTuple(combiner->currentRow.msg, - combiner->currentRow.msglen, - combiner->currentRow.msgnode, slot, true); - combiner->currentRow.msg = NULL; - combiner->currentRow.msglen = 0; - combiner->currentRow.msgnode = 0; - return true; - } - /* otherwise report end of data to the caller */ - ExecClearTuple(slot); - return false; + + /* report end of data to the caller */ + if (!have_tuple) + ExecClearTuple(slot); + + return have_tuple; } @@ -2747,8 +2769,31 @@ ExecInitRemoteQuery(RemoteQuery *node, EState *estate, int eflags) /* We need expression context to evaluate */ if (node->exec_nodes && node->exec_nodes->expr) - ExecAssignExprContext(estate, &remotestate->ss.ps); + { + Expr *expr = node->exec_nodes->expr; + if (IsA(expr, Var) && ((Var *) expr)->vartype == TIDOID) + { + /* Special case if expression does not need to be evaluated */ + } + else + { + /* + * Inner plan provides parameter values and may be needed + * to determine target nodes. In this case expression is evaluated + * and we should made values available for evaluator. 
+ * So allocate storage for the values. + */ + if (innerPlan(node)) + { + int nParams = list_length(node->scan.plan.targetlist); + estate->es_param_exec_vals = (ParamExecData *) palloc0( + nParams * sizeof(ParamExecData)); + } + /* prepare expression evaluation */ + ExecAssignExprContext(estate, &remotestate->ss.ps); + } + } if (innerPlan(node)) innerPlanState(remotestate) = ExecInitNode(innerPlan(node), estate, eflags); @@ -2838,27 +2883,70 @@ get_exec_connections(RemoteQueryState *planstate, { if (exec_nodes->expr) { - /* execution time determining of target data nodes */ - bool isnull; - ExprState *estate = ExecInitExpr(exec_nodes->expr, - (PlanState *) planstate); - Datum partvalue = ExecEvalExpr(estate, - planstate->ss.ps.ps_ExprContext, - &isnull, - NULL); - if (!isnull) + /* + * Special case (argh, another one): if expression data type is TID + * the ctid value is specific to the node from which it has been + * returned. + * So try and determine originating node and execute command on + * that node only + */ + if (IsA(exec_nodes->expr, Var) && ((Var *) exec_nodes->expr)->vartype == TIDOID) + { + Var *ctid = (Var *) exec_nodes->expr; + PlanState *source = (PlanState *) planstate; + TupleTableSlot *slot; + + /* Find originating RemoteQueryState */ + if (ctid->varno == INNER) + source = innerPlanState(source); + else if (ctid->varno == OUTER) + source = outerPlanState(source); + + while (!IsA(source, RemoteQueryState)) + { + TargetEntry *tle = list_nth(source->plan->targetlist, + ctid->varattno - 1); + Assert(IsA(tle->expr, Var)); + ctid = (Var *) tle->expr; + if (ctid->varno == INNER) + source = innerPlanState(source); + else if (ctid->varno == OUTER) + source = outerPlanState(source); + else + elog(ERROR, "failed to determine target node"); + } + + slot = source->ps_ResultTupleSlot; + /* The slot should be of type DataRow */ + Assert(!TupIsNull(slot) && slot->tts_dataRow); + + nodelist = list_make1_int(slot->tts_dataNode); + primarynode = NIL; + } + else { - 
RelationLocInfo *rel_loc_info = GetRelationLocInfo(exec_nodes->relid); - ExecNodes *nodes = GetRelationNodes(rel_loc_info, - (long *) &partvalue, - exec_nodes->accesstype); - if (nodes) + /* execution time determining of target data nodes */ + bool isnull; + ExprState *estate = ExecInitExpr(exec_nodes->expr, + (PlanState *) planstate); + Datum partvalue = ExecEvalExpr(estate, + planstate->ss.ps.ps_ExprContext, + &isnull, + NULL); + if (!isnull) { - nodelist = nodes->nodelist; - primarynode = nodes->primarynodelist; - pfree(nodes); + RelationLocInfo *rel_loc_info = GetRelationLocInfo(exec_nodes->relid); + ExecNodes *nodes = GetRelationNodes(rel_loc_info, + (long *) &partvalue, + exec_nodes->accesstype); + if (nodes) + { + nodelist = nodes->nodelist; + primarynode = nodes->primarynodelist; + pfree(nodes); + } + FreeRelationLocInfo(rel_loc_info); } - FreeRelationLocInfo(rel_loc_info); } } else { nodelist = exec_nodes->nodelist; @@ -3134,7 +3222,6 @@ do_query(RemoteQueryState *node) errmsg("Failed to send command to data nodes"))); } } - primaryconnection->combiner = node; Assert(node->combine_type == COMBINE_TYPE_SAME); /* Make sure the command is completed on the primary node */ @@ -3365,6 +3452,7 @@ TupleTableSlot * ExecRemoteQuery(RemoteQueryState *node) { RemoteQuery *step = (RemoteQuery *) node->ss.ps.plan; + EState *estate = node->ss.ps.state; TupleTableSlot *resultslot = node->ss.ps.ps_ResultTupleSlot; TupleTableSlot *scanslot = node->ss.ss_ScanTupleSlot; bool have_tuple = false; @@ -3386,11 +3474,31 @@ ExecRemoteQuery(RemoteQueryState *node) /* * Use data row returned by the previus step as a parameters for * the main query. - * Exit if no more slots. 
*/ if (!TupIsNull(innerSlot)) + { step->paramval_len = ExecCopySlotDatarow(innerSlot, &step->paramval_data); + + /* Needed for expression evaluation */ + if (estate->es_param_exec_vals) + { + int i; + int natts = innerSlot->tts_tupleDescriptor->natts; + + slot_getallattrs(innerSlot); + for (i = 0; i < natts; i++) + estate->es_param_exec_vals[i].value = slot_getattr( + innerSlot, + i+1, + &estate->es_param_exec_vals[i].isnull); + } + } + else + { + /* no parameters, exit */ + return NULL; + } } do_query(node); @@ -3509,6 +3617,28 @@ handle_results: TupleTableSlot *innerSlot = ExecProcNode(innerPlanState(node)); if (!TupIsNull(innerSlot)) { + /* reset the counter */ + node->command_complete_count = 0; + /* + * Use data row returned by the previus step as a parameters for + * the main query. + */ + step->paramval_len = ExecCopySlotDatarow(innerSlot, + &step->paramval_data); + + /* Needed for expression evaluation */ + if (estate->es_param_exec_vals) + { + int i; + int natts = innerSlot->tts_tupleDescriptor->natts; + + slot_getallattrs(innerSlot); + for (i = 0; i < natts; i++) + estate->es_param_exec_vals[i].value = slot_getattr( + innerSlot, + i+1, + &estate->es_param_exec_vals[i].isnull); + } do_query(node); goto handle_results; } commit 8cf0de56a706ad0da72d3ea889844ae6c9e4a6fb Author: Michael P <mic...@us...> Date: Thu Jan 20 17:28:45 2011 +0900 Clean up in Materialize code Removes a PGXC added workaround when Materialize fetches all tuples from the subnode to keep connection clean. Now buffering handles this. 
diff --git a/src/backend/executor/nodeMaterial.c b/src/backend/executor/nodeMaterial.c index 2cd3298..446b400 100644 --- a/src/backend/executor/nodeMaterial.c +++ b/src/backend/executor/nodeMaterial.c @@ -24,9 +24,6 @@ #include "executor/executor.h" #include "executor/nodeMaterial.h" #include "miscadmin.h" -#ifdef PGXC -#include "pgxc/pgxc.h" -#endif /* ---------------------------------------------------------------- * ExecMaterial @@ -59,24 +56,9 @@ ExecMaterial(MaterialState *node) /* * If first time through, and we need a tuplestore, initialize it. */ -#ifdef PGXC - /* - * For PGXC, temporarily always create the storage. - * This allows us to easily use the same connection to - * in multiple steps of the plan. - */ - if ((IS_PGXC_COORDINATOR && tuplestorestate == NULL) - || (IS_PGXC_DATANODE && tuplestorestate == NULL && node->eflags != 0)) -#else if (tuplestorestate == NULL && node->eflags != 0) -#endif { tuplestorestate = tuplestore_begin_heap(true, false, work_mem); -#ifdef PGXC - if (IS_PGXC_COORDINATOR) - /* Note that we will rescan these results */ - node->eflags |= EXEC_FLAG_REWIND; -#endif tuplestore_set_eflags(tuplestorestate, node->eflags); if (node->eflags & EXEC_FLAG_MARK) { @@ -91,26 +73,6 @@ ExecMaterial(MaterialState *node) Assert(ptrno == 1); } node->tuplestorestate = tuplestorestate; - -#ifdef PGXC - if (IS_PGXC_COORDINATOR) - { - TupleTableSlot *outerslot; - PlanState *outerNode = outerPlanState(node); - - /* We want to always materialize first temporarily in PG-XC */ - while (!node->eof_underlying) - { - outerslot = ExecProcNode(outerNode); - if (TupIsNull(outerslot)) - node->eof_underlying = true; - else - /* Append a copy of the returned tuple to tuplestore. 
*/ - tuplestore_puttupleslot(tuplestorestate, outerslot); - } - tuplestore_rescan(node->tuplestorestate); - } -#endif } /* ----------------------------------------------------------------------- Summary of changes: src/backend/access/common/heaptuple.c | 41 +++-- src/backend/executor/nodeMaterial.c | 38 ----- src/backend/pgxc/pool/execRemote.c | 288 ++++++++++++++++++++++++--------- 3 files changed, 236 insertions(+), 131 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-01-13 05:55:16
|
Project "Postgres-XC". The branch, master has been updated via 90a3e337ead46e9029e877b6e8d577c26307ebe5 (commit) from e58bc619b910c872ee8ade4dfd5903cd97c7f66d (commit) - Log ----------------------------------------------------------------- commit 90a3e337ead46e9029e877b6e8d577c26307ebe5 Author: Michael P <mic...@us...> Date: Thu Jan 13 14:53:32 2011 +0900 Fix for partially committed transactions in Implicit 2PC When a Datanode crashed during an implicit 2PC, the list of nodes that crashed were not saved on GTM when aborting transaction on remote Coordinator. This problem was introduced by commit 981e2bbe81c94c0427ed9504d0390119a7770a83 in execRemote.c when handle error is set at DN_CONNECTION_STATE_ERROR_FATAL when a connection to backend was cut. diff --git a/src/backend/pgxc/pool/pgxcnode.c b/src/backend/pgxc/pool/pgxcnode.c index dafbec5..1e23489 100644 --- a/src/backend/pgxc/pool/pgxcnode.c +++ b/src/backend/pgxc/pool/pgxcnode.c @@ -1643,7 +1643,9 @@ get_transaction_nodes(PGXCNodeHandle **connections, char client_conn_type, { for (i = 0; i < NumDataNodes; i++) { - if (dn_handles[i].sock != NO_SOCKET && dn_handles[i].state != DN_CONNECTION_STATE_ERROR_FATAL) + if (dn_handles[i].sock != NO_SOCKET && + (dn_handles[i].state != DN_CONNECTION_STATE_ERROR_FATAL || + status_requested == HANDLE_ERROR)) { if (status_requested == HANDLE_IDLE && dn_handles[i].transaction_status == 'I') connections[tran_count++] = &dn_handles[i]; @@ -1659,7 +1661,9 @@ get_transaction_nodes(PGXCNodeHandle **connections, char client_conn_type, { for (i = 0; i < NumCoords; i++) { - if (co_handles[i].sock != NO_SOCKET && co_handles[i].state != DN_CONNECTION_STATE_ERROR_FATAL) + if (co_handles[i].sock != NO_SOCKET && + (co_handles[i].state != DN_CONNECTION_STATE_ERROR_FATAL || + status_requested == HANDLE_ERROR)) { if (status_requested == HANDLE_IDLE && co_handles[i].transaction_status == 'I') connections[tran_count++] = &co_handles[i]; 
----------------------------------------------------------------------- Summary of changes: src/backend/pgxc/pool/pgxcnode.c | 8 ++++++-- 1 files changed, 6 insertions(+), 2 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-01-13 02:51:45
|
Project "Postgres-XC". The branch, master has been updated via e58bc619b910c872ee8ade4dfd5903cd97c7f66d (commit) from 03c338b1a32b2e33b9535423ed6fe462512eb4d8 (commit) - Log ----------------------------------------------------------------- commit e58bc619b910c872ee8ade4dfd5903cd97c7f66d Author: Michael P <mic...@us...> Date: Thu Jan 13 11:53:18 2011 +0900 Allow only Remote Coordinator to use functionnalities introduced with SQL/MED diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index ad90109..e91eb57 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -578,7 +578,7 @@ create_join_plan(PlannerInfo *root, JoinPath *best_path) * Check if this join can be reduced to an equiv. remote scan node * This can only be executed on a remote Coordinator */ - if (IS_PGXC_COORDINATOR && IsConnFromCoord()) + if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) plan = create_remotejoin_plan(root, best_path, plan, outer_plan, inner_plan); #endif diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 63b0581..3f8c00d 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -114,7 +114,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind) * This has to be set on a remote Coordinator only * as it hugely penalizes performance on backend Nodes */ - if (IS_PGXC_COORDINATOR && IsConnFromCoord()) + if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) { rel->pages = 1; rel->tuples = 1; ----------------------------------------------------------------------- Summary of changes: src/backend/optimizer/plan/createplan.c | 2 +- src/backend/optimizer/util/relnode.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-01-13 02:12:30
|
Project "Postgres-XC". The branch, master has been updated via 03c338b1a32b2e33b9535423ed6fe462512eb4d8 (commit) from 88b9cd684939307f91e3c77e83c8f597642ee69b (commit) - Log ----------------------------------------------------------------- commit 03c338b1a32b2e33b9535423ed6fe462512eb4d8 Author: Michael P <mic...@us...> Date: Thu Jan 13 11:09:41 2011 +0900 Node Registering feature Feature to register a Proxy, Coordinator or Datanode in Postgres-XC cluster. When a PGXC Node connects in the cluster, Node connects to GTM to register the following information: - Node ID - Node Type - Data folder - Port number - Socket IP (got directly from remote socket when registering) - Proxy Number Node goes through (0 if no Proxy used) When a node is shutdown, it automatically unregisters itself. If node is quickly shutdown, it disconnects itself, and can reconnect itself later. In this case, node ID is reserved for this node. When a node tries to register with an ID already used, it cannot start up. Socket IP is forwarded to GTM if Node goes through a Proxy. As proxy also registers, a start-up option "-i" has been added to set Proxy ID. diff --git a/src/backend/access/transam/gtm.c b/src/backend/access/transam/gtm.c index a464578..a9bf1d6 100644 --- a/src/backend/access/transam/gtm.c +++ b/src/backend/access/transam/gtm.c @@ -426,3 +426,65 @@ RenameSequenceGTM(char *seqname, const char *newseqname) return conn ? 
rename_sequence(conn, &seqkey, &newseqkey) : -1; } + +/* + * Register Given Node + * Connection for registering is just used once then closed + */ +int +RegisterGTM(GTM_PGXCNodeType type, GTM_PGXCNodePort port, char *datafolder) +{ + int ret; + + CheckConnection(); + + if (!conn) + return EOF; + + ret = node_register(conn, type, port, PGXCNodeId, datafolder); + + /* If something went wrong, retry once */ + if (ret < 0) + { + CloseGTM(); + InitGTM(); + if (conn) + ret = node_register(conn, type, port, PGXCNodeId, datafolder); + } + + return ret; +} + +/* + * UnRegister Given Node + * Connection for registering is just used once then closed + */ +int +UnregisterGTM(GTM_PGXCNodeType type) +{ + int ret; + + CheckConnection(); + + if (!conn) + return EOF; + + ret = node_unregister(conn, type, PGXCNodeId); + + /* If something went wrong, retry once */ + if (ret < 0) + { + CloseGTM(); + InitGTM(); + if (conn) + ret = node_unregister(conn, type, PGXCNodeId); + } + + /* + * If node is unregistered cleanly, cut the connection. + * and Node shuts down smoothly. + */ + CloseGTM(); + + return ret; +} diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 0add0e6..b36347d 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -1042,6 +1042,24 @@ PostmasterMain(int argc, char *argv[]) */ whereToSendOutput = DestNone; +#ifdef PGXC + /* Register node on GTM during Postmaster Startup. */ + if (IS_PGXC_COORDINATOR) + { + if (RegisterGTM(PGXC_NODE_COORDINATOR, PostPortNumber, userDoption) < 0) + ereport(FATAL, + (errcode(ERRCODE_IO_ERROR), + errmsg("Can not register Coordinator on GTM"))); + } + if (IS_PGXC_DATANODE) + { + if (RegisterGTM(PGXC_NODE_DATANODE, PostPortNumber, userDoption) < 0) + ereport(FATAL, + (errcode(ERRCODE_IO_ERROR), + errmsg("Can not register Datanode on GTM"))); + } +#endif + /* * Initialize stats collection subsystem (this does NOT start the * collector process!) 
@@ -2188,6 +2206,12 @@ pmdie(SIGNAL_ARGS) /* and the pool manager too */ if (IS_PGXC_COORDINATOR && PgPoolerPID != 0) signal_child(PgPoolerPID, SIGTERM); + + /* Unregister Node on GTM */ + if (IS_PGXC_COORDINATOR) + UnregisterGTM(PGXC_NODE_COORDINATOR); + else if (IS_PGXC_DATANODE) + UnregisterGTM(PGXC_NODE_DATANODE); #endif pmState = PM_WAIT_BACKUP; } @@ -2240,6 +2264,12 @@ pmdie(SIGNAL_ARGS) /* and the pool manager too */ if (IS_PGXC_COORDINATOR && PgPoolerPID != 0) signal_child(PgPoolerPID, SIGTERM); + + /* Unregister Node on GTM */ + if (IS_PGXC_COORDINATOR) + UnregisterGTM(PGXC_NODE_COORDINATOR); + else if (IS_PGXC_DATANODE) + UnregisterGTM(PGXC_NODE_DATANODE); #endif pmState = PM_WAIT_BACKENDS; } diff --git a/src/gtm/Makefile b/src/gtm/Makefile index 51c55e0..5214936 100644 --- a/src/gtm/Makefile +++ b/src/gtm/Makefile @@ -11,7 +11,7 @@ subdir = src/gtm top_builddir = ../.. include $(top_builddir)/src/Makefile.global -WANTED_DIRS=common path libpq client main proxy gtm_ctl +WANTED_DIRS=common path libpq client recovery main proxy gtm_ctl all: @for dir in $(WANTED_DIRS); do \ diff --git a/src/gtm/client/fe-protocol.c b/src/gtm/client/fe-protocol.c index 117f89f..a70be31 100644 --- a/src/gtm/client/fe-protocol.c +++ b/src/gtm/client/fe-protocol.c @@ -587,6 +587,21 @@ gtmpqParseSuccess(GTM_Conn *conn, GTM_Result *result) } break; + case NODE_UNREGISTER_RESULT: + case NODE_REGISTER_RESULT: + if (gtmpqGetnchar((char *)&result->gr_resdata.grd_node.type, + sizeof (GTM_PGXCNodeType), conn)) + { + result->gr_status = -1; + break; + } + if (gtmpqGetnchar((char *)&result->gr_resdata.grd_node.nodenum, + sizeof (GTM_PGXCNodeId), conn)) + { + result->gr_status = -1; + } + break; + default: printfGTMPQExpBuffer(&conn->errorMessage, "unexpected result type from server; result typr was \"%d\"\n", diff --git a/src/gtm/client/gtm_client.c b/src/gtm/client/gtm_client.c index 08911d8..9501478 100644 --- a/src/gtm/client/gtm_client.c +++ b/src/gtm/client/gtm_client.c @@ -791,6 
+791,100 @@ send_failed: return -1; } +/* + * Register a Node on GTM + * Seen from a Node viewpoint, we do not know if we are directly connected to GTM + * or go through a proxy, so register 0 as proxy number. + * This number is modified at proxy level automatically. + */ +int node_register(GTM_Conn *conn, GTM_PGXCNodeType type, GTM_PGXCNodePort port, GTM_PGXCNodeId nodenum, + char *datafolder) +{ + GTM_Result *res = NULL; + time_t finish_time; + GTM_PGXCNodeId proxynum = 0; + + if (gtmpqPutMsgStart('C', true, conn) || + gtmpqPutInt(MSG_NODE_REGISTER, sizeof (GTM_MessageType), conn) || + gtmpqPutnchar((char *)&type, sizeof(GTM_PGXCNodeType), conn) || + gtmpqPutnchar((char *)&nodenum, sizeof(GTM_PGXCNodeId), conn) || + gtmpqPutnchar((char *)&port, sizeof(GTM_PGXCNodePort), conn) || + gtmpqPutnchar((char *)&proxynum, sizeof(GTM_PGXCNodeId), conn) || + gtmpqPutInt(strlen(datafolder), sizeof (GTM_StrLen), conn) || + gtmpqPutnchar(datafolder, strlen(datafolder), conn)) + goto send_failed; + + /* Finish the message. */ + if (gtmpqPutMsgEnd(conn)) + goto send_failed; + + /* Flush to ensure backend gets it. 
*/ + if (gtmpqFlush(conn)) + goto send_failed; + + finish_time = time(NULL) + CLIENT_GTM_TIMEOUT; + if (gtmpqWaitTimed(true, false, conn, finish_time) || + gtmpqReadData(conn) < 0) + goto receive_failed; + + if ((res = GTMPQgetResult(conn)) == NULL) + goto receive_failed; + + /* Check on node type and node number */ + if (res->gr_status == 0) + { + Assert(res->gr_resdata.grd_node.type == type); + Assert(res->gr_resdata.grd_node.nodenum == nodenum); + } + + return res->gr_status; + +receive_failed: +send_failed: + return -1; +} + +int node_unregister(GTM_Conn *conn, GTM_PGXCNodeType type, GTM_PGXCNodeId nodenum) +{ + GTM_Result *res = NULL; + time_t finish_time; + + if (gtmpqPutMsgStart('C', true, conn) || + gtmpqPutInt(MSG_NODE_UNREGISTER, sizeof (GTM_MessageType), conn) || + gtmpqPutnchar((char *)&type, sizeof(GTM_PGXCNodeType), conn) || + gtmpqPutnchar((char *)&nodenum, sizeof(GTM_PGXCNodeId), conn)) + goto send_failed; + + /* Finish the message. */ + if (gtmpqPutMsgEnd(conn)) + goto send_failed; + + /* Flush to ensure backend gets it. */ + if (gtmpqFlush(conn)) + goto send_failed; + + finish_time = time(NULL) + CLIENT_GTM_TIMEOUT; + if (gtmpqWaitTimed(true, false, conn, finish_time) || + gtmpqReadData(conn) < 0) + goto receive_failed; + + if ((res = GTMPQgetResult(conn)) == NULL) + goto receive_failed; + + /* Check on node type and node number */ + if (res->gr_status == 0) + { + Assert(res->gr_resdata.grd_node.type == type); + Assert(res->gr_resdata.grd_node.nodenum == nodenum); + } + + return res->gr_status; + +receive_failed: +send_failed: + return -1; +} + void GTM_FreeResult(GTM_Result *result, GTM_PGXCNodeType remote_type) { diff --git a/src/gtm/main/Makefile b/src/gtm/main/Makefile index 5d8aaea..3ed2c78 100644 --- a/src/gtm/main/Makefile +++ b/src/gtm/main/Makefile @@ -3,7 +3,7 @@ top_build_dir=../.. 
include $(top_build_dir)/gtm/Makefile.global -OBJS=main.o gtm_thread.o gtm_txn.o gtm_seq.o gtm_snap.o gtm_time.o ../common/libgtm.a ../libpq/libpqcomm.a ../path/libgtmpath.a +OBJS=main.o gtm_thread.o gtm_txn.o gtm_seq.o gtm_snap.o gtm_time.o ../common/libgtm.a ../libpq/libpqcomm.a ../path/libgtmpath.a ../recovery/libgtmrecovery.a ../client/libgtmclient.a LDFLAGS=-L$(top_build_dir)/common -L$(top_build_dir)/libpq LIBS=-lpthread diff --git a/src/gtm/main/main.c b/src/gtm/main/main.c index 118faab..8c1f4ca 100644 --- a/src/gtm/main/main.c +++ b/src/gtm/main/main.c @@ -34,6 +34,7 @@ #include "gtm/pqsignal.h" #include "gtm/pqformat.h" #include "gtm/assert.h" +#include "gtm/register.h" #include "gtm/gtm_txn.h" #include "gtm/gtm_seq.h" #include "gtm/gtm_msg.h" @@ -69,7 +70,7 @@ static int GTMAddConnection(Port *port); static int ReadCommand(Port *myport, StringInfo inBuf); static void ProcessCommand(Port *myport, StringInfo input_message); -static void ProcessCoordinatorCommand(Port *myport, GTM_MessageType mtype, StringInfo message); +static void ProcessPGXCNodeCommand(Port *myport, GTM_MessageType mtype, StringInfo message); static void ProcessTransactionCommand(Port *myport, GTM_MessageType mtype, StringInfo message); static void ProcessSnapshotCommand(Port *myport, GTM_MessageType mtype, StringInfo message); static void ProcessSequenceCommand(Port *myport, GTM_MessageType mtype, StringInfo message); @@ -77,7 +78,7 @@ static void ProcessQueryCommand(Port *myport, GTM_MessageType mtype, StringInfo static void GTM_RegisterPGXCNode(Port *myport, GTM_PGXCNodeId pgxc_node_id); static void GTM_UnregisterPGXCNode(Port *myport, GTM_PGXCNodeId pgxc_node_id); - + static bool CreateOptsFile(int argc, char *argv[]); static void CreateDataDirLockFile(void); static void CreateLockFile(const char *filename, const char *refName); @@ -146,6 +147,9 @@ BaseInit() sprintf(GTMLogFile, "%s/%s", GTMDataDir, GTM_LOG_FILE); } + /* Save Node Register File in register.c */ + 
Recovery_SaveRegisterFileName(GTMDataDir); + DebugFileOpen(); GTM_InitTxnManager(); @@ -185,6 +189,10 @@ GTM_SigleHandler(int signal) /* * XXX We should do a clean shutdown here. */ + + /* Rewrite Register Information (clean up unregister records) */ + Recovery_SaveRegisterInfo(); + /* Delete pid file before shutting down */ DeleteLockFile(GTM_PID_FILE); @@ -306,6 +314,10 @@ main(int argc, char *argv[]) GTM_RestoreSeqInfo(ctlfd); close(ctlfd); + + /* Recover Data of Registered nodes. */ + Recovery_RestoreRegisterInfo(); + /* * Establish input sockets. */ @@ -712,6 +724,9 @@ GTM_ThreadMain(void *argp) * Remove all transactions opened within the thread */ GTM_RemoveAllTransInfos(-1); + + /* Disconnect node if necessary */ + Recovery_PGXCNodeDisconnect(thrinfo->thr_conn->con_port); pthread_exit(thrinfo); break; @@ -731,6 +746,9 @@ GTM_ThreadMain(void *argp) */ GTM_RemoveAllTransInfos(-1); + /* Disconnect node if necessary */ + Recovery_PGXCNodeDisconnect(thrinfo->thr_conn->con_port); + ereport(FATAL, (EPROTO, errmsg("invalid frontend message type %d", @@ -762,8 +780,9 @@ ProcessCommand(Port *myport, StringInfo input_message) switch (mtype) { - case MSG_UNREGISTER_COORD: - ProcessCoordinatorCommand(myport, mtype, input_message); + case MSG_NODE_REGISTER: + case MSG_NODE_UNREGISTER: + ProcessPGXCNodeCommand(myport, mtype, input_message); break; case MSG_TXN_BEGIN: @@ -807,6 +826,9 @@ ProcessCommand(Port *myport, StringInfo input_message) case MSG_BACKEND_DISCONNECT: GTM_RemoveAllTransInfos(proxyhdr.ph_conid); + + /* Mark PGXC Node as disconnected if backend disconnected is postmaster */ + ProcessPGXCNodeBackendDisconnect(myport, input_message); break; default: @@ -917,22 +939,21 @@ ReadCommand(Port *myport, StringInfo inBuf) } static void -ProcessCoordinatorCommand(Port *myport, GTM_MessageType mtype, StringInfo message) +ProcessPGXCNodeCommand(Port *myport, GTM_MessageType mtype, StringInfo message) { - GTM_PGXCNodeId cid; - - cid = pq_getmsgint(message, sizeof 
(GTM_PGXCNodeId)); - switch (mtype) { - case MSG_UNREGISTER_COORD: - GTM_UnregisterPGXCNode(myport, cid); + case MSG_NODE_REGISTER: + ProcessPGXCNodeRegister(myport, message); + break; + + case MSG_NODE_UNREGISTER: + ProcessPGXCNodeUnregister(myport, message); break; default: Assert(0); /* Shouldn't come here.. keep compiler quite */ } - pq_getmsgend(message); } static void diff --git a/src/gtm/proxy/Makefile b/src/gtm/proxy/Makefile index 3ed6ccc..d2e6623 100644 --- a/src/gtm/proxy/Makefile +++ b/src/gtm/proxy/Makefile @@ -3,7 +3,7 @@ top_build_dir=../.. include $(top_build_dir)/gtm/Makefile.global -OBJS=proxy_main.o proxy_thread.o ../common/libgtm.a ../libpq/libpqcomm.a ../client/libgtmclient.a ../path/libgtmpath.a +OBJS=proxy_main.o proxy_thread.o ../common/libgtm.a ../libpq/libpqcomm.a ../client/libgtmclient.a ../path/libgtmpath.a ../recovery/libgtmrecovery.a ../client/libgtmclient.a LDFLAGS=-L$(top_build_dir)/common -L$(top_build_dir)/libpq LIBS=-lpthread diff --git a/src/gtm/proxy/proxy_main.c b/src/gtm/proxy/proxy_main.c index 4275d91..4950e04 100644 --- a/src/gtm/proxy/proxy_main.c +++ b/src/gtm/proxy/proxy_main.c @@ -25,6 +25,7 @@ #include "gtm/gtm_c.h" #include "gtm/gtm_proxy.h" +#include "gtm/register.h" #include "gtm/elog.h" #include "gtm/memutils.h" #include "gtm/gtm_list.h" @@ -48,6 +49,7 @@ extern char *optarg; #define GTM_PROXY_DEFAULT_WORKERS 2 #define GTM_PID_FILE "gtm_proxy.pid" #define GTM_LOG_FILE "gtm_proxy.log" +#define PROXY_CLIENT_TIMEOUT 20 static char *progname = "gtm_proxy"; char *ListenAddresses; @@ -58,12 +60,15 @@ char *GTMProxyDataDir; char *GTMServerHost; int GTMServerPortNumber; +GTM_PGXCNodeId GTMProxyID = 0; + /* The socket(s) we're listening to. 
*/ #define MAXLISTEN 64 static int ListenSocket[MAXLISTEN]; pthread_key_t threadinfo_key; static bool GTMProxyAbortPending = false; +static GTM_Conn *master_conn; static Port *ConnCreate(int serverFd); static void ConnFree(Port *conn); @@ -77,10 +82,12 @@ static void GTMProxy_HandleDisconnect(GTMProxy_ConnectionInfo *conninfo, GTM_Con static void GTMProxy_ProxyCommand(GTMProxy_ConnectionInfo *conninfo, GTM_Conn *gtm_conn, GTM_MessageType mtype, StringInfo message); +static void GTMProxy_ProxyPGXCNodeCommand(GTMProxy_ConnectionInfo *conninfo, + GTM_Conn *gtm_conn, GTM_MessageType mtype, GTMProxy_CommandData cmd_data); static void ProcessCommand(GTMProxy_ConnectionInfo *conninfo, GTM_Conn *gtm_conn, StringInfo input_message); -static void ProcessCoordinatorCommand(GTMProxy_ConnectionInfo *conninfo, +static void ProcessPGXCNodeCommand(GTMProxy_ConnectionInfo *conninfo, GTM_Conn *gtm_conn, GTM_MessageType mtype, StringInfo message); static void ProcessTransactionCommand(GTMProxy_ConnectionInfo *conninfo, GTM_Conn *gtm_conn, GTM_MessageType mtype, StringInfo message); @@ -92,9 +99,7 @@ static void ProcessSequenceCommand(GTMProxy_ConnectionInfo *conninfo, static void GTMProxy_RegisterPGXCNode(GTMProxy_ConnectionInfo *conninfo, GTM_PGXCNodeId cid, GTM_PGXCNodeType remote_type, - bool is_postmaster); -static void GTMProxy_UnregisterPGXCNode(GTMProxy_ConnectionInfo *conninfo, - GTM_PGXCNodeId pgxc_node_id); + bool is_postmaster); static void ProcessResponse(GTMProxy_ThreadInfo *thrinfo, GTMProxy_CommandInfo *cmdinfo, GTM_Result *res); @@ -109,6 +114,9 @@ static void CreateLockFile(const char *filename, const char *refName); static void ChangeToDataDir(void); static void checkDataDir(void); static void DeleteLockFile(const char *filename); +static void RegisterProxy(void); +static void UnregisterProxy(void); +static GTM_Conn *ConnectGTM(void); /* * One-time initialization. 
It's called immediately after the main process @@ -170,6 +178,12 @@ BaseInit() sprintf(GTMLogFile, "%s/%s", GTMProxyDataDir, GTM_LOG_FILE); } + /* Save Node Register File in register.c */ + Recovery_SaveRegisterFileName(GTMProxyDataDir); + + /* Register Proxy on GTM */ + RegisterProxy(); + DebugFileOpen(); /* @@ -203,6 +217,12 @@ GTMProxy_SigleHandler(int signal) return; } + /* Unregister Proxy on GTM */ + UnregisterProxy(); + + /* Rewrite Register Information (clean up unregister records) */ + Recovery_SaveRegisterInfo(); + /* * XXX We should do a clean shutdown here. */ @@ -224,10 +244,11 @@ help(const char *progname) printf(_("This is the GTM proxy.\n\n")); printf(_("Usage:\n %s [OPTION]...\n\n"), progname); printf(_("Options:\n")); - printf(_(" -h hostname GTM proxy hostname/IP\n")); + printf(_(" -h hostname GTM proxy hostname/IP\n")); printf(_(" -p port GTM proxy port number\n")); printf(_(" -s hostname GTM server hostname/IP \n")); printf(_(" -t port GTM server port number\n")); + printf(_(" -i ID number GTM proxy ID number\n")); printf(_(" -n count Number of worker threads\n")); printf(_(" -D directory GTM proxy working directory\n")); printf(_(" -l filename GTM proxy log file name \n")); @@ -257,11 +278,11 @@ main(int argc, char *argv[]) ListenAddresses = GTM_PROXY_DEFAULT_HOSTNAME; GTMProxyPortNumber = GTM_PROXY_DEFAULT_PORT; GTMProxyWorkerThreads = GTM_PROXY_DEFAULT_WORKERS; - + /* * Parse the command like options and set variables */ - while ((opt = getopt(argc, argv, "h:p:n:D:l:s:t:")) != -1) + while ((opt = getopt(argc, argv, "h:i:p:n:D:l:s:t:")) != -1) { switch (opt) { @@ -270,6 +291,11 @@ main(int argc, char *argv[]) ListenAddresses = strdup(optarg); break; + case 'i': + /* GTM Proxy identification number */ + GTMProxyID = (GTM_PGXCNodeId) atoi(optarg); + break; + case 'p': /* Port number for the proxy to listen on */ GTMProxyPortNumber = atoi(optarg); @@ -313,6 +339,14 @@ main(int argc, char *argv[]) progname); exit(1); } + if (GTMProxyID == 0) + { 
+ write_stderr("GTM Proxy ID must be specified\n"); + write_stderr("Try \"%s --help\" for more information.\n", + progname); + exit(1); + } + /* * GTM accepts no non-option switch arguments. */ @@ -333,6 +367,9 @@ main(int argc, char *argv[]) elog(DEBUG3, "Starting GTM proxy at (%s:%d)", ListenAddresses, GTMProxyPortNumber); + /* Recover Data of Registered nodes. */ + Recovery_RestoreRegisterInfo(); + /* * Establish input sockets. */ @@ -600,8 +637,8 @@ GTMProxy_ThreadMain(void *argp) /* * Set up connection with the GTM server */ - sprintf(gtm_connect_string, "host=%s port=%d pgxc_node_id=1 remote_type=%d", - GTMServerHost, GTMServerPortNumber, PGXC_NODE_GTM_PROXY); + sprintf(gtm_connect_string, "host=%s port=%d pgxc_node_id=%d remote_type=%d", + GTMServerHost, GTMServerPortNumber, GTMProxyID, PGXC_NODE_GTM_PROXY); thrinfo->thr_gtm_conn = PQconnectGTM(gtm_connect_string); @@ -943,8 +980,9 @@ ProcessCommand(GTMProxy_ConnectionInfo *conninfo, GTM_Conn *gtm_conn, switch (mtype) { - case MSG_UNREGISTER_COORD: - ProcessCoordinatorCommand(conninfo, gtm_conn, mtype, input_message); + case MSG_NODE_REGISTER: + case MSG_NODE_UNREGISTER: + ProcessPGXCNodeCommand(conninfo, gtm_conn, mtype, input_message); break; case MSG_TXN_BEGIN: @@ -1125,6 +1163,8 @@ ProcessResponse(GTMProxy_ThreadInfo *thrinfo, GTMProxy_CommandInfo *cmdinfo, case MSG_TXN_COMMIT_PREPARED: case MSG_TXN_GET_GXID: case MSG_TXN_GET_GID_DATA: + case MSG_NODE_REGISTER: + case MSG_NODE_UNREGISTER: case MSG_SNAPSHOT_GXID_GET: case MSG_SEQUENCE_INIT: case MSG_SEQUENCE_GET_CURRENT: @@ -1243,23 +1283,115 @@ ReadCommand(GTMProxy_ConnectionInfo *conninfo, StringInfo inBuf) } static void -ProcessCoordinatorCommand(GTMProxy_ConnectionInfo *conninfo, GTM_Conn *gtm_conn, +ProcessPGXCNodeCommand(GTMProxy_ConnectionInfo *conninfo, GTM_Conn *gtm_conn, GTM_MessageType mtype, StringInfo message) { - GTM_PGXCNodeId cid; + GTMProxy_CommandData cmd_data; - cid = pq_getmsgint(message, sizeof (GTM_PGXCNodeId)); - - switch (mtype) + 
/* + * For Node registering, proxy number is also saved and registered on GTM with node. + * So get and modify the register message in consequence. + */ + switch(mtype) { - case MSG_UNREGISTER_COORD: - GTMProxy_UnregisterPGXCNode(conninfo, cid); + case MSG_NODE_REGISTER: + { + int len; + MemoryContext oldContext; + char remote_host[NI_MAXHOST]; + char remote_port[NI_MAXSERV]; + + /* Get Remote IP and port from Conn structure to register */ + remote_host[0] = '\0'; + remote_port[0] = '\0'; + + if (gtm_getnameinfo_all(&conninfo->con_port->raddr.addr, + conninfo->con_port->raddr.salen, + remote_host, sizeof(remote_host), + remote_port, sizeof(remote_port), + NI_NUMERICSERV)) + { + int ret = gtm_getnameinfo_all(&conninfo->con_port->raddr.addr, + conninfo->con_port->raddr.salen, + remote_host, sizeof(remote_host), + remote_port, sizeof(remote_port), + NI_NUMERICHOST | NI_NUMERICSERV); + + if (ret) + ereport(WARNING, + (errmsg_internal("gtm_getnameinfo_all() failed"))); + } + + memcpy(&cmd_data.cd_reg.type, pq_getmsgbytes(message, sizeof (GTM_PGXCNodeType)), + sizeof (GTM_PGXCNodeType)); + memcpy(&cmd_data.cd_reg.nodenum, pq_getmsgbytes(message, sizeof (GTM_PGXCNodeId)), + sizeof (GTM_PGXCNodeId)); + memcpy(&cmd_data.cd_reg.port, pq_getmsgbytes(message, sizeof (GTM_PGXCNodePort)), + sizeof (GTM_PGXCNodePort)); + memcpy(&cmd_data.cd_reg.proxynum, pq_getmsgbytes(message, sizeof (GTM_PGXCNodeId)), + sizeof (GTM_PGXCNodeId)); + + len = pq_getmsgint(message, sizeof (int)); + cmd_data.cd_reg.datafolder = (char *)pq_getmsgbytes(message, len); + pq_getmsgend(message); + + /* Copy also remote host address in data to be proxied */ + cmd_data.cd_reg.ipaddress = (char *) palloc(strlen(remote_host)); + memcpy(cmd_data.cd_reg.ipaddress, remote_host, strlen(remote_host)); + + /* Registering has to be saved where it can be seen by all the threads */ + oldContext = MemoryContextSwitchTo(TopMostMemoryContext); + + /* Register Node also on Proxy */ + if 
(Recovery_PGXCNodeRegister(cmd_data.cd_reg.type, + cmd_data.cd_reg.nodenum, + cmd_data.cd_reg.port, + GTMProxyID, + NODE_CONNECTED, + remote_host, + cmd_data.cd_reg.datafolder, + false)) + { + ereport(ERROR, + (EINVAL, + errmsg("Failed to Register node"))); + } + + MemoryContextSwitchTo(oldContext); + + GTMProxy_ProxyPGXCNodeCommand(conninfo, gtm_conn, mtype, cmd_data); break; + } + case MSG_NODE_UNREGISTER: + { + MemoryContext oldContext; + + memcpy(&cmd_data.cd_reg.type, pq_getmsgbytes(message, sizeof (GTM_PGXCNodeType)), + sizeof (GTM_PGXCNodeType)); + memcpy(&cmd_data.cd_reg.nodenum, pq_getmsgbytes(message, sizeof (GTM_PGXCNodeId)), + sizeof (GTM_PGXCNodeId)); + pq_getmsgend(message); + + /* Unregistering has to be saved in a place where it can be seen by all the threads */ + oldContext = MemoryContextSwitchTo(TopMostMemoryContext); + + /* Unregister Node also on Proxy */ + if (Recovery_PGXCNodeUnregister(cmd_data.cd_reg.type, cmd_data.cd_reg.nodenum, false)) + { + ereport(ERROR, + (EINVAL, + errmsg("Failed to Unregister node"))); + } + + MemoryContextSwitchTo(oldContext); + GTMProxy_ProxyPGXCNodeCommand(conninfo, gtm_conn, mtype, cmd_data); + break; + } default: - Assert(0); /* Shouldn't come here.. keep compiler quite */ + Assert(0); /* Shouldn't come here.. Keep compiler quiet */ } - pq_getmsgend(message); + return; } static void @@ -1317,7 +1449,7 @@ ProcessTransactionCommand(GTMProxy_ConnectionInfo *conninfo, GTM_Conn *gtm_conn, break; default: - Assert(0); /* Shouldn't come here.. keep compiler quite */ + Assert(0); /* Shouldn't come here.. keep compiler quiet */ } } @@ -1367,7 +1499,7 @@ ProcessSnapshotCommand(GTMProxy_ConnectionInfo *conninfo, GTM_Conn *gtm_conn, break; default: - Assert(0); /* Shouldn't come here.. keep compiler quite */ + Assert(0); /* Shouldn't come here.. 
keep compiler quiet */ } } @@ -1427,6 +1559,66 @@ GTMProxy_ProxyCommand(GTMProxy_ConnectionInfo *conninfo, GTM_Conn *gtm_conn, /* + * Proxy the incoming message to the GTM server after adding our own identifier + * to it. Add also in the registration message the GTM Proxy number and rebuilt message. + */ +static void GTMProxy_ProxyPGXCNodeCommand(GTMProxy_ConnectionInfo *conninfo,GTM_Conn *gtm_conn, GTM_MessageType mtype, GTMProxy_CommandData cmd_data) +{ + GTMProxy_CommandInfo *cmdinfo; + GTMProxy_ThreadInfo *thrinfo = GetMyThreadInfo; + GTM_ProxyMsgHeader proxyhdr; + + proxyhdr.ph_conid = conninfo->con_id; + + switch(mtype) + { + case MSG_NODE_REGISTER: + /* Rebuild the message */ + if (gtmpqPutMsgStart('C', true, gtm_conn) || + gtmpqPutnchar((char *)&proxyhdr, sizeof (GTM_ProxyMsgHeader), gtm_conn) || + gtmpqPutInt(MSG_NODE_REGISTER, sizeof (GTM_MessageType), gtm_conn) || + gtmpqPutnchar((char *)&cmd_data.cd_reg.type, sizeof(GTM_PGXCNodeType), gtm_conn) || + gtmpqPutnchar((char *)&cmd_data.cd_reg.nodenum, sizeof(GTM_PGXCNodeId), gtm_conn) || + gtmpqPutnchar((char *)&cmd_data.cd_reg.port, sizeof(GTM_PGXCNodePort), gtm_conn) || + gtmpqPutnchar((char *)>MProxyID, sizeof(GTM_PGXCNodeId), gtm_conn) || + gtmpqPutInt(strlen(cmd_data.cd_reg.ipaddress), sizeof (GTM_StrLen), gtm_conn) || + gtmpqPutnchar(cmd_data.cd_reg.ipaddress, strlen(cmd_data.cd_reg.ipaddress), gtm_conn) || + gtmpqPutInt(strlen(cmd_data.cd_reg.datafolder), 4, gtm_conn) || + gtmpqPutnchar(cmd_data.cd_reg.datafolder, strlen(cmd_data.cd_reg.datafolder), gtm_conn)) + elog(ERROR, "Error proxing data"); + break; + + case MSG_NODE_UNREGISTER: + if (gtmpqPutMsgStart('C', true, gtm_conn) || + gtmpqPutnchar((char *)&proxyhdr, sizeof (GTM_ProxyMsgHeader), gtm_conn) || + gtmpqPutInt(MSG_NODE_UNREGISTER, sizeof (GTM_MessageType), gtm_conn) || + gtmpqPutnchar((char *)&cmd_data.cd_reg.type, sizeof(GTM_PGXCNodeType), gtm_conn) || + gtmpqPutnchar((char *)&cmd_data.cd_reg.nodenum, sizeof(GTM_PGXCNodeId), gtm_conn)) + 
elog(ERROR, "Error proxing data"); + break; + + default: + Assert(0); /* Shouldn't come here.. Keep compiler quiet */ + } + + /* + * Add the message to the pending command list + */ + cmdinfo = palloc0(sizeof (GTMProxy_CommandInfo)); + cmdinfo->ci_mtype = mtype; + cmdinfo->ci_conn = conninfo; + cmdinfo->ci_res_index = 0; + thrinfo->thr_processed_commands = lappend(thrinfo->thr_processed_commands, cmdinfo); + + /* Finish the message. */ + if (gtmpqPutMsgEnd(gtm_conn)) + elog(ERROR, "Error finishing the message"); + + return; +} + + +/* * Record the incoming message as per its type. After all messages of this type * are collected, they will be sent in a single message to the GTM server. */ @@ -1449,32 +1641,29 @@ GTMProxy_CommandPending(GTMProxy_ConnectionInfo *conninfo, GTM_MessageType mtype return; } + +/* + * Register PGXC Node Connection in Proxy + * Registery on GTM is made with MSG_NODE_REGISTER message type when node is launched. + */ static void -GTMProxy_RegisterPGXCNode(GTMProxy_ConnectionInfo *conninfo, GTM_PGXCNodeId cid, GTM_PGXCNodeType remote_type, bool is_postmaster) +GTMProxy_RegisterPGXCNode(GTMProxy_ConnectionInfo *conninfo, + GTM_PGXCNodeId cid, + GTM_PGXCNodeType remote_type, + bool is_postmaster) { - elog(DEBUG3, "Registering PGXC Node with cid %d", cid); + elog(DEBUG3, "Registering PGXC Node with id %d", cid); conninfo->con_port->pgxc_node_id = cid; conninfo->con_port->remote_type = remote_type; conninfo->con_port->is_postmaster = is_postmaster; } - -static void -GTMProxy_UnregisterPGXCNode(GTMProxy_ConnectionInfo *conninfo, GTM_PGXCNodeId cid) -{ - /* - * Do a clean shutdown - */ - return; -} - - static void GTMProxy_HandshakeConnection(GTMProxy_ConnectionInfo *conninfo) { /* * We expect a startup message at the very start. 
The message type is - * REGISTER_COORD, followed by the 4 byte coordinator ID + * REGISTER_COORD, followed by the 4 byte PGXC node ID */ char startup_type; GTM_StartupPacket sp; @@ -1499,7 +1688,7 @@ GTMProxy_HandshakeConnection(GTMProxy_ConnectionInfo *conninfo) if (pq_getmessage(conninfo->con_port, &inBuf, 0)) ereport(ERROR, (EPROTO, - errmsg("Expecting coordinator ID, but received EOF"))); + errmsg("Expecting PGXC Node ID, but received EOF"))); memcpy(&sp, pq_getmsgbytes(&inBuf, sizeof (GTM_StartupPacket)), @@ -1526,29 +1715,47 @@ GTMProxy_HandleDisconnect(GTMProxy_ConnectionInfo *conninfo, GTM_Conn *gtm_conn) { GTM_ProxyMsgHeader proxyhdr; - conninfo->con_disconnected = true; - if (conninfo->con_port->sock > 0) - StreamClose(conninfo->con_port->sock); - ConnFree(conninfo->con_port); - conninfo->con_port = NULL; - proxyhdr.ph_conid = conninfo->con_id; + /* Mark node as disconnected if it is a postmaster backend */ + Recovery_PGXCNodeDisconnect(conninfo->con_port); /* Start the message. */ if (gtmpqPutMsgStart('C', true, gtm_conn) || gtmpqPutnchar((char *)&proxyhdr, sizeof (GTM_ProxyMsgHeader), gtm_conn) || - gtmpqPutInt(MSG_BACKEND_DISCONNECT, sizeof (GTM_MessageType), gtm_conn)) + gtmpqPutInt(MSG_BACKEND_DISCONNECT, sizeof (GTM_MessageType), gtm_conn) || + gtmpqPutc(conninfo->con_port->is_postmaster, gtm_conn)) elog(ERROR, "Error proxing data"); + /* + * Then send node type and node number if backend is a postmaster to + * disconnect the correct node. + */ + if (conninfo->con_port->is_postmaster) + { + if (gtmpqPutnchar((char *)&conninfo->con_port->remote_type, + sizeof(GTM_PGXCNodeType), gtm_conn) || + gtmpqPutnchar((char *)&conninfo->con_port->pgxc_node_id, + sizeof(GTM_PGXCNodeId), gtm_conn)) + elog(ERROR, "Error proxing data"); + } + /* Finish the message. 
*/ if (gtmpqPutMsgEnd(gtm_conn)) elog(ERROR, "Error finishing the message"); + conninfo->con_disconnected = true; + if (conninfo->con_port->sock > 0) + StreamClose(conninfo->con_port->sock); + ConnFree(conninfo->con_port); + conninfo->con_port = NULL; + + proxyhdr.ph_conid = conninfo->con_id; + return; } /* - * Proces all the pending messages now. + * Process all the pending messages now. */ static void GTMProxy_ProcessPendingCommands(GTMProxy_ThreadInfo *thrinfo) @@ -2038,3 +2245,139 @@ DeleteLockFile(const char *filename) "it could not be removed. Please remove the file " "by hand and try again."))); } + +/* + * Unregister Proxy on GTM + */ +static void +UnregisterProxy(void) +{ + GTM_PGXCNodeType type = PGXC_NODE_GTM_PROXY; + GTM_Result *res = NULL; + time_t finish_time; + + if (!master_conn || GTMPQstatus(master_conn) != CONNECTION_OK) + master_conn = ConnectGTM(); + if (!master_conn) + goto failed; + + if (gtmpqPutMsgStart('C', true, master_conn) || + gtmpqPutInt(MSG_NODE_UNREGISTER, sizeof (GTM_MessageType), master_conn) || + gtmpqPutnchar((char *)&type, sizeof(GTM_PGXCNodeType), master_conn) || + gtmpqPutnchar((char *)>MProxyID, sizeof(GTM_PGXCNodeId), master_conn)) + goto failed; + + /* Finish the message. */ + if (gtmpqPutMsgEnd(master_conn)) + goto failed; + + /* Flush to ensure backend gets it. 
*/ + if (gtmpqFlush(master_conn)) + goto failed; + + finish_time = time(NULL) + PROXY_CLIENT_TIMEOUT; + if (gtmpqWaitTimed(true, false, master_conn, finish_time) || + gtmpqReadData(master_conn) < 0) + goto failed; + + if ((res = GTMPQgetResult(master_conn)) == NULL) + goto failed; + + /* Check on node type and node number */ + if (res->gr_status == 0) + { + Assert(res->gr_resdata.grd_node.type == type); + Assert(res->gr_resdata.grd_node.nodenum == GTMProxyID); + } + + /* Disconnect cleanly as Proxy is shutting down */ + GTMPQfinish(master_conn); + + return; + +failed: + return elog(ERROR, "can not Unregister Proxy on GTM"); +} + +/* + * Register Proxy on GTM + */ +static void +RegisterProxy(void) +{ + GTM_PGXCNodeType type = PGXC_NODE_GTM_PROXY; + GTM_PGXCNodePort port = (GTM_PGXCNodePort) GTMProxyPortNumber; + GTM_Result *res = NULL; + GTM_PGXCNodeId proxynum = 0; + time_t finish_time; + + master_conn = ConnectGTM(); + if (!master_conn) + goto failed; + + /* + * As this node is itself a Proxy it registers 0 as Proxy ID on GTM + * as it doesn't go through any other proxy. + */ + if (gtmpqPutMsgStart('C', true, master_conn) || + gtmpqPutInt(MSG_NODE_REGISTER, sizeof (GTM_MessageType), master_conn) || + gtmpqPutnchar((char *)&type, sizeof(GTM_PGXCNodeType), master_conn) || + gtmpqPutnchar((char *)>MProxyID, sizeof(GTM_PGXCNodeId), master_conn) || /* nodenum */ + gtmpqPutnchar((char *)&port, sizeof(GTM_PGXCNodePort), master_conn) || + gtmpqPutnchar((char *)&proxynum, sizeof(GTM_PGXCNodeId), master_conn) || + gtmpqPutInt(strlen(GTMProxyDataDir), 4, master_conn) || + gtmpqPutnchar(GTMProxyDataDir, strlen(GTMProxyDataDir), master_conn)) + goto failed; + + /* Finish the message. */ + if (gtmpqPutMsgEnd(master_conn)) + goto failed; + + /* Flush to ensure backend gets it. 
*/ + if (gtmpqFlush(master_conn)) + goto failed; + + finish_time = time(NULL) + PROXY_CLIENT_TIMEOUT; + if (gtmpqWaitTimed(true, false, master_conn, finish_time) || + gtmpqReadData(master_conn) < 0) + goto failed; + + if ((res = GTMPQgetResult(master_conn)) == NULL) + goto failed; + + if (res->gr_status == 0) + { + Assert(res->gr_resdata.grd_node.type == type); + Assert(res->gr_resdata.grd_node.nodenum == GTMProxyID); + } + + return; + +failed: + elog(ERROR, "can not register Proxy on GTM"); +} + +static GTM_Conn* +ConnectGTM(void) +{ + char conn_str[256]; + GTM_Conn *conn; + + sprintf(conn_str, "host=%s port=%d pgxc_node_id=%d remote_type=%d postmaster=1", + GTMServerHost, GTMServerPortNumber, GTMProxyID, PGXC_NODE_GTM_PROXY_POSTMASTER); + + conn = PQconnectGTM(conn_str); + if (GTMPQstatus(conn) != CONNECTION_OK) + { + int save_errno = errno; + + elog(ERROR, "can not connect to GTM"); + + errno = save_errno; + + GTMPQfinish(conn); + conn = NULL; + } + + return conn; +} diff --git a/src/gtm/recovery/Makefile b/src/gtm/recovery/Makefile new file mode 100644 index 0000000..5092a56 --- /dev/null +++ b/src/gtm/recovery/Makefile @@ -0,0 +1,24 @@ +# Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + +top_build_dir=../.. 
+include $(top_build_dir)/gtm/Makefile.global + + +NAME=gtmrecovery +SO_MAJOR_VERSION= 1 +SO_MINOR_VERSION= 0 + +OBJS=register.o ../client/libgtmclient.a + +all:all-lib + +include $(top_build_dir)/Makefile.shlib + +clean: + rm -f $(OBJS) + rm -f libgtmrecovery.a libgtmrecovery.so libgtmrecovery.so.1 libgtmrecovery.so.1.0 + +distclean: clean + +maintainer-clean: distclean + diff --git a/src/gtm/recovery/register.c b/src/gtm/recovery/register.c new file mode 100644 index 0000000..bb5cfc3 --- /dev/null +++ b/src/gtm/recovery/register.c @@ -0,0 +1,779 @@ +/*------------------------------------------------------------------------- + * + * register.c + * PGXC Node Register on GTM and GTM Proxy, node registering functions + * + * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * Portions Copyright (c) 2010 Nippon Telegraph and Telephone Corporation + * + * + * IDENTIFICATION + * $PostgreSQL$ + * + *------------------------------------------------------------------------- + */ + +#include <fcntl.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "gtm/gtm_c.h" +#include "gtm/gtm.h" +#include "gtm/register.h" +#include "gtm/assert.h" +#include <stdio.h> +#include "gtm/libpq.h" +#include "gtm/pqformat.h" +#include "gtm/gtm_msg.h" +#include "gtm/stringinfo.h" +#include "gtm/gtm_ip.h" + +#define GTM_NODE_FILE "register.node" +#define NODE_HASH_TABLE_SIZE 16 +#define GTM_NODE_FILE_MAX_PATH 1024 + +typedef struct GTM_NodeInfoHashBucket +{ + List *nhb_list; + GTM_RWLock nhb_lock; +} GTM_PGXCNodeInfoHashBucket; + +static char GTMPGXCNodeFile[GTM_NODE_FILE_MAX_PATH]; + +/* Lock access of record file when necessary */ +static GTM_RWLock RegisterFileLock; + +static int NodeRegisterMagic = 0xeaeaeaea; +static int NodeUnregisterMagic = 0xebebebeb; +static int NodeEndMagic = 0xefefefef; + +static GTM_PGXCNodeInfoHashBucket GTM_PGXCNodes[NODE_HASH_TABLE_SIZE]; + +static 
GTM_PGXCNodeInfo *pgxcnode_find_info(GTM_PGXCNodeType type, + GTM_PGXCNodeId nodenum); +static uint32 pgxcnode_gethash(GTM_PGXCNodeId nodenum); +static int pgxcnode_remove_info(GTM_PGXCNodeInfo *node); +static int pgxcnode_add_info(GTM_PGXCNodeInfo *node); +static char *pgxcnode_copy_char(const char *str); + +#define pgxcnode_type_equal(type1,type2) (type1 == type2) +#define pgxcnode_nodenum_equal(num1,num2) (num1 == num2) +#define pgxcnode_port_equal(port1,port2) (port1 == port2) + +/* + * Find the pgxcnode info structure for the given node type and number key. + */ +static GTM_PGXCNodeInfo * +pgxcnode_find_info(GTM_PGXCNodeType type, + GTM_PGXCNodeId nodenum) +{ + uint32 hash = pgxcnode_gethash(nodenum); + GTM_PGXCNodeInfoHashBucket *bucket; + ListCell *elem; + GTM_PGXCNodeInfo *curr_nodeinfo = NULL; + + bucket = >M_PGXCNodes[hash]; + + GTM_RWLockAcquire(&bucket->nhb_lock, GTM_LOCKMODE_READ); + + foreach(elem, bucket->nhb_list) + { + curr_nodeinfo = (GTM_PGXCNodeInfo *) lfirst(elem); + if (pgxcnode_type_equal(curr_nodeinfo->type, type) && + pgxcnode_nodenum_equal(curr_nodeinfo->nodenum, nodenum)) + break; + curr_nodeinfo = NULL; + } + + GTM_RWLockRelease(&bucket->nhb_lock); + + return curr_nodeinfo; +} + +/* + * Get the Hash Key depending on the node number + * We do not except to have hundreds of nodes yet, + * This function could be replaced by a better one + * such as a double hash function indexed on type and Node Number + */ +static uint32 +pgxcnode_gethash(GTM_PGXCNodeId nodenum) +{ + uint32 hash = 0; + + hash = (uint32) nodenum; + + return (hash % NODE_HASH_TABLE_SIZE); +} + +/* + * Remove a PGXC Node Info structure from the global hash table + */ +static int +pgxcnode_remove_info(GTM_PGXCNodeInfo *nodeinfo) +{ + uint32 hash = pgxcnode_gethash(nodeinfo->nodenum); + GTM_PGXCNodeInfoHashBucket *bucket; + + bucket = >M_PGXCNodes[hash]; + + GTM_RWLockAcquire(&bucket->nhb_lock, GTM_LOCKMODE_WRITE); + GTM_RWLockAcquire(&nodeinfo->node_lock, GTM_LOCKMODE_WRITE); 
+ + bucket->nhb_list = list_delete(bucket->nhb_list, nodeinfo); + + GTM_RWLockRelease(&nodeinfo->node_lock); + GTM_RWLockRelease(&bucket->nhb_lock); + + return 0; +} + +/* + * Add a PGXC Node info structure to the global hash table + */ +static int +pgxcnode_add_info(GTM_PGXCNodeInfo *nodeinfo) +{ + uint32 hash = pgxcnode_gethash(nodeinfo->nodenum); + GTM_PGXCNodeInfoHashBucket *bucket; + ListCell *elem; + + bucket = >M_PGXCNodes[hash]; + + GTM_RWLockAcquire(&bucket->nhb_lock, GTM_LOCKMODE_WRITE); + + foreach(elem, bucket->nhb_list) + { + GTM_PGXCNodeInfo *curr_nodeinfo = NULL; + curr_nodeinfo = (GTM_PGXCNodeInfo *) lfirst(elem); + + /* GTM Proxy are always registered as they do not have Identification numbers yet */ + if (pgxcnode_type_equal(curr_nodeinfo->type, nodeinfo->type) && + pgxcnode_nodenum_equal(curr_nodeinfo->nodenum, nodeinfo->nodenum)) + { + if (curr_nodeinfo->status == NODE_CONNECTED) + { + GTM_RWLockRelease(&bucket->nhb_lock); + ereport(LOG, + (EEXIST, + errmsg("Node with the given ID number already exists"))); + return EEXIST; + } + else + { + /* + * Check if its data (port, datafolder and remote IP) has changed + * and modify it + */ + if (!pgxcnode_port_equal(curr_nodeinfo->port, nodeinfo->port)) + curr_nodeinfo->port = nodeinfo->port; + + if (strlen(curr_nodeinfo->datafolder) == strlen(nodeinfo->datafolder)) + { + if (memcpy(curr_nodeinfo->datafolder, + nodeinfo->datafolder, + strlen(nodeinfo->datafolder)) != 0) + { + pfree(curr_nodeinfo->ipaddress); + curr_nodeinfo->ipaddress = nodeinfo->ipaddress; + } + } + + if (strlen(curr_nodeinfo->ipaddress) == strlen(nodeinfo->ipaddress)) + { + if (memcpy(curr_nodeinfo->datafolder, + nodeinfo->datafolder, + strlen(nodeinfo->datafolder)) != 0) + { + pfree(curr_nodeinfo->datafolder); + curr_nodeinfo->datafolder = nodeinfo->datafolder; + } + } + + /* Reconnect a disconnected node */ + curr_nodeinfo->status = NODE_CONNECTED; + GTM_RWLockRelease(&bucket->nhb_lock); + return 0; + } + } + } + + /* + * Safe to 
add the structure to the list + */ + bucket->nhb_list = lappend(bucket->nhb_list, nodeinfo); + GTM_RWLockRelease(&bucket->nhb_lock); + + return 0; +} + +/* + * Makes a copy of given string in TopMostMemoryContext + */ +static char * +pgxcnode_copy_char(const char *str) +{ + char *retstr = NULL; + + /* + * We must use the TopMostMemoryContext because the node information is + * not bound to a thread and can outlive any of the thread specific + * contextes. + */ + retstr = (char *) MemoryContextAlloc(TopMostMemoryContext, + strlen(str)); + + if (retstr == NULL) + ereport(ERROR, (ENOMEM, errmsg("Out of memory"))); + + memcpy(retstr, str, strlen(str)); + + return retstr; +} + +/* + * Unregister the given node + */ +int +Recovery_PGXCNodeUnregister(GTM_PGXCNodeType type, GTM_PGXCNodeId nodenum, bool in_recovery) +{ + GTM_PGXCNodeInfo *nodeinfo = pgxcnode_find_info(type, nodenum); + + if (nodeinfo != NULL) + { + pgxcnode_remove_info(nodeinfo); + + /* Add a record to file on disk saying that this node has been unregistered correctly */ + if (!in_recovery) + Recovery_RecordRegisterInfo(nodeinfo, false); + + pfree(nodeinfo->ipaddress); + pfree(nodeinfo->datafolder); + pfree(nodeinfo); + } + else + return EINVAL; + + return 0; +} + +int +Recovery_PGXCNodeRegister(GTM_PGXCNodeType type, + GTM_PGXCNodeId nodenum, + GTM_PGXCNodePort port, + GTM_PGXCNodeId proxynum, + GTM_PGXCNodeStatus status, + char *ipaddress, + char *datafolder, + bool in_recovery) +{ + GTM_PGXCNodeInfo *nodeinfo = NULL; + int errcode = 0; + + nodeinfo = (GTM_PGXCNodeInfo *) palloc(sizeof (GTM_PGXCNodeInfo)); + + if (nodeinfo == NULL) + ereport(ERROR, (ENOMEM, errmsg("Out of memory"))); + + GTM_RWLockInit(&nodeinfo->node_lock); + + /* Fill in structure */ + nodeinfo->type = type; + nodeinfo->nodenum = nodenum; + nodeinfo->port = port; + nodeinfo->proxynum = proxynum; + nodeinfo->datafolder = pgxcnode_copy_char(datafolder); + nodeinfo->ipaddress = pgxcnode_copy_char(ipaddress); + nodeinfo->status = status; + 
+ /* Add PGXC Node Info to the global hash table */ + errcode = pgxcnode_add_info(nodeinfo); + + /* + * Add a Record to file disk saying that this node + * with given data has been correctly registered + */ + if (!in_recovery && errcode == 0) + Recovery_RecordRegisterInfo(nodeinfo, true); + + return errcode; +} + + +/* + * Process MSG_NODE_REGISTER + */ +void +ProcessPGXCNodeRegister(Port *myport, StringInfo message) +{ + GTM_PGXCNodeType type; + GTM_PGXCNodeId nodenum, proxynum; + GTM_PGXCNodePort port; + char remote_host[NI_MAXHOST]; + char remote_port[NI_MAXSERV]; + char *datafolder; + char *ipaddress; + MemoryContext oldContext; + int strlen; + StringInfoData buf; + + /* Get the Remote node IP and port to register it */ + remote_host[0] = '\0'; + remote_port[0] = '\0'; + + if (myport->remote_type != PGXC_NODE_GTM_PROXY) + { + if (gtm_getnameinfo_all(&myport->raddr.addr, myport->raddr.salen, + remote_host, sizeof(remote_host), + remote_port, sizeof(remote_port), + NI_NUMERICSERV)) + { + int ret = gtm_getnameinfo_all(&myport->raddr.addr, myport->raddr.salen, + remote_host, sizeof(remote_host), + remote_port, sizeof(remote_port), + NI_NUMERICHOST | NI_NUMERICSERV); + + if (ret) + ereport(WARNING, + (errmsg_internal("gtm_getnameinfo_all() failed"))); + } + } + + /* Read Node Type and number */ + memcpy(&type, pq_getmsgbytes(message, sizeof (GTM_PGXCNodeType)), + sizeof (GTM_PGXCNodeType)); + memcpy(&nodenum, pq_getmsgbytes(message, sizeof (GTM_PGXCNodeId)), + sizeof (GTM_PGXCNodeId)); + + /* Read Port Number */ + memcpy(&port, pq_getmsgbytes(message, sizeof (GTM_PGXCNodePort)), + sizeof (GTM_PGXCNodePort)); + + /* Read Proxy ID number (0 if no proxy used) */ + memcpy(&proxynum, pq_getmsgbytes(message, sizeof (GTM_PGXCNodeId)), + sizeof (GTM_PGXCNodeId)); + + /* + * Message is received from a proxy, get also the remote node address + * In the case a proxy registering itself, the remote address + * is directly taken from socket. 
+ */ + if (myport->remote_type == PGXC_NODE_GTM_PROXY && + !myport->is_postmaster) + { + strlen = pq_getmsgint(message, sizeof (GTM_StrLen)); + ipaddress = (char *)pq_getmsgbytes(message, strlen); + } + else + ipaddress = remote_host; + + /* + * Finish by reading Data Folder (length and then string) + */ + + strlen = pq_getmsgint(message, sizeof (GTM_StrLen)); + datafolder = (char *)pq_getmsgbytes(message, strlen); + + /* + * We must use the TopMostMemoryContext because the Node ID information is + * not bound to a thread and can outlive any of the thread specific + * contextes. + */ + oldContext = MemoryContextSwitchTo(TopMostMemoryContext); + + if (Recovery_PGXCNodeRegister(type, nodenum, port, + proxynum, NODE_CONNECTED, + ipaddress, datafolder, false)) + { + ereport(ERROR, + (EINVAL, + errmsg("Failed to Register node"))); + } + + MemoryContextSwitchTo(oldContext); + + pq_getmsgend(message); + + /* + * Send a SUCCESS message back to the client + */ + pq_beginmessage(&buf, 'S'); + pq_sendint(&buf, NODE_REGISTER_RESULT, 4); + if (myport->remote_type == PGXC_NODE_GTM_PROXY) + { + GTM_ProxyMsgHeader proxyhdr; + proxyhdr.ph_conid = myport->conn_id; + pq_sendbytes(&buf, (char *)&proxyhdr, sizeof (GTM_ProxyMsgHeader)); + } + pq_sendbytes(&buf, (char *)&type, sizeof(GTM_PGXCNodeType)); + pq_sendbytes(&buf, (char *)&nodenum, sizeof(GTM_PGXCNodeId)); + pq_endmessage(myport, &buf); + + if (myport->remote_type != PGXC_NODE_GTM_PROXY) + pq_flush(myport); +} + +/* + * Process MSG_NODE_UNREGISTER + */ +void +ProcessPGXCNodeUnregister(Port *myport, StringInfo message) +{ + GTM_PGXCNodeType type; + GTM_PGXCNodeId nodenum; + MemoryContext oldContext; + StringInfoData buf; + + /* Read Node Type and number */ + memcpy(&type, pq_getmsgbytes(message, sizeof (GTM_PGXCNodeType)), + sizeof (GTM_PGXCNodeType)); + memcpy(&nodenum, pq_getmsgbytes(message, sizeof (GTM_PGXCNodeId)), + sizeof (GTM_PGXCNodeId)); + + /* + * We must use the TopMostMemoryContext because the Node ID information is 
+ * not bound to a thread and can outlive any of the thread specific + * contextes. + */ + oldContext = MemoryContextSwitchTo(TopMostMemoryContext); + + if (Recovery_PGXCNodeUnregister(type, nodenum, false)) + { + ereport(ERROR, + (EINVAL, + errmsg("Failed to Unregister node"))); + } + + MemoryContextSwitchTo(oldContext); + + pq_getmsgend(message); + + /* + * Send a SUCCESS message back to the client + */ + pq_beginmessage(&buf, 'S'); + pq_sendint(&buf, NODE_UNREGISTER_RESULT, 4); + if (myport->remote_type == PGXC_NODE_GTM_PROXY) + { + GTM_ProxyMsgHeader proxyhdr; + proxyhdr.ph_conid = myport->conn_id; + pq_sendbytes(&buf, (char *)&proxyhdr, sizeof (GTM_ProxyMsgHeader)); + } + pq_sendbytes(&buf, (char *)&type, sizeof(GTM_PGXCNodeType)); + pq_sendbytes(&buf, (char *)&nodenum, sizeof(GTM_PGXCNodeId)); + pq_endmessage(myport, &buf); + + if (myport->remote_type != PGXC_NODE_GTM_PROXY) + pq_flush(myport); +} + +/* + * Called at GTM shutdown, rewrite on disk register information + * and write only data of nodes currently registered. 
+ */ +void +Recovery_SaveRegisterInfo(void) +{ + GTM_PGXCNodeInfoHashBucket *bucket; + ListCell *elem; + GTM_PGXCNodeInfo *nodeinfo = NULL; + int hash, ctlfd; + char filebkp[GTM_NODE_FILE_MAX_PATH]; + + GTM_RWLockAcquire(&RegisterFileLock, GTM_LOCKMODE_WRITE); + + /* Create a backup file in case their is a problem during file writing */ + sprintf(filebkp, "%s.bkp", GTMPGXCNodeFile); + + ctlfd = open(filebkp, O_WRONLY | O_CREAT | O_TRUNC, + S_IRUSR | S_IWUSR); + + if (ctlfd < 0) + { + GTM_RWLockRelease(&RegisterFileLock); + return; + } + + for (hash = 0; hash < NODE_HASH_TABLE_SIZE; hash++) + { + bucket = >M_PGXCNodes[hash]; + + GTM_RWLockAcquire(&bucket->nhb_lock, GTM_LOCKMODE_READ); + + /* Write one by one information about registered nodes */ + foreach(elem, bucket->nhb_list) + { + int len; + + nodeinfo = (GTM_PGXCNodeInfo *) lfirst(elem); + if (nodeinfo == NULL) + break; + + GTM_RWLockAcquire(&nodeinfo->node_lock, GTM_LOCKMODE_READ); + + write(ctlfd, &NodeRegisterMagic, sizeof (NodeRegisterMagic)); + + write(ctlfd, &nodeinfo->type, sizeof (GTM_PGXCNodeType)); + write(ctlfd, &nodeinfo->nodenum, sizeof (GTM_PGXCNodeId)); + + write(ctlfd, &nodeinfo->port, sizeof (GTM_PGXCNodePort)); + write(ctlfd, &nodeinfo->proxynum, sizeof (GTM_PGXCNodeId)); + write(ctlfd, &nodeinfo->status, sizeof (GTM_PGXCNodeStatus)); + + len = strlen(nodeinfo->ipaddress); + write(ctlfd, &len, sizeof(uint32)); + write(ctlfd, nodeinfo->ipaddress, len); + + len = strlen(nodeinfo->datafolder); + write(ctlfd, &len, sizeof(uint32)); + write(ctlfd, nodeinfo->datafolder, len); + + write(ctlfd, &NodeEndMagic, sizeof(NodeEndMagic)); + + GTM_RWLockRelease(&nodeinfo->node_lock); + } + + GTM_RWLockRelease(&bucket->nhb_lock); + } + + close(ctlfd); + + /* Replace former file by backup file */ + if (rename(filebkp, GTMPGXCNodeFile) < 0) + { + elog(LOG, "Cannot save register file"); + } + + GTM_RWLockRelease(&RegisterFileLock); +} + +/* + * Add a Register or Unregister record on PGXC Node file on disk. 
+ */ +void +Recovery_RecordRegisterInfo(GTM_PGXCNodeInfo *nodeinfo, bool is_register) +{ + int ctlfd; + + GTM_RWLockAcquire(&RegisterFileLock, GTM_LOCKMODE_WRITE); + + ctlfd = open(GTMPGXCNodeFile, O_WRONLY | O_CREAT | O_APPEND, + S_IRUSR | S_IWUSR); + + if (ctlfd == -1 || nodeinfo == NULL) + { + GTM_RWLockRelease(&RegisterFileLock); + return; + } + + GTM_RWLockAcquire(&nodeinfo->node_lock, GTM_LOCKMODE_READ); + + if (is_register) + write(ctlfd, &NodeRegisterMagic, sizeof (NodeRegisterMagic)); + else + write(ctlfd, &NodeUnregisterMagic, sizeof (NodeUnregisterMagic)); + + write(ctlfd, &nodeinfo->type, sizeof (GTM_PGXCNodeType)); + write(ctlfd, &nodeinfo->nodenum, sizeof (GTM_PGXCNodeId)); + + if (is_register) + { + int len; + + write(ctlfd, &nodeinfo->port, sizeof (GTM_PGXCNodePort)); + write(ctlfd, &nodeinfo->proxynum, sizeof (GTM_PGXCNodeId)); + write(ctlfd, &nodeinfo->status, sizeof (GTM_PGXCNodeStatus)); + + len = strlen(nodeinfo->ipaddress); + write(ctlfd, &len, sizeof(uint32)); + write(ctlfd, nodeinfo->ipaddress, len); + + len = strlen(nodeinfo->datafolder); + write(ctlfd, &len, sizeof(uint32)); + write(ctlfd, nodeinfo->datafolder, len); + } + + write(ctlfd, &NodeEndMagic, sizeof(NodeEndMagic)); + + GTM_RWLockRelease(&nodeinfo->node_lock); + + close(ctlfd); + GTM_RWLockRelease(&RegisterFileLock); +} + +void +Recovery_RestoreRegisterInfo(void) +{ + int magic; + int ctlfd; + + /* This is made when GTM/Proxy restarts, so it is not necessary to take a lock */ + ctlfd = open(GTMPGXCNodeFile, O_RDONLY); + + if (ctlfd == -1) + return; + + while (read(ctlfd, &magic, sizeof (NodeRegisterMagic)) == sizeof (NodeRegisterMagic)) + { + GTM_PGXCNodeType type; + GTM_PGXCNodeId nodenum, proxynum; + GTM_PGXCNodePort port; + GTM_PGXCNodeStatus status; + char *ipaddress, *datafolder; + int len; + + if (magic != NodeRegisterMagic && magic != NodeUnregisterMagic) + { + elog(WARNING, "Start magic mismatch %x", magic); + break; + } + + read(ctlfd, &type, sizeof (GTM_PGXCNodeType)); + 
read(ctlfd, &nodenum, sizeof (GTM_PGXCNodeId)); + + if (magic == NodeRegisterMagic) + { + read(ctlfd, &port, sizeof (GTM_PGXCNodePort)); + read(ctlfd, &proxynum, sizeof (GTM_PGXCNodeId)); + read(ctlfd, &status, sizeof (GTM_PGXCNodeStatus)); + + /* Read size of ipaddress string */ + read(ctlfd, &len, sizeof (uint32)); + ipaddress = (char *) palloc(len); + read(ctlfd, ipaddress, len); + + /* Read size of datafolder string */ + read(ctlfd, &len, sizeof (uint32)); + datafolder = (char *) palloc(len); + read(ctlfd, datafolder, len); + } + + /* Rebuild based on the records */ + if (magic == NodeRegisterMagic) + Recovery_PGXCNodeRegister(type, nodenum, port, proxynum, status, + ipaddress, datafolder, true); + else + Recovery_PGXCNodeUnregister(type, nodenum, true); + + read(ctlfd, &magic, sizeof(NodeEndMagic)); + + if (magic != NodeEndMagic) + { + elog(WARNING, "Corrupted control file"); + return; + } + } + + close(ctlfd); +} + +void +Recovery_SaveRegisterFileName(char *dir) +{ + if (!dir) + return; + + sprintf(GTMPGXCNodeFile, "%s/%s", dir, GTM_NODE_FILE); +} + +/* + * Disconnect node whose master connection has been cut with GTM + */ +void +Recovery_PGXCNodeDisconnect(Port *myport) +{ + GTM_PGXCNodeType type = myport->remote_type; + GTM_PGXCNodeId nodenum = myport->pgxc_node_id; + GTM_PGXCNodeInfo *nodeinfo = NULL; + MemoryContext oldContext; + + /* Only a master connection can disconnect a node */ + if (!myport->is_postmaster) + return; + + /* + * We must use the TopMostMemoryContext because the Node ID information is + * not bound to a thread and can outlive any of the thread specific + * contextes. 
+ */ + oldContext = MemoryContextSwitchTo(TopMostMemoryContext); + + nodeinfo = pgxcnode_find_info(type, nodenum); + + if (nodeinfo != NULL) + { + GTM_RWLockAcquire(&nodeinfo->node_lock, GTM_LOCKMODE_WRITE); + + nodeinfo->status = NODE_DISCONNECTED; + + GTM_RWLockRelease(&nodeinfo->node_lock); + } + + MemoryContextSwitchTo(oldContext); +} + +int +Recovery_PGXCNodeBackendDisconnect(GTM_PGXCNodeType type, GTM_PGXCNodeId nodenum) +{ + GTM_PGXCNodeInfo *nodeinfo = pgxcnode_find_info(type, nodenum); + int errcode = 0; + + if (nodeinfo != NULL) + { + GTM_RWLockAcquire(&nodeinfo->node_lock, GTM_LOCKMODE_WRITE); + + nodeinfo->status = NODE_DISCONNECTED; + + GTM_RWLockRelease(&nodeinfo->node_lock); + } + else + errcode = -1; + + return errcode; +} + +/* + * Process MSG_BACKEND_DISCONNECT + * + * A Backend has disconnected on a Proxy. + * If this backend is postmaster, mark the referenced node as disconnected. + */ +void +ProcessPGXCNodeBackendDisconnect(Port *myport, StringInfo message) +{ + MemoryContext oldContext; + GTM_PGXCNodeId nodenum; + GTM_PGXCNodeType type; + bool is_postmaster; + + is_postmaster = pq_getmsgbyte(message); + + if (is_postmaster) + { + /* Read Node Type and number */ + memcpy(&type, pq_getmsgbytes(message, sizeof (GTM_PGXCNodeType)), + sizeof (GTM_PGXCNodeType)); + memcpy(&nodenum, pq_getmsgbytes(message, sizeof (GTM_PGXCNodeId)), + sizeof (GTM_PGXCNodeId)); + } + + pq_getmsgend(message); + + if (!is_postmaster) + return; /* Nothing to do */ + + /* + * We must use the TopMostMemoryContext because the Node ID information is + * not bound to a thread and can outlive any of the thread specific + * contextes. 
+ */ + oldContext = MemoryContextSwitchTo(TopMostMemoryContext); + + if (Recovery_PGXCNodeBackendDisconnect(type, nodenum) < 0) + { + elog(LOG, "Cannot disconnect Unregistered node"); + } + + MemoryContextSwitchTo(oldContext); +} diff --git a/src/include/access/gtm.h b/src/include/access/gtm.h index 6687474..3a03d3c 100644 --- a/src/include/access/gtm.h +++ b/src/include/access/gtm.h @@ -43,6 +43,10 @@ extern int CommitPreparedTranGTM(GlobalTransactionId gxid, extern GTM_Snapshot GetSnapshotGTM(GlobalTransactionId gxid, bool canbe_grouped); +/* Node registration APIs with GTM */ +extern int RegisterGTM(GTM_PGXCNodeType type, GTM_PGXCNodePort port, char *datafolder); +extern int UnregisterGTM(GTM_PGXCNodeType type); + /* Sequence interface APIs with GTM */ extern GTM_Sequence GetCurrentValGTM(char *seqname); extern GTM_Sequence GetNextValGTM(char *seqname); diff --git a/src/include/gtm/gtm.h b/src/include/gtm/gtm.h index 77522b2..9075433 100644 --- a/src/include/gtm/gtm.h +++ b/src/include/gtm/gtm.h @@ -131,6 +131,10 @@ extern MemoryContext TopMostMemoryContext; #if 0 +/* Coordinator registration */ +int GTM_RegisterCoordinator(GTM_CoordInfo *cinfo); +int GTM_UnregisterCoordinator(GTM_PGXCNodeId cid); + #endif #endif diff --git a/src/include/gtm/gtm_c.h b/src/include/gtm/gtm_c.h index f918592..e1fc4bd 100644 --- a/src/include/gtm/gtm_c.h +++ b/src/include/gtm/gtm_c.h @@ -54,7 +54,7 @@ typedef enum GTM_PGXCNodeType PGXC_NODE_COORDINATOR, PGXC_NODE_DATANODE, PGXC_NODE_DEFAULT /* In case nothing is associated to connection */ -} GTM_PGXCNodeType; +} GTM_PGXCNodeType; /* * A unique handle to identify transaction at the GTM. 
It could just be diff --git a/src/include/gtm/gtm_client.h b/src/include/gtm/gtm_client.h index ff1befd..b8b6af0 100644 --- a/src/include/gtm/gtm_client.h +++ b/src/include/gtm/gtm_client.h @@ -82,6 +82,12 @@ typedef union GTM_ResultData PGXC_NodeId *coordinators; } grd_txn_get_gid_data; /* TXN_GET_GID_DATA_RESULT */ + struct + { + GTM_PGXCNodeType type; /* NODE_REGISTER */ + GTM_PGXCNodeId nodenum; /* NODE_UNREGISTER */ + } grd_node; + /* * TODO * TXN_GET_STATUS @@ -141,6 +147,13 @@ GTM_SnapshotData *get_snapshot(GTM_Conn *conn, GlobalTransactionId gxid, bool canbe_grouped); /* + * Node Registering management API + */ +int node_register(GTM_Conn *conn, GTM_PGXCNodeType type, GTM_PGXCNodeId nodenum, + GTM_PGXCNodePort port, char *datafolder); +int node_unregister(GTM_Conn *conn, GTM_PGXCNodeType type, GTM_PGXCNodeId nodenum); + +/* * Sequence Management API */ int open_sequence(GTM_Conn *conn, GTM_SequenceKey key, GTM_Sequence increment, diff --git a/src/include/gtm/gtm_msg.h b/src/include/gtm/gtm_msg.h index 77c97ac..16dfaac 100644 --- a/src/include/gtm/gtm_msg.h +++ b/src/include/gtm/gtm_msg.h @@ -17,8 +17,8 @@ typedef enum GTM_MessageType { MSG_TYPE_INVALID, - MSG_REGISTER_COORD, /* Register a Coordinator with GTM */ - MSG_UNREGISTER_COORD, /* Unregister a Coordinator with GTM */ + MSG_NODE_REGISTER, /* Register a PGXC Node with GTM */ + MSG_NODE_UNREGISTER, /* Unregister a PGXC Node with GTM */ MSG_TXN_BEGIN, /* Start a new transaction */ MSG_TXN_BEGIN_GETGXID, /* Start a new transaction and get GXID */ MSG_TXN_BEGIN_GETGXID_MULTI, /* Start multiple new transactions and get GXIDs */ @@ -58,6 +58,8 @@ typedef enum GTM_MessageType typedef enum GTM_ResultType { + NODE_REGISTER_RESULT, + NODE_UNREGISTER_RESULT, TXN_BEGIN_RESULT, TXN_BEGIN_GETGXID_RESULT, TXN_BEGIN_GETGXID_MULTI_RESULT, diff --git a/src/include/gtm/gtm_proxy.h b/src/include/gtm/gtm_proxy.h index 8dc16bc..4c55639 100644 --- a/src/include/gtm/gtm_proxy.h +++ b/src/include/gtm/gtm_proxy.h @@ -141,6 
+141,7 @@ extern int GTMProxy_ThreadRemoveConnection(GTMProxy_ThreadInfo *thrinfo, /* * Command data - the only relevant information right now is the XID + * and data necessary for registering (modification of Proxy number registered) */ typedef union GTMProxy_CommandData { @@ -163,6 +164,16 @@ typedef union GTMProxy_CommandData GlobalTransactionId gxid; GTM_TransactionHandle handle; } cd_snap; + + struct + { + GTM_PGXCNodeType type; + GTM_PGXCNodeId nodenum; + GTM_PGXCNodePort port; + GTM_PGXCNodeId proxynum; + char *datafolder; + char *ipaddress; + } cd_reg; } GTMProxy_CommandData; /* diff --git a/src/include/gtm/gtm_txn.h b/src/include/gtm/gtm_t... [truncated message content] |
From: mason_s <ma...@us...> - 2011-01-12 15:28:09
|
Project "Postgres-XC". The branch, master has been updated via 88b9cd684939307f91e3c77e83c8f597642ee69b (commit) from 07a222d11e4cc561e9aeb1edbd1ee2dd68ba2965 (commit) - Log ----------------------------------------------------------------- commit 88b9cd684939307f91e3c77e83c8f597642ee69b Author: Mason Sharp <ma...@us...> Date: Wed Jan 12 10:27:32 2011 -0500 Block the creation of temp tables until properly supported diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index d3506c8..94d820b 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -379,10 +379,17 @@ DefineRelation(CreateStmt *stmt, char relkind) * code. This is needed because calling code might not expect untrusted * tables to appear in pg_temp at the front of its search path. */ +#ifdef PGXC + if (stmt->relation->istemp && IsUnderPostmaster) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("PG-XC does not yet support temporary tables"))); +#else if (stmt->relation->istemp && InSecurityRestrictedOperation()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("cannot create temporary table within security-restricted operation"))); +#endif /* * Look up the namespace in which we are supposed to create the relation. ----------------------------------------------------------------------- Summary of changes: src/backend/commands/tablecmds.c | 7 +++++++ 1 files changed, 7 insertions(+), 0 deletions(-) hooks/post-receive -- Postgres-XC |
From: mason_s <ma...@us...> - 2011-01-12 15:23:36
|
Project "Postgres-XC". The branch, master has been updated via 07a222d11e4cc561e9aeb1edbd1ee2dd68ba2965 (commit) via 032f391616cc3055d197fb2f737f37614bc03f7f (commit) from 9a57b4e60139aa0799f4b6ea3bc6716b2a974022 (commit) - Log ----------------------------------------------------------------- commit 07a222d11e4cc561e9aeb1edbd1ee2dd68ba2965 Author: Mason Sharp <ma...@us...> Date: Wed Jan 12 10:19:57 2011 -0500 Updated regression tests. In Postgres-XC, we cannot be sure of the return order of results, so the tests were updated with ORDER BY added to many SELECT statements. There are still many tests that fail, but this at least gives us a better baseline to more easily find and fix the open bugs. Written by Benny diff --git a/src/test/regress/expected/abstime.out b/src/test/regress/expected/abstime.out index a04f091..2c1f0a9 100644 --- a/src/test/regress/expected/abstime.out +++ b/src/test/regress/expected/abstime.out @@ -43,80 +43,80 @@ LINE 1: INSERT INTO ABSTIME_TBL (f1) VALUES ('bad date format'); ^ INSERT INTO ABSTIME_TBL (f1) VALUES ('Jun 10, 1843'); -- test abstime operators -SELECT '' AS eight, * FROM ABSTIME_TBL; +SELECT '' AS eight, * FROM ABSTIME_TBL ORDER BY f1; eight | f1 -------+------------------------------ + | -infinity + | Sat May 10 23:59:12 1947 PST + | Wed Dec 31 16:00:00 1969 PST | Sun Jan 14 03:14:21 1973 PST | Mon May 01 00:30:30 1995 PDT - | Wed Dec 31 16:00:00 1969 PST | infinity - | -infinity - | Sat May 10 23:59:12 1947 PST | invalid (7 rows) SELECT '' AS six, * FROM ABSTIME_TBL - WHERE ABSTIME_TBL.f1 < abstime 'Jun 30, 2001'; + WHERE ABSTIME_TBL.f1 < abstime 'Jun 30, 2001' ORDER BY f1; six | f1 -----+------------------------------ - | Sun Jan 14 03:14:21 1973 PST - | Mon May 01 00:30:30 1995 PDT - | Wed Dec 31 16:00:00 1969 PST | -infinity | Sat May 10 23:59:12 1947 PST + | Wed Dec 31 16:00:00 1969 PST + | Sun Jan 14 03:14:21 1973 PST + | Mon May 01 00:30:30 1995 PDT (5 rows) SELECT '' AS six, * FROM ABSTIME_TBL - WHERE ABSTIME_TBL.f1 > 
abstime '-infinity'; + WHERE ABSTIME_TBL.f1 > abstime '-infinity' ORDER BY f1; six | f1 -----+------------------------------ + | Sat May 10 23:59:12 1947 PST + | Wed Dec 31 16:00:00 1969 PST | Sun Jan 14 03:14:21 1973 PST | Mon May 01 00:30:30 1995 PDT - | Wed Dec 31 16:00:00 1969 PST | infinity - | Sat May 10 23:59:12 1947 PST | invalid (6 rows) SELECT '' AS six, * FROM ABSTIME_TBL - WHERE abstime 'May 10, 1947 23:59:12' <> ABSTIME_TBL.f1; + WHERE abstime 'May 10, 1947 23:59:12' <> ABSTIME_TBL.f1 ORDER BY f1; six | f1 -----+------------------------------ + | -infinity + | Wed Dec 31 16:00:00 1969 PST | Sun Jan 14 03:14:21 1973 PST | Mon May 01 00:30:30 1995 PDT - | Wed Dec 31 16:00:00 1969 PST | infinity - | -infinity | invalid (6 rows) SELECT '' AS three, * FROM ABSTIME_TBL - WHERE abstime 'epoch' >= ABSTIME_TBL.f1; + WHERE abstime 'epoch' >= ABSTIME_TBL.f1 ORDER BY f1; three | f1 -------+------------------------------ - | Wed Dec 31 16:00:00 1969 PST | -infinity | Sat May 10 23:59:12 1947 PST + | Wed Dec 31 16:00:00 1969 PST (3 rows) SELECT '' AS four, * FROM ABSTIME_TBL - WHERE ABSTIME_TBL.f1 <= abstime 'Jan 14, 1973 03:14:21'; + WHERE ABSTIME_TBL.f1 <= abstime 'Jan 14, 1973 03:14:21' ORDER BY f1; four | f1 ------+------------------------------ - | Sun Jan 14 03:14:21 1973 PST - | Wed Dec 31 16:00:00 1969 PST | -infinity | Sat May 10 23:59:12 1947 PST + | Wed Dec 31 16:00:00 1969 PST + | Sun Jan 14 03:14:21 1973 PST (4 rows) SELECT '' AS four, * FROM ABSTIME_TBL WHERE ABSTIME_TBL.f1 <?> - tinterval '["Apr 1 1950 00:00:00" "Dec 30 1999 23:00:00"]'; + tinterval '["Apr 1 1950 00:00:00" "Dec 30 1999 23:00:00"]' ORDER BY f1; four | f1 ------+------------------------------ + | Wed Dec 31 16:00:00 1969 PST | Sun Jan 14 03:14:21 1973 PST | Mon May 01 00:30:30 1995 PDT - | Wed Dec 31 16:00:00 1969 PST (3 rows) SELECT '' AS four, f1 AS abstime, diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index a48b450..280bda5 100644 
--- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -277,7 +277,8 @@ SELECT sum2(q1,q2) FROM int8_tbl; -- this should work select ten, sum(distinct four) from onek a group by ten -having exists (select 1 from onek b where sum(distinct a.four) = b.four); +having exists (select 1 from onek b where sum(distinct a.four) = b.four) +order by ten; ten | sum -----+----- 0 | 2 @@ -486,14 +487,15 @@ select min(tenthous) from tenk1 where thousand = 33; -- check parameter propagation into an indexscan subquery select f1, (select min(unique1) from tenk1 where unique1 > f1) AS gt -from int4_tbl; +from int4_tbl +order by f1; f1 | gt -------------+---- + -2147483647 | 0 + -123456 | 0 0 | 1 123456 | - -123456 | 0 2147483647 | - -2147483647 | 0 (5 rows) -- check some cases that were handled incorrectly in 8.3.0 diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out index b14d61f..85af220 100644 --- a/src/test/regress/expected/alter_table.out +++ b/src/test/regress/expected/alter_table.out @@ -138,7 +138,7 @@ ANALYZE tenk1; set enable_seqscan to off; set enable_bitmapscan to off; -- 5 values, sorted -SELECT unique1 FROM tenk1 WHERE unique1 < 5; +SELECT unique1 FROM tenk1 WHERE unique1 < 5 ORDER BY unique1; unique1 --------- 0 @@ -639,13 +639,13 @@ insert into def_test default values; alter table def_test alter column c1 set default 10; alter table def_test alter column c2 set default 'new_default'; insert into def_test default values; -select * from def_test; +select * from def_test order by 1, 2; c1 | c2 ----+----------------- 5 | initial_default + 10 | new_default | initial_default | - 10 | new_default (4 rows) -- set defaults to an incorrect type: this should fail @@ -667,16 +667,16 @@ alter table def_view_test alter column c1 set default 45; insert into def_view_test default values; alter table def_view_test alter column c2 set default 'view_default'; insert into def_view_test default values; 
-select * from def_view_test; +select * from def_view_test order by 1, 2; c1 | c2 ----+----------------- 5 | initial_default + 10 | new_default + 45 | view_default + 45 | | initial_default | - 10 | new_default | - 45 | - 45 | view_default (7 rows) drop rule def_view_test_ins on def_view_test; @@ -951,7 +951,7 @@ insert into parent values (1, 2, 3); alter table parent drop a; create table child (d varchar(255)) inherits (parent); insert into child values (12, 13, 'testing'); -select * from parent; +select * from parent order by b; b | c ----+---- 2 | 3 @@ -965,7 +965,7 @@ select * from child; (1 row) alter table parent drop c; -select * from parent; +select * from parent order by b; b ---- 2 @@ -993,14 +993,14 @@ ERROR: column "........pg.dropped.1........" of relation "test" does not exist copy test from stdin; ERROR: extra data after last expected column CONTEXT: COPY test, line 1: "10 11 12" -select * from test; +select * from test order by b; b | c ---+--- 2 | 3 (1 row) copy test from stdin; -select * from test; +select * from test order by b; b | c ----+---- 2 | 3 @@ -1012,7 +1012,7 @@ ERROR: column "a" of relation "test" does not exist copy test("........pg.dropped.1........") from stdin; ERROR: column "........pg.dropped.1........" 
of relation "test" does not exist copy test(b,c) from stdin; -select * from test; +select * from test order by b; b | c ----+---- 2 | 3 @@ -1305,7 +1305,7 @@ insert into p1 values (1,2,'abc'); insert into c1 values(11,'xyz',33,0); -- should fail ERROR: new row for relation "c1" violates check constraint "p1_a1_check" insert into c1 values(11,'xyz',33,22); -select * from p1; +select * from p1 order by f1; f1 | a1 | f2 ----+----+----- 1 | 2 | abc @@ -1313,7 +1313,7 @@ select * from p1; (2 rows) update p1 set a1 = a1 + 1, f2 = upper(f2); -select * from p1; +select * from p1 order by f1; f1 | a1 | f2 ----+----+----- 1 | 3 | ABC @@ -1327,7 +1327,7 @@ NOTICE: drop cascades to table c1 create domain mytype as text; create temp table foo (f1 text, f2 mytype, f3 text); insert into foo values('bb','cc','dd'); -select * from foo; +select * from foo order by f1; f1 | f2 | f3 ----+----+---- bb | cc | dd @@ -1335,14 +1335,14 @@ select * from foo; drop domain mytype cascade; NOTICE: drop cascades to table foo column f2 -select * from foo; +select * from foo order by f1; f1 | f3 ----+---- bb | dd (1 row) insert into foo values('qq','rr'); -select * from foo; +select * from foo order by f1; f1 | f3 ----+---- bb | dd @@ -1350,7 +1350,7 @@ select * from foo; (2 rows) update foo set f3 = 'zz'; -select * from foo; +select * from foo order by f1; f1 | f3 ----+---- bb | zz @@ -1372,7 +1372,7 @@ create table anothertab (atcol1 serial8, atcol2 boolean, NOTICE: CREATE TABLE will create implicit sequence "anothertab_atcol1_seq" for serial column "anothertab.atcol1" insert into anothertab (atcol1, atcol2) values (default, true); insert into anothertab (atcol1, atcol2) values (default, false); -select * from anothertab; +select * from anothertab order by atcol1, atcol2; atcol1 | atcol2 --------+-------- 1 | t @@ -1382,7 +1382,7 @@ select * from anothertab; alter table anothertab alter column atcol1 type boolean; -- fails ERROR: column "atcol1" cannot be cast to type boolean alter table 
anothertab alter column atcol1 type integer; -select * from anothertab; +select * from anothertab order by atcol1, atcol2; atcol1 | atcol2 --------+-------- 1 | t @@ -1392,7 +1392,7 @@ select * from anothertab; insert into anothertab (atcol1, atcol2) values (45, null); -- fails ERROR: new row for relation "anothertab" violates check constraint "anothertab_chk" insert into anothertab (atcol1, atcol2) values (default, null); -select * from anothertab; +select * from anothertab order by atcol1, atcol2; atcol1 | atcol2 --------+-------- 1 | t @@ -1404,7 +1404,7 @@ alter table anothertab alter column atcol2 type text using case when atcol2 is true then 'IT WAS TRUE' when atcol2 is false then 'IT WAS FALSE' else 'IT WAS NULL!' end; -select * from anothertab; +select * from anothertab order by atcol1, atcol2; atcol1 | atcol2 --------+-------------- 1 | IT WAS TRUE @@ -1423,12 +1423,12 @@ HINT: No operator matches the given name and argument type(s). You might need t alter table anothertab drop constraint anothertab_chk; alter table anothertab alter column atcol1 type boolean using case when atcol1 % 2 = 0 then true else false end; -select * from anothertab; +select * from anothertab order by atcol1, atcol2; atcol1 | atcol2 --------+-------------- + f | IT WAS NULL! f | IT WAS TRUE t | IT WAS FALSE - f | IT WAS NULL! 
(3 rows) drop table anothertab; @@ -1436,7 +1436,7 @@ create table another (f1 int, f2 text); insert into another values(1, 'one'); insert into another values(2, 'two'); insert into another values(3, 'three'); -select * from another; +select * from another order by f1, f2; f1 | f2 ----+------- 1 | one @@ -1447,12 +1447,12 @@ select * from another; alter table another alter f1 type text using f2 || ' more', alter f2 type bigint using f1 * 10; -select * from another; +select * from another order by f1, f2; f1 | f2 ------------+---- one more | 10 - two more | 20 three more | 30 + two more | 20 (3 rows) drop table another; @@ -1514,7 +1514,7 @@ alter type alter1.ctype set schema alter2; drop schema alter1; insert into alter2.t1(f2) values(13); insert into alter2.t1(f2) values(14); -select * from alter2.t1; +select * from alter2.t1 order by f1, f2; f1 | f2 ----+---- 1 | 11 @@ -1523,7 +1523,7 @@ select * from alter2.t1; 4 | 14 (4 rows) -select * from alter2.v1; +select * from alter2.v1 order by f1, f2; f1 | f2 ----+---- 1 | 11 diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out index 03c32a5..272a621 100644 --- a/src/test/regress/expected/arrays.out +++ b/src/test/regress/expected/arrays.out @@ -26,12 +26,12 @@ INSERT INTO arrtest (a, b[1:2][1:2], c, d, e, f, g) '{"abc","abcde"}', '{"abc","abcde"}'); INSERT INTO arrtest (a, b[1:2], c, d[1:2]) VALUES ('{}', '{3,4}', '{foo,bar}', '{bar,foo}'); -SELECT * FROM arrtest; +SELECT * FROM arrtest ORDER BY a, b, c; a | b | c | d | e | f | g -------------+-----------------+-----------+---------------+-----------------+-----------------+------------- + {} | {3,4} | {foo,bar} | {bar,foo} | | | {1,2,3,4,5} | {{{0,0},{1,2}}} | {} | {} | [0:1]={1.1,2.2} | {} | {} {11,12,23} | {{3,4},{4,5}} | {foobar} | {{elt1,elt2}} | {3.4,6.7} | {"abc ",abcde} | {abc,abcde} - {} | {3,4} | {foo,bar} | {bar,foo} | | | (3 rows) SELECT arrtest.a[1], @@ -39,7 +39,8 @@ SELECT arrtest.a[1], arrtest.c[1], arrtest.d[1][1], 
arrtest.e[0] - FROM arrtest; + FROM arrtest + ORDER BY a, b, c; a | b | c | d | e ----+---+--------+------+----- 1 | 0 | | | 1.1 @@ -48,7 +49,8 @@ SELECT arrtest.a[1], (3 rows) SELECT a[1], b[1][1][1], c[1], d[1][1], e[0] - FROM arrtest; + FROM arrtest + ORDER BY a, b, c; a | b | c | d | e ----+---+--------+------+----- 1 | 0 | | | 1.1 @@ -60,30 +62,33 @@ SELECT a[1:3], b[1:1][1:2][1:2], c[1:2], d[1:1][1:2] - FROM arrtest; + FROM arrtest + ORDER BY a, b, c; a | b | c | d ------------+-----------------+-----------+--------------- + {} | {} | {foo,bar} | {} {1,2,3} | {{{0,0},{1,2}}} | {} | {} {11,12,23} | {} | {foobar} | {{elt1,elt2}} - {} | {} | {foo,bar} | {} (3 rows) SELECT array_ndims(a) AS a,array_ndims(b) AS b,array_ndims(c) AS c - FROM arrtest; + FROM arrtest + ORDER BY b; a | b | c ---+---+--- - 1 | 3 | - 1 | 2 | 1 | 1 | 1 + 1 | 2 | 1 + 1 | 3 | (3 rows) SELECT array_dims(a) AS a,array_dims(b) AS b,array_dims(c) AS c - FROM arrtest; + FROM arrtest + ORDER BY b; a | b | c -------+-----------------+------- [1:5] | [1:1][1:2][1:2] | - [1:3] | [1:2][1:2] | [1:1] | [1:2] | [1:2] + [1:3] | [1:2][1:2] | [1:1] (3 rows) -- returns nothing @@ -105,11 +110,11 @@ UPDATE arrtest UPDATE arrtest SET c[2:2] = '{"new_word"}' WHERE array_dims(c) is not null; -SELECT a,b,c FROM arrtest; +SELECT a,b,c FROM arrtest ORDER BY a, b, c; a | b | c ---------------+-----------------------+------------------- - {16,25,3,4,5} | {{{113,142},{1,147}}} | {} {} | {3,4} | {foo,new_word} + {16,25,3,4,5} | {{{113,142},{1,147}}} | {} {16,25,23} | {{3,4},{4,5}} | {foobar,new_word} (3 rows) @@ -117,34 +122,35 @@ SELECT a[1:3], b[1:1][1:2][1:2], c[1:2], d[1:1][2:2] - FROM arrtest; + FROM arrtest + ORDER BY a, b, c; a | b | c | d ------------+-----------------------+-------------------+---------- - {16,25,3} | {{{113,142},{1,147}}} | {} | {} {} | {} | {foo,new_word} | {} + {16,25,3} | {{{113,142},{1,147}}} | {} | {} {16,25,23} | {} | {foobar,new_word} | {{elt2}} (3 rows) INSERT INTO arrtest(a) 
VALUES('{1,null,3}'); -SELECT a FROM arrtest; +SELECT a FROM arrtest ORDER BY 1; a --------------- - {16,25,3,4,5} {} - {16,25,23} {1,NULL,3} + {16,25,3,4,5} + {16,25,23} (4 rows) UPDATE arrtest SET a[4] = NULL WHERE a[2] IS NULL; -SELECT a FROM arrtest WHERE a[2] IS NULL; +SELECT a FROM arrtest WHERE a[2] IS NULL ORDER BY 1; a ----------------- - [4:4]={NULL} {1,NULL,3,NULL} + [4:4]={NULL} (2 rows) DELETE FROM arrtest WHERE a[2] IS NULL AND b IS NULL; -SELECT a,b,c FROM arrtest; +SELECT a,b,c FROM arrtest ORDER BY a, b, c; a | b | c ---------------+-----------------------+------------------- {16,25,3,4,5} | {{{113,142},{1,147}}} | {} @@ -718,14 +724,14 @@ insert into arr_tbl values ('{1,5,3}'); insert into arr_tbl values ('{1,2,10}'); set enable_seqscan to off; set enable_bitmapscan to off; -select * from arr_tbl where f1 > '{1,2,3}' and f1 <= '{1,5,3}'; +select * from arr_tbl where f1 > '{1,2,3}' and f1 <= '{1,5,3}' ORDER BY 1; f1 ---------- {1,2,10} {1,5,3} (2 rows) -select * from arr_tbl where f1 >= '{1,2,3}' and f1 < '{1,5,3}'; +select * from arr_tbl where f1 >= '{1,2,3}' and f1 < '{1,5,3}' ORDER BY 1; f1 ---------- {1,2,3} diff --git a/src/test/regress/expected/bit.out b/src/test/regress/expected/bit.out index 3563d23..28bea52 100644 --- a/src/test/regress/expected/bit.out +++ b/src/test/regress/expected/bit.out @@ -14,12 +14,12 @@ INSERT INTO BIT_TABLE VALUES (B'101011111010'); -- too long ERROR: bit string length 12 does not match type bit(11) --INSERT INTO BIT_TABLE VALUES ('X554'); --INSERT INTO BIT_TABLE VALUES ('X555'); -SELECT * FROM BIT_TABLE; +SELECT * FROM BIT_TABLE ORDER BY b; b ------------- 00000000000 - 11011000000 01010101010 + 11011000000 (3 rows) CREATE TABLE VARBIT_TABLE(v BIT VARYING(11)); @@ -31,7 +31,7 @@ INSERT INTO VARBIT_TABLE VALUES (B'101011111010'); -- too long ERROR: bit string too long for type bit varying(11) --INSERT INTO VARBIT_TABLE VALUES ('X554'); --INSERT INTO VARBIT_TABLE VALUES ('X555'); -SELECT * FROM VARBIT_TABLE; 
+SELECT * FROM VARBIT_TABLE ORDER BY v; v ------------- @@ -62,16 +62,16 @@ SELECT v, b, (v || b) AS concat -- Length SELECT b, length(b) AS lb - FROM BIT_TABLE; + FROM BIT_TABLE ORDER BY b; b | lb -------------+---- 00000000000 | 11 - 11011000000 | 11 01010101010 | 11 + 11011000000 | 11 (3 rows) SELECT v, length(v) AS lv - FROM VARBIT_TABLE; + FROM VARBIT_TABLE ORDER BY v; v | lv -------------+---- | 0 @@ -85,19 +85,19 @@ SELECT b, SUBSTRING(b FROM 2 FOR 4) AS sub_2_4, SUBSTRING(b FROM 7 FOR 13) AS sub_7_13, SUBSTRING(b FROM 6) AS sub_6 - FROM BIT_TABLE; + FROM BIT_TABLE ORDER BY b; b | sub_2_4 | sub_7_13 | sub_6 -------------+---------+----------+-------- 00000000000 | 0000 | 00000 | 000000 - 11011000000 | 1011 | 00000 | 000000 01010101010 | 1010 | 01010 | 101010 + 11011000000 | 1011 | 00000 | 000000 (3 rows) SELECT v, SUBSTRING(v FROM 2 FOR 4) AS sub_2_4, SUBSTRING(v FROM 7 FOR 13) AS sub_7_13, SUBSTRING(v FROM 6) AS sub_6 - FROM VARBIT_TABLE; + FROM VARBIT_TABLE ORDER BY v; v | sub_2_4 | sub_7_13 | sub_6 -------------+---------+----------+-------- | | | @@ -111,50 +111,50 @@ DROP TABLE varbit_table; CREATE TABLE varbit_table (a BIT VARYING(16), b BIT VARYING(16)); COPY varbit_table FROM stdin; SELECT a, b, ~a AS "~ a", a & b AS "a & b", - a | b AS "a | b", a # b AS "a # b" FROM varbit_table; + a | b AS "a | b", a # b AS "a # b" FROM varbit_table ORDER BY a,b; a | b | ~ a | a & b | a | b | a # b ------------------+------------------+------------------+------------------+------------------+------------------ + 0000000000001111 | 0000000000010000 | 1111111111110000 | 0000000000000000 | 0000000000011111 | 0000000000011111 + 0000000100100011 | 1111111111111111 | 1111111011011100 | 0000000100100011 | 1111111111111111 | 1111111011011100 00001111 | 00010000 | 11110000 | 00000000 | 00011111 | 00011111 + 0001001000110100 | 1111111111110101 | 1110110111001011 | 0001001000110100 | 1111111111110101 | 1110110111000001 00011111 | 00010001 | 11100000 | 00010001 | 00011111 | 
00001110 + 0010010001101000 | 0010010001101000 | 1101101110010111 | 0010010001101000 | 0010010001101000 | 0000000000000000 00101111 | 00010010 | 11010000 | 00000010 | 00111111 | 00111101 00111111 | 00010011 | 11000000 | 00010011 | 00111111 | 00101100 10001111 | 00000100 | 01110000 | 00000100 | 10001111 | 10001011 - 0000000000001111 | 0000000000010000 | 1111111111110000 | 0000000000000000 | 0000000000011111 | 0000000000011111 - 0000000100100011 | 1111111111111111 | 1111111011011100 | 0000000100100011 | 1111111111111111 | 1111111011011100 - 0010010001101000 | 0010010001101000 | 1101101110010111 | 0010010001101000 | 0010010001101000 | 0000000000000000 1111101001010000 | 0000010110101111 | 0000010110101111 | 0000000000000000 | 1111111111111111 | 1111111111111111 - 0001001000110100 | 1111111111110101 | 1110110111001011 | 0001001000110100 | 1111111111110101 | 1110110111000001 (10 rows) SELECT a,b,a<b AS "a<b",a<=b AS "a<=b",a=b AS "a=b", - a>=b AS "a>=b",a>b AS "a>b",a<>b AS "a<>b" FROM varbit_table; + a>=b AS "a>=b",a>b AS "a>b",a<>b AS "a<>b" FROM varbit_table ORDER BY a,b; a | b | a<b | a<=b | a=b | a>=b | a>b | a<>b ------------------+------------------+-----+------+-----+------+-----+------ + 0000000000001111 | 0000000000010000 | t | t | f | f | f | t + 0000000100100011 | 1111111111111111 | t | t | f | f | f | t 00001111 | 00010000 | t | t | f | f | f | t + 0001001000110100 | 1111111111110101 | t | t | f | f | f | t 00011111 | 00010001 | f | f | f | t | t | t + 0010010001101000 | 0010010001101000 | f | t | t | t | f | f 00101111 | 00010010 | f | f | f | t | t | t 00111111 | 00010011 | f | f | f | t | t | t 10001111 | 00000100 | f | f | f | t | t | t - 0000000000001111 | 0000000000010000 | t | t | f | f | f | t - 0000000100100011 | 1111111111111111 | t | t | f | f | f | t - 0010010001101000 | 0010010001101000 | f | t | t | t | f | f 1111101001010000 | 0000010110101111 | f | f | f | t | t | t - 0001001000110100 | 1111111111110101 | t | t | f | f | f | t (10 rows) 
-SELECT a,a<<4 AS "a<<4",b,b>>2 AS "b>>2" FROM varbit_table; +SELECT a,a<<4 AS "a<<4",b,b>>2 AS "b>>2" FROM varbit_table ORDER BY a,b; a | a<<4 | b | b>>2 ------------------+------------------+------------------+------------------ + 0000000000001111 | 0000000011110000 | 0000000000010000 | 0000000000000100 + 0000000100100011 | 0001001000110000 | 1111111111111111 | 0011111111111111 00001111 | 11110000 | 00010000 | 00000100 + 0001001000110100 | 0010001101000000 | 1111111111110101 | 0011111111111101 00011111 | 11110000 | 00010001 | 00000100 + 0010010001101000 | 0100011010000000 | 0010010001101000 | 0000100100011010 00101111 | 11110000 | 00010010 | 00000100 00111111 | 11110000 | 00010011 | 00000100 10001111 | 11110000 | 00000100 | 00000001 - 0000000000001111 | 0000000011110000 | 0000000000010000 | 0000000000000100 - 0000000100100011 | 0001001000110000 | 1111111111111111 | 0011111111111111 - 0010010001101000 | 0100011010000000 | 0010010001101000 | 0000100100011010 1111101001010000 | 1010010100000000 | 0000010110101111 | 0000000101101011 - 0001001000110100 | 0010001101000000 | 1111111111110101 | 0011111111111101 (10 rows) DROP TABLE varbit_table; @@ -163,50 +163,50 @@ DROP TABLE bit_table; CREATE TABLE bit_table (a BIT(16), b BIT(16)); COPY bit_table FROM stdin; SELECT a,b,~a AS "~ a",a & b AS "a & b", - a|b AS "a | b", a # b AS "a # b" FROM bit_table; + a|b AS "a | b", a # b AS "a # b" FROM bit_table ORDER BY a,b; a | b | ~ a | a & b | a | b | a # b ------------------+------------------+------------------+------------------+------------------+------------------ + 0000000000001111 | 0000000000010000 | 1111111111110000 | 0000000000000000 | 0000000000011111 | 0000000000011111 + 0000000100100011 | 1111111111111111 | 1111111011011100 | 0000000100100011 | 1111111111111111 | 1111111011011100 0000111100000000 | 0001000000000000 | 1111000011111111 | 0000000000000000 | 0001111100000000 | 0001111100000000 + 0001001000110100 | 1111111111110101 | 1110110111001011 | 0001001000110100 | 
1111111111110101 | 1110110111000001 0001111100000000 | 0001000100000000 | 1110000011111111 | 0001000100000000 | 0001111100000000 | 0000111000000000 + 0010010001101000 | 0010010001101000 | 1101101110010111 | 0010010001101000 | 0010010001101000 | 0000000000000000 0010111100000000 | 0001001000000000 | 1101000011111111 | 0000001000000000 | 0011111100000000 | 0011110100000000 0011111100000000 | 0001001100000000 | 1100000011111111 | 0001001100000000 | 0011111100000000 | 0010110000000000 1000111100000000 | 0000010000000000 | 0111000011111111 | 0000010000000000 | 1000111100000000 | 1000101100000000 - 0000000000001111 | 0000000000010000 | 1111111111110000 | 0000000000000000 | 0000000000011111 | 0000000000011111 - 0000000100100011 | 1111111111111111 | 1111111011011100 | 0000000100100011 | 1111111111111111 | 1111111011011100 - 0010010001101000 | 0010010001101000 | 1101101110010111 | 0010010001101000 | 0010010001101000 | 0000000000000000 1111101001010000 | 0000010110101111 | 0000010110101111 | 0000000000000000 | 1111111111111111 | 1111111111111111 - 0001001000110100 | 1111111111110101 | 1110110111001011 | 0001001000110100 | 1111111111110101 | 1110110111000001 (10 rows) SELECT a,b,a<b AS "a<b",a<=b AS "a<=b",a=b AS "a=b", - a>=b AS "a>=b",a>b AS "a>b",a<>b AS "a<>b" FROM bit_table; + a>=b AS "a>=b",a>b AS "a>b",a<>b AS "a<>b" FROM bit_table ORDER BY a,b; a | b | a<b | a<=b | a=b | a>=b | a>b | a<>b ------------------+------------------+-----+------+-----+------+-----+------ + 0000000000001111 | 0000000000010000 | t | t | f | f | f | t + 0000000100100011 | 1111111111111111 | t | t | f | f | f | t 0000111100000000 | 0001000000000000 | t | t | f | f | f | t + 0001001000110100 | 1111111111110101 | t | t | f | f | f | t 0001111100000000 | 0001000100000000 | f | f | f | t | t | t + 0010010001101000 | 0010010001101000 | f | t | t | t | f | f 0010111100000000 | 0001001000000000 | f | f | f | t | t | t 0011111100000000 | 0001001100000000 | f | f | f | t | t | t 1000111100000000 | 
0000010000000000 | f | f | f | t | t | t - 0000000000001111 | 0000000000010000 | t | t | f | f | f | t - 0000000100100011 | 1111111111111111 | t | t | f | f | f | t - 0010010001101000 | 0010010001101000 | f | t | t | t | f | f 1111101001010000 | 0000010110101111 | f | f | f | t | t | t - 0001001000110100 | 1111111111110101 | t | t | f | f | f | t (10 rows) -SELECT a,a<<4 AS "a<<4",b,b>>2 AS "b>>2" FROM bit_table; +SELECT a,a<<4 AS "a<<4",b,b>>2 AS "b>>2" FROM bit_table ORDER BY a,b; a | a<<4 | b | b>>2 ------------------+------------------+------------------+------------------ + 0000000000001111 | 0000000011110000 | 0000000000010000 | 0000000000000100 + 0000000100100011 | 0001001000110000 | 1111111111111111 | 0011111111111111 0000111100000000 | 1111000000000000 | 0001000000000000 | 0000010000000000 + 0001001000110100 | 0010001101000000 | 1111111111110101 | 0011111111111101 0001111100000000 | 1111000000000000 | 0001000100000000 | 0000010001000000 + 0010010001101000 | 0100011010000000 | 0010010001101000 | 0000100100011010 0010111100000000 | 1111000000000000 | 0001001000000000 | 0000010010000000 0011111100000000 | 1111000000000000 | 0001001100000000 | 0000010011000000 1000111100000000 | 1111000000000000 | 0000010000000000 | 0000000100000000 - 0000000000001111 | 0000000011110000 | 0000000000010000 | 0000000000000100 - 0000000100100011 | 0001001000110000 | 1111111111111111 | 0011111111111111 - 0010010001101000 | 0100011010000000 | 0010010001101000 | 0000100100011010 1111101001010000 | 1010010100000000 | 0000010110101111 | 0000000101101011 - 0001001000110100 | 0010001101000000 | 1111111111110101 | 0011111111111101 (10 rows) DROP TABLE bit_table; @@ -456,25 +456,25 @@ INSERT INTO BIT_SHIFT_TABLE SELECT b>>8 FROM BIT_SHIFT_TABLE; SELECT POSITION(B'1101' IN b), POSITION(B'11011' IN b), b - FROM BIT_SHIFT_TABLE ; + FROM BIT_SHIFT_TABLE ORDER BY b; position | position | b ----------+----------+------------------ - 1 | 1 | 1101100000000000 - 2 | 2 | 0110110000000000 - 3 | 3 | 
0011011000000000 - 4 | 4 | 0001101100000000 - 5 | 5 | 0000110110000000 - 6 | 6 | 0000011011000000 - 7 | 7 | 0000001101100000 - 8 | 8 | 0000000110110000 - 9 | 9 | 0000000011011000 - 10 | 10 | 0000000001101100 - 11 | 11 | 0000000000110110 - 12 | 12 | 0000000000011011 - 13 | 0 | 0000000000001101 - 0 | 0 | 0000000000000110 - 0 | 0 | 0000000000000011 0 | 0 | 0000000000000001 + 0 | 0 | 0000000000000011 + 0 | 0 | 0000000000000110 + 13 | 0 | 0000000000001101 + 12 | 12 | 0000000000011011 + 11 | 11 | 0000000000110110 + 10 | 10 | 0000000001101100 + 9 | 9 | 0000000011011000 + 8 | 8 | 0000000110110000 + 7 | 7 | 0000001101100000 + 6 | 6 | 0000011011000000 + 5 | 5 | 0000110110000000 + 4 | 4 | 0001101100000000 + 3 | 3 | 0011011000000000 + 2 | 2 | 0110110000000000 + 1 | 1 | 1101100000000000 (16 rows) CREATE TABLE VARBIT_SHIFT_TABLE(v BIT VARYING(20)); @@ -486,25 +486,25 @@ INSERT INTO VARBIT_SHIFT_TABLE SELECT CAST(v || B'00000000' AS BIT VARYING(20)) SELECT POSITION(B'1101' IN v), POSITION(B'11011' IN v), v - FROM VARBIT_SHIFT_TABLE ; + FROM VARBIT_SHIFT_TABLE ORDER BY v; position | position | v ----------+----------+---------------------- - 1 | 1 | 11011 - 2 | 2 | 011011 - 3 | 3 | 0011011 - 4 | 4 | 00011011 - 5 | 5 | 000011011 - 6 | 6 | 0000011011 - 7 | 7 | 00000011011 - 8 | 8 | 000000011011 - 9 | 9 | 0000000011011 - 10 | 10 | 00000000011011 - 11 | 11 | 000000000011011 - 12 | 12 | 0000000000011011 - 13 | 13 | 00000000000011011 - 14 | 14 | 000000000000011011 - 15 | 15 | 0000000000000011011 16 | 16 | 00000000000000011011 + 15 | 15 | 0000000000000011011 + 14 | 14 | 000000000000011011 + 13 | 13 | 00000000000011011 + 12 | 12 | 0000000000011011 + 11 | 11 | 000000000011011 + 10 | 10 | 00000000011011 + 9 | 9 | 0000000011011 + 8 | 8 | 000000011011 + 7 | 7 | 00000011011 + 6 | 6 | 0000011011 + 5 | 5 | 000011011 + 4 | 4 | 00011011 + 3 | 3 | 0011011 + 2 | 2 | 011011 + 1 | 1 | 11011 (16 rows) DROP TABLE BIT_SHIFT_TABLE; diff --git a/src/test/regress/expected/box.out 
b/src/test/regress/expected/box.out index 2a94e33..097bb4b 100644 --- a/src/test/regress/expected/box.out +++ b/src/test/regress/expected/box.out @@ -41,30 +41,30 @@ SELECT '' AS four, * FROM BOX_TBL; (4 rows) SELECT '' AS four, b.*, area(b.f1) as barea - FROM BOX_TBL b; + FROM BOX_TBL b ORDER BY (b.f1[0])[0], (b.f1[0])[1], (b.f1[2])[0], (b.f1[2])[1]; four | f1 | barea ------+---------------------+------- | (2,2),(0,0) | 4 - | (3,3),(1,1) | 4 | (2.5,3.5),(2.5,2.5) | 0 + | (3,3),(1,1) | 4 | (3,3),(3,3) | 0 (4 rows) -- overlap SELECT '' AS three, b.f1 FROM BOX_TBL b - WHERE b.f1 && box '(2.5,2.5,1.0,1.0)'; + WHERE b.f1 && box '(2.5,2.5,1.0,1.0)' ORDER BY (b.f1[0])[0], (b.f1[0])[1], (b.f1[2])[0], (b.f1[2])[1]; three | f1 -------+--------------------- | (2,2),(0,0) - | (3,3),(1,1) | (2.5,3.5),(2.5,2.5) + | (3,3),(1,1) (3 rows) -- left-or-overlap (x only) SELECT '' AS two, b1.* FROM BOX_TBL b1 - WHERE b1.f1 &< box '(2.0,2.0,2.5,2.5)'; + WHERE b1.f1 &< box '(2.0,2.0,2.5,2.5)' ORDER BY (b1.f1[0])[0], (b1.f1[0])[1], (b1.f1[2])[0], (b1.f1[2])[1]; two | f1 -----+--------------------- | (2,2),(0,0) @@ -74,7 +74,7 @@ SELECT '' AS two, b1.* -- right-or-overlap (x only) SELECT '' AS two, b1.* FROM BOX_TBL b1 - WHERE b1.f1 &> box '(2.0,2.0,2.5,2.5)'; + WHERE b1.f1 &> box '(2.0,2.0,2.5,2.5)' ORDER BY (b1.f1[0])[0], (b1.f1[0])[1], (b1.f1[2])[0], (b1.f1[2])[1]; two | f1 -----+--------------------- | (2.5,3.5),(2.5,2.5) @@ -84,7 +84,7 @@ SELECT '' AS two, b1.* -- left of SELECT '' AS two, b.f1 FROM BOX_TBL b - WHERE b.f1 << box '(3.0,3.0,5.0,5.0)'; + WHERE b.f1 << box '(3.0,3.0,5.0,5.0)' ORDER BY (b.f1[0])[0], (b.f1[0])[1], (b.f1[2])[0], (b.f1[2])[1]; two | f1 -----+--------------------- | (2,2),(0,0) @@ -94,19 +94,19 @@ SELECT '' AS two, b.f1 -- area <= SELECT '' AS four, b.f1 FROM BOX_TBL b - WHERE b.f1 <= box '(3.0,3.0,5.0,5.0)'; + WHERE b.f1 <= box '(3.0,3.0,5.0,5.0)' ORDER BY (b.f1[0])[0], (b.f1[0])[1], (b.f1[2])[0], (b.f1[2])[1]; four | f1 ------+--------------------- | 
(2,2),(0,0) - | (3,3),(1,1) | (2.5,3.5),(2.5,2.5) + | (3,3),(1,1) | (3,3),(3,3) (4 rows) -- area < SELECT '' AS two, b.f1 FROM BOX_TBL b - WHERE b.f1 < box '(3.0,3.0,5.0,5.0)'; + WHERE b.f1 < box '(3.0,3.0,5.0,5.0)' ORDER BY (b.f1[0])[0], (b.f1[0])[1], (b.f1[2])[0], (b.f1[2])[1]; two | f1 -----+--------------------- | (2.5,3.5),(2.5,2.5) @@ -116,7 +116,7 @@ SELECT '' AS two, b.f1 -- area = SELECT '' AS two, b.f1 FROM BOX_TBL b - WHERE b.f1 = box '(3.0,3.0,5.0,5.0)'; + WHERE b.f1 = box '(3.0,3.0,5.0,5.0)' ORDER BY (b.f1[0])[0], (b.f1[0])[1], (b.f1[2])[0], (b.f1[2])[1]; two | f1 -----+------------- | (2,2),(0,0) @@ -126,7 +126,7 @@ SELECT '' AS two, b.f1 -- area > SELECT '' AS two, b.f1 FROM BOX_TBL b -- zero area - WHERE b.f1 > box '(3.5,3.0,4.5,3.0)'; + WHERE b.f1 > box '(3.5,3.0,4.5,3.0)' ORDER BY (b.f1[0])[0], (b.f1[0])[1], (b.f1[2])[0], (b.f1[2])[1]; two | f1 -----+------------- | (2,2),(0,0) @@ -136,19 +136,19 @@ SELECT '' AS two, b.f1 -- area >= SELECT '' AS four, b.f1 FROM BOX_TBL b -- zero area - WHERE b.f1 >= box '(3.5,3.0,4.5,3.0)'; + WHERE b.f1 >= box '(3.5,3.0,4.5,3.0)' ORDER BY (b.f1[0])[0], (b.f1[0])[1], (b.f1[2])[0], (b.f1[2])[1]; four | f1 ------+--------------------- | (2,2),(0,0) - | (3,3),(1,1) | (2.5,3.5),(2.5,2.5) + | (3,3),(1,1) | (3,3),(3,3) (4 rows) -- right of SELECT '' AS two, b.f1 FROM BOX_TBL b - WHERE box '(3.0,3.0,5.0,5.0)' >> b.f1; + WHERE box '(3.0,3.0,5.0,5.0)' >> b.f1 ORDER BY (b.f1[0])[0], (b.f1[0])[1], (b.f1[2])[0], (b.f1[2])[1]; two | f1 -----+--------------------- | (2,2),(0,0) @@ -158,7 +158,7 @@ SELECT '' AS two, b.f1 -- contained in SELECT '' AS three, b.f1 FROM BOX_TBL b - WHERE b.f1 <@ box '(0,0,3,3)'; + WHERE b.f1 <@ box '(0,0,3,3)' ORDER BY (b.f1[0])[0], (b.f1[0])[1], (b.f1[2])[0], (b.f1[2])[1]; three | f1 -------+------------- | (2,2),(0,0) @@ -169,7 +169,7 @@ SELECT '' AS three, b.f1 -- contains SELECT '' AS three, b.f1 FROM BOX_TBL b - WHERE box '(0,0,3,3)' @> b.f1; + WHERE box '(0,0,3,3)' @> b.f1 ORDER BY 
(b.f1[0])[0], (b.f1[0])[1], (b.f1[2])[0], (b.f1[2])[1]; three | f1 -------+------------- | (2,2),(0,0) @@ -180,7 +180,7 @@ SELECT '' AS three, b.f1 -- box equality SELECT '' AS one, b.f1 FROM BOX_TBL b - WHERE box '(1,1,3,3)' ~= b.f1; + WHERE box '(1,1,3,3)' ~= b.f1 ORDER BY (b.f1[0])[0], (b.f1[0])[1], (b.f1[2])[0], (b.f1[2])[1]; one | f1 -----+------------- | (3,3),(1,1) @@ -188,30 +188,31 @@ SELECT '' AS one, b.f1 -- center of box, left unary operator SELECT '' AS four, @@(b1.f1) AS p - FROM BOX_TBL b1; + FROM BOX_TBL b1 ORDER BY (b1.f1[0])[0], (b1.f1[0])[1], (b1.f1[2])[0], (b1.f1[2])[1]; four | p ------+--------- | (1,1) - | (2,2) | (2.5,3) + | (2,2) | (3,3) (4 rows) -- wholly-contained SELECT '' AS one, b1.*, b2.* FROM BOX_TBL b1, BOX_TBL b2 - WHERE b1.f1 @> b2.f1 and not b1.f1 ~= b2.f1; + WHERE b1.f1 @> b2.f1 and not b1.f1 ~= b2.f1 + ORDER BY (b1.f1[0])[0], (b1.f1[0])[1], (b1.f1[2])[0], (b1.f1[2])[1], (b2.f1[0])[0], (b2.f1[0])[1], (b2.f1[2])[0], (b2.f1[2])[1]; one | f1 | f1 -----+-------------+------------- | (3,3),(1,1) | (3,3),(3,3) (1 row) -SELECT '' AS four, height(f1), width(f1) FROM BOX_TBL; +SELECT '' AS four, height(f1), width(f1) FROM BOX_TBL ORDER BY (f1[0])[0], (f1[0])[1], (f1[2])[0], (f1[2])[1]; four | height | width ------+--------+------- | 2 | 2 - | 2 | 2 | 1 | 0 + | 2 | 2 | 0 | 0 (4 rows) diff --git a/src/test/regress/expected/case.out b/src/test/regress/expected/case.out index 9ec32b8..6dd98ed 100644 --- a/src/test/regress/expected/case.out +++ b/src/test/regress/expected/case.out @@ -103,20 +103,22 @@ SELECT '' AS "Five", CASE WHEN i >= 3 THEN i END AS ">= 3 or Null" - FROM CASE_TBL; + FROM CASE_TBL + ORDER BY 2; Five | >= 3 or Null ------+-------------- - | - | | 3 | 4 + | + | (4 rows) SELECT '' AS "Five", CASE WHEN i >= 3 THEN (i + i) ELSE i END AS "Simplest Math" - FROM CASE_TBL; + FROM CASE_TBL + ORDER BY 2; Five | Simplest Math ------+--------------- | 1 @@ -132,7 +134,8 @@ SELECT '' AS "Five", i AS "Value", WHEN (i = 2) THEN 'two' ELSE 
'big' END AS "Category" - FROM CASE_TBL; + FROM CASE_TBL + ORDER BY 2, 3; Five | Value | Category ------+-------+---------- | 1 | one @@ -148,13 +151,14 @@ SELECT '' AS "Five", WHEN ((i = 2) or (i = 2)) THEN 'two' ELSE 'big' END AS "Category" - FROM CASE_TBL; + FROM CASE_TBL + ORDER BY 2; Five | Category ------+---------- - | one - | two | big | big + | one + | two (4 rows) -- @@ -177,82 +181,86 @@ SELECT * FROM CASE_TBL WHERE NULLIF(f,i) = 2; (0 rows) SELECT COALESCE(a.f, b.i, b.j) - FROM CASE_TBL a, CASE2_TBL b; + FROM CASE_TBL a, CASE2_TBL b + ORDER BY coalesce; coalesce ---------- - 10.1 - 20.2 -30.3 - 1 - 10.1 - 20.2 -30.3 - 2 - 10.1 - 20.2 -30.3 - 3 - 10.1 - 20.2 -30.3 - 2 - 10.1 - 20.2 -30.3 + -30.3 + -6 + 1 1 + 2 + 2 + 3 + 10.1 + 10.1 + 10.1 + 10.1 + 10.1 10.1 20.2 - -30.3 - -6 + 20.2 + 20.2 + 20.2 + 20.2 + 20.2 (24 rows) SELECT * FROM CASE_TBL a, CASE2_TBL b - WHERE COALESCE(a.f, b.i, b.j) = 2; + WHERE COALESCE(a.f, b.i, b.j) = 2 + ORDER BY a.i, a.f, b.i, b.j; i | f | i | j ---+---+---+---- - 4 | | 2 | -2 4 | | 2 | -4 + 4 | | 2 | -2 (2 rows) SELECT '' AS Five, NULLIF(a.i,b.i) AS "NULLIF(a.i,b.i)", NULLIF(b.i, 4) AS "NULLIF(b.i,4)" - FROM CASE_TBL a, CASE2_TBL b; + FROM CASE_TBL a, CASE2_TBL b + ORDER BY 2, 3; five | NULLIF(a.i,b.i) | NULLIF(b.i,4) ------+-----------------+--------------- - | | 1 - | 2 | 1 - | 3 | 1 - | 4 | 1 | 1 | 2 - | | 2 - | 3 | 2 - | 4 | 2 - | 1 | 3 - | 2 | 3 - | | 3 - | 4 | 3 | 1 | 2 - | | 2 - | 3 | 2 - | 4 | 2 - | | 1 - | 2 | 1 - | 3 | 1 - | 4 | 1 + | 1 | 3 | 1 | + | 2 | 1 + | 2 | 1 + | 2 | 3 | 2 | + | 3 | 1 + | 3 | 1 + | 3 | 2 + | 3 | 2 | 3 | + | 4 | 1 + | 4 | 1 + | 4 | 2 + | 4 | 2 + | 4 | 3 | 4 | + | | 1 + | | 1 + | | 2 + | | 2 + | | 3 (24 rows) SELECT '' AS "Two", * FROM CASE_TBL a, CASE2_TBL b - WHERE COALESCE(f,b.i) = 2; + WHERE COALESCE(f,b.i) = 2 + ORDER BY a.i, a.f, b.i, b.j; Two | i | f | i | j -----+---+---+---+---- - | 4 | | 2 | -2 | 4 | | 2 | -4 + | 4 | | 2 | -2 (2 rows) -- @@ -261,25 +269,25 @@ SELECT '' AS "Two", * 
UPDATE CASE_TBL SET i = CASE WHEN i >= 3 THEN (- i) ELSE (2 * i) END; -SELECT * FROM CASE_TBL; +SELECT * FROM CASE_TBL ORDER BY i, f; i | f ----+------- + -4 | + -3 | -30.3 2 | 10.1 4 | 20.2 - -3 | -30.3 - -4 | (4 rows) UPDATE CASE_TBL SET i = CASE WHEN i >= 2 THEN (2 * i) ELSE (3 * i) END; -SELECT * FROM CASE_TBL; +SELECT * FROM CASE_TBL ORDER BY i, f; i | f -----+------- + -12 | + -9 | -30.3 4 | 10.1 8 | 20.2 - -9 | -30.3 - -12 | (4 rows) UPDATE CASE_TBL @@ -287,13 +295,13 @@ UPDATE CASE_TBL ELSE (3 * j) END FROM CASE2_TBL b WHERE j = -CASE_TBL.i; -SELECT * FROM CASE_TBL; +SELECT * FROM CASE_TBL ORDER BY i, f; i | f -----+------- - 8 | 20.2 - -9 | -30.3 -12 | + -9 | -30.3 -8 | 10.1 + 8 | 20.2 (4 rows) -- diff --git a/src/test/regress/expected/char.out b/src/test/regress/expected/char.out index a0ba3d4..cf8eeca 100644 --- a/src/test/regress/expected/char.out +++ b/src/test/regress/expected/char.out @@ -25,28 +25,28 @@ INSERT INTO CHAR_TBL (f1) VALUES (''); INSERT INTO CHAR_TBL (f1) VALUES ('cd'); ERROR: value too long for type character(1) INSERT INTO CHAR_TBL (f1) VALUES ('c '); -SELECT '' AS seven, * FROM CHAR_TBL; +SELECT '' AS seven, * FROM CHAR_TBL ORDER BY f1; seven | f1 -------+---- - | a - | A + | | 1 | 2 | 3 - | + | A + | a | c (7 rows) SELECT '' AS six, c.* FROM CHAR_TBL c - WHERE c.f1 <> 'a'; + WHERE c.f1 <> 'a' ORDER BY f1; six | f1 -----+---- - | A + | | 1 | 2 | 3 - | + | A | c (6 rows) @@ -60,32 +60,32 @@ SELECT '' AS one, c.* SELECT '' AS five, c.* FROM CHAR_TBL c - WHERE c.f1 < 'a'; + WHERE c.f1 < 'a' ORDER BY f1; five | f1 ------+---- - | A + | | 1 | 2 | 3 - | + | A (5 rows) SELECT '' AS six, c.* FROM CHAR_TBL c - WHERE c.f1 <= 'a'; + WHERE c.f1 <= 'a' ORDER BY f1; six | f1 -----+---- - | a - | A + | | 1 | 2 | 3 - | + | A + | a (6 rows) SELECT '' AS one, c.* FROM CHAR_TBL c - WHERE c.f1 > 'a'; + WHERE c.f1 > 'a' ORDER BY f1; one | f1 -----+---- | c @@ -93,7 +93,7 @@ SELECT '' AS one, c.* SELECT '' AS two, c.* FROM CHAR_TBL c - WHERE c.f1 >= 'a'; + 
WHERE c.f1 >= 'a' ORDER BY f1; two | f1 -----+---- | a @@ -111,7 +111,7 @@ INSERT INTO CHAR_TBL (f1) VALUES ('abcd'); INSERT INTO CHAR_TBL (f1) VALUES ('abcde'); ERROR: value too long for type character(4) INSERT INTO CHAR_TBL (f1) VALUES ('abcd '); -SELECT '' AS four, * FROM CHAR_TBL; +SELECT '' AS four, * FROM CHAR_TBL ORDER BY f1; four | f1 ------+------ | a diff --git a/src/test/regress/expected/circle.out b/src/test/regress/expected/circle.out index 9ba4a04..2b098a8 100644 --- a/src/test/regress/expected/circle.out +++ b/src/test/regress/expected/circle.out @@ -21,65 +21,65 @@ INSERT INTO CIRCLE_TBL VALUES ('(3,(1,2),3)'); ERROR: invalid input syntax for type circle: "(3,(1,2),3)" LINE 1: INSERT INTO CIRCLE_TBL VALUES ('(3,(1,2),3)'); ^ -SELECT * FROM CIRCLE_TBL; +SELECT * FROM CIRCLE_TBL ORDER BY (point(f1))[0], (point(f1))[0], radius(f1); f1 ---------------- - <(5,1),3> - <(1,2),100> - <(1,3),5> <(1,2),3> + <(1,3),5> + <(1,2),100> + <(5,1),3> <(100,200),10> <(100,1),115> (6 rows) SELECT '' AS six, center(f1) AS center - FROM CIRCLE_TBL; + FROM CIRCLE_TBL ORDER BY (point(f1))[0], (point(f1))[0], radius(f1); six | center -----+----------- - | (5,1) | (1,2) | (1,3) | (1,2) + | (5,1) | (100,200) | (100,1) (6 rows) SELECT '' AS six, radius(f1) AS radius - FROM CIRCLE_TBL; + FROM CIRCLE_TBL ORDER BY (point(f1))[0], (point(f1))[0], radius(f1); six | radius -----+-------- | 3 - | 100 | 5 + | 100 | 3 | 10 | 115 (6 rows) SELECT '' AS six, diameter(f1) AS diameter - FROM CIRCLE_TBL; + FROM CIRCLE_TBL ORDER BY (point(f1))[0], (point(f1))[0], radius(f1); six | diameter -----+---------- | 6 - | 200 | 10 + | 200 | 6 | 20 | 230 (6 rows) -SELECT '' AS two, f1 FROM CIRCLE_TBL WHERE radius(f1) < 5; +SELECT '' AS two, f1 FROM CIRCLE_TBL WHERE radius(f1) < 5 ORDER BY (point(f1))[0], (point(f1))[0], radius(f1); two | f1 -----+----------- - | <(5,1),3> | <(1,2),3> + | <(5,1),3> (2 rows) -SELECT '' AS four, f1 FROM CIRCLE_TBL WHERE diameter(f1) >= 10; +SELECT '' AS four, f1 FROM 
CIRCLE_TBL WHERE diameter(f1) >= 10 ORDER BY (point(f1))[0], (point(f1))[0], radius(f1); four | f1 ------+---------------- - | <(1,2),100> | <(1,3),5> + | <(1,2),100> | <(100,200),10> | <(100,1),115> (4 rows) diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out index 96bd816..4d28cc0 100644 --- a/src/test/regress/expected/cluster.out +++ b/src/test/regress/expected/cluster.out @@ -57,41 +57,41 @@ INSERT INTO clstr_tst (b, c) VALUES (8, 'ocho'); -- This entry is needed to test that TOASTED values are copied correctly. INSERT INTO clstr_tst (b, c, d) VALUES (6, 'seis', repeat('xyzzy', 100000)); CLUSTER clstr_tst_c ON clstr_tst; -SELECT a,b,c,substring(d for 30), length(d) from clstr_tst; +SELECT a,b,c,substring(d for 30), length(d) from clstr_tst ORDER BY a, b, c; a | b | c | substring | length ----+----+---------------+--------------------------------+-------- - 10 | 14 | catorce | | - 18 | 5 | cinco | | - 9 | 4 | cuatro | | - 26 | 19 | diecinueve | | - 12 | 18 | dieciocho | | - 30 | 16 | dieciseis | | - 24 | 17 | diecisiete | | - 2 | 10 | diez | | - 23 | 12 | doce | | - 11 | 2 | dos | | - 25 | 9 | nueve | | - 31 | 8 | ocho | | 1 | 11 | once | | - 28 | 15 | quince | | - 32 | 6 | seis | xyzzyxyzzyxyzzyxyzzyxyzzyxyzzy | 500000 - 29 | 7 | siete | | - 15 | 13 | trece | | - 22 | 30 | treinta | | - 17 | 32 | treinta y dos | | + 2 | 10 | diez | | 3 | 31 | treinta y uno | | + 4 | 22 | veintidos | | 5 | 3 | tres | | - 20 | 1 | uno | | 6 | 20 | veinte | | + 7 | 23 | veintitres | | + 8 | 21 | veintiuno | | + 9 | 4 | cuatro | | + 10 | 14 | catorce | | + 11 | 2 | dos | | + 12 | 18 | dieciocho | | + 13 | 27 | veintisiete | | 14 | 25 | veinticinco | | - 21 | 24 | veinticuatro | | - 4 | 22 | veintidos | | - 19 | 29 | veintinueve | | + 15 | 13 | trece | | 16 | 28 | veintiocho | | + 17 | 32 | treinta y dos | | + 18 | 5 | cinco | | + 19 | 29 | veintinueve | | + 20 | 1 | uno | | + 21 | 24 | veinticuatro | | + 22 | 30 | treinta | | + 23 | 12 | doce 
| | + 24 | 17 | diecisiete | | + 25 | 9 | nueve | | + 26 | 19 | diecinueve | | 27 | 26 | veintiseis | | - 13 | 27 | veintisiete | | - 7 | 23 | veintitres | | - 8 | 21 | veintiuno | | + 28 | 15 | quince | | + 29 | 7 | siete | | + 30 | 16 | dieciseis | | + 31 | 8 | ocho | | + 32 | 6 | seis | xyzzyxyzzyxyzzyxyzzyxyzzyxyzzy | 500000 (32 rows) SELECT a,b,c,substring(d for 30), length(d) from clstr_tst ORDER BY a; @@ -207,42 +207,42 @@ SELECT a,b,c,substring(d for 30), length(d) from clstr_tst ORDER BY c; -- Verify that inheritance link still works INSERT INTO clstr_tst_inh VALUES (0, 100, 'in child table'); -SELECT a,b,c,substring(d for 30), length(d) from clstr_tst; +SELECT a,b,c,substring(d for 30), length(d) from clstr_tst ORDER BY a, b, c; a | b | c | substring | length ----+-----+----------------+--------------------------------+-------- - 10 | 14 | catorce | | - 18 | 5 | cinco | | - 9 | 4 | cuatro | | - 26 | 19 | diecinueve | | - 12 | 18 | dieciocho | | - 30 | 16 | dieciseis | | - 24 | 17 | diecisiete | | - 2 | 10 | diez | | - 23 | 12 | doce | | - 11 | 2 | dos | | - 25 | 9 | nueve | | - 31 | 8 | ocho | | + 0 | 100 | in child table | | 1 | 11 | once | | - 28 | 15 | quince | | - 32 | 6 | seis | xyzzyxyzzyxyzzyxyzzyxyzzyxyzzy | 500000 - 29 | 7 | siete | | - 15 | 13 | trece | | - 22 | 30 | treinta | | - 17 | 32 | treinta y dos | | + 2 | 10 | diez | | 3 | 31 | treinta y uno | | + 4 | 22 | veintidos | | 5 | 3 | tres | | - 20 | 1 | uno | | 6 | 20 | veinte | | + 7 | 23 | veintitres | | + 8 | 21 | veintiuno | | + 9 | 4 | cuatro | | + 10 | 14 | catorce | | + 11 | 2 | dos | | + 12 | 18 | dieciocho | | + 13 | 27 | veintisiete | | 14 | 25 | veinticinco | | - 21 | 24 | veinticuatro | | - 4 | 22 | veintidos | | - 19 | 29 | veintinueve | | + 15 | 13 | trece | | 16 | 28 | veintiocho | | + 17 | 32 | treinta y dos | | + 18 | 5 | cinco | | + 19 | 29 | veintinueve | | + 20 | 1 | uno | | + 21 | 24 | veinticuatro | | + 22 | 30 | treinta | | + 23 | 12 | doce | | + 24 | 17 | diecisiete | 
| + 25 | 9 | nueve | | + 26 | 19 | diecinueve | | 27 | 26 | veintiseis | | - 13 | 27 | veintisiete | | - 7 | 23 | veintitres | | - 8 | 21 | veintiuno | | - 0 | 100 | in child table | | + 28 | 15 | quince | | + 29 | 7 | siete | | + 30 | 16 | dieciseis | | + 31 | 8 | ocho | | + 32 | 6 | seis | xyzzyxyzzyxyzzyxyzzyxyzzyxyzzy | 500000 (33 rows) -- Verify that foreign key link still works @@ -333,15 +333,16 @@ CLUSTER clstr_1_pkey ON clstr_1; CLUSTER clstr_2 USING clstr_2_pkey; SELECT * FROM clstr_1 UNION ALL SELECT * FROM clstr_2 UNION ALL - SELECT * FROM clstr_3; + SELECT * FROM clstr_3 + ORDER BY 1; a --- 1 - 2 + 1 1 2 2 - 1 + 2 (6 rows) -- revert to the original state @@ -360,15 +361,16 @@ SET SESSION AUTHORIZATION clstr_user; CLUSTER; SELECT * FROM clstr_1 UNION ALL SELECT * FROM clstr_2 UNION ALL - SELECT * FROM clstr_3; + SELECT * FROM clstr_3 + ORDER BY 1; a --- 1 + 1 + 1 2 2 - 1 2 - 1 (6 rows) -- cluster a single table using the indisclustered bit previously set @@ -376,7 +378,8 @@ DELETE FROM clstr_1; INSERT INTO clstr_1 VALUES (2); INSERT INTO clstr_1 VALUES (1); CLUSTER clstr_1; -SELECT * FROM clstr_1; +SELECT * FROM clstr_1 +ORDER BY 1; a --- 1 @@ -402,18 +405,18 @@ UPDATE clustertest SET key = 35 WHERE key = 40; UPDATE clustertest SET key = 60 WHERE key = 50; UPDATE clustertest SET key = 70 WHERE key = 60; UPDATE clustertest SET key = 80 WHERE key = 70; -SELECT * FROM clustertest; +SELECT * FROM clustertest ORDER BY 1; key ----- 20 30 - 100 35 80 + 100 (5 rows) CLUSTER clustertest_pkey ON clustertest; -SELECT * FROM clustertest; +SELECT * FROM clustertest ORDER BY 1; key ----- 20 @@ -424,7 +427,7 @@ SELECT * FROM clustertest; (5 rows) COMMIT; -SELECT * FROM clustertest; +SELECT * FROM clustertest ORDER BY 1; key ----- 20 @@ -439,7 +442,7 @@ create temp table clstr_temp (col1 int primary key, col2 text); NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "clstr_temp_pkey" for table "clstr_temp" insert into clstr_temp values (2, 'two'), (1, 
'one'); cluster clstr_temp using clstr_temp_pkey; -select * from clstr_temp; +select * from clstr_temp ORDER BY 1; col1 | col2 ------+------ 1 | one diff --git a/src/test/regress/expected/combocid.out b/src/test/regress/expected/combocid.out index b63894c..f234ff4 100644 --- a/src/test/regress/expected/combocid.out +++ b/src/test/regress/expected/combocid.out @@ -16,7 +16,7 @@ INSERT INTO combocidtest SELECT 1 LIMIT 0; INSERT INTO combocidtest SELECT 1 LIMIT 0; INSERT INTO combocidtest VALUES (1); INSERT INTO combocidtest VALUES (2); -SELECT ctid,cmin,* FROM combocidtest; +SELECT ctid,... [truncated message content] |
From: Michael P. <mic...@us...> - 2011-01-12 09:00:35
|
Project "Postgres-XC". The branch, master has been updated via 9a57b4e60139aa0799f4b6ea3bc6716b2a974022 (commit) from 7c06c36bb8b29257ab07998fc4680993a7cb503a (commit) - Log ----------------------------------------------------------------- commit 9a57b4e60139aa0799f4b6ea3bc6716b2a974022 Author: Michael P <mic...@us...> Date: Wed Jan 12 18:01:13 2011 +0900 Sequence Renaming Fix to allow a remote Coordinator to rename sequences correctly on backend nodes. This has been unfortunately introduced by previous commit 28da86b88e667b5e45e6deef831cc839ccba2703 diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index dbd9401..2a13bad 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -732,7 +732,7 @@ ProcessUtility(Node *parsetree, */ case T_RenameStmt: #ifdef PGXC - if (IS_PGXC_COORDINATOR && IsConnFromCoord()) + if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) { RemoteQueryExecType remoteExecType = EXEC_ON_ALL_NODES; RenameStmt *stmt = (RenameStmt *) parsetree; ----------------------------------------------------------------------- Summary of changes: src/backend/tcop/utility.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-01-12 08:43:13
|
Project "Postgres-XC". The branch, master has been updated via 7c06c36bb8b29257ab07998fc4680993a7cb503a (commit) from 28da86b88e667b5e45e6deef831cc839ccba2703 (commit) - Log ----------------------------------------------------------------- commit 7c06c36bb8b29257ab07998fc4680993a7cb503a Author: Michael P <mic...@us...> Date: Wed Jan 12 17:34:16 2011 +0900 Fix for bug 3136262: Improvement of Explicit 2PC error handling There was a problem with PREPARE TRANSACTION when trying to use the same GID multiple times. This commits fixes the case where: CREATE TABLE test (a int); BEGIN; INSERT INTO test VALUES (1); PREPARE TRANSACTION 'gid_name'; BEGIN; INSERT INTO test VALUES (2); PREPARE TRANSACTION 'gid_name'; At this point PREPARE message was sent down to nodes making previously prepared transaction being rollbacked. Now GTM returns an error if GID is already in use and sends a ROLLBACK to corresponding nodes. diff --git a/src/backend/pgxc/pool/execRemote.c b/src/backend/pgxc/pool/execRemote.c index 855a1b5..3ca96c6 100644 --- a/src/backend/pgxc/pool/execRemote.c +++ b/src/backend/pgxc/pool/execRemote.c @@ -1490,6 +1490,13 @@ finish: /* Clean up connections */ pfree_pgxc_all_handles(pgxc_connections); + if (res != 0) + { + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Could not prepare transaction on data nodes"))); + } + return local_operation; } @@ -1508,6 +1515,7 @@ pgxc_node_prepare(PGXCNodeAllHandles *pgxc_handles, char *gid) GlobalTransactionId gxid = InvalidGlobalTransactionId; PGXC_NodeId *datanodes = NULL; PGXC_NodeId *coordinators = NULL; + bool gtm_error = false; gxid = GetCurrentGlobalTransactionId(); @@ -1545,7 +1553,10 @@ pgxc_node_prepare(PGXCNodeAllHandles *pgxc_handles, char *gid) datanodes, real_co_conn_count, coordinators); if (result < 0) - return EOF; + { + gtm_error = true; + goto finish; + } sprintf(buffer, "PREPARE TRANSACTION '%s'", gid); @@ -1570,7 +1581,7 @@ pgxc_node_prepare(PGXCNodeAllHandles *pgxc_handles, char *gid) finish: 
/* - * An error has happened on a Datanode or GTM, + * An error has happened on a Datanode, * It is necessary to rollback the transaction on already prepared nodes. * But not on nodes where the error occurred. */ @@ -1579,17 +1590,28 @@ finish: GlobalTransactionId rollback_xid = InvalidGlobalTransactionId; result = 0; - buffer = (char *) repalloc(buffer, 20 + strlen(gid) + 1); - sprintf(buffer, "ROLLBACK PREPARED '%s'", gid); + if (gtm_error) + { + buffer = (char *) repalloc(buffer, 9); + sprintf(buffer, "ROLLBACK"); + } + else + { + buffer = (char *) repalloc(buffer, 20 + strlen(gid) + 1); + sprintf(buffer, "ROLLBACK PREPARED '%s'", gid); - rollback_xid = BeginTranGTM(NULL); + rollback_xid = BeginTranGTM(NULL); + } /* * Send xid and rollback prepared down to Datanodes and Coordinators * Even if we get an error on one, we try and send to the others - */ - if (pgxc_all_handles_send_gxid(pgxc_handles, rollback_xid, false)) - result = EOF; + * Only query is sent down to nodes if error occured on GTM. + */ + if (!gtm_error) + if (pgxc_all_handles_send_gxid(pgxc_handles, rollback_xid, false)) + result = EOF; + if (pgxc_all_handles_send_query(pgxc_handles, buffer, false)) result = EOF; @@ -1597,10 +1619,11 @@ finish: result |= pgxc_node_receive_and_validate(co_conn_count, pgxc_handles->coord_handles, false); /* - * Don't forget to rollback also on GTM + * Don't forget to rollback also on GTM if error happened on Datanodes * Both GXIDs used for PREPARE and COMMIT PREPARED are discarded from GTM snapshot here. */ - CommitPreparedTranGTM(gxid, rollback_xid); + if (!gtm_error) + CommitPreparedTranGTM(gxid, rollback_xid); return EOF; } diff --git a/src/gtm/main/gtm_txn.c b/src/gtm/main/gtm_txn.c index 252fc43..27f7372 100644 --- a/src/gtm/main/gtm_txn.c +++ b/src/gtm/main/gtm_txn.c @@ -888,6 +888,12 @@ GTM_StartPreparedTransaction(GTM_TransactionHandle txn, return STATUS_ERROR; /* + * Check if given GID is already in use by another transaction. 
+ */ + if (GTM_GIDToHandle(gid) != InvalidTransactionHandle) + return STATUS_ERROR; + + /* * Mark the transaction as being prepared */ GTM_RWLockAcquire(>m_txninfo->gti_lock, GTM_LOCKMODE_WRITE); ----------------------------------------------------------------------- Summary of changes: src/backend/pgxc/pool/execRemote.c | 43 +++++++++++++++++++++++++++-------- src/gtm/main/gtm_txn.c | 6 +++++ 2 files changed, 39 insertions(+), 10 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-01-12 06:52:57
|
Project "Postgres-XC". The branch, master has been updated via 28da86b88e667b5e45e6deef831cc839ccba2703 (commit) from bc45a6fdb6afe5dd72526c4919bd8c138f11eb0d (commit) - Log ----------------------------------------------------------------- commit 28da86b88e667b5e45e6deef831cc839ccba2703 Author: Michael P <mic...@us...> Date: Wed Jan 12 15:53:43 2011 +0900 Fix for bug 3142311:renaming sequences error Sequences could not be renamed correctly as Rename statement was running on all the nodes but sequences are not defined on Datanodes. Patch written by Benny with some editorialization by me. diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 968b921..dbd9401 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -732,8 +732,23 @@ ProcessUtility(Node *parsetree, */ case T_RenameStmt: #ifdef PGXC - if (IS_PGXC_COORDINATOR) - ExecUtilityStmtOnNodes(queryString, NULL, false, EXEC_ON_ALL_NODES); + if (IS_PGXC_COORDINATOR && IsConnFromCoord()) + { + RemoteQueryExecType remoteExecType = EXEC_ON_ALL_NODES; + RenameStmt *stmt = (RenameStmt *) parsetree; + + if (stmt->renameType == OBJECT_SEQUENCE) + remoteExecType = EXEC_ON_COORDS; + else if (stmt->renameType == OBJECT_TABLE) + { + Oid relid = RangeVarGetRelid(stmt->relation, false); + + if (get_rel_relkind(relid) == RELKIND_SEQUENCE) + remoteExecType = EXEC_ON_COORDS; + } + + ExecUtilityStmtOnNodes(queryString, NULL, false, remoteExecType); + } #endif ExecRenameStmt((RenameStmt *) parsetree); break; ----------------------------------------------------------------------- Summary of changes: src/backend/tcop/utility.c | 19 +++++++++++++++++-- 1 files changed, 17 insertions(+), 2 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-01-11 02:48:27
|
Project "Postgres-XC". The branch, master has been updated via bc45a6fdb6afe5dd72526c4919bd8c138f11eb0d (commit) from 981e2bbe81c94c0427ed9504d0390119a7770a83 (commit) - Log ----------------------------------------------------------------- commit bc45a6fdb6afe5dd72526c4919bd8c138f11eb0d Author: Michael P <mic...@us...> Date: Tue Jan 11 11:46:53 2011 +0900 Improvement of performance with tuple scan This fixes some performance issues that have been introduced with commit: 44ca05af2742271abdc5c14f5ca313d5ea307875 Tuple were scanned with the cheapest cost, degrading performance a lot. Patch written by Benny, with some editorialization by me. diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 63893b4..ad90109 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -33,6 +33,7 @@ #include "parser/parse_clause.h" #include "parser/parsetree.h" #ifdef PGXC +#include "pgxc/pgxc.h" #include "pgxc/planner.h" #include "access/sysattr.h" #include "utils/builtins.h" @@ -573,8 +574,12 @@ create_join_plan(PlannerInfo *root, JoinPath *best_path) #endif #ifdef PGXC - /* check if this join can be reduced to an equiv. remote scan node */ - plan = create_remotejoin_plan(root, best_path, plan, outer_plan, inner_plan); + /* + * Check if this join can be reduced to an equiv. 
remote scan node + * This can only be executed on a remote Coordinator + */ + if (IS_PGXC_COORDINATOR && IsConnFromCoord()) + plan = create_remotejoin_plan(root, best_path, plan, outer_plan, inner_plan); #endif return plan; diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 04bf594..63b0581 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -23,6 +23,9 @@ #include "parser/parsetree.h" #include "utils/hsearch.h" +#ifdef PGXC +#include "pgxc/pgxc.h" +#endif typedef struct JoinHashEntry { @@ -107,10 +110,16 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind) * * These allow for maximum query shipping to the remote * side later during the planning phase + * + * This has to be set on a remote Coordinator only + * as it hugely penalizes performance on backend Nodes */ - rel->pages = 1; - rel->tuples = 1; - rel->rows = 1; + if (IS_PGXC_COORDINATOR && IsConnFromCoord()) + { + rel->pages = 1; + rel->tuples = 1; + rel->rows = 1; + } #endif break; case RTE_SUBQUERY: ----------------------------------------------------------------------- Summary of changes: src/backend/optimizer/plan/createplan.c | 9 +++++++-- src/backend/optimizer/util/relnode.c | 15 ++++++++++++--- 2 files changed, 19 insertions(+), 5 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-01-11 02:08:37
|
Project "Postgres-XC". The branch, master has been updated via 981e2bbe81c94c0427ed9504d0390119a7770a83 (commit) from 45e1d4e389e966d072aaf98a49d9702aa253d976 (commit) - Log ----------------------------------------------------------------- commit 981e2bbe81c94c0427ed9504d0390119a7770a83 Author: Michael P <mic...@us...> Date: Tue Jan 11 11:05:18 2011 +0900 Clean up of execRemote.c There was some code that was used to clean up connection thread between Coordinator and Datanodes that was not really necessary. This has been introduced with version 0.9.2 to stabilize the code by consuming messages on connections where error happened on backend Noce. This patch also corrects a bug on Datanode with GXID that was not correctly set at initialization. This leaded to transactions being committed twice on backend nodes, crashing it with a FATAL error. Patch written by Andrei Martsinchyk diff --git a/src/backend/pgxc/pool/execRemote.c b/src/backend/pgxc/pool/execRemote.c index a387354..855a1b5 100644 --- a/src/backend/pgxc/pool/execRemote.c +++ b/src/backend/pgxc/pool/execRemote.c @@ -36,7 +36,6 @@ #include "pgxc/pgxc.h" #define END_QUERY_TIMEOUT 20 -#define CLEAR_TIMEOUT 5 #define DATA_NODE_FETCH_SIZE 1 @@ -79,8 +78,6 @@ static void clear_write_node_list(void); static void pfree_pgxc_all_handles(PGXCNodeAllHandles *pgxc_handles); -static int handle_response_clear(PGXCNodeHandle * conn); - static void close_node_cursors(PGXCNodeHandle **connections, int conn_count, char *cursor); static PGXCNodeAllHandles *pgxc_get_all_transaction_nodes(PGXCNode_HandleRequested status_requested); @@ -955,14 +952,16 @@ void BufferConnection(PGXCNodeHandle *conn) { RemoteQueryState *combiner = conn->combiner; + MemoryContext oldcontext; + + Assert(conn->state == DN_CONNECTION_STATE_QUERY && combiner); + /* * When BufferConnection is invoked CurrentContext is related to other * portal, which is trying to control the connection. 
* TODO See if we can find better context to switch to */ - MemoryContext oldcontext = MemoryContextSwitchTo(combiner->ss.ss_ScanTupleSlot->tts_mcxt); - - Assert(conn->state == DN_CONNECTION_STATE_QUERY && combiner); + oldcontext = MemoryContextSwitchTo(combiner->ss.ss_ScanTupleSlot->tts_mcxt); /* Verify the connection is in use by the combiner */ combiner->current_conn = 0; @@ -1007,10 +1006,11 @@ BufferConnection(PGXCNodeHandle *conn) { /* incomplete message, read more */ if (pgxc_node_receive(1, &conn, NULL)) - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("Failed to fetch from data node"))); - continue; + { + conn->state = DN_CONNECTION_STATE_ERROR_FATAL; + add_error_message(conn, "Failed to fetch from data node"); + } + break; } else if (res == RESPONSE_COMPLETE) { @@ -1261,7 +1261,6 @@ handle_response(PGXCNodeHandle * conn, RemoteQueryState *combiner) for (;;) { Assert(conn->state != DN_CONNECTION_STATE_IDLE); - Assert(conn->combiner == combiner || conn->combiner == NULL); /* * If we are in the process of shutting down, we @@ -1280,6 +1279,8 @@ handle_response(PGXCNodeHandle * conn, RemoteQueryState *combiner) if (!HAS_MESSAGE_BUFFERED(conn)) return RESPONSE_EOF; + Assert(conn->combiner == combiner || conn->combiner == NULL); + /* TODO handle other possible responses */ msg_type = get_message(conn, &msg_len, &msg); switch (msg_type) @@ -1374,75 +1375,6 @@ handle_response(PGXCNodeHandle * conn, RemoteQueryState *combiner) /* - * Like handle_response, but for consuming the messages, - * in case we of an error to clean the data node connection. 
- * Return values: - * RESPONSE_EOF - need to receive more data for the connection - * RESPONSE_COMPLETE - done with the connection, or done trying (error) - */ -static int -handle_response_clear(PGXCNodeHandle * conn) -{ - char *msg; - int msg_len; - char msg_type; - - for (;;) - { - /* No data available, exit */ - if (conn->state == DN_CONNECTION_STATE_QUERY) - return RESPONSE_EOF; - - /* - * If we are in the process of shutting down, we - * may be rolling back, and the buffer may contain other messages. - * We want to avoid a procarray exception - * as well as an error stack overflow. - */ - if (proc_exit_inprogress) - { - conn->state = DN_CONNECTION_STATE_ERROR_FATAL; - return RESPONSE_COMPLETE; - } - - msg_type = get_message(conn, &msg_len, &msg); - switch (msg_type) - { - case '\0': /* Not enough data in the buffer */ - case 'c': /* CopyToCommandComplete */ - case 'C': /* CommandComplete */ - case 'T': /* RowDescription */ - case 'D': /* DataRow */ - case 'H': /* CopyOutResponse */ - case 'd': /* CopyOutDataRow */ - case 'A': /* NotificationResponse */ - case 'N': /* NoticeResponse */ - break; - case 'E': /* ErrorResponse */ - /* - * conn->state = DN_CONNECTION_STATE_ERROR_NOT_READY; - * Do not return with an error, we still need to consume Z, - * ready-for-query - */ - break; - case 'Z': /* ReadyForQuery */ - conn->transaction_status = msg[0]; - conn->state = DN_CONNECTION_STATE_IDLE; - return RESPONSE_COMPLETE; - case 'I': /* EmptyQuery */ - default: - /* sync lost? 
*/ - elog(WARNING, "Received unsupported message type: %c", msg_type); - conn->state = DN_CONNECTION_STATE_ERROR_FATAL; - return RESPONSE_COMPLETE; - } - } - - return RESPONSE_EOF; -} - - -/* * Send BEGIN command to the Datanodes or Coordinators and receive responses */ static int @@ -1551,7 +1483,7 @@ finish: if (!autocommit) stat_transaction(pgxc_connections->dn_conn_count); if (!PersistentConnections) - release_handles(false); + release_handles(); autocommit = true; clear_write_node_list(); @@ -1650,9 +1582,6 @@ finish: buffer = (char *) repalloc(buffer, 20 + strlen(gid) + 1); sprintf(buffer, "ROLLBACK PREPARED '%s'", gid); - /* Consume any messages on the Datanodes and Coordinators first if necessary */ - PGXCNodeConsumeMessages(); - rollback_xid = BeginTranGTM(NULL); /* @@ -1786,7 +1715,7 @@ finish: * is aborted after the list of nodes in error state has been saved to be sent to GTM */ if (!PersistentConnections && res == 0) - release_handles(false); + release_handles(); autocommit = true; clear_write_node_list(); @@ -1918,7 +1847,7 @@ finish: if (!autocommit) stat_transaction(tran_count); if (!PersistentConnections) - release_handles(false); + release_handles(); autocommit = true; clear_write_node_list(); @@ -2040,7 +1969,7 @@ finish: if (!autocommit) stat_transaction(tran_count); if (!PersistentConnections) - release_handles(true); + release_handles(); autocommit = true; clear_write_node_list(); @@ -2130,7 +2059,7 @@ finish: if (!autocommit) stat_transaction(tran_count); if (!PersistentConnections) - release_handles(false); + release_handles(); autocommit = true; clear_write_node_list(); @@ -2184,9 +2113,6 @@ PGXCNodeRollback(void) tran_count = pgxc_connections->dn_conn_count + pgxc_connections->co_conn_count; - /* Consume any messages on the Datanodes and Coordinators first if necessary */ - PGXCNodeConsumeMessages(); - /* * If we do not have open transactions we have nothing to rollback just * report success @@ -2201,7 +2127,7 @@ finish: if (!autocommit) 
stat_transaction(tran_count); if (!PersistentConnections) - release_handles(true); + release_handles(); autocommit = true; clear_write_node_list(); @@ -2396,7 +2322,7 @@ DataNodeCopyBegin(const char *query, List *nodelist, Snapshot snapshot, bool is_ if (need_tran) DataNodeCopyFinish(connections, 0, COMBINE_TYPE_NONE); else if (!PersistentConnections) - release_handles(false); + release_handles(); } pfree(connections); @@ -2620,7 +2546,7 @@ DataNodeCopyOut(ExecNodes *exec_nodes, PGXCNodeHandle** copy_connections, FILE* if (!ValidateAndCloseCombiner(combiner)) { if (autocommit && !PersistentConnections) - release_handles(false); + release_handles(); pfree(copy_connections); ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), @@ -3188,20 +3114,27 @@ do_query(RemoteQueryState *node) primaryconnection->combiner = node; Assert(node->combine_type == COMBINE_TYPE_SAME); - while (node->command_complete_count < 1) + /* Make sure the command is completed on the primary node */ + while (true) { - if (pgxc_node_receive(1, &primaryconnection, NULL)) + int res; + pgxc_node_receive(1, &primaryconnection, NULL); + res = handle_response(primaryconnection, node); + if (res == RESPONSE_COMPLETE) + break; + else if (res == RESPONSE_EOF) + continue; + else ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("Failed to read response from data nodes"))); - handle_response(primaryconnection, node); - if (node->errorMessage) - { - char *code = node->errorCode; - ereport(ERROR, - (errcode(MAKE_SQLSTATE(code[0], code[1], code[2], code[3], code[4])), - errmsg("%s", node->errorMessage))); - } + errmsg("Unexpected response from data node"))); + } + if (node->errorMessage) + { + char *code = node->errorCode; + ereport(ERROR, + (errcode(MAKE_SQLSTATE(code[0], code[1], code[2], code[3], code[4])), + errmsg("%s", node->errorMessage))); } } @@ -3805,65 +3738,6 @@ ParamListToDataRow(ParamListInfo params, char** result) } -/* - * Consume any remaining messages on the connections. 
- * This is useful for calling after ereport() - */ -void -PGXCNodeConsumeMessages(void) -{ - int i; - int active_count = 0; - int res; - struct timeval timeout; - PGXCNodeHandle *connection = NULL; - PGXCNodeHandle **connections = NULL; - PGXCNodeHandle *active_connections[NumDataNodes+NumCoords]; - - /* Get all active Coordinators and Datanodes */ - active_count = get_active_nodes(active_connections); - - /* Iterate through handles in use and try and clean */ - for (i = 0; i < active_count; i++) - { - elog(WARNING, "Consuming data node messages after error."); - - connection = active_connections[i]; - - res = RESPONSE_EOF; - - while (res != RESPONSE_COMPLETE) - { - int res = handle_response_clear(connection); - - if (res == RESPONSE_EOF) - { - if (!connections) - connections = (PGXCNodeHandle **) palloc(sizeof(PGXCNodeHandle*)); - - connections[0] = connection; - - /* Use a timeout so we do not wait forever */ - timeout.tv_sec = CLEAR_TIMEOUT; - timeout.tv_usec = 0; - if (pgxc_node_receive(1, connections, &timeout)) - { - /* Mark this as bad, move on to next one */ - connection->state = DN_CONNECTION_STATE_ERROR_FATAL; - break; - } - } - if (connection->state == DN_CONNECTION_STATE_ERROR_FATAL - || connection->state == DN_CONNECTION_STATE_IDLE) - break; - } - } - - if (connections) - pfree(connections); -} - - /* ---------------------------------------------------------------- * ExecRemoteQueryReScan * @@ -3893,9 +3767,6 @@ ExecRemoteQueryReScan(RemoteQueryState *node, ExprContext *exprCtxt) * * But does not need an Estate instance and does not do some unnecessary work, * like allocating tuple slots. - * - * Handles are freed when an error occurs during Transaction Abort, it is first necessary - * to consume all the messages on the connections. 
*/ void ExecRemoteUtility(RemoteQuery *node) @@ -3907,10 +3778,9 @@ ExecRemoteUtility(RemoteQuery *node) GlobalTransactionId gxid = InvalidGlobalTransactionId; Snapshot snapshot = GetActiveSnapshot(); PGXCNodeAllHandles *pgxc_connections; - PGXCNodeHandle *primaryconnection = NULL;/* For the moment only Datanode has a primary */ - int regular_conn_count; int total_conn_count; int co_conn_count; + int dn_conn_count; bool need_tran; int i; @@ -3920,23 +3790,11 @@ ExecRemoteUtility(RemoteQuery *node) pgxc_connections = get_exec_connections(NULL, node->exec_nodes, exec_type); - primaryconnection = pgxc_connections->primary_handle; - - /* Registering new connections needs the sum of Connections to Datanodes AND to Coordinators */ - total_conn_count = regular_conn_count = pgxc_connections->dn_conn_count - + pgxc_connections->co_conn_count; - - regular_conn_count = pgxc_connections->dn_conn_count; + dn_conn_count = pgxc_connections->dn_conn_count; co_conn_count = pgxc_connections->co_conn_count; - /* - * Primary connection is counted separately in regular connection count - * but is included in total connection count if used. 
- */ - if (primaryconnection) - { - regular_conn_count--; - } + /* Registering new connections needs the sum of Connections to Datanodes AND to Coordinators */ + total_conn_count = dn_conn_count + co_conn_count; if (force_autocommit) need_tran = false; @@ -3949,9 +3807,7 @@ ExecRemoteUtility(RemoteQuery *node) if (!is_read_only) { - if (primaryconnection) - register_write_nodes(1, &primaryconnection); - register_write_nodes(regular_conn_count, pgxc_connections->datanode_handles); + register_write_nodes(dn_conn_count, pgxc_connections->datanode_handles); } gxid = GetCurrentGlobalTransactionId(); @@ -3971,11 +3827,8 @@ ExecRemoteUtility(RemoteQuery *node) PGXCNodeHandle *new_connections[total_conn_count]; int new_count = 0; - if (primaryconnection && primaryconnection->transaction_status != 'T') - new_connections[new_count++] = primaryconnection; - /* Check for Datanodes */ - for (i = 0; i < regular_conn_count; i++) + for (i = 0; i < dn_conn_count; i++) if (pgxc_connections->datanode_handles[i]->transaction_status != 'T') new_connections[new_count++] = pgxc_connections->datanode_handles[i]; @@ -4005,64 +3858,11 @@ ExecRemoteUtility(RemoteQuery *node) } } - /* See if we have a primary nodes, execute on it first before the others */ - if (primaryconnection) - { - if (primaryconnection->state == DN_CONNECTION_STATE_QUERY) - BufferConnection(primaryconnection); - /* If explicit transaction is needed gxid is already sent */ - if (!need_tran && pgxc_node_send_gxid(primaryconnection, gxid)) - { - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("Failed to send command to data nodes"))); - } - if (snapshot && pgxc_node_send_snapshot(primaryconnection, snapshot)) - { - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("Failed to send command to data nodes"))); - } - if (pgxc_node_send_query(primaryconnection, node->sql_statement) != 0) - { - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("Failed to send command to data nodes"))); - } - - 
Assert(remotestate->combine_type == COMBINE_TYPE_SAME); - - while (remotestate->command_complete_count < 1) - { - PG_TRY(); - { - pgxc_node_receive(1, &primaryconnection, NULL); - while (handle_response(primaryconnection, remotestate) == RESPONSE_EOF) - pgxc_node_receive(1, &primaryconnection, NULL); - if (remotestate->errorMessage) - { - char *code = remotestate->errorCode; - ereport(ERROR, - (errcode(MAKE_SQLSTATE(code[0], code[1], code[2], code[3], code[4])), - errmsg("%s", remotestate->errorMessage))); - } - } - /* If we got an error response return immediately */ - PG_CATCH(); - { - pfree_pgxc_all_handles(pgxc_connections); - - PG_RE_THROW(); - } - PG_END_TRY(); - } - } - /* Send query down to Datanodes */ if (exec_type == EXEC_ON_ALL_NODES || exec_type == EXEC_ON_DATANODES) { - for (i = 0; i < regular_conn_count; i++) + for (i = 0; i < dn_conn_count; i++) { PGXCNodeHandle *conn = pgxc_connections->datanode_handles[i]; @@ -4126,24 +3926,19 @@ ExecRemoteUtility(RemoteQuery *node) if (exec_type == EXEC_ON_ALL_NODES || exec_type == EXEC_ON_DATANODES) { - while (regular_conn_count > 0) + while (dn_conn_count > 0) { int i = 0; - pgxc_node_receive(regular_conn_count, pgxc_connections->datanode_handles, NULL); + pgxc_node_receive(dn_conn_count, pgxc_connections->datanode_handles, NULL); /* * Handle input from the data nodes. - * If we got a RESPONSE_DATAROW we can break handling to wrap - * it into a tuple and return. Handling will be continued upon - * subsequent invocations. - * If we got 0, we exclude connection from the list. We do not - * expect more input from it. In case of non-SELECT query we quit - * the loop when all nodes finish their work and send ReadyForQuery - * with empty connections array. + * We do not expect data nodes returning tuples when running utility + * command. * If we got EOF, move to the next connection, will receive more * data on the next iteration. 
*/ - while (i < regular_conn_count) + while (i < dn_conn_count) { PGXCNodeHandle *conn = pgxc_connections->datanode_handles[i]; int res = handle_response(conn, remotestate); @@ -4153,9 +3948,9 @@ ExecRemoteUtility(RemoteQuery *node) } else if (res == RESPONSE_COMPLETE) { - if (i < --regular_conn_count) + if (i < --dn_conn_count) pgxc_connections->datanode_handles[i] = - pgxc_connections->datanode_handles[regular_conn_count]; + pgxc_connections->datanode_handles[dn_conn_count]; } else if (res == RESPONSE_TUPDESC) { @@ -4171,6 +3966,19 @@ ExecRemoteUtility(RemoteQuery *node) } } } + + /* + * We have processed all responses from the data nodes and if we have + * error message pending we can report it. All connections should be in + * consistent state now and can be released to the pool after rollback. + */ + if (remotestate->errorMessage) + { + char *code = remotestate->errorCode; + ereport(ERROR, + (errcode(MAKE_SQLSTATE(code[0], code[1], code[2], code[3], code[4])), + errmsg("%s", remotestate->errorMessage))); + } } /* Make the same for Coordinators */ @@ -4211,6 +4019,18 @@ ExecRemoteUtility(RemoteQuery *node) } } } + /* + * We have processed all responses from the data nodes and if we have + * error message pending we can report it. All connections should be in + * consistent state now and can be released to the pool after rollback. + */ + if (remotestate->errorMessage) + { + char *code = remotestate->errorCode; + ereport(ERROR, + (errcode(MAKE_SQLSTATE(code[0], code[1], code[2], code[3], code[4])), + errmsg("%s", remotestate->errorMessage))); + } } } @@ -4228,11 +4048,9 @@ PGXCNodeCleanAndRelease(int code, Datum arg) /* Rollback on GTM if transaction id opened. 
*/ RollbackTranGTM((GlobalTransactionId) GetCurrentTransactionIdIfAny()); - - release_handles(true); - } else - /* Release data node connections */ - release_handles(false); + } + /* Release data node connections */ + release_handles(); /* Close connection with GTM */ CloseGTM(); @@ -4474,5 +4292,5 @@ PGXCNodeGetNodeList(PGXC_NodeId **datanodes, * and will be sent to GTM. */ if (!PersistentConnections) - release_handles(false); + release_handles(); } diff --git a/src/backend/pgxc/pool/pgxcnode.c b/src/backend/pgxc/pool/pgxcnode.c index fc63457..dafbec5 100644 --- a/src/backend/pgxc/pool/pgxcnode.c +++ b/src/backend/pgxc/pool/pgxcnode.c @@ -327,6 +327,8 @@ retry: if (read_status == EOF || read_status < 0) { + /* Can not read - no more actions, just discard connection */ + conn->state = DN_CONNECTION_STATE_ERROR_FATAL; add_error_message(conn, "unexpected EOF on datanode connection"); elog(WARNING, "unexpected EOF on datanode connection"); /* Should we read from the other connections before returning? */ @@ -577,12 +579,9 @@ get_message(PGXCNodeHandle *conn, int *len, char **msg) /* * Release all data node connections and coordinator connections * back to pool and release occupied memory - * - * If force_drop is true, we force dropping all of the connections, such as after - * a rollback, which was likely issued due to an error. 
*/ void -release_handles(bool force_drop) +release_handles(void) { int i; int dn_discard[NumDataNodes]; @@ -604,7 +603,7 @@ release_handles(bool force_drop) if (handle->sock != NO_SOCKET) { - if (force_drop) + if (handle->state == DN_CONNECTION_STATE_ERROR_FATAL) dn_discard[dn_ndisc++] = handle->nodenum; else if (handle->state != DN_CONNECTION_STATE_IDLE) { @@ -622,7 +621,7 @@ release_handles(bool force_drop) if (handle->sock != NO_SOCKET) { - if (force_drop) + if (handle->state == DN_CONNECTION_STATE_ERROR_FATAL) co_discard[co_ndisc++] = handle->nodenum; else if (handle->state != DN_CONNECTION_STATE_IDLE) { @@ -899,19 +898,29 @@ pgxc_node_send_bind(PGXCNodeHandle * handle, const char *portal, const char *statement, int paramlen, char *params) { uint16 n16; + int pnameLen; + int stmtLen; + int paramCodeLen; + int paramValueLen; + int paramOutLen; + int msgLen; + + /* Invalid connection state, return error */ + if (handle->state != DN_CONNECTION_STATE_IDLE) + return EOF; + /* portal name size (allow NULL) */ - int pnameLen = portal ? strlen(portal) + 1 : 1; + pnameLen = portal ? strlen(portal) + 1 : 1; /* statement name size (allow NULL) */ - int stmtLen = statement ? strlen(statement) + 1 : 1; + stmtLen = statement ? strlen(statement) + 1 : 1; /* size of parameter codes array (always empty for now) */ - int paramCodeLen = 2; + paramCodeLen = 2; /* size of parameter values array, 2 if no params */ - int paramValueLen = paramlen ? paramlen : 2; + paramValueLen = paramlen ? 
paramlen : 2; /* size of output parameter codes array (always empty for now) */ - int paramOutLen = 2; - + paramOutLen = 2; /* size + pnameLen + stmtLen + parameters */ - int msgLen = 4 + pnameLen + stmtLen + paramCodeLen + paramValueLen + paramOutLen; + msgLen = 4 + pnameLen + stmtLen + paramCodeLen + paramValueLen + paramOutLen; /* msgType + msgLen */ if (ensure_out_buffer_capacity(handle->outEnd + 1 + msgLen, handle) != 0) @@ -970,11 +979,18 @@ int pgxc_node_send_describe(PGXCNodeHandle * handle, bool is_statement, const char *name) { + int nameLen; + int msgLen; + + /* Invalid connection state, return error */ + if (handle->state != DN_CONNECTION_STATE_IDLE) + return EOF; + /* statement or portal name size (allow NULL) */ - int nameLen = name ? strlen(name) + 1 : 1; + nameLen = name ? strlen(name) + 1 : 1; /* size + statement/portal + name */ - int msgLen = 4 + 1 + nameLen; + msgLen = 4 + 1 + nameLen; /* msgType + msgLen */ if (ensure_out_buffer_capacity(handle->outEnd + 1 + msgLen, handle) != 0) @@ -1191,10 +1207,16 @@ pgxc_node_flush(PGXCNodeHandle *handle) int pgxc_node_send_query(PGXCNodeHandle * handle, const char *query) { - int strLen = strlen(query) + 1; + int strLen; + int msgLen; + /* Invalid connection state, return error */ + if (handle->state != DN_CONNECTION_STATE_IDLE) + return EOF; + + strLen = strlen(query) + 1; /* size + strlen */ - int msgLen = 4 + strLen; + msgLen = 4 + strLen; /* msgType + msgLen */ if (ensure_out_buffer_capacity(handle->outEnd + 1 + msgLen, handle) != 0) @@ -1225,6 +1247,10 @@ pgxc_node_send_gxid(PGXCNodeHandle *handle, GlobalTransactionId gxid) int msglen = 8; int i32; + /* Invalid connection state, return error */ + if (handle->state != DN_CONNECTION_STATE_IDLE) + return EOF; + /* msgType + msgLen */ if (ensure_out_buffer_capacity(handle->outEnd + 1 + msglen, handle) != 0) { @@ -1254,6 +1280,10 @@ pgxc_node_send_snapshot(PGXCNodeHandle *handle, Snapshot snapshot) int nval; int i; + /* Invalid connection state, return 
error */ + if (handle->state != DN_CONNECTION_STATE_IDLE) + return EOF; + /* calculate message length */ msglen = 20; if (snapshot->xcnt > 0) @@ -1307,6 +1337,10 @@ pgxc_node_send_timestamp(PGXCNodeHandle *handle, TimestampTz timestamp) uint32 n32; int64 i = (int64) timestamp; + /* Invalid connection state, return error */ + if (handle->state != DN_CONNECTION_STATE_IDLE) + return EOF; + /* msgType + msgLen */ if (ensure_out_buffer_capacity(handle->outEnd + 1 + msglen, handle) != 0) { @@ -1533,7 +1567,9 @@ get_handles(List *datanodelist, List *coordlist, bool is_coord_only_query) list_free(dn_allocate); if (co_allocate) list_free(co_allocate); - return NULL; + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_RESOURCES), + errmsg("Failed to get pooled connections"))); } /* Initialisation for Datanodes */ if (dn_allocate) @@ -1607,46 +1643,29 @@ get_transaction_nodes(PGXCNodeHandle **connections, char client_conn_type, { for (i = 0; i < NumDataNodes; i++) { - /* - * We may want to consider also not returning connections with a - * state of DN_CONNECTION_STATE_ERROR_NOT_READY or - * DN_CONNECTION_STATE_ERROR_FATAL. - * ERROR_NOT_READY can happen if the data node abruptly disconnects. 
- */ - if (status_requested == HANDLE_IDLE) + if (dn_handles[i].sock != NO_SOCKET && dn_handles[i].state != DN_CONNECTION_STATE_ERROR_FATAL) { - if (dn_handles[i].sock != NO_SOCKET && dn_handles[i].transaction_status == 'I') + if (status_requested == HANDLE_IDLE && dn_handles[i].transaction_status == 'I') connections[tran_count++] = &dn_handles[i]; - } - else if (status_requested == HANDLE_ERROR) - { - if (dn_handles[i].transaction_status == 'E') + else if (status_requested == HANDLE_ERROR && dn_handles[i].transaction_status == 'E') connections[tran_count++] = &dn_handles[i]; - } - else - { - if (dn_handles[i].sock != NO_SOCKET && dn_handles[i].transaction_status != 'I') + else if (dn_handles[i].transaction_status != 'I') connections[tran_count++] = &dn_handles[i]; } } } + if (coord_count && client_conn_type == REMOTE_CONN_COORD) { for (i = 0; i < NumCoords; i++) { - if (status_requested == HANDLE_IDLE) + if (co_handles[i].sock != NO_SOCKET && co_handles[i].state != DN_CONNECTION_STATE_ERROR_FATAL) { - if (co_handles[i].sock != NO_SOCKET && co_handles[i].transaction_status == 'I') + if (status_requested == HANDLE_IDLE && co_handles[i].transaction_status == 'I') connections[tran_count++] = &co_handles[i]; - } - else if (status_requested == HANDLE_ERROR) - { - if (co_handles[i].transaction_status == 'E') - connections[tran_count++] = &co_handles[i]; - } - else - { - if (co_handles[i].sock != NO_SOCKET && co_handles[i].transaction_status != 'I') + else if (status_requested == HANDLE_ERROR && co_handles[i].transaction_status == 'E') + connections[tran_count++] = &co_handles[i]; + else if (co_handles[i].transaction_status != 'I') connections[tran_count++] = &co_handles[i]; } } @@ -1789,11 +1808,11 @@ pgxc_all_handles_send_query(PGXCNodeAllHandles *pgxc_handles, const char *buffer /* Send to Datanodes */ for (i = 0; i < dn_conn_count; i++) { - /* - * Clean connection if fetch in progress - */ - if (pgxc_handles->datanode_handles[i]->state == DN_CONNECTION_STATE_QUERY) - 
BufferConnection(pgxc_handles->datanode_handles[i]); + if (pgxc_handles->datanode_handles[i]->state != DN_CONNECTION_STATE_IDLE) + { + pgxc_handles->datanode_handles[i]->state = DN_CONNECTION_STATE_ERROR_FATAL; + continue; + } if (pgxc_node_send_query(pgxc_handles->datanode_handles[i], buffer)) { add_error_message(pgxc_handles->datanode_handles[i], "Can not send request"); diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index c83563b..e61d444 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -3615,7 +3615,7 @@ PostgresMain(int argc, char *argv[], const char *username) * Abort the current transaction in order to recover. */ #ifdef PGXC - /* + /* * Temporarily do not abort if we are already in an abort state. * This change tries to handle the case where the error data stack fills up. */ @@ -3657,7 +3657,6 @@ PostgresMain(int argc, char *argv[], const char *username) /* * Non-error queries loop here. */ - for (;;) { /* diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 327ba7c..ef5e218 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -30,6 +30,9 @@ #include "mb/pg_wchar.h" #include "miscadmin.h" #include "pgstat.h" +#ifdef PGXC +#include "pgxc/pgxc.h" +#endif #include "postmaster/autovacuum.h" #include "postmaster/postmaster.h" #include "storage/backendid.h" @@ -491,6 +494,18 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, */ on_shmem_exit(ShutdownPostgres, 0); +#ifdef PGXC + /* + * The transaction below consumes a xid, and we should let GTM know about + * that. Session being initializing now and value from the coordinator + * is not available, so try and connect to GTM directly + * The check for PostmasterPid is to detect --single mode as it runs + * under initdb. 
PostmasterPid is not set in this case + */ + if (!bootstrap && IS_PGXC_DATANODE && PostmasterPid) + SetForceXidFromGTM(true); +#endif + /* * Start a new transaction here before first access to db, and get a * snapshot. We don't have a use for the snapshot itself, but we're @@ -655,6 +670,14 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, if (!bootstrap) CommitTransactionCommand(); +#ifdef PGXC + /* + * We changed the flag, set it back to default + */ + if (!bootstrap && IS_PGXC_DATANODE && PostmasterPid) + SetForceXidFromGTM(false); +#endif + return am_superuser; } diff --git a/src/include/pgxc/execRemote.h b/src/include/pgxc/execRemote.h index c5c45c0..8d7a348 100644 --- a/src/include/pgxc/execRemote.h +++ b/src/include/pgxc/execRemote.h @@ -118,7 +118,6 @@ extern void PGXCNodeImplicitCommitPrepared(GlobalTransactionId prepare_xid, GlobalTransactionId commit_xid, char *gid, bool is_commit); -extern void PGXCNodeConsumeMessages(void); /* Get list of nodes */ extern void PGXCNodeGetNodeList(PGXC_NodeId **datanodes, diff --git a/src/include/pgxc/pgxcnode.h b/src/include/pgxc/pgxcnode.h index 47b0b96..098a7f2 100644 --- a/src/include/pgxc/pgxcnode.h +++ b/src/include/pgxc/pgxcnode.h @@ -102,7 +102,7 @@ extern int PGXCNodeConnClean(NODE_CONNECTION * conn); extern void PGXCNodeCleanAndRelease(int code, Datum arg); extern PGXCNodeAllHandles *get_handles(List *datanodelist, List *coordlist, bool is_query_coord_only); -extern void release_handles(bool force_drop); +extern void release_handles(void); extern int get_transaction_nodes(PGXCNodeHandle ** connections, char client_conn_type, ----------------------------------------------------------------------- Summary of changes: src/backend/pgxc/pool/execRemote.c | 344 +++++++++--------------------------- src/backend/pgxc/pool/pgxcnode.c | 119 +++++++------ src/backend/tcop/postgres.c | 3 +- src/backend/utils/init/postinit.c | 23 +++ src/include/pgxc/execRemote.h | 1 - src/include/pgxc/pgxcnode.h | 2 +- 
6 files changed, 175 insertions(+), 317 deletions(-) hooks/post-receive -- Postgres-XC |
From: mason_s <ma...@us...> - 2010-12-23 20:18:12
|
Project "Postgres-XC". The branch, master has been updated via 45e1d4e389e966d072aaf98a49d9702aa253d976 (commit) from 0ab9bbc7600c157618d566f4d9985399e446519d (commit) - Log ----------------------------------------------------------------- commit 45e1d4e389e966d072aaf98a49d9702aa253d976 Author: Mason Sharp <ma...@us...> Date: Thu Dec 23 15:10:38 2010 -0500 Add support for single-step prepared statements. Works for both named and unnamed prepared statements, works for PREPARE and EXECUTE commands. The Coordinator tracks a list of the prepared statements, and prepares them in turn on Data Nodes, and only on demand, when they are first executed on the target node(s). At the end of a transaction, if there are still prepared statements that exist for the session, the connections are not released to the pool. (We should do something similar for temporary tables.) This commit also changes an existing kluge with using the SQL string in some cases, and now deparses from the Query tree instead. Written by Andrei Martsinchyk, multi-step check added by me. diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c index 58f9845..9ef6f05 100644 --- a/src/backend/commands/prepare.c +++ b/src/backend/commands/prepare.c @@ -33,7 +33,11 @@ #include "utils/builtins.h" #include "utils/memutils.h" #include "utils/snapmgr.h" - +#ifdef PGXC +#include "pgxc/pgxc.h" +#include "pgxc/poolmgr.h" +#include "pgxc/execRemote.h" +#endif /* * The hash table in which prepared queries are stored. This is @@ -42,6 +46,14 @@ * (statement names); the entries are PreparedStatement structs. */ static HTAB *prepared_queries = NULL; +#ifdef PGXC +/* + * The hash table where datanode prepared statements are stored. 
+ * The keys are statement names referenced from cached RemoteQuery nodes; the + * entries are DatanodeStatement structs + */ +static HTAB *datanode_queries = NULL; +#endif static void InitQueryHashTable(void); static ParamListInfo EvaluateParams(PreparedStatement *pstmt, List *params, @@ -147,6 +159,22 @@ PrepareQuery(PrepareStmt *stmt, const char *queryString) /* Generate plans for queries. */ plan_list = pg_plan_queries(query_list, 0, NULL); +#ifdef PGXC + /* + * Check if we are dealing with more than one step. + * Multi-step preapred statements are not yet supported. + * PGXCTODO - temporary - Once we add support, this code should be removed. + */ + if (IS_PGXC_COORDINATOR && plan_list && plan_list->head) + { + PlannedStmt *stmt = (PlannedStmt *) lfirst(plan_list->head); + + if (stmt->planTree->lefttree || stmt->planTree->righttree) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PSTATEMENT_DEFINITION), + errmsg("Multi-step Prepared Statements not yet supported"))); + } +#endif /* * Save the results. */ @@ -419,7 +447,76 @@ InitQueryHashTable(void) 32, &hash_ctl, HASH_ELEM); +#ifdef PGXC + if (IS_PGXC_COORDINATOR) + { + MemSet(&hash_ctl, 0, sizeof(hash_ctl)); + + hash_ctl.keysize = NAMEDATALEN; + hash_ctl.entrysize = sizeof(DatanodeStatement) + NumDataNodes * sizeof(int); + + datanode_queries = hash_create("Datanode Queries", + 64, + &hash_ctl, + HASH_ELEM); + } +#endif +} + +#ifdef PGXC +/* + * Assign the statement name for all the RemoteQueries in the plan tree, so + * they use datanode statements + */ +static int +set_remote_stmtname(Plan *plan, const char *stmt_name, int n) +{ + if (IsA(plan, RemoteQuery)) + { + DatanodeStatement *entry; + bool exists; + + char name[NAMEDATALEN]; + do + { + strcpy(name, stmt_name); + /* + * Append modifier. 
If resulting string is going to be truncated, + * truncate better the base string, otherwise we may enter endless + * loop + */ + if (n) + { + char modifier[NAMEDATALEN]; + sprintf(modifier, "__%d", n); + /* + * if position NAMEDATALEN - strlen(modifier) - 1 is beyond the + * base string this is effectively noop, otherwise it truncates + * the base string + */ + name[NAMEDATALEN - strlen(modifier) - 1] = '\0'; + strcat(name, modifier); + } + n++; + hash_search(datanode_queries, name, HASH_FIND, &exists); + } while (exists); + ((RemoteQuery *) plan)->statement = pstrdup(name); + entry = (DatanodeStatement *) hash_search(datanode_queries, + name, + HASH_ENTER, + NULL); + entry->nodenum = 0; + } + + if (innerPlan(plan)) + n = set_remote_stmtname(innerPlan(plan), stmt_name, n); + + if (outerPlan(plan)) + n = set_remote_stmtname(outerPlan(plan), stmt_name, n); + + return n; } +#endif /* * Store all the data pertaining to a query in the hash table using @@ -459,6 +556,25 @@ StorePreparedStatement(const char *stmt_name, errmsg("prepared statement \"%s\" already exists", stmt_name))); +#ifdef PGXC + if (IS_PGXC_COORDINATOR) + { + ListCell *lc; + int n; + + /* + * Scan the plans and set the statement field for all found RemoteQuery + * nodes so they use data node statements + */ + n = 0; + foreach(lc, stmt_list) + { + PlannedStmt *ps = (PlannedStmt *) lfirst(lc); + n = set_remote_stmtname(ps->planTree, stmt_name, n); + } + } +#endif + /* Create a plancache entry */ plansource = CreateCachedPlan(raw_parse_tree, query_string, @@ -840,3 +956,114 @@ build_regtype_array(Oid *param_types, int num_params) result = construct_array(tmp_ary, num_params, REGTYPEOID, 4, true, 'i'); return PointerGetDatum(result); } + + +#ifdef PGXC +DatanodeStatement * +FetchDatanodeStatement(const char *stmt_name, bool throwError) +{ + DatanodeStatement *entry; + + /* + * If the hash table hasn't been initialized, it can't be storing + * anything, therefore it couldn't possibly store our plan. 
+ */ + if (datanode_queries) + entry = (DatanodeStatement *) hash_search(datanode_queries, + stmt_name, + HASH_FIND, + NULL); + else + entry = NULL; + + /* Report error if entry is not found */ + if (!entry && throwError) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_PSTATEMENT), + errmsg("datanode statement \"%s\" does not exist", + stmt_name))); + + return entry; +} + +/* + * Drop datanode statement and close it on nodes if active + */ +void +DropDatanodeStatement(const char *stmt_name) +{ + DatanodeStatement *entry; + + entry = FetchDatanodeStatement(stmt_name, false); + if (entry) + { + int i; + List *nodelist = NIL; + + /* make a List of integers from node numbers */ + for (i = 0; i < entry->nodenum; i++) + nodelist = lappend_int(nodelist, entry->nodes[i]); + entry->nodenum = 0; + + ExecCloseRemoteStatement(stmt_name, nodelist); + + hash_search(datanode_queries, entry->stmt_name, HASH_REMOVE, NULL); + } +} + + +/* + * Return true if there is at least one active datanode statement, so acquired + * datanode connections should not be released + */ +bool +HaveActiveDatanodeStatements(void) +{ + HASH_SEQ_STATUS seq; + DatanodeStatement *entry; + + /* nothing cached */ + if (!datanode_queries) + return false; + + /* walk over cache */ + hash_seq_init(&seq, datanode_queries); + while ((entry = hash_seq_search(&seq)) != NULL) + { + /* Stop walking and return true */ + if (entry->nodenum > 0) + { + hash_seq_term(&seq); + return true; + } + } + /* nothing found */ + return false; +} + + +/* + * Mark datanode statement as active on specified node + * Return true if statement has already been active on the node and can be used + * Returns falsee if statement has not been active on the node and should be + * prepared on the node + */ +bool +ActivateDatanodeStatementOnNode(const char *stmt_name, int node) +{ + DatanodeStatement *entry; + int i; + + /* find the statement in cache */ + entry = FetchDatanodeStatement(stmt_name, true); + + /* see if statement already active on 
the node */ + for (i = 0; i < entry->nodenum; i++) + if (entry->nodes[i] == node) + return true; + + /* statement is not active on the specified node append item to the list */ + entry->nodes[entry->nodenum++] = node; + return false; +} +#endif diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c index 3a65361..ec33781 100644 --- a/src/backend/executor/execTuples.c +++ b/src/backend/executor/execTuples.c @@ -793,6 +793,90 @@ ExecCopySlotMinimalTuple(TupleTableSlot *slot) slot->tts_isnull); } +#ifdef PGXC +/* -------------------------------- + * ExecCopySlotDatarow + * Obtain a copy of a slot's data row. The copy is + * palloc'd in the current memory context. + * Pointer to the datarow is returned as a var parameter, function + * returns the length of the data row + * The slot itself is undisturbed + * -------------------------------- + */ +int +ExecCopySlotDatarow(TupleTableSlot *slot, char **datarow) +{ + Assert(datarow); + + if (slot->tts_dataRow) + { + /* if we already have datarow make a copy */ + *datarow = (char *)palloc(slot->tts_dataLen); + memcpy(*datarow, slot->tts_dataRow, slot->tts_dataLen); + return slot->tts_dataLen; + } + else + { + TupleDesc tdesc = slot->tts_tupleDescriptor; + StringInfoData buf; + uint16 n16; + int i; + + initStringInfo(&buf); + /* Number of parameter values */ + n16 = htons(tdesc->natts); + appendBinaryStringInfo(&buf, (char *) &n16, 2); + + /* ensure we have all values */ + slot_getallattrs(slot); + for (i = 0; i < tdesc->natts; i++) + { + uint32 n32; + + if (slot->tts_isnull[i]) + { + n32 = htonl(-1); + appendBinaryStringInfo(&buf, (char *) &n32, 4); + } + else + { + Form_pg_attribute attr = tdesc->attrs[i]; + Oid typOutput; + bool typIsVarlena; + Datum pval; + char *pstring; + int len; + + /* Get info needed to output the value */ + getTypeOutputInfo(attr->atttypid, &typOutput, &typIsVarlena); + /* + * If we have a toasted datum, forcibly detoast it here to avoid + * memory leakage inside the 
type's output routine. + */ + if (typIsVarlena) + pval = PointerGetDatum(PG_DETOAST_DATUM(slot->tts_values[i])); + else + pval = slot->tts_values[i]; + + /* Convert Datum to string */ + pstring = OidOutputFunctionCall(typOutput, pval); + + /* copy data to the buffer */ + len = strlen(pstring); + n32 = htonl(len); + appendBinaryStringInfo(&buf, (char *) &n32, 4); + appendBinaryStringInfo(&buf, pstring, len); + } + } + /* copy data to the buffer */ + *datarow = palloc(buf.len); + memcpy(*datarow, buf.data, buf.len); + pfree(buf.data); + return buf.len; + } +} +#endif + /* -------------------------------- * ExecFetchSlotTuple * Fetch the slot's regular physical tuple. diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index b184987..ad227f4 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -839,12 +839,16 @@ _copyRemoteQuery(RemoteQuery *from) COPY_NODE_FIELD(distinct); COPY_SCALAR_FIELD(read_only); COPY_SCALAR_FIELD(force_autocommit); + COPY_STRING_FIELD(statement); COPY_STRING_FIELD(cursor); + COPY_SCALAR_FIELD(exec_type); + COPY_SCALAR_FIELD(paramval_data); + COPY_SCALAR_FIELD(paramval_len); COPY_STRING_FIELD(relname); COPY_SCALAR_FIELD(remotejoin); - COPY_SCALAR_FIELD(reduce_level); - COPY_NODE_FIELD(base_tlist); + COPY_SCALAR_FIELD(reduce_level); + COPY_NODE_FIELD(base_tlist); COPY_STRING_FIELD(outer_alias); COPY_STRING_FIELD(inner_alias); COPY_SCALAR_FIELD(outer_reduce_level); @@ -867,6 +871,9 @@ _copyExecNodes(ExecNodes *from) COPY_NODE_FIELD(nodelist); COPY_SCALAR_FIELD(baselocatortype); COPY_SCALAR_FIELD(tableusagetype); + COPY_NODE_FIELD(expr); + COPY_SCALAR_FIELD(relid); + COPY_SCALAR_FIELD(accesstype); return newnode; } @@ -2305,7 +2312,9 @@ _copyQuery(Query *from) COPY_NODE_FIELD(limitCount); COPY_NODE_FIELD(rowMarks); COPY_NODE_FIELD(setOperations); - +#ifdef PGXC + COPY_STRING_FIELD(sql_statement); +#endif return newnode; } diff --git a/src/backend/pgxc/locator/locator.c 
b/src/backend/pgxc/locator/locator.c index 790b81d..4442310 100644 --- a/src/backend/pgxc/locator/locator.c +++ b/src/backend/pgxc/locator/locator.c @@ -354,22 +354,15 @@ GetRelationNodes(RelationLocInfo *rel_loc_info, long *partValue, case LOCATOR_TYPE_HASH: if (partValue != NULL) - { /* in prototype, all partitioned tables use same map */ exec_nodes->nodelist = lappend_int(NULL, get_node_from_hash(hash_range_int(*partValue))); - } else - { - /* If no info, go to node 1 */ if (accessType == RELATION_ACCESS_INSERT) + /* Insert NULL to node 1 */ exec_nodes->nodelist = lappend_int(NULL, 1); else - /* - * No partitioning value passed in - * (no where qualification on part column - use all) - */ + /* Use all nodes for other types of access */ exec_nodes->nodelist = list_copy(rel_loc_info->nodeList); - } break; case LOCATOR_TYPE_SINGLE: diff --git a/src/backend/pgxc/plan/planner.c b/src/backend/pgxc/plan/planner.c index 8d900f1..1a56b44 100644 --- a/src/backend/pgxc/plan/planner.c +++ b/src/backend/pgxc/plan/planner.c @@ -58,6 +58,19 @@ typedef struct long constant; /* assume long PGXCTODO - should be Datum */ } Literal_Comparison; +/* + * Comparison of partitioned column and expression + * Expression can be evaluated at execution time to determine target nodes + */ +typedef struct +{ + Oid relid; + RelationLocInfo *rel_loc_info; + Oid attrnum; + char *col_name; + Expr *expr; /* assume long PGXCTODO - should be Datum */ +} Expr_Comparison; + /* Parent-Child joins for relations being joined on * their respective hash distribuion columns */ @@ -75,6 +88,7 @@ typedef struct typedef struct { List *partitioned_literal_comps; /* List of Literal_Comparison */ + List *partitioned_expressions; /* List of Expr_Comparison */ List *partitioned_parent_child; /* List of Parent_Child_Join */ List *replicated_joins; @@ -127,6 +141,7 @@ typedef struct XCWalkerContext Query *query; RelationAccessType accessType; RemoteQuery *query_step; /* remote query step being analized */ + 
PlannerInfo *root; /* planner data for the subquery */ Special_Conditions *conditions; bool multilevel_join; List *rtables; /* a pointer to a list of rtables */ @@ -144,11 +159,12 @@ bool StrictStatementChecking = true; /* Forbid multi-node SELECT statements with an ORDER BY clause */ bool StrictSelectChecking = false; -static void get_plan_nodes(Query *query, RemoteQuery *step, RelationAccessType accessType); +static void get_plan_nodes(PlannerInfo *root, RemoteQuery *step, RelationAccessType accessType); static bool get_plan_nodes_walker(Node *query_node, XCWalkerContext *context); static bool examine_conditions_walker(Node *expr_node, XCWalkerContext *context); static int handle_limit_offset(RemoteQuery *query_step, Query *query, PlannedStmt *plan_stmt); static void InitXCWalkerContext(XCWalkerContext *context); +static RemoteQuery *makeRemoteQuery(void); static void validate_part_col_updatable(const Query *query); static bool is_pgxc_safe_func(Oid funcid); @@ -307,6 +323,7 @@ free_special_relations(Special_Conditions *special_conditions) /* free all items in list, including Literal_Comparison struct */ list_free_deep(special_conditions->partitioned_literal_comps); + list_free_deep(special_conditions->partitioned_expressions); /* free list, but not items pointed to */ list_free(special_conditions->partitioned_parent_child); @@ -451,8 +468,9 @@ get_base_var(Var *var, XCWalkerContext *context) * then the caller should use the regular PG planner */ static void -get_plan_nodes_insert(Query *query, RemoteQuery *step) +get_plan_nodes_insert(PlannerInfo *root, RemoteQuery *step) { + Query *query = root->parse; RangeTblEntry *rte; RelationLocInfo *rel_loc_info; Const *constant; @@ -502,15 +520,15 @@ get_plan_nodes_insert(Query *query, RemoteQuery *step) if (sub_rte->rtekind == RTE_SUBQUERY && !sub_rte->subquery->limitCount && !sub_rte->subquery->limitOffset) - get_plan_nodes(sub_rte->subquery, step, RELATION_ACCESS_READ); + get_plan_nodes(root, step, 
RELATION_ACCESS_READ); } /* Send to general planner if the query is multiple step */ if (!step->exec_nodes) return; - /* If the source is not hash-based (eg, replicated) also send - * through general planner + /* If the source is not hash-based (eg, replicated) also send + * through general planner */ if (step->exec_nodes->baselocatortype != LOCATOR_TYPE_HASH) { @@ -612,7 +630,18 @@ get_plan_nodes_insert(Query *query, RemoteQuery *step) } if (checkexpr == NULL) - return; /* no constant */ + { + /* try and determine nodes on execution time */ + step->exec_nodes = makeNode(ExecNodes); + step->exec_nodes->baselocatortype = rel_loc_info->locatorType; + step->exec_nodes->tableusagetype = TABLE_USAGE_TYPE_USER; + step->exec_nodes->primarynodelist = NULL; + step->exec_nodes->nodelist = NULL; + step->exec_nodes->expr = eval_expr; + step->exec_nodes->relid = rel_loc_info->relid; + step->exec_nodes->accesstype = RELATION_ACCESS_INSERT; + return; + } constant = (Const *) checkexpr; @@ -788,7 +817,7 @@ examine_conditions_walker(Node *expr_node, XCWalkerContext *context) initStringInfo(&buf); /* Step 1: select tuple values by ctid */ - step1 = makeNode(RemoteQuery); + step1 = makeRemoteQuery(); appendStringInfoString(&buf, "SELECT "); for (att = 1; att <= natts; att++) { @@ -822,13 +851,11 @@ examine_conditions_walker(Node *expr_node, XCWalkerContext *context) appendStringInfo(&buf, " FROM %s WHERE ctid = '%s'", tableName, ctid_str); step1->sql_statement = pstrdup(buf.data); - step1->is_single_step = true; step1->exec_nodes = makeNode(ExecNodes); - step1->read_only = true; step1->exec_nodes->nodelist = list_make1_int(nodenum); /* Step 2: declare cursor for update target table */ - step2 = makeNode(RemoteQuery); + step2 = makeRemoteQuery(); resetStringInfo(&buf); appendStringInfoString(&buf, step->cursor); @@ -852,18 +879,14 @@ examine_conditions_walker(Node *expr_node, XCWalkerContext *context) } appendStringInfoString(&buf, "FOR UPDATE"); step2->sql_statement = 
pstrdup(buf.data); - step2->is_single_step = true; - step2->read_only = true; step2->exec_nodes = makeNode(ExecNodes); step2->exec_nodes->nodelist = list_copy(rel_loc_info1->nodeList); innerPlan(step2) = (Plan *) step1; /* Step 3: move cursor to first position */ - step3 = makeNode(RemoteQuery); + step3 = makeRemoteQuery(); resetStringInfo(&buf); appendStringInfo(&buf, "MOVE %s", node_cursor); step3->sql_statement = pstrdup(buf.data); - step3->is_single_step = true; - step3->read_only = true; step3->exec_nodes = makeNode(ExecNodes); step3->exec_nodes->nodelist = list_copy(rel_loc_info1->nodeList); innerPlan(step3) = (Plan *) step2; @@ -1024,7 +1047,7 @@ examine_conditions_walker(Node *expr_node, XCWalkerContext *context) if (!IsA(arg2, Const)) { /* this gets freed when the memory context gets freed */ - Expr *eval_expr = (Expr *) eval_const_expressions(NULL, (Node *) arg2); + Expr *eval_expr = (Expr *) eval_const_expressions(context->root, (Node *) arg2); checkexpr = get_numeric_constant(eval_expr); } @@ -1176,6 +1199,32 @@ examine_conditions_walker(Node *expr_node, XCWalkerContext *context) */ return false; } + /* + * Check if it is an expression like pcol = expr, where pcol is + * a partitioning column of the rel1 and planner could not + * evaluate expr. We probably can evaluate it at execution time. 
+ * Save the expression, and if we do not have other hint, + * try and evaluate it at execution time + */ + rel_loc_info1 = GetRelationLocInfo(column_base->relid); + + if (!rel_loc_info1) + return true; + + if (IsHashColumn(rel_loc_info1, column_base->colname)) + { + Expr_Comparison *expr_comp = + palloc(sizeof(Expr_Comparison)); + + expr_comp->relid = column_base->relid; + expr_comp->rel_loc_info = rel_loc_info1; + expr_comp->col_name = column_base->colname; + expr_comp->expr = arg2; + context->conditions->partitioned_expressions = + lappend(context->conditions->partitioned_expressions, + expr_comp); + return false; + } } } } @@ -1599,24 +1648,19 @@ get_plan_nodes_walker(Node *query_node, XCWalkerContext *context) } if (rtesave) - { /* a single table, just grab it */ rel_loc_info = GetRelationLocInfo(rtesave->relid); + } - if (!rel_loc_info) - return true; + /* have complex case */ + if (!rel_loc_info) + return true; - context->query_step->exec_nodes = GetRelationNodes(rel_loc_info, - NULL, - context->accessType); - } - } - else - { + if (rel_loc_info->locatorType != LOCATOR_TYPE_HASH) + /* do not need to determine partitioning expression */ context->query_step->exec_nodes = GetRelationNodes(rel_loc_info, NULL, context->accessType); - } /* Note replicated table usage for determining safe queries */ if (context->query_step->exec_nodes) @@ -1625,6 +1669,38 @@ get_plan_nodes_walker(Node *query_node, XCWalkerContext *context) table_usage_type = TABLE_USAGE_TYPE_USER_REPLICATED; context->query_step->exec_nodes->tableusagetype = table_usage_type; + } else if (context->conditions->partitioned_expressions) { + /* probably we can determine nodes on execution time */ + foreach(lc, context->conditions->partitioned_expressions) { + Expr_Comparison *expr_comp = (Expr_Comparison *) lfirst(lc); + if (rel_loc_info->relid == expr_comp->relid) + { + context->query_step->exec_nodes = makeNode(ExecNodes); + context->query_step->exec_nodes->baselocatortype = + 
rel_loc_info->locatorType; + context->query_step->exec_nodes->tableusagetype = + TABLE_USAGE_TYPE_USER; + context->query_step->exec_nodes->primarynodelist = NULL; + context->query_step->exec_nodes->nodelist = NULL; + context->query_step->exec_nodes->expr = expr_comp->expr; + context->query_step->exec_nodes->relid = expr_comp->relid; + context->query_step->exec_nodes->accesstype = context->accessType; + break; + } + } + } else { + /* run query on all nodes */ + context->query_step->exec_nodes = makeNode(ExecNodes); + context->query_step->exec_nodes->baselocatortype = + rel_loc_info->locatorType; + context->query_step->exec_nodes->tableusagetype = + TABLE_USAGE_TYPE_USER; + context->query_step->exec_nodes->primarynodelist = NULL; + context->query_step->exec_nodes->nodelist = + list_copy(rel_loc_info->nodeList); + context->query_step->exec_nodes->expr = NULL; + context->query_step->exec_nodes->relid = NULL; + context->query_step->exec_nodes->accesstype = context->accessType; } } /* check for partitioned col comparison against a literal */ @@ -1712,6 +1788,7 @@ InitXCWalkerContext(XCWalkerContext *context) context->query = NULL; context->accessType = RELATION_ACCESS_READ; context->query_step = NULL; + context->root = NULL; context->conditions = (Special_Conditions *) palloc0(sizeof(Special_Conditions)); context->rtables = NIL; context->multilevel_join = false; @@ -1722,20 +1799,57 @@ InitXCWalkerContext(XCWalkerContext *context) context->join_list = NIL; } + +/* + * Create an instance of RemoteQuery and initialize fields + */ +static RemoteQuery * +makeRemoteQuery(void) +{ + RemoteQuery *result = makeNode(RemoteQuery); + result->is_single_step = true; + result->sql_statement = NULL; + result->exec_nodes = NULL; + result->combine_type = COMBINE_TYPE_NONE; + result->simple_aggregates = NIL; + result->sort = NULL; + result->distinct = NULL; + result->read_only = true; + result->force_autocommit = false; + result->cursor = NULL; + result->exec_type = EXEC_ON_DATANODES; + 
result->paramval_data = NULL; + result->paramval_len = 0; + + result->relname = NULL; + result->remotejoin = false; + result->partitioned_replicated = false; + result->reduce_level = 0; + result->base_tlist = NIL; + result->outer_alias = NULL; + result->inner_alias = NULL; + result->outer_reduce_level = 0; + result->inner_reduce_level = 0; + result->outer_relids = NULL; + result->inner_relids = NULL; + return result; +} + /* * Top level entry point before walking query to determine plan nodes * */ static void -get_plan_nodes(Query *query, RemoteQuery *step, RelationAccessType accessType) +get_plan_nodes(PlannerInfo *root, RemoteQuery *step, RelationAccessType accessType) { + Query *query = root->parse; XCWalkerContext context; - InitXCWalkerContext(&context); context.query = query; context.accessType = accessType; context.query_step = step; + context.root = root; context.rtables = lappend(context.rtables, query->rtable); if ((get_plan_nodes_walker((Node *) query, &context) @@ -1754,24 +1868,24 @@ get_plan_nodes(Query *query, RemoteQuery *step, RelationAccessType accessType) * */ static void -get_plan_nodes_command(Query *query, RemoteQuery *step) +get_plan_nodes_command(RemoteQuery *step, PlannerInfo *root) { - switch (query->commandType) + switch (root->parse->commandType) { case CMD_SELECT: - get_plan_nodes(query, step, query->rowMarks ? + get_plan_nodes(root, step, root->parse->rowMarks ? 
RELATION_ACCESS_READ_FOR_UPDATE : RELATION_ACCESS_READ); break; case CMD_INSERT: - get_plan_nodes_insert(query, step); + get_plan_nodes_insert(root, step); break; case CMD_UPDATE: case CMD_DELETE: /* treat as a select */ - get_plan_nodes(query, step, RELATION_ACCESS_UPDATE); + get_plan_nodes(root, step, RELATION_ACCESS_UPDATE); break; default: @@ -2589,9 +2703,41 @@ PlannedStmt * pgxc_planner(Query *query, int cursorOptions, ParamListInfo boundParams) { PlannedStmt *result; - Plan *standardPlan; + PlannerGlobal *glob; + PlannerInfo *root; RemoteQuery *query_step; + StringInfoData buf; + /* + * Set up global state for this planner invocation. This data is needed + * across all levels of sub-Query that might exist in the given command, + * so we keep it in a separate struct that's linked to by each per-Query + * PlannerInfo. + */ + glob = makeNode(PlannerGlobal); + + glob->boundParams = boundParams; + glob->paramlist = NIL; + glob->subplans = NIL; + glob->subrtables = NIL; + glob->rewindPlanIDs = NULL; + glob->finalrtable = NIL; + glob->relationOids = NIL; + glob->invalItems = NIL; + glob->lastPHId = 0; + glob->transientPlan = false; + + /* Create a PlannerInfo data structure, usually it is done for a subquery */ + root = makeNode(PlannerInfo); + root->parse = query; + root->glob = glob; + root->query_level = 1; + root->parent_root = NULL; + root->planner_cxt = CurrentMemoryContext; + root->init_plans = NIL; + root->cte_plan_ids = NIL; + root->eq_classes = NIL; + root->append_rel_list = NIL; /* build the PlannedStmt result */ result = makeNode(PlannedStmt); @@ -2603,184 +2749,151 @@ pgxc_planner(Query *query, int cursorOptions, ParamListInfo boundParams) result->intoClause = query->intoClause; result->rtable = query->rtable; - query_step = makeNode(RemoteQuery); - query_step->is_single_step = false; + query_step = makeRemoteQuery(); + + /* Optimize multi-node handling */ + query_step->read_only = query->commandType == CMD_SELECT; if (query->utilityStmt && 
IsA(query->utilityStmt, DeclareCursorStmt)) cursorOptions |= ((DeclareCursorStmt *) query->utilityStmt)->options; - query_step->exec_nodes = NULL; - query_step->combine_type = COMBINE_TYPE_NONE; - query_step->simple_aggregates = NULL; - /* Optimize multi-node handling */ - query_step->read_only = query->commandType == CMD_SELECT; - query_step->force_autocommit = false; - result->planTree = (Plan *) query_step; - /* - * Determine where to execute the command, either at the Coordinator - * level, Data Nodes, or both. By default we choose both. We should be - * able to quickly expand this for more commands. - */ - switch (query->commandType) - { - case CMD_SELECT: - /* Perform some checks to make sure we can support the statement */ - if (query->intoClause) - ereport(ERROR, - (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), - (errmsg("INTO clause not yet supported")))); - /* fallthru */ - case CMD_INSERT: - case CMD_UPDATE: - case CMD_DELETE: - /* PGXCTODO: This validation will not be removed - * until we support moving tuples from one node to another - * when the partition column of a table is updated - */ - if (query->commandType == CMD_UPDATE) - validate_part_col_updatable(query); - - if (query->returningList) - ereport(ERROR, - (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), - (errmsg("RETURNING clause not yet supported")))); - - /* Set result relations */ - if (query->commandType != CMD_SELECT) - result->resultRelations = list_make1_int(query->resultRelation); + /* Perform some checks to make sure we can support the statement */ + if (query->commandType == CMD_SELECT && query->intoClause) + ereport(ERROR, + (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), + (errmsg("INTO clause not yet supported")))); - get_plan_nodes_command(query, query_step); + /* PGXCTODO: This validation will not be removed + * until we support moving tuples from one node to another + * when the partition column of a table is updated + */ + if (query->commandType == CMD_UPDATE) + 
validate_part_col_updatable(query); - if (query_step->exec_nodes == NULL) - { - /* Do not yet allow multi-node correlated UPDATE or DELETE */ - if (query->commandType == CMD_UPDATE || query->commandType == CMD_DELETE) - { - ereport(ERROR, - (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), - (errmsg("UPDATE and DELETE that are correlated or use non-immutable functions not yet supported")))); - } + if (query->returningList) + ereport(ERROR, + (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), + (errmsg("RETURNING clause not yet supported")))); - /* - * Processing guery against catalog tables, or multi-step command. - * Run through standard planner - */ - result = standard_planner(query, cursorOptions, boundParams); - return result; - } + /* Set result relations */ + if (query->commandType != CMD_SELECT) + result->resultRelations = list_make1_int(query->resultRelation); - /* Do not yet allow multi-node correlated UPDATE or DELETE */ - if ((query->commandType == CMD_UPDATE || query->commandType == CMD_DELETE) - && !query_step->exec_nodes - && list_length(query->rtable) > 1) - { - result = standard_planner(query, cursorOptions, boundParams); - return result; - } + get_plan_nodes_command(query_step, root); - /* - * get_plan_nodes_command may alter original statement, so do not - * process it before the call - * - * Declare Cursor case: - * We should leave as a step query only SELECT statement - * Further if we need refer source statement for planning we should take - * the truncated string - */ - if (query->utilityStmt && - IsA(query->utilityStmt, DeclareCursorStmt)) - { + if (query_step->exec_nodes == NULL) + { + /* Do not yet allow multi-node correlated UPDATE or DELETE */ + if (query->commandType == CMD_UPDATE || query->commandType == CMD_DELETE) + { + ereport(ERROR, + (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), + (errmsg("UPDATE and DELETE that are correlated or use non-immutable functions not yet supported")))); + } - /* search for SELECT keyword in the normalized string */ - char 
*select = strpos(query->sql_statement, " SELECT "); - /* Take substring of the original string using found offset */ - query_step->sql_statement = pstrdup(select + 1); - } - else - query_step->sql_statement = pstrdup(query->sql_statement); + /* + * Processing guery against catalog tables, or multi-step command. + * Run through standard planner + */ + result = standard_planner(query, cursorOptions, boundParams); + return result; + } - /* - * If there already is an active portal, we may be doing planning - * within a function. Just use the standard plan, but check if - * it is part of an EXPLAIN statement so that we do not show that - * we plan multiple steps when it is a single-step operation. - */ - if (ActivePortal && strcmp(ActivePortal->commandTag, "EXPLAIN")) - return standard_planner(query, cursorOptions, boundParams); + /* Do not yet allow multi-node correlated UPDATE or DELETE */ + if ((query->commandType == CMD_UPDATE || query->commandType == CMD_DELETE) + && !query_step->exec_nodes + && list_length(query->rtable) > 1) + { + result = standard_planner(query, cursorOptions, boundParams); + return result; + } - query_step->is_single_step = true; - /* - * PGXCTODO - * When Postgres runs insert into t (a) values (1); against table - * defined as create table t (a int, b int); the plan is looking - * like insert into t (a,b) values (1,null); - * Later executor is verifying plan, to make sure table has not - * been altered since plan has been created and comparing table - * definition with plan target list and output error if they do - * not match. - * I could not find better way to generate targetList for pgxc plan - * then call standard planner and take targetList from the plan - * generated by Postgres. - */ - query_step->scan.plan.targetlist = query->targetList; + /* + * Deparse query tree to get step query. 
It may be modified later on + */ + initStringInfo(&buf); + deparse_query(query, &buf, NIL); + query_step->sql_statement = pstrdup(buf.data); + pfree(buf.data); - if (query_step->exec_nodes) - query_step->combine_type = get_plan_combine_type( - query, query_step->exec_nodes->baselocatortype); + query_step->is_single_step = true; + /* + * PGXCTODO + * When Postgres runs insert into t (a) values (1); against table + * defined as create table t (a int, b int); the plan is looking + * like insert into t (a,b) values (1,null); + * Later executor is verifying plan, to make sure table has not + * been altered since plan has been created and comparing table + * definition with plan target list and output error if they do + * not match. + * I could not find better way to generate targetList for pgxc plan + * then call standard planner and take targetList from the plan + * generated by Postgres. + */ + query_step->scan.plan.targetlist = query->targetList; - /* Set up simple aggregates */ - /* PGXCTODO - we should detect what types of aggregates are used. - * in some cases we can avoid the final step and merely proxy results - * (when there is only one data node involved) instead of using - * coordinator consolidation. At the moment this is needed for AVG() - */ - query_step->simple_aggregates = get_simple_aggregates(query); + if (query_step->exec_nodes) + query_step->combine_type = get_plan_combine_type( + query, query_step->exec_nodes->baselocatortype); + + /* Set up simple aggregates */ + /* PGXCTODO - we should detect what types of aggregates are used. + * in some cases we can avoid the final step and merely proxy results + * (when there is only one data node involved) instead of using + * coordinator consolidation. 
At the moment this is needed for AVG() + */ + query_step->simple_aggregates = get_simple_aggregates(query); - /* - * Add sorting to the step - */ - if (list_length(query_step->exec_nodes->nodelist) > 1 && - (query->sortClause || query->distinctClause)) - make_simple_sort_from_sortclauses(query, query_step); + /* + * Add sorting to the step + */ + if (list_length(query_step->exec_nodes->nodelist) > 1 && + (query->sortClause || query->distinctClause)) + make_simple_sort_from_sortclauses(query, query_step); - /* Handle LIMIT and OFFSET for single-step queries on multiple nodes */ - if (handle_limit_offset(query_step, query, result)) - { - /* complicated expressions, just fallback to standard plan */ - result = standard_planner(query, cursorOptions, boundParams); - return result; - } + /* Handle LIMIT and OFFSET for single-step queries on multiple nodes */ + if (handle_limit_offset(query_step, query, result)) + { + /* complicated expressions, just fallback to standard plan */ + result = standard_planner(query, cursorOptions, boundParams); + return result; + } - /* - * Use standard plan if we have more than one data node with either - * group by, hasWindowFuncs, or hasRecursive - */ - /* - * PGXCTODO - this could be improved to check if the first - * group by expression is the partitioning column, in which - * case it is ok to treat as a single step. - */ - if (query->commandType == CMD_SELECT - && query_step->exec_nodes - && list_length(query_step->exec_nodes->nodelist) > 1 - && (query->groupClause || query->hasWindowFuncs || query->hasRecursive)) - { - result->planTree = standardPlan; - return result; - } - break; + /* + * Use standard plan if we have more than one data node with either + * group by, hasWindowFuncs, or hasRecursive + */ + /* + * PGXCTODO - this could be improved to check if the first + * group by expression is the partitioning column, in which + * case it is ok to treat as a single step. 
+ */ + if (query->commandType == CMD_SELECT + && query_step->exec_nodes + && list_length(query_step->exec_nodes->nodelist) > 1 + && (query->groupClause || query->hasWindowFuncs || query->hasRecursive)) + { + result = standard_planner(query, cursorOptions, boundParams); + return result; + } - default: - /* Allow for override */ - if (StrictStatementChecking) - ereport(ERROR, - (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), - (errmsg("This command is not yet supported.")))); - else - result->planTree = standardPlan; + /* Allow for override */ + /* AM: Is this ever possible? */ + if (query->commandType != CMD_SELECT && + query->commandType != CMD_INSERT && + query->commandType != CMD_UPDATE && + query->commandType != CMD_DELETE) + { + if (StrictStatementChecking) + ereport(ERROR, + (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), + (errmsg("This command is not yet supported.")))); + else + result = standard_planner(query, cursorOptions, boundParams); + return result; } /* @@ -2808,6 +2921,13 @@ pgxc_planner(Query *query, int cursorOptions, ParamListInfo boundParams) } /* + * Assume single step. If there are multiple steps we should make up + * parameters for each step where they referenced + */ + if (boundParams) + query_step->paramval_len = ParamListToDataRow(boundParams, + &query_step->paramval_data); + /* * If query is FOR UPDATE fetch CTIDs from the remote node * Use CTID as a key to update tuples on remote nodes when handling * WHERE CURRENT OF @@ -3068,7 +3188,7 @@ validate_part_col_updatable(const Query *query) * * Based on is_immutable_func from postgresql_fdw.c * We add an exeption for base postgresql functions, to - * allow now() and others to still execute as part of single step + * allow now() and others to still execute as part of single step * queries. 
* * PGXCTODO - we currently make the false assumption that immutable diff --git a/src/backend/pgxc/pool/execRemote.c b/src/backend/pgxc/pool/execRemote.c index b954003..a387354 100644 --- a/src/backend/pgxc/pool/execRemote.c +++ b/src/backend/pgxc/pool/execRemote.c @@ -19,6 +19,7 @@ #include "postgres.h" #include "access/gtm.h" #include "access/xact.h" +#include "commands/prepare.h" #include "executor/executor.h" #include "gtm/gtm_c.h" #include "libpq/libpq.h" @@ -27,6 +28,7 @@ #include "pgxc/poolmgr.h" #include "storage/ipc.h" #include "utils/datum.h" +#include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/tuplesort.h" #include "utils/snapmgr.h" @@ -38,9 +40,6 @@ #define DATA_NODE_FETCH_SIZE 1 - -extern char *deparseSql(RemoteQueryState *scanstate); - /* * Buffer size does not affect performance significantly, just do not allow * connection buffer grows infinitely @@ -62,6 +61,9 @@ static int pgxc_node_rollback_prepared(GlobalTransactionId gxid, GlobalTransacti PGXCNodeAllHandles * pgxc_handles, char *gid); static int pgxc_node_commit_prepared(GlobalTransactionId gxid, GlobalTransactionId prepared_gxid, PGXCNodeAllHandles * pgxc_handles, char *gid); +static PGXCNodeAllHandles * get_exec_connections(RemoteQueryState *planstate, + ExecNodes *exec_nodes, + RemoteQueryExecType exec_type); static int pgxc_node_implicit_commit_prepared(GlobalTransactionId prepare_xid, GlobalTransactionId commit_xid, PGXCNodeAllHandles * pgxc_handles, @@ -70,8 +72,6 @@ static int pgxc_node_implicit_commit_prepared(GlobalTransactionId prepare_xid, static int pgxc_node_implicit_prepare(GlobalTransactionId prepare_xid, PGXCNodeAllHandles * pgxc_handles, char *gid); -static PGXCNodeAllHandles * get_exec_connections(ExecNodes *exec_nodes, - RemoteQueryExecType exec_type); static int pgxc_node_receive_and_validate(const int conn_count, PGXCNodeHandle ** connections, bool reset_combiner); @@ -1265,10 +1265,10 @@ handle_response(PGXCNodeHandle * conn, RemoteQueryState 
*combiner) /* * If we are in the process of shutting down, we - * may be rolling back, and the buffer may contain other messages. - * We want to avoid a procarray exception - * as well as an error stack overflow. - */ + * may be rolling back, and the buffer may contain other messages. + * We want to avoid a procarray exception + * as well as an error stack overflow. + */ if (proc_exit_inprogress) conn->state = DN_CONNECTION_STATE_ERROR_FATAL; @@ -1364,10 +1364,11 @@ handle_response(PGXCNodeHandle * conn, RemoteQueryState *combiner) /* sync lost? */ elog(WARNING, "Received unsupported message type: %c", msg_type); conn->state = DN_CONNECTION_STATE_ERROR_FATAL; - return RESPONSE_EOF; + /* stop reading */ + return RESPONSE_COMPLETE; } } - + /* never happen, but keep compiler quiet */ return RESPONSE_EOF; } @@ -2746,7 +2747,6 @@ RemoteQueryState * ExecInitRemoteQuery(RemoteQuery *node, EState *estate, int eflags) { RemoteQueryState *remotestate; - Relation currentRelation; remotestate = CreateResponseCombiner(0, node->combine_type); remotestate->ss.ps.plan = (Plan *) node; @@ -2788,6 +2788,19 @@ ExecInitRemoteQuery(RemoteQuery *node, EState *estate, int eflags) ALLOCSET_DEFAULT_MAXSIZE); } + /* + * If we have parameter values here and planner has not had them we + * should prepare them now + */ + if (estate->es_param_list_info && !node->paramval_data) + node->paramval_len = ParamListToDataRow(estate->es_param_list_info, + &node->paramval_data); + + /* We need expression context to evaluate */ + if (node->exec_nodes && node->exec_nodes->expr) + ExecAssignExprContext(estate, &remotestate->ss.ps); + + if (innerPlan(node)) innerPlanState(remotestate) = ExecInitNode(innerPlan(node), estate, eflags); @@ -2853,7 +2866,8 @@ copy_slot(RemoteQueryState *node, TupleTableSlot *src, TupleTableSlot *dst) * Datanodes Only, Coordinators only or both types */ static PGXCNodeAllHandles * -get_exec_connections(ExecNodes *exec_nodes, +get_exec_connections(RemoteQueryState *planstate, + 
ExecNodes *exec_nodes, RemoteQueryExecType exec_type) { List *nodelist = NIL; @@ -2873,8 +2887,34 @@ get_exec_connections(ExecNodes *exec_nodes, if (exec_nodes) { - nodelist = exec_nodes->nodelist; - primarynode = exec_nodes->primarynodelist; + if (exec_nodes->expr) + { + /* execution time determining of target data nodes */ + bool isnull; + ExprState *estate = ExecInitExpr(exec_nodes->expr, + (PlanState *) planstate); + Datum partvalue = ExecEvalExpr(estate, + planstate->ss.ps.ps_ExprContext, + &isnull, + NULL); + if (!isnull) + { + RelationLocInfo *rel_loc_info = GetRelationLocInfo(exec_nodes->relid); + ExecNodes *nodes = GetRelationNodes(rel_loc_info, + (long *) &partvalue, + exec_nodes->accesstype); + if (nodes) + { + nodelist = nodes->nodelist; + primarynode = nodes->primarynodelist; + pfree(nodes); + } + FreeRelationLocInfo(rel_loc_info); + } + } else { + nodelist = exec_nodes->nodelist; + primarynode = exec_nodes->primarynodelist; + } } if (list_length(nodelist) == 0 && @@ -2961,212 +3001,273 @@ register_write_nodes(int conn_count, PGXCNodeHandle **connections) } } -/* - * Execute step of PGXC plan. - * The step specifies a command to be executed on specified nodes. - * On first invocation connections to the data nodes are initialized and - * command is executed. Further, as well as within subsequent invocations, - * responses are received until step is completed or there is a tuple to emit. - * If there is a tuple it is returned, otherwise returned NULL. The NULL result - * from the function indicates completed step. - * The function returns at most one tuple per invocation. 
- */ -TupleTableSlot * -ExecRemoteQuery(RemoteQueryState *node) + +static void +do_query(RemoteQueryState *node) { RemoteQuery *step = (RemoteQuery *) node->ss.ps.plan; - TupleTableSlot *resultslot = node->ss.ps.ps_ResultTupleSlot; TupleTableSlot *scanslot = node->ss.ss_ScanTupleSlot; - bool have_tuple = false; + bool force_autocommit = step->force_autocommit; + bool is_read_only = step->read_only; + GlobalTransactionId gxid = InvalidGlobalTransactionId; + Snapshot snapshot = GetActiveSnapshot(); + TimestampTz timestamp = GetCurrentGTMStartTimestamp(); + PGXCNodeHandle **connections = NULL; + PGXCNodeHandle *primaryconnection = NULL; + int i; + int regular_conn_count; + int total_conn_count; + bool need_tran; + PGXCNodeAllHandles *pgxc_connections; + /* + * Get connections for Datanodes only, utilities and DDLs + * are launched in ExecRemoteUtility + */ + pgxc_connections = get_exec_connections(node, step->exec_nodes, + EXEC_ON_DATANODES); - if (!node->query_Done) + connections = pgxc_connections->datanode_handles; + primaryconnection = pgxc_connections->primary_handle; + total_conn_count = regular_conn_count = pgxc_connections->dn_conn_count; + + /* + * Primary connection is counted separately but is included in total_conn_count if used. + */ + if (primaryconnection) { - /* First invocation, initialize */ - bool force_autocommit = step->force_autocommit; - bool is_read_only = step->read_only; - GlobalTransactionId gxid = InvalidGlobalTransactionId; - Snapshot snapshot = GetActiveSnapshot(); - TimestampTz timestamp = GetCurrentGTMStartTimestamp(); - PGXCNodeHandle **connections = NULL; - PGXCNodeHandle *primaryconnection = NULL; - int i; - int regular_conn_count; - int total_conn_count; - bool need_tran; - PGXCNodeAllHandles *pgxc_connections; - TupleTableSlot *innerSlot = NULL; - - implicit_force_autocommit = force_autocommit; + regular_conn_count--; + } - /* - * Inner plan for RemoteQuery supplies parameters. 
- * We execute inner plan to get a tuple and use values of the tuple as - * parameter values when executing this remote query. - * If returned slot contains NULL tuple break execution. - * TODO there is a problem how to handle the case if both inner and - * outer plans exist. We can decide later, since it is never used now. - */ - if (innerPlanState(node)) - { - innerSlot = ExecProcNode(innerPlanState(node)); -// if (TupIsNull(innerSlot)) -// return innerSlot; - } + pfree(pgxc_connections); - /* - * Get connections for Datanodes only, utilities and DDLs - * are launched in ExecRemoteUtility - */ - pgxc_connections = get_exec_connections(step->exec_nodes, - EXEC_ON_DATANODES); + /* + * We save only regular connections, at the time we exit the function + * we finish with the primary connection and deal only with regular + * connections on subsequent invocations + */ + node->node_count = regular_conn_count; - connections = pgxc_connections->datanode_handles; - primaryconnection = pgxc_connections->primary_handle; - total_conn_count = regular_conn_count = pgxc_connections->dn_conn_count; + if (force_autocommit) + need_tran = false; + else + need_tran = !autocommit || (!is_read_only && total_conn_count > 1); - /* - * Primary connection is counted separately but is included in total_conn_count if used. - */ + elog(DEBUG1, "autocommit = %s, has primary = %s, regular_conn_count = %d, need_tran = %s", autocommit ? "true" : "false", primaryconnection ? "true" : "false", regular_conn_count, need_tran ? 
"true" : "false"); + + stat_statement(); + if (autocommit) + { + stat_transaction(total_conn_count); + /* We normally clear for transactions, but if autocommit, clear here, too */ + clear_write_node_list(); + } + + if (!is_read_only) + { if (primaryconnection) - { - regular_conn_count--; - } + register_write_nodes(1, &primaryconnection); + register_write_nodes(regular_conn_count, connections); + } + + gxid = GetCurrentGlobalTransactionId(); - pfree(pgxc_connections); + if (!GlobalTransactionIdIsValid(gxid)) + { + if (primaryconnection) + pfree(primaryconnection); + pfree(connections); + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Failed to get next transaction ID"))); + } + if (need_tran) + { /* - * We save only regular connections, at the time we exit the function - * we finish with the primary connection and deal only with regular - * connections on subsequent invocations + * Check if data node connections are in transaction and start + * transactions on nodes where it is not started */ - node->node_count = regular_conn_count; + PGXCNodeHandle *new_connections[total_conn_count]; + int new_count = 0; - if (force_autocommit) - need_tran = false; - else - need_tran = !autocommit || (!is_read_only && total_conn_count > 1); + if (primaryconnection && primaryconnection->transaction_status != 'T') + new_connections[new_count++] = primaryconnection; + for (i = 0; i < regular_conn_count; i++) + if (connections[i]->transaction_status != 'T') + new_connections[new_count++] = connections[i]; + + if (new_count && pgxc_node_begin(new_count, new_connections, gxid)) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Could not begin transaction on data nodes."))); + } - elog(DEBUG1, "autocommit = %s, has primary = %s, regular_conn_count = %d, need_tran = %s", autocommit ? "true" : "false", primaryconnection ? "true" : "false", regular_conn_count, need_tran ? 
"true" : "false"); + /* See if we have a primary node, execute on it first before the others */ + if (primaryconnection) + { + if (primaryconnection->state == DN_CONNECTION_STATE_QUERY) + BufferConnection(primaryconnection); - stat_statement(); - if (autocommit) + /* If explicit transaction is needed gxid is already sent */ + if (!need_tran && pgxc_node_send_gxid(primaryconnection, gxid)) { - stat_transaction(total_conn_count); - /* We normally clear for transactions, but if autocommit, clear here, too */ - clear_write_node_list(); + pfree(connections); + pfree(primaryconnection); + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Failed to send command to data nodes"))); } - - if (!is_read_only) + if (total_conn_count == 1 && pgxc_node_send_timestamp(primaryconnection, timestamp)) { - if (primaryconnection) - register_write_nodes(1, &primaryconnection); - register_write_nodes(regular_conn_count, connections); + /* + * If a transaction involves multiple connections timestamp is + * always sent down to Datanodes with pgxc_node_begin. + * An autocommit transaction needs the global timestamp also, + * so handle this case here. 
+ */ + pfree(connections); + pfree(primaryconnection); + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Failed to send command to data nodes"))); } - - gxid = GetCurrentGlobalTransactionId(); - - if (!GlobalTransactionIdIsValid(gxid)) + if (snapshot && pgxc_node_send_snapshot(primaryconnection, snapshot)) { - if (primaryconnection) - pfree(primaryconnection); pfree(connections); + pfree(primaryconnection); ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("Failed to get next transaction ID"))); + errmsg("Failed to send command to data nodes"))); } - - if (need_tran) + if (step->statement || step->cursor || step->paramval_data) { + /* need to use Extended Query Protocol */ + int fetch = 0; + bool prepared = false; + + /* if prepared statement is referenced see if it is already exist */ + if (step->statement) + prepared = ActivateDatanodeStatementOnNode(step->statement, + primaryconnection->nodenum); /* - * Check if data node connections are in transaction and start - * transactions on nodes where it is not started + * execute and fetch rows only if they will be consumed + * immediately by the sorter */ - PGXCNodeHandle *new_connections[total_conn_count]; - int new_count = 0; - - if (primaryconnection && primaryconnection->transaction_status != 'T') - new_connections[new_count++] = primaryconnection; - for (i = 0; i < regular_conn_count; i++) - if (connections[i]->transaction_status != 'T') - new_connections[new_count++] = connections[i]; - - if (new_count && pgxc_node_begin(new_count, new_connections, gxid)) - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("Could not begin transaction on data nodes."))); - } - - /* See if we have a primary node, execute on it first before the others */ - if (primaryconnection) - { - if (primaryconnection->state == DN_CONNECTION_STATE_QUERY) - BufferConnection(primaryconnection); - - /* If explicit transaction is needed gxid is already sent */ - if (!need_tran && 
pgxc_node_send_gxid(primaryconnection, gxid)) + if (step->cursor) + fetch = 1; + + if (pgxc_node_send_query_extended(primaryconnection, + prepared ? NULL : step->sql_statement, + step->statement, + step->cursor, + step->paramval_len, + step->paramval_data, + step->read_only, + fetch) != 0) { pfree(connections); - pfree(primaryconnection); + if (primaryconnection) + pfree(primaryconnection); ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("Failed to send command to data nodes"))); } - if (total_conn_count == 1 && pgxc_node_send_timestamp(primaryconnection, timestamp)) + } + else + { + if (pgxc_node_send_query(primaryconnection, step->sql_statement) != 0) { - /* - * If a transaction involves multiple connections timestamp is - * always sent down to Datanodes with pgxc_node_begin. - * An autocommit transaction needs the global timestamp also, - * so handle this case here. - */ pfree(connections); pfree(primaryconnection); ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("Failed to send command to data nodes"))); } - if (snapshot && pgxc_node_send_snapshot(primaryconnection, snapshot)) - { - pfree(connections); - pfree(primaryconnection); + } + primaryconnection->combiner = node; + Assert(node->combine_type == COMBINE_TYPE_SAME); + + while (node->command_complete_count < 1) + { + if (pgxc_node_receive(1, &primaryconnection, NULL)) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("Failed to send command to data nodes"))); - } - if (pgxc_node_send_query(primaryconnection, step->sql_statement) != 0) + errmsg("Failed to read response from data nodes"))); + handle_response(primaryconnection, node); + if (node->errorMessage) { - pfree(connections); - pfree(primaryconnection); + char *code = node->errorCode; ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("Failed to send command to data nodes"))); - } - Assert(node->combine_type == COMBINE_TYPE_SAME); - - while (node->command_complete_count < 1) - { - if (pgxc_node_receive(1, 
&primaryconnection, NULL)) - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("Failed to read response from data nodes"))); - handle_response(primaryconnection, node); - if (node->errorMessage) - { - char *code = node->errorCode; - ereport(ERROR, - (errcode(MAKE_SQLSTATE(code[0], code[1], code[2], code[3], code[4])), - errmsg("%s", node->errorMessage))); - } + (errcode(MAKE_SQLSTATE(code[0], code[1], code[2], code[3], code[4])), + errmsg("%s", node->errorMessage))); } } + } - for (i = 0; i < regular_conn_count; i++) + for (i = 0; i < regular_conn_count; i++) + { + if (connections[i]->state == DN_CONNECTION_STATE_QUERY) + BufferConnection(connections[i]); + /* If explicit transaction is needed gxid is already sent */ + if (!need_tran && pgxc_node_send_gxid(connections[i], gxid)) { - if (connections[i]->state == DN_CONNECTION_STATE_QUERY) - BufferConnection(connections[i]); - /* If explicit transaction is needed gxid is already sent */ - if (!need_tran && pgxc_node_send_gxid(connections[i], gxid)) + pfree(connections); + if (primaryconnection) + pfree(primaryconnection); + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Failed to send command to data nodes"))); + } + if (total_conn_count == 1 && pgxc_node_send_timestamp(connections[i], timestamp)) + { + /* + * If a transaction involves multiple connections timestamp is + * always sent down to Datanodes with pgxc_node_begin. + * An autocommit transaction needs the global timestamp also, + * so handle this case here. 
+ */ + pfree(connections); + if (primaryconnection) + pfree(primaryconnection); + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Failed to send command to data nodes"))); + } + if (snapshot && pgxc_node_send_snapshot(connections[i], snapshot)) + { + pfree(connections); + if (primaryconnection) + pfree(primaryconnection); + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Failed to send command to data nodes"))); + } + if (step->statement || step->cursor || step->paramval_data) + { + /* need to use Extended Query Protocol */ + int fetch = 0; + bool prepared = false; + + /* if prepared statement is referenced see if it is already exist */ + if (step->statement) + prepared = ActivateDatanodeStatementOnNode(step->statement, + connections[i]->nodenum); + /* + * execute and fetch rows only if they will be consumed + * immediately by the sorter + */ + if (step->cursor) + fetch = 1; + + if (pgxc_node_send_query_extended(connections[i], + prepared ? NULL : step->sql_statement, + step->statement, + step->cursor, + step->paramval_len, + step->paramval_data, + step->read_only, + fetch) != 0) { pfree(connections); if (primaryconnection) @@ -3175,14 +3276,11 @@ ExecRemoteQuery(RemoteQueryState *node) (errcode(ERRCODE_INTERNAL_ERROR), errmsg("Failed to send command to data nodes"))); } - if (total_conn_count == 1 && pgxc_node_send_timestamp(connections[i], timestamp)) + } + else + { + if (pgxc_node_send_query(connections[i], step->sql_statement) != 0) { - /* - * If a transaction involves multiple connections timestamp is - * always sent down to Datanodes with pgxc_node_begin. - * An autocommit transaction needs the global timestamp also, - * so handle this case here. - */ pfree(connections); if (primaryconnection) pfree(primary... [truncated message content] |
From: Michael P. <mic...@us...> - 2010-12-22 05:58:53
|
Project "Postgres-XC". The branch, master has been updated via 0ab9bbc7600c157618d566f4d9985399e446519d (commit) from bb22b7d667c20228e23526c5627197c10ae54672 (commit) - Log ----------------------------------------------------------------- commit 0ab9bbc7600c157618d566f4d9985399e446519d Author: Michael P <mic...@us...> Date: Wed Dec 22 14:48:48 2010 +0900 Correction for implicit 2PC When a COMMIT is issued for a write transaction involving multiple Postgres-XC nodes, a 2PC is used internally. For implicit 2PC, the following process is respected for DDL transactions: 1) PREPARE on local Coordinator (if DDL is involved) 2) PREPARE on Postgres-XC nodes 3) COMMIT PREPARED on local Coordinator (if DDL is involved) 4) COMMIT PREPARED on Postgres-XC nodes For transaction containing no DDL: 1) PREPARE on Datanodes 2) COMMIT on Coordinator 3) COMMIT PREPARED on Datanodes In case of a Node failure after Coordinator has committed, transaction becomes partially committed on Nodes. To maintain data consistency, it is absolutely necessary to COMMIT this transaction on all nodes. In this case, the remaining list PREPARED nodes is saved on GTM as if it was an explicit 2PC. And this transaction is kept open to avoid visibility issues. It is necessary to issue a COMMIT PREPARED from application to finish the COMMIT of this transaction. 
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 7465847..da9e3b1 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -26,7 +26,7 @@ #include "access/gtm.h" /* PGXC_COORD */ #include "gtm/gtm_c.h" -#include "pgxc/pgxcnode.h" +#include "pgxc/execRemote.h" /* PGXC_DATANODE */ #include "postmaster/autovacuum.h" #endif @@ -139,6 +139,9 @@ typedef struct TransactionStateData TransactionId transactionId; /* my XID, or Invalid if none */ #ifdef PGXC /* PGXC_COORD */ GlobalTransactionId globalTransactionId; /* my GXID, or Invalid if none */ + GlobalTransactionId globalCommitTransactionId; /* Commit GXID used by implicit 2PC */ + bool ArePGXCNodesPrepared; /* Checks if PGXC Nodes are prepared and + * rollbacks then in case of an Abort */ #endif SubTransactionId subTransactionId; /* my subxact ID */ char *name; /* savepoint name, if any */ @@ -169,6 +172,8 @@ static TransactionStateData TopTransactionStateData = { 0, /* transaction id */ #ifdef PGXC 0, /* global transaction id */ + 0, /* global commit transaction id */ + 0, /* flag if nodes are prepared or not */ #endif 0, /* subtransaction id */ NULL, /* savepoint name */ @@ -307,6 +312,7 @@ static const char *TransStateAsString(TransState state); #ifdef PGXC /* PGXC_COORD */ static GlobalTransactionId GetGlobalTransactionId(TransactionState s); +static void PrepareTransaction(bool write_2pc_file, bool is_implicit); /* ---------------------------------------------------------------- * PG-XC Functions @@ -1631,10 +1637,15 @@ StartTransaction(void) * start processing */ s->state = TRANS_START; -#ifdef PGXC /* PGXC_COORD */ +#ifdef PGXC /* GXID is assigned already by a remote Coordinator */ if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) + { s->globalTransactionId = InvalidGlobalTransactionId; /* until assigned */ + /* Until assigned by implicit 2PC */ + s->globalCommitTransactionId = InvalidGlobalTransactionId; + s->ArePGXCNodesPrepared = false; + } 
#endif s->transactionId = InvalidTransactionId; /* until assigned */ /* @@ -1737,7 +1748,31 @@ CommitTransaction(void) { TransactionState s = CurrentTransactionState; TransactionId latestXid; +#ifdef PGXC + bool PrepareLocalCoord = false; + bool PreparePGXCNodes = false; + char implicitgid[256]; + TransactionId xid = GetCurrentTransactionId(); + if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) + PreparePGXCNodes = PGXCNodeIsImplicit2PC(&PrepareLocalCoord); + + if (PrepareLocalCoord || PreparePGXCNodes) + sprintf(implicitgid, "T%d", xid); + + /* Save GID where PrepareTransaction can find it again */ + if (PrepareLocalCoord) + { + prepareGID = MemoryContextStrdup(TopTransactionContext, implicitgid); + /* + * If current transaction has a DDL, and involves more than 1 Coordinator, + * PREPARE first on local Coordinator. + */ + PrepareTransaction(true, true); + } + else + { +#endif ShowTransactionState("CommitTransaction"); /* @@ -1747,6 +1782,28 @@ CommitTransaction(void) elog(WARNING, "CommitTransaction while in %s state", TransStateAsString(s->state)); Assert(s->parent == NULL); +#ifdef PGXC + } + + /* + * If Transaction has involved several nodes, prepare them before committing on Coordinator. + */ + if (PreparePGXCNodes) + { + /* + * Prepare all the nodes involved in this Implicit 2PC + * If Coordinator COMMIT fails, nodes are also rollbacked during AbortTransaction(). + * + * Track if PGXC Nodes are already prepared + */ + if (PGXCNodeImplicitPrepare(xid, implicitgid) < 0) + ereport(ERROR, + (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION), + errmsg("cannot COMMIT a transaction whose PREPARE has failed on Nodes"))); + else + s->ArePGXCNodesPrepared = true; + } +#endif /* * Do pre-commit processing (most of this stuff requires database access, @@ -1756,6 +1813,10 @@ CommitTransaction(void) * deferred triggers, and it's also possible that triggers create holdable * cursors. So we have to loop until there's nothing left to do. 
*/ +#ifdef PGXC + if (!PrepareLocalCoord) + { +#endif for (;;) { /* @@ -1800,8 +1861,11 @@ CommitTransaction(void) /* * There can be error on the data nodes. So go to data nodes before * changing transaction state and local clean up + * Here simply commit on nodes, we know that 2PC is not involved implicitely. + * + * This is called only if it is not necessary to prepare the nodes. */ - if (IS_PGXC_COORDINATOR) + if (IS_PGXC_COORDINATOR && !IsConnFromCoord() && !PreparePGXCNodes) PGXCNodeCommit(); #endif @@ -1825,8 +1889,10 @@ CommitTransaction(void) /* * Now we can let GTM know about transaction commit. * Only a Remote Coordinator is allowed to do that. + * + * Also do not commit a transaction that has already been prepared on Datanodes */ - if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) + if (IS_PGXC_COORDINATOR && !IsConnFromCoord() && !PreparePGXCNodes) { CommitTranGTM(s->globalTransactionId); latestXid = s->globalTransactionId; @@ -1908,6 +1974,46 @@ CommitTransaction(void) AtEOXact_MultiXact(); +#ifdef PGXC + }/* End of !PrepareLocalCoord */ + + /* + * At this point, if no 2pc has been used, we have a transaction that committed on GTM, + * local coord and nodes, so the remaining stuff is only ressource cleanup. + * If 2pc has been used, Coordinator has been prepared (if 2 Coordinators at least are involved + * in current transaction). + * Datanodes have also been prepared if more than 1 Datanode has been written. + * + * Here we complete Implicit 2PC in the following order + * - Commit the prepared transaction on local coordinator (if necessary) + * - Commit on the remaining nodes + */ + + if (PreparePGXCNodes) + { + /* + * Preparing for Commit, transaction has to take a new TransactionID for Commit + * It is considered as in Progress state. 
+ */ + s->state = TRANS_INPROGRESS; + s->globalCommitTransactionId = BeginTranGTM(NULL); + + /* COMMIT local Coordinator */ + if (PrepareLocalCoord) + { + FinishPreparedTransaction(implicitgid, true); + } + + /* + * Commit all the nodes involved in this implicit 2PC. + * COMMIT on GTM is made here and is made at the same time + * for prepared GXID and commit GXID to limit interactions between GTM and Coord. + * This explains why prepared GXID is also in argument. + */ + PGXCNodeImplicitCommitPrepared(xid, s->globalCommitTransactionId, implicitgid, true); + } +#endif + ResourceOwnerRelease(TopTransactionResourceOwner, RESOURCE_RELEASE_LOCKS, true, true); @@ -1948,7 +2054,11 @@ CommitTransaction(void) #ifdef PGXC if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) + { s->globalTransactionId = InvalidGlobalTransactionId; + s->globalCommitTransactionId = InvalidGlobalTransactionId; + s->ArePGXCNodesPrepared = false; + } else if (IS_PGXC_DATANODE || IsConnFromCoord()) SetNextTransactionId(InvalidTransactionId); #endif @@ -1972,9 +2082,11 @@ CommitTransaction(void) /* * Only a Postgres-XC Coordinator that received a PREPARE Command from * an application can use this special prepare. + * If PrepareTransaction is called during an implicit 2PC, do not release ressources, + * this is made by CommitTransaction when transaction has been committed on Nodes. 
*/ static void -PrepareTransaction(bool write_2pc_file) +PrepareTransaction(bool write_2pc_file, bool is_implicit) #else static void PrepareTransaction(void) @@ -2170,6 +2282,14 @@ PrepareTransaction(void) } #endif +#ifdef PGXC + /* + * In case of an implicit 2PC, ressources are released by CommitTransaction() + */ + if (!is_implicit) + { +#endif + ResourceOwnerRelease(TopTransactionResourceOwner, RESOURCE_RELEASE_LOCKS, true, true); @@ -2219,6 +2339,9 @@ PrepareTransaction(void) s->state = TRANS_DEFAULT; RESUME_INTERRUPTS(); +#ifdef PGXC + } /* is_implicit END */ +#endif } @@ -2286,8 +2409,13 @@ AbortTransaction(void) /* * We should rollback on the data nodes before cleaning up portals * to be sure data structures used by connections are not freed yet + * + * It is also necessary to check that node are not partially committed + * in an implicit 2PC, correct handling is made below. */ - if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) + if (IS_PGXC_COORDINATOR && + !IsConnFromCoord() && + !TransactionIdIsValid(s->globalCommitTransactionId)) { /* * Make sure this is rolled back on the DataNodes @@ -2310,6 +2438,10 @@ AbortTransaction(void) * Advertise the fact that we aborted in pg_clog (assuming that we got as * far as assigning an XID to advertise). */ +#ifdef PGXC + /* Do not abort a transaction that has already been committed in an implicit 2PC */ + if (!TransactionIdIsValid(s->globalCommitTransactionId)) +#endif latestXid = RecordTransactionAbort(false); TRACE_POSTGRESQL_TRANSACTION_ABORT(MyProc->lxid); @@ -2317,8 +2449,56 @@ AbortTransaction(void) /* This is done by remote Coordinator */ if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) { - RollbackTranGTM(s->globalTransactionId); + /* + * Rollback the transaction ID only if it is not being used by an implicit 2PC. 
+ */ + if (!s->ArePGXCNodesPrepared) + RollbackTranGTM(s->globalTransactionId); + latestXid = s->globalTransactionId; + + /* Rollback Prepared Nodes if they are totally prepared but not committed at all */ + if (s->ArePGXCNodesPrepared && !TransactionIdIsValid(s->globalCommitTransactionId)) + { + char implicitgid[256]; + + sprintf(implicitgid, "T%d", s->globalTransactionId); + PGXCNodeImplicitCommitPrepared(s->globalTransactionId, + s->globalCommitTransactionId, + implicitgid, false); + } + else if (s->ArePGXCNodesPrepared && TransactionIdIsValid(s->globalCommitTransactionId)) + { + /* + * In this case transaction is partially committed, pick up the list of nodes + * prepared and not committed and register them on GTM as if it is an explicit 2PC. + * This permits to keep the transaction alive in snapshot and other transaction + * don't have any side effects with partially committed transactions + */ + char implicitgid[256]; + int co_conn_count, dn_conn_count; + PGXC_NodeId *datanodes = NULL; + PGXC_NodeId *coordinators = NULL; + + sprintf(implicitgid, "T%d", s->globalTransactionId); + + /* Get the list of nodes in error state */ + PGXCNodeGetNodeList(&datanodes, &dn_conn_count, &coordinators, &co_conn_count); + + /* Save the node list and gid on GTM. */ + StartPreparedTranGTM(s->globalTransactionId, implicitgid, + dn_conn_count, datanodes, co_conn_count, coordinators); + + /* Finish to prepare the transaction. */ + PrepareTranGTM(s->globalTransactionId); + + /* + * Rollback commit GXID as it has been used by an implicit 2PC. + * It is important at this point not to Commit the GXID used for PREPARE + * to keep it visible in snapshot for other transactions. + */ + RollbackTranGTM(s->globalCommitTransactionId); + } } else if (IS_PGXC_DATANODE || IsConnFromCoord()) { @@ -2602,7 +2782,7 @@ CommitTransactionCommand(void) * return to the idle state. 
*/ case TBLOCK_PREPARE: - PrepareTransaction(true); + PrepareTransaction(true, false); s->blockState = TBLOCK_DEFAULT; break; @@ -2612,7 +2792,7 @@ CommitTransactionCommand(void) * that involved DDLs on a Coordinator. */ case TBLOCK_PREPARE_NO_2PC_FILE: - PrepareTransaction(false); + PrepareTransaction(false, false); s->blockState = TBLOCK_DEFAULT; break; #endif @@ -2647,17 +2827,20 @@ CommitTransactionCommand(void) CommitTransaction(); s->blockState = TBLOCK_DEFAULT; } -#ifdef PGXC - else if (s->blockState == TBLOCK_PREPARE || - s->blockState == TBLOCK_PREPARE_NO_2PC_FILE) -#else else if (s->blockState == TBLOCK_PREPARE) -#endif { Assert(s->parent == NULL); - PrepareTransaction(true); + PrepareTransaction(true, false); s->blockState = TBLOCK_DEFAULT; } +#ifdef PGXC + else if (s->blockState == TBLOCK_PREPARE_NO_2PC_FILE) + { + Assert(s->parent == NULL); + PrepareTransaction(false, false); + s->blockState = TBLOCK_DEFAULT; + } +#endif else { Assert(s->blockState == TBLOCK_INPROGRESS || diff --git a/src/backend/pgxc/pool/execRemote.c b/src/backend/pgxc/pool/execRemote.c index a524c13..b954003 100644 --- a/src/backend/pgxc/pool/execRemote.c +++ b/src/backend/pgxc/pool/execRemote.c @@ -49,6 +49,7 @@ extern char *deparseSql(RemoteQueryState *scanstate); #define PRIMARY_NODE_WRITEAHEAD 1024 * 1024 static bool autocommit = true; +static bool implicit_force_autocommit = false; static PGXCNodeHandle **write_node_list = NULL; static int write_node_count = 0; @@ -61,6 +62,14 @@ static int pgxc_node_rollback_prepared(GlobalTransactionId gxid, GlobalTransacti PGXCNodeAllHandles * pgxc_handles, char *gid); static int pgxc_node_commit_prepared(GlobalTransactionId gxid, GlobalTransactionId prepared_gxid, PGXCNodeAllHandles * pgxc_handles, char *gid); +static int pgxc_node_implicit_commit_prepared(GlobalTransactionId prepare_xid, + GlobalTransactionId commit_xid, + PGXCNodeAllHandles * pgxc_handles, + char *gid, + bool is_commit); +static int 
pgxc_node_implicit_prepare(GlobalTransactionId prepare_xid, + PGXCNodeAllHandles * pgxc_handles, char *gid); + static PGXCNodeAllHandles * get_exec_connections(ExecNodes *exec_nodes, RemoteQueryExecType exec_type); static int pgxc_node_receive_and_validate(const int conn_count, @@ -74,7 +83,7 @@ static int handle_response_clear(PGXCNodeHandle * conn); static void close_node_cursors(PGXCNodeHandle **connections, int conn_count, char *cursor); -static PGXCNodeAllHandles *pgxc_get_all_transaction_nodes(void); +static PGXCNodeAllHandles *pgxc_get_all_transaction_nodes(PGXCNode_HandleRequested status_requested); #define MAX_STATEMENTS_PER_TRAN 10 @@ -1505,7 +1514,7 @@ PGXCNodePrepare(char *gid) PGXCNodeAllHandles *pgxc_connections; bool local_operation = false; - pgxc_connections = pgxc_get_all_transaction_nodes(); + pgxc_connections = pgxc_get_all_transaction_nodes(HANDLE_DEFAULT); /* DDL involved in transaction, so make a local prepare too */ if (pgxc_connections->co_conn_count != 0) @@ -1669,6 +1678,176 @@ finish: return result; } +/* + * Prepare all the nodes involved in this implicit Prepare + * Abort transaction if this is not done correctly + */ +int +PGXCNodeImplicitPrepare(GlobalTransactionId prepare_xid, char *gid) +{ + int res = 0; + int tran_count; + PGXCNodeAllHandles *pgxc_connections = pgxc_get_all_transaction_nodes(HANDLE_DEFAULT); + + if (!pgxc_connections) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Could not prepare connection implicitely"))); + + tran_count = pgxc_connections->dn_conn_count + pgxc_connections->co_conn_count; + + /* + * This should not happen because an implicit 2PC is always using other nodes, + * but it is better to check. 
+ */ + if (tran_count == 0) + { + goto finish; + } + + res = pgxc_node_implicit_prepare(prepare_xid, pgxc_connections, gid); + +finish: + if (!autocommit) + stat_transaction(pgxc_connections->dn_conn_count); + + return res; +} + +/* + * Prepare transaction on dedicated nodes for Implicit 2PC + * This is done inside a Transaction commit if multiple nodes are involved in write operations + * Implicit prepare in done internally on Coordinator, so this does not interact with GTM. + */ +static int +pgxc_node_implicit_prepare(GlobalTransactionId prepare_xid, + PGXCNodeAllHandles *pgxc_handles, + char *gid) +{ + int result = 0; + int co_conn_count = pgxc_handles->co_conn_count; + int dn_conn_count = pgxc_handles->dn_conn_count; + char buffer[256]; + + sprintf(buffer, "PREPARE TRANSACTION '%s'", gid); + + /* Continue even after an error here, to consume the messages */ + result = pgxc_all_handles_send_query(pgxc_handles, buffer, true); + + /* Receive and Combine results from Datanodes and Coordinators */ + result |= pgxc_node_receive_and_validate(dn_conn_count, pgxc_handles->datanode_handles, false); + result |= pgxc_node_receive_and_validate(co_conn_count, pgxc_handles->coord_handles, false); + + return result; +} + +/* + * Commit all the nodes involved in this Implicit Commit. + * Prepared XID is committed at the same time as Commit XID on GTM. + */ +void +PGXCNodeImplicitCommitPrepared(GlobalTransactionId prepare_xid, + GlobalTransactionId commit_xid, + char *gid, + bool is_commit) +{ + int res = 0; + int tran_count; + PGXCNodeAllHandles *pgxc_connections = pgxc_get_all_transaction_nodes(HANDLE_IDLE); + + if (!pgxc_connections) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Could not commit prepared transaction implicitely"))); + + tran_count = pgxc_connections->dn_conn_count + pgxc_connections->co_conn_count; + + /* + * This should not happen because an implicit 2PC is always using other nodes, + * but it is better to check. 
+ */ + if (tran_count == 0) + { + elog(WARNING, "Nothing to PREPARE on Datanodes and Coordinators"); + goto finish; + } + + res = pgxc_node_implicit_commit_prepared(prepare_xid, commit_xid, + pgxc_connections, gid, is_commit); + +finish: + /* Clear nodes, signals are clear */ + if (!autocommit) + stat_transaction(pgxc_connections->dn_conn_count); + + /* + * If an error happened, do not release handles yet. This is done when transaction + * is aborted after the list of nodes in error state has been saved to be sent to GTM + */ + if (!PersistentConnections && res == 0) + release_handles(false); + autocommit = true; + clear_write_node_list(); + + if (res != 0) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Could not commit prepared transaction implicitely"))); + + /* + * Commit on GTM is made once we are sure that Nodes are not only partially committed + * If an error happens on a Datanode during implicit COMMIT PREPARED, a special handling + * is made in AbortTransaction(). + * The list of datanodes is saved on GTM and the partially committed transaction can be committed + * with a COMMIT PREPARED delivered directly from application. + * This permits to keep the gxid alive in snapshot and avoids other transactions to see only + * partially committed results. + */ + CommitPreparedTranGTM(prepare_xid, commit_xid); +} + +/* + * Commit a transaction implicitely transaction on all nodes + * Prepared transaction with this gid has reset the datanodes, + * so we need a new gxid. + * + * GXID used for Prepare and Commit are committed at the same time on GTM. + * This saves Network ressource a bit. 
+ */ +static int +pgxc_node_implicit_commit_prepared(GlobalTransactionId prepare_xid, + GlobalTransactionId commit_xid, + PGXCNodeAllHandles *pgxc_handles, + char *gid, + bool is_commit) +{ + char buffer[256]; + int result = 0; + int co_conn_count = pgxc_handles->co_conn_count; + int dn_conn_count = pgxc_handles->dn_conn_count; + + if (is_commit) + sprintf(buffer, "COMMIT PREPARED '%s'", gid); + else + sprintf(buffer, "ROLLBACK PREPARED '%s'", gid); + + if (pgxc_all_handles_send_gxid(pgxc_handles, commit_xid, true)) + { + result = EOF; + goto finish; + } + + /* Send COMMIT to all handles */ + if (pgxc_all_handles_send_query(pgxc_handles, buffer, false)) + result = EOF; + + /* Receive and Combine results from Datanodes and Coordinators */ + result |= pgxc_node_receive_and_validate(dn_conn_count, pgxc_handles->datanode_handles, false); + result |= pgxc_node_receive_and_validate(co_conn_count, pgxc_handles->coord_handles, false); + +finish: + return result; +} /* * Commit prepared transaction on Datanodes and Coordinators (as necessary) @@ -1684,7 +1863,7 @@ PGXCNodeCommitPrepared(char *gid) { int res = 0; int res_gtm = 0; - PGXCNodeAllHandles *pgxc_handles; + PGXCNodeAllHandles *pgxc_handles = NULL; List *datanodelist = NIL; List *coordlist = NIL; int i, tran_count; @@ -1812,7 +1991,7 @@ PGXCNodeRollbackPrepared(char *gid) { int res = 0; int res_gtm = 0; - PGXCNodeAllHandles *pgxc_handles; + PGXCNodeAllHandles *pgxc_handles = NULL; List *datanodelist = NIL; List *coordlist = NIL; int i, tran_count; @@ -1922,6 +2101,8 @@ pgxc_node_rollback_prepared(GlobalTransactionId gxid, GlobalTransactionId prepar /* * Commit current transaction on data nodes where it has been started + * This function is called when no 2PC is involved implicitely. + * So only send a commit to the involved nodes. 
*/ void PGXCNodeCommit(void) @@ -1930,7 +2111,7 @@ PGXCNodeCommit(void) int tran_count; PGXCNodeAllHandles *pgxc_connections; - pgxc_connections = pgxc_get_all_transaction_nodes(); + pgxc_connections = pgxc_get_all_transaction_nodes(HANDLE_DEFAULT); tran_count = pgxc_connections->dn_conn_count + pgxc_connections->co_conn_count; @@ -1952,7 +2133,7 @@ finish: autocommit = true; clear_write_node_list(); - /* Clear up connection */ + /* Clean up connections */ pfree_pgxc_all_handles(pgxc_connections); if (res != 0) ereport(ERROR, @@ -1969,71 +2150,11 @@ static int pgxc_node_commit(PGXCNodeAllHandles *pgxc_handles) { char buffer[256]; - GlobalTransactionId gxid = InvalidGlobalTransactionId; int result = 0; int co_conn_count = pgxc_handles->co_conn_count; int dn_conn_count = pgxc_handles->dn_conn_count; - /* can set this to false to disable temporarily */ - /* bool do2PC = conn_count > 1; */ - - /* - * Only use 2PC if more than one node was written to. Otherwise, just send - * COMMIT to all - */ - bool do2PC = write_node_count > 1; - - /* Extra XID for Two Phase Commit */ - GlobalTransactionId two_phase_xid = 0; - - if (do2PC) - { - stat_2pc(); - - /* - * Formally we should be using GetCurrentGlobalTransactionIdIfAny() here, - * but since we need 2pc, we surely have sent down a command and got - * gxid for it. 
Hence GetCurrentGlobalTransactionId() just returns - * already allocated gxid - */ - gxid = GetCurrentGlobalTransactionId(); - - sprintf(buffer, "PREPARE TRANSACTION 'T%d'", gxid); - - if (pgxc_all_handles_send_query(pgxc_handles, buffer, false)) - result = EOF; - - /* Receive and Combine results from Datanodes and Coordinators */ - result |= pgxc_node_receive_and_validate(dn_conn_count, pgxc_handles->datanode_handles, true); - result |= pgxc_node_receive_and_validate(co_conn_count, pgxc_handles->coord_handles, true); - } - - if (!do2PC) - strcpy(buffer, "COMMIT"); - else - { - if (result) - { - sprintf(buffer, "ROLLBACK PREPARED 'T%d'", gxid); - /* Consume any messages on the Datanodes and Coordinators first if necessary */ - PGXCNodeConsumeMessages(); - } - else - sprintf(buffer, "COMMIT PREPARED 'T%d'", gxid); - - /* - * We need to use a new xid, the data nodes have reset - * Timestamp has already been set with BEGIN on remote Datanodes, - * so don't use it here. - */ - two_phase_xid = BeginTranGTM(NULL); - - if (pgxc_all_handles_send_gxid(pgxc_handles, two_phase_xid, true)) - { - result = EOF; - goto finish; - } - } + strcpy(buffer, "COMMIT"); /* Send COMMIT to all handles */ if (pgxc_all_handles_send_query(pgxc_handles, buffer, false)) @@ -2043,10 +2164,6 @@ pgxc_node_commit(PGXCNodeAllHandles *pgxc_handles) result |= pgxc_node_receive_and_validate(dn_conn_count, pgxc_handles->datanode_handles, false); result |= pgxc_node_receive_and_validate(co_conn_count, pgxc_handles->coord_handles, false); -finish: - if (do2PC) - CommitTranGTM((GlobalTransactionId) two_phase_xid); - return result; } @@ -2062,7 +2179,7 @@ PGXCNodeRollback(void) int tran_count; PGXCNodeAllHandles *pgxc_connections; - pgxc_connections = pgxc_get_all_transaction_nodes(); + pgxc_connections = pgxc_get_all_transaction_nodes(HANDLE_DEFAULT); tran_count = pgxc_connections->dn_conn_count + pgxc_connections->co_conn_count; @@ -2099,7 +2216,6 @@ finish: static int 
pgxc_node_rollback(PGXCNodeAllHandles *pgxc_handles) { - int i; int result = 0; int co_conn_count = pgxc_handles->co_conn_count; int dn_conn_count = pgxc_handles->dn_conn_count; @@ -2881,6 +2997,8 @@ ExecRemoteQuery(RemoteQueryState *node) PGXCNodeAllHandles *pgxc_connections; TupleTableSlot *innerSlot = NULL; + implicit_force_autocommit = force_autocommit; + /* * Inner plan for RemoteQuery supplies parameters. * We execute inner plan to get a tuple and use values of the tuple as @@ -3622,6 +3740,8 @@ ExecRemoteUtility(RemoteQuery *node) bool need_tran; int i; + implicit_force_autocommit = force_autocommit; + remotestate = CreateResponseCombiner(0, node->combine_type); pgxc_connections = get_exec_connections(node->exec_nodes, @@ -3984,7 +4104,7 @@ finish: * for both data nodes and coordinators */ static PGXCNodeAllHandles * -pgxc_get_all_transaction_nodes() +pgxc_get_all_transaction_nodes(PGXCNode_HandleRequested status_requested) { PGXCNodeAllHandles *pgxc_connections; @@ -4009,9 +4129,13 @@ pgxc_get_all_transaction_nodes() /* gather needed connections */ pgxc_connections->dn_conn_count = get_transaction_nodes( - pgxc_connections->datanode_handles, REMOTE_CONN_DATANODE); + pgxc_connections->datanode_handles, + REMOTE_CONN_DATANODE, + status_requested); pgxc_connections->co_conn_count = get_transaction_nodes( - pgxc_connections->coord_handles, REMOTE_CONN_COORD); + pgxc_connections->coord_handles, + REMOTE_CONN_COORD, + status_requested); return pgxc_connections; } @@ -4032,3 +4156,68 @@ pfree_pgxc_all_handles(PGXCNodeAllHandles *pgxc_handles) pfree(pgxc_handles); } + +/* + * Check if an Implicit 2PC is necessary for this transaction. + * Check also if it is necessary to prepare transaction locally. 
+ */ +bool +PGXCNodeIsImplicit2PC(bool *prepare_local_coord) +{ + PGXCNodeAllHandles *pgxc_handles = pgxc_get_all_transaction_nodes(HANDLE_DEFAULT); + int co_conn_count = pgxc_handles->co_conn_count; + + /* Prepare Local Coord only if DDL is involved on multiple nodes */ + *prepare_local_coord = co_conn_count > 0; + + /* + * In case of an autocommit or forced autocommit transaction, 2PC is not involved + * This case happens for Utilities using force autocommit (CREATE DATABASE, VACUUM...) + */ + if (implicit_force_autocommit) + { + implicit_force_autocommit = false; + return false; + } + + /* + * 2PC is necessary at other Nodes if one Datanode or one Coordinator + * other than the local one has been involved in a write operation. + */ + return (write_node_count > 1 || co_conn_count > 0); +} + +/* + * Return the list of active nodes + */ +void +PGXCNodeGetNodeList(PGXC_NodeId **datanodes, + int *dn_conn_count, + PGXC_NodeId **coordinators, + int *co_conn_count) +{ + PGXCNodeAllHandles *pgxc_connections = pgxc_get_all_transaction_nodes(HANDLE_ERROR); + + *dn_conn_count = pgxc_connections->dn_conn_count; + + /* Add in the list local coordinator also if necessary */ + if (pgxc_connections->co_conn_count == 0) + *co_conn_count = pgxc_connections->co_conn_count; + else + *co_conn_count = pgxc_connections->co_conn_count + 1; + + if (pgxc_connections->dn_conn_count != 0) + *datanodes = collect_pgxcnode_numbers(pgxc_connections->dn_conn_count, + pgxc_connections->datanode_handles, REMOTE_CONN_DATANODE); + + if (pgxc_connections->co_conn_count != 0) + *coordinators = collect_pgxcnode_numbers(pgxc_connections->co_conn_count, + pgxc_connections->coord_handles, REMOTE_CONN_COORD); + + /* + * Now release handles properly, the list of handles in error state has been saved + * and will be sent to GTM. 
+ */ + if (!PersistentConnections) + release_handles(false); +} diff --git a/src/backend/pgxc/pool/pgxcnode.c b/src/backend/pgxc/pool/pgxcnode.c index cbaf68c..4790f95 100644 --- a/src/backend/pgxc/pool/pgxcnode.c +++ b/src/backend/pgxc/pool/pgxcnode.c @@ -1579,9 +1579,19 @@ get_handles(List *datanodelist, List *coordlist, bool is_coord_only_query) * to a PGXCNodeHandle structure. * The function returns number of pointers written to the connections array. * Remaining items in the array, if any, will be kept unchanged + * + * In an implicit 2PC, status of connections is set back to idle after preparing + * the transaction on each backend. + * At commit phase, it is necessary to get backends in idle state to be able to + * commit properly the backends. + * + * In the case of an error occuring with an implicit 2PC that has been partially + * committed on nodes, return the list of connections that has an error state + * to register the list of remaining nodes not commit prepared on GTM. */ int -get_transaction_nodes(PGXCNodeHandle **connections, char client_conn_type) +get_transaction_nodes(PGXCNodeHandle **connections, char client_conn_type, + PGXCNode_HandleRequested status_requested) { int tran_count = 0; int i; @@ -1596,16 +1606,42 @@ get_transaction_nodes(PGXCNodeHandle **connections, char client_conn_type) * DN_CONNECTION_STATE_ERROR_FATAL. * ERROR_NOT_READY can happen if the data node abruptly disconnects. 
*/ - if (dn_handles[i].sock != NO_SOCKET && dn_handles[i].transaction_status != 'I') - connections[tran_count++] = &dn_handles[i]; + if (status_requested == HANDLE_IDLE) + { + if (dn_handles[i].sock != NO_SOCKET && dn_handles[i].transaction_status == 'I') + connections[tran_count++] = &dn_handles[i]; + } + else if (status_requested == HANDLE_ERROR) + { + if (dn_handles[i].transaction_status == 'E') + connections[tran_count++] = &dn_handles[i]; + } + else + { + if (dn_handles[i].sock != NO_SOCKET && dn_handles[i].transaction_status != 'I') + connections[tran_count++] = &dn_handles[i]; + } } } if (coord_count && client_conn_type == REMOTE_CONN_COORD) { for (i = 0; i < NumCoords; i++) { - if (co_handles[i].sock != NO_SOCKET && co_handles[i].transaction_status != 'I') - connections[tran_count++] = &co_handles[i]; + if (status_requested == HANDLE_IDLE) + { + if (co_handles[i].sock != NO_SOCKET && co_handles[i].transaction_status == 'I') + connections[tran_count++] = &co_handles[i]; + } + else if (status_requested == HANDLE_ERROR) + { + if (co_handles[i].transaction_status == 'E') + connections[tran_count++] = &co_handles[i]; + } + else + { + if (co_handles[i].sock != NO_SOCKET && co_handles[i].transaction_status != 'I') + connections[tran_count++] = &co_handles[i]; + } } } diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 6000fdb..69c25d1 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -280,6 +280,14 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid) * taking a snapshot. See discussion in * src/backend/access/transam/README. 
*/ +#ifdef PGXC + /* + * Remove this assertion check for PGXC on Coordinator + * We could abort even after a Coordinator has committed + * for a 2PC transaction if Datanodes have failed to commit the transaction + */ + if (IS_PGXC_DATANODE) +#endif Assert(TransactionIdIsValid(proc->xid)); LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); diff --git a/src/include/pgxc/execRemote.h b/src/include/pgxc/execRemote.h index 4a33842..a3c1868 100644 --- a/src/include/pgxc/execRemote.h +++ b/src/include/pgxc/execRemote.h @@ -43,6 +43,7 @@ typedef enum REQUEST_TYPE_COPY_OUT /* Copy Out response */ } RequestType; + /* * Represents a DataRow message received from a remote node. * Contains originating node number and message body in DataRow format without @@ -111,6 +112,19 @@ extern int PGXCNodeRollback(void); extern bool PGXCNodePrepare(char *gid); extern bool PGXCNodeRollbackPrepared(char *gid); extern bool PGXCNodeCommitPrepared(char *gid); +extern bool PGXCNodeIsImplicit2PC(bool *prepare_local_coord); +extern int PGXCNodeImplicitPrepare(GlobalTransactionId prepare_xid, char *gid); +extern void PGXCNodeImplicitCommitPrepared(GlobalTransactionId prepare_xid, + GlobalTransactionId commit_xid, + char *gid, + bool is_commit); +extern void PGXCNodeConsumeMessages(void); + +/* Get list of nodes */ +extern void PGXCNodeGetNodeList(PGXC_NodeId **datanodes, + int *dn_conn_count, + PGXC_NodeId **coordinators, + int *co_conn_count); /* Copy command just involves Datanodes */ extern PGXCNodeHandle** DataNodeCopyBegin(const char *query, List *nodelist, Snapshot snapshot, bool is_from); diff --git a/src/include/pgxc/pgxcnode.h b/src/include/pgxc/pgxcnode.h index a57e4f1..47b0b96 100644 --- a/src/include/pgxc/pgxcnode.h +++ b/src/include/pgxc/pgxcnode.h @@ -39,6 +39,13 @@ typedef enum DN_CONNECTION_STATE_COPY_OUT } DNConnectionState; +typedef enum +{ + HANDLE_IDLE, + HANDLE_ERROR, + HANDLE_DEFAULT +} PGXCNode_HandleRequested; + #define DN_CONNECTION_STATE_ERROR(dnconn) \ ((dnconn)->state == 
DN_CONNECTION_STATE_ERROR_FATAL \ || (dnconn)->transaction_status == 'E') @@ -97,7 +104,9 @@ extern void PGXCNodeCleanAndRelease(int code, Datum arg); extern PGXCNodeAllHandles *get_handles(List *datanodelist, List *coordlist, bool is_query_coord_only); extern void release_handles(bool force_drop); -extern int get_transaction_nodes(PGXCNodeHandle ** connections, char client_conn_type); +extern int get_transaction_nodes(PGXCNodeHandle ** connections, + char client_conn_type, + PGXCNode_HandleRequested type_requested); extern PGXC_NodeId* collect_pgxcnode_numbers(int conn_count, PGXCNodeHandle ** connections, char client_conn_type); extern int get_active_nodes(PGXCNodeHandle ** connections); ----------------------------------------------------------------------- Summary of changes: src/backend/access/transam/xact.c | 213 ++++++++++++++++++++-- src/backend/pgxc/pool/execRemote.c | 341 +++++++++++++++++++++++++++-------- src/backend/pgxc/pool/pgxcnode.c | 46 +++++- src/backend/storage/ipc/procarray.c | 8 + src/include/pgxc/execRemote.h | 14 ++ src/include/pgxc/pgxcnode.h | 11 +- 6 files changed, 536 insertions(+), 97 deletions(-) hooks/post-receive -- Postgres-XC |
From: mason_s <ma...@us...> - 2010-12-21 21:06:15
|
Project "Postgres-XC". The branch, master has been updated via bb22b7d667c20228e23526c5627197c10ae54672 (commit) from ce57f6ba483c2cb76a96fe32f3850e1eac4dfdd6 (commit) - Log ----------------------------------------------------------------- commit bb22b7d667c20228e23526c5627197c10ae54672 Author: Mason Sharp <ma...@us...> Date: Tue Dec 21 16:05:07 2010 -0500 Fixed recently introduced bug with node determination for write operations. Extracted from Andrei Martsinchyk's patch diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 436a1dd..62cb748 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -2320,7 +2320,7 @@ CopyFrom(CopyState cstate) if (DataNodeCopyIn(cstate->line_buf.data, cstate->line_buf.len, GetRelationNodes(cstate->rel_loc, (long *)hash_value, - RELATION_ACCESS_WRITE), + RELATION_ACCESS_INSERT), cstate->connections)) ereport(ERROR, (errcode(ERRCODE_CONNECTION_EXCEPTION), @@ -4023,7 +4023,7 @@ DoInsertSelectCopy(EState *estate, TupleTableSlot *slot) /* Send item to the appropriate data node(s) (buffer) */ if (DataNodeCopyIn(cstate->fe_msgbuf->data, cstate->fe_msgbuf->len, - GetRelationNodes(cstate->rel_loc, (long *)hash_value, RELATION_ACCESS_WRITE), + GetRelationNodes(cstate->rel_loc, (long *)hash_value, RELATION_ACCESS_INSERT), cstate->connections)) ereport(ERROR, (errcode(ERRCODE_CONNECTION_EXCEPTION), diff --git a/src/backend/pgxc/locator/locator.c b/src/backend/pgxc/locator/locator.c index 4191455..790b81d 100644 --- a/src/backend/pgxc/locator/locator.c +++ b/src/backend/pgxc/locator/locator.c @@ -299,7 +299,8 @@ GetRelationNodes(RelationLocInfo *rel_loc_info, long *partValue, { case LOCATOR_TYPE_REPLICATED: - if (accessType == RELATION_ACCESS_WRITE) + if (accessType == RELATION_ACCESS_UPDATE || + accessType == RELATION_ACCESS_INSERT) { /* we need to write to all synchronously */ exec_nodes->nodelist = list_copy(rel_loc_info->nodeList); @@ -360,7 +361,7 @@ GetRelationNodes(RelationLocInfo *rel_loc_info, 
long *partValue, else { /* If no info, go to node 1 */ - if (accessType == RELATION_ACCESS_WRITE) + if (accessType == RELATION_ACCESS_INSERT) exec_nodes->nodelist = lappend_int(NULL, 1); else /* @@ -380,7 +381,7 @@ GetRelationNodes(RelationLocInfo *rel_loc_info, long *partValue, case LOCATOR_TYPE_RROBIN: /* round robin, get next one */ - if (accessType == RELATION_ACCESS_WRITE) + if (accessType == RELATION_ACCESS_INSERT) { /* write to just one of them */ exec_nodes->nodelist = lappend_int(NULL, GetRoundRobinNode(rel_loc_info->relid)); diff --git a/src/backend/pgxc/plan/planner.c b/src/backend/pgxc/plan/planner.c index fa61826..8d900f1 100644 --- a/src/backend/pgxc/plan/planner.c +++ b/src/backend/pgxc/plan/planner.c @@ -550,7 +550,7 @@ get_plan_nodes_insert(Query *query, RemoteQuery *step) if (!lc) { /* Skip rest, handle NULL */ - step->exec_nodes = GetRelationNodes(rel_loc_info, NULL, RELATION_ACCESS_WRITE); + step->exec_nodes = GetRelationNodes(rel_loc_info, NULL, RELATION_ACCESS_INSERT); return; } @@ -629,7 +629,7 @@ get_plan_nodes_insert(Query *query, RemoteQuery *step) /* single call handles both replicated and partitioned types */ step->exec_nodes = GetRelationNodes(rel_loc_info, part_value_ptr, - RELATION_ACCESS_WRITE); + RELATION_ACCESS_INSERT); if (eval_expr) pfree(eval_expr); @@ -1771,7 +1771,7 @@ get_plan_nodes_command(Query *query, RemoteQuery *step) case CMD_UPDATE: case CMD_DELETE: /* treat as a select */ - get_plan_nodes(query, step, RELATION_ACCESS_WRITE); + get_plan_nodes(query, step, RELATION_ACCESS_UPDATE); break; default: diff --git a/src/include/pgxc/locator.h b/src/include/pgxc/locator.h index b01606f..ee28c5a 100644 --- a/src/include/pgxc/locator.h +++ b/src/include/pgxc/locator.h @@ -31,6 +31,27 @@ typedef int PartAttrNumber; +/* track if tables use pg_catalog */ +typedef enum +{ + TABLE_USAGE_TYPE_NO_TABLE, + TABLE_USAGE_TYPE_PGCATALOG, + TABLE_USAGE_TYPE_USER, + TABLE_USAGE_TYPE_USER_REPLICATED, /* based on a replicated table */ + 
TABLE_USAGE_TYPE_MIXED +} TableUsageType; + +/* + * How relation is accessed in the query + */ +typedef enum +{ + RELATION_ACCESS_READ, /* SELECT */ + RELATION_ACCESS_READ_FOR_UPDATE, /* SELECT FOR UPDATE */ + RELATION_ACCESS_UPDATE, /* UPDATE OR DELETE */ + RELATION_ACCESS_INSERT /* INSERT */ +} RelationAccessType; + typedef struct { Oid relid; @@ -42,17 +63,6 @@ typedef struct ListCell *roundRobinNode; /* points to next one to use */ } RelationLocInfo; - -/* track if tables use pg_catalog */ -typedef enum -{ - TABLE_USAGE_TYPE_NO_TABLE, - TABLE_USAGE_TYPE_PGCATALOG, - TABLE_USAGE_TYPE_USER, - TABLE_USAGE_TYPE_USER_REPLICATED, /* based on a replicated table */ - TABLE_USAGE_TYPE_MIXED -} TableUsageType; - /* * Nodes to execute on * primarynodelist is for replicated table writes, where to execute first. @@ -68,15 +78,6 @@ typedef struct TableUsageType tableusagetype; /* track pg_catalog usage */ } ExecNodes; -/* - * How relation is accessed in the query - */ -typedef enum -{ - RELATION_ACCESS_READ, - RELATION_ACCESS_READ_FOR_UPDATE, - RELATION_ACCESS_WRITE -} RelationAccessType; extern char *PreferredDataNodes; ----------------------------------------------------------------------- Summary of changes: src/backend/commands/copy.c | 4 +- src/backend/pgxc/locator/locator.c | 7 +++-- src/backend/pgxc/plan/planner.c | 6 ++-- src/include/pgxc/locator.h | 41 ++++++++++++++++++----------------- 4 files changed, 30 insertions(+), 28 deletions(-) hooks/post-receive -- Postgres-XC |