You can subscribe to this list here.
2010 |
Jan
|
Feb
|
Mar
|
Apr
(4) |
May
(28) |
Jun
(12) |
Jul
(11) |
Aug
(12) |
Sep
(5) |
Oct
(19) |
Nov
(14) |
Dec
(12) |
---|---|---|---|---|---|---|---|---|---|---|---|---|
2011 |
Jan
(18) |
Feb
(30) |
Mar
(115) |
Apr
(89) |
May
(50) |
Jun
(44) |
Jul
(22) |
Aug
(13) |
Sep
(11) |
Oct
(30) |
Nov
(28) |
Dec
(39) |
2012 |
Jan
(38) |
Feb
(18) |
Mar
(43) |
Apr
(91) |
May
(108) |
Jun
(46) |
Jul
(37) |
Aug
(44) |
Sep
(33) |
Oct
(29) |
Nov
(36) |
Dec
(15) |
2013 |
Jan
(35) |
Feb
(611) |
Mar
(5) |
Apr
(55) |
May
(30) |
Jun
(28) |
Jul
(458) |
Aug
(34) |
Sep
(9) |
Oct
(39) |
Nov
(22) |
Dec
(32) |
2014 |
Jan
(16) |
Feb
(16) |
Mar
(42) |
Apr
(179) |
May
(7) |
Jun
(6) |
Jul
(9) |
Aug
|
Sep
(4) |
Oct
|
Nov
(3) |
Dec
|
2015 |
Jan
|
Feb
|
Mar
|
Apr
(2) |
May
(4) |
Jun
|
Jul
|
Aug
|
Sep
|
Oct
|
Nov
|
Dec
|
From: Michael P. <mic...@us...> - 2011-02-23 07:14:26
|
Project "Postgres-XC". The branch, ha_support has been updated via bd357d3c92dcd3d7bb82ba63d942ba3841cff751 (commit) from 101c354db37e5642023f80467fd19849b147d644 (commit) - Log ----------------------------------------------------------------- commit bd357d3c92dcd3d7bb82ba63d942ba3841cff751 Author: Michael P <mic...@us...> Date: Wed Feb 23 16:18:27 2011 +0900 Support for Preferred Datanode and Primary datanodes Mirror mode itself doesn't need that but this is necessary when XCM is activated to get necessary information. This was still missing in the implementation. diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 2d0fb13..8871c05 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -34,7 +34,7 @@ #include "optimizer/planner.h" #include "parser/parse_relation.h" #ifdef PGXC -#include "pgxc/pgxc.h" +#include "pgxc/mirror.h" #include "pgxc/execRemote.h" #include "pgxc/locator.h" #include "pgxc/poolmgr.h" @@ -1343,9 +1343,17 @@ DoCopy(const CopyStmt *stmt, const char *queryString) { if (IS_PGXC_COORDINATOR && is_from && cstate->rel_loc) { + int primary_node_id = IsXCM ? PGXCMirror_GetPrimaryDatanodeID() : primary_data_node; + int primary_node_num; + + if (IsPGXCMirrorMode) + primary_node_num = PGXCMirror_GetPrimaryMirrorNum(primary_node_id); + else + primary_node_num = primary_node_id; + DataNodeCopyFinish( cstate->connections, - primary_data_node, + primary_node_num, COMBINE_TYPE_NONE); pfree(cstate->connections); pfree(cstate->query_buf.data); @@ -1357,9 +1365,17 @@ DoCopy(const CopyStmt *stmt, const char *queryString) if (IS_PGXC_COORDINATOR && is_from && cstate->rel_loc) { bool replicated = cstate->rel_loc->locatorType == LOCATOR_TYPE_REPLICATED; + int primary_node_id = IsXCM ? 
PGXCMirror_GetPrimaryDatanodeID() : primary_data_node; + int primary_node_num; + + if (IsPGXCMirrorMode) + primary_node_num = PGXCMirror_GetPrimaryMirrorNum(primary_node_id); + else + primary_node_num = primary_node_id; + DataNodeCopyFinish( cstate->connections, - replicated ? primary_data_node : 0, + replicated ? primary_node_num : 0, replicated ? COMBINE_TYPE_SAME : COMBINE_TYPE_SUM); pfree(cstate->connections); pfree(cstate->query_buf.data); @@ -4039,11 +4055,19 @@ DoInsertSelectCopy(EState *estate, TupleTableSlot *slot) void EndInsertSelectCopy(void) { + int primary_node_id = IsXCM ? PGXCMirror_GetPrimaryDatanodeID() : primary_data_node; + int primary_node_num; + + if (IsPGXCMirrorMode) + primary_node_num = PGXCMirror_GetPrimaryMirrorNum(primary_node_id); + else + primary_node_num = primary_node_id; + Assert(IS_PGXC_COORDINATOR); DataNodeCopyFinish( insertstate->connections, - primary_data_node, + primary_node_num, COMBINE_TYPE_NONE); pfree(insertstate->connections); MemoryContextDelete(insertstate->rowcontext); diff --git a/src/backend/pgxc/locator/locator.c b/src/backend/pgxc/locator/locator.c index 6729b15..dfac11f 100644 --- a/src/backend/pgxc/locator/locator.c +++ b/src/backend/pgxc/locator/locator.c @@ -86,6 +86,7 @@ List * GetAnyDataNode(void) { List *destList = NULL; + int preferred_datanode = IsXCM ? PGXCMirror_GetPreferredNodeID(true) : PreferredDataNode; /* * Try and pick the preferred node. * In Mirror mode, pick up the preferred Mirror. @@ -95,10 +96,10 @@ GetAnyDataNode(void) * PGXCTODO: When tables defined in a subset of nodes is supported, * we need also to check if those tables can use it. 
*/ - if (PreferredDataNode != 0 && !IsPGXCMirrorMode) - return destList = lappend_int(NULL, PreferredDataNode); - else if (PreferredDataNode != 0 && IsPGXCMirrorMode) - return destList = PGXCMirror_GetSubsetMirrors(PreferredDataNode, false); + if (preferred_datanode != 0 && !IsPGXCMirrorMode) + return destList = lappend_int(NULL, preferred_datanode); + else if (preferred_datanode != 0 && IsPGXCMirrorMode) + return destList = PGXCMirror_GetSubsetMirrors(preferred_datanode, false); return destList = lappend_int(NULL, 1); } @@ -457,12 +458,13 @@ GetRelationNodes(RelationLocInfo *rel_loc_info, long *partValue, case LOCATOR_TYPE_REPLICATED: { int primary_node_num; + int primary_datanode_id = IsXCM ? PGXCMirror_GetPrimaryDatanodeID() : primary_data_node; /* In Mirror mode, primary mirror is selected in subset of datanodes */ if (IsPGXCMirrorMode) - primary_node_num = PGXCMirror_GetPrimaryMirrorNum(primary_data_node); + primary_node_num = PGXCMirror_GetPrimaryMirrorNum(primary_datanode_id); else - primary_node_num = primary_data_node; + primary_node_num = primary_datanode_id; if (accessType == RELATION_ACCESS_UPDATE || accessType == RELATION_ACCESS_INSERT) @@ -483,6 +485,7 @@ GetRelationNodes(RelationLocInfo *rel_loc_info, long *partValue, } else { + int preferred_datanode = IsXCM ? PGXCMirror_GetPreferredNodeID(true) : PreferredDataNode; if (accessType == RELATION_ACCESS_READ_FOR_UPDATE && primary_data_node) { @@ -493,7 +496,7 @@ GetRelationNodes(RelationLocInfo *rel_loc_info, long *partValue, */ exec_nodes->nodelist = lappend_int(NULL, primary_node_num); } - else if (PreferredDataNode != 0) + else if (preferred_datanode != 0) { /* * Try and pick the preferred node. 
@@ -502,12 +505,12 @@ GetRelationNodes(RelationLocInfo *rel_loc_info, long *partValue, */ foreach(stepItem, rel_loc_info->nodeList) { - if (lfirst_int(stepItem) == PreferredDataNode) + if (lfirst_int(stepItem) == preferred_datanode) { if (IsPGXCMirrorMode) - exec_nodes->nodelist = PGXCMirror_GetSubsetMirrors(PreferredDataNode, false); + exec_nodes->nodelist = PGXCMirror_GetSubsetMirrors(preferred_datanode, false); else - exec_nodes->nodelist = lappend_int(NULL, PreferredDataNode); + exec_nodes->nodelist = lappend_int(NULL, preferred_datanode); break; } diff --git a/src/backend/pgxc/pool/mirror.c b/src/backend/pgxc/pool/mirror.c index 1700695..fd51b4d 100644 --- a/src/backend/pgxc/pool/mirror.c +++ b/src/backend/pgxc/pool/mirror.c @@ -81,33 +81,86 @@ static int MirrorTotalCount = 0; /* PGXCMirror_GetXXX and PGXCMirror_SetXXX APIs can be invocated only at node startup */ /* - * PGXCMirror_SetNodePrimary + * PGXCMirror_GetPrimaryDatanodeID * - * Set PGXCNodePrimary parameters for Replicated Handling - * This is called at node startup + * Get Primary Datanode number to be used instead of GUC parameter */ -void -PGXCMirror_SetNodePrimary(void) +int +PGXCMirror_GetPrimaryDatanodeID(void) { + int primary_id = 0; /* * Get the primary node parameters from Fault Sync module * In other cases GUC params have all the necessary data. */ if (IsXCM) { - int datanode_id, mirror_id; + if (get_xcm_primary_datanode(&primary_id) < 0) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Fault Sync ERROR: could not get primary node ID"))); + } + + return primary_id; +} + + +/* + * PGXCMirror_GetPrimaryMirrorID + * + * Get Primary Mirror ID to be used instead of GUC parameter + * To be used with results of PGXCMirror_GetPrimaryDatanodeID + * when calculating global ID for node lists. 
+ */ +int +PGXCMirror_GetPrimaryMirrorID(void) +{ + int datanode_id = PGXCMirror_GetPrimaryDatanodeID(); + int mirror_id = 0; + + if (IsXCM) + { + if (get_xcm_primary_mirror(datanode_id, &mirror_id) < 0) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Fault Sync ERROR: could not get primary mirror ID"))); + + Assert(datanode_id <= NumDataNodes && datanode_id > 0); + Assert(mirror_id < PGXCMirror_GetMirrorCount(datanode_id)); + } + + return mirror_id; +} + +/* + * PGXCMirror_GetPreferredNodeID + * + * Get preferred Datanode ID or preferred mirror ID + */ +int +PGXCMirror_GetPreferredNodeID(bool is_datanode) +{ + int datanode_id = 0; + int mirror_id = 0; + if (IsXCM) + { if (get_xcm_preferred_mirror(PGXCNodeId, &datanode_id, &mirror_id) < 0) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("Fault Sync ERROR: could not get Primary node data"))); + errmsg("Fault Sync ERROR: could not get primary mirror ID"))); - /* Assign obtained values */ - PreferredDataNode = datanode_id; - PreferredMirrorId = mirror_id; + Assert(datanode_id <= NumDataNodes && datanode_id > 0); + Assert(mirror_id < PGXCMirror_GetMirrorCount(datanode_id)); } + + if (is_datanode) + return datanode_id; + else + return mirror_id; } + /* * PGXCMirror_SetMirrorCountList * @@ -316,23 +369,28 @@ PGXCMirror_GetSubsetMirrors(int pgxc_node_id, bool is_write) * If the preferred node is not online, choose another mirror randomly */ bool done = false; + int preferred_mirror_id = PreferredMirrorId; + + if (IsXCM) + preferred_mirror_id = PGXCMirror_GetPreferredNodeID(false); + + Assert(preferred_mirror_id <= PGXCNodeMirrorCount[pgxc_node_id - 1]); - Assert(PreferredMirrorId <= PGXCNodeMirrorCount[pgxc_node_id - 1]); /* Check also that preferred node is online */ if (PreferredMirrorId != 0) { if (IsXCM && !XCM_IS_FAULT(PGXCMirror_CheckStatus(REMOTE_CONN_DATANODE, - pgxc_node_id, PreferredMirrorId))) + pgxc_node_id, preferred_mirror_id))) { done = true; - list_mirrors = 
lappend_int(list_mirrors, sum + PreferredMirrorId); + list_mirrors = lappend_int(list_mirrors, sum + preferred_mirror_id); } else if (!IsXCM) { /* In mirror mode without XCM active, we suppose the mirror is online */ done = true; - list_mirrors = lappend_int(list_mirrors, sum + PreferredMirrorId); + list_mirrors = lappend_int(list_mirrors, sum + preferred_mirror_id); } } @@ -363,12 +421,13 @@ PGXCMirror_GetPrimaryMirrorNum(int primary_data_node) { int count; int sum = 0; + int primary_mirror_id = IsXCM ? PGXCMirror_GetPrimaryMirrorID() : PrimaryMirrorId; /* Determine number of Primary Node in the Global Array of Datanodes */ for (count = 0; count < primary_data_node - 1; count++) sum += PGXCNodeMirrorCount[count]; - return sum + PrimaryMirrorId; + return sum + primary_mirror_id; } /* @@ -1001,34 +1060,4 @@ PGXCMirror_GetConnPoint(RemoteConnTypes conn_type, return conn_pts; } -/* - * PGXCMirror_GetPreferredNode - * - * Get preferred Datanode number for given Coordinator - * Preferred Datanode means one node, identified by the couple Datanode ID/Mirror ID - */ -int -PGXCMirror_GetPreferredNode(int pgxc_node_id, int *mirror_id) -{ - int datanode_id = 0; - Assert(IS_PGXC_COORDINATOR); - - if (get_xcm_preferred_mirror(pgxc_node_id, &datanode_id, mirror_id)) - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("Fault Sync ERROR: could not get preferred node data"))); - - return datanode_id; -} -/* - * PGXCMirror_GetLocalPreferredNode - * - * Get Preferred Mirror for local Coordinator - */ -int -PGXCMirror_GetLocalPreferredNode(int *mirror_id) -{ - Assert(IS_PGXC_COORDINATOR); - return PGXCMirror_GetPreferredNode(PGXCNodeId, mirror_id); -} diff --git a/src/include/pgxc/mirror.h b/src/include/pgxc/mirror.h index 5428fb1..f0dbd83 100644 --- a/src/include/pgxc/mirror.h +++ b/src/include/pgxc/mirror.h @@ -32,8 +32,12 @@ extern char *MirrorCount; extern bool IsPrimaryMirror; extern int PGXCMirrorId; +/* Get Primary and preferred parameters from XCM */ +extern int 
PGXCMirror_GetPrimaryDatanodeID(void); +extern int PGXCMirror_GetPrimaryMirrorID(void); +extern int PGXCMirror_GetPreferredNodeID(bool is_datanode); + /* Set or Get parameters explicit to mirror mode */ -extern void PGXCMirror_SetNodePrimary(void); extern int PGXCMirror_SetMirrorCountList(void); extern int PGXCMirror_GetMirrorTotalCount(void); extern int PGXCMirror_GetMirrorCount(int pgxc_node_id); @@ -86,7 +90,4 @@ extern xcm_connPoint* PGXCMirror_GetConnPoint(RemoteConnTypes conn_type, int mirror_id, int *n_connections); -extern int PGXCMirror_GetPreferredNode(int pgxc_node_id, int *mirror_id); -extern int PGXCMirror_GetLocalPreferredNode(int *mirror_id); - #endif /* MIRROR_H */ ----------------------------------------------------------------------- Summary of changes: src/backend/commands/copy.c | 32 +++++++++- src/backend/pgxc/locator/locator.c | 23 ++++--- src/backend/pgxc/pool/mirror.c | 119 ++++++++++++++++++++++-------------- src/include/pgxc/mirror.h | 9 ++- 4 files changed, 120 insertions(+), 63 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-02-23 05:46:11
|
Project "Postgres-XC". The branch, ha_support has been updated via 101c354db37e5642023f80467fd19849b147d644 (commit) via b260079109da14ed446f18eb008fe6b873a322f0 (commit) from 2d3b90de7d334184f5be3e6ae5faeaa34b4ab7d6 (commit) - Log ----------------------------------------------------------------- commit 101c354db37e5642023f80467fd19849b147d644 Merge: 2d3b90d b260079 Author: Michael P <mic...@us...> Date: Wed Feb 23 14:52:22 2011 +0900 Merge branch 'master' into ha_support ----------------------------------------------------------------------- Summary of changes: src/backend/pgxc/pool/poolmgr.c | 4 ++-- src/backend/tcop/utility.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-02-23 04:37:19
|
Project "Postgres-XC". The branch, master has been updated via b260079109da14ed446f18eb008fe6b873a322f0 (commit) from 69838d0f61e1599983d5036ffcc30bcc5166af79 (commit) - Log ----------------------------------------------------------------- commit b260079109da14ed446f18eb008fe6b873a322f0 Author: Michael P <mic...@us...> Date: Wed Feb 23 13:41:59 2011 +0900 Fix for DROP DATABASE If a database pool is not found in pooler when cleaning connections, consider it as already clean. When dropping a database, connection clean is made only in the local node. diff --git a/src/backend/pgxc/pool/poolmgr.c b/src/backend/pgxc/pool/poolmgr.c index fb9ca00..fc8be2f 100644 --- a/src/backend/pgxc/pool/poolmgr.c +++ b/src/backend/pgxc/pool/poolmgr.c @@ -1920,9 +1920,9 @@ clean_connection(List *dn_discard, List *co_discard, const char *database) /* Find correct Database pool to clean */ databasePool = find_database_pool(database); - /* Database pool has not been found */ + /* Database pool has not been found, it is already clean */ if (!databasePool) - return CLEAN_CONNECTION_NOT_COMPLETED; + return CLEAN_CONNECTION_COMPLETED; /* * Clean each Pool Correctly diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index b8c7b0d..90c1f81 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -1073,8 +1073,8 @@ ProcessUtility(Node *parsetree, DropdbStmt *stmt = (DropdbStmt *) parsetree; #ifdef PGXC - /* Clean connections before dropping a database */ - if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) + /* Clean connections before dropping a database on local node */ + if (IS_PGXC_COORDINATOR) DropDBCleanConnection(stmt->dbname); #endif ----------------------------------------------------------------------- Summary of changes: src/backend/pgxc/pool/poolmgr.c | 4 ++-- src/backend/tcop/utility.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-02-22 08:00:42
|
Project "Postgres-XC". The branch, ha_support has been updated via 2d3b90de7d334184f5be3e6ae5faeaa34b4ab7d6 (commit) via 69838d0f61e1599983d5036ffcc30bcc5166af79 (commit) via f4e30a760b14921cca1b84ac249f1861685950fd (commit) via a1e1a0eb08a0094ecfc2eafb8d8b7f9e804dad07 (commit) from de1318f64694a7a668d445e26be1fcb2e3be3e65 (commit) - Log ----------------------------------------------------------------- commit 2d3b90de7d334184f5be3e6ae5faeaa34b4ab7d6 Merge: de1318f 69838d0 Author: Michael P <mic...@us...> Date: Tue Feb 22 17:07:03 2011 +0900 Merge branch 'master' into ha_support ----------------------------------------------------------------------- Summary of changes: src/backend/rewrite/rewriteHandler.c | 43 ++++++++++++--------------------- src/backend/tcop/utility.c | 6 ++++ src/gtm/main/gtm_txn.c | 9 +----- 3 files changed, 24 insertions(+), 34 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-02-22 07:03:30
|
Project "Postgres-XC". The branch, master has been updated via 69838d0f61e1599983d5036ffcc30bcc5166af79 (commit) from f4e30a760b14921cca1b84ac249f1861685950fd (commit) - Log ----------------------------------------------------------------- commit 69838d0f61e1599983d5036ffcc30bcc5166af79 Author: Michael P <mic...@us...> Date: Tue Feb 22 16:03:39 2011 +0900 Fix for bug 3136262: PREPARE information not correctly got This caused errors for the following series of queries if repeated 2 times in a row: BEGIN; CREATE TABLE pxtest2 (a int); INSERT INTO pxtest2 VALUES (1); INSERT INTO pxtest2 VALUES (2); PREPARE TRANSACTION 'regress-one'; COMMIT PREPARED 'regress-one'; SELECT * FROM pxtest2; --- create table test(a int); BEGIN; DROP TABLE test; PREPARE TRANSACTION 'prepare_test'; COMMIT PREPARED 'prepare_test'; A terminating \0 was forgotten when saving the gid in the transaction handle of GTM, with the consequence that the handle for a given transaction ID could not be found the second time the same GID was used. The first time the memory was used was OK because memory is clean, but it gets dirty after finishing the first series.
At the second time, the dirty memory area left by the first transaction is still there, causing issues when trying to find transaction handles on GTM for a given GID diff --git a/src/gtm/main/gtm_txn.c b/src/gtm/main/gtm_txn.c index ab5a09b..c16570a 100644 --- a/src/gtm/main/gtm_txn.c +++ b/src/gtm/main/gtm_txn.c @@ -916,7 +916,7 @@ GTM_StartPreparedTransaction(GTM_TransactionHandle txn, if (gtm_txninfo->gti_gid == NULL) gtm_txninfo->gti_gid = (char *)MemoryContextAlloc(TopMostMemoryContext, GTM_MAX_GID_LEN); - memcpy(gtm_txninfo->gti_gid, gid, strlen(gid)); + memcpy(gtm_txninfo->gti_gid, gid, strlen(gid) + 1); GTM_RWLockRelease(&gtm_txninfo->gti_lock); @@ -1419,14 +1419,13 @@ ProcessGetGIDDataTransactionCommand(Port *myport, StringInfo message) pq_getmsgend(message); + /* Get the prepared Transaction for given GID */ prepared_txn = GTM_GIDToHandle(gid); if (prepared_txn == InvalidTransactionHandle) ereport(ERROR, (EINVAL, errmsg("Failed to get GID Data for prepared transaction"))); - oldContext = MemoryContextSwitchTo(TopMemoryContext); - /* First get the GXID for the new transaction */ txn = GTM_BeginTransaction(0, txn_isolation_level, txn_read_only); if (txn == InvalidTransactionHandle) @@ -1444,13 +1443,9 @@ ProcessGetGIDDataTransactionCommand(Port *myport, StringInfo message) * Make the internal process, get the prepared information from GID. */ if (GTM_GetGIDData(prepared_txn, &prepared_gxid, &datanodecnt, &datanodes, &coordcnt, &coordinators) != STATUS_OK) - { ereport(ERROR, (EINVAL, errmsg("Failed to get the information of prepared transaction"))); - } - - MemoryContextSwitchTo(oldContext); /* * Send a SUCCESS message back to the client ----------------------------------------------------------------------- Summary of changes: src/gtm/main/gtm_txn.c | 9 ++------- 1 files changed, 2 insertions(+), 7 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-02-22 05:42:36
|
Project "website". The branch, master has been updated via 6180d077309d20675daf8ba1101d173a18724baa (commit) from d71e7047cde9775855104225592e2a6b9cba7bc3 (commit) - Log ----------------------------------------------------------------- commit 6180d077309d20675daf8ba1101d173a18724baa Author: Michael P <mic...@us...> Date: Tue Feb 22 14:48:42 2011 +0900 Couple of typos corrected Corrections done by Devrim diff --git a/events.html b/events.html index 9a14813..0ded082 100755 --- a/events.html +++ b/events.html @@ -13,7 +13,7 @@ --> <h2 class="plain">Events</h2> <p class="plain"> -A lot of opportunities to meet the Core developpers!! +A lot of opportunities to meet the Core developers!! <ul> <li>PgCon 2011 in May 2011</li> </ul> diff --git a/getting_started.html b/getting_started.html index e3ea444..9e0b4a5 100755 --- a/getting_started.html +++ b/getting_started.html @@ -40,7 +40,7 @@ memory even in this case. <p> You may be able to choose any popular Linux operating system for Intel 64bit architecture. -The developemnt team uses CentOS 5.4 for test and evaluation. +The developemnt team uses CentOS 5.4 for testing and evaluation. </p> <p> A minimum hardware configuration will be useful to test Postgres-XC features. diff --git a/maincontents.html b/maincontents.html index 267f8fc..dd74dc3 100755 --- a/maincontents.html +++ b/maincontents.html @@ -96,7 +96,7 @@ memory even in this case. You may be able to choose any popular Linux operating system for Intel 64bit architecture. For your information, -the developemnt team uses CentOS 5.4 for test and evaluation. +the developemnt team uses CentOS 5.4 for testing and evaluation. </p> <p> Minimum hardware configuration will be useful to test Postgres-XC feature. 
diff --git a/members.html b/members.html index 1300a32..88bdf2a 100755 --- a/members.html +++ b/members.html @@ -52,7 +52,7 @@ He is also helping in source code review and PostgreSQL internals.<br /> <p class="inner"> He has worked on Data Node, connection pooling and cursor support<br /> -He is also GridSQL developer and is now developping aggregate +He is also GridSQL developer and is now developing aggregate functions and other cross-node operation. </p> @@ -75,7 +75,7 @@ Test, performance evaluation and analysis, related documents and utilities. <p class="inner"> Binary buiding for releases.<br /> -He is also developping binary packages of PostgreSQL. +He is also developing binary packages of PostgreSQL. </p> </body> diff --git a/roadmap.html b/roadmap.html index f01ff86..56bdca1 100755 --- a/roadmap.html +++ b/roadmap.html @@ -96,7 +96,7 @@ Version 0.9.4 (March, 2011) <li>Merge with PostgreSQL 9.0.3</li> <li>HA Capability <ul> - <li>Datanode mirroring functionnality: multiple Datanodes seen as only one node,<br /> + <li>Datanode mirroring functionality: multiple Datanodes seen as only one node,<br /> SQL-based replication</li> <li>XC Cluster Manager: manage in an allocated shared memory space node information,<br /> used to report node failures</li> ----------------------------------------------------------------------- Summary of changes: events.html | 2 +- getting_started.html | 2 +- maincontents.html | 2 +- members.html | 4 ++-- roadmap.html | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) hooks/post-receive -- website |
From: Koichi S. <koi...@us...> - 2011-02-22 05:16:42
|
Project "Postgres-XC". The branch, ha_support has been updated via de1318f64694a7a668d445e26be1fcb2e3be3e65 (commit) from 249f1124281a7a00532b2c86a423582563b0b085 (commit) - Log ----------------------------------------------------------------- commit de1318f64694a7a668d445e26be1fcb2e3be3e65 Author: Koichi Suzuki <ko...@in...> Date: Tue Feb 22 14:09:58 2011 +0900 This patch fixes the bug ID 3188179, xcm_initmember's SEGV issue when no argument is given. This commit also include change history of xcm_initmember.c and xcm_canstart.c. diff --git a/src/pgxc/xcm/xcm_canstart.c b/src/pgxc/xcm/xcm_canstart.c index 21913f5..07a23d4 100644 --- a/src/pgxc/xcm/xcm_canstart.c +++ b/src/pgxc/xcm/xcm_canstart.c @@ -25,6 +25,9 @@ * History * Feb. 2011: First Version * + * Feb.21, 2011: Fixed the check of argument count. It should be + * 1, not 2. + * * Author: Koichi Suzuki * * Copyright (c) 2011, Nippon Telegraph and Telephone Corporation diff --git a/src/pgxc/xcm/xcm_initmember.c b/src/pgxc/xcm/xcm_initmember.c index 840b08b..2e71542 100644 --- a/src/pgxc/xcm/xcm_initmember.c +++ b/src/pgxc/xcm/xcm_initmember.c @@ -41,6 +41,10 @@ * * Feb. 2011: First Version * + * Feb.21, 2011: Fixed the bug to encouter SEGV when invoked + * without arguments. Error in the timing to convert argv[1] to + * lower case. + * * Author: Koichi Suzuki * * Copyright (c) 2011, Nippon Telegraph and Telephone Corporation @@ -269,10 +273,10 @@ int main(int argc, char *argv[]) /* * Version and usage. */ - strncpy(argv_1, argv[1], MAXPATH-1); - conv_tolower(argv_1); if (argc == 2) { + strncpy(argv_1, argv[1], MAXPATH-1); + conv_tolower(argv_1); if (strcmp(argv_1, "--version") == 0) version(0); ----------------------------------------------------------------------- Summary of changes: src/pgxc/xcm/xcm_canstart.c | 3 +++ src/pgxc/xcm/xcm_initmember.c | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-02-22 04:24:20
|
Project "Postgres-XC". The branch, master has been updated via f4e30a760b14921cca1b84ac249f1861685950fd (commit) from a1e1a0eb08a0094ecfc2eafb8d8b7f9e804dad07 (commit) - Log ----------------------------------------------------------------- commit f4e30a760b14921cca1b84ac249f1861685950fd Author: Michael P <mic...@us...> Date: Tue Feb 22 13:30:14 2011 +0900 Block SAVEPOINT because of non-support This functionality is not yet supported. diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index bccea10..b8c7b0d 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -433,6 +433,12 @@ ProcessUtility(Node *parsetree, ListCell *cell; char *name = NULL; +#ifdef PGXC + ereport(ERROR, + (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), + (errmsg("SAVEPOINT is not yet supported.")))); +#endif + RequireTransactionChain(isTopLevel, "SAVEPOINT"); foreach(cell, stmt->options) ----------------------------------------------------------------------- Summary of changes: src/backend/tcop/utility.c | 6 ++++++ 1 files changed, 6 insertions(+), 0 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-02-22 02:24:09
|
Project "Postgres-XC". The branch, master has been updated via a1e1a0eb08a0094ecfc2eafb8d8b7f9e804dad07 (commit) from 9c2e68462db0e41ddacb4a0c4ea935b54f6dccc1 (commit) - Log ----------------------------------------------------------------- commit a1e1a0eb08a0094ecfc2eafb8d8b7f9e804dad07 Author: Michael P <mic...@us...> Date: Tue Feb 22 11:29:55 2011 +0900 Fix for bug 3188711: Fire rules only on Coordinator Rules are just launched on Coordinator. Coordinator is then in charge on locating the correct nodes for queries in rules diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index 62875f8..255be72 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -1725,8 +1725,13 @@ RewriteQuery(Query *parsetree, List *rewrite_events) if (locks != NIL) { - List *product_queries; +#ifdef PGXC + List *product_queries = NIL; + if (IS_PGXC_COORDINATOR) +#else + List *product_queries; +#endif product_queries = fireRules(parsetree, result_relation, event, @@ -1835,16 +1840,16 @@ RewriteQuery(Query *parsetree, List *rewrite_events) result_relation, parsetree); if (locks != NIL) { - List *product_queries; + List *product_queries = NIL; - - product_queries = fireRules(query, - result_relation, - event, - locks, - &instead, - &returning, - &qual_product); + if (IS_PGXC_COORDINATOR) + product_queries = fireRules(query, + result_relation, + event, + locks, + &instead, + &returning, + &qual_product); qual_product_list = lappend(qual_product_list, qual_product); @@ -1931,23 +1936,7 @@ RewriteQuery(Query *parsetree, List *rewrite_events) heap_close(rt_entry_relation, NoLock); } } -#endif - - - - /* - * For INSERTs, the original query is done first; for UPDATE/DELETE, it is - * done last. This is needed because update and delete rule actions might - * not do anything if they are invoked after the update or delete is - * performed. 
The command counter increment between the query executions - * makes the deleted (and maybe the updated) tuples disappear so the scans - * for them in the rule actions cannot find them. - * - * If we found any unqualified INSTEAD, the original query is not done at - * all, in any form. Otherwise, we add the modified form if qualified - * INSTEADs were found, else the unmodified form. - */ -#ifdef PGXC + if (parsetree_list == NIL) { #endif ----------------------------------------------------------------------- Summary of changes: src/backend/rewrite/rewriteHandler.c | 43 ++++++++++++--------------------- 1 files changed, 16 insertions(+), 27 deletions(-) hooks/post-receive -- Postgres-XC |
From: Koichi S. <koi...@us...> - 2011-02-21 10:23:26
|
Project "Postgres-XC". The branch, ha_support has been updated via 249f1124281a7a00532b2c86a423582563b0b085 (commit) via ea23e2fc2e2b325183785605180b9164fe32fcec (commit) from 124b4427aa8532f470d47054baedbaea1867e816 (commit) - Log ----------------------------------------------------------------- commit 249f1124281a7a00532b2c86a423582563b0b085 Merge: ea23e2f 124b442 Author: Koichi Suzuki <ko...@in...> Date: Mon Feb 21 19:23:42 2011 +0900 Merge branch 'ha_support' of ssh://postgres-xc.git.sourceforge.net/gitroot/postgres-xc/postgres-xc into ha_support commit ea23e2fc2e2b325183785605180b9164fe32fcec Author: Koichi Suzuki <ko...@in...> Date: Mon Feb 21 19:14:19 2011 +0900 This is to fix xcm_canstart bug. With no argument, it should just exit with exit code 0 or 1 to show if you can start the entire Postgres-XC cluster safely. In this case, it printed usage. K.Suzuki, Feb.21, 2011 diff --git a/src/pgxc/xcm/xcm_canstart.c b/src/pgxc/xcm/xcm_canstart.c index d52efc7..21913f5 100644 --- a/src/pgxc/xcm/xcm_canstart.c +++ b/src/pgxc/xcm/xcm_canstart.c @@ -79,7 +79,7 @@ int main(int ac, char *av[]) if (strcmp(av[1], "--help") == 0 || strcmp(av[1], "-h") == 0) usage(0, 0); } - if (ac != 2) + if (ac != 1) usage(__LINE__, 1); /* Check argument. --- No argument here :-) */ ----------------------------------------------------------------------- Summary of changes: src/pgxc/xcm/xcm_canstart.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-02-21 08:03:09
|
Project "Postgres-XC". The branch, ha_support has been updated via 124b4427aa8532f470d47054baedbaea1867e816 (commit) via 9c2e68462db0e41ddacb4a0c4ea935b54f6dccc1 (commit) from 91f0a7733b532bf234dbb3e3d66c5ddc48a0f16e (commit) - Log ----------------------------------------------------------------- commit 124b4427aa8532f470d47054baedbaea1867e816 Merge: 91f0a77 9c2e684 Author: Michael P <mic...@us...> Date: Mon Feb 21 17:09:42 2011 +0900 Merge branch 'master' into ha_support ----------------------------------------------------------------------- Summary of changes: src/backend/utils/adt/ruleutils.c | 12 ++++++++++++ 1 files changed, 12 insertions(+), 0 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-02-21 08:01:01
|
Project "Postgres-XC". The branch, master has been updated via 9c2e68462db0e41ddacb4a0c4ea935b54f6dccc1 (commit) from 73e92494f122343eac5895b1070bbe13ff6aee99 (commit) - Log ----------------------------------------------------------------- commit 9c2e68462db0e41ddacb4a0c4ea935b54f6dccc1 Author: Michael P <mic...@us...> Date: Mon Feb 21 16:53:16 2011 +0900 Fix for bug 3170708: Default values support for MULTI INSERT XC was complaining about DEFAULT values when doing a multiple INSERT. Ex: create table rep (a int, b int, c int default 444) distribute by round robin; insert into rep (a, b) values (44,55), (3,4); Patch written by Benny Wang diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 1551b9c..2643ab0 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -3262,6 +3262,18 @@ get_insert_query_def(Query *query, deparse_context *context) if (select_rte && values_rte) elog(ERROR, "both subquery and values RTEs in INSERT"); +#ifdef PGXC + /* + * If it's an INSERT ... SELECT or VALUES (...), (...), ... + * sql_statement is rewritten and assigned in RewriteQuery. + * Just return it here. + */ + if (IS_PGXC_COORDINATOR && !IsConnFromCoord() && values_rte != NULL) + { + appendStringInfo(buf, "%s", query->sql_statement); + return; + } +#endif /* * Start the query with INSERT INTO relname */ ----------------------------------------------------------------------- Summary of changes: src/backend/utils/adt/ruleutils.c | 12 ++++++++++++ 1 files changed, 12 insertions(+), 0 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-02-21 06:53:32
|
Project "Postgres-XC". The branch, ha_support has been updated via 91f0a7733b532bf234dbb3e3d66c5ddc48a0f16e (commit) via 72266bff1bb04ed730dc4404f4d419e341602adf (commit) via 73e92494f122343eac5895b1070bbe13ff6aee99 (commit) from d73ae5182149b08e0728edb96eee339e0c0498b7 (commit) - Log ----------------------------------------------------------------- commit 91f0a7733b532bf234dbb3e3d66c5ddc48a0f16e Merge: 72266bf 73e9249 Author: Michael P <mic...@us...> Date: Mon Feb 21 15:59:35 2011 +0900 Merge branch 'master' into ha_support commit 72266bff1bb04ed730dc4404f4d419e341602adf Author: Michael P <mic...@us...> Date: Mon Feb 21 15:57:29 2011 +0900 CLEAN CONNECTION support for Mirror mode Following syntax is now taken into account: CLEAN CONNECTION TO NODE 1/2,4/6,8/2 FOR DATABASE template1; This cleans connections for individual mirrors. It is also possible to combine datanode and individual mirrors: CLEAN CONNECTION TO NODE 1/2,3,8/2 FOR DATABASE template1; diff --git a/src/backend/pgxc/pool/poolutils.c b/src/backend/pgxc/pool/poolutils.c index cccc9de..c271287 100644 --- a/src/backend/pgxc/pool/poolutils.c +++ b/src/backend/pgxc/pool/poolutils.c @@ -17,7 +17,8 @@ #include "miscadmin.h" #include "libpq/pqsignal.h" -#include "pgxc/pgxc.h" +#include "nodes/pg_list.h" +#include "pgxc/mirror.h" #include "pgxc/poolmgr.h" #include "pgxc/locator.h" #include "pgxc/poolutils.h" @@ -37,19 +38,25 @@ * * SQL query synopsis is as follows: * CLEAN CONNECTION TO - * (COORDINATOR num | DATANODE num | ALL {FORCE}) + * (COORDINATOR num | NODE num | ALL {FORCE}) * FOR DATABASE dbname * * Connection cleaning has to be made on a chosen database called dbname. 
* * It is also possible to clean connections of several Coordinators or Datanodes - * Ex: CLEAN CONNECTION TO DATANODE 1,5,7 FOR DATABASE template1 + * Ex: CLEAN CONNECTION TO NODE 1,5,7 FOR DATABASE template1 * CLEAN CONNECTION TO COORDINATOR 2,4,6 FOR DATABASE template1 * * Or even to all Coordinators/Datanodes at the same time - * Ex: CLEAN CONNECTION TO DATANODE * FOR DATABASE template1 + * Ex: CLEAN CONNECTION TO NODE * FOR DATABASE template1 * CLEAN CONNECTION TO COORDINATOR * FOR DATABASE template1 * + * In Mirror mode, the following syntax can also be used + * It is possible to choose datanodes (effective on all mirrors of this datanode) + * and individual mirrors. + * Ex: CLEAN CONNECTION TO NODE 1/2 FOR DATABASE template1 + * CLEAN CONNECTION TO NODE 1/3,3,4/6 FOR DATABASE template1 + * * When FORCE is used, all the transactions using pooler connections are aborted, * and pooler connections are cleaned up. * Ex: CLEAN CONNECTION TO ALL FORCE FOR DATABASE template1; @@ -144,14 +151,57 @@ CleanConnection(CleanConnStmt *stmt) foreach(nodelist_item, stmt->nodes) { - int node_num = intVal(lfirst(nodelist_item)); - stmt_nodes = lappend_int(stmt_nodes, node_num); - - if (node_num > max_node_number || - node_num < 1) - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("Node Number %d is incorrect", node_num))); + int nodenum; + if (IsA(lfirst(nodelist_item), PGXCMirror)) + { + PGXCMirror *node = lfirst(nodelist_item); + + if (!IsPGXCMirrorMode) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Cannot use Mirror ID format in non-mirror mode"))); + if (node->data_node_id > NumDataNodes) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Node Number %d is incorrect", node->data_node_id))); + + if (node->mirror_id > PGXCMirror_GetMirrorCount(node->data_node_id)) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Mirror Number %d is incorrect", node->mirror_id))); + + if (PGXCMirror_IsMirrorOffline(node->data_node_id, 
node->mirror_id)) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Datanode %d Mirror %d is offline", + node->data_node_id, node->mirror_id))); + + nodenum = PGXCMirror_GetMirrorGlobalID(node->data_node_id, node->mirror_id); + if (nodenum > PGXCMirror_GetMirrorTotalCount() || + nodenum < 1) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Node Number %d is incorrect", nodenum))); + + stmt_nodes = lappend_int(stmt_nodes, nodenum); + } + else + { + nodenum = intVal(lfirst(nodelist_item)); + + /* In this case select all the mirrors associated to this Datanode */ + if (!is_coord && IsPGXCMirrorMode) + stmt_nodes = list_concat(stmt_nodes, + PGXCMirror_GetSubsetMirrors(nodenum, true)); + else + stmt_nodes = lappend_int(stmt_nodes, nodenum); + + if (nodenum > max_node_number || + nodenum < 1) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Node Number %d is incorrect", nodenum))); + } } /* Build lists to be sent to Pooler Manager */ ----------------------------------------------------------------------- Summary of changes: src/backend/pgxc/pool/poolutils.c | 99 ++++++++++++++++++++++++++++++++---- src/backend/tcop/utility.c | 6 ++ src/include/pgxc/poolutils.h | 1 + 3 files changed, 95 insertions(+), 11 deletions(-) hooks/post-receive -- Postgres-XC |
From: Koichi S. <koi...@gm...> - 2011-02-21 06:26:40
|
Hi, The XCM module has been added to Postgres-XC (on the ha_support branch so far). I have added the following file to the SourceForge development website: XCM_Module_Document_20110221.pdf You can download it from the page https://sourceforge.net/projects/postgres-xc/files/misc/ The misc folder was created to store temporary materials intended to become part of future releases. Good luck. ---------- Koichi Suzuki |
From: Michael P. <mic...@us...> - 2011-02-21 04:54:17
|
Project "Postgres-XC". The branch, master has been updated via 73e92494f122343eac5895b1070bbe13ff6aee99 (commit) from 3fdc303e50d599104c551dc62aaabb369086dd22 (commit) - Log ----------------------------------------------------------------- commit 73e92494f122343eac5895b1070bbe13ff6aee99 Author: Michael P <mic...@us...> Date: Mon Feb 21 13:29:22 2011 +0900 Fix for bug 3170713: DROP DATABASE wihout cleaning connections This adds a call to clean pooler connections for the given database before dropping the given DB on each backend. I added also an owner check to avoid a user to clean connections if he has no rights on the given db. diff --git a/src/backend/pgxc/pool/poolutils.c b/src/backend/pgxc/pool/poolutils.c index cccc9de..eeeba8a 100644 --- a/src/backend/pgxc/pool/poolutils.c +++ b/src/backend/pgxc/pool/poolutils.c @@ -23,6 +23,7 @@ #include "pgxc/poolutils.h" #include "access/gtm.h" #include "commands/dbcommands.h" +#include "utils/acl.h" #include "nodes/parsenodes.h" @@ -185,3 +186,29 @@ CleanConnection(CleanConnStmt *stmt) if (dn_list) list_free(dn_list); } + +/* + * DropDBCleanConnection + * + * Clean Connection for given database before dropping it + * FORCE is not used here + */ +void +DropDBCleanConnection(char *dbname) +{ + List *co_list = GetAllCoordNodes(); + List *dn_list = GetAllDataNodes(); + + /* Check permissions for this database */ + if (!pg_database_ownercheck(get_database_oid(dbname), GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_DATABASE, + dbname); + + PoolManagerCleanConnection(dn_list, co_list, dbname); + + /* Clean up memory */ + if (co_list) + list_free(co_list); + if (dn_list) + list_free(dn_list); +} diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 2a13bad..bccea10 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -1066,6 +1066,12 @@ ProcessUtility(Node *parsetree, { DropdbStmt *stmt = (DropdbStmt *) parsetree; +#ifdef PGXC + /* Clean connections before dropping a 
database */ + if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) + DropDBCleanConnection(stmt->dbname); +#endif + PreventTransactionChain(isTopLevel, "DROP DATABASE"); dropdb(stmt->dbname, stmt->missing_ok); } diff --git a/src/include/pgxc/poolutils.h b/src/include/pgxc/poolutils.h index 5b87494..fff8b9e 100644 --- a/src/include/pgxc/poolutils.h +++ b/src/include/pgxc/poolutils.h @@ -27,4 +27,5 @@ #define CLEAN_CONNECTION_EOF -1 void CleanConnection(CleanConnStmt *stmt); +void DropDBCleanConnection(char *dbname); #endif ----------------------------------------------------------------------- Summary of changes: src/backend/pgxc/pool/poolutils.c | 27 +++++++++++++++++++++++++++ src/backend/tcop/utility.c | 6 ++++++ src/include/pgxc/poolutils.h | 1 + 3 files changed, 34 insertions(+), 0 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-02-21 02:18:01
|
Project "website". The branch, master has been updated via d71e7047cde9775855104225592e2a6b9cba7bc3 (commit) from f990bdd1895bd99641d4492de3862b402b638279 (commit) - Log ----------------------------------------------------------------- commit d71e7047cde9775855104225592e2a6b9cba7bc3 Author: Michael P <mic...@us...> Date: Mon Feb 21 11:24:14 2011 +0900 Change roadmap with clean unordered lists diff --git a/roadmap.html b/roadmap.html index feec748..f01ff86 100755 --- a/roadmap.html +++ b/roadmap.html @@ -91,19 +91,22 @@ Version 0.9.4 (March, 2011) </h4> <p class="inner"> -License modified from LGPL to BSD<br /> -Merge with PostgreSQL 9.0.3<br /> -HA Capability<br /> <ul> -<li>Datanode mirroring functionnality: multiple Datanodes seen as only one node,<br /> - SQL-based replication</li> -<li>XC Cluster Manager: manage in an allocated shared memory space node information,<br /> - used to report node failures</li> -<li>XC watcher: Central part managing the XC Cluster.<br /> - It contains commands to start/stop/restart the cluster.<br /> - It is also in charge of gathering and reporting to XCM entities nodes that failed.</li> +<li>License modified from LGPL to BSD</li> +<li>Merge with PostgreSQL 9.0.3</li> +<li>HA Capability + <ul> + <li>Datanode mirroring functionnality: multiple Datanodes seen as only one node,<br /> + SQL-based replication</li> + <li>XC Cluster Manager: manage in an allocated shared memory space node information,<br /> + used to report node failures</li> + <li>XC watcher: Central part managing the XC Cluster.<br /> + It contains commands to start/stop/restart the cluster.<br /> + It is also in charge of gathering and reporting to XCM entities nodes that failed.</li> + </ul> +</li> +<li>Bug corrections</li> </ul> -Bug corrections<br /> </p> <!-- === Beyond Version 1.0 === ---> ----------------------------------------------------------------------- Summary of changes: roadmap.html | 25 ++++++++++++++----------- 1 files changed, 14 insertions(+), 11 
deletions(-) hooks/post-receive -- website |
From: Michael P. <mic...@us...> - 2011-02-21 02:11:08
|
Project "website". The branch, master has been updated via f990bdd1895bd99641d4492de3862b402b638279 (commit) via de189e7f1247b0032ebe33b0af3503b7dde248b9 (commit) from ed16e38e3decb21b3cdc0a51b9a26ccb1d4c9cc6 (commit) - Log ----------------------------------------------------------------- commit f990bdd1895bd99641d4492de3862b402b638279 Author: Michael P <mic...@us...> Date: Mon Feb 21 11:17:05 2011 +0900 Update for release 0.9.4 and other stuff diff --git a/events.html b/events.html index cc3a412..9a14813 100755 --- a/events.html +++ b/events.html @@ -15,9 +15,7 @@ <p class="plain"> A lot of opportunities to meet the Core developpers!! <ul> -<li><a href="http://2010.pgday.eu/" target="_blank">PGDay-EU</a> in November 2010</li> -<li>PG-East in March 2011</li> -<li>PG-Con 2010 in May 2011</li> +<li>PgCon 2011 in May 2011</li> </ul> </p> diff --git a/members.html b/members.html index 4db05ef..1300a32 100755 --- a/members.html +++ b/members.html @@ -26,31 +26,32 @@ Postgres-XC development team <h4>Koichi Suzuki</h4> <p class="inner"> -Project leader and architect.<br> -His background includes object relational database engine (UniSQL) and<br> -PostgreSQL development. +Project leader and architect.<br /> +His background includes object relational database engine (UniSQL) and<br /> +PostgreSQL development.<br /> +He is also the developer of XCM (XC cluster manager). 
</p> <h4>Mason Sharp</h4> <p class="inner"> -Architect and development leader.<br> -Coordinator developer.<br> -He is also the main architect of GridSQL database cluster.<br> +Architect and development leader.<br /> +As a Coordinator developer, he has worked on planner and locator extensions.<br /> +He is also the main architect of GridSQL database cluster.<br /> </p> <h4>Pavan Deolasee</h4> <p class="inner"> -Global Transaction Manager developer.<br> -He is well known as HOT developer in PostgreSQL.<br> -He is also helping in source code review and PostgreSQL internals.<br> +Global Transaction Manager developer.<br /> +He is well known as HOT developer in PostgreSQL.<br /> +He is also helping in source code review and PostgreSQL internals.<br /> </p> <h4>Andrei Martsinchyk</h4> <p class="inner"> -Data Node and connection pooling developer.<br> +He has worked on Data Node, connection pooling and cursor support<br /> He is also GridSQL developer and is now developping aggregate functions and other cross-node operation. </p> @@ -58,10 +59,10 @@ functions and other cross-node operation. <h4><a href="http://michaelpq.users.sourceforge.net/">Michael Paquier</a></h4> <p class="inner"> -Coordinator feature developer.<br> -Currently working on user-defined function, Sequence handling and Global values.<br> -He helped in modifying DBT-1 benchmark for Postgres-XC.<br> -He also contributed to enhance pgbench and 2PC. +Coordinator feature developer.<br /> +He has developed a couple of GTM features, pooler extension and DDL synchronization.<br /> +He helped in modifying DBT-1 benchmark for Postgres-XC.<br /> +He also contributed to enhance pgbench in PostgreSQL 9.0. </p> <h4>Takayuki Suto</h4> @@ -73,7 +74,8 @@ Test, performance evaluation and analysis, related documents and utilities. <h4>Devrim Gunduz</h4> <p class="inner"> -Binary buiding for releases.<br> He is also developping binary packages of PostgreSQL. 
+Binary buiding for releases.<br /> +He is also developping binary packages of PostgreSQL. </p> </body> diff --git a/roadmap.html b/roadmap.html index d2fbece..feec748 100755 --- a/roadmap.html +++ b/roadmap.html @@ -22,7 +22,7 @@ TODO: Should prototyping/application partner issue be included here? Postgres-XC Roadmap </h2> -<!-- ==== Current Limintation ==== --> +<!-- ==== Current Limitation ==== --> <h3> Current Limitations of Postgres-XC </h3> @@ -36,15 +36,17 @@ On the other hand, Postgres-XC needs to enhance support for general statements.< As of Version 0.9.3, Postgres-XC supports statements which can be executed on a single data node, or on multiple nodes for single and multi step.<br /> This new version adds support for:<br /> -- Cursor Support<br /> -- Basic cross-node operation<br /> -- Global timestamp<br /> -- DDL synchronisation<br /> -- Cluster-wide installer<br /> -- Cluster-wide operation utilities<br /> -- Driver support (ECPG, JDBC, PHP, etc.)<br /> -- Extended Query Protocol (for JDBC)<br /> -- Support of external 2PC from application<br /> +<ul> +<li>Cursor Support</li> +<li>Basic cross-node operation</li> +<li>Global timestamp</li> +<li>DDL synchronisation</li> +<li>Cluster-wide installer</li> +<li>Cluster-wide operation utilities</li> +<li>Driver support (ECPG, JDBC, PHP, etc.)</li> +<li>Extended Query Protocol (for JDBC)</li> +<li>Support of external 2PC from application</li> +</ul> However there are some limitations please refer to <a href="https://sourceforge.net/projects/postgres-xc/files/Version_0.9.3/PG-XC_SQL_Limitations_v0_9_3.pdf/download" target="_blank"> SQL Limitations </a> document for further details. 
@@ -85,34 +87,48 @@ subqueries<br> <!-- ==== For Version 1.0 ==== --> <h4> -<!-- Version 1.0 (Late in December, 2010) --> -Version 1.0 (March, 2011) -</h4> +Version 0.9.4 (March, 2011) +</h4> -<p class="inner"> -Cross-node oepration optimization<br /> -More variety of statements.<br /> +<p class="inner"> +License modified from LGPL to BSD<br /> +Merge with PostgreSQL 9.0.3<br /> HA Capability<br /> -Trigger<br /> -</p> +<ul> +<li>Datanode mirroring functionnality: multiple Datanodes seen as only one node,<br /> + SQL-based replication</li> +<li>XC Cluster Manager: manage in an allocated shared memory space node information,<br /> + used to report node failures</li> +<li>XC watcher: Central part managing the XC Cluster.<br /> + It contains commands to start/stop/restart the cluster.<br /> + It is also in charge of gathering and reporting to XCM entities nodes that failed.</li> +</ul> +Bug corrections<br /> +</p> <!-- === Beyond Version 1.0 === ---> <h4> -Beyond Version 1.0 -</h4> +After Version 0.9.4, on the way to Version 1.0 +</h4> <p class="inner"> -PITR cluster-wide recovery<br /> -Multi-step Prepared statments<br /> -More variety of statements, such as <code>SELECT</code> in <code>INSERT</code><br /> -GTM-Standby<br /> -Savepoint<br /> -Session Parameters<br /> -Backward cursor<br /> -Batch, statement pushdown<br /> -Global constraints<br /> -Tuple relocation (distrubute key update)<br /> -Regression tests<br /> +<ul> +<li>PITR cluster-wide recovery</li> +<li>Trigger</li> +<li>Temporary (TEMP) tables</li> +<li>HOT standby full support</li> +<li>Cross-node operation optimization</li> +<li>Multi-step Prepared statments</li> +<li>More variety of statements</li> +<li>GTM-Standby</li> +<li>Savepoint</li> +<li>Session Parameters</li> +<li>Backward cursor</li> +<li>Batch, statement pushdown</li> +<li>Global constraints</li> +<li>Tuple relocation (distribute key update)</li> +<li>Regression tests</li> +</ul> </p> </body> commit de189e7f1247b0032ebe33b0af3503b7dde248b9 
Author: Michael P <mic...@us...> Date: Mon Feb 21 10:48:06 2011 +0900 Deletion of a useless file diff --git a/download.html~ b/download.html~ deleted file mode 100755 index cfe22b6..0000000 --- a/download.html~ +++ /dev/null @@ -1,169 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" - "http://www.w3.org/TR/htmol4/strict.dtd"> -<!-- - ==== Postgres-XC Download Page ==== ---> -<html lang="en"> -<head> -<title>Postgres_XC Project Home</title> -<meta http-equiv="Content-Type" content="text/html"> -<meta name="author" content="koichi-szk"> -<meta name="description" content="Postgres-XC project home"> -<meta name="keywords" content="PostgreSQL cluster scalability multi-master"> - -<base href="."/> -<link rel="stylesheet" type="text/css" href="pgcx_1.css"> -</head> -<body bgcolor=#FFFFFF> - -<h2> -Downloading Postgres-XC -</h2> - -<h3> -List of Release Materials -</h3> - -<p> -The current release includes the following materials. -Please note that documentation is not included in the source material. -Please download documentation from -<a href="https://sourceforge.net/projects/postgres-xc/files/" target="_blank"> -the project download page. -</a> -</p> -<p> -Please also note tarball files do not include Postgres-XC documents. -</p> - -<!-- Documents of version 0.9.1 --> -<h4> -Version 0.9.1 -</h4> - -<p> -<ul> -<!-- tarball --> -<li> -<code>pgxc_v0.9.1.tar.gz</code>:  -Latest version of Postgres-XC available. -Please note that Postgres-XC documentation is not included in this file. -⇒ -<a href="https://sourceforge.net/projects/postgres-xc/files/Version_0.9.1/pgxc_v0_9_1.tar.gz/download" target="_blank"> -(download) -</a> -</li> - -<!-- tarball (diff) --> -<li> -<code>PGXC_v0_9_1-PG_REL8_4_3.patch.gz</code>:  -The same material as above, but this file includes only the patch to apply -to the PostgreSQL 8.4.3 release source code. 
-It is useful if you would like to see just a difference between PostgreSQL -and Postgres-XC. -No Postgres-XC documentation is included in this file either. -⇒ -<a href="https://sourceforge.net/projects/postgres-xc/files/Version_0.9.1/PGXC_v0_9_1-PG_REL8_4_3.patch.gz/download" target="_blank"> -(download) -</a> -</li> - -<!-- License --> -<li> -<code>COPYING</code>:  -License description. Postgres-XC is distributed under LGPL version 2.1 -⇒ -<a href="https://sourceforge.net/projects/postgres-xc/files/Version_0.9.1/COPYING/download" target="_blank"> -(download) -</a> -</li> - -<!-- Files --> -<li> -<code>FILES</code>:  -Description of files included in Postgres-XC 0.9.1 release. -⇒ -<a href="https://sourceforge.net/projects/postgres-xc/files/Version_0.9.1/FILES/download" target="_blank"> -(download) -</a> -</li> - -<!-- Readme --> -<li> -<code>README</code>:  -Overview of the release. -⇒ -<a href="https://sourceforge.net/projects/postgres-xc/files/Version_0.9.1/README/download" target="_blank"> -(download) -</a> -</li> - -<!-- Reference Manual --> -<li> -<code>PG-XC_ReferenceManual_v0_9_1.pdf</code>:  -Reference of Postgres-XC extension. -⇒ -<a href="https://sourceforge.net/projects/postgres-xc/files/Version_0.9.1/PG-XC_ReferenceManual_v0_9_1.pdf/download" target="_blank"> -(download) -</a> -</li> - -<!-- pgbench Tutorial Manual --> -<li> -<code>PG-XC_pgbench_Tutorial_v0_9_1.pdf</code>:  -Step by step description how to build and configure pgbench to run with -Postgres-XC. -⇒ -<a href="https://sourceforge.net/projects/postgres-xc/files/Version_0.9.1/PG-XC_pgbench_Tutorial_v0_9_1.pdf/download" target="_blank"> -(download) -</a> -</li> - -<!-- DBT-1 Tutorial Manual --> -<li> -<code>PG-XC_DBT1_Tutorial_v0_9_1.pdf</code>:  -Step by step description how to build and configure DBT-1 to run with -Postgres-XC. 
-⇒ -<a href="https://sourceforge.net/projects/postgres-xc/files/Version_0.9.1/PG-XC_DBT1_Tutorial_v0_9_1.pdf/download" target="_blank"> -(download) -</a> -</li> - -<!-- Install Manual --> -<li> -<code>PG-XC_InstallManual_v0_9_1.pdf</code>:  -Step by step description how to build, install and configure Postgres-XC. -⇒ -<a href="https://sourceforge.net/projects/postgres-xc/files/Version_0.9.1/PG-XC_InstallManual_v0_9_1.pdf/download" target="_blank"> -(download) -</a> -</li> - -<!-- Architecture Document --> -<li> -<code>PG-XC_Architecture_v0_9.pdf</code>:  -Description of the outline of Postgres-XC internals. -⇒ -<a href="https://sourceforge.net/projects/postgres-xc/files/Version_0.9/PG-XC_Architecture.pdf/download" target="_blank"> -(download) -</a> -</li> -</ul> - -</p> - -<!--div align="left" style="font-size:95%;"--> -<h4> -<a href="benchmarks.html" target="main">Benchmarks</a> -  -</h4> - -<h4> -<a href="prev_vers/version0_9.html" target="main">Previous Versions</a> -  -</h4> -</div> - -</body> -</html> \ No newline at end of file ----------------------------------------------------------------------- Summary of changes: download.html~ | 169 -------------------------------------------------------- events.html | 4 +- members.html | 32 ++++++----- roadmap.html | 78 ++++++++++++++++---------- 4 files changed, 65 insertions(+), 218 deletions(-) delete mode 100755 download.html~ hooks/post-receive -- website |
From: Michael P. <mic...@us...> - 2011-02-18 07:06:32
|
Project "Postgres-XC". The branch, ha_support has been updated via d73ae5182149b08e0728edb96eee339e0c0498b7 (commit) from f42b489b49f366c78d816708d47b380f9db640d9 (commit) - Log ----------------------------------------------------------------- commit d73ae5182149b08e0728edb96eee339e0c0498b7 Author: Michael P <mic...@us...> Date: Fri Feb 18 15:49:39 2011 +0900 Mirroring and XCM (XC Cluster Manager) implementation This commit adds support for Datanode Mirroring. This permits to create multiple mirrors of a datanode. From the application, Mirrors of a Datanode are seen as a unique datanode. This is customizable with some new GUC parameters in the section DATANODE MIRRORING of postgresql.conf: - mirror_mode, switch for mirror mode off by default - mirror_count (Coordinator param), string to set the number of mirrors for each node It has a format 'num1,num2,...numN'. For Example '2,3' means that your cluster has 2 Datanodes. Datanode 1 has 2 mirrors and Datanode 2 has 3 mirrors The number of elements in this string has to be the same as num_data_nodes This is set at '1,1' by default - preferred_mirror_id (Coordinator param), an integer to set the mirror that is chosen for read operations on replicated tables (in correlation with preferred_data_node) - preferred_data_node (Coordinator param) has been modified as an integer, it permits to set a unique preferred node. Note: This parameter was disabled before. - is_primary_mirror (Datanode param), determines if the datanode is itself a primary or not Non-primary datanodes are not authorized to register on GTM From GTM, Mirrors are seen as a unique Datanode About XCM (XC cluster manager), roughly this allocates a portion of shared memory containing a lot of information about the cluster. A new configuration parameter is available called pgxc_ha.conf. 
A couple of new GUC parameters have been created for XCM activation in CLUSTER MANAGER section: - cluster_manager, switch to activate or not XCM for the node if activated, the node will try to take from shared memory the connection information from GTM. Pooler will do the same for connection parameters (host and port) for other datanodes. - pgxc_mirror_id (Datanode parameter), ID necessary to help the mirror to check if it is itself a primary or not. In case it is not a primary it does not register on GTM. XCM creates a couple of utilities: xcm_canstart - Test if the cluster can start xcm_freemember - Free the shared memory area xcm_getevent - Get status reports emitted from Postgres-XC nodes xcm_getflag - Get the status flag for a given component xcm_home - Sets the repository where shared memory information is saved xcm_initmember - Initialize the allocated shared memory based on information from pgxc_ha.conf xcm_printshm - Print to stdout the information in shared memory xcm_putevent - Inquire the status of the component and writes the status to stdout xcm_setflag - Change the status flag for a given component xcm_telhowto - Help for XCM Postgres-XC nodes report reports of failed Nodes (Coordinator or Datanodes) through the pooler when acquiring a connection fails. Reports about Failed GTM are also delivered. XCM has been written by Koichi Suzuki. Datanode Mirroring and XCM interface for XC have been written by me. 
diff --git a/src/Makefile b/src/Makefile index 7fbbcb3..cd6a6dc 100644 --- a/src/Makefile +++ b/src/Makefile @@ -19,6 +19,7 @@ all install installdirs uninstall distprep: # GTM should be built before backend because of dependancy $(MAKE) -C gtm $@ $(MAKE) -C interfaces $@ + $(MAKE) -C pgxc $@ $(MAKE) -C backend $@ $(MAKE) -C backend/utils/mb/conversion_procs $@ $(MAKE) -C backend/snowball $@ @@ -50,6 +51,7 @@ clean: $(MAKE) -C port $@ $(MAKE) -C timezone $@ $(MAKE) -C gtm $@ + $(MAKE) -C pgxc $@ $(MAKE) -C backend $@ $(MAKE) -C backend/snowball $@ $(MAKE) -C include $@ @@ -65,6 +67,7 @@ distclean maintainer-clean: $(MAKE) -C port $@ $(MAKE) -C timezone $@ $(MAKE) -C gtm $@ + $(MAKE) -C pgxc $@ $(MAKE) -C backend $@ $(MAKE) -C backend/snowball $@ $(MAKE) -C include $@ diff --git a/src/backend/access/transam/gtm.c b/src/backend/access/transam/gtm.c index a9bf1d6..5264708 100644 --- a/src/backend/access/transam/gtm.c +++ b/src/backend/access/transam/gtm.c @@ -17,7 +17,8 @@ #include "access/transam.h" #include "utils/elog.h" #include "miscadmin.h" -#include "pgxc/pgxc.h" +#include "nodes/pg_list.h" +#include "pgxc/mirror.h" /* Configuration variables */ char *GtmHost = "localhost"; @@ -42,6 +43,15 @@ InitGTM() { /* 256 bytes should be enough */ char conn_str[256]; + char *gtm_host = NULL; + int gtm_port; + + /* Get Connection parameters from Cluster manager if necessary */ + if (IsXCM) + { + gtm_port = PGXCMirror_GetLocalGTMPort(); + gtm_host = PGXCMirror_GetLocalGTMHost(); + } /* If this thread is postmaster itself, it contacts gtm identifying itself */ if (!IsUnderPostmaster) @@ -53,11 +63,20 @@ InitGTM() else if (IS_PGXC_DATANODE) remote_type = PGXC_NODE_DATANODE; - sprintf(conn_str, "host=%s port=%d pgxc_node_id=%d remote_type=%d postmaster=1", - GtmHost, GtmPort, PGXCNodeId, remote_type); + if (IsXCM) + sprintf(conn_str, "host=%s port=%d pgxc_node_id=%d remote_type=%d postmaster=1", + gtm_host, gtm_port, PGXCNodeId, remote_type); + else + sprintf(conn_str, 
"host=%s port=%d pgxc_node_id=%d remote_type=%d postmaster=1", + GtmHost, GtmPort, PGXCNodeId, remote_type); } else - sprintf(conn_str, "host=%s port=%d pgxc_node_id=%d", GtmHost, GtmPort, PGXCNodeId); + { + if (IsXCM) + sprintf(conn_str, "host=%s port=%d pgxc_node_id=%d", gtm_host, gtm_port, PGXCNodeId); + else + sprintf(conn_str, "host=%s port=%d pgxc_node_id=%d", GtmHost, GtmPort, PGXCNodeId); + } conn = PQconnectGTM(conn_str); if (GTMPQstatus(conn) != CONNECTION_OK) @@ -71,6 +90,10 @@ InitGTM() errno = save_errno; CloseGTM(); + + /* Report error to Cluster manager */ + if (IsXCM) + PGXCMirror_ReportGTMFail(); } } diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 40777bf..9cd45a1 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -40,11 +40,11 @@ #include "parser/parsetree.h" #include "rewrite/rewriteManip.h" #ifdef PGXC -#include "pgxc/pgxc.h" #include "access/gtm.h" #include "pgxc/planner.h" #include "tcop/tcopprot.h" #include "pgxc/poolmgr.h" +#include "pgxc/mirror.h" #endif #include "utils/rel.h" @@ -2075,8 +2075,16 @@ transformExecDirectStmt(ParseState *pstate, ExecDirectStmt *stmt) ListCell *nodeitem; RemoteQuery *step = makeNode(RemoteQuery); bool is_local = false; + bool nodenum_defined = false; List *raw_parsetree_list; ListCell *raw_parsetree_item; + int total_num_nodes; + int nodenum_real = 0; + + if (is_coordinator) + total_num_nodes = NumCoords; + else + total_num_nodes = NumDataNodes; if (list_length(nodelist) > 1) ereport(ERROR, @@ -2088,20 +2096,63 @@ transformExecDirectStmt(ParseState *pstate, ExecDirectStmt *stmt) (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser to use EXECUTE DIRECT"))); - /* Check if execute direct is local and if node number is correct*/ + /* + * Check if execute direct is local and if node number is correct + * For the moment EXECUTE DIRECT does not support multiple nodes. 
+ */ foreach(nodeitem, nodelist) { - int nodenum = intVal(lfirst(nodeitem)); + if (IsA(lfirst(nodeitem), PGXCMirror)) + { + PGXCMirror *node = lfirst(nodeitem); + int nodenum; + if (!IsPGXCMirrorMode) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Cannot use Mirror ID format in non-mirror mode"))); + if (is_coordinator) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Cannot use Mirror ID format for Coordinator"))); - if (nodenum < 1 || - (!is_coordinator && nodenum > NumDataNodes) || - (is_coordinator && nodenum > NumCoords)) - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("Node Number %d is incorrect", nodenum))); + if (node->data_node_id > NumDataNodes) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Node Number %d is incorrect", node->data_node_id))); + + if (node->mirror_id > PGXCMirror_GetMirrorCount(node->data_node_id)) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Mirror Number %d is incorrect", node->mirror_id))); - if (nodenum == PGXCNodeId && is_coordinator) - is_local = true; + if (PGXCMirror_IsMirrorOffline(node->data_node_id, node->mirror_id)) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Datanode %d Mirror %d is offline", + node->data_node_id, node->mirror_id))); + + nodenum = PGXCMirror_GetMirrorGlobalID(node->data_node_id, node->mirror_id); + if (nodenum > PGXCMirror_GetMirrorTotalCount() || + nodenum < 1) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Node Number %d is incorrect", nodenum))); + + nodenum_real = nodenum; + nodenum_defined = true; + } + else + { + int nodenum = intVal(lfirst(nodeitem)); + if (nodenum < 1 || + (!is_coordinator && nodenum > total_num_nodes) || + (is_coordinator && nodenum > total_num_nodes)) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Node Number %d is incorrect", nodenum))); + if (nodenum == PGXCNodeId && is_coordinator) + is_local = true; + } } /* Transform the query into a raw parse list 
*/ @@ -2208,7 +2259,26 @@ transformExecDirectStmt(ParseState *pstate, ExecDirectStmt *stmt) foreach(nodeitem, nodelist) { int nodenum = intVal(lfirst(nodeitem)); - step->exec_nodes->nodelist = lappend_int(step->exec_nodes->nodelist, nodenum); + + /* + * In Mirror mode, + * If node number is a simple datanode number + * for a SELECT command pick up only one node in the subset + * for a UTILITY command pick up the whole subset + * If node number is mirror format dn_id/mirror_id, keep it as it is. + */ + if (step->exec_direct_type == EXEC_DIRECT_LOCAL_UTILITY && + IsPGXCMirrorMode && + !nodenum_defined) + step->exec_nodes->nodelist = PGXCMirror_GetSubsetMirrors(nodenum, true); + else if (step->exec_direct_type == EXEC_DIRECT_SELECT && + IsPGXCMirrorMode && + !nodenum_defined) + step->exec_nodes->nodelist = PGXCMirror_GetSubsetMirrors(nodenum, false); + else if (nodenum_defined) /* Node Number where to run has already been calculated */ + step->exec_nodes->nodelist = lappend_int(step->exec_nodes->nodelist, nodenum_real); + else + step->exec_nodes->nodelist = lappend_int(step->exec_nodes->nodelist, nodenum); } step->sql_statement = pstrdup(query); diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index e1266ed..09186d3 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -183,6 +183,7 @@ static TypeName *TableFuncTypeName(List *columns); VariableSetStmt *vsetstmt; /* PGXC_BEGIN */ DistributeBy *distby; + PGXCMirror *pgxc_mirror; /* PGXC_END */ } @@ -425,6 +426,7 @@ static TypeName *TableFuncTypeName(List *columns); %type <ival> opt_frame_clause frame_extent frame_bound /* PGXC_BEGIN */ %type <distby> OptDistributeBy +%type <pgxc_mirror> mirror_elt /* PGXC_END */ @@ -485,7 +487,7 @@ static TypeName *TableFuncTypeName(List *columns); LEAST LEFT LEVEL LIKE LIMIT LISTEN LOAD LOCAL LOCALTIME LOCALTIMESTAMP LOCATION LOCK_P LOGIN_P /* PGXC_BEGIN */ - MAPPING MATCH MAXVALUE MINUTE_P MINVALUE MODE MODULO MONTH_P MOVE + MAPPING MATCH MAXVALUE 
MINUTE_P MINVALUE MIRROR MODE MODULO MONTH_P MOVE /* PGXC_END */ NAME_P NAMES NATIONAL NATURAL NCHAR NEW NEXT NO NOCREATEDB NOCREATEROLE NOCREATEUSER NODE NOINHERIT NOLOGIN_P NONE NOSUPERUSER @@ -6541,7 +6543,9 @@ opt_analyze: /***************************************************************************** * * QUERY: - * EXECUTE DIRECT ON (COORDINATOR num, ... | NODE num, ...) query + * EXECUTE DIRECT ON + * (COORDINATOR num, ... | + * NODE (node_num | node_num/mirror_num), ... ) query * *****************************************************************************/ @@ -6549,7 +6553,6 @@ ExecDirectStmt: EXECUTE DIRECT ON COORDINATOR coord_list DirectStmt { ExecDirectStmt *n = makeNode(ExecDirectStmt); n->coordinator = TRUE; - n->nodes = NIL; n->nodes = $5; n->query = $6; $$ = (Node *)n; @@ -6569,7 +6572,7 @@ DirectStmt: ; coord_list: - Iconst { $$ = list_make1(makeInteger($1)); } + Iconst { $$ = list_make1(makeInteger($1)); } | coord_list ',' Iconst { $$ = lappend($1, makeInteger($3)); } | '*' { @@ -6581,8 +6584,10 @@ coord_list: ; data_node_list: - Iconst { $$ = list_make1(makeInteger($1)); } + Iconst { $$ = list_make1(makeInteger($1)); } + | mirror_elt { $$ = list_make1($1); } | data_node_list ',' Iconst { $$ = lappend($1, makeInteger($3)); } + | data_node_list ',' mirror_elt { $$ = lappend($1, $3); } | '*' { int i; @@ -6592,6 +6597,17 @@ data_node_list: } ; +mirror_elt: + Iconst '/' Iconst + { + PGXCMirror *n = makeNode(PGXCMirror); + + n->data_node_id = $1; + n->mirror_id = $3; + $$ = (Node *) n; + } + ; + /***************************************************************************** * * QUERY: @@ -10425,6 +10441,9 @@ unreserved_keyword: | MAXVALUE | MINUTE_P | MINVALUE +/* PGXC_BEGIN */ + | MIRROR +/* PGXC_END */ | MODE /* PGXC_BEGIN */ | MODULO diff --git a/src/backend/pgxc/locator/locator.c b/src/backend/pgxc/locator/locator.c index d1aef8b..6729b15 100644 --- a/src/backend/pgxc/locator/locator.c +++ b/src/backend/pgxc/locator/locator.c @@ -38,6 +38,7 @@ 
#include "utils/tqual.h" #include "pgxc/poolmgr.h" #include "pgxc/locator.h" +#include "pgxc/mirror.h" #include "catalog/pgxc_class.h" #include "catalog/namespace.h" @@ -55,12 +56,9 @@ bool locatorInited = false; /* GUC parameter */ -char *PreferredDataNodes = NULL; +int PreferredDataNode = 0; int primary_data_node = 1; -/* Preferred to use when reading from replicated tables */ -static List *globalPreferredNodes = NIL; - /* * init_mapping_table - initializes a mapping table * @@ -88,10 +86,19 @@ List * GetAnyDataNode(void) { List *destList = NULL; - - /* try and pick from the preferred list */ - if (globalPreferredNodes != NULL) - return destList = lappend_int(NULL, linitial_int(globalPreferredNodes)); + /* + * Try and pick the preferred node. + * In Mirror mode, pick up the preferred Mirror. + * If a preferred datanode is defined but not a mirror, + * use one mirror randomly in the subset of mirrors of this Datanode. + * + * PGXCTODO: When tables defined in a subset of nodes is supported, + * we need also to check if those tables can use it. 
+ */ + if (PreferredDataNode != 0 && !IsPGXCMirrorMode) + return destList = lappend_int(NULL, PreferredDataNode); + else if (PreferredDataNode != 0 && IsPGXCMirrorMode) + return destList = PGXCMirror_GetSubsetMirrors(PreferredDataNode, false); return destList = lappend_int(NULL, 1); } @@ -112,18 +119,14 @@ hash_range(char *key) int value; if (key == NULL || key == '\0') - { return 0; - } length = strlen(key); value = 0x238F13AF * length; for (i = 0; i < length; i++) - { value = value + ((key[i] << i * 5 % 24) & 0x7fffffff); - } return (1103515243 * value + 12345) % 65537 & HASH_MASK; } @@ -369,21 +372,47 @@ int GetRoundRobinNode(Oid relid) { int ret_node; - Relation rel = relation_open(relid, AccessShareLock); - Assert (rel->rd_locator_info->locatorType == LOCATOR_TYPE_REPLICATED || + Assert (rel->rd_locator_info->locatorType == LOCATOR_TYPE_REPLICATED || rel->rd_locator_info->locatorType == LOCATOR_TYPE_RROBIN); - ret_node = lfirst_int(rel->rd_locator_info->roundRobinNode); + if (IsPGXCMirrorMode) + { + /* + * PGXCTODO: Round Robin list contains also the list of mirrors + * But we have to be sure that a Global Datanode ID is + * returned. 
+ * When node subsets are supported, this part should be + * modified to support node subsets correctly + */ + bool done = false; + while (!done) + { + ret_node = lfirst_int(rel->rd_locator_info->roundRobinNode); - /* Move round robin indicator to next node */ - if (rel->rd_locator_info->roundRobinNode->next != NULL) - rel->rd_locator_info->roundRobinNode = rel->rd_locator_info->roundRobinNode->next; - else - /* reset to first one */ - rel->rd_locator_info->roundRobinNode = rel->rd_locator_info->nodeList->head; + if (ret_node < NumDataNodes + 1) + done = true; + /* Move round robin indicator to next node */ + if (rel->rd_locator_info->roundRobinNode->next != NULL) + rel->rd_locator_info->roundRobinNode = rel->rd_locator_info->roundRobinNode->next; + else + /* reset to first one */ + rel->rd_locator_info->roundRobinNode = rel->rd_locator_info->nodeList->head; + } + } + else + { + ret_node = lfirst_int(rel->rd_locator_info->roundRobinNode); + + /* Move round robin indicator to next node */ + if (rel->rd_locator_info->roundRobinNode->next != NULL) + rel->rd_locator_info->roundRobinNode = rel->rd_locator_info->roundRobinNode->next; + else + /* reset to first one */ + rel->rd_locator_info->roundRobinNode = rel->rd_locator_info->nodeList->head; + } relation_close(rel, AccessShareLock); return ret_node; @@ -426,9 +455,17 @@ GetRelationNodes(RelationLocInfo *rel_loc_info, long *partValue, switch (rel_loc_info->locatorType) { case LOCATOR_TYPE_REPLICATED: + { + int primary_node_num; + + /* In Mirror mode, primary mirror is selected in subset of datanodes */ + if (IsPGXCMirrorMode) + primary_node_num = PGXCMirror_GetPrimaryMirrorNum(primary_data_node); + else + primary_node_num = primary_data_node; if (accessType == RELATION_ACCESS_UPDATE || - accessType == RELATION_ACCESS_INSERT) + accessType == RELATION_ACCESS_INSERT) { /* we need to write to all synchronously */ exec_nodes->nodelist = list_copy(rel_loc_info->nodeList); @@ -437,11 +474,11 @@ 
GetRelationNodes(RelationLocInfo *rel_loc_info, long *partValue, * Write to primary node first, to reduce chance of a deadlock * on replicated tables. If 0, do not use primary copy. */ - if (primary_data_node && exec_nodes->nodelist + if (primary_node_num && exec_nodes->nodelist && list_length(exec_nodes->nodelist) > 1) /* make sure more than 1 */ { - exec_nodes->primarynodelist = lappend_int(NULL, primary_data_node); - list_delete_int(exec_nodes->nodelist, primary_data_node); + exec_nodes->primarynodelist = lappend_int(NULL, primary_node_num); + list_delete_int(exec_nodes->nodelist, primary_node_num); } } else @@ -454,40 +491,71 @@ GetRelationNodes(RelationLocInfo *rel_loc_info, long *partValue, * avoid distributed deadlock if updating the same row * concurrently */ - exec_nodes->nodelist = lappend_int(NULL, primary_data_node); + exec_nodes->nodelist = lappend_int(NULL, primary_node_num); } - else if (globalPreferredNodes != NULL) + else if (PreferredDataNode != 0) { - /* try and pick from the preferred list */ - foreach(prefItem, globalPreferredNodes) + /* + * Try and pick the preferred node. + * make sure it is valid for this relation. + * In Mirror mode, be sure to pick up the right node. + */ + foreach(stepItem, rel_loc_info->nodeList) { - /* make sure it is valid for this relation */ - foreach(stepItem, rel_loc_info->nodeList) + if (lfirst_int(stepItem) == PreferredDataNode) { - if (lfirst_int(stepItem) == lfirst_int(prefItem)) - { - exec_nodes->nodelist = lappend_int(NULL, lfirst_int(prefItem)); - break; - } + if (IsPGXCMirrorMode) + exec_nodes->nodelist = PGXCMirror_GetSubsetMirrors(PreferredDataNode, false); + else + exec_nodes->nodelist = lappend_int(NULL, PreferredDataNode); + + break; } } } if (exec_nodes->nodelist == NULL) - /* read from just one of them. 
Use round robin mechanism */ - exec_nodes->nodelist = lappend_int(NULL, GetRoundRobinNode(rel_loc_info->relid)); + { + int rr_node_num = GetRoundRobinNode(rel_loc_info->relid); + /* We are in READ case here, just pick up one of them with round robin */ + if (IsPGXCMirrorMode) + exec_nodes->nodelist = PGXCMirror_GetSubsetMirrors(rr_node_num, false); + else + exec_nodes->nodelist = lappend_int(NULL, rr_node_num); + } } break; - + } case LOCATOR_TYPE_HASH: if (partValue != NULL) - /* in prototype, all partitioned tables use same map */ - exec_nodes->nodelist = lappend_int(NULL, get_node_from_hash(hash_range_int(*partValue))); + { + int hash_node_num = get_node_from_hash(hash_range_int(*partValue)); + /* + * In prototype, all partitioned tables use same map. + * + * In Mirror mode, + * for a SELECT query, pick up one node in the subset of datanodes + * for a DML query, pick up all the nodes of the subset + */ + if ((IsPGXCMirrorMode && accessType == RELATION_ACCESS_INSERT) || + (IsPGXCMirrorMode && accessType == RELATION_ACCESS_UPDATE)) + exec_nodes->nodelist = PGXCMirror_GetSubsetMirrors(hash_node_num, true); + else if (IsPGXCMirrorMode && accessType == RELATION_ACCESS_READ) + exec_nodes->nodelist = PGXCMirror_GetSubsetMirrors(hash_node_num, false); + else + exec_nodes->nodelist = lappend_int(NULL, hash_node_num); + } else if (accessType == RELATION_ACCESS_INSERT) - /* Insert NULL to node 1 */ - exec_nodes->nodelist = lappend_int(NULL, 1); + { + /* Insert NULL to node 1 + * In mirror mode, pick up all the mirrors of one Datanode */ + if (IsPGXCMirrorMode) + exec_nodes->nodelist = PGXCMirror_GetSubsetMirrors(1, true); + else + exec_nodes->nodelist = lappend_int(NULL, 1); + } else /* Use all nodes for other types of access */ exec_nodes->nodelist = list_copy(rel_loc_info->nodeList); @@ -495,12 +563,33 @@ GetRelationNodes(RelationLocInfo *rel_loc_info, long *partValue, case LOCATOR_TYPE_MODULO: if (partValue != NULL) - /* in prototype, all partitioned tables use same map 
*/ - exec_nodes->nodelist = lappend_int(NULL, get_node_from_modulo(compute_modulo(*partValue))); + { + int modulo_node_num = get_node_from_modulo(compute_modulo(*partValue)); + /* + * In prototype, all partitioned tables use same map. + * + * In Mirror mode, + * for a SELECT query, pick up one node in the subset of datanodes + * for a DML query, pick up all the nodes of the subset + */ + if ((IsPGXCMirrorMode && accessType == RELATION_ACCESS_INSERT) || + (IsPGXCMirrorMode && RELATION_ACCESS_UPDATE)) + exec_nodes->nodelist = PGXCMirror_GetSubsetMirrors(modulo_node_num, true); + else if (IsPGXCMirrorMode && accessType == RELATION_ACCESS_READ) + exec_nodes->nodelist = PGXCMirror_GetSubsetMirrors(modulo_node_num, false); + else + exec_nodes->nodelist = lappend_int(NULL, modulo_node_num); + } else if (accessType == RELATION_ACCESS_INSERT) - /* Insert NULL to node 1 */ - exec_nodes->nodelist = lappend_int(NULL, 1); + { + /* Insert NULL to node 1 + * In mirror mode, pick up all the mirrors of one Datanode */ + if (IsPGXCMirrorMode) + exec_nodes->nodelist = PGXCMirror_GetSubsetMirrors(1, true); + else + exec_nodes->nodelist = lappend_int(NULL, 1); + } else /* Use all nodes for other types of access */ exec_nodes->nodelist = list_copy(rel_loc_info->nodeList); @@ -517,8 +606,12 @@ GetRelationNodes(RelationLocInfo *rel_loc_info, long *partValue, /* round robin, get next one */ if (accessType == RELATION_ACCESS_INSERT) { + int rr_node_num = GetRoundRobinNode(rel_loc_info->relid); /* write to just one of them */ - exec_nodes->nodelist = lappend_int(NULL, GetRoundRobinNode(rel_loc_info->relid)); + if (IsPGXCMirrorMode) + exec_nodes->nodelist = PGXCMirror_GetSubsetMirrors(rr_node_num, true); + else + exec_nodes->nodelist = lappend_int(NULL, rr_node_num); } else { @@ -602,14 +695,19 @@ List * GetAllDataNodes(void) { int i; + int node_num = NumDataNodes; + List *nodeList = NIL; + + /* In Mirroring mode, all the mirrors are selected */ + if (IsPGXCMirrorMode) + node_num = 
PGXCMirror_GetMirrorTotalCount(); /* * PGXCTODO - add support for having nodes on a subset of nodes * For now, assume on all nodes */ - List *nodeList = NIL; - for (i = 1; i < NumDataNodes + 1; i++) + for (i = 1; i < node_num + 1; i++) { nodeList = lappend_int(nodeList, i); } @@ -709,6 +807,7 @@ RelationBuildLocator(Relation rel) /** PGXCTODO - add support for having nodes on a subset of nodes * For now, assume on all nodes + * In Mirror mode, this includes also Mirrors !! */ relationLocInfo->nodeList = GetAllDataNodes(); relationLocInfo->nodeCount = relationLocInfo->nodeList->length; @@ -725,6 +824,14 @@ RelationBuildLocator(Relation rel) * pick a random one to start with, * since each process will do this independently */ + /* + * PGXCTODO: Now mapping table uses all the nodes... + * In Mirror mode, NodeCount contains the TOTAL number of mirrors + * even if real number of Datanode is NumDataNodes. + * When changing mapping table to support subsets of nodes, + * this part should be changed in accordance to Mirror Mode also. + * GetRoundRobin has to return the global Datanode number, not a single mirror number. 
+ */ srand(time(NULL)); offset = rand() % relationLocInfo->nodeCount + 1; relationLocInfo->roundRobinNode = relationLocInfo->nodeList->head; /* initialize */ diff --git a/src/backend/pgxc/plan/planner.c b/src/backend/pgxc/plan/planner.c index 0c19756..7d01528 100644 --- a/src/backend/pgxc/plan/planner.c +++ b/src/backend/pgxc/plan/planner.c @@ -35,6 +35,8 @@ #include "pgxc/execRemote.h" #include "pgxc/locator.h" #include "pgxc/planner.h" +#include "pgxc/mirror.h" +#include "pgxc/poolmgr.h" #include "tcop/pquery.h" #include "utils/acl.h" #include "utils/builtins.h" @@ -167,7 +169,7 @@ static void InitXCWalkerContext(XCWalkerContext *context); static RemoteQuery *makeRemoteQuery(void); static void validate_part_col_updatable(const Query *query); static bool is_pgxc_safe_func(Oid funcid); - +static List *get_mirror_nodes(void); /* * Find position of specified substring in the string @@ -728,6 +730,10 @@ examine_conditions_walker(Node *expr_node, XCWalkerContext *context) errmsg("cursor \"%s\" is held from a previous transaction", cursor_name))); + if (IsPGXCMirrorMode) + ereport(ERROR, + (errcode(ERRCODE_INVALID_CURSOR_STATE), + errmsg("CURRENT OF not yet supported in Mirroring mode"))); /* * The cursor must have a current result row: per the SQL spec, it's * an error if not. 
@@ -901,7 +907,13 @@ examine_conditions_walker(Node *expr_node, XCWalkerContext *context) { /* Take target node from last scan tuple of referenced step */ int curr_node = node->ss.ss_ScanTupleSlot->tts_dataNode; - context->query_step->exec_nodes->nodelist = lappend_int(context->query_step->exec_nodes->nodelist, curr_node); + if (IsPGXCMirrorMode) + context->query_step->exec_nodes->nodelist = + list_concat(context->query_step->exec_nodes->nodelist, + PGXCMirror_GetSubsetMirrors(curr_node, true)); + else + context->query_step->exec_nodes->nodelist = + lappend_int(context->query_step->exec_nodes->nodelist, curr_node); } FreeRelationLocInfo(rel_loc_info1); @@ -1690,7 +1702,8 @@ get_plan_nodes_walker(Node *query_node, XCWalkerContext *context) table_usage_type = TABLE_USAGE_TYPE_USER_REPLICATED; context->query_step->exec_nodes->tableusagetype = table_usage_type; - } else if (context->conditions->partitioned_expressions) { + } + else if (context->conditions->partitioned_expressions) { /* probably we can determine nodes on execution time */ foreach(lc, context->conditions->partitioned_expressions) { Expr_Comparison *expr_comp = (Expr_Comparison *) lfirst(lc); @@ -1709,7 +1722,8 @@ get_plan_nodes_walker(Node *query_node, XCWalkerContext *context) break; } } - } else { + } + else { /* run query on all nodes */ context->query_step->exec_nodes = makeNode(ExecNodes); context->query_step->exec_nodes->baselocatortype = @@ -1717,8 +1731,13 @@ get_plan_nodes_walker(Node *query_node, XCWalkerContext *context) context->query_step->exec_nodes->tableusagetype = TABLE_USAGE_TYPE_USER; context->query_step->exec_nodes->primarynodelist = NULL; - context->query_step->exec_nodes->nodelist = - list_copy(rel_loc_info->nodeList); + /* In Mirror mode, get one mirror for a read operation */ + if (IsPGXCMirrorMode && context->accessType == RELATION_ACCESS_READ || + IsPGXCMirrorMode && context->accessType == RELATION_ACCESS_READ_FOR_UPDATE) + context->query_step->exec_nodes->nodelist = 
get_mirror_nodes(); + else + context->query_step->exec_nodes->nodelist = list_copy(rel_loc_info->nodeList); + context->query_step->exec_nodes->expr = NULL; context->query_step->exec_nodes->relid = NULL; context->query_step->exec_nodes->accesstype = context->accessType; @@ -1737,9 +1756,14 @@ get_plan_nodes_walker(Node *query_node, XCWalkerContext *context) { Literal_Comparison *lit_comp = (Literal_Comparison *) lfirst(lc); - test_exec_nodes = GetRelationNodes( - lit_comp->rel_loc_info, &(lit_comp->constant), - RELATION_ACCESS_READ); + if (context->accessType == RELATION_ACCESS_UPDATE) + test_exec_nodes = GetRelationNodes( + lit_comp->rel_loc_info, &(lit_comp->constant), + RELATION_ACCESS_UPDATE); + else + test_exec_nodes = GetRelationNodes( + lit_comp->rel_loc_info, &(lit_comp->constant), + RELATION_ACCESS_READ); test_exec_nodes->tableusagetype = table_usage_type; if (context->query_step->exec_nodes == NULL) @@ -3334,3 +3358,24 @@ GetHashExecNodes(RelationLocInfo *rel_loc_info, ExecNodes **exec_nodes, const Ex } +/* + * In Mirror mode + * A query that has to be executed on all nodes just needs + * to execute on one mirror of each node. + */ +static List* +get_mirror_nodes(void) +{ + List *list_nodes = NIL; + List *res; + int count; + + for (count = 0; count < NumDataNodes; count++) + { + res = PGXCMirror_GetSubsetMirrors(count + 1, false); + + list_nodes = list_concat(list_nodes, res); + } + + return list_nodes; +} diff --git a/src/backend/pgxc/pool/Makefile b/src/backend/pgxc/pool/Makefile index f0701c5..b43d5b4 100644 --- a/src/backend/pgxc/pool/Makefile +++ b/src/backend/pgxc/pool/Makefile @@ -14,6 +14,6 @@ subdir = src/backend/pgxc/pool top_builddir = ../../../.. 
include $(top_builddir)/src/Makefile.global -OBJS = pgxcnode.o execRemote.o poolmgr.o poolcomm.o postgresql_fdw.o poolutils.o +OBJS = pgxcnode.o execRemote.o poolmgr.o poolcomm.o postgresql_fdw.o poolutils.o mirror.o ../../../pgxc/xcm/libxcm.a include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/pgxc/pool/execRemote.c b/src/backend/pgxc/pool/execRemote.c index 3673421..4570bdf 100644 --- a/src/backend/pgxc/pool/execRemote.c +++ b/src/backend/pgxc/pool/execRemote.c @@ -34,7 +34,7 @@ #include "utils/tuplesort.h" #include "utils/snapmgr.h" #include "pgxc/locator.h" -#include "pgxc/pgxc.h" +#include "pgxc/mirror.h" #define END_QUERY_TIMEOUT 20 #define DATA_NODE_FETCH_SIZE 1 @@ -112,6 +112,8 @@ stat_statement() static void stat_transaction(int node_count) { + int node_num = IsPGXCMirrorMode ? PGXCMirror_GetMirrorTotalCount() : NumDataNodes; + total_transactions++; if (autocommit) total_autocommit++; @@ -125,12 +127,12 @@ stat_transaction(int node_count) else statements_per_transaction[current_tran_statements]++; current_tran_statements = 0; - if (node_count > 0 && node_count <= NumDataNodes) + if (node_count > 0 && node_count <= node_num) { if (!nodes_per_transaction) { - nodes_per_transaction = (int *) malloc(NumDataNodes * sizeof(int)); - memset(nodes_per_transaction, 0, NumDataNodes * sizeof(int)); + nodes_per_transaction = (int *) malloc(node_num * sizeof(int)); + memset(nodes_per_transaction, 0, node_num * sizeof(int)); } nodes_per_transaction[node_count - 1]++; } @@ -175,9 +177,10 @@ stat_log() MAX_STATEMENTS_PER_TRAN, statements_per_transaction[MAX_STATEMENTS_PER_TRAN], statements_per_transaction[MAX_STATEMENTS_PER_TRAN] * 100 / total_transactions); if (nodes_per_transaction) { - int i; + int i; + int node_num = IsPGXCMirrorMode ? 
PGXCMirror_GetMirrorTotalCount() : NumDataNodes; - for (i = 0; i < NumDataNodes; i++) + for (i = 0; i < node_num; i++) elog(DEBUG1, "%d Nodes per Transaction: %d (%d%%)", i + 1, nodes_per_transaction[i], nodes_per_transaction[i] * 100 / total_transactions); } @@ -1103,9 +1106,10 @@ BufferConnection(PGXCNodeHandle *conn) */ if (combiner->tuplesortstate) { + int node_num = IsPGXCMirrorMode ? PGXCMirror_GetMirrorTotalCount() : NumDataNodes; combiner->connections[combiner->current_conn] = NULL; if (combiner->tapenodes == NULL) - combiner->tapenodes = (int*) palloc0(NumDataNodes * sizeof(int)); + combiner->tapenodes = (int*) palloc0(node_num * sizeof(int)); combiner->tapenodes[combiner->current_conn] = conn->nodenum; } else @@ -1242,7 +1246,7 @@ FetchTuple(RemoteQueryState *combiner, TupleTableSlot *slot) else combiner->current_conn = 0; } - else if (res = RESPONSE_DATAROW && have_tuple) + else if (res == RESPONSE_DATAROW && have_tuple) { /* * We already have a tuple and received another one, leave it till @@ -1514,11 +1518,12 @@ pgxc_node_begin(int conn_count, PGXCNodeHandle ** connections, static void clear_write_node_list() { + int node_count = IsPGXCMirrorMode ? PGXCMirror_GetMirrorTotalCount() : NumDataNodes; + /* we just malloc once and use counter */ if (write_node_list == NULL) - { - write_node_list = (PGXCNodeHandle **) malloc(NumDataNodes * sizeof(PGXCNodeHandle *)); - } + write_node_list = (PGXCNodeHandle **) malloc(node_count * sizeof(PGXCNodeHandle *)); + write_node_count = 0; } @@ -2312,7 +2317,8 @@ PGXCNodeHandle** DataNodeCopyBegin(const char *query, List *nodelist, Snapshot snapshot, bool is_from) { int i, j; - int conn_count = list_length(nodelist) == 0 ? NumDataNodes : list_length(nodelist); + const int node_num = IsPGXCMirrorMode ? PGXCMirror_GetMirrorTotalCount() : NumDataNodes; + int conn_count = list_length(nodelist) == 0 ? 
node_num : list_length(nodelist); struct timeval *timeout = NULL; PGXCNodeAllHandles *pgxc_handles; PGXCNodeHandle **connections; @@ -2344,7 +2350,7 @@ DataNodeCopyBegin(const char *query, List *nodelist, Snapshot snapshot, bool is_ * So store connections in an array where index is node-1. * Unused items in the array should be NULL */ - copy_connections = (PGXCNodeHandle **) palloc0(NumDataNodes * sizeof(PGXCNodeHandle *)); + copy_connections = (PGXCNodeHandle **) palloc0(node_num * sizeof(PGXCNodeHandle *)); i = 0; foreach(nodeitem, nodelist) copy_connections[lfirst_int(nodeitem) - 1] = connections[i++]; @@ -2363,7 +2369,7 @@ DataNodeCopyBegin(const char *query, List *nodelist, Snapshot snapshot, bool is_ /* Check status of connections */ /* We want to track new "write" nodes, and new nodes in the current transaction * whether or not they are write nodes. */ - if (write_node_count < NumDataNodes) + if (write_node_count < node_num) { for (i = 0; i < conn_count; i++) { @@ -2710,15 +2716,16 @@ DataNodeCopyFinish(PGXCNodeHandle** copy_connections, int primary_data_node, { int i; int nLen = htonl(4); + const int node_num = IsPGXCMirrorMode ? 
PGXCMirror_GetMirrorTotalCount() : NumDataNodes; RemoteQueryState *combiner = NULL; bool need_tran; bool error = false; struct timeval *timeout = NULL; /* wait forever */ - PGXCNodeHandle *connections[NumDataNodes]; + PGXCNodeHandle *connections[node_num]; PGXCNodeHandle *primary_handle = NULL; int conn_count = 0; - for (i = 0; i < NumDataNodes; i++) + for (i = 0; i < node_num; i++) { PGXCNodeHandle *handle = copy_connections[i]; @@ -3020,7 +3027,12 @@ get_exec_connections(RemoteQueryState *planstate, /* The slot should be of type DataRow */ Assert(!TupIsNull(slot) && slot->tts_dataRow); - nodelist = list_make1_int(slot->tts_dataNode); + if (IsPGXCMirrorMode) + nodelist = list_concat(nodelist, + PGXCMirror_GetSubsetMirrors(slot->tts_dataNode, false)); + else + nodelist = list_make1_int(slot->tts_dataNode); + primarynode = NIL; } else @@ -3064,10 +3076,8 @@ get_exec_connections(RemoteQueryState *planstate, if (list_length(nodelist) == 0 && (exec_type == EXEC_ON_ALL_NODES || exec_type == EXEC_ON_DATANODES)) - { /* Primary connection is included in this number of connections if it exists */ - dn_conn_count = NumDataNodes; - } + dn_conn_count = IsPGXCMirrorMode ? PGXCMirror_GetMirrorTotalCount() : NumDataNodes; else { if (exec_type == EXEC_ON_DATANODES || exec_type == EXEC_ON_ALL_NODES) @@ -4398,6 +4408,7 @@ static PGXCNodeAllHandles * pgxc_get_all_transaction_nodes(PGXCNode_HandleRequested status_requested) { PGXCNodeAllHandles *pgxc_connections; + int node_num = IsPGXCMirrorMode ? 
PGXCMirror_GetMirrorTotalCount() : NumDataNodes; pgxc_connections = (PGXCNodeAllHandles *) palloc0(sizeof(PGXCNodeAllHandles)); if (!pgxc_connections) @@ -4408,7 +4419,7 @@ pgxc_get_all_transaction_nodes(PGXCNode_HandleRequested status_requested) } pgxc_connections->datanode_handles = (PGXCNodeHandle **) - palloc(NumDataNodes * sizeof(PGXCNodeHandle *)); + palloc(node_num * sizeof(PGXCNodeHandle *)); pgxc_connections->coord_handles = (PGXCNodeHandle **) palloc(NumCoords * sizeof(PGXCNodeHandle *)); if (!pgxc_connections->datanode_handles || !pgxc_connections->coord_handles) @@ -4481,7 +4492,7 @@ ExecCloseRemoteStatement(const char *stmt_name, List *nodelist) connections[i]->state = DN_CONNECTION_STATE_ERROR_FATAL; ereport(WARNING, (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("Failed to close data node statemrnt"))); + errmsg("Failed to close data node statement"))); } if (pgxc_node_send_sync(connections[i]) != 0) { diff --git a/src/backend/pgxc/pool/mirror.c b/src/backend/pgxc/pool/mirror.c new file mode 100644 index 0000000..1700695 --- /dev/null +++ b/src/backend/pgxc/pool/mirror.c @@ -0,0 +1,1034 @@ +/*------------------------------------------------------------------------- + * + * mirror.c + * + * File containing API to interact with Fault Sync module + * It is necessary to activate the GUC parameter mirror_mode + * to call the APIs of this file. 
+ * Only this file is authorized to call APIs of Fault Sync + * + * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group + * Portions Copyright (c) 2010-2011 Nippon Telegraph and Telephone Corporation + * + * IDENTIFICATION + * $$ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "utils/memutils.h" +#include "pgxc/poolmgr.h" +#include "pgxc/mirror.h" +#include "pgxc/locator.h" +#include "utils/builtins.h" +#include "pgxc/xcm/node_membership.h" +#include "nodes/pg_list.h" +#include "access/gtm.h" + +/* This include should contain all the APIs this file needs */ + +/* + * Get list of Datanode mirrors + * We need to get a complete array of Datanodes (How many mirrors for each Datanode) + * and save it as a global value. + * This has to be done at Node startup. + */ + +/* List of guc params for mirroring mode */ +bool IsPGXCMirrorMode = false; /* Activate Datanode Mirroring */ +bool IsXCM = false; /* Activate Cluster manager module */ + +/* Pooler (Coordinator) GUC parameters */ +int PreferredMirrorId = 0; /* Preferred Mirror ID, coupled with PreferredDataNode */ +int PrimaryMirrorId = 1; /* Primary Mirror ID for Replicated Handling */ +char *MirrorCount = NULL; /* Number of Mirrors for each Datanode */ + +/* Datanode GUC Parameters */ +bool IsPrimaryMirror = true; /* Is Datanode a Primary Mirror or not (for registration on GTM) */ +int PGXCMirrorId = 1; /* Defines mirror ID of Local Node (for Datanode only) */ + +/* + * Contains the number of mirrors for each node + * 1 means that there is only 1 Datanode, like in normal mode + */ +/* Number of Mirrors for each Datanode */ +static int *PGXCNodeMirrorCount; +/* Total number of mirrors */ +static int MirrorTotalCount = 0; + +/* + * Get list of connection parameters and save them as global values. + * It is important to respect the format used by Pooler. + * For Datanode, bring back the GTM connection info only. 
+ * A Datanode has also to know if it is itself a mirror or not, + * we need to know if this node needs to register (only primary are allowed to). + * + * if node is a datanode mirror, it needs to know its mirror number. + * its Datanode id can be get from pgxc_node_id. + * + * For Coordinator, bring back the mirror count for each Datanode + * Connection parameters. + * Connection parameters to Coordinators, to Datanode, to GTM. + * Prepare it in a nice shape and save them as global values. + * In case an error occurs at this step, node cannot startup. + */ + +/* + * Note: users list and password list is still managed by postgresql.conf + */ + +/* PGXCMirror_GetXXX and PGXCMirror_SetXXX APIs can be invocated only at node startup */ + +/* + * PGXCMirror_SetNodePrimary + * + * Set PGXCNodePrimary parameters for Replicated Handling + * This is called at node startup + */ +void +PGXCMirror_SetNodePrimary(void) +{ + /* + * Get the primary node parameters from Fault Sync module + * In other cases GUC params have all the necessary data. 
+ */ + if (IsXCM) + { + int datanode_id, mirror_id; + + if (get_xcm_preferred_mirror(PGXCNodeId, &datanode_id, &mirror_id) < 0) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Fault Sync ERROR: could not get Primary node data"))); + + /* Assign obtained values */ + PreferredDataNode = datanode_id; + PreferredMirrorId = mirror_id; + } +} + +/* + * PGXCMirror_SetMirrorCountList + * + * Set PGXCNodeMirrorCount array with the list of primary node numbers + * This is called at node startup in TopMemoryContext + */ +int +PGXCMirror_SetMirrorCountList(void) +{ + MirrorTotalCount = 0; + + PGXCNodeMirrorCount = (int *) MemoryContextAlloc(TopMemoryContext, sizeof(int) * NumDataNodes); + if (!PGXCNodeMirrorCount) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + + /* Get number of mirrors for each Datanode */ + if (IsXCM) + { + int i; + + /* Here cluster manager is activated */ + for (i = 0; i < NumDataNodes; i++) + { + int mirror_count; + + if (get_xcm_mirror_count(i + 1, &mirror_count) < 0) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Fault Sync ERROR: could not get Mirror count"))); + PGXCNodeMirrorCount[i] = mirror_count; + MirrorTotalCount += PGXCNodeMirrorCount[i]; + } + } + else + { + /* + * In this case we get the necessary info from GUC param. + * Same parsing policy as in pooler is used. 
+ */ + char *rawstring; + List *elemlist; + ListCell *l; + int i, j; + + rawstring = pstrdup(MirrorCount); + + if (!SplitIdentifierString(rawstring, ',', &elemlist)) + { + /* syntax error in list */ + ereport(FATAL, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid list syntax for \"mirror_count\""))); + } + + i = 0; + foreach(l, elemlist) + { + int curnum = pg_atoi((char *) lfirst(l), 4, 0); + + /* Ignore extra entries, if any */ + if (i >= NumDataNodes) + break; + + PGXCNodeMirrorCount[i] = curnum; + MirrorTotalCount += curnum; + i++; + } + + list_free(elemlist); + pfree(rawstring); + + /* Validate */ + if (i < NumDataNodes - 1) + ereport(FATAL, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid list syntax for \"mirror_count\""))); + + /* if only 1 element, copy the first value to all the others */ + if (i == 1) + { + for (j = 1; j < NumDataNodes; j++) + PGXCNodeMirrorCount[i] = PGXCNodeMirrorCount[0]; + + MirrorTotalCount = PGXCNodeMirrorCount[0] * NumDataNodes; + } + } + + return MirrorTotalCount; +} + +/* + * PGXCMirror_GetMirrorTotalCount + * + * Return total number of Mirrors in the cluster + */ +int +PGXCMirror_GetMirrorTotalCount(void) +{ + return MirrorTotalCount; +} + +int +PGXCMirror_GetMirrorCount(int pgxc_node_id) +{ + Assert(pgxc_node_id > 0 && pgxc_node_id < NumDataNodes + 1); + return PGXCNodeMirrorCount[pgxc_node_id - 1]; +} + +/* Check mirror(s) status */ +bool +PGXCMirror_AreAllMirrorsOffline(int pgxc_node_id) +{ + Assert(pgxc_node_id > 0 && pgxc_node_id <= NumDataNodes); + + if (IsXCM) + { + int count; + int mirror_count = PGXCMirror_GetMirrorCount(pgxc_node_id); + + /* Check if all the mirrors are offline */ + for (count = 0; count < mirror_count;count++) + if (!XCM_IS_FAULT(PGXCMirror_CheckStatus(REMOTE_CONN_DATANODE, pgxc_node_id, count + 1))) + return false; /* Got 1 mirror online */ + + /* If we are here, no mirrors have been found online for this datanode :( */ + return true; + } + + /* + * There is now no 
implementation to determine if mirror status without XCM + * so let's think they are online. + * If it is not the case, Coordinator will return an error btw. + */ + return false; +} + +bool +PGXCMirror_IsMirrorOffline(int pgxc_node_id, int mirror_id) +{ + Assert(mirror_id <= PGXCMirror_GetMirrorCount(pgxc_node_id) && + pgxc_node_id <= NumDataNodes); + + /* We need an interface compatible with XCM in this case */ + if (IsXCM) + { + if (XCM_IS_FAULT(PGXCMirror_CheckStatus(REMOTE_CONN_DATANODE, pgxc_node_id, mirror_id))) + return true; /* It looks that it is offline */ + else + return false; + } + + /* + * In Mirror mode, there is no functionnality to check if a mirror is online or not + * so let's believe it is online by default + */ + return false; +} + +/* + * PGXCMirror_GetSubsetMirrors + * + * Return a subset of mirrors for given node ID depending on the type of operation + * in global array of Datanodes. + */ +List* +PGXCMirror_GetSubsetMirrors(int pgxc_node_id, bool is_write) +{ + List *list_mirrors = NIL; + int count; + int sum = 0; + int mirror_count = PGXCNodeMirrorCount[pgxc_node_id - 1]; + + /* Determine number of where Mirror subset is located in global Array */ + for (count = 0; count < pgxc_node_id - 1; count++) + sum += PGXCNodeMirrorCount[count]; + + if (PGXCMirror_AreAllMirrorsOffline(pgxc_node_id)) + ereport(LOG, + (errcode(ERRCODE_CONNECTION_FAILURE), + errmsg("All Mirrors are Offline for Node %d", pgxc_node_id))); + + if (is_write) + { + /* Select all the subset of online Mirrors */ + for (count = sum + 1; count < sum + mirror_count + 1; count++) + { + /* check if this mirror is online before associating it */ + if (IsXCM && + !XCM_IS_FAULT(PGXCMirror_CheckStatus(REMOTE_CONN_DATANODE, + pgxc_node_id, count - sum))) + list_mirrors = lappend_int(list_mirrors, count); + else if (!IsXCM) + list_mirrors = lappend_int(list_mirrors, count); + else + elog(WARNING, "Datanode %d mirror %d is in failed state", pgxc_node_id, count - sum); + } + } + else + { + 
/* + * Select only one node + * If a preferred node exists, choose it. + * If no preferred node, pick up one randomly. + * If the preferred node is not online, choose another mirror randomly + */ + bool done = false; + + Assert(PreferredMirrorId <= PGXCNodeMirrorCount[pgxc_node_id - 1]); + /* Check also that preferred node is online */ + if (PreferredMirrorId != 0) + { + if (IsXCM && + !XCM_IS_FAULT(PGXCMirror_CheckStatus(REMOTE_CONN_DATANODE, + pgxc_node_id, PreferredMirrorId))) + { + done = true; + list_mirrors = lappend_int(list_mirrors, sum + PreferredMirrorId); + } + else if (!IsXCM) + { + /* In mirror mode without XCM active, we suppose the mirror is online */ + done = true; + list_mirrors = lappend_int(list_mirrors, sum + PreferredMirrorId); + } + } + + while (!done) + { + int offset; + + srand(time(NULL)); + offset = rand() % mirror_count + 1; + if (!XCM_IS_FAULT(PGXCMirror_CheckStatus(REMOTE_CONN_DATANODE, pgxc_node_id, offset))) + { + list_mirrors = lappend_int(list_mirrors, sum + offset); + done = true; + } + } + } + + return list_mirrors; +} + +/* + * PGXCMirror_GetPrimaryMirrorNum + * + * Return Primary Mirror location in global array of Datanodes (pooler format). 
+ */ +int +PGXCMirror_GetPrimaryMirrorNum(int primary_data_node) +{ + int count; + int sum = 0; + + /* Determine number of Primary Node in the Global Array of Datanodes */ + for (count = 0; count < primary_data_node - 1; count++) + sum += PGXCNodeMirrorCount[count]; + + return sum + PrimaryMirrorId; +} + +/* + * PGXCMirror_GetMirrorGlobalID + * + * Return position in pooler array of given mirror + */ +int +PGXCMirror_GetMirrorGlobalID(int pgxc_node_id, int mirror_id) +{ + int count; + int sum = 0; + + for (count = 0; count < pgxc_node_id - 1; count++) + sum += PGXCNodeMirrorCount[count]; + + return sum + mirror_id; +} + +/* + * PGXCMirror_GetMirrorGlobalID + * + * Return Datanode ID and Mirror ID for a given node ID of pooler array + */ +int +PGXCMirror_GetMirrorIDAndDatanodeID(int global_id, int *mirror_id) +{ + int datanode_id = 0; + int sum = 0; + + /* Determine datanode ID */ + while (datanode_id < NumDataNodes) + { + sum += PGXCNodeMirrorCount[datanode_id]; + datanode_id++; + if (global_id <= sum) + break; + } + + /* Determine Mirror ID */ + sum -= PGXCNodeMirrorCount[datanode_id - 1]; + *mirror_id = global_id - sum; + + return datanode_id; +} + +/* + * To respect Pooler Connection format, Host and Port strings respect the following format + * + * For example, in the case of a configuration with 2 Datanodes, each having 2 mirrors: + * Datanode(1,1),Datanode(1,2),Datanode(2,1),Datanode(2,2) + * + * With 3 Datanodes, having 1 mirror for the 1st, 3 mirrors for the 2nd, and 2 mirrors for the 3rd: + * Datanode(1,1),Datanode(2,1),Datanode(2,2),Datanode(2,3),Datanode(3,1),Datanode(3,2) + * + * This permits to keep a simple array format that is easily managed by pooler. + * Complementary data about primary numbers and number of mirrors for each Datanode + * is contained in additional arrays locally saved here. + */ + +/* + * PGXCMirror_GetHostString + * + * Called at pooler Initialization to get + * the Datanode or Coordinator string at Pooler initialization. 
+ * This is used to replace CoordHosts and DataNodeHosts in pooler + */ +char* +PGXCMirror_GetHostTotalString(RemoteConnTypes conn_type) +{ + char *host_str = NULL; + int i, num_nodes; + + Assert(IS_PGXC_COORDINATOR && IsXCM); + Assert(conn_type == REMOTE_CONN_COORD || conn_type == REMOTE_CONN_DATANODE); + + if (conn_type == REMOTE_CONN_COORD) + num_nodes = NumCoords; + else if (conn_type == REMOTE_CONN_DATANODE) + num_nodes = NumDataNodes; + + for (i = 0; i < num_nodes; i++) + { + if (conn_type == REMOTE_CONN_COORD) + { + int local_len; + char *buf; + + buf = PGXCMirror_GetCoordHost(i + 1); + local_len = strlen(buf); + + if (!host_str) + { + host_str = (char *) palloc(local_len + 1); + sprintf(host_str, "%s", buf); + } + else + { + host_str = (char *) repalloc(host_str, + strlen(host_str) + local_len + 2); + sprintf(host_str, "%s,%s", host_str, buf); + } + pfree(buf); + } + else if (conn_type == REMOTE_CONN_DATANODE) + { + int count; + + /* Build Datanode Host string */ + Assert(PGXCNodeMirrorCount[i] > 0); + + for (count = 0; count < PGXCNodeMirrorCount[i]; count++) + { + int local_len; + char *buf; + + buf = PGXCMirror_GetDataNodeHost(i + 1, count + 1); + local_len = strlen(buf); + + /* OK, got it. Build the string. */ + if (!host_str) + { + host_str = (char *) palloc(local_len + 1); + sprintf(host_str, "%s", buf); + } + else + { + host_str = (char *) repalloc(host_str, + strlen(host_str) + local_len + 2); + sprintf(host_str, "%s,%s", host_str, buf); + } + pfree(buf); + } + } + } + + return host_str; +} + +/* + * PGXCMirror_GetPortTotalString + * + * Called at pooler Initialization to get + * the Datanode string at Pooler initialization. 
+ * This is used to replace DataNodePorts and CoordPorts + */ +char* +PGXCMirror_GetPortTotalString(RemoteConnTypes conn_type) +{ + char *port_str = NULL; + int i, num_nodes; + + Assert(IS_PGXC_COORDINATOR && IsXCM); + Assert(conn_type == REMOTE_CONN_COORD || conn_type == REMOTE_CONN_DATANODE); + + if (conn_type == REMOTE_CONN_COORD) + num_nodes = NumCoords; + else if (conn_type == REMOTE_CONN_DATANODE) + num_nodes = NumDataNodes; + + for (i = 0; i < num_nodes; i++) + { + if (conn_type == REMOTE_CONN_COORD) + { + int local_len; + char *buf; + + buf = PGXCMirror_GetCoordPort(i + 1); + local_len = strlen(buf); + + /* OK, got it. Build the string. */ + if (!port_str) + { + port_str = (char *) palloc(local_len + 1); + sprintf(port_str, "%s", buf); + } + else + { + port_str = (char *) repalloc(port_str, + strlen(port_str) + local_len + 2); + sprintf(port_str, "%s,%s", port_str, buf); + } + pfree(buf); + } + else if (conn_type == REMOTE_CONN_DATANODE) + { + int count; + + /* Build Datanode Host string */ + Assert(PGXCNodeMirrorCount[i] > 0); + + for (count = 0; count < PGXCNodeMirrorCount[i]; count++) + { + int local_len; + char *buf; + + buf = PGXCMirror_GetDataNodePort(i + 1, count + 1); + local_len = strlen(buf); + + /* OK, got it. Build the string. 
*/ + if (!port_str) + { + port_str = (char *) palloc(local_len + 1); + sprintf(port_str, "%s", buf); + } + else + { + port_str = (char *) repalloc(port_str, + strlen(port_str) + local_len + 2); + sprintf(port_str, "%s,%s", port_str, buf); + } + pfree(buf); + } + } + } + + return port_str; +} + +/* + * PGXCMirror_GetGTMHost + * + * Get GTM Host value for connection to GTM + * This is done for local node only + * This replaces GUC parameter GtmHost in Mirror mode + */ +char* +PGXCMirror_GetLocalGTMHost(void) +{ + int gtm_id = 0; + int err = 0; + + Assert(IS_PGXC_COORDINATOR || IS_PGXC_DATANODE); + + /* Get GTM Id local node is connected to */ + if (IS_PGXC_COORDINATOR) + err = find_xcm_coordinator_gtm_pxy(PGXCNodeId, &gtm_id); + else if (IS_PGXC_DATANODE) + err = find_xcm_mirror_gtm_pxy(PGXCNodeId, PGXCMirrorId, &gtm_id); + else + return NULL; + + if (err < 0) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Fault Sync ERROR: could not get GTM information"))); + + /* Need Connection points for GTM or Proxy */ + if (gtm_id > 0) + return PGXCMirror_GetGTMProxyHost(gtm_id); + else if (gtm_id == 0) + return PGXCMirror_GetGTMHost(); + else + return NULL; +} + +/* + * PGXCMirror_GetLocalGTMPort + * + * Get GTM Port value for connection to GTM + * This is done for local node only + * This replaces GUC parameter GtmPort in Mirror mode + */ +int +PGXCMirror_GetLocalGTMPort(void) +{ + int gtm_id = 0; + int err = 0; + + Assert(IS_PGXC_COORDINATOR || IS_PGXC_DATANODE); + + /* Get GTM Id local node is connected to */ + if (IS_PGXC_COORDINATOR) + err = find_xcm_coordinator_gtm_pxy(PGXCNodeId, &gtm_id); + else if (IS_PGXC_DATANODE) + err = find_xcm_mirror_gtm_pxy(PGXCNodeId, PGXCMirrorId, &gtm_id); + else + return NULL; + + if (err < 0) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Fault Sync ERROR: could not get GTM information"))); + + /* Need Connection points for GTM or Proxy */ + if (gtm_id > 0) + return pg_atoi(PGXCMirror_GetGTMProxyPort(gtm_id), 4, 0); 
+ else + return pg_atoi(PGXCMirror_GetGTMPort(), 4, 0); +} + +/* + * PGXCMirror_ChangeFlag + * + * Set Flag of given Node to the status wanted + */ +int +PGXCMirror_ReportFail(RemoteConnTypes conn_type, int pgxc_node_id, int mirror_id) +{ + int err = 0; + + Assert(conn_type == REMOTE_CONN_COORD || + conn_type == REMOTE_CONN_DATANODE || + conn_type == REMOTE_CONN_GTM || + conn_type == REMOTE_CONN_GTM_PROXY); + + switch(conn_type) + { + case REMOTE_CONN_COORD: + err = report_xcm_coordinator_failure(pgxc_node_id); + break; + + case REMOTE_CONN_DATANODE: + err = report_xcm_mirror_failure(pgxc_node_id, mirror_id); + break; + + case REMOTE_CONN_GTM: + err = report_xcm_gtm_failure(pgxc_node_id); + break; + + case REMOTE_CONN_GTM_PROXY: + err = report_xcm_gtm_pxy_failure(pgxc_node_id); + break; + + default: + err = -1; + } + + return err; +} + +/* Series of functions to report failures for each component type */ +void +PGXCMirror_ReportCoordFail(int pgxc_node_id) +{ + Assert(pgxc_node_id > 0 && pgxc_node_id <= NumCoords); + + if (PGXCMirror_ReportFail(REMOTE_CONN_COORD, pgxc_node_id, 0) < 0) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Fault Sync ERROR: could not report failed Coordinator"))); +} + +void +PGXCMirror_ReportDataNodeFail(int pgxc_node_id, int mirror_id) +{ + Assert(pgxc_node_id > 0 && pgxc_node_id <= NumDataNodes); + Assert(PGXCNodeMirrorCount[pgxc_node_id - 1] >= pgxc_node_id); + + if (PGXCMirror_ReportFail(REMOTE_CONN_DATANODE, pgxc_node_id, mirror_id) < 0) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Fault Sync ERROR: could not report failed Datanode"))); +} + +/* + * A node doesn't know if it is connected to a proxy or a gtm, + * so report the failure for the component connected to this node. 
+ */ +void +PGXCMirror_ReportGTMFail(void) +{ + int err = 0; + int gtm_id = 0; + + if (IS_PGXC_COORDINATOR) + err = find_xcm_coordinator_gtm_pxy(PGXCNodeId, &gtm_id); + else if (IS_PGXC_DATANODE) + err = find_xcm_mirror_gtm_pxy(PGXCNodeId, PGXCMirrorId, &gtm_id); + else + err = -1; + + if (err < 0) + goto report_error; + + if (gtm_id > 0) + err = PGXCMirror_ReportFail(REMOTE_CONN_GTM_PROXY, gtm_id, 0); + else if (gtm_id == 0) + err = PGXCMirror_ReportFail(REMOTE_CONN_GTM, 0, 0); + else + err = -1; + +report_error: + if (err < 0) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Fault Sync ERROR: could not report failed GTM"))); +} + +/* + * PGXCMirror_CheckStatus + * + * Check if Coordinator or datanodes have failed or not. + * It is necessary in order not to take connections to components that crashed + * have a failed status. + */ +uint32 +PGXCMirror_CheckStatus(RemoteConnTypes conn_type, int pgxc_node_id, int mirror_id) +{ + int err = 0; + uint32 status = 0; + + Assert(conn_type == REMOTE_CONN_COORD || + conn_type == REMOTE_CONN_DATANODE); + + switch(conn_type) + { + case REMOTE_CONN_COORD: + err = get_xcm_coordinator_status(pgxc_node_id, &status); + break; + + case REMOTE_CONN_DATANODE: + err = get_xcm_mirror_status(pgxc_node_id, mirror_id, &status); + break; + + default: + err = -1; + } + + if (err < 0) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("Fault Sync ERROR: could not get Node Status"))); + + return status; +} + +/* Series of functions to check status for each component type */ +uint32 +PGXCMirror_CheckCoordStatus(int pgxc_node_id) +{ + return PGXCMirror_CheckStatus(REMOTE_CONN_COORD, pgxc_node_id, 0); +} + +uint32 +PGXCMirror_CheckDataNodeStatus(int pgxc_node_id, int mirror_id) +{ + return PGXCMirror_CheckStatus(REMOTE_CONN_DATANODE, pgxc_node_id, 0); +} + +/* + * PGXCMirror_GetNodeHost + * + * Get Node host name for given node + */ +char* +PGXCMirror_GetNodeHost(RemoteConnTypes conn_type, int pgxc_node_id, int mirror_id) +{ 
+ char *host_str = NULL; + int n_connections, conn_pt_num, local_len; + xcm_connPoint *conn_pts; + + Assert(conn_type == REMOTE_CONN_COORD || + conn_type == REMOTE_CONN_DATANODE || + conn_type == R... [truncated message content] |
From: Michael P. <mic...@us...> - 2011-02-17 05:17:59
|
Project "Postgres-XC". The branch, ha_support has been updated via f42b489b49f366c78d816708d47b380f9db640d9 (commit) via 3fdc303e50d599104c551dc62aaabb369086dd22 (commit) via d8d5bedcbde1f8476617102410b27ac74458f12b (commit) from 61d595c720755a27dfd1d53fb62b60e85f4e2732 (commit) - Log ----------------------------------------------------------------- commit f42b489b49f366c78d816708d47b380f9db640d9 Merge: 61d595c 3fdc303 Author: Michael P <mic...@us...> Date: Thu Feb 17 14:23:59 2011 +0900 Merge branch 'master' into ha_support ----------------------------------------------------------------------- Summary of changes: src/backend/rewrite/rewriteHandler.c | 1 + src/gtm/proxy/proxy_main.c | 8 ++++- src/gtm/recovery/register.c | 49 +++++++++++++++++++++++++++------- src/include/gtm/register.h | 27 +++++++++++------- 4 files changed, 62 insertions(+), 23 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-02-17 05:16:01
|
Project "Postgres-XC". The branch, master has been updated via 3fdc303e50d599104c551dc62aaabb369086dd22 (commit) from d8d5bedcbde1f8476617102410b27ac74458f12b (commit) - Log ----------------------------------------------------------------- commit 3fdc303e50d599104c551dc62aaabb369086dd22 Author: Michael P <mic...@us...> Date: Thu Feb 17 14:19:57 2011 +0900 Multiple INSERT support for MODULO tables MODULO tables use the same distribution mechanism to select nodes as HASH tables (but not the same distribution functions). diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index d9fc689..62875f8 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -2325,6 +2325,7 @@ RewriteInsertStmt(Query *query, RangeTblEntry *values_rte) switch(locatorType) { + case LOCATOR_TYPE_MODULO: case LOCATOR_TYPE_HASH: { bool first = true; ----------------------------------------------------------------------- Summary of changes: src/backend/rewrite/rewriteHandler.c | 1 + 1 files changed, 1 insertions(+), 0 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-02-17 01:32:53
|
Project "Postgres-XC". The branch, master has been updated via d8d5bedcbde1f8476617102410b27ac74458f12b (commit) from f01d77a3423159c85cb6efd65cee01910adf793a (commit) - Log ----------------------------------------------------------------- commit d8d5bedcbde1f8476617102410b27ac74458f12b Author: Michael P <mic...@us...> Date: Thu Feb 17 10:31:43 2011 +0900 Fix for bug 3172438: node registration protocol When a node using the same ID as a node already running and registered tried to register itself, GTM returned an error and the node that failed registration tried to register once again on GTM. Between the first and second registration, the connection to GTM was reinitialized, causing the running node to be marked as disconnected. Then the node that failed registration tried a second time and modified the information of the node already running. At this point, GTM should have returned an error, but the registration succeeded on the second attempt. This patch adds some additional control when disconnecting a registered node on GTM by checking the socket ID of the connection used for node registration. This keeps data consistent in GTM and proxies for nodes in the cluster. 
diff --git a/src/gtm/proxy/proxy_main.c b/src/gtm/proxy/proxy_main.c index 9a00bee..d892680 100644 --- a/src/gtm/proxy/proxy_main.c +++ b/src/gtm/proxy/proxy_main.c @@ -1350,7 +1350,8 @@ ProcessPGXCNodeCommand(GTMProxy_ConnectionInfo *conninfo, GTM_Conn *gtm_conn, NODE_CONNECTED, remote_host, cmd_data.cd_reg.datafolder, - false)) + false, + conninfo->con_port->sock)) { ereport(ERROR, (EINVAL, @@ -1376,7 +1377,10 @@ ProcessPGXCNodeCommand(GTMProxy_ConnectionInfo *conninfo, GTM_Conn *gtm_conn, oldContext = MemoryContextSwitchTo(TopMostMemoryContext); /* Unregister Node also on Proxy */ - if (Recovery_PGXCNodeUnregister(cmd_data.cd_reg.type, cmd_data.cd_reg.nodenum, false)) + if (Recovery_PGXCNodeUnregister(cmd_data.cd_reg.type, + cmd_data.cd_reg.nodenum, + false, + conninfo->con_port->sock)) { ereport(ERROR, (EINVAL, diff --git a/src/gtm/recovery/register.c b/src/gtm/recovery/register.c index 766072d..163a860 100644 --- a/src/gtm/recovery/register.c +++ b/src/gtm/recovery/register.c @@ -163,8 +163,11 @@ pgxcnode_add_info(GTM_PGXCNodeInfo *nodeinfo) else { /* - * Check if its data (port, datafolder and remote IP) has changed - * and modify it + * Node has been disconnected abruptly. + * And we are sure that disconnections are not done by other node + * trying to use the same ID. + * So check if its data (port, datafolder and remote IP) has changed + * and modify it. 
*/ if (!pgxcnode_port_equal(curr_nodeinfo->port, nodeinfo->port)) curr_nodeinfo->port = nodeinfo->port; @@ -193,6 +196,9 @@ pgxcnode_add_info(GTM_PGXCNodeInfo *nodeinfo) /* Reconnect a disconnected node */ curr_nodeinfo->status = NODE_CONNECTED; + + /* Set socket number with the new one */ + curr_nodeinfo->socket = nodeinfo->socket; GTM_RWLockRelease(&bucket->nhb_lock); return 0; } @@ -236,12 +242,19 @@ pgxcnode_copy_char(const char *str) * Unregister the given node */ int -Recovery_PGXCNodeUnregister(GTM_PGXCNodeType type, GTM_PGXCNodeId nodenum, bool in_recovery) +Recovery_PGXCNodeUnregister(GTM_PGXCNodeType type, GTM_PGXCNodeId nodenum, bool in_recovery, int socket) { GTM_PGXCNodeInfo *nodeinfo = pgxcnode_find_info(type, nodenum); if (nodeinfo != NULL) { + /* + * Unregistration has to be made by the same connection as the one used for registration + * or the one that reconnected the node. + */ + if (!in_recovery && socket != nodeinfo->socket) + return EINVAL; + pgxcnode_remove_info(nodeinfo); /* Add a record to file on disk saying that this node has been unregistered correctly */ @@ -266,7 +279,8 @@ Recovery_PGXCNodeRegister(GTM_PGXCNodeType type, GTM_PGXCNodeStatus status, char *ipaddress, char *datafolder, - bool in_recovery) + bool in_recovery, + int socket) { GTM_PGXCNodeInfo *nodeinfo = NULL; int errcode = 0; @@ -286,6 +300,7 @@ Recovery_PGXCNodeRegister(GTM_PGXCNodeType type, nodeinfo->datafolder = pgxcnode_copy_char(datafolder); nodeinfo->ipaddress = pgxcnode_copy_char(ipaddress); nodeinfo->status = status; + nodeinfo->socket = socket; /* Add PGXC Node Info to the global hash table */ errcode = pgxcnode_add_info(nodeinfo); @@ -384,7 +399,7 @@ ProcessPGXCNodeRegister(Port *myport, StringInfo message) if (Recovery_PGXCNodeRegister(type, nodenum, port, proxynum, NODE_CONNECTED, - ipaddress, datafolder, false)) + ipaddress, datafolder, false, myport->sock)) { ereport(ERROR, (EINVAL, @@ -438,7 +453,7 @@ ProcessPGXCNodeUnregister(Port *myport, StringInfo 
message) */ oldContext = MemoryContextSwitchTo(TopMostMemoryContext); - if (Recovery_PGXCNodeUnregister(type, nodenum, false)) + if (Recovery_PGXCNodeUnregister(type, nodenum, false, myport->sock)) { ereport(ERROR, (EINVAL, @@ -652,9 +667,9 @@ Recovery_RestoreRegisterInfo(void) /* Rebuild based on the records */ if (magic == NodeRegisterMagic) Recovery_PGXCNodeRegister(type, nodenum, port, proxynum, status, - ipaddress, datafolder, true); + ipaddress, datafolder, true, 0); else - Recovery_PGXCNodeUnregister(type, nodenum, true); + Recovery_PGXCNodeUnregister(type, nodenum, true, 0); read(ctlfd, &magic, sizeof(NodeEndMagic)); @@ -703,9 +718,14 @@ Recovery_PGXCNodeDisconnect(Port *myport) if (nodeinfo != NULL) { + /* Disconnection cannot be made with another socket than the one used for registration */ + if (myport->sock != nodeinfo->socket) + return; + GTM_RWLockAcquire(&nodeinfo->node_lock, GTM_LOCKMODE_WRITE); nodeinfo->status = NODE_DISCONNECTED; + nodeinfo->socket = 0; GTM_RWLockRelease(&nodeinfo->node_lock); } @@ -714,16 +734,25 @@ Recovery_PGXCNodeDisconnect(Port *myport) } int -Recovery_PGXCNodeBackendDisconnect(GTM_PGXCNodeType type, GTM_PGXCNodeId nodenum) +Recovery_PGXCNodeBackendDisconnect(GTM_PGXCNodeType type, GTM_PGXCNodeId nodenum, int socket) { GTM_PGXCNodeInfo *nodeinfo = pgxcnode_find_info(type, nodenum); int errcode = 0; + if (nodeinfo != NULL) { + /* + * A node can be only disconnected by the same connection as the one used for registration + * or reconnection. 
+ */ + if (socket != nodeinfo->socket) + return -1; + GTM_RWLockAcquire(&nodeinfo->node_lock, GTM_LOCKMODE_WRITE); nodeinfo->status = NODE_DISCONNECTED; + nodeinfo->socket = 0; GTM_RWLockRelease(&nodeinfo->node_lock); } @@ -770,7 +799,7 @@ ProcessPGXCNodeBackendDisconnect(Port *myport, StringInfo message) */ oldContext = MemoryContextSwitchTo(TopMostMemoryContext); - if (Recovery_PGXCNodeBackendDisconnect(type, nodenum) < 0) + if (Recovery_PGXCNodeBackendDisconnect(type, nodenum, myport->sock) < 0) { elog(LOG, "Cannot disconnect Unregistered node"); } diff --git a/src/include/gtm/register.h b/src/include/gtm/register.h index 0e4b23f..9b11d1d 100644 --- a/src/include/gtm/register.h +++ b/src/include/gtm/register.h @@ -40,14 +40,15 @@ typedef enum GTM_PGXCNodeStatus typedef struct GTM_PGXCNodeInfo { - GTM_PGXCNodeType type; - GTM_PGXCNodeId nodenum; - GTM_PGXCNodeId proxynum; - GTM_PGXCNodePort port; - char *ipaddress; - char *datafolder; - GTM_PGXCNodeStatus status; - GTM_RWLock node_lock; + GTM_PGXCNodeType type; /* Type of node */ + GTM_PGXCNodeId nodenum; /* Node number */ + GTM_PGXCNodeId proxynum; /* Proxy number the node goes through */ + GTM_PGXCNodePort port; /* Port number of the node */ + char *ipaddress; /* IP address of the nodes */ + char *datafolder; /* Data folder of the node */ + GTM_PGXCNodeStatus status; /* Node status */ + GTM_RWLock node_lock; /* Lock on this structure */ + int socket; /* socket number used for registration */ } GTM_PGXCNodeInfo; int Recovery_PGXCNodeRegister(GTM_PGXCNodeType type, @@ -57,9 +58,13 @@ int Recovery_PGXCNodeRegister(GTM_PGXCNodeType type, GTM_PGXCNodeStatus status, char *ipaddress, char *datafolder, - bool in_recovery); -int Recovery_PGXCNodeUnregister(GTM_PGXCNodeType type, GTM_PGXCNodeId nodenum, bool in_recovery); -int Recovery_PGXCNodeBackendDisconnect(GTM_PGXCNodeType type, GTM_PGXCNodeId nodenum); + bool in_recovery, + int socket); +int Recovery_PGXCNodeUnregister(GTM_PGXCNodeType type, + GTM_PGXCNodeId 
nodenum, + bool in_recovery, + int socket); +int Recovery_PGXCNodeBackendDisconnect(GTM_PGXCNodeType type, GTM_PGXCNodeId nodenum, int socket); void Recovery_RecordRegisterInfo(GTM_PGXCNodeInfo *nodeinfo, bool is_register); void Recovery_RestoreRegisterInfo(void); ----------------------------------------------------------------------- Summary of changes: src/gtm/proxy/proxy_main.c | 8 +++++- src/gtm/recovery/register.c | 49 ++++++++++++++++++++++++++++++++++-------- src/include/gtm/register.h | 27 ++++++++++++++--------- 3 files changed, 61 insertions(+), 23 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-02-16 09:28:45
|
Project "Postgres-XC". The branch, ha_support has been updated via 61d595c720755a27dfd1d53fb62b60e85f4e2732 (commit) via f01d77a3423159c85cb6efd65cee01910adf793a (commit) from c2a5947bcb71f92e2a6ebf11b8ec3f3937a66f61 (commit) - Log ----------------------------------------------------------------- commit 61d595c720755a27dfd1d53fb62b60e85f4e2732 Merge: c2a5947 f01d77a Author: Michael P <mic...@us...> Date: Wed Feb 16 18:34:40 2011 +0900 Merge branch 'master' into ha_support ----------------------------------------------------------------------- Summary of changes: src/gtm/main/gtm_seq.c | 8 ++++++-- 1 files changed, 6 insertions(+), 2 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-02-16 09:24:49
|
Project "Postgres-XC". The branch, master has been updated via f01d77a3423159c85cb6efd65cee01910adf793a (commit) from 07465f400258a1ad59d313c008ebc3f8a42491fb (commit) - Log ----------------------------------------------------------------- commit f01d77a3423159c85cb6efd65cee01910adf793a Author: Michael P <mic...@us...> Date: Wed Feb 16 18:28:43 2011 +0900 Fix for bug 3142430 Setting a sequence with setval for non-called cases made nextval return wrong results. This was a bug from pg_regress. diff --git a/src/gtm/main/gtm_seq.c b/src/gtm/main/gtm_seq.c index 359a959..570636b 100644 --- a/src/gtm/main/gtm_seq.c +++ b/src/gtm/main/gtm_seq.c @@ -695,8 +695,12 @@ GTM_SeqSetVal(GTM_SequenceKey seqkey, GTM_Sequence nextval, bool iscalled) if (seqinfo->gs_value != nextval) seqinfo->gs_value = nextval; - if (seqinfo->gs_called != iscalled) - seqinfo->gs_called = iscalled; + + seqinfo->gs_called = iscalled; + + /* If sequence is not called, reset the init value to the value set */ + if (!iscalled) + seqinfo->gs_init_value = nextval; /* Remove the old key with the old name */ GTM_RWLockRelease(&seqinfo->gs_lock); ----------------------------------------------------------------------- Summary of changes: src/gtm/main/gtm_seq.c | 8 ++++++-- 1 files changed, 6 insertions(+), 2 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-02-02 08:15:44
|
Project "Postgres-XC". The branch, ha_support has been updated via c2a5947bcb71f92e2a6ebf11b8ec3f3937a66f61 (commit) via 07465f400258a1ad59d313c008ebc3f8a42491fb (commit) from 63810afccc1ae9ea8ee0831994106adb3087b795 (commit) - Log ----------------------------------------------------------------- commit c2a5947bcb71f92e2a6ebf11b8ec3f3937a66f61 Merge: 63810af 07465f4 Author: Michael P <mic...@us...> Date: Wed Feb 2 17:21:19 2011 +0900 Merge branch 'master' into ha_support ----------------------------------------------------------------------- Summary of changes: src/backend/catalog/heap.c | 36 ++++++++- src/backend/catalog/pgxc_class.c | 2 +- src/backend/commands/copy.c | 34 ++++---- src/backend/commands/indexcmds.c | 2 +- src/backend/parser/gram.y | 20 ++++- src/backend/parser/parse_utilcmd.c | 13 ++-- src/backend/pgxc/locator/locator.c | 148 +++++++++++++++++++++++++++++++++++- src/backend/pgxc/plan/planner.c | 39 ++++++---- src/include/nodes/primnodes.h | 3 +- src/include/parser/kwlist.h | 3 + src/include/pgxc/locator.h | 8 ++ 11 files changed, 255 insertions(+), 53 deletions(-) hooks/post-receive -- Postgres-XC |
From: Michael P. <mic...@us...> - 2011-02-02 08:14:13
|
Project "Postgres-XC". The branch, master has been updated via 07465f400258a1ad59d313c008ebc3f8a42491fb (commit) from a1b041e2656b64689cd9719cc5662984a4f5f5a4 (commit) - Log ----------------------------------------------------------------- commit 07465f400258a1ad59d313c008ebc3f8a42491fb Author: Michael P <mic...@us...> Date: Wed Feb 2 17:12:58 2011 +0900 CREATE TABLE: new distribution function MODULO new catalogue is named pgxc_class. It has the following columns pcrelid - The OID of the added relation in pg_class pclocatortype - XC defines distribution types as a single character in locator.h The provided distribution type is stored here. Distribution types can be 'R' - Replicated 'H' - Based on a column's hash value 'N' - Based on round robin for all data nodes 'M' - Based on modulo of a column's value This new distribution type is added by this patch. Similarly G, S and C are also defined. pcattnum - The number of the column used for replication valid only for H and M - Zero for the rest This patch also supports copy and INSERT(SELECT). It does NOT SUPPORT multiple INSERT. 
The following issues are remaining though: 1) Non-definition of distribution column in table makes a crash 2) Creation of an index on a column that is not the distribution column 3) Creation of a foreign key on distribution column 4) Distribution on another column other than unique makes a crash diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 75637fc..aa12288 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -828,6 +828,7 @@ AddRelationDistribution (Oid relid, switch (locatortype) { case LOCATOR_TYPE_HASH: + case LOCATOR_TYPE_MODULO: attnum = rel_loc_info->partAttrNum; break; @@ -895,6 +896,26 @@ AddRelationDistribution (Oid relid, locatortype = LOCATOR_TYPE_HASH; break; + case DISTTYPE_MODULO: + /* User specified modulo column, validate */ + attnum = get_attnum(relid, distributeby->colname); + if (!attnum) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_TABLE_DEFINITION), + errmsg("Invalid distribution column specified"))); + } + + if (!IsModuloDistributable(descriptor->attrs[attnum-1]->atttypid)) + { + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("Column %s is not modulo distributable data type", + distributeby->colname))); + } + locatortype = LOCATOR_TYPE_MODULO; + break; + case DISTTYPE_REPLICATION: locatortype = LOCATOR_TYPE_REPLICATED; break; @@ -910,12 +931,17 @@ AddRelationDistribution (Oid relid, } } - if (locatortype == LOCATOR_TYPE_HASH) + switch (locatortype) { - /* PGXCTODO */ - /* Use these for now until we make allowing different algorithms more flexible */ - hashalgorithm = 1; - hashbuckets = HASH_SIZE; + case LOCATOR_TYPE_HASH: + /* PGXCTODO */ + /* Use these for now until we make allowing different algorithms more flexible */ + hashalgorithm = 1; + hashbuckets = HASH_SIZE; + break; + + case LOCATOR_TYPE_MODULO: + break; } PgxcClassCreate (relid, locatortype, attnum, hashalgorithm, hashbuckets); diff --git a/src/backend/catalog/pgxc_class.c b/src/backend/catalog/pgxc_class.c 
index 6b897c8..08462c2 100644 --- a/src/backend/catalog/pgxc_class.c +++ b/src/backend/catalog/pgxc_class.c @@ -51,7 +51,7 @@ PgxcClassCreate(Oid pcrelid, values[Anum_pgxc_class_pcrelid - 1] = ObjectIdGetDatum(pcrelid); values[Anum_pgxc_class_pclocatortype - 1] = ObjectIdGetDatum(pclocatortype); - if (pclocatortype == LOCATOR_TYPE_HASH) + if (pclocatortype == LOCATOR_TYPE_HASH || pclocatortype == LOCATOR_TYPE_MODULO) { values[Anum_pgxc_class_pcattnum - 1] = ObjectIdGetDatum(pcattnum); values[Anum_pgxc_class_pchashalgorithm - 1] = ObjectIdGetDatum(pchashalgorithm); diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 62cb748..2d0fb13 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -181,7 +181,7 @@ typedef struct CopyStateData /* Locator information */ RelationLocInfo *rel_loc; /* the locator key */ - int hash_idx; /* index of the hash column */ + int idx_dist_by_col; /* index of the distributed by column */ PGXCNodeHandle **connections; /* Involved data node connections */ TupleDesc tupDesc; /* for INSERT SELECT */ @@ -2312,14 +2312,14 @@ CopyFrom(CopyState cstate) #ifdef PGXC if (IS_PGXC_COORDINATOR && cstate->rel_loc) { - Datum *hash_value = NULL; + Datum *dist_col_value = NULL; - if (cstate->hash_idx >= 0 && !nulls[cstate->hash_idx]) - hash_value = &values[cstate->hash_idx]; + if (cstate->idx_dist_by_col >= 0 && !nulls[cstate->idx_dist_by_col]) + dist_col_value = &values[cstate->idx_dist_by_col]; if (DataNodeCopyIn(cstate->line_buf.data, cstate->line_buf.len, - GetRelationNodes(cstate->rel_loc, (long *)hash_value, + GetRelationNodes(cstate->rel_loc, (long *)dist_col_value, RELATION_ACCESS_INSERT), cstate->connections)) ereport(ERROR, @@ -3737,7 +3737,7 @@ static ExecNodes* build_copy_statement(CopyState cstate, List *attnamelist, TupleDesc tupDesc, bool is_from, List *force_quote, List *force_notnull) { - char *hash_att; + char *pPartByCol; ExecNodes *exec_nodes = makeNode(ExecNodes); @@ -3749,10 +3749,10 @@ 
build_copy_statement(CopyState cstate, List *attnamelist, */ cstate->rel_loc = GetRelationLocInfo(RelationGetRelid(cstate->rel)); - hash_att = GetRelationHashColumn(cstate->rel_loc); + pPartByCol = GetRelationDistColumn(cstate->rel_loc); if (cstate->rel_loc) { - if (is_from || hash_att) + if (is_from || pPartByCol) exec_nodes->nodelist = list_copy(cstate->rel_loc->nodeList); else { @@ -3764,8 +3764,8 @@ build_copy_statement(CopyState cstate, List *attnamelist, } } - cstate->hash_idx = -1; - if (hash_att) + cstate->idx_dist_by_col = -1; + if (pPartByCol) { List *attnums; ListCell *cur; @@ -3774,9 +3774,9 @@ build_copy_statement(CopyState cstate, List *attnamelist, foreach(cur, attnums) { int attnum = lfirst_int(cur); - if (namestrcmp(&(tupDesc->attrs[attnum - 1]->attname), hash_att) == 0) + if (namestrcmp(&(tupDesc->attrs[attnum - 1]->attname), pPartByCol) == 0) { - cstate->hash_idx = attnum - 1; + cstate->idx_dist_by_col = attnum - 1; break; } } @@ -3903,7 +3903,7 @@ DoInsertSelectCopy(EState *estate, TupleTableSlot *slot) HeapTuple tuple; Datum *values; bool *nulls; - Datum *hash_value = NULL; + Datum *dist_col_value = NULL; MemoryContext oldcontext; CopyState cstate; @@ -4016,14 +4016,14 @@ DoInsertSelectCopy(EState *estate, TupleTableSlot *slot) /* Format the input tuple for sending */ CopyOneRowTo(cstate, 0, values, nulls); - /* Get hash partition column, if any */ - if (cstate->hash_idx >= 0 && !nulls[cstate->hash_idx]) - hash_value = &values[cstate->hash_idx]; + /* Get dist column, if any */ + if (cstate->idx_dist_by_col >= 0 && !nulls[cstate->idx_dist_by_col]) + dist_col_value = &values[cstate->idx_dist_by_col]; /* Send item to the appropriate data node(s) (buffer) */ if (DataNodeCopyIn(cstate->fe_msgbuf->data, cstate->fe_msgbuf->len, - GetRelationNodes(cstate->rel_loc, (long *)hash_value, RELATION_ACCESS_INSERT), + GetRelationNodes(cstate->rel_loc, (long *)dist_col_value, RELATION_ACCESS_INSERT), cstate->connections)) ereport(ERROR, 
(errcode(ERRCODE_CONNECTION_EXCEPTION), diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 1b87bcb..4df4da3 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -430,7 +430,7 @@ DefineIndex(RangeVar *heapRelation, if (!isSafe) ereport(ERROR, (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), - errmsg("Unique index of partitioned table must contain the hash distribution column."))); + errmsg("Unique index of partitioned table must contain the hash/modulo distribution column."))); } #endif /* diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index b0d8c61..e1266ed 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -436,7 +436,7 @@ static TypeName *TableFuncTypeName(List *columns); */ /* ordinary key words in alphabetical order */ -/* PGXC - added REPLICATION, DISTRIBUTE, and HASH */ +/* PGXC - added REPLICATION, DISTRIBUTE, MODULO and HASH */ %token <keyword> ABORT_P ABSOLUTE_P ACCESS ACTION ADD_P ADMIN AFTER AGGREGATE ALL ALSO ALTER ALWAYS ANALYSE ANALYZE AND ANY ARRAY AS ASC ASSERTION ASSIGNMENT ASYMMETRIC AT AUTHORIZATION @@ -484,9 +484,9 @@ static TypeName *TableFuncTypeName(List *columns); LANCOMPILER LANGUAGE LARGE_P LAST_P LC_COLLATE_P LC_CTYPE_P LEADING LEAST LEFT LEVEL LIKE LIMIT LISTEN LOAD LOCAL LOCALTIME LOCALTIMESTAMP LOCATION LOCK_P LOGIN_P - - MAPPING MATCH MAXVALUE MINUTE_P MINVALUE MODE MONTH_P MOVE - +/* PGXC_BEGIN */ + MAPPING MATCH MAXVALUE MINUTE_P MINVALUE MODE MODULO MONTH_P MOVE +/* PGXC_END */ NAME_P NAMES NATIONAL NATURAL NCHAR NEW NEXT NO NOCREATEDB NOCREATEROLE NOCREATEUSER NODE NOINHERIT NOLOGIN_P NONE NOSUPERUSER NOT NOTHING NOTIFY NOTNULL NOWAIT NULL_P NULLIF NULLS_P NUMERIC @@ -2550,6 +2550,13 @@ OptDistributeBy: DistributeByHash '(' name ')' n->colname = $3; $$ = n; } + | DISTRIBUTE BY MODULO '(' name ')' + { + DistributeBy *n = makeNode(DistributeBy); + n->disttype = DISTTYPE_MODULO; + n->colname = $5; + $$ = n; + } | DISTRIBUTE BY 
REPLICATION { DistributeBy *n = makeNode(DistributeBy); @@ -10282,7 +10289,7 @@ ColLabel: IDENT { $$ = $1; } /* "Unreserved" keywords --- available for use as any kind of name. */ -/* PGXC - added DISTRIBUTE, HASH, REPLICATION */ +/* PGXC - added DISTRIBUTE, HASH, REPLICATION, MODULO */ unreserved_keyword: ABORT_P | ABSOLUTE_P @@ -10419,6 +10426,9 @@ unreserved_keyword: | MINUTE_P | MINVALUE | MODE +/* PGXC_BEGIN */ + | MODULO +/* PGXC_END */ | MONTH_P | MOVE | NAME_P diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index 580ed18..ea8cc45 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -2266,7 +2266,7 @@ CheckLocalIndexColumn (char loctype, char *partcolname, char *indexcolname) ereport(ERROR, (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), errmsg("Cannot locally enforce a unique index on round robin distributed table."))); - else if (loctype == LOCATOR_TYPE_HASH) + else if (loctype == LOCATOR_TYPE_HASH || loctype == LOCATOR_TYPE_MODULO) { if (partcolname && indexcolname && strcmp(partcolname, indexcolname) == 0) return true; @@ -2307,12 +2307,13 @@ static checkLocalFKConstraints(CreateStmtContext *cxt) } /* - * See if we are hash partitioned and the column appears in the + * See if we are hash or modulo partitioned and the column appears in the * constraint, and it corresponds to the position in the referenced table. 
*/ if (cxt->isalter) { - if (cxt->rel->rd_locator_info->locatorType == LOCATOR_TYPE_HASH) + if (cxt->rel->rd_locator_info->locatorType == LOCATOR_TYPE_HASH || + cxt->rel->rd_locator_info->locatorType == LOCATOR_TYPE_MODULO) { checkcolname = cxt->rel->rd_locator_info->partAttrName; } @@ -2352,13 +2353,13 @@ static checkLocalFKConstraints(CreateStmtContext *cxt) if (pos >= list_length(fkconstraint->fk_attrs)) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("Hash distributed table must include distribution column in index"))); + errmsg("Hash/Modulo distributed table must include distribution column in index"))); /* Verify that the referenced table is partitioned at the same position in the index */ - if (!IsHashColumnForRelId(pk_rel_id, strVal(list_nth(fkconstraint->pk_attrs,pos)))) + if (!IsDistColumnForRelId(pk_rel_id, strVal(list_nth(fkconstraint->pk_attrs,pos)))) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("Hash distribution column does not refer to hash distribution column in referenced table."))); + errmsg("Hash/Modulo distribution column does not refer to hash/modulo distribution column in referenced table."))); } } } diff --git a/src/backend/pgxc/locator/locator.c b/src/backend/pgxc/locator/locator.c index 199293f..d1aef8b 100644 --- a/src/backend/pgxc/locator/locator.c +++ b/src/backend/pgxc/locator/locator.c @@ -158,6 +158,46 @@ get_node_from_hash(int hash) return mappingTable[hash]; } +/* + * compute_modulo + */ +static int +compute_modulo(int valueOfPartCol) +{ + return ((abs(valueOfPartCol)) % NumDataNodes)+1; +} + +/* + * get_node_from_modulo - determine node based on modulo + * + */ +static int +get_node_from_modulo(int modulo) +{ + if (modulo > NumDataNodes || modulo <= 0) + ereport(ERROR, (errmsg("Modulo value out of range\n"))); + + return modulo; +} + +/* + * GetRelationDistColumn - Returns the name of the hash or modulo distribution column + * First hash distribution is checked + * Retuens NULL if the table is neither hash nor 
modulo distributed + */ +char * +GetRelationDistColumn(RelationLocInfo * rel_loc_info) +{ +char *pColName; + + pColName = NULL; + + pColName = GetRelationHashColumn(rel_loc_info); + if (pColName == NULL) + pColName = GetRelationModuloColumn(rel_loc_info); + + return pColName; +} /* * Returns whether or not the data type is hash distributable with PG-XC @@ -172,9 +212,8 @@ IsHashDistributable(Oid col_type) return false; } - /* - * get_hash_column - return hash column for relation. + * GetRelationHashColumn - return hash column for relation. * * Returns NULL if the relation is not hash partitioned. */ @@ -230,6 +269,95 @@ IsHashColumnForRelId(Oid relid, char *part_col_name) return IsHashColumn(rel_loc_info, part_col_name); } +/* + * IsDistColumnForRelId - return whether or not column for relation is used for hash or modulo distribution + * + */ +bool +IsDistColumnForRelId(Oid relid, char *part_col_name) +{ +bool bRet; +RelationLocInfo *rel_loc_info; + + rel_loc_info = GetRelationLocInfo(relid); + bRet = false; + + bRet = IsHashColumn(rel_loc_info, part_col_name); + if (bRet == false) + IsModuloColumn(rel_loc_info, part_col_name); + return bRet; +} + + +/* + * Returns whether or not the data type is modulo distributable with PG-XC + * PGXCTODO - expand support for other data types! + */ +bool +IsModuloDistributable(Oid col_type) +{ + if (col_type == INT4OID || col_type == INT2OID) + return true; + + return false; +} + +/* + * GetRelationModuloColumn - return modulo column for relation. + * + * Returns NULL if the relation is not modulo partitioned. 
+ */ +char * +GetRelationModuloColumn(RelationLocInfo * rel_loc_info) +{ + char *column_str = NULL; + + if (rel_loc_info == NULL) + column_str = NULL; + else if (rel_loc_info->locatorType != LOCATOR_TYPE_MODULO) + column_str = NULL; + else + { + int len = strlen(rel_loc_info->partAttrName); + + column_str = (char *) palloc(len + 1); + strncpy(column_str, rel_loc_info->partAttrName, len + 1); + } + + return column_str; +} + +/* + * IsModuloColumn - return whether or not column for relation is used for modulo distribution. + * + */ +bool +IsModuloColumn(RelationLocInfo *rel_loc_info, char *part_col_name) +{ + bool ret_value = false; + + if (!rel_loc_info || !part_col_name) + ret_value = false; + else if (rel_loc_info->locatorType != LOCATOR_TYPE_MODULO) + ret_value = false; + else + ret_value = !strcmp(part_col_name, rel_loc_info->partAttrName); + + return ret_value; +} + + +/* + * IsModuloColumnForRelId - return whether or not column for relation is used for modulo distribution. + * + */ +bool +IsModuloColumnForRelId(Oid relid, char *part_col_name) +{ + RelationLocInfo *rel_loc_info = GetRelationLocInfo(relid); + + return IsModuloColumn(rel_loc_info, part_col_name); +} /* * Update the round robin node for the relation @@ -365,6 +493,19 @@ GetRelationNodes(RelationLocInfo *rel_loc_info, long *partValue, exec_nodes->nodelist = list_copy(rel_loc_info->nodeList); break; + case LOCATOR_TYPE_MODULO: + if (partValue != NULL) + /* in prototype, all partitioned tables use same map */ + exec_nodes->nodelist = lappend_int(NULL, get_node_from_modulo(compute_modulo(*partValue))); + else + if (accessType == RELATION_ACCESS_INSERT) + /* Insert NULL to node 1 */ + exec_nodes->nodelist = lappend_int(NULL, 1); + else + /* Use all nodes for other types of access */ + exec_nodes->nodelist = list_copy(rel_loc_info->nodeList); + break; + case LOCATOR_TYPE_SINGLE: /* just return first (there should only be one) */ @@ -420,6 +561,9 @@ ConvertToLocatorType(int disttype) case 
DISTTYPE_REPLICATION: loctype = LOCATOR_TYPE_REPLICATED; break; + case DISTTYPE_MODULO: + loctype = LOCATOR_TYPE_MODULO; + break; default: ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), diff --git a/src/backend/pgxc/plan/planner.c b/src/backend/pgxc/plan/planner.c index c8e67a6..0c19756 100644 --- a/src/backend/pgxc/plan/planner.c +++ b/src/backend/pgxc/plan/planner.c @@ -499,7 +499,8 @@ get_plan_nodes_insert(PlannerInfo *root, RemoteQuery *step) /* Optimization is only done for distributed tables */ if (query->jointree != NULL && query->jointree->fromlist != NULL - && rel_loc_info->locatorType == LOCATOR_TYPE_HASH) + && (rel_loc_info->locatorType == LOCATOR_TYPE_HASH || + rel_loc_info->locatorType == LOCATOR_TYPE_MODULO)) { /* * See if it is "single-step" @@ -526,7 +527,8 @@ get_plan_nodes_insert(PlannerInfo *root, RemoteQuery *step) /* If the source is not hash-based (eg, replicated) also send * through general planner */ - if (step->exec_nodes->baselocatortype != LOCATOR_TYPE_HASH) + if (step->exec_nodes->baselocatortype != LOCATOR_TYPE_HASH && + step->exec_nodes->baselocatortype != LOCATOR_TYPE_MODULO) { step->exec_nodes = NULL; return; @@ -539,8 +541,9 @@ get_plan_nodes_insert(PlannerInfo *root, RemoteQuery *step) } - if (rel_loc_info->locatorType == LOCATOR_TYPE_HASH && - rel_loc_info->partAttrName != NULL) + if ( (rel_loc_info->partAttrName != NULL) && + ( (rel_loc_info->locatorType == LOCATOR_TYPE_HASH) || + (rel_loc_info->locatorType == LOCATOR_TYPE_MODULO) )) { Expr *checkexpr; TargetEntry *tle = NULL; @@ -600,8 +603,9 @@ get_plan_nodes_insert(PlannerInfo *root, RemoteQuery *step) (errcode(ERRCODE_STATEMENT_TOO_COMPLEX), (errmsg("Could not find relation for oid = %d", rte->relid)))); - if (source_rel_loc_info->locatorType == LOCATOR_TYPE_HASH && - strcmp(col_base->colname, source_rel_loc_info->partAttrName) == 0) + if ((strcmp(col_base->colname, source_rel_loc_info->partAttrName) == 0) && + ( (source_rel_loc_info->locatorType == LOCATOR_TYPE_HASH) || 
+ (source_rel_loc_info->locatorType == LOCATOR_TYPE_MODULO) )) { /* * Partition columns match, we have a "single-step INSERT SELECT". @@ -1072,8 +1076,9 @@ examine_conditions_walker(Node *expr_node, XCWalkerContext *context) if (!rel_loc_info1) return true; - /* If hash partitioned, check if the part column was used */ - if (IsHashColumn(rel_loc_info1, column_base->colname)) + /* If hash or modulo partitioned, check if the part column was used */ + if (IsHashColumn(rel_loc_info1, column_base->colname) || + IsModuloColumn(rel_loc_info1, column_base->colname)) { /* add to partitioned literal join conditions */ Literal_Comparison *lit_comp = @@ -1174,8 +1179,10 @@ examine_conditions_walker(Node *expr_node, XCWalkerContext *context) * PGXCTODO - for the prototype, we assume all partitioned * tables are on the same nodes. */ - if (IsHashColumn(rel_loc_info1, column_base->colname) - && IsHashColumn(rel_loc_info2, column_base2->colname)) + if ( ( (IsHashColumn(rel_loc_info1, column_base->colname)) && + (IsHashColumn(rel_loc_info2, column_base2->colname))) || + ( (IsModuloColumn(rel_loc_info1, column_base->colname)) && + (IsModuloColumn(rel_loc_info2, column_base2->colname)))) { /* We found a partitioned join */ Parent_Child_Join *parent_child = (Parent_Child_Join *) @@ -1219,7 +1226,8 @@ examine_conditions_walker(Node *expr_node, XCWalkerContext *context) if (!rel_loc_info1) return true; - if (IsHashColumn(rel_loc_info1, column_base->colname)) + if (IsHashColumn(rel_loc_info1, column_base->colname) || + IsModuloColumn(rel_loc_info1, column_base->colname)) { Expr_Comparison *expr_comp = palloc(sizeof(Expr_Comparison)); @@ -1668,7 +1676,8 @@ get_plan_nodes_walker(Node *query_node, XCWalkerContext *context) if (!rel_loc_info) return true; - if (rel_loc_info->locatorType != LOCATOR_TYPE_HASH) + if (rel_loc_info->locatorType != LOCATOR_TYPE_HASH && + rel_loc_info->locatorType != LOCATOR_TYPE_MODULO) /* do not need to determine partitioning expression */ 
context->query_step->exec_nodes = GetRelationNodes(rel_loc_info, NULL, @@ -3197,9 +3206,9 @@ validate_part_col_updatable(const Query *query) (errmsg("Could not find relation for oid = %d", rte->relid)))); - /* Only LOCATOR_TYPE_HASH should be checked */ - if (rel_loc_info->locatorType == LOCATOR_TYPE_HASH && - rel_loc_info->partAttrName != NULL) + /* Only LOCATOR_TYPE_HASH & LOCATOR_TYPE_MODULO should be checked */ + if ( (rel_loc_info->partAttrName != NULL) && + ( (rel_loc_info->locatorType == LOCATOR_TYPE_HASH) || (rel_loc_info->locatorType == LOCATOR_TYPE_MODULO) ) ) { /* It is a partitioned table, check partition column in targetList */ foreach(lc, query->targetList) diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 94fe89c..b48df0f 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -1185,7 +1185,8 @@ typedef enum DistributionType { DISTTYPE_REPLICATION, /* Replicated */ DISTTYPE_HASH, /* Hash partitioned */ - DISTTYPE_ROUNDROBIN /* Round Robin */ + DISTTYPE_ROUNDROBIN, /* Round Robin */ + DISTTYPE_MODULO /* Modulo partitioned */ } DistributionType; /*---------- diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index c29e316..2c84923 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -242,6 +242,9 @@ PG_KEYWORD("maxvalue", MAXVALUE, UNRESERVED_KEYWORD) PG_KEYWORD("minute", MINUTE_P, UNRESERVED_KEYWORD) PG_KEYWORD("minvalue", MINVALUE, UNRESERVED_KEYWORD) PG_KEYWORD("mode", MODE, UNRESERVED_KEYWORD) +#ifdef PGXC +PG_KEYWORD("modulo", MODULO, UNRESERVED_KEYWORD) +#endif PG_KEYWORD("month", MONTH_P, UNRESERVED_KEYWORD) PG_KEYWORD("move", MOVE, UNRESERVED_KEYWORD) PG_KEYWORD("name", NAME_P, UNRESERVED_KEYWORD) diff --git a/src/include/pgxc/locator.h b/src/include/pgxc/locator.h index 1bc8949..5948fae 100644 --- a/src/include/pgxc/locator.h +++ b/src/include/pgxc/locator.h @@ -21,6 +21,7 @@ #define LOCATOR_TYPE_SINGLE 'S' #define LOCATOR_TYPE_RROBIN 'N' 
#define LOCATOR_TYPE_CUSTOM 'C' +#define LOCATOR_TYPE_MODULO 'M' #define HASH_SIZE 4096 #define HASH_MASK 0x00000FFF; @@ -107,4 +108,11 @@ extern List *GetAnyDataNode(void); extern void RelationBuildLocator(Relation rel); extern void FreeRelationLocInfo(RelationLocInfo *relationLocInfo); +extern bool IsModuloDistributable(Oid col_type); +extern char *GetRelationModuloColumn(RelationLocInfo * rel_loc_info); +extern bool IsModuloColumn(RelationLocInfo *rel_loc_info, char *part_col_name); +extern bool IsModuloColumnForRelId(Oid relid, char *part_col_name); +extern char *GetRelationDistColumn(RelationLocInfo * rel_loc_info); +extern bool IsDistColumnForRelId(Oid relid, char *part_col_name); + #endif /* LOCATOR_H */ ----------------------------------------------------------------------- Summary of changes: src/backend/catalog/heap.c | 36 ++++++++- src/backend/catalog/pgxc_class.c | 2 +- src/backend/commands/copy.c | 34 ++++---- src/backend/commands/indexcmds.c | 2 +- src/backend/parser/gram.y | 20 ++++- src/backend/parser/parse_utilcmd.c | 13 ++-- src/backend/pgxc/locator/locator.c | 148 +++++++++++++++++++++++++++++++++++- src/backend/pgxc/plan/planner.c | 39 ++++++---- src/include/nodes/primnodes.h | 3 +- src/include/parser/kwlist.h | 3 + src/include/pgxc/locator.h | 8 ++ 11 files changed, 255 insertions(+), 53 deletions(-) hooks/post-receive -- Postgres-XC |