diff options
author | Michael P | 2012-03-09 02:30:02 +0000 |
---|---|---|
committer | Michael P | 2012-03-09 02:37:33 +0000 |
commit | 36d40e8b5dd844c99af25a053f6540f36ebd82db (patch) | |
tree | ad788c12509d8f066093103bd7cc5b6da29584f1 | |
parent | b3f877f80a81eab00444d21887fc5a25d3ea2876 (diff) |
Switch node definition information to shared memory in pooler
This removes the dependency of the pooler process on the catalog table cache.
Memory contexts in the pooler are now organized as follows:
- PoolerMemoryContext (well an existing one), allocated in TopMemoryContext
and used by the pooler process
- PoolerCoreContext, allocated in PoolerMemoryContext, used by database pool
contexts.
- PoolerAgentContext, allocated in PoolerMemoryContext, used by pooler agents.
The pooler agent now uses node Oids instead of node indexes. This
protects pooler agents in case of node reordering after catalog tables
being modified due to node DDL. The warning/error message which was thrown
back to client connection from server if connection information was
inconsistent is also removed thanks to that.
Two new GUC parameters on Coordinator, max_coordinators and
max_datanodes, define the maximum number of Coordinators and
Datanodes respectively. These represent the maximum number of
nodes that can be defined in the cluster, and do not influence
the dynamic behavior of the cluster.
A node definition slot in shared memory takes approximately 140 bytes.
Patch by Andrei Martsinchyk.
Review, some fixes (preferred/primary support...) and some workarounds
by me. Performance has been checked by Sutou Takayuki.
-rw-r--r-- | doc-xc/src/sgml/config.sgmlin | 24 | ||||
-rw-r--r-- | doc-xc/src/sgml/installation.sgmlin | 24 | ||||
-rw-r--r-- | doc-xc/src/sgml/runtime.sgmlin | 32 | ||||
-rw-r--r-- | src/backend/catalog/heap.c | 42 | ||||
-rw-r--r-- | src/backend/pgxc/nodemgr/nodemgr.c | 281 | ||||
-rw-r--r-- | src/backend/pgxc/pool/pgxcnode.c | 42 | ||||
-rw-r--r-- | src/backend/pgxc/pool/poolmgr.c | 1083 | ||||
-rw-r--r-- | src/backend/pgxc/pool/poolutils.c | 16 | ||||
-rw-r--r-- | src/backend/storage/ipc/ipci.c | 12 | ||||
-rw-r--r-- | src/backend/tcop/postgres.c | 20 | ||||
-rw-r--r-- | src/backend/utils/adt/dbsize.c | 4 | ||||
-rw-r--r-- | src/backend/utils/adt/lockfuncs.c | 2 | ||||
-rw-r--r-- | src/backend/utils/misc/guc.c | 27 | ||||
-rw-r--r-- | src/backend/utils/misc/postgresql.conf.sample | 8 | ||||
-rw-r--r-- | src/include/pgxc/nodemgr.h | 30 | ||||
-rw-r--r-- | src/include/pgxc/poolmgr.h | 31 | ||||
-rw-r--r-- | src/include/storage/lwlock.h | 1 |
17 files changed, 805 insertions, 874 deletions
diff --git a/doc-xc/src/sgml/config.sgmlin b/doc-xc/src/sgml/config.sgmlin index 3b33b7a4f9..23bf4a3385 100644 --- a/doc-xc/src/sgml/config.sgmlin +++ b/doc-xc/src/sgml/config.sgmlin @@ -6858,6 +6858,30 @@ LOG: CleanUpLock: deleting: lock(0xb7acd844) id(24688,24696,0,0,0,1) </listitem> </varlistentry> + <varlistentry id="guc-max-coordinators" xreflabel="max_coordinators"> + <term><varname>max_coordinators</varname> (<type>integer</type>)</term> + <indexterm> + <primary><varname>max_coordinators</> configuration parameter</primary> + </indexterm> + <listitem> + <para> + Maximum number of Coordinators that can be configured in the cluster. + </para> + </listitem> + </varlistentry> + + <varlistentry id="guc-max-datanodes" xreflabel="max_datanodes"> + <term><varname>max_datanodes</varname> (<type>integer</type>)</term> + <indexterm> + <primary><varname>max_datanodes</> configuration parameter</primary> + </indexterm> + <listitem> + <para> + Maximum number of Datanodes that can be configured in the cluster. + </para> + </listitem> + </varlistentry> + <varlistentry id="guc-pgxc-node-name" xreflabel="pgxc_node_name"> <term><varname>pgxc_node_name</varname> (<type>integer</type>)</term> <indexterm> diff --git a/doc-xc/src/sgml/installation.sgmlin b/doc-xc/src/sgml/installation.sgmlin index 3494cf0cbc..f4f7bdcbd3 100644 --- a/doc-xc/src/sgml/installation.sgmlin +++ b/doc-xc/src/sgml/installation.sgmlin @@ -2409,6 +2409,30 @@ postgres$ <userinput>/usr/local/pgsql/bin/initdb -D /usr/local/pgsql/data</> </varlistentry> <varlistentry> + <term><envar>max_coordinators</envar></term> + <listitem> +&xconly; + <para> + This parameter specifies maximum number of the Coordinators that can + be added to the cluster. Cluster would have to be restarted to increase + the value. 
+ </para> + </listitem> + </varlistentry> + + <varlistentry> + <term><envar>max_datanodes</envar></term> + <listitem> +&xconly; + <para> + This parameter specifies maximum number of the Datanodes that can + be added to the cluster. Cluster would have to be restarted to increase + the value. + </para> + </listitem> + </varlistentry> + + <varlistentry> <term><envar>pgxc_node_name</envar></term> <listitem> &xconly; diff --git a/doc-xc/src/sgml/runtime.sgmlin b/doc-xc/src/sgml/runtime.sgmlin index c3685abc02..5cf2439df8 100644 --- a/doc-xc/src/sgml/runtime.sgmlin +++ b/doc-xc/src/sgml/runtime.sgmlin @@ -626,10 +626,10 @@ $ <userinput>gtm_ctl start -S gtm_proxy -i 1 -D /usr/local/pgsql/gtm_proxy -i 1 <term>max_pool_size</term> <listitem> <para> - Coordinator maintains connections to datanode as a pool. + Coordinator maintains connections to Datanode as a pool. This parameter specifies max number of connections the - coordinator maintains. - Specify <option>max_connection</> value of datanodes as this + Coordinator maintains. + Specify <option>max_connection</> value of remote nodes as this parameter value. </para> </listitem> @@ -639,13 +639,37 @@ $ <userinput>gtm_ctl start -S gtm_proxy -i 1 -D /usr/local/pgsql/gtm_proxy -i 1 <term>min_pool_size</term> <listitem> <para> - This is the minimum number of coordinator to datanode connections + This is the minimum number of Coordinator to remote node connections maintained by the pooler. Typically specify 1. </para> </listitem> </varlistentry> + <varlistentry> + <term>max_coordinators</term> + <listitem> + <para> + This is the maximum number of Coordinators that can be configured in the cluster. + Specify exact number if it is not planned to add more Coordinators + while cluster is running, or greater, if it is desired to dynamically + resize cluster. It costs about 140 bytes of shared memory per slot. 
+ </para> + </listitem> + </varlistentry> + + <varlistentry> + <term>max_datanodes</term> + <listitem> + <para> + This is the maximum number of Datanodes configured in the cluster. + Specify exact number if it is not planned to add more Datanodes + while cluster is running, or greater, if it is desired to dynamically + resize cluster. It costs about 140 bytes of shared memory per slot. + </para> + </listitem> + </varlistentry> + </variablelist> </sect2> diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index d7f1e3a4d4..6c6b95aa3a 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -80,7 +80,9 @@ #include "catalog/pgxc_class.h" #include "catalog/pgxc_node.h" #include "pgxc/locator.h" +#include "pgxc/nodemgr.h" #include "pgxc/pgxc.h" +#include "pgxc/pgxcnode.h" #include "pgxc/postgresql_fdw.h" #endif @@ -1130,35 +1132,17 @@ build_subcluster_data(PGXCSubCluster *subcluster, int *numnodes) if (!subcluster) { + int i; /* - * If no subcluster is defined, all the Datanode are associated to the - * table. So scan pgxc_node and pick up all the necessary stuff. + * If no subcluster is defined, all the Datanodes are associated to the + * table. So obtain list of node Oids currenly known to the session. + * There could be a difference between the content of pgxc_node catalog + * table and current session, because someone may change nodes and not + * yet update session data. + * We should use session data because Executor uses it as well to run + * commands on nodes. 
*/ - Relation rel; - HeapScanDesc scan; - HeapTuple tuple; - - rel = heap_open(PgxcNodeRelationId, AccessShareLock); - scan = heap_beginscan(rel, SnapshotNow, 0, NULL); - - while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) - { - Form_pgxc_node pgxc_node = (Form_pgxc_node) GETSTRUCT(tuple); - - /* Add only Datanodes */ - if (pgxc_node->node_type != PGXC_NODE_DATANODE) - continue; - - (*numnodes)++; - if (!nodes) - nodes = (Oid *) palloc(*numnodes * sizeof(Oid)); - else - nodes = (Oid *) repalloc(nodes, *numnodes * sizeof(Oid)); - - nodes[*numnodes - 1] = get_pgxc_nodeoid(NameStr(pgxc_node->node_name)); - } - heap_endscan(scan); - heap_close(rel, AccessShareLock); + *numnodes = NumDataNodes; /* No nodes found ?? */ if (*numnodes == 0) @@ -1166,6 +1150,10 @@ build_subcluster_data(PGXCSubCluster *subcluster, int *numnodes) (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("No Datanode defined in cluster"))); + nodes = (Oid *) palloc(NumDataNodes * sizeof(Oid)); + for (i = 0; i < NumDataNodes; i++) + nodes[i] = PGXCNodeGetNodeOid(i, PGXC_NODE_DATANODE); + return nodes; } diff --git a/src/backend/pgxc/nodemgr/nodemgr.c b/src/backend/pgxc/nodemgr/nodemgr.c index 062ee070d9..e6f83c3ef2 100644 --- a/src/backend/pgxc/nodemgr/nodemgr.c +++ b/src/backend/pgxc/nodemgr/nodemgr.c @@ -28,9 +28,86 @@ #include "pgxc/nodemgr.h" #include "pgxc/pgxc.h" -/* Global number of nodes */ -int NumDataNodes = 2; -int NumCoords = 1; + +/* + * GUC parameters. + * Shared memory block can not be resized dynamically, so we should have some + * limits set at startup time to calculate amount of shared memory to store + * node table. Nodes can be added to running cluster until that limit is reached + * if cluster needs grow beyond the configuration value should be changed and + * cluster restarted. + */ +int MaxCoords = 16; +int MaxDataNodes = 16; + +/* Global number of nodes. 
Point to a shared memory block */ +static int *shmemNumCoords; +static int *shmemNumDataNodes; + +/* Shared memory tables of node definitions */ +NodeDefinition *coDefs; +NodeDefinition *dnDefs; + +/* + * NodeTablesInit + * Initializes shared memory tables of Coordinators and Datanodes. + */ +void +NodeTablesShmemInit(void) +{ + bool found; + /* + * Initialize the table of Coordinators: first sizeof(int) bytes are to + * store actual number of Coordinators, remaining data in the structure is + * array of NodeDefinition that can contain up to MaxCoords entries. + * That is a bit weird and probably it would be better have these in + * separate structures, but I am unsure about cost of having shmem structure + * containing just single integer. + */ + shmemNumCoords = ShmemInitStruct("Coordinator Table", + sizeof(int) + + sizeof(NodeDefinition) * MaxCoords, + &found); + + /* Have coDefs pointing right behind shmemNumCoords */ + coDefs = (NodeDefinition *) (shmemNumCoords + 1); + + /* Mark it empty upon creation */ + if (!found) + *shmemNumCoords = 0; + + /* Same for datanodes */ + shmemNumDataNodes = ShmemInitStruct("Datanode Table", + sizeof(int) + + sizeof(NodeDefinition) * MaxDataNodes, + &found); + + /* Have coDefs pointing right behind shmemNumDataNodes */ + dnDefs = (NodeDefinition *) (shmemNumDataNodes + 1); + + /* Mark it empty upon creation */ + if (!found) + *shmemNumDataNodes = 0; +} + + +/* + * NodeTablesShmemSize + * Get the size of shared memory dedicated to node definitions + */ +Size +NodeTablesShmemSize(void) +{ + Size co_size; + Size dn_size; + + co_size = mul_size(sizeof(NodeDefinition), MaxCoords); + co_size = add_size(co_size, sizeof(int)); + dn_size = mul_size(sizeof(NodeDefinition), MaxDataNodes); + dn_size = add_size(dn_size, sizeof(int)); + + return add_size(co_size, dn_size); +} /* * Check list of options and return things filled. 
@@ -38,7 +115,7 @@ int NumCoords = 1; */ static void check_node_options(const char *node_name, List *options, char **node_host, - int *node_port, char *node_type, + int *node_port, char *node_type, bool *is_primary, bool *is_preferred) { ListCell *option; @@ -127,7 +204,7 @@ check_node_options(const char *node_name, List *options, char **node_host, /* -------------------------------- * cmp_nodes - * + * * Compare the Oids of two XC nodes * to sort them in ascending order by their names * -------------------------------- @@ -147,27 +224,25 @@ cmp_nodes(const void *p1, const void *p2) return 1; } + /* - * PgxcNodeCount + * PgxcNodeListAndCount * - * Count number of PGXC nodes based on catalog information and return - * an ordered list of node Oids for each PGXC node type. + * Update node definitions in the shared memory tables from the catalog */ void -PgxcNodeListAndCount(Oid **coOids, Oid **dnOids, int *num_coords, int *num_dns) +PgxcNodeListAndCount(void) { Relation rel; HeapScanDesc scan; HeapTuple tuple; - *num_coords = 0; - *num_dns = 0; + LWLockAcquire(NodeTableLock, LW_EXCLUSIVE); - /* Don't forget to reinitialize primary and preferred nodes also */ - primary_data_node = InvalidOid; - num_preferred_data_nodes = 0; + *shmemNumCoords = 0; + *shmemNumDataNodes = 0; - /* + /* * Node information initialization is made in one scan: * 1) Scan pgxc_node catalog to find the number of nodes for * each node type and make proper allocations @@ -178,60 +253,158 @@ PgxcNodeListAndCount(Oid **coOids, Oid **dnOids, int *num_coords, int *num_dns) scan = heap_beginscan(rel, SnapshotNow, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { - int numnodes; - Oid **nodes; Form_pgxc_node nodeForm = (Form_pgxc_node) GETSTRUCT(tuple); + NodeDefinition *node; - /* Take data for given node type */ + /* Take definition for given node type */ switch (nodeForm->node_type) { case PGXC_NODE_COORDINATOR: - (*num_coords)++; - numnodes = *num_coords; - nodes = 
coOids; + node = &coDefs[(*shmemNumCoords)++]; break; case PGXC_NODE_DATANODE: - (*num_dns)++; - numnodes = *num_dns; - nodes = dnOids; + node = &dnDefs[(*shmemNumDataNodes)++]; break; default: break; } - if (numnodes == 1) - *nodes = (Oid *) palloc(numnodes * sizeof(Oid)); - else - *nodes = (Oid *) repalloc(*nodes, numnodes * sizeof(Oid)); + /* Populate the definition */ + node->nodeoid = HeapTupleGetOid(tuple); + memcpy(&node->nodename, &nodeForm->node_name, NAMEDATALEN); + memcpy(&node->nodehost, &nodeForm->node_host, NAMEDATALEN); + node->nodeport = nodeForm->node_port; + node->nodeisprimary = nodeForm->nodeis_primary; + node->nodeispreferred = nodeForm->nodeis_preferred; + } + heap_endscan(scan); + heap_close(rel, AccessShareLock); + + /* Finally sort the lists */ + if (*shmemNumCoords > 1) + qsort(coDefs, *shmemNumCoords, sizeof(NodeDefinition), cmp_nodes); + if (*shmemNumDataNodes > 1) + qsort(dnDefs, *shmemNumDataNodes, sizeof(NodeDefinition), cmp_nodes); + + LWLockRelease(NodeTableLock); +} - (*nodes)[numnodes - 1] = get_pgxc_nodeoid(NameStr(nodeForm->node_name)); - /* - * Save data related to preferred and primary node - * Preferred and primaries use node Oids - */ - if (nodeForm->nodeis_primary) - primary_data_node = get_pgxc_nodeoid(NameStr(nodeForm->node_name)); - if (nodeForm->nodeis_preferred) +/* + * PgxcNodeGetIds + * + * List into palloc'ed arrays Oids of Coordinators and Datanodes currently + * presented in the node table, as well as number of Coordinators and Datanodes. + * Any parameter may be NULL if caller is not interested in receiving + * appropriate results. Preferred and primary node information can be updated + * in session if requested. 
+ */ +void +PgxcNodeGetOids(Oid **coOids, Oid **dnOids, + int *num_coords, int *num_dns, bool update_preferred) +{ + LWLockAcquire(NodeTableLock, LW_SHARED); + + if (num_coords) + *num_coords = *shmemNumCoords; + if (num_dns) + *num_dns = *shmemNumDataNodes; + + if (coOids) + { + int i; + + *coOids = (Oid *) palloc(*shmemNumCoords * sizeof(Oid)); + for (i = 0; i < *shmemNumCoords; i++) + (*coOids)[i] = coDefs[i].nodeoid; + } + + if (dnOids) + { + int i; + + *dnOids = (Oid *) palloc(*shmemNumDataNodes * sizeof(Oid)); + for (i = 0; i < *shmemNumDataNodes; i++) + (*dnOids)[i] = dnDefs[i].nodeoid; + } + + /* Update also preferred and primary node informations if requested */ + if (update_preferred) + { + int i; + + /* Initialize primary and preferred node information */ + primary_data_node = InvalidOid; + num_preferred_data_nodes = 0; + + for (i = 0; i < *shmemNumDataNodes; i++) { - preferred_data_node[num_preferred_data_nodes] = - get_pgxc_nodeoid(NameStr(nodeForm->node_name)); - num_preferred_data_nodes++; + if (dnDefs[i].nodeisprimary) + primary_data_node = dnDefs[i].nodeoid; + + if (dnDefs[i].nodeispreferred) + { + preferred_data_node[num_preferred_data_nodes] = dnDefs[i].nodeoid; + num_preferred_data_nodes++; + } } } - heap_endscan(scan); - heap_close(rel, AccessShareLock); - /* Finally sort the lists to be sent back */ - if (NumCoords != 0) - qsort(*coOids, *num_coords, sizeof(Oid), cmp_nodes); - if (NumDataNodes != 0) - qsort(*dnOids, *num_dns, sizeof(Oid), cmp_nodes); + LWLockRelease(NodeTableLock); +} + + +/* + * Find node definition in the shared memory node table. + * The structure is a copy palloc'ed in current memory context. 
+ */ +NodeDefinition * +PgxcNodeGetDefinition(Oid node) +{ + NodeDefinition *result = NULL; + int i; + + LWLockAcquire(NodeTableLock, LW_SHARED); + + /* search through the data nodes first */ + for (i = 0; i < *shmemNumDataNodes; i++) + { + if (dnDefs[i].nodeoid == node) + { + result = (NodeDefinition *) palloc(sizeof(NodeDefinition)); + + memcpy(result, dnDefs + i, sizeof(NodeDefinition)); + + LWLockRelease(NodeTableLock); + + return result; + } + } + + /* if not found, search through the Coordinators */ + for (i = 0; i < *shmemNumCoords; i++) + { + if (coDefs[i].nodeoid == node) + { + result = (NodeDefinition *) palloc(sizeof(NodeDefinition)); + + memcpy(result, coDefs + i, sizeof(NodeDefinition)); + + LWLockRelease(NodeTableLock); + + return result; + } + } + + /* not found, return NULL */ + LWLockRelease(NodeTableLock); + return NULL; } + /* * PgxcNodeCreate - * + * * Add a PGXC node */ void @@ -266,9 +439,9 @@ PgxcNodeCreate(CreateNodeStmt *stmt) /* Check length of node name */ if (strlen(node_name) > PGXC_NODENAME_LENGTH) ereport(ERROR, - (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), - errmsg("Node name \"%s\" is too long", - node_name))); + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("Node name \"%s\" is too long", + node_name))); /* Filter options */ check_node_options(node_name, stmt->options, &node_host, @@ -321,7 +494,7 @@ PgxcNodeCreate(CreateNodeStmt *stmt) /* Insert tuple in catalog */ simple_heap_insert(pgxcnodesrel, htup); - + CatalogUpdateIndexes(pgxcnodesrel, htup); heap_close(pgxcnodesrel, RowExclusiveLock); @@ -329,7 +502,7 @@ PgxcNodeCreate(CreateNodeStmt *stmt) /* * PgxcNodeAlter - * + * * Alter a PGXC node */ void @@ -405,7 +578,7 @@ PgxcNodeAlter(AlterNodeStmt *stmt) MemSet(new_record_repl, false, sizeof(new_record_repl)); new_record[Anum_pgxc_node_port - 1] = Int32GetDatum(node_port); new_record_repl[Anum_pgxc_node_port - 1] = true; - new_record[Anum_pgxc_node_host - 1] = + new_record[Anum_pgxc_node_host - 1] = 
DirectFunctionCall1(namein, CStringGetDatum(node_host)); new_record_repl[Anum_pgxc_node_host - 1] = true; new_record[Anum_pgxc_node_type - 1] = CharGetDatum(node_type); @@ -431,7 +604,7 @@ PgxcNodeAlter(AlterNodeStmt *stmt) /* * PgxcNodeRemove - * + * * Remove a PGXC node */ void diff --git a/src/backend/pgxc/pool/pgxcnode.c b/src/backend/pgxc/pool/pgxcnode.c index f16444fc8a..1ff042fe38 100644 --- a/src/backend/pgxc/pool/pgxcnode.c +++ b/src/backend/pgxc/pool/pgxcnode.c @@ -50,6 +50,7 @@ #include "../interfaces/libpq/libpq-fe.h" +/* Number of connections held */ static int datanode_count = 0; static int coord_count = 0; @@ -67,6 +68,10 @@ static PGXCNodeHandle *dn_handles = NULL; */ static PGXCNodeHandle *co_handles = NULL; +/* Current size of dn_handles and co_handles */ +int NumDataNodes; +int NumCoords; + static void pgxc_node_init(PGXCNodeHandle *handle, int sock); static void pgxc_node_free(PGXCNodeHandle *handle); static void pgxc_node_all_free(void); @@ -126,8 +131,11 @@ InitMultinodeExecutor(bool is_force) co_handles != NULL) return; + /* Update node table in the shared memory */ + PgxcNodeListAndCount(); + /* Get classified list of node Oids */ - PgxcNodeListAndCount(&coOids, &dnOids, &NumCoords, &NumDataNodes); + PgxcNodeGetOids(&coOids, &dnOids, &NumCoords, &NumDataNodes, true); /* Do proper initialization of handles */ if (NumDataNodes > 0) @@ -212,7 +220,7 @@ PGXCNodeConnStr(char *host, int port, char *dbname, /* - * Connect to a Data Node using a connection string + * Connect to a Datanode using a connection string */ NODE_CONNECTION * PGXCNodeConnect(char *connstr) @@ -331,7 +339,7 @@ pgxc_node_all_free(void) /* * Create and initialise internal structure to communicate to - * Data Node via supplied socket descriptor. + * Datanode via supplied socket descriptor. 
* Structure stores state info and I/O buffers */ static void @@ -566,8 +574,8 @@ retry: if (close_if_error) { add_error_message(conn, - "data node closed the connection unexpectedly\n" - "\tThis probably means the data node terminated abnormally\n" + "Datanode closed the connection unexpectedly\n" + "\tThis probably means the Datanode terminated abnormally\n" "\tbefore or while processing the request.\n"); conn->state = DN_CONNECTION_STATE_ERROR_FATAL; /* No more connection to * backend */ @@ -787,7 +795,7 @@ cancel_query(void) if (datanode_count == 0 && coord_count == 0) return; - /* Collect Data Nodes handles */ + /* Collect Datanodes handles */ for (i = 0; i < NumDataNodes; i++) { PGXCNodeHandle *handle = &dn_handles[i]; @@ -873,7 +881,7 @@ clear_all_data(void) if (datanode_count == 0 && coord_count == 0) return; - /* Collect Data Nodes handles */ + /* Collect Datanodes handles */ for (i = 0; i < NumDataNodes; i++) { PGXCNodeHandle *handle = &dn_handles[i]; @@ -1105,7 +1113,7 @@ send_some(PGXCNodeHandle *handle, int len) } /* - * Send PARSE message with specified statement down to the Data node + * Send PARSE message with specified statement down to the Datanode */ int pgxc_node_send_parse(PGXCNodeHandle * handle, const char* statement, @@ -1184,7 +1192,7 @@ pgxc_node_send_parse(PGXCNodeHandle * handle, const char* statement, /* - * Send BIND message down to the Data node + * Send BIND message down to the Datanode */ int pgxc_node_send_bind(PGXCNodeHandle * handle, const char *portal, @@ -1265,7 +1273,7 @@ pgxc_node_send_bind(PGXCNodeHandle * handle, const char *portal, /* - * Send DESCRIBE message (portal or statement) down to the Data node + * Send DESCRIBE message (portal or statement) down to the Datanode */ int pgxc_node_send_describe(PGXCNodeHandle * handle, bool is_statement, @@ -1312,7 +1320,7 @@ pgxc_node_send_describe(PGXCNodeHandle * handle, bool is_statement, /* - * Send CLOSE message (portal or statement) down to the Data node + * Send CLOSE message 
(portal or statement) down to the Datanode */ int pgxc_node_send_close(PGXCNodeHandle * handle, bool is_statement, @@ -1353,7 +1361,7 @@ pgxc_node_send_close(PGXCNodeHandle * handle, bool is_statement, } /* - * Send EXECUTE message down to the Data node + * Send EXECUTE message down to the Datanode */ int pgxc_node_send_execute(PGXCNodeHandle * handle, const char *portal, int fetch) @@ -1397,7 +1405,7 @@ pgxc_node_send_execute(PGXCNodeHandle * handle, const char *portal, int fetch) /* - * Send FLUSH message down to the Data node + * Send FLUSH message down to the Datanode */ int pgxc_node_send_flush(PGXCNodeHandle * handle) @@ -1423,7 +1431,7 @@ pgxc_node_send_flush(PGXCNodeHandle * handle) /* - * Send SYNC message down to the Data node + * Send SYNC message down to the Datanode */ int pgxc_node_send_sync(PGXCNodeHandle * handle) @@ -1449,7 +1457,7 @@ pgxc_node_send_sync(PGXCNodeHandle * handle) /* - * Send the GXID down to the Data node + * Send the GXID down to the Datanode */ int pgxc_node_send_query_extended(PGXCNodeHandle *handle, const char *query, @@ -1798,7 +1806,7 @@ get_handles(List *datanodelist, List *coordlist, bool is_coord_only_query) { ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("Invalid data node number"))); + errmsg("Invalid Datanode number"))); } node_handle = &dn_handles[node]; @@ -1915,7 +1923,7 @@ get_handles(List *datanodelist, List *coordlist, bool is_coord_only_query) { ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("Invalid data node number"))); + errmsg("Invalid Datanode number"))); } node_handle = &dn_handles[node]; diff --git a/src/backend/pgxc/pool/poolmgr.c b/src/backend/pgxc/pool/poolmgr.c index 598a5e6cdc..ebef82c8a5 100644 --- a/src/backend/pgxc/pool/poolmgr.c +++ b/src/backend/pgxc/pool/poolmgr.c @@ -79,12 +79,17 @@ typedef struct int port; } PGXCNodeConnectionInfo; -/* The memory context */ +/* The root memory context */ static MemoryContext PoolerMemoryContext = NULL; - -/* PGXC Nodes info list */ -static 
PGXCNodeConnectionInfo *datanode_connInfos; -static PGXCNodeConnectionInfo *coord_connInfos; +/* + * Allocations of core objects: data node connections, upper level structures, + * connection strings, etc. + */ +static MemoryContext PoolerCoreContext = NULL; +/* + * Memory to store Agents + */ +static MemoryContext PoolerAgentContext = NULL; /* Pool to all the databases (linked list) */ static DatabasePool *databasePools = NULL; @@ -98,9 +103,7 @@ static PoolHandle *poolHandle = NULL; static int is_pool_locked = false; static int server_fd = -1; -static void node_info_free(void); -static void node_info_load(void); -static int node_info_check(void); +static int node_info_check(PoolAgent *agent); static void agent_init(PoolAgent *agent, const char *database, const char *user_name); static void agent_destroy(PoolAgent *agent); static void agent_create(void); @@ -115,33 +118,29 @@ static int agent_temp_command(PoolAgent *agent); static DatabasePool *create_database_pool(const char *database, const char *user_name); static void insert_database_pool(DatabasePool *pool); static int destroy_database_pool(const char *database, const char *user_name); -static void reload_database_pools(void); +static void reload_database_pools(PoolAgent *agent); static DatabasePool *find_database_pool(const char *database, const char *user_name); -static DatabasePool *find_database_pool_to_clean(const char *database, - const char *user_name, - List *dn_list, - List *co_list); static DatabasePool *remove_database_pool(const char *database, const char *user_name); static int *agent_acquire_connections(PoolAgent *agent, List *datanodelist, List *coordlist); static int send_local_commands(PoolAgent *agent, List *datanodelist, List *coordlist); static int cancel_query_on_connections(PoolAgent *agent, List *datanodelist, List *coordlist); -static PGXCNodePoolSlot *acquire_connection(DatabasePool *dbPool, int node, char client_conn_type); +static PGXCNodePoolSlot *acquire_connection(DatabasePool 
*dbPool, Oid node); static void agent_release_connections(PoolAgent *agent, bool force_destroy); static void agent_reset_session(PoolAgent *agent); -static void release_connection(DatabasePool *dbPool, PGXCNodePoolSlot *slot, int index, - bool force_destroy, char client_conn_type); +static void release_connection(DatabasePool *dbPool, PGXCNodePoolSlot *slot, + Oid node, bool force_destroy); static void destroy_slot(PGXCNodePoolSlot *slot); -static void grow_pool(DatabasePool *dbPool, int index, char client_conn_type); +static PGXCNodePool *grow_pool(DatabasePool *dbPool, Oid node); static void destroy_node_pool(PGXCNodePool *node_pool); static void PoolerLoop(void); -static int clean_connection(List *dn_discard, - List *co_discard, +static int clean_connection(List *node_discard, const char *database, const char *user_name); static int *abort_pids(int *count, int pid, const char *database, const char *user_name); +static char *build_node_conn_str(Oid node, char* dbname, char*user_name); /* Signal handlers */ static void pooler_die(SIGNAL_ARGS); @@ -173,13 +172,23 @@ PoolManagerInit() elog(DEBUG1, "Pooler process is started: %d", getpid()); /* - * Set up memory context for the pooler + * Set up memory contexts for the pooler objects */ PoolerMemoryContext = AllocSetContextCreate(TopMemoryContext, "PoolerMemoryContext", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); + PoolerCoreContext = AllocSetContextCreate(PoolerMemoryContext, + "PoolerCoreContext", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + PoolerAgentContext = AllocSetContextCreate(PoolerMemoryContext, + "PoolerAgentContext", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); /* * If possible, make this process a group leader, so that the postmaster @@ -223,151 +232,84 @@ PoolManagerInit() errmsg("out of memory"))); } - /* Initialize process ressources */ - CurrentResourceOwner = 
ResourceOwnerCreate(NULL, "ForPoolerInfo"); - - /* Initialize pooler in Postgres-way */ - InitPostgres(NULL, InvalidOid, NULL, NULL); - - /* Initialize pooler connection info */ - node_info_load(); - PoolerLoop(); return 0; } -/* - * Free connection info cached - */ -static void -node_info_free(void) -{ - int count; - - for (count = 0; count < NumCoords; count++) - pfree(coord_connInfos[count].host); - for (count = 0; count < NumDataNodes; count++) - pfree(datanode_connInfos[count].host); - - if (datanode_connInfos) - pfree(datanode_connInfos); - if (coord_connInfos) - pfree(coord_connInfos); - - NumCoords = 0; - NumDataNodes = 0; - coord_connInfos = NULL; - datanode_connInfos = NULL; -} - -/* - * Load node info cached by scanning PGXC node catalog - */ -static void -node_info_load(void) -{ - int count; - Oid *coOids = NULL; - Oid *dnOids = NULL; - - /* Update number of PGXC nodes saved in cache */ - PgxcNodeListAndCount(&coOids, &dnOids, &NumCoords, &NumDataNodes); - - /* Then initialize the node informations */ - if (NumDataNodes != 0) - datanode_connInfos = (PGXCNodeConnectionInfo *) - palloc(NumDataNodes * sizeof(PGXCNodeConnectionInfo)); - if (NumCoords != 0) - coord_connInfos = (PGXCNodeConnectionInfo *) - palloc(NumCoords * sizeof(PGXCNodeConnectionInfo)); - - /* Fill in connection info structures */ - for (count = 0; count < NumCoords; count++) - { - coord_connInfos[count].nodeoid = coOids[count]; - coord_connInfos[count].port = get_pgxc_nodeport(coOids[count]); - coord_connInfos[count].host = get_pgxc_nodehost(coOids[count]); - } - for (count = 0; count < NumDataNodes; count++) - { - datanode_connInfos[count].nodeoid = dnOids[count]; - datanode_connInfos[count].port = get_pgxc_nodeport(dnOids[count]); - datanode_connInfos[count].host = get_pgxc_nodehost(dnOids[count]); - } - - /* Clean up resources */ - if (coOids) - pfree(coOids); - if (dnOids) - pfree(dnOids); -} /* * Check connection info consistency with system catalogs */ static int 
-node_info_check(void) +node_info_check(PoolAgent *agent) { - int res = POOL_CHECK_SUCCESS; - int num_coord, num_dn, i, j; - Oid *coOids = NULL; - Oid *dnOids = NULL; - - /* Update number of PGXC nodes saved in cache */ - PgxcNodeListAndCount(&coOids, &dnOids, - &num_coord, &num_dn); - - /* Check first if node numbers are consistent */ - if (NumCoords != num_coord || - NumDataNodes != num_dn) - { + DatabasePool *dbPool = databasePools; + List *checked = NIL; + int res = POOL_CHECK_SUCCESS; + Oid *coOids; + Oid *dnOids; + int numCo; + int numDn; + + /* + * First check if agent's node information matches to current content of the + * shared memory table. + */ + PgxcNodeGetOids(&coOids, &dnOids, &numCo, &numDn, false); + + if (agent->num_coord_connections != numCo || + agent->num_dn_connections != numDn || + memcmp(agent->coord_conn_oids, coOids, numCo * sizeof(Oid)) || + memcmp(agent->dn_conn_oids, dnOids, numDn * sizeof(Oid))) res = POOL_CHECK_FAILED; - goto finish; - } - /* Now do a check element by element */ - for (i = 0; i < 2; i++) + /* Release palloc'ed memory */ + pfree(coOids); + pfree(dnOids); + + /* + * Iterate over all dbnode pools and check if connection strings + * are matching node definitions. 
+ */ + while (res == POOL_CHECK_SUCCESS && dbPool) { - int numnodes; - PGXCNodeConnectionInfo *conninfo; - Oid *oid_vector; + HASH_SEQ_STATUS hseq_status; + PGXCNodePool *nodePool; - /* Take all the elements necessary for check */ - switch(i) + hash_seq_init(&hseq_status, dbPool->nodePools); + while ((nodePool = (PGXCNodePool *) hash_seq_search(&hseq_status))) { - case 0: - numnodes = NumCoords; - oid_vector = coOids; - conninfo = coord_connInfos; - break; - case 1: - numnodes = NumDataNodes; - oid_vector = dnOids; - conninfo = datanode_connInfos; - break; - default: - Assert(0); - } + char *connstr_chk; - /* Then check data consistency for port, host and node Oid */ - for (j = 0; j < numnodes; j++) - { - if (conninfo[j].nodeoid != oid_vector[j] || - conninfo[j].port != get_pgxc_nodeport(oid_vector[j]) || - strcmp(conninfo[j].host, get_pgxc_nodehost(oid_vector[j]))) + /* No need to check same Datanode twice */ + if (list_member_oid(checked, nodePool->nodeoid)) + continue; + checked = lappend_oid(checked, nodePool->nodeoid); + + connstr_chk = build_node_conn_str(nodePool->nodeoid, + dbPool->database, + dbPool->user_name); + if (connstr_chk == NULL) + { + /* Problem of constructing connection string */ + hash_seq_term(&hseq_status); + res = POOL_CHECK_FAILED; + break; + } + /* return error if there is difference */ + if (strcmp(connstr_chk, nodePool->connstr)) { + pfree(connstr_chk); + hash_seq_term(&hseq_status); res = POOL_CHECK_FAILED; - goto finish; + break; } + + pfree(connstr_chk); } + dbPool = dbPool->next; } - -finish: - /* Clean everything */ - if (coOids) - pfree(coOids); - if (dnOids) - pfree(dnOids); + list_free(checked); return res; } @@ -457,6 +399,7 @@ PoolManagerCloseHandle(PoolHandle *handle) static void agent_create(void) { + MemoryContext oldcontext; int new_fd; PoolAgent *agent; @@ -472,6 +415,8 @@ agent_create(void) return; } + oldcontext = MemoryContextSwitchTo(PoolerAgentContext); + /* Allocate agent */ agent = (PoolAgent *) 
palloc(sizeof(PoolAgent)); if (!agent) @@ -488,8 +433,15 @@ agent_create(void) agent->port.RecvPointer = 0; agent->port.SendPointer = 0; agent->pool = NULL; - agent->num_dn_connections = NumDataNodes; - agent->num_coord_connections = NumCoords; + agent->mcxt = AllocSetContextCreate(CurrentMemoryContext, + "Agent", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + agent->num_dn_connections = 0; + agent->num_coord_connections = 0; + agent->dn_conn_oids = NULL; + agent->coord_conn_oids = NULL; agent->dn_connections = NULL; agent->coord_connections = NULL; agent->session_params = NULL; @@ -499,6 +451,8 @@ agent_create(void) /* Append new agent to the list */ poolAgents[agentCount++] = agent; + + MemoryContextSwitchTo(oldcontext); } @@ -708,6 +662,8 @@ PoolManagerLock(bool is_lock) static void agent_init(PoolAgent *agent, const char *database, const char *user_name) { + MemoryContext oldcontext; + Assert(agent); Assert(database); Assert(user_name); @@ -716,6 +672,16 @@ agent_init(PoolAgent *agent, const char *database, const char *user_name) if (agent->pool) agent_release_connections(agent, false); + oldcontext = MemoryContextSwitchTo(agent->mcxt); + + /* Get needed info and allocate memory */ + PgxcNodeGetOids(&agent->coord_conn_oids, &agent->dn_conn_oids, + &agent->num_coord_connections, &agent->num_dn_connections, false); + + agent->coord_connections = (PGXCNodePoolSlot **) + palloc0(agent->num_coord_connections * sizeof(PGXCNodePoolSlot *)); + agent->dn_connections = (PGXCNodePoolSlot **) + palloc0(agent->num_dn_connections * sizeof(PGXCNodePoolSlot *)); /* find database */ agent->pool = find_database_pool(database, user_name); @@ -723,6 +689,8 @@ agent_init(PoolAgent *agent, const char *database, const char *user_name) if (agent->pool == NULL) agent->pool = create_database_pool(database, user_name); + MemoryContextSwitchTo(oldcontext); + return; } @@ -760,26 +728,8 @@ agent_destroy(PoolAgent *agent) if (poolAgents[i] == agent) 
{ /* Free memory. All connection slots are NULL at this point */ - if (agent->dn_connections) - { - pfree(agent->dn_connections); - agent->dn_connections = NULL; - } - if (agent->coord_connections) - { - pfree(agent->coord_connections); - agent->coord_connections = NULL; - } - if (agent->local_params) - { - pfree(agent->local_params); - agent->local_params = NULL; - } - if (agent->session_params) - { - pfree(agent->session_params); - agent->session_params = NULL; - } + MemoryContextDelete(agent->mcxt); + pfree(agent); /* shrink the list and move last agent into the freed slot */ if (i < --agentCount) @@ -993,6 +943,7 @@ PoolManagerCheckConnectionInfo(void) int res; Assert(poolHandle); + PgxcNodeListAndCount(); pool_putmessage(&poolHandle->port, 'q', NULL, 0); pool_flush(&poolHandle->port); @@ -1012,6 +963,7 @@ void PoolManagerReloadConnectionInfo(void) { Assert(poolHandle); + PgxcNodeListAndCount(); pool_putmessage(&poolHandle->port, 'p', NULL, 0); pool_flush(&poolHandle->port); } @@ -1031,17 +983,18 @@ agent_handle_input(PoolAgent * agent, StringInfo s) */ for (;;) { - const char *database = NULL; - const char *user_name = NULL; - const char *set_command = NULL; + const char *database = NULL; + const char *user_name = NULL; + const char *set_command = NULL; PoolCommandType command_type; - int datanodecount; - int coordcount; - List *datanodelist = NIL; - List *coordlist = NIL; - int *fds; - int *pids; - int i, len, res; + int datanodecount; + int coordcount; + List *nodelist = NIL; + List *datanodelist = NIL; + List *coordlist = NIL; + int *fds; + int *pids; + int i, len, res; /* * During a pool cleaning, Abort, Connect and Get Connections messages @@ -1078,11 +1031,11 @@ agent_handle_input(PoolAgent * agent, StringInfo s) * Length of message is caused by: * - Message header = 4bytes * - Number of Datanodes sent = 4bytes - * - List of datanodes = NumDataNodes * 4bytes (max) + * - List of Datanodes = NumPoolDataNodes * 4bytes (max) * - Number of Coordinators sent = 
4bytes - * - List of coordinators = NumCoords * 4bytes (max) + * - List of Coordinators = NumPoolCoords * 4bytes (max) */ - pool_getmessage(&agent->port, s, 4 * NumDataNodes + 4 * NumCoords + 12); + pool_getmessage(&agent->port, s, 4 * agent->num_dn_connections + 4 * agent->num_coord_connections + 12); datanodecount = pq_getmsgint(s, 4); for (i = 0; i < datanodecount; i++) datanodelist = lappend_int(datanodelist, pq_getmsgint(s, 4)); @@ -1123,11 +1076,21 @@ agent_handle_input(PoolAgent * agent, StringInfo s) datanodecount = pq_getmsgint(s, 4); /* It is possible to clean up only Coordinators connections */ for (i = 0; i < datanodecount; i++) - datanodelist = lappend_int(datanodelist, pq_getmsgint(s, 4)); + { + /* Translate index to Oid */ + int index = pq_getmsgint(s, 4); + Oid node = agent->dn_conn_oids[index]; + nodelist = lappend_oid(nodelist, node); + } coordcount = pq_getmsgint(s, 4); /* It is possible to clean up only Datanode connections */ for (i = 0; i < coordcount; i++) - coordlist = lappend_int(coordlist, pq_getmsgint(s, 4)); + { + /* Translate index to Oid */ + int index = pq_getmsgint(s, 4); + Oid node = agent->coord_conn_oids[index]; + nodelist = lappend_oid(nodelist, node); + } len = pq_getmsgint(s, 4); if (len > 0) database = pq_getmsgbytes(s, len); @@ -1138,10 +1101,9 @@ agent_handle_input(PoolAgent * agent, StringInfo s) pq_getmsgend(s); /* Clean up connections here */ - res = clean_connection(datanodelist, coordlist, database, user_name); + res = clean_connection(nodelist, database, user_name); - list_free(datanodelist); - list_free(coordlist); + list_free(nodelist); /* Send success result */ pool_sendres(&agent->port, res); @@ -1150,14 +1112,14 @@ agent_handle_input(PoolAgent * agent, StringInfo s) /* * Length of message is caused by: * - Message header = 4bytes - * - List of datanodes = NumDataNodes * 4bytes (max) - * - List of coordinators = NumCoords * 4bytes (max) + * - List of Datanodes = NumPoolDataNodes * 4bytes (max) + * - List of 
Coordinators = NumPoolCoords * 4bytes (max) * - Number of Datanodes sent = 4bytes * - Number of Coordinators sent = 4bytes * It is better to send in a same message the list of Co and Dn at the same * time, this permits to reduce interactions between postmaster and pooler */ - pool_getmessage(&agent->port, s, 4 * NumDataNodes + 4 * NumCoords + 12); + pool_getmessage(&agent->port, s, 4 * agent->num_dn_connections + 4 * agent->num_coord_connections + 12); datanodecount = pq_getmsgint(s, 4); for (i = 0; i < datanodecount; i++) datanodelist = lappend_int(datanodelist, pq_getmsgint(s, 4)); @@ -1184,12 +1146,12 @@ agent_handle_input(PoolAgent * agent, StringInfo s) /* * Length of message is caused by: * - Message header = 4bytes - * - List of datanodes = NumDataNodes * 4bytes (max) - * - List of coordinators = NumCoords * 4bytes (max) + * - List of Datanodes = NumPoolDataNodes * 4bytes (max) + * - List of Coordinators = NumPoolCoords * 4bytes (max) * - Number of Datanodes sent = 4bytes * - Number of Coordinators sent = 4bytes */ - pool_getmessage(&agent->port, s, 4 * NumDataNodes + 4 * NumCoords + 12); + pool_getmessage(&agent->port, s, 4 * agent->num_dn_connections + 4 * agent->num_coord_connections + 12); datanodecount = pq_getmsgint(s, 4); for (i = 0; i < datanodecount; i++) datanodelist = lappend_int(datanodelist, pq_getmsgint(s, 4)); @@ -1232,21 +1194,15 @@ agent_handle_input(PoolAgent * agent, StringInfo s) pool_getmessage(&agent->port, s, 4); pq_getmsgend(s); - /* Free all the node info before reloading */ - node_info_free(); - - /* Reload node information */ - node_info_load(); - /* First update all the pools */ - reload_database_pools(); + reload_database_pools(agent); break; case 'q': /* Check connection info consistency */ pool_getmessage(&agent->port, s, 4); pq_getmsgend(s); /* Check cached info consistency */ - res = node_info_check(); + res = node_info_check(agent); /* Send result */ pool_sendres(&agent->port, res); @@ -1374,22 +1330,16 @@ 
agent_set_command(PoolAgent *agent, const char *set_command, PoolCommandType com * transaction is not in a transaction block. This has also no effect on local Coordinator * session. */ - if (agent->dn_connections) + for (i = 0; i < agent->num_dn_connections; i++) { - for (i = 0; i < agent->num_dn_connections; i++) - { - if (agent->dn_connections[i]) - res = PGXCNodeSendSetQuery(agent->dn_connections[i]->conn, set_command); - } + if (agent->dn_connections[i]) + res |= PGXCNodeSendSetQuery(agent->dn_connections[i]->conn, set_command); } - if (agent->coord_connections) + for (i = 0; i < agent->num_coord_connections; i++) { - for (i = 0; i < agent->num_coord_connections; i++) - { - if (agent->coord_connections[i]) - res |= PGXCNodeSendSetQuery(agent->coord_connections[i]->conn, set_command); - } + if (agent->coord_connections[i]) + res |= PGXCNodeSendSetQuery(agent->coord_connections[i]->conn, set_command); } /* Save the latest string */ @@ -1410,12 +1360,13 @@ agent_acquire_connections(PoolAgent *agent, List *datanodelist, List *coordlist) int i; int *result; ListCell *nodelist_item; + MemoryContext oldcontext; Assert(agent); /* Check if pooler can accept those requests */ - if (list_length(datanodelist) > NumDataNodes || - list_length(coordlist) > NumCoords) + if (list_length(datanodelist) > agent->num_dn_connections || + list_length(coordlist) > agent->num_coord_connections) return NULL; /* @@ -1435,44 +1386,10 @@ agent_acquire_connections(PoolAgent *agent, List *datanodelist, List *coordlist) } /* - * Initialize connection if it is not initialized yet - * First for the Datanodes + * There are possible memory allocations in the core pooler, we want + * these allocations in the contect of the database pool */ - if (!agent->dn_connections) - { - agent->dn_connections = (PGXCNodePoolSlot **) - palloc(agent->num_dn_connections * sizeof(PGXCNodePoolSlot *)); - if (!agent->dn_connections) - { - pfree(result); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - 
errmsg("out of memory"))); - return NULL; - } - - for (i = 0; i < agent->num_dn_connections; i++) - agent->dn_connections[i] = NULL; - } - - /* Then for the Coordinators */ - if (!agent->coord_connections) - { - agent->coord_connections = (PGXCNodePoolSlot **) - palloc(agent->num_coord_connections * sizeof(PGXCNodePoolSlot *)); - if (!agent->coord_connections) - { - pfree(result); - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - return NULL; - } - - for (i = 0; i < agent->num_coord_connections; i++) - agent->coord_connections[i] = NULL; - } - + oldcontext = MemoryContextSwitchTo(agent->pool->mcxt); /* Initialize result */ i = 0; @@ -1484,12 +1401,14 @@ agent_acquire_connections(PoolAgent *agent, List *datanodelist, List *coordlist) /* Acquire from the pool if none */ if (agent->dn_connections[node] == NULL) { - PGXCNodePoolSlot *slot = acquire_connection(agent->pool, node, REMOTE_CONN_DATANODE); + PGXCNodePoolSlot *slot = acquire_connection(agent->pool, + agent->dn_conn_oids[node]); /* Handle failure */ if (slot == NULL) { pfree(result); + MemoryContextSwitchTo(oldcontext); return NULL; } @@ -1516,12 +1435,13 @@ agent_acquire_connections(PoolAgent *agent, List *datanodelist, List *coordlist) /* Acquire from the pool if none */ if (agent->coord_connections[node] == NULL) { - PGXCNodePoolSlot *slot = acquire_connection(agent->pool, node, REMOTE_CONN_COORD); + PGXCNodePoolSlot *slot = acquire_connection(agent->pool, agent->coord_conn_oids[node]); /* Handle failure */ if (slot == NULL) { pfree(result); + MemoryContextSwitchTo(oldcontext); return NULL; } @@ -1540,6 +1460,8 @@ agent_acquire_connections(PoolAgent *agent, List *datanodelist, List *coordlist) result[i++] = PQsocket((PGconn *) agent->coord_connections[node]->conn); } + MemoryContextSwitchTo(oldcontext); + return result; } @@ -1568,7 +1490,7 @@ send_local_commands(PoolAgent *agent, List *datanodelist, List *coordlist) { int node = lfirst_int(nodelist_item); - if(node < 0 || 
node >= NumDataNodes) + if(node < 0 || node >= agent->num_dn_connections) continue; slot = agent->dn_connections[node]; @@ -1594,7 +1516,7 @@ send_local_commands(PoolAgent *agent, List *datanodelist, List *coordlist) { int node = lfirst_int(nodelist_item); - if(node < 0 || node >= NumCoords) + if(node < 0 || node >= agent->num_coord_connections) continue; slot = agent->coord_connections[node]; @@ -1618,7 +1540,7 @@ send_local_commands(PoolAgent *agent, List *datanodelist, List *coordlist) /* * Cancel query */ -static int +static int cancel_query_on_connections(PoolAgent *agent, List *datanodelist, List *coordlist) { ListCell *nodelist_item; @@ -1636,7 +1558,7 @@ cancel_query_on_connections(PoolAgent *agent, List *datanodelist, List *coordlis { int node = lfirst_int(nodelist_item); - if(node < 0 || node >= NumDataNodes) + if(node < 0 || node >= agent->num_dn_connections) continue; if (agent->dn_connections == NULL) @@ -1654,7 +1576,7 @@ cancel_query_on_connections(PoolAgent *agent, List *datanodelist, List *coordlis { int node = lfirst_int(nodelist_item); - if(node < 0 || node >= NumCoords) + if(node < 0 || node >= agent->num_coord_connections) continue; if (agent->coord_connections == NULL) @@ -1740,6 +1662,7 @@ PoolManagerCancelQuery(int dn_count, int* dn_list, int co_count, int* co_list) static void agent_release_connections(PoolAgent *agent, bool force_destroy) { + MemoryContext oldcontext; int i; if (!agent->dn_connections && !agent->coord_connections) @@ -1757,11 +1680,16 @@ agent_release_connections(PoolAgent *agent, bool force_destroy) pfree(agent->local_params); agent->local_params = NULL; } - if (agent->session_params || - (agent->is_temp && !force_destroy)) + if ((agent->session_params || agent->is_temp) && !force_destroy) return; /* + * There are possible memory allocations in the core pooler, we want + * these allocations in the contect of the database pool + */ + oldcontext = MemoryContextSwitchTo(agent->pool->mcxt); + + /* * Remaining connections are 
assumed to be clean. * First clean up for Datanodes */ @@ -1774,7 +1702,7 @@ agent_release_connections(PoolAgent *agent, bool force_destroy) * If connection has temporary objects on it, destroy connection slot. */ if (slot) - release_connection(agent->pool, slot, i, force_destroy, REMOTE_CONN_DATANODE); + release_connection(agent->pool, slot, agent->dn_conn_oids[i], force_destroy); agent->dn_connections[i] = NULL; } /* Then clean up for Coordinator connections */ @@ -1787,9 +1715,11 @@ agent_release_connections(PoolAgent *agent, bool force_destroy) * If connection has temporary objects on it, destroy connection slot. */ if (slot) - release_connection(agent->pool, slot, i, force_destroy, REMOTE_CONN_COORD); + release_connection(agent->pool, slot, agent->coord_conn_oids[i], force_destroy); agent->coord_connections[i] = NULL; } + + MemoryContextSwitchTo(oldcontext); } /* @@ -1800,41 +1730,35 @@ agent_release_connections(PoolAgent *agent, bool force_destroy) static void agent_reset_session(PoolAgent *agent) { - if (!agent->dn_connections && !agent->coord_connections) - return; + int i; if (!agent->session_params && !agent->local_params) return; /* Reset connection params */ - if (agent->session_params || agent->local_params) + /* Check agent slot for each Datanode */ + if (agent->dn_connections) { - int i; - - /* Check agent slot for each Datanode */ - if (agent->dn_connections) + for (i = 0; i < agent->num_dn_connections; i++) { - for (i = 0; i < agent->num_dn_connections; i++) - { - PGXCNodePoolSlot *slot = agent->dn_connections[i]; + PGXCNodePoolSlot *slot = agent->dn_connections[i]; - /* Reset given slot with parameters */ - if (slot) - PGXCNodeSendSetQuery(slot->conn, "SET SESSION AUTHORIZATION DEFAULT;RESET ALL;"); - } + /* Reset given slot with parameters */ + if (slot) + PGXCNodeSendSetQuery(slot->conn, "SET SESSION AUTHORIZATION DEFAULT;RESET ALL;"); } + } - if (agent->coord_connections) + if (agent->coord_connections) + { + /* Check agent slot for each 
Coordinator */ + for (i = 0; i < agent->num_coord_connections; i++) { - /* Check agent slot for each Coordinator */ - for (i = 0; i < agent->num_coord_connections; i++) - { - PGXCNodePoolSlot *slot = agent->coord_connections[i]; + PGXCNodePoolSlot *slot = agent->coord_connections[i]; - /* Reset given slot with parameters */ - if (slot) - PGXCNodeSendSetQuery(slot->conn, "SET SESSION AUTHORIZATION DEFAULT;RESET ALL;"); - } + /* Reset given slot with parameters */ + if (slot) + PGXCNodeSendSetQuery(slot->conn, "SET SESSION AUTHORIZATION DEFAULT;RESET ALL;"); } } @@ -1863,8 +1787,11 @@ agent_reset_session(PoolAgent *agent) static DatabasePool * create_database_pool(const char *database, const char *user_name) { - DatabasePool *databasePool; - int i; + MemoryContext oldcontext; + MemoryContext dbcontext; + DatabasePool *databasePool; + HASHCTL hinfo; + int hflags; /* check if exist */ databasePool = find_database_pool(database, user_name); @@ -1874,6 +1801,12 @@ create_database_pool(const char *database, const char *user_name) return databasePool; } + dbcontext = AllocSetContextCreate(PoolerCoreContext, + "DB Context", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + oldcontext = MemoryContextSwitchTo(dbcontext); /* Allocate memory */ databasePool = (DatabasePool *) palloc(sizeof(DatabasePool)); if (!databasePool) @@ -1885,6 +1818,7 @@ create_database_pool(const char *database, const char *user_name) return NULL; } + databasePool->mcxt = dbcontext; /* Copy the database name */ databasePool->database = pstrdup(database); /* Copy the user name */ @@ -1902,56 +1836,21 @@ create_database_pool(const char *database, const char *user_name) /* Init next reference */ databasePool->next = NULL; - /* Init Datanode pools */ - if (NumDataNodes != 0) - { - databasePool->dataNodePools = - (PGXCNodePool **) palloc(NumDataNodes * sizeof(PGXCNodePool **)); - if (!databasePool->dataNodePools) - { - /* out of memory */ - ereport(ERROR, - 
(errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - pfree(databasePool->database); - pfree(databasePool->user_name); - pfree(databasePool); - return NULL; - } - - for (i = 0; i < NumDataNodes; i++) - databasePool->dataNodePools[i] = NULL; - } - else - databasePool->dataNodePools = NULL; + /* Init node hashtable */ + MemSet(&hinfo, 0, sizeof(hinfo)); + hflags = 0; + hinfo.keysize = sizeof(Oid); + hinfo.entrysize = sizeof(PGXCNodePool); + hflags |= HASH_ELEM; - /* Init Coordinator pools */ - if (NumCoords != 0) - { - databasePool->coordNodePools = (PGXCNodePool **) palloc(NumCoords * sizeof(PGXCNodePool **)); - if (!databasePool->coordNodePools) - { - /* out of memory */ - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - pfree(databasePool->database); - pfree(databasePool->user_name); - pfree(databasePool); - return NULL; - } + hinfo.hcxt = dbcontext; + hflags |= HASH_CONTEXT; + databasePool->nodePools = hash_create("Node Pool", MaxDataNodes + MaxCoords, + &hinfo, hflags); - for (i = 0; i < NumCoords; i++) - databasePool->coordNodePools[i] = NULL; - } - else - databasePool->coordNodePools = NULL; - - /* Save number of node pool */ - databasePool->num_dn_pools = NumDataNodes; - databasePool->num_co_pools = NumCoords; + MemoryContextSwitchTo(oldcontext); /* Insert into the list */ insert_database_pool(databasePool); @@ -1967,30 +1866,21 @@ static int destroy_database_pool(const char *database, const char *user_name) { DatabasePool *databasePool; - int i; /* Delete from the list */ databasePool = remove_database_pool(database, user_name); if (databasePool) { - if (databasePool->dataNodePools) - { - for (i = 0; i < NumDataNodes; i++) - if (databasePool->dataNodePools[i]) - destroy_node_pool(databasePool->dataNodePools[i]); - pfree(databasePool->dataNodePools); - } - if (databasePool->coordNodePools) + HASH_SEQ_STATUS hseq_status; + PGXCNodePool *nodePool; + + hash_seq_init(&hseq_status, databasePool->nodePools); + while 
((nodePool = (PGXCNodePool *) hash_seq_search(&hseq_status))) { - for (i = 0; i < NumCoords; i++) - if (databasePool->coordNodePools[i]) - destroy_node_pool(databasePool->coordNodePools[i]); - pfree(databasePool->coordNodePools); + destroy_node_pool(nodePool); } /* free allocated memory */ - pfree(databasePool->database); - pfree(databasePool->user_name); - pfree(databasePool); + MemoryContextDelete(databasePool->mcxt); return 1; } return 0; @@ -2019,137 +1909,58 @@ insert_database_pool(DatabasePool *databasePool) * Rebuild information of database pools */ static void -reload_database_pools(void) +reload_database_pools(PoolAgent *agent) { DatabasePool *databasePool; - /* Scan the list and reload each pool */ + /* + * Release node connections if any held. It is not guaranteed client session + * does the same so don't ever try to return them to pool and reuse + */ + agent_release_connections(agent, true); + + /* Forget previously allocated node info */ + MemoryContextReset(agent->mcxt); + + /* and allocate new */ + PgxcNodeGetOids(&agent->coord_conn_oids, &agent->dn_conn_oids, + &agent->num_coord_connections, &agent->num_dn_connections, false); + + agent->coord_connections = (PGXCNodePoolSlot **) + palloc0(agent->num_coord_connections * sizeof(PGXCNodePoolSlot *)); + agent->dn_connections = (PGXCNodePoolSlot **) + palloc0(agent->num_dn_connections * sizeof(PGXCNodePoolSlot *)); + + /* + * Scan the list and destroy any altered pool. They will be recreated + * upon subsequent connection acquisition. 
+ */ databasePool = databasePools; while (databasePool) { /* Update each database pool slot with new connection information */ - int i; - bool co_slot_used[databasePool->num_co_pools]; - bool dn_slot_used[databasePool->num_dn_pools]; - int new_co_num = NumCoords; - int new_dn_num = NumDataNodes; - PGXCNodePool **new_co_pools = NULL; - PGXCNodePool **new_dn_pools = NULL; - - new_co_pools = (PGXCNodePool **) palloc(new_co_num * sizeof(PGXCNodePool **)); - new_dn_pools = (PGXCNodePool **) palloc(new_dn_num * sizeof(PGXCNodePool **)); - - /* Check on the database pools that have been reused */ - for (i = 0; i < databasePool->num_co_pools; i++) - co_slot_used[i] = false; - for (i = 0; i < databasePool->num_dn_pools; i++) - dn_slot_used[i] = false; - - for (i = 0; i < new_co_num; i++) - { - int j; - Oid nodeoid_check = coord_connInfos[i].nodeoid; - bool is_found = false; - int index_id = 0; - char *connstr_chk = PGXCNodeConnStr(coord_connInfos[i].host, - coord_connInfos[i].port, - databasePool->database, - databasePool->user_name, - "coordinator"); - - /* Scan for pool existence */ - for (j = 0; j < databasePool->num_co_pools; j++) - { - PGXCNodePool *pool = databasePool->coordNodePools[j]; - if (!pool) - continue; + HASH_SEQ_STATUS hseq_status; + PGXCNodePool *nodePool; - /* - * Node Oid and connection string has to be the same - * to ensure consistency. 
- */ - if (pool->nodeoid == nodeoid_check && - strcmp(pool->connstr, connstr_chk) == 0) - { - co_slot_used[j] = true; - is_found = true; - index_id = j; - break; - } - } - - if (co_slot_used[index_id] && - databasePool->coordNodePools && - is_found) - { - new_co_pools[i] = databasePool->coordNodePools[index_id]; - } - else - new_co_pools[i] = NULL; - pfree(connstr_chk); - } - for (i = 0; i < new_dn_num; i++) + hash_seq_init(&hseq_status, databasePool->nodePools); + while ((nodePool = (PGXCNodePool *) hash_seq_search(&hseq_status))) { - int j; - Oid nodeoid_check = datanode_connInfos[i].nodeoid; - int index_id = 0; - bool is_found = false; - char *connstr_chk = PGXCNodeConnStr(datanode_connInfos[i].host, - datanode_connInfos[i].port, - databasePool->database, - databasePool->user_name, - "coordinator"); - - /* Scan for pool existence */ - for (j = 0; j < databasePool->num_dn_pools; j++) - { - PGXCNodePool *pool = databasePool->dataNodePools[j]; - - if (!pool) - continue; - - if (pool->nodeoid == nodeoid_check && - strcmp(pool->connstr, connstr_chk) == 0) - { - dn_slot_used[j] = true; - is_found = true; - index_id = j; - break; - } - } + char *connstr_chk = build_node_conn_str(nodePool->nodeoid, + databasePool->database, + databasePool->user_name); - if (dn_slot_used[index_id] && - databasePool->dataNodePools && - is_found) + if (connstr_chk == NULL || strcmp(connstr_chk, nodePool->connstr)) { - new_dn_pools[i] = databasePool->dataNodePools[index_id]; + /* Node has been removed or altered */ + destroy_node_pool(nodePool); + hash_search(databasePool->nodePools, &nodePool->nodeoid, + HASH_REMOVE, NULL); } - else - new_dn_pools[i] = NULL; - pfree(connstr_chk); - } - /* Clean up node pools that are not necessary anymore */ - for (i = 0; i < databasePool->num_co_pools; i++) - { - if (!co_slot_used[i]) - destroy_node_pool(databasePool->coordNodePools[i]); - } - for (i = 0; i < databasePool->num_dn_pools; i++) - { - if (!dn_slot_used[i]) - 
destroy_node_pool(databasePool->dataNodePools[i]); + if (connstr_chk) + pfree(connstr_chk); } - if (databasePool->coordNodePools) - pfree(databasePool->coordNodePools); - if (databasePool->dataNodePools) - pfree(databasePool->dataNodePools); - - /* Update new database pool */ - databasePool->num_co_pools = new_co_num; - databasePool->num_dn_pools = new_dn_num; - databasePool->coordNodePools = new_co_pools; - databasePool->dataNodePools = new_dn_pools; + databasePool = databasePool->next; } } @@ -2176,63 +1987,6 @@ find_database_pool(const char *database, const char *user_name) return databasePool; } -/* - * Find pool to be cleaned for specified database in the list - */ -static DatabasePool * -find_database_pool_to_clean(const char *database, - const char *user_name, - List *dn_list, - List *co_list) -{ - DatabasePool *databasePool; - - /* Scan the list */ - databasePool = databasePools; - while (databasePool) - { - ListCell *nodelist_item; - - /* If database name does not correspond, move to next one */ - if (database && strcmp(database, databasePool->database) != 0) - { - databasePool = databasePool->next; - continue; - } - - /* If user name does not correspond, move to next one */ - if (user_name && strcmp(user_name, databasePool->user_name) != 0) - { - databasePool = databasePool->next; - continue; - } - - /* Check if this database pool is clean for given coordinator list */ - foreach (nodelist_item, co_list) - { - int nodenum = lfirst_int(nodelist_item); - - if (databasePool->coordNodePools && - databasePool->coordNodePools[nodenum] && - databasePool->coordNodePools[nodenum]->freeSize != 0) - return databasePool; - } - - /* Check if this database pool is clean for given datanode list */ - foreach (nodelist_item, dn_list) - { - int nodenum = lfirst_int(nodelist_item); - - if (databasePool->dataNodePools && - databasePool->dataNodePools[nodenum] && - databasePool->dataNodePools[nodenum]->freeSize != 0) - return databasePool; - } - - databasePool = 
databasePool->next; - } - return databasePool; -} /* * Remove pool for specified database from the list @@ -2277,36 +2031,15 @@ remove_database_pool(const char *database, const char *user_name) * Acquire connection */ static PGXCNodePoolSlot * -acquire_connection(DatabasePool *dbPool, int node, char client_conn_type) +acquire_connection(DatabasePool *dbPool, Oid node) { - PGXCNodePool *nodePool; - PGXCNodePoolSlot *slot; + PGXCNodePool *nodePool; + PGXCNodePoolSlot *slot; Assert(dbPool); - /* Manage the case where pool is not ready */ - if (node >= NumCoords && client_conn_type == REMOTE_CONN_COORD) - { - elog(WARNING, "can not connect to coordinator %d", node); - return NULL; - } - if (node >= NumDataNodes && client_conn_type == REMOTE_CONN_DATANODE) - { - elog(WARNING, "can not connect to datanode %d", node); - return NULL; - } - - if (client_conn_type == REMOTE_CONN_DATANODE) - Assert(0 <= node && node < NumDataNodes); - else if (client_conn_type == REMOTE_CONN_COORD) - Assert(0 <= node && node < NumCoords); - - slot = NULL; - /* Find referenced node pool depending on type of client connection */ - if (client_conn_type == REMOTE_CONN_DATANODE) - nodePool = dbPool->dataNodePools[node]; - else if (client_conn_type == REMOTE_CONN_COORD) - nodePool = dbPool->coordNodePools[node]; + nodePool = (PGXCNodePool *) hash_search(dbPool->nodePools, &node, HASH_FIND, + NULL); /* * When a Coordinator pool is initialized by a Coordinator Postmaster, @@ -2315,16 +2048,9 @@ acquire_connection(DatabasePool *dbPool, int node, char client_conn_type) * when creating or dropping Databases. 
*/ if (nodePool == NULL || nodePool->freeSize == 0) - { - grow_pool(dbPool, node, client_conn_type); - - /* Get back the correct slot that has been grown up*/ - if (client_conn_type == REMOTE_CONN_DATANODE) - nodePool = dbPool->dataNodePools[node]; - else if (client_conn_type == REMOTE_CONN_COORD) - nodePool = dbPool->coordNodePools[node]; - } + nodePool = grow_pool(dbPool, node); + slot = NULL; /* Check available connections */ while (nodePool && nodePool->freeSize > 0) { @@ -2333,8 +2059,11 @@ acquire_connection(DatabasePool *dbPool, int node, char client_conn_type) slot = nodePool->slot[--(nodePool->freeSize)]; retry: - /* Make sure connection is ok */ - poll_result = pqReadReady((PGconn *)slot->conn); + /* + * Make sure connection is ok, destroy connection slot if there is a + * problem. + */ + poll_result = pqReadReady((PGconn *) slot->conn); if (poll_result == 0) break; /* ok, no data */ @@ -2354,16 +2083,12 @@ acquire_connection(DatabasePool *dbPool, int node, char client_conn_type) /* Decrement current max pool size */ (nodePool->size)--; /* Ensure we are not below minimum size */ - grow_pool(dbPool, node, client_conn_type); + nodePool = grow_pool(dbPool, node); } if (slot == NULL) - { - if (client_conn_type == REMOTE_CONN_DATANODE) - elog(WARNING, "can not connect to datanode %d", node); - else if (client_conn_type == REMOTE_CONN_COORD) - elog(WARNING, "can not connect to coordinator %d", node); - } + elog(WARNING, "can not connect to node %u", node); + return slot; } @@ -2372,31 +2097,23 @@ acquire_connection(DatabasePool *dbPool, int node, char client_conn_type) * release connection from specified pool and slot */ static void -release_connection(DatabasePool * dbPool, PGXCNodePoolSlot * slot, - int index, bool force_destroy, char client_conn_type) +release_connection(DatabasePool *dbPool, PGXCNodePoolSlot *slot, + Oid node, bool force_destroy) { PGXCNodePool *nodePool; Assert(dbPool); Assert(slot); - if (client_conn_type == REMOTE_CONN_DATANODE) - 
Assert(0 <= index && index < NumDataNodes); - else if (client_conn_type == REMOTE_CONN_COORD) - Assert(0 <= index && index < NumCoords); - - /* Find referenced node pool depending on client connection type */ - if (client_conn_type == REMOTE_CONN_DATANODE) - nodePool = dbPool->dataNodePools[index]; - else if (client_conn_type == REMOTE_CONN_COORD) - nodePool = dbPool->coordNodePools[index]; - + nodePool = (PGXCNodePool *) hash_search(dbPool->nodePools, &node, HASH_FIND, + NULL); if (nodePool == NULL) { - /* report problem */ - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("database does not use node %d", (index)))); + /* + * The node may be altered or dropped. + * In any case the slot is no longer valid. + */ + destroy_slot(slot); return; } @@ -2413,99 +2130,45 @@ release_connection(DatabasePool * dbPool, PGXCNodePoolSlot * slot, /* Decrement pool size */ (nodePool->size)--; /* Ensure we are not below minimum size */ - grow_pool(dbPool, index, client_conn_type); + grow_pool(dbPool, node); } } /* - * Increase database pool size depending on connection type: - * REMOTE_CONN_COORD or REMOTE_CONN_DATANODE + * Increase database pool size, create new if does not exist */ -static void -grow_pool(DatabasePool * dbPool, int index, char client_conn_type) +static PGXCNodePool * +grow_pool(DatabasePool *dbPool, Oid node) { - PGXCNodePool *nodePool; + PGXCNodePool *nodePool; + bool found; Assert(dbPool); - if (client_conn_type == REMOTE_CONN_DATANODE) - Assert(0 <= index && index < NumDataNodes); - else if (client_conn_type == REMOTE_CONN_COORD) - Assert(0 <= index && index < NumCoords); - - /* Find referenced node pool */ - if (client_conn_type == REMOTE_CONN_DATANODE) - nodePool = dbPool->dataNodePools[index]; - else if (client_conn_type == REMOTE_CONN_COORD) - nodePool = dbPool->coordNodePools[index]; - - if (!nodePool) - { - char *remote_type; - - /* Allocate new DBNode Pool */ - nodePool = (PGXCNodePool *) palloc(sizeof(PGXCNodePool)); - if (!nodePool) - 
ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - /* - * Don't forget to define the type of remote connection - * Now PGXC just support Co->Co and Co->Dn connections - * but Dn->Dn Connections could be used for other purposes. - */ - if (IS_PGXC_COORDINATOR) - remote_type = pstrdup("coordinator"); - else if (IS_PGXC_DATANODE) - remote_type = pstrdup("datanode"); - - if (client_conn_type == REMOTE_CONN_DATANODE) - /* initialize it */ - nodePool->connstr = PGXCNodeConnStr(datanode_connInfos[index].host, - datanode_connInfos[index].port, - dbPool->database, - dbPool->user_name, - remote_type); - else if (client_conn_type == REMOTE_CONN_COORD) - nodePool->connstr = PGXCNodeConnStr(coord_connInfos[index].host, - coord_connInfos[index].port, - dbPool->database, - dbPool->user_name, - remote_type); + nodePool = (PGXCNodePool *) hash_search(dbPool->nodePools, &node, + HASH_ENTER, &found); + if (!found) + { + nodePool->connstr = build_node_conn_str(node, dbPool->database, + dbPool->user_name); if (!nodePool->connstr) { - pfree(nodePool); ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("could not build connection string for node %u", node))); } - nodePool->slot = (PGXCNodePoolSlot **) palloc(MaxPoolSize * sizeof(PGXCNodePoolSlot *)); + nodePool->slot = (PGXCNodePoolSlot **) palloc0(MaxPoolSize * sizeof(PGXCNodePoolSlot *)); if (!nodePool->slot) { - pfree(nodePool); - pfree(nodePool->connstr); ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"))); } - memset(nodePool->slot, 0, MaxPoolSize * sizeof(PGXCNodePoolSlot *)); nodePool->freeSize = 0; nodePool->size = 0; - - /* and insert into the array */ - if (client_conn_type == REMOTE_CONN_DATANODE) - { - nodePool->nodeoid = datanode_connInfos[index].nodeoid; - dbPool->dataNodePools[index] = nodePool; - } - else if (client_conn_type == REMOTE_CONN_COORD) - { - nodePool->nodeoid = 
coord_connInfos[index].nodeoid; - dbPool->coordNodePools[index] = nodePool; - } } while (nodePool->size < MinPoolSize || (nodePool->freeSize == 0 && nodePool->size < MaxPoolSize)) @@ -2546,6 +2209,7 @@ grow_pool(DatabasePool * dbPool, int index, char client_conn_type) nodePool->size, nodePool->connstr); } + return nodePool; } @@ -2682,105 +2346,60 @@ PoolerLoop(void) /* * Clean Connection in all Database Pools for given Datanode and Coordinator list */ -#define TIMEOUT_CLEAN_LOOP 10 - int -clean_connection(List *dn_discard, List *co_discard, const char *database, const char *user_name) +clean_connection(List *node_discard, const char *database, const char *user_name) { DatabasePool *databasePool; - int dn_len = list_length(dn_discard); - int co_len = list_length(co_discard); - int dn_list[list_length(dn_discard)]; - int co_list[list_length(co_discard)]; - int count, i; int res = CLEAN_CONNECTION_COMPLETED; - ListCell *nodelist_item; - PGXCNodePool *nodePool; - /* Save in array the lists of node number */ - count = 0; - foreach(nodelist_item,dn_discard) - dn_list[count++] = lfirst_int(nodelist_item); - - count = 0; - foreach(nodelist_item, co_discard) - co_list[count++] = lfirst_int(nodelist_item); - - /* Find correct Database pool to clean */ - databasePool = find_database_pool_to_clean(database, user_name, dn_discard, co_discard); + databasePool = databasePools; while (databasePool) { - databasePool = find_database_pool_to_clean(database, user_name, dn_discard, co_discard); + ListCell *lc; - /* Database pool has not been found, cleaning is over */ - if (!databasePool) - break; + if ((database && strcmp(database, databasePool->database)) || + (user_name && strcmp(user_name, databasePool->user_name))) + { + /* The pool does not match to request, skip */ + databasePool = databasePool->next; + continue; + } /* - * Clean each Pool Correctly - * First for Datanode Pool + * Clean each requested node pool */ - for (count = 0; count < dn_len; count++) + foreach(lc, 
node_discard) { - int node_num = dn_list[count]; - nodePool = databasePool->dataNodePools[node_num]; - - if (nodePool) - { - /* Check if connections are in use */ - if (nodePool->freeSize != nodePool->size) - { - elog(WARNING, "Pool of Database %s is using Datanode %d connections", - databasePool->database, node_num); - res = CLEAN_CONNECTION_NOT_COMPLETED; - } - - /* Destroy connections currently in Node Pool */ - if (nodePool->slot) - { - for (i = 0; i < nodePool->freeSize; i++) - destroy_slot(nodePool->slot[i]); - - /* Move slots in use at the beginning of Node Pool array */ - for (i = nodePool->freeSize; i < nodePool->size; i++ ) - nodePool->slot[i - nodePool->freeSize] = nodePool->slot[i]; - } - nodePool->size -= nodePool->freeSize; - nodePool->freeSize = 0; - } - } + PGXCNodePool *nodePool; + Oid node = lfirst_oid(lc); - /* Then for Coordinators */ - for (count = 0; count < co_len; count++) - { - int node_num = co_list[count]; - nodePool = databasePool->coordNodePools[node_num]; + nodePool = hash_search(databasePool->nodePools, &node, HASH_FIND, + NULL); if (nodePool) { /* Check if connections are in use */ - if (nodePool->freeSize != nodePool->size) + if (nodePool->freeSize < nodePool->size) { - elog(WARNING, "Pool of Database %s is using Coordinator %d connections", - databasePool->database, node_num); + elog(WARNING, "Pool of Database %s is using Datanode %u connections", + databasePool->database, node); res = CLEAN_CONNECTION_NOT_COMPLETED; } /* Destroy connections currently in Node Pool */ if (nodePool->slot) { + int i; for (i = 0; i < nodePool->freeSize; i++) destroy_slot(nodePool->slot[i]); - - /* Move slots in use at the beginning of Node Pool array */ - for (i = nodePool->freeSize; i < nodePool->size; i++ ) - nodePool->slot[i - nodePool->freeSize] = nodePool->slot[i]; } nodePool->size -= nodePool->freeSize; nodePool->freeSize = 0; } } + + databasePool = databasePool->next; } /* Release lock on Pooler, to allow transactions to connect again. 
*/ @@ -2858,3 +2477,31 @@ IsPoolHandle(void) return false; return true; } + + +/* + * Given node identifier, dbname and user name build connection string. + * Get node connection details from the shared memory node table + */ +static char * +build_node_conn_str(Oid node, char *dbname, char *user_name) +{ + NodeDefinition *nodeDef; + char *connstr; + + nodeDef = PgxcNodeGetDefinition(node); + if (nodeDef == NULL) + { + /* No such definition, node is dropped? */ + return NULL; + } + + connstr = PGXCNodeConnStr(NameStr(nodeDef->nodehost), + nodeDef->nodeport, + dbname, + user_name, + IS_PGXC_COORDINATOR ? "coordinator" : "datanode"); + pfree(nodeDef); + + return connstr; +} diff --git a/src/backend/pgxc/pool/poolutils.c b/src/backend/pgxc/pool/poolutils.c index fa5800a7d3..0db2d70634 100644 --- a/src/backend/pgxc/pool/poolutils.c +++ b/src/backend/pgxc/pool/poolutils.c @@ -138,12 +138,12 @@ pgxc_pool_reload(PG_FUNCTION_ARGS) /* Now session information is reset in correct memory context */ old_context = MemoryContextSwitchTo(TopMemoryContext); - /* Reconnect to pool manager */ - PoolManagerReconnect(); - - /* And reinitialize session */ + /* Reinitialize session, while old pooler connection is active */ InitMultinodeExecutor(true); + /* And reconnect to pool manager */ + PoolManagerReconnect(); + MemoryContextSwitchTo(old_context); PG_RETURN_BOOL(true); @@ -394,12 +394,12 @@ HandlePoolerReload(void) /* Need to be able to look into catalogs */ CurrentResourceOwner = ResourceOwnerCreate(NULL, "ForPoolerReload"); - /* Reconnect to pool manager */ - PoolManagerReconnect(); - - /* And reinitialize session */ + /* Reinitialize session, while old pooler connection is active */ InitMultinodeExecutor(true); + /* And reconnect to pool manager */ + PoolManagerReconnect(); + /* Send a message back to client regarding session being reloaded */ ereport(WARNING, (errcode(ERRCODE_OPERATOR_INTERVENTION), diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 
56c0bd8d49..a8a8b00b61 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -23,6 +23,9 @@ #include "commands/async.h" #include "miscadmin.h" #include "pgstat.h" +#ifdef PGXC +#include "pgxc/nodemgr.h" +#endif #include "postmaster/autovacuum.h" #include "postmaster/bgwriter.h" #include "postmaster/postmaster.h" @@ -126,6 +129,10 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) size = add_size(size, BTreeShmemSize()); size = add_size(size, SyncScanShmemSize()); size = add_size(size, AsyncShmemSize()); +#ifdef PGXC + size = add_size(size, NodeTablesShmemSize()); +#endif + #ifdef EXEC_BACKEND size = add_size(size, ShmemBackendArraySize()); #endif @@ -236,6 +243,11 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) SyncScanShmemInit(); AsyncShmemInit(); +#ifdef PGXC + NodeTablesShmemInit(); +#endif + + #ifdef EXEC_BACKEND /* diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index f160a545d5..622441af55 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -3922,26 +3922,6 @@ PostgresMain(int argc, char *argv[], const char *username) /* If we exit, first try and clean connection to GTM */ on_proc_exit (DataNodeShutdown, 0); } - - if (IS_PGXC_COORDINATOR && !IsConnFromCoord()) - { - /* - * Do not authorize connection to Coordinator if - * connection data to remote nodes is inconsistent. - */ - bool is_pool_chk = PoolManagerCheckConnectionInfo(); - - if (!is_pool_chk && !superuser()) - ereport(ERROR, - (errcode(ERRCODE_OPERATOR_INTERVENTION), - errmsg("Remote node information on pool manager inconsistent " - "with catalogs"))); - - if (!is_pool_chk && superuser()) - elog(WARNING, "Remote connection information on pooler is inconsistent " - "with catalogs. 
You should run pgxc_pool_reload() to reload " - "new cluster configuration"); - } #endif /* diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c index 53d4e08d1e..4b0a9d643d 100644 --- a/src/backend/utils/adt/dbsize.c +++ b/src/backend/utils/adt/dbsize.c @@ -746,7 +746,7 @@ pgxc_tablespace_size(Oid tsOid) initStringInfo(&buf); appendStringInfo(&buf, "SELECT pg_catalog.pg_tablespace_size('%s')", tsname); - PgxcNodeListAndCount(&coOids, &dnOids, &numcoords, &numdnodes); + PgxcNodeGetOids(&coOids, &dnOids, &numcoords, &numdnodes, false); return pgxc_execute_on_nodes(numdnodes, dnOids, buf.data); } @@ -771,7 +771,7 @@ pgxc_database_size(Oid dbOid) initStringInfo(&buf); appendStringInfo(&buf, "SELECT pg_catalog.pg_database_size('%s')", dbname); - PgxcNodeListAndCount(&coOids, &dnOids, &numcoords, &numdnodes); + PgxcNodeGetOids(&coOids, &dnOids, &numcoords, &numdnodes, false); return pgxc_execute_on_nodes(numdnodes, dnOids, buf.data); } diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c index 2306b35a45..e437103ce0 100644 --- a/src/backend/utils/adt/lockfuncs.c +++ b/src/backend/utils/adt/lockfuncs.c @@ -474,7 +474,7 @@ pgxc_advisory_lock(int64 key64, int32 key1, int32 key2, bool iskeybig, else SET_LOCKTAG_INT32(locktag, key1, key2); - PgxcNodeListAndCount(&coOids, &dnOids, &numcoords, &numdnodes); + PgxcNodeGetOids(&coOids, &dnOids, &numcoords, &numdnodes, false); /* Skip everything XC specific if there's only one coordinator running */ if (numcoords <= 1) diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 43d53a650f..005fceeb99 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -61,6 +61,7 @@ #include "pgxc/planner.h" #include "nodes/nodes.h" #include "pgxc/poolmgr.h" +#include "pgxc/nodemgr.h" #endif #include "postmaster/autovacuum.h" #include "postmaster/bgwriter.h" @@ -632,7 +633,7 @@ const char *const config_group_names[] = gettext_noop("Developer 
Options"), #ifdef PGXC /* DATA_NODES */ - gettext_noop("Data Nodes and Connection Pooling"), + gettext_noop("Datanodes and Connection Pooling"), /* GTM */ gettext_noop("GTM Connection"), #endif @@ -2489,6 +2490,28 @@ static struct config_int ConfigureNamesInt[] = 6666, 1, 65535, NULL, NULL, NULL }, + + { + {"max_datanodes", PGC_POSTMASTER, DATA_NODES, + gettext_noop("Maximum number of Datanodes in the cluster."), + gettext_noop("It is not possible to create more Datanodes in the cluster than " + "this maximum number.") + }, + &MaxDataNodes, + 16, 2, 65535, + NULL, NULL, NULL + }, + + { + {"max_coordinators", PGC_POSTMASTER, DATA_NODES, + gettext_noop("Maximum number of Coordinators in the cluster."), + gettext_noop("It is not possible to create more Coordinators in the cluster than " + "this maximum number.") + }, + &MaxCoords, + 16, 2, 65535, + NULL, NULL, NULL + }, #endif /* End-of-list marker */ { @@ -3377,7 +3400,7 @@ static struct config_enum ConfigureNamesEnum[] = #ifdef PGXC { - {"remotetype", PGC_BACKEND, CONN_AUTH, + {"remotetype", PGC_BACKEND, CONN_AUTH, gettext_noop("Sets the type of Postgres-XC remote connection"), NULL }, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 804f3504ce..20cb73fd6e 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -549,8 +549,14 @@ #max_pool_size = 100 # Maximum pool size # (change requires restart) #persistent_datanode_connections = off # Set persistent connection mode for pooler - # if set at on, connections taken for coordinator + # if set at on, connections taken for session # are not put back to pool +#max_coordinators = 16 # Maximum number of Coordinators + # that can be defined in cluster + # (change requires restart) +#max_datanodes = 16 # Maximum number of Datanodes + # that can be defined in cluster + # (change requires restart) 
#------------------------------------------------------------------------------ # GTM CONNECTION diff --git a/src/include/pgxc/nodemgr.h b/src/include/pgxc/nodemgr.h index fc55c6fb06..86471fed28 100644 --- a/src/include/pgxc/nodemgr.h +++ b/src/include/pgxc/nodemgr.h @@ -21,14 +21,32 @@ #define PGXC_NODENAME_LENGTH 64 +/* GUC parameters, limit for number of nodes */ +extern int MaxDataNodes; +extern int MaxCoords; /* Global number of nodes */ -extern int NumDataNodes; -extern int NumCoords; -extern int NumCoordSlaves; -extern int NumDataNodeSlaves; +extern int NumDataNodes; +extern int NumCoords; -extern void PgxcNodeListAndCount(Oid **coOids, Oid **dnOids, - int *num_coords, int *num_dns); +/* Node definition */ +typedef struct +{ + Oid nodeoid; + NameData nodename; + NameData nodehost; + int nodeport; + bool nodeisprimary; + bool nodeispreferred; +} NodeDefinition; + +extern void NodeTablesShmemInit(void); +extern Size NodeTablesShmemSize(void); + +extern void PgxcNodeListAndCount(void); +extern void PgxcNodeGetOids(Oid **coOids, Oid **dnOids, + int *num_coords, int *num_dns, + bool update_preferred); +extern NodeDefinition *PgxcNodeGetDefinition(Oid node); extern void PgxcNodeAlter(AlterNodeStmt *stmt); extern void PgxcNodeCreate(CreateNodeStmt *stmt); extern void PgxcNodeRemove(DropNodeStmt *stmt); diff --git a/src/include/pgxc/poolmgr.h b/src/include/pgxc/poolmgr.h index 06489d4ef9..08e29c6e23 100644 --- a/src/include/pgxc/poolmgr.h +++ b/src/include/pgxc/poolmgr.h @@ -21,6 +21,7 @@ #include "pgxcnode.h" #include "poolcomm.h" #include "storage/pmsignal.h" +#include "utils/hsearch.h" #define MAX_IDLE_TIME 60 @@ -69,8 +70,8 @@ typedef struct /* Pool of connections to specified pgxc node */ typedef struct { - char *connstr; Oid nodeoid; /* Node Oid related to this pool */ + char *connstr; int freeSize; /* available connections */ int size; /* total pool size */ PGXCNodePoolSlot **slot; @@ -81,11 +82,10 @@ typedef struct databasepool { char *database; char 
*user_name; - int num_dn_pools; - int num_co_pools; - PGXCNodePool **dataNodePools; /* one for each Datanode */ - PGXCNodePool **coordNodePools; /* one for each Coordinator */ - struct databasepool *next; + HTAB *nodePools; /* Hashtable of PGXCNodePool, one entry for each + * Coordinator or DataNode */ + MemoryContext mcxt; + struct databasepool *next; /* Reference to next to organize linked list */ } DatabasePool; /* @@ -96,16 +96,19 @@ typedef struct databasepool typedef struct { /* Process ID of postmaster child process associated to pool agent */ - int pid; + int pid; /* communication channel */ PoolPort port; - DatabasePool *pool; - int num_dn_connections; - int num_coord_connections; - PGXCNodePoolSlot **dn_connections; /* one for each Datanode */ - PGXCNodePoolSlot **coord_connections; /* one for each Coordinator */ - char *session_params; - char *local_params; + DatabasePool *pool; + MemoryContext mcxt; + int num_dn_connections; + int num_coord_connections; + Oid *dn_conn_oids; /* one for each Datanode */ + Oid *coord_conn_oids; /* one for each Coordinator */ + PGXCNodePoolSlot **dn_connections; /* one for each Datanode */ + PGXCNodePoolSlot **coord_connections; /* one for each Coordinator */ + char *session_params; + char *local_params; bool is_temp; /* Temporary objects used for this pool session? */ } PoolAgent; diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index 20e321ba49..833a632218 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -73,6 +73,7 @@ typedef enum LWLockId SyncScanLock, #ifdef PGXC BarrierLock, + NodeTableLock, #endif RelationMappingLock, AsyncCtlLock, |