summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Pavan Deolasee 2015-11-16 11:07:45 +0000
committer Pavan Deolasee 2015-11-16 11:07:45 +0000
commit fd8095eccd1126e3ce67e1c34e8698e025543fc3 (patch)
tree 853621e9a19e9eac021175e1ff5232548195e949
parent 20a773eecf419c605be78e06527a195e48b05dce (diff)
Use poll() instead of select() in a few places
Patch by Krzysztof Nienartowicz, with some bug fixes and rework by me.
-rw-r--r-- src/backend/pgxc/pool/pgxcnode.c 92
-rw-r--r-- src/backend/pgxc/pool/poolcomm.c 7
-rw-r--r-- src/backend/pgxc/pool/poolmgr.c 111
-rw-r--r-- src/include/pgxc/poolmgr.h 4
4 files changed, 137 insertions, 77 deletions
diff --git a/src/backend/pgxc/pool/pgxcnode.c b/src/backend/pgxc/pool/pgxcnode.c
index cc92b1fc27..274dd51bfd 100644
--- a/src/backend/pgxc/pool/pgxcnode.c
+++ b/src/backend/pgxc/pool/pgxcnode.c
@@ -22,7 +22,8 @@
*/
#include "postgres.h"
-#include <sys/select.h>
+#include <poll.h>
+
#include <sys/time.h>
#include <sys/types.h>
#include <sys/ioctl.h>
@@ -422,13 +423,15 @@ pgxc_node_receive(const int conn_count,
{
#define ERROR_OCCURED true
#define NO_ERROR_OCCURED false
- int i,
- res_select,
- nfds = 0;
- fd_set readfds;
- bool is_msg_buffered;
+ int i,
+ sockets_to_poll,
+ poll_val;
+ bool is_msg_buffered;
+ long timeout_ms;
+ struct pollfd pool_fd[conn_count];
- FD_ZERO(&readfds);
+ /* number of sockets that will actually be polled */
+ sockets_to_poll = 0;
is_msg_buffered = false;
for (i = 0; i < conn_count; i++)
@@ -445,54 +448,63 @@ pgxc_node_receive(const int conn_count,
{
/* If connection finished sending do not wait input from it */
if (connections[i]->state == DN_CONNECTION_STATE_IDLE || HAS_MESSAGE_BUFFERED(connections[i]))
+ {
+ pool_fd[i].fd = -1;
+ pool_fd[i].events = 0;
continue;
+ }
/* prepare select params */
if (connections[i]->sock > 0)
{
- FD_SET(connections[i]->sock, &readfds);
- nfds = connections[i]->sock;
+ pool_fd[i].fd = connections[i]->sock;
+ pool_fd[i].events = POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND;
+ sockets_to_poll++;
}
else
{
/* flag as bad, it will be removed from the list */
connections[i]->state = DN_CONNECTION_STATE_ERROR_FATAL;
+ pool_fd[i].fd = -1;
+ pool_fd[i].events = 0;
}
}
/*
* Return if we do not have connections to receive input
*/
- if (nfds == 0)
+ if (sockets_to_poll == 0)
{
if (is_msg_buffered)
return NO_ERROR_OCCURED;
return ERROR_OCCURED;
}
+ /* convert the select()-style struct timeval to poll() milliseconds; NULL means wait indefinitely (-1) */
+ if ( timeout == NULL )
+ timeout_ms = -1;
+ else
+ timeout_ms = (timeout->tv_sec * (uint64_t) 1000) + (timeout->tv_usec / 1000);
+
retry:
CHECK_FOR_INTERRUPTS();
- res_select = select(nfds + 1, &readfds, NULL, NULL, timeout);
- if (res_select < 0)
+ poll_val = poll(pool_fd, conn_count, timeout_ms);
+ if (poll_val < 0)
{
- /* error - retry if EINTR or EAGAIN */
- if (errno == EINTR || errno == EAGAIN)
+ /* error - retry if EINTR or EAGAIN */
+ if (errno == EINTR || errno == EAGAIN)
goto retry;
- if (errno == EBADF)
- {
- elog(WARNING, "select() bad file descriptor set");
- }
- elog(WARNING, "select() error: %d", errno);
+ elog(WARNING, "poll() error: %d", errno);
if (errno)
return ERROR_OCCURED;
return NO_ERROR_OCCURED;
}
- if (res_select == 0)
+ if (poll_val == 0)
{
/* Handle timeout */
- elog(DEBUG1, "timeout while waiting for response");
+ elog(DEBUG1, "timeout %d while waiting for any response from %d connections", timeout_ms,conn_count);
for (i = 0; i < conn_count; i++)
connections[i]->state = DN_CONNECTION_STATE_ERROR_FATAL;
return NO_ERROR_OCCURED;
@@ -503,17 +515,35 @@ retry:
{
PGXCNodeHandle *conn = connections[i];
- if (FD_ISSET(conn->sock, &readfds))
+ if( pool_fd[i].fd == -1 )
+ continue;
+
+ if ( pool_fd[i].fd == conn->sock )
{
- int read_status = pgxc_node_read_data(conn, true);
+ if( pool_fd[i].revents & POLLIN )
+ {
+ int read_status = pgxc_node_read_data(conn, true);
+ if ( read_status == EOF || read_status < 0 )
+ {
+ /* Can not read - no more actions, just discard connection */
+ conn->state = DN_CONNECTION_STATE_ERROR_FATAL;
+ add_error_message(conn, "unexpected EOF on datanode connection.");
+ elog(WARNING, "unexpected EOF on datanode oid connection: %d", conn->nodeoid);
+ /* Should we read from the other connections before returning? */
+ return ERROR_OCCURED;
+ }
- if (read_status == EOF || read_status < 0)
+ }
+ else if (
+ (pool_fd[i].revents & POLLERR) ||
+ (pool_fd[i].revents & POLLHUP) ||
+ (pool_fd[i].revents & POLLNVAL)
+ )
{
- /* Can not read - no more actions, just discard connection */
- conn->state = DN_CONNECTION_STATE_ERROR_FATAL;
- add_error_message(conn, "unexpected EOF on datanode connection");
- elog(WARNING, "unexpected EOF on datanode connection");
- /* Should we read from the other connections before returning? */
+ connections[i]->state = DN_CONNECTION_STATE_ERROR_FATAL;
+ add_error_message(conn, "unexpected network error on datanode connection");
+ elog(WARNING, "unexpected EOF on datanode oid connection: %d with event %d", conn->nodeoid,pool_fd[i].revents);
+ /* Should we check/read from the other connections before returning? */
return ERROR_OCCURED;
}
}
@@ -2394,8 +2424,8 @@ PGXCNodeSetParam(bool local, const char *name, const char *value)
}
/*
- * Special case for
- * RESET SESSION AUTHORIZATION
+ * Special case for
+ * RESET SESSION AUTHORIZATION
* SET SESSION AUTHORIZATION TO DEFAULT
*
* We must also forget any SET ROLE commands since RESET SESSION
diff --git a/src/backend/pgxc/pool/poolcomm.c b/src/backend/pgxc/pool/poolcomm.c
index 4a3c755e7d..49a13bbab0 100644
--- a/src/backend/pgxc/pool/poolcomm.c
+++ b/src/backend/pgxc/pool/poolcomm.c
@@ -57,6 +57,7 @@ pool_listen(unsigned short port, const char *unixSocketName)
struct sockaddr_un unix_addr;
int maxconn;
+
#ifdef HAVE_UNIX_SOCKETS
if (Lock_AF_UNIX(port, unixSocketName) < 0)
return -1;
@@ -72,6 +73,8 @@ pool_listen(unsigned short port, const char *unixSocketName)
len = sizeof(unix_addr.sun_family) +
strlen(unix_addr.sun_path) + 1;
+
+
/* bind the name to the descriptor */
if (bind(fd, (struct sockaddr *) & unix_addr, len) < 0)
return -1;
@@ -89,6 +92,8 @@ pool_listen(unsigned short port, const char *unixSocketName)
if (listen(fd, maxconn) < 0)
return -1;
+
+
/* Arrange to unlink the socket file at exit */
on_proc_exit(StreamDoUnlink, 0);
@@ -595,7 +600,7 @@ pool_recvfds(PoolPort *port, int *fds, int count)
{
ereport(ERROR,
(errcode(ERRCODE_PROTOCOL_VIOLATION),
- errmsg("incomplete message from client")));
+ errmsg("incomplete message from client [size: %u errno %u]",r,errno)));
goto failure;
}
diff --git a/src/backend/pgxc/pool/poolmgr.c b/src/backend/pgxc/pool/poolmgr.c
index 35c00d64c2..04bcd101d4 100644
--- a/src/backend/pgxc/pool/poolmgr.c
+++ b/src/backend/pgxc/pool/poolmgr.c
@@ -66,6 +66,7 @@
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
+#include <poll.h>
#include "pgxc/pause.h"
#include "storage/procarray.h"
@@ -110,7 +111,7 @@ static MemoryContext PoolerAgentContext = NULL;
/* Pool to all the databases (linked list) */
static DatabasePool *databasePools = NULL;
-/* PoolAgents */
+/* PoolAgents and the poll array */
static int agentCount = 0;
static PoolAgent **poolAgents;
@@ -233,7 +234,7 @@ PoolManagerInit()
{
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of memory")));
+ errmsg("out of memory while initializing pool agents")));
}
PoolerLoop();
@@ -377,8 +378,8 @@ GetPoolManagerHandle(void)
{
saved_errno = errno;
ereport(WARNING,
- (errmsg("could not create Unix-domain socket in directory \"%s\"",
- socketdir)));
+ (errmsg("could not create Unix-domain socket in directory \"%s\", errno: %d",
+ socketdir, saved_errno)));
}
else
{
@@ -1819,23 +1820,26 @@ acquire_connection(DatabasePool *dbPool, Oid node)
slot = nodePool->slot[--(nodePool->freeSize)];
retry:
- /*
- * Make sure connection is ok, destroy connection slot if there is a
- * problem.
- */
- poll_result = pqReadReady((PGconn *) slot->conn);
-
- if (poll_result == 0)
- break; /* ok, no data */
- else if (poll_result < 0)
+ if (PQsocket((PGconn *) slot->conn) > 0)
{
- if (errno == EAGAIN || errno == EINTR)
- goto retry;
+ /*
+ * Make sure connection is ok, destroy connection slot if there is a
+ * problem.
+ */
+ poll_result = pqReadReady((PGconn *) slot->conn);
+
+ if (poll_result == 0)
+ break; /* ok, no data */
+ else if (poll_result < 0)
+ {
+ if (errno == EAGAIN || errno == EINTR)
+ goto retry;
- elog(WARNING, "Error in checking connection, errno = %d", errno);
+ elog(WARNING, "Error in checking connection, errno = %d", errno);
+ }
+ else
+ elog(WARNING, "Unexpected data on connection, cleaning.");
}
- else
- elog(WARNING, "Unexpected data on connection, cleaning.");
destroy_slot(slot);
slot = NULL;
@@ -2046,6 +2050,9 @@ PoolerLoop(void)
{
StringInfoData input_message;
time_t last_maintenance = (time_t) 0;
+ int maintenance_timeout;
+ struct pollfd *pool_fd;
+ int i;
#ifdef HAVE_UNIX_SOCKETS
if (Unix_socket_directories)
@@ -2096,12 +2103,23 @@ PoolerLoop(void)
pfree(rawstring);
}
#endif
+
+ pool_fd = (struct pollfd *) palloc((MaxConnections + 1) * sizeof(struct pollfd));
+
+ if (server_fd == -1)
+ {
+ /* log error */
+ return;
+ }
+
initStringInfo(&input_message);
+ pool_fd[0].fd = server_fd;
+ pool_fd[0].events = POLLIN;
+
for (;;)
{
- int nfds;
- fd_set rfds;
+
int retval;
int i;
@@ -2112,25 +2130,17 @@ PoolerLoop(void)
if (!PostmasterIsAlive())
exit(1);
- /* watch for incoming connections */
- FD_ZERO(&rfds);
- FD_SET(server_fd, &rfds);
-
- nfds = server_fd;
-
/* watch for incoming messages */
- for (i = 0; i < agentCount; i++)
+ for (i = 1; i <= agentCount; i++)
{
- PoolAgent *agent = poolAgents[i];
- int sockfd = Socket(agent->port);
- FD_SET (sockfd, &rfds);
-
- nfds = Max(nfds, sockfd);
+ PoolAgent *agent = poolAgents[i - 1];
+ int sockfd = Socket(agent->port);
+ pool_fd[i].fd = sockfd;
+ pool_fd[i].events = POLLIN;
}
if (PoolMaintenanceTimeout > 0)
{
- struct timeval maintenance_timeout;
int timeout_val;
double timediff;
@@ -2149,13 +2159,10 @@ PoolerLoop(void)
else
timeout_val = PoolMaintenanceTimeout - rint(timediff);
- maintenance_timeout.tv_sec = timeout_val;
- maintenance_timeout.tv_usec = 0;
- /* wait for event */
- retval = select(nfds + 1, &rfds, NULL, NULL, &maintenance_timeout);
+ maintenance_timeout = timeout_val * 1000;
}
else
- retval = select(nfds + 1, &rfds, NULL, NULL, NULL);
+ maintenance_timeout = -1;
/*
* Emergency bailout if postmaster has died. This is to avoid the
* necessity for manual cleanup of all postmaster children.
@@ -2171,21 +2178,33 @@ PoolerLoop(void)
got_SIGHUP = false;
ProcessConfigFile(PGC_SIGHUP);
}
+
if (shutdown_requested)
{
- for (i = agentCount - 1; i >= 0; i--)
+ for (i = agentCount - 1; agentCount > 0 && i >= 0; i--)
{
PoolAgent *agent = poolAgents[i];
-
agent_destroy(agent);
}
+
while (databasePools)
if (destroy_database_pool(databasePools->database,
databasePools->user_name) == 0)
break;
+
close(server_fd);
exit(0);
}
+
+ /* wait for event */
+ retval = poll(pool_fd, agentCount + 1, maintenance_timeout);
+ if (retval < 0)
+ {
+ if (errno == EINTR || errno == EAGAIN)
+ continue;
+ elog(FATAL, "poll returned with error %d", retval);
+ }
+
if (retval > 0)
{
/*
@@ -2193,15 +2212,17 @@ PoolerLoop(void)
* and trailing items are shifted, so scroll downward
* to avoid problem
*/
- for (i = agentCount - 1; i >= 0; i--)
+ for (i = agentCount - 1; agentCount > 0 && i >= 0; i--)
{
- PoolAgent *agent = poolAgents[i];
- int sockfd = Socket(agent->port);
+ PoolAgent *agent = poolAgents[i];
+ int sockfd = Socket(agent->port);
- if (FD_ISSET(sockfd, &rfds))
+ if ((sockfd == pool_fd[i + 1].fd) &&
+ (pool_fd[i + 1].revents & POLLIN))
agent_handle_input(agent, &input_message);
}
- if (FD_ISSET(server_fd, &rfds))
+
+ if (pool_fd[0].revents & POLLIN)
agent_create();
}
else if (retval == 0)
diff --git a/src/include/pgxc/poolmgr.h b/src/include/pgxc/poolmgr.h
index f81cd71b7e..58371573cd 100644
--- a/src/include/pgxc/poolmgr.h
+++ b/src/include/pgxc/poolmgr.h
@@ -80,6 +80,10 @@ typedef struct
PGXCNodePoolSlot **dn_connections; /* one for each Datanode */
PGXCNodePoolSlot **coord_connections; /* one for each Coordinator */
} PoolAgent;
+/*
+ * Shorthand for struct pollfd, intended for polling the pooler sockets
+ */
+typedef struct pollfd Pollfd;
extern int PoolConnKeepAlive;