diff options
author | Pavan Deolasee | 2015-11-16 11:07:45 +0000 |
---|---|---|
committer | Pavan Deolasee | 2015-11-16 11:07:45 +0000 |
commit | fd8095eccd1126e3ce67e1c34e8698e025543fc3 (patch) | |
tree | 853621e9a19e9eac021175e1ff5232548195e949 | |
parent | 20a773eecf419c605be78e06527a195e48b05dce (diff) |
Use poll() instead of select() at a few places
Patch by Krzysztof Nienartowicz, with some bug fixes and rework by me
-rw-r--r-- | src/backend/pgxc/pool/pgxcnode.c | 92 | ||||
-rw-r--r-- | src/backend/pgxc/pool/poolcomm.c | 7 | ||||
-rw-r--r-- | src/backend/pgxc/pool/poolmgr.c | 111 | ||||
-rw-r--r-- | src/include/pgxc/poolmgr.h | 4 |
4 files changed, 137 insertions, 77 deletions
diff --git a/src/backend/pgxc/pool/pgxcnode.c b/src/backend/pgxc/pool/pgxcnode.c index cc92b1fc27..274dd51bfd 100644 --- a/src/backend/pgxc/pool/pgxcnode.c +++ b/src/backend/pgxc/pool/pgxcnode.c @@ -22,7 +22,8 @@ */ #include "postgres.h" -#include <sys/select.h> +#include <poll.h> + #include <sys/time.h> #include <sys/types.h> #include <sys/ioctl.h> @@ -422,13 +423,15 @@ pgxc_node_receive(const int conn_count, { #define ERROR_OCCURED true #define NO_ERROR_OCCURED false - int i, - res_select, - nfds = 0; - fd_set readfds; - bool is_msg_buffered; + int i, + sockets_to_poll, + poll_val; + bool is_msg_buffered; + long timeout_ms; + struct pollfd pool_fd[conn_count]; - FD_ZERO(&readfds); + /* sockets to be polled index */ + sockets_to_poll = 0; is_msg_buffered = false; for (i = 0; i < conn_count; i++) @@ -445,54 +448,63 @@ pgxc_node_receive(const int conn_count, { /* If connection finished sending do not wait input from it */ if (connections[i]->state == DN_CONNECTION_STATE_IDLE || HAS_MESSAGE_BUFFERED(connections[i])) + { + pool_fd[i].fd = -1; + pool_fd[i].events = 0; continue; + } /* prepare select params */ if (connections[i]->sock > 0) { - FD_SET(connections[i]->sock, &readfds); - nfds = connections[i]->sock; + pool_fd[i].fd = connections[i]->sock; + pool_fd[i].events = POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND; + sockets_to_poll++; } else { /* flag as bad, it will be removed from the list */ connections[i]->state = DN_CONNECTION_STATE_ERROR_FATAL; + pool_fd[i].fd = -1; + pool_fd[i].events = 0; } } /* * Return if we do not have connections to receive input */ - if (nfds == 0) + if (sockets_to_poll == 0) { if (is_msg_buffered) return NO_ERROR_OCCURED; return ERROR_OCCURED; } + /* do conversion from the select behaviour */ + if ( timeout == NULL ) + timeout_ms = -1; + else + timeout_ms = (timeout->tv_sec * (uint64_t) 1000) + (timeout->tv_usec / 1000); + retry: CHECK_FOR_INTERRUPTS(); - res_select = select(nfds + 1, &readfds, NULL, NULL, timeout); - if (res_select < 0) + poll_val = poll(pool_fd, conn_count, timeout_ms); + if (poll_val < 0) { - /* error - retry if EINTR or EAGAIN */ - if (errno == EINTR || errno == EAGAIN) + /* error - retry if EINTR */ + if (errno == EINTR || errno == EAGAIN) goto retry; - if (errno == EBADF) - { - elog(WARNING, "select() bad file descriptor set"); - } - elog(WARNING, "select() error: %d", errno); + elog(WARNING, "poll() error: %d", errno); if (errno) return ERROR_OCCURED; return NO_ERROR_OCCURED; } - if (res_select == 0) + if (poll_val == 0) { /* Handle timeout */ - elog(DEBUG1, "timeout while waiting for response"); + elog(DEBUG1, "timeout %d while waiting for any response from %d connections", timeout_ms,conn_count); for (i = 0; i < conn_count; i++) connections[i]->state = DN_CONNECTION_STATE_ERROR_FATAL; return NO_ERROR_OCCURED; @@ -503,17 +515,35 @@ retry: { PGXCNodeHandle *conn = connections[i]; - if (FD_ISSET(conn->sock, &readfds)) + if( pool_fd[i].fd == -1 ) + continue; + + if ( pool_fd[i].fd == conn->sock ) { - int read_status = pgxc_node_read_data(conn, true); + if( pool_fd[i].revents & POLLIN ) + { + int read_status = pgxc_node_read_data(conn, true); + if ( read_status == EOF || read_status < 0 ) + { + /* Can not read - no more actions, just discard connection */ + conn->state = DN_CONNECTION_STATE_ERROR_FATAL; + add_error_message(conn, "unexpected EOF on datanode connection."); + elog(WARNING, "unexpected EOF on datanode oid connection: %d", conn->nodeoid); + /* Should we read from the other connections before returning? */ + return ERROR_OCCURED; + } - if (read_status == EOF || read_status < 0) + } + else if ( + (pool_fd[i].revents & POLLERR) || + (pool_fd[i].revents & POLLHUP) || + (pool_fd[i].revents & POLLNVAL) + ) { - /* Can not read - no more actions, just discard connection */ - conn->state = DN_CONNECTION_STATE_ERROR_FATAL; - add_error_message(conn, "unexpected EOF on datanode connection"); - elog(WARNING, "unexpected EOF on datanode connection"); - /* Should we read from the other connections before returning? */ + connections[i]->state = DN_CONNECTION_STATE_ERROR_FATAL; + add_error_message(conn, "unexpected network error on datanode connection"); + elog(WARNING, "unexpected EOF on datanode oid connection: %d with event %d", conn->nodeoid,pool_fd[i].revents); + /* Should we check/read from the other connections before returning? */ return ERROR_OCCURED; } } @@ -2394,8 +2424,8 @@ PGXCNodeSetParam(bool local, const char *name, const char *value) } /* - * Special case for - * RESET SESSION AUTHORIZATION + * Special case for + * RESET SESSION AUTHORIZATION * SET SESSION AUTHORIZATION TO DEFAULT * * We must also forget any SET ROLE commands since RESET SESSION diff --git a/src/backend/pgxc/pool/poolcomm.c b/src/backend/pgxc/pool/poolcomm.c index 4a3c755e7d..49a13bbab0 100644 --- a/src/backend/pgxc/pool/poolcomm.c +++ b/src/backend/pgxc/pool/poolcomm.c @@ -57,6 +57,7 @@ pool_listen(unsigned short port, const char *unixSocketName) struct sockaddr_un unix_addr; int maxconn; + #ifdef HAVE_UNIX_SOCKETS if (Lock_AF_UNIX(port, unixSocketName) < 0) return -1; @@ -72,6 +73,8 @@ pool_listen(unsigned short port, const char *unixSocketName) len = sizeof(unix_addr.sun_family) + strlen(unix_addr.sun_path) + 1; + + /* bind the name to the descriptor */ if (bind(fd, (struct sockaddr *) & unix_addr, len) < 0) return -1; @@ -89,6 +92,8 @@ pool_listen(unsigned short port, const char *unixSocketName) if (listen(fd, maxconn) < 0) return -1; + + /* Arrange to unlink the socket file at exit */ on_proc_exit(StreamDoUnlink, 0); @@ -595,7 +600,7 @@ pool_recvfds(PoolPort *port, int *fds, int count) { ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), - errmsg("incomplete message from client"))); + errmsg("incomplete message from client [size: %u errno %u]",r,errno))); goto failure; } diff --git a/src/backend/pgxc/pool/poolmgr.c b/src/backend/pgxc/pool/poolmgr.c index 35c00d64c2..04bcd101d4 100644 --- a/src/backend/pgxc/pool/poolmgr.c +++ b/src/backend/pgxc/pool/poolmgr.c @@ -66,6 +66,7 @@ #include <string.h> #include <sys/types.h> #include <sys/socket.h> +#include <poll.h> #include "pgxc/pause.h" #include "storage/procarray.h" @@ -110,7 +111,7 @@ static MemoryContext PoolerAgentContext = NULL; /* Pool to all the databases (linked list) */ static DatabasePool *databasePools = NULL; -/* PoolAgents */ +/* PoolAgents and the poll array*/ static int agentCount = 0; static PoolAgent **poolAgents; @@ -233,7 +234,7 @@ PoolManagerInit() { ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); + errmsg("out of memory while initializing pool agents"))); } PoolerLoop(); @@ -377,8 +378,8 @@ GetPoolManagerHandle(void) { saved_errno = errno; ereport(WARNING, - (errmsg("could not create Unix-domain socket in directory \"%s\"", - socketdir))); + (errmsg("could not create Unix-domain socket in directory \"%s\", errno: %d", + socketdir, saved_errno))); } else { @@ -1819,23 +1820,26 @@ acquire_connection(DatabasePool *dbPool, Oid node) slot = nodePool->slot[--(nodePool->freeSize)]; retry: - /* - * Make sure connection is ok, destroy connection slot if there is a - * problem. - */ - poll_result = pqReadReady((PGconn *) slot->conn); - - if (poll_result == 0) - break; /* ok, no data */ - else if (poll_result < 0) + if (PQsocket((PGconn *) slot->conn) > 0) { - if (errno == EAGAIN || errno == EINTR) - goto retry; + /* + * Make sure connection is ok, destroy connection slot if there is a + * problem. + */ + poll_result = pqReadReady((PGconn *) slot->conn); + + if (poll_result == 0) + break; /* ok, no data */ + else if (poll_result < 0) + { + if (errno == EAGAIN || errno == EINTR) + goto retry; - elog(WARNING, "Error in checking connection, errno = %d", errno); + elog(WARNING, "Error in checking connection, errno = %d", errno); + } + else + elog(WARNING, "Unexpected data on connection, cleaning."); } - else - elog(WARNING, "Unexpected data on connection, cleaning."); destroy_slot(slot); slot = NULL; @@ -2046,6 +2050,9 @@ PoolerLoop(void) { StringInfoData input_message; time_t last_maintenance = (time_t) 0; + int maintenance_timeout; + struct pollfd *pool_fd; + int i; #ifdef HAVE_UNIX_SOCKETS if (Unix_socket_directories) @@ -2096,12 +2103,23 @@ PoolerLoop(void) pfree(rawstring); } #endif + + pool_fd = (struct pollfd *) palloc((MaxConnections + 1) * sizeof(struct pollfd)); + + if (server_fd == -1) + { + /* log error */ + return; + } + initStringInfo(&input_message); + pool_fd[0].fd = server_fd; + pool_fd[0].events = POLLIN; + for (;;) { - int nfds; - fd_set rfds; + int retval; int i; @@ -2112,25 +2130,17 @@ PoolerLoop(void) if (!PostmasterIsAlive()) exit(1); - /* watch for incoming connections */ - FD_ZERO(&rfds); - FD_SET(server_fd, &rfds); - - nfds = server_fd; - /* watch for incoming messages */ - for (i = 0; i < agentCount; i++) + for (i = 1; i <= agentCount; i++) { - PoolAgent *agent = poolAgents[i]; - int sockfd = Socket(agent->port); - FD_SET (sockfd, &rfds); - - nfds = Max(nfds, sockfd); + PoolAgent *agent = poolAgents[i - 1]; + int sockfd = Socket(agent->port); + pool_fd[i].fd = sockfd; + pool_fd[i].events = POLLIN; } if (PoolMaintenanceTimeout > 0) { - struct timeval maintenance_timeout; int timeout_val; double timediff; @@ -2149,13 +2159,10 @@ PoolerLoop(void) else timeout_val = PoolMaintenanceTimeout - rint(timediff); - maintenance_timeout.tv_sec = timeout_val; - maintenance_timeout.tv_usec = 0; - /* wait for event */ - retval = select(nfds + 1, &rfds, NULL, NULL, &maintenance_timeout); + maintenance_timeout = timeout_val * 1000; } else - retval = select(nfds + 1, &rfds, NULL, NULL, NULL); + maintenance_timeout = -1; /* * Emergency bailout if postmaster has died. This is to avoid the * necessity for manual cleanup of all postmaster children. @@ -2171,21 +2178,33 @@ PoolerLoop(void) got_SIGHUP = false; ProcessConfigFile(PGC_SIGHUP); } + if (shutdown_requested) { - for (i = agentCount - 1; i >= 0; i--) + for (i = agentCount - 1; agentCount > 0 && i >= 0; i--) { PoolAgent *agent = poolAgents[i]; - agent_destroy(agent); } + while (databasePools) if (destroy_database_pool(databasePools->database, databasePools->user_name) == 0) break; + close(server_fd); exit(0); } + + /* wait for event */ + retval = poll(pool_fd, agentCount + 1, maintenance_timeout); + if (retval < 0) + { + if (errno == EINTR || errno == EAGAIN) + continue; + elog(FATAL, "poll returned with error %d", retval); + } + if (retval > 0) { /* @@ -2193,15 +2212,17 @@ PoolerLoop(void) * and trailing items are shifted, so scroll downward * to avoid problem */ - for (i = agentCount - 1; i >= 0; i--) + for (i = agentCount - 1; agentCount > 0 && i >= 0; i--) { - PoolAgent *agent = poolAgents[i]; - int sockfd = Socket(agent->port); + PoolAgent *agent = poolAgents[i]; + int sockfd = Socket(agent->port); - if (FD_ISSET(sockfd, &rfds)) + if ((sockfd == pool_fd[i + 1].fd) && + (pool_fd[i + 1].revents & POLLIN)) agent_handle_input(agent, &input_message); } - if (FD_ISSET(server_fd, &rfds)) + + if (pool_fd[0].revents & POLLIN) agent_create(); } else if (retval == 0) diff --git a/src/include/pgxc/poolmgr.h b/src/include/pgxc/poolmgr.h index f81cd71b7e..58371573cd 100644 --- a/src/include/pgxc/poolmgr.h +++ b/src/include/pgxc/poolmgr.h @@ -80,6 +80,10 @@ typedef struct PGXCNodePoolSlot **dn_connections; /* one for each Datanode */ PGXCNodePoolSlot **coord_connections; /* one for each Coordinator */ } PoolAgent; +/* + * Helper to poll for all pooler sockets + */ +typedef struct pollfd Pollfd; extern int PoolConnKeepAlive; |