Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 98a64d0

Browse files
committed Mar 21, 2016
Introduce WaitEventSet API.
Commit ac1d794 ("Make idle backends exit if the postmaster dies.") introduced a regression on, at least, large Linux systems. Constantly adding the same postmaster_alive_fds to the OS's internal data structures for implementing poll/select can cause significant contention, leading to a performance regression of nearly 3x in one example. This can be avoided by using e.g. Linux's epoll, which avoids having to add/remove file descriptors to the wait data structures at a high rate. Unfortunately the current latch interface makes it hard to allocate any persistent per-backend resources. Replace, with a backward compatibility layer, WaitLatchOrSocket with a new WaitEventSet API. Users can allocate such a Set across multiple calls, and add more than one file-descriptor to wait on. The latter has been added because there are upcoming PostgreSQL features where that will be helpful. In addition to the previously existing poll(2), select(2), WaitForMultipleObjects() implementations, also provide an epoll_wait(2) based implementation to address the aforementioned performance problem. Epoll is only available on Linux, but that is the most likely OS for machines large enough (four sockets) to reproduce the problem. To actually address the aforementioned regression, create and use a long-lived WaitEventSet for FE/BE communication. There are additional places that would benefit from a long-lived set, but that's a task for another day. Thanks to Amit Kapila, who helped make the Windows code I blindly wrote actually work. Reported-By: Dmitry Vasilyev Discussion: CAB-SwXZh44_2ybvS5Z67p_CDz=XFn4hNAD=CnMEF+QqkXwFrGg@mail.gmail.com 20160114143931.GG10941@awork2.anarazel.de
1 parent 72e2d21 commit 98a64d0

File tree

10 files changed

+1171
-530
lines changed

10 files changed

+1171
-530
lines changed
 

‎configure

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10193,7 +10193,7 @@ fi
1019310193
## Header files
1019410194
##
1019510195

10196-
for ac_header in atomic.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h
10196+
for ac_header in atomic.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/epoll.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h
1019710197
do :
1019810198
as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
1019910199
ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"

‎configure.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1183,7 +1183,7 @@ AC_SUBST(UUID_LIBS)
11831183
##
11841184

11851185
dnl sys/socket.h is required by AC_FUNC_ACCEPT_ARGTYPES
1186-
AC_CHECK_HEADERS([atomic.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h])
1186+
AC_CHECK_HEADERS([atomic.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/epoll.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h])
11871187

11881188
# On BSD, test for net/if.h will fail unless sys/socket.h
11891189
# is included first.

‎src/backend/libpq/be-secure.c

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -140,13 +140,13 @@ secure_read(Port *port, void *ptr, size_t len)
140140
/* In blocking mode, wait until the socket is ready */
141141
if (n < 0 && !port->noblock && (errno == EWOULDBLOCK || errno == EAGAIN))
142142
{
143-
int w;
143+
WaitEvent event;
144144

145145
Assert(waitfor);
146146

147-
w = WaitLatchOrSocket(MyLatch,
148-
WL_LATCH_SET | WL_POSTMASTER_DEATH | waitfor,
149-
port->sock, 0);
147+
ModifyWaitEvent(FeBeWaitSet, 0, waitfor, NULL);
148+
149+
WaitEventSetWait(FeBeWaitSet, -1 /* no timeout */, &event, 1);
150150

151151
/*
152152
* If the postmaster has died, it's not safe to continue running,
@@ -165,13 +165,13 @@ secure_read(Port *port, void *ptr, size_t len)
165165
* cycles checking for this very rare condition, and this should cause
166166
* us to exit quickly in most cases.)
167167
*/
168-
if (w & WL_POSTMASTER_DEATH)
168+
if (event.events & WL_POSTMASTER_DEATH)
169169
ereport(FATAL,
170170
(errcode(ERRCODE_ADMIN_SHUTDOWN),
171171
errmsg("terminating connection due to unexpected postmaster exit")));
172172

173173
/* Handle interrupt. */
174-
if (w & WL_LATCH_SET)
174+
if (event.events & WL_LATCH_SET)
175175
{
176176
ResetLatch(MyLatch);
177177
ProcessClientReadInterrupt(true);
@@ -241,22 +241,22 @@ secure_write(Port *port, void *ptr, size_t len)
241241

242242
if (n < 0 && !port->noblock && (errno == EWOULDBLOCK || errno == EAGAIN))
243243
{
244-
int w;
244+
WaitEvent event;
245245

246246
Assert(waitfor);
247247

248-
w = WaitLatchOrSocket(MyLatch,
249-
WL_LATCH_SET | WL_POSTMASTER_DEATH | waitfor,
250-
port->sock, 0);
248+
ModifyWaitEvent(FeBeWaitSet, 0, waitfor, NULL);
249+
250+
WaitEventSetWait(FeBeWaitSet, -1 /* no timeout */, &event, 1);
251251

252252
/* See comments in secure_read. */
253-
if (w & WL_POSTMASTER_DEATH)
253+
if (event.events & WL_POSTMASTER_DEATH)
254254
ereport(FATAL,
255255
(errcode(ERRCODE_ADMIN_SHUTDOWN),
256256
errmsg("terminating connection due to unexpected postmaster exit")));
257257

258258
/* Handle interrupt. */
259-
if (w & WL_LATCH_SET)
259+
if (event.events & WL_LATCH_SET)
260260
{
261261
ResetLatch(MyLatch);
262262
ProcessClientWriteInterrupt(true);

‎src/backend/libpq/pqcomm.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,11 @@ pq_init(void)
201201
(errmsg("could not set socket to nonblocking mode: %m")));
202202
#endif
203203

204+
FeBeWaitSet = CreateWaitEventSet(TopMemoryContext, 3);
205+
AddWaitEventToSet(FeBeWaitSet, WL_SOCKET_WRITEABLE, MyProcPort->sock,
206+
NULL, NULL);
207+
AddWaitEventToSet(FeBeWaitSet, WL_LATCH_SET, -1, MyLatch, NULL);
208+
AddWaitEventToSet(FeBeWaitSet, WL_POSTMASTER_DEATH, -1, NULL, NULL);
204209
}
205210

206211
/* --------------------------------

‎src/backend/storage/ipc/latch.c

Lines changed: 1104 additions & 511 deletions
Large diffs are not rendered by default.

‎src/backend/utils/init/miscinit.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333

3434
#include "access/htup_details.h"
3535
#include "catalog/pg_authid.h"
36+
#include "libpq/libpq.h"
3637
#include "mb/pg_wchar.h"
3738
#include "miscadmin.h"
3839
#include "postmaster/autovacuum.h"
@@ -247,6 +248,9 @@ SwitchToSharedLatch(void)
247248

248249
MyLatch = &MyProc->procLatch;
249250

251+
if (FeBeWaitSet)
252+
ModifyWaitEvent(FeBeWaitSet, 1, WL_LATCH_SET, MyLatch);
253+
250254
/*
251255
* Set the shared latch as the local one might have been set. This
252256
* shouldn't normally be necessary as code is supposed to check the
@@ -262,6 +266,10 @@ SwitchBackToLocalLatch(void)
262266
Assert(MyProc != NULL && MyLatch == &MyProc->procLatch);
263267

264268
MyLatch = &LocalLatchData;
269+
270+
if (FeBeWaitSet)
271+
ModifyWaitEvent(FeBeWaitSet, 1, WL_LATCH_SET, MyLatch);
272+
265273
SetLatch(MyLatch);
266274
}
267275

‎src/include/libpq/libpq.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#include "lib/stringinfo.h"
2121
#include "libpq/libpq-be.h"
22+
#include "storage/latch.h"
2223

2324

2425
typedef struct
@@ -95,6 +96,8 @@ extern ssize_t secure_raw_write(Port *port, const void *ptr, size_t len);
9596

9697
extern bool ssl_loaded_verify_locations;
9798

99+
WaitEventSet *FeBeWaitSet;
100+
98101
/* GUCs */
99102
extern char *SSLCipherSuites;
100103
extern char *SSLECDHCurve;

‎src/include/pg_config.h.in

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -530,6 +530,9 @@
530530
/* Define to 1 if you have the syslog interface. */
531531
#undef HAVE_SYSLOG
532532

533+
/* Define to 1 if you have the <sys/epoll.h> header file. */
534+
#undef HAVE_SYS_EPOLL_H
535+
533536
/* Define to 1 if you have the <sys/ioctl.h> header file. */
534537
#undef HAVE_SYS_IOCTL_H
535538

‎src/include/storage/latch.h

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,12 @@
6868
* use of any generic handler.
6969
*
7070
*
71+
* WaitEventSets allow waiting for latches being set and additional events -
72+
* postmaster dying and socket readiness of several sockets currently - at the
73+
* same time. On many platforms using a long lived event set is more
74+
* efficient than using WaitLatch or WaitLatchOrSocket.
75+
*
76+
*
7177
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
7278
* Portions Copyright (c) 1994, Regents of the University of California
7379
*
@@ -95,13 +101,27 @@ typedef struct Latch
95101
#endif
96102
} Latch;
97103

98-
/* Bitmasks for events that may wake-up WaitLatch() clients */
104+
/*
105+
* Bitmasks for events that may wake up WaitLatch(), WaitLatchOrSocket(), or
106+
* WaitEventSetWait().
107+
*/
99108
#define WL_LATCH_SET (1 << 0)
100109
#define WL_SOCKET_READABLE (1 << 1)
101110
#define WL_SOCKET_WRITEABLE (1 << 2)
102-
#define WL_TIMEOUT (1 << 3)
111+
#define WL_TIMEOUT (1 << 3) /* not for WaitEventSetWait() */
103112
#define WL_POSTMASTER_DEATH (1 << 4)
104113

114+
typedef struct WaitEvent
115+
{
116+
int pos; /* position in the event data structure */
117+
uint32 events; /* triggered events */
118+
pgsocket fd; /* socket fd associated with event */
119+
void *user_data; /* pointer provided in AddWaitEventToSet */
120+
} WaitEvent;
121+
122+
/* forward declaration to avoid exposing latch.c implementation details */
123+
typedef struct WaitEventSet WaitEventSet;
124+
105125
/*
106126
* prototypes for functions in latch.c
107127
*/
@@ -110,12 +130,19 @@ extern void InitLatch(volatile Latch *latch);
110130
extern void InitSharedLatch(volatile Latch *latch);
111131
extern void OwnLatch(volatile Latch *latch);
112132
extern void DisownLatch(volatile Latch *latch);
113-
extern int WaitLatch(volatile Latch *latch, int wakeEvents, long timeout);
114-
extern int WaitLatchOrSocket(volatile Latch *latch, int wakeEvents,
115-
pgsocket sock, long timeout);
116133
extern void SetLatch(volatile Latch *latch);
117134
extern void ResetLatch(volatile Latch *latch);
118135

136+
extern WaitEventSet *CreateWaitEventSet(MemoryContext context, int nevents);
137+
extern void FreeWaitEventSet(WaitEventSet *set);
138+
extern int AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd,
139+
Latch *latch, void *user_data);
140+
extern void ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch);
141+
142+
extern int WaitEventSetWait(WaitEventSet *set, long timeout, WaitEvent *occurred_events, int nevents);
143+
extern int WaitLatch(volatile Latch *latch, int wakeEvents, long timeout);
144+
extern int WaitLatchOrSocket(volatile Latch *latch, int wakeEvents,
145+
pgsocket sock, long timeout);
119146

120147
/*
121148
* Unix implementation uses SIGUSR1 for inter-process signaling.

‎src/tools/pgindent/typedefs.list

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2113,6 +2113,8 @@ WalSnd
21132113
WalSndCtlData
21142114
WalSndSendDataCallback
21152115
WalSndState
2116+
WaitEvent
2117+
WaitEventSet
21162118
WholeRowVarExprState
21172119
WindowAgg
21182120
WindowAggState

0 commit comments

Comments
 (0)
Please sign in to comment.