Skip to content

Commit 1f39a1c

Browse files
committed
Restructure libpq's handling of send failures.
Originally, if libpq got a failure (e.g., ECONNRESET) while trying to send data to the server, it would just report that and wash its hands of the matter. It was soon found that that wasn't a very pleasant way of coping with server-initiated disconnections, so we introduced a hack (pqHandleSendFailure) in the code that sends queries to make it peek ahead for server error reports before reporting the send failure.

It now emerges that related cases can occur during connection setup; in particular, as of TLS 1.3 it's unsafe to assume that SSL connection failures will be reported by SSL_connect rather than during our first send attempt. We could have fixed that in a hacky way by applying pqHandleSendFailure after a startup packet send failure, but (a) pqHandleSendFailure explicitly disclaims suitability for use in any state except query startup, and (b) the problem still potentially exists for other send attempts in libpq.

Instead, let's fix this in a more general fashion by eliminating pqHandleSendFailure altogether, and instead arranging to postpone all reports of send failures in libpq until after we've made an attempt to read and process server messages. The send failure won't be reported at all if we find a server message or detect input EOF.

(Note: this removes one of the reasons why libpq typically overwrites, rather than appending to, conn->errorMessage: pqHandleSendFailure needed that behavior so that the send failure report would be replaced if we got a server message or read failure report. Eventually I'd like to get rid of that overwrite behavior altogether, but today is not that day. For the moment, pqSendSome is assuming that its callees will overwrite not append to conn->errorMessage.)

Possibly this change should get back-patched someday; but it needs testing first, so let's not consider that till after v12 beta.

Discussion: https://postgr.es/m/CAEepm=2n6Nv+5tFfe8YnkUm1fXgvxR0Mm1FoD+QKG-vLNGLyKg@mail.gmail.com
1 parent 5e28b77 commit 1f39a1c

File tree

6 files changed

+109
-75
lines changed

6 files changed

+109
-75
lines changed

src/interfaces/libpq/fe-connect.c

+6
Original file line number | Diff line number | Diff line change
@@ -537,6 +537,10 @@ pqDropServerData(PGconn *conn)
537537
conn->last_sqlstate[0] = '\0';
538538
conn->auth_req_received = false;
539539
conn->password_needed = false;
540+
conn->write_failed = false;
541+
if (conn->write_err_msg)
542+
free(conn->write_err_msg);
543+
conn->write_err_msg = NULL;
540544
conn->be_pid = 0;
541545
conn->be_key = 0;
542546
}
@@ -3702,6 +3706,8 @@ freePGconn(PGconn *conn)
37023706
/* Note that conn->Pfdebug is not ours to close or free */
37033707
if (conn->last_query)
37043708
free(conn->last_query);
3709+
if (conn->write_err_msg)
3710+
free(conn->write_err_msg);
37053711
if (conn->inBuffer)
37063712
free(conn->inBuffer);
37073713
if (conn->outBuffer)

src/interfaces/libpq/fe-exec.c

+54-37
Original file line number | Diff line number | Diff line change
@@ -790,6 +790,32 @@ pqSaveErrorResult(PGconn *conn)
790790
}
791791
}
792792

793+
/*
794+
* As above, and append conn->write_err_msg to whatever other error we have.
795+
* This is used when we've detected a write failure and have exhausted our
796+
* chances of reporting something else instead.
797+
*/
798+
static void
799+
pqSaveWriteError(PGconn *conn)
800+
{
801+
/*
802+
* Ensure conn->result is an error result, and add anything in
803+
* conn->errorMessage to it.
804+
*/
805+
pqSaveErrorResult(conn);
806+
807+
/*
808+
* Now append write_err_msg to that. If it's null because of previous
809+
* strdup failure, do what we can. (It's likely our machinations here are
810+
* all getting OOM failures as well, but ...)
811+
*/
812+
if (conn->write_err_msg && conn->write_err_msg[0] != '\0')
813+
pqCatenateResultError(conn->result, conn->write_err_msg);
814+
else
815+
pqCatenateResultError(conn->result,
816+
libpq_gettext("write to server failed\n"));
817+
}
818+
793819
/*
794820
* This subroutine prepares an async result object for return to the caller.
795821
* If there is not already an async result object, build an error object
@@ -1224,7 +1250,7 @@ PQsendQuery(PGconn *conn, const char *query)
12241250
pqPuts(query, conn) < 0 ||
12251251
pqPutMsgEnd(conn) < 0)
12261252
{
1227-
pqHandleSendFailure(conn);
1253+
/* error message should be set up already */
12281254
return 0;
12291255
}
12301256

@@ -1243,7 +1269,7 @@ PQsendQuery(PGconn *conn, const char *query)
12431269
*/
12441270
if (pqFlush(conn) < 0)
12451271
{
1246-
pqHandleSendFailure(conn);
1272+
/* error message should be set up already */
12471273
return 0;
12481274
}
12491275

@@ -1389,7 +1415,7 @@ PQsendPrepare(PGconn *conn,
13891415
return 1;
13901416

13911417
sendFailed:
1392-
pqHandleSendFailure(conn);
1418+
/* error message should be set up already */
13931419
return 0;
13941420
}
13951421

@@ -1641,39 +1667,10 @@ PQsendQueryGuts(PGconn *conn,
16411667
return 1;
16421668

16431669
sendFailed:
1644-
pqHandleSendFailure(conn);
1670+
/* error message should be set up already */
16451671
return 0;
16461672
}
16471673

1648-
/*
1649-
* pqHandleSendFailure: try to clean up after failure to send command.
1650-
*
1651-
* Primarily, what we want to accomplish here is to process any ERROR or
1652-
* NOTICE messages that the backend might have sent just before it died.
1653-
* Since we're in IDLE state, all such messages will get sent to the notice
1654-
* processor.
1655-
*
1656-
* NOTE: this routine should only be called in PGASYNC_IDLE state.
1657-
*/
1658-
void
1659-
pqHandleSendFailure(PGconn *conn)
1660-
{
1661-
/*
1662-
* Accept and parse any available input data, ignoring I/O errors. Note
1663-
* that if pqReadData decides the backend has closed the channel, it will
1664-
* close our side of the socket --- that's just what we want here.
1665-
*/
1666-
while (pqReadData(conn) > 0)
1667-
parseInput(conn);
1668-
1669-
/*
1670-
* Be sure to parse available input messages even if we read no data.
1671-
* (Note: calling parseInput within the above loop isn't really necessary,
1672-
* but it prevents buffer bloat if there's a lot of data available.)
1673-
*/
1674-
parseInput(conn);
1675-
}
1676-
16771674
/*
16781675
* Select row-by-row processing mode
16791676
*/
@@ -1763,8 +1760,11 @@ PQisBusy(PGconn *conn)
17631760
/* Parse any available data, if our state permits. */
17641761
parseInput(conn);
17651762

1766-
/* PQgetResult will return immediately in all states except BUSY. */
1767-
return conn->asyncStatus == PGASYNC_BUSY;
1763+
/*
1764+
* PQgetResult will return immediately in all states except BUSY, or if we
1765+
* had a write failure.
1766+
*/
1767+
return conn->asyncStatus == PGASYNC_BUSY || conn->write_failed;
17681768
}
17691769

17701770

@@ -1804,7 +1804,13 @@ PQgetResult(PGconn *conn)
18041804
}
18051805
}
18061806

1807-
/* Wait for some more data, and load it. */
1807+
/*
1808+
* Wait for some more data, and load it. (Note: if the connection has
1809+
* been lost, pqWait should return immediately because the socket
1810+
* should be read-ready, either with the last server data or with an
1811+
* EOF indication. We expect therefore that this won't result in any
1812+
* undue delay in reporting a previous write failure.)
1813+
*/
18081814
if (flushResult ||
18091815
pqWait(true, false, conn) ||
18101816
pqReadData(conn) < 0)
@@ -1820,6 +1826,17 @@ PQgetResult(PGconn *conn)
18201826

18211827
/* Parse it. */
18221828
parseInput(conn);
1829+
1830+
/*
1831+
* If we had a write error, but nothing above obtained a query result
1832+
* or detected a read error, report the write error.
1833+
*/
1834+
if (conn->write_failed && conn->asyncStatus == PGASYNC_BUSY)
1835+
{
1836+
pqSaveWriteError(conn);
1837+
conn->asyncStatus = PGASYNC_IDLE;
1838+
return pqPrepareAsyncResult(conn);
1839+
}
18231840
}
18241841

18251842
/* Return the appropriate thing. */
@@ -2252,7 +2269,7 @@ PQsendDescribe(PGconn *conn, char desc_type, const char *desc_target)
22522269
return 1;
22532270

22542271
sendFailed:
2255-
pqHandleSendFailure(conn);
2272+
/* error message should be set up already */
22562273
return 0;
22572274
}
22582275

src/interfaces/libpq/fe-misc.c

+45-8
Original file line number | Diff line number | Diff line change
@@ -824,6 +824,13 @@ pqReadData(PGconn *conn)
824824
*
825825
* Return 0 on success, -1 on failure and 1 when not all data could be sent
826826
* because the socket would block and the connection is non-blocking.
827+
*
828+
* Upon write failure, conn->write_failed is set and the error message is
829+
* saved in conn->write_err_msg, but we clear the output buffer and return
830+
* zero anyway; this is because callers should soldier on until it's possible
831+
* to read from the server and check for an error message. write_err_msg
832+
* should be reported only when we are unable to obtain a server error first.
833+
* (Thus, a -1 result is returned only for an internal *read* failure.)
827834
*/
828835
static int
829836
pqSendSome(PGconn *conn, int len)
@@ -832,13 +839,32 @@ pqSendSome(PGconn *conn, int len)
832839
int remaining = conn->outCount;
833840
int result = 0;
834841

842+
/*
843+
* If we already had a write failure, we will never again try to send data
844+
* on that connection. Even if the kernel would let us, we've probably
845+
* lost message boundary sync with the server. conn->write_failed
846+
* therefore persists until the connection is reset, and we just discard
847+
* all data presented to be written.
848+
*/
849+
if (conn->write_failed)
850+
{
851+
/* conn->write_err_msg should be set up already */
852+
conn->outCount = 0;
853+
return 0;
854+
}
855+
835856
if (conn->sock == PGINVALID_SOCKET)
836857
{
837858
printfPQExpBuffer(&conn->errorMessage,
838859
libpq_gettext("connection not open\n"));
860+
conn->write_failed = true;
861+
/* Transfer error message to conn->write_err_msg, if possible */
862+
/* (strdup failure is OK, we'll cope later) */
863+
conn->write_err_msg = strdup(conn->errorMessage.data);
864+
resetPQExpBuffer(&conn->errorMessage);
839865
/* Discard queued data; no chance it'll ever be sent */
840866
conn->outCount = 0;
841-
return -1;
867+
return 0;
842868
}
843869

844870
/* while there's still data to send */
@@ -876,17 +902,24 @@ pqSendSome(PGconn *conn, int len)
876902

877903
default:
878904
/* pqsecure_write set the error message for us */
905+
conn->write_failed = true;
879906

880907
/*
881-
* We used to close the socket here, but that's a bad idea
882-
* since there might be unread data waiting (typically, a
883-
* NOTICE message from the backend telling us it's
884-
* committing hara-kiri...). Leave the socket open until
885-
* pqReadData finds no more data can be read. But abandon
886-
* attempt to send data.
908+
* Transfer error message to conn->write_err_msg, if
909+
* possible (strdup failure is OK, we'll cope later).
910+
*
911+
* Note: this assumes that pqsecure_write and its children
912+
* will overwrite not append to conn->errorMessage. If
913+
* that's ever changed, we could remember the length of
914+
* conn->errorMessage at entry to this routine, and then
915+
* save and delete just what was appended.
887916
*/
917+
conn->write_err_msg = strdup(conn->errorMessage.data);
918+
resetPQExpBuffer(&conn->errorMessage);
919+
920+
/* Discard queued data; no chance it'll ever be sent */
888921
conn->outCount = 0;
889-
return -1;
922+
return 0;
890923
}
891924
}
892925
else
@@ -921,6 +954,9 @@ pqSendSome(PGconn *conn, int len)
921954
* can do, and works pretty well in practice. (The documentation
922955
* used to say that you only need to wait for write-ready, so
923956
* there are still plenty of applications like that out there.)
957+
*
958+
* Note that errors here don't result in write_failed becoming
959+
* set.
924960
*/
925961
if (pqReadData(conn) < 0)
926962
{
@@ -956,6 +992,7 @@ pqSendSome(PGconn *conn, int len)
956992
*
957993
* Return 0 on success, -1 on failure and 1 when not all data could be sent
958994
* because the socket would block and the connection is non-blocking.
995+
* (See pqSendSome comments about how failure should be handled.)
959996
*/
960997
int
961998
pqFlush(PGconn *conn)

src/interfaces/libpq/fe-protocol2.c

+1-13
Original file line number | Diff line number | Diff line change
@@ -1450,42 +1450,30 @@ pqFunctionCall2(PGconn *conn, Oid fnid,
14501450
pqPutInt(fnid, 4, conn) != 0 || /* function id */
14511451
pqPutInt(nargs, 4, conn) != 0) /* # of args */
14521452
{
1453-
pqHandleSendFailure(conn);
1453+
/* error message should be set up already */
14541454
return NULL;
14551455
}
14561456

14571457
for (i = 0; i < nargs; ++i)
14581458
{ /* len.int4 + contents */
14591459
if (pqPutInt(args[i].len, 4, conn))
1460-
{
1461-
pqHandleSendFailure(conn);
14621460
return NULL;
1463-
}
14641461

14651462
if (args[i].isint)
14661463
{
14671464
if (pqPutInt(args[i].u.integer, 4, conn))
1468-
{
1469-
pqHandleSendFailure(conn);
14701465
return NULL;
1471-
}
14721466
}
14731467
else
14741468
{
14751469
if (pqPutnchar((char *) args[i].u.ptr, args[i].len, conn))
1476-
{
1477-
pqHandleSendFailure(conn);
14781470
return NULL;
1479-
}
14801471
}
14811472
}
14821473

14831474
if (pqPutMsgEnd(conn) < 0 ||
14841475
pqFlush(conn))
1485-
{
1486-
pqHandleSendFailure(conn);
14871476
return NULL;
1488-
}
14891477

14901478
for (;;)
14911479
{

src/interfaces/libpq/fe-protocol3.c

+1-16
Original file line number | Diff line number | Diff line change
@@ -1926,50 +1926,35 @@ pqFunctionCall3(PGconn *conn, Oid fnid,
19261926
pqPutInt(1, 2, conn) < 0 || /* format code: BINARY */
19271927
pqPutInt(nargs, 2, conn) < 0) /* # of args */
19281928
{
1929-
pqHandleSendFailure(conn);
1929+
/* error message should be set up already */
19301930
return NULL;
19311931
}
19321932

19331933
for (i = 0; i < nargs; ++i)
19341934
{ /* len.int4 + contents */
19351935
if (pqPutInt(args[i].len, 4, conn))
1936-
{
1937-
pqHandleSendFailure(conn);
19381936
return NULL;
1939-
}
19401937
if (args[i].len == -1)
19411938
continue; /* it's NULL */
19421939

19431940
if (args[i].isint)
19441941
{
19451942
if (pqPutInt(args[i].u.integer, args[i].len, conn))
1946-
{
1947-
pqHandleSendFailure(conn);
19481943
return NULL;
1949-
}
19501944
}
19511945
else
19521946
{
19531947
if (pqPutnchar((char *) args[i].u.ptr, args[i].len, conn))
1954-
{
1955-
pqHandleSendFailure(conn);
19561948
return NULL;
1957-
}
19581949
}
19591950
}
19601951

19611952
if (pqPutInt(1, 2, conn) < 0) /* result format code: BINARY */
1962-
{
1963-
pqHandleSendFailure(conn);
19641953
return NULL;
1965-
}
19661954

19671955
if (pqPutMsgEnd(conn) < 0 ||
19681956
pqFlush(conn))
1969-
{
1970-
pqHandleSendFailure(conn);
19711957
return NULL;
1972-
}
19731958

19741959
for (;;)
19751960
{

src/interfaces/libpq/libpq-int.h

+2-1
Original file line number | Diff line number | Diff line change
@@ -410,6 +410,8 @@ struct pg_conn
410410
bool password_needed; /* true if server demanded a password */
411411
bool sigpipe_so; /* have we masked SIGPIPE via SO_NOSIGPIPE? */
412412
bool sigpipe_flag; /* can we mask SIGPIPE via MSG_NOSIGNAL? */
413+
bool write_failed; /* have we had a write failure on sock? */
414+
char *write_err_msg; /* write error message, or NULL if OOM */
413415

414416
/* Transient state needed while establishing connection */
415417
bool try_next_addr; /* time to advance to next address/host? */
@@ -585,7 +587,6 @@ extern void pqSaveMessageField(PGresult *res, char code,
585587
extern void pqSaveParameterStatus(PGconn *conn, const char *name,
586588
const char *value);
587589
extern int pqRowProcessor(PGconn *conn, const char **errmsgp);
588-
extern void pqHandleSendFailure(PGconn *conn);
589590

590591
/* === in fe-protocol2.c === */
591592

0 commit comments

Comments
 (0)