From 8d7bc2a2592974c038dce18499fa6cc975ed671d Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 14 Aug 2025 13:45:44 +0300 Subject: [PATCH v4 2/3] libpq: Handle OOMs by disconnecting instead of silently skipping messages As noted in the previous commit, silently ignoring async notifications is not great. Our options for reporting errors are limited, but it seems better to terminate the connection than try to soldier on. Applications should handle connection loss gracefully, whereas silently missing a notification could cause much weirder problems. Similarly, if we run out of memory while saving a received ParameterStatus or cancellation key, disconnect. This also changes the error message on OOM while expanding the input buffer. It used to report "cannot allocate memory for input buffer", followed by "lost synchronization with server: got message type ...". The "lost synchronization" message seems unnecessary, so remove that and report only "cannot allocate memory for input buffer". (The comment speculated that the out of memory could indeed be caused by loss of sync, but that seems highly unlikely.) Discussion: https://www.postgresql.org/message-id/df892f9f-5923-4046-9d6f-8c48d8980b50@iki.fi --- src/interfaces/libpq/fe-exec.c | 13 +++- src/interfaces/libpq/fe-protocol3.c | 116 ++++++++++++++++++---------- src/interfaces/libpq/libpq-int.h | 2 +- 3 files changed, 88 insertions(+), 43 deletions(-) diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c index 4256ae5c0cc..0b1e37ec30b 100644 --- a/src/interfaces/libpq/fe-exec.c +++ b/src/interfaces/libpq/fe-exec.c @@ -1076,8 +1076,12 @@ pqSaveMessageField(PGresult *res, char code, const char *value) /* * pqSaveParameterStatus - remember parameter status sent by backend + * + * Returns 1 on success, 0 on out-of-memory. (Note that on out-of-memory, we + * have already released the old value of the parameter, if any. The only + * really safe way to recover is to terminate the connection.) */ -void +int pqSaveParameterStatus(PGconn *conn, const char *name, const char *value) { pgParameterStatus *pstatus; @@ -1119,6 +1123,11 @@ pqSaveParameterStatus(PGconn *conn, const char *name, const char *value) pstatus->next = conn->pstatus; conn->pstatus = pstatus; } + else + { + /* out of memory */ + return 0; + } /* * Save values of settings that are of interest to libpq in fields of the @@ -1190,6 +1199,8 @@ pqSaveParameterStatus(PGconn *conn, const char *name, const char *value) { conn->scram_sha_256_iterations = atoi(value); } + + return 1; } diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c index 5683229e32e..0cca832c06a 100644 --- a/src/interfaces/libpq/fe-protocol3.c +++ b/src/interfaces/libpq/fe-protocol3.c @@ -43,6 +43,7 @@ (id) == PqMsg_RowDescription) +static void handleFatalError(PGconn *conn); static void handleSyncLoss(PGconn *conn, char id, int msgLength); static int getRowDescriptions(PGconn *conn, int msgLength); static int getParamDescriptions(PGconn *conn, int msgLength); @@ -120,12 +121,12 @@ pqParseInput3(PGconn *conn) conn)) { /* - * XXX add some better recovery code... plan is to skip over - * the message using its length, then report an error. For the - * moment, just treat this like loss of sync (which indeed it - * might be!) + * Abandon the connection. There's not much else we can + * safely do; we can't just ignore the message or we could + * miss important changes to the connection state. + * pqCheckInBufferSpace() already reported the error. */ - handleSyncLoss(conn, id, msgLength); + handleFatalError(conn); } return; } @@ -456,6 +457,11 @@ pqParseInput3(PGconn *conn) /* Normal case: parsing agrees with specified length */ pqParseDone(conn, conn->inCursor); } + else if (conn->error_result && conn->status == CONNECTION_BAD) + { + /* The connection was abandoned and we already reported it */ + return; + } else { /* Trouble --- report it */ @@ -470,15 +476,14 @@ pqParseInput3(PGconn *conn) } /* - * handleSyncLoss: clean up after loss of message-boundary sync + * handleFatalError: clean up after a nonrecoverable error * - * There isn't really a lot we can do here except abandon the connection. + * This is for errors where we need to abandon the connection. The caller has + * already saved the error message in conn->errorMessage. */ static void -handleSyncLoss(PGconn *conn, char id, int msgLength) +handleFatalError(PGconn *conn) { - libpq_append_conn_error(conn, "lost synchronization with server: got message type \"%c\", length %d", - id, msgLength); /* build an error result holding the error message */ pqSaveErrorResult(conn); conn->asyncStatus = PGASYNC_READY; /* drop out of PQgetResult wait loop */ @@ -487,6 +492,19 @@ handleSyncLoss(PGconn *conn, char id, int msgLength) conn->status = CONNECTION_BAD; /* No more connection to backend */ } +/* + * handleSyncLoss: clean up after loss of message-boundary sync + * + * There isn't really a lot we can do here except abandon the connection. + */ +static void +handleSyncLoss(PGconn *conn, char id, int msgLength) +{ + libpq_append_conn_error(conn, "lost synchronization with server: got message type \"%c\", length %d", + id, msgLength); + handleFatalError(conn); +} + /* * parseInput subroutine to read a 'T' (row descriptions) message. * We'll build a new PGresult structure (unless called for a Describe @@ -1519,7 +1537,11 @@ getParameterStatus(PGconn *conn) return EOF; } /* And save it */ - pqSaveParameterStatus(conn, conn->workBuffer.data, valueBuf.data); + if (!pqSaveParameterStatus(conn, conn->workBuffer.data, valueBuf.data)) + { + libpq_append_conn_error(conn, "out of memory"); + handleFatalError(conn); + } termPQExpBuffer(&valueBuf); return 0; } @@ -1550,7 +1572,8 @@ getBackendKeyData(PGconn *conn, int msgLength) conn->be_cancel_key = malloc(cancel_key_len); if (conn->be_cancel_key == NULL) { - /* continue without cancel key */ + libpq_append_conn_error(conn, "out of memory"); + handleFatalError(conn); return 0; } if (pqGetnchar(conn->be_cancel_key, cancel_key_len, conn)) @@ -1587,6 +1610,18 @@ getNotify(PGconn *conn) return EOF; /* must save name while getting extra string */ svname = strdup(conn->workBuffer.data); + if (!svname) + { + /* + * Notify messages can arrive at any state, so we cannot associate the + * error with any particular query. There's no way to return back an + * "async error", so the best we can do is drop the connection. That + * seems better than silently ignoring the notification. + */ + libpq_append_conn_error(conn, "out of memory"); + handleFatalError(conn); + return 0; + } if (pqGets(&conn->workBuffer, conn)) { if (svname) @@ -1594,12 +1629,6 @@ getNotify(PGconn *conn) return EOF; } - if (!svname) - { - /* out of memory; silently ignore the notification */ - return 0; - } - /* * Store the strings right after the PGnotify structure so it can all be * freed at once. We don't use NAMEDATALEN because we don't want to tie @@ -1608,21 +1637,26 @@ getNotify(PGconn *conn) nmlen = strlen(svname); extralen = strlen(conn->workBuffer.data); newNotify = (PGnotify *) malloc(sizeof(PGnotify) + nmlen + extralen + 2); - if (newNotify) - { - newNotify->relname = (char *) newNotify + sizeof(PGnotify); - strcpy(newNotify->relname, svname); - newNotify->extra = newNotify->relname + nmlen + 1; - strcpy(newNotify->extra, conn->workBuffer.data); - newNotify->be_pid = be_pid; - newNotify->next = NULL; - if (conn->notifyTail) - conn->notifyTail->next = newNotify; - else - conn->notifyHead = newNotify; - conn->notifyTail = newNotify; + if (!newNotify) + { + free(svname); + libpq_append_conn_error(conn, "out of memory"); + handleFatalError(conn); + return 0; } + newNotify->relname = (char *) newNotify + sizeof(PGnotify); + strcpy(newNotify->relname, svname); + newNotify->extra = newNotify->relname + nmlen + 1; + strcpy(newNotify->extra, conn->workBuffer.data); + newNotify->be_pid = be_pid; + newNotify->next = NULL; + if (conn->notifyTail) + conn->notifyTail->next = newNotify; + else + conn->notifyHead = newNotify; + conn->notifyTail = newNotify; + free(svname); return 0; } @@ -1756,12 +1790,12 @@ getCopyDataMessage(PGconn *conn) conn)) { /* - * XXX add some better recovery code... plan is to skip over - * the message using its length, then report an error. For the - * moment, just treat this like loss of sync (which indeed it - * might be!) + * Abandon the connection. There's not much else we can + * safely do; we can't just ignore the message or we could + * miss important changes to the connection state. + * pqCheckInBufferSpace() already reported the error. */ - handleSyncLoss(conn, id, msgLength); + handleFatalError(conn); return -2; } return 0; @@ -2190,12 +2224,12 @@ pqFunctionCall3(PGconn *conn, Oid fnid, conn)) { /* - * XXX add some better recovery code... plan is to skip over - * the message using its length, then report an error. For the - * moment, just treat this like loss of sync (which indeed it - * might be!) + * Abandon the connection. There's not much else we can + * safely do; we can't just ignore the message or we could + * miss important changes to the connection state. + * pqCheckInBufferSpace() already reported the error. */ - handleSyncLoss(conn, id, msgLength); + handleFatalError(conn); break; } continue; diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h index a701c25038a..02c114f1405 100644 --- a/src/interfaces/libpq/libpq-int.h +++ b/src/interfaces/libpq/libpq-int.h @@ -746,7 +746,7 @@ extern PGresult *pqPrepareAsyncResult(PGconn *conn); extern void pqInternalNotice(const PGNoticeHooks *hooks, const char *fmt,...) pg_attribute_printf(2, 3); extern void pqSaveMessageField(PGresult *res, char code, const char *value); -extern void pqSaveParameterStatus(PGconn *conn, const char *name, +extern int pqSaveParameterStatus(PGconn *conn, const char *name, const char *value); extern int pqRowProcessor(PGconn *conn, const char **errmsgp); extern void pqCommandQueueAdvance(PGconn *conn, bool isReadyForQuery, -- 2.39.5