From d57726134ad51b794b6db31d5824f21f6fd40214 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Mon, 1 Mar 2021 18:08:23 +1300 Subject: [PATCH v6 1/2] Detect dropped connections while running queries. Provide a new optional GUC that can be used to check whether the client connection has gone away periodically while running very long queries. Author: Sergey Cherkashin Author: Thomas Munro Reviewed-by: Thomas Munro Reviewed-by: Tatsuo Ishii Reviewed-by: Konstantin Knizhnik Reviewed-by: Zhihong Yu Reviewed-by: Tom Lane (much earlier version) Discussion: https://postgr.es/m/77def86b27e41f0efcba411460e929ae%40postgrespro.ru --- doc/src/sgml/config.sgml | 36 +++++++++++++ src/backend/libpq/pqcomm.c | 52 +++++++++++++++++++ src/backend/tcop/postgres.c | 27 ++++++++++ src/backend/utils/init/globals.c | 1 + src/backend/utils/init/postinit.c | 11 ++++ src/backend/utils/misc/guc.c | 10 ++++ src/backend/utils/misc/postgresql.conf.sample | 3 ++ src/include/libpq/libpq.h | 2 + src/include/miscadmin.h | 1 + src/include/utils/timeout.h | 1 + 10 files changed, 144 insertions(+) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 5679b40dd5..5cd0d38dbf 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -998,6 +998,42 @@ include_dir 'conf.d' + + client_connection_check_interval (integer) + + client_connection_check_interval configuration parameter + + + + + Sets the time interval between checks that the client is still + connected, while running queries. The check is performed by testing + whether one byte could be read from the socket with + MSG_PEEK. If the kernel reports that the connection + has been closed or lost, a long running query can abort immediately, + rather than discovering the problem when it eventually tries to send + the response. + + + If this value is specified without units, it is taken as milliseconds. + The default value is 0, which disables connection + checks. Without connection checks, the server will detect the loss of + the connection only when it is waiting for a new request, receiving a + request or sending a response. + + + For the kernel itself to detect lost TCP connections reliably and + within a known timeframe in all scenarios including network failure, it + may also be necessary to adjust the default TCP keepalive settings of + the operating system, or the + , + and + settings of + PostgreSQL. + + + + diff --git a/src/backend/libpq/pqcomm.c b/src/backend/libpq/pqcomm.c index 4c7b1e7bfd..74b309a1ef 100644 --- a/src/backend/libpq/pqcomm.c +++ b/src/backend/libpq/pqcomm.c @@ -79,6 +79,7 @@ #include "storage/ipc.h" #include "utils/guc.h" #include "utils/memutils.h" +#include "utils/timeout.h" /* * Cope with the various platform-specific ways to spell TCP keepalive socket @@ -104,6 +105,7 @@ */ int Unix_socket_permissions; char *Unix_socket_group; +int client_connection_check_interval; /* Where the Unix socket files are (list of palloc'd strings) */ static List *sock_paths = NIL; @@ -1921,3 +1923,53 @@ pq_settcpusertimeout(int timeout, Port *port) return STATUS_OK; } + +/* -------------------------------- + * pq_check_client_connection - is the client still connected? + * -------------------------------- + */ +bool +pq_check_client_connection(void) +{ + bool connected = true; + char nextbyte; + int r; + +retry: +#ifdef WIN32 + pgwin32_noblock = 1; +#endif + r = recv(MyProcPort->sock, &nextbyte, 1, MSG_PEEK); +#ifdef WIN32 + pgwin32_noblock = 0; +#endif + + if (r == 0) + { + /* EOF detected. */ + connected = false; + } + else if (r > 0) + { + /* Data available to read. Connection looks good. */ + } + else if (errno == EINTR) + { + /* Interrupted by a signal, so retry. */ + goto retry; + } + else if (errno == EAGAIN || errno == EWOULDBLOCK) + { + /* No data available to read. Connection looks good. */ + } + else + { + /* Got some other error. We'd better log the reason. */ + ereport(COMMERROR, + (errcode(ERRCODE_CONNECTION_EXCEPTION), + errmsg("could not check client connection: %m"))); + connected = false; + } + + return connected; +} diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 2b1b68109f..44fb7b7c30 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -2671,6 +2671,14 @@ start_xact_command(void) * not desired, the timeout has to be disabled explicitly. */ enable_statement_timeout(); + + /* Start timeout for checking if the client has gone away if necessary. */ + if (client_connection_check_interval > 0 && + IsUnderPostmaster && + MyProcPort && + !get_timeout_active(CLIENT_CONNECTION_CHECK_TIMEOUT)) + enable_timeout_after(CLIENT_CONNECTION_CHECK_TIMEOUT, + client_connection_check_interval); } static void @@ -3149,6 +3157,25 @@ ProcessInterrupts(void) (errcode(ERRCODE_ADMIN_SHUTDOWN), errmsg("terminating connection due to administrator command"))); } + if (CheckClientConnectionPending) + { + CheckClientConnectionPending = false; + + /* + * If we're idle or reading a command, we can skip the explicit check + * for a lost connection (if configured), because that'll be detected + * by socket I/O routines, and a new check will be rescheduled if + * necessary when the command runs. We don't want needless wakeups of + * idle sessions. + */ + if (!DoingCommandRead && client_connection_check_interval > 0) + { + if (!pq_check_client_connection()) + ClientConnectionLost = true; + enable_timeout_after(CLIENT_CONNECTION_CHECK_TIMEOUT, + client_connection_check_interval); + } + } if (ClientConnectionLost) { QueryCancelPending = false; /* lost connection trumps QueryCancel */ diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c index 73e0a672ae..a9f0fc3017 100644 --- a/src/backend/utils/init/globals.c +++ b/src/backend/utils/init/globals.c @@ -30,6 +30,7 @@ ProtocolVersion FrontendProtocol; volatile sig_atomic_t InterruptPending = false; volatile sig_atomic_t QueryCancelPending = false; volatile sig_atomic_t ProcDiePending = false; +volatile sig_atomic_t CheckClientConnectionPending = false; volatile sig_atomic_t ClientConnectionLost = false; volatile sig_atomic_t IdleInTransactionSessionTimeoutPending = false; volatile sig_atomic_t IdleSessionTimeoutPending = false; diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 7abeccb536..0bac23e75d 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -34,6 +34,7 @@ #include "catalog/pg_db_role_setting.h" #include "catalog/pg_tablespace.h" #include "libpq/auth.h" +#include "libpq/libpq.h" #include "libpq/libpq-be.h" #include "mb/pg_wchar.h" #include "miscadmin.h" @@ -73,6 +74,7 @@ static void StatementTimeoutHandler(void); static void LockTimeoutHandler(void); static void IdleInTransactionSessionTimeoutHandler(void); static void IdleSessionTimeoutHandler(void); +static void ClientCheckTimeoutHandler(void); static bool ThereIsAtLeastOneRole(void); static void process_startup_options(Port *port, bool am_superuser); static void process_settings(Oid databaseid, Oid roleid); @@ -620,6 +622,7 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, RegisterTimeout(IDLE_IN_TRANSACTION_SESSION_TIMEOUT, IdleInTransactionSessionTimeoutHandler); RegisterTimeout(IDLE_SESSION_TIMEOUT, IdleSessionTimeoutHandler); + RegisterTimeout(CLIENT_CONNECTION_CHECK_TIMEOUT, ClientCheckTimeoutHandler); } /* @@ -1242,6 +1245,14 @@ IdleSessionTimeoutHandler(void) SetLatch(MyLatch); } +static void +ClientCheckTimeoutHandler(void) +{ + CheckClientConnectionPending = true; + InterruptPending = true; + SetLatch(MyLatch); +} + /* * Returns true if at least one role is defined in this database cluster. */ diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 3b36a31a47..391d9983e0 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -3483,6 +3483,16 @@ static struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, + { + {"client_connection_check_interval", PGC_USERSET, CLIENT_CONN_OTHER, + gettext_noop("Sets the time interval for checking connection with the client."), + gettext_noop("A value of 0 disables this feature."), + GUC_UNIT_MS + }, + &client_connection_check_interval, + 0, 0, INT_MAX, + NULL, NULL, NULL + }, /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 86425965d0..dd850bf272 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -718,6 +718,9 @@ #dynamic_library_path = '$libdir' +#client_connection_check_interval = 0 # set time interval between + # checks for client disconnection while + # running long queries; 0 for never #------------------------------------------------------------------------------ # LOCK MANAGEMENT diff --git a/src/include/libpq/libpq.h b/src/include/libpq/libpq.h index b20deeb555..3bd97c4e93 100644 --- a/src/include/libpq/libpq.h +++ b/src/include/libpq/libpq.h @@ -71,6 +71,7 @@ extern int pq_getbyte(void); extern int pq_peekbyte(void); extern int pq_getbyte_if_available(unsigned char *c); extern int pq_putmessage_v2(char msgtype, const char *s, size_t len); +extern bool pq_check_client_connection(void); /* * prototypes for functions in be-secure.c @@ -109,6 +110,7 @@ extern ssize_t secure_open_gssapi(Port *port); extern char *SSLCipherSuites; extern char *SSLECDHCurve; extern bool SSLPreferServerCiphers; +extern int client_connection_check_interval; extern int ssl_min_protocol_version; extern int ssl_max_protocol_version; diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 013850ac28..6f8251e0b0 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -85,6 +85,7 @@ extern PGDLLIMPORT volatile sig_atomic_t IdleInTransactionSessionTimeoutPending; extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending; extern PGDLLIMPORT volatile sig_atomic_t ProcSignalBarrierPending; +extern PGDLLIMPORT volatile sig_atomic_t CheckClientConnectionPending; extern PGDLLIMPORT volatile sig_atomic_t ClientConnectionLost; /* these are marked volatile because they are examined by signal handlers: */ diff --git a/src/include/utils/timeout.h b/src/include/utils/timeout.h index ecb2a366a5..93e6a691b3 100644 --- a/src/include/utils/timeout.h +++ b/src/include/utils/timeout.h @@ -32,6 +32,7 @@ typedef enum TimeoutId STANDBY_LOCK_TIMEOUT, IDLE_IN_TRANSACTION_SESSION_TIMEOUT, IDLE_SESSION_TIMEOUT, + CLIENT_CONNECTION_CHECK_TIMEOUT, /* First user-definable timeout reason */ USER_TIMEOUT, /* Maximum number of timeout reasons */ -- 2.30.1