From ea4b916b08aa05d67405f4cffa9dc7a6708ac507 Mon Sep 17 00:00:00 2001 From: Daniel Gustafsson Date: Wed, 29 Sep 2021 13:13:40 +0200 Subject: [PATCH v4] Quick exit on log stream child exit in pg_basebackup If the log streaming child process (thread on Windows) dies during backup then the whole backup will be aborted at the end of the backup. Instead, trap ungraceful termination of the log streaming child and exit early. Reviewed-by: Bharath Rupireddy Reviewed-by: Magnus Hagander Discussion: https://postgr.es/m/0F69E282-97F9-4DB7-8D6D-F927AA6340C8@yesql.se Discussion: https://postgr.es/m/VI1PR83MB0189818B82C19059CB62E26199A89@VI1PR83MB0189.EURPRD83.prod.outlook.com --- src/bin/pg_basebackup/pg_basebackup.c | 47 +++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index 669aa207a3..74aa5e9e4d 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -174,6 +174,8 @@ static int bgpipe[2] = {-1, -1}; /* Handle to child process */ static pid_t bgchild = -1; static bool in_log_streamer = false; +/* Flag to indicate if child process exited unexpectedly */ +static volatile sig_atomic_t bgchild_exited = false; /* End position for xlog streaming, empty string if unknown yet */ static XLogRecPtr xlogendptr; @@ -268,6 +270,18 @@ disconnect_atexit(void) } #ifndef WIN32 +/* + * If the bgchild exits prematurely and raises a SIGCHLD signal, we can abort + * processing rather than wait until the backup has finished and error out at + * that time. On Windows, we use a background thread which can communicate + * without the need for a signal handler. + */ +static void +sigchld_handler(SIGNAL_ARGS) +{ + bgchild_exited = true; +} + /* * On windows, our background thread dies along with the process. But on * Unix, if we have started a subprocess, we want to kill it off so it @@ -276,7 +290,7 @@ disconnect_atexit(void) static void kill_bgchild_atexit(void) { - if (bgchild > 0) + if (bgchild > 0 && !bgchild_exited) kill(bgchild, SIGTERM); } #endif @@ -562,17 +576,28 @@ LogStreamerMain(logstreamer_param *param) stream.do_sync); if (!ReceiveXlogStream(param->bgconn, &stream)) - + { /* * Any errors will already have been reported in the function process, * but we need to tell the parent that we didn't shutdown in a nice * way. */ +#ifdef WIN32 + /* + * In order to signal the main thread of an ungraceful exit we + * set the same flag that we use on Unix to signal SIGCHLD. + */ + bgchild_exited = true; +#endif return 1; + } if (!stream.walmethod->finish()) { pg_log_error("could not finish writing WAL files: %m"); +#ifdef WIN32 + bgchild_exited = true; +#endif return 1; } @@ -980,6 +1005,12 @@ ReceiveCopyData(PGconn *conn, WriteDataCallback callback, exit(1); } + if (bgchild_exited) + { + pg_log_error("background process terminated unexpectedly"); + exit(1); + } + (*callback) (r, copybuf, callback_data); PQfreemem(copybuf); @@ -2595,6 +2626,18 @@ main(int argc, char **argv) } atexit(disconnect_atexit); +#ifndef WIN32 + /* + * Trap SIGCHLD to be able to handle the WAL stream process exiting. There + * is no SIGCHLD on Windows, there we rely on the background thread setting + * the signal variable on unexpected but graceful exit. If the WAL stream + * thread crashes on Windows it will bring down the entire process as it's + * a thread, so there is nothing to catch should that happen. A crash on + * UNIX will be caught by the signal handler. + */ + pqsignal(SIGCHLD, sigchld_handler); +#endif + /* * Set umask so that directories/files are created with the same * permissions as directories/files in the source data directory. -- 2.24.3 (Apple Git-128)