Re: Parallel worker hangs while handling errors. - Mailing list pgsql-hackers

From Tom Lane
Subject Re: Parallel worker hangs while handling errors.
Date
Msg-id 644875.1599933441@sss.pgh.pa.us
Whole thread Raw
In response to Re: Parallel worker hangs while handling errors.  (Robert Haas <robertmhaas@gmail.com>)
List pgsql-hackers
Robert Haas <robertmhaas@gmail.com> writes:
> On Fri, Sep 11, 2020 at 4:20 PM Tom Lane <tgl@sss.pgh.pa.us> wrote:
>> It's not clear to me whether we want to institute the "accepting SIGQUIT
>> is always okay" rule in processes that didn't already have code to change
>> BlockSig.

> I think a backend process that isn't timely handling SIGQUIT is by
> that very fact buggy. "pg_ctl stop -mi" isn't a friendly suggestion.
> So I favor trying to make it uniform.

Well, if we want to take a hard line about that, we should centralize
the setup of SIGQUIT.  The attached makes InitPostmasterChild do it,
and removes the duplicative code from elsewhere.

I also flipped autovacuum and walsender from using quickdie to using
SignalHandlerForCrashExit.  Whatever you think about the safety or
unsafety of quickdie, there seems no reason for autovacuum to be trying
to tell its nonexistent client about a shutdown.  I don't think it's
terribly useful for a walsender either, though maybe somebody has a
different opinion about that?

            regards, tom lane

diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 19ba26b914..2cef56f115 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -454,8 +454,8 @@ AutoVacLauncherMain(int argc, char *argv[])
     pqsignal(SIGHUP, SignalHandlerForConfigReload);
     pqsignal(SIGINT, StatementCancelHandler);
     pqsignal(SIGTERM, SignalHandlerForShutdownRequest);
+    /* SIGQUIT handler was already set up by InitPostmasterChild */

-    pqsignal(SIGQUIT, quickdie);
     InitializeTimeouts();        /* establishes SIGALRM handler */

     pqsignal(SIGPIPE, SIG_IGN);
@@ -498,9 +498,10 @@ AutoVacLauncherMain(int argc, char *argv[])
      *
      * Note that we use sigsetjmp(..., 1), so that the prevailing signal mask
      * (to wit, BlockSig) will be restored when longjmp'ing to here.  Thus,
-     * signals will be blocked until we complete error recovery.  It might
-     * seem that this policy makes the HOLD_INTERRUPTS() call redundant, but
-     * it is not since InterruptPending might be set already.
+     * signals other than SIGQUIT will be blocked until we complete error
+     * recovery.  It might seem that this policy makes the HOLD_INTERRUPTS()
+     * call redundant, but it is not since InterruptPending might be set
+     * already.
      */
     if (sigsetjmp(local_sigjmp_buf, 1) != 0)
     {
@@ -1531,7 +1532,8 @@ AutoVacWorkerMain(int argc, char *argv[])
      */
     pqsignal(SIGINT, StatementCancelHandler);
     pqsignal(SIGTERM, die);
-    pqsignal(SIGQUIT, quickdie);
+    /* SIGQUIT handler was already set up by InitPostmasterChild */
+
     InitializeTimeouts();        /* establishes SIGALRM handler */

     pqsignal(SIGPIPE, SIG_IGN);
@@ -1562,9 +1564,9 @@ AutoVacWorkerMain(int argc, char *argv[])
      *
      * Note that we use sigsetjmp(..., 1), so that the prevailing signal mask
      * (to wit, BlockSig) will be restored when longjmp'ing to here.  Thus,
-     * signals will be blocked until we exit.  It might seem that this policy
-     * makes the HOLD_INTERRUPTS() call redundant, but it is not since
-     * InterruptPending might be set already.
+     * signals other than SIGQUIT will be blocked until we exit.  It might
+     * seem that this policy makes the HOLD_INTERRUPTS() call redundant, but
+     * it is not since InterruptPending might be set already.
      */
     if (sigsetjmp(local_sigjmp_buf, 1) != 0)
     {
diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c
index d043ced686..5a9a0e3435 100644
--- a/src/backend/postmaster/bgworker.c
+++ b/src/backend/postmaster/bgworker.c
@@ -731,9 +731,9 @@ StartBackgroundWorker(void)
         pqsignal(SIGFPE, SIG_IGN);
     }
     pqsignal(SIGTERM, bgworker_die);
+    /* SIGQUIT handler was already set up by InitPostmasterChild */
     pqsignal(SIGHUP, SIG_IGN);

-    pqsignal(SIGQUIT, SignalHandlerForCrashExit);
     InitializeTimeouts();        /* establishes SIGALRM handler */

     pqsignal(SIGPIPE, SIG_IGN);
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index c96568149f..a7afa758b6 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -104,7 +104,7 @@ BackgroundWriterMain(void)
     pqsignal(SIGHUP, SignalHandlerForConfigReload);
     pqsignal(SIGINT, SIG_IGN);
     pqsignal(SIGTERM, SignalHandlerForShutdownRequest);
-    pqsignal(SIGQUIT, SignalHandlerForCrashExit);
+    /* SIGQUIT handler was already set up by InitPostmasterChild */
     pqsignal(SIGALRM, SIG_IGN);
     pqsignal(SIGPIPE, SIG_IGN);
     pqsignal(SIGUSR1, procsignal_sigusr1_handler);
@@ -115,10 +115,6 @@ BackgroundWriterMain(void)
      */
     pqsignal(SIGCHLD, SIG_DFL);

-    /* We allow SIGQUIT (SignalHandlerForCrashExit) at all times */
-    sigdelset(&BlockSig, SIGQUIT);
-    PG_SETMASK(&BlockSig);
-
     /*
      * We just started, assume there has been either a shutdown or
      * end-of-recovery snapshot.
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index 45f5deca72..3e7dcd4f76 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -198,7 +198,7 @@ CheckpointerMain(void)
     pqsignal(SIGHUP, SignalHandlerForConfigReload);
     pqsignal(SIGINT, ReqCheckpointHandler); /* request checkpoint */
     pqsignal(SIGTERM, SIG_IGN); /* ignore SIGTERM */
-    pqsignal(SIGQUIT, SignalHandlerForCrashExit);
+    /* SIGQUIT handler was already set up by InitPostmasterChild */
     pqsignal(SIGALRM, SIG_IGN);
     pqsignal(SIGPIPE, SIG_IGN);
     pqsignal(SIGUSR1, procsignal_sigusr1_handler);
@@ -209,10 +209,6 @@ CheckpointerMain(void)
      */
     pqsignal(SIGCHLD, SIG_DFL);

-    /* We allow SIGQUIT (SignalHandlerForCrashExit) at all times */
-    sigdelset(&BlockSig, SIGQUIT);
-    PG_SETMASK(&BlockSig);
-
     /*
      * Initialize so that first time-driven event happens at the correct time.
      */
diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
index 37be0e2bbb..ed1b65358d 100644
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -228,7 +228,7 @@ PgArchiverMain(int argc, char *argv[])
     pqsignal(SIGHUP, SignalHandlerForConfigReload);
     pqsignal(SIGINT, SIG_IGN);
     pqsignal(SIGTERM, SignalHandlerForShutdownRequest);
-    pqsignal(SIGQUIT, SignalHandlerForCrashExit);
+    /* SIGQUIT handler was already set up by InitPostmasterChild */
     pqsignal(SIGALRM, SIG_IGN);
     pqsignal(SIGPIPE, SIG_IGN);
     pqsignal(SIGUSR1, pgarch_waken);
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 3cd6fa30eb..959e3b8873 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -4355,7 +4355,7 @@ BackendInitialize(Port *port)
      * cleaned up.
      */
     pqsignal(SIGTERM, process_startup_packet_die);
-    pqsignal(SIGQUIT, SignalHandlerForCrashExit);
+    /* SIGQUIT handler was already set up by InitPostmasterChild */
     InitializeTimeouts();        /* establishes SIGALRM handler */
     PG_SETMASK(&StartupBlockSig);

@@ -4435,7 +4435,7 @@ BackendInitialize(Port *port)
     status = ProcessStartupPacket(port, false, false);

     /*
-     * Disable the timeout, and prevent SIGTERM/SIGQUIT again.
+     * Disable the timeout, and prevent SIGTERM again.
      */
     disable_timeout(STARTUP_PACKET_TIMEOUT, false);
     PG_SETMASK(&BlockSig);
@@ -4983,10 +4983,6 @@ SubPostmasterMain(int argc, char *argv[])
     if (strcmp(argv[1], "--forkavworker") == 0)
         AutovacuumWorkerIAm();

-    /* In EXEC_BACKEND case we will not have inherited these settings */
-    pqinitmask();
-    PG_SETMASK(&BlockSig);
-
     /* Read in remaining GUC variables */
     read_nondefault_variables();

diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index fd9ac35dac..64af7b8707 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -175,7 +175,7 @@ StartupProcessMain(void)
     pqsignal(SIGHUP, StartupProcSigHupHandler); /* reload config file */
     pqsignal(SIGINT, SIG_IGN);    /* ignore query cancel */
     pqsignal(SIGTERM, StartupProcShutdownHandler);    /* request shutdown */
-    pqsignal(SIGQUIT, SignalHandlerForCrashExit);
+    /* SIGQUIT handler was already set up by InitPostmasterChild */
     InitializeTimeouts();        /* establishes SIGALRM handler */
     pqsignal(SIGPIPE, SIG_IGN);
     pqsignal(SIGUSR1, procsignal_sigusr1_handler);
diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c
index 358c0916ac..a52832fe90 100644
--- a/src/backend/postmaster/walwriter.c
+++ b/src/backend/postmaster/walwriter.c
@@ -101,7 +101,7 @@ WalWriterMain(void)
     pqsignal(SIGHUP, SignalHandlerForConfigReload);
     pqsignal(SIGINT, SignalHandlerForShutdownRequest);
     pqsignal(SIGTERM, SignalHandlerForShutdownRequest);
-    pqsignal(SIGQUIT, SignalHandlerForCrashExit);
+    /* SIGQUIT handler was already set up by InitPostmasterChild */
     pqsignal(SIGALRM, SIG_IGN);
     pqsignal(SIGPIPE, SIG_IGN);
     pqsignal(SIGUSR1, procsignal_sigusr1_handler);
@@ -112,10 +112,6 @@ WalWriterMain(void)
      */
     pqsignal(SIGCHLD, SIG_DFL);

-    /* We allow SIGQUIT (SignalHandlerForCrashExit) at all times */
-    sigdelset(&BlockSig, SIGQUIT);
-    PG_SETMASK(&BlockSig);
-
     /*
      * Create a memory context that we will do all our work in.  We do this so
      * that we can reset the context during error recovery and thereby avoid
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index b180598507..17f1a49f87 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -270,7 +270,7 @@ WalReceiverMain(void)
     pqsignal(SIGHUP, WalRcvSigHupHandler);    /* set flag to read config file */
     pqsignal(SIGINT, SIG_IGN);
     pqsignal(SIGTERM, WalRcvShutdownHandler);    /* request shutdown */
-    pqsignal(SIGQUIT, SignalHandlerForCrashExit);
+    /* SIGQUIT handler was already set up by InitPostmasterChild */
     pqsignal(SIGALRM, SIG_IGN);
     pqsignal(SIGPIPE, SIG_IGN);
     pqsignal(SIGUSR1, procsignal_sigusr1_handler);
@@ -279,10 +279,6 @@ WalReceiverMain(void)
     /* Reset some signals that are accepted by postmaster but not here */
     pqsignal(SIGCHLD, SIG_DFL);

-    /* We allow SIGQUIT (SignalHandlerForCrashExit) at all times */
-    sigdelset(&BlockSig, SIGQUIT);
-    PG_SETMASK(&BlockSig);
-
     /* Load the libpq-specific functions */
     load_file("libpqwalreceiver", false);
     if (WalReceiverFunctions == NULL)
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 3f756b470a..5e6ae6fde5 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -3035,7 +3035,7 @@ WalSndSignals(void)
     pqsignal(SIGHUP, SignalHandlerForConfigReload);
     pqsignal(SIGINT, StatementCancelHandler);    /* query cancel */
     pqsignal(SIGTERM, die);        /* request shutdown */
-    pqsignal(SIGQUIT, quickdie);    /* hard crash time */
+    /* SIGQUIT handler was already set up by InitPostmasterChild */
     InitializeTimeouts();        /* establishes SIGALRM handler */
     pqsignal(SIGPIPE, SIG_IGN);
     pqsignal(SIGUSR1, procsignal_sigusr1_handler);
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index c9424f167c..411cfadbff 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -3820,7 +3820,8 @@ PostgresMain(int argc, char *argv[],
     }

     /*
-     * Set up signal handlers and masks.
+     * Set up signal handlers.  (InitPostmasterChild or InitStandaloneProcess
+     * has already set up BlockSig and made that the active signal mask.)
      *
      * Note that postmaster blocked all signals before forking child process,
      * so there is no race condition whereby we might receive a signal before
@@ -3842,6 +3843,9 @@ PostgresMain(int argc, char *argv[],
         pqsignal(SIGTERM, die); /* cancel current query and exit */

         /*
+         * In a postmaster child backend, replace SignalHandlerForCrashExit
+         * with quickdie, so we can tell the client we're dying.
+         *
          * In a standalone backend, SIGQUIT can be generated from the keyboard
          * easily, while SIGTERM cannot, so we make both signals do die()
          * rather than quickdie().
@@ -3871,16 +3875,6 @@ PostgresMain(int argc, char *argv[],
                                      * platforms */
     }

-    pqinitmask();
-
-    if (IsUnderPostmaster)
-    {
-        /* We allow SIGQUIT (quickdie) at all times */
-        sigdelset(&BlockSig, SIGQUIT);
-    }
-
-    PG_SETMASK(&BlockSig);        /* block everything except SIGQUIT */
-
     if (!IsUnderPostmaster)
     {
         /*
diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c
index cf8f9579c3..ed2ab4b5b2 100644
--- a/src/backend/utils/init/miscinit.c
+++ b/src/backend/utils/init/miscinit.c
@@ -32,10 +32,12 @@
 #include "catalog/pg_authid.h"
 #include "common/file_perm.h"
 #include "libpq/libpq.h"
+#include "libpq/pqsignal.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "postmaster/autovacuum.h"
+#include "postmaster/interrupt.h"
 #include "postmaster/postmaster.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
@@ -133,6 +135,23 @@ InitPostmasterChild(void)
         elog(FATAL, "setsid() failed: %m");
 #endif

+    /* In EXEC_BACKEND case we will not have inherited BlockSig etc values */
+#ifdef EXEC_BACKEND
+    pqinitmask();
+#endif
+
+    /*
+     * Every postmaster child process is expected to respond promptly to
+     * SIGQUIT at all times.  Therefore we centrally remove SIGQUIT from
+     * BlockSig and install a suitable signal handler.  (Client-facing
+     * processes may choose to replace this default choice of handler with
+     * quickdie().)  All other blockable signals remain blocked for now.
+     */
+    pqsignal(SIGQUIT, SignalHandlerForCrashExit);
+
+    sigdelset(&BlockSig, SIGQUIT);
+    PG_SETMASK(&BlockSig);
+
     /* Request a signal if the postmaster dies, if possible. */
     PostmasterDeathSignalInit();
 }
@@ -155,6 +174,13 @@ InitStandaloneProcess(const char *argv0)
     InitLatch(MyLatch);
     InitializeLatchWaitSet();

+    /*
+     * For consistency with InitPostmasterChild, initialize signal mask here.
+     * But we don't unblock SIGQUIT or provide a default handler for it.
+     */
+    pqinitmask();
+    PG_SETMASK(&BlockSig);
+
     /* Compute paths, no postmaster to inherit from */
     if (my_exec_path[0] == '\0')
     {

pgsql-hackers by date:

Previous
From: Tomas Vondra
Date:
Subject: Re: WIP: BRIN multi-range indexes
Next
From: Peter Eisentraut
Date:
Subject: Re: Missing "Up" navigation link between parts and doc root?