Home > mailing lists
Re: Parallel query hangs after a smart shutdown is issued - Mailing list pgsql-hackers

From	Tom Lane
Subject	Re: Parallel query hangs after a smart shutdown is issued
Date	August 12, 2020 20:59:28
Msg-id	320380.1597265968@sss.pgh.pa.us Whole thread Raw
In response to	Re: Parallel query hangs after a smart shutdown is issued (Tom Lane <tgl@sss.pgh.pa.us>)
Responses	Re: Parallel query hangs after a smart shutdown is issued
List	pgsql-hackers
Tree view
I wrote:
> Oh, excellent point!  I'd not thought to look at tests of the Shutdown
> variable, but yeah, those should be <= SmartShutdown if we want autovac
> to continue to operate in this state.

On looking closer, there's another problem: setting start_autovac_launcher
isn't enough to get the autovacuum launcher to run, because ServerLoop() won't
launch it except in PM_RUN state.  Likewise, the other "relaunch a dead
process" checks in ServerLoop() need to be generalized to support
relaunching background processes while we're waiting out the foreground
clients.  So that leads me to the attached v3.  I had to re-instantiate
PM_WAIT_READONLY as an alternate state to PM_WAIT_CLIENTS; these states
are about the same so far as PostmasterStateMachine is concerned, but
some of the should-we-launch-FOO checks care about the difference.

The various pmState tests are getting messy enough to cry out for
refactoring, but I've not attempted that here.  There's enough
variance in the conditions for launching different subprocesses that
I'm not very sure what would be a nicer-looking way to write them.

            regards, tom lane

diff --git a/doc/src/sgml/ref/pg_ctl-ref.sgml b/doc/src/sgml/ref/pg_ctl-ref.sgml
index e31275a04e..3946fa52ea 100644
--- a/doc/src/sgml/ref/pg_ctl-ref.sgml
+++ b/doc/src/sgml/ref/pg_ctl-ref.sgml
@@ -185,8 +185,8 @@ PostgreSQL documentation
    <option>stop</option> mode shuts down the server that is running in
    the specified data directory.  Three different
    shutdown methods can be selected with the <option>-m</option>
-   option.  <quote>Smart</quote> mode waits for all active
-   clients to disconnect and any online backup to finish.
+   option.  <quote>Smart</quote> mode disallows new connections, then waits
+   for all existing clients to disconnect and any online backup to finish.
    If the server is in hot standby, recovery and streaming replication
    will be terminated once all clients have disconnected.
    <quote>Fast</quote> mode (the default) does not wait for clients to disconnect and
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 38e2c16ac2..d134dade53 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -148,8 +148,6 @@
 #define BACKEND_TYPE_BGWORKER    0x0008    /* bgworker process */
 #define BACKEND_TYPE_ALL        0x000F    /* OR of all the above */

-#define BACKEND_TYPE_WORKER        (BACKEND_TYPE_AUTOVAC | BACKEND_TYPE_BGWORKER)
-
 /*
  * List of active backends (or child processes anyway; we don't actually
  * know whether a given child has become a backend or is still in the
@@ -319,10 +317,10 @@ static bool FatalError = false; /* T if recovering from backend crash */
  *
  * Notice that this state variable does not distinguish *why* we entered
  * states later than PM_RUN --- Shutdown and FatalError must be consulted
- * to find that out.  FatalError is never true in PM_RECOVERY_* or PM_RUN
- * states, nor in PM_SHUTDOWN states (because we don't enter those states
- * when trying to recover from a crash).  It can be true in PM_STARTUP state,
- * because we don't clear it until we've successfully started WAL redo.
+ * to find that out.  FatalError is never true in PM_RECOVERY, PM_HOT_STANDBY,
+ * or PM_RUN states, nor in PM_SHUTDOWN states (because we don't enter those
+ * states when trying to recover from a crash).  It can be true in PM_STARTUP
+ * state, because we don't clear it until we've successfully started WAL redo.
  */
 typedef enum
 {
@@ -332,8 +330,9 @@ typedef enum
     PM_HOT_STANDBY,                /* in hot standby mode */
     PM_RUN,                        /* normal "database is alive" state */
     PM_WAIT_BACKUP,                /* waiting for online backup mode to end */
-    PM_WAIT_READONLY,            /* waiting for read only backends to exit */
-    PM_WAIT_BACKENDS,            /* waiting for live backends to exit */
+    PM_WAIT_CLIENTS,            /* waiting for normal backends to exit */
+    PM_WAIT_READONLY,            /* likewise, when we had been in a RO state */
+    PM_WAIT_BACKENDS,            /* waiting for all backends to exit */
     PM_SHUTDOWN,                /* waiting for checkpointer to do shutdown
                                  * ckpt */
     PM_SHUTDOWN_2,                /* waiting for archiver and walsenders to
@@ -437,9 +436,10 @@ static void InitPostmasterDeathWatchHandle(void);
  * even during recovery.
  */
 #define PgArchStartupAllowed()    \
-    ((XLogArchivingActive() && pmState == PM_RUN) ||    \
+    ((XLogArchivingActive() &&    \
+      (pmState == PM_RUN || pmState == PM_WAIT_BACKUP || pmState == PM_WAIT_CLIENTS)) ||    \
      (XLogArchivingAlways() &&    \
-      (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY)))
+      (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY || pmState == PM_WAIT_READONLY)))

 #ifdef EXEC_BACKEND

@@ -1750,7 +1750,8 @@ ServerLoop(void)
          * fails, we'll just try again later.  Likewise for the checkpointer.
          */
         if (pmState == PM_RUN || pmState == PM_RECOVERY ||
-            pmState == PM_HOT_STANDBY)
+            pmState == PM_HOT_STANDBY || pmState == PM_WAIT_BACKUP ||
+            pmState == PM_WAIT_CLIENTS || pmState == PM_WAIT_READONLY)
         {
             if (CheckpointerPID == 0)
                 CheckpointerPID = StartCheckpointer();
@@ -1763,7 +1764,8 @@ ServerLoop(void)
          * one.  But this is needed only in normal operation (else we cannot
          * be writing any new WAL).
          */
-        if (WalWriterPID == 0 && pmState == PM_RUN)
+        if (WalWriterPID == 0 &&
+            (pmState == PM_RUN || pmState == PM_WAIT_BACKUP || pmState == PM_WAIT_CLIENTS))
             WalWriterPID = StartWalWriter();

         /*
@@ -1774,7 +1776,7 @@ ServerLoop(void)
          */
         if (!IsBinaryUpgrade && AutoVacPID == 0 &&
             (AutoVacuumingActive() || start_autovac_launcher) &&
-            pmState == PM_RUN)
+            (pmState == PM_RUN || pmState == PM_WAIT_BACKUP || pmState == PM_WAIT_CLIENTS))
         {
             AutoVacPID = StartAutoVacLauncher();
             if (AutoVacPID != 0)
@@ -1783,7 +1785,9 @@ ServerLoop(void)

         /* If we have lost the stats collector, try to start a new one */
         if (PgStatPID == 0 &&
-            (pmState == PM_RUN || pmState == PM_HOT_STANDBY))
+            (pmState == PM_RUN || pmState == PM_HOT_STANDBY ||
+             pmState == PM_WAIT_BACKUP || pmState == PM_WAIT_CLIENTS ||
+             pmState == PM_WAIT_READONLY))
             PgStatPID = pgstat_start();

         /* If we have lost the archiver, try to start a new one. */
@@ -2793,35 +2797,19 @@ pmdie(SIGNAL_ARGS)
             sd_notify(0, "STOPPING=1");
 #endif

-            if (pmState == PM_RUN || pmState == PM_RECOVERY ||
-                pmState == PM_HOT_STANDBY || pmState == PM_STARTUP)
-            {
-                /* autovac workers are told to shut down immediately */
-                /* and bgworkers too; does this need tweaking? */
-                SignalSomeChildren(SIGTERM,
-                                   BACKEND_TYPE_AUTOVAC | BACKEND_TYPE_BGWORKER);
-                /* and the autovac launcher too */
-                if (AutoVacPID != 0)
-                    signal_child(AutoVacPID, SIGTERM);
-                /* and the bgwriter too */
-                if (BgWriterPID != 0)
-                    signal_child(BgWriterPID, SIGTERM);
-                /* and the walwriter too */
-                if (WalWriterPID != 0)
-                    signal_child(WalWriterPID, SIGTERM);
-
-                /*
-                 * If we're in recovery, we can't kill the startup process
-                 * right away, because at present doing so does not release
-                 * its locks.  We might want to change this in a future
-                 * release.  For the time being, the PM_WAIT_READONLY state
-                 * indicates that we're waiting for the regular (read only)
-                 * backends to die off; once they do, we'll kill the startup
-                 * and walreceiver processes.
-                 */
-                pmState = (pmState == PM_RUN) ?
-                    PM_WAIT_BACKUP : PM_WAIT_READONLY;
-            }
+            /*
+             * If we reached normal running, we have to wait for any online
+             * backup mode to end; otherwise go straight to waiting for client
+             * backends to exit.  (The difference is that in the former state,
+             * we'll still let in new superuser clients, so that somebody can
+             * end the online backup mode.)  If already in PM_WAIT_BACKUP or a
+             * later state, do not change it.
+             */
+            if (pmState == PM_RUN)
+                pmState = PM_WAIT_BACKUP;
+            else if (pmState == PM_RECOVERY ||
+                     pmState == PM_HOT_STANDBY || pmState == PM_STARTUP)
+                pmState = PM_WAIT_READONLY;

             /*
              * Now wait for online backup mode to end and backends to exit. If
@@ -2871,16 +2859,16 @@ pmdie(SIGNAL_ARGS)
             }
             else if (pmState == PM_RUN ||
                      pmState == PM_WAIT_BACKUP ||
+                     pmState == PM_WAIT_CLIENTS ||
                      pmState == PM_WAIT_READONLY ||
                      pmState == PM_WAIT_BACKENDS ||
                      pmState == PM_HOT_STANDBY)
             {
                 ereport(LOG,
                         (errmsg("aborting any active transactions")));
-                /* shut down all backends and workers */
+                /* shut down all backends and workers, but not walsenders */
                 SignalSomeChildren(SIGTERM,
-                                   BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC |
-                                   BACKEND_TYPE_BGWORKER);
+                                   BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND);
                 /* and the autovac launcher too */
                 if (AutoVacPID != 0)
                     signal_child(AutoVacPID, SIGTERM);
@@ -2987,7 +2975,7 @@ reaper(SIGNAL_ARGS)
                 ereport(LOG,
                         (errmsg("shutdown at recovery target")));
                 StartupStatus = STARTUP_NOT_RUNNING;
-                Shutdown = SmartShutdown;
+                Shutdown = Max(Shutdown, SmartShutdown);
                 TerminateChildren(SIGTERM);
                 pmState = PM_WAIT_BACKENDS;
                 /* PostmasterStateMachine logic does the rest */
@@ -3234,7 +3222,9 @@ reaper(SIGNAL_ARGS)
             if (!EXIT_STATUS_0(exitstatus))
                 LogChildExit(LOG, _("statistics collector process"),
                              pid, exitstatus);
-            if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
+            if (pmState == PM_RUN || pmState == PM_HOT_STANDBY ||
+                pmState == PM_WAIT_BACKUP || pmState == PM_WAIT_CLIENTS ||
+                pmState == PM_WAIT_READONLY)
                 PgStatPID = pgstat_start();
             continue;
         }
@@ -3713,6 +3703,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
         pmState == PM_HOT_STANDBY ||
         pmState == PM_RUN ||
         pmState == PM_WAIT_BACKUP ||
+        pmState == PM_WAIT_CLIENTS ||
         pmState == PM_WAIT_READONLY ||
         pmState == PM_SHUTDOWN)
         pmState = PM_WAIT_BACKENDS;
@@ -3802,21 +3793,36 @@ PostmasterStateMachine(void)
          * PM_WAIT_BACKUP state ends when online backup mode is not active.
          */
         if (!BackupInProgress())
-            pmState = PM_WAIT_BACKENDS;
+            pmState = PM_WAIT_CLIENTS;
     }

-    if (pmState == PM_WAIT_READONLY)
+    if (pmState == PM_WAIT_CLIENTS || pmState == PM_WAIT_READONLY)
     {
         /*
-         * PM_WAIT_READONLY state ends when we have no regular backends that
-         * have been started during recovery.  We kill the startup and
-         * walreceiver processes and transition to PM_WAIT_BACKENDS.  Ideally,
-         * we might like to kill these processes first and then wait for
-         * backends to die off, but that doesn't work at present because
-         * killing the startup process doesn't release its locks.
+         * PM_WAIT_CLIENTS or PM_WAIT_READONLY state ends when we have no
+         * normal client backends running.  Then signal appropriate support
+         * processes, and transition to PM_WAIT_BACKENDS to wait for them to
+         * die.
          */
         if (CountChildren(BACKEND_TYPE_NORMAL) == 0)
         {
+            /*
+             * Signal all backend children except walsenders.  (While there
+             * can't be any normal children left, we might as well include
+             * BACKEND_TYPE_NORMAL in this mask, just to be sure.)
+             */
+            SignalSomeChildren(SIGTERM,
+                               BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND);
+            /* and the autovac launcher too */
+            if (AutoVacPID != 0)
+                signal_child(AutoVacPID, SIGTERM);
+            /* and the bgwriter too */
+            if (BgWriterPID != 0)
+                signal_child(BgWriterPID, SIGTERM);
+            /* and the walwriter too */
+            if (WalWriterPID != 0)
+                signal_child(WalWriterPID, SIGTERM);
+            /* If we're in recovery, also stop startup and walreceiver procs */
             if (StartupPID != 0)
                 signal_child(StartupPID, SIGTERM);
             if (WalReceiverPID != 0)
@@ -3843,7 +3849,7 @@ PostmasterStateMachine(void)
          * later after writing the checkpoint record, like the archiver
          * process.
          */
-        if (CountChildren(BACKEND_TYPE_NORMAL | BACKEND_TYPE_WORKER) == 0 &&
+        if (CountChildren(BACKEND_TYPE_ALL - BACKEND_TYPE_WALSND) == 0 &&
             StartupPID == 0 &&
             WalReceiverPID == 0 &&
             BgWriterPID == 0 &&
@@ -5287,7 +5293,7 @@ sigusr1_handler(SIGNAL_ARGS)
     }

     if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER) &&
-        Shutdown == NoShutdown)
+        Shutdown <= SmartShutdown && pmState < PM_WAIT_BACKENDS)
     {
         /*
          * Start one iteration of the autovacuum daemon, even if autovacuuming
@@ -5302,7 +5308,7 @@ sigusr1_handler(SIGNAL_ARGS)
     }

     if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER) &&
-        Shutdown == NoShutdown)
+        Shutdown <= SmartShutdown && pmState < PM_WAIT_BACKENDS)
     {
         /* The autovacuum launcher wants us to start a worker process. */
         StartAutovacuumWorker();
@@ -5333,7 +5339,7 @@ sigusr1_handler(SIGNAL_ARGS)

     if (StartupPID != 0 &&
         (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
-         pmState == PM_HOT_STANDBY || pmState == PM_WAIT_READONLY) &&
+         pmState == PM_HOT_STANDBY) &&
         CheckPromoteSignal())
     {
         /*
@@ -5652,7 +5658,7 @@ MaybeStartWalReceiver(void)
     if (WalReceiverPID == 0 &&
         (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
          pmState == PM_HOT_STANDBY || pmState == PM_WAIT_READONLY) &&
-        Shutdown == NoShutdown)
+        Shutdown <= SmartShutdown)
     {
         WalReceiverPID = StartWalReceiver();
         if (WalReceiverPID != 0)
@@ -5906,6 +5912,7 @@ bgworker_should_start_now(BgWorkerStartTime start_time)
         case PM_SHUTDOWN:
         case PM_WAIT_BACKENDS:
         case PM_WAIT_READONLY:
+        case PM_WAIT_CLIENTS:
         case PM_WAIT_BACKUP:
             break;
pgsql-hackers by date:
From: Alvaro Herrera
Date: 12 August 2020, 19:58:19
Subject: Re: [BUG] Error in BRIN summarization
From: Thomas Munro
Date: 12 August 2020, 21:41:42
Subject: Re: Parallel query hangs after a smart shutdown is issued
Re: Parallel query hangs after a smart shutdown is issued - Mailing list pgsql-hackers

Previous

Next