diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 2a83671b53..80d12b26d7 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -1690,6 +1690,11 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i backup: This WAL sender is sending a backup. + + + stopping: This WAL sender is stopping. + + diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index c7667879c6..4b64c460c3 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -8325,6 +8325,12 @@ ShutdownXLOG(int code, Datum arg) ereport(IsPostmasterEnvironment ? LOG : NOTICE, (errmsg("shutting down"))); + /* + * If there are any WAL senders active, wait to get the confirmation that + * they are in a stopping state before moving on to next steps. + */ + WalSndWaitStop(); + if (RecoveryInProgress()) CreateRestartPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE); else diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 2bb4380533..992fb3ac98 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -2918,7 +2918,7 @@ reaper(SIGNAL_ARGS) * Waken walsenders for the last time. No regular backends * should be around anymore. */ - SignalChildren(SIGUSR2); + SignalChildren(SIGINT); pmState = PM_SHUTDOWN_2; @@ -3656,7 +3656,9 @@ PostmasterStateMachine(void) /* * If we get here, we are proceeding with normal shutdown. All * the regular children are gone, and it's time to tell the - * checkpointer to do a shutdown checkpoint. + * checkpointer to do a shutdown checkpoint. All WAL senders + * are told to switch to a stopping state so that the shutdown + * checkpoint can progress. 
*/ Assert(Shutdown > NoShutdown); /* Start the checkpointer if not running */ @@ -3665,6 +3667,7 @@ PostmasterStateMachine(void) /* And tell it to shut down */ if (CheckpointerPID != 0) { + SignalSomeChildren(SIGUSR2, BACKEND_TYPE_WALSND); signal_child(CheckpointerPID, SIGUSR2); pmState = PM_SHUTDOWN; } diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 064cf5ee28..dda3368168 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -24,11 +24,17 @@ * are treated as not a crash but approximately normal termination; * the walsender will exit quickly without sending any more XLOG records. * - * If the server is shut down, postmaster sends us SIGUSR2 after all - * regular backends have exited and the shutdown checkpoint has been written. - * This instructs walsender to send any outstanding WAL, including the - * shutdown checkpoint record, wait for it to be replicated to the standby, - * and then exit. + * If the server is shut down, postmaster sends SIGUSR2 to switch all the + * WAL senders to a stopping state before telling the checkpointer to issue + * the shutdown checkpoint. Once this state is reached WAL senders will + * block any replication command that may generate WAL activity. The + * checkpointer checks the state of each WAL sender, and begins the shutdown + * checkpoint once all the WAL senders are confirmed as stopping. Once all + * the regular backends have exited and the shutdown checkpoint has been + * written, the postmaster sends SIGINT to all the WAL senders. This + * instructs walsender to send any outstanding WAL, including the shutdown + * checkpoint record, wait for it to be replicated to the standby, and then + * exit. 
* * * Portions Copyright (c) 2010-2017, PostgreSQL Global Development Group @@ -177,13 +183,14 @@ static bool WalSndCaughtUp = false; /* Flags set by signal handlers for later service in main loop */ static volatile sig_atomic_t got_SIGHUP = false; -static volatile sig_atomic_t walsender_ready_to_stop = false; +static volatile sig_atomic_t got_SIGUSR2 = false; +static volatile sig_atomic_t got_SIGINT = false; /* - * This is set while we are streaming. When not set, SIGUSR2 signal will be + * This is set while we are streaming. When not set, SIGINT signal will be * handled like SIGTERM. When set, the main loop is responsible for checking - * walsender_ready_to_stop and terminating when it's set (after streaming any - * remaining WAL). + * got_SIGINT and terminating when it's set (after streaming any remaining + * WAL). */ static volatile sig_atomic_t replication_active = false; @@ -213,6 +220,7 @@ static struct /* Signal handlers */ static void WalSndSigHupHandler(SIGNAL_ARGS); static void WalSndXLogSendHandler(SIGNAL_ARGS); +static void WalSndSwitchStopping(SIGNAL_ARGS); static void WalSndLastCycleHandler(SIGNAL_ARGS); /* Prototypes for private functions */ @@ -299,11 +307,14 @@ WalSndErrorCleanup(void) ReplicationSlotCleanup(); replication_active = false; - if (walsender_ready_to_stop) + if (got_SIGINT) proc_exit(0); /* Revert back to startup state */ WalSndSetState(WALSNDSTATE_STARTUP); + + if (got_SIGUSR2) + WalSndSetState(WALSNDSTATE_STOPPING); } /* @@ -676,7 +687,7 @@ StartReplication(StartReplicationCmd *cmd) WalSndLoop(XLogSendPhysical); replication_active = false; - if (walsender_ready_to_stop) + if (got_SIGINT) proc_exit(0); WalSndSetState(WALSNDSTATE_STARTUP); @@ -844,6 +855,14 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd) Assert(!MyReplicationSlot); + /* + * If WAL sender is shutting down, prevent CREATE_REPLICATION_SLOT as it + * could result in the generation of new WAL data. 
+ */ + if (MyWalSnd->state == WALSNDSTATE_STOPPING) + ereport(ERROR, + (errmsg("CREATE_REPLICATION_SLOT cannot be called during WAL sender shutdown"))); + parseCreateReplSlotOptions(cmd, &reserve_wal, &snapshot_action); /* setup state for XLogReadPage */ @@ -1019,6 +1038,14 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd) static void DropReplicationSlot(DropReplicationSlotCmd *cmd) { + /* + * If WAL sender is shutting down, prevent DROP_REPLICATION_SLOT as it + * could result in the generation of new WAL data. + */ + if (MyWalSnd->state == WALSNDSTATE_STOPPING) + ereport(ERROR, + (errmsg("DROP_REPLICATION_SLOT cannot be called during WAL sender shutdown"))); + ReplicationSlotDrop(cmd->slotname); EndCommand("DROP_REPLICATION_SLOT", DestRemote); } @@ -1048,7 +1075,7 @@ StartLogicalReplication(StartReplicationCmd *cmd) { ereport(LOG, (errmsg("terminating walsender process after promotion"))); - walsender_ready_to_stop = true; + got_SIGINT = true; } WalSndSetState(WALSNDSTATE_CATCHUP); @@ -1098,7 +1125,7 @@ StartLogicalReplication(StartReplicationCmd *cmd) ReplicationSlotRelease(); replication_active = false; - if (walsender_ready_to_stop) + if (got_SIGINT) proc_exit(0); WalSndSetState(WALSNDSTATE_STARTUP); @@ -1286,6 +1313,14 @@ WalSndWaitForWal(XLogRecPtr loc) RecentFlushPtr = GetXLogReplayRecPtr(NULL); /* + * If postmaster asked us to switch to a stopping state, do so. + * Shutdown is in progress and this will allow the checkpointer to + * move on with the shutdown checkpoint. + */ + if (got_SIGUSR2) + WalSndSetState(WALSNDSTATE_STOPPING); + + /* * If postmaster asked us to stop, don't wait here anymore. This will * cause the xlogreader to return without reading a full record, which * is the fastest way to reach the mainloop which then can quit. @@ -1294,7 +1329,7 @@ WalSndWaitForWal(XLogRecPtr loc) * RecentFlushPtr, so we can send all remaining data before shutting * down. 
*/ - if (walsender_ready_to_stop) + if (got_SIGINT) break; /* @@ -1369,6 +1404,13 @@ exec_replication_command(const char *cmd_string) MemoryContext old_context; /* + * If WAL sender has been told that shutdown is getting close, switch its + * status accordingly to handle the next replication commands correctly. + */ + if (got_SIGUSR2) + WalSndSetState(WALSNDSTATE_STOPPING); + + /* * CREATE_REPLICATION_SLOT ... LOGICAL exports a snapshot until the next * command arrives. Clean up the old stuff if there's anything. */ @@ -2090,13 +2132,20 @@ WalSndLoop(WalSndSendDataCallback send_data) } /* - * When SIGUSR2 arrives, we send any outstanding logs up to the + * At the reception of SIGUSR2, switch the WAL sender to a stopping + * mode. + */ + if (got_SIGUSR2) + WalSndSetState(WALSNDSTATE_STOPPING); + + /* + * When SIGINT arrives, we send any outstanding logs up to the * shutdown checkpoint record (i.e., the latest record), wait for * them to be replicated to the standby, and exit. This may be a * normal termination at shutdown, or a promotion, the walsender * is not sure which. */ - if (walsender_ready_to_stop) + if (got_SIGINT) WalSndDone(send_data); } @@ -2836,7 +2885,24 @@ WalSndXLogSendHandler(SIGNAL_ARGS) errno = save_errno; } -/* SIGUSR2: set flag to do a last cycle and shut down afterwards */ +/* SIGUSR2: set flag to switch to stopping state */ +static void +WalSndSwitchStopping(SIGNAL_ARGS) +{ + int save_errno = errno; + + got_SIGUSR2 = true; + + SetLatch(MyLatch); + + errno = save_errno; +} + +/* + * SIGINT: set flag to do a last cycle and shut down afterwards. The WAL + * sender should already have been switched to WALSNDSTATE_STOPPING at + * this point. 
+ */ static void WalSndLastCycleHandler(SIGNAL_ARGS) { @@ -2851,7 +2917,7 @@ WalSndLastCycleHandler(SIGNAL_ARGS) if (!replication_active) kill(MyProcPid, SIGTERM); - walsender_ready_to_stop = true; + got_SIGINT = true; SetLatch(MyLatch); errno = save_errno; @@ -2864,14 +2930,14 @@ WalSndSignals(void) /* Set up signal handlers */ pqsignal(SIGHUP, WalSndSigHupHandler); /* set flag to read config * file */ - pqsignal(SIGINT, SIG_IGN); /* not used */ + pqsignal(SIGINT, WalSndLastCycleHandler); /* request a last cycle and + * shutdown */ pqsignal(SIGTERM, die); /* request shutdown */ pqsignal(SIGQUIT, quickdie); /* hard crash time */ InitializeTimeouts(); /* establishes SIGALRM handler */ pqsignal(SIGPIPE, SIG_IGN); pqsignal(SIGUSR1, WalSndXLogSendHandler); /* request WAL sending */ - pqsignal(SIGUSR2, WalSndLastCycleHandler); /* request a last cycle and - * shutdown */ + pqsignal(SIGUSR2, WalSndSwitchStopping); /* switch to stopping state */ /* Reset some signals that are accepted by postmaster but not here */ pqsignal(SIGCHLD, SIG_DFL); @@ -2949,6 +3015,51 @@ WalSndWakeup(void) } } +/* + * Wait until all the WAL senders have reached a stopping state. This is + * used by the checkpointer to control when shutdown checkpoints can + * safely begin. 
+ */ +void +WalSndWaitStop(void) +{ + int i; + + for (;;) + { + bool all_stopped = true; + + for (i = 0; i < max_wal_senders; i++) + { + WalSndState state; + WalSnd *walsnd = &WalSndCtl->walsnds[i]; + + SpinLockAcquire(&walsnd->mutex); + + if (walsnd->pid == 0) + { + SpinLockRelease(&walsnd->mutex); + continue; + } + + state = walsnd->state; + SpinLockRelease(&walsnd->mutex); + + if (state != WALSNDSTATE_STOPPING) + { + all_stopped = false; + break; + } + } + + /* safe to leave if confirmation is done for all WAL senders */ + if (all_stopped) + return; + + pg_usleep(10000L); /* wait for 10 msec */ + } +} + /* Set state for current walsender (only called in walsender) */ void WalSndSetState(WalSndState state) @@ -2982,6 +3093,8 @@ WalSndGetStateString(WalSndState state) return "catchup"; case WALSNDSTATE_STREAMING: return "streaming"; + case WALSNDSTATE_STOPPING: + return "stopping"; } return "UNKNOWN"; } diff --git a/src/include/replication/walsender.h b/src/include/replication/walsender.h index 2ca903872e..eb9cf0b0dc 100644 --- a/src/include/replication/walsender.h +++ b/src/include/replication/walsender.h @@ -44,6 +44,7 @@ extern void WalSndSignals(void); extern Size WalSndShmemSize(void); extern void WalSndShmemInit(void); extern void WalSndWakeup(void); +extern void WalSndWaitStop(void); extern void WalSndRqstFileReload(void); /* diff --git a/src/include/replication/walsender_private.h b/src/include/replication/walsender_private.h index 2c59056cef..36311e124c 100644 --- a/src/include/replication/walsender_private.h +++ b/src/include/replication/walsender_private.h @@ -24,7 +24,8 @@ typedef enum WalSndState WALSNDSTATE_STARTUP = 0, WALSNDSTATE_BACKUP, WALSNDSTATE_CATCHUP, - WALSNDSTATE_STREAMING + WALSNDSTATE_STREAMING, + WALSNDSTATE_STOPPING } WalSndState; /*