diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 2e2af9e96e..7887391bbb 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -4005,9 +4005,15 @@ ANY num_sync ( @@ -4022,9 +4028,13 @@ ANY num_sync ( ). - This parameter can only be set at server start. + This parameter can only be set in the postgresql.conf + file or on the server command line. This setting has no effect if primary_conninfo is not - set. + set or the server is not in standby mode. + + + The WAL receiver is restarted after an update of primary_slot_name. @@ -4142,7 +4152,11 @@ ANY num_sync ( ). The default is on. The only reason to turn this off would be if the remote instance is currently out of available replication slots. This - parameter can only be set at server start. + parameter can only be set in the postgresql.conf + file or on the server command line. + + + The WAL receiver is restarted after an update of wal_receiver_create_temp_slot. diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 408b9b489a..c24b93332e 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -803,6 +803,12 @@ static XLogSource readSource = 0; /* XLOG_FROM_* code */ static XLogSource currentSource = 0; /* XLOG_FROM_* code */ static bool lastSourceFailed = false; +/* + * Set when the running WalReceiver must be restarted due to a configuration change.
+ * Only relevant for the XLOG_FROM_STREAM source. + */ +static bool pendingWalRcvRestart = false; + typedef struct XLogPageReadPrivate { int emode; @@ -11831,6 +11837,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, for (;;) { int oldSource = currentSource; + bool startWalReceiver = false; /* * First check if we failed to read from the current source, and @@ -11864,54 +11871,13 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, if (!StandbyMode) return false; - /* - * If primary_conninfo is set, launch walreceiver to try - * to stream the missing WAL. - * - * If fetching_ckpt is true, RecPtr points to the initial - * checkpoint location. In that case, we use RedoStartLSN - * as the streaming start position instead of RecPtr, so - * that when we later jump backwards to start redo at - * RedoStartLSN, we will have the logs streamed already. - */ - if (PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0) - { - XLogRecPtr ptr; - TimeLineID tli; - - if (fetching_ckpt) - { - ptr = RedoStartLSN; - tli = ControlFile->checkPointCopy.ThisTimeLineID; - } - else - { - ptr = RecPtr; - - /* - * Use the record begin position to determine the - * TLI, rather than the position we're reading. - */ - tli = tliOfPointInHistory(tliRecPtr, expectedTLEs); - - if (curFileTLI > 0 && tli < curFileTLI) - elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u", - (uint32) (tliRecPtr >> 32), - (uint32) tliRecPtr, - tli, curFileTLI); - } - curFileTLI = tli; - RequestXLogStreaming(tli, ptr, PrimaryConnInfo, - PrimarySlotName); - receivedUpto = 0; - } - /* * Move to XLOG_FROM_STREAM state in either case. We'll * get immediate failure if we didn't launch walreceiver, * and move on to the next state.
*/ currentSource = XLOG_FROM_STREAM; + startWalReceiver = true; break; case XLOG_FROM_STREAM: @@ -12057,7 +12023,69 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, Assert(StandbyMode); /* - * Check if WAL receiver is still active. + * Shut down the WAL receiver if a restart was requested. + */ + if (!startWalReceiver && pendingWalRcvRestart) + { + if (WalRcvRunning()) + ShutdownWalRcv(); + + /* + * Re-scan for possible new timelines if we were + * requested to recover to the latest timeline. + */ + if (recoveryTargetTimeLineGoal == + RECOVERY_TARGET_TIMELINE_LATEST) + rescanLatestTimeLine(); + + startWalReceiver = true; + } + pendingWalRcvRestart = false; + + /* + * Launch walreceiver if needed. + * + * If fetching_ckpt is true, RecPtr points to the initial + * checkpoint location. In that case, we use RedoStartLSN + * as the streaming start position instead of RecPtr, so + * that when we later jump backwards to start redo at + * RedoStartLSN, we will have the logs streamed already. + */ + if (startWalReceiver && + PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0) + { + XLogRecPtr ptr; + TimeLineID tli; + + if (fetching_ckpt) + { + ptr = RedoStartLSN; + tli = ControlFile->checkPointCopy.ThisTimeLineID; + } + else + { + ptr = RecPtr; + + /* + * Use the record begin position to determine the + * TLI, rather than the position we're reading. + */ + tli = tliOfPointInHistory(tliRecPtr, expectedTLEs); + + if (curFileTLI > 0 && tli < curFileTLI) + elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u", + (uint32) (tliRecPtr >> 32), + (uint32) tliRecPtr, + tli, curFileTLI); + } + curFileTLI = tli; + RequestXLogStreaming(tli, ptr, PrimaryConnInfo, + PrimarySlotName); + receivedUpto = 0; + } + + /* + * Check if the WAL receiver is active or waiting to start up.
*/ if (!WalRcvStreaming()) { @@ -12185,6 +12213,61 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, return false; /* not reached */ } +/* + * Re-read the config file and schedule a restart of the running walreceiver + * if any of its connection settings were changed. + */ +void +ProcessStartupSigHup(void) +{ + char *conninfo = pstrdup(PrimaryConnInfo); + char *slotname = pstrdup(PrimarySlotName); + bool tempSlot = wal_receiver_create_temp_slot; + bool conninfoChanged; + bool slotnameChanged; + bool tempSlotChanged = false; + + ProcessConfigFile(PGC_SIGHUP); + + /* + * The walreceiver must be restarted if its replication settings changed. + */ + conninfoChanged = (strcmp(conninfo, PrimaryConnInfo) != 0); + slotnameChanged = (strcmp(slotname, PrimarySlotName) != 0); + + /* + * wal_receiver_create_temp_slot is used only when we have no slot + * configured. We do not need to track this change if it has no effect. + */ + if (!slotnameChanged && strcmp(PrimarySlotName, "") == 0) + tempSlotChanged = (tempSlot != wal_receiver_create_temp_slot); + + pfree(conninfo); + pfree(slotname); + + if ((conninfoChanged || slotnameChanged || tempSlotChanged) && + currentSource == XLOG_FROM_STREAM + && WalRcvRunning()) + { + if (conninfoChanged && strcmp(PrimaryConnInfo, "") == 0) + ereport(LOG, + (errmsg("The WAL receiver is going to be shut down due to change of %s", + "primary_conninfo"))); + else if (conninfoChanged && (slotnameChanged || tempSlotChanged)) + ereport(LOG, + (errmsg("The WAL receiver is going to be restarted due to change of %s and %s", + "primary_conninfo", + slotnameChanged ? "primary_slot_name" : "wal_receiver_create_temp_slot"))); + else + ereport(LOG, + (errmsg("The WAL receiver is going to be restarted due to change of %s", + conninfoChanged ? "primary_conninfo" + : (slotnameChanged ?
"primary_slot_name" : "wal_receiver_create_temp_slot")))); + + pendingWalRcvRestart = true; + } +} + /* * Determine what log level should be used to report a corrupt WAL record * in the current WAL page, previously read by XLogPageRead(). diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c index c2250d7d4e..c396fdae9a 100644 --- a/src/backend/postmaster/startup.c +++ b/src/backend/postmaster/startup.c @@ -101,12 +101,12 @@ void HandleStartupProcInterrupts(void) { /* - * Check if we were requested to re-read config file. + * Process any requests or signals received recently. */ if (got_SIGHUP) { got_SIGHUP = false; - ProcessConfigFile(PGC_SIGHUP); + ProcessStartupSigHup(); } /* diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index 264b544194..f0651aade1 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -666,7 +666,11 @@ WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI) walrcv->walRcvState == WALRCV_STOPPING); if (walrcv->walRcvState == WALRCV_RESTARTING) { - /* we don't expect primary_conninfo to change */ + /* + * We don't need to handle changes of primary_conninfo or + * primary_slot_name here. The startup process will shut down the running + * walreceiver in this case.
+ */ *startpoint = walrcv->receiveStart; *startpointTLI = walrcv->receiveStartTLI; walrcv->walRcvState = WALRCV_STREAMING; diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index f20f572a62..8dd78fdd35 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1994,7 +1994,7 @@ static struct config_bool ConfigureNamesBool[] = }, { - {"wal_receiver_create_temp_slot", PGC_POSTMASTER, REPLICATION_STANDBY, + {"wal_receiver_create_temp_slot", PGC_SIGHUP, REPLICATION_STANDBY, gettext_noop("Sets whether a WAL receiver should create a temporary replication slot if no permanent slot is configured."), }, &wal_receiver_create_temp_slot, @@ -3643,7 +3643,7 @@ static struct config_string ConfigureNamesString[] = }, { - {"primary_conninfo", PGC_POSTMASTER, REPLICATION_STANDBY, + {"primary_conninfo", PGC_SIGHUP, REPLICATION_STANDBY, gettext_noop("Sets the connection string to be used to connect to the sending server."), NULL, GUC_SUPERUSER_ONLY @@ -3654,7 +3654,7 @@ static struct config_string ConfigureNamesString[] = }, { - {"primary_slot_name", PGC_POSTMASTER, REPLICATION_STANDBY, + {"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY, gettext_noop("Sets the name of the replication slot to use on the sending server."), NULL }, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 4454407b7c..3dae3e9f3c 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -309,9 +309,7 @@ # These settings are ignored on a master server. 
#primary_conninfo = '' # connection string to sending server - # (change requires restart) #primary_slot_name = '' # replication slot on sending server - # (change requires restart) #promote_trigger_file = '' # file name whose presence ends recovery #hot_standby = on # "off" disallows queries during recovery # (change requires restart) @@ -322,7 +320,6 @@ # when reading streaming WAL; # -1 allows indefinite delay #wal_receiver_create_temp_slot = on # create temp slot if primary_slot_name not set - # (change requires restart) #wal_receiver_status_interval = 10s # send replies at least this often # 0 disables #hot_standby_feedback = off # send info from standby to prevent diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 12362421d7..94009eb7be 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -320,6 +320,7 @@ extern void SetWalWriterSleeping(bool sleeping); extern void XLogRequestWalReceiverReply(void); +extern void ProcessStartupSigHup(void); extern void assign_max_wal_size(int newval, void *extra); extern void assign_checkpoint_completion_target(double newval, void *extra); diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl index d09ebe65a3..52585a1014 100644 --- a/src/test/recovery/t/001_stream_rep.pl +++ b/src/test/recovery/t/001_stream_rep.pl @@ -3,7 +3,7 @@ use strict; use warnings; use PostgresNode; use TestLib; -use Test::More tests => 34; +use Test::More tests => 35; # Initialize master node my $node_master = get_new_node('master'); @@ -208,7 +208,9 @@ $node_standby_2->append_conf('postgresql.conf', "primary_slot_name = $slotname_2"); $node_standby_2->append_conf('postgresql.conf', "wal_receiver_status_interval = 1"); -$node_standby_2->restart; +# primary_slot_name should be changeable with a reload, without a restart; +# get_slot_xmins (below) waits for the change to take effect +$node_standby_2->reload; # Fetch xmin columns from slot's pg_replication_slots row, after waiting for # given boolean condition
to be true to ensure we've reached a quiescent state @@ -345,6 +347,24 @@ is($xmin, '', 'xmin of cascaded slot null with hs feedback reset'); is($catalog_xmin, '', 'catalog xmin of cascaded slot still null with hs_feedback reset'); +note "check change primary_conninfo without restart"; +$node_standby_2->append_conf('postgresql.conf', + "primary_slot_name = ''"); +$node_standby_2->enable_streaming($node_master); +$node_standby_2->reload; + +# stop standby_1 so standby_2 cannot keep streaming via the cascade +$node_standby_1->stop; + +my $newval = $node_master->safe_psql('postgres', +'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val' +); +$node_master->wait_for_catchup($node_standby_2, 'replay', + $node_master->lsn('insert')); +my $is_replayed = $node_standby_2->safe_psql('postgres', + qq[SELECT 1 FROM replayed WHERE val = $newval]); +is($is_replayed, qq(1), "standby_2 replayed master value $newval"); + # Test physical slot advancing and its durability. Create a new slot on # the primary, not used by any of the standbys. This reserves WAL at creation. my $phys_slot = 'phys_slot';