From 9bb71e39b218885466a53c005a87361d4ae889fa Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Mon, 5 Jan 2015 17:10:13 +0900 Subject: [PATCH] Add restore_command_retry_interval to control retries of restore_command restore_command_retry_interval can be specified in recovery.conf to control the interval of time between retries of restore_command when failures occur. --- doc/src/sgml/recovery-config.sgml | 21 +++++++++++++ src/backend/access/transam/recovery.conf.sample | 9 ++++++ src/backend/access/transam/xlog.c | 42 ++++++++++++++++++++----- 3 files changed, 64 insertions(+), 8 deletions(-) diff --git a/doc/src/sgml/recovery-config.sgml b/doc/src/sgml/recovery-config.sgml index 0c64ff2..0488ae3 100644 --- a/doc/src/sgml/recovery-config.sgml +++ b/doc/src/sgml/recovery-config.sgml @@ -145,6 +145,27 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows + + restore_command_retry_interval (integer) + + restore_command_retry_interval recovery parameter + + + + + If restore_command returns nonzero exit status code, retry + command after the interval of time specified by this parameter, + measured in milliseconds if no unit is specified. Default value is + 5s. + + + This command is particularly useful for warm standby configurations + to leverage the amount of retries done to restore a WAL segment file + depending on the activity of a master node. + + + + diff --git a/src/backend/access/transam/recovery.conf.sample b/src/backend/access/transam/recovery.conf.sample index b777400..8699a33 100644 --- a/src/backend/access/transam/recovery.conf.sample +++ b/src/backend/access/transam/recovery.conf.sample @@ -58,6 +58,15 @@ # #recovery_end_command = '' # +# +# restore_command_retry_interval +# +# specifies an optional retry interval for restore_command command, if +# previous command returned nonzero exit status code. This can be useful +# to increase or decrease the number of calls of restore_command. +# +#restore_command_retry_interval = '5s' +# #--------------------------------------------------------------------------- # RECOVERY TARGET PARAMETERS #--------------------------------------------------------------------------- diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 5cc7e47..1944e6f 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -235,6 +235,7 @@ static TimestampTz recoveryTargetTime; static char *recoveryTargetName; static int recovery_min_apply_delay = 0; static TimestampTz recoveryDelayUntilTime; +static int restore_command_retry_interval = 5000; /* options taken from recovery.conf for XLOG streaming */ static bool StandbyModeRequested = false; @@ -4881,6 +4882,26 @@ readRecoveryCommandFile(void) (errmsg_internal("trigger_file = '%s'", TriggerFile))); } + else if (strcmp(item->name, "restore_command_retry_interval") == 0) + { + const char *hintmsg; + + if (!parse_int(item->value, &restore_command_retry_interval, GUC_UNIT_MS, + &hintmsg)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("parameter \"%s\" requires a temporal value", + "restore_command_retry_interval"), + hintmsg ? errhint("%s", _(hintmsg)) : 0)); + ereport(DEBUG2, + (errmsg_internal("restore_command_retry_interval = '%s'", item->value))); + + if (restore_command_retry_interval <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("\"%s\" must have a value strictly positive", + "restore_command_retry_interval"))); + } else if (strcmp(item->name, "recovery_min_apply_delay") == 0) { const char *hintmsg; @@ -10345,8 +10366,8 @@ static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, bool fetching_ckpt, XLogRecPtr tliRecPtr) { - static pg_time_t last_fail_time = 0; - pg_time_t now; + TimestampTz now = GetCurrentTimestamp(); + TimestampTz last_fail_time = now; /*------- * Standby mode is implemented by a state machine: @@ -10495,14 +10516,19 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, * machine, so we've exhausted all the options for * obtaining the requested WAL. We're going to loop back * and retry from the archive, but if it hasn't been long - * since last attempt, sleep 5 seconds to avoid - * busy-waiting. + * since last attempt, sleep the amount of time specified + * by restore_command_retry_interval to avoid busy-waiting. */ - now = (pg_time_t) time(NULL); - if ((now - last_fail_time) < 5) + now = GetCurrentTimestamp(); + if (!TimestampDifferenceExceeds(last_fail_time, now, + restore_command_retry_interval)) { - pg_usleep(1000000L * (5 - (now - last_fail_time))); - now = (pg_time_t) time(NULL); + long secs; + int microsecs; + + TimestampDifference(last_fail_time, now, &secs, µsecs); + pg_usleep(restore_command_retry_interval * 1000L - (1000000L * secs + 1L * microsecs)); + now = GetCurrentTimestamp(); } last_fail_time = now; currentSource = XLOG_FROM_ARCHIVE; -- 2.2.1