Re: [proposal] recovery_target "latest" - Mailing list pgsql-hackers
From | Kyotaro Horiguchi |
---|---|
Subject | Re: [proposal] recovery_target "latest" |
Date | |
Msg-id | 20191113.165515.53765824575101302.horikyota.ntt@gmail.com Whole thread Raw |
In response to | Re: [proposal] recovery_target "latest" (Grigory Smolkin <g.smolkin@postgrespro.ru>) |
List | pgsql-hackers |
At Fri, 8 Nov 2019 16:08:47 +0300, Grigory Smolkin <g.smolkin@postgrespro.ru> wrote in > > On 11/8/19 7:00 AM, Grigory Smolkin wrote: > > > > On 11/7/19 4:36 PM, Grigory Smolkin wrote: > >> I gave it some thought and now think that prohibiting recovery_target > >> 'latest' and standby was a bad idea. > >> All recovery_targets follow the same pattern of usage, so > >> recovery_target "latest" also must be capable of working in standby > >> mode. > >> All recovery_targets have a clear deterministic 'target' where > >> recovery should end. > >> In case of recovery_target "latest" this target is the end of > >> available WAL, therefore the end of available WAL must be more clearly > >> defined. > >> I will work on it. > >> > >> Thank you for a feedback. > > > > > > Attached new patch revision, now end of available WAL is defined as > > the fact of missing required WAL. > > In case of standby, the end of WAL is defined as 2 consecutive > > switches of WAL source, that didn`t provided requested record. > > In case of streaming standby, each switch of WAL source is forced > > after 3 failed attempts to get new data from walreceiver. > > > > All constants are taken off the top of my head and serves as proof of > > concept. > > TAP test is updated. > > > Attached new revision, it contains some minor refactoring. Thanks for the new patch. I found that it needs more than I thought, but it seems a bit too complicated and less stable. As the patch does, WaitForWALToBecomeAvailable needs to exit when available sources are exhausted. However, we don't need to count failures to do that. It is enough for the function to have two more exit points: when the streaming timeout fires, and when we find that streaming is not set up when archive/wal failed. In my opinion, it is better to have less dependency on global variables at such deep levels of the call hierarchy. Such information can be stored in XLogPageReadPrivate. 
I think the doc needs to explain the difference between default and latest. Please find the attached patch, which illustrates the first two points above. regards. -- Kyotaro Horiguchi NTT Open Source Software Center diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 3b766e66b9..8c8a1c6bf0 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -808,6 +808,7 @@ typedef struct XLogPageReadPrivate int emode; bool fetching_ckpt; /* are we fetching a checkpoint record? */ bool randAccess; + bool return_on_eow; /* returns when reaching End of WAL */ } XLogPageReadPrivate; /* @@ -887,7 +888,9 @@ static int XLogFileReadAnyTLI(XLogSegNo segno, int emode, int source); static int XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen, XLogRecPtr targetRecPtr, char *readBuf); static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, - bool fetching_ckpt, XLogRecPtr tliRecPtr); + bool fetching_ckpt, + XLogRecPtr tliRecPtr, + bool return_on_eow); static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr); static void XLogFileClose(void); static void PreallocXlogFiles(XLogRecPtr endptr); @@ -4253,6 +4256,7 @@ ReadRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, int emode, private->fetching_ckpt = fetching_ckpt; private->emode = emode; private->randAccess = (RecPtr != InvalidXLogRecPtr); + private->return_on_eow = (recoveryTarget == RECOVERY_TARGET_LATEST); /* This is the first attempt to read this page. */ lastSourceFailed = false; @@ -4371,8 +4375,12 @@ ReadRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr, int emode, continue; } - /* In standby mode, loop back to retry. Otherwise, give up. */ - if (StandbyMode && !CheckForStandbyTrigger()) + /* + * In standby mode, loop back to retry. Otherwise, give up. + * If we told to return on end of WAL, also give up. 
+ */ + if (StandbyMode && !CheckForStandbyTrigger() && + !private->return_on_eow) continue; else return NULL; @@ -7271,7 +7279,7 @@ StartupXLOG(void) * end of main redo apply loop */ - if (reachedStopPoint) + if (reachedStopPoint || recoveryTarget == RECOVERY_TARGET_LATEST) { if (!reachedConsistency) ereport(FATAL, @@ -7473,6 +7481,8 @@ StartupXLOG(void) recoveryStopName); else if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE) snprintf(reason, sizeof(reason), "reached consistency"); + else if (recoveryTarget == RECOVERY_TARGET_LATEST) + snprintf(reason, sizeof(reason), "end of WAL"); else snprintf(reason, sizeof(reason), "no recovery target specified"); @@ -11605,7 +11615,8 @@ retry: if (!WaitForWALToBecomeAvailable(targetPagePtr + reqLen, private->randAccess, private->fetching_ckpt, - targetRecPtr)) + targetRecPtr, + private->return_on_eow)) { if (readFile >= 0) close(readFile); @@ -11745,7 +11756,9 @@ next_record_is_invalid: * * If the record is not immediately available, the function returns false * if we're not in standby mode. In standby mode, waits for it to become - * available. + * available looping over all sources if return_on_eow is false. Otherwise the + * function returns false if the record is not immediately available on all + * sources. * * When the requested record becomes available, the function opens the file * containing it (if not open already), and returns true. When end of standby @@ -11754,7 +11767,8 @@ next_record_is_invalid: */ static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, - bool fetching_ckpt, XLogRecPtr tliRecPtr) + bool fetching_ckpt, XLogRecPtr tliRecPtr, + bool return_on_eow) { static TimestampTz last_fail_time = 0; TimestampTz now; @@ -11862,6 +11876,12 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, receivedUpto = 0; } + /* + * If we don't get into streaming, this is the end of WAL + */ + else if (return_on_eow) + return false; + /* * Move to XLOG_FROM_STREAM state in either case. 
We'll * get immediate failure if we didn't launch walreceiver, @@ -12124,6 +12144,14 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, WL_EXIT_ON_PM_DEATH, 5000L, WAIT_EVENT_RECOVERY_WAL_ALL); ResetLatch(&XLogCtl->recoveryWakeupLatch); + + /* + * This is not exactly the end of WAL, but assume the + * primary has no more record to send. + */ + if (return_on_eow) + return false; + break; } diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 4b3769b8b0..42367a793d 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -11637,9 +11637,11 @@ error_multiple_recovery_targets(void) static bool check_recovery_target(char **newval, void **extra, GucSource source) { - if (strcmp(*newval, "immediate") != 0 && strcmp(*newval, "") != 0) + if (strcmp(*newval, "immediate") != 0 && + strcmp(*newval, "latest") != 0 && + strcmp(*newval, "") != 0) { - GUC_check_errdetail("The only allowed value is \"immediate\"."); + GUC_check_errdetail("The only allowed value is \"immediate\" and \"latest\"."); return false; } return true; @@ -11649,13 +11651,16 @@ static void assign_recovery_target(const char *newval, void *extra) { if (recoveryTarget != RECOVERY_TARGET_UNSET && - recoveryTarget != RECOVERY_TARGET_IMMEDIATE) + recoveryTarget != RECOVERY_TARGET_IMMEDIATE && + recoveryTarget != RECOVERY_TARGET_LATEST) error_multiple_recovery_targets(); - if (newval && strcmp(newval, "") != 0) + if (!newval || strcmp(newval, "") == 0) + recoveryTarget = RECOVERY_TARGET_UNSET; + else if(strcmp(newval, "immediate") == 0) recoveryTarget = RECOVERY_TARGET_IMMEDIATE; else - recoveryTarget = RECOVERY_TARGET_UNSET; + recoveryTarget = RECOVERY_TARGET_LATEST; } static bool diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index d519252aad..55e7e4bd2c 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -84,7 +84,8 @@ typedef enum RECOVERY_TARGET_TIME, RECOVERY_TARGET_NAME, RECOVERY_TARGET_LSN, - 
RECOVERY_TARGET_IMMEDIATE + RECOVERY_TARGET_IMMEDIATE, + RECOVERY_TARGET_LATEST } RecoveryTargetType; /*
pgsql-hackers by date: