Re: [BUG] Checkpointer on hot standby runs without looking checkpoint_segments - Mailing list pgsql-hackers

From Kyotaro HORIGUCHI
Subject Re: [BUG] Checkpointer on hot standby runs without looking checkpoint_segments
Date
Msg-id 20120419.142007.249982022.horiguchi.kyotaro@lab.ntt.co.jp
Whole thread Raw
In response to Re: [BUG] Checkpointer on hot standby runs without looking checkpoint_segments  (Kyotaro HORIGUCHI <horiguchi.kyotaro@lab.ntt.co.jp>)
Responses Re: [BUG] Checkpointer on hot standby runs without looking checkpoint_segments
List pgsql-hackers
Hello, this is a new version of the standby checkpoint_segments patch.
- xlog.c: Make StandbyMode shared.
- checkpointer.c: Use IsStandbyMode() to check whether the postmaster is in standby mode.

regards,

-- 
Kyotaro Horiguchi
NTT Open Source Software Center

== My e-mail address has been changed since Apr. 1, 2012.

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 8d0aabf..2457840 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -177,6 +177,12 @@ static bool LocalRecoveryInProgress = true;static bool LocalHotStandbyActive = false;/*
+ * Local copy of SharedIsStandbyMode variable.  True actually means "not known,
+ * need to check the shared state".
+ */
+static bool LocalIsStandbyMode = true;
+
+/* * Local state for XLogInsertAllowed(): *        1: unconditionally allowed to insert XLOG *        0:
unconditionally not allowed to insert XLOG
 
@@ -206,7 +212,6 @@ static TimestampTz recoveryTargetTime;static char *recoveryTargetName;/* options taken from
recovery.conf for XLOG streaming */
 
-static bool StandbyMode = false;static char *PrimaryConnInfo = NULL;static char *TriggerFile = NULL;
@@ -427,6 +432,11 @@ typedef struct XLogCtlData    bool        SharedHotStandbyActive;    /*
+     * SharedIsStandbyMode indicates if we are running in standby mode.
+     */
+    bool        SharedIsStandbyMode;
+
+    /*     * recoveryWakeupLatch is used to wake up the startup process to continue     * WAL replay, if it is waiting
for WAL to arrive or failover trigger file     * to appear.
 
@@ -619,6 +629,7 @@ static void SetLatestXTime(TimestampTz xtime);static void SetCurrentChunkStartTime(TimestampTz
xtime);static void CheckRequiredParameterValues(void);static void XLogReportParameters(void);
 
+static void ExitStandbyMode(void);static void LocalSetXLogInsertAllowed(void);static void CheckPointGuts(XLogRecPtr
checkPointRedo,int flags);static void KeepLogSeg(XLogRecPtr recptr, uint32 *logId, uint32 *logSeg);
 
@@ -3115,7 +3126,7 @@ RestoreArchivedFile(char *path, const char *xlogfname,                 * incorrectly conclude
we've reached the end of WAL and we're                 * done recovering ...                 */
 
-                if (StandbyMode && stat_buf.st_size < expectedSize)
+                if (IsStandbyMode() && stat_buf.st_size < expectedSize)                    elevel = DEBUG1;
   else                    elevel = FATAL;
 
@@ -4072,7 +4083,7 @@ next_record_is_invalid:    }    /* In standby-mode, keep trying */
-    if (StandbyMode)
+    if (IsStandbyMode())        goto retry;    else        return NULL;
@@ -5098,6 +5109,7 @@ XLOGShmemInit(void)    XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
XLogCtl->SharedRecoveryInProgress= true;    XLogCtl->SharedHotStandbyActive = false;
 
+    XLogCtl->SharedIsStandbyMode = true;    XLogCtl->Insert.currpage = (XLogPageHeader) (XLogCtl->pages);
SpinLockInit(&XLogCtl->info_lck);   InitSharedLatch(&XLogCtl->recoveryWakeupLatch);
 
@@ -5289,6 +5301,7 @@ readRecoveryCommandFile(void)    FILE       *fd;    TimeLineID    rtli = 0;    bool
rtliGiven= false;
 
+    bool        standby_mode = false;    ConfigVariable *item,               *head = NULL,               *tail =
NULL;
@@ -5439,13 +5452,14 @@ readRecoveryCommandFile(void)        }        else if (strcmp(item->name, "standby_mode") == 0)
      {
 
-            if (!parse_bool(item->value, &StandbyMode))
+            if (!parse_bool(item->value, &standby_mode))                ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),                        errmsg("parameter \"%s\" requires a Boolean value",
                            "standby_mode")));            ereport(DEBUG2,
(errmsg_internal("standby_mode= '%s'", item->value)));
 
+        }        else if (strcmp(item->name, "primary_conninfo") == 0)        {
@@ -5470,7 +5484,7 @@ readRecoveryCommandFile(void)    /*     * Check for compulsory parameters     */
-    if (StandbyMode)
+    if (standby_mode)    {        if (PrimaryConnInfo == NULL && recoveryRestoreCommand == NULL)
ereport(WARNING,
@@ -5480,6 +5494,7 @@ readRecoveryCommandFile(void)    }    else    {
+        ExitStandbyMode();        if (recoveryRestoreCommand == NULL)            ereport(FATAL,
(errmsg("recovery command file \"%s\" must specify restore_command when standby mode is not enabled",
 
@@ -6086,7 +6101,7 @@ StartupXLOG(void)    if (InArchiveRecovery)    {
-        if (StandbyMode)
+        if (IsStandbyMode())            ereport(LOG,                    (errmsg("entering standby mode")));
else if (recoveryTarget == RECOVERY_TARGET_XID)
 
@@ -6110,7 +6125,7 @@ StartupXLOG(void)     * Take ownership of the wakeup latch if we're going to sleep during     *
recovery.    */
 
-    if (StandbyMode)
+    if (IsStandbyMode())        OwnLatch(&XLogCtl->recoveryWakeupLatch);    if (read_backup_label(&checkPointLoc,
&backupEndRequired,
@@ -6169,7 +6184,7 @@ StartupXLOG(void)                    (errmsg("checkpoint record is at %X/%X",
      checkPointLoc.xlogid, checkPointLoc.xrecoff)));        }
 
-        else if (StandbyMode)
+        else if (IsStandbyMode())        {            /*             * The last valid checkpoint record required for a
streaming
@@ -6683,7 +6698,7 @@ StartupXLOG(void)     * We don't need the latch anymore. It's not strictly necessary to disown
* it, but let's do it for the sake of tidiness.     */
 
-    if (StandbyMode)
+    if (IsStandbyMode())        DisownLatch(&XLogCtl->recoveryWakeupLatch);    /*
@@ -6691,7 +6706,7 @@ StartupXLOG(void)     * recovery to force fetching the files (which would be required at end of
 * recovery, e.g., timeline history file) from archive or pg_xlog.     */
 
-    StandbyMode = false;
+    ExitStandbyMode();    /*     * Re-fetch the last valid or last applied record, so we can identify the
@@ -7096,7 +7111,7 @@ RecoveryInProgress(void) * since normal backends won't ever be able to connect until this returns
*true. Postmaster knows this by way of signal, not via shared memory. *
 
- * Unlike testing standbyState, this works in any process that's connected to
+ * Unlike testing InRecovery, this works in any process that's connected to * shared memory. */bool
@@ -7124,6 +7139,53 @@ HotStandbyActive(void)}/*
+ * Are we running in standby mode?
+ *
+ * Unlike testing InRecovery, this works in any process that's connected to
+ * shared memory.
+ */
+bool
+IsStandbyMode(void)
+{
+    /*
+     * We check shared state each time only until exiting standby mode. We
+     * can't re-enter standby mode, so there's no need to keep checking after
+     * the shared variable has once been seen false.
+     */
+    if (!LocalIsStandbyMode)
+        return false;
+    else
+    {
+        /* use volatile pointer to prevent code rearrangement */
+        volatile XLogCtlData *xlogctl = XLogCtl;
+        
+        /* spinlock is essential on machines with weak memory ordering! */
+        SpinLockAcquire(&xlogctl->info_lck);
+        LocalIsStandbyMode = xlogctl->SharedIsStandbyMode;
+        SpinLockRelease(&xlogctl->info_lck);
+
+        return LocalIsStandbyMode;
+    }
+
+}
+
+
+/*
+ * Inform the processes connected to shared memory that we exit standby mode.
+ */
+static void
+ExitStandbyMode()
+{
+    /* use volatile pointer to prevent code rearrangement */
+    volatile XLogCtlData *xlogctl = XLogCtl;
+
+    /* spinlock is essential on machines with weak memory ordering! */
+    SpinLockAcquire(&xlogctl->info_lck);
+    LocalIsStandbyMode = xlogctl->SharedIsStandbyMode = false;
+    SpinLockRelease(&xlogctl->info_lck);
+}
+
+/* * Is this process allowed to insert new WAL records? * * Ordinarily this is essentially equivalent to
!RecoveryInProgress().
@@ -10026,7 +10088,7 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,         * Request a
restartpoint if we've replayed too much         * xlog since the last one.         */
 
-        if (StandbyMode && bgwriterLaunched)
+        if (IsStandbyMode() && bgwriterLaunched)        {            if (XLogCheckpointNeeded(readId, readSeg))
   {
 
@@ -10048,7 +10110,7 @@ retry:    if (readFile < 0 ||        (readSource == XLOG_FROM_STREAM && !XLByteLT(*RecPtr,
receivedUpto)))   {
 
-        if (StandbyMode)
+        if (IsStandbyMode())        {            /*             * In standby mode, wait for the requested record to
become
@@ -10362,7 +10424,7 @@ next_record_is_invalid:    readSource = 0;    /* In standby-mode, keep trying */
-    if (StandbyMode)
+    if (IsStandbyMode())        goto retry;    else        return false;
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index c9473f7..f91bd52 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -491,8 +491,8 @@ CheckpointerMain(void)             * Initialize checkpointer-private variables used during
checkpoint.            */            ckpt_active = true;
 
-            if (!do_restartpoint)
-                ckpt_start_recptr = GetInsertRecPtr();
+            ckpt_start_recptr =
+                do_restartpoint ? GetXLogReplayRecPtr(NULL) : GetInsertRecPtr();            ckpt_start_time = now;
      ckpt_cached_elapsed = 0;
 
@@ -715,6 +715,7 @@ IsCheckpointOnSchedule(double progress)    struct timeval now;    double        elapsed_xlogs,
         elapsed_time;
 
+    bool        recovery_in_progress;    Assert(ckpt_active);
@@ -731,18 +732,27 @@ IsCheckpointOnSchedule(double progress)        return false;    /*
-     * Check progress against WAL segments written and checkpoint_segments.
+     * Check progress against WAL segments written, or replayed for
+     * hot standby, and checkpoint_segments.     *     * We compare the current WAL insert location against the
location
-     * computed before calling CreateCheckPoint. The code in XLogInsert that
-     * actually triggers a checkpoint when checkpoint_segments is exceeded
-     * compares against RedoRecptr, so this is not completely accurate.
-     * However, it's good enough for our purposes, we're only calculating an
-     * estimate anyway.
+     * computed before calling CreateCheckPoint. The code in
+     * XLogInsert that actually triggers a checkpoint when
+     * checkpoint_segments is exceeded compares against RedoRecPtr.
+     * Similarly, we consult WAL replay location instead on hot
+     * standbys and XLogPageRead compares it against RedoRecPtr, too.
+     * Although these are not completely accurate, they are good enough for
+     * our purposes; we're only calculating an estimate anyway.
+     */
+
+    /*
+     * Inhibit governing progress by segments in archive recovery.     */
-    if (!RecoveryInProgress())
+    recovery_in_progress = RecoveryInProgress();
+    if (!recovery_in_progress || IsStandbyMode())    {
-        recptr = GetInsertRecPtr();
+        recptr = recovery_in_progress ? GetXLogReplayRecPtr(NULL) :
+            GetInsertRecPtr();        elapsed_xlogs =            (((double) (int32) (recptr.xlogid -
ckpt_start_recptr.xlogid))* XLogSegsPerFile +             ((double) recptr.xrecoff - (double)
ckpt_start_recptr.xrecoff)/ XLogSegSize) /
 
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index f8aecef..329119b 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -285,6 +285,7 @@ extern void issue_xlog_fsync(int fd, uint32 log, uint32 seg);extern bool
RecoveryInProgress(void);extern bool HotStandbyActive(void);
 
+extern bool IsStandbyMode(void);extern bool XLogInsertAllowed(void);extern void GetXLogReceiptTime(TimestampTz *rtime,
bool*fromStream);extern XLogRecPtr GetXLogReplayRecPtr(XLogRecPtr *restoreLastRecPtr); 

pgsql-hackers by date:

Previous
From: Amit Kapila
Date:
Subject: clarification for generate join implied equalities
Next
From: Tom Lane
Date:
Subject: Re: Improving our clauseless-join heuristics