Re: pg_basebackup may fail to send feedbacks. - Mailing list pgsql-hackers

From Kyotaro HORIGUCHI
Subject Re: pg_basebackup may fail to send feedbacks.
Date
Msg-id 20150220.172914.241732690.horiguchi.kyotaro@lab.ntt.co.jp
Whole thread Raw
In response to Re: pg_basebackup may fail to send feedbacks.  (Fujii Masao <masao.fujii@gmail.com>)
Responses Re: pg_basebackup may fail to send feedbacks.  (Kyotaro HORIGUCHI <horiguchi.kyotaro@lab.ntt.co.jp>)
List pgsql-hackers
Hello,

At Thu, 19 Feb 2015 19:22:21 +0900, Fujii Masao <masao.fujii@gmail.com> wrote in
<CAHGQGwGLFLaFrCYcuikkVefNaoEL448TLSJ9oPsvb17v3foZHA@mail.gmail.com>
> On Wed, Feb 18, 2015 at 5:34 PM, Kyotaro HORIGUCHI
> <horiguchi.kyotaro@lab.ntt.co.jp> wrote:
> > Hello, this is the last patch for pg_basebackup/pg_receivexlog on
> > master (9.5). Preor versions don't have this issue.
> >
> > 4. basebackup_reply_fix_mst_v2.patch
> >   receivelog.c patch applyable on master.
> >
> > This is based on the same design with
> > walrcv_reply_fix_91_v2.patch in the aspect of gettimeofday().
> 
> Thanks for updating the patches! But I'm still not sure if the idea depending
> on the frequent calls of gettimeofday() for each WAL receive is good or not.

Neither do I. Nowadays, linux on AMD64/x64 environment has no
problem even if gettimeofday() called frequently, but Windows
seems to have a problem and I don't know about other platforms.

One possible timing source is LSN.

>  if ((blockpos - last_blockpos) / BLKSZ > 0)
>  {
>      now = feGetCurrentTimestamp();
>      if (feTimestampDifferenceExceeds(last_status, now,
..
>          if (!sendFeedback(conn, blockpos, now, false))
>      }
>  }
>  
>  last_blockpos = blockpos;

But once per PAGESZ can easily be more frequent than once per 10
records and XLOG_SEG_SIZE seems too big.  However I don't see any
bases to determine the frequency between them nor other than the
time itself.

SIGALRM seems to me to be more preferable to keep the main jobe
as fast as possible than introducing a code with no reasonable
basis.


> Some users may complain about the performance impact by such frequent calls
> and we may want to get rid of them from walreceiver loop in the future.
> If we adopt your idea now, I'm afraid that it would tie our hands in that case.
> 
> How much impact can such frequent calls of gettimeofday() have on replication
> performance? If it's not negligible, probably we should remove them at first
> and find out another idea to fix the problem you pointed. ISTM that it's not so
> difficult to remove them. Thought? Do you have any numbers which can prove
> that such frequent gettimeofday() has only ignorable impact on the performance?

The attached patch is 'the more sober' version of SIGLARM patch.

I'll search for the another way after this.

regards,

diff --git a/src/bin/pg_basebackup/receivelog.c b/src/bin/pg_basebackup/receivelog.c
index 8caedff..c55af83 100644
--- a/src/bin/pg_basebackup/receivelog.c
+++ b/src/bin/pg_basebackup/receivelog.c
@@ -16,6 +16,8 @@#include <sys/stat.h>#include <unistd.h>
+#include <signal.h>
+#include <sys/time.h>/* local includes */#include "receivelog.h"
@@ -33,6 +35,12 @@ static XLogRecPtr lastFlushPosition = InvalidXLogRecPtr;static bool still_sending = true;        /*
feedbackstill needs to be sent? */
 
+static bool standby_message_timeout_active = false;   /* timeout is active?  */
+static bool standby_message_timeout_expired = false;  /* timeout expired?  */
+
+static void SigAlrmHandler(int s);
+static int set_standby_message_timeout(long duration);
+static int cancel_standby_message_timeout(void);static PGresult *HandleCopyStream(PGconn *conn, XLogRecPtr startpos,
             uint32 timeline, char *basedir,               stream_stop_callback stream_stop, int
standby_message_timeout,
@@ -60,6 +68,14 @@ static long CalculateCopyStreamSleeptime(int64 now, int standby_message_timeout,static bool
ReadEndOfStreamingResult(PGresult*res, XLogRecPtr *startpos,                         uint32 *timeline);
 
+
+static void
+SigAlrmHandler(int s)
+{
+    standby_message_timeout_expired = true;
+}
+
+static boolmark_file_as_archived(const char *basedir, const char *fname){
@@ -802,6 +818,65 @@ ReadEndOfStreamingResult(PGresult *res, XLogRecPtr *startpos, uint32 *timeline)}/*
+ * set_standby_message_timeout: Set standby message timeout.
+ *
+ * duration is the timeout duration in milliseconds.
+ *
+ * Returns 0 on success and -1 on error.
+ */
+static int
+set_standby_message_timeout(long duration)
+{
+    struct itimerval itv;
+
+    /* negative duration means no standby_message_timeout. */
+    if (duration < 0) return 0;
+
+    MemSet(&itv, 0, sizeof(itv));
+    itv.it_value.tv_sec = duration / 1000;
+    itv.it_value.tv_usec = (duration % 1000) * 1000;
+    standby_message_timeout_active = true;
+    standby_message_timeout_expired = false;
+    if (setitimer(ITIMER_REAL, &itv, NULL) != 0)
+    {
+        fprintf(stderr, "could not set timer: %m");
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * cancel_standby_message_timeout: cancel standby message timeout if active.
+ *
+ * Returns 0 on success and -1 on error.
+ */
+static int
+cancel_standby_message_timeout(void)
+{
+    struct itimerval itv;
+
+    if (!standby_message_timeout_active)
+        return;
+
+    standby_message_timeout_active = false;
+
+    if (standby_message_timeout_expired)
+    {
+        standby_message_timeout_expired = false;
+        return 0;
+    }
+    MemSet(&itv, 0, sizeof(itv));
+    if (setitimer(ITIMER_REAL, &itv, NULL) != 0)
+    {
+        fprintf(stderr, "could not set timer: %m");
+        return -1;
+    }
+
+    return 0;
+}
+
+/* * The main loop of ReceiveXlogStream. Handles the COPY stream after * initiating streaming with the START_STREAMING
command.*
 
@@ -818,6 +893,15 @@ HandleCopyStream(PGconn *conn, XLogRecPtr startpos, uint32 timeline,    char       *copybuf =
NULL;   int64        last_status = -1;    XLogRecPtr    blockpos = startpos;
 
+    sighandler_t    oldhandler;
+
+    oldhandler = pqsignal(SIGALRM, SigAlrmHandler);
+
+    if (oldhandler != SigAlrmHandler)
+    {
+        /* SIGALRM should not be used here  */
+        Assert(oldhandler == SIG_DFL);
+    }    still_sending = true;
@@ -878,23 +962,9 @@ HandleCopyStream(PGconn *conn, XLogRecPtr startpos, uint32 timeline,         */        sleeptime =
CalculateCopyStreamSleeptime(now,standby_message_timeout,
last_status);
-        r = CopyStreamReceive(conn, sleeptime, ©buf);
-        while (r != 0)
+        while (r > 0)        {
-            if (r == -1)
-                goto error;
-            if (r == -2)
-            {
-                PGresult    *res = HandleEndOfCopyStream(conn, copybuf, blockpos,
-                                                         basedir, partial_suffix,
-                                                         stoppos, mark_done);
-                if (res == NULL)
-                    goto error;
-                else
-                    return res;
-            }
-            /* Check the message type. */            if (copybuf[0] == 'k')            {
@@ -925,14 +995,62 @@ HandleCopyStream(PGconn *conn, XLogRecPtr startpos, uint32 timeline,            }            /*
+              If standby_message_timeout has expired, send feed back and then
+             * set timeout again.
+             */
+            if (standby_message_timeout_expired)
+            {
+                if (!sendFeedback(conn, blockpos, now, false))
+                    goto error;
+                last_status = feGetCurrentTimestamp();
+
+                /* Set next timeout */
+                sleeptime = CalculateCopyStreamSleeptime(now, standby_message_timeout,
+                                                         last_status);
+                if (set_standby_message_timeout(sleeptime) < 0)
+                    goto error;
+            }
+
+            /*             * Process the received data, and any subsequent data we             * can read without
blocking.            */            r = CopyStreamReceive(conn, 0, ©buf);
 
+
+            /* If continuous input has come, set timeout for standby
+             * message. This fires a bit later than exact desired time but it
+             * would be earlier enough because standby_message_timeout is
+             * ususally set enough smaller than wal_sender_timeout.
+             */
+            if (r > 0 && !standby_message_timeout_active)
+            {
+                if (set_standby_message_timeout(sleeptime) < 0)
+                    goto error;
+
+            }        }
-    }
+        if (cancel_standby_message_timeout() < 0)
+            goto error;
+        if (r == -1)
+            goto error;
+        if (r == -2)
+        {
+            PGresult    *res = HandleEndOfCopyStream(conn, copybuf, blockpos,
+                                                     basedir, partial_suffix,
+                                                     stoppos, mark_done);
+            if (res == NULL)
+                goto error;
+            else
+                return res;
+        }
+
+    }
+    error:
+    cancel_standby_message_timeout();
+    pqsignal(SIGALRM, oldhandler);
+    if (copybuf != NULL)        PQfreemem(copybuf);    return NULL;

pgsql-hackers by date:

Previous
From: Michael Paquier
Date:
Subject: Re: Expanding the use of FLEXIBLE_ARRAY_MEMBER for declarations like foo[1]
Next
From: Alexey Klyukin
Date:
Subject: Re: Report search_path value back to the client.