Re: XLogWrite uses palloc within a critical section - Mailing list pgsql-hackers

From Kyotaro HORIGUCHI
Subject Re: XLogWrite uses palloc within a critical section
Date
Msg-id 20180525.165742.66009686.horiguchi.kyotaro@lab.ntt.co.jp
Whole thread Raw
In response to Re: XLogWrite uses palloc within a critical section  (Heikki Linnakangas <hlinnaka@iki.fi>)
List pgsql-hackers
Thank you for the comment.

At Fri, 25 May 2018 09:05:21 +0300, Heikki Linnakangas <hlinnaka@iki.fi> wrote in
<466a3c6d-7986-8cb1-d908-e85aa6a090e0@iki.fi>
> On 25/05/18 07:45, Kyotaro HORIGUCHI wrote:
> > Hello.
> > I happened to see the following in XLogWrite.
> > 
> >>   ereport(PANIC,
> >>       (errcode_for_file_access(),
> >>        errmsg("could not seek in log file %s to offset %u: %m",
> >>           XLogFileNameP(ThisTimeLineID, openLogSegNo),
> >>           startoffset)));
> > where XLogFileNameP calls palloc within, and it is within a
> > critical section there. So it ends with assertion failure hiding
> > the PANIC message. We should use XLogFileName instead. The
> > problem has existed at least since 9.3. The code is frequently
> > revised so the patch needed to vary into four files.
> 
> Hmm, that's rather annoying, it sure would be handy if we could do
> small palloc()s like this in error messages safely.
> 
> I wonder if we could switch to ErrorContext in errstart()? That way,
> any small allocations in the ereport() arguments, like what
> XLogFileNameP() does, would be allocated in ErrorContext.

It already controlling error context per-recursion-level basis
but it doesn't work for the top-level errmsg(). I'm not sure the
basis of edata->assoc_context, it seems always set to
ErrorContext.

As a PoC, just moving to and restore from ErrorContext at the top
level seems working fine. (The first attached and it changes only
ereport.)

# The second is less invasive version of the previous patch..

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index 16531f7a0f..0e4877240f 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -154,6 +154,8 @@ static bool saved_timeval_set = false;
 static char formatted_start_time[FORMATTED_TS_LEN];
 static char formatted_log_time[FORMATTED_TS_LEN];
 
+/* Store memory context to restore after error reporting */
+MemoryContext    elog_oldcontext = NULL;
 
 /* Macro for checking errordata_stack_depth is reasonable */
 #define CHECK_STACK_DEPTH() \
@@ -396,8 +398,11 @@ errstart(int elevel, const char *filename, int lineno,
      * Any allocations for this error state level should go into ErrorContext
      */
     edata->assoc_context = ErrorContext;
-
     recursion_depth--;
+
+    /* Set memory context to ErrorContext if this is the toplevel */
+    if (recursion_depth == 0)
+        elog_oldcontext = MemoryContextSwitchTo(ErrorContext);
     return true;
 }
 
@@ -414,19 +419,12 @@ errfinish(int dummy,...)
 {
     ErrorData  *edata = &errordata[errordata_stack_depth];
     int            elevel;
-    MemoryContext oldcontext;
     ErrorContextCallback *econtext;
 
     recursion_depth++;
     CHECK_STACK_DEPTH();
     elevel = edata->elevel;
 
-    /*
-     * Do processing in ErrorContext, which we hope has enough reserved space
-     * to report an error.
-     */
-    oldcontext = MemoryContextSwitchTo(ErrorContext);
-
     /*
      * Call any context callback functions.  Errors occurring in callback
      * functions will be treated as recursive errors --- this ensures we will
@@ -509,9 +507,12 @@ errfinish(int dummy,...)
     errordata_stack_depth--;
 
     /* Exit error-handling context */
-    MemoryContextSwitchTo(oldcontext);
     recursion_depth--;
 
+    /* Restore memory context if this is the top-level */
+    if (recursion_depth == 0 && elog_oldcontext)
+        MemoryContextSwitchTo(elog_oldcontext);
+
     /*
      * Perform error recovery action as specified by elevel.
      */
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index adbd6a2126..9cd7a106ba 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2477,7 +2477,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
                     ereport(PANIC,
                             (errcode_for_file_access(),
                              errmsg("could not seek in log file %s to offset %u: %m",
-                                    XLogFileNameP(ThisTimeLineID, openLogSegNo),
+                                    XLogFileNameEP(ThisTimeLineID, openLogSegNo),
                                     startoffset)));
                 openLogOff = startoffset;
             }
@@ -2500,7 +2500,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
                             (errcode_for_file_access(),
                              errmsg("could not write to log file %s "
                                     "at offset %u, length %zu: %m",
-                                    XLogFileNameP(ThisTimeLineID, openLogSegNo),
+                                    XLogFileNameEP(ThisTimeLineID, openLogSegNo),
                                     openLogOff, nbytes)));
                 }
                 nleft -= written;
@@ -3743,7 +3743,8 @@ XLogFileClose(void)
         ereport(PANIC,
                 (errcode_for_file_access(),
                  errmsg("could not close log file %s: %m",
-                        XLogFileNameP(ThisTimeLineID, openLogSegNo))));
+                        XLogFileNameEP(ThisTimeLineID, openLogSegNo))));
+
     openLogFile = -1;
 }
 
@@ -10160,7 +10161,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno)
                 ereport(PANIC,
                         (errcode_for_file_access(),
                          errmsg("could not fsync log file %s: %m",
-                                XLogFileNameP(ThisTimeLineID, segno))));
+                                XLogFileNameEP(ThisTimeLineID, segno))));
             break;
 #ifdef HAVE_FSYNC_WRITETHROUGH
         case SYNC_METHOD_FSYNC_WRITETHROUGH:
@@ -10168,7 +10169,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno)
                 ereport(PANIC,
                         (errcode_for_file_access(),
                          errmsg("could not fsync write-through log file %s: %m",
-                                XLogFileNameP(ThisTimeLineID, segno))));
+                                XLogFileNameEP(ThisTimeLineID, segno))));
             break;
 #endif
 #ifdef HAVE_FDATASYNC
@@ -10177,7 +10178,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno)
                 ereport(PANIC,
                         (errcode_for_file_access(),
                          errmsg("could not fdatasync log file %s: %m",
-                                XLogFileNameP(ThisTimeLineID, segno))));
+                                XLogFileNameEP(ThisTimeLineID, segno))));
             break;
 #endif
         case SYNC_METHOD_OPEN:
@@ -10202,6 +10203,21 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno)
     return result;
 }
 
+/*
+ * Do the same thing with XLogFileNameP but using ErrorContext.
+ *
+ * This is intended to be used in the parameters of ereport invoked in a
+ * critical section.
+ */
+char *
+XLogFileNameEP(TimeLineID tli, XLogSegNo segno)
+{
+    char       *result = MemoryContextAlloc(ErrorContext, MAXFNAMELEN);
+
+    XLogFileName(result, tli, segno, wal_segment_size);
+    return result;
+}
+
 /*
  * do_pg_start_backup is the workhorse of the user-visible pg_start_backup()
  * function. It creates the necessary starting checkpoint and constructs the
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 421ba6d775..e0ec1c0c91 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -253,6 +253,7 @@ extern void SetRecoveryPause(bool recoveryPause);
 extern TimestampTz GetLatestXTime(void);
 extern TimestampTz GetCurrentChunkReplayStartTime(void);
 extern char *XLogFileNameP(TimeLineID tli, XLogSegNo segno);
+extern char *XLogFileNameEP(TimeLineID tli, XLogSegNo segno);
 
 extern void UpdateControlFile(void);
 extern uint64 GetSystemIdentifier(void);

pgsql-hackers by date:

Previous
From: Ashwin Agrawal
Date:
Subject: Re: Keeping temporary tables in shared buffers
Next
From: Ashwin Agrawal
Date:
Subject: Avoiding Tablespace path collision for primary and standby