Thread: XLogWrite uses palloc within a critical section

XLogWrite uses palloc within a critical section

From
Kyotaro HORIGUCHI
Date:
Hello.

I happened to see the following in XLogWrite.

>  ereport(PANIC,
>      (errcode_for_file_access(),
>       errmsg("could not seek in log file %s to offset %u: %m",
>          XLogFileNameP(ThisTimeLineID, openLogSegNo),
>          startoffset)));

where XLogFileNameP calls palloc within, and it is within a
critical section there. So it ends with assertion failure hiding
the PANIC message. We should use XLogFileName instead. The
problem has existed at least since 9.3. The code is frequently
revised so the patch needed to vary into four files.

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center
*** a/src/backend/access/transam/xlog.c
--- b/src/backend/access/transam/xlog.c
***************
*** 1641,1651 **** XLogWrite(XLogwrtRqst WriteRqst, bool flexible, bool xlog_switch)
              if (openLogOff != startoffset)
              {
                  if (lseek(openLogFile, (off_t) startoffset, SEEK_SET) < 0)
                      ereport(PANIC,
                              (errcode_for_file_access(),
                       errmsg("could not seek in log file %s to offset %u: %m",
!                             XLogFileNameP(ThisTimeLineID, openLogSegNo),
!                             startoffset)));
                  openLogOff = startoffset;
              }
  
--- 1641,1660 ----
              if (openLogOff != startoffset)
              {
                  if (lseek(openLogFile, (off_t) startoffset, SEEK_SET) < 0)
+                 {
+                     /*
+                      * We are within a critical section here so we cannot use
+                      * XLogFileNameP. Use this buffer instead.
+                      */
+                     char        xlogfname[MAXFNAMELEN];
+  
+                     XLogFileName(xlogfname, ThisTimeLineID, openLogSegNo);
                      ereport(PANIC,
                              (errcode_for_file_access(),
                       errmsg("could not seek in log file %s to offset %u: %m",
!                             xlogfname, startoffset)));
!                 }
! 
                  openLogOff = startoffset;
              }
  
***************
*** 1655,1660 **** XLogWrite(XLogwrtRqst WriteRqst, bool flexible, bool xlog_switch)
--- 1664,1677 ----
              errno = 0;
              if (write(openLogFile, from, nbytes) != nbytes)
              {
+                 /*
+                  * We are within a critical section here so we cannot use
+                  * XLogFileNameP. Use this buffer instead.
+                  */
+                 char        xlogfname[MAXFNAMELEN];
+ 
+                 XLogFileName(xlogfname, ThisTimeLineID, openLogSegNo);
+ 
                  /* if write didn't set errno, assume no disk space */
                  if (errno == 0)
                      errno = ENOSPC;
***************
*** 1662,1668 **** XLogWrite(XLogwrtRqst WriteRqst, bool flexible, bool xlog_switch)
                          (errcode_for_file_access(),
                           errmsg("could not write to log file %s "
                                  "at offset %u, length %lu: %m",
!                                 XLogFileNameP(ThisTimeLineID, openLogSegNo),
                                  openLogOff, (unsigned long) nbytes)));
              }
  
--- 1679,1685 ----
                          (errcode_for_file_access(),
                           errmsg("could not write to log file %s "
                                  "at offset %u, length %lu: %m",
!                                 xlogfname,
                                  openLogOff, (unsigned long) nbytes)));
              }
  
***************
*** 2802,2811 **** XLogFileClose(void)
  #endif
  
      if (close(openLogFile))
          ereport(PANIC,
                  (errcode_for_file_access(),
!                  errmsg("could not close log file %s: %m",
!                         XLogFileNameP(ThisTimeLineID, openLogSegNo))));
      openLogFile = -1;
  }
  
--- 2819,2837 ----
  #endif
  
      if (close(openLogFile))
+     {
+         /*
+          * We are within a critical section here so we cannot use
+          * XLogFileNameP. Use this buffer instead.
+          */
+         char        xlogfname[MAXFNAMELEN];
+ 
+         XLogFileName(xlogfname, ThisTimeLineID, openLogSegNo);
          ereport(PANIC,
                  (errcode_for_file_access(),
!                  errmsg("could not close log file %s: %m", xlogfname)));
!     }
! 
      openLogFile = -1;
  }
  
***************
*** 8607,8612 **** assign_xlog_sync_method(int new_sync_method, void *extra)
--- 8633,8646 ----
  void
  issue_xlog_fsync(int fd, XLogSegNo segno)
  {
+     /*
+      * We are within a critical section here so we cannot use let
+      * XLogFileNameP. Use this buffer instead.
+      */
+     char        xlogfname[MAXFNAMELEN];
+ 
+     XLogFileName(xlogfname, ThisTimeLineID, segno);
+ 
      switch (sync_method)
      {
          case SYNC_METHOD_FSYNC:
***************
*** 8614,8620 **** issue_xlog_fsync(int fd, XLogSegNo segno)
                  ereport(PANIC,
                          (errcode_for_file_access(),
                           errmsg("could not fsync log file %s: %m",
!                                 XLogFileNameP(ThisTimeLineID, segno))));
              break;
  #ifdef HAVE_FSYNC_WRITETHROUGH
          case SYNC_METHOD_FSYNC_WRITETHROUGH:
--- 8648,8654 ----
                  ereport(PANIC,
                          (errcode_for_file_access(),
                           errmsg("could not fsync log file %s: %m",
!                                 xlogfname)));
              break;
  #ifdef HAVE_FSYNC_WRITETHROUGH
          case SYNC_METHOD_FSYNC_WRITETHROUGH:
***************
*** 8622,8628 **** issue_xlog_fsync(int fd, XLogSegNo segno)
                  ereport(PANIC,
                          (errcode_for_file_access(),
                        errmsg("could not fsync write-through log file %s: %m",
!                              XLogFileNameP(ThisTimeLineID, segno))));
              break;
  #endif
  #ifdef HAVE_FDATASYNC
--- 8656,8662 ----
                  ereport(PANIC,
                          (errcode_for_file_access(),
                        errmsg("could not fsync write-through log file %s: %m",
!                              xlogfname)));
              break;
  #endif
  #ifdef HAVE_FDATASYNC
***************
*** 8631,8637 **** issue_xlog_fsync(int fd, XLogSegNo segno)
                  ereport(PANIC,
                          (errcode_for_file_access(),
                           errmsg("could not fdatasync log file %s: %m",
!                                 XLogFileNameP(ThisTimeLineID, segno))));
              break;
  #endif
          case SYNC_METHOD_OPEN:
--- 8665,8671 ----
                  ereport(PANIC,
                          (errcode_for_file_access(),
                           errmsg("could not fdatasync log file %s: %m",
!                                 xlogfname)));
              break;
  #endif
          case SYNC_METHOD_OPEN:
*** a/src/backend/access/transam/xlog.c
--- b/src/backend/access/transam/xlog.c
***************
*** 2463,2473 **** XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
              if (openLogOff != startoffset)
              {
                  if (lseek(openLogFile, (off_t) startoffset, SEEK_SET) < 0)
                      ereport(PANIC,
                              (errcode_for_file_access(),
                       errmsg("could not seek in log file %s to offset %u: %m",
!                             XLogFileNameP(ThisTimeLineID, openLogSegNo),
!                             startoffset)));
                  openLogOff = startoffset;
              }
  
--- 2463,2482 ----
              if (openLogOff != startoffset)
              {
                  if (lseek(openLogFile, (off_t) startoffset, SEEK_SET) < 0)
+                 {
+                     /*
+                      * We are within a critical section here so we cannot use
+                      * XLogFileNameP. Use this buffer instead.
+                      */
+                     char        xlogfname[MAXFNAMELEN];
+  
+                     XLogFileName(xlogfname, ThisTimeLineID, openLogSegNo);
                      ereport(PANIC,
                              (errcode_for_file_access(),
                       errmsg("could not seek in log file %s to offset %u: %m",
!                             xlogfname, startoffset)));
!                 }
! 
                  openLogOff = startoffset;
              }
  
***************
*** 2481,2494 **** XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
                  written = write(openLogFile, from, nleft);
                  if (written <= 0)
                  {
                      if (errno == EINTR)
                          continue;
                      ereport(PANIC,
                              (errcode_for_file_access(),
                               errmsg("could not write to log file %s "
                                      "at offset %u, length %zu: %m",
!                                  XLogFileNameP(ThisTimeLineID, openLogSegNo),
!                                     openLogOff, nbytes)));
                  }
                  nleft -= written;
                  from += written;
--- 2490,2510 ----
                  written = write(openLogFile, from, nleft);
                  if (written <= 0)
                  {
+                     /*
+                      * We are within a critical section here so we cannot use
+                      * XLogFileNameP. Use this buffer instead.
+                      */
+                     char        xlogfname[MAXFNAMELEN];
+ 
                      if (errno == EINTR)
                          continue;
+ 
+                     XLogFileName(xlogfname, ThisTimeLineID, openLogSegNo);
                      ereport(PANIC,
                              (errcode_for_file_access(),
                               errmsg("could not write to log file %s "
                                      "at offset %u, length %zu: %m",
!                                     xlogfname, openLogOff, nbytes)));
                  }
                  nleft -= written;
                  from += written;
***************
*** 3669,3678 **** XLogFileClose(void)
  #endif
  
      if (close(openLogFile))
          ereport(PANIC,
                  (errcode_for_file_access(),
!                  errmsg("could not close log file %s: %m",
!                         XLogFileNameP(ThisTimeLineID, openLogSegNo))));
      openLogFile = -1;
  }
  
--- 3685,3703 ----
  #endif
  
      if (close(openLogFile))
+     {
+         /*
+          * We are within a critical section here so we cannot use
+          * XLogFileNameP. Use this buffer instead.
+          */
+         char        xlogfname[MAXFNAMELEN];
+ 
+         XLogFileName(xlogfname, ThisTimeLineID, openLogSegNo);
          ereport(PANIC,
                  (errcode_for_file_access(),
!                  errmsg("could not close log file %s: %m", xlogfname)));
!     }
! 
      openLogFile = -1;
  }
  
***************
*** 9799,9804 **** assign_xlog_sync_method(int new_sync_method, void *extra)
--- 9824,9837 ----
  void
  issue_xlog_fsync(int fd, XLogSegNo segno)
  {
+     /*
+      * We are within a critical section here so we cannot use let
+      * XLogFileNameP. Use this buffer instead.
+      */
+     char        xlogfname[MAXFNAMELEN];
+ 
+     XLogFileName(xlogfname, ThisTimeLineID, segno);
+ 
      switch (sync_method)
      {
          case SYNC_METHOD_FSYNC:
***************
*** 9806,9812 **** issue_xlog_fsync(int fd, XLogSegNo segno)
                  ereport(PANIC,
                          (errcode_for_file_access(),
                           errmsg("could not fsync log file %s: %m",
!                                 XLogFileNameP(ThisTimeLineID, segno))));
              break;
  #ifdef HAVE_FSYNC_WRITETHROUGH
          case SYNC_METHOD_FSYNC_WRITETHROUGH:
--- 9839,9845 ----
                  ereport(PANIC,
                          (errcode_for_file_access(),
                           errmsg("could not fsync log file %s: %m",
!                                 xlogfname)));
              break;
  #ifdef HAVE_FSYNC_WRITETHROUGH
          case SYNC_METHOD_FSYNC_WRITETHROUGH:
***************
*** 9814,9820 **** issue_xlog_fsync(int fd, XLogSegNo segno)
                  ereport(PANIC,
                          (errcode_for_file_access(),
                        errmsg("could not fsync write-through log file %s: %m",
!                              XLogFileNameP(ThisTimeLineID, segno))));
              break;
  #endif
  #ifdef HAVE_FDATASYNC
--- 9847,9853 ----
                  ereport(PANIC,
                          (errcode_for_file_access(),
                        errmsg("could not fsync write-through log file %s: %m",
!                              xlogfname)));
              break;
  #endif
  #ifdef HAVE_FDATASYNC
***************
*** 9823,9829 **** issue_xlog_fsync(int fd, XLogSegNo segno)
                  ereport(PANIC,
                          (errcode_for_file_access(),
                           errmsg("could not fdatasync log file %s: %m",
!                                 XLogFileNameP(ThisTimeLineID, segno))));
              break;
  #endif
          case SYNC_METHOD_OPEN:
--- 9856,9862 ----
                  ereport(PANIC,
                          (errcode_for_file_access(),
                           errmsg("could not fdatasync log file %s: %m",
!                                 xlogfname)));
              break;
  #endif
          case SYNC_METHOD_OPEN:
*** a/src/backend/access/transam/xlog.c
--- b/src/backend/access/transam/xlog.c
***************
*** 2450,2460 **** XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
              if (openLogOff != startoffset)
              {
                  if (lseek(openLogFile, (off_t) startoffset, SEEK_SET) < 0)
                      ereport(PANIC,
                              (errcode_for_file_access(),
                               errmsg("could not seek in log file %s to offset %u: %m",
!                                     XLogFileNameP(ThisTimeLineID, openLogSegNo),
!                                     startoffset)));
                  openLogOff = startoffset;
              }
  
--- 2450,2469 ----
              if (openLogOff != startoffset)
              {
                  if (lseek(openLogFile, (off_t) startoffset, SEEK_SET) < 0)
+                 {
+                     /*
+                      * We are within a critical section here so we cannot use
+                      * XLogFileNameP. Use this buffer instead.
+                      */
+                     char        xlogfname[MAXFNAMELEN];
+ 
+                     XLogFileName(xlogfname, ThisTimeLineID, openLogSegNo);
                      ereport(PANIC,
                              (errcode_for_file_access(),
                               errmsg("could not seek in log file %s to offset %u: %m",
!                                     xlogfname, startoffset)));
!                 }
! 
                  openLogOff = startoffset;
              }
  
***************
*** 2470,2483 **** XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
                  pgstat_report_wait_end();
                  if (written <= 0)
                  {
                      if (errno == EINTR)
                          continue;
                      ereport(PANIC,
                              (errcode_for_file_access(),
                               errmsg("could not write to log file %s "
                                      "at offset %u, length %zu: %m",
!                                     XLogFileNameP(ThisTimeLineID, openLogSegNo),
!                                     openLogOff, nbytes)));
                  }
                  nleft -= written;
                  from += written;
--- 2479,2499 ----
                  pgstat_report_wait_end();
                  if (written <= 0)
                  {
+                     /*
+                      * We are within a critical section here so we cannot use
+                      * XLogFileNameP. Use this buffer instead.
+                      */
+                     char        xlogfname[MAXFNAMELEN];
+ 
                      if (errno == EINTR)
                          continue;
+ 
+                     XLogFileName(xlogfname, ThisTimeLineID, openLogSegNo);
                      ereport(PANIC,
                              (errcode_for_file_access(),
                               errmsg("could not write to log file %s "
                                      "at offset %u, length %zu: %m",
!                                     xlogfname, openLogOff, nbytes)));
                  }
                  nleft -= written;
                  from += written;
***************
*** 3718,3727 **** XLogFileClose(void)
  #endif
  
      if (close(openLogFile))
          ereport(PANIC,
                  (errcode_for_file_access(),
!                  errmsg("could not close log file %s: %m",
!                         XLogFileNameP(ThisTimeLineID, openLogSegNo))));
      openLogFile = -1;
  }
  
--- 3734,3752 ----
  #endif
  
      if (close(openLogFile))
+     {
+         /*
+          * We are within a critical section here so we cannot use
+          * XLogFileNameP. Use this buffer instead.
+          */
+         char        xlogfname[MAXFNAMELEN];
+ 
+         XLogFileName(xlogfname, ThisTimeLineID, openLogSegNo);
          ereport(PANIC,
                  (errcode_for_file_access(),
!                  errmsg("could not close log file %s: %m", xlogfname)));
!     }
! 
      openLogFile = -1;
  }
  
***************
*** 10117,10122 **** assign_xlog_sync_method(int new_sync_method, void *extra)
--- 10142,10155 ----
  void
  issue_xlog_fsync(int fd, XLogSegNo segno)
  {
+     /*
+      * We are within a critical section here so we cannot use let
+      * XLogFileNameP. Use this buffer instead.
+      */
+     char        xlogfname[MAXFNAMELEN];
+ 
+     XLogFileName(xlogfname, ThisTimeLineID, segno);
+ 
      switch (sync_method)
      {
          case SYNC_METHOD_FSYNC:
***************
*** 10124,10130 **** issue_xlog_fsync(int fd, XLogSegNo segno)
                  ereport(PANIC,
                          (errcode_for_file_access(),
                           errmsg("could not fsync log file %s: %m",
!                                 XLogFileNameP(ThisTimeLineID, segno))));
              break;
  #ifdef HAVE_FSYNC_WRITETHROUGH
          case SYNC_METHOD_FSYNC_WRITETHROUGH:
--- 10157,10163 ----
                  ereport(PANIC,
                          (errcode_for_file_access(),
                           errmsg("could not fsync log file %s: %m",
!                                 xlogfname)));
              break;
  #ifdef HAVE_FSYNC_WRITETHROUGH
          case SYNC_METHOD_FSYNC_WRITETHROUGH:
***************
*** 10132,10138 **** issue_xlog_fsync(int fd, XLogSegNo segno)
                  ereport(PANIC,
                          (errcode_for_file_access(),
                           errmsg("could not fsync write-through log file %s: %m",
!                                 XLogFileNameP(ThisTimeLineID, segno))));
              break;
  #endif
  #ifdef HAVE_FDATASYNC
--- 10165,10171 ----
                  ereport(PANIC,
                          (errcode_for_file_access(),
                           errmsg("could not fsync write-through log file %s: %m",
!                                 xlogfname)));
              break;
  #endif
  #ifdef HAVE_FDATASYNC
***************
*** 10141,10147 **** issue_xlog_fsync(int fd, XLogSegNo segno)
                  ereport(PANIC,
                          (errcode_for_file_access(),
                           errmsg("could not fdatasync log file %s: %m",
!                                 XLogFileNameP(ThisTimeLineID, segno))));
              break;
  #endif
          case SYNC_METHOD_OPEN:
--- 10174,10180 ----
                  ereport(PANIC,
                          (errcode_for_file_access(),
                           errmsg("could not fdatasync log file %s: %m",
!                                 xlogfname)));
              break;
  #endif
          case SYNC_METHOD_OPEN:
*** a/src/backend/access/transam/xlog.c
--- b/src/backend/access/transam/xlog.c
***************
*** 2474,2484 **** XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
              if (openLogOff != startoffset)
              {
                  if (lseek(openLogFile, (off_t) startoffset, SEEK_SET) < 0)
                      ereport(PANIC,
                              (errcode_for_file_access(),
                               errmsg("could not seek in log file %s to offset %u: %m",
!                                     XLogFileNameP(ThisTimeLineID, openLogSegNo),
!                                     startoffset)));
                  openLogOff = startoffset;
              }
  
--- 2474,2494 ----
              if (openLogOff != startoffset)
              {
                  if (lseek(openLogFile, (off_t) startoffset, SEEK_SET) < 0)
+                 {
+                     /*
+                      * We are within a critical section here so we cannot use
+                      * XLogFileNameP. Use this buffer instead.
+                      */
+                     char        xlogfname[MAXFNAMELEN];
+ 
+                     XLogFileName(xlogfname, ThisTimeLineID, openLogSegNo,
+                                  wal_segment_size);
                      ereport(PANIC,
                              (errcode_for_file_access(),
                               errmsg("could not seek in log file %s to offset %u: %m",
!                                     xlogfname, startoffset)));
!                 }
! 
                  openLogOff = startoffset;
              }
  
***************
*** 2494,2507 **** XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
                  pgstat_report_wait_end();
                  if (written <= 0)
                  {
                      if (errno == EINTR)
                          continue;
                      ereport(PANIC,
                              (errcode_for_file_access(),
                               errmsg("could not write to log file %s "
                                      "at offset %u, length %zu: %m",
!                                     XLogFileNameP(ThisTimeLineID, openLogSegNo),
!                                     openLogOff, nbytes)));
                  }
                  nleft -= written;
                  from += written;
--- 2504,2525 ----
                  pgstat_report_wait_end();
                  if (written <= 0)
                  {
+                     /*
+                      * We are within a critical section here so we cannot use
+                      * XLogFileNameP. Use this buffer instead.
+                      */
+                     char        xlogfname[MAXFNAMELEN];
+ 
                      if (errno == EINTR)
                          continue;
+ 
+                     XLogFileName(xlogfname, ThisTimeLineID, openLogSegNo,
+                                  wal_segment_size);
                      ereport(PANIC,
                              (errcode_for_file_access(),
                               errmsg("could not write to log file %s "
                                      "at offset %u, length %zu: %m",
!                                     xlogfname, openLogOff, nbytes)));
                  }
                  nleft -= written;
                  from += written;
***************
*** 3740,3749 **** XLogFileClose(void)
  #endif
  
      if (close(openLogFile))
          ereport(PANIC,
                  (errcode_for_file_access(),
!                  errmsg("could not close log file %s: %m",
!                         XLogFileNameP(ThisTimeLineID, openLogSegNo))));
      openLogFile = -1;
  }
  
--- 3758,3776 ----
  #endif
  
      if (close(openLogFile))
+     {
+         /*
+          * We are within a critical section here so we cannot use
+          * XLogFileNameP. Use this buffer instead.
+          */
+         char        xlogfname[MAXFNAMELEN];
+ 
+         XLogFileName(xlogfname, ThisTimeLineID, openLogSegNo, wal_segment_size);
          ereport(PANIC,
                  (errcode_for_file_access(),
!                  errmsg("could not close log file %s: %m", xlogfname)));
!     }
! 
      openLogFile = -1;
  }
  
***************
*** 10153,10158 **** assign_xlog_sync_method(int new_sync_method, void *extra)
--- 10180,10193 ----
  void
  issue_xlog_fsync(int fd, XLogSegNo segno)
  {
+     /*
+      * We are within a critical section here so we cannot use let
+      * XLogFileNameP. Use this buffer instead.
+      */
+     char        xlogfname[MAXFNAMELEN];
+ 
+     XLogFileName(xlogfname, ThisTimeLineID, segno, wal_segment_size);
+ 
      switch (sync_method)
      {
          case SYNC_METHOD_FSYNC:
***************
*** 10160,10166 **** issue_xlog_fsync(int fd, XLogSegNo segno)
                  ereport(PANIC,
                          (errcode_for_file_access(),
                           errmsg("could not fsync log file %s: %m",
!                                 XLogFileNameP(ThisTimeLineID, segno))));
              break;
  #ifdef HAVE_FSYNC_WRITETHROUGH
          case SYNC_METHOD_FSYNC_WRITETHROUGH:
--- 10195,10201 ----
                  ereport(PANIC,
                          (errcode_for_file_access(),
                           errmsg("could not fsync log file %s: %m",
!                                 xlogfname)));
              break;
  #ifdef HAVE_FSYNC_WRITETHROUGH
          case SYNC_METHOD_FSYNC_WRITETHROUGH:
***************
*** 10168,10174 **** issue_xlog_fsync(int fd, XLogSegNo segno)
                  ereport(PANIC,
                          (errcode_for_file_access(),
                           errmsg("could not fsync write-through log file %s: %m",
!                                 XLogFileNameP(ThisTimeLineID, segno))));
              break;
  #endif
  #ifdef HAVE_FDATASYNC
--- 10203,10209 ----
                  ereport(PANIC,
                          (errcode_for_file_access(),
                           errmsg("could not fsync write-through log file %s: %m",
!                                 xlogfname)));
              break;
  #endif
  #ifdef HAVE_FDATASYNC
***************
*** 10177,10183 **** issue_xlog_fsync(int fd, XLogSegNo segno)
                  ereport(PANIC,
                          (errcode_for_file_access(),
                           errmsg("could not fdatasync log file %s: %m",
!                                 XLogFileNameP(ThisTimeLineID, segno))));
              break;
  #endif
          case SYNC_METHOD_OPEN:
--- 10212,10218 ----
                  ereport(PANIC,
                          (errcode_for_file_access(),
                           errmsg("could not fdatasync log file %s: %m",
!                                 xlogfname)));
              break;
  #endif
          case SYNC_METHOD_OPEN:

Re: XLogWrite uses palloc within a critical section

From
Heikki Linnakangas
Date:
On 25/05/18 07:45, Kyotaro HORIGUCHI wrote:
> Hello.
> 
> I happened to see the following in XLogWrite.
> 
>>   ereport(PANIC,
>>       (errcode_for_file_access(),
>>        errmsg("could not seek in log file %s to offset %u: %m",
>>           XLogFileNameP(ThisTimeLineID, openLogSegNo),
>>           startoffset)));
> 
> where XLogFileNameP calls palloc within, and it is within a
> critical section there. So it ends with assertion failure hiding
> the PANIC message. We should use XLogFileName instead. The
> problem has existed at least since 9.3. The code is frequently
> revised so the patch needed to vary into four files.

Hmm, that's rather annoying, it sure would be handy if we could do small 
palloc()s like this in error messages safely.

I wonder if we could switch to ErrorContext in errstart()? That way, any 
small allocations in the ereport() arguments, like what XLogFileNameP() 
does, would be allocated in ErrorContext.

- Heikki


Re: XLogWrite uses palloc within a critical section

From
Kyotaro HORIGUCHI
Date:
Thank you for the comment.

At Fri, 25 May 2018 09:05:21 +0300, Heikki Linnakangas <hlinnaka@iki.fi> wrote in
<466a3c6d-7986-8cb1-d908-e85aa6a090e0@iki.fi>
> On 25/05/18 07:45, Kyotaro HORIGUCHI wrote:
> > Hello.
> > I happened to see the following in XLogWrite.
> > 
> >>   ereport(PANIC,
> >>       (errcode_for_file_access(),
> >>        errmsg("could not seek in log file %s to offset %u: %m",
> >>           XLogFileNameP(ThisTimeLineID, openLogSegNo),
> >>           startoffset)));
> > where XLogFileNameP calls palloc within, and it is within a
> > critical section there. So it ends with assertion failure hiding
> > the PANIC message. We should use XLogFileName instead. The
> > problem has existed at least since 9.3. The code is frequently
> > revised so the patch needed to vary into four files.
> 
> Hmm, that's rather annoying, it sure would be handy if we could do
> small palloc()s like this in error messages safely.
> 
> I wonder if we could switch to ErrorContext in errstart()? That way,
> any small allocations in the ereport() arguments, like what
> XLogFileNameP() does, would be allocated in ErrorContext.

It already controlling error context per-recursion-level basis
but it doesn't work for the top-level errmsg(). I'm not sure the
basis of edata->assoc_context, it seems always set to
ErrorContext.

As a PoC, just moving to and restore from ErrorContext at the top
level seems working fine. (The first attached and it changes only
ereport.)

# The second is less invasive version of the previous patch..

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index 16531f7a0f..0e4877240f 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -154,6 +154,8 @@ static bool saved_timeval_set = false;
 static char formatted_start_time[FORMATTED_TS_LEN];
 static char formatted_log_time[FORMATTED_TS_LEN];
 
+/* Store memory context to restore after error reporting */
+MemoryContext    elog_oldcontext = NULL;
 
 /* Macro for checking errordata_stack_depth is reasonable */
 #define CHECK_STACK_DEPTH() \
@@ -396,8 +398,11 @@ errstart(int elevel, const char *filename, int lineno,
      * Any allocations for this error state level should go into ErrorContext
      */
     edata->assoc_context = ErrorContext;
-
     recursion_depth--;
+
+    /* Set memory context to ErrorContext if this is the toplevel */
+    if (recursion_depth == 0)
+        elog_oldcontext = MemoryContextSwitchTo(ErrorContext);
     return true;
 }
 
@@ -414,19 +419,12 @@ errfinish(int dummy,...)
 {
     ErrorData  *edata = &errordata[errordata_stack_depth];
     int            elevel;
-    MemoryContext oldcontext;
     ErrorContextCallback *econtext;
 
     recursion_depth++;
     CHECK_STACK_DEPTH();
     elevel = edata->elevel;
 
-    /*
-     * Do processing in ErrorContext, which we hope has enough reserved space
-     * to report an error.
-     */
-    oldcontext = MemoryContextSwitchTo(ErrorContext);
-
     /*
      * Call any context callback functions.  Errors occurring in callback
      * functions will be treated as recursive errors --- this ensures we will
@@ -509,9 +507,12 @@ errfinish(int dummy,...)
     errordata_stack_depth--;
 
     /* Exit error-handling context */
-    MemoryContextSwitchTo(oldcontext);
     recursion_depth--;
 
+    /* Restore memory context if this is the top-level */
+    if (recursion_depth == 0 && elog_oldcontext)
+        MemoryContextSwitchTo(elog_oldcontext);
+
     /*
      * Perform error recovery action as specified by elevel.
      */
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index adbd6a2126..9cd7a106ba 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2477,7 +2477,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
                     ereport(PANIC,
                             (errcode_for_file_access(),
                              errmsg("could not seek in log file %s to offset %u: %m",
-                                    XLogFileNameP(ThisTimeLineID, openLogSegNo),
+                                    XLogFileNameEP(ThisTimeLineID, openLogSegNo),
                                     startoffset)));
                 openLogOff = startoffset;
             }
@@ -2500,7 +2500,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
                             (errcode_for_file_access(),
                              errmsg("could not write to log file %s "
                                     "at offset %u, length %zu: %m",
-                                    XLogFileNameP(ThisTimeLineID, openLogSegNo),
+                                    XLogFileNameEP(ThisTimeLineID, openLogSegNo),
                                     openLogOff, nbytes)));
                 }
                 nleft -= written;
@@ -3743,7 +3743,8 @@ XLogFileClose(void)
         ereport(PANIC,
                 (errcode_for_file_access(),
                  errmsg("could not close log file %s: %m",
-                        XLogFileNameP(ThisTimeLineID, openLogSegNo))));
+                        XLogFileNameEP(ThisTimeLineID, openLogSegNo))));
+
     openLogFile = -1;
 }
 
@@ -10160,7 +10161,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno)
                 ereport(PANIC,
                         (errcode_for_file_access(),
                          errmsg("could not fsync log file %s: %m",
-                                XLogFileNameP(ThisTimeLineID, segno))));
+                                XLogFileNameEP(ThisTimeLineID, segno))));
             break;
 #ifdef HAVE_FSYNC_WRITETHROUGH
         case SYNC_METHOD_FSYNC_WRITETHROUGH:
@@ -10168,7 +10169,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno)
                 ereport(PANIC,
                         (errcode_for_file_access(),
                          errmsg("could not fsync write-through log file %s: %m",
-                                XLogFileNameP(ThisTimeLineID, segno))));
+                                XLogFileNameEP(ThisTimeLineID, segno))));
             break;
 #endif
 #ifdef HAVE_FDATASYNC
@@ -10177,7 +10178,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno)
                 ereport(PANIC,
                         (errcode_for_file_access(),
                          errmsg("could not fdatasync log file %s: %m",
-                                XLogFileNameP(ThisTimeLineID, segno))));
+                                XLogFileNameEP(ThisTimeLineID, segno))));
             break;
 #endif
         case SYNC_METHOD_OPEN:
@@ -10202,6 +10203,21 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno)
     return result;
 }
 
+/*
+ * Do the same thing with XLogFileNameP but using ErrorContext.
+ *
+ * This is intended to be used in the parameters of ereport invoked in a
+ * critical section.
+ */
+char *
+XLogFileNameEP(TimeLineID tli, XLogSegNo segno)
+{
+    char       *result = MemoryContextAlloc(ErrorContext, MAXFNAMELEN);
+
+    XLogFileName(result, tli, segno, wal_segment_size);
+    return result;
+}
+
 /*
  * do_pg_start_backup is the workhorse of the user-visible pg_start_backup()
  * function. It creates the necessary starting checkpoint and constructs the
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 421ba6d775..e0ec1c0c91 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -253,6 +253,7 @@ extern void SetRecoveryPause(bool recoveryPause);
 extern TimestampTz GetLatestXTime(void);
 extern TimestampTz GetCurrentChunkReplayStartTime(void);
 extern char *XLogFileNameP(TimeLineID tli, XLogSegNo segno);
+extern char *XLogFileNameEP(TimeLineID tli, XLogSegNo segno);
 
 extern void UpdateControlFile(void);
 extern uint64 GetSystemIdentifier(void);