Re: XLogWrite uses palloc within a critical section - Mailing list pgsql-hackers
From | Kyotaro HORIGUCHI |
---|---|
Subject | Re: XLogWrite uses palloc within a critical section |
Date | |
Msg-id | 20180525.165742.66009686.horiguchi.kyotaro@lab.ntt.co.jp Whole thread Raw |
In response to | Re: XLogWrite uses palloc within a critical section (Heikki Linnakangas <hlinnaka@iki.fi>) |
List | pgsql-hackers |
Thank you for the comment. At Fri, 25 May 2018 09:05:21 +0300, Heikki Linnakangas <hlinnaka@iki.fi> wrote in <466a3c6d-7986-8cb1-d908-e85aa6a090e0@iki.fi> > On 25/05/18 07:45, Kyotaro HORIGUCHI wrote: > > Hello. > > I happened to see the following in XLogWrite. > > > >> ereport(PANIC, > >> (errcode_for_file_access(), > >> errmsg("could not seek in log file %s to offset %u: %m", > >> XLogFileNameP(ThisTimeLineID, openLogSegNo), > >> startoffset))); > > where XLogFileNameP calls palloc within, and it is within a > > critical section there. So it ends with assertion failure hiding > > the PANIC message. We should use XLogFileName instead. The > > problem has existed at least since 9.3. The code is frequently > > revised so the patch needed to vary into four files. > > Hmm, that's rather annoying, it sure would be handy if we could do > small palloc()s like this in error messages safely. > > I wonder if we could switch to ErrorContext in errstart()? That way, > any small allocations in the ereport() arguments, like what > XLogFileNameP() does, would be allocated in ErrorContext. It already controlling error context per-recursion-level basis but it doesn't work for the top-level errmsg(). I'm not sure the basis of edata->assoc_context, it seems always set to ErrorContext. As a PoC, just moving to and restore from ErrorContext at the top level seems working fine. (The first attached and it changes only ereport.) # The second is less invasive version of the previous patch.. regards. -- Kyotaro Horiguchi NTT Open Source Software Center diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c index 16531f7a0f..0e4877240f 100644 --- a/src/backend/utils/error/elog.c +++ b/src/backend/utils/error/elog.c @@ -154,6 +154,8 @@ static bool saved_timeval_set = false; static char formatted_start_time[FORMATTED_TS_LEN]; static char formatted_log_time[FORMATTED_TS_LEN]; +/* Store memory context to restore after error reporting */ +MemoryContext elog_oldcontext = NULL; /* Macro for checking errordata_stack_depth is reasonable */ #define CHECK_STACK_DEPTH() \ @@ -396,8 +398,11 @@ errstart(int elevel, const char *filename, int lineno, * Any allocations for this error state level should go into ErrorContext */ edata->assoc_context = ErrorContext; - recursion_depth--; + + /* Set memory context to ErrorContext if this is the toplevel */ + if (recursion_depth == 0) + elog_oldcontext = MemoryContextSwitchTo(ErrorContext); return true; } @@ -414,19 +419,12 @@ errfinish(int dummy,...) { ErrorData *edata = &errordata[errordata_stack_depth]; int elevel; - MemoryContext oldcontext; ErrorContextCallback *econtext; recursion_depth++; CHECK_STACK_DEPTH(); elevel = edata->elevel; - /* - * Do processing in ErrorContext, which we hope has enough reserved space - * to report an error. - */ - oldcontext = MemoryContextSwitchTo(ErrorContext); - /* * Call any context callback functions. Errors occurring in callback * functions will be treated as recursive errors --- this ensures we will @@ -509,9 +507,12 @@ errfinish(int dummy,...) errordata_stack_depth--; /* Exit error-handling context */ - MemoryContextSwitchTo(oldcontext); recursion_depth--; + /* Restore memory context if this is the top-level */ + if (recursion_depth == 0 && elog_oldcontext) + MemoryContextSwitchTo(elog_oldcontext); + /* * Perform error recovery action as specified by elevel. */ diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index adbd6a2126..9cd7a106ba 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -2477,7 +2477,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) ereport(PANIC, (errcode_for_file_access(), errmsg("could not seek in log file %s to offset %u: %m", - XLogFileNameP(ThisTimeLineID, openLogSegNo), + XLogFileNameEP(ThisTimeLineID, openLogSegNo), startoffset))); openLogOff = startoffset; } @@ -2500,7 +2500,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) (errcode_for_file_access(), errmsg("could not write to log file %s " "at offset %u, length %zu: %m", - XLogFileNameP(ThisTimeLineID, openLogSegNo), + XLogFileNameEP(ThisTimeLineID, openLogSegNo), openLogOff, nbytes))); } nleft -= written; @@ -3743,7 +3743,8 @@ XLogFileClose(void) ereport(PANIC, (errcode_for_file_access(), errmsg("could not close log file %s: %m", - XLogFileNameP(ThisTimeLineID, openLogSegNo)))); + XLogFileNameEP(ThisTimeLineID, openLogSegNo)))); + openLogFile = -1; } @@ -10160,7 +10161,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno) ereport(PANIC, (errcode_for_file_access(), errmsg("could not fsync log file %s: %m", - XLogFileNameP(ThisTimeLineID, segno)))); + XLogFileNameEP(ThisTimeLineID, segno)))); break; #ifdef HAVE_FSYNC_WRITETHROUGH case SYNC_METHOD_FSYNC_WRITETHROUGH: @@ -10168,7 +10169,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno) ereport(PANIC, (errcode_for_file_access(), errmsg("could not fsync write-through log file %s: %m", - XLogFileNameP(ThisTimeLineID, segno)))); + XLogFileNameEP(ThisTimeLineID, segno)))); break; #endif #ifdef HAVE_FDATASYNC @@ -10177,7 +10178,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno) ereport(PANIC, (errcode_for_file_access(), errmsg("could not fdatasync log file %s: %m", - XLogFileNameP(ThisTimeLineID, segno)))); + XLogFileNameEP(ThisTimeLineID, segno)))); break; #endif case SYNC_METHOD_OPEN: @@ -10202,6 +10203,21 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno) return result; } +/* + * Do the same thing with XLogFileNameP but using ErrorContext. + * + * This is intended to be used in the parameters of ereport invoked in a + * critical section. + */ +char * +XLogFileNameEP(TimeLineID tli, XLogSegNo segno) +{ + char *result = MemoryContextAlloc(ErrorContext, MAXFNAMELEN); + + XLogFileName(result, tli, segno, wal_segment_size); + return result; +} + /* * do_pg_start_backup is the workhorse of the user-visible pg_start_backup() * function. It creates the necessary starting checkpoint and constructs the diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 421ba6d775..e0ec1c0c91 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -253,6 +253,7 @@ extern void SetRecoveryPause(bool recoveryPause); extern TimestampTz GetLatestXTime(void); extern TimestampTz GetCurrentChunkReplayStartTime(void); extern char *XLogFileNameP(TimeLineID tli, XLogSegNo segno); +extern char *XLogFileNameEP(TimeLineID tli, XLogSegNo segno); extern void UpdateControlFile(void); extern uint64 GetSystemIdentifier(void);
pgsql-hackers by date: