diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 6023c7d8c0..a4a43dff83 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -6847,10 +6847,10 @@ log_line_prefix = '%m [%p] %q%u@%d/%a ' - Controls whether detailed information is produced when the - startup process is resolving or waiting for recovery conflicts. - It logs only when it waits for longer than deadlock_timeout. - For the lock conflict case, it does not wait for deadlock_timeout to be reached. + Controls whether a log message is produced when the startup process + is waiting for recovery conflicts longer than + deadlock_timeout. This is useful in determining if + recovery conflicts prevents the recovery from applying WAL. diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 152c2292d8..648ced33eb 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -3809,7 +3809,7 @@ LockBufferForCleanup(Buffer buffer) BufferDesc *bufHdr; char *new_status = NULL; TimestampTz waitStart = 0; - bool logged; + bool logged_recovery_conflict = false; Assert(BufferIsPinned(buffer)); Assert(PinCountWaitBuf == NULL); @@ -3830,8 +3830,9 @@ LockBufferForCleanup(Buffer buffer) GetPrivateRefCount(buffer)); bufHdr = GetBufferDescriptor(buffer - 1); - waitStart = GetCurrentTimestamp(); - logged = false; + + if (log_recovery_conflict_waits) + waitStart = GetCurrentTimestamp(); for (;;) { @@ -3885,13 +3886,12 @@ LockBufferForCleanup(Buffer buffer) new_status[len] = '\0'; /* truncate off " waiting" */ } - if (log_recovery_conflict_waits && !logged - && TimestampDifferenceExceeds(waitStart, - GetCurrentTimestamp(), DeadlockTimeout)) + if (log_recovery_conflict_waits && !logged_recovery_conflict && + TimestampDifferenceExceeds(waitStart, GetCurrentTimestamp(), + DeadlockTimeout)) { - LogRecoveryConflict(NULL, - PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, true); - logged = true; + LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, NULL); + logged_recovery_conflict = true; } /* Publish the bufid that Startup process waits on */ diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c index 575ee79687..ffe67acea1 100644 --- a/src/backend/storage/ipc/procsignal.c +++ b/src/backend/storage/ipc/procsignal.c @@ -591,39 +591,3 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) errno = save_errno; } - -extern const char *get_procsignal_reason_desc(ProcSignalReason reason) - { - const char *reasonDesc = "unknown reason"; - - switch (reason) - { - case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN: - reasonDesc = "buffer pin"; - break; - case PROCSIG_RECOVERY_CONFLICT_LOCK: - reasonDesc = "lock"; - break; - case PROCSIG_RECOVERY_CONFLICT_TABLESPACE: - reasonDesc = "tablespace"; - break; - case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT: - reasonDesc = "snapshot"; - break; - case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK: - reasonDesc = "deadlock"; - break; - case PROCSIG_RECOVERY_CONFLICT_DATABASE: - reasonDesc = "database"; - break; - case PROCSIG_CATCHUP_INTERRUPT: - case PROCSIG_NOTIFY_INTERRUPT: - case PROCSIG_PARALLEL_MESSAGE: - case PROCSIG_WALSND_INIT_STOPPING: - case NUM_PROCSIGNALS: - break; - default: - break; - } - return reasonDesc; -} diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 7d4241a4e1..4cef839e64 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -50,6 +50,7 @@ static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlis static void SendRecoveryConflictWithBufferPin(ProcSignalReason reason); static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts); static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks); +static const char *get_recovery_conflict_desc(ProcSignalReason reason); /* * Keep track of all the locks owned by a given transaction. @@ -217,49 +218,46 @@ WaitExceedsMaxStandbyDelay(uint32 wait_event_info) } /* - * Log the recovery conflict + * Log the recovery conflict. + * + * This function also reports the details about the conflicted virtual transactions + * listed in waitlist, if not NULL. */ void -LogRecoveryConflict(VirtualTransactionId *waitlist, ProcSignalReason reason, bool waiting) +LogRecoveryConflict(ProcSignalReason reason, VirtualTransactionId *waitlist) { + if (waitlist) + { + VirtualTransactionId *vxids; + StringInfoData buf; + int nvxids = 0; - VirtualTransactionId *vxids; - StringInfoData buf; - int count; - - if (waitlist && waiting) { - vxids = waitlist; - count = 0; initStringInfo(&buf); + /* construct a string of list of the conflicted virtual transactions */ + vxids = waitlist; while (VirtualTransactionIdIsValid(*vxids)) { - count++; - if (count > 1) - appendStringInfo(&buf,", "); - - appendStringInfo(&buf,"%d/%u",vxids->backendId, - vxids->localTransactionId); + if (nvxids == 0) + appendStringInfo(&buf, "%d/%u", + vxids->backendId, vxids->localTransactionId); + else + appendStringInfo(&buf, ", %d/%u", + vxids->backendId, vxids->localTransactionId); + nvxids++; vxids++; } + ereport(LOG, - (errmsg("recovery is waiting recovery conflict on %s", - get_procsignal_reason_desc(reason)), - (errdetail_log_plural("Conflicting virtual transaction id: %s.", - "Conflicting virtual transaction ids: %s.", - count, buf.data)))); + (errmsg("%s", get_recovery_conflict_desc(reason)), + (errdetail_log_plural("Conflicting virtual transaction id: %s.", + "Conflicting virtual transaction ids: %s.", + nvxids, buf.data)))); pfree(buf.data); } - else if (!waitlist) { - if (!waiting) - ereport(LOG, - (errmsg("recovery is resolving recovery conflict on %s", - get_procsignal_reason_desc(reason)))); - else - ereport(LOG, - (errmsg("recovery is waiting recovery conflict on %s", - get_procsignal_reason_desc(reason)))); - } + else + ereport(LOG, + (errmsg("%s", get_recovery_conflict_desc(reason)))); } /* @@ -268,9 +266,10 @@ LogRecoveryConflict(VirtualTransactionId *waitlist, ProcSignalReason reason, boo * a specific rmgr. Here we just issue the orders to the procs. The procs * then throw the required error as instructed. * - * If report_waiting is true, "waiting" is reported in PS display if necessary. - * If the caller has already reported that, report_waiting should be false. - * Otherwise, "waiting" is reported twice unexpectedly. + * If report_waiting is true, "waiting" is reported in PS display if necessary + * and log the waits for the recovery conflict. If the caller has already reported + * that, report_waiting should be false. Otherwise, both are reported twice + * unexpectedly. */ static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, @@ -279,7 +278,7 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, { TimestampTz waitStart = 0; char *new_status; - bool logged = false; + bool logged = false; /* Fast exit, to avoid a kernel call if there's no work to be done. */ if (!VirtualTransactionIdIsValid(*waitlist)) @@ -298,13 +297,14 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, while (!VirtualXactLock(*waitlist, false)) { /* Log the recovery conflict */ - if (log_recovery_conflict_waits && !logged - && TimestampDifferenceExceeds(waitStart, - GetCurrentTimestamp(), DeadlockTimeout)) + if (log_recovery_conflict_waits && !logged && report_waiting && + TimestampDifferenceExceeds(waitStart, GetCurrentTimestamp(), + DeadlockTimeout)) { - LogRecoveryConflict(waitlist, reason, report_waiting); + LogRecoveryConflict(reason, waitlist); logged = true; } + /* * Report via ps if we have been waiting for more than 500 msec * (should that be configurable?) @@ -427,10 +427,6 @@ ResolveRecoveryConflictWithDatabase(Oid dbid) * database has been removed. */ - /* Log the recovery conflict */ - if (log_recovery_conflict_waits) - LogRecoveryConflict(NULL, PROCSIG_RECOVERY_CONFLICT_DATABASE, false); - while (CountDBBackends(dbid) > 0) { CancelDBBackends(dbid, PROCSIG_RECOVERY_CONFLICT_DATABASE, true); @@ -1177,3 +1173,36 @@ LogStandbyInvalidations(int nmsgs, SharedInvalidationMessage *msgs, nmsgs * sizeof(SharedInvalidationMessage)); XLogInsert(RM_STANDBY_ID, XLOG_INVALIDATIONS); } + +/* Return the description of recovery conflict */ +static const char * +get_recovery_conflict_desc(ProcSignalReason reason) +{ + const char *reasonDesc = _("unknown reason"); + + switch (reason) + { + case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN: + reasonDesc = _("recovery is waiting recovery conflict on buffer pin"); + break; + case PROCSIG_RECOVERY_CONFLICT_LOCK: + reasonDesc = _("recovery is waiting recovery conflict on lock"); + break; + case PROCSIG_RECOVERY_CONFLICT_TABLESPACE: + reasonDesc = _("recovery is waiting recovery conflict on tablespace"); + break; + case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT: + reasonDesc = _("recovery is waiting recovery conflict on snapshot"); + break; + case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK: + reasonDesc = _("recovery is waiting recovery conflict on buffer deadlock"); + break; + case PROCSIG_RECOVERY_CONFLICT_DATABASE: + reasonDesc = _("recovery is waiting recovery conflict on database"); + break; + default: + break; + } + + return reasonDesc; +} diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 8cbd3b39a3..0aefde61d7 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -1069,7 +1069,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) PGPROC *proc; PGPROC *leader = MyProc->lockGroupLeader; int i; - bool logged; + bool logged_recovery_conflict = false; /* * If group locking is in use, locks held by members of my locking group @@ -1276,20 +1276,22 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) * timeout and updating the locallock table, but if we lose control to an * error, LockErrorCleanup will fix that up. */ - logged = false; do { if (InHotStandby) { /* Set a timer and wait for that or for the Lock to be granted */ - VirtualTransactionId *backends; - backends = GetLockConflicts(&locallock->tag.lock, AccessExclusiveLock, NULL); - if (log_recovery_conflict_waits && !logged) { - LogRecoveryConflict(backends, PROCSIG_RECOVERY_CONFLICT_LOCK, true); - logged = true; - } - ResolveRecoveryConflictWithLock(locallock->tag.lock); + + if (log_recovery_conflict_waits && !logged_recovery_conflict && + TimestampDifferenceExceeds(get_timeout_start_time(DEADLOCK_TIMEOUT), + GetCurrentTimestamp(), DeadlockTimeout)) + { + LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK, + GetLockConflicts(&locallock->tag.lock, + AccessExclusiveLock, NULL)); + logged_recovery_conflict = true; + } } else { diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h index 8e3b02f7be..642ce5201a 100644 --- a/src/include/storage/standby.h +++ b/src/include/storage/standby.h @@ -39,7 +39,7 @@ extern void CheckRecoveryConflictDeadlock(void); extern void StandbyDeadLockHandler(void); extern void StandbyTimeoutHandler(void); extern void StandbyLockTimeoutHandler(void); -extern void LogRecoveryConflict(VirtualTransactionId *waitlist, ProcSignalReason reason, bool waiting); +extern void LogRecoveryConflict(ProcSignalReason reason, VirtualTransactionId *waitlist); /* * Standby Rmgr (RM_STANDBY_ID)