From 134f203907083f557075afda695b778af627b318 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Tue, 26 Sep 2023 15:23:37 +0900 Subject: [PATCH v5 3/4] Make WAL replay more robust on OOM failures This takes advantage of the new error facility for WAL readers, allowing WAL replay to loop when an out-of-memory failure happens while reading a record. Such failures were the origin of potential data loss scenarios; this change makes crash recovery more robust by acting the same way as a standby here: each time a record cannot be read because of an OOM, loop and try to read the record again. --- src/backend/access/transam/xlogrecovery.c | 75 ++++++++++++++++------- 1 file changed, 52 insertions(+), 23 deletions(-) diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index 68100bfa4a..ed5ac06938 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -3067,29 +3067,50 @@ ReadRecord(XLogPrefetcher *xlogprefetcher, int emode, record = XLogPrefetcherReadRecord(xlogprefetcher, &errordata); if (record == NULL) { - /* - * When we find that WAL ends in an incomplete record, keep track - * of that record. After recovery is done, we'll write a record - * to indicate to downstream WAL readers that that portion is to - * be ignored. - * - * However, when ArchiveRecoveryRequested = true, we're going to - * switch to a new timeline at the end of recovery. We will only - * copy WAL over to the new timeline up to the end of the last - * complete record, so if we did this, we would later create an - * overwrite contrecord in the wrong place, breaking everything. 
- */ - if (!ArchiveRecoveryRequested && - !XLogRecPtrIsInvalid(xlogreader->abortedRecPtr)) + switch (errordata.code) { - abortedRecPtr = xlogreader->abortedRecPtr; - missingContrecPtr = xlogreader->missingContrecPtr; - } + case XLOG_READER_NO_ERROR: + /* Possible when XLogPageRead() has failed */ + Assert(!errordata.message); + /* FALLTHROUGH */ - if (readFile >= 0) - { - close(readFile); - readFile = -1; + case XLOG_READER_INVALID_DATA: + + /* + * When we find that WAL ends in an incomplete record, + * keep track of that record. After recovery is done, + * we'll write a record to indicate to downstream WAL + * readers that that portion is to be ignored. + * + * However, when ArchiveRecoveryRequested = true, we're + * going to switch to a new timeline at the end of + * recovery. We will only copy WAL over to the new + * timeline up to the end of the last complete record, so + * if we did this, we would later create an overwrite + * contrecord in the wrong place, breaking everything. + */ + if (!ArchiveRecoveryRequested && + !XLogRecPtrIsInvalid(xlogreader->abortedRecPtr)) + { + abortedRecPtr = xlogreader->abortedRecPtr; + missingContrecPtr = xlogreader->missingContrecPtr; + } + + if (readFile >= 0) + { + close(readFile); + readFile = -1; + } + break; + case XLOG_READER_OOM: + + /* + * If we failed because of an out-of-memory problem, just + * give up and retry recovery later. It may be possible + * that the WAL record to decode required a larger memory + * allocation than what the host can offer. + */ + break; } /* @@ -3147,9 +3168,12 @@ ReadRecord(XLogPrefetcher *xlogprefetcher, int emode, * WAL from the archive, even if pg_wal is completely empty, but * we'd have no idea how far we'd have to replay to reach * consistency. So err on the safe side and give up. + * + * It may be possible that the record was not decoded because of + * an out-of-memory failure. In this case, just loop. 
*/ if (!InArchiveRecovery && ArchiveRecoveryRequested && - !fetching_ckpt) + !fetching_ckpt && errordata.code != XLOG_READER_OOM) { ereport(DEBUG1, (errmsg_internal("reached end of WAL in pg_wal, entering archive recovery"))); @@ -3173,9 +3197,14 @@ ReadRecord(XLogPrefetcher *xlogprefetcher, int emode, continue; } - /* In standby mode, loop back to retry. Otherwise, give up. */ + /* + * In standby mode or if reading the WAL record failed on an + * out-of-memory error, loop back and retry. Otherwise, give up. + */ if (StandbyMode && !CheckForStandbyTrigger()) continue; + else if (errordata.code == XLOG_READER_OOM) + continue; else return NULL; } -- 2.42.0