From 7bf89e41ee440bf7515d49058abdc3c6bc639828 Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Wed, 15 Oct 2025 10:54:19 -0400 Subject: [PATCH v8 2/7] Split FlushBuffer() into two parts Before adding write combining to write a batch of blocks when flushing dirty buffers, refactor FlushBuffer() into the preparatory step and actual buffer flushing step. This separation provides symmetry with future code for batch flushing which necessarily separates these steps, as it must prepare multiple buffers before flushing them together. These steps are moved into a new FlushBuffer() helper function, CleanVictimBuffer() which will contain both the batch flushing and single flush code in future commits. Author: Melanie Plageman Reviewed-by: Chao Li Reviewed-by: Nazir Bilal Yavuz Discussion: https://postgr.es/m/2FA0BAC7-5413-4ABD-94CA-4398FE77750D%40gmail.com Discussion: https://postgr.es/m/flat/CAAKRu_Yjn4mvN9NBxtmsCQSGwup45CoA4e05nhR7ADP-v0WCig%40mail.gmail.com --- src/backend/storage/buffer/bufmgr.c | 142 +++++++++++++++++++--------- 1 file changed, 98 insertions(+), 44 deletions(-) diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 90c24b8d93d..235e261738b 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -533,6 +533,12 @@ static void FlushUnlockedBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context); static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context); +static bool PrepareFlushBuffer(BufferDesc *bufdesc, XLogRecPtr *lsn); +static void DoFlushBuffer(BufferDesc *buf, SMgrRelation reln, + IOObject io_object, IOContext io_context, + XLogRecPtr buffer_lsn); +static void CleanVictimBuffer(BufferDesc *bufdesc, bool from_ring, + IOContext io_context); static void FindAndDropRelationBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, @@ -2394,12 +2400,8 @@ 
GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context) continue; } - /* OK, do the I/O */ - FlushBuffer(buf_hdr, NULL, IOOBJECT_RELATION, io_context); - LWLockRelease(content_lock); - - ScheduleBufferTagForWriteback(&BackendWritebackContext, io_context, - &buf_hdr->tag); + /* Content lock is released inside CleanVictimBuffer */ + CleanVictimBuffer(buf_hdr, from_ring, io_context); } @@ -4270,54 +4272,64 @@ static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context) { - XLogRecPtr recptr; - ErrorContextCallback errcallback; - instr_time io_start; - Block bufBlock; - char *bufToWrite; - uint32 buf_state; + XLogRecPtr lsn; - /* - * Try to start an I/O operation. If StartBufferIO returns false, then - * someone else flushed the buffer before we could, so we need not do - * anything. - */ - if (!StartBufferIO(buf, false, false)) - return; + if (PrepareFlushBuffer(buf, &lsn)) + DoFlushBuffer(buf, reln, io_object, io_context, lsn); +} - /* Setup error traceback support for ereport() */ - errcallback.callback = shared_buffer_write_error_callback; - errcallback.arg = buf; - errcallback.previous = error_context_stack; - error_context_stack = &errcallback; +/* + * Prepare and write out a dirty victim buffer. + * + * Buffer must be pinned, the content lock must be held exclusively, and the + * buffer header spinlock must not be held. The exclusive lock is released and + * the buffer is returned pinned but not locked. + * + * bufdesc may be modified. 
+ */ +static void +CleanVictimBuffer(BufferDesc *bufdesc, + bool from_ring, IOContext io_context) +{ + XLogRecPtr max_lsn = InvalidXLogRecPtr; + LWLock *content_lock; - /* Find smgr relation for buffer */ - if (reln == NULL) - reln = smgropen(BufTagGetRelFileLocator(&buf->tag), INVALID_PROC_NUMBER); + /* Set up this victim buffer to be flushed */ + if (!PrepareFlushBuffer(bufdesc, &max_lsn)) + return; - TRACE_POSTGRESQL_BUFFER_FLUSH_START(BufTagGetForkNum(&buf->tag), - buf->tag.blockNum, - reln->smgr_rlocator.locator.spcOid, - reln->smgr_rlocator.locator.dbOid, - reln->smgr_rlocator.locator.relNumber); + DoFlushBuffer(bufdesc, NULL, IOOBJECT_RELATION, io_context, max_lsn); + content_lock = BufferDescriptorGetContentLock(bufdesc); + LWLockRelease(content_lock); + ScheduleBufferTagForWriteback(&BackendWritebackContext, io_context, + &bufdesc->tag); +} - buf_state = LockBufHdr(buf); +/* + * Prepare the buffer with bufdesc for writing. Returns true if the buffer + * actually needs writing and false otherwise. lsn returns the buffer's LSN if + * the table is logged. + */ +static bool +PrepareFlushBuffer(BufferDesc *bufdesc, XLogRecPtr *lsn) +{ + uint32 buf_state; /* - * Run PageGetLSN while holding header lock, since we don't have the - * buffer locked exclusively in all cases. + * Try to start an I/O operation. If StartBufferIO returns false, then + * someone else flushed the buffer before we could, so we need not do + * anything. */ - recptr = BufferGetLSN(buf); + if (!StartBufferIO(bufdesc, false, false)) + return false; - /* To check if block content changes while flushing. - vadim 01/17/97 */ - UnlockBufHdrExt(buf, buf_state, - 0, BM_JUST_DIRTIED, - 0); + *lsn = InvalidXLogRecPtr; + buf_state = LockBufHdr(bufdesc); /* - * Force XLOG flush up to buffer's LSN. This implements the basic WAL - * rule that log updates must hit disk before any of the data-file changes - * they describe do. + * Record the buffer's LSN. We will force XLOG flush up to buffer's LSN. 
+ * This implements the basic WAL rule that log updates must hit disk + * before any of the data-file changes they describe do. * * However, this rule does not apply to unlogged relations, which will be * lost after a crash anyway. Most unlogged relation pages do not bear @@ -4330,9 +4342,51 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, * happen, attempting to flush WAL through that location would fail, with * disastrous system-wide consequences. To make sure that can't happen, * skip the flush if the buffer isn't permanent. + * + * We must hold the buffer header lock when examining the page LSN since + * we don't have the buffer exclusively locked in all cases. */ if (buf_state & BM_PERMANENT) - XLogFlush(recptr); + *lsn = BufferGetLSN(bufdesc); + + /* To check if block content changes while flushing. - vadim 01/17/97 */ + UnlockBufHdrExt(bufdesc, buf_state, + 0, BM_JUST_DIRTIED, + 0); + return true; +} + +/* + * Actually do the write I/O to clean a buffer. buf and reln may be modified. + */ +static void +DoFlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, + IOContext io_context, XLogRecPtr buffer_lsn) +{ + ErrorContextCallback errcallback; + instr_time io_start; + Block bufBlock; + char *bufToWrite; + + /* Setup error traceback support for ereport() */ + errcallback.callback = shared_buffer_write_error_callback; + errcallback.arg = buf; + errcallback.previous = error_context_stack; + error_context_stack = &errcallback; + + /* Find smgr relation for buffer */ + if (reln == NULL) + reln = smgropen(BufTagGetRelFileLocator(&buf->tag), INVALID_PROC_NUMBER); + + TRACE_POSTGRESQL_BUFFER_FLUSH_START(BufTagGetForkNum(&buf->tag), + buf->tag.blockNum, + reln->smgr_rlocator.locator.spcOid, + reln->smgr_rlocator.locator.dbOid, + reln->smgr_rlocator.locator.relNumber); + + /* Force XLOG flush up to buffer's LSN */ + if (XLogRecPtrIsValid(buffer_lsn)) + XLogFlush(buffer_lsn); /* * Now it's safe to write the buffer to disk. 
Note that no one else should -- 2.43.0