From b91d996c691077a7fa48718c933159136d650e77 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Thu, 29 Jun 2023 10:52:56 +1200 Subject: [PATCH 1/2] Improve ReadRecentBuffer() scalability. While testing a new potential use for ReadRecentBuffer(), Andres reported that it scales badly when called concurrently for the same buffer by many backends. Instead of a naive (but wrong) coding with PinBuffer(), it used the spinlock, so that it could be careful to pin only if the buffer was valid and holding the expected block, to avoid breaking invariants in eg GetVictimBuffer(). Unfortunately that made it less scalable than PinBuffer(), which uses compare-exchange instead. We can fix that by giving PinBuffer() a new skip_if_not_valid mode that doesn't pin invalid buffers. It might occasionally skip when it shouldn't due to the unlocked read of the header flags, but that's unlikely and perfectly acceptable for an opportunistic optimisation routine, and it can only succeed when it really should due to the compare-exchange loop. XXX This also fixes ReadRecentBuffer()'s failure to bump the usage count. Fix separately or back-patch this? Reported-by: Andres Freund Discussion: https://postgr.es/m/20230627020546.t6z4tntmj7wmjrfh%40awork3.anarazel.de --- src/backend/storage/buffer/bufmgr.c | 60 ++++++++++++----------------- 1 file changed, 24 insertions(+), 36 deletions(-) diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 3c59bbd04e..0df9a1ec30 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -467,7 +467,8 @@ static BlockNumber ExtendBufferedRelShared(ExtendBufferedWhat eb, BlockNumber extend_upto, Buffer *buffers, uint32 *extended_by); -static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy); +static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy, + bool skip_if_not_valid); static void PinBuffer_Locked(BufferDesc *buf); static void UnpinBuffer(BufferDesc *buf); static void BufferSync(int flags); @@ -635,7 +636,6 @@ ReadRecentBuffer(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockN BufferDesc *bufHdr; BufferTag tag; uint32 buf_state; - bool have_private_ref; Assert(BufferIsValid(recent_buffer)); @@ -663,38 +663,17 @@ ReadRecentBuffer(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockN else { bufHdr = GetBufferDescriptor(recent_buffer - 1); - have_private_ref = GetPrivateRefCount(recent_buffer) > 0; - /* - * Do we already have this buffer pinned with a private reference? If - * so, it must be valid and it is safe to check the tag without - * locking. If not, we have to lock the header first and then check. - */ - if (have_private_ref) - buf_state = pg_atomic_read_u32(&bufHdr->state); - else - buf_state = LockBufHdr(bufHdr); - - if ((buf_state & BM_VALID) && BufferTagsEqual(&tag, &bufHdr->tag)) + /* Is it still valid and holding the right tag? */ + if (PinBuffer(bufHdr, NULL, true)) { - /* - * It's now safe to pin the buffer. We can't pin first and ask - * questions later, because it might confuse code paths like - * InvalidateBuffer() if we pinned a random non-matching buffer. - */ - if (have_private_ref) - PinBuffer(bufHdr, NULL); /* bump pin count */ - else - PinBuffer_Locked(bufHdr); /* pin for first time */ - - pgBufferUsage.shared_blks_hit++; - - return true; + if (BufferTagsEqual(&tag, &bufHdr->tag)) + { + pgBufferUsage.shared_blks_hit++; + return true; + } + UnpinBuffer(bufHdr); } - - /* If we locked the header above, now unlock. */ - if (!have_private_ref) - UnlockBufHdr(bufHdr, buf_state); } return false; @@ -1252,7 +1231,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, */ buf = GetBufferDescriptor(existing_buf_id); - valid = PinBuffer(buf, strategy); + valid = PinBuffer(buf, strategy, false); /* Can release the mapping lock as soon as we've pinned it */ LWLockRelease(newPartitionLock); @@ -1330,7 +1309,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, existing_buf_hdr = GetBufferDescriptor(existing_buf_id); - valid = PinBuffer(existing_buf_hdr, strategy); + valid = PinBuffer(existing_buf_hdr, strategy, false); /* Can release the mapping lock as soon as we've pinned it */ LWLockRelease(newPartitionLock); @@ -1979,7 +1958,7 @@ ExtendBufferedRelShared(ExtendBufferedWhat eb, * Pin the existing buffer before releasing the partition lock, * preventing it from being evicted. */ - valid = PinBuffer(existing_hdr, strategy); + valid = PinBuffer(existing_hdr, strategy, false); LWLockRelease(partition_lock); @@ -2225,10 +2204,13 @@ ReleaseAndReadBuffer(Buffer buffer, * Note that ResourceOwnerEnlargeBuffers must have been done already. * * Returns true if buffer is BM_VALID, else false. This provision allows - * some callers to avoid an extra spinlock cycle. + * some callers to avoid an extra spinlock cycle. If skip_if_not_valid is + * true, then a false return value also indicates that the buffer was + * (recently) invalid and has not been pinned. */ static bool -PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy) +PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy, + bool skip_if_not_valid) { Buffer b = BufferDescriptorGetBuffer(buf); bool result; @@ -2252,6 +2234,12 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy) if (old_buf_state & BM_LOCKED) old_buf_state = WaitBufHdrUnlocked(buf); + if (unlikely(skip_if_not_valid && !(old_buf_state & BM_VALID))) + { + ForgetPrivateRefCountEntry(ref); + return false; + } + buf_state = old_buf_state; /* increase refcount */ -- 2.40.1