From 17a5e2416006e885d0d0a7bada02e56c6bb40486 Mon Sep 17 00:00:00 2001 From: Ashutosh Bapat Date: Thu, 27 Feb 2025 17:39:45 +0530 Subject: [PATCH 09/11] WIP: Support shrinking shared buffers When shrinking the shared buffers pool, each buffer in the area being shrunk needs to be flushed if it's dirty so as not to lose the changes to that buffer after shrinking. Also, each such buffer needs to be removed from the buffer mapping table so that backends do not access it after shrinking. This needs to be done before we remap the shared memory segments related to buffer pools. If a buffer being evicted is pinned, we raise a FATAL error. TODO: Ideally we should be just rolling back the buffer pool resizing operation and try it again. But we need infrastructure to do so. Ashutosh Bapat --- src/backend/port/sysv_shmem.c | 79 ++++++++++++++++--- src/backend/storage/buffer/buf_init.c | 5 +- .../utils/activity/wait_event_names.txt | 1 + src/include/storage/pg_shmem.h | 3 +- 4 files changed, 70 insertions(+), 18 deletions(-) diff --git a/src/backend/port/sysv_shmem.c b/src/backend/port/sysv_shmem.c index 2b144d45cf0..f084a0747ff 100644 --- a/src/backend/port/sysv_shmem.c +++ b/src/backend/port/sysv_shmem.c @@ -933,14 +933,6 @@ AnonymousShmemResize(void) */ pending_pm_shmem_resize = false; - /* - * XXX: Currently only increasing of shared_buffers is supported. For - * decreasing something similar has to be done, but buffer blocks with - * data have to be drained first. - */ - if(NBuffersOld > NBuffers) - return false; - for(int i = 0; i < next_free_segment; i++) { /* Note that CalculateShmemSize indirectly depends on NBuffers */ @@ -998,8 +990,6 @@ AnonymousShmemResize(void) * reinitialize the new portion of buffer pool. Every other * process will wait on the shared barrier for that to finish, * since it's a part of the SHMEM_RESIZE_DONE phase. - * - * XXX: This is the right place for buffer eviction as well. 
*/ ResizeBufferPool(NBuffersOld, true); @@ -1022,6 +1012,52 @@ AnonymousShmemResize(void) return true; } +/* + * When shrinking shared buffers pool, evict the buffers which will not be part + * of the shrunk buffer pool. + */ +static bool +EvictExtraBuffers() +{ + bool result = true; + + /* + * If the buffer being evicted is locked, this function will need to wait. + * This function should not be called from a Postmaster since it cannot wait on a lock. + */ + Assert(IsUnderPostmaster); + + /* + * Let only one backend perform eviction. We could split the work across all + * the backends but that doesn't seem necessary. The first backend to acquire sets its own PID as the evictor PID so that other backends do not perform eviction. Any backend which cannot take this lock already knows that some backend is evicting the buffers without looking at evictor_pid. All the backends which do not perform eviction still wait for this phase to finish and thus release lock before the next phase begins. Thus the same LWLock can be used to select a leader for each phase. TODO: This comment would better be placed at a place common to all phases. + */ + if (LWLockConditionalAcquire(ShmemResizeLock, LW_EXCLUSIVE)) + { + if (ShmemCtrl->evictor_pid == 0) + { + ShmemCtrl->evictor_pid = MyProcPid; + + /* + * TODO: Before evicting any buffer, we should check whether any of the + * buffers are pinned. If we find that a buffer is pinned after evicting + * most of them, that will impact performance since all those evicted + * buffers might need to be read again. + */ + for (Buffer b = NBuffers + 1; b <= NBuffersOld; b++) + { + if (!EvictUnpinnedBuffer(b)) + { + elog(WARNING, "could not remove buffer %u, it is pinned", b); + result = false; + } + } + } + LWLockRelease(ShmemResizeLock); + } + + return result; +} + /* * We are asked to resize shared memory. 
Do the resize and make sure to wait on * the provided barrier until all simultaneously participating backends finish @@ -1065,15 +1101,31 @@ ProcessBarrierShmemResize(Barrier *barrier) /* First phase means the resize has begun, SHMEM_RESIZE_START */ BarrierArriveAndWait(barrier, WAIT_EVENT_SHMEM_RESIZE_START); + /* + * Evict extra buffers when shrinking shared buffers. We need to do this + * while the memory for extra buffers is still mapped i.e. before remapping + * the shared memory segments to a smaller memory area. + */ + if (NBuffersOld > NBuffers) + { + /* + * TODO: If the buffer eviction fails for any reason, we should gracefully roll back the shared buffer resizing and try again. But the infrastructure to do so is not available right now. Hence just raise a FATAL so that the system restarts. + */ + if (!EvictExtraBuffers()) + elog(FATAL, "buffer eviction failed"); + + BarrierArriveAndWait(barrier, WAIT_EVENT_SHMEM_RESIZE_EVICT); + } + /* XXX: Split mremap and buffer reinitialization into two barrier phases */ AnonymousShmemResize(); /* The second phase means the resize has finished, SHMEM_RESIZE_DONE */ BarrierArriveAndWait(barrier, WAIT_EVENT_SHMEM_RESIZE_DONE); - /* Allow the last backend to reset the barrier */ + /* Allow the last backend to reset the control area. 
*/ if (BarrierArriveAndDetach(barrier)) - ResetShmemBarrier(); + ResetShmemCtrl(); return true; } @@ -1518,7 +1570,8 @@ WaitOnShmemBarrier(int phase) } void -ResetShmemBarrier(void) +ResetShmemCtrl(void) { BarrierInit(&ShmemCtrl->Barrier, 0); + ShmemCtrl->evictor_pid = 0; } diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c index 248fbf1633b..d8139a899bb 100644 --- a/src/backend/storage/buffer/buf_init.c +++ b/src/backend/storage/buffer/buf_init.c @@ -119,6 +119,7 @@ BufferManagerShmemInit(void) /* Initialize with the currently known value */ pg_atomic_init_u32(&ShmemCtrl->NSharedBuffers, NBuffers); BarrierInit(&ShmemCtrl->Barrier, 0); + ShmemCtrl->evictor_pid = 0; } /* Align descriptors to a cacheline boundary. */ @@ -228,10 +229,6 @@ ResizeBufferPool(int NBuffersOld, bool initNew) int i; elog(DEBUG1, "Resizing buffer pool from %d to %d", NBuffersOld, NBuffers); - /* XXX: Only increasing of shared_buffers is supported in this function */ - if(NBuffersOld > NBuffers) - return; - /* Align descriptors to a cacheline boundary. */ BufferDescriptors = (BufferDescPadded *) ShmemInitStructInSegment("Buffer Descriptors", diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt index 4203c987edc..a4a1e855c48 100644 --- a/src/backend/utils/activity/wait_event_names.txt +++ b/src/backend/utils/activity/wait_event_names.txt @@ -155,6 +155,7 @@ REPLICATION_SLOT_DROP "Waiting for a replication slot to become inactive so it c RESTORE_COMMAND "Waiting for to complete." SAFE_SNAPSHOT "Waiting to obtain a valid snapshot for a READ ONLY DEFERRABLE transaction." SHMEM_RESIZE_START "Waiting for other backends to start resizing shared memory." +SHMEM_RESIZE_EVICT "Waiting for other backends to finish buffer eviction phase." SHMEM_RESIZE_DONE "Waiting for other backends to finish resizing shared memory." SYNC_REP "Waiting for confirmation from a remote server during synchronous replication." 
WAL_BUFFER_INIT "Waiting on WAL buffer to be initialized." diff --git a/src/include/storage/pg_shmem.h b/src/include/storage/pg_shmem.h index 3f103d708a5..3793f369313 100644 --- a/src/include/storage/pg_shmem.h +++ b/src/include/storage/pg_shmem.h @@ -68,6 +68,7 @@ extern PGDLLIMPORT ShmemSegment Segments[ANON_MAPPINGS]; typedef struct { pg_atomic_uint32 NSharedBuffers; + pid_t evictor_pid; Barrier Barrier; } ShmemControl; @@ -131,7 +132,7 @@ bool ProcessBarrierShmemResize(Barrier *barrier); void assign_shared_buffers(int newval, void *extra, bool *pending); void AdjustShmemSize(void); extern void WaitOnShmemBarrier(int phase); -extern void ResetShmemBarrier(void); +extern void ResetShmemCtrl(void); /* * To be able to dynamically resize largest parts of the data stored in shared -- 2.34.1