From 0d1f0f67557679cb20d53096d12fa22de7760fe5 Mon Sep 17 00:00:00 2001
From: Maxim Orlov <orlovmg@gmail.com>
Date: Fri, 28 Feb 2025 17:06:01 +0300
Subject: [PATCH v4] Process sync requests incrementally in AbsorbSyncRequests

If the number of sync requests is big enough, the palloc() call in
AbsorbSyncRequests() will attempt to allocate more than 1 Gb of memory,
resulting in failure. This will lead to an infinite loop in the checkpointer
process.

So, we process requests incrementally with a buffer of not more than 1 Gb on
each step.
---
 src/backend/postmaster/checkpointer.c | 63 +++++++++++++++++----------
 1 file changed, 40 insertions(+), 23 deletions(-)

diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index 7acbbd3e26..de2e99844b 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -1324,39 +1324,56 @@ AbsorbSyncRequests(void)
 	CheckpointerRequest *requests = NULL;
 	CheckpointerRequest *request;
 	int			n;
+	bool		loop;
 
 	if (!AmCheckpointerProcess())
 		return;
 
-	LWLockAcquire(CheckpointerCommLock, LW_EXCLUSIVE);
-
-	/*
-	 * We try to avoid holding the lock for a long time by copying the request
-	 * array, and processing the requests after releasing the lock.
-	 *
-	 * Once we have cleared the requests from shared memory, we have to PANIC
-	 * if we then fail to absorb them (eg, because our hashtable runs out of
-	 * memory).  This is because the system cannot run safely if we are unable
-	 * to fsync what we have been told to fsync.  Fortunately, the hashtable
-	 * is so small that the problem is quite unlikely to arise in practice.
-	 */
-	n = CheckpointerShmem->num_requests;
-	if (n > 0)
+	do
 	{
-		requests = (CheckpointerRequest *) palloc(n * sizeof(CheckpointerRequest));
-		memcpy(requests, CheckpointerShmem->requests, n * sizeof(CheckpointerRequest));
-	}
+		LWLockAcquire(CheckpointerCommLock, LW_EXCLUSIVE);
 
-	START_CRIT_SECTION();
+		/*
+		 * We try to avoid holding the lock for a long time by copying the
+		 * request array, and processing the requests after releasing the lock.
+		 *
+		 * Once we have cleared the requests from shared memory, we have to
+		 * PANIC if we then fail to absorb them (eg, because our hashtable runs
+		 * out of memory).  This is because the system cannot run safely if we
+		 * are unable to fsync what we have been told to fsync.  Fortunately,
+		 * the hashtable is so small that the problem is quite unlikely to arise
+		 * in practice.
+		 *
+		 * Note: we could not palloc more than 1Gb of memory, thus make sure
+		 * that the maximum number of elements will fit in the requests buffer.
+		 */
+		n = Min(CheckpointerShmem->num_requests,
+				MaxAllocSize / sizeof(CheckpointerRequest));
+		if (n > 0)
+		{
+			if (!requests)
+				requests = (CheckpointerRequest *) palloc(n * sizeof(CheckpointerRequest));
+			memcpy(requests, CheckpointerShmem->requests, n * sizeof(CheckpointerRequest));
 
-	CheckpointerShmem->num_requests = 0;
+			CheckpointerShmem->num_requests -= n;
 
-	LWLockRelease(CheckpointerCommLock);
+			memmove(CheckpointerShmem->requests,
+					CheckpointerShmem->requests + n,
+					CheckpointerShmem->num_requests * sizeof(CheckpointerRequest));
+		}
+
+		START_CRIT_SECTION();
+
+		/* Are there any requests in the queue? If so, keep going. */
+		loop = CheckpointerShmem->num_requests != 0;
+
+		LWLockRelease(CheckpointerCommLock);
 
-	for (request = requests; n > 0; request++, n--)
-		RememberSyncRequest(&request->ftag, request->type);
+		for (request = requests; n > 0; request++, n--)
+			RememberSyncRequest(&request->ftag, request->type);
 
-	END_CRIT_SECTION();
+		END_CRIT_SECTION();
+	} while (loop);
 
 	if (requests)
 		pfree(requests);
-- 
2.48.1