From e1b47ed47508d2eeb3e3a2ca2424edb56eae4dec Mon Sep 17 00:00:00 2001 From: Maxim Orlov Date: Thu, 27 Feb 2025 12:59:51 +0300 Subject: [PATCH v2] AbsorbSyncRequests incrementally, instead of doing it for all requests at once. --- src/backend/postmaster/checkpointer.c | 80 +++++++++++++++++++-------- 1 file changed, 57 insertions(+), 23 deletions(-) diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index 7acbbd3e26..bdb0de4176 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -1321,42 +1321,76 @@ CompactCheckpointerRequestQueue(void) void AbsorbSyncRequests(void) { - CheckpointerRequest *requests = NULL; - CheckpointerRequest *request; - int n; + CheckpointerRequest *requests = NULL; + int num_requests, + max_requests; if (!AmCheckpointerProcess()) return; LWLockAcquire(CheckpointerCommLock, LW_EXCLUSIVE); - + num_requests = CheckpointerShmem->num_requests; /* - * We try to avoid holding the lock for a long time by copying the request - * array, and processing the requests after releasing the lock. - * - * Once we have cleared the requests from shared memory, we have to PANIC - * if we then fail to absorb them (eg, because our hashtable runs out of - * memory). This is because the system cannot run safely if we are unable - * to fsync what we have been told to fsync. Fortunately, the hashtable - * is so small that the problem is quite unlikely to arise in practice. + * Note: this can be chosen arbitrarily, stick for 1Gb for now. */ - n = CheckpointerShmem->num_requests; - if (n > 0) + max_requests = MaxAllocSize / sizeof(CheckpointerRequest); + + for (int i = 0; i < num_requests; i += max_requests) { - requests = (CheckpointerRequest *) palloc(n * sizeof(CheckpointerRequest)); - memcpy(requests, CheckpointerShmem->requests, n * sizeof(CheckpointerRequest)); - } + Size n = (num_requests - i >= max_requests) ? max_requests : + num_requests - i; - START_CRIT_SECTION(); + if (!requests) + requests = palloc(n * sizeof(CheckpointerRequest)); - CheckpointerShmem->num_requests = 0; + memcpy(requests, CheckpointerShmem->requests + i, + n * sizeof(CheckpointerRequest)); - LWLockRelease(CheckpointerCommLock); + LWLockRelease(CheckpointerCommLock); + + START_CRIT_SECTION(); + + /* + * We try to avoid holding the lock for a long time by copying the + * request array, and processing the requests after releasing the lock. + * + * Once we have cleared the requests from shared memory, we have to + * PANIC if we then fail to absorb them (eg, because our hashtable runs + * out of memory). This is because the system cannot run safely if we + * are unable to fsync what we have been told to fsync. Fortunately, + * the hashtable is so small that the problem is quite unlikely to arise + * in practice. + */ + for (int j = 0; j < n; j++) + RememberSyncRequest(&requests[j].ftag, requests[j].type); - for (request = requests; n > 0; request++, n--) - RememberSyncRequest(&request->ftag, request->type); + END_CRIT_SECTION(); - END_CRIT_SECTION(); + LWLockAcquire(CheckpointerCommLock, LW_EXCLUSIVE); + } + + if (CheckpointerShmem->num_requests >= num_requests) + { + /* + * All the requests that were added to the queue between the release and + * the lock acquisition must be processed. + */ + Size n = CheckpointerShmem->num_requests - num_requests; + + memmove(CheckpointerShmem->requests, + CheckpointerShmem->requests + num_requests, + n * sizeof(CheckpointerRequest)); + CheckpointerShmem->num_requests = n; + } + else + { + /* + * CompactCheckpointerRequestQueue was called, found some duplicates and + * remove them. + */ + } + + LWLockRelease(CheckpointerCommLock); if (requests) pfree(requests); -- 2.48.1