From 254d0eef0edcbc96c5cf23c6856bd0467dde4604 Mon Sep 17 00:00:00 2001
From: Shlok Kyal
Date: Mon, 10 Jun 2024 11:29:00 +0530
Subject: [PATCH v2] Fix infinite loop in walsender during publisher shutdown

When a publisher server is shutting down, there can be a case where
the last WAL record at that point is a continuation record with its
latter part not yet flushed. In such cases, the walsender attempts to
read this unflushed part and ends up in an infinite loop. To prevent
this situation, modify the logical WAL sender to consider itself
caught up in this case. The records that are not fully flushed at this
point are generally not significant, so simply ignoring them should
not cause any issues.
---
 src/backend/replication/walsender.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 269914bce2..6ae5b99ecd 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -3083,8 +3083,15 @@ XLogSendLogical(void)
 	else if (logical_decoding_ctx->reader->EndRecPtr >= flushPtr)
 		flushPtr = GetFlushRecPtr(NULL);
 
-	/* If EndRecPtr is still past our flushPtr, it means we caught up. */
-	if (logical_decoding_ctx->reader->EndRecPtr >= flushPtr)
+	/*
+	 * If EndRecPtr is still past our flushPtr, it means we caught up. When
+	 * the server is shutting down, the latter part of a continuation record
+	 * may be missing. If got_STOPPING is true, assume we are caught up if the
+	 * last record is missing its continuation part at flushPtr.
+	 */
+	if (logical_decoding_ctx->reader->EndRecPtr >= flushPtr ||
+		(got_STOPPING &&
+		 logical_decoding_ctx->reader->missingContrecPtr == flushPtr))
 		WalSndCaughtUp = true;
 
 	/*
-- 
2.34.1