From 51d53b166df7c8eaebe49756e24088c16764807b Mon Sep 17 00:00:00 2001
From: Andrew Dunstan <andrew@dunslane.net>
Date: Sun, 22 Mar 2026 06:53:25 -0400
Subject: [PATCH v5 3/5] Fix init_archive_reader to not depend on cur_file.

init_archive_reader() relied on privateInfo->cur_file to track which
WAL segment was being read, but cur_file can be reset to NULL if a
member trailer is processed during the same read_archive_file() call.
When that happened, the loop lost track of the segment it had just
read and kept reading until the archive was exhausted, which could
cause intermittent, hard-to-reproduce "could not find WAL in archive"
failures, particularly with compressed archives where all the WAL data
fits in a small number of compressed bytes.

Fix by scanning the hash table after each read to find any cached
WAL segment with sufficient data, instead of depending on cur_file.
Also reduce the minimum data requirement from XLOG_BLCKSZ to
sizeof(XLogLongPageHeaderData), since we only need the long page
header to extract the segment size.

Expand the comment on cur_file in pg_waldump.h to document that it can
change multiple times during a single read_archive_file() call, and so
should not be examined outside astreamer_waldump_content().

Author: Tom Lane <tgl@sss.pgh.pa.us>

Discussion: https://postgr.es/m/2178517.1774064942@sss.pgh.pa.us
---
 src/bin/pg_waldump/archive_waldump.c | 22 +++++++++++++++++-----
 src/bin/pg_waldump/pg_waldump.h      |  9 ++++++++-
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/src/bin/pg_waldump/archive_waldump.c b/src/bin/pg_waldump/archive_waldump.c
index cd092a057ef..3fce2183099 100644
--- a/src/bin/pg_waldump/archive_waldump.c
+++ b/src/bin/pg_waldump/archive_waldump.c
@@ -173,17 +173,29 @@ init_archive_reader(XLogDumpPrivate *privateInfo,
 	privateInfo->archive_wal_htab = ArchivedWAL_create(8, NULL);
 
 	/*
-	 * Read until we have at least one full WAL page (XLOG_BLCKSZ bytes) from
-	 * the first WAL segment in the archive so we can extract the WAL segment
-	 * size from the long page header.
+	 * Read until we have at least one WAL segment with enough data to extract
+	 * the WAL segment size from the long page header.
+	 *
+	 * We must not rely on cur_file here, because it can become NULL if a
+	 * member trailer is processed during a read_archive_file() call. Instead,
+	 * scan the hash table after each read to find any entry with sufficient
+	 * data.
 	 */
-	while (entry == NULL || entry->buf->len < XLOG_BLCKSZ)
+	while (entry == NULL)
 	{
+		ArchivedWAL_iterator iter;
+
 		if (!read_archive_file(privateInfo, XLOG_BLCKSZ))
 			pg_fatal("could not find WAL in archive \"%s\"",
 					 privateInfo->archive_name);
 
-		entry = privateInfo->cur_file;
+		ArchivedWAL_start_iterate(privateInfo->archive_wal_htab, &iter);
+		while ((entry = ArchivedWAL_iterate(privateInfo->archive_wal_htab,
+											&iter)) != NULL)
+		{
+			if (entry->read_len >= sizeof(XLogLongPageHeaderData))
+				break;
+		}
 	}
 
 	/* Extract the WAL segment size from the long page header */
diff --git a/src/bin/pg_waldump/pg_waldump.h b/src/bin/pg_waldump/pg_waldump.h
index cde7c6ca3f2..ca0dfd97168 100644
--- a/src/bin/pg_waldump/pg_waldump.h
+++ b/src/bin/pg_waldump/pg_waldump.h
@@ -44,7 +44,14 @@ typedef struct XLogDumpPrivate
 	Size		archive_read_buf_size;
 #endif
 
-	/* What the archive streamer is currently reading */
+	/*
+	 * The buffer for the WAL file the archive streamer is currently reading,
+	 * or NULL if none.  It is quite risky to examine this anywhere except in
+	 * astreamer_waldump_content(), since it can change multiple times during
+	 * a single read_archive_file() call.  However, it is safe to assume that
+	 * if cur_file is different from a particular ArchivedWALFile of interest,
+	 * then the archive streamer has finished reading that file.
+	 */
 	struct ArchivedWALFile *cur_file;
 
 	/*
-- 
2.43.0