From 51d53b166df7c8eaebe49756e24088c16764807b Mon Sep 17 00:00:00 2001
From: Andrew Dunstan
Date: Sun, 22 Mar 2026 06:53:25 -0400
Subject: [PATCH v5 3/5] Fix init_archive_reader to not depend on cur_file.

init_archive_reader() relied on privateInfo->cur_file to track which
WAL segment was being read, but cur_file can become NULL if a member
trailer is processed during a read_archive_file() call. This could
cause unreproducible "could not find WAL in archive" failures,
particularly with compressed archives where all the WAL data fits in
a small number of compressed bytes.

Fix by scanning the hash table after each read to find any cached WAL
segment with sufficient data, instead of depending on cur_file. Also
reduce the minimum data requirement from XLOG_BLCKSZ to
sizeof(XLogLongPageHeaderData), since we only need the long page
header to extract the segment size.

Add a safety comment on cur_file in pg_waldump.h to document that it
can change during a single read_archive_file() call.

Author: Tom Lane
Discussion: https://postgr.es/m/2178517.1774064942@sss.pgh.pa.us
---
 src/bin/pg_waldump/archive_waldump.c | 22 +++++++++++++++++-----
 src/bin/pg_waldump/pg_waldump.h      |  9 ++++++++-
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/src/bin/pg_waldump/archive_waldump.c b/src/bin/pg_waldump/archive_waldump.c
index cd092a057ef..3fce2183099 100644
--- a/src/bin/pg_waldump/archive_waldump.c
+++ b/src/bin/pg_waldump/archive_waldump.c
@@ -173,17 +173,29 @@ init_archive_reader(XLogDumpPrivate *privateInfo,
 	privateInfo->archive_wal_htab = ArchivedWAL_create(8, NULL);
 
 	/*
-	 * Read until we have at least one full WAL page (XLOG_BLCKSZ bytes) from
-	 * the first WAL segment in the archive so we can extract the WAL segment
-	 * size from the long page header.
+	 * Read until we have at least one WAL segment with enough data to extract
+	 * the WAL segment size from the long page header.
+	 *
+	 * We must not rely on cur_file here, because it can become NULL if a
+	 * member trailer is processed during a read_archive_file() call. Instead,
+	 * scan the hash table after each read to find any entry with sufficient
+	 * data.
 	 */
-	while (entry == NULL || entry->buf->len < XLOG_BLCKSZ)
+	while (entry == NULL)
 	{
+		ArchivedWAL_iterator iter;
+
 		if (!read_archive_file(privateInfo, XLOG_BLCKSZ))
 			pg_fatal("could not find WAL in archive \"%s\"",
 					 privateInfo->archive_name);
 
-		entry = privateInfo->cur_file;
+		ArchivedWAL_start_iterate(privateInfo->archive_wal_htab, &iter);
+		while ((entry = ArchivedWAL_iterate(privateInfo->archive_wal_htab,
+											&iter)) != NULL)
+		{
+			if (entry->read_len >= sizeof(XLogLongPageHeaderData))
+				break;
+		}
 	}
 
 	/* Extract the WAL segment size from the long page header */
diff --git a/src/bin/pg_waldump/pg_waldump.h b/src/bin/pg_waldump/pg_waldump.h
index cde7c6ca3f2..ca0dfd97168 100644
--- a/src/bin/pg_waldump/pg_waldump.h
+++ b/src/bin/pg_waldump/pg_waldump.h
@@ -44,7 +44,14 @@ typedef struct XLogDumpPrivate
 	Size		archive_read_buf_size;
 #endif
 
-	/* What the archive streamer is currently reading */
+	/*
+	 * The buffer for the WAL file the archive streamer is currently reading,
+	 * or NULL if none. It is quite risky to examine this anywhere except in
+	 * astreamer_waldump_content(), since it can change multiple times during
+	 * a single read_archive_file() call. However, it is safe to assume that
+	 * if cur_file is different from a particular ArchivedWALFile of interest,
+	 * then the archive streamer has finished reading that file.
+	 */
 	struct ArchivedWALFile *cur_file;
 
 	/*
-- 
2.43.0