From 2fda0987b2a38fda22d70ebd019dc102ddd0b11d Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy Date: Wed, 22 Apr 2026 03:50:25 +0000 Subject: [PATCH v1] Add test extension for WAL stats using WALReadFromBuffers Add a test module that collects per-resource-manager WAL record statistics by reading directly from in-memory WAL buffers via WALReadFromBuffers, avoiding WAL file I/O. This requires a small fix in ReadPageInternal to handle buffer-based page_read callbacks where the first page of a segment may no longer be available in the WAL buffers. --- src/backend/access/transam/xlog.c | 4 +- src/backend/access/transam/xlogreader.c | 11 +- src/include/access/xlog.h | 1 + src/test/modules/Makefile | 1 + src/test/modules/meson.build | 1 + .../modules/test_wal_record_stats/.gitignore | 4 + .../modules/test_wal_record_stats/Makefile | 23 ++ .../modules/test_wal_record_stats/meson.build | 23 ++ .../test_wal_record_stats--1.0.sql | 18 ++ .../test_wal_record_stats.c | 236 ++++++++++++++++++ .../test_wal_record_stats.control | 4 + src/tools/pgindent/typedefs.list | 1 + 12 files changed, 324 insertions(+), 3 deletions(-) create mode 100644 src/test/modules/test_wal_record_stats/.gitignore create mode 100644 src/test/modules/test_wal_record_stats/Makefile create mode 100644 src/test/modules/test_wal_record_stats/meson.build create mode 100644 src/test/modules/test_wal_record_stats/test_wal_record_stats--1.0.sql create mode 100644 src/test/modules/test_wal_record_stats/test_wal_record_stats.c create mode 100644 src/test/modules/test_wal_record_stats/test_wal_record_stats.control diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index e39af79c03b..2731c1fe0f2 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -738,7 +738,7 @@ static void ReserveXLogInsertLocation(int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr); static bool ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr); -static XLogRecPtr WaitXLogInsertionsToFinish(XLogRecPtr upto); +XLogRecPtr WaitXLogInsertionsToFinish(XLogRecPtr upto); static char *GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli); static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos); static XLogRecPtr XLogBytePosToEndRecPtr(uint64 bytepos); @@ -1541,7 +1541,7 @@ WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt) * uninitialized page), and the inserter might need to evict an old WAL buffer * to make room for a new one, which in turn requires WALWriteLock. */ -static XLogRecPtr +XLogRecPtr WaitXLogInsertionsToFinish(XLogRecPtr upto) { uint64 bytepos; diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index 8849610db00..a559fcc2fa9 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -1044,8 +1044,17 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen) * file and validate its header, even if that's not where the target * record is. This is so that we can check the additional identification * info that is present in the first page's "long" header. + * + * When reading WAL from the in-memory WAL buffers via WALReadFromBuffers, + * the first page of a segment may have already been overwritten by newer + * WAL in the WAL buffers by the time we need to read a later page from + * that same segment. In this case, we skip this first page validation. + * + * XXX: Consider adding a flag in XLogReaderState that callers reading + * from WAL buffers can set, rather than relying on ws_segno == 0. */ - if (targetSegNo != state->seg.ws_segno && targetPageOff != 0) + if (state->seg.ws_segno != 0 && + targetSegNo != state->seg.ws_segno && targetPageOff != 0) { XLogRecPtr targetSegmentPtr = pageptr - targetPageOff; diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 437b4f32349..ad31a9cb618 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -284,6 +284,7 @@ extern void SetWalWriterSleeping(bool sleeping); extern void WakeupCheckpointer(void); +extern XLogRecPtr WaitXLogInsertionsToFinish(XLogRecPtr upto); extern Size WALReadFromBuffers(char *dstbuf, XLogRecPtr startptr, Size count, TimeLineID tli); diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile index 0a74ab5c86f..c3bb366b090 100644 --- a/src/test/modules/Makefile +++ b/src/test/modules/Makefile @@ -15,6 +15,7 @@ SUBDIRS = \ oauth_validator \ plsample \ spgist_name_ops \ + test_wal_record_stats \ test_aio \ test_autovacuum \ test_binaryheap \ diff --git a/src/test/modules/meson.build b/src/test/modules/meson.build index 4bca42bb370..f3133193705 100644 --- a/src/test/modules/meson.build +++ b/src/test/modules/meson.build @@ -14,6 +14,7 @@ subdir('nbtree') subdir('oauth_validator') subdir('plsample') subdir('spgist_name_ops') +subdir('test_wal_record_stats') subdir('ssl_passphrase_callback') subdir('test_aio') subdir('test_autovacuum') diff --git a/src/test/modules/test_wal_record_stats/.gitignore b/src/test/modules/test_wal_record_stats/.gitignore new file mode 100644 index 00000000000..5dcb3ff9723 --- /dev/null +++ b/src/test/modules/test_wal_record_stats/.gitignore @@ -0,0 +1,4 @@ +# Generated subdirectories +/log/ +/results/ +/tmp_check/ diff --git a/src/test/modules/test_wal_record_stats/Makefile b/src/test/modules/test_wal_record_stats/Makefile new file mode 100644 index 00000000000..bdde4eb595e --- /dev/null +++ b/src/test/modules/test_wal_record_stats/Makefile @@ -0,0 +1,23 @@ +# src/test/modules/test_wal_record_stats/Makefile + +MODULE_big = test_wal_record_stats +OBJS = \ + $(WIN32RES) \ + test_wal_record_stats.o +PGFILEDESC = "test_wal_record_stats - test WAL record stats from WAL buffers" + +EXTENSION = test_wal_record_stats +DATA = test_wal_record_stats--1.0.sql + +TAP_TESTS = 0 + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = src/test/modules/test_wal_record_stats +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/src/test/modules/test_wal_record_stats/meson.build b/src/test/modules/test_wal_record_stats/meson.build new file mode 100644 index 00000000000..d135f4b2721 --- /dev/null +++ b/src/test/modules/test_wal_record_stats/meson.build @@ -0,0 +1,23 @@ +# Copyright (c) 2024-2026, PostgreSQL Global Development Group + +test_wal_record_stats_sources = files( + 'test_wal_record_stats.c', +) + +if host_system == 'windows' + test_wal_record_stats_sources += rc_lib_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'test_wal_record_stats', + '--FILEDESC', 'test_wal_record_stats - test WAL record stats from WAL buffers',]) +endif + +test_wal_record_stats = shared_module('test_wal_record_stats', + test_wal_record_stats_sources, + kwargs: pg_test_mod_args, +) +test_install_libs += test_wal_record_stats + +test_install_data += files( + 'test_wal_record_stats.control', + 'test_wal_record_stats--1.0.sql', +) + diff --git a/src/test/modules/test_wal_record_stats/test_wal_record_stats--1.0.sql b/src/test/modules/test_wal_record_stats/test_wal_record_stats--1.0.sql new file mode 100644 index 00000000000..cb8e3d07d00 --- /dev/null +++ b/src/test/modules/test_wal_record_stats/test_wal_record_stats--1.0.sql @@ -0,0 +1,18 @@ +/* src/test/modules/test_wal_record_stats/test_wal_record_stats--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION test_wal_record_stats" to load this file. \quit + +CREATE FUNCTION get_wal_record_stats_from_buffers( + IN start_lsn pg_lsn, + IN end_lsn pg_lsn, + OUT resource_manager text, + OUT record_type text, + OUT count int8, + OUT total_record_length int8, + OUT total_main_data_length int8, + OUT total_fpi_length int8 +) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'get_wal_record_stats_from_buffers' +LANGUAGE C STRICT PARALLEL SAFE; diff --git a/src/test/modules/test_wal_record_stats/test_wal_record_stats.c b/src/test/modules/test_wal_record_stats/test_wal_record_stats.c new file mode 100644 index 00000000000..adee5dc2b60 --- /dev/null +++ b/src/test/modules/test_wal_record_stats/test_wal_record_stats.c @@ -0,0 +1,236 @@ +/*------------------------------------------------------------------------- + * + * test_wal_record_stats.c + * Test module exercising WALReadFromBuffers() to read WAL records + * directly from WAL buffers (shared memory, no disk I/O). + * + * Copyright (c) 1996-2026, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/test/modules/test_wal_record_stats/test_wal_record_stats.c + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/xlog.h" +#include "access/xlog_internal.h" +#include "access/xlogreader.h" +#include "access/xlogrecovery.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/pg_lsn.h" +#include "utils/tuplestore.h" + +PG_MODULE_MAGIC; + +/* + * page_read callback that reads WAL directly from WAL buffers. + */ +static int +wal_buffer_page_read(XLogReaderState *state, XLogRecPtr targetPagePtr, + int reqLen, XLogRecPtr targetRecPtr, + char *cur_page) +{ + XLogRecPtr read_upto; + XLogRecPtr loc; + TimeLineID tli = GetWALInsertionTimeLine(); + Size count; + Size nbytes; + + loc = targetPagePtr + reqLen; + + read_upto = GetXLogInsertRecPtr(); + + /* + * If the requested WAL hasn't been inserted yet, return -1 rather than + * waiting. The WAL between start_lsn and end_lsn should already be + * inserted by the time we're called. + */ + if (loc > read_upto) + return -1; + + /* Ensure any in-progress insertions up to this point are visible */ + WaitXLogInsertionsToFinish(loc); + + if (targetPagePtr + XLOG_BLCKSZ <= read_upto) + count = XLOG_BLCKSZ; + else if (targetPagePtr + reqLen > read_upto) + return -1; + else + count = read_upto - targetPagePtr; + + nbytes = WALReadFromBuffers(cur_page, targetPagePtr, count, tli); + + if (nbytes <= 0) + return -1; /* data evicted from circular WAL buffer */ + + return nbytes; +} + +/* Per-rmgr/record_type accumulation entry. */ +typedef struct WalRecordStat +{ + char resource_manager[64]; + char record_type[64]; + int64 count; + int64 total_record_length; + int64 total_main_data_length; + int64 total_fpi_length; +} WalRecordStat; + +#define MAX_WAL_STAT_ENTRIES 256 + +/* + * get_wal_record_stats_from_buffers(start_lsn, end_lsn) + * + * Returns per-resource_manager/record_type WAL record statistics by reading + * directly from WAL buffers via WALReadFromBuffers(). + */ +PG_FUNCTION_INFO_V1(get_wal_record_stats_from_buffers); +Datum +get_wal_record_stats_from_buffers(PG_FUNCTION_ARGS) +{ +#define WAL_RECORD_STATS_COLS 6 + XLogRecPtr start_lsn = PG_GETARG_LSN(0); + XLogRecPtr end_lsn = PG_GETARG_LSN(1); + XLogReaderState *xlogreader; + XLogRecPtr first_valid_record; + char *errormsg; + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + WalRecordStat stats[MAX_WAL_STAT_ENTRIES]; + int nstats = 0; + int i; + + if (RecoveryInProgress()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("WAL buffers can only be read on a primary server"))); + + if (start_lsn < XLOG_BLCKSZ) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("could not read WAL at LSN %X/%08X", + LSN_FORMAT_ARGS(start_lsn)))); + + if (start_lsn > end_lsn) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("WAL start LSN must be less than end LSN"))); + + memset(stats, 0, sizeof(stats)); + + InitMaterializedSRF(fcinfo, 0); + + xlogreader = XLogReaderAllocate(wal_segment_size, NULL, + XL_ROUTINE(.page_read = &wal_buffer_page_read, + .segment_open = NULL, + .segment_close = NULL), + NULL); + + if (xlogreader == NULL) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"), + errdetail("Failed while allocating a WAL reading processor."))); + + first_valid_record = XLogFindNextRecord(xlogreader, start_lsn, &errormsg); + + if (XLogRecPtrIsInvalid(first_valid_record)) + { + XLogReaderFree(xlogreader); + PG_RETURN_VOID(); + } + + /* Scan WAL records and accumulate stats */ + while (true) + { + XLogRecord *record; + RmgrData desc; + const char *rmgr_name; + const char *rec_type; + int found = -1; + + record = XLogReadRecord(xlogreader, &errormsg); + if (record == NULL) + break; + + if (xlogreader->EndRecPtr > end_lsn) + break; + + desc = GetRmgr(XLogRecGetRmid(xlogreader)); + rmgr_name = desc.rm_name; + rec_type = desc.rm_identify(XLogRecGetInfo(xlogreader)); + if (rec_type == NULL) + rec_type = "UNKNOWN"; + + /* Find existing entry or create new one */ + for (i = 0; i < nstats; i++) + { + if (strcmp(stats[i].resource_manager, rmgr_name) == 0 && + strcmp(stats[i].record_type, rec_type) == 0) + { + found = i; + break; + } + } + + if (found < 0) + { + if (nstats >= MAX_WAL_STAT_ENTRIES) + { + ereport(WARNING, + (errmsg("WAL record stat entries limit reached (%d)", + MAX_WAL_STAT_ENTRIES))); + break; + } + found = nstats++; + strlcpy(stats[found].resource_manager, rmgr_name, + sizeof(stats[found].resource_manager)); + strlcpy(stats[found].record_type, rec_type, + sizeof(stats[found].record_type)); + } + + stats[found].count++; + stats[found].total_record_length += XLogRecGetTotalLen(xlogreader); + stats[found].total_main_data_length += XLogRecGetDataLen(xlogreader); + + if (XLogRecHasAnyBlockRefs(xlogreader)) + { + uint32 fpi_len = 0; + StringInfoData dummy; + + initStringInfo(&dummy); + XLogRecGetBlockRefInfo(xlogreader, false, false, &dummy, &fpi_len); + pfree(dummy.data); + stats[found].total_fpi_length += fpi_len; + } + + CHECK_FOR_INTERRUPTS(); + } + + XLogReaderFree(xlogreader); + + /* Emit result rows */ + for (i = 0; i < nstats; i++) + { + Datum values[WAL_RECORD_STATS_COLS] = {0}; + bool nulls[WAL_RECORD_STATS_COLS] = {0}; + int col = 0; + + values[col++] = CStringGetTextDatum(stats[i].resource_manager); + values[col++] = CStringGetTextDatum(stats[i].record_type); + values[col++] = Int64GetDatum(stats[i].count); + values[col++] = Int64GetDatum(stats[i].total_record_length); + values[col++] = Int64GetDatum(stats[i].total_main_data_length); + values[col++] = Int64GetDatum(stats[i].total_fpi_length); + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, + values, nulls); + } + + PG_RETURN_VOID(); + +#undef WAL_RECORD_STATS_COLS +} diff --git a/src/test/modules/test_wal_record_stats/test_wal_record_stats.control b/src/test/modules/test_wal_record_stats/test_wal_record_stats.control new file mode 100644 index 00000000000..85a33766c71 --- /dev/null +++ b/src/test/modules/test_wal_record_stats/test_wal_record_stats.control @@ -0,0 +1,4 @@ +comment = 'Test WAL record stats reading from WAL buffers via WALReadFromBuffers' +default_version = '1.0' +module_pathname = '$libdir/test_wal_record_stats' +relocatable = true diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 49dfb662abc..fd283b7e88b 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3419,6 +3419,7 @@ WalRcvStreamOptions WalRcvWakeupReason WalReceiverConn WalReceiverFunctionsType +WalRecordStat WalSnd WalSndCtlData WalSndSendDataCallback -- 2.47.3