From 21282eefbf4c2096ff1d683b5d996897b7932592 Mon Sep 17 00:00:00 2001 From: alterego655 <824662526@qq.com> Date: Mon, 23 Mar 2026 14:53:39 +0800 Subject: [PATCH v3] Count WAL segment creations by all processes in log_checkpoints output The "WAL file(s) added" field in log_checkpoints output (and the corresponding ckpt_segs_added field reported via TRACE_POSTGRESQL_ CHECKPOINT_DONE) previously counted only segments preallocated by PreallocXlogFiles() inside CreateCheckPoint() and CreateRestartPoint(). Segments created by ordinary backends, the WAL receiver, or during end-of-recovery timeline initialization were silently excluded, making the reported count misleading on busy systems. Fix this by introducing a shared-memory atomic counter, walSegmentsCreated in XLogCtlData, which is incremented by any process whenever a new WAL segment file is installed via XLogFileInitInternal() or XLogFileCopy(). Each checkpoint or restartpoint computes ckpt_segs_added as the difference between the current counter value and a stored baseline, then advances the baseline. The baseline is initialized to zero, so the first checkpoint after startup naturally captures segments created during end-of-recovery timeline initialization. The metric is now "new WAL segment files created since the previous successful checkpoint or restartpoint, by any process." This is a deliberate semantic change from "preallocated by the checkpointer." The arity and types of TRACE_POSTGRESQL_CHECKPOINT_DONE are unchanged; only the meaning of arg2 changes. The old direct increment of CheckpointStats.ckpt_segs_added inside PreallocXlogFiles() is removed. Update config.sgml and monitoring.sgml to document the new semantics. 
--- doc/src/sgml/config.sgml | 4 ++ doc/src/sgml/monitoring.sgml | 5 +- src/backend/access/transam/xlog.c | 56 ++++++++++++++++- src/test/recovery/meson.build | 1 + src/test/recovery/t/053_wal_segs_added.pl | 73 +++++++++++++++++++++++ 5 files changed, 135 insertions(+), 4 deletions(-) create mode 100644 src/test/recovery/t/053_wal_segs_added.pl diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 8cdd826fbd3..8a20a34caec 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -7674,6 +7674,10 @@ local0.* /var/log/postgresql Causes checkpoints and restartpoints to be logged in the server log. Some statistics are included in the log messages, including the number of buffers written and the time spent writing them. + The number of WAL files added counts all new WAL segment files created + by any process (including regular backends and the WAL receiver) since + the previous checkpoint or restartpoint, not only those preallocated + by the checkpointer. This parameter can only be set in the postgresql.conf file or on the server command line. The default is on. diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 462019a972c..d43be3d34ca 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -7692,8 +7692,9 @@ FROM pg_stat_get_backend_idset() AS backendid; Probe that fires when a checkpoint is complete. (The probes listed next fire in sequence during checkpoint processing.) arg0 is the number of buffers written. arg1 is the total number of - buffers. arg2, arg3 and arg4 contain the number of WAL files added, - removed and recycled respectively. + buffers. arg2 is the number of new WAL files created by any process + since the previous checkpoint or restartpoint. arg3 and arg4 contain + the number of WAL files removed and recycled respectively. 
clog-checkpoint-start diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index f5c9a34374d..c9ce9643a4d 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -475,6 +475,24 @@ typedef struct XLogCtlData pg_atomic_uint64 logWriteResult; /* last byte + 1 written out */ pg_atomic_uint64 logFlushResult; /* last byte + 1 flushed */ + /* + * Cumulative count of new WAL segment files created since startup, by any + * process. Used to compute per-checkpoint "WAL file(s) added" via + * differencing against walSegsCreatedLastCheckpoint. + */ + pg_atomic_uint64 walSegmentsCreated; + + /* + * Value of walSegmentsCreated recorded when the last checkpoint or + * restartpoint computed its ckpt_segs_added count. The next + * checkpoint/restartpoint diffs against this to get its own count, and + * then advances this value. Initialized to 0, so the first checkpoint + * captures all segments created since startup (including end-of-recovery + * timeline initialization). Writers are serialized (at most one + * checkpoint or restartpoint runs at a time), so no lock is required. + */ + uint64 walSegsCreatedLastCheckpoint; + /* * Latest initialized page in the cache (last byte position + 1).
* @@ -3369,6 +3387,7 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli, logtli)) { *added = true; + pg_atomic_fetch_add_u64(&XLogCtl->walSegmentsCreated, 1); elog(DEBUG2, "done creating and filling new WAL file"); } else @@ -3552,6 +3571,7 @@ XLogFileCopy(TimeLineID destTLI, XLogSegNo destsegno, */ if (!InstallXLogFileSegment(&destsegno, tmppath, false, 0, destTLI)) elog(ERROR, "InstallXLogFileSegment should not have failed"); + pg_atomic_fetch_add_u64(&XLogCtl->walSegmentsCreated, 1); } /* @@ -3727,8 +3747,6 @@ PreallocXlogFiles(XLogRecPtr endptr, TimeLineID tli) lf = XLogFileInitInternal(_logSegNo, tli, &added, path); if (lf >= 0) close(lf); - if (added) - CheckpointStats.ckpt_segs_added++; } } @@ -5093,6 +5111,8 @@ XLOGShmemInit(void) XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH; XLogCtl->InstallXLogFileSegmentActive = false; XLogCtl->WalWriterSleeping = false; + pg_atomic_init_u64(&XLogCtl->walSegmentsCreated, 0); + XLogCtl->walSegsCreatedLastCheckpoint = 0; SpinLockInit(&XLogCtl->Insert.insertpos_lck); SpinLockInit(&XLogCtl->info_lck); @@ -7019,6 +7039,7 @@ CreateCheckPoint(int flags) VirtualTransactionId *vxids; int nvxids; int oldXLogAllowed = 0; + uint64 current; /* * An end-of-recovery checkpoint is really a shutdown checkpoint, just @@ -7473,6 +7494,22 @@ CreateCheckPoint(int flags) if (!RecoveryInProgress()) TruncateSUBTRANS(GetOldestTransactionIdConsideredRunning()); + /* + * Compute the number of new WAL segments created since the last + * checkpoint or restartpoint (by any process), and advance the baseline + * for the next interval. The initial baseline is 0, so the first + * checkpoint captures segments created during end-of-recovery timeline + * initialization. + */ + current = pg_atomic_read_u64(&XLogCtl->walSegmentsCreated); + + /* Overflow requires >2^31 new segments between checkpoints (32 PB at + * default segment size), which is unreachable in practice. 
+ */ + CheckpointStats.ckpt_segs_added = (int) + (current - XLogCtl->walSegsCreatedLastCheckpoint); + XLogCtl->walSegsCreatedLastCheckpoint = current; + /* Real work is done; log and update stats. */ LogCheckpointEnd(false, flags); @@ -7724,6 +7761,7 @@ CreateRestartPoint(int flags) XLogRecPtr endptr; XLogSegNo _logSegNo; TimestampTz xtime; + uint64 current; /* Concurrent checkpoint/restartpoint cannot happen */ Assert(!IsUnderPostmaster || MyBackendType == B_CHECKPOINTER); @@ -7944,6 +7982,20 @@ CreateRestartPoint(int flags) if (EnableHotStandby) TruncateSUBTRANS(GetOldestTransactionIdConsideredRunning()); + /* + * Compute the number of new WAL segments created since the last + * checkpoint or restartpoint (by any process), and advance the baseline + * for the next interval. + */ + current = pg_atomic_read_u64(&XLogCtl->walSegmentsCreated); + + /* Overflow requires >2^31 new segments between checkpoints (32 PB at + * default segment size), which is unreachable in practice. + */ + CheckpointStats.ckpt_segs_added = (int) + (current - XLogCtl->walSegsCreatedLastCheckpoint); + XLogCtl->walSegsCreatedLastCheckpoint = current; + /* Real work is done; log and update stats. 
*/ LogCheckpointEnd(true, flags); diff --git a/src/test/recovery/meson.build b/src/test/recovery/meson.build index 36d789720a3..a7fa6605663 100644 --- a/src/test/recovery/meson.build +++ b/src/test/recovery/meson.build @@ -61,6 +61,7 @@ tests += { 't/050_redo_segment_missing.pl', 't/051_effective_wal_level.pl', 't/052_checkpoint_segment_missing.pl', + 't/053_wal_segs_added.pl', ], }, } diff --git a/src/test/recovery/t/053_wal_segs_added.pl b/src/test/recovery/t/053_wal_segs_added.pl new file mode 100644 index 00000000000..405740f674e --- /dev/null +++ b/src/test/recovery/t/053_wal_segs_added.pl @@ -0,0 +1,73 @@ +# Copyright (c) 2026, PostgreSQL Global Development Group +# +# Test that "WAL file(s) added" in log_checkpoints output counts new segments +# created by any process, not only those preallocated by the checkpointer. +# +# We verify this by first counting any future segment files that already +# exist, then forcing enough segment switches to move past all of them. That +# guarantees that at least one later switch must create a new segment in the +# backend, rather than merely opening a pre-existing file. + +use strict; +use warnings FATAL => 'all'; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Start a node with log_checkpoints enabled. Disable autovacuum to avoid +# background WAL noise that could interfere with the segment count. +my $node = PostgreSQL::Test::Cluster->new('primary'); +$node->init; +$node->append_conf( + 'postgresql.conf', q[ +log_checkpoints = on +autovacuum = off +]); +$node->start; + +# Run a checkpoint first so the baseline (walSegsCreatedLastCheckpoint) +# is set to the current walSegmentsCreated value. After this, any new +# segment creation will be counted toward the next checkpoint. +$node->safe_psql('postgres', 'CHECKPOINT'); + +# Note the log file position after the baseline checkpoint. +my $log_start = -s $node->logfile; + +# Count how many future WAL segments already exist. 
A simple pg_switch_wal() +# is not enough here, because it can just open a segment file that was +# preallocated earlier. By advancing past all future files currently present, +# we force a later switch to create at least one new segment in the backend. +my $current_walfile = + $node->safe_psql('postgres', 'SELECT pg_walfile_name(pg_current_wal_lsn())'); + +my $future_segments = $node->safe_psql('postgres', + "SELECT count(*) FROM pg_ls_dir('pg_wal') + WHERE pg_ls_dir ~ '^[0-9A-F]{24}\$' + AND pg_ls_dir > '$current_walfile'"); + +$node->advance_wal($future_segments + 1); + +# Now run a checkpoint and capture its log output. The differencing logic +# computes ckpt_segs_added from the current walSegmentsCreated minus the +# stored baseline, so we expect at least 1 WAL file added. +$node->safe_psql('postgres', 'CHECKPOINT'); + +# Wait for the checkpoint completion log entry before reading the logfile. +$node->wait_for_log(qr/checkpoint complete/, $log_start); + +my $logfile = slurp_file($node->logfile, $log_start); + +# The checkpoint log line contains "N WAL file(s) added". Match the count. +like( + $logfile, + qr/checkpoint complete.*?(\d+) WAL file\(s\) added/, + 'checkpoint log line mentions WAL files added'); + +# Capture the count specifically and verify it is at least 1. +my ($segs_added) = + $logfile =~ /checkpoint complete[^\n]*?(\d+) WAL file\(s\) added/; +ok(defined $segs_added && $segs_added >= 1, + "at least 1 WAL segment reported as added (got: $segs_added)"); + +$node->stop; +done_testing(); -- 2.51.0