From 17b83eb9d1b5a825e1e2bfca9d360a738213bf01 Mon Sep 17 00:00:00 2001 From: Ashutosh Bapat Date: Wed, 1 Oct 2025 09:38:19 +0530 Subject: [PATCH 4/4] WIP test shared buffers resizing and checkpoint A new test triggers an injection point in BufferSync() after it has collected the buffers to be flushed. Simultaneously it starts shrinking the shared buffers. The expectation is that the checkpointer would crash accessing a buffer (descriptor) outside the new range of shared buffers. But that does not happen because of a bug in synchronization. The checkpointer does not reload the configuration while a checkpoint is in progress, so it does not load the new value of the configuration. When the resizing is triggered by the PM, the checkpointer receives the proc signal barrier, but it does not enter the barrier mechanism and does not alter its address maps or memory sizes. Hence the test does not crash. But of course it means that the checkpointer won't consider the correct size of buffers the next time it performs a checkpoint. The test was at least useful to detect this anomaly. Once we fix the synchronization issue we should see the crash and then fix the crash. Author: Ashutosh Bapat Notes to reviewers ------------------ 1. pg_buffercache used a query on pg_settings to fetch the number of buffers. That doesn't work anymore because of the change in the output of SHOW shared_buffers. Modified the test to convert the setting value to the number of shared buffers, save it in a variable and use the variable in the queries which need the number of shared buffers. We could instead fix ShowGUCOption() to pass the use_units flag to show_hook and let it output the number of shared buffers instead. But that seems a larger change. There aren't other GUCs whose show_hook outputs their values with units. So this local fix might be better. 
--- .../expected/pg_buffercache.out | 19 ++- contrib/pg_buffercache/sql/pg_buffercache.sql | 19 ++- src/backend/storage/buffer/bufmgr.c | 4 + src/test/buffermgr/meson.build | 1 + .../t/002_checkpoint_buffer_resize.pl | 111 ++++++++++++++++++ 5 files changed, 130 insertions(+), 24 deletions(-) create mode 100644 src/test/buffermgr/t/002_checkpoint_buffer_resize.pl diff --git a/contrib/pg_buffercache/expected/pg_buffercache.out b/contrib/pg_buffercache/expected/pg_buffercache.out index 2f27bf34637..632b12abbf8 100644 --- a/contrib/pg_buffercache/expected/pg_buffercache.out +++ b/contrib/pg_buffercache/expected/pg_buffercache.out @@ -1,8 +1,9 @@ CREATE EXTENSION pg_buffercache; -select count(*) = (select setting::bigint - from pg_settings - where name = 'shared_buffers') -from pg_buffercache; +select pg_size_bytes(setting)/(select setting::bigint from pg_settings where name = 'block_size') AS nbuffers + from pg_settings + where name = 'shared_buffers' +\gset +select count(*) = :nbuffers from pg_buffercache; ?column? ---------- t @@ -24,20 +25,14 @@ SELECT count(*) > 0 FROM pg_buffercache_usage_counts() WHERE buffers >= 0; (1 row) -- Test the buffer lookup table function and count is <= shared_buffers -select count(*) <= (select setting::bigint - from pg_settings - where name = 'shared_buffers') -from pg_buffercache_lookup_table_entries(); +select count(*) <= :nbuffers from pg_buffercache_lookup_table_entries(); ?column? ---------- t (1 row) -- Check that pg_buffercache_lookup_table view works and count is <= shared_buffers -select count(*) <= (select setting::bigint - from pg_settings - where name = 'shared_buffers') -from pg_buffercache_lookup_table; +select count(*) <= :nbuffers from pg_buffercache_lookup_table; ?column? 
---------- t diff --git a/contrib/pg_buffercache/sql/pg_buffercache.sql b/contrib/pg_buffercache/sql/pg_buffercache.sql index 569b28aebb9..11fe85ceb3b 100644 --- a/contrib/pg_buffercache/sql/pg_buffercache.sql +++ b/contrib/pg_buffercache/sql/pg_buffercache.sql @@ -1,9 +1,10 @@ CREATE EXTENSION pg_buffercache; -select count(*) = (select setting::bigint - from pg_settings - where name = 'shared_buffers') -from pg_buffercache; +select pg_size_bytes(setting)/(select setting::bigint from pg_settings where name = 'block_size') AS nbuffers + from pg_settings + where name = 'shared_buffers' +\gset +select count(*) = :nbuffers from pg_buffercache; select buffers_used + buffers_unused > 0, buffers_dirty <= buffers_used, @@ -13,16 +14,10 @@ from pg_buffercache_summary(); SELECT count(*) > 0 FROM pg_buffercache_usage_counts() WHERE buffers >= 0; -- Test the buffer lookup table function and count is <= shared_buffers -select count(*) <= (select setting::bigint - from pg_settings - where name = 'shared_buffers') -from pg_buffercache_lookup_table_entries(); +select count(*) <= :nbuffers from pg_buffercache_lookup_table_entries(); -- Check that pg_buffercache_lookup_table view works and count is <= shared_buffers -select count(*) <= (select setting::bigint - from pg_settings - where name = 'shared_buffers') -from pg_buffercache_lookup_table; +select count(*) <= :nbuffers from pg_buffercache_lookup_table; -- Check that the functions / views can't be accessed by default. To avoid -- having to create a dedicated user, use the pg_database_owner pseudo-role. diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 6c8f8552a4c..f489ae2932f 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -67,6 +67,7 @@ #include "utils/rel.h" #include "utils/resowner.h" #include "utils/timestamp.h" +#include "utils/injection_point.h" /* Note: these two macros only work on shared buffers, not local ones! 
*/ @@ -3416,6 +3417,9 @@ BufferSync(int flags) ProcessProcSignalBarrier(); } + /* Injection point after scanning all buffers for dirty pages */ + INJECTION_POINT("buffer-sync-dirty-buffer-scan", NULL); + if (num_to_scan == 0) return; /* nothing to do */ diff --git a/src/test/buffermgr/meson.build b/src/test/buffermgr/meson.build index c24bff721e6..f33feb64a06 100644 --- a/src/test/buffermgr/meson.build +++ b/src/test/buffermgr/meson.build @@ -16,6 +16,7 @@ tests += { }, 'tests': [ 't/001_resize_buffer.pl', + 't/002_checkpoint_buffer_resize.pl', 't/003_parallel_resize_buffer.pl', 't/004_client_join_buffer_resize.pl', ], diff --git a/src/test/buffermgr/t/002_checkpoint_buffer_resize.pl b/src/test/buffermgr/t/002_checkpoint_buffer_resize.pl new file mode 100644 index 00000000000..9ab615b6557 --- /dev/null +++ b/src/test/buffermgr/t/002_checkpoint_buffer_resize.pl @@ -0,0 +1,111 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group +# +# Test coordination between shared_buffers resizing and a concurrent checkpoint, using injection points + +use strict; +use warnings; +use IPC::Run; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Skip this test if injection points are not supported (treat an unset variable as "no") +if (($ENV{enable_injection_points} // '') ne 'yes') +{ + plan skip_all => 'Injection points not supported by this build'; +} + +# Initialize cluster with injection points enabled +my $node = PostgreSQL::Test::Cluster->new('main'); +$node->init; +$node->append_conf('postgresql.conf', 'shared_preload_libraries = injection_points'); +$node->append_conf('postgresql.conf', 'shared_buffers = 256kB'); +# Disable background writer to prevent interference with dirty buffers +$node->append_conf('postgresql.conf', 'bgwriter_lru_maxpages = 0'); +$node->start; + +# Load the injection points extension +$node->safe_psql('postgres', "CREATE EXTENSION injection_points"); + +# Create some data to make checkpoint meaningful and ensure many dirty buffers +$node->safe_psql('postgres', "CREATE
TABLE test_data (id int, data text)"); +# Insert far more data than shared_buffers can hold (each row is ~1kB, so only a few rows fit on a page) +$node->safe_psql('postgres', "INSERT INTO test_data SELECT i, repeat('x', 1000) FROM generate_series(1, 5000) i"); + +# Create additional tables to ensure we have plenty of dirty buffers +$node->safe_psql('postgres', "CREATE TABLE test_data2 AS SELECT * FROM test_data WHERE id <= 2500"); +$node->safe_psql('postgres', "CREATE TABLE test_data3 AS SELECT * FROM test_data WHERE id > 2500"); + +# Update data to create more dirty buffers +$node->safe_psql('postgres', "UPDATE test_data SET data = repeat('y', 1000) WHERE id % 3 = 0"); +$node->safe_psql('postgres', "UPDATE test_data2 SET data = repeat('z', 1000) WHERE id % 2 = 0"); + +# Prepare the new, smaller shared_buffers setting before starting the checkpoint +$node->safe_psql('postgres', "ALTER SYSTEM SET shared_buffers = '128kB'"); +$node->safe_psql('postgres', "SELECT pg_reload_conf()"); + +# Set up the injection point to make checkpoint wait +$node->safe_psql('postgres', "SELECT injection_points_attach('buffer-sync-dirty-buffer-scan', 'wait')"); + +# Start a checkpoint in the background that will trigger the injection point +my $checkpoint_session = $node->background_psql('postgres'); +$checkpoint_session->query_until( + qr/starting_checkpoint/, + q( + \echo starting_checkpoint + CHECKPOINT; + \q + ) +); + +# Wait until checkpointer actually reaches the injection point +$node->wait_for_event('checkpointer', 'buffer-sync-dirty-buffer-scan'); + +# Verify via pg_stat_activity that the checkpointer is still blocked at the injection point +my $checkpoint_running = $node->safe_psql('postgres', + "SELECT COUNT(*) FROM pg_stat_activity WHERE backend_type = 'checkpointer' AND wait_event = 'buffer-sync-dirty-buffer-scan'"); +is($checkpoint_running, '1', 'Checkpoint is waiting at injection point'); + +# Start the resize operation in the background (don't wait for completion) +my $resize_session = $node->background_psql('postgres');
+$resize_session->query_until( + qr/starting_resize/, + q( + \echo starting_resize + SELECT pg_resize_shared_buffers(); + ) +); + +# Remember the current log size, then release the checkpointer from the injection point +my $log_offset = -s $node->logfile; +$node->safe_psql('postgres', "SELECT injection_points_wakeup('buffer-sync-dirty-buffer-scan')"); + +# Wait for the checkpoint to complete (only log output after the wakeup is considered) +$node->wait_for_log(qr/checkpoint complete/, $log_offset); + +# Wait for the resize to finish: this echo is queued behind pg_resize_shared_buffers() in the same session +$resize_session->query(q(\echo 'resize_complete')); + +pass('Checkpoint and buffer resize both completed after injection point was released'); + +# Verify the resize actually worked +is($node->safe_psql('postgres', "SHOW shared_buffers"), '128kB', + 'Buffer resize completed successfully after checkpoint coordination'); + +# Clean up the background resize session +$resize_session->quit; + +# Clean up the injection point +$node->safe_psql('postgres', "SELECT injection_points_detach('buffer-sync-dirty-buffer-scan')"); + +# Verify system remains stable after coordinated operations + +# Perform a normal checkpoint to ensure everything is working +$node->safe_psql('postgres', "CHECKPOINT"); + +pass('System remains stable after injection point testing'); + +# Cleanup +$node->safe_psql('postgres', "DROP TABLE test_data, test_data2, test_data3"); + +done_testing(); \ No newline at end of file -- 2.34.1