From a24b6fc17b633514de2ca5064c65029910be06ac Mon Sep 17 00:00:00 2001 From: Daniel Gustafsson Date: Thu, 23 Apr 2026 10:44:09 +0200 Subject: [PATCH 2/8] Test improvements for online checksums This includes a number of smaller fixups to the online checksums test module which were found during postcommit review and stabilization work. * Fix scope increase for PG_TEST_EXTRA: The online checksums tests have two levels of PG_TEST_EXTRA, checksum and checksums_extended for extra test runs and test runs with increased randomization. The logic for increasing the number of test iterations was however backwards. * Change stopmode for PITR test: The pitr suite used immediate stop mode which caused problems on slower machines where the SIGQUIT would interrupt archive commands leaving partial WAL files behind. This would then prevent restart. Fix by using fast mode which is the appropriate mode for the test at hand. Also increase timeouts to help slower test systems since an expired timeout will incur the same effect as an immediate standby with a partial WAL left behind. This issue was observed when running the test suites on a Raspberry Pi 4 machine. * Improve logging: The test suite for data checksums uses a set of helper functions in a Perl module to avoid repeating code. This change makes sure that the helper functions do a better job of logging their test output to make debugging easier. * Remove unused code: wait_for_cluster_crash was used during the development of online checksums but was never used in any test which shipped, so remove the function. * Standby fixes: Ensure no vacuum on pgbench init on standby with -n to avoid bogus error message in the log, and enable hot_standby_feedback to prevent queries from getting cancelled due to recovery on slower systems. 
Author: Daniel Gustafsson Author: Tomas Vondra Discussion: https://postgr.es/m/xxx --- .../test_checksums/t/007_pgbench_standby.pl | 12 +++-- src/test/modules/test_checksums/t/008_pitr.pl | 5 +- .../test_checksums/t/DataChecksums/Utils.pm | 53 +++---------------- 3 files changed, 20 insertions(+), 50 deletions(-) diff --git a/src/test/modules/test_checksums/t/007_pgbench_standby.pl b/src/test/modules/test_checksums/t/007_pgbench_standby.pl index f3611e7ce25..0b3996f1d69 100644 --- a/src/test/modules/test_checksums/t/007_pgbench_standby.pl +++ b/src/test/modules/test_checksums/t/007_pgbench_standby.pl @@ -49,8 +49,8 @@ my $node_standby_loglocation = 0; # of tests performed and the wall time taken is non-deterministic as the test # performs a lot of randomized actions, but 5 iterations will be a long test # run regardless. -my $TEST_ITERATIONS = 5; -$TEST_ITERATIONS = 1 if ($extended); +my $TEST_ITERATIONS = 1; +$TEST_ITERATIONS = 5 if ($extended); # Variables which record the current state of the cluster my $data_checksum_state = 'off'; @@ -83,6 +83,7 @@ sub background_pgbench push(@cmd, '-C') if ($extended && cointoss()); # If we run on a standby it needs to be a read-only benchmark push(@cmd, '-S') if ($standby); + push(@cmd, '-n') if ($standby); # Finally add the database name to use push(@cmd, 'postgres'); @@ -146,8 +147,10 @@ sub flip_data_checksums . "FROM pg_catalog.pg_settings " . "WHERE name = 'data_checksums';"); - is(($result eq 'inprogress-on' || $result eq 'on'), - 1, 'ensure checksums are on, or in progress, on standby_1'); + is( ($result eq 'inprogress-on' || $result eq 'on'), + 1, + 'ensure checksums are on, or in progress, on standby_1, got: ' + . 
$result); # Wait for checksums enabled on the primary and standby wait_for_checksum_state($node_primary, 'on'); @@ -210,6 +213,7 @@ $node_primary->append_conf( qq[ max_connections = 30 log_statement = none +hot_standby_feedback = on ]); $node_primary->start; $node_primary->safe_psql('postgres', 'CREATE EXTENSION test_checksums;'); diff --git a/src/test/modules/test_checksums/t/008_pitr.pl b/src/test/modules/test_checksums/t/008_pitr.pl index e8cb2b0ed96..1f8176686fd 100644 --- a/src/test/modules/test_checksums/t/008_pitr.pl +++ b/src/test/modules/test_checksums/t/008_pitr.pl @@ -124,11 +124,14 @@ $node_primary->init( has_archiving => 1, allows_streaming => 1, no_data_checksums => 1); +my $timeout_unit = 's'; $node_primary->append_conf( 'postgresql.conf', qq[ max_connections = 100 log_statement = none +wal_sender_timeout = $PostgreSQL::Test::Utils::timeout_default$timeout_unit +wal_receiver_timeout = $PostgreSQL::Test::Utils::timeout_default$timeout_unit ]); $node_primary->start; @@ -154,7 +157,7 @@ my ($pre_lsn, $post_lsn) = flip_data_checksums(); $node_primary->safe_psql('postgres', "UPDATE t SET a = a + 1;"); $node_primary->safe_psql('postgres', "SELECT pg_create_restore_point('a');"); $node_primary->safe_psql('postgres', "UPDATE t SET a = a + 1;"); -$node_primary->stop('immediate'); +$node_primary->stop('fast'); my $node_pitr = PostgreSQL::Test::Cluster->new('pitr_backup'); $node_pitr->init_from_backup( diff --git a/src/test/modules/test_checksums/t/DataChecksums/Utils.pm b/src/test/modules/test_checksums/t/DataChecksums/Utils.pm index fb704623a60..cb78dd6ecfb 100644 --- a/src/test/modules/test_checksums/t/DataChecksums/Utils.pm +++ b/src/test/modules/test_checksums/t/DataChecksums/Utils.pm @@ -43,7 +43,6 @@ our @EXPORT = qw( stopmode test_checksum_state wait_for_checksum_state - wait_for_cluster_crash ); =pod @@ -67,7 +66,10 @@ sub test_checksum_state my $result = $postgresnode->safe_psql('postgres', "SELECT setting FROM pg_catalog.pg_settings WHERE name = 
'data_checksums';" ); - is($result, $state, 'ensure checksums are set to ' . $state); + is($result, $state, + 'ensure checksums are set to ' + . $state . ' on ' + . $postgresnode->name()); return $result eq $state; } @@ -89,52 +91,13 @@ sub wait_for_checksum_state 'postgres', "SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';", $state); - is($res, 1, 'ensure data checksums are transitioned to ' . $state); + is($res, 1, + 'ensure data checksums are transitioned to ' + . $state . ' on ' + . $postgresnode->name()); return $res == 1; } -=item wait_for_cluster_crash(node, params) - -Repeatedly test if the cluster running at B responds to connections -and return when it no longer does so, or when it times out. Processing will -run for $PostgreSQL::Test::Utils::timeout_default seconds unless a timeout -value is specified as a parameter. Returns True if the cluster crashed, else -False if the process timed out. - -=over - -=item timeout - -Approximate number of seconds to wait for cluster to crash, default is -$PostgreSQL::Test::Utils::timeout_default. There are no real-time guarantees -that the total process time won't exceed the timeout. - -=back - -=cut - -sub wait_for_cluster_crash -{ - my $postgresnode = shift; - my %params = @_; - my $crash = 0; - - $params{timeout} = $PostgreSQL::Test::Utils::timeout_default - unless (defined($params{timeout})); - - for (my $naps = 0; $naps < $params{timeout}; $naps++) - { - if (!$postgresnode->is_alive) - { - $crash = 1; - last; - } - sleep(1); - } - - return $crash == 1; -} - =item enable_data_checksums($node, %params) Function for enabling data checksums in the cluster running at B. -- 2.39.3 (Apple Git-146)