From c39dcb10df366a43e1511e5274704ddf02feb3e2 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Tue, 14 Nov 2023 20:41:57 +0900 Subject: [PATCH v3 3/3] Add basic test for promotion and restart race condition This test checks after 7863ee4def65 is reverted. test_injection_points is extended so as it is possible to add condition variables to wait for in the point callbacks, with a SQL function to broadcast condition variables that may be sleeping. --- src/backend/access/transam/xlog.c | 7 + .../modules/test_injection_points/meson.build | 6 + .../t/002_invalid_checkpoint_after_promote.pl | 132 ++++++++++++++++++ .../test_injection_points--1.0.sql | 10 ++ .../test_injection_points.c | 73 ++++++++++ 5 files changed, 228 insertions(+) create mode 100644 src/test/modules/test_injection_points/t/002_invalid_checkpoint_after_promote.pl diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 1159dff1a6..b2f9a78467 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -99,6 +99,7 @@ #include "storage/sync.h" #include "utils/guc_hooks.h" #include "utils/guc_tables.h" +#include "utils/injection_point.h" #include "utils/memutils.h" #include "utils/ps_status.h" #include "utils/relmapper.h" @@ -7329,6 +7330,12 @@ CreateRestartPoint(int flags) CheckPointGuts(lastCheckPoint.redo, flags); + /* + * This location is important to be after CheckPointGuts() to ensure + * that some work has happened. + */ + INJECTION_POINT_RUN("CreateRestartPoint"); + /* * Remember the prior checkpoint's redo ptr for * UpdateCheckPointDistanceEstimate() diff --git a/src/test/modules/test_injection_points/meson.build b/src/test/modules/test_injection_points/meson.build index 7509a102ef..c6bb8eda77 100644 --- a/src/test/modules/test_injection_points/meson.build +++ b/src/test/modules/test_injection_points/meson.build @@ -34,4 +34,10 @@ tests += { 'test_injection_points', ], }, + 'tap': { + 'tests': [ + 't/001_snapshot_status.pl', + 't/002_invalid_checkpoint_after_promote.pl', + ], + }, } diff --git a/src/test/modules/test_injection_points/t/002_invalid_checkpoint_after_promote.pl b/src/test/modules/test_injection_points/t/002_invalid_checkpoint_after_promote.pl new file mode 100644 index 0000000000..5087bc5f9f --- /dev/null +++ b/src/test/modules/test_injection_points/t/002_invalid_checkpoint_after_promote.pl @@ -0,0 +1,132 @@ +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Time::HiRes qw(usleep nanosleep); +use Test::More; + +# initialize primary node +my $node_primary = PostgreSQL::Test::Cluster->new('master'); +$node_primary->init(allows_streaming => 1); +$node_primary->append_conf( + 'postgresql.conf', q[ +checkpoint_timeout = 30s +log_checkpoints = on +restart_after_crash = on +]); +$node_primary->start; +my $backup_name = 'my_backup'; +$node_primary->backup($backup_name); + +# setup a standby +my $node_standby = PostgreSQL::Test::Cluster->new('standby1'); +$node_standby->init_from_backup($node_primary, $backup_name, has_streaming => 1); +$node_standby->start; + +# dummy table for the upcoming tests. +$node_primary->safe_psql('postgres', 'checkpoint'); +$node_primary->safe_psql('postgres', 'CREATE TABLE prim_tab (a int);'); + +# Register a injection point on the standby so as the follow-up +# restart point running on it will wait. +$node_primary->safe_psql('postgres', 'CREATE EXTENSION test_injection_points;'); +# Wait until the extension has been created on the standby +$node_primary->wait_for_replay_catchup($node_standby); +# This causes a restartpoint to wait on a standby. +$node_standby->safe_psql('postgres', + "SELECT test_injection_points_create('CreateRestartPoint', 'wait');"); + +# Execute a restart point on the standby, that will be waited on. +# This needs to be in the background as we'll wait on it. +my $logstart = -s $node_standby->logfile; +my $psql_session = + $node_standby->background_psql('postgres', on_error_stop => 0); +$psql_session->query_until(qr/starting_checkpoint/, q( + \echo starting_checkpoint + CHECKPOINT; +)); + +# Switch one WAL segment to make the restartpoint remove it. +$node_primary->safe_psql('postgres', 'INSERT INTO prim_tab VALUES (1);'); +$node_primary->safe_psql('postgres', 'SELECT pg_switch_wal();'); +$node_primary->wait_for_replay_catchup($node_standby); + +# Wait until the checkpointer is in the middle of the restartpoint +# processing. +ok( $node_standby->poll_query_until( + 'postgres', + qq[SELECT count(*) FROM pg_stat_activity + WHERE backend_type = 'checkpointer' AND wait_event = 'test_injection_wait' ;], + '1'), + 'checkpointer is waiting at restart point' + ) or die "Timed out while waiting for checkpointer to run restartpoint"; + + +# Restartpoint should have started on standby. +my $log = slurp_file($node_standby->logfile, $logstart); +my $checkpoint_start = 0; +if ($log =~ m/restartpoint starting: immediate wait/) +{ + $checkpoint_start = 1; +} +is($checkpoint_start, 1, 'restartpoint has started'); + +# promote during restartpoint +$node_primary->stop; +$node_standby->promote; + +# Update the start position before waking up the checkpointer! +$logstart = -s $node_standby->logfile; + +# Now wake up the checkpointer +$node_standby->safe_psql('postgres', + "SELECT test_injection_points_wake();"); + +# wait until checkpoint completes on the newly-promoted standby. +my $checkpoint_complete = 0; +for (my $i = 0; $i < 3000; $i++) +{ + my $log = slurp_file($node_standby->logfile, $logstart); + if ($log =~ m/restartpoint complete/) + { + $checkpoint_complete = 1; + last; + } + usleep(100_000); +} +is($checkpoint_complete, 1, 'restartpoint has completed'); + +# kill SIGKILL a backend, and all backend will restart. Note that previous checkpoint has not completed. +my $psql_timeout = IPC::Run::timer(3600); +my ($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', ''); +my $killme = IPC::Run::start( + [ 'psql', '-XAtq', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d', $node_standby->connstr('postgres') ], + '<', + \$killme_stdin, + '>', + \$killme_stdout, + '2>', + \$killme_stderr, + $psql_timeout); +$killme_stdin .= q[ +SELECT pg_backend_pid(); +]; +$killme->pump until $killme_stdout =~ /[[:digit:]]+[\r\n]$/; +my $pid = $killme_stdout; +chomp($pid); +my $ret = PostgreSQL::Test::Utils::system_log('pg_ctl', 'kill', 'KILL', $pid); +is($ret, 0, 'killed process with KILL'); +my $stdout; +my $stderr; + +# after recovery, the server will not start, and log PANIC: could not locate a valid checkpoint record +for (my $i = 0; $i < 30; $i++) +{ + ($ret, $stdout, $stderr) = $node_standby->psql('postgres', 'select 1'); + last if $ret == 0; + sleep(1); +} +is($ret, 0, "psql connect success"); +is($stdout, 1, "psql select 1"); + +done_testing(); diff --git a/src/test/modules/test_injection_points/test_injection_points--1.0.sql b/src/test/modules/test_injection_points/test_injection_points--1.0.sql index c4c42b5b69..2fefd48d98 100644 --- a/src/test/modules/test_injection_points/test_injection_points--1.0.sql +++ b/src/test/modules/test_injection_points/test_injection_points--1.0.sql @@ -25,6 +25,16 @@ RETURNS void AS 'MODULE_PATHNAME', 'test_injection_points_run' LANGUAGE C STRICT PARALLEL UNSAFE; +-- +-- test_injection_points_wake() +-- +-- Wakes a condition variable executed in an injection point. +-- +CREATE FUNCTION test_injection_points_wake() +RETURNS void +AS 'MODULE_PATHNAME', 'test_injection_points_wake' +LANGUAGE C STRICT PARALLEL UNSAFE; + -- -- test_injection_points_drop() -- diff --git a/src/test/modules/test_injection_points/test_injection_points.c b/src/test/modules/test_injection_points/test_injection_points.c index e02b676773..b3e9c0c572 100644 --- a/src/test/modules/test_injection_points/test_injection_points.c +++ b/src/test/modules/test_injection_points/test_injection_points.c @@ -18,13 +18,56 @@ #include "postgres.h" #include "fmgr.h" +#include "storage/condition_variable.h" +#include "storage/lwlock.h" +#include "storage/shmem.h" #include "utils/builtins.h" #include "utils/injection_point.h" +#include "utils/wait_event.h" PG_MODULE_MAGIC; +/* Shared state information for injection points. */ +typedef struct TestInjectionPointSharedState +{ + /* + * Wait variable that can be registered at a given point, and that can be + * awakened via SQL. + */ + ConditionVariable wait_point; +} TestInjectionPointSharedState; + +/* Pointer to shared-memory state. */ +static TestInjectionPointSharedState *inj_state = NULL; + +/* Wait event when waiting on condition variable */ +static uint32 test_injection_wait_event = 0; + extern PGDLLEXPORT void test_injection_error(const char *name); extern PGDLLEXPORT void test_injection_notice(const char *name); +extern PGDLLEXPORT void test_injection_wait(const char *name); + + +static void +test_injection_init_shmem(void) +{ + bool found; + + if (inj_state != NULL) + return; + + LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); + inj_state = ShmemInitStruct("test_injection_points", + sizeof(TestInjectionPointSharedState), + &found); + if (!found) + { + /* First time through ... */ + MemSet(inj_state, 0, sizeof(TestInjectionPointSharedState)); + ConditionVariableInit(&inj_state->wait_point); + } + LWLockRelease(AddinShmemInitLock); +} /* Set of callbacks available at point creation */ void @@ -39,6 +82,20 @@ test_injection_notice(const char *name) elog(NOTICE, "notice triggered for injection point %s", name); } +void +test_injection_wait(const char *name) +{ + if (inj_state == NULL) + test_injection_init_shmem(); + if (test_injection_wait_event == 0) + test_injection_wait_event = WaitEventExtensionNew("test_injection_wait"); + + /* And sleep.. */ + ConditionVariablePrepareToSleep(&inj_state->wait_point); + ConditionVariableSleep(&inj_state->wait_point, test_injection_wait_event); + ConditionVariableCancelSleep(); +} + /* * SQL function for creating an injection point. */ @@ -54,6 +111,8 @@ test_injection_points_create(PG_FUNCTION_ARGS) function = "test_injection_error"; else if (strcmp(mode, "notice") == 0) function = "test_injection_notice"; + else if (strcmp(mode, "wait") == 0) + function = "test_injection_wait"; else elog(ERROR, "incorrect mode \"%s\" for injection point creation", mode); @@ -76,6 +135,20 @@ test_injection_points_run(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } +/* + * SQL function for waking a condition variable. + */ +PG_FUNCTION_INFO_V1(test_injection_points_wake); +Datum +test_injection_points_wake(PG_FUNCTION_ARGS) +{ + if (inj_state == NULL) + test_injection_init_shmem(); + + ConditionVariableBroadcast(&inj_state->wait_point); + PG_RETURN_VOID(); +} + /* * SQL function for dropping an injection point. */ -- 2.42.0