From 4a31143cb8dc348d4f8f915a76e5698b185bca14 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Mon, 21 Dec 2015 16:44:20 +0900 Subject: [PATCH 3/3] Add recovery test suite This includes basic tests manipulating standbys, be they archiving or streaming nodes, and some basic sanity checks around them. PostgresNode is extended with a couple of routines allowing to set up WAL archiving, WAL streaming or WAL restore on a node, as well as a convenience routine to allow a promotion. --- src/bin/pg_rewind/RewindTest.pm | 2 +- src/test/Makefile | 2 +- src/test/perl/PostgresNode.pm | 125 +++++++++++++++++++++++++++- src/test/recovery/.gitignore | 3 + src/test/recovery/Makefile | 17 ++++ src/test/recovery/README | 19 +++++ src/test/recovery/t/001_stream_rep.pl | 62 ++++++++++++++ src/test/recovery/t/002_archiving.pl | 46 ++++++++++ src/test/recovery/t/003_recovery_targets.pl | 125 ++++++++++++++++++++++++++++ src/test/recovery/t/004_timeline_switch.pl | 67 +++++++++++++++ src/test/recovery/t/005_replay_delay.pl | 43 ++++++++++ 11 files changed, 506 insertions(+), 5 deletions(-) create mode 100644 src/test/recovery/.gitignore create mode 100644 src/test/recovery/Makefile create mode 100644 src/test/recovery/README create mode 100644 src/test/recovery/t/001_stream_rep.pl create mode 100644 src/test/recovery/t/002_archiving.pl create mode 100644 src/test/recovery/t/003_recovery_targets.pl create mode 100644 src/test/recovery/t/004_timeline_switch.pl create mode 100644 src/test/recovery/t/005_replay_delay.pl diff --git a/src/bin/pg_rewind/RewindTest.pm b/src/bin/pg_rewind/RewindTest.pm index 3e43d39..73ea203 100644 --- a/src/bin/pg_rewind/RewindTest.pm +++ b/src/bin/pg_rewind/RewindTest.pm @@ -177,7 +177,7 @@ sub promote_standby # Now promote slave and insert some new data on master, this will put # the master out-of-sync with the standby. Wait until the standby is # out of recovery mode, and is ready to accept read-write connections. 
- system_or_bail('pg_ctl', '-w', '-D', $node_standby->data_dir, 'promote'); + $node_standby->promote; $node_standby->poll_query_until('postgres', "SELECT NOT pg_is_in_recovery()") or die "Timed out while waiting for promotion of standby"; diff --git a/src/test/Makefile b/src/test/Makefile index b713c2c..7f7754f 100644 --- a/src/test/Makefile +++ b/src/test/Makefile @@ -12,7 +12,7 @@ subdir = src/test top_builddir = ../.. include $(top_builddir)/src/Makefile.global -SUBDIRS = regress isolation modules +SUBDIRS = regress isolation modules recovery # We don't build or execute examples/, locale/, or thread/ by default, # but we do want "make clean" etc to recurse into them. Likewise for ssl/, diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm index 60a2b81..465b9f5 100644 --- a/src/test/perl/PostgresNode.pm +++ b/src/test/perl/PostgresNode.pm @@ -171,6 +171,10 @@ sub init $params{hba_permit_replication} = 1 if (!defined($params{hba_permit_replication})); + $params{has_archiving} = 0 + if (!defined($params{has_archiving})); + $params{allows_streaming} = 0 + if (!defined($params{allows_streaming})); mkdir $self->backup_dir; mkdir $self->archive_dir; @@ -183,6 +187,18 @@ sub init print $conf "fsync = off\n"; print $conf "log_statement = all\n"; print $conf "port = $port\n"; + + if ($params{allows_streaming}) + { + print $conf "wal_level = hot_standby\n"; + print $conf "max_wal_senders = 5\n"; + print $conf "wal_keep_segments = 20\n"; + print $conf "max_wal_size = 128MB\n"; + print $conf "shared_buffers = 1MB\n"; + print $conf "wal_log_hints = on\n"; + print $conf "hot_standby = on\n"; + } + if ($TestLib::windows_os) { print $conf "listen_addresses = '$host'\n"; @@ -195,6 +211,7 @@ sub init close $conf; $self->set_replication_conf if ($params{hba_permit_replication}); + $self->enable_archiving if ($params{has_archiving}); } sub append_conf @@ -219,11 +236,18 @@ sub backup sub init_from_backup { - my ($self, $root_node, $backup_name) = @_; + my 
($self, $root_node, $backup_name, %params) = @_; my $backup_path = $root_node->backup_dir . '/' . $backup_name; my $port = $self->port; my $root_port = $root_node->port; + $params{hba_permit_replication} = 1 + if (!defined($params{hba_permit_replication})); + $params{has_streaming} = 0 + if (!defined($params{has_streaming})); + $params{has_restoring} = 0 + if (!defined($params{has_restoring})); + print "Initializing node $port from backup \"$backup_name\" of node $root_port\n"; die "Backup $backup_path does not exist" unless -d $backup_path; @@ -242,7 +266,10 @@ sub init_from_backup qq( port = $port )); - $self->set_replication_conf; + + $self->set_replication_conf if ($params{hba_permit_replication}); + $self->enable_restoring($root_node) if ($params{has_restoring}); + $self->enable_streaming($root_node) if ($params{has_streaming}); } sub start @@ -263,7 +290,6 @@ sub start } $self->_update_pid; - } sub stop @@ -292,6 +318,99 @@ sub restart $self->_update_pid; } +# Promote a standby with pg_ctl; a pg_ctl failure is not ignored (system_or_bail). +sub promote +{ + my ($self) = @_; + my $pgdata = $self->data_dir; + my $logfile = $self->logfile; + my $name = $self->name; + print "### Promoting node \"$name\"\n"; + TestLib::system_or_bail('pg_ctl', '-D', $pgdata, '-w', '-l', $logfile, + 'promote'); +} + +# +# Set of routines for replication and recovery +# +sub enable_streaming +{ + my ($self, $root_node) = @_; + my $root_connstr = $root_node->connstr; + my $name = $self->name; + my $pgdata = $self->data_dir; + my $port = $self->port; + + print "### Enabling streaming replication for node in $pgdata with port $port\n"; + $self->append_conf('recovery.conf', qq( +primary_conninfo='$root_connstr application_name=$name' +standby_mode=on +)); +} + +sub enable_restoring +{ + my ($self, $root_node) = @_; + my $path = $root_node->archive_dir; + my $pgdata = $self->data_dir; + my $port = $self->port; + + print "### Enabling restoring for node in $pgdata with port $port\n"; + + # Switch path to use slashes on Windows + my $copy_command = 
$TestLib::windows_os ? + qq{copy "$path/%f" "%p"} : + qq{cp $path/%f %p}; + + $self->append_conf('recovery.conf', qq( +restore_command = '$copy_command' +standby_mode = on +)); +} + +sub enable_archiving +{ + my ($self) = @_; + my $path = $self->archive_dir; + my $pgdata = $self->data_dir; + my $port = $self->port; + + print "### Enabling archiving for node in $pgdata with port $port\n"; + + # Switch path to use slashes on Windows + my $copy_command = $TestLib::windows_os ? + qq{copy "%p" "$path/%f"} : + qq{cp %p $path/%f}; + + # Enable archive_mode and archive_command on node + $self->append_conf('postgresql.conf', qq( +archive_mode = on +archive_command = '$copy_command' +)); +} + +# Wait until a node is able to accept queries. Returns 1 once pg_isready +# succeeds, or 0 after 30 failed attempts made one second apart. Useful +# when a node is slow to become reachable, e.g. while in recovery. +sub wait_for_access +{ + my ($self) = @_; + my $max_attempts = 30; + my $attempts = 0; + while ($attempts < $max_attempts) + { + if (TestLib::run_log(['pg_isready', '-d', $self->connstr('postgres')])) + { + return 1; + } + + # Wait a second before retrying. 
+ sleep 1; + $attempts++; + } + return 0; +} + sub _update_pid { my $self = shift; diff --git a/src/test/recovery/.gitignore b/src/test/recovery/.gitignore new file mode 100644 index 0000000..499fa7d --- /dev/null +++ b/src/test/recovery/.gitignore @@ -0,0 +1,3 @@ +# Generated by test suite +/regress_log/ +/tmp_check/ diff --git a/src/test/recovery/Makefile b/src/test/recovery/Makefile new file mode 100644 index 0000000..16c063a --- /dev/null +++ b/src/test/recovery/Makefile @@ -0,0 +1,17 @@ +#------------------------------------------------------------------------- +# +# Makefile for src/test/recovery +# +# Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group +# Portions Copyright (c) 1994, Regents of the University of California +# +# src/test/recovery/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/test/recovery +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +check: + $(prove_check) diff --git a/src/test/recovery/README b/src/test/recovery/README new file mode 100644 index 0000000..20b98e0 --- /dev/null +++ b/src/test/recovery/README @@ -0,0 +1,19 @@ +src/test/recovery/README + +Regression tests for recovery and replication +============================================= + +This directory contains a test suite for recovery and replication, +testing mainly the interactions of recovery.conf with cluster +instances by providing a simple set of routines that can be used +to define a custom cluster for a test, including backup, archiving, +and streaming configuration. + +Running the tests +================= + + make check + +NOTE: This creates a temporary installation, and some tests may +create one or multiple nodes, be they master or standby(s) for the +purpose of the tests. 
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl new file mode 100644 index 0000000..3ed9be3 --- /dev/null +++ b/src/test/recovery/t/001_stream_rep.pl @@ -0,0 +1,62 @@ +# Minimal test testing streaming replication +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More tests => 4; + +# Initialize master node +my $node_master = get_new_node('master'); +$node_master->init(allows_streaming => 1); +$node_master->start; +my $backup_name = 'my_backup'; + +# Take backup +$node_master->backup($backup_name); + +# Create streaming standby linking to master +my $node_standby_1 = get_new_node('standby_1'); +$node_standby_1->init_from_backup($node_master, $backup_name, + has_streaming => 1); +$node_standby_1->start; + +# Take backup of standby 1 (not mandatory, but useful to check if +# pg_basebackup works on a standby). +$node_standby_1->backup($backup_name); + +# Create second standby node linking to standby 1 +my $node_standby_2 = get_new_node('standby_2'); +$node_standby_2->init_from_backup($node_standby_1, $backup_name, + has_streaming => 1); +$node_standby_2->start; + +# Create some content on master and check its presence in standby 1 +$node_master->psql('postgres', "CREATE TABLE tab_int AS SELECT generate_series(1,1002) AS a"); + +# Wait for standbys to catch up +my $applname_1 = $node_standby_1->name; +my $applname_2 = $node_standby_2->name; +my $caughtup_query = "SELECT pg_current_xlog_location() = write_location FROM pg_stat_replication WHERE application_name = '$applname_1';"; +$node_master->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for standby 1 to catch up"; +$caughtup_query = "SELECT pg_last_xlog_replay_location() = write_location FROM pg_stat_replication WHERE application_name = '$applname_2';"; +$node_standby_1->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for standby 2 to catch up"; + +my $result = 
$node_standby_1->psql('postgres', "SELECT count(*) FROM tab_int"); +print "standby 1: $result\n"; +is($result, qq(1002), 'check streamed content on standby 1'); + +$result = $node_standby_2->psql('postgres', "SELECT count(*) FROM tab_int"); +print "standby 2: $result\n"; +is($result, qq(1002), 'check streamed content on standby 2'); + +# Check that only READ-only queries can run on standbys +$node_standby_1->command_fails(['psql', '-A', '-t', '--no-psqlrc', + '-d', $node_standby_1->connstr, '-c', + "INSERT INTO tab_int VALUES (1)"], + 'Read-only queries on standby 1'); +$node_standby_2->command_fails(['psql', '-A', '-t', '--no-psqlrc', + '-d', $node_standby_2->connstr, '-c', + "INSERT INTO tab_int VALUES (1)"], + 'Read-only queries on standby 2'); diff --git a/src/test/recovery/t/002_archiving.pl b/src/test/recovery/t/002_archiving.pl new file mode 100644 index 0000000..930125c --- /dev/null +++ b/src/test/recovery/t/002_archiving.pl @@ -0,0 +1,46 @@ +# test for archiving with warm standby +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More tests => 1; +use File::Copy; + +# Initialize master node, doing archives +my $node_master = get_new_node('master'); +$node_master->init(has_archiving => 1, + allows_streaming => 1); +my $backup_name = 'my_backup'; + +# Start it +$node_master->start; + +# Take backup for slave +$node_master->backup($backup_name); + +# Initialize standby node from backup, fetching WAL from archives +my $node_standby = get_new_node('standby'); +$node_standby->init_from_backup($node_master, $backup_name, + has_restoring => 1); +$node_standby->append_conf('postgresql.conf', qq( +wal_retrieve_retry_interval = '100ms' +)); +$node_standby->start; + +# Create some content on master +$node_master->psql('postgres', "CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a"); +my $current_lsn = $node_master->psql('postgres', "SELECT pg_current_xlog_location();"); + +# Force archiving of WAL file to make it present on master 
+$node_master->psql('postgres', "SELECT pg_switch_xlog()"); + +# Add some more content, it should not be present on standby +$node_master->psql('postgres', "INSERT INTO tab_int VALUES (generate_series(1001,2000))"); + +# Wait until necessary replay has been done on standby +my $caughtup_query = "SELECT '$current_lsn'::pg_lsn <= pg_last_xlog_replay_location()"; +$node_standby->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for standby to catch up"; + +my $result = $node_standby->psql('postgres', "SELECT count(*) FROM tab_int"); +is($result, qq(1000), 'check content from archives'); diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl new file mode 100644 index 0000000..293603a --- /dev/null +++ b/src/test/recovery/t/003_recovery_targets.pl @@ -0,0 +1,125 @@ +# Test for recovery targets: name, timestamp, XID +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More tests => 7; + +# Create and test a standby from given backup, with a certain +# recovery target. 
+sub test_recovery_standby +{ + my $test_name = shift; + my $node_name = shift; + my $node_master = shift; + my $recovery_params = shift; + my $num_rows = shift; + my $until_lsn = shift; + + my $node_standby = get_new_node($node_name); + $node_standby->init_from_backup($node_master, 'my_backup', + has_restoring => 1); + + foreach my $param_item (@$recovery_params) + { + $node_standby->append_conf('recovery.conf', + qq($param_item +)); + } + + $node_standby->start; + + # Wait until standby has replayed enough data + my $caughtup_query = "SELECT '$until_lsn'::pg_lsn <= pg_last_xlog_replay_location()"; + $node_standby->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for standby to catch up"; + + # Check that the standby holds the expected number of rows + my $result = $node_standby->psql('postgres', "SELECT count(*) FROM tab_int"); + is($result, qq($num_rows), "check standby content for $test_name"); + + # Stop standby node + $node_standby->teardown_node; +} + +# Initialize master node +my $node_master = get_new_node('master'); +$node_master->init(has_archiving => 1, allows_streaming => 1); + +# Start it +$node_master->start; + +# Create data before taking the backup, aimed at testing +# recovery_target = 'immediate' +$node_master->psql('postgres', "CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a"); +my $lsn1 = $node_master->psql('postgres', "SELECT pg_current_xlog_location();"); + +# Take backup from which all operations will be run +$node_master->backup('my_backup'); + +# Insert some data used as a replay reference, with a recovery +# target TXID. 
+$node_master->psql('postgres', "INSERT INTO tab_int VALUES (generate_series(1001,2000))"); +my $recovery_txid = $node_master->psql('postgres', "SELECT txid_current()"); +my $lsn2 = $node_master->psql('postgres', "SELECT pg_current_xlog_location();"); + +# More data, with recovery target timestamp +$node_master->psql('postgres', "INSERT INTO tab_int VALUES (generate_series(2001,3000))"); +my $recovery_time = $node_master->psql('postgres', "SELECT now()"); +my $lsn3 = $node_master->psql('postgres', "SELECT pg_current_xlog_location();"); + +# Even more data, this time with a recovery target name +$node_master->psql('postgres', + "INSERT INTO tab_int VALUES (generate_series(3001,4000))"); +my $recovery_name = "my_target"; +my $lsn4 = $node_master->psql('postgres', "SELECT pg_current_xlog_location();"); +$node_master->psql('postgres', "SELECT pg_create_restore_point('$recovery_name')"); + +# Force archiving of WAL file +$node_master->psql('postgres', "SELECT pg_switch_xlog()"); + +# Test recovery targets +my @recovery_params = ( "recovery_target = 'immediate'" ); +test_recovery_standby('immediate target', 'standby_1', $node_master, + \@recovery_params, + "1000", $lsn1); +@recovery_params = ( "recovery_target_xid = '$recovery_txid'" ); +test_recovery_standby('XID', 'standby_2', $node_master, + \@recovery_params, + "2000", $lsn2); +@recovery_params = ( "recovery_target_time = '$recovery_time'" ); +test_recovery_standby('Time', 'standby_3', $node_master, + \@recovery_params, + "3000", $lsn3); +@recovery_params = ( "recovery_target_name = '$recovery_name'" ); +test_recovery_standby('Name', 'standby_4', $node_master, + \@recovery_params, + "4000", $lsn4); + +# Multiple targets +# Last entry has priority (an array is used because it keeps the order of +# its items, unlike a hash). 
+@recovery_params = ( + "recovery_target_name = '$recovery_name'", + "recovery_target_xid = '$recovery_txid'", + "recovery_target_time = '$recovery_time'" +); +test_recovery_standby('Name + XID + Time', 'standby_5', $node_master, + \@recovery_params, + "3000", $lsn3); +@recovery_params = ( + "recovery_target_time = '$recovery_time'", + "recovery_target_name = '$recovery_name'", + "recovery_target_xid = '$recovery_txid'" +); +test_recovery_standby('Time + Name + XID', 'standby_6', $node_master, + \@recovery_params, + "2000", $lsn2); +@recovery_params = ( + "recovery_target_xid = '$recovery_txid'", + "recovery_target_time = '$recovery_time'", + "recovery_target_name = '$recovery_name'" +); +test_recovery_standby('XID + Time + Name', 'standby_7', $node_master, + \@recovery_params, + "4000", $lsn4); diff --git a/src/test/recovery/t/004_timeline_switch.pl b/src/test/recovery/t/004_timeline_switch.pl new file mode 100644 index 0000000..c58c602 --- /dev/null +++ b/src/test/recovery/t/004_timeline_switch.pl @@ -0,0 +1,67 @@ +# Test for timeline switch +# Ensure that a standby is able to follow a newly-promoted standby +# on a new timeline. 
+use strict; +use warnings; +use File::Path qw(remove_tree); +use PostgresNode; +use TestLib; +use Test::More tests => 1; + +$ENV{PGDATABASE} = 'postgres'; + +# Initialize master node +my $node_master = get_new_node('master'); +$node_master->init(allows_streaming => 1); +$node_master->start; + +# Take backup +my $backup_name = 'my_backup'; +$node_master->backup($backup_name); + +# Create two standbys linking to it +my $node_standby_1 = get_new_node('standby_1'); +$node_standby_1->init_from_backup($node_master, $backup_name, + has_streaming => 1); +$node_standby_1->start; +my $node_standby_2 = get_new_node('standby_2'); +$node_standby_2->init_from_backup($node_master, $backup_name, + has_streaming => 1); +$node_standby_2->start; + +# Create some content on master +$node_master->psql('postgres', + "CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a"); +my $until_lsn = $node_master->psql('postgres', "SELECT pg_current_xlog_location();"); + +# Wait until standby 1 has replayed enough data +my $caughtup_query = "SELECT '$until_lsn'::pg_lsn <= pg_last_xlog_replay_location()"; +$node_standby_1->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for standby to catch up"; + +# Stop and remove master, and promote standby 1, switching it to a new timeline +$node_master->teardown_node; +$node_standby_1->promote; + +# Switch standby 2 to replay from standby 1 +unlink($node_standby_2->data_dir . '/recovery.conf') or die "could not remove recovery.conf: $!"; +my $connstr_1 = $node_standby_1->connstr; +$node_standby_2->append_conf('recovery.conf', qq( +primary_conninfo='$connstr_1' +standby_mode=on +recovery_target_timeline='latest' +)); +$node_standby_2->restart; + +# Insert some data in standby 1 and check its presence in standby 2 +# to ensure that the timeline switch has been done. Standby 1 needs +# to exit recovery first before moving on with the test. 
+$node_standby_1->poll_query_until('postgres', "SELECT pg_is_in_recovery() <> true") or die "Timed out while waiting for promotion of standby 1"; +$node_standby_1->psql('postgres', "INSERT INTO tab_int VALUES (generate_series(1001,2000))"); +$until_lsn = $node_standby_1->psql('postgres', "SELECT pg_current_xlog_location();"); +$caughtup_query = "SELECT '$until_lsn'::pg_lsn <= pg_last_xlog_replay_location()"; +$node_standby_2->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for standby to catch up"; + +my $result = $node_standby_2->psql('postgres', "SELECT count(*) FROM tab_int"); +is($result, qq(2000), 'check content of standby 2'); diff --git a/src/test/recovery/t/005_replay_delay.pl b/src/test/recovery/t/005_replay_delay.pl new file mode 100644 index 0000000..14d9b29 --- /dev/null +++ b/src/test/recovery/t/005_replay_delay.pl @@ -0,0 +1,43 @@ +# Checks for recovery_min_apply_delay +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More tests => 2; + +# Initialize master node +my $node_master = get_new_node('master'); +$node_master->init(allows_streaming => 1); +$node_master->start; + +# Add some content +$node_master->psql('postgres', "CREATE TABLE tab_int AS SELECT generate_series(1,10) AS a"); + +# Take backup +my $backup_name = 'my_backup'; +$node_master->backup($backup_name); + +# Create streaming standby from backup +my $node_standby = get_new_node('standby'); +$node_standby->init_from_backup($node_master, $backup_name, + has_streaming => 1); +$node_standby->append_conf('recovery.conf', qq( +recovery_min_apply_delay = '2s' +)); +$node_standby->start; + +# Make new content on master and check its presence in standby +# depending on the delay of 2s applied above. 
+$node_master->psql('postgres', "INSERT INTO tab_int VALUES (generate_series(11,20))"); +sleep 1; +# Here we should have only 10 rows +my $result = $node_standby->psql('postgres', "SELECT count(*) FROM tab_int"); +is($result, qq(10), 'check content with delay of 1s'); + +# Now wait for replay to complete on standby +my $until_lsn = $node_master->psql('postgres', "SELECT pg_current_xlog_location();"); +my $caughtup_query = "SELECT '$until_lsn'::pg_lsn <= pg_last_xlog_replay_location()"; +$node_standby->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for standby to catch up"; +$result = $node_standby->psql('postgres', "SELECT count(*) FROM tab_int"); +is($result, qq(20), 'check content with delay of 2s'); -- 2.6.4