Re: A failure of standby to follow timeline switch - Mailing list pgsql-hackers
| From | Kyotaro Horiguchi |
|---|---|
| Subject | Re: A failure of standby to follow timeline switch |
| Date | |
| Msg-id | 20210106.104804.1837050045776994380.horikyota.ntt@gmail.com Whole thread Raw |
| In response to | Re: A failure of standby to follow timeline switch (Kyotaro Horiguchi <horikyota.ntt@gmail.com>) |
| List | pgsql-hackers |
At Tue, 05 Jan 2021 17:26:02 +0900 (JST), Kyotaro Horiguchi <horikyota.ntt@gmail.com> wrote in
> Thanks. The attached is the revised patchset.
That patchset is not applicable to PG13 due to wording changes. Attached
is an all-in-one version that is applicable to PG13.
regards.
--
Kyotaro Horiguchi
NTT Open Source Software Center
From d4b11d93f93f3af2aa55033b91ddbec176763325 Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horikyoga.ntt@gmail.com>
Date: Tue, 5 Jan 2021 13:34:36 +0900
Subject: [PATCH] Fix timeline-tracking failure while sending a historic
timeline
Walsender should track timeline switches while sending a historic
timeline. Restore that behavior, which was broken in PG13 by a thinko
in 709d003fbd. Backpatch to PG13.
---
src/backend/replication/walsender.c | 2 +-
src/test/perl/PostgresNode.pm | 33 ++++++++++++++++++
src/test/perl/TestLib.pm | 16 ++++++---
src/test/recovery/t/001_stream_rep.pl | 41 ++++++++++++++++++++++-
src/test/recovery/t/019_replslot_limit.pl | 37 ++++----------------
5 files changed, 92 insertions(+), 37 deletions(-)
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 7f87eb7f19..04f6c3ebb4 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -2478,7 +2478,7 @@ WalSndSegmentOpen(XLogReaderState *state, XLogSegNo nextSegNo,
XLogSegNo endSegNo;
XLByteToSeg(sendTimeLineValidUpto, endSegNo, state->segcxt.ws_segsize);
- if (state->seg.ws_segno == endSegNo)
+ if (nextSegNo == endSegNo)
*tli_p = sendTimeLineNextTLI;
}
diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm
index 980f1f1533..687aa3ac88 100644
--- a/src/test/perl/PostgresNode.pm
+++ b/src/test/perl/PostgresNode.pm
@@ -2138,6 +2138,39 @@ sub pg_recvlogical_upto
=pod
+=item $node->current_log_position()
+
+Return the current position of the server log.
+
+=cut
+
+sub current_log_position
+{
+ my $self = shift;
+
+ return (stat $self->logfile)[7];
+}
+
+=pod
+
+=item $node->find_in_log($pattern, $startpos)
+
+Returns whether the $pattern occurs after $startpos in the server log.
+
+=cut
+
+sub find_in_log
+{
+ my ($self, $pattern, $startpos) = @_;
+
+ $startpos = 0 unless defined $startpos;
+ my $log = TestLib::slurp_file($self->logfile, $startpos);
+
+ return $log =~ m/$pattern/;
+}
+
+=pod
+
=back
=cut
diff --git a/src/test/perl/TestLib.pm b/src/test/perl/TestLib.pm
index a7490d2ce7..a0ce9521e2 100644
--- a/src/test/perl/TestLib.pm
+++ b/src/test/perl/TestLib.pm
@@ -402,30 +402,38 @@ sub slurp_dir
=pod
-=item slurp_file(filename)
+=item slurp_file(filename, pos)
-Return the full contents of the specified file.
+Return the contents of the specified file after pos.
+Returns the full contents if pos is omitted.
=cut
sub slurp_file
{
- my ($filename) = @_;
+ my ($filename, $from) = @_;
local $/;
my $contents;
+
+ $from = 0 unless defined $from;
+
if ($Config{osname} ne 'MSWin32')
{
open(my $in, '<', $filename)
or die "could not read \"$filename\": $!";
+ seek($in, $from, 0)
+ or die "could not seek \"$filename\" to $from: $!";
$contents = <$in>;
close $in;
}
else
{
my $fHandle = createFile($filename, "r", "rwd")
- or die "could not open \"$filename\": $^E";
+ or die "could not open \"$filename\": $^E\n";
OsFHandleOpen(my $fh = IO::Handle->new(), $fHandle, 'r')
or die "could not read \"$filename\": $^E\n";
+ seek($fh, $from, 0)
+ or die "could not seek \"$filename\" to $from: $^E\n";
$contents = <$fh>;
CloseHandle($fHandle)
or die "could not close \"$filename\": $^E\n";
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index 778f11b28b..8d2b24fe55 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -2,8 +2,9 @@
use strict;
use warnings;
use PostgresNode;
+use Time::HiRes qw(usleep);
use TestLib;
-use Test::More tests => 36;
+use Test::More tests => 37;
# Initialize master node
my $node_master = get_new_node('master');
@@ -409,3 +410,41 @@ ok( ($phys_restart_lsn_pre cmp $phys_restart_lsn_post) == 0,
my $master_data = $node_master->data_dir;
ok(!-f "$master_data/pg_wal/$segment_removed",
"WAL segment $segment_removed recycled after physical slot advancing");
+
+#
+# Check if timeline-increment works while reading a historic timeline.
+my $node_primary_2 = get_new_node('primary_2');
+# archiving is needed to create a .partial segment
+$node_primary_2->init(allows_streaming => 1, has_archiving => 1);
+$node_primary_2->start;
+$node_primary_2->backup($backup_name);
+my $node_standby_3 = get_new_node('standby_3');
+$node_standby_3->init_from_backup($node_primary_2, $backup_name,
+ has_streaming => 1);
+$node_primary_2->stop;
+$node_primary_2->set_standby_mode; # increment primary timeline
+$node_primary_2->start;
+$node_primary_2->promote;
+my $logstart = $node_standby_3->current_log_position();
+$node_standby_3->start;
+
+my $success = 0;
+for (my $i = 0 ; $i < 1000; $i++)
+{
+ if ($node_standby_3->find_in_log(
+ "requested WAL segment [0-9A-F]+ has already been removed",
+ $logstart))
+ {
+ last;
+ }
+ elsif ($node_standby_3->find_in_log(
+ "End of WAL reached on timeline",
+ $logstart))
+ {
+ $success = 1;
+ last;
+ }
+ usleep(100_000);
+}
+
+ok($success, 'Timeline increment while reading a historic timeline');
diff --git a/src/test/recovery/t/019_replslot_limit.pl b/src/test/recovery/t/019_replslot_limit.pl
index a7231dcd47..8b3c5de057 100644
--- a/src/test/recovery/t/019_replslot_limit.pl
+++ b/src/test/recovery/t/019_replslot_limit.pl
@@ -165,19 +165,17 @@ $node_master->wait_for_catchup($node_standby, 'replay', $start_lsn);
$node_standby->stop;
-ok( !find_in_log(
- $node_standby,
- "requested WAL segment [0-9A-F]+ has already been removed"),
+ok( !$node_standby->find_in_log(
+ "requested WAL segment [0-9A-F]+ has already been removed"),
'check that required WAL segments are still available');
# Advance WAL again, the slot loses the oldest segment.
-my $logstart = get_log_size($node_master);
+my $logstart = $node_master->current_log_position();
advance_wal($node_master, 7);
$node_master->safe_psql('postgres', "CHECKPOINT;");
# WARNING should be issued
-ok( find_in_log(
- $node_master,
+ok( $node_master->find_in_log(
"invalidating slot \"rep1\" because its restart_lsn [0-9A-F/]+ exceeds max_slot_wal_keep_size",
$logstart),
'check that the warning is logged');
@@ -190,14 +188,13 @@ is($result, "rep1|f|t|lost|",
'check that the slot became inactive and the state "lost" persists');
# The standby no longer can connect to the master
-$logstart = get_log_size($node_standby);
+$logstart = $node_standby->current_log_position();
$node_standby->start;
my $failed = 0;
for (my $i = 0; $i < 10000; $i++)
{
- if (find_in_log(
- $node_standby,
+ if ($node_standby->find_in_log(
"requested WAL segment [0-9A-F]+ has already been removed",
$logstart))
{
@@ -264,25 +261,3 @@ sub advance_wal
}
return;
}
-
-# return the size of logfile of $node in bytes
-sub get_log_size
-{
- my ($node) = @_;
-
- return (stat $node->logfile)[7];
-}
-
-# find $pat in logfile of $node after $off-th byte
-sub find_in_log
-{
- my ($node, $pat, $off) = @_;
-
- $off = 0 unless defined $off;
- my $log = TestLib::slurp_file($node->logfile);
- return 0 if (length($log) <= $off);
-
- $log = substr($log, $off);
-
- return $log =~ m/$pat/;
-}
--
2.27.0
pgsql-hackers by date: