From edb976cf1d2daedafef9cee1e36c4ef5f232e9aa Mon Sep 17 00:00:00 2001 From: Justin Kwan Date: Tue, 19 Jul 2022 22:15:36 -0700 Subject: [PATCH 1/1] Avoid copying WAL segments before divergence to speed up pg_rewind "Optimize pg_rewind to Skip Copying Common WAL Files". It adds a conditional check to avoid uncessesarily copying any WAL segment files from source to target if they are common between both servers, before the point of WAL divergence during pg_rewind. On the source server, each WAL file's corresponding segment number is computed and compared against the segement number of the first diverged LSN. All WAL files which fall before the segment of the first diverged LSN can safely be skipped from copying to the target. The reduction in WAL segment files transmitted over the network from source to target server massively reduces overall pg_rewind execution time, when a large amount of WAL segment files are retained. This patch is intended for immediate application into the Postgres master branch on version 14.4. Regression tests are included to verify that WAL segment files prior to WAL diverge are not copied. The source code successfully compiles, and all tests successfully pass. Author: Justin Kwan, Vignesh Ravichandran --- src/bin/pg_rewind/filemap.c | 15 +++++ src/bin/pg_rewind/pg_rewind.c | 39 ++++++++++- src/bin/pg_rewind/pg_rewind.h | 2 + src/bin/pg_rewind/t/001_basic.pl | 65 +++++++++++++++++++ src/bin/pg_rewind/t/008_min_recovery_point.pl | 64 ++++++++++++++++++ 5 files changed, 183 insertions(+), 2 deletions(-) diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c index 269ed6446e..2fd0523ce5 100644 --- a/src/bin/pg_rewind/filemap.c +++ b/src/bin/pg_rewind/filemap.c @@ -26,6 +26,7 @@ #include #include +#include "access/xlog_internal.h" #include "catalog/pg_tablespace_d.h" #include "common/hashfn.h" #include "common/string.h" @@ -728,6 +729,20 @@ decide_file_action(file_entry_t *entry) case FILE_TYPE_REGULAR: if (!entry->isrelfile) { + /* Handle WAL segment file. */ + const char *fname; + char *slash; + + /* Split filepath into directory & filename. */ + slash = strrchr(path, '/'); + if (slash) + fname = slash + 1; + else + fname = path; + + if (IsXLogFileName(fname)) + return decide_wal_file_action(fname); + /* * It's a non-data file that we have no special processing * for. Copy it in toto. diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c index 1ff8da1676..7bdc3447c2 100644 --- a/src/bin/pg_rewind/pg_rewind.c +++ b/src/bin/pg_rewind/pg_rewind.c @@ -25,7 +25,6 @@ #include "fe_utils/recovery_gen.h" #include "fe_utils/string_utils.h" #include "file_ops.h" -#include "filemap.h" #include "getopt_long.h" #include "pg_rewind.h" #include "rewind_source.h" @@ -69,6 +68,8 @@ bool dry_run = false; bool do_sync = true; bool restore_wal = false; +static XLogRecPtr divergerec; + /* Target history */ TimeLineHistoryEntry *targetHistory; int targetNentries; @@ -129,7 +130,6 @@ main(int argc, char **argv) }; int option_index; int c; - XLogRecPtr divergerec; int lastcommontliIndex; XLogRecPtr chkptrec; TimeLineID chkpttli; @@ -490,6 +490,41 @@ main(int argc, char **argv) return 0; } +file_action_t +decide_wal_file_action(const char *fname) +{ + TimeLineID file_tli; + XLogSegNo file_segno; + XLogSegNo last_common_segno; + + /* + * Find last common WAL segment number between source and target before + * divergence given last common LSN (byte position). + */ + XLByteToSeg(divergerec, last_common_segno, ControlFile_target.xlog_seg_size); + + /* Get current WAL segment number given current segment file name. */ + XLogFromFileName(fname, &file_tli, &file_segno, ControlFile_target.xlog_seg_size); + + /* + * Avoid unnecessarily copying WAL segment files created before last common + * segment to avoid performance penalty when many WAL segment files are + * retained on source and copied to target. + * + * These files are already common between new source (old target) and new + * target (old source). Only WAL segment files after the last common segment + * number on the new source need to be copied to the new target. + */ + if (file_segno < last_common_segno) + { + pg_log_debug("WAL file entry \"%s\" not copied to target", fname); + return FILE_ACTION_NONE; + } + + pg_log_debug("WAL file entry \"%s\" is copied to target", fname); + return FILE_ACTION_COPY; +} + /* * Perform the rewind. * diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h index 8b4b50a33b..0752348831 100644 --- a/src/bin/pg_rewind/pg_rewind.h +++ b/src/bin/pg_rewind/pg_rewind.h @@ -14,6 +14,7 @@ #include "access/timeline.h" #include "common/logging.h" #include "datapagemap.h" +#include "filemap.h" #include "libpq-fe.h" #include "storage/block.h" #include "storage/relfilelocator.h" @@ -47,6 +48,7 @@ extern XLogRecPtr readOneRecord(const char *datadir, XLogRecPtr ptr, /* in pg_rewind.c */ extern void progress_report(bool finished); +extern file_action_t decide_wal_file_action(const char *fname); /* in timeline.c */ extern TimeLineHistoryEntry *rewind_parseTimeLineHistory(char *buffer, diff --git a/src/bin/pg_rewind/t/001_basic.pl b/src/bin/pg_rewind/t/001_basic.pl index db9201f38e..7caff1c029 100644 --- a/src/bin/pg_rewind/t/001_basic.pl +++ b/src/bin/pg_rewind/t/001_basic.pl @@ -172,6 +172,71 @@ in primary, before promotion ), 'drop'); + if ($test_mode eq 'archive') + { + goto SKIP; + } + + # Read logs produced by pg_rewind + my $log_file_name = + "$PostgreSQL::Test::Utils::tmp_check/log/regress_log_001_basic"; + my $pg_rewind_logs = do { + local $/ = undef; + open(my $log_file, '<', $log_file_name) || die; + <$log_file>; + }; + + # This WAL segment file is common between both source and target, + # originating before WAL timeline divergence. pg_rewind should not copy + # and overwrite this target file with the source's copy. + like( + $pg_rewind_logs, + qr/pg_rewind: WAL file entry "000000010000000000000002" not copied to target/, + 'common segment file skipped from copying'); + unlike( + $pg_rewind_logs, + qr/pg_rewind: pg_wal\/000000010000000000000002 \(COPY\)/, + 'common segment file not copied'); + if ($test_mode eq 'remote') + { + unlike( + $pg_rewind_logs, + qr/pg_rewind: received chunk for file "pg_wal\/000000010000000000000002"/, + 'common segment file copy not received by remote target'); + } + + # These WAL segment files occur just before and after the divergence to the + # new timeline. (Both of these WAL files are internally represented by + # segment 3.) pg_rewind should copy and overwrite these target files with + # the source's copies. + like( + $pg_rewind_logs, + qr/pg_rewind: WAL file entry "000000010000000000000003" is copied to target/, + 'diverged segment file included in copying to target'); + like( + $pg_rewind_logs, + qr/pg_rewind: pg_wal\/000000010000000000000003 \(COPY\)/, + 'diverged segment file copied'); + if ($test_mode eq 'remote') + { + like( + $pg_rewind_logs, + qr/pg_rewind: received chunk for file "pg_wal\/000000010000000000000003"/, + 'diverged segment file copy received by remote target'); + } + + like( + $pg_rewind_logs, + qr/pg_rewind: pg_wal\/000000020000000000000003 \(COPY\)/, + 'diverged segment file included in copying to target'); + if ($test_mode eq 'remote') + { + like( + $pg_rewind_logs, + qr/pg_rewind: received chunk for file "pg_wal\/000000020000000000000003"/, + 'diverged segment file copy received by remote target'); + } + # Permissions on PGDATA should be default SKIP: { diff --git a/src/bin/pg_rewind/t/008_min_recovery_point.pl b/src/bin/pg_rewind/t/008_min_recovery_point.pl index e6a7177fb7..204ec10738 100644 --- a/src/bin/pg_rewind/t/008_min_recovery_point.pl +++ b/src/bin/pg_rewind/t/008_min_recovery_point.pl @@ -174,4 +174,68 @@ and this too), 'table foo after rewind'); $result = $node_2->safe_psql('postgres', 'SELECT * FROM public.bar'); is($result, qq(in both), 'table bar after rewind'); +# Read logs produced by pg_rewind +my $log_file_name = + "$PostgreSQL::Test::Utils::tmp_check/log/regress_log_008_min_recovery_point"; +my $pg_rewind_logs = do { + local $/ = undef; + open(my $log_file, '<', $log_file_name) || die; + <$log_file>; +}; + +# This WAL segment file is common between both source and target, +# originating before WAL timeline divergence. pg_rewind should not copy +# and overwrite this target file with the source's copy. +like( + $pg_rewind_logs, + qr/pg_rewind: WAL file entry "000000010000000000000002" not copied to target/, + 'common segment file skipped from copying'); +unlike( + $pg_rewind_logs, + qr/pg_rewind: pg_wal\/000000010000000000000002 \(COPY\)/, + 'common segment file not copied'); +unlike( + $pg_rewind_logs, + qr/pg_rewind: received chunk for file "pg_wal\/000000010000000000000002"/, + 'common segment file copy not received by remote target'); + +# These WAL segment files occur just before and after the divergence to the +# new timeline. (Both of these WAL files are internally represented by +# segment 3.) pg_rewind should copy and overwrite these target files with +# the source's copies. +like( + $pg_rewind_logs, + qr/pg_rewind: WAL file entry "000000010000000000000003" is copied to target/, + 'diverged segment file included in copying to target'); +like( + $pg_rewind_logs, + qr/pg_rewind: pg_wal\/000000010000000000000003 \(COPY\)/, + 'diverged segment file copied'); +like( + $pg_rewind_logs, + qr/pg_rewind: received chunk for file "pg_wal\/000000010000000000000003"/, + 'diverged segment file copy received by remote target'); + +like( + $pg_rewind_logs, + qr/pg_rewind: WAL file entry "000000020000000000000003" is copied to target/, + 'diverged segment file included in copying to target'); +like( + $pg_rewind_logs, + qr/pg_rewind: pg_wal\/000000020000000000000003 \(COPY\)/, + 'diverged segment file copied'); +like( + $pg_rewind_logs, + qr/pg_rewind: received chunk for file "pg_wal\/000000020000000000000003"/, + 'diverged segment file copy received by remote target'); + +like( + $pg_rewind_logs, + qr/pg_rewind: pg_wal\/000000030000000000000003 \(COPY\)/, + 'diverged segment file copied'); +like( + $pg_rewind_logs, + qr/pg_rewind: received chunk for file "pg_wal\/000000030000000000000003"/, + 'diverged segment file copy received by remote target'); + done_testing(); -- 2.35.3