From 9c39345b701bf8794bb5ef8fa4f1de2c33f8700a Mon Sep 17 00:00:00 2001 From: vignesh Date: Mon, 5 Apr 2021 19:09:25 +0530 Subject: [PATCH v3 3/3] Handle overwriting of replication slot statistic issue. In a rare scenario, a replication slot is dropped but the drop-slot statistics message is never received by the statistics collector process. If max_replication_slots is then reduced to the number of replication slots actually in use and the publisher is restarted, the statistics collector is unaware of the change and writes beyond the available slots. Fix this by skipping any replication slot statistics beyond max_replication_slots. --- contrib/test_decoding/t/001_repl_stats.pl | 24 +++++++++++++++++++++-- src/backend/postmaster/pgstat.c | 20 +++++++++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/contrib/test_decoding/t/001_repl_stats.pl b/contrib/test_decoding/t/001_repl_stats.pl index 4f50804bf4..53283f55bd 100644 --- a/contrib/test_decoding/t/001_repl_stats.pl +++ b/contrib/test_decoding/t/001_repl_stats.pl @@ -5,7 +5,7 @@ use warnings; use File::Path qw(rmtree); use PostgresNode; use TestLib; -use Test::More tests => 2; +use Test::More tests => 3; # Test set-up my $node = get_new_node('test'); @@ -101,12 +101,32 @@ is($result, qq(regression_slot1|t|t regression_slot2|t|t regression_slot3|t|t), 'check replication statistics are updated'); +# Remove one of the replication slots, reduce max_replication_slots to match +# the number of remaining slots, and verify that the replication statistics +# are still sane after the publisher is restarted. 
+$node->stop; +my $publisher_data = $node->data_dir; +my $slot3_replslotdir = "$publisher_data/pg_replslot/regression_slot3"; + +rmtree($slot3_replslotdir); + +$node->append_conf('postgresql.conf', 'max_replication_slots = 2'); +$node->start; + +# Verify that the statistics in pg_stat_replication_slots are sane after the +# publisher is restarted. +$result = $node->safe_psql('postgres', + "SELECT slot_name, total_txns > 0 AS total_txn, total_bytes > 0 AS total_bytes + FROM pg_stat_replication_slots ORDER BY slot_name" +); +is($result, qq(regression_slot1|t|t +regression_slot2|t|t), 'check replication statistics are updated'); + # cleanup $node->safe_psql('postgres', "DROP TABLE test_repl_stat"); $node->safe_psql('postgres', "DROP FUNCTION wait_for_decode_stats"); $node->safe_psql('postgres', "SELECT pg_drop_replication_slot('regression_slot1')"); $node->safe_psql('postgres', "SELECT pg_drop_replication_slot('regression_slot2')"); -$node->safe_psql('postgres', "SELECT pg_drop_replication_slot('regression_slot3')"); # shutdown $node->stop; diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 1d8626d17c..ede97362b7 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -4078,6 +4078,26 @@ pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep) memset(&replSlotStats[nReplSlotStats], 0, sizeof(PgStat_ReplSlotStats)); goto done; } + + /* + * In a rare scenario, a replication slot is dropped but the + * drop-slot statistics message is never received by the + * statistics collector process. If max_replication_slots is + * then reduced to the number of replication slots actually in + * use and the publisher is restarted, the statistics collector + * is unaware of the change. To avoid writing beyond + * max_replication_slots entries, the statistics for this + * replication slot are skipped. 
+ */ + if (max_replication_slots == nReplSlotStats) + { + ereport(pgStatRunningInCollector ? LOG : WARNING, + (errmsg("skipping \"%s\" replication slot statistics as pg_stat_replication_slots does not have enough slots", + NameStr(replSlotStats[nReplSlotStats].slotname)))); + memset(&replSlotStats[nReplSlotStats], 0, sizeof(PgStat_ReplSlotStats)); + goto done; + } + nReplSlotStats++; break; -- 2.25.1