Re: min_safe_lsn column in pg_replication_slots view - Mailing list pgsql-hackers
From | Kyotaro Horiguchi |
---|---|
Subject | Re: min_safe_lsn column in pg_replication_slots view |
Date | |
Msg-id | 20200701.103259.1263090824041890800.horikyota.ntt@gmail.com Whole thread Raw |
In response to | Re: min_safe_lsn column in pg_replication_slots view (Fujii Masao <masao.fujii@oss.nttdata.com>) |
List | pgsql-hackers |
At Tue, 30 Jun 2020 23:23:30 +0900, Fujii Masao <masao.fujii@oss.nttdata.com> wrote in > >> Can we consider an option to "Remove min_safe_lsn; document how a user > >> can monitor the distance"? We have a function to get current WAL > >> insert location and other things required are available either via > >> view or as guc variable values. The reason I am thinking of this > >> option is that it might be better to get some more feedback on what is > >> the most appropriate value to display. However, I am okay if we can > >> reach a consensus on one of the above options. > > Yes, that's an idea. But it might not be easy to calculate that > > distance > > manually by subtracting max_slot_wal_keep_size from the current LSN. > > Because we've not supported -(pg_lsn, numeric) operator yet. I'm > > proposing that operator, but it's for v14. > > Sorry this is not true. That distance can be calculated without those > operators. > For example, > > SELECT restart_lsn - pg_current_wal_lsn() + (SELECT setting::numeric * > 1024 * 1024 FROM pg_settings WHERE name = 'max_slot_wal_keep_size') > distance FROM pg_replication_slots; It's an approximation with accuracy of segment size. The calculation would be not that simple because of the unit of the calculation. The formula for the exact calculateion (ignoring wal_keep_segments) is: distance = (seg_floor(restart_lsn) + seg_floor(max_slot_wal_keep_size) + 1) * wal_segment_size - current_lsn where seg_floor is floor() by wal_segment_size. regards. > If the calculated distance is small or negative value, which means > that > we may lose some required WAL files. So in this case it's worth > considering > to increase max_slot_wal_keep_size. > > I still think it's better and more helpful to display something like > that distance in pg_replication_slots rather than making each user > calculate it... Agreed. The attached replaces min_safe_lsn with "distance". regards. -- Kyotaro Horiguchi NTT Open Source Software Center From 7e078c9ff8e0594ce8f4e95c7db84ea0a31e9775 Mon Sep 17 00:00:00 2001 From: Kyotaro Horiguchi <horikyoga.ntt@gmail.com> Date: Tue, 30 Jun 2020 21:09:19 +0900 Subject: [PATCH v1] Replace min_safe_lsn with "distance" in pg_replication_slots pg_replication_slot.min_safe_lsn, which shows the oldest LSN kept in pg_wal, is doubtful in usability for monitoring. Change it to distance, which shows how many bytes the server can advance before the slot loses required segments. --- src/backend/access/transam/xlog.c | 39 +++++++++++++++++++++++ src/backend/catalog/system_views.sql | 2 +- src/backend/replication/slotfuncs.c | 19 +++++------ src/include/access/xlog.h | 1 + src/include/catalog/pg_proc.dat | 4 +-- src/test/recovery/t/019_replslot_limit.pl | 20 ++++++------ src/test/regress/expected/rules.out | 4 +-- 7 files changed, 62 insertions(+), 27 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index fd93bcfaeb..1f27639912 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -9570,6 +9570,45 @@ GetWALAvailability(XLogRecPtr targetLSN) return WALAVAIL_REMOVED; } +/* + * Calculate how many bytes we can advance from currptr until the targetLSN is + * removed. + * + * Returns 0 if the distance is invalid. + */ +uint64 +DistanceToWALHorizon(XLogRecPtr targetLSN, XLogRecPtr currptr) +{ + XLogSegNo targetSeg; + XLogSegNo keepSegs; + XLogSegNo failSeg; + XLogRecPtr horizon; + + XLByteToSeg(targetLSN, targetSeg, wal_segment_size); + keepSegs = 0; + + /* no limit if max_slot_wal_keep_size is invalid */ + if (max_slot_wal_keep_size_mb < 0) + return 0; + + /* How many segments slots can keep? */ + keepSegs = ConvertToXSegs(max_slot_wal_keep_size_mb, wal_segment_size); + + /* override by wal_keep_segments if needed */ + if (wal_keep_segments > keepSegs) + keepSegs = wal_keep_segments; + + /* calculate the LSN where targetLSN is lost when currpos reaches */ + failSeg = targetSeg + keepSegs + 1; + XLogSegNoOffsetToRecPtr(failSeg, 0, wal_segment_size, horizon); + + /* If currptr already beyond the horizon, return zero. */ + if (currptr > horizon) + return 0; + + /* return the distance from currptr to the horizon */ + return horizon - currptr; +} /* * Retreat *logSegNo to the last segment that we need to retain because of diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 5314e9348f..b9847a9f92 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -879,7 +879,7 @@ CREATE VIEW pg_replication_slots AS L.restart_lsn, L.confirmed_flush_lsn, L.wal_status, - L.min_safe_lsn + L.distance FROM pg_get_replication_slots() AS L LEFT JOIN pg_database D ON (L.datoid = D.oid); diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c index 88033a79b2..532b3c5826 100644 --- a/src/backend/replication/slotfuncs.c +++ b/src/backend/replication/slotfuncs.c @@ -242,6 +242,7 @@ pg_get_replication_slots(PG_FUNCTION_ARGS) Tuplestorestate *tupstore; MemoryContext per_query_ctx; MemoryContext oldcontext; + XLogRecPtr currlsn; int slotno; /* check to see if caller supports us returning a tuplestore */ @@ -274,6 +275,8 @@ pg_get_replication_slots(PG_FUNCTION_ARGS) MemoryContextSwitchTo(oldcontext); + currlsn = GetXLogWriteRecPtr(); + LWLockAcquire(ReplicationSlotControlLock, LW_SHARED); for (slotno = 0; slotno < max_replication_slots; slotno++) { @@ -282,7 +285,7 @@ pg_get_replication_slots(PG_FUNCTION_ARGS) Datum values[PG_GET_REPLICATION_SLOTS_COLS]; bool nulls[PG_GET_REPLICATION_SLOTS_COLS]; WALAvailability walstate; - XLogSegNo last_removed_seg; + uint32 distance; int i; if (!slot->in_use) @@ -398,16 +401,10 @@ pg_get_replication_slots(PG_FUNCTION_ARGS) break; } - if (max_slot_wal_keep_size_mb >= 0 && - (walstate == WALAVAIL_RESERVED || walstate == WALAVAIL_EXTENDED) && - ((last_removed_seg = XLogGetLastRemovedSegno()) != 0)) - { - XLogRecPtr min_safe_lsn; - - XLogSegNoOffsetToRecPtr(last_removed_seg + 1, 0, - wal_segment_size, min_safe_lsn); - values[i++] = Int64GetDatum(min_safe_lsn); - } + distance = + DistanceToWALHorizon(slot_contents.data.restart_lsn, currlsn); + if (distance > 0) + values[i++] = Int64GetDatum(distance); else nulls[i++] = true; diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 77ac4e785f..1ec448c5d5 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -329,6 +329,7 @@ extern void InitXLOGAccess(void); extern void CreateCheckPoint(int flags); extern bool CreateRestartPoint(int flags); extern WALAvailability GetWALAvailability(XLogRecPtr targetLSN); +extern uint64 DistanceToWALHorizon(XLogRecPtr targetLSN, XLogRecPtr currptr); extern XLogRecPtr CalculateMaxmumSafeLSN(void); extern void XLogPutNextOid(Oid nextOid); extern XLogRecPtr XLogRestorePoint(const char *rpName); diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 61f2c2f5b4..199fd994bd 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -10063,9 +10063,9 @@ proname => 'pg_get_replication_slots', prorows => '10', proisstrict => 'f', proretset => 't', provolatile => 's', prorettype => 'record', proargtypes => '', - proallargtypes => '{name,name,text,oid,bool,bool,int4,xid,xid,pg_lsn,pg_lsn,text,pg_lsn}', + proallargtypes => '{name,name,text,oid,bool,bool,int4,xid,xid,pg_lsn,pg_lsn,text,int8}', proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o}', - proargnames => '{slot_name,plugin,slot_type,datoid,temporary,active,active_pid,xmin,catalog_xmin,restart_lsn,confirmed_flush_lsn,wal_status,min_safe_lsn}', + proargnames => '{slot_name,plugin,slot_type,datoid,temporary,active,active_pid,xmin,catalog_xmin,restart_lsn,confirmed_flush_lsn,wal_status,distance}', prosrc => 'pg_get_replication_slots' }, { oid => '3786', descr => 'set up a logical replication slot', proname => 'pg_create_logical_replication_slot', provolatile => 'v', diff --git a/src/test/recovery/t/019_replslot_limit.pl b/src/test/recovery/t/019_replslot_limit.pl index 7d22ae5720..1c76d2d9e9 100644 --- a/src/test/recovery/t/019_replslot_limit.pl +++ b/src/test/recovery/t/019_replslot_limit.pl @@ -28,7 +28,7 @@ $node_master->safe_psql('postgres', # The slot state and remain should be null before the first connection my $result = $node_master->safe_psql('postgres', - "SELECT restart_lsn IS NULL, wal_status is NULL, min_safe_lsn is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'" + "SELECT restart_lsn IS NULL, wal_status is NULL, distance is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'" ); is($result, "t|t|t", 'check the state of non-reserved slot is "unknown"'); @@ -52,9 +52,9 @@ $node_master->wait_for_catchup($node_standby, 'replay', $start_lsn); # Stop standby $node_standby->stop; -# Preparation done, the slot is the state "normal" now +# Preparation done, the slot is the state "reserved" now $result = $node_master->safe_psql('postgres', - "SELECT wal_status, min_safe_lsn is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'" + "SELECT wal_status, distance is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'" ); is($result, "reserved|t", 'check the catching-up state'); @@ -64,7 +64,7 @@ $node_master->safe_psql('postgres', "CHECKPOINT;"); # The slot is always "safe" when fitting max_wal_size $result = $node_master->safe_psql('postgres', - "SELECT wal_status, min_safe_lsn is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'" + "SELECT wal_status, distance is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'" ); is($result, "reserved|t", 'check that it is safe if WAL fits in max_wal_size'); @@ -74,7 +74,7 @@ $node_master->safe_psql('postgres', "CHECKPOINT;"); # The slot is always "safe" when max_slot_wal_keep_size is not set $result = $node_master->safe_psql('postgres', - "SELECT wal_status, min_safe_lsn is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'" + "SELECT wal_status, distance is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'" ); is($result, "reserved|t", 'check that slot is working'); @@ -94,9 +94,7 @@ max_slot_wal_keep_size = ${max_slot_wal_keep_size_mb}MB )); $node_master->reload; -# The slot is in safe state. The distance from the min_safe_lsn should -# be as almost (max_slot_wal_keep_size - 1) times large as the segment -# size +# The slot is in safe state. $result = $node_master->safe_psql('postgres', "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'"); @@ -110,7 +108,7 @@ $node_master->safe_psql('postgres', "CHECKPOINT;"); $result = $node_master->safe_psql('postgres', "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'"); is($result, "reserved", - 'check that min_safe_lsn gets close to the current LSN'); + 'check that distance gets close to the current LSN'); # The standby can reconnect to master $node_standby->start; @@ -154,7 +152,7 @@ advance_wal($node_master, 1); # Slot gets into 'unreserved' state $result = $node_master->safe_psql('postgres', - "SELECT wal_status, min_safe_lsn is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'" + "SELECT wal_status, distance is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'" ); is($result, "unreserved|t", 'check that the slot state changes to "unreserved"'); @@ -186,7 +184,7 @@ ok( find_in_log( # This slot should be broken $result = $node_master->safe_psql('postgres', - "SELECT slot_name, active, restart_lsn IS NULL, wal_status, min_safe_lsn FROM pg_replication_slots WHERE slot_name ='rep1'" + "SELECT slot_name, active, restart_lsn IS NULL, wal_status, distance FROM pg_replication_slots WHERE slot_name = 'rep1'" ); is($result, "rep1|f|t|lost|", 'check that the slot became inactive and the state "lost" persists'); diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index b813e32215..392eab12dd 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1464,8 +1464,8 @@ pg_replication_slots| SELECT l.slot_name, l.restart_lsn, l.confirmed_flush_lsn, l.wal_status, - l.min_safe_lsn - FROM (pg_get_replication_slots() l(slot_name, plugin, slot_type, datoid, temporary, active, active_pid, xmin, catalog_xmin,restart_lsn, confirmed_flush_lsn, wal_status, min_safe_lsn) + l.distance + FROM (pg_get_replication_slots() l(slot_name, plugin, slot_type, datoid, temporary, active, active_pid, xmin, catalog_xmin,restart_lsn, confirmed_flush_lsn, wal_status, distance) LEFT JOIN pg_database d ON ((l.datoid = d.oid))); pg_roles| SELECT pg_authid.rolname, pg_authid.rolsuper, -- 2.18.4
pgsql-hackers by date: