Re: Wrong usage of RelationNeedsWAL - Mailing list pgsql-hackers

From Kyotaro Horiguchi
Subject Re: Wrong usage of RelationNeedsWAL
Date
Msg-id 20210121.002844.690876870629206205.horikyota.ntt@gmail.com
Whole thread Raw
In response to Re: Wrong usage of RelationNeedsWAL  (Kyotaro Horiguchi <horikyota.ntt@gmail.com>)
Responses Re: Wrong usage of RelationNeedsWAL
List pgsql-hackers
At Wed, 20 Jan 2021 17:34:44 +0900 (JST), Kyotaro Horiguchi <horikyota.ntt@gmail.com> wrote in 
> Anyway, it seems actually dangerous that cause pruning on wal-skipped
> relation.
> 
> > with your patch versions.  Could you try implementing both test procedures in
> > src/test/modules/snapshot_too_old?  There's no need to make the test use
> > wal_level=minimal explicitly; it's sufficient to catch these bugs when
> > wal_level=minimal is in the TEMP_CONFIG file.
> 
> In the attached, TestForOldSnapshot() considers XLogIsNeeded(),
> instead of moving the condition on LSN to TestForOldSnapshot_impl for
> performance.
> 
> I'll add the test part in the next version.

This is it.

However, with the previous patch, two existing tests sto_using_cursor
and sto_using_select behaves differently from the master.  That change
is coming from the omission of actual LSN check in TestForOldSnapshot
while wal_level=minimal. So we have no choice other than actually
updating page LSN.

In the scenario under discussion there are two places we need to do
that. one is heap_page_prune, and the other is RelationCopyStorge. As
a PoC, I used gistXLogAssignLSN() as is for thie purpose. See the
attached third file.

If it is the right direction, I will move XLOG_GIST_ASSIGN_LSN to
XLOG_ASSIGN_LSN and move gistXLogAssignLSN() to XLogAdvanceLSN() or
XLogNop() or such.

With the third patch, the test succedds both wal_level = replica and
minimal.

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center
From 332ac10844befb8a9c00df9f68993eb3696f0a85 Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horikyoga.ntt@gmail.com>
Date: Wed, 20 Jan 2021 20:57:03 +0900
Subject: [PATCH v5 1/3] Test for snapshot too old and wal_level=minimal

---
 src/test/modules/snapshot_too_old/Makefile    | 12 ++++-
 .../expected/sto_wal_optimized.out            | 24 ++++++++++
 .../input_sto/sto_wal_optimized.spec          | 44 +++++++++++++++++++
 src/test/modules/snapshot_too_old/sto.conf    |  3 ++
 4 files changed, 82 insertions(+), 1 deletion(-)
 create mode 100644 src/test/modules/snapshot_too_old/expected/sto_wal_optimized.out
 create mode 100644 src/test/modules/snapshot_too_old/input_sto/sto_wal_optimized.spec

diff --git a/src/test/modules/snapshot_too_old/Makefile b/src/test/modules/snapshot_too_old/Makefile
index dfb4537f63..1e0b0b6efc 100644
--- a/src/test/modules/snapshot_too_old/Makefile
+++ b/src/test/modules/snapshot_too_old/Makefile
@@ -4,7 +4,7 @@
 # we have to clean those result files explicitly
 EXTRA_CLEAN = $(pg_regress_clean_files)
 
-ISOLATION = sto_using_cursor sto_using_select sto_using_hash_index
+ISOLATION = sto_using_cursor sto_using_select sto_using_hash_index sto_wal_optimized
 ISOLATION_OPTS = --temp-config $(top_srcdir)/src/test/modules/snapshot_too_old/sto.conf
 
 # Disabled because these tests require "old_snapshot_threshold" >= 0, which
@@ -22,6 +22,16 @@ include $(top_builddir)/src/Makefile.global
 include $(top_srcdir)/contrib/contrib-global.mk
 endif
 
+.PHONY: tablespace-setup specfile-setup
+tablespace-setup:
+    rm -rf ./testtablespace
+    mkdir ./testtablespace
+
+specfile-setup: input_sto/*.spec
+    sed 's!@srcdir@!$(realpath $(top_srcdir))!g' $? > specs/$(notdir $?)
+
+check: tablespace-setup specfile-setup
+
 # But it can nonetheless be very helpful to run tests on preexisting
 # installation, allow to do so, but only if requested explicitly.
 installcheck-force:
diff --git a/src/test/modules/snapshot_too_old/expected/sto_wal_optimized.out
b/src/test/modules/snapshot_too_old/expected/sto_wal_optimized.out
new file mode 100644
index 0000000000..4038d0a713
--- /dev/null
+++ b/src/test/modules/snapshot_too_old/expected/sto_wal_optimized.out
@@ -0,0 +1,24 @@
+Parsed test spec with 3 sessions
+
+starting permutation: s1a1 s1a2 s2a1 s2a2 s1b1 a3-0 a3-1 s3-2 s3-3 s3-4 s2b1
+step s1a1: BEGIN;
+step s1a2: DELETE FROM t;
+step s2a1: BEGIN ISOLATION LEVEL REPEATABLE READ;
+step s2a2: SELECT 1;
+?column?       
+
+1              
+step s1b1: COMMIT;
+step a3-0: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
+setting        pg_sleep       
+
+0                             
+step a3-1: BEGIN;
+step s3-2: ALTER TABLE t SET TABLESPACE tsp1;
+step s3-3: SELECT count(*) FROM t;
+count          
+
+0              
+step s3-4: COMMIT;
+step s2b1: SELECT count(*) FROM t;
+ERROR:  snapshot too old
diff --git a/src/test/modules/snapshot_too_old/input_sto/sto_wal_optimized.spec
b/src/test/modules/snapshot_too_old/input_sto/sto_wal_optimized.spec
new file mode 100644
index 0000000000..02adb50581
--- /dev/null
+++ b/src/test/modules/snapshot_too_old/input_sto/sto_wal_optimized.spec
@@ -0,0 +1,44 @@
+# This test provokes a "snapshot too old" error 
+#
+# The sleep is needed because with a threshold of zero a statement could error
+# on changes it made.  With more normal settings no external delay is needed,
+# but we don't want these tests to run long enough to see that, since
+# granularity is in minutes.
+#
+# Since results depend on the value of old_snapshot_threshold, sneak that into
+# the line generated by the sleep, so that a surprising values isn't so hard
+# to identify.
+
+setup
+{
+    CREATE TABLESPACE tsp1 LOCATION '@srcdir@/src/test/modules/snapshot_too_old/testtablespace';
+}
+
+setup
+{
+    CREATE TABLE t AS SELECT a FROM generate_series(0, 9) a;
+}
+
+session "s1"
+step "s1a1" { BEGIN; }
+step "s1a2" { DELETE FROM t; }
+step "s1b1" { COMMIT; }
+
+session "s2"
+# s2a : take snapshot
+step "s2a1" { BEGIN ISOLATION LEVEL REPEATABLE READ; }
+step "s2a2" { SELECT 1; }
+# s2b : wanted "snapshot too old"
+step "s2b1" { SELECT count(*) FROM t; }
+
+session "s3"
+# s3-0 : wait for snapshot is expired
+step "a3-0"   { SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold'; }
+step "a3-1"   { BEGIN; }
+# s3-2 : start skipping WAL when wal_level=minimal
+step "s3-2"   { ALTER TABLE t SET TABLESPACE tsp1; }
+# s3-3 : early pruning. w/o WAL and LSN changes when wal_level=minimal
+step "s3-3"   { SELECT count(*) FROM t; }
+step "s3-4"   { COMMIT; }
+
+permutation "s1a1" "s1a2" "s2a1" "s2a2" "s1b1" "a3-0" "a3-1" "s3-2" "s3-3" "s3-4" "s2b1"
diff --git a/src/test/modules/snapshot_too_old/sto.conf b/src/test/modules/snapshot_too_old/sto.conf
index 7eeaeeb0dc..09887fc725 100644
--- a/src/test/modules/snapshot_too_old/sto.conf
+++ b/src/test/modules/snapshot_too_old/sto.conf
@@ -1,2 +1,5 @@
 autovacuum = off
 old_snapshot_threshold = 0
+
+# needed when setting wal_level=minimal
+wal_skip_threshold = 0
-- 
2.27.0

From 2e6e3930fdd495ec4b23150b7577889a90b56bf4 Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horikyoga.ntt@gmail.com>
Date: Mon, 18 Jan 2021 14:47:21 +0900
Subject: [PATCH v5 2/3] Do not use RelationNeedsWAL to identify relation
 persistence

RelationNeedsWAL() may return false for a permanent relation when
wal_level=minimal and the relation is created or truncated in the
current transaction. Directly examine relpersistence instead of using
the function to know relation persistence.
---
 src/backend/catalog/pg_publication.c | 2 +-
 src/backend/optimizer/util/plancat.c | 3 ++-
 src/include/storage/bufmgr.h         | 8 +++++++-
 src/include/utils/rel.h              | 2 +-
 src/include/utils/snapmgr.h          | 2 +-
 5 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/backend/catalog/pg_publication.c b/src/backend/catalog/pg_publication.c
index 5f8e1c64e1..84d2efcfd2 100644
--- a/src/backend/catalog/pg_publication.c
+++ b/src/backend/catalog/pg_publication.c
@@ -67,7 +67,7 @@ check_publication_add_relation(Relation targetrel)
                  errdetail("System tables cannot be added to publications.")));
 
     /* UNLOGGED and TEMP relations cannot be part of publication. */
-    if (!RelationNeedsWAL(targetrel))
+    if (targetrel->rd_rel->relpersistence != RELPERSISTENCE_PERMANENT)
         ereport(ERROR,
                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                  errmsg("table \"%s\" cannot be replicated",
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index da322b453e..177e6e336a 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -126,7 +126,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
     relation = table_open(relationObjectId, NoLock);
 
     /* Temporary and unlogged relations are inaccessible during recovery. */
-    if (!RelationNeedsWAL(relation) && RecoveryInProgress())
+    if (relation->rd_rel->relpersistence != RELPERSISTENCE_PERMANENT &&
+        RecoveryInProgress())
         ereport(ERROR,
                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                  errmsg("cannot access temporary or unlogged relations during recovery")));
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index fb00fda6a7..e641174798 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -14,6 +14,7 @@
 #ifndef BUFMGR_H
 #define BUFMGR_H
 
+#include "access/xlog.h"
 #include "storage/block.h"
 #include "storage/buf.h"
 #include "storage/bufpage.h"
@@ -278,12 +279,17 @@ TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page)
 {
     Assert(relation != NULL);
 
+    /*
+     * While wal_level=minimal, early pruning can happen without updating page
+     * LSN. Don't take the fast-return path while wal_level=minimal so that we
+     * don't miss early pruning.
+     */
     if (old_snapshot_threshold >= 0
         && (snapshot) != NULL
         && ((snapshot)->snapshot_type == SNAPSHOT_MVCC
             || (snapshot)->snapshot_type == SNAPSHOT_TOAST)
         && !XLogRecPtrIsInvalid((snapshot)->lsn)
-        && PageGetLSN(page) > (snapshot)->lsn)
+        && (!XLogIsNeeded() || PageGetLSN(page) > (snapshot)->lsn))
         TestForOldSnapshot_impl(snapshot, relation);
 }
 
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 10b63982c0..f58d65cf28 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -554,7 +554,7 @@ typedef struct ViewOptions
 
 /*
  * RelationNeedsWAL
- *        True if relation needs WAL.
+ *        True if relation needs WAL at the time.
  *
  * Returns false if wal_level = minimal and this relation is created or
  * truncated in the current transaction.  See "Skipping WAL for New
diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h
index 579be352c5..c21ee3c289 100644
--- a/src/include/utils/snapmgr.h
+++ b/src/include/utils/snapmgr.h
@@ -37,7 +37,7 @@
  */
 #define RelationAllowsEarlyPruning(rel) \
 ( \
-     RelationNeedsWAL(rel) \
+     (rel)->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT    \
   && !IsCatalogRelation(rel) \
   && !RelationIsAccessibleInLogicalDecoding(rel) \
 )
-- 
2.27.0

From 67bf139a9355adc93801c5b35b912154504075c0 Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horikyoga.ntt@gmail.com>
Date: Wed, 20 Jan 2021 23:59:39 +0900
Subject: [PATCH v5 3/3] Poc: Keep page-LSN updated while WAL-skipping.

WAL-skipping optimization omits bumping page LSN when ALTER TABLE SET
TABLESPACE. Also heap_page_prune does the same.  However,
old_snapshot_threshold feature needs page LSN to be kept updated on
these operations so that TestForOldSnapshot properly find identify
whether a snapshot is invalidated.
---
 src/backend/access/heap/pruneheap.c | 14 ++++++++++++++
 src/backend/catalog/storage.c       |  8 ++++++++
 src/include/storage/bufmgr.h        |  3 ++-
 src/include/utils/rel.h             |  4 ++--
 4 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index e3a716a2a2..33c0841b36 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -70,6 +70,8 @@ static void heap_prune_record_redirect(PruneState *prstate,
 static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
 static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
 
+/* XXXXXXXX tmporary modification */
+extern XLogRecPtr gistXLogAssignLSN(void);
 
 /*
  * Optionally prune and repair fragmentation in the specified page.
@@ -331,6 +333,18 @@ heap_page_prune(Relation relation, Buffer buffer,
 
             PageSetLSN(BufferGetPage(buffer), recptr);
         }
+        else if (relation->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT)
+        {
+            XLogRecPtr    currlsn = GetXLogInsertRecPtr();
+            Page        page = BufferGetPage(buffer);
+
+            Assert(wal_level < WAL_LEVEL_REPLICA);
+
+            /* we need to update page LSN to notify reader of pruning */
+            if (PageGetLSN(page) == currlsn)
+                currlsn = gistXLogAssignLSN();
+            PageSetLSN(page, currlsn);
+        }
     }
     else
     {
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index cba7a9ada0..488ac6f760 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -75,6 +75,8 @@ typedef struct PendingRelSync
 static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
 HTAB       *pendingSyncHash = NULL;
 
+/* XXXXXXXX tmporary modification */
+extern XLogRecPtr gistXLogAssignLSN(void);
 
 /*
  * AddPendingSync
@@ -414,6 +416,7 @@ RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
     bool        copying_initfork;
     BlockNumber nblocks;
     BlockNumber blkno;
+    XLogRecPtr    fakepagelsn;
 
     page = (Page) buf.data;
 
@@ -434,6 +437,9 @@ RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
     use_wal = XLogIsNeeded() &&
         (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork);
 
+    if (!use_wal)
+        fakepagelsn = gistXLogAssignLSN();
+
     nblocks = smgrnblocks(src, forkNum);
 
     for (blkno = 0; blkno < nblocks; blkno++)
@@ -460,6 +466,8 @@ RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
          */
         if (use_wal)
             log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page, false);
+        else
+            PageSetLSN(page, fakepagelsn);
 
         PageSetChecksumInplace(page, blkno);
 
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index e641174798..9ab9e7394c 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -19,6 +19,7 @@
 #include "storage/buf.h"
 #include "storage/bufpage.h"
 #include "storage/relfilenode.h"
+#include "utils/rel.h"
 #include "utils/relcache.h"
 #include "utils/snapmgr.h"
 
@@ -289,7 +290,7 @@ TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page)
         && ((snapshot)->snapshot_type == SNAPSHOT_MVCC
             || (snapshot)->snapshot_type == SNAPSHOT_TOAST)
         && !XLogRecPtrIsInvalid((snapshot)->lsn)
-        && (!XLogIsNeeded() || PageGetLSN(page) > (snapshot)->lsn))
+        && PageGetLSN(page) > (snapshot)->lsn)
         TestForOldSnapshot_impl(snapshot, relation);
 }
 
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index f58d65cf28..35e53c8d74 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -618,8 +618,8 @@ typedef struct ViewOptions
  *        decoding snapshot.
  */
 #define RelationIsAccessibleInLogicalDecoding(relation) \
-    (XLogLogicalInfoActive() && \
-     RelationNeedsWAL(relation) && \
+    (XLogLogicalInfoActive() &&                                            \
+     (relation)->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT &&    \
      (IsCatalogRelation(relation) || RelationIsUsedAsCatalogTable(relation)))
 
 /*
-- 
2.27.0


pgsql-hackers by date:

Previous
From: "Dian M Fay"
Date:
Subject: Re: [HACKERS] [PATCH] Generic type subscripting
Next
From: Masahiko Sawada
Date:
Subject: Re: pg_stat_statements oddity with track = all