Re: Wrong usage of RelationNeedsWAL - Mailing list pgsql-hackers
From | Kyotaro Horiguchi |
---|---|
Subject | Re: Wrong usage of RelationNeedsWAL |
Date | |
Msg-id | 20210121.002844.690876870629206205.horikyota.ntt@gmail.com Whole thread Raw |
In response to | Re: Wrong usage of RelationNeedsWAL (Kyotaro Horiguchi <horikyota.ntt@gmail.com>) |
Responses |
Re: Wrong usage of RelationNeedsWAL
|
List | pgsql-hackers |
At Wed, 20 Jan 2021 17:34:44 +0900 (JST), Kyotaro Horiguchi <horikyota.ntt@gmail.com> wrote in > Anyway, it seems actually dangerous that cause pruning on wal-skipped > relation. > > > with your patch versions. Could you try implementing both test procedures in > > src/test/modules/snapshot_too_old? There's no need to make the test use > > wal_level=minimal explicitly; it's sufficient to catch these bugs when > > wal_level=minimal is in the TEMP_CONFIG file. > > In the attached, TestForOldSnapshot() considers XLogIsNeeded(), > instead of moving the condition on LSN to TestForOldSnapshot_impl for > performance. > > I'll add the test part in the next version. This is it. However, with the previous patch, two existing tests sto_using_cursor and sto_using_select behaves differently from the master. That change is coming from the omission of actual LSN check in TestForOldSnapshot while wal_level=minimal. So we have no choice other than actually updating page LSN. In the scenario under discussion there are two places we need to do that. one is heap_page_prune, and the other is RelationCopyStorge. As a PoC, I used gistXLogAssignLSN() as is for thie purpose. See the attached third file. If it is the right direction, I will move XLOG_GIST_ASSIGN_LSN to XLOG_ASSIGN_LSN and move gistXLogAssignLSN() to XLogAdvanceLSN() or XLogNop() or such. With the third patch, the test succedds both wal_level = replica and minimal. regards. -- Kyotaro Horiguchi NTT Open Source Software Center From 332ac10844befb8a9c00df9f68993eb3696f0a85 Mon Sep 17 00:00:00 2001 From: Kyotaro Horiguchi <horikyoga.ntt@gmail.com> Date: Wed, 20 Jan 2021 20:57:03 +0900 Subject: [PATCH v5 1/3] Test for snapshot too old and wal_level=minimal --- src/test/modules/snapshot_too_old/Makefile | 12 ++++- .../expected/sto_wal_optimized.out | 24 ++++++++++ .../input_sto/sto_wal_optimized.spec | 44 +++++++++++++++++++ src/test/modules/snapshot_too_old/sto.conf | 3 ++ 4 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 src/test/modules/snapshot_too_old/expected/sto_wal_optimized.out create mode 100644 src/test/modules/snapshot_too_old/input_sto/sto_wal_optimized.spec diff --git a/src/test/modules/snapshot_too_old/Makefile b/src/test/modules/snapshot_too_old/Makefile index dfb4537f63..1e0b0b6efc 100644 --- a/src/test/modules/snapshot_too_old/Makefile +++ b/src/test/modules/snapshot_too_old/Makefile @@ -4,7 +4,7 @@ # we have to clean those result files explicitly EXTRA_CLEAN = $(pg_regress_clean_files) -ISOLATION = sto_using_cursor sto_using_select sto_using_hash_index +ISOLATION = sto_using_cursor sto_using_select sto_using_hash_index sto_wal_optimized ISOLATION_OPTS = --temp-config $(top_srcdir)/src/test/modules/snapshot_too_old/sto.conf # Disabled because these tests require "old_snapshot_threshold" >= 0, which @@ -22,6 +22,16 @@ include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk endif +.PHONY: tablespace-setup specfile-setup +tablespace-setup: + rm -rf ./testtablespace + mkdir ./testtablespace + +specfile-setup: input_sto/*.spec + sed 's!@srcdir@!$(realpath $(top_srcdir))!g' $? > specs/$(notdir $?) + +check: tablespace-setup specfile-setup + # But it can nonetheless be very helpful to run tests on preexisting # installation, allow to do so, but only if requested explicitly. installcheck-force: diff --git a/src/test/modules/snapshot_too_old/expected/sto_wal_optimized.out b/src/test/modules/snapshot_too_old/expected/sto_wal_optimized.out new file mode 100644 index 0000000000..4038d0a713 --- /dev/null +++ b/src/test/modules/snapshot_too_old/expected/sto_wal_optimized.out @@ -0,0 +1,24 @@ +Parsed test spec with 3 sessions + +starting permutation: s1a1 s1a2 s2a1 s2a2 s1b1 a3-0 a3-1 s3-2 s3-3 s3-4 s2b1 +step s1a1: BEGIN; +step s1a2: DELETE FROM t; +step s2a1: BEGIN ISOLATION LEVEL REPEATABLE READ; +step s2a2: SELECT 1; +?column? + +1 +step s1b1: COMMIT; +step a3-0: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold'; +setting pg_sleep + +0 +step a3-1: BEGIN; +step s3-2: ALTER TABLE t SET TABLESPACE tsp1; +step s3-3: SELECT count(*) FROM t; +count + +0 +step s3-4: COMMIT; +step s2b1: SELECT count(*) FROM t; +ERROR: snapshot too old diff --git a/src/test/modules/snapshot_too_old/input_sto/sto_wal_optimized.spec b/src/test/modules/snapshot_too_old/input_sto/sto_wal_optimized.spec new file mode 100644 index 0000000000..02adb50581 --- /dev/null +++ b/src/test/modules/snapshot_too_old/input_sto/sto_wal_optimized.spec @@ -0,0 +1,44 @@ +# This test provokes a "snapshot too old" error +# +# The sleep is needed because with a threshold of zero a statement could error +# on changes it made. With more normal settings no external delay is needed, +# but we don't want these tests to run long enough to see that, since +# granularity is in minutes. +# +# Since results depend on the value of old_snapshot_threshold, sneak that into +# the line generated by the sleep, so that a surprising values isn't so hard +# to identify. + +setup +{ + CREATE TABLESPACE tsp1 LOCATION '@srcdir@/src/test/modules/snapshot_too_old/testtablespace'; +} + +setup +{ + CREATE TABLE t AS SELECT a FROM generate_series(0, 9) a; +} + +session "s1" +step "s1a1" { BEGIN; } +step "s1a2" { DELETE FROM t; } +step "s1b1" { COMMIT; } + +session "s2" +# s2a : take snapshot +step "s2a1" { BEGIN ISOLATION LEVEL REPEATABLE READ; } +step "s2a2" { SELECT 1; } +# s2b : wanted "snapshot too old" +step "s2b1" { SELECT count(*) FROM t; } + +session "s3" +# s3-0 : wait for snapshot is expired +step "a3-0" { SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold'; } +step "a3-1" { BEGIN; } +# s3-2 : start skipping WAL when wal_level=minimal +step "s3-2" { ALTER TABLE t SET TABLESPACE tsp1; } +# s3-3 : early pruning. w/o WAL and LSN changes when wal_level=minimal +step "s3-3" { SELECT count(*) FROM t; } +step "s3-4" { COMMIT; } + +permutation "s1a1" "s1a2" "s2a1" "s2a2" "s1b1" "a3-0" "a3-1" "s3-2" "s3-3" "s3-4" "s2b1" diff --git a/src/test/modules/snapshot_too_old/sto.conf b/src/test/modules/snapshot_too_old/sto.conf index 7eeaeeb0dc..09887fc725 100644 --- a/src/test/modules/snapshot_too_old/sto.conf +++ b/src/test/modules/snapshot_too_old/sto.conf @@ -1,2 +1,5 @@ autovacuum = off old_snapshot_threshold = 0 + +# needed when setting wal_level=minimal +wal_skip_threshold = 0 -- 2.27.0 From 2e6e3930fdd495ec4b23150b7577889a90b56bf4 Mon Sep 17 00:00:00 2001 From: Kyotaro Horiguchi <horikyoga.ntt@gmail.com> Date: Mon, 18 Jan 2021 14:47:21 +0900 Subject: [PATCH v5 2/3] Do not use RelationNeedsWAL to identify relation persistence RelationNeedsWAL() may return false for a permanent relation when wal_level=minimal and the relation is created or truncated in the current transaction. Directly examine relpersistence instead of using the function to know relation persistence. --- src/backend/catalog/pg_publication.c | 2 +- src/backend/optimizer/util/plancat.c | 3 ++- src/include/storage/bufmgr.h | 8 +++++++- src/include/utils/rel.h | 2 +- src/include/utils/snapmgr.h | 2 +- 5 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/backend/catalog/pg_publication.c b/src/backend/catalog/pg_publication.c index 5f8e1c64e1..84d2efcfd2 100644 --- a/src/backend/catalog/pg_publication.c +++ b/src/backend/catalog/pg_publication.c @@ -67,7 +67,7 @@ check_publication_add_relation(Relation targetrel) errdetail("System tables cannot be added to publications."))); /* UNLOGGED and TEMP relations cannot be part of publication. */ - if (!RelationNeedsWAL(targetrel)) + if (targetrel->rd_rel->relpersistence != RELPERSISTENCE_PERMANENT) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("table \"%s\" cannot be replicated", diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index da322b453e..177e6e336a 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -126,7 +126,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, relation = table_open(relationObjectId, NoLock); /* Temporary and unlogged relations are inaccessible during recovery. */ - if (!RelationNeedsWAL(relation) && RecoveryInProgress()) + if (relation->rd_rel->relpersistence != RELPERSISTENCE_PERMANENT && + RecoveryInProgress()) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary or unlogged relations during recovery"))); diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index fb00fda6a7..e641174798 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -14,6 +14,7 @@ #ifndef BUFMGR_H #define BUFMGR_H +#include "access/xlog.h" #include "storage/block.h" #include "storage/buf.h" #include "storage/bufpage.h" @@ -278,12 +279,17 @@ TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page) { Assert(relation != NULL); + /* + * While wal_level=minimal, early pruning can happen without updating page + * LSN. Don't take the fast-return path while wal_level=minimal so that we + * don't miss early pruning. + */ if (old_snapshot_threshold >= 0 && (snapshot) != NULL && ((snapshot)->snapshot_type == SNAPSHOT_MVCC || (snapshot)->snapshot_type == SNAPSHOT_TOAST) && !XLogRecPtrIsInvalid((snapshot)->lsn) - && PageGetLSN(page) > (snapshot)->lsn) + && (!XLogIsNeeded() || PageGetLSN(page) > (snapshot)->lsn)) TestForOldSnapshot_impl(snapshot, relation); } diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 10b63982c0..f58d65cf28 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -554,7 +554,7 @@ typedef struct ViewOptions /* * RelationNeedsWAL - * True if relation needs WAL. + * True if relation needs WAL at the time. * * Returns false if wal_level = minimal and this relation is created or * truncated in the current transaction. See "Skipping WAL for New diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h index 579be352c5..c21ee3c289 100644 --- a/src/include/utils/snapmgr.h +++ b/src/include/utils/snapmgr.h @@ -37,7 +37,7 @@ */ #define RelationAllowsEarlyPruning(rel) \ ( \ - RelationNeedsWAL(rel) \ + (rel)->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT \ && !IsCatalogRelation(rel) \ && !RelationIsAccessibleInLogicalDecoding(rel) \ ) -- 2.27.0 From 67bf139a9355adc93801c5b35b912154504075c0 Mon Sep 17 00:00:00 2001 From: Kyotaro Horiguchi <horikyoga.ntt@gmail.com> Date: Wed, 20 Jan 2021 23:59:39 +0900 Subject: [PATCH v5 3/3] Poc: Keep page-LSN updated while WAL-skipping. WAL-skipping optimization omits bumping page LSN when ALTER TABLE SET TABLESPACE. Also heap_page_prune does the same. However, old_snapshot_threshold feature needs page LSN to be kept updated on these operations so that TestForOldSnapshot properly find identify whether a snapshot is invalidated. --- src/backend/access/heap/pruneheap.c | 14 ++++++++++++++ src/backend/catalog/storage.c | 8 ++++++++ src/include/storage/bufmgr.h | 3 ++- src/include/utils/rel.h | 4 ++-- 4 files changed, 26 insertions(+), 3 deletions(-) diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index e3a716a2a2..33c0841b36 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -70,6 +70,8 @@ static void heap_prune_record_redirect(PruneState *prstate, static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum); static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum); +/* XXXXXXXX tmporary modification */ +extern XLogRecPtr gistXLogAssignLSN(void); /* * Optionally prune and repair fragmentation in the specified page. @@ -331,6 +333,18 @@ heap_page_prune(Relation relation, Buffer buffer, PageSetLSN(BufferGetPage(buffer), recptr); } + else if (relation->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT) + { + XLogRecPtr currlsn = GetXLogInsertRecPtr(); + Page page = BufferGetPage(buffer); + + Assert(wal_level < WAL_LEVEL_REPLICA); + + /* we need to update page LSN to notify reader of pruning */ + if (PageGetLSN(page) == currlsn) + currlsn = gistXLogAssignLSN(); + PageSetLSN(page, currlsn); + } } else { diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index cba7a9ada0..488ac6f760 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -75,6 +75,8 @@ typedef struct PendingRelSync static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */ HTAB *pendingSyncHash = NULL; +/* XXXXXXXX tmporary modification */ +extern XLogRecPtr gistXLogAssignLSN(void); /* * AddPendingSync @@ -414,6 +416,7 @@ RelationCopyStorage(SMgrRelation src, SMgrRelation dst, bool copying_initfork; BlockNumber nblocks; BlockNumber blkno; + XLogRecPtr fakepagelsn; page = (Page) buf.data; @@ -434,6 +437,9 @@ RelationCopyStorage(SMgrRelation src, SMgrRelation dst, use_wal = XLogIsNeeded() && (relpersistence == RELPERSISTENCE_PERMANENT || copying_initfork); + if (!use_wal) + fakepagelsn = gistXLogAssignLSN(); + nblocks = smgrnblocks(src, forkNum); for (blkno = 0; blkno < nblocks; blkno++) @@ -460,6 +466,8 @@ RelationCopyStorage(SMgrRelation src, SMgrRelation dst, */ if (use_wal) log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page, false); + else + PageSetLSN(page, fakepagelsn); PageSetChecksumInplace(page, blkno); diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index e641174798..9ab9e7394c 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -19,6 +19,7 @@ #include "storage/buf.h" #include "storage/bufpage.h" #include "storage/relfilenode.h" +#include "utils/rel.h" #include "utils/relcache.h" #include "utils/snapmgr.h" @@ -289,7 +290,7 @@ TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page) && ((snapshot)->snapshot_type == SNAPSHOT_MVCC || (snapshot)->snapshot_type == SNAPSHOT_TOAST) && !XLogRecPtrIsInvalid((snapshot)->lsn) - && (!XLogIsNeeded() || PageGetLSN(page) > (snapshot)->lsn)) + && PageGetLSN(page) > (snapshot)->lsn) TestForOldSnapshot_impl(snapshot, relation); } diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index f58d65cf28..35e53c8d74 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -618,8 +618,8 @@ typedef struct ViewOptions * decoding snapshot. */ #define RelationIsAccessibleInLogicalDecoding(relation) \ - (XLogLogicalInfoActive() && \ - RelationNeedsWAL(relation) && \ + (XLogLogicalInfoActive() && \ + (relation)->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT && \ (IsCatalogRelation(relation) || RelationIsUsedAsCatalogTable(relation))) /* -- 2.27.0
pgsql-hackers by date: