Re: [HACKERS] WAL logging problem in 9.4.3? - Mailing list pgsql-hackers
| From | Kyotaro Horiguchi |
|---|---|
| Subject | Re: [HACKERS] WAL logging problem in 9.4.3? |
| Date | |
| Msg-id | 20190710.131914.109303138.horikyota.ntt@gmail.com Whole thread Raw |
| In response to | Re: [HACKERS] WAL logging problem in 9.4.3? (Kyotaro HORIGUCHI <horiguchi.kyotaro@lab.ntt.co.jp>) |
| Responses |
Re: [HACKERS] WAL logging problem in 9.4.3?
|
| List | pgsql-hackers |
Hello. Rebased the patch to master(bd56cd75d2).
regards.
--
Kyotaro Horiguchi
NTT Open Source Software Center
From ac52e2c1c56a96c1745149ff4220a3a116d6c811 Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horiguchi.kyotaro@lab.ntt.co.jp>
Date: Thu, 11 Oct 2018 10:03:21 +0900
Subject: [PATCH 1/3] TAP test for copy-truncation optimization.
---
src/test/recovery/t/018_wal_optimize.pl | 291 ++++++++++++++++++++++++++++++++
1 file changed, 291 insertions(+)
create mode 100644 src/test/recovery/t/018_wal_optimize.pl
diff --git a/src/test/recovery/t/018_wal_optimize.pl b/src/test/recovery/t/018_wal_optimize.pl
new file mode 100644
index 0000000000..4fa8be728e
--- /dev/null
+++ b/src/test/recovery/t/018_wal_optimize.pl
@@ -0,0 +1,291 @@
+# Test WAL replay for optimized TRUNCATE and COPY records
+#
+# WAL truncation is optimized in some cases with TRUNCATE and COPY queries
+# which sometimes interact badly with the other optimizations in line with
+# several setting values of wal_level, particularly when using "minimal" or
+# "replica". The optimization may be enabled or disabled depending on the
+# scenarios dealt here, and should never result in any type of failures or
+# data loss.
+use strict;
+use warnings;
+
+use PostgresNode;
+use TestLib;
+use Test::More tests => 24;
+
+sub check_orphan_relfilenodes
+{
+ my($node, $test_name) = @_;
+
+ my $db_oid = $node->safe_psql('postgres',
+ "SELECT oid FROM pg_database WHERE datname = 'postgres'");
+ my $prefix = "base/$db_oid/";
+ my $filepaths_referenced = $node->safe_psql('postgres', "
+ SELECT pg_relation_filepath(oid) FROM pg_class
+ WHERE reltablespace = 0 and relpersistence <> 't' and
+ pg_relation_filepath(oid) IS NOT NULL;");
+ is_deeply([sort(map { "$prefix$_" }
+ grep(/^[0-9]+$/,
+ slurp_dir($node->data_dir . "/$prefix")))],
+ [sort split /\n/, $filepaths_referenced],
+ $test_name);
+ return;
+}
+
+# Wrapper routine tunable for wal_level.
+sub run_wal_optimize
+{
+ my $wal_level = shift;
+
+ # Primary needs to have wal_level = minimal here
+ my $node = get_new_node("node_$wal_level");
+ $node->init;
+ $node->append_conf('postgresql.conf', qq(
+wal_level = $wal_level
+));
+ $node->start;
+
+ # Setup
+ my $tablespace_dir = $node->basedir . '/tablespace_other';
+ mkdir ($tablespace_dir);
+ $tablespace_dir = TestLib::real_dir($tablespace_dir);
+ $node->safe_psql('postgres',
+ "CREATE TABLESPACE other LOCATION '$tablespace_dir';");
+
+ # Test direct truncation optimization. No tuples
+ $node->safe_psql('postgres', "
+ BEGIN;
+ CREATE TABLE test1 (id serial PRIMARY KEY);
+ TRUNCATE test1;
+ COMMIT;");
+
+ $node->stop('immediate');
+ $node->start;
+
+ my $result = $node->safe_psql('postgres', "SELECT count(*) FROM test1;");
+ is($result, qq(0),
+ "wal_level = $wal_level, optimized truncation with empty table");
+
+ # Test truncation with inserted tuples within the same transaction.
+ # Tuples inserted after the truncation should be seen.
+ $node->safe_psql('postgres', "
+ BEGIN;
+ CREATE TABLE test2 (id serial PRIMARY KEY);
+ INSERT INTO test2 VALUES (DEFAULT);
+ TRUNCATE test2;
+ INSERT INTO test2 VALUES (DEFAULT);
+ COMMIT;");
+
+ $node->stop('immediate');
+ $node->start;
+
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM test2;");
+ is($result, qq(1),
+ "wal_level = $wal_level, optimized truncation with inserted table");
+
+ # Data file for COPY query in follow-up tests.
+ my $basedir = $node->basedir;
+ my $copy_file = "$basedir/copy_data.txt";
+ TestLib::append_to_file($copy_file, qq(20000,30000
+20001,30001
+20002,30002));
+
+ # Test truncation with inserted tuples using COPY. Tuples copied after the
+ # truncation should be seen.
+ $node->safe_psql('postgres', "
+ BEGIN;
+ CREATE TABLE test3 (id serial PRIMARY KEY, id2 int);
+ INSERT INTO test3 (id, id2) VALUES (DEFAULT, generate_series(1,3000));
+ TRUNCATE test3;
+ COPY test3 FROM '$copy_file' DELIMITER ',';
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM test3;");
+ is($result, qq(3),
+ "wal_level = $wal_level, optimized truncation with copied table");
+
+ # Like previous test, but rollback SET TABLESPACE in a subtransaction.
+ $node->safe_psql('postgres', "
+ BEGIN;
+ CREATE TABLE test3a (id serial PRIMARY KEY, id2 int);
+ INSERT INTO test3a (id, id2) VALUES (DEFAULT, generate_series(1,3000));
+ TRUNCATE test3a;
+ SAVEPOINT s; ALTER TABLE test3a SET TABLESPACE other; ROLLBACK TO s;
+ COPY test3a FROM '$copy_file' DELIMITER ',';
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM test3a;");
+ is($result, qq(3),
+ "wal_level = $wal_level, SET TABLESPACE in subtransaction");
+
+ # in different subtransaction patterns
+ $node->safe_psql('postgres', "
+ BEGIN;
+ CREATE TABLE test3a2 (id serial PRIMARY KEY, id2 int);
+ INSERT INTO test3a2 (id, id2) VALUES (DEFAULT, generate_series(1,3000));
+ TRUNCATE test3a2;
+ SAVEPOINT s; ALTER TABLE test3a SET TABLESPACE other; RELEASE s;
+ COPY test3a2 FROM '$copy_file' DELIMITER ',';
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM test3a;");
+ is($result, qq(3),
+ "wal_level = $wal_level, SET TABLESPACE in subtransaction");
+
+ $node->safe_psql('postgres', "
+ BEGIN;
+ CREATE TABLE test3a3 (id serial PRIMARY KEY, id2 int);
+ INSERT INTO test3a3 (id, id2) VALUES (DEFAULT, generate_series(1,3000));
+ TRUNCATE test3a3;
+ SAVEPOINT s;
+ ALTER TABLE test3a3 SET TABLESPACE other;
+ SAVEPOINT s2;
+ ALTER TABLE test3a3 SET TABLESPACE pg_default;
+ ROLLBACK TO s2;
+ SAVEPOINT s2;
+ ALTER TABLE test3a3 SET TABLESPACE pg_default;
+ RELEASE s2;
+ ROLLBACK TO s;
+ COPY test3a3 FROM '$copy_file' DELIMITER ',';
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM test3a;");
+ is($result, qq(3),
+ "wal_level = $wal_level, SET TABLESPACE in subtransaction");
+
+ # UPDATE touches two buffers; one is BufferNeedsWAL(); the other is not.
+ $node->safe_psql('postgres', "
+ BEGIN;
+ CREATE TABLE test3b (id serial PRIMARY KEY, id2 int);
+ INSERT INTO test3b (id, id2) VALUES (DEFAULT, generate_series(1,10000));
+ COPY test3b FROM '$copy_file' DELIMITER ','; -- set sync_above
+ UPDATE test3b SET id2 = id2 + 1;
+ DELETE FROM test3b;
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM test3b;");
+ is($result, qq(0),
+ "wal_level = $wal_level, UPDATE of logged page extends relation");
+
+ # Test truncation with inserted tuples using both INSERT and COPY. Tuples
+ # inserted after the truncation should be seen.
+ $node->safe_psql('postgres', "
+ BEGIN;
+ CREATE TABLE test4 (id serial PRIMARY KEY, id2 int);
+ INSERT INTO test4 (id, id2) VALUES (DEFAULT, generate_series(1,10000));
+ TRUNCATE test4;
+ INSERT INTO test4 (id, id2) VALUES (DEFAULT, 10000);
+ COPY test4 FROM '$copy_file' DELIMITER ',';
+ INSERT INTO test4 (id, id2) VALUES (DEFAULT, 10000);
+ COMMIT;");
+
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM test4;");
+ is($result, qq(5),
+ "wal_level = $wal_level, optimized truncation with inserted/copied table");
+
+ # Test consistency of COPY with INSERT for table created in the same
+ # transaction.
+ $node->safe_psql('postgres', "
+ BEGIN;
+ CREATE TABLE test5 (id serial PRIMARY KEY, id2 int);
+ INSERT INTO test5 VALUES (DEFAULT, 1);
+ COPY test5 FROM '$copy_file' DELIMITER ',';
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM test5;");
+ is($result, qq(4),
+ "wal_level = $wal_level, replay of optimized copy with inserted table");
+
+ # Test consistency of COPY that inserts more to the same table using
+ # triggers. If the INSERTS from the trigger go to the same block data
+ # is copied to, and the INSERTs are WAL-logged, WAL replay will fail when
+ # it tries to replay the WAL record but the "before" image doesn't match,
+ # because not all changes were WAL-logged.
+ $node->safe_psql('postgres', "
+ BEGIN;
+ CREATE TABLE test6 (id serial PRIMARY KEY, id2 text);
+ CREATE FUNCTION test6_before_row_trig() RETURNS trigger
+ LANGUAGE plpgsql as \$\$
+ BEGIN
+ IF new.id2 NOT LIKE 'triggered%' THEN
+ INSERT INTO test6 VALUES (DEFAULT, 'triggered row before' || NEW.id2);
+ END IF;
+ RETURN NEW;
+ END; \$\$;
+ CREATE FUNCTION test6_after_row_trig() RETURNS trigger
+ LANGUAGE plpgsql as \$\$
+ BEGIN
+ IF new.id2 NOT LIKE 'triggered%' THEN
+ INSERT INTO test6 VALUES (DEFAULT, 'triggered row after' || OLD.id2);
+ END IF;
+ RETURN NEW;
+ END; \$\$;
+ CREATE TRIGGER test6_before_row_insert
+ BEFORE INSERT ON test6
+ FOR EACH ROW EXECUTE PROCEDURE test6_before_row_trig();
+ CREATE TRIGGER test6_after_row_insert
+ AFTER INSERT ON test6
+ FOR EACH ROW EXECUTE PROCEDURE test6_after_row_trig();
+ COPY test6 FROM '$copy_file' DELIMITER ',';
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM test6;");
+ is($result, qq(9),
+ "wal_level = $wal_level, replay of optimized copy with before trigger");
+
+ # Test consistency of INSERT, COPY and TRUNCATE in same transaction block
+ # with TRUNCATE triggers.
+ $node->safe_psql('postgres', "
+ BEGIN;
+ CREATE TABLE test7 (id serial PRIMARY KEY, id2 text);
+ CREATE FUNCTION test7_before_stat_trig() RETURNS trigger
+ LANGUAGE plpgsql as \$\$
+ BEGIN
+ INSERT INTO test7 VALUES (DEFAULT, 'triggered stat before');
+ RETURN NULL;
+ END; \$\$;
+ CREATE FUNCTION test7_after_stat_trig() RETURNS trigger
+ LANGUAGE plpgsql as \$\$
+ BEGIN
+ INSERT INTO test7 VALUES (DEFAULT, 'triggered stat before');
+ RETURN NULL;
+ END; \$\$;
+ CREATE TRIGGER test7_before_stat_truncate
+ BEFORE TRUNCATE ON test7
+ FOR EACH STATEMENT EXECUTE PROCEDURE test7_before_stat_trig();
+ CREATE TRIGGER test7_after_stat_truncate
+ AFTER TRUNCATE ON test7
+ FOR EACH STATEMENT EXECUTE PROCEDURE test7_after_stat_trig();
+ INSERT INTO test7 VALUES (DEFAULT, 1);
+ TRUNCATE test7;
+ COPY test7 FROM '$copy_file' DELIMITER ',';
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM test7;");
+ is($result, qq(4),
+ "wal_level = $wal_level, replay of optimized copy with before trigger");
+
+ # Test redo of temp table creation.
+ $node->safe_psql('postgres', "
+ CREATE TEMP TABLE test8 (id serial PRIMARY KEY, id2 text);");
+ $node->stop('immediate');
+ $node->start;
+
+ check_orphan_relfilenodes($node, "wal_level = $wal_level, no orphan relfilenode remains");
+
+ return;
+}
+
+# Run same test suite for multiple wal_level values.
+run_wal_optimize("minimal");
+run_wal_optimize("replica");
--
2.16.3
From 4363a50092dc8aa536b24582a3160f4f47c85349 Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horiguchi.kyotaro@oss.ntt.co.jp>
Date: Mon, 27 May 2019 16:06:30 +0900
Subject: [PATCH 2/3] Fix WAL skipping feature
WAL-skipping operations mixed with WAL-logged operations can lead to
database corruption after a crash. This patch changes the WAL-skipping
feature so that no data modifcation is WAL-logged at all then sync
such relations at commit.
---
src/backend/access/heap/heapam.c | 4 +-
src/backend/access/heap/heapam_handler.c | 22 +----------
src/backend/access/heap/rewriteheap.c | 13 ++-----
src/backend/catalog/storage.c | 64 +++++++++++++++++++++++++-------
src/backend/commands/cluster.c | 24 ++++++++++++
src/backend/commands/copy.c | 38 ++++---------------
src/backend/commands/createas.c | 5 +--
src/backend/commands/matview.c | 4 --
src/backend/commands/tablecmds.c | 10 ++---
src/backend/storage/buffer/bufmgr.c | 33 +++++++++++-----
src/backend/utils/cache/relcache.c | 16 ++++++--
src/include/access/heapam.h | 1 -
src/include/access/rewriteheap.h | 2 +-
src/include/access/tableam.h | 41 ++------------------
src/include/storage/bufmgr.h | 1 +
src/include/utils/rel.h | 17 ++++++++-
16 files changed, 148 insertions(+), 147 deletions(-)
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index d768b9b061..eca98fb063 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -1941,7 +1941,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
MarkBufferDirty(buffer);
/* XLOG stuff */
- if (!(options & HEAP_INSERT_SKIP_WAL) && RelationNeedsWAL(relation))
+ if (RelationNeedsWAL(relation))
{
xl_heap_insert xlrec;
xl_heap_header xlhdr;
@@ -2124,7 +2124,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
/* currently not needed (thus unsupported) for heap_multi_insert() */
AssertArg(!(options & HEAP_INSERT_NO_LOGICAL));
- needwal = !(options & HEAP_INSERT_SKIP_WAL) && RelationNeedsWAL(relation);
+ needwal = RelationNeedsWAL(relation);
saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
HEAP_DEFAULT_FILLFACTOR);
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 09bc6fe98a..b9554f6064 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -556,18 +556,6 @@ tuple_lock_retry:
return result;
}
-static void
-heapam_finish_bulk_insert(Relation relation, int options)
-{
- /*
- * If we skipped writing WAL, then we need to sync the heap (but not
- * indexes since those use WAL anyway / don't go through tableam)
- */
- if (options & HEAP_INSERT_SKIP_WAL)
- heap_sync(relation);
-}
-
-
/* ------------------------------------------------------------------------
* DDL related callbacks for heap AM.
* ------------------------------------------------------------------------
@@ -699,7 +687,6 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
IndexScanDesc indexScan;
TableScanDesc tableScan;
HeapScanDesc heapScan;
- bool use_wal;
bool is_system_catalog;
Tuplesortstate *tuplesort;
TupleDesc oldTupDesc = RelationGetDescr(OldHeap);
@@ -713,12 +700,6 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
/* Remember if it's a system catalog */
is_system_catalog = IsSystemRelation(OldHeap);
- /*
- * We need to log the copied data in WAL iff WAL archiving/streaming is
- * enabled AND it's a WAL-logged rel.
- */
- use_wal = XLogIsNeeded() && RelationNeedsWAL(NewHeap);
-
/* use_wal off requires smgr_targblock be initially invalid */
Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);
@@ -729,7 +710,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
/* Initialize the rewrite operation */
rwstate = begin_heap_rewrite(OldHeap, NewHeap, OldestXmin, *xid_cutoff,
- *multi_cutoff, use_wal);
+ *multi_cutoff);
/* Set up sorting if wanted */
@@ -2517,7 +2498,6 @@ static const TableAmRoutine heapam_methods = {
.tuple_delete = heapam_tuple_delete,
.tuple_update = heapam_tuple_update,
.tuple_lock = heapam_tuple_lock,
- .finish_bulk_insert = heapam_finish_bulk_insert,
.tuple_fetch_row_version = heapam_fetch_row_version,
.tuple_get_latest_tid = heap_get_latest_tid,
diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c
index 72a448ad31..992d4b9880 100644
--- a/src/backend/access/heap/rewriteheap.c
+++ b/src/backend/access/heap/rewriteheap.c
@@ -144,7 +144,6 @@ typedef struct RewriteStateData
Page rs_buffer; /* page currently being built */
BlockNumber rs_blockno; /* block where page will go */
bool rs_buffer_valid; /* T if any tuples in buffer */
- bool rs_use_wal; /* must we WAL-log inserts? */
bool rs_logical_rewrite; /* do we need to do logical rewriting */
TransactionId rs_oldest_xmin; /* oldest xmin used by caller to determine
* tuple visibility */
@@ -238,15 +237,13 @@ static void logical_end_heap_rewrite(RewriteState state);
* oldest_xmin xid used by the caller to determine which tuples are dead
* freeze_xid xid before which tuples will be frozen
* min_multi multixact before which multis will be removed
- * use_wal should the inserts to the new heap be WAL-logged?
*
* Returns an opaque RewriteState, allocated in current memory context,
* to be used in subsequent calls to the other functions.
*/
RewriteState
begin_heap_rewrite(Relation old_heap, Relation new_heap, TransactionId oldest_xmin,
- TransactionId freeze_xid, MultiXactId cutoff_multi,
- bool use_wal)
+ TransactionId freeze_xid, MultiXactId cutoff_multi)
{
RewriteState state;
MemoryContext rw_cxt;
@@ -271,7 +268,6 @@ begin_heap_rewrite(Relation old_heap, Relation new_heap, TransactionId oldest_xm
/* new_heap needn't be empty, just locked */
state->rs_blockno = RelationGetNumberOfBlocks(new_heap);
state->rs_buffer_valid = false;
- state->rs_use_wal = use_wal;
state->rs_oldest_xmin = oldest_xmin;
state->rs_freeze_xid = freeze_xid;
state->rs_cutoff_multi = cutoff_multi;
@@ -330,7 +326,7 @@ end_heap_rewrite(RewriteState state)
/* Write the last page, if any */
if (state->rs_buffer_valid)
{
- if (state->rs_use_wal)
+ if (RelationNeedsWAL(state->rs_new_rel))
log_newpage(&state->rs_new_rel->rd_node,
MAIN_FORKNUM,
state->rs_blockno,
@@ -654,9 +650,6 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
{
int options = HEAP_INSERT_SKIP_FSM;
- if (!state->rs_use_wal)
- options |= HEAP_INSERT_SKIP_WAL;
-
/*
* While rewriting the heap for VACUUM FULL / CLUSTER, make sure data
* for the TOAST table are not logically decoded. The main heap is
@@ -695,7 +688,7 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
/* Doesn't fit, so write out the existing page */
/* XLOG stuff */
- if (state->rs_use_wal)
+ if (RelationNeedsWAL(state->rs_new_rel))
log_newpage(&state->rs_new_rel->rd_node,
MAIN_FORKNUM,
state->rs_blockno,
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index 3cc886f7fe..e4bcdc390f 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -57,7 +57,8 @@ typedef struct PendingRelDelete
{
RelFileNode relnode; /* relation that may need to be deleted */
BackendId backend; /* InvalidBackendId if not a temp rel */
- bool atCommit; /* T=delete at commit; F=delete at abort */
+ bool atCommit; /* T=work at commit; F=work at abort */
+ bool dosync; /* T=work is sync; F=work is delete */
int nestLevel; /* xact nesting level of request */
struct PendingRelDelete *next; /* linked-list link */
} PendingRelDelete;
@@ -114,10 +115,29 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence)
pending->relnode = rnode;
pending->backend = backend;
pending->atCommit = false; /* delete if abort */
+ pending->dosync = false;
pending->nestLevel = GetCurrentTransactionNestLevel();
pending->next = pendingDeletes;
pendingDeletes = pending;
+ /*
+ * We are going to skip WAL-logging for storage of persistent relations
+ * created in the current transaction when wal_level = minimal. The
+ * relation needs to be synced at commit.
+ */
+ if (relpersistence == RELPERSISTENCE_PERMANENT && !XLogIsNeeded())
+ {
+ pending = (PendingRelDelete *)
+ MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
+ pending->relnode = rnode;
+ pending->backend = backend;
+ pending->atCommit = true;
+ pending->dosync = true;
+ pending->nestLevel = GetCurrentTransactionNestLevel();
+ pending->next = pendingDeletes;
+ pendingDeletes = pending;
+ }
+
return srel;
}
@@ -155,6 +175,7 @@ RelationDropStorage(Relation rel)
pending->relnode = rel->rd_node;
pending->backend = rel->rd_backend;
pending->atCommit = true; /* delete if commit */
+ pending->dosync = false;
pending->nestLevel = GetCurrentTransactionNestLevel();
pending->next = pendingDeletes;
pendingDeletes = pending;
@@ -428,21 +449,34 @@ smgrDoPendingDeletes(bool isCommit)
{
SMgrRelation srel;
- srel = smgropen(pending->relnode, pending->backend);
-
- /* allocate the initial array, or extend it, if needed */
- if (maxrels == 0)
+ if (pending->dosync)
{
- maxrels = 8;
- srels = palloc(sizeof(SMgrRelation) * maxrels);
+ /* Perform pending sync of WAL-skipped relation */
+ FlushRelationBuffersWithoutRelcache(pending->relnode,
+ false);
+ srel = smgropen(pending->relnode, pending->backend);
+ smgrimmedsync(srel, MAIN_FORKNUM);
+ smgrclose(srel);
}
- else if (maxrels <= nrels)
+ else
{
- maxrels *= 2;
- srels = repalloc(srels, sizeof(SMgrRelation) * maxrels);
- }
+ /* Collect pending deletions */
+ srel = smgropen(pending->relnode, pending->backend);
- srels[nrels++] = srel;
+ /* allocate the initial array, or extend it, if needed */
+ if (maxrels == 0)
+ {
+ maxrels = 8;
+ srels = palloc(sizeof(SMgrRelation) * maxrels);
+ }
+ else if (maxrels <= nrels)
+ {
+ maxrels *= 2;
+ srels = repalloc(srels, sizeof(SMgrRelation) * maxrels);
+ }
+
+ srels[nrels++] = srel;
+ }
}
/* must explicitly free the list entry */
pfree(pending);
@@ -489,8 +523,9 @@ smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr)
nrels = 0;
for (pending = pendingDeletes; pending != NULL; pending = pending->next)
{
+ /* Pending syncs are excluded */
if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit
- && pending->backend == InvalidBackendId)
+ && pending->backend == InvalidBackendId && !pending->dosync)
nrels++;
}
if (nrels == 0)
@@ -502,8 +537,9 @@ smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr)
*ptr = rptr;
for (pending = pendingDeletes; pending != NULL; pending = pending->next)
{
+ /* Pending syncs are excluded */
if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit
- && pending->backend == InvalidBackendId)
+ && pending->backend == InvalidBackendId && !pending->dosync)
{
*rptr = pending->relnode;
rptr++;
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index ebaec4f8dd..6fc9d7d64e 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -1034,12 +1034,36 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
if (OidIsValid(relfilenode1) && OidIsValid(relfilenode2))
{
+ Relation rel1;
+ Relation rel2;
+
/*
* Normal non-mapped relations: swap relfilenodes, reltablespaces,
* relpersistence
*/
Assert(!target_is_pg_class);
+ /* Update creation subid hints of relcache */
+ rel1 = relation_open(r1, ExclusiveLock);
+ rel2 = relation_open(r2, ExclusiveLock);
+
+ /*
+ * New relation's relfilenode is created in the current transaction
+ * and used as old ralation's new relfilenode. So its
+ * newRelfilenodeSubid as new relation's createSubid. We don't fix
+ * rel2 since it would be deleted soon.
+ */
+ Assert(rel2->rd_createSubid != InvalidSubTransactionId);
+ rel1->rd_newRelfilenodeSubid = rel2->rd_createSubid;
+
+ /* record the first relfilenode change in the current transaction */
+ if (rel1->rd_firstRelfilenodeSubid == InvalidSubTransactionId)
+ rel1->rd_firstRelfilenodeSubid = GetCurrentSubTransactionId();
+
+ relation_close(rel1, ExclusiveLock);
+ relation_close(rel2, ExclusiveLock);
+
+ /* swap relfilenodes, reltablespaces, relpersistence */
swaptemp = relform1->relfilenode;
relform1->relfilenode = relform2->relfilenode;
relform2->relfilenode = swaptemp;
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index f1161f0fee..f4beff0001 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -2722,28 +2722,9 @@ CopyFrom(CopyState cstate)
* If it does commit, we'll have done the table_finish_bulk_insert() at
* the bottom of this routine first.
*
- * As mentioned in comments in utils/rel.h, the in-same-transaction test
- * is not always set correctly, since in rare cases rd_newRelfilenodeSubid
- * can be cleared before the end of the transaction. The exact case is
- * when a relation sets a new relfilenode twice in same transaction, yet
- * the second one fails in an aborted subtransaction, e.g.
- *
- * BEGIN;
- * TRUNCATE t;
- * SAVEPOINT save;
- * TRUNCATE t;
- * ROLLBACK TO save;
- * COPY ...
- *
- * Also, if the target file is new-in-transaction, we assume that checking
- * FSM for free space is a waste of time, even if we must use WAL because
- * of archiving. This could possibly be wrong, but it's unlikely.
- *
- * The comments for table_tuple_insert and RelationGetBufferForTuple
- * specify that skipping WAL logging is only safe if we ensure that our
- * tuples do not go into pages containing tuples from any other
- * transactions --- but this must be the case if we have a new table or
- * new relfilenode, so we need no additional work to enforce that.
+ * If the target file is new-in-transaction, we assume that checking FSM
+ * for free space is a waste of time, even if we must use WAL because of
+ * archiving. This could possibly be wrong, but it's unlikely.
*
* We currently don't support this optimization if the COPY target is a
* partitioned table as we currently only lazily initialize partition
@@ -2759,15 +2740,14 @@ CopyFrom(CopyState cstate)
* are not supported as per the description above.
*----------
*/
- /* createSubid is creation check, newRelfilenodeSubid is truncation check */
+ /*
+ * createSubid is creation check, firstRelfilenodeSubid is truncation and
+ * cluster check. Partitioned table doesn't have storage.
+ */
if (RELKIND_HAS_STORAGE(cstate->rel->rd_rel->relkind) &&
(cstate->rel->rd_createSubid != InvalidSubTransactionId ||
- cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId))
- {
+ cstate->rel->rd_firstRelfilenodeSubid != InvalidSubTransactionId))
ti_options |= TABLE_INSERT_SKIP_FSM;
- if (!XLogIsNeeded())
- ti_options |= TABLE_INSERT_SKIP_WAL;
- }
/*
* Optimize if new relfilenode was created in this subxact or one of its
@@ -3366,8 +3346,6 @@ CopyFrom(CopyState cstate)
FreeExecutorState(estate);
- table_finish_bulk_insert(cstate->rel, ti_options);
-
return processed;
}
diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c
index 4c1d909d38..39ebd73691 100644
--- a/src/backend/commands/createas.c
+++ b/src/backend/commands/createas.c
@@ -558,8 +558,7 @@ intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
* We can skip WAL-logging the insertions, unless PITR or streaming
* replication is in use. We can skip the FSM in any case.
*/
- myState->ti_options = TABLE_INSERT_SKIP_FSM |
- (XLogIsNeeded() ? 0 : TABLE_INSERT_SKIP_WAL);
+ myState->ti_options = TABLE_INSERT_SKIP_FSM;
myState->bistate = GetBulkInsertState();
/* Not using WAL requires smgr_targblock be initially invalid */
@@ -604,8 +603,6 @@ intorel_shutdown(DestReceiver *self)
FreeBulkInsertState(myState->bistate);
- table_finish_bulk_insert(myState->rel, myState->ti_options);
-
/* close rel, but keep lock until commit */
table_close(myState->rel, NoLock);
myState->rel = NULL;
diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c
index 537d0e8cef..1c854dcebf 100644
--- a/src/backend/commands/matview.c
+++ b/src/backend/commands/matview.c
@@ -463,8 +463,6 @@ transientrel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
* replication is in use. We can skip the FSM in any case.
*/
myState->ti_options = TABLE_INSERT_SKIP_FSM | TABLE_INSERT_FROZEN;
- if (!XLogIsNeeded())
- myState->ti_options |= TABLE_INSERT_SKIP_WAL;
myState->bistate = GetBulkInsertState();
/* Not using WAL requires smgr_targblock be initially invalid */
@@ -509,8 +507,6 @@ transientrel_shutdown(DestReceiver *self)
FreeBulkInsertState(myState->bistate);
- table_finish_bulk_insert(myState->transientrel, myState->ti_options);
-
/* close transientrel, but keep lock until commit */
table_close(myState->transientrel, NoLock);
myState->transientrel = NULL;
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 0f1a9f0e54..ac7336ef58 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -4761,9 +4761,9 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
/*
* Prepare a BulkInsertState and options for table_tuple_insert. Because
- * we're building a new heap, we can skip WAL-logging and fsync it to disk
- * at the end instead (unless WAL-logging is required for archiving or
- * streaming replication). The FSM is empty too, so don't bother using it.
+ * we're building a new heap, the underlying table AM can skip WAL-logging
+ * and fsync the relation to disk at the end of the current transaction
+ * instead. The FSM is empty too, so don't bother using it.
*/
if (newrel)
{
@@ -4771,8 +4771,6 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
bistate = GetBulkInsertState();
ti_options = TABLE_INSERT_SKIP_FSM;
- if (!XLogIsNeeded())
- ti_options |= TABLE_INSERT_SKIP_WAL;
}
else
{
@@ -5057,8 +5055,6 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
{
FreeBulkInsertState(bistate);
- table_finish_bulk_insert(newrel, ti_options);
-
table_close(newrel, NoLock);
}
}
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 7332e6b590..280fdf8080 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -171,6 +171,7 @@ static HTAB *PrivateRefCountHash = NULL;
static int32 PrivateRefCountOverflowed = 0;
static uint32 PrivateRefCountClock = 0;
static PrivateRefCountEntry *ReservedRefCountEntry = NULL;
+static void FlushRelationBuffers_common(SMgrRelation smgr, bool islocal);
static void ReservePrivateRefCountEntry(void);
static PrivateRefCountEntry *NewPrivateRefCountEntry(Buffer buffer);
@@ -3190,20 +3191,32 @@ PrintPinnedBufs(void)
void
FlushRelationBuffers(Relation rel)
{
- int i;
- BufferDesc *bufHdr;
-
- /* Open rel at the smgr level if not already done */
RelationOpenSmgr(rel);
- if (RelationUsesLocalBuffers(rel))
+ FlushRelationBuffers_common(rel->rd_smgr, RelationUsesLocalBuffers(rel));
+}
+
+void
+FlushRelationBuffersWithoutRelcache(RelFileNode rnode, bool islocal)
+{
+ FlushRelationBuffers_common(smgropen(rnode, InvalidBackendId), islocal);
+}
+
+static void
+FlushRelationBuffers_common(SMgrRelation smgr, bool islocal)
+{
+ RelFileNode rnode = smgr->smgr_rnode.node;
+ int i;
+ BufferDesc *bufHdr;
+
+ if (islocal)
{
for (i = 0; i < NLocBuffer; i++)
{
uint32 buf_state;
bufHdr = GetLocalBufferDescriptor(i);
- if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
+ if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
((buf_state = pg_atomic_read_u32(&bufHdr->state)) &
(BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
{
@@ -3220,7 +3233,7 @@ FlushRelationBuffers(Relation rel)
PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
- smgrwrite(rel->rd_smgr,
+ smgrwrite(smgr,
bufHdr->tag.forkNum,
bufHdr->tag.blockNum,
localpage,
@@ -3250,18 +3263,18 @@ FlushRelationBuffers(Relation rel)
* As in DropRelFileNodeBuffers, an unlocked precheck should be safe
* and saves some cycles.
*/
- if (!RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
+ if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode))
continue;
ReservePrivateRefCountEntry();
buf_state = LockBufHdr(bufHdr);
- if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
+ if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
(buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
{
PinBuffer_Locked(bufHdr);
LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
- FlushBuffer(bufHdr, rel->rd_smgr);
+ FlushBuffer(bufHdr, smgr);
LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
UnpinBuffer(bufHdr, true);
}
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 2b992d7832..812bfadb40 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -2661,7 +2661,7 @@ static void
RelationFlushRelation(Relation relation)
{
if (relation->rd_createSubid != InvalidSubTransactionId ||
- relation->rd_newRelfilenodeSubid != InvalidSubTransactionId)
+ relation->rd_firstRelfilenodeSubid != InvalidSubTransactionId)
{
/*
* New relcache entries are always rebuilt, not flushed; else we'd
@@ -2801,7 +2801,7 @@ RelationCacheInvalidate(void)
* pending invalidations.
*/
if (relation->rd_createSubid != InvalidSubTransactionId ||
- relation->rd_newRelfilenodeSubid != InvalidSubTransactionId)
+ relation->rd_firstRelfilenodeSubid != InvalidSubTransactionId)
continue;
relcacheInvalsReceived++;
@@ -3058,6 +3058,7 @@ AtEOXact_cleanup(Relation relation, bool isCommit)
* Likewise, reset the hint about the relfilenode being new.
*/
relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
+ relation->rd_firstRelfilenodeSubid = InvalidSubTransactionId;
}
/*
@@ -3149,7 +3150,7 @@ AtEOSubXact_cleanup(Relation relation, bool isCommit,
}
/*
- * Likewise, update or drop any new-relfilenode-in-subtransaction hint.
+ * Likewise, update or drop any new-relfilenode-in-subtransaction hints.
*/
if (relation->rd_newRelfilenodeSubid == mySubid)
{
@@ -3158,6 +3159,15 @@ AtEOSubXact_cleanup(Relation relation, bool isCommit,
else
relation->rd_newRelfilenodeSubid = InvalidSubTransactionId;
}
+
+
+ if (relation->rd_firstRelfilenodeSubid == mySubid)
+ {
+ if (isCommit)
+ relation->rd_firstRelfilenodeSubid = parentSubid;
+ else
+ relation->rd_firstRelfilenodeSubid = InvalidSubTransactionId;
+ }
}
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 858bcb6bc9..80c2e1bafc 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -29,7 +29,6 @@
/* "options" flag bits for heap_insert */
-#define HEAP_INSERT_SKIP_WAL TABLE_INSERT_SKIP_WAL
#define HEAP_INSERT_SKIP_FSM TABLE_INSERT_SKIP_FSM
#define HEAP_INSERT_FROZEN TABLE_INSERT_FROZEN
#define HEAP_INSERT_NO_LOGICAL TABLE_INSERT_NO_LOGICAL
diff --git a/src/include/access/rewriteheap.h b/src/include/access/rewriteheap.h
index 8056253916..7f9736e294 100644
--- a/src/include/access/rewriteheap.h
+++ b/src/include/access/rewriteheap.h
@@ -23,7 +23,7 @@ typedef struct RewriteStateData *RewriteState;
extern RewriteState begin_heap_rewrite(Relation OldHeap, Relation NewHeap,
TransactionId OldestXmin, TransactionId FreezeXid,
- MultiXactId MultiXactCutoff, bool use_wal);
+ MultiXactId MultiXactCutoff);
extern void end_heap_rewrite(RewriteState state);
extern void rewrite_heap_tuple(RewriteState state, HeapTuple oldTuple,
HeapTuple newTuple);
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index c2b0481e7e..ac0e981acb 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -407,22 +407,6 @@ typedef struct TableAmRoutine
uint8 flags,
TM_FailureData *tmfd);
- /*
- * Perform operations necessary to complete insertions made via
- * tuple_insert and multi_insert with a BulkInsertState specified. This
- * may for example be used to flush the relation, when the
- * TABLE_INSERT_SKIP_WAL option was used.
- *
- * Typically callers of tuple_insert and multi_insert will just pass all
- * the flags that apply to them, and each AM has to decide which of them
- * make sense for it, and then only take actions in finish_bulk_insert for
- * those flags, and ignore others.
- *
- * Optional callback.
- */
- void (*finish_bulk_insert) (Relation rel, int options);
-
-
/* ------------------------------------------------------------------------
* DDL related functionality.
* ------------------------------------------------------------------------
@@ -1088,10 +1072,6 @@ table_compute_xid_horizon_for_tuples(Relation rel,
* behaviour of the AM. Several options might be ignored by AMs not supporting
* them.
*
- * If the TABLE_INSERT_SKIP_WAL option is specified, the new tuple doesn't
- * need to be logged to WAL, even for a non-temp relation. It is the AMs
- * choice whether this optimization is supported.
- *
* If the TABLE_INSERT_SKIP_FSM option is specified, AMs are free to not reuse
* free space in the relation. This can save some cycles when we know the
* relation is new and doesn't contain useful amounts of free space. It's
@@ -1111,10 +1091,8 @@ table_compute_xid_horizon_for_tuples(Relation rel,
* Note that most of these options will be applied when inserting into the
* heap's TOAST table, too, if the tuple requires any out-of-line data.
*
- *
* The BulkInsertState object (if any; bistate can be NULL for default
- * behavior) is also just passed through to RelationGetBufferForTuple. If
- * `bistate` is provided, table_finish_bulk_insert() needs to be called.
+ * behavior) is also just passed through to RelationGetBufferForTuple.
*
* On return the slot's tts_tid and tts_tableOid are updated to reflect the
* insertion. But note that any toasting of fields within the slot is NOT
@@ -1249,6 +1227,8 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid,
* update was done. However, any TOAST changes in the new tuple's
* data are not reflected into *newtup.
*
+ * See table_insert about skipping WAL-logging feature.
+ *
* In the failure cases, the routine fills *tmfd with the tuple's t_ctid,
* t_xmax, and, if possible, t_cmax. See comments for struct TM_FailureData
* for additional info.
@@ -1309,21 +1289,6 @@ table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot,
flags, tmfd);
}
-/*
- * Perform operations necessary to complete insertions made via
- * tuple_insert and multi_insert with a BulkInsertState specified. This
- * e.g. may e.g. used to flush the relation when inserting with
- * TABLE_INSERT_SKIP_WAL specified.
- */
-static inline void
-table_finish_bulk_insert(Relation rel, int options)
-{
- /* optional callback */
- if (rel->rd_tableam && rel->rd_tableam->finish_bulk_insert)
- rel->rd_tableam->finish_bulk_insert(rel, options);
-}
-
-
/* ------------------------------------------------------------------------
* DDL related functionality.
* ------------------------------------------------------------------------
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 509f4b7ef1..ace5f5a2ae 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -189,6 +189,7 @@ extern BlockNumber RelationGetNumberOfBlocksInFork(Relation relation,
ForkNumber forkNum);
extern void FlushOneBuffer(Buffer buffer);
extern void FlushRelationBuffers(Relation rel);
+extern void FlushRelationBuffersWithoutRelcache(RelFileNode rnode, bool islocal);
extern void FlushDatabaseBuffers(Oid dbid);
extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode,
ForkNumber forkNum, BlockNumber firstDelBlock);
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index d35b4a5061..5cbb5a7b27 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -76,10 +76,17 @@ typedef struct RelationData
* transaction, with one of them occurring in a subsequently aborted
* subtransaction, e.g. BEGIN; TRUNCATE t; SAVEPOINT save; TRUNCATE t;
* ROLLBACK TO save; -- rd_newRelfilenode is now forgotten
+ * rd_firstRelfilenodeSubid is the ID of the hightest subtransaction the
+ * relfilenode change has took place first in the current
+ * transaction. This won't be forgotten as newRelfilenodeSubid is. A valid
+ * OID means that the currently active relfilenode is transaction-local
+ * and no-need for WAL-logging.
*/
SubTransactionId rd_createSubid; /* rel was created in current xact */
SubTransactionId rd_newRelfilenodeSubid; /* new relfilenode assigned in
* current xact */
+ SubTransactionId rd_firstRelfilenodeSubid; /* new relfilenode assigned
+ * first in current xact */
Form_pg_class rd_rel; /* RELATION tuple */
TupleDesc rd_att; /* tuple descriptor */
@@ -512,9 +519,15 @@ typedef struct ViewOptions
/*
* RelationNeedsWAL
* True if relation needs WAL.
+ *
+ * Returns false if wal_level = minimal and this relation is created or
+ * truncated in the current transaction.
*/
-#define RelationNeedsWAL(relation) \
- ((relation)->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT)
+#define RelationNeedsWAL(relation) \
+ ((relation)->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT && \
+ (XLogIsNeeded() || \
+ (relation->rd_createSubid == InvalidSubTransactionId && \
+ relation->rd_firstRelfilenodeSubid == InvalidSubTransactionId)))
/*
* RelationUsesLocalBuffers
--
2.16.3
From 63fc1a432f20e99df6f081bc6af640bf6907879c Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horiguchi.kyotaro@oss.ntt.co.jp>
Date: Wed, 29 May 2019 23:03:22 +0900
Subject: [PATCH 3/3] Rename smgrDoPendingDeletes to smgrDoPendingOperations
The function longer does only deletions but also syncs. Rename the
function to refect that. smgrGetPendingDeletes is not renamed since it
does not change behavior.
---
src/backend/access/transam/xact.c | 4 +--
src/backend/catalog/storage.c | 57 ++++++++++++++++++++-------------------
src/include/catalog/storage.h | 2 +-
3 files changed, 32 insertions(+), 31 deletions(-)
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index d7930c077d..cc0c43b2dd 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -2228,7 +2228,7 @@ CommitTransaction(void)
* Other backends will observe the attendant catalog changes and not
* attempt to access affected files.
*/
- smgrDoPendingDeletes(true);
+ smgrDoPendingOperations(true);
AtCommit_Notify();
AtEOXact_GUC(true, 1);
@@ -2716,7 +2716,7 @@ AbortTransaction(void)
ResourceOwnerRelease(TopTransactionResourceOwner,
RESOURCE_RELEASE_AFTER_LOCKS,
false, true);
- smgrDoPendingDeletes(false);
+ smgrDoPendingOperations(false);
AtEOXact_GUC(false, 1);
AtEOXact_SPI(false);
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index e4bcdc390f..6ebe75aa37 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -53,17 +53,17 @@
* but I'm being paranoid.
*/
-typedef struct PendingRelDelete
+typedef struct PendingRelOps
{
RelFileNode relnode; /* relation that may need to be deleted */
BackendId backend; /* InvalidBackendId if not a temp rel */
bool atCommit; /* T=work at commit; F=work at abort */
bool dosync; /* T=work is sync; F=work is delete */
int nestLevel; /* xact nesting level of request */
- struct PendingRelDelete *next; /* linked-list link */
-} PendingRelDelete;
+ struct PendingRelOps *next; /* linked-list link */
+} PendingRelOps;
-static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
+static PendingRelOps *pendingDeletes = NULL; /* head of linked list */
/*
* RelationCreateStorage
@@ -79,7 +79,7 @@ static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
SMgrRelation
RelationCreateStorage(RelFileNode rnode, char relpersistence)
{
- PendingRelDelete *pending;
+ PendingRelOps *pending;
SMgrRelation srel;
BackendId backend;
bool needs_wal;
@@ -110,8 +110,8 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence)
log_smgrcreate(&srel->smgr_rnode.node, MAIN_FORKNUM);
/* Add the relation to the list of stuff to delete at abort */
- pending = (PendingRelDelete *)
- MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
+ pending = (PendingRelOps *)
+ MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelOps));
pending->relnode = rnode;
pending->backend = backend;
pending->atCommit = false; /* delete if abort */
@@ -127,8 +127,8 @@ RelationCreateStorage(RelFileNode rnode, char relpersistence)
*/
if (relpersistence == RELPERSISTENCE_PERMANENT && !XLogIsNeeded())
{
- pending = (PendingRelDelete *)
- MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
+ pending = (PendingRelOps *)
+ MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelOps));
pending->relnode = rnode;
pending->backend = backend;
pending->atCommit = true;
@@ -167,11 +167,11 @@ log_smgrcreate(const RelFileNode *rnode, ForkNumber forkNum)
void
RelationDropStorage(Relation rel)
{
- PendingRelDelete *pending;
+ PendingRelOps *pending;
/* Add the relation to the list of stuff to delete at commit */
- pending = (PendingRelDelete *)
- MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
+ pending = (PendingRelOps *)
+ MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelOps));
pending->relnode = rel->rd_node;
pending->backend = rel->rd_backend;
pending->atCommit = true; /* delete if commit */
@@ -185,9 +185,9 @@ RelationDropStorage(Relation rel)
* present in the pending-delete list twice, once with atCommit true and
* once with atCommit false. Hence, it will be physically deleted at end
* of xact in either case (and the other entry will be ignored by
- * smgrDoPendingDeletes, so no error will occur). We could instead remove
- * the existing list entry and delete the physical file immediately, but
- * for now I'll keep the logic simple.
+ * smgrDoPendingOperations, so no error will occur). We could instead
+ * remove the existing list entry and delete the physical file
+ * immediately, but for now I'll keep the logic simple.
*/
RelationCloseSmgr(rel);
@@ -213,9 +213,9 @@ RelationDropStorage(Relation rel)
void
RelationPreserveStorage(RelFileNode rnode, bool atCommit)
{
- PendingRelDelete *pending;
- PendingRelDelete *prev;
- PendingRelDelete *next;
+ PendingRelOps *pending;
+ PendingRelOps *prev;
+ PendingRelOps *next;
prev = NULL;
for (pending = pendingDeletes; pending != NULL; pending = next)
@@ -406,7 +406,8 @@ RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
}
/*
- * smgrDoPendingDeletes() -- Take care of relation deletes at end of xact.
+ * smgrDoPendingOperations() -- Take care of relation deletes and syncs at
+ * end of xact.
*
* This also runs when aborting a subxact; we want to clean up a failed
* subxact immediately.
@@ -417,12 +418,12 @@ RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
* already recovered the physical storage.
*/
void
-smgrDoPendingDeletes(bool isCommit)
+smgrDoPendingOperations(bool isCommit)
{
int nestLevel = GetCurrentTransactionNestLevel();
- PendingRelDelete *pending;
- PendingRelDelete *prev;
- PendingRelDelete *next;
+ PendingRelOps *pending;
+ PendingRelOps *prev;
+ PendingRelOps *next;
int nrels = 0,
i = 0,
maxrels = 0;
@@ -518,7 +519,7 @@ smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr)
int nestLevel = GetCurrentTransactionNestLevel();
int nrels;
RelFileNode *rptr;
- PendingRelDelete *pending;
+ PendingRelOps *pending;
nrels = 0;
for (pending = pendingDeletes; pending != NULL; pending = pending->next)
@@ -558,8 +559,8 @@ smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr)
void
PostPrepare_smgr(void)
{
- PendingRelDelete *pending;
- PendingRelDelete *next;
+ PendingRelOps *pending;
+ PendingRelOps *next;
for (pending = pendingDeletes; pending != NULL; pending = next)
{
@@ -580,7 +581,7 @@ void
AtSubCommit_smgr(void)
{
int nestLevel = GetCurrentTransactionNestLevel();
- PendingRelDelete *pending;
+ PendingRelOps *pending;
for (pending = pendingDeletes; pending != NULL; pending = pending->next)
{
@@ -599,7 +600,7 @@ AtSubCommit_smgr(void)
void
AtSubAbort_smgr(void)
{
- smgrDoPendingDeletes(false);
+ smgrDoPendingOperations(false);
}
void
diff --git a/src/include/catalog/storage.h b/src/include/catalog/storage.h
index 3579d3f3eb..43836cf11c 100644
--- a/src/include/catalog/storage.h
+++ b/src/include/catalog/storage.h
@@ -30,7 +30,7 @@ extern void RelationCopyStorage(SMgrRelation src, SMgrRelation dst,
* These functions used to be in storage/smgr/smgr.c, which explains the
* naming
*/
-extern void smgrDoPendingDeletes(bool isCommit);
+extern void smgrDoPendingOperations(bool isCommit);
extern int smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr);
extern void AtSubCommit_smgr(void);
extern void AtSubAbort_smgr(void);
--
2.16.3
pgsql-hackers by date: