From 7b72e34e931dca1c0c8ea77f182b33a739dc2eba Mon Sep 17 00:00:00 2001 From: Masahiko Sawada Date: Tue, 5 Jan 2021 09:47:49 +0900 Subject: [PATCH 3/3] Skip btree bulkdelete if the index doesn't grow. On amvacuumstrategy, a btree index returns INDEX_VACUUM_STRATEGY_NONE if the index has not grown since the last bulk-deletion. To remember that, this change adds a new field in the btree meta page to store the number of blocks at the time of the last bulk-deletion. XXX: need to upgrade the meta page version. --- contrib/pageinspect/Makefile | 3 +- contrib/pageinspect/btreefuncs.c | 5 +++ contrib/pageinspect/pageinspect--1.8--1.9.sql | 22 +++++++++++++ contrib/pageinspect/pageinspect.control | 2 +- src/backend/access/nbtree/nbtpage.c | 9 ++++- src/backend/access/nbtree/nbtree.c | 33 ++++++++++++++++++- src/backend/access/nbtree/nbtxlog.c | 1 + src/backend/access/rmgrdesc/nbtdesc.c | 5 +-- src/include/access/nbtree.h | 2 ++ src/include/access/nbtxlog.h | 1 + 10 files changed, 77 insertions(+), 6 deletions(-) create mode 100644 contrib/pageinspect/pageinspect--1.8--1.9.sql diff --git a/contrib/pageinspect/Makefile b/contrib/pageinspect/Makefile index d9d8177116..a0760afa4e 100644 --- a/contrib/pageinspect/Makefile +++ b/contrib/pageinspect/Makefile @@ -12,7 +12,8 @@ OBJS = \ rawpage.o EXTENSION = pageinspect -DATA = pageinspect--1.7--1.8.sql pageinspect--1.6--1.7.sql \ +DATA = pageinspect--1.8--1.9.sql \ + pageinspect--1.7--1.8.sql pageinspect--1.6--1.7.sql \ pageinspect--1.5.sql pageinspect--1.5--1.6.sql \ pageinspect--1.4--1.5.sql pageinspect--1.3--1.4.sql \ pageinspect--1.2--1.3.sql pageinspect--1.1--1.2.sql \ diff --git a/contrib/pageinspect/btreefuncs.c b/contrib/pageinspect/btreefuncs.c index 445605db58..94f648118f 100644 --- a/contrib/pageinspect/btreefuncs.c +++ b/contrib/pageinspect/btreefuncs.c @@ -692,6 +692,11 @@ bt_metap(PG_FUNCTION_ARGS) values[j++] = "f"; } + if (metad->btm_version >= BTREE_VERSION) + values[j++] = psprintf(INT64_FORMAT, (int64) 
metad->btm_last_deletion_nblocks); + else + values[j++] = "-1"; + tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc), values); diff --git a/contrib/pageinspect/pageinspect--1.8--1.9.sql b/contrib/pageinspect/pageinspect--1.8--1.9.sql new file mode 100644 index 0000000000..bd1752cf35 --- /dev/null +++ b/contrib/pageinspect/pageinspect--1.8--1.9.sql @@ -0,0 +1,22 @@ +/* contrib/pageinspect/pageinspect--1.8--1.9.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pageinspect UPDATE TO '1.9'" to load this file. \quit + +-- +-- bt_metap() +-- +DROP FUNCTION bt_metap(text); +CREATE FUNCTION bt_metap(IN relname text, + OUT magic int4, + OUT version int4, + OUT root int8, + OUT level int8, + OUT fastroot int8, + OUT fastlevel int8, + OUT oldest_xact xid, + OUT last_cleanup_num_tuples float8, + OUT allequalimage boolean, + OUT last_deletion_nblocks int8) +AS 'MODULE_PATHNAME', 'bt_metap' +LANGUAGE C STRICT PARALLEL SAFE; diff --git a/contrib/pageinspect/pageinspect.control b/contrib/pageinspect/pageinspect.control index f8cdf526c6..bd716769a1 100644 --- a/contrib/pageinspect/pageinspect.control +++ b/contrib/pageinspect/pageinspect.control @@ -1,5 +1,5 @@ # pageinspect extension comment = 'inspect the contents of database pages at a low level' -default_version = '1.8' +default_version = '1.9' module_pathname = '$libdir/pageinspect' relocatable = true diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 89eb66a8a6..eac78d3b7e 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -76,6 +76,7 @@ _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level, metad->btm_oldest_btpo_xact = InvalidTransactionId; metad->btm_last_cleanup_num_heap_tuples = -1.0; metad->btm_allequalimage = allequalimage; + metad->btm_last_deletion_nblocks = InvalidBlockNumber; metaopaque = (BTPageOpaque) PageGetSpecialPointer(page); 
metaopaque->btpo_flags = BTP_META; @@ -115,6 +116,7 @@ _bt_upgrademetapage(Page page) metad->btm_version = BTREE_NOVAC_VERSION; metad->btm_oldest_btpo_xact = InvalidTransactionId; metad->btm_last_cleanup_num_heap_tuples = -1.0; + /* Only a REINDEX can set this field */ Assert(!metad->btm_allequalimage); metad->btm_allequalimage = false; @@ -179,17 +181,20 @@ _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact, BTMetaPageData *metad; bool needsRewrite = false; XLogRecPtr recptr; + BlockNumber nblocks; /* read the metapage and check if it needs rewrite */ metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); metapg = BufferGetPage(metabuf); metad = BTPageGetMeta(metapg); + nblocks = RelationGetNumberOfBlocks(rel); /* outdated version of metapage always needs rewrite */ if (metad->btm_version < BTREE_NOVAC_VERSION) needsRewrite = true; else if (metad->btm_oldest_btpo_xact != oldestBtpoXact || - metad->btm_last_cleanup_num_heap_tuples != numHeapTuples) + metad->btm_last_cleanup_num_heap_tuples != numHeapTuples || + metad->btm_last_deletion_nblocks != nblocks) needsRewrite = true; if (!needsRewrite) @@ -211,6 +216,7 @@ _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact, /* update cleanup-related information */ metad->btm_oldest_btpo_xact = oldestBtpoXact; metad->btm_last_cleanup_num_heap_tuples = numHeapTuples; + metad->btm_last_deletion_nblocks = nblocks; MarkBufferDirty(metabuf); /* write wal record if needed */ @@ -230,6 +236,7 @@ _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact, md.oldest_btpo_xact = oldestBtpoXact; md.last_cleanup_num_heap_tuples = numHeapTuples; md.allequalimage = metad->btm_allequalimage; + md.last_deletion_nblocks = metad->btm_last_deletion_nblocks; XLogRegisterBufData(0, (char *) &md, sizeof(xl_btree_metadata)); diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index c9a177d5e1..7409c23a5c 100644 --- a/src/backend/access/nbtree/nbtree.c +++ 
b/src/backend/access/nbtree/nbtree.c @@ -882,7 +882,38 @@ _bt_vacuum_needs_cleanup(IndexVacuumInfo *info) IndexVacuumStrategy btvacuumstrategy(IndexVacuumInfo *info) { - return INDEX_VACUUM_STRATEGY_BULKDELETE; + Buffer metabuf; + Page metapg; + BTMetaPageData *metad; + IndexVacuumStrategy result = INDEX_VACUUM_STRATEGY_NONE; + + metabuf = _bt_getbuf(info->index, BTREE_METAPAGE, BT_READ); + metapg = BufferGetPage(metabuf); + metad = BTPageGetMeta(metapg); + + if (metad->btm_version < BTREE_VERSION) + { + /* + * Do bulk-deletion if metapage needs upgrade, because we don't + * have meta-information yet. + */ + result = INDEX_VACUUM_STRATEGY_BULKDELETE; + } + else + { + BlockNumber nblocks = RelationGetNumberOfBlocks(info->index); + + /* + * Do deletion if the index grows since the last deletion or for + * the first time. + */ + if (!BlockNumberIsValid(metad->btm_last_deletion_nblocks) || + nblocks > metad->btm_last_deletion_nblocks) + result = INDEX_VACUUM_STRATEGY_BULKDELETE; + } + + _bt_relbuf(info->index, metabuf); + return result; } /* diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index 45313d924c..65e537211c 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -115,6 +115,7 @@ _bt_restore_meta(XLogReaderState *record, uint8 block_id) md->btm_oldest_btpo_xact = xlrec->oldest_btpo_xact; md->btm_last_cleanup_num_heap_tuples = xlrec->last_cleanup_num_heap_tuples; md->btm_allequalimage = xlrec->allequalimage; + md->btm_last_deletion_nblocks = xlrec->last_deletion_nblocks; pageop = (BTPageOpaque) PageGetSpecialPointer(metapg); pageop->btpo_flags = BTP_META; diff --git a/src/backend/access/rmgrdesc/nbtdesc.c b/src/backend/access/rmgrdesc/nbtdesc.c index 4c4af9fce0..462838682e 100644 --- a/src/backend/access/rmgrdesc/nbtdesc.c +++ b/src/backend/access/rmgrdesc/nbtdesc.c @@ -110,9 +110,10 @@ btree_desc(StringInfo buf, XLogReaderState *record) xlrec = (xl_btree_metadata *) 
XLogRecGetBlockData(record, 0, NULL); - appendStringInfo(buf, "oldest_btpo_xact %u; last_cleanup_num_heap_tuples: %f", + appendStringInfo(buf, "oldest_btpo_xact %u; last_cleanup_num_heap_tuples: %f; last_deletion_nblocks: %u", xlrec->oldest_btpo_xact, - xlrec->last_cleanup_num_heap_tuples); + xlrec->last_cleanup_num_heap_tuples, + xlrec->last_deletion_nblocks); break; } } diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index b8247537fd..a56baea310 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -109,6 +109,8 @@ typedef struct BTMetaPageData float8 btm_last_cleanup_num_heap_tuples; /* number of heap tuples * during last cleanup */ bool btm_allequalimage; /* are all columns "equalimage"? */ + BlockNumber btm_last_deletion_nblocks; /* number of blocks during last + * bulk-deletion */ } BTMetaPageData; #define BTPageGetMeta(p) \ diff --git a/src/include/access/nbtxlog.h b/src/include/access/nbtxlog.h index f5d3e9f5e0..45f01a3dc9 100644 --- a/src/include/access/nbtxlog.h +++ b/src/include/access/nbtxlog.h @@ -55,6 +55,7 @@ typedef struct xl_btree_metadata TransactionId oldest_btpo_xact; float8 last_cleanup_num_heap_tuples; bool allequalimage; + BlockNumber last_deletion_nblocks; } xl_btree_metadata; /* -- 2.27.0