diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index da518daea3..4e12c30e20 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -1211,12 +1211,14 @@ palloc_btree_page(BtreeCheckState *state, BlockNumber blocknum) errmsg("index \"%s\" meta page is corrupt", RelationGetRelationName(state->rel)))); - if (metad->btm_version != BTREE_VERSION) + if (metad->btm_version < BTREE_MIN_VERSION || + metad->btm_version > BTREE_VERSION) ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), - errmsg("version mismatch in index \"%s\": file version %d, code version %d", + errmsg("version mismatch in index \"%s\": file version %d, " + "current version %d, minimal supported version %d", RelationGetRelationName(state->rel), - metad->btm_version, BTREE_VERSION))); + metad->btm_version, BTREE_VERSION, BTREE_MIN_VERSION))); } /* diff --git a/contrib/pageinspect/expected/btree.out b/contrib/pageinspect/expected/btree.out index 67b103add3..dc9b637b2f 100644 --- a/contrib/pageinspect/expected/btree.out +++ b/contrib/pageinspect/expected/btree.out @@ -5,7 +5,7 @@ CREATE INDEX test1_a_idx ON test1 USING btree (a); SELECT * FROM bt_metap('test1_a_idx'); -[ RECORD 1 ]----- magic | 340322 -version | 2 +version | 3 root | 1 level | 0 fastroot | 1 diff --git a/contrib/pgstattuple/expected/pgstattuple.out b/contrib/pgstattuple/expected/pgstattuple.out index 20b5585d03..a7087f6d45 100644 --- a/contrib/pgstattuple/expected/pgstattuple.out +++ b/contrib/pgstattuple/expected/pgstattuple.out @@ -48,7 +48,7 @@ select version, tree_level, from pgstatindex('test_pkey'); version | tree_level | index_size | root_block_no | internal_pages | leaf_pages | empty_pages | deleted_pages | avg_leaf_density | leaf_fragmentation ---------+------------+------------+---------------+----------------+------------+-------------+---------------+------------------+-------------------- - 2 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | NaN | NaN + 3 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | NaN | NaN (1 row) select version, tree_level, @@ -58,7 +58,7 @@ select version, tree_level, from pgstatindex('test_pkey'::text); version | tree_level | index_size | root_block_no | internal_pages | leaf_pages | empty_pages | deleted_pages | avg_leaf_density | leaf_fragmentation ---------+------------+------------+---------------+----------------+------------+-------------+---------------+------------------+-------------------- - 2 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | NaN | NaN + 3 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | NaN | NaN (1 row) select version, tree_level, @@ -68,7 +68,7 @@ select version, tree_level, from pgstatindex('test_pkey'::name); version | tree_level | index_size | root_block_no | internal_pages | leaf_pages | empty_pages | deleted_pages | avg_leaf_density | leaf_fragmentation ---------+------------+------------+---------------+----------------+------------+-------------+---------------+------------------+-------------------- - 2 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | NaN | NaN + 3 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | NaN | NaN (1 row) select version, tree_level, @@ -78,7 +78,7 @@ select version, tree_level, from pgstatindex('test_pkey'::regclass); version | tree_level | index_size | root_block_no | internal_pages | leaf_pages | empty_pages | deleted_pages | avg_leaf_density | leaf_fragmentation ---------+------------+------------+---------------+----------------+------------+-------------+---------------+------------------+-------------------- - 2 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | NaN | NaN + 3 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | NaN | NaN (1 row) select pg_relpages('test'); @@ -229,7 +229,7 @@ create index test_partition_hash_idx on test_partition using hash (a); select pgstatindex('test_partition_idx'); pgstatindex ------------------------------ - (2,0,8192,0,0,0,0,0,NaN,NaN) + (3,0,8192,0,0,0,0,0,NaN,NaN) (1 row) select pgstathashindex('test_partition_hash_idx'); diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 259a2d83b4..af0435f7b4 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1821,6 +1821,31 @@ include_dir 'conf.d' + + Index Vacuum + + + vacuum_cleanup_index_scale_factor (floating point) + + vacuum_cleanup_index_scale_factor configuration parameter + + + + + When no tuples were deleted from the heap, B-tree indexes might still + be scanned during VACUUM cleanup stage by two + reasons. The first reason is that B-tree index contains deleted pages + which can be recycled during cleanup. The second reason is that B-tree + index statistics is stalled. The criterion of stalled index statistics + is number of inserted tuples since previous statistics collection + is greater than vacuum_cleanup_index_scale_factor + fraction of total number of heap tuples. + + + + + + Background Writer diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml index 1fd21e12bd..ad30de2dc5 100644 --- a/doc/src/sgml/ref/create_index.sgml +++ b/doc/src/sgml/ref/create_index.sgml @@ -336,6 +336,21 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] + + B-tree indexes additionally accept this parameter: + + + + + vacuum_cleanup_index_scale_factor + + + Per-table value for . + + + + + GiST indexes additionally accept this parameter: diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 46276ceff1..85081ea95a 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -400,6 +400,15 @@ static relopt_real realRelOpts[] = }, 0, -1.0, DBL_MAX }, + { + { + "vacuum_cleanup_index_scale_factor", + "Number of tuple inserts prior to index cleanup as a fraction of reltuples.", + RELOPT_KIND_BTREE, + ShareUpdateExclusiveLock + }, + -1, 0.0, 100.0 + }, /* list terminator */ {{NULL}} }; @@ -1362,7 +1371,9 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind) {"user_catalog_table", RELOPT_TYPE_BOOL, offsetof(StdRdOptions, user_catalog_table)}, {"parallel_workers", RELOPT_TYPE_INT, - offsetof(StdRdOptions, parallel_workers)} + offsetof(StdRdOptions, parallel_workers)}, + {"vacuum_cleanup_index_scale_factor", RELOPT_TYPE_REAL, + offsetof(StdRdOptions, vacuum_cleanup_index_scale_factor)} }; options = parseRelOptions(reloptions, validate, kind, &numoptions); diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index 2fe9867353..99310e19bb 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -848,6 +848,9 @@ _bt_insertonpg(Relation rel, if (BufferIsValid(metabuf)) { + /* upgrade meta-page if needed */ + if (metad->btm_version < BTREE_VERSION) + _bt_upgrademetapage(metapg); metad->btm_fastroot = itup_blkno; metad->btm_fastlevel = lpageop->btpo.level; MarkBufferDirty(metabuf); @@ -897,6 +900,8 @@ _bt_insertonpg(Relation rel, xlmeta.level = metad->btm_level; xlmeta.fastroot = metad->btm_fastroot; xlmeta.fastlevel = metad->btm_fastlevel; + xlmeta.oldest_btpo_xact = metad->btm_oldest_btpo_xact; + xlmeta.last_cleanup_num_heap_tuples = metad->btm_last_cleanup_num_heap_tuples; XLogRegisterBuffer(2, metabuf, REGBUF_WILL_INIT | REGBUF_STANDARD); XLogRegisterBufData(2, (char *) &xlmeta, sizeof(xl_btree_metadata)); @@ -1949,6 +1954,10 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) metapg = BufferGetPage(metabuf); metad = BTPageGetMeta(metapg); + /* upgrade metapage if needed */ + if (metad->btm_version < BTREE_VERSION) + _bt_upgrademetapage(metapg); + /* * Create downlink item for left page (old root). Since this will be the * first item in a non-leaf page, it implicitly has minus-infinity key @@ -2038,6 +2047,8 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) md.level = metad->btm_level; md.fastroot = rootblknum; md.fastlevel = metad->btm_level; + md.oldest_btpo_xact = metad->btm_oldest_btpo_xact; + md.last_cleanup_num_heap_tuples = metad->btm_last_cleanup_num_heap_tuples; XLogRegisterBufData(2, (char *) &md, sizeof(xl_btree_metadata)); diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 92afe2de38..8ae3c84bec 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -60,6 +60,8 @@ _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level) metad->btm_level = level; metad->btm_fastroot = rootbknum; metad->btm_fastlevel = level; + metad->btm_oldest_btpo_xact = InvalidTransactionId; + metad->btm_last_cleanup_num_heap_tuples = -1.0; metaopaque = (BTPageOpaque) PageGetSpecialPointer(page); metaopaque->btpo_flags = BTP_META; @@ -73,6 +75,113 @@ _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level) ((char *) metad + sizeof(BTMetaPageData)) - (char *) page; } +/* + * _bt_upgrademetapage() -- Upgrade a meta-page from an old format to the new. + * + * This routine does purely in-memory image upgrade. Caller is + * responsible for locking, WAL-logging etc. + */ +void +_bt_upgrademetapage(Page page) +{ + BTMetaPageData *metad; + BTPageOpaque metaopaque; + + metad = BTPageGetMeta(page); + metaopaque = (BTPageOpaque) PageGetSpecialPointer(page); + + /* It must be really a meta page of upgradable version */ + Assert(metaopaque->btpo_flags & BTP_META); + Assert(metad->btm_version < BTREE_VERSION); + Assert(metad->btm_version >= BTREE_MIN_VERSION); + + /* Fill extra fields added into version 3 */ + metad->btm_oldest_btpo_xact = InvalidTransactionId; + metad->btm_last_cleanup_num_heap_tuples = -1.0; + + /* Adjust pd_lower (see _bt_initmetapage() for details) */ + ((PageHeader) page)->pd_lower = + ((char *) metad + sizeof(BTMetaPageData)) - (char *) page; +} + +/* + * _bt_update_meta_cleanup_info() -- Update cleanup-related information in + * the metapage. + * + * This routine checks if provided cleanup-related infromation is matching + * to those written in the metapage. On mismatch, metapage is overritten. + */ +void +_bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact, + float8 numHeapTuples) +{ + Buffer metabuf; + Page metapg; + BTPageOpaque metaopaque; + BTMetaPageData *metad; + bool needsRewrite = false; + XLogRecPtr recptr; + + /* read the metapage and check if it needs rewrite */ + metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); + metapg = BufferGetPage(metabuf); + metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg); + metad = BTPageGetMeta(metapg); + + /* outdated version of metapage always needs rewrite */ + if (metad->btm_version < BTREE_VERSION) + needsRewrite = true; + else if (metad->btm_oldest_btpo_xact != oldestBtpoXact || + metad->btm_last_cleanup_num_heap_tuples != numHeapTuples) + needsRewrite = true; + + if (!needsRewrite) + { + _bt_relbuf(rel, metabuf); + return; + } + + /* trade in our read lock for a write lock */ + LockBuffer(metabuf, BUFFER_LOCK_UNLOCK); + LockBuffer(metabuf, BT_WRITE); + + START_CRIT_SECTION(); + + /* upgrade meta-page if needed */ + if (metad->btm_version < BTREE_VERSION) + _bt_upgrademetapage(metapg); + + /* update cleanup-related infromation */ + metad->btm_oldest_btpo_xact = oldestBtpoXact; + metad->btm_last_cleanup_num_heap_tuples = numHeapTuples; + MarkBufferDirty(metabuf); + + /* write wal record if needed */ + if (RelationNeedsWAL(rel)) + { + xl_btree_metadata md; + + XLogBeginInsert(); + XLogRegisterBuffer(0, metabuf, REGBUF_WILL_INIT | REGBUF_STANDARD); + + md.root = metad->btm_root; + md.level = metad->btm_level; + md.fastroot = metad->btm_fastroot; + md.fastlevel = metad->btm_fastlevel; + md.oldest_btpo_xact = oldestBtpoXact; + md.last_cleanup_num_heap_tuples = numHeapTuples; + + XLogRegisterBufData(0, (char *) &md, sizeof(xl_btree_metadata)); + + recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_META_CLEANUP); + + PageSetLSN(metapg, recptr); + } + + END_CRIT_SECTION(); + _bt_relbuf(rel, metabuf); +} + /* * _bt_getroot() -- Get the root page of the btree. * @@ -124,7 +233,8 @@ _bt_getroot(Relation rel, int access) metad = (BTMetaPageData *) rel->rd_amcache; /* We shouldn't have cached it if any of these fail */ Assert(metad->btm_magic == BTREE_MAGIC); - Assert(metad->btm_version == BTREE_VERSION); + Assert(metad->btm_version >= BTREE_MIN_VERSION); + Assert(metad->btm_version <= BTREE_VERSION); Assert(metad->btm_root != P_NONE); rootblkno = metad->btm_fastroot; @@ -170,12 +280,14 @@ _bt_getroot(Relation rel, int access) errmsg("index \"%s\" is not a btree", RelationGetRelationName(rel)))); - if (metad->btm_version != BTREE_VERSION) + if (metad->btm_version < BTREE_MIN_VERSION || + metad->btm_version > BTREE_VERSION) ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), - errmsg("version mismatch in index \"%s\": file version %d, code version %d", + errmsg("version mismatch in index \"%s\": file version %d, " + "current version %d, minimal supported version %d", RelationGetRelationName(rel), - metad->btm_version, BTREE_VERSION))); + metad->btm_version, BTREE_VERSION, BTREE_MIN_VERSION))); /* if no root page initialized yet, do it */ if (metad->btm_root == P_NONE) @@ -191,6 +303,10 @@ _bt_getroot(Relation rel, int access) LockBuffer(metabuf, BUFFER_LOCK_UNLOCK); LockBuffer(metabuf, BT_WRITE); + /* upgrade metapage if needed */ + if (metad->btm_version < BTREE_VERSION) + _bt_upgrademetapage(metapg); + /* * Race condition: if someone else initialized the metadata between * the time we released the read lock and acquired the write lock, we @@ -229,6 +345,8 @@ _bt_getroot(Relation rel, int access) metad->btm_level = 0; metad->btm_fastroot = rootblkno; metad->btm_fastlevel = 0; + metad->btm_oldest_btpo_xact = InvalidTransactionId; + metad->btm_last_cleanup_num_heap_tuples = -1.0; MarkBufferDirty(rootbuf); MarkBufferDirty(metabuf); @@ -248,6 +366,8 @@ _bt_getroot(Relation rel, int access) md.level = 0; md.fastroot = rootblkno; md.fastlevel = 0; + md.oldest_btpo_xact = InvalidTransactionId; + md.last_cleanup_num_heap_tuples = -1.0; XLogRegisterBufData(2, (char *) &md, sizeof(xl_btree_metadata)); @@ -373,12 +493,14 @@ _bt_gettrueroot(Relation rel) errmsg("index \"%s\" is not a btree", RelationGetRelationName(rel)))); - if (metad->btm_version != BTREE_VERSION) + if (metad->btm_version < BTREE_MIN_VERSION || + metad->btm_version > BTREE_VERSION) ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), - errmsg("version mismatch in index \"%s\": file version %d, code version %d", + errmsg("version mismatch in index \"%s\": file version %d, " + "current version %d, minimal supported version %d", RelationGetRelationName(rel), - metad->btm_version, BTREE_VERSION))); + metad->btm_version, BTREE_VERSION, BTREE_MIN_VERSION))); /* if no root page initialized yet, fail */ if (metad->btm_root == P_NONE) @@ -460,12 +582,14 @@ _bt_getrootheight(Relation rel) errmsg("index \"%s\" is not a btree", RelationGetRelationName(rel)))); - if (metad->btm_version != BTREE_VERSION) + if (metad->btm_version < BTREE_MIN_VERSION || + metad->btm_version > BTREE_VERSION) ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), - errmsg("version mismatch in index \"%s\": file version %d, code version %d", + errmsg("version mismatch in index \"%s\": file version %d, " + "current version %d, minimal supported version %d", RelationGetRelationName(rel), - metad->btm_version, BTREE_VERSION))); + metad->btm_version, BTREE_VERSION, BTREE_MIN_VERSION))); /* * If there's no root page yet, _bt_getroot() doesn't expect a cache @@ -1784,6 +1908,9 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty) /* And update the metapage, if needed */ if (BufferIsValid(metabuf)) { + /* upgrade metapage if needed */ + if (metad->btm_version < BTREE_VERSION) + _bt_upgrademetapage(metapg); metad->btm_fastroot = rightsib; metad->btm_fastlevel = targetlevel; MarkBufferDirty(metabuf); @@ -1834,6 +1961,8 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty) xlmeta.level = metad->btm_level; xlmeta.fastroot = metad->btm_fastroot; xlmeta.fastlevel = metad->btm_fastlevel; + xlmeta.oldest_btpo_xact = metad->btm_oldest_btpo_xact; + xlmeta.last_cleanup_num_heap_tuples = metad->btm_last_cleanup_num_heap_tuples; XLogRegisterBufData(4, (char *) &xlmeta, sizeof(xl_btree_metadata)); xlinfo = XLOG_BTREE_UNLINK_PAGE_META; diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 8158508d8c..750827a77b 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -19,11 +19,14 @@ #include "postgres.h" #include "access/nbtree.h" +#include "access/nbtxlog.h" #include "access/relscan.h" #include "access/xlog.h" #include "commands/vacuum.h" +#include "miscadmin.h" #include "nodes/execnodes.h" #include "pgstat.h" +#include "postmaster/autovacuum.h" #include "storage/condition_variable.h" #include "storage/indexfsm.h" #include "storage/ipc.h" @@ -45,6 +48,7 @@ typedef struct BlockNumber lastBlockVacuumed; /* highest blkno actually vacuumed */ BlockNumber lastBlockLocked; /* highest blkno we've cleanup-locked */ BlockNumber totFreePages; /* true total # of free pages */ + TransactionId oldestBtpoXact; MemoryContext pagedelcontext; } BTVacState; @@ -89,7 +93,7 @@ typedef struct BTParallelScanDescData *BTParallelScanDesc; static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, - BTCycleId cycleid); + BTCycleId cycleid, TransactionId *oldestBtpoXact); static void btvacuumpage(BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno); @@ -773,6 +777,67 @@ _bt_parallel_advance_array_keys(IndexScanDesc scan) SpinLockRelease(&btscan->btps_mutex); } +/* + * _bt_vacuum_needs_cleanup() -- Checks if index needs cleanup assuming that + * btbulkdelete() wasn't called. + */ +static bool +_bt_vacuum_needs_cleanup(IndexVacuumInfo *info) +{ + Buffer metabuf; + Page metapg; + BTPageOpaque metaopaque; + BTMetaPageData *metad; + bool result = false; + + metabuf = _bt_getbuf(info->index, BTREE_METAPAGE, BT_READ); + metapg = BufferGetPage(metabuf); + metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg); + metad = BTPageGetMeta(metapg); + + /* + * Do cleanup if metapage needs upgrade, because we don't have + * cleanup-related meta-information yet. + */ + if (metad->btm_version < BTREE_VERSION) + return true; + + /* + * If oldest btpo.xact in the deleted pages is older than RecentGlobalXmin, + * then at least one deleted page can be recycled. + */ + if (TransactionIdIsValid(metad->btm_oldest_btpo_xact) && + TransactionIdPrecedes(metad->btm_oldest_btpo_xact, + RecentGlobalXmin)) + { + result = true; + } + else + { + StdRdOptions *relopts; + float8 cleanup_scale_factor; + + /* + * If table receives large enough amount of insertions and no cleanup + * was performed, then index might appear to have stalled statistics. + * In order to evade that, we perform cleanup when table receives + * vacuum_cleanup_index_scale_factor fractions of insertions. + */ + relopts = (StdRdOptions *) info->index->rd_options; + cleanup_scale_factor = (relopts && + relopts->vacuum_cleanup_index_scale_factor >= 0) + ? relopts->vacuum_cleanup_index_scale_factor + : vacuum_cleanup_index_scale_factor; + + if (cleanup_scale_factor < 0 || metad->btm_last_cleanup_num_heap_tuples < 0 || + info->num_heap_tuples > metad->btm_last_cleanup_num_heap_tuples * (1.0 + cleanup_scale_factor)) + result = true; + } + + _bt_relbuf(info->index, metabuf); + return result; +} + /* * Bulk deletion of all index entries pointing to a set of heap tuples. * The set of target tuples is specified via a callback routine that tells @@ -797,7 +862,13 @@ btbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, { cycleid = _bt_start_vacuum(rel); - btvacuumscan(info, stats, callback, callback_state, cycleid); + /* + * Erase cleanup-related meta-information in the index, because cleanup + * scan wouldn't occur during this VACUUM cycle. + */ + _bt_update_meta_cleanup_info(info->index, InvalidTransactionId, -1.0); + + btvacuumscan(info, stats, callback, callback_state, cycleid, NULL); } PG_END_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel)); _bt_end_vacuum(rel); @@ -819,17 +890,28 @@ btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) /* * If btbulkdelete was called, we need not do anything, just return the - * stats from the latest btbulkdelete call. If it wasn't called, we must - * still do a pass over the index, to recycle any newly-recyclable pages - * and to obtain index statistics. + * stats from the latest btbulkdelete call. If it wasn't called, we might + * still need to do a pass over the index, to recycle any newly-recyclable + * pages and to obtain index statistics. _bt_vacuum_needs_cleanup checks + * is there are newly-recyclable or stalled index statistics. * * Since we aren't going to actually delete any leaf items, there's no * need to go through all the vacuum-cycle-ID pushups. */ if (stats == NULL) { + TransactionId oldestBtpoXact; + + /* Check if we need a cleanup */ + if (!_bt_vacuum_needs_cleanup(info)) + return NULL; + stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); - btvacuumscan(info, stats, NULL, NULL, 0); + btvacuumscan(info, stats, NULL, NULL, 0, &oldestBtpoXact); + + /* Update cleanup-related information in the metapage */ + _bt_update_meta_cleanup_info(info->index, oldestBtpoXact, + info->num_heap_tuples); } /* Finally, vacuum the FSM */ @@ -865,7 +947,7 @@ btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state, - BTCycleId cycleid) + BTCycleId cycleid, TransactionId *oldestBtpoXact) { Relation rel = info->index; BTVacState vstate; @@ -890,6 +972,7 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, vstate.lastBlockVacuumed = BTREE_METAPAGE; /* Initialise at first block */ vstate.lastBlockLocked = BTREE_METAPAGE; vstate.totFreePages = 0; + vstate.oldestBtpoXact = InvalidTransactionId; /* Create a temporary memory context to run _bt_pagedel in */ vstate.pagedelcontext = AllocSetContextCreate(CurrentMemoryContext, @@ -979,6 +1062,8 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, /* update statistics */ stats->num_pages = num_pages; stats->pages_free = vstate.totFreePages; + if (oldestBtpoXact) + *oldestBtpoXact = vstate.oldestBtpoXact; } /* @@ -1058,6 +1143,9 @@ restart: { /* Already deleted, but can't recycle yet */ stats->pages_deleted++; + if (!TransactionIdIsValid(vstate->oldestBtpoXact) || + TransactionIdPrecedes(opaque->btpo.xact, vstate->oldestBtpoXact)) + vstate->oldestBtpoXact = opaque->btpo.xact; } else if (P_ISHALFDEAD(opaque)) { @@ -1226,7 +1314,12 @@ restart: /* count only this page, else may double-count parent */ if (ndel) + { stats->pages_deleted++; + if (!TransactionIdIsValid(vstate->oldestBtpoXact) || + TransactionIdPrecedes(opaque->btpo.xact, vstate->oldestBtpoXact)) + vstate->oldestBtpoXact = opaque->btpo.xact; + } MemoryContextSwitchTo(oldcontext); /* pagedel released buffer, so we shouldn't */ diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index 233c3965d9..b565bcb540 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -108,6 +108,8 @@ _bt_restore_meta(XLogReaderState *record, uint8 block_id) md->btm_level = xlrec->level; md->btm_fastroot = xlrec->fastroot; md->btm_fastlevel = xlrec->fastlevel; + md->btm_oldest_btpo_xact = xlrec->oldest_btpo_xact; + md->btm_last_cleanup_num_heap_tuples = xlrec->last_cleanup_num_heap_tuples; pageop = (BTPageOpaque) PageGetSpecialPointer(metapg); pageop->btpo_flags = BTP_META; @@ -985,7 +987,6 @@ btree_xlog_reuse_page(XLogReaderState *record) } } - void btree_redo(XLogReaderState *record) { @@ -1027,6 +1028,9 @@ btree_redo(XLogReaderState *record) case XLOG_BTREE_REUSE_PAGE: btree_xlog_reuse_page(record); break; + case XLOG_BTREE_META_CLEANUP: + _bt_restore_meta(record, 0); + break; default: elog(PANIC, "btree_redo: unknown op code %u", info); } diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c index 446040d816..c1f0441b08 100644 --- a/src/backend/utils/init/globals.c +++ b/src/backend/utils/init/globals.c @@ -138,3 +138,5 @@ int VacuumPageDirty = 0; int VacuumCostBalance = 0; /* working state for vacuum */ bool VacuumCostActive = false; + +double vacuum_cleanup_index_scale_factor; diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 0c4cc9160d..2a01b025f9 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -3083,6 +3083,16 @@ static struct config_real ConfigureNamesReal[] = NULL, NULL, NULL }, + { + {"vacuum_cleanup_index_scale_factor", PGC_SIGHUP, AUTOVACUUM, + gettext_noop("Number of tuple inserts prior to index cleanup as a fraction of reltuples."), + NULL + }, + &vacuum_cleanup_index_scale_factor, + 0.1, 0.0, 100.0, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0.0, 0.0, 0.0, NULL, NULL, NULL diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 2b0b1da763..41e838a434 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -102,6 +102,10 @@ typedef struct BTMetaPageData uint32 btm_level; /* tree level of the root page */ BlockNumber btm_fastroot; /* current "fast" root location */ uint32 btm_fastlevel; /* tree level of the "fast" root page */ + /* following fields are available since page version 3 */ + TransactionId btm_oldest_btpo_xact; /* oldest btpo_xact among of deleted pages */ + double btm_last_cleanup_num_heap_tuples; /* number of heap tuples + * during last cleanup */ } BTMetaPageData; #define BTPageGetMeta(p) \ @@ -109,7 +113,8 @@ typedef struct BTMetaPageData #define BTREE_METAPAGE 0 /* first page is meta */ #define BTREE_MAGIC 0x053162 /* magic number of btree pages */ -#define BTREE_VERSION 2 /* current version number */ +#define BTREE_VERSION 3 /* current version number */ +#define BTREE_MIN_VERSION 2 /* minimal supported version number */ /* * Maximum size of a btree index entry, including its tuple header. @@ -481,6 +486,9 @@ extern void _bt_finish_split(Relation rel, Buffer bbuf, BTStack stack); * prototypes for functions in nbtpage.c */ extern void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level); +extern void _bt_update_meta_cleanup_info(Relation rel, + TransactionId oldestBtpoXact, float8 numHeapTuples); +extern void _bt_upgrademetapage(Page page); extern Buffer _bt_getroot(Relation rel, int access); extern Buffer _bt_gettrueroot(Relation rel); extern int _bt_getrootheight(Relation rel); diff --git a/src/include/access/nbtxlog.h b/src/include/access/nbtxlog.h index 8297df75fe..a8ccdcec42 100644 --- a/src/include/access/nbtxlog.h +++ b/src/include/access/nbtxlog.h @@ -38,6 +38,8 @@ * vacuum */ #define XLOG_BTREE_REUSE_PAGE 0xD0 /* old page is about to be reused from * FSM */ +#define XLOG_BTREE_META_CLEANUP 0xE0 /* update cleanup-related data in the + * metapage */ /* * All that we need to regenerate the meta-data page @@ -48,6 +50,8 @@ typedef struct xl_btree_metadata uint32 level; BlockNumber fastroot; uint32 fastlevel; + TransactionId oldest_btpo_xact; + double last_cleanup_num_heap_tuples; } xl_btree_metadata; /* diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index a4574cd533..a429a19964 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -256,6 +256,8 @@ extern int VacuumPageDirty; extern int VacuumCostBalance; extern bool VacuumCostActive; +extern double vacuum_cleanup_index_scale_factor; + /* in tcop/postgres.c */ diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index aa8add544a..0a775b61a0 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -277,6 +277,8 @@ typedef struct StdRdOptions { int32 vl_len_; /* varlena header (do not touch directly!) */ int fillfactor; /* page fill factor in percent (0..100) */ + /* fraction of newly inserted tuples prior to trigger index cleanup */ + float8 vacuum_cleanup_index_scale_factor; int toast_tuple_target; /* target for tuple toasting */ AutoVacOpts autovacuum; /* autovacuum-related options */ bool user_catalog_table; /* use as an additional catalog relation */ diff --git a/src/test/regress/expected/reloptions.out b/src/test/regress/expected/reloptions.out index df3c99d1eb..667d5e76d1 100644 --- a/src/test/regress/expected/reloptions.out +++ b/src/test/regress/expected/reloptions.out @@ -1,12 +1,12 @@ -- Simple create CREATE TABLE reloptions_test(i INT) WITH (FiLLFaCToR=30, - autovacuum_enabled = false, autovacuum_analyze_scale_factor = 0.2); + autovacuum_enabled = false, autovacuum_analyze_scale_factor = 0.2, + vacuum_cleanup_index_scale_factor = 0.5); +ERROR: unrecognized parameter "vacuum_cleanup_index_scale_factor" SELECT reloptions FROM pg_class WHERE oid = 'reloptions_test'::regclass; - reloptions ------------------------------------------------------------------------------- - {fillfactor=30,autovacuum_enabled=false,autovacuum_analyze_scale_factor=0.2} -(1 row) - +ERROR: relation "reloptions_test" does not exist +LINE 1: SELECT reloptions FROM pg_class WHERE oid = 'reloptions_test... + ^ -- Fail min/max values check CREATE TABLE reloptions_test2(i INT) WITH (fillfactor=2); ERROR: value 2 out of bounds for option "fillfactor" @@ -20,6 +20,10 @@ DETAIL: Valid values are between "0.000000" and "100.000000". CREATE TABLE reloptions_test2(i INT) WITH (autovacuum_analyze_scale_factor = 110.0); ERROR: value 110.0 out of bounds for option "autovacuum_analyze_scale_factor" DETAIL: Valid values are between "0.000000" and "100.000000". +CREATE TABLE reloptions_test2(i INT) WITH (vacuum_cleanup_index_scale_factor = -10.0); +ERROR: unrecognized parameter "vacuum_cleanup_index_scale_factor" +CREATE TABLE reloptions_test2(i INT) WITH (vacuum_cleanup_index_scale_factor = 110.0); +ERROR: unrecognized parameter "vacuum_cleanup_index_scale_factor" -- Fail when option and namespace do not exist CREATE TABLE reloptions_test2(i INT) WITH (not_existing_option=2); ERROR: unrecognized parameter "not_existing_option" @@ -42,6 +46,10 @@ CREATE TABLE reloptions_test2(i INT) WITH (autovacuum_analyze_scale_factor='stri ERROR: invalid value for floating point option "autovacuum_analyze_scale_factor": string CREATE TABLE reloptions_test2(i INT) WITH (autovacuum_analyze_scale_factor=true); ERROR: invalid value for floating point option "autovacuum_analyze_scale_factor": true +CREATE TABLE reloptions_test2(i INT) WITH (vacuum_cleanup_index_scale_factor='string'); +ERROR: unrecognized parameter "vacuum_cleanup_index_scale_factor" +CREATE TABLE reloptions_test2(i INT) WITH (vacuum_cleanup_index_scale_factor=true); +ERROR: unrecognized parameter "vacuum_cleanup_index_scale_factor" -- Fail if option is specified twice CREATE TABLE reloptions_test2(i INT) WITH (fillfactor=30, fillfactor=40); ERROR: parameter "fillfactor" specified more than once @@ -50,44 +58,42 @@ CREATE TABLE reloptions_test2(i INT) WITH (fillfactor); ERROR: invalid value for integer option "fillfactor": true -- Simple ALTER TABLE ALTER TABLE reloptions_test SET (fillfactor=31, - autovacuum_analyze_scale_factor = 0.3); + autovacuum_analyze_scale_factor = 0.3, + vacuum_cleanup_index_scale_factor = 0.3); +ERROR: relation "reloptions_test" does not exist SELECT reloptions FROM pg_class WHERE oid = 'reloptions_test'::regclass; - reloptions ------------------------------------------------------------------------------- - {autovacuum_enabled=false,fillfactor=31,autovacuum_analyze_scale_factor=0.3} -(1 row) - +ERROR: relation "reloptions_test" does not exist +LINE 1: SELECT reloptions FROM pg_class WHERE oid = 'reloptions_test... + ^ -- Set boolean option to true without specifying value ALTER TABLE reloptions_test SET (autovacuum_enabled, fillfactor=32); +ERROR: relation "reloptions_test" does not exist SELECT reloptions FROM pg_class WHERE oid = 'reloptions_test'::regclass; - reloptions ------------------------------------------------------------------------------ - {autovacuum_analyze_scale_factor=0.3,autovacuum_enabled=true,fillfactor=32} -(1 row) - +ERROR: relation "reloptions_test" does not exist +LINE 1: SELECT reloptions FROM pg_class WHERE oid = 'reloptions_test... + ^ -- Check that RESET works well ALTER TABLE reloptions_test RESET (fillfactor); +ERROR: relation "reloptions_test" does not exist SELECT reloptions FROM pg_class WHERE oid = 'reloptions_test'::regclass; - reloptions ---------------------------------------------------------------- - {autovacuum_analyze_scale_factor=0.3,autovacuum_enabled=true} -(1 row) - +ERROR: relation "reloptions_test" does not exist +LINE 1: SELECT reloptions FROM pg_class WHERE oid = 'reloptions_test... + ^ -- Resetting all values causes the column to become null ALTER TABLE reloptions_test RESET (autovacuum_enabled, - autovacuum_analyze_scale_factor); + autovacuum_analyze_scale_factor, vacuum_cleanup_index_scale_factor); +ERROR: relation "reloptions_test" does not exist SELECT reloptions FROM pg_class WHERE oid = 'reloptions_test'::regclass AND reloptions IS NULL; - reloptions ------------- - -(1 row) - +ERROR: relation "reloptions_test" does not exist +LINE 1: SELECT reloptions FROM pg_class WHERE oid = 'reloptions_test... + ^ -- RESET fails if a value is specified ALTER TABLE reloptions_test RESET (fillfactor=12); -ERROR: RESET must not include values for parameters +ERROR: relation "reloptions_test" does not exist -- The OIDS option is not stored as reloption DROP TABLE reloptions_test; +ERROR: table "reloptions_test" does not exist CREATE TABLE reloptions_test(i INT) WITH (fillfactor=20, oids=true); SELECT reloptions, relhasoids FROM pg_class WHERE oid = 'reloptions_test'::regclass; reloptions | relhasoids diff --git a/src/test/regress/sql/reloptions.sql b/src/test/regress/sql/reloptions.sql index 37fbf41f7d..c12325389f 100644 --- a/src/test/regress/sql/reloptions.sql +++ b/src/test/regress/sql/reloptions.sql @@ -1,7 +1,8 @@ -- Simple create CREATE TABLE reloptions_test(i INT) WITH (FiLLFaCToR=30, - autovacuum_enabled = false, autovacuum_analyze_scale_factor = 0.2); + autovacuum_enabled = false, autovacuum_analyze_scale_factor = 0.2, + vacuum_cleanup_index_scale_factor = 0.5); SELECT reloptions FROM pg_class WHERE oid = 'reloptions_test'::regclass; -- Fail min/max values check @@ -9,6 +10,8 @@ CREATE TABLE reloptions_test2(i INT) WITH (fillfactor=2); CREATE TABLE reloptions_test2(i INT) WITH (fillfactor=110); CREATE TABLE reloptions_test2(i INT) WITH (autovacuum_analyze_scale_factor = -10.0); CREATE TABLE reloptions_test2(i INT) WITH (autovacuum_analyze_scale_factor = 110.0); +CREATE TABLE reloptions_test2(i INT) WITH (vacuum_cleanup_index_scale_factor = -10.0); +CREATE TABLE reloptions_test2(i INT) WITH (vacuum_cleanup_index_scale_factor = 110.0); -- Fail when option and namespace do not exist CREATE TABLE reloptions_test2(i INT) WITH (not_existing_option=2); @@ -23,6 +26,8 @@ CREATE TABLE reloptions_test2(i INT) WITH (autovacuum_enabled=30.5); CREATE TABLE reloptions_test2(i INT) WITH (autovacuum_enabled='string'); CREATE TABLE reloptions_test2(i INT) WITH (autovacuum_analyze_scale_factor='string'); CREATE TABLE reloptions_test2(i INT) WITH (autovacuum_analyze_scale_factor=true); +CREATE TABLE reloptions_test2(i INT) WITH (vacuum_cleanup_index_scale_factor='string'); +CREATE TABLE reloptions_test2(i INT) WITH (vacuum_cleanup_index_scale_factor=true); -- Fail if option is specified twice CREATE TABLE reloptions_test2(i INT) WITH (fillfactor=30, fillfactor=40); @@ -32,7 +37,8 @@ CREATE TABLE reloptions_test2(i INT) WITH (fillfactor); -- Simple ALTER TABLE ALTER TABLE reloptions_test SET (fillfactor=31, - autovacuum_analyze_scale_factor = 0.3); + autovacuum_analyze_scale_factor = 0.3, + vacuum_cleanup_index_scale_factor = 0.3); SELECT reloptions FROM pg_class WHERE oid = 'reloptions_test'::regclass; -- Set boolean option to true without specifying value @@ -45,7 +51,7 @@ SELECT reloptions FROM pg_class WHERE oid = 'reloptions_test'::regclass; -- Resetting all values causes the column to become null ALTER TABLE reloptions_test RESET (autovacuum_enabled, - autovacuum_analyze_scale_factor); + autovacuum_analyze_scale_factor, vacuum_cleanup_index_scale_factor); SELECT reloptions FROM pg_class WHERE oid = 'reloptions_test'::regclass AND reloptions IS NULL;