diff --git a/contrib/pgstattuple/pgstattuple.c b/contrib/pgstattuple/pgstattuple.c index c3a8b1d..9e06334 100644 --- a/contrib/pgstattuple/pgstattuple.c +++ b/contrib/pgstattuple/pgstattuple.c @@ -276,7 +276,7 @@ pgstat_heap(Relation rel, FunctionCallInfo fcinfo) SnapshotData SnapshotDirty; /* Disable syncscan because we assume we scan from block zero upwards */ - scan = heap_beginscan_strat(rel, SnapshotAny, 0, NULL, true, false); + scan = heap_beginscan_strat(rel, SnapshotAny, 0, NULL, true, false, false); InitDirtySnapshot(SnapshotDirty); nblocks = scan->rs_nblocks; /* # blocks to be scanned */ diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index df4853b..164339f 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -80,7 +80,8 @@ static HeapScanDesc heap_beginscan_internal(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, bool allow_strat, bool allow_sync, - bool is_bitmapscan, bool temp_snap); + bool is_bitmapscan, bool temp_snap, + bool always_prune); static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options); static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, @@ -349,9 +350,15 @@ heapgetpage(HeapScanDesc scan, BlockNumber page) snapshot = scan->rs_snapshot; /* - * Prune and repair fragmentation for the whole page, if possible. + * Prune and repair fragmentation for the whole page, if possible and + * within limits, if any. 
*/ - heap_page_prune_opt(scan->rs_rd, buffer); + if (scan->rs_prune_count < PRUNE_LIMIT_PER_SCAN || + scan->rs_always_prune) + { + if (heap_page_prune_opt(scan->rs_rd, buffer) && !scan->rs_always_prune) + scan->rs_prune_count++; + } /* * We must hold share lock on the buffer content while examining tuple @@ -1286,7 +1293,8 @@ heap_openrv_extended(const RangeVar *relation, LOCKMODE lockmode, * heap_beginscan_strat offers an extended API that lets the caller control * whether a nondefault buffer access strategy can be used, and whether * syncscan can be chosen (possibly resulting in the scan not starting from - * block zero). Both of these default to TRUE with plain heap_beginscan. + * block zero). Both of these default to TRUE with plain heap_beginscan, + * while always_prune defaults to FALSE for regular scans. * * heap_beginscan_bm is an alternative entry point for setting up a * HeapScanDesc for a bitmap heap scan. Although that scan technology is @@ -1299,7 +1307,7 @@ heap_beginscan(Relation relation, Snapshot snapshot, int nkeys, ScanKey key) { return heap_beginscan_internal(relation, snapshot, nkeys, key, - true, true, false, false); + true, true, false, false, false); } HeapScanDesc @@ -1309,31 +1317,33 @@ heap_beginscan_catalog(Relation relation, int nkeys, ScanKey key) Snapshot snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); return heap_beginscan_internal(relation, snapshot, nkeys, key, - true, true, false, true); + true, true, false, true, true); } HeapScanDesc heap_beginscan_strat(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, - bool allow_strat, bool allow_sync) + bool allow_strat, bool allow_sync, bool always_prune) { return heap_beginscan_internal(relation, snapshot, nkeys, key, - allow_strat, allow_sync, false, false); + allow_strat, allow_sync, false, false, + always_prune); } HeapScanDesc heap_beginscan_bm(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key) + int nkeys, ScanKey key, bool always_prune) { return 
heap_beginscan_internal(relation, snapshot, nkeys, key, - false, false, true, false); + false, false, true, false, always_prune); } static HeapScanDesc heap_beginscan_internal(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, bool allow_strat, bool allow_sync, - bool is_bitmapscan, bool temp_snap) + bool is_bitmapscan, bool temp_snap, + bool always_prune) { HeapScanDesc scan; @@ -1360,6 +1370,9 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot, scan->rs_allow_sync = allow_sync; scan->rs_temp_snap = temp_snap; + scan->rs_prune_count = 0; + scan->rs_always_prune = always_prune; + /* * we can use page-at-a-time mode if it's an MVCC-safe snapshot */ diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index 2c09128..8538e5a 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -70,8 +70,10 @@ static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum); * * OldestXmin is the cutoff XID used to distinguish whether tuples are DEAD * or RECENTLY_DEAD (see HeapTupleSatisfiesVacuum). + * + * Returns true if the block was cleaned. */ -void +bool heap_page_prune_opt(Relation relation, Buffer buffer) { Page page = BufferGetPage(buffer); @@ -84,7 +86,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer) * anyway, so this is no particular loss. */ if (RecoveryInProgress()) - return; + return false; /* * Use the appropriate xmin horizon for this relation. If it's a proper @@ -108,7 +110,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer) * older than OldestXmin. 
*/ if (!PageIsPrunable(page, OldestXmin)) - return; + return false; /* * We prune when a previous UPDATE failed to find enough space on the page @@ -130,7 +132,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer) { /* OK, try to get exclusive buffer lock */ if (!ConditionalLockBufferForCleanup(buffer)) - return; + return false; /* * Now that we have buffer lock, get accurate information about the @@ -149,7 +151,11 @@ heap_page_prune_opt(Relation relation, Buffer buffer) /* And release buffer lock */ LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + + return true; } + + return false; } diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index 8849c08..85fa12c 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -318,7 +318,7 @@ systable_beginscan(Relation heapRelation, */ sysscan->scan = heap_beginscan_strat(heapRelation, snapshot, nkeys, key, - true, false); + true, false, true); sysscan->iscan = NULL; } diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 53cf96f..a5c1c19 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -249,10 +249,41 @@ index_beginscan(Relation heapRelation, /* * Save additional parameters into the scandesc. Everything else was set + * up by RelationGetIndexScan. Disable the pruning limit in all cases. + */ + scan->heapRelation = heapRelation; + scan->xs_snapshot = snapshot; + scan->xs_always_prune = true; + scan->xs_prune_count = 0; + + return scan; +} + +/* + * index_beginscan_strat - start a scan of an index with amgettuple + * allowing caller to specify additional scan strategies. + * + * Caller must be holding suitable locks on the heap and the index. 
+ */ +IndexScanDesc +index_beginscan_strat(Relation heapRelation, + Relation indexRelation, + Snapshot snapshot, + int nkeys, int norderbys, + bool always_prune) +{ + IndexScanDesc scan; + + scan = index_beginscan_internal(indexRelation, nkeys, norderbys, snapshot); + + /* + * Save additional parameters into the scandesc. Everything else was set * up by RelationGetIndexScan. */ scan->heapRelation = heapRelation; scan->xs_snapshot = snapshot; + scan->xs_always_prune = always_prune; + scan->xs_prune_count = 0; return scan; } @@ -278,6 +309,10 @@ index_beginscan_bitmap(Relation indexRelation, */ scan->xs_snapshot = snapshot; + /* BitmapHeapScan does pruning if required */ + scan->xs_always_prune = false; + scan->xs_prune_count = 0; + return scan; } @@ -520,10 +555,15 @@ index_fetch_heap(IndexScanDesc scan) ItemPointerGetBlockNumber(tid)); /* - * Prune page, but only if we weren't already on this page + * Prune page if enabled, but only if we weren't already on this page */ - if (prev_buf != scan->xs_cbuf) - heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf); + if (prev_buf != scan->xs_cbuf && + (scan->xs_prune_count < PRUNE_LIMIT_PER_SCAN || + scan->xs_always_prune)) + { + if (heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf) && !scan->xs_always_prune) + scan->xs_prune_count++; + } } /* Obtain share-lock on the buffer so we can examine visibility */ diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 844d413..504aaab 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -2193,7 +2193,8 @@ IndexBuildHeapRangeScan(Relation heapRelation, 0, /* number of keys */ NULL, /* scan key */ true, /* buffer access strategy OK */ - allow_sync); /* syncscan OK? */ + allow_sync, /* syncscan OK? 
*/ + false); /* pruning optional */ /* set our scan endpoints */ heap_setscanlimits(scan, start_blockno, numblocks); @@ -2574,7 +2575,8 @@ IndexCheckExclusion(Relation heapRelation, 0, /* number of keys */ NULL, /* scan key */ true, /* buffer access strategy OK */ - true); /* syncscan OK */ + true, /* syncscan OK */ + false); /* pruning optional */ while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { @@ -2848,7 +2850,8 @@ validate_index_heapscan(Relation heapRelation, 0, /* number of keys */ NULL, /* scan key */ true, /* buffer access strategy OK */ - false); /* syncscan not OK */ + false, /* syncscan not OK */ + false); /* pruning optional */ /* * Scan all tuples matching the snapshot. diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index 9b1e975..5d8e537 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -334,9 +334,15 @@ bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres) ntup = 0; /* - * Prune and repair fragmentation for the whole page, if possible. + * Prune and repair fragmentation for the whole page, if possible + * and within limits, if any. 
*/ - heap_page_prune_opt(scan->rs_rd, buffer); + if (scan->rs_prune_count < PRUNE_LIMIT_PER_SCAN || + scan->rs_always_prune) + { + if (heap_page_prune_opt(scan->rs_rd, buffer) && !scan->rs_always_prune) + scan->rs_prune_count++; + } /* * We must hold share lock on the buffer content while examining tuple @@ -537,6 +543,7 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags) { BitmapHeapScanState *scanstate; Relation currentRelation; + bool relistarget; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); @@ -597,6 +604,7 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags) currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags); scanstate->ss.ss_currentRelation = currentRelation; + relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid); /* * Even though we aren't going to do a conventional seqscan, it is useful @@ -605,7 +613,8 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags) scanstate->ss.ss_currentScanDesc = heap_beginscan_bm(currentRelation, estate->es_snapshot, 0, - NULL); + NULL, + relistarget); /* * get the scan type from the relation descriptor. diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c index afcd1ff..b154823 100644 --- a/src/backend/executor/nodeIndexonlyscan.c +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -523,11 +523,12 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) /* * Initialize scan descriptor. 
*/ - indexstate->ioss_ScanDesc = index_beginscan(currentRelation, + indexstate->ioss_ScanDesc = index_beginscan_strat(currentRelation, indexstate->ioss_RelationDesc, estate->es_snapshot, indexstate->ioss_NumScanKeys, - indexstate->ioss_NumOrderByKeys); + indexstate->ioss_NumOrderByKeys, + relistarget); /* Set it up for index-only scan */ indexstate->ioss_ScanDesc->xs_want_itup = true; diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index 2b89dc6..3512f50 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -603,11 +603,12 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) /* * Initialize scan descriptor. */ - indexstate->iss_ScanDesc = index_beginscan(currentRelation, + indexstate->iss_ScanDesc = index_beginscan_strat(currentRelation, indexstate->iss_RelationDesc, estate->es_snapshot, indexstate->iss_NumScanKeys, - indexstate->iss_NumOrderByKeys); + indexstate->iss_NumOrderByKeys, + relistarget); /* * If no run-time keys to calculate, go ahead and pass the scankeys to the diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c index 53cfda5..321820b 100644 --- a/src/backend/executor/nodeSeqscan.c +++ b/src/backend/executor/nodeSeqscan.c @@ -124,20 +124,30 @@ InitScanRelation(SeqScanState *node, EState *estate, int eflags) { Relation currentRelation; HeapScanDesc currentScanDesc; + Oid currentRelid; + bool relistarget; + + /* + * get the relation object id from the relid'th entry in the range table + */ + currentRelid = ((SeqScan *) node->ps.plan)->scanrelid; + relistarget = ExecRelationIsTargetRelation(estate, currentRelid); /* - * get the relation object id from the relid'th entry in the range table, * open that relation and acquire appropriate lock on it. 
*/ currentRelation = ExecOpenScanRelation(estate, - ((SeqScan *) node->ps.plan)->scanrelid, + currentRelid, eflags); /* initialize a heapscan */ - currentScanDesc = heap_beginscan(currentRelation, + currentScanDesc = heap_beginscan_strat(currentRelation, estate->es_snapshot, 0, - NULL); + NULL, + true, + true, + relistarget); node->ss_currentRelation = currentRelation; node->ss_currentScanDesc = currentScanDesc; diff --git a/src/include/access/genam.h b/src/include/access/genam.h index d99158f..bc3885d 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -135,6 +135,10 @@ extern IndexScanDesc index_beginscan(Relation heapRelation, Relation indexRelation, Snapshot snapshot, int nkeys, int norderbys); +extern IndexScanDesc index_beginscan_strat(Relation heapRelation, + Relation indexRelation, + Snapshot snapshot, + int nkeys, int norderbys, bool always_prune); extern IndexScanDesc index_beginscan_bitmap(Relation indexRelation, Snapshot snapshot, int nkeys); diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index f43b482..7144b61 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -110,9 +110,9 @@ extern HeapScanDesc heap_beginscan_catalog(Relation relation, int nkeys, ScanKey key); extern HeapScanDesc heap_beginscan_strat(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, - bool allow_strat, bool allow_sync); + bool allow_strat, bool allow_sync, bool always_prune); extern HeapScanDesc heap_beginscan_bm(Relation relation, Snapshot snapshot, - int nkeys, ScanKey key); + int nkeys, ScanKey key, bool always_prune); extern void heap_setscanlimits(HeapScanDesc scan, BlockNumber startBlk, BlockNumber endBlk); extern void heap_rescan(HeapScanDesc scan, ScanKey key); @@ -164,7 +164,7 @@ extern void simple_heap_update(Relation relation, ItemPointer otid, extern void heap_sync(Relation relation); /* in heap/pruneheap.c */ -extern void heap_page_prune_opt(Relation relation, Buffer buffer); +extern 
bool heap_page_prune_opt(Relation relation, Buffer buffer); extern int heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, bool report_stats, TransactionId *latestRemovedXid); diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index f2c7ca1..c5b57f3 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -20,6 +20,7 @@ #include "access/itup.h" #include "access/tupdesc.h" +#define PRUNE_LIMIT_PER_SCAN 4 typedef struct HeapScanDescData { @@ -34,6 +35,9 @@ typedef struct HeapScanDescData bool rs_allow_sync; /* allow or disallow use of syncscan */ bool rs_temp_snap; /* unregister snapshot at scan end? */ + bool rs_always_prune;/* Unlimited pruning allowed */ + int rs_prune_count; /* Number of blocks pruned during this scan */ + /* state set up at initscan time */ BlockNumber rs_nblocks; /* total number of blocks in rel */ BlockNumber rs_startblock; /* block # to start at */ @@ -72,6 +76,9 @@ typedef struct IndexScanDescData ScanKey orderByData; /* array of ordering op descriptors */ bool xs_want_itup; /* caller requests index tuples */ + bool xs_always_prune;/* Unlimited pruning allowed */ + int xs_prune_count; /* Number of blocks pruned during this scan */ + /* signaling to index AM about killing index tuples */ bool kill_prior_tuple; /* last-returned tuple is dead */ bool ignore_killed_tuples; /* do not return killed entries */