From d75cfac7a5ca380ad86569a65b678390b9768d4e Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Wed, 3 Apr 2024 16:28:46 -0400 Subject: [PATCH v7 2/2] some ideas --- src/backend/access/heap/heapam_handler.c | 57 +++++------------------- src/backend/commands/analyze.c | 21 +++++---- src/backend/utils/misc/sampling.c | 9 +--- src/include/access/heapam.h | 4 +- src/include/utils/sampling.h | 1 - 5 files changed, 27 insertions(+), 65 deletions(-) diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index d83fbbe6af3..aab392f50e5 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -1053,40 +1053,6 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, pfree(isnull); } -/* - * Prepare to analyze block returned from streaming object. If the block returned - * from streaming object is valid, true is returned; otherwise false is returned. - * The scan has been started with SO_TYPE_ANALYZE option. - * - * This routine holds a buffer pin and lock on the heap page. They are held - * until heapam_scan_analyze_next_tuple() returns false. That is until all the - * items of the heap page are analyzed. - */ -bool -heapam_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream) -{ - HeapScanDesc hscan = (HeapScanDesc) scan; - - /* - * We must maintain a pin on the target page's buffer to ensure that - * concurrent activity - e.g. HOT pruning - doesn't delete tuples out from - * under us. Hence, pin the page until we are done looking at it. We - * also choose to hold sharelock on the buffer throughout --- we could - * release and re-acquire sharelock for each tuple, but since we aren't - * doing much work per tuple, the extra lock traffic is probably better - * avoided. - */ - hscan->rs_cbuf = read_stream_next_buffer(stream, NULL); - if (hscan->rs_cbuf == InvalidBuffer) - return false; - - LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE); - - hscan->rs_cblock = BufferGetBlockNumber(hscan->rs_cbuf); - hscan->rs_cindex = FirstOffsetNumber; - return true; -} - /* * Iterate over tuples in the block selected with * heapam_scan_analyze_next_block(). If a tuple that's suitable for sampling @@ -1098,11 +1064,10 @@ heapam_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream) * tuples. */ bool -heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, +heapam_scan_analyze_next_tuple(HeapScanDesc scan, TransactionId OldestXmin, double *liverows, double *deadrows, TupleTableSlot *slot) { - HeapScanDesc hscan = (HeapScanDesc) scan; Page targpage; OffsetNumber maxoffset; BufferHeapTupleTableSlot *hslot; @@ -1110,17 +1075,17 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, Assert(TTS_IS_BUFFERTUPLE(slot)); hslot = (BufferHeapTupleTableSlot *) slot; - targpage = BufferGetPage(hscan->rs_cbuf); + targpage = BufferGetPage(scan->rs_cbuf); maxoffset = PageGetMaxOffsetNumber(targpage); /* Inner loop over all tuples on the selected page */ - for (; hscan->rs_cindex <= maxoffset; hscan->rs_cindex++) + for (; scan->rs_cindex <= maxoffset; scan->rs_cindex++) { ItemId itemid; HeapTuple targtuple = &hslot->base.tupdata; bool sample_it = false; - itemid = PageGetItemId(targpage, hscan->rs_cindex); + itemid = PageGetItemId(targpage, scan->rs_cindex); /* * We ignore unused and redirect line pointers. DEAD line pointers @@ -1135,14 +1100,14 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, continue; } - ItemPointerSet(&targtuple->t_self, hscan->rs_cblock, hscan->rs_cindex); + ItemPointerSet(&targtuple->t_self, scan->rs_cblock, scan->rs_cindex); - targtuple->t_tableOid = RelationGetRelid(scan->rs_rd); + targtuple->t_tableOid = RelationGetRelid(scan->rs_base.rs_rd); targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid); targtuple->t_len = ItemIdGetLength(itemid); switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin, - hscan->rs_cbuf)) + scan->rs_cbuf)) { case HEAPTUPLE_LIVE: sample_it = true; @@ -1222,8 +1187,8 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, if (sample_it) { - ExecStoreBufferHeapTuple(targtuple, slot, hscan->rs_cbuf); - hscan->rs_cindex++; + ExecStoreBufferHeapTuple(targtuple, slot, scan->rs_cbuf); + scan->rs_cindex++; /* note that we leave the buffer locked here! */ return true; @@ -1231,8 +1196,8 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, } /* Now release the lock and pin on the page */ - UnlockReleaseBuffer(hscan->rs_cbuf); - hscan->rs_cbuf = InvalidBuffer; + UnlockReleaseBuffer(scan->rs_cbuf); + scan->rs_cbuf = InvalidBuffer; /* also prevent old slot contents from having pin on page */ ExecClearTuple(slot); diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 764520d5aa2..7002ece8be8 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -1111,9 +1111,7 @@ block_sampling_streaming_read_next(ReadStream *stream, void *user_data, void *per_buffer_data) { - BlockSamplerData *bs = user_data; - - return BlockSampler_HasMore(bs) ? BlockSampler_Next(bs) : InvalidBlockNumber; + return BlockSampler_Next(user_data); } /* @@ -1165,7 +1163,7 @@ acquire_sample_rows(Relation onerel, int elevel, BlockSamplerData bs; ReservoirStateData rstate; TupleTableSlot *slot; - TableScanDesc scan; + HeapScanDesc scan; BlockNumber nblocks; BlockNumber blksdone = 0; ReadStream *stream; @@ -1188,12 +1186,12 @@ acquire_sample_rows(Relation onerel, int elevel, /* Prepare for sampling rows */ reservoir_init_selection_state(&rstate, targrows); - scan = heap_beginscan(onerel, NULL, 0, NULL, NULL, SO_TYPE_ANALYZE); + scan = (HeapScanDesc) heap_beginscan(onerel, NULL, 0, NULL, NULL, SO_TYPE_ANALYZE); slot = table_slot_create(onerel, NULL); stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE, vac_strategy, - scan->rs_rd, + onerel, MAIN_FORKNUM, block_sampling_streaming_read_next, &bs, @@ -1204,9 +1202,16 @@ acquire_sample_rows(Relation onerel, int elevel, { vacuum_delay_point(); - if (!heapam_scan_analyze_next_block(scan, stream)) + scan->rs_cbuf = read_stream_next_buffer(stream, NULL); + + if (!BufferIsValid(scan->rs_cbuf)) break; + LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); + + scan->rs_cblock = BufferGetBlockNumber(scan->rs_cbuf); + scan->rs_cindex = FirstOffsetNumber; + while (heapam_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot)) { /* @@ -1258,7 +1263,7 @@ acquire_sample_rows(Relation onerel, int elevel, read_stream_end(stream); ExecDropSingleTupleTableSlot(slot); - heap_endscan(scan); + heap_endscan(&scan->rs_base); /* * If we didn't find as many tuples as we wanted then we're done. No sort diff --git a/src/backend/utils/misc/sampling.c b/src/backend/utils/misc/sampling.c index 933db06702c..69b73f5b115 100644 --- a/src/backend/utils/misc/sampling.c +++ b/src/backend/utils/misc/sampling.c @@ -54,12 +54,6 @@ BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize, return Min(bs->n, bs->N); } -bool -BlockSampler_HasMore(BlockSampler bs) -{ - return (bs->t < bs->N) && (bs->m < bs->n); -} - BlockNumber BlockSampler_Next(BlockSampler bs) { @@ -68,7 +62,8 @@ BlockSampler_Next(BlockSampler bs) double p; /* probability to skip block */ double V; /* random */ - Assert(BlockSampler_HasMore(bs)); /* hence K > 0 and k > 0 */ + if (bs->t >= bs->N || bs->m >= bs->n) + return InvalidBlockNumber; if ((BlockNumber) k >= K) { diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 633caee9d95..f76dd18ad5d 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -389,9 +389,7 @@ extern bool HeapTupleIsSurelyDead(HeapTuple htup, struct GlobalVisState *vistest); /* in heap/heapam_handler.c*/ -extern bool heapam_scan_analyze_next_block(TableScanDesc scan, - ReadStream *stream); -extern bool heapam_scan_analyze_next_tuple(TableScanDesc scan, +extern bool heapam_scan_analyze_next_tuple(HeapScanDesc scan, TransactionId OldestXmin, double *liverows, double *deadrows, TupleTableSlot *slot); diff --git a/src/include/utils/sampling.h b/src/include/utils/sampling.h index be48ee52bac..fb5d6820a24 100644 --- a/src/include/utils/sampling.h +++ b/src/include/utils/sampling.h @@ -38,7 +38,6 @@ typedef BlockSamplerData *BlockSampler; extern BlockNumber BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize, uint32 randseed); -extern bool BlockSampler_HasMore(BlockSampler bs); extern BlockNumber BlockSampler_Next(BlockSampler bs); /* Reservoir sampling methods */ -- 2.40.1