From 2e58b0e93a0000f6c9fc162e0b986a9d04209bd5 Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Tue, 11 Jun 2024 12:14:28 -0400 Subject: [PATCH v21 14/20] BitmapHeapScan: lift and shift prefetch functions This is a temporary commit. For ease of review, move the BitmapHeapScan prefetch functions into heapam_handler.c without modifying them. The next commit will push the prefetching code into heap specific code. Separating the commit which moves the functions to heapam_handler.c and the one which modifies them and their callers to contain bitmap prefetch code in heap AM-specific code makes it easier to see what has changed. Doing so requires including heap code in nodeBitmapHeapscan.c, which is not okay for the final code that is committed. --- src/backend/access/heap/heapam_handler.c | 218 +++++++++++++++++++++ src/backend/executor/nodeBitmapHeapscan.c | 225 +--------------------- src/include/access/heapam.h | 8 + 3 files changed, 228 insertions(+), 223 deletions(-) diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 936a34dcaa6..bf6a67467b3 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -2112,6 +2112,224 @@ heapam_estimate_rel_size(Relation rel, int32 *attr_widths, HEAP_USABLE_BYTES_PER_PAGE); } +/* + * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target + */ +void +BitmapPrefetch(BitmapHeapScanState *node, BitmapTableScanDesc *scan) +{ +#ifdef USE_PREFETCH + ParallelBitmapHeapState *pstate = node->pstate; + + if (pstate == NULL) + { + TBMIterator *prefetch_iterator = &node->prefetch_iterator; + + if (!prefetch_iterator->exhausted) + { + while (node->prefetch_pages < node->prefetch_target) + { + TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator); + bool skip_fetch; + + if (tbmpre == NULL) + { + /* No more pages to prefetch */ + tbm_end_iterate(prefetch_iterator); + break; + } + node->prefetch_pages++; + node->pfblockno = 
tbmpre->blockno; + + /* + * If we expect not to have to actually read this heap page, + * skip this prefetch call, but continue to run the prefetch + * logic normally. (Would it be better not to increment + * prefetch_pages?) + */ + skip_fetch = (!(scan->flags & SO_NEED_TUPLES) && + !tbmpre->recheck && + VM_ALL_VISIBLE(node->ss.ss_currentRelation, + tbmpre->blockno, + &node->pvmbuffer)); + + if (!skip_fetch) + PrefetchBuffer(scan->rel, MAIN_FORKNUM, tbmpre->blockno); + } + } + + return; + } + + if (pstate->prefetch_pages < pstate->prefetch_target) + { + TBMIterator *prefetch_iterator = &node->prefetch_iterator; + + if (!prefetch_iterator->exhausted) + { + while (1) + { + TBMIterateResult *tbmpre; + bool do_prefetch = false; + bool skip_fetch; + + /* + * Recheck under the mutex. If some other process has already + * done enough prefetching then we need not to do anything. + */ + SpinLockAcquire(&pstate->mutex); + if (pstate->prefetch_pages < pstate->prefetch_target) + { + pstate->prefetch_pages++; + do_prefetch = true; + } + SpinLockRelease(&pstate->mutex); + + if (!do_prefetch) + return; + + tbmpre = tbm_iterate(prefetch_iterator); + if (tbmpre == NULL) + { + /* No more pages to prefetch */ + tbm_end_iterate(prefetch_iterator); + break; + } + + node->pfblockno = tbmpre->blockno; + + /* As above, skip prefetch if we expect not to need page */ + skip_fetch = (!(scan->flags & SO_NEED_TUPLES) && + !tbmpre->recheck && + VM_ALL_VISIBLE(node->ss.ss_currentRelation, + tbmpre->blockno, + &node->pvmbuffer)); + + if (!skip_fetch) + PrefetchBuffer(scan->rel, MAIN_FORKNUM, tbmpre->blockno); + } + } + } +#endif /* USE_PREFETCH */ +} + +/* + * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator + * + * We keep track of how far the prefetch iterator is ahead of the main + * iterator in prefetch_pages. For each block the main iterator returns, we + * decrement prefetch_pages. 
+ */ +void +BitmapAdjustPrefetchIterator(BitmapHeapScanState *node) +{ +#ifdef USE_PREFETCH + ParallelBitmapHeapState *pstate = node->pstate; + TBMIterateResult *tbmpre; + + if (pstate == NULL) + { + TBMIterator *prefetch_iterator = &node->prefetch_iterator; + + if (node->prefetch_pages > 0) + { + /* The main iterator has closed the distance by one page */ + node->prefetch_pages--; + } + else if (!prefetch_iterator->exhausted) + { + /* Do not let the prefetch iterator get behind the main one */ + tbmpre = tbm_iterate(prefetch_iterator); + node->pfblockno = tbmpre ? tbmpre->blockno : InvalidBlockNumber; + } + return; + } + + /* + * XXX: There is a known issue with keeping the prefetch and current block + * iterators in sync for parallel bitmap table scans. This can lead to + * prefetching blocks that have already been read. See the discussion + * here: + * https://postgr.es/m/20240315211449.en2jcmdqxv5o6tlz%40alap3.anarazel.de + * Note that moving the call site of BitmapAdjustPrefetchIterator() + * exacerbates the effects of this bug. + */ + if (node->prefetch_maximum > 0) + { + TBMIterator *prefetch_iterator = &node->prefetch_iterator; + + SpinLockAcquire(&pstate->mutex); + if (pstate->prefetch_pages > 0) + { + pstate->prefetch_pages--; + SpinLockRelease(&pstate->mutex); + } + else + { + /* Release the mutex before iterating */ + SpinLockRelease(&pstate->mutex); + + /* + * In case of shared mode, we can not ensure that the current + * blockno of the main iterator and that of the prefetch iterator + * are same. It's possible that whatever blockno we are + * prefetching will be processed by another process. Therefore, + * we don't validate the blockno here as we do in non-parallel + * case. + */ + if (!prefetch_iterator->exhausted) + { + tbmpre = tbm_iterate(prefetch_iterator); + node->pfblockno = tbmpre ? 
tbmpre->blockno : InvalidBlockNumber; + } + } + } +#endif /* USE_PREFETCH */ +} + +/* + * BitmapAdjustPrefetchTarget - Adjust the prefetch target + * + * Increase prefetch target if it's not yet at the max. Note that + * we will increase it to zero after fetching the very first + * page/tuple, then to one after the second tuple is fetched, then + * it doubles as later pages are fetched. + */ +void +BitmapAdjustPrefetchTarget(BitmapHeapScanState *node) +{ +#ifdef USE_PREFETCH + ParallelBitmapHeapState *pstate = node->pstate; + + if (pstate == NULL) + { + if (node->prefetch_target >= node->prefetch_maximum) + /* don't increase any further */ ; + else if (node->prefetch_target >= node->prefetch_maximum / 2) + node->prefetch_target = node->prefetch_maximum; + else if (node->prefetch_target > 0) + node->prefetch_target *= 2; + else + node->prefetch_target++; + return; + } + + /* Do an unlocked check first to save spinlock acquisitions. */ + if (pstate->prefetch_target < node->prefetch_maximum) + { + SpinLockAcquire(&pstate->mutex); + if (pstate->prefetch_target >= node->prefetch_maximum) + /* don't increase any further */ ; + else if (pstate->prefetch_target >= node->prefetch_maximum / 2) + pstate->prefetch_target = node->prefetch_maximum; + else if (pstate->prefetch_target > 0) + pstate->prefetch_target *= 2; + else + pstate->prefetch_target++; + SpinLockRelease(&pstate->mutex); + } +#endif /* USE_PREFETCH */ +} /* ------------------------------------------------------------------------ * Executor related callbacks for the heap AM diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index 22df6314f3f..c3a6336f5d9 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -37,6 +37,8 @@ #include +/* XXX: temporary include only for review purposes */ +#include "access/heapam.h" #include "access/relscan.h" #include "access/tableam.h" #include "access/visibilitymap.h" @@ -51,10 
+53,6 @@ static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node); static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate); -static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node); -static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node); -static inline void BitmapPrefetch(BitmapHeapScanState *node, - BitmapTableScanDesc *scan); static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate); @@ -279,225 +277,6 @@ BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate) ConditionVariableBroadcast(&pstate->cv); } -/* - * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator - * - * We keep track of how far the prefetch iterator is ahead of the main - * iterator in prefetch_pages. For each block the main iterator returns, we - * decrement prefetch_pages. - */ -static inline void -BitmapAdjustPrefetchIterator(BitmapHeapScanState *node) -{ -#ifdef USE_PREFETCH - ParallelBitmapHeapState *pstate = node->pstate; - TBMIterateResult *tbmpre; - - if (pstate == NULL) - { - TBMIterator *prefetch_iterator = &node->prefetch_iterator; - - if (node->prefetch_pages > 0) - { - /* The main iterator has closed the distance by one page */ - node->prefetch_pages--; - } - else if (!prefetch_iterator->exhausted) - { - /* Do not let the prefetch iterator get behind the main one */ - tbmpre = tbm_iterate(prefetch_iterator); - node->pfblockno = tbmpre ? tbmpre->blockno : InvalidBlockNumber; - } - return; - } - - /* - * XXX: There is a known issue with keeping the prefetch and current block - * iterators in sync for parallel bitmap table scans. This can lead to - * prefetching blocks that have already been read. See the discussion - * here: - * https://postgr.es/m/20240315211449.en2jcmdqxv5o6tlz%40alap3.anarazel.de - * Note that moving the call site of BitmapAdjustPrefetchIterator() - * exacerbates the effects of this bug. 
- */ - if (node->prefetch_maximum > 0) - { - TBMIterator *prefetch_iterator = &node->prefetch_iterator; - - SpinLockAcquire(&pstate->mutex); - if (pstate->prefetch_pages > 0) - { - pstate->prefetch_pages--; - SpinLockRelease(&pstate->mutex); - } - else - { - /* Release the mutex before iterating */ - SpinLockRelease(&pstate->mutex); - - /* - * In case of shared mode, we can not ensure that the current - * blockno of the main iterator and that of the prefetch iterator - * are same. It's possible that whatever blockno we are - * prefetching will be processed by another process. Therefore, - * we don't validate the blockno here as we do in non-parallel - * case. - */ - if (!prefetch_iterator->exhausted) - { - tbmpre = tbm_iterate(prefetch_iterator); - node->pfblockno = tbmpre ? tbmpre->blockno : InvalidBlockNumber; - } - } - } -#endif /* USE_PREFETCH */ -} - -/* - * BitmapAdjustPrefetchTarget - Adjust the prefetch target - * - * Increase prefetch target if it's not yet at the max. Note that - * we will increase it to zero after fetching the very first - * page/tuple, then to one after the second tuple is fetched, then - * it doubles as later pages are fetched. - */ -static inline void -BitmapAdjustPrefetchTarget(BitmapHeapScanState *node) -{ -#ifdef USE_PREFETCH - ParallelBitmapHeapState *pstate = node->pstate; - - if (pstate == NULL) - { - if (node->prefetch_target >= node->prefetch_maximum) - /* don't increase any further */ ; - else if (node->prefetch_target >= node->prefetch_maximum / 2) - node->prefetch_target = node->prefetch_maximum; - else if (node->prefetch_target > 0) - node->prefetch_target *= 2; - else - node->prefetch_target++; - return; - } - - /* Do an unlocked check first to save spinlock acquisitions. 
*/ - if (pstate->prefetch_target < node->prefetch_maximum) - { - SpinLockAcquire(&pstate->mutex); - if (pstate->prefetch_target >= node->prefetch_maximum) - /* don't increase any further */ ; - else if (pstate->prefetch_target >= node->prefetch_maximum / 2) - pstate->prefetch_target = node->prefetch_maximum; - else if (pstate->prefetch_target > 0) - pstate->prefetch_target *= 2; - else - pstate->prefetch_target++; - SpinLockRelease(&pstate->mutex); - } -#endif /* USE_PREFETCH */ -} - -/* - * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target - */ -static inline void -BitmapPrefetch(BitmapHeapScanState *node, BitmapTableScanDesc *scan) -{ -#ifdef USE_PREFETCH - ParallelBitmapHeapState *pstate = node->pstate; - - if (pstate == NULL) - { - TBMIterator *prefetch_iterator = &node->prefetch_iterator; - - if (!prefetch_iterator->exhausted) - { - while (node->prefetch_pages < node->prefetch_target) - { - TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator); - bool skip_fetch; - - if (tbmpre == NULL) - { - /* No more pages to prefetch */ - tbm_end_iterate(prefetch_iterator); - break; - } - node->prefetch_pages++; - node->pfblockno = tbmpre->blockno; - - /* - * If we expect not to have to actually read this heap page, - * skip this prefetch call, but continue to run the prefetch - * logic normally. (Would it be better not to increment - * prefetch_pages?) - */ - skip_fetch = (!(scan->flags & SO_NEED_TUPLES) && - !tbmpre->recheck && - VM_ALL_VISIBLE(node->ss.ss_currentRelation, - tbmpre->blockno, - &node->pvmbuffer)); - - if (!skip_fetch) - PrefetchBuffer(scan->rel, MAIN_FORKNUM, tbmpre->blockno); - } - } - - return; - } - - if (pstate->prefetch_pages < pstate->prefetch_target) - { - TBMIterator *prefetch_iterator = &node->prefetch_iterator; - - if (!prefetch_iterator->exhausted) - { - while (1) - { - TBMIterateResult *tbmpre; - bool do_prefetch = false; - bool skip_fetch; - - /* - * Recheck under the mutex. 
If some other process has already - * done enough prefetching then we need not to do anything. - */ - SpinLockAcquire(&pstate->mutex); - if (pstate->prefetch_pages < pstate->prefetch_target) - { - pstate->prefetch_pages++; - do_prefetch = true; - } - SpinLockRelease(&pstate->mutex); - - if (!do_prefetch) - return; - - tbmpre = tbm_iterate(prefetch_iterator); - if (tbmpre == NULL) - { - /* No more pages to prefetch */ - tbm_end_iterate(prefetch_iterator); - break; - } - - node->pfblockno = tbmpre->blockno; - - /* As above, skip prefetch if we expect not to need page */ - skip_fetch = (!(scan->flags & SO_NEED_TUPLES) && - !tbmpre->recheck && - VM_ALL_VISIBLE(node->ss.ss_currentRelation, - tbmpre->blockno, - &node->pvmbuffer)); - - if (!skip_fetch) - PrefetchBuffer(scan->rel, MAIN_FORKNUM, tbmpre->blockno); - } - } - } -#endif /* USE_PREFETCH */ -} - /* * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual */ diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 824092c1d63..9a455deba48 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -394,6 +394,14 @@ extern void simple_heap_update(Relation relation, ItemPointer otid, extern TransactionId heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate); +/* in heapam_handler.c */ +extern void BitmapPrefetch(BitmapHeapScanState *node, + BitmapTableScanDesc *scan); + +extern void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node); + +extern void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node); + /* in heap/pruneheap.c */ struct GlobalVisState; extern void heap_page_prune_opt(Relation relation, Buffer buffer); -- 2.34.1