From e2bf17c5a936c3d536d9f25150b81d00969963b1 Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Thu, 15 Feb 2024 21:23:41 -0500 Subject: [PATCH v4 12/14] Separate TBM[Shared]Iterator and TBMIterateResult Remove the TBMIterateResult from the TBMIterator and TBMSharedIterator and have tbm_[shared_]iterate() take a TBMIterateResult as a parameter. This will allow multiple TBMIterateResults to exist concurrently, allowing asynchronous use of the TIDBitmap for prefetching, for example. tbm_[shared_]iterate() now sets blockno to InvalidBlockNumber when the bitmap is exhausted instead of returning NULL. BitmapHeapScan callers of tbm_iterate make a TBMIterateResult locally and pass it in. Because GIN only needs a single TBMIterateResult, inline the matchResult in the GinScanEntry to avoid having to separately manage memory for the TBMIterateResult. --- src/backend/access/gin/ginget.c | 48 +++++++++------ src/backend/access/gin/ginscan.c | 2 +- src/backend/access/heap/heapam_handler.c | 32 +++++----- src/backend/executor/nodeBitmapHeapscan.c | 33 +++++----- src/backend/nodes/tidbitmap.c | 73 ++++++++++++----------- src/include/access/gin_private.h | 2 +- src/include/nodes/tidbitmap.h | 4 +- 7 files changed, 107 insertions(+), 87 deletions(-) diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index 0b4f2ebadb6..3aa457a29e1 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -332,10 +332,22 @@ restartScanEntry: entry->list = NULL; entry->nlist = 0; entry->matchBitmap = NULL; - entry->matchResult = NULL; entry->reduceResult = false; entry->predictNumberResult = 0; + /* + * MTODO: is it enough to set blockno to InvalidBlockNumber? In all the + * places where we previously set matchResult to NULL, I just set blockno + * to InvalidBlockNumber. It seems like this should be okay because that + * is usually what we check before using the matchResult members. But it + * might be safer to zero out the offsets array. 
But that is expensive. + */ + entry->matchResult.blockno = InvalidBlockNumber; + entry->matchResult.ntuples = 0; + entry->matchResult.recheck = true; + memset(entry->matchResult.offsets, 0, + sizeof(OffsetNumber) * MaxHeapTuplesPerPage); + /* * we should find entry, and begin scan of posting tree or just store * posting list in memory @@ -374,6 +386,7 @@ restartScanEntry: { if (entry->matchIterator) tbm_end_iterate(entry->matchIterator); + entry->matchResult.blockno = InvalidBlockNumber; entry->matchIterator = NULL; tbm_free(entry->matchBitmap); entry->matchBitmap = NULL; @@ -823,18 +836,19 @@ entryGetItem(GinState *ginstate, GinScanEntry entry, { /* * If we've exhausted all items on this block, move to next block - * in the bitmap. + * in the bitmap. tbm_iterate() sets matchResult.blockno to + * InvalidBlockNumber when the bitmap is exhausted. */ - while (entry->matchResult == NULL || - (entry->matchResult->ntuples >= 0 && - entry->offset >= entry->matchResult->ntuples) || - entry->matchResult->blockno < advancePastBlk || + while ((!BlockNumberIsValid(entry->matchResult.blockno)) || + (entry->matchResult.ntuples >= 0 && + entry->offset >= entry->matchResult.ntuples) || + entry->matchResult.blockno < advancePastBlk || (ItemPointerIsLossyPage(&advancePast) && - entry->matchResult->blockno == advancePastBlk)) + entry->matchResult.blockno == advancePastBlk)) { - entry->matchResult = tbm_iterate(entry->matchIterator); + tbm_iterate(entry->matchIterator, &entry->matchResult); - if (entry->matchResult == NULL) + if (!BlockNumberIsValid(entry->matchResult.blockno)) { ItemPointerSetInvalid(&entry->curItem); tbm_end_iterate(entry->matchIterator); @@ -858,10 +872,10 @@ entryGetItem(GinState *ginstate, GinScanEntry entry, * We're now on the first page after advancePast which has any * items on it. If it's a lossy result, return that. 
*/ - if (entry->matchResult->ntuples < 0) + if (entry->matchResult.ntuples < 0) { ItemPointerSetLossyPage(&entry->curItem, - entry->matchResult->blockno); + entry->matchResult.blockno); /* * We might as well fall out of the loop; we could not @@ -875,27 +889,27 @@ entryGetItem(GinState *ginstate, GinScanEntry entry, * Not a lossy page. Skip over any offsets <= advancePast, and * return that. */ - if (entry->matchResult->blockno == advancePastBlk) + if (entry->matchResult.blockno == advancePastBlk) { /* * First, do a quick check against the last offset on the * page. If that's > advancePast, so are all the other * offsets, so just go back to the top to get the next page. */ - if (entry->matchResult->offsets[entry->matchResult->ntuples - 1] <= advancePastOff) + if (entry->matchResult.offsets[entry->matchResult.ntuples - 1] <= advancePastOff) { - entry->offset = entry->matchResult->ntuples; + entry->offset = entry->matchResult.ntuples; continue; } /* Otherwise scan to find the first item > advancePast */ - while (entry->matchResult->offsets[entry->offset] <= advancePastOff) + while (entry->matchResult.offsets[entry->offset] <= advancePastOff) entry->offset++; } ItemPointerSet(&entry->curItem, - entry->matchResult->blockno, - entry->matchResult->offsets[entry->offset]); + entry->matchResult.blockno, + entry->matchResult.offsets[entry->offset]); entry->offset++; /* Done unless we need to reduce the result */ diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c index af24d38544e..033d5253394 100644 --- a/src/backend/access/gin/ginscan.c +++ b/src/backend/access/gin/ginscan.c @@ -106,7 +106,7 @@ ginFillScanEntry(GinScanOpaque so, OffsetNumber attnum, ItemPointerSetMin(&scanEntry->curItem); scanEntry->matchBitmap = NULL; scanEntry->matchIterator = NULL; - scanEntry->matchResult = NULL; + scanEntry->matchResult.blockno = InvalidBlockNumber; scanEntry->list = NULL; scanEntry->nlist = 0; scanEntry->offset = InvalidOffsetNumber; diff --git 
a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index a439ddc87bf..daa5902e24d 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -2121,7 +2121,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan, Buffer buffer; Snapshot snapshot; int ntup; - TBMIterateResult *tbmres; + TBMIterateResult tbmres; hscan->rs_cindex = 0; hscan->rs_ntuples = 0; @@ -2134,11 +2134,11 @@ heapam_scan_bitmap_next_block(TableScanDesc scan, CHECK_FOR_INTERRUPTS(); if (scan->shared_tbmiterator) - tbmres = tbm_shared_iterate(scan->shared_tbmiterator); + tbm_shared_iterate(scan->shared_tbmiterator, &tbmres); else - tbmres = tbm_iterate(scan->tbmiterator); + tbm_iterate(scan->tbmiterator, &tbmres); - if (tbmres == NULL) + if (!BlockNumberIsValid(tbmres.blockno)) { /* no more entries in the bitmap */ Assert(hscan->rs_empty_tuples_pending == 0); @@ -2153,11 +2153,11 @@ heapam_scan_bitmap_next_block(TableScanDesc scan, * isolation though, as we need to examine all invisible tuples * reachable by the index. */ - } while (!IsolationIsSerializable() && tbmres->blockno >= hscan->rs_nblocks); + } while (!IsolationIsSerializable() && tbmres.blockno >= hscan->rs_nblocks); /* Got a valid block */ - *blockno = tbmres->blockno; - *recheck = tbmres->recheck; + *blockno = tbmres.blockno; + *recheck = tbmres.recheck; /* * We can skip fetching the heap page if we don't need any fields from the @@ -2165,19 +2165,19 @@ heapam_scan_bitmap_next_block(TableScanDesc scan, * the page are visible to our transaction. 
*/ if (scan->rs_flags & SO_CAN_SKIP_FETCH && - !tbmres->recheck && - VM_ALL_VISIBLE(scan->rs_rd, tbmres->blockno, &hscan->rs_vmbuffer)) + !tbmres.recheck && + VM_ALL_VISIBLE(scan->rs_rd, tbmres.blockno, &hscan->rs_vmbuffer)) { /* can't be lossy in the skip_fetch case */ - Assert(tbmres->ntuples >= 0); + Assert(tbmres.ntuples >= 0); Assert(hscan->rs_empty_tuples_pending >= 0); - hscan->rs_empty_tuples_pending += tbmres->ntuples; + hscan->rs_empty_tuples_pending += tbmres.ntuples; return true; } - block = tbmres->blockno; + block = tbmres.blockno; /* * Acquire pin on the target heap page, trading in any pin we held before. @@ -2206,7 +2206,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan, /* * We need two separate strategies for lossy and non-lossy cases. */ - if (tbmres->ntuples >= 0) + if (tbmres.ntuples >= 0) { /* * Bitmap is non-lossy, so we just look through the offsets listed in @@ -2215,9 +2215,9 @@ heapam_scan_bitmap_next_block(TableScanDesc scan, */ int curslot; - for (curslot = 0; curslot < tbmres->ntuples; curslot++) + for (curslot = 0; curslot < tbmres.ntuples; curslot++) { - OffsetNumber offnum = tbmres->offsets[curslot]; + OffsetNumber offnum = tbmres.offsets[curslot]; ItemPointerData tid; HeapTupleData heapTuple; @@ -2267,7 +2267,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan, Assert(ntup <= MaxHeapTuplesPerPage); hscan->rs_ntuples = ntup; - *lossy = tbmres->ntuples < 0; + *lossy = tbmres.ntuples < 0; /* * Return true to indicate that a valid block was found and the bitmap is diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index 3be433ea6e1..74b92d4cbf4 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -344,9 +344,10 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node, else if (prefetch_iterator) { /* Do not let the prefetch iterator get behind the main one */ - TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator); + 
TBMIterateResult tbmpre; + tbm_iterate(prefetch_iterator, &tbmpre); - if (tbmpre == NULL || tbmpre->blockno != blockno) + if (!BlockNumberIsValid(tbmpre.blockno) || tbmpre.blockno != blockno) elog(ERROR, "prefetch and main iterators are out of sync"); } return; @@ -364,6 +365,8 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node, } else { + TBMIterateResult tbmpre; + /* Release the mutex before iterating */ SpinLockRelease(&pstate->mutex); @@ -376,7 +379,7 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node, * case. */ if (prefetch_iterator) - tbm_shared_iterate(prefetch_iterator); + tbm_shared_iterate(prefetch_iterator, &tbmpre); } } #endif /* USE_PREFETCH */ @@ -443,10 +446,12 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan) { while (node->prefetch_pages < node->prefetch_target) { - TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator); + TBMIterateResult tbmpre; bool skip_fetch; - if (tbmpre == NULL) + tbm_iterate(prefetch_iterator, &tbmpre); + + if (!BlockNumberIsValid(tbmpre.blockno)) { /* No more pages to prefetch */ tbm_end_iterate(prefetch_iterator); @@ -462,13 +467,13 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan) * prefetch_pages?) 
*/ skip_fetch = (scan->rs_flags & SO_CAN_SKIP_FETCH && - !tbmpre->recheck && + !tbmpre.recheck && VM_ALL_VISIBLE(node->ss.ss_currentRelation, - tbmpre->blockno, + tbmpre.blockno, &node->pvmbuffer)); if (!skip_fetch) - PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno); + PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre.blockno); } } @@ -483,7 +488,7 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan) { while (1) { - TBMIterateResult *tbmpre; + TBMIterateResult tbmpre; bool do_prefetch = false; bool skip_fetch; @@ -502,8 +507,8 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan) if (!do_prefetch) return; - tbmpre = tbm_shared_iterate(prefetch_iterator); - if (tbmpre == NULL) + tbm_shared_iterate(prefetch_iterator, &tbmpre); + if (!BlockNumberIsValid(tbmpre.blockno)) { /* No more pages to prefetch */ tbm_end_shared_iterate(prefetch_iterator); @@ -513,13 +518,13 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan) /* As above, skip prefetch if we expect not to need page */ skip_fetch = (scan->rs_flags & SO_CAN_SKIP_FETCH && - !tbmpre->recheck && + !tbmpre.recheck && VM_ALL_VISIBLE(node->ss.ss_currentRelation, - tbmpre->blockno, + tbmpre.blockno, &node->pvmbuffer)); if (!skip_fetch) - PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno); + PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre.blockno); } } } diff --git a/src/backend/nodes/tidbitmap.c b/src/backend/nodes/tidbitmap.c index 689a959b467..b4dcb1cbb88 100644 --- a/src/backend/nodes/tidbitmap.c +++ b/src/backend/nodes/tidbitmap.c @@ -171,7 +171,6 @@ struct TBMIterator int spageptr; /* next spages index */ int schunkptr; /* next schunks index */ int schunkbit; /* next bit to check in current schunk */ - TBMIterateResult output; }; /* @@ -212,7 +211,6 @@ struct TBMSharedIterator PTEntryArray *ptbase; /* pagetable element array */ PTIterationArray *ptpages; /* sorted exact page index list */ PTIterationArray *ptchunks; /* sorted lossy page index list */ - 
TBMIterateResult output; }; /* Local function prototypes */ @@ -943,20 +941,21 @@ tbm_advance_schunkbit(PagetableEntry *chunk, int *schunkbitp) /* * tbm_iterate - scan through next page of a TIDBitmap * - * Returns a TBMIterateResult representing one page, or NULL if there are - * no more pages to scan. Pages are guaranteed to be delivered in numerical - * order. If result->ntuples < 0, then the bitmap is "lossy" and failed to - * remember the exact tuples to look at on this page --- the caller must - * examine all tuples on the page and check if they meet the intended - * condition. If result->recheck is true, only the indicated tuples need - * be examined, but the condition must be rechecked anyway. (For ease of - * testing, recheck is always set true when ntuples < 0.) + * Caller must pass in a TBMIterateResult to be filled. + * + * Pages are guaranteed to be delivered in numerical order. tbmres->blockno is + * set to InvalidBlockNumber when there are no more pages to scan. If + * tbmres->ntuples < 0, then the bitmap is "lossy" and failed to remember the + * exact tuples to look at on this page --- the caller must examine all tuples + * on the page and check if they meet the intended condition. If + * tbmres->recheck is true, only the indicated tuples need be examined, but the + * condition must be rechecked anyway. (For ease of testing, recheck is always + * set true when ntuples < 0.) */ -TBMIterateResult * -tbm_iterate(TBMIterator *iterator) +void +tbm_iterate(TBMIterator *iterator, TBMIterateResult *tbmres) { TIDBitmap *tbm = iterator->tbm; - TBMIterateResult *output = &(iterator->output); Assert(tbm->iterating == TBM_ITERATING_PRIVATE); @@ -984,6 +983,7 @@ tbm_iterate(TBMIterator *iterator) * If both chunk and per-page data remain, must output the numerically * earlier page. 
*/ + Assert(tbmres); if (iterator->schunkptr < tbm->nchunks) { PagetableEntry *chunk = tbm->schunks[iterator->schunkptr]; @@ -994,11 +994,11 @@ tbm_iterate(TBMIterator *iterator) chunk_blockno < tbm->spages[iterator->spageptr]->blockno) { /* Return a lossy page indicator from the chunk */ - output->blockno = chunk_blockno; - output->ntuples = -1; - output->recheck = true; + tbmres->blockno = chunk_blockno; + tbmres->ntuples = -1; + tbmres->recheck = true; iterator->schunkbit++; - return output; + return; } } @@ -1014,16 +1014,17 @@ tbm_iterate(TBMIterator *iterator) page = tbm->spages[iterator->spageptr]; /* scan bitmap to extract individual offset numbers */ - ntuples = tbm_extract_page_tuple(page, output); - output->blockno = page->blockno; - output->ntuples = ntuples; - output->recheck = page->recheck; + ntuples = tbm_extract_page_tuple(page, tbmres); + tbmres->blockno = page->blockno; + tbmres->ntuples = ntuples; + tbmres->recheck = page->recheck; iterator->spageptr++; - return output; + return; } /* Nothing more in the bitmap */ - return NULL; + tbmres->blockno = InvalidBlockNumber; + return; } /* @@ -1033,10 +1034,9 @@ tbm_iterate(TBMIterator *iterator) * across multiple processes. We need to acquire the iterator LWLock, * before accessing the shared members. 
*/ -TBMIterateResult * -tbm_shared_iterate(TBMSharedIterator *iterator) +void +tbm_shared_iterate(TBMSharedIterator *iterator, TBMIterateResult *tbmres) { - TBMIterateResult *output = &iterator->output; TBMSharedIteratorState *istate = iterator->state; PagetableEntry *ptbase = NULL; int *idxpages = NULL; @@ -1087,13 +1087,13 @@ tbm_shared_iterate(TBMSharedIterator *iterator) chunk_blockno < ptbase[idxpages[istate->spageptr]].blockno) { /* Return a lossy page indicator from the chunk */ - output->blockno = chunk_blockno; - output->ntuples = -1; - output->recheck = true; + tbmres->blockno = chunk_blockno; + tbmres->ntuples = -1; + tbmres->recheck = true; istate->schunkbit++; LWLockRelease(&istate->lock); - return output; + return; } } @@ -1103,21 +1103,22 @@ tbm_shared_iterate(TBMSharedIterator *iterator) int ntuples; /* scan bitmap to extract individual offset numbers */ - ntuples = tbm_extract_page_tuple(page, output); - output->blockno = page->blockno; - output->ntuples = ntuples; - output->recheck = page->recheck; + ntuples = tbm_extract_page_tuple(page, tbmres); + tbmres->blockno = page->blockno; + tbmres->ntuples = ntuples; + tbmres->recheck = page->recheck; istate->spageptr++; LWLockRelease(&istate->lock); - return output; + return; } LWLockRelease(&istate->lock); /* Nothing more in the bitmap */ - return NULL; + tbmres->blockno = InvalidBlockNumber; + return; } /* diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h index 51d0c74a6b0..e423d92b41c 100644 --- a/src/include/access/gin_private.h +++ b/src/include/access/gin_private.h @@ -352,7 +352,7 @@ typedef struct GinScanEntryData /* for a partial-match or full-scan query, we accumulate all TIDs here */ TIDBitmap *matchBitmap; TBMIterator *matchIterator; - TBMIterateResult *matchResult; + TBMIterateResult matchResult; /* used for Posting list and one page in Posting tree */ ItemPointerData *list; diff --git a/src/include/nodes/tidbitmap.h b/src/include/nodes/tidbitmap.h index 
432fae52962..f000c1af28f 100644 --- a/src/include/nodes/tidbitmap.h +++ b/src/include/nodes/tidbitmap.h @@ -72,8 +72,8 @@ extern bool tbm_is_empty(const TIDBitmap *tbm); extern TBMIterator *tbm_begin_iterate(TIDBitmap *tbm); extern dsa_pointer tbm_prepare_shared_iterate(TIDBitmap *tbm); -extern TBMIterateResult *tbm_iterate(TBMIterator *iterator); -extern TBMIterateResult *tbm_shared_iterate(TBMSharedIterator *iterator); +extern void tbm_iterate(TBMIterator *iterator, TBMIterateResult *tbmres); +extern void tbm_shared_iterate(TBMSharedIterator *iterator, TBMIterateResult *tbmres); extern void tbm_end_iterate(TBMIterator *iterator); extern void tbm_end_shared_iterate(TBMSharedIterator *iterator); extern TBMSharedIterator *tbm_attach_shared_iterate(dsa_area *dsa, -- 2.37.2