From 05f7e86cb1276a6141d0213d40bcd7252e8af31e Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Thu, 15 Feb 2024 21:23:41 -0500 Subject: [PATCH v13 13/16] Separate TBM[Shared]Iterator and TBMIterateResult Remove the TBMIterateResult from the TBMIterator and TBMSharedIterator and have tbm_[shared_]iterate() take a TBMIterateResult as a parameter. This will allow multiple TBMIterateResults to exist concurrently, allowing asynchronous use of the TIDBitmap for prefetching, for example. tbm_[shared_]iterate() now sets blockno to InvalidBlockNumber when the bitmap is exhausted instead of returning NULL. BitmapHeapScan callers of tbm_iterate() make a TBMIterateResult locally and pass it in. Because GIN only needs a single TBMIterateResult, inline the matchResult in the GinScanEntry to avoid having to separately manage memory for the TBMIterateResult. --- src/backend/access/gin/ginget.c | 48 +++++++++------ src/backend/access/gin/ginscan.c | 2 +- src/backend/access/heap/heapam_handler.c | 30 +++++----- src/backend/executor/nodeBitmapHeapscan.c | 47 ++++++++------- src/backend/nodes/tidbitmap.c | 73 ++++++++++++----------- src/include/access/gin_private.h | 2 +- src/include/executor/nodeBitmapHeapscan.h | 2 +- src/include/nodes/tidbitmap.h | 4 +- 8 files changed, 113 insertions(+), 95 deletions(-) diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index 0b4f2ebadb..3aa457a29e 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -332,10 +332,22 @@ restartScanEntry: entry->list = NULL; entry->nlist = 0; entry->matchBitmap = NULL; - entry->matchResult = NULL; entry->reduceResult = false; entry->predictNumberResult = 0; + /* + * MTODO: is it enough to set blockno to InvalidBlockNumber? In all the + * places where we previously set matchResult to NULL, I just set blockno + * to InvalidBlockNumber. It seems like this should be okay because that + * is usually what we check before using the matchResult members.
But it + * might be safer to zero out the offsets array. But that is expensive. + */ + entry->matchResult.blockno = InvalidBlockNumber; + entry->matchResult.ntuples = 0; + entry->matchResult.recheck = true; + memset(entry->matchResult.offsets, 0, + sizeof(OffsetNumber) * MaxHeapTuplesPerPage); + /* * we should find entry, and begin scan of posting tree or just store * posting list in memory @@ -374,6 +386,7 @@ restartScanEntry: { if (entry->matchIterator) tbm_end_iterate(entry->matchIterator); + entry->matchResult.blockno = InvalidBlockNumber; entry->matchIterator = NULL; tbm_free(entry->matchBitmap); entry->matchBitmap = NULL; @@ -823,18 +836,19 @@ entryGetItem(GinState *ginstate, GinScanEntry entry, { /* * If we've exhausted all items on this block, move to next block - * in the bitmap. + * in the bitmap. tbm_iterate() sets matchResult->blockno to + * InvalidBlockNumber when the bitmap is exhausted. */ - while (entry->matchResult == NULL || - (entry->matchResult->ntuples >= 0 && - entry->offset >= entry->matchResult->ntuples) || - entry->matchResult->blockno < advancePastBlk || + while ((!BlockNumberIsValid(entry->matchResult.blockno)) || + (entry->matchResult.ntuples >= 0 && + entry->offset >= entry->matchResult.ntuples) || + entry->matchResult.blockno < advancePastBlk || (ItemPointerIsLossyPage(&advancePast) && - entry->matchResult->blockno == advancePastBlk)) + entry->matchResult.blockno == advancePastBlk)) { - entry->matchResult = tbm_iterate(entry->matchIterator); + tbm_iterate(entry->matchIterator, &entry->matchResult); - if (entry->matchResult == NULL) + if (!BlockNumberIsValid(entry->matchResult.blockno)) { ItemPointerSetInvalid(&entry->curItem); tbm_end_iterate(entry->matchIterator); @@ -858,10 +872,10 @@ entryGetItem(GinState *ginstate, GinScanEntry entry, * We're now on the first page after advancePast which has any * items on it. If it's a lossy result, return that. 
*/ - if (entry->matchResult->ntuples < 0) + if (entry->matchResult.ntuples < 0) { ItemPointerSetLossyPage(&entry->curItem, - entry->matchResult->blockno); + entry->matchResult.blockno); /* * We might as well fall out of the loop; we could not @@ -875,27 +889,27 @@ entryGetItem(GinState *ginstate, GinScanEntry entry, * Not a lossy page. Skip over any offsets <= advancePast, and * return that. */ - if (entry->matchResult->blockno == advancePastBlk) + if (entry->matchResult.blockno == advancePastBlk) { /* * First, do a quick check against the last offset on the * page. If that's > advancePast, so are all the other * offsets, so just go back to the top to get the next page. */ - if (entry->matchResult->offsets[entry->matchResult->ntuples - 1] <= advancePastOff) + if (entry->matchResult.offsets[entry->matchResult.ntuples - 1] <= advancePastOff) { - entry->offset = entry->matchResult->ntuples; + entry->offset = entry->matchResult.ntuples; continue; } /* Otherwise scan to find the first item > advancePast */ - while (entry->matchResult->offsets[entry->offset] <= advancePastOff) + while (entry->matchResult.offsets[entry->offset] <= advancePastOff) entry->offset++; } ItemPointerSet(&entry->curItem, - entry->matchResult->blockno, - entry->matchResult->offsets[entry->offset]); + entry->matchResult.blockno, + entry->matchResult.offsets[entry->offset]); entry->offset++; /* Done unless we need to reduce the result */ diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c index af24d38544..033d525339 100644 --- a/src/backend/access/gin/ginscan.c +++ b/src/backend/access/gin/ginscan.c @@ -106,7 +106,7 @@ ginFillScanEntry(GinScanOpaque so, OffsetNumber attnum, ItemPointerSetMin(&scanEntry->curItem); scanEntry->matchBitmap = NULL; scanEntry->matchIterator = NULL; - scanEntry->matchResult = NULL; + scanEntry->matchResult.blockno = InvalidBlockNumber; scanEntry->list = NULL; scanEntry->nlist = 0; scanEntry->offset = InvalidOffsetNumber; diff --git 
a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index b08837efd0..d9ceb4b848 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -2196,7 +2196,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan, Buffer buffer; Snapshot snapshot; int ntup; - TBMIterateResult *tbmres; + TBMIterateResult tbmres; hscan->rs_cindex = 0; hscan->rs_ntuples = 0; @@ -2208,9 +2208,9 @@ heapam_scan_bitmap_next_block(TableScanDesc scan, { CHECK_FOR_INTERRUPTS(); - tbmres = bhs_iterate(scan->rs_bhs_iterator); + bhs_iterate(scan->rs_bhs_iterator, &tbmres); - if (tbmres == NULL) + if (!BlockNumberIsValid(tbmres.blockno)) { /* no more entries in the bitmap */ Assert(hscan->rs_empty_tuples_pending == 0); @@ -2225,11 +2225,11 @@ heapam_scan_bitmap_next_block(TableScanDesc scan, * isolation though, as we need to examine all invisible tuples * reachable by the index. */ - } while (!IsolationIsSerializable() && tbmres->blockno >= hscan->rs_nblocks); + } while (!IsolationIsSerializable() && tbmres.blockno >= hscan->rs_nblocks); /* Got a valid block */ - *blockno = tbmres->blockno; - *recheck = tbmres->recheck; + *blockno = tbmres.blockno; + *recheck = tbmres.recheck; /* * We can skip fetching the heap page if we don't need any fields from the @@ -2237,19 +2237,19 @@ heapam_scan_bitmap_next_block(TableScanDesc scan, * page are visible to our transaction. 
*/ if (!(scan->rs_flags & SO_NEED_TUPLE) && - !tbmres->recheck && - VM_ALL_VISIBLE(scan->rs_rd, tbmres->blockno, &hscan->rs_vmbuffer)) + !tbmres.recheck && + VM_ALL_VISIBLE(scan->rs_rd, tbmres.blockno, &hscan->rs_vmbuffer)) { /* can't be lossy in the skip_fetch case */ - Assert(tbmres->ntuples >= 0); + Assert(tbmres.ntuples >= 0); Assert(hscan->rs_empty_tuples_pending >= 0); - hscan->rs_empty_tuples_pending += tbmres->ntuples; + hscan->rs_empty_tuples_pending += tbmres.ntuples; return true; } - block = tbmres->blockno; + block = tbmres.blockno; /* * Acquire pin on the target heap page, trading in any pin we held before. @@ -2278,7 +2278,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan, /* * We need two separate strategies for lossy and non-lossy cases. */ - if (tbmres->ntuples >= 0) + if (tbmres.ntuples >= 0) { /* * Bitmap is non-lossy, so we just look through the offsets listed in @@ -2287,9 +2287,9 @@ heapam_scan_bitmap_next_block(TableScanDesc scan, */ int curslot; - for (curslot = 0; curslot < tbmres->ntuples; curslot++) + for (curslot = 0; curslot < tbmres.ntuples; curslot++) { - OffsetNumber offnum = tbmres->offsets[curslot]; + OffsetNumber offnum = tbmres.offsets[curslot]; ItemPointerData tid; HeapTupleData heapTuple; @@ -2339,7 +2339,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan, Assert(ntup <= MaxHeapTuplesPerPage); hscan->rs_ntuples = ntup; - if (tbmres->ntuples < 0) + if (tbmres.ntuples < 0) (*lossy_pages)++; else (*exact_pages)++; diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index fb79f57d7a..d61965a276 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -77,15 +77,16 @@ bhs_begin_iterate(TIDBitmap *tbm, dsa_pointer shared_area, dsa_area *personal_ar return result; } -TBMIterateResult * -bhs_iterate(BitmapHeapIterator *iterator) +void +bhs_iterate(BitmapHeapIterator *iterator, TBMIterateResult *result) { Assert(iterator); + 
Assert(result); if (iterator->serial) - return tbm_iterate(iterator->serial); + tbm_iterate(iterator->serial, result); else - return tbm_shared_iterate(iterator->parallel); + tbm_shared_iterate(iterator->parallel, result); } void @@ -348,7 +349,7 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node) #ifdef USE_PREFETCH ParallelBitmapHeapState *pstate = node->pstate; BitmapHeapIterator *prefetch_iterator = node->pf_iterator; - TBMIterateResult *tbmpre; + TBMIterateResult tbmpre; if (pstate == NULL) { @@ -360,8 +361,8 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node) else if (prefetch_iterator) { /* Do not let the prefetch iterator get behind the main one */ - tbmpre = bhs_iterate(prefetch_iterator); - node->pfblockno = tbmpre ? tbmpre->blockno : InvalidBlockNumber; + bhs_iterate(prefetch_iterator, &tbmpre); + node->pfblockno = tbmpre.blockno; } return; } @@ -394,8 +395,8 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node) */ if (prefetch_iterator) { - tbmpre = bhs_iterate(prefetch_iterator); - node->pfblockno = tbmpre ? tbmpre->blockno : InvalidBlockNumber; + bhs_iterate(prefetch_iterator, &tbmpre); + node->pfblockno = tbmpre.blockno; } } } @@ -462,10 +463,12 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan) { while (node->prefetch_pages < node->prefetch_target) { - TBMIterateResult *tbmpre = bhs_iterate(prefetch_iterator); + TBMIterateResult tbmpre; bool skip_fetch; - if (tbmpre == NULL) + bhs_iterate(prefetch_iterator, &tbmpre); + + if (!BlockNumberIsValid(tbmpre.blockno)) { /* No more pages to prefetch */ bhs_end_iterate(prefetch_iterator); @@ -473,7 +476,7 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan) break; } node->prefetch_pages++; - node->pfblockno = tbmpre->blockno; + node->pfblockno = tbmpre.blockno; /* * If we expect not to have to actually read this heap page, @@ -482,13 +485,13 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan) * prefetch_pages?) 
*/ skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLE) && - !tbmpre->recheck && + !tbmpre.recheck && VM_ALL_VISIBLE(node->ss.ss_currentRelation, - tbmpre->blockno, + tbmpre.blockno, &node->pvmbuffer)); if (!skip_fetch) - PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno); + PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre.blockno); } } @@ -501,7 +504,7 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan) { while (1) { - TBMIterateResult *tbmpre; + TBMIterateResult tbmpre; bool do_prefetch = false; bool skip_fetch; @@ -520,8 +523,8 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan) if (!do_prefetch) return; - tbmpre = bhs_iterate(prefetch_iterator); - if (tbmpre == NULL) + bhs_iterate(prefetch_iterator, &tbmpre); + if (!BlockNumberIsValid(tbmpre.blockno)) { /* No more pages to prefetch */ bhs_end_iterate(prefetch_iterator); @@ -529,17 +532,17 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan) break; } - node->pfblockno = tbmpre->blockno; + node->pfblockno = tbmpre.blockno; /* As above, skip prefetch if we expect not to need page */ skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLE) && - !tbmpre->recheck && + !tbmpre.recheck && VM_ALL_VISIBLE(node->ss.ss_currentRelation, - tbmpre->blockno, + tbmpre.blockno, &node->pvmbuffer)); if (!skip_fetch) - PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno); + PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre.blockno); } } } diff --git a/src/backend/nodes/tidbitmap.c b/src/backend/nodes/tidbitmap.c index 1dc4c99bf9..309a44bdb8 100644 --- a/src/backend/nodes/tidbitmap.c +++ b/src/backend/nodes/tidbitmap.c @@ -172,7 +172,6 @@ struct TBMIterator int spageptr; /* next spages index */ int schunkptr; /* next schunks index */ int schunkbit; /* next bit to check in current schunk */ - TBMIterateResult output; }; /* @@ -213,7 +212,6 @@ struct TBMSharedIterator PTEntryArray *ptbase; /* pagetable element array */ PTIterationArray *ptpages; /* sorted exact page index list */ PTIterationArray 
*ptchunks; /* sorted lossy page index list */ - TBMIterateResult output; }; /* Local function prototypes */ @@ -944,20 +942,21 @@ tbm_advance_schunkbit(PagetableEntry *chunk, int *schunkbitp) /* * tbm_iterate - scan through next page of a TIDBitmap * - * Returns a TBMIterateResult representing one page, or NULL if there are - * no more pages to scan. Pages are guaranteed to be delivered in numerical - * order. If result->ntuples < 0, then the bitmap is "lossy" and failed to - * remember the exact tuples to look at on this page --- the caller must - * examine all tuples on the page and check if they meet the intended - * condition. If result->recheck is true, only the indicated tuples need - * be examined, but the condition must be rechecked anyway. (For ease of - * testing, recheck is always set true when ntuples < 0.) + * Caller must pass in a TBMIterateResult to be filled. + * + * Pages are guaranteed to be delivered in numerical order. tbmres->blockno is + * set to InvalidBlockNumber when there are no more pages to scan. If + * tbmres->ntuples < 0, then the bitmap is "lossy" and failed to remember the + * exact tuples to look at on this page --- the caller must examine all tuples + * on the page and check if they meet the intended condition. If + * tbmres->recheck is true, only the indicated tuples need be examined, but the + * condition must be rechecked anyway. (For ease of testing, recheck is always + * set true when ntuples < 0.) */ -TBMIterateResult * -tbm_iterate(TBMIterator *iterator) +void +tbm_iterate(TBMIterator *iterator, TBMIterateResult *tbmres) { TIDBitmap *tbm = iterator->tbm; - TBMIterateResult *output = &(iterator->output); Assert(tbm->iterating == TBM_ITERATING_PRIVATE); @@ -985,6 +984,7 @@ tbm_iterate(TBMIterator *iterator) * If both chunk and per-page data remain, must output the numerically * earlier page. 
*/ + Assert(tbmres); if (iterator->schunkptr < tbm->nchunks) { PagetableEntry *chunk = tbm->schunks[iterator->schunkptr]; @@ -995,11 +995,11 @@ tbm_iterate(TBMIterator *iterator) chunk_blockno < tbm->spages[iterator->spageptr]->blockno) { /* Return a lossy page indicator from the chunk */ - output->blockno = chunk_blockno; - output->ntuples = -1; - output->recheck = true; + tbmres->blockno = chunk_blockno; + tbmres->ntuples = -1; + tbmres->recheck = true; iterator->schunkbit++; - return output; + return; } } @@ -1015,16 +1015,17 @@ tbm_iterate(TBMIterator *iterator) page = tbm->spages[iterator->spageptr]; /* scan bitmap to extract individual offset numbers */ - ntuples = tbm_extract_page_tuple(page, output); - output->blockno = page->blockno; - output->ntuples = ntuples; - output->recheck = page->recheck; + ntuples = tbm_extract_page_tuple(page, tbmres); + tbmres->blockno = page->blockno; + tbmres->ntuples = ntuples; + tbmres->recheck = page->recheck; iterator->spageptr++; - return output; + return; } /* Nothing more in the bitmap */ - return NULL; + tbmres->blockno = InvalidBlockNumber; + return; } /* @@ -1034,10 +1035,9 @@ tbm_iterate(TBMIterator *iterator) * across multiple processes. We need to acquire the iterator LWLock, * before accessing the shared members. 
*/ -TBMIterateResult * -tbm_shared_iterate(TBMSharedIterator *iterator) +void +tbm_shared_iterate(TBMSharedIterator *iterator, TBMIterateResult *tbmres) { - TBMIterateResult *output = &iterator->output; TBMSharedIteratorState *istate = iterator->state; PagetableEntry *ptbase = NULL; int *idxpages = NULL; @@ -1088,13 +1088,13 @@ tbm_shared_iterate(TBMSharedIterator *iterator) chunk_blockno < ptbase[idxpages[istate->spageptr]].blockno) { /* Return a lossy page indicator from the chunk */ - output->blockno = chunk_blockno; - output->ntuples = -1; - output->recheck = true; + tbmres->blockno = chunk_blockno; + tbmres->ntuples = -1; + tbmres->recheck = true; istate->schunkbit++; LWLockRelease(&istate->lock); - return output; + return; } } @@ -1104,21 +1104,22 @@ tbm_shared_iterate(TBMSharedIterator *iterator) int ntuples; /* scan bitmap to extract individual offset numbers */ - ntuples = tbm_extract_page_tuple(page, output); - output->blockno = page->blockno; - output->ntuples = ntuples; - output->recheck = page->recheck; + ntuples = tbm_extract_page_tuple(page, tbmres); + tbmres->blockno = page->blockno; + tbmres->ntuples = ntuples; + tbmres->recheck = page->recheck; istate->spageptr++; LWLockRelease(&istate->lock); - return output; + return; } LWLockRelease(&istate->lock); /* Nothing more in the bitmap */ - return NULL; + tbmres->blockno = InvalidBlockNumber; + return; } /* diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h index 3013a44bae..3b432263bb 100644 --- a/src/include/access/gin_private.h +++ b/src/include/access/gin_private.h @@ -353,7 +353,7 @@ typedef struct GinScanEntryData /* for a partial-match or full-scan query, we accumulate all TIDs here */ TIDBitmap *matchBitmap; TBMIterator *matchIterator; - TBMIterateResult *matchResult; + TBMIterateResult matchResult; /* used for Posting list and one page in Posting tree */ ItemPointerData *list; diff --git a/src/include/executor/nodeBitmapHeapscan.h 
b/src/include/executor/nodeBitmapHeapscan.h index cb56d20dc6..3c330f86e6 100644 --- a/src/include/executor/nodeBitmapHeapscan.h +++ b/src/include/executor/nodeBitmapHeapscan.h @@ -34,7 +34,7 @@ typedef struct BitmapHeapIterator struct TBMSharedIterator *parallel; } BitmapHeapIterator; -extern TBMIterateResult *bhs_iterate(BitmapHeapIterator *iterator); +extern void bhs_iterate(BitmapHeapIterator *iterator, TBMIterateResult *result); extern void bhs_end_iterate(BitmapHeapIterator *iterator); diff --git a/src/include/nodes/tidbitmap.h b/src/include/nodes/tidbitmap.h index 432fae5296..f000c1af28 100644 --- a/src/include/nodes/tidbitmap.h +++ b/src/include/nodes/tidbitmap.h @@ -72,8 +72,8 @@ extern bool tbm_is_empty(const TIDBitmap *tbm); extern TBMIterator *tbm_begin_iterate(TIDBitmap *tbm); extern dsa_pointer tbm_prepare_shared_iterate(TIDBitmap *tbm); -extern TBMIterateResult *tbm_iterate(TBMIterator *iterator); -extern TBMIterateResult *tbm_shared_iterate(TBMSharedIterator *iterator); +extern void tbm_iterate(TBMIterator *iterator, TBMIterateResult *tbmres); +extern void tbm_shared_iterate(TBMSharedIterator *iterator, TBMIterateResult *tbmres); extern void tbm_end_iterate(TBMIterator *iterator); extern void tbm_end_shared_iterate(TBMSharedIterator *iterator); extern TBMSharedIterator *tbm_attach_shared_iterate(dsa_area *dsa, -- 2.40.1