From 417e286dce29aafed6de08784da3cb32db205844 Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Fri, 14 Jun 2024 17:09:50 -0400 Subject: [PATCH v21 19/20] Separate TBMIterator and TBMIterateResult Remove the TBMIterateResult from the TBMSerialIterator and TBMSharedIterator and have tbm_[shared_]iterate() take a TBMIterateResult as a parameter. This will allow multiple TBMIterateResults to exist concurrently allowing asynchronous use of the TIDBitmap for prefetching. tbm_[shared]_iterate() now sets blockno to InvalidBlockNumber when the bitmap is exhausted instead of returning NULL. BitmapHeapScan callers of tbm_iterate make a TBMIterateResult locally and pass it in. Because GIN only needs a single TBMIterateResult, inline the matchResult in the GinScanEntry to avoid having to separately manage memory for the TBMIterateResult. --- src/backend/access/gin/ginget.c | 49 +++++++++------ src/backend/access/gin/ginscan.c | 2 +- src/backend/access/heap/heapam_handler.c | 66 ++++++++++---------- src/backend/nodes/tidbitmap.c | 77 ++++++++++++------------ src/include/access/gin_private.h | 2 +- src/include/nodes/tidbitmap.h | 6 +- 6 files changed, 108 insertions(+), 94 deletions(-) diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index 0d66381df1e..235a465362d 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -332,7 +332,20 @@ restartScanEntry: entry->list = NULL; entry->nlist = 0; entry->matchBitmap = NULL; - entry->matchResult = NULL; + + /* + * MTODO: is it enough to set blockno to InvalidBlockNumber? In all the + * places were we previously set matchResult to NULL, I just set blockno + * to InvalidBlockNumber. It seems like this should be okay because that + * is usually what we check before using the matchResult members. But it + * might be safer to zero out the offsets array. But that is expensive. + */ + entry->matchResult.blockno = InvalidBlockNumber; + entry->matchResult.ntuples = 0; + entry->matchResult.recheck = true; + memset(entry->matchResult.offsets, 0, + sizeof(OffsetNumber) * MaxHeapTuplesPerPage); + entry->reduceResult = false; entry->predictNumberResult = 0; @@ -374,6 +387,7 @@ restartScanEntry: { if (entry->matchIterator) tbm_end_serial_iterate(entry->matchIterator); + entry->matchResult.blockno = InvalidBlockNumber; entry->matchIterator = NULL; tbm_free(entry->matchBitmap); entry->matchBitmap = NULL; @@ -823,18 +837,19 @@ entryGetItem(GinState *ginstate, GinScanEntry entry, { /* * If we've exhausted all items on this block, move to next block - * in the bitmap. + * in the bitmap. tbm_serial_iterate() sets matchResult->blockno + * to InvalidBlockNumber when the bitmap is exhausted. */ - while (entry->matchResult == NULL || - (entry->matchResult->ntuples >= 0 && - entry->offset >= entry->matchResult->ntuples) || - entry->matchResult->blockno < advancePastBlk || + while ((!BlockNumberIsValid(entry->matchResult.blockno)) || + (entry->matchResult.ntuples >= 0 && + entry->offset >= entry->matchResult.ntuples) || + entry->matchResult.blockno < advancePastBlk || (ItemPointerIsLossyPage(&advancePast) && - entry->matchResult->blockno == advancePastBlk)) + entry->matchResult.blockno == advancePastBlk)) { - entry->matchResult = tbm_serial_iterate(entry->matchIterator); + tbm_serial_iterate(entry->matchIterator, &entry->matchResult); - if (entry->matchResult == NULL) + if (!BlockNumberIsValid(entry->matchResult.blockno)) { ItemPointerSetInvalid(&entry->curItem); tbm_end_serial_iterate(entry->matchIterator); @@ -858,10 +873,10 @@ entryGetItem(GinState *ginstate, GinScanEntry entry, * We're now on the first page after advancePast which has any * items on it. If it's a lossy result, return that. */ - if (entry->matchResult->ntuples < 0) + if (entry->matchResult.ntuples < 0) { ItemPointerSetLossyPage(&entry->curItem, - entry->matchResult->blockno); + entry->matchResult.blockno); /* * We might as well fall out of the loop; we could not @@ -875,27 +890,27 @@ entryGetItem(GinState *ginstate, GinScanEntry entry, * Not a lossy page. Skip over any offsets <= advancePast, and * return that. */ - if (entry->matchResult->blockno == advancePastBlk) + if (entry->matchResult.blockno == advancePastBlk) { /* * First, do a quick check against the last offset on the * page. If that's > advancePast, so are all the other * offsets, so just go back to the top to get the next page. */ - if (entry->matchResult->offsets[entry->matchResult->ntuples - 1] <= advancePastOff) + if (entry->matchResult.offsets[entry->matchResult.ntuples - 1] <= advancePastOff) { - entry->offset = entry->matchResult->ntuples; + entry->offset = entry->matchResult.ntuples; continue; } /* Otherwise scan to find the first item > advancePast */ - while (entry->matchResult->offsets[entry->offset] <= advancePastOff) + while (entry->matchResult.offsets[entry->offset] <= advancePastOff) entry->offset++; } ItemPointerSet(&entry->curItem, - entry->matchResult->blockno, - entry->matchResult->offsets[entry->offset]); + entry->matchResult.blockno, + entry->matchResult.offsets[entry->offset]); entry->offset++; /* Done unless we need to reduce the result */ diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c index 46dde99514d..43bb6c7ac4b 100644 --- a/src/backend/access/gin/ginscan.c +++ b/src/backend/access/gin/ginscan.c @@ -106,7 +106,7 @@ ginFillScanEntry(GinScanOpaque so, OffsetNumber attnum, ItemPointerSetMin(&scanEntry->curItem); scanEntry->matchBitmap = NULL; scanEntry->matchIterator = NULL; - scanEntry->matchResult = NULL; + scanEntry->matchResult.blockno = InvalidBlockNumber; scanEntry->list = NULL; scanEntry->nlist = 0; scanEntry->offset = InvalidOffsetNumber; diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 973eaf6d713..aa862348672 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -2132,17 +2132,19 @@ BitmapPrefetch(BitmapHeapScanDesc *scan) { while (scan->prefetch_pages < scan->prefetch_target) { - TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator); + TBMIterateResult tbmpre; bool skip_fetch; - if (tbmpre == NULL) + tbm_iterate(prefetch_iterator, &tbmpre); + + if (!BlockNumberIsValid(tbmpre.blockno)) { /* No more pages to prefetch */ tbm_end_iterate(prefetch_iterator); break; } scan->prefetch_pages++; - scan->pfblock = tbmpre->blockno; + scan->pfblock = tbmpre.blockno; /* * If we expect not to have to actually read this heap page, @@ -2151,13 +2153,13 @@ BitmapPrefetch(BitmapHeapScanDesc *scan) * prefetch_pages?) */ skip_fetch = (!(scan->base.flags & SO_NEED_TUPLES) && - !tbmpre->recheck && + !tbmpre.recheck && VM_ALL_VISIBLE(scan->base.rel, - tbmpre->blockno, + tbmpre.blockno, &scan->pvmbuffer)); if (!skip_fetch) - PrefetchBuffer(scan->base.rel, MAIN_FORKNUM, tbmpre->blockno); + PrefetchBuffer(scan->base.rel, MAIN_FORKNUM, tbmpre.blockno); } } @@ -2172,7 +2174,7 @@ BitmapPrefetch(BitmapHeapScanDesc *scan) { while (1) { - TBMIterateResult *tbmpre; + TBMIterateResult tbmpre; bool do_prefetch = false; bool skip_fetch; @@ -2191,25 +2193,25 @@ BitmapPrefetch(BitmapHeapScanDesc *scan) if (!do_prefetch) return; - tbmpre = tbm_iterate(prefetch_iterator); - if (tbmpre == NULL) + tbm_iterate(prefetch_iterator, &tbmpre); + if (!BlockNumberIsValid(tbmpre.blockno)) { /* No more pages to prefetch */ tbm_end_iterate(prefetch_iterator); break; } - scan->pfblock = tbmpre->blockno; + scan->pfblock = tbmpre.blockno; /* As above, skip prefetch if we expect not to need page */ skip_fetch = (!(scan->base.flags & SO_NEED_TUPLES) && - !tbmpre->recheck && + !tbmpre.recheck && VM_ALL_VISIBLE(scan->base.rel, - tbmpre->blockno, + tbmpre.blockno, &scan->pvmbuffer)); if (!skip_fetch) - PrefetchBuffer(scan->base.rel, MAIN_FORKNUM, tbmpre->blockno); + PrefetchBuffer(scan->base.rel, MAIN_FORKNUM, tbmpre.blockno); } } } @@ -2228,7 +2230,7 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanDesc *scan) { #ifdef USE_PREFETCH ParallelBitmapHeapState *pstate = scan->pstate; - TBMIterateResult *tbmpre; + TBMIterateResult tbmpre; if (pstate == NULL) { @@ -2242,8 +2244,8 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanDesc *scan) else if (!prefetch_iterator->exhausted) { /* Do not let the prefetch iterator get behind the main one */ - tbmpre = tbm_iterate(prefetch_iterator); - scan->pfblock = tbmpre ? tbmpre->blockno : InvalidBlockNumber; + tbm_iterate(prefetch_iterator, &tbmpre); + scan->pfblock = tbmpre.blockno; } return; } @@ -2282,8 +2284,8 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanDesc *scan) */ if (!prefetch_iterator->exhausted) { - tbmpre = tbm_iterate(prefetch_iterator); - scan->pfblock = tbmpre ? tbmpre->blockno : InvalidBlockNumber; + tbm_iterate(prefetch_iterator, &tbmpre); + scan->pfblock = tbmpre.blockno; } } } @@ -2354,7 +2356,7 @@ heapam_scan_bitmap_next_block(BitmapHeapScanDesc *scan, Buffer buffer; Snapshot snapshot; int ntup; - TBMIterateResult *tbmres; + TBMIterateResult tbmres; scan->vis_idx = 0; scan->vis_ntuples = 0; @@ -2367,9 +2369,9 @@ heapam_scan_bitmap_next_block(BitmapHeapScanDesc *scan, { CHECK_FOR_INTERRUPTS(); - tbmres = tbm_iterate(&scan->iterator); + tbm_iterate(&scan->iterator, &tbmres); - if (tbmres == NULL) + if (!BlockNumberIsValid(tbmres.blockno)) return false; /* @@ -2380,11 +2382,11 @@ heapam_scan_bitmap_next_block(BitmapHeapScanDesc *scan, * isolation though, as we need to examine all invisible tuples * reachable by the index. */ - } while (!IsolationIsSerializable() && tbmres->blockno >= scan->nblocks); + } while (!IsolationIsSerializable() && tbmres.blockno >= scan->nblocks); /* Got a valid block */ - block = tbmres->blockno; - *recheck = tbmres->recheck; + block = tbmres.blockno; + *recheck = tbmres.recheck; /* * We can skip fetching the heap page if we don't need any fields from the @@ -2392,14 +2394,14 @@ heapam_scan_bitmap_next_block(BitmapHeapScanDesc *scan, * page are visible to our transaction. */ if (!(scan->base.flags & SO_NEED_TUPLES) && - !tbmres->recheck && - VM_ALL_VISIBLE(scan->base.rel, tbmres->blockno, &scan->vmbuffer)) + !tbmres.recheck && + VM_ALL_VISIBLE(scan->base.rel, tbmres.blockno, &scan->vmbuffer)) { /* can't be lossy in the skip_fetch case */ - Assert(tbmres->ntuples >= 0); + Assert(tbmres.ntuples >= 0); Assert(scan->empty_tuples_pending >= 0); - scan->empty_tuples_pending += tbmres->ntuples; + scan->empty_tuples_pending += tbmres.ntuples; return true; } @@ -2431,7 +2433,7 @@ heapam_scan_bitmap_next_block(BitmapHeapScanDesc *scan, /* * We need two separate strategies for lossy and non-lossy cases. */ - if (tbmres->ntuples >= 0) + if (tbmres.ntuples >= 0) { /* * Bitmap is non-lossy, so we just look through the offsets listed in @@ -2440,9 +2442,9 @@ heapam_scan_bitmap_next_block(BitmapHeapScanDesc *scan, */ int curslot; - for (curslot = 0; curslot < tbmres->ntuples; curslot++) + for (curslot = 0; curslot < tbmres.ntuples; curslot++) { - OffsetNumber offnum = tbmres->offsets[curslot]; + OffsetNumber offnum = tbmres.offsets[curslot]; ItemPointerData tid; HeapTupleData heapTuple; @@ -2492,7 +2494,7 @@ heapam_scan_bitmap_next_block(BitmapHeapScanDesc *scan, Assert(ntup <= MaxHeapTuplesPerPage); scan->vis_ntuples = ntup; - if (tbmres->ntuples < 0) + if (tbmres.ntuples < 0) (*lossy_pages)++; else (*exact_pages)++; diff --git a/src/backend/nodes/tidbitmap.c b/src/backend/nodes/tidbitmap.c index 8af2193b7ec..653c0133455 100644 --- a/src/backend/nodes/tidbitmap.c +++ b/src/backend/nodes/tidbitmap.c @@ -172,7 +172,6 @@ struct TBMSerialIterator int spageptr; /* next spages index */ int schunkptr; /* next schunks index */ int schunkbit; /* next bit to check in current schunk */ - TBMIterateResult output; }; /* @@ -213,7 +212,6 @@ struct TBMSharedIterator PTEntryArray *ptbase; /* pagetable element array */ PTIterationArray *ptpages; /* sorted exact page index list */ PTIterationArray *ptchunks; /* sorted lossy page index list */ - TBMIterateResult output; }; /* Local function prototypes */ @@ -948,22 +946,22 @@ tbm_advance_schunkbit(PagetableEntry *chunk, int *schunkbitp) /* * tbm_serial_iterate - scan through next page of a TIDBitmap * - * Returns a TBMIterateResult representing one page, or NULL if there are - * no more pages to scan. Pages are guaranteed to be delivered in numerical - * order. If result->ntuples < 0, then the bitmap is "lossy" and failed to - * remember the exact tuples to look at on this page --- the caller must - * examine all tuples on the page and check if they meet the intended - * condition. If result->recheck is true, only the indicated tuples need - * be examined, but the condition must be rechecked anyway. (For ease of - * testing, recheck is always set true when ntuples < 0.) + * Caller must pass in a TBMIterateResult to be filled. + * + * Pages are guaranteed to be delivered in numerical order. If result->ntuples + * < 0, then the bitmap is "lossy" and failed to remember the exact tuples to + * look at on this page --- the caller must examine all tuples on the page and + * check if they meet the intended condition. If result->recheck is true, only + * the indicated tuples need be examined, but the condition must be rechecked + * anyway. (For ease of testing, recheck is always set true when ntuples < 0.) */ -TBMIterateResult * -tbm_serial_iterate(TBMSerialIterator *iterator) +void +tbm_serial_iterate(TBMSerialIterator *iterator, TBMIterateResult *tbmres) { TIDBitmap *tbm = iterator->tbm; - TBMIterateResult *output = &(iterator->output); Assert(tbm->iterating == TBM_ITERATING_PRIVATE); + Assert(tbmres); /* * If lossy chunk pages remain, make sure we've advanced schunkptr/ @@ -999,11 +997,11 @@ tbm_serial_iterate(TBMSerialIterator *iterator) chunk_blockno < tbm->spages[iterator->spageptr]->blockno) { /* Return a lossy page indicator from the chunk */ - output->blockno = chunk_blockno; - output->ntuples = -1; - output->recheck = true; + tbmres->blockno = chunk_blockno; + tbmres->ntuples = -1; + tbmres->recheck = true; iterator->schunkbit++; - return output; + return; } } @@ -1019,16 +1017,16 @@ tbm_serial_iterate(TBMSerialIterator *iterator) page = tbm->spages[iterator->spageptr]; /* scan bitmap to extract individual offset numbers */ - ntuples = tbm_extract_page_tuple(page, output); - output->blockno = page->blockno; - output->ntuples = ntuples; - output->recheck = page->recheck; + ntuples = tbm_extract_page_tuple(page, tbmres); + tbmres->blockno = page->blockno; + tbmres->ntuples = ntuples; + tbmres->recheck = page->recheck; iterator->spageptr++; - return output; + return; } /* Nothing more in the bitmap */ - return NULL; + tbmres->blockno = InvalidBlockNumber; } /* @@ -1038,10 +1036,9 @@ tbm_serial_iterate(TBMSerialIterator *iterator) * across multiple processes. We need to acquire the iterator LWLock, * before accessing the shared members. */ -TBMIterateResult * -tbm_shared_iterate(TBMSharedIterator *iterator) +void +tbm_shared_iterate(TBMSharedIterator *iterator, TBMIterateResult *tbmres) { - TBMIterateResult *output = &iterator->output; TBMSharedIteratorState *istate = iterator->state; PagetableEntry *ptbase = NULL; int *idxpages = NULL; @@ -1092,13 +1089,13 @@ tbm_shared_iterate(TBMSharedIterator *iterator) chunk_blockno < ptbase[idxpages[istate->spageptr]].blockno) { /* Return a lossy page indicator from the chunk */ - output->blockno = chunk_blockno; - output->ntuples = -1; - output->recheck = true; + tbmres->blockno = chunk_blockno; + tbmres->ntuples = -1; + tbmres->recheck = true; istate->schunkbit++; LWLockRelease(&istate->lock); - return output; + return; } } @@ -1108,21 +1105,21 @@ tbm_shared_iterate(TBMSharedIterator *iterator) int ntuples; /* scan bitmap to extract individual offset numbers */ - ntuples = tbm_extract_page_tuple(page, output); - output->blockno = page->blockno; - output->ntuples = ntuples; - output->recheck = page->recheck; + ntuples = tbm_extract_page_tuple(page, tbmres); + tbmres->blockno = page->blockno; + tbmres->ntuples = ntuples; + tbmres->recheck = page->recheck; istate->spageptr++; LWLockRelease(&istate->lock); - return output; + return; } LWLockRelease(&istate->lock); /* Nothing more in the bitmap */ - return NULL; + tbmres->blockno = InvalidBlockNumber; } /* @@ -1592,14 +1589,14 @@ tbm_end_iterate(TBMIterator *iterator) /* * Get the next TBMIterateResult from the shared or non-shared bitmap iterator. */ -TBMIterateResult * -tbm_iterate(TBMIterator *iterator) +void +tbm_iterate(TBMIterator *iterator, TBMIterateResult *tbmres) { Assert(iterator); Assert(!iterator->exhausted); if (iterator->serial) - return tbm_serial_iterate(iterator->serial); + tbm_serial_iterate(iterator->serial, tbmres); else - return tbm_shared_iterate(iterator->parallel); + tbm_shared_iterate(iterator->parallel, tbmres); } diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h index cfac6ca3923..d8a460d2e2f 100644 --- a/src/include/access/gin_private.h +++ b/src/include/access/gin_private.h @@ -353,7 +353,7 @@ typedef struct GinScanEntryData /* for a partial-match or full-scan query, we accumulate all TIDs here */ TIDBitmap *matchBitmap; TBMSerialIterator *matchIterator; - TBMIterateResult *matchResult; + TBMIterateResult matchResult; /* used for Posting list and one page in Posting tree */ ItemPointerData *list; diff --git a/src/include/nodes/tidbitmap.h b/src/include/nodes/tidbitmap.h index 78e612b58cc..60c6eb41738 100644 --- a/src/include/nodes/tidbitmap.h +++ b/src/include/nodes/tidbitmap.h @@ -83,8 +83,8 @@ extern bool tbm_is_empty(const TIDBitmap *tbm); extern TBMSerialIterator *tbm_begin_serial_iterate(TIDBitmap *tbm); extern dsa_pointer tbm_prepare_shared_iterate(TIDBitmap *tbm); -extern TBMIterateResult *tbm_serial_iterate(TBMSerialIterator *iterator); -extern TBMIterateResult *tbm_shared_iterate(TBMSharedIterator *iterator); +extern void tbm_serial_iterate(TBMSerialIterator *iterator, TBMIterateResult *tbmres); +extern void tbm_shared_iterate(TBMSharedIterator *iterator, TBMIterateResult *tbmres); extern void tbm_end_serial_iterate(TBMSerialIterator *iterator); extern void tbm_end_shared_iterate(TBMSharedIterator *iterator); extern TBMSharedIterator *tbm_attach_shared_iterate(dsa_area *dsa, @@ -94,7 +94,7 @@ extern long tbm_calculate_entries(double maxbytes); extern void tbm_begin_iterate(TBMIterator *iterator, TIDBitmap *tbm, dsa_area *dsa, dsa_pointer dsp); extern void tbm_end_iterate(TBMIterator *iterator); -extern TBMIterateResult *tbm_iterate(TBMIterator *iterator); +extern void tbm_iterate(TBMIterator *iterator, TBMIterateResult *tbmres); #endif /* TIDBITMAP_H */ -- 2.34.1