From fbb728a265464bd1c3d5e823f6bb23d98c8b3df0 Mon Sep 17 00:00:00 2001 From: Andrey Borodin Date: Thu, 19 Jul 2018 14:09:38 +0400 Subject: [PATCH 1/2] Physical GiST scan in VACUUM v11 --- src/backend/access/gist/gistvacuum.c | 231 +++++++++++++++++------------------ 1 file changed, 115 insertions(+), 116 deletions(-) diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c index 5948218c77..ff86f4491f 100644 --- a/src/backend/access/gist/gistvacuum.c +++ b/src/backend/access/gist/gistvacuum.c @@ -100,32 +100,10 @@ gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) typedef struct GistBDItem { - GistNSN parentlsn; - BlockNumber blkno; + Buffer buffer; struct GistBDItem *next; } GistBDItem; -static void -pushStackIfSplited(Page page, GistBDItem *stack) -{ - GISTPageOpaque opaque = GistPageGetOpaque(page); - - if (stack->blkno != GIST_ROOT_BLKNO && !XLogRecPtrIsInvalid(stack->parentlsn) && - (GistFollowRight(page) || stack->parentlsn < GistPageGetNSN(page)) && - opaque->rightlink != InvalidBlockNumber /* sanity check */ ) - { - /* split page detected, install right link to the stack */ - - GistBDItem *ptr = (GistBDItem *) palloc(sizeof(GistBDItem)); - - ptr->blkno = opaque->rightlink; - ptr->parentlsn = stack->parentlsn; - ptr->next = stack->next; - stack->next = ptr; - } -} - - /* * Bulk deletion of all index entries pointing to a set of heap tuples and * check invalid tuples left after upgrade. @@ -138,9 +116,11 @@ IndexBulkDeleteResult * gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, IndexBulkDeleteCallback callback, void *callback_state) { - Relation rel = info->index; - GistBDItem *stack, - *ptr; + Relation rel = info->index; + BlockNumber npages; + bool needLock; + BlockNumber blkno; + GistNSN startNSN = GetInsertRecPtr(); /* first time through? */ if (stats == NULL) @@ -149,125 +129,144 @@ gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, stats->estimated_count = false; stats->num_index_tuples = 0; - stack = (GistBDItem *) palloc0(sizeof(GistBDItem)); - stack->blkno = GIST_ROOT_BLKNO; + /* + * Need lock unless it's local to this backend. + */ + needLock = !RELATION_IS_LOCAL(rel); - while (stack) + /* try to find deleted pages */ + if (needLock) + LockRelationForExtension(rel, ExclusiveLock); + npages = RelationGetNumberOfBlocks(rel); + if (needLock) + UnlockRelationForExtension(rel, ExclusiveLock); + + for (blkno = GIST_ROOT_BLKNO; blkno < npages; blkno++) { - Buffer buffer; - Page page; - OffsetNumber i, - maxoff; - IndexTuple idxtuple; - ItemId iid; + /* + * In case of concurrent splits we may need to jump back + * and vacuum pages again to prevent left dead tuples. + * These fields are here to implement tail recursion of + * these jumps. + */ + BlockNumber nextBlock = blkno; + bool needScan = true; + GistBDItem *bufferStack = NULL; - buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno, - RBM_NORMAL, info->strategy); - LockBuffer(buffer, GIST_SHARE); - gistcheckpage(rel, buffer); - page = (Page) BufferGetPage(buffer); + vacuum_delay_point(); - if (GistPageIsLeaf(page)) + while (needScan) { - OffsetNumber todelete[MaxOffsetNumber]; - int ntodelete = 0; + Buffer buffer; + Page page; + OffsetNumber i, + maxoff; + IndexTuple idxtuple; + ItemId iid; - LockBuffer(buffer, GIST_UNLOCK); - LockBuffer(buffer, GIST_EXCLUSIVE); + needScan = false; + buffer = ReadBufferExtended(rel, MAIN_FORKNUM, nextBlock, RBM_NORMAL, + info->strategy); + /* + * We are not going to stay here for a long time, calling recursive algorithms. + * Especially for an internal page. So, agressivly grab an exclusive lock. + */ + LockBuffer(buffer, GIST_EXCLUSIVE); page = (Page) BufferGetPage(buffer); - if (stack->blkno == GIST_ROOT_BLKNO && !GistPageIsLeaf(page)) + + if (PageIsNew(page) || GistPageIsDeleted(page)) { - /* only the root can become non-leaf during relock */ UnlockReleaseBuffer(buffer); - /* one more check */ + /* TODO: Should not we record free page here? */ continue; } - /* - * check for split proceeded after look at parent, we should check - * it after relock - */ - pushStackIfSplited(page, stack); - - /* - * Remove deletable tuples from page - */ - maxoff = PageGetMaxOffsetNumber(page); - for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + if (GistPageIsLeaf(page)) { - iid = PageGetItemId(page, i); - idxtuple = (IndexTuple) PageGetItem(page, iid); + OffsetNumber todelete[MaxOffsetNumber]; + int ntodelete = 0; + GISTPageOpaque opaque = GistPageGetOpaque(page); + + /* + * If this page was splitted after start of the VACUUM we have to + * revisit rightlink, if it points to block we already scanned. + * This is recursive revisit, should not be deep, but we check + * the possibility of stack overflow anyway. + */ + if ((GistFollowRight(page) || startNSN < GistPageGetNSN(page)) && + (opaque->rightlink != InvalidBlockNumber) && (opaque->rightlink < nextBlock)) + { + /* + * we will aquire locks by following rightlinks + * this lock coupling is allowed in GiST + */ + nextBlock = opaque->rightlink; + needScan = true; + } + + /* + * Remove deletable tuples from page + */ + + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + { + iid = PageGetItemId(page, i); + idxtuple = (IndexTuple) PageGetItem(page, iid); - if (callback(&(idxtuple->t_tid), callback_state)) - todelete[ntodelete++] = i; - else - stats->num_index_tuples += 1; - } + if (callback(&(idxtuple->t_tid), callback_state)) + todelete[ntodelete++] = i; + else + stats->num_index_tuples += 1; + } - stats->tuples_removed += ntodelete; + stats->tuples_removed += ntodelete; - if (ntodelete) - { - START_CRIT_SECTION(); + /* We have dead tuples on the page */ + if (ntodelete) + { + START_CRIT_SECTION(); - MarkBufferDirty(buffer); + MarkBufferDirty(buffer); - PageIndexMultiDelete(page, todelete, ntodelete); - GistMarkTuplesDeleted(page); + PageIndexMultiDelete(page, todelete, ntodelete); + GistMarkTuplesDeleted(page); - if (RelationNeedsWAL(rel)) - { - XLogRecPtr recptr; + if (RelationNeedsWAL(rel)) + { + XLogRecPtr recptr; - recptr = gistXLogUpdate(buffer, - todelete, ntodelete, - NULL, 0, InvalidBuffer); - PageSetLSN(page, recptr); - } - else - PageSetLSN(page, gistGetFakeLSN(rel)); + recptr = gistXLogUpdate(buffer, + todelete, ntodelete, + NULL, 0, InvalidBuffer); + PageSetLSN(page, recptr); + } + else + PageSetLSN(page, gistGetFakeLSN(rel)); - END_CRIT_SECTION(); + END_CRIT_SECTION(); + } } - } - else - { - /* check for split proceeded after look at parent */ - pushStackIfSplited(page, stack); - - maxoff = PageGetMaxOffsetNumber(page); - - for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) + /* We should not unlock buffer if we are going to jump left */ + if (needScan) { - iid = PageGetItemId(page, i); - idxtuple = (IndexTuple) PageGetItem(page, iid); - - ptr = (GistBDItem *) palloc(sizeof(GistBDItem)); - ptr->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); - ptr->parentlsn = BufferGetLSNAtomic(buffer); - ptr->next = stack->next; - stack->next = ptr; - - if (GistTupleIsInvalid(idxtuple)) - ereport(LOG, - (errmsg("index \"%s\" contains an inner tuple marked as invalid", - RelationGetRelationName(rel)), - errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."), - errhint("Please REINDEX it."))); + GistBDItem *ptr = (GistBDItem *) palloc(sizeof(GistBDItem)); + ptr->buffer = buffer; + ptr->next = bufferStack; + bufferStack = ptr; } + else + UnlockReleaseBuffer(buffer); + } + /* unlock stacked buffers */ + while (bufferStack) + { + UnlockReleaseBuffer(bufferStack->buffer); + bufferStack = bufferStack->next; } - - UnlockReleaseBuffer(buffer); - - ptr = stack->next; - pfree(stack); - stack = ptr; - - vacuum_delay_point(); } return stats; -- 2.15.2 (Apple Git-101.1)