Re: Index Skip Scan - Mailing list pgsql-hackers
From | Kyotaro Horiguchi |
---|---|
Subject | Re: Index Skip Scan |
Date | |
Msg-id | 20200214.172313.259958375378013155.horikyota.ntt@gmail.com Whole thread Raw |
In response to | Re: Index Skip Scan (Tomas Vondra <tomas.vondra@2ndquadrant.com>) |
Responses |
Re: Index Skip Scan
|
List | pgsql-hackers |
Thank you very much for the benchmarking! A bit different topic from the latest branch.. At Sat, 8 Feb 2020 14:11:59 +0100, Tomas Vondra <tomas.vondra@2ndquadrant.com> wrote in > >Yes, I've mentioned that already in one of the previous emails :) The > >simplest way I see to achieve what we want is to do something like in > >attached modified version with a new hasDeclaredCursor field. It's not > >a > >final version though, but posted just for discussion, so feel free to > >suggest any improvements or alternatives. > > IMO the proper fix for this case (moving forward, reading backwards) > is > simply making it work by properly checking deleted tuples etc. Not > sure > why that would be so much complex (haven't tried implementing it)? I don't think it's not so complex. But I suspect that it might be a bit harder starting from the current shpae. The first attached (renamed to .txt not to confuse the cfbots) is a small patch that makes sure if _bt_readpage is called with the proper condition as written in its comment, that is, caller must have pinned and read-locked so->currPos.buf. This patch reveals many instances of breakage of the contract. The second is a crude fix the breakages, but the result seems far from neat.. I think we need rethinking taking modification of support functions into consideration. > I think making this depend on things like declared cursor etc. is > going > to be tricky, may easily be more complex than checking deleted tuples, > and the behavior may be quite surprising. Sure. regards. -- Kyotaro Horiguchi NTT Open Source Software Center From de129e5a261ed43f002c1684dc9d6575f3880b16 Mon Sep 17 00:00:00 2001 From: Kyotaro Horiguchi <horikyota.ntt@gmail.com> Date: Thu, 6 Feb 2020 14:31:36 +0900 Subject: [PATCH 1/2] debug aid --- src/backend/access/nbtree/nbtsearch.c | 1 + src/backend/storage/buffer/bufmgr.c | 13 +++++++++++++ src/include/storage/bufmgr.h | 1 + 3 files changed, 15 insertions(+) diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index c5f5d228f2..5cd97d8bb5 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -1785,6 +1785,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum) * used here; this function is what makes it good for currPos. */ Assert(BufferIsValid(so->currPos.buf)); + Assert(BufferLockAndPinHeldByMe(so->currPos.buf)); page = BufferGetPage(so->currPos.buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index aba3960481..08a75a6846 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -1553,6 +1553,19 @@ ReleaseAndReadBuffer(Buffer buffer, return ReadBuffer(relation, blockNum); } +/* tmp function for debugging */ +bool +BufferLockAndPinHeldByMe(Buffer buffer) +{ + BufferDesc *b = GetBufferDescriptor(buffer - 1); + + if (BufferIsPinned(buffer) && + LWLockHeldByMe(BufferDescriptorGetContentLock(b))) + return true; + + return false; +} + /* * PinBuffer -- make buffer unavailable for replacement. * diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 73c7e9ba38..8e5fc639a0 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -177,6 +177,7 @@ extern void MarkBufferDirty(Buffer buffer); extern void IncrBufferRefCount(Buffer buffer); extern Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation, BlockNumber blockNum); +extern bool BufferLockAndPinHeldByMe(Buffer buffer); extern void InitBufferPool(void); extern void InitBufferPoolAccess(void); -- 2.18.2 From 912bad2ec8c66ccd01cebf1f69233b004c633243 Mon Sep 17 00:00:00 2001 From: Kyotaro Horiguchi <horikyota.ntt@gmail.com> Date: Thu, 6 Feb 2020 19:09:09 +0900 Subject: [PATCH 2/2] crude fix --- src/backend/access/nbtree/nbtsearch.c | 43 +++++++++++++++++---------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 5cd97d8bb5..1f18b38ca5 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -1619,6 +1619,9 @@ _bt_skip(IndexScanDesc scan, ScanDirection dir, nextOffset = startOffset = ItemPointerGetOffsetNumber(&scan->xs_itup->t_tid); + if (nextOffset != startOffset) + LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK); + while (nextOffset == startOffset) { IndexTuple itup; @@ -1653,7 +1656,7 @@ _bt_skip(IndexScanDesc scan, ScanDirection dir, offnum = OffsetNumberPrev(offnum); /* Check if _bt_readpage returns already found item */ - if (!_bt_readpage(scan, indexdir, offnum)) + if (!_bt_readpage(scan, dir, offnum)) { /* * There's no actually-matching data on this page. Try to @@ -1668,6 +1671,8 @@ _bt_skip(IndexScanDesc scan, ScanDirection dir, return false; } } + else + LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK); currItem = &so->currPos.items[so->currPos.lastItem]; itup = (IndexTuple) (so->currTuples + currItem->tupleOffset); @@ -1721,24 +1726,30 @@ _bt_skip(IndexScanDesc scan, ScanDirection dir, } /* Now read the data */ - if (!_bt_readpage(scan, indexdir, offnum)) + if (!(ScanDirectionIsForward(dir) && + ScanDirectionIsBackward(indexdir)) || + scanstart) { - /* - * There's no actually-matching data on this page. Try to advance to - * the next page. Return false if there's no matching data at all. - */ - LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK); - if (!_bt_steppage(scan, dir)) + if (!_bt_readpage(scan, dir, offnum)) { - pfree(so->skipScanKey); - so->skipScanKey = NULL; - return false; + /* + * There's no actually-matching data on this page. Try to advance + * to the next page. Return false if there's no matching data at + * all. + */ + LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK); + if (!_bt_steppage(scan, dir)) + { + pfree(so->skipScanKey); + so->skipScanKey = NULL; + return false; + } + } + else + { + /* Drop the lock, and maybe the pin, on the current page */ + LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK); } - } - else - { - /* Drop the lock, and maybe the pin, on the current page */ - LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK); } /* And set IndexTuple */ -- 2.18.2
pgsql-hackers by date: