From d05c2491a542926cb878f2ce08e8dec108123195 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Mon, 12 Nov 2018 13:11:21 -0800 Subject: [PATCH v16 6/7] Add high key "continuescan" optimization. Teach B-Tree forward index scans to check the high key before moving to the next page in the hopes of finding that it isn't actually necessary to move to the next page. For the same reason, we already opportunistically force a key check of the last item on leaf pages, even when it's clear that the item cannot be returned to the scan because it is dead-to-all. Since forcing the last item to be key checked no longer makes any difference in the case of forward scans, the existing extra key check is now only used for backward scans. Like the existing check, the new check won't always work out, but that seems like an acceptable price to pay. The new approach is more effective than just checking non-pivot tuples, especially with composite indexes and non-unique indexes. The high key represents an upper bound on all values that can appear on the page, which is often greater than whatever tuple happens to appear last at the time of the check. Also, suffix truncation's new logic for picking a split point will often result in high keys that are relatively dissimilar to the other (non-pivot) tuples on the page, and therefore more likely to indicate that the scan need not proceed to the next page. Note that even pg_upgrade'd v3 indexes make use of this optimization. 
--- src/backend/access/nbtree/nbtsearch.c | 23 +++++++-- src/backend/access/nbtree/nbtutils.c | 70 +++++++++++++++++++-------- 2 files changed, 68 insertions(+), 25 deletions(-) diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 1e3c2f638c..9d5c9a9149 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -1371,6 +1371,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum) _bt_parallel_release(scan, BufferGetBlockNumber(so->currPos.buf)); } + continuescan = true; /* default assumption */ minoff = P_FIRSTDATAKEY(opaque); maxoff = PageGetMaxOffsetNumber(page); @@ -1419,16 +1420,30 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum) _bt_saveitem(so, itemIndex, offnum, itup); itemIndex++; } + /* When !continuescan, there can't be any more matches, so stop */ if (!continuescan) - { - /* there can't be any more matches, so stop */ - so->currPos.moreRight = false; break; - } offnum = OffsetNumberNext(offnum); } + /* + * Forward scans need not visit page to the right when high key + * indicates no more matches will be found there. + * + * Checking the high key like this works out more often than you might + * think. Leaf page splits pick a split point between the two most + * dissimilar tuples (this is weighed against the need to evenly share + * free space). Leaf pages with high key attribute values that can + * only appear on non-pivot tuples on the right sibling page are + * common. 
+ */ + if (continuescan && !P_RIGHTMOST(opaque)) + _bt_checkkeys(scan, page, P_HIKEY, dir, &continuescan); + + if (!continuescan) + so->currPos.moreRight = false; + Assert(itemIndex <= MaxIndexTuplesPerPage); so->currPos.firstItem = 0; so->currPos.lastItem = itemIndex - 1; diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index 1b09ab8d6a..ece82b44f9 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -48,7 +48,7 @@ static bool _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op, static bool _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption); static void _bt_mark_scankey_required(ScanKey skey); static bool _bt_check_rowcompare(ScanKey skey, - IndexTuple tuple, TupleDesc tupdesc, + IndexTuple tuple, int tupnatts, TupleDesc tupdesc, ScanDirection dir, bool *continuescan); static int _bt_keep_natts(Relation rel, IndexTuple lastleft, IndexTuple firstright, BTScanInsert itup_key); @@ -1345,11 +1345,14 @@ _bt_mark_scankey_required(ScanKey skey) * * scan: index scan descriptor (containing a search-type scankey) * page: buffer page containing index tuple - * offnum: offset number of index tuple (must be a valid item!) + * offnum: offset number of index tuple (must be hikey or a valid item!) * dir: direction we are scanning in * continuescan: output parameter (will be set correctly in all cases) * - * Caller must hold pin and lock on the index page. + * Caller must hold pin and lock on the index page. Caller can pass a high + * key offnum in the hopes of discovering that the scan need not continue on + * to a page to the right. We don't currently bother limiting high key + * comparisons to SK_BT_REQFWD scan keys. 
*/ IndexTuple _bt_checkkeys(IndexScanDesc scan, @@ -1359,6 +1362,7 @@ _bt_checkkeys(IndexScanDesc scan, ItemId iid = PageGetItemId(page, offnum); bool tuple_alive; IndexTuple tuple; + int tupnatts; TupleDesc tupdesc; BTScanOpaque so; int keysz; @@ -1372,24 +1376,21 @@ _bt_checkkeys(IndexScanDesc scan, * killed tuple as not passing the qual. Most of the time, it's a win to * not bother examining the tuple's index keys, but just return * immediately with continuescan = true to proceed to the next tuple. - * However, if this is the last tuple on the page, we should check the - * index keys to prevent uselessly advancing to the next page. + * However, if this is the first tuple on the page, and we're doing a + * backward scan, we should check the index keys to prevent uselessly + * advancing to the page to the left. This is similar to the high key + * optimization used by forward scan callers. */ if (scan->ignore_killed_tuples && ItemIdIsDead(iid)) { - /* return immediately if there are more tuples on the page */ - if (ScanDirectionIsForward(dir)) - { - if (offnum < PageGetMaxOffsetNumber(page)) - return NULL; - } - else - { - BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); + BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); - if (offnum > P_FIRSTDATAKEY(opaque)) - return NULL; - } + /* forward scan callers check high key instead */ + Assert(offnum >= P_FIRSTDATAKEY(opaque)); + if (ScanDirectionIsForward(dir)) + return NULL; + else if (offnum > P_FIRSTDATAKEY(opaque)) + return NULL; /* * OK, we want to check the keys so we can set continuescan correctly, @@ -1401,6 +1402,7 @@ _bt_checkkeys(IndexScanDesc scan, tuple_alive = true; tuple = (IndexTuple) PageGetItem(page, iid); + tupnatts = BTreeTupleGetNAtts(tuple, scan->indexRelation); tupdesc = RelationGetDescr(scan->indexRelation); so = (BTScanOpaque) scan->opaque; @@ -1412,11 +1414,24 @@ _bt_checkkeys(IndexScanDesc scan, bool isNull; Datum test; - Assert(key->sk_attno <= 
BTreeTupleGetNAtts(tuple, scan->indexRelation)); + /* + * Assume that truncated attribute (from high key) passes the qual. + * The value of a truncated attribute for the first tuple on the right + * page could be any possible value, so we may have to visit the next + * page. + */ + if (key->sk_attno > tupnatts) + { + Assert(offnum == P_HIKEY); + Assert(ScanDirectionIsForward(dir)); + continue; + } + /* row-comparison keys need special processing */ if (key->sk_flags & SK_ROW_HEADER) { - if (_bt_check_rowcompare(key, tuple, tupdesc, dir, continuescan)) + if (_bt_check_rowcompare(key, tuple, tupnatts, tupdesc, dir, + continuescan)) continue; return NULL; } @@ -1547,8 +1562,8 @@ _bt_checkkeys(IndexScanDesc scan, * This is a subroutine for _bt_checkkeys, which see for more info. */ static bool -_bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc, - ScanDirection dir, bool *continuescan) +_bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, + TupleDesc tupdesc, ScanDirection dir, bool *continuescan) { ScanKey subkey = (ScanKey) DatumGetPointer(skey->sk_argument); int32 cmpresult = 0; @@ -1565,6 +1580,19 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc, Assert(subkey->sk_flags & SK_ROW_MEMBER); + /* + * Assume that truncated attribute (from high key) passes the qual. + * The value of a truncated attribute for the first tuple on the right + * page could be any possible value, so we may have to visit the next + * page. + */ + if (subkey->sk_attno > tupnatts) + { + Assert(ScanDirectionIsForward(dir)); + cmpresult = 0; + continue; + } + datum = index_getattr(tuple, subkey->sk_attno, tupdesc, -- 2.17.1