From d6d85dc5c1160e4e6eba61543ac6f4e35c2c196f Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Mon, 17 Jun 2019 15:42:19 -0700 Subject: [PATCH 12/12] Index scan positioning DEBUG1 instrumentation --- src/backend/access/nbtree/nbtree.c | 41 ++++ src/backend/access/nbtree/nbtsearch.c | 277 +++++++++++++++++++++++++- src/backend/utils/adt/selfuncs.c | 2 + 3 files changed, 319 insertions(+), 1 deletion(-) diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 85e54ac44b..bf0bec795c 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -22,6 +22,7 @@ #include "access/nbtxlog.h" #include "access/relscan.h" #include "access/xlog.h" +#include "catalog/catalog.h" #include "commands/progress.h" #include "commands/vacuum.h" #include "miscadmin.h" @@ -198,6 +199,10 @@ btinsert(Relation rel, Datum *values, bool *isnull, bool result; IndexTuple itup; + if (!IsCatalogRelation(rel)) + elog(DEBUG1, "%s call to btinsert()", + RelationGetRelationName(rel)); + /* generate an index tuple */ itup = index_form_tuple(RelationGetDescr(rel), values, isnull); itup->t_tid = *ht_ctid; @@ -218,6 +223,10 @@ btgettuple(IndexScanDesc scan, ScanDirection dir) BTScanOpaque so = (BTScanOpaque) scan->opaque; bool res; + if (!IsCatalogRelation(scan->indexRelation)) + elog(DEBUG1, "%s call to btgettuple()", + RelationGetRelationName(scan->indexRelation)); + /* btree indexes are never lossy */ scan->xs_recheck = false; @@ -293,6 +302,10 @@ btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm) int64 ntids = 0; ItemPointer heapTid; + if (!IsCatalogRelation(scan->indexRelation)) + elog(DEBUG1, "%s call to btgetbitmap()", + RelationGetRelationName(scan->indexRelation)); + /* * If we have any array keys, initialize them. */ @@ -350,6 +363,10 @@ btbeginscan(Relation rel, int nkeys, int norderbys) IndexScanDesc scan; BTScanOpaque so; + if (!IsCatalogRelation(rel)) + elog(DEBUG1, "%s call to btbeginscan()", + RelationGetRelationName(rel)); + /* no order by operators allowed */ Assert(norderbys == 0); @@ -396,6 +413,10 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, { BTScanOpaque so = (BTScanOpaque) scan->opaque; + if (!IsCatalogRelation(scan->indexRelation)) + elog(DEBUG1, "%s call to btrescan()", + RelationGetRelationName(scan->indexRelation)); + /* we aren't holding any read locks, but gotta drop the pins */ if (BTScanPosIsValid(so->currPos)) { @@ -455,6 +476,10 @@ btendscan(IndexScanDesc scan) { BTScanOpaque so = (BTScanOpaque) scan->opaque; + if (!IsCatalogRelation(scan->indexRelation)) + elog(DEBUG1, "%s call to btendscan()", + RelationGetRelationName(scan->indexRelation)); + /* we aren't holding any read locks, but gotta drop the pins */ if (BTScanPosIsValid(so->currPos)) { @@ -491,6 +516,10 @@ btmarkpos(IndexScanDesc scan) { BTScanOpaque so = (BTScanOpaque) scan->opaque; + if (!IsCatalogRelation(scan->indexRelation)) + elog(DEBUG1, "%s call to btmarkpos()", + RelationGetRelationName(scan->indexRelation)); + /* There may be an old mark with a pin (but no lock). */ BTScanPosUnpinIfPinned(so->markPos); @@ -521,6 +550,10 @@ btrestrpos(IndexScanDesc scan) { BTScanOpaque so = (BTScanOpaque) scan->opaque; + if (!IsCatalogRelation(scan->indexRelation)) + elog(DEBUG1, "%s call to btrestrpos()", + RelationGetRelationName(scan->indexRelation)); + /* Restore the marked positions of any array keys */ if (so->numArrayKeys) _bt_restore_array_keys(scan); @@ -859,6 +892,10 @@ btbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, Relation rel = info->index; BTCycleId cycleid; + if (!IsCatalogRelation(rel)) + elog(DEBUG1, "%s call to btbulkdelete()", + RelationGetRelationName(rel)); + /* allocate stats if first time through, else re-use existing struct */ if (stats == NULL) stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); @@ -900,6 +937,10 @@ btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) if (info->analyze_only) return stats; + if (!IsCatalogRelation(info->index)) + elog(DEBUG1, "%s call to btvacuumcleanup()", + RelationGetRelationName(info->index)); + /* * If btbulkdelete was called, we need not do anything, just return the * stats from the latest btbulkdelete call. If it wasn't called, we might diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index c655dadb96..e480d56ff2 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -17,6 +17,7 @@ #include "access/nbtree.h" #include "access/relscan.h" +#include "catalog/catalog.h" #include "miscadmin.h" #include "pgstat.h" #include "storage/predicate.h" @@ -67,6 +68,63 @@ _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp) } } +static void +print_itup(BlockNumber blk, IndexTuple left, IndexTuple right, Relation rel, + char *extra) +{ + bool isnull[INDEX_MAX_KEYS]; + Datum values[INDEX_MAX_KEYS]; + char *lkey_desc = NULL; + char *rkey_desc; + + /* Avoid infinite recursion -- don't instrument catalog indexes */ + if (!IsCatalogRelation(rel)) + { + TupleDesc itupdesc = RelationGetDescr(rel); + int natts; + int indnkeyatts = rel->rd_index->indnkeyatts; + + natts = BTreeTupleGetNAtts(left, rel); + itupdesc->natts = Min(indnkeyatts, natts); + memset(&isnull, 0xFF, sizeof(isnull)); + index_deform_tuple(left, itupdesc, values, isnull); + rel->rd_index->indnkeyatts = natts; + + /* + * Since the regression tests should pass when the instrumentation + * patch is applied, be prepared for BuildIndexValueDescription() to + * return NULL due to security considerations. + */ + lkey_desc = BuildIndexValueDescription(rel, values, isnull); + if (lkey_desc && right) + { + /* + * Revolting hack: modify tuple descriptor to have number of key + * columns actually present in caller's pivot tuples + */ + natts = BTreeTupleGetNAtts(right, rel); + itupdesc->natts = Min(indnkeyatts, natts); + memset(&isnull, 0xFF, sizeof(isnull)); + index_deform_tuple(right, itupdesc, values, isnull); + rel->rd_index->indnkeyatts = natts; + rkey_desc = BuildIndexValueDescription(rel, values, isnull); + elog(DEBUG1, "%s blk %u sk > %s, sk <= %s %s", + RelationGetRelationName(rel), blk, lkey_desc, rkey_desc, + extra); + pfree(rkey_desc); + } + else + elog(DEBUG1, "%s blk %u sk check %s %s", + RelationGetRelationName(rel), blk, lkey_desc, extra); + + /* Cleanup */ + itupdesc->natts = IndexRelationGetNumberOfAttributes(rel); + rel->rd_index->indnkeyatts = indnkeyatts; + if (lkey_desc) + pfree(lkey_desc); + } +} + /* * _bt_search() -- Search the tree for a particular scankey, * or more precisely for the first leaf page it could be on. @@ -113,6 +171,7 @@ _bt_search(Relation rel, BTScanInsert key, Buffer *bufP, int access, BlockNumber blkno; BlockNumber par_blkno; BTStack new_stack; + IndexTuple right; /* * Race -- the page we just grabbed may have split since we read its @@ -142,6 +201,40 @@ _bt_search(Relation rel, BTScanInsert key, Buffer *bufP, int access, offnum = _bt_binsrch(rel, key, *bufP); itemid = PageGetItemId(page, offnum); itup = (IndexTuple) PageGetItem(page, itemid); + + /* + * Every downlink is between two separator keys, provided you pretend + * that even rightmost pages have a positive infinity high key. The + * key to the left of the downlink is a strict lower bound for items + * that can be found by following the downlink, whereas the right + * separator is a <= bound. + */ + if (offnum == PageGetMaxOffsetNumber(page)) + { + /* + * XXX: This is correct even on rightmost page, since "high key" + * position item will be negative infinity item, which is printed + * blank. If you assume that even rightmost pages have a positive + * infinity high key (and don't expect the instrumentation of the + * tuple to say either positive or negative infinity) then it + * makes sense. + * + * An internal page with only one downlink is rare though possible + * (see comments above _bt_binsrch()). Note that even in that + * case there are two separators (positive and negative infinity). + */ + itemid = PageGetItemId(page, P_HIKEY); + right = (IndexTuple) PageGetItem(page, itemid); + print_itup(BufferGetBlockNumber(*bufP), itup, right, rel, + "(<= separator is high key)"); + } + else if (OffsetNumberNext(offnum) <= PageGetMaxOffsetNumber(page)) + { + itemid = PageGetItemId(page, OffsetNumberNext(offnum)); + right = (IndexTuple) PageGetItem(page, itemid); + print_itup(BufferGetBlockNumber(*bufP), itup, right, rel, ""); + } + blkno = BTreeInnerTupleGetDownLink(itup); par_blkno = BufferGetBlockNumber(*bufP); @@ -274,6 +367,9 @@ _bt_moveright(Relation rel, for (;;) { + IndexTuple hikey; + ItemId itemid; + page = BufferGetPage(buf); TestForOldSnapshot(snapshot, rel, page); opaque = (BTPageOpaque) PageGetSpecialPointer(page); @@ -305,14 +401,40 @@ _bt_moveright(Relation rel, continue; } - if (P_IGNORE(opaque) || _bt_compare(rel, key, page, P_HIKEY) >= cmpval) + if (P_IGNORE(opaque)) { /* step right one page */ + elog(DEBUG1, "%s blk %u must move right because page is ignorable", + RelationGetRelationName(rel), BufferGetBlockNumber(buf)); + buf = _bt_relandgetbuf(rel, buf, opaque->btpo_next, access); + continue; + } + else if (_bt_compare(rel, key, page, P_HIKEY) >= cmpval) + { + /* + * Very unlikely to catch this -- repeated moving right at same + * point in index suggests corruption masked by moving right + */ + itemid = PageGetItemId(page, P_HIKEY); + hikey = (IndexTuple) PageGetItem(page, itemid); + print_itup(BufferGetBlockNumber(buf), hikey, NULL, rel, + "high key move right"); + /* step right one page */ buf = _bt_relandgetbuf(rel, buf, opaque->btpo_next, access); continue; } else + { + /* + * No need to move right (common case), but report that to be + * consistent + */ + itemid = PageGetItemId(page, P_HIKEY); + hikey = (IndexTuple) PageGetItem(page, itemid); + print_itup(BufferGetBlockNumber(buf), hikey, NULL, rel, + "high key no move right"); break; + } } if (P_IGNORE(opaque)) @@ -778,7 +900,12 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) * never be satisfied (eg, x == 1 AND x > 2). */ if (!so->qual_ok) + { + if (!IsCatalogRelation(scan->indexRelation)) + elog(DEBUG1, "%s preprocessing determined that keys are contradictory", + RelationGetRelationName(scan->indexRelation)); return false; + } /* * For parallel scans, get the starting page from shared state. If the @@ -788,6 +915,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) */ if (scan->parallel_scan != NULL) { + /* XXX: No instrumentation for parallel scans */ status = _bt_parallel_seize(scan, &blkno); if (!status) return false; @@ -986,6 +1114,15 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) { bool match; + if (!IsCatalogRelation(scan->indexRelation)) + elog(DEBUG1, "%s sk could not be formed, so descending to %s leaf page in whole index", + RelationGetRelationName(scan->indexRelation), + ScanDirectionIsForward(dir) ? "leftmost" : "rightmost"); + + /* + * Note that _bt_endpoint() will call _bt_readpage() -- it will be + * called, though not from usual place + */ match = _bt_endpoint(scan, dir); if (!match) @@ -1008,6 +1145,9 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) for (i = 0; i < keysCount; i++) { ScanKey cur = startKeys[i]; + Oid typOutput; + bool varlenatype; + char *val; Assert(cur->sk_attno == i + 1); @@ -1032,6 +1172,27 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) } memcpy(inskey.scankeys + i, subkey, sizeof(ScanKeyData)); + /* Report row comparison header's first insertion scankey entry */ + if (!IsCatalogRelation(rel)) + { + if (subkey->sk_subtype != InvalidOid) + getTypeOutputInfo(subkey->sk_subtype, + &typOutput, &varlenatype); + else + getTypeOutputInfo(rel->rd_opcintype[i], + &typOutput, &varlenatype); + val = OidOutputFunctionCall(typOutput, subkey->sk_argument); + if (val) + { + elog(DEBUG1, "%s sk subkey attr %d val: %s (%s, %s)", + RelationGetRelationName(rel), subkey->sk_attno, val, + (subkey->sk_flags & SK_BT_NULLS_FIRST) != 0 ? "NULLS FIRST" : "NULLS LAST", + (subkey->sk_flags & SK_BT_DESC) != 0 ? "DESC" : "ASC"); + + pfree(val); + } + } + /* * If the row comparison is the last positioning key we accepted, * try to add additional keys from the lower-order row members. @@ -1065,6 +1226,32 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) memcpy(inskey.scankeys + keysCount, subkey, sizeof(ScanKeyData)); keysCount++; + + /* + * The need to separately report additional + * subkeys-as-insertion-scankey is an artifact of the way + * extra keys are added here more or less as an + * opportunistic optimization used when row comparison is + * the last positioning key. + */ + if (!IsCatalogRelation(rel)) + { + if (subkey->sk_subtype != InvalidOid) + getTypeOutputInfo(subkey->sk_subtype, + &typOutput, &varlenatype); + else + getTypeOutputInfo(rel->rd_opcintype[i], + &typOutput, &varlenatype); + val = OidOutputFunctionCall(typOutput, subkey->sk_argument); + if (val) + { + elog(DEBUG1, "%s sk extra subkey attr %d val: %s (%s, %s)", + RelationGetRelationName(rel), subkey->sk_attno, val, + (subkey->sk_flags & SK_BT_NULLS_FIRST) != 0 ? "NULLS FIRST" : "NULLS LAST", + (subkey->sk_flags & SK_BT_DESC) != 0 ? "DESC" : "ASC"); + pfree(val); + } + } if (subkey->sk_flags & SK_ROW_END) { used_all_subkeys = true; @@ -1139,6 +1326,38 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) cmp_proc, cur->sk_argument); } + + /* + * Most index scans have insertion scan key entries reported here + */ + if (!IsCatalogRelation(rel)) + { + if (!(cur->sk_flags & SK_ISNULL)) + { + if (cur->sk_subtype != InvalidOid) + getTypeOutputInfo(cur->sk_subtype, + &typOutput, &varlenatype); + else + getTypeOutputInfo(rel->rd_opcintype[i], + &typOutput, &varlenatype); + val = OidOutputFunctionCall(typOutput, cur->sk_argument); + if (val) + { + elog(DEBUG1, "%s sk attr %d val: %s (%s, %s)", + RelationGetRelationName(rel), i, val, + (cur->sk_flags & SK_BT_NULLS_FIRST) != 0 ? "NULLS FIRST" : "NULLS LAST", + (cur->sk_flags & SK_BT_DESC) != 0 ? "DESC" : "ASC"); + pfree(val); + } + } + else + { + elog(DEBUG1, "%s sk attr %d val: NULL (%s, %s)", + RelationGetRelationName(rel), i, + (cur->sk_flags & SK_BT_NULLS_FIRST) != 0 ? "NULLS FIRST" : "NULLS LAST", + (cur->sk_flags & SK_BT_DESC) != 0 ? "DESC" : "ASC"); + } + } } } @@ -1241,6 +1460,12 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) inskey.scantid = NULL; inskey.keysz = keysCount; + /* Report additional insertion scan key details */ + if (!IsCatalogRelation(rel)) + elog(DEBUG1, "%s searching tree with %d keys, nextkey=%d, goback=%d", + RelationGetRelationName(rel), inskey.keysz, inskey.nextkey, + goback); + /* * Use the manufactured insertion scan key to descend the tree and * position ourselves on the target leaf page. @@ -1297,6 +1522,13 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) if (goback) offnum = OffsetNumberPrev(offnum); + /* Report _bt_readpage()'s starting offset */ + if (!IsCatalogRelation(rel)) + elog(DEBUG1, "%s blk %u initial leaf page offset is %u out of %lu", + RelationGetRelationName(rel), + BufferGetBlockNumber(buf), offnum, + PageGetMaxOffsetNumber(BufferGetPage(buf))); + /* remember which buffer we have pinned, if any */ Assert(!BTScanPosIsValid(so->currPos)); so->currPos.buf = buf; @@ -1500,6 +1732,12 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum) offnum = OffsetNumberNext(offnum); } + /* Report which offset/item terminated index scan */ + if (!continuescan && !IsCatalogRelation(scan->indexRelation)) + elog(DEBUG1, "%s blk %u non-pivot offnum %u ended forward scan", + RelationGetRelationName(scan->indexRelation), + BufferGetBlockNumber(so->currPos.buf), offnum); + /* * We don't need to visit page to the right when the high key * indicates that no more matches will be found there. @@ -1519,6 +1757,31 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum) truncatt = BTreeTupleGetNAtts(itup, scan->indexRelation); _bt_checkkeys(scan, itup, truncatt, dir, &continuescan); + + /* + * Report if high key check was effective. This is also a + * reasonably useful way of indicating the progress of a large + * range scan that visits many leaf pages. + */ + if (continuescan) + print_itup(BufferGetBlockNumber(so->currPos.buf), itup, + NULL, scan->indexRelation, + "continuescan high key check did not end forward scan"); + else + print_itup(BufferGetBlockNumber(so->currPos.buf), itup, + NULL, scan->indexRelation, + "continuescan high key check ended forward scan"); + } + else if (continuescan && P_RIGHTMOST(opaque) && + !IsCatalogRelation(scan->indexRelation)) + { + /* + * Report that range scan reached end of entire index -- this + * won't be caught by above non-pivot elog(). + */ + elog(DEBUG1, "%s blk %u non-pivot offnum %u (last in whole index) ended forward scan", + RelationGetRelationName(scan->indexRelation), + BufferGetBlockNumber(so->currPos.buf), offnum); } if (!continuescan) @@ -1587,6 +1850,18 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum) offnum = OffsetNumberPrev(offnum); } + if (!IsCatalogRelation(scan->indexRelation)) + { + if (!continuescan) + elog(DEBUG1, "%s blk %u non-pivot offnum %u ended backwards scan", + RelationGetRelationName(scan->indexRelation), + BufferGetBlockNumber(so->currPos.buf), offnum); + else + elog(DEBUG1, "%s blk %u backwards scan must continue to left sibling", + RelationGetRelationName(scan->indexRelation), + BufferGetBlockNumber(so->currPos.buf)); + } + Assert(itemIndex >= 0); so->currPos.firstItem = itemIndex; so->currPos.lastItem = MaxIndexTuplesPerPage - 1; diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index d7e3f09f1a..dbcb2e2c27 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -5126,6 +5126,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, /* No hope if no relation or it doesn't have indexes */ if (rel == NULL || rel->indexlist == NIL) return false; + elog(DEBUG1, "*** Begin get_actual_variable_range() ***"); /* If it has indexes it must be a plain relation */ rte = root->simple_rte_array[rel->relid]; Assert(rte->rtekind == RTE_RELATION); @@ -5336,6 +5337,7 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, } } + elog(DEBUG1, "*** End get_actual_variable_range() ***"); return have_data; } -- 2.17.1