From 8571e2b0ee705ea7d1b8d2f285c361ea010e4d2d Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Sat, 18 Nov 2023 12:05:53 +0100 Subject: [PATCH v20231124 4/7] reintroduce the LRU cache of recent blocks Useful for detecting sequential patterns, for which read-ahead works better than our prefetching, and checking shared buffers and page cache is not sufficient. --- src/backend/access/index/indexam.c | 137 +++++++++++++++++++++++++++++ src/include/access/genam.h | 30 +++++++ 2 files changed, 167 insertions(+) diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 54a704338f1..7456a69ab34 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -1082,6 +1082,125 @@ index_opclass_options(Relation indrel, AttrNumber attnum, Datum attoptions, return build_local_reloptions(&relopts, attoptions, validate); } +/* + * index_prefetch_is_sequential + * Track the block number and check if the I/O pattern is sequential, + * or if the same block was just prefetched. + * + * Prefetching is cheap, but for some access patterns the benefits are small + * compared to the extra overhead. In particular, for sequential access the + * read-ahead performed by the OS is very effective/efficient. Doing more + * prefetching is just increasing the costs. + * + * This tries to identify simple sequential patterns, so that we can skip + * the prefetching request. This is implemented by having a small queue + * of block numbers, and checking it before prefetching another block. + * + * We look at the preceding PREFETCH_SEQ_PATTERN_BLOCKS blocks, and see if + * they are sequential. We also check if the block is the same as the last + * request (which is not sequential). + * + * Note that the main prefetch queue is not really useful for this, as it + * stores TIDs while we care about block numbers. Consider a sorted table, + * with a perfectly sequential pattern when accessed through an index. Each + * heap page may have dozens of TIDs, but we need to check block numbers. + * We could keep enough TIDs to cover enough blocks, but then we also need + * to walk those when checking the pattern (in hot path). + * + * So instead, we maintain a small separate queue of block numbers, and we use + * this instead. + * + * Returns true if the block is in a sequential pattern (and so should not be + * prefetched), or false (not sequential, should be prefetched). + * + * XXX The name is a bit misleading, as it also adds the block number to the + * block queue and checks if the block is the same as the last one (which + * does not require a sequential pattern). + */ +static bool +index_prefetch_is_sequential(IndexPrefetch prefetch, BlockNumber block) +{ + int idx; + + /* + * If the block queue is empty, just store the block and we're done (it's + * neither a sequential pattern, neither recently prefetched block). + */ + if (prefetch->blockIndex == 0) + { + prefetch->blockItems[PREFETCH_BLOCK_INDEX(prefetch->blockIndex)] = block; + prefetch->blockIndex++; + return false; + } + + /* + * Check if it's the same as the immediately preceding block. We don't + * want to prefetch the same block over and over (which would happen for + * well correlated indexes). + * + * In principle we could rely on index_prefetch_add_cache doing this using + * the full cache, but this check is much cheaper and we need to look at + * the preceding block anyway, so we just do it. + * + * XXX Notice we haven't added the block to the block queue yet, and there + * is a preceding block (i.e. blockIndex-1 is valid). + */ + if (prefetch->blockItems[PREFETCH_BLOCK_INDEX(prefetch->blockIndex - 1)] == block) + return true; + + /* + * Add the block number to the queue. + * + * We do this before checking if the pattern, because we want to know + * about the block even if we end up skipping the prefetch. Otherwise we'd + * not be able to detect longer sequential pattens - we'd skip one block + * but then fail to skip the next couple blocks even in a perfect + * sequential pattern. This ocillation might even prevent the OS + * read-ahead from kicking in. + */ + prefetch->blockItems[PREFETCH_BLOCK_INDEX(prefetch->blockIndex)] = block; + prefetch->blockIndex++; + + /* + * Check if the last couple blocks are in a sequential pattern. We look + * for a sequential pattern of PREFETCH_SEQ_PATTERN_BLOCKS (4 by default), + * so we look for patterns of 5 pages (40kB) including the new block. + * + * XXX Perhaps this should be tied to effective_io_concurrency somehow? + * + * XXX Could it be harmful that we read the queue backwards? Maybe memory + * prefetching works better for the forward direction? + */ + for (int i = 1; i < PREFETCH_SEQ_PATTERN_BLOCKS; i++) + { + /* + * Are there enough requests to confirm a sequential pattern? We only + * consider something to be sequential after finding a sequence of + * PREFETCH_SEQ_PATTERN_BLOCKS blocks. + * + * FIXME Better to move this outside the loop. + */ + if (prefetch->blockIndex < i) + return false; + + /* + * Calculate index of the earlier block (we need to do -1 as we + * already incremented the index when adding the new block to the + * queue). + */ + idx = PREFETCH_BLOCK_INDEX(prefetch->blockIndex - i - 1); + + /* + * For a sequential pattern, blocks "k" step ago needs to have block + * number by "k" smaller compared to the current block. + */ + if (prefetch->blockItems[idx] != (block - i)) + return false; + } + + return true; +} + /* * index_prefetch * Prefetch the TID, unless it's sequential or recently prefetched. @@ -1172,8 +1291,26 @@ index_prefetch(IndexScanDesc scan, ItemPointer tid, bool skip_all_visible) return; } + /* + * Now also check if the blocks to prefetch are in a sequential pattern. + * We do it here because we need to do this check before PrefetchBuffer + * initiates the prefetch, and we it can't do this easily (as it doesn't + * know in what context it's called in). So we do it here. + * + * We use a tiny LRU cache and see if the blocks follow a sequential + * pattern - if it's the same as the previous block, or if the last + * couple blocks are a continguous sequence, we don't prefetch it. + */ + if (index_prefetch_is_sequential(prefetch, block)) + { + prefetch->countSkipSequential++; + return; + } + + /* XXX shouldn't this be before the VM / sequenqial check? */ prefetch->countAll++; + /* OK, try prefetching the block. */ result = PrefetchBuffer(scan->heapRelation, MAIN_FORKNUM, block); /* diff --git a/src/include/access/genam.h b/src/include/access/genam.h index 9f33796fd29..9e2d77ef23b 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -235,6 +235,22 @@ extern HeapTuple systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction); extern void systable_endscan_ordered(SysScanDesc sysscan); +/* + * Cache of recently prefetched blocks, organized as a hash table of + * small LRU caches. Doesn't need to be perfectly accurate, but we + * aim to make false positives/negatives reasonably low. + */ +typedef struct PrefetchCacheEntry { + BlockNumber block; + uint64 request; +} PrefetchCacheEntry; + +/* + * Used to detect sequential patterns (to not prefetch in this case). + */ +#define PREFETCH_QUEUE_HISTORY 8 +#define PREFETCH_SEQ_PATTERN_BLOCKS 4 + typedef struct IndexPrefetchData { @@ -264,6 +280,20 @@ typedef struct IndexPrefetchData uint64 queueIndex; /* next TID to prefetch */ uint64 queueStart; /* first valid TID in queue */ uint64 queueEnd; /* first invalid (empty) TID in queue */ + + /* + * A couple of last prefetched blocks, used to check for certain access + * pattern and skip prefetching - e.g. for sequential access). + * + * XXX Separate from the main queue, because we only want to compare the + * block numbers, not the whole TID. In sequential access it's likely we + * read many items from each page, and we don't want to check many items + * (as that is much more expensive). + */ + BlockNumber blockItems[PREFETCH_QUEUE_HISTORY]; + uint64 blockIndex; /* index in the block (points to the first + * empty entry)*/ + } IndexPrefetchData; #define PREFETCH_QUEUE_INDEX(a) ((a) % (MAX_IO_CONCURRENCY)) -- 2.42.0