From b4f9a5a23e79e70dea5946e2d70cb8aff269bf31 Mon Sep 17 00:00:00 2001
From: Peter Geoghegan
Date: Wed, 15 Apr 2020 11:39:51 -0700
Subject: [PATCH 1/2] Redefine split interval to be space-wise.

---
 src/backend/access/nbtree/nbtsplitloc.c | 86 +++++++++++++++++++------
 1 file changed, 68 insertions(+), 18 deletions(-)

diff --git a/src/backend/access/nbtree/nbtsplitloc.c b/src/backend/access/nbtree/nbtsplitloc.c
index c850cd807c..91f1f8feea 100644
--- a/src/backend/access/nbtree/nbtsplitloc.c
+++ b/src/backend/access/nbtree/nbtsplitloc.c
@@ -17,10 +17,6 @@
 #include "access/nbtree.h"
 #include "storage/lmgr.h"
 
-/* limits on split interval (default strategy only) */
-#define MAX_LEAF_INTERVAL			9
-#define MAX_INTERNAL_INTERVAL		18
-
 typedef enum
 {
 	/* strategy for searching through materialized list of split points */
@@ -71,6 +67,7 @@ static void _bt_recsplitloc(FindSplitData *state,
 static void _bt_deltasortsplits(FindSplitData *state, double fillfactormult,
 								bool usemult);
 static int	_bt_splitcmp(const void *arg1, const void *arg2);
+static int	_bt_defaultinterval(FindSplitData *state);
 static bool _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
 								int leaffillfactor, bool *usemult);
 static bool _bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid);
@@ -338,19 +335,6 @@ _bt_findsplitloc(Relation rel,
 		fillfactormult = 0.50;
 	}
 
-	/*
-	 * Set an initial limit on the split interval/number of candidate split
-	 * points as appropriate.  The "Prefix B-Trees" paper refers to this as
-	 * sigma l for leaf splits and sigma b for internal ("branch") splits.
-	 * It's hard to provide a theoretical justification for the initial size
-	 * of the split interval, though it's clear that a small split interval
-	 * makes suffix truncation much more effective without noticeably
-	 * affecting space utilization over time.
-	 */
-	state.interval = Min(Max(1, state.nsplits * 0.05),
-						 state.is_leaf ? MAX_LEAF_INTERVAL :
-						 MAX_INTERNAL_INTERVAL);
-
 	/*
 	 * Save leftmost and rightmost splits for page before original ordinal
 	 * sort order is lost by delta/fillfactormult sort
@@ -361,6 +345,9 @@ _bt_findsplitloc(Relation rel,
 	/* Give split points a fillfactormult-wise delta, and sort on deltas */
 	_bt_deltasortsplits(&state, fillfactormult, usemult);
 
+	/* Determine optimal default strategy split interval from sorted splits */
+	state.interval = _bt_defaultinterval(&state);
+
 	/*
 	 * Determine if default strategy/split interval will produce a
 	 * sufficiently distinguishing split, or if we should change strategies.
@@ -618,6 +605,69 @@ _bt_splitcmp(const void *arg1, const void *arg2)
 	return 0;
 }
 
+#define LEAF_SPLIT_DISTANCE			0.050
+#define INTERNAL_SPLIT_DISTANCE		0.075
+
+/*
+ * Compute the split interval for the default strategy: the number of leading
+ * entries in state's delta-sorted "splits" array whose leftfree and rightfree
+ * both stay within a tolerance of the space-wise optimal (first) split point.
+ * The tolerance is olddataitemstotal scaled by LEAF_SPLIT_DISTANCE or by
+ * INTERNAL_SPLIT_DISTANCE.  Result is >= 1 whenever state->nsplits is >= 1.
+ *
+ * The "Prefix B-Trees" paper refers to split interval as sigma l for leaf
+ * splits and sigma b for internal ("branch") splits.  It's hard to provide a
+ * theoretical justification for the size of the split interval, though it's
+ * clear that a small split interval makes suffix truncation much more
+ * effective without noticeably affecting space utilization over time.
+ */
+static int
+_bt_defaultinterval(FindSplitData *state)
+{
+	SplitPoint *spaceoptimal = state->splits;
+	int16		lowleftfree;
+	int16		lowrightfree;
+	int16		highleftfree;
+	int16		highrightfree;
+	int16		tolerance;
+
+	/*
+	 * Tolerance is a fixed fraction of olddataitemstotal (5% on leaf pages,
+	 * 7.5% on internal pages), and bounds leftfree and rightfree from above
+	 * and below.  The final split interval will be about 10% of nsplits when
+	 * all non-pivot tuples (data items) on a leaf page are uniformly sized.
+	 */
+	if (state->is_leaf)
+		tolerance = state->olddataitemstotal * LEAF_SPLIT_DISTANCE;
+	else
+		tolerance = state->olddataitemstotal * INTERNAL_SPLIT_DISTANCE;
+
+	lowleftfree = spaceoptimal->leftfree - tolerance;
+	lowrightfree = spaceoptimal->rightfree - tolerance;
+	highleftfree = spaceoptimal->leftfree + tolerance;
+	highrightfree = spaceoptimal->rightfree + tolerance;
+
+	/*
+	 * Iterate through sorted candidate split points starting from the one
+	 * after the space optimal/first split, until one falls out of range.
+	 *
+	 * Returning i excludes splits[i], the first candidate split point that
+	 * exceeds the space tolerance.  Posting list tuples are frequently much
+	 * larger than nearby tuples.  It's important that the split interval ends
+	 * just before any large outlier tuple (not after).
+	 */
+	for (int i = 1; i < state->nsplits; i++)
+	{
+		SplitPoint *split = state->splits + i;
+
+		if (split->leftfree < lowleftfree || split->rightfree < lowrightfree ||
+			split->leftfree > highleftfree || split->rightfree > highrightfree)
+			return i;
+	}
+
+	return state->nsplits;
+}
+
 /*
  * Subroutine to determine whether or not a non-rightmost leaf page should be
  * split immediately after the would-be original page offset for the
@@ -850,7 +900,7 @@ _bt_bestsplitloc(FindSplitData *state, int perfectpenalty,
 	 */
 	if (strategy == SPLIT_MANY_DUPLICATES && !state->is_rightmost &&
 		!final->newitemonleft && final->firstrightoff >= state->newitemoff &&
-		final->firstrightoff < state->newitemoff + MAX_LEAF_INTERVAL)
+		final->firstrightoff < state->newitemoff + 9)
 	{
 		/*
 		 * Avoid the problem by performing a 50:50 split when the new item is
-- 
2.25.1