From ddccbe9247161cc280e2eec058e3618d46fa2e58 Mon Sep 17 00:00:00 2001
From: Andrey Borodin
Date: Tue, 6 Nov 2018 16:51:35 +0500
Subject: [PATCH] Implement different B-tree page layouts

---
Review notes on this revision (text in this section is ignored by git am):
 - the patch text had lost its line structure; it is restored here, and the
   whitespace of the context lines is reconstructed, so re-check it against
   the tree before applying;
 - btvacuumpage() no longer hands an unfilled order[] to
   PageMakeSpecialFragmentation() when _bt_prepare_layout() returns false;
 - the prefetch in _bt_binsrch() now targets the next probe points;
 - commented-out reference code is replaced by function header comments.

 src/backend/access/nbtree/nbtree.c    | 249 ++++++++++++++++++++++++++
 src/backend/access/nbtree/nbtsearch.c |  27 ++++
 src/backend/storage/page/bufpage.c    |  40 +++++
 src/include/storage/bufpage.h         |   1 +
 4 files changed, 317 insertions(+)

diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index e8725fbbe1..67271df1a6 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -1089,6 +1089,237 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
 		*oldestBtpoXact = vstate.oldestBtpoXact;
 }
 
+/*
+ * Page layout generators
+ *
+ * Each generator appends every offset number of the half-open range
+ * [low, high) exactly once to order[], starting at index *next and advancing
+ * *next past the entries written.  The tree-shaped generators split a range
+ * the way the binary search in _bt_binsrch() does: the probe point is
+ * mid = low + (high - low) / 2 and the two subranges are [low, mid) and
+ * [mid + 1, high).
+ *
+ * Exactly one of USE_SEQ_ORDER, USE_EYZINGER_ORDER, USE_VEB_ORDER and
+ * USE_BT_ORDER should be defined; it selects the generator that
+ * _bt_prepare_layout() uses.
+ */
+#define USE_EYZINGER_ORDER
+
+/*
+ * _bt_sequential_layout --- emit [low, high) in ascending offset order.
+ */
+static void
+_bt_sequential_layout(OffsetNumber *order, int *next,
+					  OffsetNumber low, OffsetNumber high)
+{
+	while (high > low)
+	{
+		order[*next] = low;
+		(*next)++;
+		low++;
+	}
+}
+
+/*
+ * _bt_metabtree_recursion --- worker for _bt_metabtree_layout().
+ *
+ * The probe point of [low, high) has already been emitted by the caller.
+ * Emit the probe points of both subranges next to each other, then recurse
+ * into each subrange.
+ */
+static void
+_bt_metabtree_recursion(OffsetNumber *order, int *next,
+						OffsetNumber low, OffsetNumber high)
+{
+	if (high > low)
+	{
+		OffsetNumber mid = low + ((high - low) / 2);
+		OffsetNumber mid_low = low + ((mid - low) / 2);
+		OffsetNumber mid_high = (mid + 1) + ((high - (mid + 1)) / 2);
+
+		/* mid_low == mid means the left subrange [low, mid) is empty */
+		if (mid_low != mid)
+		{
+			order[*next] = mid_low;
+			(*next)++;
+		}
+		/* mid_high == high means the right subrange [mid + 1, high) is empty */
+		if (mid_high != high)
+		{
+			order[*next] = mid_high;
+			(*next)++;
+		}
+
+		_bt_metabtree_recursion(order, next, low, mid);
+		_bt_metabtree_recursion(order, next, mid + 1, high);
+	}
+}
+
+/*
+ * _bt_metabtree_layout --- emit [low, high) with sibling probe points adjacent.
+ *
+ * The root probe point comes first; every other probe point is emitted
+ * together with its sibling by _bt_metabtree_recursion().
+ */
+static void
+_bt_metabtree_layout(OffsetNumber *order, int *next,
+					 OffsetNumber low, OffsetNumber high)
+{
+	if (high > low)
+	{
+		order[*next] = low + ((high - low) / 2);
+		(*next)++;
+
+		_bt_metabtree_recursion(order, next, low, high);
+	}
+}
+
+/*
+ * _bt_eyzinger_recursion --- emit [low, high) in pre-order.
+ *
+ * Emits the probe point, then the whole left subrange, then the whole right
+ * subrange.
+ */
+static void
+_bt_eyzinger_recursion(OffsetNumber *order, int *next,
+					   OffsetNumber low, OffsetNumber high)
+{
+	if (high > low)
+	{
+		OffsetNumber mid = low + ((high - low) / 2);
+
+		order[*next] = mid;
+		(*next)++;
+
+		_bt_eyzinger_recursion(order, next, low, mid);
+		_bt_eyzinger_recursion(order, next, mid + 1, high);
+	}
+}
+
+/*
+ * _bt_veb_recursion --- emit [low, high) in groups of up to three probe points.
+ *
+ * Emits the probe point followed by the probe points of its two subranges,
+ * then recurses into the four ranges that remain around those three.
+ */
+static void
+_bt_veb_recursion(OffsetNumber *order, int *next,
+				  OffsetNumber low, OffsetNumber high)
+{
+	if (high > low)
+	{
+		OffsetNumber mid = low + ((high - low) / 2);
+		OffsetNumber mid_low = low + ((mid - low) / 2);
+		OffsetNumber mid_high = (mid + 1) + ((high - (mid + 1)) / 2);
+
+		/* We have low <= mid < high, so mid points at a real slot */
+		order[*next] = mid;
+		(*next)++;
+		/* mid_low == mid means the left subrange [low, mid) is empty */
+		if (mid_low != mid)
+		{
+			order[*next] = mid_low;
+			(*next)++;
+		}
+		/* mid_high == high means the right subrange [mid + 1, high) is empty */
+		if (mid_high != high)
+		{
+			order[*next] = mid_high;
+			(*next)++;
+		}
+
+		_bt_veb_recursion(order, next, low, mid_low);
+		_bt_veb_recursion(order, next, mid_low + 1, mid);
+		_bt_veb_recursion(order, next, mid + 1, mid_high);
+		_bt_veb_recursion(order, next, mid_high + 1, high);
+	}
+}
+
+#ifdef USE_ASSERT_CHECKING
+/*
+ * _bt_check_layout --- verify that order[] is a permutation of the page's
+ * offset numbers.
+ *
+ * order[] must hold PageGetMaxOffsetNumber(page) entries.  Raises an ERROR
+ * if an entry is out of range or repeated, or if an offset number is
+ * missing; otherwise returns true so the call can be wrapped in Assert().
+ */
+static bool
+_bt_check_layout(Page page, OffsetNumber *order)
+{
+	/* indexed by offset number, which is 1-based: slot 0 stays unused */
+	char		busy[MaxOffsetNumber + 1];
+	OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+	int			i;
+
+	memset(busy, 0, sizeof(busy));
+
+	for (i = 0; i < maxoff; i++)
+	{
+		OffsetNumber current = order[i];
+
+		if (current > maxoff || current == InvalidOffsetNumber)
+			elog(ERROR, "Page layout is broken: incorrect offset number %u at %i", current, i);
+		if (busy[current])
+			elog(ERROR, "Page layout is broken: offset number %u is used more than once at %i", current, i);
+		busy[current] = 1;
+	}
+	for (i = FirstOffsetNumber; i <= maxoff; i++)
+	{
+		if (!busy[i])
+			elog(ERROR, "Page layout is broken: offset number %u is not used", i);
+	}
+	return true;
+}
+#endif							/* USE_ASSERT_CHECKING */
+
+/*
+ * _bt_prepare_layout --- compute the physical tuple order for a B-tree page.
+ *
+ * Fills order[] with the page's offset numbers: first the data items in the
+ * order chosen by the compiled-in generator, then, on a page that is not
+ * rightmost, the high key.  order[] must have room for
+ * PageGetMaxOffsetNumber(page) entries.
+ *
+ * Returns false without writing to order[] when the page has no data items;
+ * callers must not use order[] in that case.
+ */
+static bool
+_bt_prepare_layout(Page page, OffsetNumber *order)
+{
+	BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+	OffsetNumber low = P_FIRSTDATAKEY(opaque);
+	OffsetNumber high = PageGetMaxOffsetNumber(page);
+	int			next = 0;
+
+	/* check if there is something to defrag */
+	if (high < low)
+		return false;
+
+	high++;				/* generators take the half-open range [low, high) */
+
+#ifdef USE_BT_ORDER
+	_bt_metabtree_layout(order, &next, low, high);
+#elif defined(USE_VEB_ORDER)
+	_bt_veb_recursion(order, &next, low, high);
+#elif defined(USE_EYZINGER_ORDER)
+	_bt_eyzinger_recursion(order, &next, low, high);
+#elif defined(USE_SEQ_ORDER)
+	_bt_sequential_layout(order, &next, low, high);
+#endif
+
+	if (!P_RIGHTMOST(opaque))
+	{
+		order[next] = P_HIKEY;
+		next++;
+	}
+
+	Assert(next == PageGetMaxOffsetNumber(page));
+	Assert(_bt_check_layout(page, order));
+
+	return true;
+}
+
 /*
  * btvacuumpage --- VACUUM one page
  *
@@ -1114,6 +1345,9 @@ btvacuumpage(BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
 	Page		page;
 	BTPageOpaque opaque = NULL;
 
+	/* scratch space for the layout computed by _bt_prepare_layout() */
+	OffsetNumber order[MaxOffsetNumber];
+
 restart:
 	delete_now = false;
 	recurse_to = P_NONE;
@@ -1350,7 +1584,22 @@ restart:
 		/* pagedel released buffer, so we shouldn't */
 	}
 	else
+	{
+#if defined(USE_BT_ORDER) || defined(USE_VEB_ORDER) || defined(USE_EYZINGER_ORDER) || defined(USE_SEQ_ORDER)
+		/*
+		 * Re-pack the tuples in the configured order.  order[] is only valid
+		 * when _bt_prepare_layout() returns true, and opaque is still NULL if
+		 * it was never set for this page.
+		 *
+		 * NOTE(review): nothing here marks the buffer dirty or WAL-logs the
+		 * change, and the lock level held at this point is not visible in
+		 * this patch --- confirm both before using this outside experiments.
+		 */
+		if (opaque != NULL && _bt_prepare_layout(page, order))
+			PageMakeSpecialFragmentation(page, order);
+#endif
 		_bt_relbuf(rel, buf);
+	}
 
 	/*
 	 * This is really tail recursion, but if the compiler is too stupid to
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index 8b2772c154..e7c5308349 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -399,6 +399,33 @@ _bt_binsrch(Relation rel,
 	{
 		OffsetNumber mid = low + ((high - low) / 2);
 
+#define USE_EYZINGER_ORDER
+#define USE_PREFETCH
+
+#if defined(USE_PREFETCH) && defined(__GNUC__)
+		{
+			/*
+			 * Prefetch the tuples the next iteration may compare against.
+			 * After this comparison the range becomes [mid + 1, high) or
+			 * [low, mid), and the next probe is that range's midpoint.
+			 */
+			OffsetNumber next_right = (mid + 1) + ((high - (mid + 1)) / 2);
+
+			/* next_right == high means [mid + 1, high) is empty */
+			if (next_right < high)
+				__builtin_prefetch(PageGetItem(page, PageGetItemId(page, next_right)), 0, 2);
+#ifndef USE_BT_ORDER
+			/* with USE_BT_ORDER one prefetch is enough */
+			if (mid > low)
+			{
+				OffsetNumber next_left = low + ((mid - low) / 2);
+
+				__builtin_prefetch(PageGetItem(page, PageGetItemId(page, next_left)), 0, 2);
+			}
+#endif
+		}
+#endif
+
 		/* We have low <= mid < high, so mid points at a real slot */
 
 		result = _bt_compare(rel, keysz, scankey, page, mid);
diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c
index dfbda5458f..e3f553478c 100644
--- a/src/backend/storage/page/bufpage.c
+++ b/src/backend/storage/page/bufpage.c
@@ -375,6 +375,46 @@ PageGetTempPageCopy(Page page)
 	return temp;
 }
 
+/*
+ * PageMakeSpecialFragmentation
+ *		Re-pack the tuples of a page in a caller-specified physical order.
+ *
+ * order[] must hold PageGetMaxOffsetNumber(page) offset numbers, each valid
+ * and listed exactly once.  The tuple of order[0] ends up at the lowest
+ * address (the new pd_upper) and the tuple of the last entry ends up directly
+ * below the special space.  Line pointers stay where they are; only their
+ * lp_off and the page's pd_upper are rewritten.
+ */
+void
+PageMakeSpecialFragmentation(Page page, uint16 *order)
+{
+	Page		temp = PageGetTempPageCopy(page);
+	PageHeader	phdr = (PageHeader) page;
+	Offset		upper = phdr->pd_special;
+	int			nitems = PageGetMaxOffsetNumber(page);
+	int			i;
+
+	/* fill the tuple area from the special space downwards */
+	for (i = nitems - 1; i >= 0; i--)
+	{
+		ItemId		lp = PageGetItemId(page, order[i]);
+		Size		alignedlen = MAXALIGN(ItemIdGetLength(lp));
+
+		Assert(upper >= phdr->pd_lower + alignedlen);
+
+		upper -= alignedlen;
+		/* the source is the private copy, so the regions never overlap */
+		memcpy((char *) page + upper,
+			   (char *) temp + ItemIdGetOffset(lp),
+			   alignedlen);
+		lp->lp_off = upper;
+	}
+
+	phdr->pd_upper = upper;
+
+	pfree(temp);
+}
+
 /*
  * PageGetTempPageCopySpecial
  *		Get a temporary page in local memory for special processing.
diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h
index 85dd10c45a..c6b31fa3e8 100644
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -434,5 +434,6 @@ extern bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
 						Item newtup, Size newsize);
 extern char *PageSetChecksumCopy(Page page, BlockNumber blkno);
 extern void PageSetChecksumInplace(Page page, BlockNumber blkno);
+extern void PageMakeSpecialFragmentation(Page page, uint16 *order);
 
 #endif							/* BUFPAGE_H */
-- 
2.17.2 (Apple Git-113)