From cb9e115a3a665f8a6dcde5cee39b9fa46852dd7d Mon Sep 17 00:00:00 2001 From: Peter Geoghegan Date: Wed, 3 Oct 2018 10:40:47 -0700 Subject: [PATCH v6 6/6] DEBUG: Allow nbtree to use ASC heap TID order. When the macro BTREE_ASC_HEAP_TID is defined (uncommented), the patch will change the implementation to use ASC sort order rather than DESC sort order. This may be useful to reviewers. This patch is not proposed for inclusion in PostgreSQL; it's included for the convenience of reviewers. --- src/backend/access/nbtree/nbtinsert.c | 4 ++++ src/backend/access/nbtree/nbtsearch.c | 11 +++++++++++ src/backend/access/nbtree/nbtsort.c | 4 ++++ src/backend/access/nbtree/nbtutils.c | 12 ++++++++++++ src/backend/utils/sort/tuplesort.c | 10 ++++++++++ src/include/access/nbtree.h | 22 ++++++++++++++++++++++ 6 files changed, 63 insertions(+) diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index 5bfafc0892..6336f90d4e 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -2396,7 +2396,11 @@ _bt_perfect_penalty(Relation rel, Page page, bool is_leaf, SplitMode mode, */ if (outerpenalty > IndexRelationGetNumberOfKeyAttributes(rel)) { +#ifndef BTREE_ASC_HEAP_TID if (P_FIRSTDATAKEY(opaque) == newitemoff) +#else + if (maxoff < newitemoff) +#endif *secondmode = SPLIT_SINGLE_VALUE; else { diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index d3ac408a6d..2a3c915085 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -637,8 +637,12 @@ _bt_tuple_compare(Relation rel, if (heapTid == NULL) return 1; +#ifndef BTREE_ASC_HEAP_TID /* Deliberately invert the order, since TIDs "sort DESC" */ return ItemPointerCompare(heapTid, key->scantid); +#else + return ItemPointerCompare(key->scantid, heapTid); +#endif } /* @@ -1182,9 +1186,16 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) { key->scantid = &minscantid; +#ifndef 
BTREE_ASC_HEAP_TID /* Heap TID attribute uses DESC ordering */ ItemPointerSetBlockNumber(key->scantid, InvalidBlockNumber); ItemPointerSetOffsetNumber(key->scantid, InvalidOffsetNumber); +#else + /* Lowest possible block is 0 */ + ItemPointerSetBlockNumber(key->scantid, 0); + /* InvalidOffsetNumber is less than any real offset */ + ItemPointerSetOffsetNumber(key->scantid, InvalidOffsetNumber); +#endif } /* diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index c8e0e75487..16416a97f9 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -1156,8 +1156,12 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2) */ if (compare == 0) { +#ifndef BTREE_ASC_HEAP_TID /* Deliberately invert the order, since TIDs "sort DESC" */ compare = ItemPointerCompare(&itup2->t_tid, &itup->t_tid); +#else + compare = ItemPointerCompare(&itup->t_tid, &itup2->t_tid); +#endif Assert(compare != 0); if (compare > 0) load1 = false; diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index aeca964716..7e4493cd8d 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -2237,7 +2237,14 @@ _bt_suffix_truncate(Relation rel, IndexTuple lastleft, IndexTuple firstright) */ pivotheaptid = (ItemPointer) ((char *) pivot + newsize - sizeof(ItemPointerData)); +#ifndef BTREE_ASC_HEAP_TID ItemPointerCopy(&lastleft->t_tid, pivotheaptid); +#else + /* Manufacture TID that's less than right TID, but only minimally */ + ItemPointerCopy(&firstright->t_tid, pivotheaptid); + ItemPointerSetOffsetNumber(pivotheaptid, + OffsetNumberPrev(ItemPointerGetOffsetNumber(pivotheaptid))); +#endif /* * Lehman and Yao require that the downlink to the right page, which is to @@ -2247,9 +2254,14 @@ _bt_suffix_truncate(Relation rel, IndexTuple lastleft, IndexTuple firstright) * split).
*/ +#ifndef BTREE_ASC_HEAP_TID /* Deliberately invert the order, since TIDs "sort DESC" */ Assert(ItemPointerCompare(&lastleft->t_tid, pivotheaptid) >= 0); Assert(ItemPointerCompare(&firstright->t_tid, pivotheaptid) < 0); +#else + Assert(ItemPointerCompare(pivotheaptid, &lastleft->t_tid) >= 0); + Assert(ItemPointerCompare(pivotheaptid, &firstright->t_tid) < 0); +#endif BTreeTupleSetNAtts(pivot, nkeyatts); BTreeTupleSetAltHeapTID(pivot); diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index d0397008db..ee93912626 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -4066,17 +4066,27 @@ comparetup_index_btree(const SortTuple *a, const SortTuple *b, BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid); BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid); +#ifndef BTREE_ASC_HEAP_TID /* Deliberately invert the order, since TIDs "sort DESC" */ if (blk1 != blk2) return (blk1 < blk2) ? 1 : -1; +#else + if (blk1 != blk2) + return (blk1 < blk2) ? -1 : 1; +#endif } { OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid); OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid); +#ifndef BTREE_ASC_HEAP_TID /* Deliberately invert the order, since TIDs "sort DESC" */ if (pos1 != pos2) return (pos1 < pos2) ? 1 : -1; +#else + if (pos1 != pos2) + return (pos1 < pos2) ? -1 : 1; +#endif } return 0; diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 1e9869b30e..db6d850de8 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -117,6 +117,24 @@ typedef struct BTMetaPageData #define BTREE_VERSION 4 /* current version number */ #define BTREE_MIN_VERSION 4 /* minimal supported version number */ +/* + * Heap TID behaves as a final key value within nbtree as of + * BTREE_VERSION 4. This ensures that all entries' keys are unique + * and relocatable. By default, heap TIDs are sorted in DESC sort + * order within nbtree indexes.
ASC heap TID ordering may be + * useful during testing. + * + * DESC order was chosen because it allowed BTREE_VERSION 4 to + * maintain compatibility with unspecified BTREE_VERSION 2 + 3 + * behavior that dependency management nevertheless relied on. + * However, DESC order also seems like it might be slightly better + * on its own merits, since continually splitting the same leaf + * page may cut down on the total number of FPIs generated when + * continually inserting tuples with the same user-visible + * attribute values. +#define BTREE_ASC_HEAP_TID + */ + /* * Maximum size of a btree index entry, including its tuple header. * @@ -151,7 +169,11 @@ typedef struct BTMetaPageData #define BTREE_MIN_FILLFACTOR 10 #define BTREE_DEFAULT_FILLFACTOR 90 #define BTREE_NONLEAF_FILLFACTOR 70 +#ifndef BTREE_ASC_HEAP_TID #define BTREE_SINGLEVAL_FILLFACTOR 1 +#else +#define BTREE_SINGLEVAL_FILLFACTOR 99 +#endif /* * In general, the btree code tries to localize its knowledge about -- 2.17.1