From a7d4cafc92358e6095a48c0b42ccbe06b7b8bd5f Mon Sep 17 00:00:00 2001
From: Peter Geoghegan
Date: Thu, 12 Sep 2019 16:19:54 -0700
Subject: [PATCH v13 2/3] Stop deduplicating when a page split is avoided.

Currently this is a big loss for performance, especially with WAL-logged
relations, though it barely affects total space utilization compared to
recent versions of the patch.

With incremental rewriting of the page and incremental WAL logging, this
could actually be a win for performance.  In any case it seems like a good
thing for deduplication to be able to operate in a "goal-orientated" way.
The exact details will need to be validated by extensive benchmarking.
---
 src/backend/access/nbtree/nbtinsert.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c
index 52651fcbe4..f3b945edf9 100644
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -2675,6 +2675,21 @@ _bt_dedup_one_page(Relation rel, Buffer buffer, Relation heapRel,
 			pagesaving += _bt_dedup_insert(newpage, dedupState);
 		}
 
+		/*
+		 * When we have deduplicated enough to avoid a page split, don't
+		 * bother deduplicating any more items.
+		 *
+		 * FIXME: If rewriting the page and doing the WAL logging were
+		 * incremental, we could actually break out of the loop and save
+		 * real work.  As things stand this is a loss for performance, but
+		 * it barely affects space utilization.  (The number of blocks is
+		 * the same as before, except for rounding effects.  The minimum
+		 * number of items on each page for each index "increases" when
+		 * this is enabled, however.)
+		 */
+		if (pagesaving >= newitemsz)
+			deduplicate = false;
+
 		pfree(dedupState->itupprev);
 		dedupState->itupprev = CopyIndexTuple(itup);
 	}
-- 
2.17.1