From 624b4827e0d480fc16e016c1ad7c5b26f358b6f3 Mon Sep 17 00:00:00 2001 From: Ants Aasma Date: Sat, 28 Feb 2026 15:08:50 +0200 Subject: [PATCH v1] Decorrelate nested hash tables Because hash tables are iterated in hash order, using the same hash function in nested hash tables can lead to excessive collisions. If the parent hash table can be the same size as the child hash tables, it is not a problem, as the parent will quickly grow to the same size as the child, eliminating further collisions. But if there is more than one child, the collisions can cause the table to quickly grow until it is below 10% fillfactor. The problem is made worse by nodeAgg devolving into building single-entry batches once hash table size exceeds working memory. To hit the problem, an aggregate node without a final function above multiple other aggregate nodes is needed. Because the hash IV is already initialized using the parallel worker number when there is no final function, the typical cases do not hit this problem. A hash aggregate implementing DISTINCT above multiple parallel workers with more groups than fit into memory is needed. Initializing the hash function based on plan node id decorrelates hash tables within a plan while still keeping behavior deterministic. Author: Ants Aasma Discussion: https://postgr.es/m/CANwKhkPOZupu3PYQVdkMmYjquYVqG2v8XmCAuuVM9Eu13-Zw3g%40mail.gmail.com --- src/backend/executor/execGrouping.c | 2 +- src/backend/executor/nodeAgg.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c index c107514a85d..21f5e4cabcc 100644 --- a/src/backend/executor/execGrouping.c +++ b/src/backend/executor/execGrouping.c @@ -246,7 +246,7 @@ BuildTupleHashTable(PlanState *parent, * underestimated. 
*/ if (use_variable_hash_iv) - hash_iv = murmurhash32(ParallelWorkerNumber); + hash_iv = hash_combine(ParallelWorkerNumber, parent->plan->plan_node_id); hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable); diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 7d487a165fa..5c947fd151f 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -1535,7 +1535,7 @@ build_hash_table(AggState *aggstate, int setno, double nbuckets) metacxt, tuplescxt, tmpcxt, - DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit)); + true); } /* -- 2.51.0