From 494a31e1ed7976e0f965a32e81c769e1c3dfad66 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Tue, 26 Jan 2016 18:14:33 +0100 Subject: [PATCH 8/9] change how we apply selectivity to number of groups estimate Instead of simply multiplying the ndistinct estimate by the selectivity, we use the formula for the expected number of distinct values observed in 'k' rows when there are 'd' distinct values in the bin: d * (1 - ((d - 1) / d)^k) This is sampling 'with replacement', which seems appropriate for the use, and it mostly assumes a uniform distribution of the distinct values. So if the distribution is not uniform (e.g. there are very frequent groups) this may be less accurate than the current algorithm in some cases, giving over-estimates. But that's probably better than OOM. --- src/backend/utils/adt/selfuncs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index f8d39aa..6eceedf 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -3466,7 +3466,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, /* * Multiply by restriction selectivity. */ - reldistinct *= rel->rows / rel->tuples; + reldistinct = reldistinct * (1 - powl((reldistinct - 1) / reldistinct,rel->rows)); /* * Update estimate of total distinct groups. -- 2.5.0