From 00e5314a357f80a54284b7b8fceed9b7eb9da7ce Mon Sep 17 00:00:00 2001 From: Zhenghua lyu Date: Fri, 15 Jul 2022 13:51:20 +0000 Subject: [PATCH] Adjust ndistinct with nrows in the rel when estimating join selectivity. Ndistinct is key to the accuracy of join selectivity estimation, which impacts the performance of complex SQL queries a lot. The previous code in eqjoinsel did not take the rel's restrictions into account. A good math model should use the dependency of the Vars in rel's restrictions and the join var to estimate Ndistinct. At the very least, Ndistinct should not be greater than the number of rows of the rel. --- src/backend/utils/adt/selfuncs.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index fa1f589fad..bf6ef60b56 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -2269,6 +2269,15 @@ eqjoinsel(PG_FUNCTION_ARGS) nd1 = get_variable_numdistinct(&vardata1, &isdefault1); nd2 = get_variable_numdistinct(&vardata2, &isdefault2); + /* + * Adjust ndistinct to account for restriction clauses. + * nd should not be greater than the number of rows in the relation. + */ + if (vardata1.rel) + nd1 = Min(nd1, vardata1.rel->rows); + if (vardata2.rel) + nd2 = Min(nd2, vardata2.rel->rows); + opfuncoid = get_opcode(operator); memset(&sslot1, 0, sizeof(sslot1)); -- 2.25.1