From 54a24390f1137a77c9755a875774a75ae8cf2424 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Mon, 26 Jun 2023 15:39:11 +0300 Subject: [PATCH] Fix underestimated cardinality for a LEFT JOIN with an "IS NULL" restriction in the clause. The error was caused by an incorrect selectivity calculation for the "IS NULL" clause: it took into account only table-level statistics and ignored the NULL values that the join operation itself produces. This patch fixes this by calculating the fraction of rows on the right side of the join that have no matching row on the left, which the outer join fills with NULLs. Co-authored-by: Alena Rybakina --- src/backend/utils/adt/selfuncs.c | 39 ++++++++++++++++++++++++++++---- src/include/nodes/pathnodes.h | 3 +++ 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index c4fcd0076ea..a0e3834453b 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -153,7 +153,7 @@ static double eqjoinsel_inner(Oid opfuncoid, Oid collation, bool isdefault1, bool isdefault2, AttStatsSlot *sslot1, AttStatsSlot *sslot2, Form_pg_statistic stats1, Form_pg_statistic stats2, - bool have_mcvs1, bool have_mcvs2); + bool have_mcvs1, bool have_mcvs2, double *unmatched_frac); static double eqjoinsel_semi(Oid opfuncoid, Oid collation, VariableStatData *vardata1, VariableStatData *vardata2, double nd1, double nd2, @@ -1710,6 +1710,9 @@ nulltestsel(PlannerInfo *root, NullTestType nulltesttype, Node *arg, stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); freq_null = stats->stanullfrac; + if (sjinfo) + freq_null = freq_null + sjinfo->unmatched_frac - freq_null * sjinfo->unmatched_frac; + switch (nulltesttype) { case IS_NULL: @@ -2313,13 +2316,24 @@ eqjoinsel(PG_FUNCTION_ARGS) } /* We need to compute the inner-join selectivity in all cases */ + /* + * Calculate the fraction of rows on the right side without a matching row on the left. + * + * FIXME Should be restricted to JOIN_LEFT, we 
should have similar logic + * for JOIN_FULL. + * + * XXX Probably should calculate unmatched as fraction of the join result, + * not of the relation on the right (because the matched part can have more + * matches per row and thus grow). Not sure. Depends on how it's used later. + */ selec_inner = eqjoinsel_inner(opfuncoid, collation, &vardata1, &vardata2, nd1, nd2, isdefault1, isdefault2, &sslot1, &sslot2, stats1, stats2, - have_mcvs1, have_mcvs2); + have_mcvs1, have_mcvs2, + &sjinfo->unmatched_frac); switch (sjinfo->jointype) { @@ -2407,7 +2421,7 @@ eqjoinsel_inner(Oid opfuncoid, Oid collation, bool isdefault1, bool isdefault2, AttStatsSlot *sslot1, AttStatsSlot *sslot2, Form_pg_statistic stats1, Form_pg_statistic stats2, - bool have_mcvs1, bool have_mcvs2) + bool have_mcvs1, bool have_mcvs2, double *unmatched_frac) { double selec; @@ -2503,7 +2517,10 @@ eqjoinsel_inner(Oid opfuncoid, Oid collation, } CLAMP_PROBABILITY(matchfreq1); CLAMP_PROBABILITY(unmatchfreq1); + + *unmatched_frac = unmatchfreq1; matchfreq2 = unmatchfreq2 = 0.0; + for (i = 0; i < sslot2->nvalues; i++) { if (hasmatch2[i]) @@ -2581,10 +2598,22 @@ eqjoinsel_inner(Oid opfuncoid, Oid collation, double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0; selec = (1.0 - nullfrac1) * (1.0 - nullfrac2); - if (nd1 > nd2) - selec /= nd1; + + /* + * XXX Should this look at nullfrac on either side? Probably depends on + * if we're calculating fraction of NULLs or fraction of unmatched rows. 
+ */ + /* unmatchfreq = (1.0 - nullfrac1) * (1.0 - nullfrac2); */ + if (nd1 != nd2) + { + selec /= Max(nd1, nd2); + *unmatched_frac = fabs(nd1 - nd2) / Max(nd1, nd2); + } else + { selec /= nd2; + *unmatched_frac = 0.0; + } } return selec; diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index c17b53f7adb..6bc63e648e6 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -2853,6 +2853,9 @@ struct SpecialJoinInfo bool semi_can_hash; /* true if semi_operators are all hash */ List *semi_operators; /* OIDs of equality join operators */ List *semi_rhs_exprs; /* righthand-side expressions of these ops */ + + /* For outer join, fraction of rows without a match. */ + Selectivity unmatched_frac; }; /* -- 2.34.1