From 71f59bd83e559df6b36720a4592bf3fdd689504a Mon Sep 17 00:00:00 2001 From: Ilia Evdokimov Date: Mon, 10 Nov 2025 16:11:43 +0300 Subject: [PATCH v1] Avoid duplicate MCV matching in eqjoinsel_semi and eqjoinsel_inner. Previously both eqjoinsel_inner() and eqjoinsel_semi() performed identical O(N^2) loops over MCV lists, even though eqjoinsel() always calls eqjoinsel_inner() before eqjoinsel_semi(). Now the MCV matching results from eqjoinsel_inner() are reused in eqjoinsel_semi() when possible (i.e., when the RHS MCV list is not clamped). This saves redundant computation and simplifies the code. Author: Ilia Evdokimov Reviewed-by: Tom Lane Reviewed-by: David Geier --- src/backend/utils/adt/selfuncs.c | 36 ++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index cb23ad52782..55cd0486bf9 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -154,7 +154,9 @@ static double eqjoinsel_inner(Oid opfuncoid, Oid collation, bool isdefault1, bool isdefault2, AttStatsSlot *sslot1, AttStatsSlot *sslot2, Form_pg_statistic stats1, Form_pg_statistic stats2, - bool have_mcvs1, bool have_mcvs2); + bool have_mcvs1, bool have_mcvs2, + double *matchfreq_mcvs1, double *matchfreq_mcvs2, + int *nmatches_mcvs); static double eqjoinsel_semi(Oid opfuncoid, Oid collation, VariableStatData *vardata1, VariableStatData *vardata2, double nd1, double nd2, @@ -162,6 +164,7 @@ static double eqjoinsel_semi(Oid opfuncoid, Oid collation, AttStatsSlot *sslot1, AttStatsSlot *sslot2, Form_pg_statistic stats1, Form_pg_statistic stats2, bool have_mcvs1, bool have_mcvs2, + double matchfreq1, int nmatches, RelOptInfo *inner_rel); static bool estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, List **varinfos, double *ndistinct); @@ -2313,6 +2316,9 @@ eqjoinsel(PG_FUNCTION_ARGS) bool get_mcv_stats; bool join_is_reversed; RelOptInfo *inner_rel; + int 
nmatches_mcvs = 0; + double matchfreq_mcvs1 = 0.0; + double matchfreq_mcvs2 = 0.0; get_join_variables(root, args, sjinfo, &vardata1, &vardata2, &join_is_reversed); @@ -2367,7 +2373,9 @@ eqjoinsel(PG_FUNCTION_ARGS) isdefault1, isdefault2, &sslot1, &sslot2, stats1, stats2, - have_mcvs1, have_mcvs2); + have_mcvs1, have_mcvs2, + &matchfreq_mcvs1, &matchfreq_mcvs2, + &nmatches_mcvs); switch (sjinfo->jointype) { @@ -2395,6 +2403,7 @@ eqjoinsel(PG_FUNCTION_ARGS) &sslot1, &sslot2, stats1, stats2, have_mcvs1, have_mcvs2, + matchfreq_mcvs1, nmatches_mcvs, inner_rel); else { @@ -2408,6 +2417,7 @@ eqjoinsel(PG_FUNCTION_ARGS) &sslot2, &sslot1, stats2, stats1, have_mcvs2, have_mcvs1, + matchfreq_mcvs2, nmatches_mcvs, inner_rel); } @@ -2455,7 +2465,9 @@ eqjoinsel_inner(Oid opfuncoid, Oid collation, bool isdefault1, bool isdefault2, AttStatsSlot *sslot1, AttStatsSlot *sslot2, Form_pg_statistic stats1, Form_pg_statistic stats2, - bool have_mcvs1, bool have_mcvs2) + bool have_mcvs1, bool have_mcvs2, + double *matchfreq_mcvs1, double *matchfreq_mcvs2, + int *nmatches_mcvs) { double selec; @@ -2595,6 +2607,11 @@ eqjoinsel_inner(Oid opfuncoid, Oid collation, totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) / (nd1 - nmatches); + /* Save MCV match statistics for possible reuse by eqjoinsel_semi() */ + *matchfreq_mcvs1 = matchfreq1; + *matchfreq_mcvs2 = matchfreq2; + *nmatches_mcvs = nmatches; + /* * Use the smaller of the two estimates. 
This can be justified in * essentially the same terms as given below for the no-stats case: to @@ -2653,6 +2670,7 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation, AttStatsSlot *sslot1, AttStatsSlot *sslot2, Form_pg_statistic stats1, Form_pg_statistic stats2, bool have_mcvs1, bool have_mcvs2, + double matchfreq1, int nmatches, RelOptInfo *inner_rel) { double selec; @@ -2705,11 +2723,9 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation, bool *hasmatch1; bool *hasmatch2; double nullfrac1 = stats1->stanullfrac; - double matchfreq1, - uncertainfrac, + double uncertainfrac, uncertain; int i, - nmatches, clamped_nvalues2; /* @@ -2721,6 +2737,13 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation, */ clamped_nvalues2 = Min(sslot2->nvalues, nd2); + /* + * eqjoinsel_inner() has normally already run the full MCV comparison, + * so reuse its results unless the RHS MCV list was clamped, in which + * case the matching loop must be redone over the reduced list. + */ + if (clamped_nvalues2 != sslot2->nvalues) + { fmgr_info(opfuncoid, &eqproc); /* @@ -2777,6 +2800,7 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation, CLAMP_PROBABILITY(matchfreq1); pfree(hasmatch1); pfree(hasmatch2); + } /* * Now we need to estimate the fraction of relation 1 that has at -- 2.34.1