Re: [BUGS] BUG #14676: neqsel is NULL dumb - Mailing list pgsql-bugs
From | Tom Lane |
---|---|
Subject | Re: [BUGS] BUG #14676: neqsel is NULL dumb |
Date | |
Msg-id | 25540.1496438079@sss.pgh.pa.us Whole thread Raw |
In response to | Re: [BUGS] BUG #14676: neqsel is NULL dumb (Tom Lane <tgl@sss.pgh.pa.us>) |
List | pgsql-bugs |
I wrote: > marko@joh.to writes: >> This seems to be because neqsel() doesn't take at all into account that both >> operators will exclude NULL rows, and does a simple 1.0 - eqsel(). > Yeah, that's clearly broken. A localized fix would be to re-fetch the > nullfrac statistic and subtract it off, but that seems pretty inefficient. > I'd be inclined to refactor things so that eqsel() and neqsel() call a > common routine that takes a "bool negate" argument, about like the way the > patternsel() functions work, and then the common routine could handle > the nullfrac correctly for both cases. Here's a proposed patch for that. I also fixed patternsel, but elected not to mess with neqjoinsel; partly because I think joining on inequality is so rare as to not be worth sweating over, and partly because I wasn't too sure what's the appropriate correction, especially for semijoins. Although this is clearly a bug fix, I'm leaning towards committing it only in HEAD; given the lack of previous field complaints, I fear back-patching might yield more complaints about destabilizing plans than kudos for fixing things. regards, tom lane diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 6e491bb..52d0e48 100644 *** a/src/backend/utils/adt/selfuncs.c --- b/src/backend/utils/adt/selfuncs.c *************** *** 154,165 **** get_relation_stats_hook_type get_relation_stats_hook = NULL; get_index_stats_hook_type get_index_stats_hook = NULL; static double var_eq_const(VariableStatData *vardata, Oid operator, Datum constval, bool constisnull, ! bool varonleft); static double var_eq_non_const(VariableStatData *vardata, Oid operator, Node *other, ! bool varonleft); static double ineq_histogram_selectivity(PlannerInfo *root, VariableStatData *vardata, FmgrInfo *opproc, bool isgt, --- 154,166 ---- get_relation_stats_hook_type get_relation_stats_hook = NULL; get_index_stats_hook_type get_index_stats_hook = NULL; + static double eqsel_internal(PG_FUNCTION_ARGS, bool negate); static double var_eq_const(VariableStatData *vardata, Oid operator, Datum constval, bool constisnull, ! bool varonleft, bool negate); static double var_eq_non_const(VariableStatData *vardata, Oid operator, Node *other, ! bool varonleft, bool negate); static double ineq_histogram_selectivity(PlannerInfo *root, VariableStatData *vardata, FmgrInfo *opproc, bool isgt, *************** static List *add_predicate_to_quals(Inde *** 227,232 **** --- 228,242 ---- Datum eqsel(PG_FUNCTION_ARGS) { + PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, false)); + } + + /* + * Common code for eqsel() and neqsel() + */ + static double + eqsel_internal(PG_FUNCTION_ARGS, bool negate) + { PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); *************** eqsel(PG_FUNCTION_ARGS) *** 237,248 **** double selec; /* * If expression is not variable = something or something = variable, then * punt and return a default estimate. */ if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft)) ! PG_RETURN_FLOAT8(DEFAULT_EQ_SEL); /* * We can do a lot better if the something is a constant. (Note: the --- 247,272 ---- double selec; /* + * When asked about <>, we do the estimation using the corresponding = + * operator, then convert to <> via "1.0 - eq_selectivity - nullfrac". + */ + if (negate) + { + operator = get_negator(operator); + if (!OidIsValid(operator)) + { + /* Use default selectivity (should we raise an error instead?) */ + return 1.0 - DEFAULT_EQ_SEL; + } + } + + /* * If expression is not variable = something or something = variable, then * punt and return a default estimate. */ if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft)) ! return negate ? (1.0 - DEFAULT_EQ_SEL) : DEFAULT_EQ_SEL; /* * We can do a lot better if the something is a constant. (Note: the *************** eqsel(PG_FUNCTION_ARGS) *** 253,266 **** selec = var_eq_const(&vardata, operator, ((Const *) other)->constvalue, ((Const *) other)->constisnull, ! varonleft); else selec = var_eq_non_const(&vardata, operator, other, ! varonleft); ReleaseVariableStats(vardata); ! PG_RETURN_FLOAT8((float8) selec); } /* --- 277,290 ---- selec = var_eq_const(&vardata, operator, ((Const *) other)->constvalue, ((Const *) other)->constisnull, ! varonleft, negate); else selec = var_eq_non_const(&vardata, operator, other, ! varonleft, negate); ReleaseVariableStats(vardata); ! return selec; } /* *************** eqsel(PG_FUNCTION_ARGS) *** 271,290 **** static double var_eq_const(VariableStatData *vardata, Oid operator, Datum constval, bool constisnull, ! bool varonleft) { double selec; bool isdefault; Oid opfuncoid; /* * If the constant is NULL, assume operator is strict and return zero, ie, ! * operator will never return TRUE. */ if (constisnull) return 0.0; /* * If we matched the var to a unique index or DISTINCT clause, assume * there is exactly one match regardless of anything else. (This is * slightly bogus, since the index or clause's equality operator might be --- 295,327 ---- static double var_eq_const(VariableStatData *vardata, Oid operator, Datum constval, bool constisnull, ! bool varonleft, bool negate) { double selec; + double nullfrac = 0.0; bool isdefault; Oid opfuncoid; /* * If the constant is NULL, assume operator is strict and return zero, ie, ! * operator will never return TRUE. (It's zero even for a negator op.) */ if (constisnull) return 0.0; /* + * Grab the nullfrac for use below. Note we allow use of nullfrac + * regardless of security check. + */ + if (HeapTupleIsValid(vardata->statsTuple)) + { + Form_pg_statistic stats; + + stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple); + nullfrac = stats->stanullfrac; + } + + /* * If we matched the var to a unique index or DISTINCT clause, assume * there is exactly one match regardless of anything else. (This is * slightly bogus, since the index or clause's equality operator might be *************** var_eq_const(VariableStatData *vardata, *** 292,302 **** * ignoring the information.) */ if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0) ! return 1.0 / vardata->rel->tuples; ! ! if (HeapTupleIsValid(vardata->statsTuple) && ! statistic_proc_security_check(vardata, ! (opfuncoid = get_opcode(operator)))) { Form_pg_statistic stats; AttStatsSlot sslot; --- 329,340 ---- * ignoring the information.) */ if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0) ! { ! selec = 1.0 / vardata->rel->tuples; ! } ! else if (HeapTupleIsValid(vardata->statsTuple) && ! statistic_proc_security_check(vardata, ! (opfuncoid = get_opcode(operator)))) { Form_pg_statistic stats; AttStatsSlot sslot; *************** var_eq_const(VariableStatData *vardata, *** 363,369 **** for (i = 0; i < sslot.nnumbers; i++) sumcommon += sslot.numbers[i]; ! selec = 1.0 - sumcommon - stats->stanullfrac; CLAMP_PROBABILITY(selec); /* --- 401,407 ---- for (i = 0; i < sslot.nnumbers; i++) sumcommon += sslot.numbers[i]; ! selec = 1.0 - sumcommon - nullfrac; CLAMP_PROBABILITY(selec); /* *************** var_eq_const(VariableStatData *vardata, *** 396,401 **** --- 434,443 ---- selec = 1.0 / get_variable_numdistinct(vardata, &isdefault); } + /* now adjust if we wanted <> rather than = */ + if (negate) + selec = 1.0 - selec - nullfrac; + /* result should be in range, but make sure... */ CLAMP_PROBABILITY(selec); *************** var_eq_const(VariableStatData *vardata, *** 408,419 **** static double var_eq_non_const(VariableStatData *vardata, Oid operator, Node *other, ! bool varonleft) { double selec; bool isdefault; /* * If we matched the var to a unique index or DISTINCT clause, assume * there is exactly one match regardless of anything else. (This is * slightly bogus, since the index or clause's equality operator might be --- 450,473 ---- static double var_eq_non_const(VariableStatData *vardata, Oid operator, Node *other, ! bool varonleft, bool negate) { double selec; + double nullfrac = 0.0; bool isdefault; /* + * Grab the nullfrac for use below. + */ + if (HeapTupleIsValid(vardata->statsTuple)) + { + Form_pg_statistic stats; + + stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple); + nullfrac = stats->stanullfrac; + } + + /* * If we matched the var to a unique index or DISTINCT clause, assume * there is exactly one match regardless of anything else. (This is * slightly bogus, since the index or clause's equality operator might be *************** var_eq_non_const(VariableStatData *varda *** 421,429 **** * ignoring the information.) */ if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0) ! return 1.0 / vardata->rel->tuples; ! ! if (HeapTupleIsValid(vardata->statsTuple)) { Form_pg_statistic stats; double ndistinct; --- 475,484 ---- * ignoring the information.) */ if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0) ! { ! selec = 1.0 / vardata->rel->tuples; ! } ! else if (HeapTupleIsValid(vardata->statsTuple)) { Form_pg_statistic stats; double ndistinct; *************** var_eq_non_const(VariableStatData *varda *** 441,447 **** * values, regardless of their frequency in the table. Is that a good * idea?) */ ! selec = 1.0 - stats->stanullfrac; ndistinct = get_variable_numdistinct(vardata, &isdefault); if (ndistinct > 1) selec /= ndistinct; --- 496,502 ---- * values, regardless of their frequency in the table. Is that a good * idea?) */ ! selec = 1.0 - nullfrac; ndistinct = get_variable_numdistinct(vardata, &isdefault); if (ndistinct > 1) selec /= ndistinct; *************** var_eq_non_const(VariableStatData *varda *** 469,474 **** --- 524,533 ---- selec = 1.0 / get_variable_numdistinct(vardata, &isdefault); } + /* now adjust if we wanted <> rather than = */ + if (negate) + selec = 1.0 - selec - nullfrac; + /* result should be in range, but make sure... */ CLAMP_PROBABILITY(selec); *************** var_eq_non_const(VariableStatData *varda *** 485,517 **** Datum neqsel(PG_FUNCTION_ARGS) { ! PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); ! Oid operator = PG_GETARG_OID(1); ! List *args = (List *) PG_GETARG_POINTER(2); ! int varRelid = PG_GETARG_INT32(3); ! Oid eqop; ! float8 result; ! ! /* ! * We want 1 - eqsel() where the equality operator is the one associated ! * with this != operator, that is, its negator. ! */ ! eqop = get_negator(operator); ! if (eqop) ! { ! result = DatumGetFloat8(DirectFunctionCall4(eqsel, ! PointerGetDatum(root), ! ObjectIdGetDatum(eqop), ! PointerGetDatum(args), ! Int32GetDatum(varRelid))); ! } ! else ! { ! /* Use default selectivity (should we raise an error instead?) */ ! result = DEFAULT_EQ_SEL; ! } ! result = 1.0 - result; ! PG_RETURN_FLOAT8(result); } /* --- 544,550 ---- Datum neqsel(PG_FUNCTION_ARGS) { ! PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, true)); } /* *************** patternsel(PG_FUNCTION_ARGS, Pattern_Typ *** 1114,1119 **** --- 1147,1153 ---- Const *patt; Const *prefix = NULL; Selectivity rest_selec = 0; + double nullfrac = 0.0; double result; /* *************** patternsel(PG_FUNCTION_ARGS, Pattern_Typ *** 1203,1208 **** --- 1237,1253 ---- } /* + * Grab the nullfrac for use below. + */ + if (HeapTupleIsValid(vardata.statsTuple)) + { + Form_pg_statistic stats; + + stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); + nullfrac = stats->stanullfrac; + } + + /* * Pull out any fixed prefix implied by the pattern, and estimate the * fractional selectivity of the remainder of the pattern. Unlike many of * the other functions in this file, we use the pattern operator's actual *************** patternsel(PG_FUNCTION_ARGS, Pattern_Typ *** 1252,1258 **** if (eqopr == InvalidOid) elog(ERROR, "no = operator for opfamily %u", opfamily); result = var_eq_const(&vardata, eqopr, prefix->constvalue, ! false, true); } else { --- 1297,1303 ---- if (eqopr == InvalidOid) elog(ERROR, "no = operator for opfamily %u", opfamily); result = var_eq_const(&vardata, eqopr, prefix->constvalue, ! false, true, false); } else { *************** patternsel(PG_FUNCTION_ARGS, Pattern_Typ *** 1275,1282 **** Selectivity selec; int hist_size; FmgrInfo opproc; ! double nullfrac, ! mcv_selec, sumcommon; /* Try to use the histogram entries to get selectivity */ --- 1320,1326 ---- Selectivity selec; int hist_size; FmgrInfo opproc; ! double mcv_selec, sumcommon; /* Try to use the histogram entries to get selectivity */ *************** patternsel(PG_FUNCTION_ARGS, Pattern_Typ *** 1328,1338 **** mcv_selec = mcv_selectivity(&vardata, &opproc, constval, true, &sumcommon); - if (HeapTupleIsValid(vardata.statsTuple)) - nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac; - else - nullfrac = 0.0; - /* * Now merge the results from the MCV and histogram calculations, * realizing that the histogram covers only the non-null values that --- 1372,1377 ---- *************** patternsel(PG_FUNCTION_ARGS, Pattern_Typ *** 1340,1351 **** */ selec *= 1.0 - nullfrac - sumcommon; selec += mcv_selec; - - /* result should be in range, but make sure... */ - CLAMP_PROBABILITY(selec); result = selec; } if (prefix) { pfree(DatumGetPointer(prefix->constvalue)); --- 1379,1394 ---- */ selec *= 1.0 - nullfrac - sumcommon; selec += mcv_selec; result = selec; } + /* now adjust if we wanted not-match rather than match */ + if (negate) + result = 1.0 - result - nullfrac; + + /* result should be in range, but make sure... */ + CLAMP_PROBABILITY(result); + if (prefix) { pfree(DatumGetPointer(prefix->constvalue)); *************** patternsel(PG_FUNCTION_ARGS, Pattern_Typ *** 1354,1360 **** ReleaseVariableStats(vardata); ! return negate ? (1.0 - result) : result; } /* --- 1397,1403 ---- ReleaseVariableStats(vardata); ! return result; } /* *************** boolvarsel(PlannerInfo *root, Node *arg, *** 1451,1457 **** * compute the selectivity as if that is what we have. */ selec = var_eq_const(&vardata, BooleanEqualOperator, ! BoolGetDatum(true), false, true); } else if (is_funcclause(arg)) { --- 1494,1500 ---- * compute the selectivity as if that is what we have. */ selec = var_eq_const(&vardata, BooleanEqualOperator, ! BoolGetDatum(true), false, true, false); } else if (is_funcclause(arg)) { *************** prefix_selectivity(PlannerInfo *root, Va *** 5788,5794 **** if (cmpopr == InvalidOid) elog(ERROR, "no = operator for opfamily %u", opfamily); eq_sel = var_eq_const(vardata, cmpopr, prefixcon->constvalue, ! false, true); prefixsel = Max(prefixsel, eq_sel); --- 5831,5837 ---- if (cmpopr == InvalidOid) elog(ERROR, "no = operator for opfamily %u", opfamily); eq_sel = var_eq_const(vardata, cmpopr, prefixcon->constvalue, ! false, true, false); prefixsel = Max(prefixsel, eq_sel); -- Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-bugs
pgsql-bugs by date: