Re: [BUGS] BUG #14676: neqsel is NULL dumb - Mailing list pgsql-bugs
| From | Tom Lane |
|---|---|
| Subject | Re: [BUGS] BUG #14676: neqsel is NULL dumb |
| Date | |
| Msg-id | 25540.1496438079@sss.pgh.pa.us Whole thread Raw |
| In response to | Re: [BUGS] BUG #14676: neqsel is NULL dumb (Tom Lane <tgl@sss.pgh.pa.us>) |
| List | pgsql-bugs |
I wrote:
> marko@joh.to writes:
>> This seems to be because neqsel() doesn't take at all into account that both
>> operators will exclude NULL rows, and does a simple 1.0 - eqsel().
> Yeah, that's clearly broken. A localized fix would be to re-fetch the
> nullfrac statistic and subtract it off, but that seems pretty inefficient.
> I'd be inclined to refactor things so that eqsel() and neqsel() call a
> common routine that takes a "bool negate" argument, about like the way the
> patternsel() functions work, and then the common routine could handle
> the nullfrac correctly for both cases.
Here's a proposed patch for that. I also fixed patternsel, but elected
not to mess with neqjoinsel; partly because I think joining on inequality
is so rare as to not be worth sweating over, and partly because I wasn't
too sure what's the appropriate correction, especially for semijoins.
Although this is clearly a bug fix, I'm leaning towards committing it
only in HEAD; given the lack of previous field complaints, I fear
back-patching might yield more complaints about destabilizing plans
than kudos for fixing things.
regards, tom lane
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 6e491bb..52d0e48 100644
*** a/src/backend/utils/adt/selfuncs.c
--- b/src/backend/utils/adt/selfuncs.c
***************
*** 154,165 ****
get_relation_stats_hook_type get_relation_stats_hook = NULL;
get_index_stats_hook_type get_index_stats_hook = NULL;
static double var_eq_const(VariableStatData *vardata, Oid operator,
Datum constval, bool constisnull,
! bool varonleft);
static double var_eq_non_const(VariableStatData *vardata, Oid operator,
Node *other,
! bool varonleft);
static double ineq_histogram_selectivity(PlannerInfo *root,
VariableStatData *vardata,
FmgrInfo *opproc, bool isgt,
--- 154,166 ----
get_relation_stats_hook_type get_relation_stats_hook = NULL;
get_index_stats_hook_type get_index_stats_hook = NULL;
+ static double eqsel_internal(PG_FUNCTION_ARGS, bool negate);
static double var_eq_const(VariableStatData *vardata, Oid operator,
Datum constval, bool constisnull,
! bool varonleft, bool negate);
static double var_eq_non_const(VariableStatData *vardata, Oid operator,
Node *other,
! bool varonleft, bool negate);
static double ineq_histogram_selectivity(PlannerInfo *root,
VariableStatData *vardata,
FmgrInfo *opproc, bool isgt,
*************** static List *add_predicate_to_quals(Inde
*** 227,232 ****
--- 228,242 ----
Datum
eqsel(PG_FUNCTION_ARGS)
{
+ PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, false));
+ }
+
+ /*
+ * Common code for eqsel() and neqsel()
+ */
+ static double
+ eqsel_internal(PG_FUNCTION_ARGS, bool negate)
+ {
PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
*************** eqsel(PG_FUNCTION_ARGS)
*** 237,248 ****
double selec;
/*
* If expression is not variable = something or something = variable, then
* punt and return a default estimate.
*/
if (!get_restriction_variable(root, args, varRelid,
&vardata, &other, &varonleft))
! PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
/*
* We can do a lot better if the something is a constant. (Note: the
--- 247,272 ----
double selec;
/*
+ * When asked about <>, we do the estimation using the corresponding =
+ * operator, then convert to <> via "1.0 - eq_selectivity - nullfrac".
+ */
+ if (negate)
+ {
+ operator = get_negator(operator);
+ if (!OidIsValid(operator))
+ {
+ /* Use default selectivity (should we raise an error instead?) */
+ return 1.0 - DEFAULT_EQ_SEL;
+ }
+ }
+
+ /*
* If expression is not variable = something or something = variable, then
* punt and return a default estimate.
*/
if (!get_restriction_variable(root, args, varRelid,
&vardata, &other, &varonleft))
! return negate ? (1.0 - DEFAULT_EQ_SEL) : DEFAULT_EQ_SEL;
/*
* We can do a lot better if the something is a constant. (Note: the
*************** eqsel(PG_FUNCTION_ARGS)
*** 253,266 ****
selec = var_eq_const(&vardata, operator,
((Const *) other)->constvalue,
((Const *) other)->constisnull,
! varonleft);
else
selec = var_eq_non_const(&vardata, operator, other,
! varonleft);
ReleaseVariableStats(vardata);
! PG_RETURN_FLOAT8((float8) selec);
}
/*
--- 277,290 ----
selec = var_eq_const(&vardata, operator,
((Const *) other)->constvalue,
((Const *) other)->constisnull,
! varonleft, negate);
else
selec = var_eq_non_const(&vardata, operator, other,
! varonleft, negate);
ReleaseVariableStats(vardata);
! return selec;
}
/*
*************** eqsel(PG_FUNCTION_ARGS)
*** 271,290 ****
static double
var_eq_const(VariableStatData *vardata, Oid operator,
Datum constval, bool constisnull,
! bool varonleft)
{
double selec;
bool isdefault;
Oid opfuncoid;
/*
* If the constant is NULL, assume operator is strict and return zero, ie,
! * operator will never return TRUE.
*/
if (constisnull)
return 0.0;
/*
* If we matched the var to a unique index or DISTINCT clause, assume
* there is exactly one match regardless of anything else. (This is
* slightly bogus, since the index or clause's equality operator might be
--- 295,327 ----
static double
var_eq_const(VariableStatData *vardata, Oid operator,
Datum constval, bool constisnull,
! bool varonleft, bool negate)
{
double selec;
+ double nullfrac = 0.0;
bool isdefault;
Oid opfuncoid;
/*
* If the constant is NULL, assume operator is strict and return zero, ie,
! * operator will never return TRUE. (It's zero even for a negator op.)
*/
if (constisnull)
return 0.0;
/*
+ * Grab the nullfrac for use below. Note we allow use of nullfrac
+ * regardless of security check.
+ */
+ if (HeapTupleIsValid(vardata->statsTuple))
+ {
+ Form_pg_statistic stats;
+
+ stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
+ nullfrac = stats->stanullfrac;
+ }
+
+ /*
* If we matched the var to a unique index or DISTINCT clause, assume
* there is exactly one match regardless of anything else. (This is
* slightly bogus, since the index or clause's equality operator might be
*************** var_eq_const(VariableStatData *vardata,
*** 292,302 ****
* ignoring the information.)
*/
if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
! return 1.0 / vardata->rel->tuples;
!
! if (HeapTupleIsValid(vardata->statsTuple) &&
! statistic_proc_security_check(vardata,
! (opfuncoid = get_opcode(operator))))
{
Form_pg_statistic stats;
AttStatsSlot sslot;
--- 329,340 ----
* ignoring the information.)
*/
if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
! {
! selec = 1.0 / vardata->rel->tuples;
! }
! else if (HeapTupleIsValid(vardata->statsTuple) &&
! statistic_proc_security_check(vardata,
! (opfuncoid = get_opcode(operator))))
{
Form_pg_statistic stats;
AttStatsSlot sslot;
*************** var_eq_const(VariableStatData *vardata,
*** 363,369 ****
for (i = 0; i < sslot.nnumbers; i++)
sumcommon += sslot.numbers[i];
! selec = 1.0 - sumcommon - stats->stanullfrac;
CLAMP_PROBABILITY(selec);
/*
--- 401,407 ----
for (i = 0; i < sslot.nnumbers; i++)
sumcommon += sslot.numbers[i];
! selec = 1.0 - sumcommon - nullfrac;
CLAMP_PROBABILITY(selec);
/*
*************** var_eq_const(VariableStatData *vardata,
*** 396,401 ****
--- 434,443 ----
selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
}
+ /* now adjust if we wanted <> rather than = */
+ if (negate)
+ selec = 1.0 - selec - nullfrac;
+
/* result should be in range, but make sure... */
CLAMP_PROBABILITY(selec);
*************** var_eq_const(VariableStatData *vardata,
*** 408,419 ****
static double
var_eq_non_const(VariableStatData *vardata, Oid operator,
Node *other,
! bool varonleft)
{
double selec;
bool isdefault;
/*
* If we matched the var to a unique index or DISTINCT clause, assume
* there is exactly one match regardless of anything else. (This is
* slightly bogus, since the index or clause's equality operator might be
--- 450,473 ----
static double
var_eq_non_const(VariableStatData *vardata, Oid operator,
Node *other,
! bool varonleft, bool negate)
{
double selec;
+ double nullfrac = 0.0;
bool isdefault;
/*
+ * Grab the nullfrac for use below.
+ */
+ if (HeapTupleIsValid(vardata->statsTuple))
+ {
+ Form_pg_statistic stats;
+
+ stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
+ nullfrac = stats->stanullfrac;
+ }
+
+ /*
* If we matched the var to a unique index or DISTINCT clause, assume
* there is exactly one match regardless of anything else. (This is
* slightly bogus, since the index or clause's equality operator might be
*************** var_eq_non_const(VariableStatData *varda
*** 421,429 ****
* ignoring the information.)
*/
if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
! return 1.0 / vardata->rel->tuples;
!
! if (HeapTupleIsValid(vardata->statsTuple))
{
Form_pg_statistic stats;
double ndistinct;
--- 475,484 ----
* ignoring the information.)
*/
if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
! {
! selec = 1.0 / vardata->rel->tuples;
! }
! else if (HeapTupleIsValid(vardata->statsTuple))
{
Form_pg_statistic stats;
double ndistinct;
*************** var_eq_non_const(VariableStatData *varda
*** 441,447 ****
* values, regardless of their frequency in the table. Is that a good
* idea?)
*/
! selec = 1.0 - stats->stanullfrac;
ndistinct = get_variable_numdistinct(vardata, &isdefault);
if (ndistinct > 1)
selec /= ndistinct;
--- 496,502 ----
* values, regardless of their frequency in the table. Is that a good
* idea?)
*/
! selec = 1.0 - nullfrac;
ndistinct = get_variable_numdistinct(vardata, &isdefault);
if (ndistinct > 1)
selec /= ndistinct;
*************** var_eq_non_const(VariableStatData *varda
*** 469,474 ****
--- 524,533 ----
selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
}
+ /* now adjust if we wanted <> rather than = */
+ if (negate)
+ selec = 1.0 - selec - nullfrac;
+
/* result should be in range, but make sure... */
CLAMP_PROBABILITY(selec);
*************** var_eq_non_const(VariableStatData *varda
*** 485,517 ****
Datum
neqsel(PG_FUNCTION_ARGS)
{
! PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
! Oid operator = PG_GETARG_OID(1);
! List *args = (List *) PG_GETARG_POINTER(2);
! int varRelid = PG_GETARG_INT32(3);
! Oid eqop;
! float8 result;
!
! /*
! * We want 1 - eqsel() where the equality operator is the one associated
! * with this != operator, that is, its negator.
! */
! eqop = get_negator(operator);
! if (eqop)
! {
! result = DatumGetFloat8(DirectFunctionCall4(eqsel,
! PointerGetDatum(root),
! ObjectIdGetDatum(eqop),
! PointerGetDatum(args),
! Int32GetDatum(varRelid)));
! }
! else
! {
! /* Use default selectivity (should we raise an error instead?) */
! result = DEFAULT_EQ_SEL;
! }
! result = 1.0 - result;
! PG_RETURN_FLOAT8(result);
}
/*
--- 544,550 ----
Datum
neqsel(PG_FUNCTION_ARGS)
{
! PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, true));
}
/*
*************** patternsel(PG_FUNCTION_ARGS, Pattern_Typ
*** 1114,1119 ****
--- 1147,1153 ----
Const *patt;
Const *prefix = NULL;
Selectivity rest_selec = 0;
+ double nullfrac = 0.0;
double result;
/*
*************** patternsel(PG_FUNCTION_ARGS, Pattern_Typ
*** 1203,1208 ****
--- 1237,1253 ----
}
/*
+ * Grab the nullfrac for use below.
+ */
+ if (HeapTupleIsValid(vardata.statsTuple))
+ {
+ Form_pg_statistic stats;
+
+ stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
+ nullfrac = stats->stanullfrac;
+ }
+
+ /*
* Pull out any fixed prefix implied by the pattern, and estimate the
* fractional selectivity of the remainder of the pattern. Unlike many of
* the other functions in this file, we use the pattern operator's actual
*************** patternsel(PG_FUNCTION_ARGS, Pattern_Typ
*** 1252,1258 ****
if (eqopr == InvalidOid)
elog(ERROR, "no = operator for opfamily %u", opfamily);
result = var_eq_const(&vardata, eqopr, prefix->constvalue,
! false, true);
}
else
{
--- 1297,1303 ----
if (eqopr == InvalidOid)
elog(ERROR, "no = operator for opfamily %u", opfamily);
result = var_eq_const(&vardata, eqopr, prefix->constvalue,
! false, true, false);
}
else
{
*************** patternsel(PG_FUNCTION_ARGS, Pattern_Typ
*** 1275,1282 ****
Selectivity selec;
int hist_size;
FmgrInfo opproc;
! double nullfrac,
! mcv_selec,
sumcommon;
/* Try to use the histogram entries to get selectivity */
--- 1320,1326 ----
Selectivity selec;
int hist_size;
FmgrInfo opproc;
! double mcv_selec,
sumcommon;
/* Try to use the histogram entries to get selectivity */
*************** patternsel(PG_FUNCTION_ARGS, Pattern_Typ
*** 1328,1338 ****
mcv_selec = mcv_selectivity(&vardata, &opproc, constval, true,
&sumcommon);
- if (HeapTupleIsValid(vardata.statsTuple))
- nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
- else
- nullfrac = 0.0;
-
/*
* Now merge the results from the MCV and histogram calculations,
* realizing that the histogram covers only the non-null values that
--- 1372,1377 ----
*************** patternsel(PG_FUNCTION_ARGS, Pattern_Typ
*** 1340,1351 ****
*/
selec *= 1.0 - nullfrac - sumcommon;
selec += mcv_selec;
-
- /* result should be in range, but make sure... */
- CLAMP_PROBABILITY(selec);
result = selec;
}
if (prefix)
{
pfree(DatumGetPointer(prefix->constvalue));
--- 1379,1394 ----
*/
selec *= 1.0 - nullfrac - sumcommon;
selec += mcv_selec;
result = selec;
}
+ /* now adjust if we wanted not-match rather than match */
+ if (negate)
+ result = 1.0 - result - nullfrac;
+
+ /* result should be in range, but make sure... */
+ CLAMP_PROBABILITY(result);
+
if (prefix)
{
pfree(DatumGetPointer(prefix->constvalue));
*************** patternsel(PG_FUNCTION_ARGS, Pattern_Typ
*** 1354,1360 ****
ReleaseVariableStats(vardata);
! return negate ? (1.0 - result) : result;
}
/*
--- 1397,1403 ----
ReleaseVariableStats(vardata);
! return result;
}
/*
*************** boolvarsel(PlannerInfo *root, Node *arg,
*** 1451,1457 ****
* compute the selectivity as if that is what we have.
*/
selec = var_eq_const(&vardata, BooleanEqualOperator,
! BoolGetDatum(true), false, true);
}
else if (is_funcclause(arg))
{
--- 1494,1500 ----
* compute the selectivity as if that is what we have.
*/
selec = var_eq_const(&vardata, BooleanEqualOperator,
! BoolGetDatum(true), false, true, false);
}
else if (is_funcclause(arg))
{
*************** prefix_selectivity(PlannerInfo *root, Va
*** 5788,5794 ****
if (cmpopr == InvalidOid)
elog(ERROR, "no = operator for opfamily %u", opfamily);
eq_sel = var_eq_const(vardata, cmpopr, prefixcon->constvalue,
! false, true);
prefixsel = Max(prefixsel, eq_sel);
--- 5831,5837 ----
if (cmpopr == InvalidOid)
elog(ERROR, "no = operator for opfamily %u", opfamily);
eq_sel = var_eq_const(vardata, cmpopr, prefixcon->constvalue,
! false, true, false);
prefixsel = Max(prefixsel, eq_sel);
--
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs
pgsql-bugs by date: