Re: [BUGS] BUG #14676: neqsel is NULL dumb - Mailing list pgsql-bugs

From Tom Lane
Subject Re: [BUGS] BUG #14676: neqsel is NULL dumb
Date
Msg-id 25540.1496438079@sss.pgh.pa.us
Whole thread Raw
In response to Re: [BUGS] BUG #14676: neqsel is NULL dumb  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-bugs
I wrote:
> marko@joh.to writes:
>> This seems to be because neqsel() doesn't take at all into account that both
>> operators will exclude NULL rows, and does a simple  1.0 - eqsel().

> Yeah, that's clearly broken.  A localized fix would be to re-fetch the
> nullfrac statistic and subtract it off, but that seems pretty inefficient.
> I'd be inclined to refactor things so that eqsel() and neqsel() call a
> common routine that takes a "bool negate" argument, about like the way the
> patternsel() functions work, and then the common routine could handle
> the nullfrac correctly for both cases.

Here's a proposed patch for that.  I also fixed patternsel, but elected
not to mess with neqjoinsel; partly because I think joining on inequality
is so rare as to not be worth sweating over, and partly because I wasn't
too sure what's the appropriate correction, especially for semijoins.

Although this is clearly a bug fix, I'm leaning towards committing it
only in HEAD; given the lack of previous field complaints, I fear
back-patching might yield more complaints about destabilizing plans
than kudos for fixing things.

            regards, tom lane

diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 6e491bb..52d0e48 100644
*** a/src/backend/utils/adt/selfuncs.c
--- b/src/backend/utils/adt/selfuncs.c
***************
*** 154,165 ****
  get_relation_stats_hook_type get_relation_stats_hook = NULL;
  get_index_stats_hook_type get_index_stats_hook = NULL;

  static double var_eq_const(VariableStatData *vardata, Oid operator,
               Datum constval, bool constisnull,
!              bool varonleft);
  static double var_eq_non_const(VariableStatData *vardata, Oid operator,
                   Node *other,
!                  bool varonleft);
  static double ineq_histogram_selectivity(PlannerInfo *root,
                             VariableStatData *vardata,
                             FmgrInfo *opproc, bool isgt,
--- 154,166 ----
  get_relation_stats_hook_type get_relation_stats_hook = NULL;
  get_index_stats_hook_type get_index_stats_hook = NULL;

+ static double eqsel_internal(PG_FUNCTION_ARGS, bool negate);
  static double var_eq_const(VariableStatData *vardata, Oid operator,
               Datum constval, bool constisnull,
!              bool varonleft, bool negate);
  static double var_eq_non_const(VariableStatData *vardata, Oid operator,
                   Node *other,
!                  bool varonleft, bool negate);
  static double ineq_histogram_selectivity(PlannerInfo *root,
                             VariableStatData *vardata,
                             FmgrInfo *opproc, bool isgt,
*************** static List *add_predicate_to_quals(Inde
*** 227,232 ****
--- 228,242 ----
  Datum
  eqsel(PG_FUNCTION_ARGS)
  {
+     PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, false));
+ }
+
+ /*
+  * Common code for eqsel() and neqsel()
+  */
+ static double
+ eqsel_internal(PG_FUNCTION_ARGS, bool negate)
+ {
      PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
      Oid            operator = PG_GETARG_OID(1);
      List       *args = (List *) PG_GETARG_POINTER(2);
*************** eqsel(PG_FUNCTION_ARGS)
*** 237,248 ****
      double        selec;

      /*
       * If expression is not variable = something or something = variable, then
       * punt and return a default estimate.
       */
      if (!get_restriction_variable(root, args, varRelid,
                                    &vardata, &other, &varonleft))
!         PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);

      /*
       * We can do a lot better if the something is a constant.  (Note: the
--- 247,272 ----
      double        selec;

      /*
+      * When asked about <>, we do the estimation using the corresponding =
+      * operator, then convert to <> via "1.0 - eq_selectivity - nullfrac".
+      */
+     if (negate)
+     {
+         operator = get_negator(operator);
+         if (!OidIsValid(operator))
+         {
+             /* Use default selectivity (should we raise an error instead?) */
+             return 1.0 - DEFAULT_EQ_SEL;
+         }
+     }
+
+     /*
       * If expression is not variable = something or something = variable, then
       * punt and return a default estimate.
       */
      if (!get_restriction_variable(root, args, varRelid,
                                    &vardata, &other, &varonleft))
!         return negate ? (1.0 - DEFAULT_EQ_SEL) : DEFAULT_EQ_SEL;

      /*
       * We can do a lot better if the something is a constant.  (Note: the
*************** eqsel(PG_FUNCTION_ARGS)
*** 253,266 ****
          selec = var_eq_const(&vardata, operator,
                               ((Const *) other)->constvalue,
                               ((Const *) other)->constisnull,
!                              varonleft);
      else
          selec = var_eq_non_const(&vardata, operator, other,
!                                  varonleft);

      ReleaseVariableStats(vardata);

!     PG_RETURN_FLOAT8((float8) selec);
  }

  /*
--- 277,290 ----
          selec = var_eq_const(&vardata, operator,
                               ((Const *) other)->constvalue,
                               ((Const *) other)->constisnull,
!                              varonleft, negate);
      else
          selec = var_eq_non_const(&vardata, operator, other,
!                                  varonleft, negate);

      ReleaseVariableStats(vardata);

!     return selec;
  }

  /*
*************** eqsel(PG_FUNCTION_ARGS)
*** 271,290 ****
  static double
  var_eq_const(VariableStatData *vardata, Oid operator,
               Datum constval, bool constisnull,
!              bool varonleft)
  {
      double        selec;
      bool        isdefault;
      Oid            opfuncoid;

      /*
       * If the constant is NULL, assume operator is strict and return zero, ie,
!      * operator will never return TRUE.
       */
      if (constisnull)
          return 0.0;

      /*
       * If we matched the var to a unique index or DISTINCT clause, assume
       * there is exactly one match regardless of anything else.  (This is
       * slightly bogus, since the index or clause's equality operator might be
--- 295,327 ----
  static double
  var_eq_const(VariableStatData *vardata, Oid operator,
               Datum constval, bool constisnull,
!              bool varonleft, bool negate)
  {
      double        selec;
+     double        nullfrac = 0.0;
      bool        isdefault;
      Oid            opfuncoid;

      /*
       * If the constant is NULL, assume operator is strict and return zero, ie,
!      * operator will never return TRUE.  (It's zero even for a negator op.)
       */
      if (constisnull)
          return 0.0;

      /*
+      * Grab the nullfrac for use below.  Note we allow use of nullfrac
+      * regardless of security check.
+      */
+     if (HeapTupleIsValid(vardata->statsTuple))
+     {
+         Form_pg_statistic stats;
+
+         stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
+         nullfrac = stats->stanullfrac;
+     }
+
+     /*
       * If we matched the var to a unique index or DISTINCT clause, assume
       * there is exactly one match regardless of anything else.  (This is
       * slightly bogus, since the index or clause's equality operator might be
*************** var_eq_const(VariableStatData *vardata,
*** 292,302 ****
       * ignoring the information.)
       */
      if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
!         return 1.0 / vardata->rel->tuples;
!
!     if (HeapTupleIsValid(vardata->statsTuple) &&
!         statistic_proc_security_check(vardata,
!                                       (opfuncoid = get_opcode(operator))))
      {
          Form_pg_statistic stats;
          AttStatsSlot sslot;
--- 329,340 ----
       * ignoring the information.)
       */
      if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
!     {
!         selec = 1.0 / vardata->rel->tuples;
!     }
!     else if (HeapTupleIsValid(vardata->statsTuple) &&
!              statistic_proc_security_check(vardata,
!                                          (opfuncoid = get_opcode(operator))))
      {
          Form_pg_statistic stats;
          AttStatsSlot sslot;
*************** var_eq_const(VariableStatData *vardata,
*** 363,369 ****

              for (i = 0; i < sslot.nnumbers; i++)
                  sumcommon += sslot.numbers[i];
!             selec = 1.0 - sumcommon - stats->stanullfrac;
              CLAMP_PROBABILITY(selec);

              /*
--- 401,407 ----

              for (i = 0; i < sslot.nnumbers; i++)
                  sumcommon += sslot.numbers[i];
!             selec = 1.0 - sumcommon - nullfrac;
              CLAMP_PROBABILITY(selec);

              /*
*************** var_eq_const(VariableStatData *vardata,
*** 396,401 ****
--- 434,443 ----
          selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
      }

+     /* now adjust if we wanted <> rather than = */
+     if (negate)
+         selec = 1.0 - selec - nullfrac;
+
      /* result should be in range, but make sure... */
      CLAMP_PROBABILITY(selec);

*************** var_eq_const(VariableStatData *vardata,
*** 408,419 ****
  static double
  var_eq_non_const(VariableStatData *vardata, Oid operator,
                   Node *other,
!                  bool varonleft)
  {
      double        selec;
      bool        isdefault;

      /*
       * If we matched the var to a unique index or DISTINCT clause, assume
       * there is exactly one match regardless of anything else.  (This is
       * slightly bogus, since the index or clause's equality operator might be
--- 450,473 ----
  static double
  var_eq_non_const(VariableStatData *vardata, Oid operator,
                   Node *other,
!                  bool varonleft, bool negate)
  {
      double        selec;
+     double        nullfrac = 0.0;
      bool        isdefault;

      /*
+      * Grab the nullfrac for use below.
+      */
+     if (HeapTupleIsValid(vardata->statsTuple))
+     {
+         Form_pg_statistic stats;
+
+         stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
+         nullfrac = stats->stanullfrac;
+     }
+
+     /*
       * If we matched the var to a unique index or DISTINCT clause, assume
       * there is exactly one match regardless of anything else.  (This is
       * slightly bogus, since the index or clause's equality operator might be
*************** var_eq_non_const(VariableStatData *varda
*** 421,429 ****
       * ignoring the information.)
       */
      if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
!         return 1.0 / vardata->rel->tuples;
!
!     if (HeapTupleIsValid(vardata->statsTuple))
      {
          Form_pg_statistic stats;
          double        ndistinct;
--- 475,484 ----
       * ignoring the information.)
       */
      if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
!     {
!         selec = 1.0 / vardata->rel->tuples;
!     }
!     else if (HeapTupleIsValid(vardata->statsTuple))
      {
          Form_pg_statistic stats;
          double        ndistinct;
*************** var_eq_non_const(VariableStatData *varda
*** 441,447 ****
           * values, regardless of their frequency in the table.  Is that a good
           * idea?)
           */
!         selec = 1.0 - stats->stanullfrac;
          ndistinct = get_variable_numdistinct(vardata, &isdefault);
          if (ndistinct > 1)
              selec /= ndistinct;
--- 496,502 ----
           * values, regardless of their frequency in the table.  Is that a good
           * idea?)
           */
!         selec = 1.0 - nullfrac;
          ndistinct = get_variable_numdistinct(vardata, &isdefault);
          if (ndistinct > 1)
              selec /= ndistinct;
*************** var_eq_non_const(VariableStatData *varda
*** 469,474 ****
--- 524,533 ----
          selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
      }

+     /* now adjust if we wanted <> rather than = */
+     if (negate)
+         selec = 1.0 - selec - nullfrac;
+
      /* result should be in range, but make sure... */
      CLAMP_PROBABILITY(selec);

*************** var_eq_non_const(VariableStatData *varda
*** 485,517 ****
  Datum
  neqsel(PG_FUNCTION_ARGS)
  {
!     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
!     Oid            operator = PG_GETARG_OID(1);
!     List       *args = (List *) PG_GETARG_POINTER(2);
!     int            varRelid = PG_GETARG_INT32(3);
!     Oid            eqop;
!     float8        result;
!
!     /*
!      * We want 1 - eqsel() where the equality operator is the one associated
!      * with this != operator, that is, its negator.
!      */
!     eqop = get_negator(operator);
!     if (eqop)
!     {
!         result = DatumGetFloat8(DirectFunctionCall4(eqsel,
!                                                     PointerGetDatum(root),
!                                                     ObjectIdGetDatum(eqop),
!                                                     PointerGetDatum(args),
!                                                     Int32GetDatum(varRelid)));
!     }
!     else
!     {
!         /* Use default selectivity (should we raise an error instead?) */
!         result = DEFAULT_EQ_SEL;
!     }
!     result = 1.0 - result;
!     PG_RETURN_FLOAT8(result);
  }

  /*
--- 544,550 ----
  Datum
  neqsel(PG_FUNCTION_ARGS)
  {
!     PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, true));
  }

  /*
*************** patternsel(PG_FUNCTION_ARGS, Pattern_Typ
*** 1114,1119 ****
--- 1147,1153 ----
      Const       *patt;
      Const       *prefix = NULL;
      Selectivity rest_selec = 0;
+     double        nullfrac = 0.0;
      double        result;

      /*
*************** patternsel(PG_FUNCTION_ARGS, Pattern_Typ
*** 1203,1208 ****
--- 1237,1253 ----
      }

      /*
+      * Grab the nullfrac for use below.
+      */
+     if (HeapTupleIsValid(vardata.statsTuple))
+     {
+         Form_pg_statistic stats;
+
+         stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
+         nullfrac = stats->stanullfrac;
+     }
+
+     /*
       * Pull out any fixed prefix implied by the pattern, and estimate the
       * fractional selectivity of the remainder of the pattern.  Unlike many of
       * the other functions in this file, we use the pattern operator's actual
*************** patternsel(PG_FUNCTION_ARGS, Pattern_Typ
*** 1252,1258 ****
          if (eqopr == InvalidOid)
              elog(ERROR, "no = operator for opfamily %u", opfamily);
          result = var_eq_const(&vardata, eqopr, prefix->constvalue,
!                               false, true);
      }
      else
      {
--- 1297,1303 ----
          if (eqopr == InvalidOid)
              elog(ERROR, "no = operator for opfamily %u", opfamily);
          result = var_eq_const(&vardata, eqopr, prefix->constvalue,
!                               false, true, false);
      }
      else
      {
*************** patternsel(PG_FUNCTION_ARGS, Pattern_Typ
*** 1275,1282 ****
          Selectivity selec;
          int            hist_size;
          FmgrInfo    opproc;
!         double        nullfrac,
!                     mcv_selec,
                      sumcommon;

          /* Try to use the histogram entries to get selectivity */
--- 1320,1326 ----
          Selectivity selec;
          int            hist_size;
          FmgrInfo    opproc;
!         double        mcv_selec,
                      sumcommon;

          /* Try to use the histogram entries to get selectivity */
*************** patternsel(PG_FUNCTION_ARGS, Pattern_Typ
*** 1328,1338 ****
          mcv_selec = mcv_selectivity(&vardata, &opproc, constval, true,
                                      &sumcommon);

-         if (HeapTupleIsValid(vardata.statsTuple))
-             nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
-         else
-             nullfrac = 0.0;
-
          /*
           * Now merge the results from the MCV and histogram calculations,
           * realizing that the histogram covers only the non-null values that
--- 1372,1377 ----
*************** patternsel(PG_FUNCTION_ARGS, Pattern_Typ
*** 1340,1351 ****
           */
          selec *= 1.0 - nullfrac - sumcommon;
          selec += mcv_selec;
-
-         /* result should be in range, but make sure... */
-         CLAMP_PROBABILITY(selec);
          result = selec;
      }

      if (prefix)
      {
          pfree(DatumGetPointer(prefix->constvalue));
--- 1379,1394 ----
           */
          selec *= 1.0 - nullfrac - sumcommon;
          selec += mcv_selec;
          result = selec;
      }

+     /* now adjust if we wanted not-match rather than match */
+     if (negate)
+         result = 1.0 - result - nullfrac;
+
+     /* result should be in range, but make sure... */
+     CLAMP_PROBABILITY(result);
+
      if (prefix)
      {
          pfree(DatumGetPointer(prefix->constvalue));
*************** patternsel(PG_FUNCTION_ARGS, Pattern_Typ
*** 1354,1360 ****

      ReleaseVariableStats(vardata);

!     return negate ? (1.0 - result) : result;
  }

  /*
--- 1397,1403 ----

      ReleaseVariableStats(vardata);

!     return result;
  }

  /*
*************** boolvarsel(PlannerInfo *root, Node *arg,
*** 1451,1457 ****
           * compute the selectivity as if that is what we have.
           */
          selec = var_eq_const(&vardata, BooleanEqualOperator,
!                              BoolGetDatum(true), false, true);
      }
      else if (is_funcclause(arg))
      {
--- 1494,1500 ----
           * compute the selectivity as if that is what we have.
           */
          selec = var_eq_const(&vardata, BooleanEqualOperator,
!                              BoolGetDatum(true), false, true, false);
      }
      else if (is_funcclause(arg))
      {
*************** prefix_selectivity(PlannerInfo *root, Va
*** 5788,5794 ****
      if (cmpopr == InvalidOid)
          elog(ERROR, "no = operator for opfamily %u", opfamily);
      eq_sel = var_eq_const(vardata, cmpopr, prefixcon->constvalue,
!                           false, true);

      prefixsel = Max(prefixsel, eq_sel);

--- 5831,5837 ----
      if (cmpopr == InvalidOid)
          elog(ERROR, "no = operator for opfamily %u", opfamily);
      eq_sel = var_eq_const(vardata, cmpopr, prefixcon->constvalue,
!                           false, true, false);

      prefixsel = Max(prefixsel, eq_sel);


-- 
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs

pgsql-bugs by date:

Previous
From: Ray Warren
Date:
Subject: Re: [BUGS] BUG #14683: *** glibc detected *** SELECT: double freeor corruption
Next
From: Daniele Varrazzo
Date:
Subject: [BUGS] [PATCH] Sure you meant response?