Re: [HACKERS] Enhanced containment selectivity function - Mailing list pgsql-patches

From Bruce Momjian
Subject Re: [HACKERS] Enhanced containment selectivity function
Date
Msg-id 200604261833.k3QIX9l14013@candle.pha.pa.us
Whole thread Raw
Responses Re: [HACKERS] Enhanced containment selectivity function  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-patches
Cleaned-up patch attached and applied.   Catalog version updated in
separate patch.

---------------------------------------------------------------------------

Matteo Beccati wrote:
> Hi,
>
> >>Moving it in contrib/ltree would be more difficult to me because it
> >>depends on other functions declared in selfuncs.c
> >>(get_restriction_variable, etc).
> >
> > I'd be willing to consider exporting those functions from selfuncs.c.
>
> In the meanwhile here is the latest patch which uses both mcv and
> histogram values.
>
>
> BTW, when restoring my test database I've found out that there were many
> errors on ALTER INDEX "something" OWNER TO ... :
>
> ERROR:  "something" is not a table, view, or sequence
>
> This using 8.1devel pg_restore and a 8.0.3 compressed dump. I could be
> wrong, but I didn't get those errors a few days ago (some cvs updates ago).
>
>
> Best regards
> --
> Matteo Beccati
> http://phpadsnew.com/
> http://phppgads.com/

--
  Bruce Momjian   http://candle.pha.pa.us
  EnterpriseDB    http://www.enterprisedb.com

  + If your life is a hard drive, Christ can be your backup. +
Index: contrib/ltree/ltree.sql.in
===================================================================
RCS file: /cvsroot/pgsql/contrib/ltree/ltree.sql.in,v
retrieving revision 1.10
diff -c -c -r1.10 ltree.sql.in
*** contrib/ltree/ltree.sql.in    27 Feb 2006 16:09:48 -0000    1.10
--- contrib/ltree/ltree.sql.in    26 Apr 2006 18:25:16 -0000
***************
*** 230,236 ****
      RIGHTARG = ltree,
      PROCEDURE = ltree_isparent,
          COMMUTATOR = '<@',
!         RESTRICT = contsel,
      JOIN = contjoinsel
  );

--- 230,236 ----
      RIGHTARG = ltree,
      PROCEDURE = ltree_isparent,
          COMMUTATOR = '<@',
!         RESTRICT = parentsel,
      JOIN = contjoinsel
  );

***************
*** 248,254 ****
      RIGHTARG = ltree,
      PROCEDURE = ltree_risparent,
          COMMUTATOR = '@>',
!         RESTRICT = contsel,
      JOIN = contjoinsel
  );

--- 248,254 ----
      RIGHTARG = ltree,
      PROCEDURE = ltree_risparent,
          COMMUTATOR = '@>',
!         RESTRICT = parentsel,
      JOIN = contjoinsel
  );

Index: src/backend/utils/adt/geo_selfuncs.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/geo_selfuncs.c,v
retrieving revision 1.27
diff -c -c -r1.27 geo_selfuncs.c
*** src/backend/utils/adt/geo_selfuncs.c    5 Mar 2006 15:58:42 -0000    1.27
--- src/backend/utils/adt/geo_selfuncs.c    26 Apr 2006 18:25:21 -0000
***************
*** 20,26 ****

  #include "utils/geo_decls.h"

-
  /*
   *    Selectivity functions for geometric operators.    These are bogus -- unless
   *    we know the actual key distribution in the index, we can't make a good
--- 20,25 ----
***************
*** 93,95 ****
--- 92,95 ----
  {
      PG_RETURN_FLOAT8(0.001);
  }
+
Index: src/backend/utils/adt/selfuncs.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v
retrieving revision 1.199
diff -c -c -r1.199 selfuncs.c
*** src/backend/utils/adt/selfuncs.c    20 Apr 2006 17:50:18 -0000    1.199
--- src/backend/utils/adt/selfuncs.c    26 Apr 2006 18:25:28 -0000
***************
*** 4852,4854 ****
--- 4852,5033 ----

      PG_RETURN_VOID();
  }
+
+
+ #define DEFAULT_PARENT_SEL 0.001
+
+ /*
+  *    parentsel - Selectivity of parent relationship for ltree data types.
+  */
+ Datum
+ parentsel(PG_FUNCTION_ARGS)
+ {
+     PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+     Oid            operator = PG_GETARG_OID(1);
+     List       *args = (List *) PG_GETARG_POINTER(2);
+     int            varRelid = PG_GETARG_INT32(3);
+     VariableStatData vardata;
+     Node       *other;
+     bool        varonleft;
+     Datum       *values;
+     int            nvalues;
+     float4       *numbers;
+     int            nnumbers;
+     double        selec = 0.0;
+
+     /*
+      * If expression is not variable <@ something or something <@ variable,
+      * then punt and return a default estimate.
+      */
+     if (!get_restriction_variable(root, args, varRelid,
+                                   &vardata, &other, &varonleft))
+         PG_RETURN_FLOAT8(DEFAULT_PARENT_SEL);
+
+     /*
+      * If the something is a NULL constant, assume operator is strict and
+      * return zero, ie, operator will never return TRUE.
+      */
+     if (IsA(other, Const) &&
+         ((Const *) other)->constisnull)
+     {
+         ReleaseVariableStats(vardata);
+         PG_RETURN_FLOAT8(0.0);
+     }
+
+     if (HeapTupleIsValid(vardata.statsTuple))
+     {
+         Form_pg_statistic stats;
+         double        mcvsum = 0.0;
+         double        mcvsel = 0.0;
+         double        hissel = 0.0;
+
+         stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
+
+         if (IsA(other, Const))
+         {
+             /* Variable is being compared to a known non-null constant */
+             Datum        constval = ((Const *) other)->constvalue;
+             bool        match = false;
+             int            i;
+
+             /*
+              * Is the constant "<@" to any of the column's most common values?
+              */
+             if (get_attstatsslot(vardata.statsTuple,
+                                  vardata.atttype, vardata.atttypmod,
+                                  STATISTIC_KIND_MCV, InvalidOid,
+                                  &values, &nvalues,
+                                  &numbers, &nnumbers))
+             {
+                 FmgrInfo    contproc;
+
+                 fmgr_info(get_opcode(operator), &contproc);
+
+                 for (i = 0; i < nvalues; i++)
+                 {
+                     /* be careful to apply operator right way 'round */
+                     if (varonleft)
+                         match = DatumGetBool(FunctionCall2(&contproc,
+                                                            values[i],
+                                                            constval));
+                     else
+                         match = DatumGetBool(FunctionCall2(&contproc,
+                                                            constval,
+                                                            values[i]));
+
+                     /* calculate total selectivity of all most-common-values */
+                     mcvsum += numbers[i];
+
+                     /* calculate selectivity of matching most-common-values */
+                     if (match)
+                         mcvsel += numbers[i];
+                 }
+             }
+             else
+             {
+                 /* no most-common-values info available */
+                 values = NULL;
+                 numbers = NULL;
+                 i = nvalues = nnumbers = 0;
+             }
+
+             free_attstatsslot(vardata.atttype, values, nvalues, NULL, 0);
+
+             /*
+              * Is the constant "<@" to any of the column's histogram values?
+              */
+             if (get_attstatsslot(vardata.statsTuple,
+                                  vardata.atttype, vardata.atttypmod,
+                                  STATISTIC_KIND_HISTOGRAM, InvalidOid,
+                                  &values, &nvalues,
+                                  NULL, NULL))
+             {
+                 FmgrInfo    contproc;
+
+                 fmgr_info(get_opcode(operator), &contproc);
+
+                 for (i = 0; i < nvalues; i++)
+                 {
+                     /* be careful to apply operator right way 'round */
+                     if (varonleft)
+                         match = DatumGetBool(FunctionCall2(&contproc,
+                                                            values[i],
+                                                            constval));
+                     else
+                         match = DatumGetBool(FunctionCall2(&contproc,
+                                                            constval,
+                                                            values[i]));
+                     /* count matching histogram values */
+                     if (match)
+                         hissel++;
+                 }
+
+                 if (hissel > 0.0)
+                 {
+                     /*
+                      * some matching values found inside histogram, divide
+                      * matching entries number by total histogram entries to
+                      * get the histogram related selectivity
+                      */
+                     hissel /= nvalues;
+                 }
+             }
+             else
+             {
+                 /* no histogram info available */
+                 values = NULL;
+                 i = nvalues = 0;
+             }
+
+             free_attstatsslot(vardata.atttype, values, nvalues,
+                               NULL, 0);
+
+
+             /*
+              * calculate selectivity based on MCV and histogram result
+              * histogram selectivity needs to be scaled down if there are any
+              * most-common-values
+              */
+             selec = mcvsel + hissel * (1.0 - mcvsum);
+
+             /*
+              * don't return 0.0 selectivity unless all table values are inside
+              * mcv
+              */
+             if (selec == 0.0 && mcvsum != 1.0)
+                 selec = DEFAULT_PARENT_SEL;
+         }
+         else
+             selec = DEFAULT_PARENT_SEL;
+     }
+     else
+         selec = DEFAULT_PARENT_SEL;
+
+     ReleaseVariableStats(vardata);
+
+     /* result should be in range, but make sure... */
+     CLAMP_PROBABILITY(selec);
+
+     PG_RETURN_FLOAT8((float8) selec);
+ }
+
Index: src/include/catalog/pg_proc.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/catalog/pg_proc.h,v
retrieving revision 1.406
diff -c -c -r1.406 pg_proc.h
*** src/include/catalog/pg_proc.h    25 Apr 2006 00:25:20 -0000    1.406
--- src/include/catalog/pg_proc.h    26 Apr 2006 18:25:35 -0000
***************
*** 3812,3817 ****
--- 3812,3819 ----
  DESCR("GiST support");
  DATA(insert OID = 2592 (  gist_circle_compress    PGNSP PGUID 12 f f t f i 1 2281 "2281" _null_ _null_ _null_
gist_circle_compress- _null_ )); 
  DESCR("GiST support");
+ DATA(insert OID = 2599 (  parentsel              PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23" _null_ _null_
_null_parentsel - _null_ )); 
+ DESCR("enhanced restriction selectivity for ltree isparent comparison operators");


  /*
Index: src/include/utils/selfuncs.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/utils/selfuncs.h,v
retrieving revision 1.28
diff -c -c -r1.28 selfuncs.h
*** src/include/utils/selfuncs.h    5 Mar 2006 15:59:07 -0000    1.28
--- src/include/utils/selfuncs.h    26 Apr 2006 18:25:35 -0000
***************
*** 134,137 ****
--- 134,139 ----
  extern Datum hashcostestimate(PG_FUNCTION_ARGS);
  extern Datum gistcostestimate(PG_FUNCTION_ARGS);

+ extern Datum parentsel(PG_FUNCTION_ARGS);
+
  #endif   /* SELFUNCS_H */

pgsql-patches by date:

Previous
From: Gevik Babakhani
Date:
Subject: Patch for %Allow per-database permissions to be set via GRANT
Next
From: Tom Lane
Date:
Subject: Re: [HACKERS] Enhanced containment selectivity function