Re: [HACKERS] Enhanced containment selectivity function - Mailing list pgsql-patches
From | Bruce Momjian |
---|---|
Subject | Re: [HACKERS] Enhanced containment selectivity function |
Date | |
Msg-id | 200604262232.k3QMW1018118@candle.pha.pa.us Whole thread Raw |
List | pgsql-patches |
Matteo Beccati wrote: > Bruce Momjian ha scritto: > > Your patch has been added to the PostgreSQL unapplied patches list at: > > > > http://momjian.postgresql.org/cgi-bin/pgpatches > > > > It will be applied as soon as one of the PostgreSQL committers reviews > > and approves it. > > Great. I would just like to remind that Tom said: > > > I'd be willing to consider exporting those functions from selfuncs.c. > > so that the selector function could be moved to contrib/ltree, which is > its natural place. > > It could also be noted that a similar feature could be useful outside > ltree: I guess there are plenty of cases when scanning statistics would > give a better result than using a constant selectivity. I have done as you and Tom suggested, moving parentsel into /contrib/ltree. Patch attached and applied, catalog version attached. -- Bruce Momjian http://candle.pha.pa.us EnterpriseDB http://www.enterprisedb.com + If your life is a hard drive, Christ can be your backup. + Index: contrib/ltree/ltree.sql.in =================================================================== RCS file: /cvsroot/pgsql/contrib/ltree/ltree.sql.in,v retrieving revision 1.11 diff -c -c -r1.11 ltree.sql.in *** contrib/ltree/ltree.sql.in 26 Apr 2006 18:28:29 -0000 1.11 --- contrib/ltree/ltree.sql.in 26 Apr 2006 22:28:48 -0000 *************** *** 225,230 **** --- 225,235 ---- AS 'MODULE_PATHNAME' LANGUAGE C RETURNS NULL ON NULL INPUT IMMUTABLE; + CREATE FUNCTION ltreeparentsel(internal, oid, internal, integer) + RETURNS float8 + AS 'MODULE_PATHNAME' + LANGUAGE C RETURNS NULL ON NULL INPUT IMMUTABLE; + CREATE OPERATOR @> ( LEFTARG = ltree, RIGHTARG = ltree, Index: contrib/ltree/ltree_op.c =================================================================== RCS file: /cvsroot/pgsql/contrib/ltree/ltree_op.c,v retrieving revision 1.9 diff -c -c -r1.9 ltree_op.c *** contrib/ltree/ltree_op.c 11 Mar 2006 04:38:29 -0000 1.9 --- contrib/ltree/ltree_op.c 26 Apr 2006 22:28:49 -0000 *************** *** 7,12 **** --- 7,19 ---- #include "ltree.h" #include <ctype.h> + #include "access/heapam.h" + #include "catalog/pg_statistic.h" + #include "nodes/relation.h" + #include "utils/lsyscache.h" + #include "utils/selfuncs.h" + #include "utils/syscache.h" + /* compare functions */ PG_FUNCTION_INFO_V1(ltree_cmp); PG_FUNCTION_INFO_V1(ltree_lt); *************** *** 44,49 **** --- 51,57 ---- Datum lca(PG_FUNCTION_ARGS); Datum ltree2text(PG_FUNCTION_ARGS); Datum text2ltree(PG_FUNCTION_ARGS); + Datum ltreeparentsel(PG_FUNCTION_ARGS); int ltree_compare(const ltree * a, const ltree * b) *************** *** 551,553 **** --- 559,739 ---- PG_RETURN_POINTER(out); } + + + #define DEFAULT_PARENT_SEL 0.001 + + /* + * ltreeparentsel - Selectivity of parent relationship for ltree data types. + */ + Datum + ltreeparentsel(PG_FUNCTION_ARGS) + { + PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); + Oid operator = PG_GETARG_OID(1); + List *args = (List *) PG_GETARG_POINTER(2); + int varRelid = PG_GETARG_INT32(3); + VariableStatData vardata; + Node *other; + bool varonleft; + Datum *values; + int nvalues; + float4 *numbers; + int nnumbers; + double selec = 0.0; + + /* + * If expression is not variable <@ something or something <@ variable, + * then punt and return a default estimate. + */ + if (!get_restriction_variable(root, args, varRelid, + &vardata, &other, &varonleft)) + PG_RETURN_FLOAT8(DEFAULT_PARENT_SEL); + + /* + * If the something is a NULL constant, assume operator is strict and + * return zero, ie, operator will never return TRUE. + */ + if (IsA(other, Const) && + ((Const *) other)->constisnull) + { + ReleaseVariableStats(vardata); + PG_RETURN_FLOAT8(0.0); + } + + if (HeapTupleIsValid(vardata.statsTuple)) + { + Form_pg_statistic stats; + double mcvsum = 0.0; + double mcvsel = 0.0; + double hissel = 0.0; + + stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); + + if (IsA(other, Const)) + { + /* Variable is being compared to a known non-null constant */ + Datum constval = ((Const *) other)->constvalue; + bool match = false; + int i; + + /* + * Is the constant "<@" to any of the column's most common values? + */ + if (get_attstatsslot(vardata.statsTuple, + vardata.atttype, vardata.atttypmod, + STATISTIC_KIND_MCV, InvalidOid, + &values, &nvalues, + &numbers, &nnumbers)) + { + FmgrInfo contproc; + + fmgr_info(get_opcode(operator), &contproc); + + for (i = 0; i < nvalues; i++) + { + /* be careful to apply operator right way 'round */ + if (varonleft) + match = DatumGetBool(FunctionCall2(&contproc, + values[i], + constval)); + else + match = DatumGetBool(FunctionCall2(&contproc, + constval, + values[i])); + + /* calculate total selectivity of all most-common-values */ + mcvsum += numbers[i]; + + /* calculate selectivity of matching most-common-values */ + if (match) + mcvsel += numbers[i]; + } + } + else + { + /* no most-common-values info available */ + values = NULL; + numbers = NULL; + i = nvalues = nnumbers = 0; + } + + free_attstatsslot(vardata.atttype, values, nvalues, NULL, 0); + + /* + * Is the constant "<@" to any of the column's histogram values? + */ + if (get_attstatsslot(vardata.statsTuple, + vardata.atttype, vardata.atttypmod, + STATISTIC_KIND_HISTOGRAM, InvalidOid, + &values, &nvalues, + NULL, NULL)) + { + FmgrInfo contproc; + + fmgr_info(get_opcode(operator), &contproc); + + for (i = 0; i < nvalues; i++) + { + /* be careful to apply operator right way 'round */ + if (varonleft) + match = DatumGetBool(FunctionCall2(&contproc, + values[i], + constval)); + else + match = DatumGetBool(FunctionCall2(&contproc, + constval, + values[i])); + /* count matching histogram values */ + if (match) + hissel++; + } + + if (hissel > 0.0) + { + /* + * some matching values found inside histogram, divide + * matching entries number by total histogram entries to + * get the histogram related selectivity + */ + hissel /= nvalues; + } + } + else + { + /* no histogram info available */ + values = NULL; + i = nvalues = 0; + } + + free_attstatsslot(vardata.atttype, values, nvalues, + NULL, 0); + + + /* + * calculate selectivity based on MCV and histogram result + * histogram selectivity needs to be scaled down if there are any + * most-common-values + */ + selec = mcvsel + hissel * (1.0 - mcvsum); + + /* + * don't return 0.0 selectivity unless all table values are inside + * mcv + */ + if (selec == 0.0 && mcvsum != 1.0) + selec = DEFAULT_PARENT_SEL; + } + else + selec = DEFAULT_PARENT_SEL; + } + else + selec = DEFAULT_PARENT_SEL; + + ReleaseVariableStats(vardata); + + /* result should be in range, but make sure... */ + CLAMP_PROBABILITY(selec); + + PG_RETURN_FLOAT8((float8) selec); + } Index: contrib/ltree/expected/ltree.out =================================================================== RCS file: /cvsroot/pgsql/contrib/ltree/expected/ltree.out,v retrieving revision 1.12 diff -c -c -r1.12 ltree.out *** contrib/ltree/expected/ltree.out 21 Aug 2004 00:26:54 -0000 1.12 --- contrib/ltree/expected/ltree.out 26 Apr 2006 22:28:56 -0000 *************** *** 2,16 **** psql:ltree.sql:7: NOTICE: type "ltree" is not yet defined DETAIL: Creating a shell type definition. psql:ltree.sql:12: NOTICE: argument type ltree is only a shell ! psql:ltree.sql:299: NOTICE: type "lquery" is not yet defined DETAIL: Creating a shell type definition. ! psql:ltree.sql:304: NOTICE: argument type lquery is only a shell ! psql:ltree.sql:410: NOTICE: type "ltxtquery" is not yet defined DETAIL: Creating a shell type definition. ! psql:ltree.sql:415: NOTICE: argument type ltxtquery is only a shell ! psql:ltree.sql:477: NOTICE: type "ltree_gist" is not yet defined DETAIL: Creating a shell type definition. ! psql:ltree.sql:482: NOTICE: argument type ltree_gist is only a shell SELECT ''::ltree; ltree ------- --- 2,16 ---- psql:ltree.sql:7: NOTICE: type "ltree" is not yet defined DETAIL: Creating a shell type definition. psql:ltree.sql:12: NOTICE: argument type ltree is only a shell ! psql:ltree.sql:304: NOTICE: type "lquery" is not yet defined DETAIL: Creating a shell type definition. ! psql:ltree.sql:309: NOTICE: argument type lquery is only a shell ! psql:ltree.sql:415: NOTICE: type "ltxtquery" is not yet defined DETAIL: Creating a shell type definition. ! psql:ltree.sql:420: NOTICE: argument type ltxtquery is only a shell ! psql:ltree.sql:482: NOTICE: type "ltree_gist" is not yet defined DETAIL: Creating a shell type definition. ! psql:ltree.sql:487: NOTICE: argument type ltree_gist is only a shell SELECT ''::ltree; ltree ------- Index: src/backend/utils/adt/geo_selfuncs.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/utils/adt/geo_selfuncs.c,v retrieving revision 1.28 diff -c -c -r1.28 geo_selfuncs.c *** src/backend/utils/adt/geo_selfuncs.c 26 Apr 2006 18:28:29 -0000 1.28 --- src/backend/utils/adt/geo_selfuncs.c 26 Apr 2006 22:29:04 -0000 *************** *** 9,15 **** * * * IDENTIFICATION ! * $PostgreSQL: pgsql/src/backend/utils/adt/geo_selfuncs.c,v 1.28 2006/04/26 18:28:29 momjian Exp $ * * XXX These are totally bogus. Perhaps someone will make them do * something reasonable, someday. --- 9,15 ---- * * * IDENTIFICATION ! * $PostgreSQL: pgsql/src/backend/utils/adt/geo_selfuncs.c,v 1.27 2006/03/05 15:58:42 momjian Exp $ * * XXX These are totally bogus. Perhaps someone will make them do * something reasonable, someday. *************** *** 20,25 **** --- 20,26 ---- #include "utils/geo_decls.h" + /* * Selectivity functions for geometric operators. These are bogus -- unless * we know the actual key distribution in the index, we can't make a good *************** *** 92,95 **** { PG_RETURN_FLOAT8(0.001); } - --- 93,95 ---- Index: src/backend/utils/adt/selfuncs.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v retrieving revision 1.200 diff -c -c -r1.200 selfuncs.c *** src/backend/utils/adt/selfuncs.c 26 Apr 2006 18:28:29 -0000 1.200 --- src/backend/utils/adt/selfuncs.c 26 Apr 2006 22:29:13 -0000 *************** *** 15,21 **** * * * IDENTIFICATION ! * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.200 2006/04/26 18:28:29 momjian Exp $ * *------------------------------------------------------------------------- */ --- 15,21 ---- * * * IDENTIFICATION ! * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.199 2006/04/20 17:50:18 tgl Exp $ * *------------------------------------------------------------------------- */ *************** *** 111,136 **** #include "utils/syscache.h" - /* Return data from examine_variable and friends */ - typedef struct - { - Node *var; /* the Var or expression tree */ - RelOptInfo *rel; /* Relation, or NULL if not identifiable */ - HeapTuple statsTuple; /* pg_statistic tuple, or NULL if none */ - /* NB: if statsTuple!=NULL, it must be freed when caller is done */ - Oid vartype; /* exposed type of expression */ - Oid atttype; /* type to pass to get_attstatsslot */ - int32 atttypmod; /* typmod to pass to get_attstatsslot */ - bool isunique; /* true if matched to a unique index */ - } VariableStatData; - - #define ReleaseVariableStats(vardata) \ - do { \ - if (HeapTupleIsValid((vardata).statsTuple)) \ - ReleaseSysCache((vardata).statsTuple); \ - } while(0) - - static double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Datum constval, double *sumcommonp); static double ineq_histogram_selectivity(VariableStatData *vardata, --- 111,116 ---- *************** *** 158,166 **** int rangelo, int rangehi); static char *convert_string_datum(Datum value, Oid typid); static double convert_timevalue_to_scalar(Datum value, Oid typid); - static bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, - VariableStatData *vardata, Node **other, - bool *varonleft); static void get_join_variables(PlannerInfo *root, List *args, VariableStatData *vardata1, VariableStatData *vardata2); --- 138,143 ---- *************** *** 3172,3178 **** * Note: if there are Vars on both sides of the clause, we must fail, because * callers are expecting that the other side will act like a pseudoconstant. */ ! static bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft) --- 3149,3155 ---- * Note: if there are Vars on both sides of the clause, we must fail, because * callers are expecting that the other side will act like a pseudoconstant. */ ! bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, VariableStatData *vardata, Node **other, bool *varonleft) *************** *** 4852,5033 **** PG_RETURN_VOID(); } - - - #define DEFAULT_PARENT_SEL 0.001 - - /* - * parentsel - Selectivity of parent relationship for ltree data types. - */ - Datum - parentsel(PG_FUNCTION_ARGS) - { - PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); - Oid operator = PG_GETARG_OID(1); - List *args = (List *) PG_GETARG_POINTER(2); - int varRelid = PG_GETARG_INT32(3); - VariableStatData vardata; - Node *other; - bool varonleft; - Datum *values; - int nvalues; - float4 *numbers; - int nnumbers; - double selec = 0.0; - - /* - * If expression is not variable <@ something or something <@ variable, - * then punt and return a default estimate. - */ - if (!get_restriction_variable(root, args, varRelid, - &vardata, &other, &varonleft)) - PG_RETURN_FLOAT8(DEFAULT_PARENT_SEL); - - /* - * If the something is a NULL constant, assume operator is strict and - * return zero, ie, operator will never return TRUE. - */ - if (IsA(other, Const) && - ((Const *) other)->constisnull) - { - ReleaseVariableStats(vardata); - PG_RETURN_FLOAT8(0.0); - } - - if (HeapTupleIsValid(vardata.statsTuple)) - { - Form_pg_statistic stats; - double mcvsum = 0.0; - double mcvsel = 0.0; - double hissel = 0.0; - - stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); - - if (IsA(other, Const)) - { - /* Variable is being compared to a known non-null constant */ - Datum constval = ((Const *) other)->constvalue; - bool match = false; - int i; - - /* - * Is the constant "<@" to any of the column's most common values? - */ - if (get_attstatsslot(vardata.statsTuple, - vardata.atttype, vardata.atttypmod, - STATISTIC_KIND_MCV, InvalidOid, - &values, &nvalues, - &numbers, &nnumbers)) - { - FmgrInfo contproc; - - fmgr_info(get_opcode(operator), &contproc); - - for (i = 0; i < nvalues; i++) - { - /* be careful to apply operator right way 'round */ - if (varonleft) - match = DatumGetBool(FunctionCall2(&contproc, - values[i], - constval)); - else - match = DatumGetBool(FunctionCall2(&contproc, - constval, - values[i])); - - /* calculate total selectivity of all most-common-values */ - mcvsum += numbers[i]; - - /* calculate selectivity of matching most-common-values */ - if (match) - mcvsel += numbers[i]; - } - } - else - { - /* no most-common-values info available */ - values = NULL; - numbers = NULL; - i = nvalues = nnumbers = 0; - } - - free_attstatsslot(vardata.atttype, values, nvalues, NULL, 0); - - /* - * Is the constant "<@" to any of the column's histogram values? - */ - if (get_attstatsslot(vardata.statsTuple, - vardata.atttype, vardata.atttypmod, - STATISTIC_KIND_HISTOGRAM, InvalidOid, - &values, &nvalues, - NULL, NULL)) - { - FmgrInfo contproc; - - fmgr_info(get_opcode(operator), &contproc); - - for (i = 0; i < nvalues; i++) - { - /* be careful to apply operator right way 'round */ - if (varonleft) - match = DatumGetBool(FunctionCall2(&contproc, - values[i], - constval)); - else - match = DatumGetBool(FunctionCall2(&contproc, - constval, - values[i])); - /* count matching histogram values */ - if (match) - hissel++; - } - - if (hissel > 0.0) - { - /* - * some matching values found inside histogram, divide - * matching entries number by total histogram entries to - * get the histogram related selectivity - */ - hissel /= nvalues; - } - } - else - { - /* no histogram info available */ - values = NULL; - i = nvalues = 0; - } - - free_attstatsslot(vardata.atttype, values, nvalues, - NULL, 0); - - - /* - * calculate selectivity based on MCV and histogram result - * histogram selectivity needs to be scaled down if there are any - * most-common-values - */ - selec = mcvsel + hissel * (1.0 - mcvsum); - - /* - * don't return 0.0 selectivity unless all table values are inside - * mcv - */ - if (selec == 0.0 && mcvsum != 1.0) - selec = DEFAULT_PARENT_SEL; - } - else - selec = DEFAULT_PARENT_SEL; - } - else - selec = DEFAULT_PARENT_SEL; - - ReleaseVariableStats(vardata); - - /* result should be in range, but make sure... */ - CLAMP_PROBABILITY(selec); - - PG_RETURN_FLOAT8((float8) selec); - } - --- 4829,4831 ---- Index: src/include/catalog/catversion.h =================================================================== RCS file: /cvsroot/pgsql/src/include/catalog/catversion.h,v retrieving revision 1.325 diff -c -c -r1.325 catversion.h *** src/include/catalog/catversion.h 26 Apr 2006 18:30:10 -0000 1.325 --- src/include/catalog/catversion.h 26 Apr 2006 22:29:15 -0000 *************** *** 53,58 **** */ /* yyyymmddN */ ! #define CATALOG_VERSION_NO 200604261 #endif --- 53,58 ---- */ /* yyyymmddN */ ! #define CATALOG_VERSION_NO 200604262 #endif Index: src/include/catalog/pg_proc.h =================================================================== RCS file: /cvsroot/pgsql/src/include/catalog/pg_proc.h,v retrieving revision 1.407 diff -c -c -r1.407 pg_proc.h *** src/include/catalog/pg_proc.h 26 Apr 2006 18:28:30 -0000 1.407 --- src/include/catalog/pg_proc.h 26 Apr 2006 22:29:33 -0000 *************** *** 7,13 **** * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * ! * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.407 2006/04/26 18:28:30 momjian Exp $ * * NOTES * The script catalog/genbki.sh reads this file and generates .bki --- 7,13 ---- * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * ! * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.406 2006/04/25 00:25:20 momjian Exp $ * * NOTES * The script catalog/genbki.sh reads this file and generates .bki *************** *** 3812,3819 **** DESCR("GiST support"); DATA(insert OID = 2592 ( gist_circle_compress PGNSP PGUID 12 f f t f i 1 2281 "2281" _null_ _null_ _null_ gist_circle_compress- _null_ )); DESCR("GiST support"); - DATA(insert OID = 2599 ( parentsel PGNSP PGUID 12 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_parentsel - _null_ )); - DESCR("enhanced restriction selectivity for ltree isparent comparison operators"); /* --- 3812,3817 ---- Index: src/include/utils/selfuncs.h =================================================================== RCS file: /cvsroot/pgsql/src/include/utils/selfuncs.h,v retrieving revision 1.29 diff -c -c -r1.29 selfuncs.h *** src/include/utils/selfuncs.h 26 Apr 2006 18:28:34 -0000 1.29 --- src/include/utils/selfuncs.h 26 Apr 2006 22:29:33 -0000 *************** *** 8,14 **** * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * ! * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.29 2006/04/26 18:28:34 momjian Exp $ * *------------------------------------------------------------------------- */ --- 8,14 ---- * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * ! * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.28 2006/03/05 15:59:07 momjian Exp $ * *------------------------------------------------------------------------- */ *************** *** 16,21 **** --- 16,22 ---- #define SELFUNCS_H #include "fmgr.h" + #include "access/htup.h" #include "nodes/relation.h" *************** *** 62,67 **** --- 63,88 ---- } while (0) + /* Return data from examine_variable and friends */ + typedef struct + { + Node *var; /* the Var or expression tree */ + RelOptInfo *rel; /* Relation, or NULL if not identifiable */ + HeapTuple statsTuple; /* pg_statistic tuple, or NULL if none */ + /* NB: if statsTuple!=NULL, it must be freed when caller is done */ + Oid vartype; /* exposed type of expression */ + Oid atttype; /* type to pass to get_attstatsslot */ + int32 atttypmod; /* typmod to pass to get_attstatsslot */ + bool isunique; /* true if matched to a unique index */ + } VariableStatData; + + #define ReleaseVariableStats(vardata) \ + do { \ + if (HeapTupleIsValid((vardata).statsTuple)) \ + ReleaseSysCache((vardata).statsTuple); \ + } while(0) + + typedef enum { Pattern_Type_Like, Pattern_Type_Like_IC, *************** *** 133,139 **** extern Datum btcostestimate(PG_FUNCTION_ARGS); extern Datum hashcostestimate(PG_FUNCTION_ARGS); extern Datum gistcostestimate(PG_FUNCTION_ARGS); ! ! extern Datum parentsel(PG_FUNCTION_ARGS); #endif /* SELFUNCS_H */ --- 154,161 ---- extern Datum btcostestimate(PG_FUNCTION_ARGS); extern Datum hashcostestimate(PG_FUNCTION_ARGS); extern Datum gistcostestimate(PG_FUNCTION_ARGS); ! extern bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid, ! VariableStatData *vardata, Node **other, ! bool *varonleft); #endif /* SELFUNCS_H */
pgsql-patches by date: