Re: Explicit deterministic COLLATE fails with pattern matching operations on column with non-deterministic collation - Mailing list pgsql-bugs
From | Tom Lane |
---|---|
Subject | Re: Explicit deterministic COLLATE fails with pattern matching operations on column with non-deterministic collation |
Date | |
Msg-id | 1442668.1591320121@sss.pgh.pa.us Whole thread Raw |
In response to | Re: Explicit deterministic COLLATE fails with pattern matching operations on column with non-deterministic collation (Tom Lane <tgl@sss.pgh.pa.us>) |
List | pgsql-bugs |
I wrote:
> 3. Hack things up so that the core code renames all these exposed
> functions to, say, ineq_histogram_selectivity_ext() and so on,
> allowing the additional arguments to exist, but the old names would
> still be there as ABI compatibility wrappers.

Here's a proposed v12 patch along those lines.

regards, tom lane

diff --git a/contrib/ltree/ltree_op.c b/contrib/ltree/ltree_op.c index 51545e0ef4..ec8e40626f 100644 --- a/contrib/ltree/ltree_op.c +++ b/contrib/ltree/ltree_op.c @@ -578,6 +578,7 @@ ltreeparentsel(PG_FUNCTION_ARGS) Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); + Oid collation = PG_GET_COLLATION(); VariableStatData vardata; Node *other; bool varonleft; @@ -617,8 +618,9 @@ ltreeparentsel(PG_FUNCTION_ARGS) /* * Is the constant "<@" to any of the column's most common values? */ - mcvsel = mcv_selectivity(&vardata, &contproc, constval, varonleft, - &mcvsum); + mcvsel = mcv_selectivity_ext(&vardata, &contproc, collation, + constval, varonleft, + &mcvsum); /* * If the histogram is large enough, see what fraction of it the @@ -626,9 +628,9 @@ ltreeparentsel(PG_FUNCTION_ARGS) * non-MCV population. Otherwise use the default selectivity for the * non-MCV population. 
*/ - selec = histogram_selectivity(&vardata, &contproc, - constval, varonleft, - 10, 1, &hist_size); + selec = histogram_selectivity_ext(&vardata, &contproc, collation, + constval, varonleft, + 10, 1, &hist_size); if (selec < 0) { /* Nope, fall back on default */ diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c index 77cc378196..6465c9edcc 100644 --- a/src/backend/utils/adt/like_support.c +++ b/src/backend/utils/adt/like_support.c @@ -90,7 +90,9 @@ static Pattern_Prefix_Status pattern_fixed_prefix(Const *patt, Selectivity *rest_selec); static Selectivity prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, - Oid vartype, Oid opfamily, Const *prefixcon); + Oid vartype, Oid opfamily, + Oid collation, + Const *prefixcon); static Selectivity like_selectivity(const char *patt, int pattlen, bool case_insensitive); static Selectivity regex_selectivity(const char *patt, int pattlen, @@ -586,8 +588,8 @@ patternsel_common(PlannerInfo *root, if (eqopr == InvalidOid) elog(ERROR, "no = operator for opfamily %u", opfamily); - result = var_eq_const(&vardata, eqopr, prefix->constvalue, - false, true, false); + result = var_eq_const_ext(&vardata, eqopr, collation, + prefix->constvalue, false, true, false); } else { @@ -618,8 +620,9 @@ patternsel_common(PlannerInfo *root, opfuncid = get_opcode(oprid); fmgr_info(opfuncid, &opproc); - selec = histogram_selectivity(&vardata, &opproc, constval, true, - 10, 1, &hist_size); + selec = histogram_selectivity_ext(&vardata, &opproc, collation, + constval, true, + 10, 1, &hist_size); /* If not at least 100 entries, use the heuristic method */ if (hist_size < 100) @@ -629,7 +632,7 @@ patternsel_common(PlannerInfo *root, if (pstatus == Pattern_Prefix_Partial) prefixsel = prefix_selectivity(root, &vardata, vartype, - opfamily, prefix); + opfamily, collation, prefix); else prefixsel = 1.0; heursel = prefixsel * rest_selec; @@ -661,8 +664,9 @@ patternsel_common(PlannerInfo *root, * directly to the 
result selectivity. Also add up the total fraction * represented by MCV entries. */ - mcv_selec = mcv_selectivity(&vardata, &opproc, constval, true, - &sumcommon); + mcv_selec = mcv_selectivity_ext(&vardata, &opproc, collation, + constval, true, + &sumcommon); /* * Now merge the results from the MCV and histogram calculations, @@ -1170,12 +1174,13 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation, */ static Selectivity prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, - Oid vartype, Oid opfamily, Const *prefixcon) + Oid vartype, Oid opfamily, + Oid collation, + Const *prefixcon) { Selectivity prefixsel; Oid cmpopr; FmgrInfo opproc; - AttStatsSlot sslot; Const *greaterstrcon; Selectivity eq_sel; @@ -1185,10 +1190,11 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, elog(ERROR, "no >= operator for opfamily %u", opfamily); fmgr_info(get_opcode(cmpopr), &opproc); - prefixsel = ineq_histogram_selectivity(root, vardata, - &opproc, true, true, - prefixcon->constvalue, - prefixcon->consttype); + prefixsel = ineq_histogram_selectivity_ext(root, vardata, + &opproc, true, true, + collation, + prefixcon->constvalue, + prefixcon->consttype); if (prefixsel < 0.0) { @@ -1196,33 +1202,24 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, return DEFAULT_MATCH_SEL; } - /*------- - * If we can create a string larger than the prefix, say - * "x < greaterstr". We try to generate the string referencing the - * collation of the var's statistics, but if that's not available, - * use DEFAULT_COLLATION_OID. - *------- + /* + * If we can create a string larger than the prefix, say "x < greaterstr". 
*/ - if (HeapTupleIsValid(vardata->statsTuple) && - get_attstatsslot(&sslot, vardata->statsTuple, - STATISTIC_KIND_HISTOGRAM, InvalidOid, 0)) - /* sslot.stacoll is set up */ ; - else - sslot.stacoll = DEFAULT_COLLATION_OID; cmpopr = get_opfamily_member(opfamily, vartype, vartype, BTLessStrategyNumber); if (cmpopr == InvalidOid) elog(ERROR, "no < operator for opfamily %u", opfamily); fmgr_info(get_opcode(cmpopr), &opproc); - greaterstrcon = make_greater_string(prefixcon, &opproc, sslot.stacoll); + greaterstrcon = make_greater_string(prefixcon, &opproc, collation); if (greaterstrcon) { Selectivity topsel; - topsel = ineq_histogram_selectivity(root, vardata, - &opproc, false, false, - greaterstrcon->constvalue, - greaterstrcon->consttype); + topsel = ineq_histogram_selectivity_ext(root, vardata, + &opproc, false, false, + collation, + greaterstrcon->constvalue, + greaterstrcon->consttype); /* ineq_histogram_selectivity worked before, it shouldn't fail now */ Assert(topsel >= 0.0); @@ -1253,8 +1250,8 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, BTEqualStrategyNumber); if (cmpopr == InvalidOid) elog(ERROR, "no = operator for opfamily %u", opfamily); - eq_sel = var_eq_const(vardata, cmpopr, prefixcon->constvalue, - false, true, false); + eq_sel = var_eq_const_ext(vardata, cmpopr, collation, prefixcon->constvalue, + false, true, false); prefixsel = Max(prefixsel, eq_sel); diff --git a/src/backend/utils/adt/network_selfuncs.c b/src/backend/utils/adt/network_selfuncs.c index 5e0f0614ee..89131ebab7 100644 --- a/src/backend/utils/adt/network_selfuncs.c +++ b/src/backend/utils/adt/network_selfuncs.c @@ -137,8 +137,9 @@ networksel(PG_FUNCTION_ARGS) * by MCV entries. 
*/ fmgr_info(get_opcode(operator), &proc); - mcv_selec = mcv_selectivity(&vardata, &proc, constvalue, varonleft, - &sumcommon); + mcv_selec = mcv_selectivity_ext(&vardata, &proc, InvalidOid, + constvalue, varonleft, + &sumcommon); /* * If we have a histogram, use it to estimate the proportion of the diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index b67897da88..f9a2c96b0e 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -88,11 +88,7 @@ * (if any) is passed using the standard fmgr mechanism, so that the estimator * function can fetch it with PG_GET_COLLATION(). Note, however, that all * statistics in pg_statistic are currently built using the relevant column's - * collation. Thus, in most cases where we are looking at statistics, we - * should ignore the operator collation and use the stats entry's collation. - * We expect that the error induced by doing this is usually not large enough - * to justify complicating matters. In any case, doing otherwise would yield - * entirely garbage results for ordered stats data such as histograms. + * collation. 
*---------- */ @@ -148,14 +144,14 @@ get_relation_stats_hook_type get_relation_stats_hook = NULL; get_index_stats_hook_type get_index_stats_hook = NULL; static double eqsel_internal(PG_FUNCTION_ARGS, bool negate); -static double eqjoinsel_inner(Oid opfuncoid, +static double eqjoinsel_inner(Oid opfuncoid, Oid collation, VariableStatData *vardata1, VariableStatData *vardata2, double nd1, double nd2, bool isdefault1, bool isdefault2, AttStatsSlot *sslot1, AttStatsSlot *sslot2, Form_pg_statistic stats1, Form_pg_statistic stats2, bool have_mcvs1, bool have_mcvs2); -static double eqjoinsel_semi(Oid opfuncoid, +static double eqjoinsel_semi(Oid opfuncoid, Oid collation, VariableStatData *vardata1, VariableStatData *vardata2, double nd1, double nd2, bool isdefault1, bool isdefault2, @@ -193,10 +189,11 @@ static double convert_timevalue_to_scalar(Datum value, Oid typid, static void examine_simple_variable(PlannerInfo *root, Var *var, VariableStatData *vardata); static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata, - Oid sortop, Datum *min, Datum *max); + Oid sortop, Oid collation, + Datum *min, Datum *max); static bool get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, - Oid sortop, + Oid sortop, Oid collation, Datum *min, Datum *max); static bool get_actual_variable_endpoint(Relation heapRel, Relation indexRel, @@ -234,6 +231,7 @@ eqsel_internal(PG_FUNCTION_ARGS, bool negate) Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); + Oid collation = PG_GET_COLLATION(); VariableStatData vardata; Node *other; bool varonleft; @@ -267,10 +265,10 @@ eqsel_internal(PG_FUNCTION_ARGS, bool negate) * in the query.) 
*/ if (IsA(other, Const)) - selec = var_eq_const(&vardata, operator, - ((Const *) other)->constvalue, - ((Const *) other)->constisnull, - varonleft, negate); + selec = var_eq_const_ext(&vardata, operator, collation, + ((Const *) other)->constvalue, + ((Const *) other)->constisnull, + varonleft, negate); else selec = var_eq_non_const(&vardata, operator, other, varonleft, negate); @@ -289,6 +287,16 @@ double var_eq_const(VariableStatData *vardata, Oid operator, Datum constval, bool constisnull, bool varonleft, bool negate) +{ + return var_eq_const_ext(vardata, operator, DEFAULT_COLLATION_OID, + constval, constisnull, + varonleft, negate); +} + +double +var_eq_const_ext(VariableStatData *vardata, Oid operator, Oid collation, + Datum constval, bool constisnull, + bool varonleft, bool negate) { double selec; double nullfrac = 0.0; @@ -353,12 +361,12 @@ var_eq_const(VariableStatData *vardata, Oid operator, /* be careful to apply operator right way 'round */ if (varonleft) match = DatumGetBool(FunctionCall2Coll(&eqproc, - sslot.stacoll, + collation, sslot.values[i], constval)); else match = DatumGetBool(FunctionCall2Coll(&eqproc, - sslot.stacoll, + collation, constval, sslot.values[i])); if (match) @@ -555,6 +563,7 @@ neqsel(PG_FUNCTION_ARGS) */ static double scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq, + Oid collation, VariableStatData *vardata, Datum constval, Oid consttype) { Form_pg_statistic stats; @@ -654,16 +663,17 @@ scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq, * to the result selectivity. Also add up the total fraction represented * by MCV entries. */ - mcv_selec = mcv_selectivity(vardata, &opproc, constval, true, - &sumcommon); + mcv_selec = mcv_selectivity_ext(vardata, &opproc, collation, constval, true, + &sumcommon); /* * If there is a histogram, determine which bin the constant falls in, and * compute the resulting contribution to selectivity. 
*/ - hist_selec = ineq_histogram_selectivity(root, vardata, - &opproc, isgt, iseq, - constval, consttype); + hist_selec = ineq_histogram_selectivity_ext(root, vardata, + &opproc, isgt, iseq, + collation, + constval, consttype); /* * Now merge the results from the MCV and histogram calculations, @@ -707,6 +717,15 @@ double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Datum constval, bool varonleft, double *sumcommonp) +{ + return mcv_selectivity_ext(vardata, opproc, DEFAULT_COLLATION_OID, + constval, varonleft, sumcommonp); +} + +double +mcv_selectivity_ext(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, + Datum constval, bool varonleft, + double *sumcommonp) { double mcv_selec, sumcommon; @@ -726,11 +745,11 @@ mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, { if (varonleft ? DatumGetBool(FunctionCall2Coll(opproc, - sslot.stacoll, + collation, sslot.values[i], constval)) : DatumGetBool(FunctionCall2Coll(opproc, - sslot.stacoll, + collation, constval, sslot.values[i]))) mcv_selec += sslot.numbers[i]; @@ -780,6 +799,20 @@ histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Datum constval, bool varonleft, int min_hist_size, int n_skip, int *hist_size) +{ + return histogram_selectivity_ext(vardata, + opproc, DEFAULT_COLLATION_OID, + constval, varonleft, + min_hist_size, n_skip, + hist_size); +} + +double +histogram_selectivity_ext(VariableStatData *vardata, + FmgrInfo *opproc, Oid collation, + Datum constval, bool varonleft, + int min_hist_size, int n_skip, + int *hist_size) { double result; AttStatsSlot sslot; @@ -804,11 +837,11 @@ histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, { if (varonleft ? 
DatumGetBool(FunctionCall2Coll(opproc, - sslot.stacoll, + collation, sslot.values[i], constval)) : DatumGetBool(FunctionCall2Coll(opproc, - sslot.stacoll, + collation, constval, sslot.values[i]))) nmatch++; @@ -848,6 +881,19 @@ ineq_histogram_selectivity(PlannerInfo *root, VariableStatData *vardata, FmgrInfo *opproc, bool isgt, bool iseq, Datum constval, Oid consttype) +{ + return ineq_histogram_selectivity_ext(root, vardata, + opproc, isgt, iseq, + DEFAULT_COLLATION_OID, + constval, consttype); +} + +double +ineq_histogram_selectivity_ext(PlannerInfo *root, + VariableStatData *vardata, + FmgrInfo *opproc, bool isgt, bool iseq, + Oid collation, + Datum constval, Oid consttype) { double hist_selec; AttStatsSlot sslot; @@ -860,9 +906,11 @@ ineq_histogram_selectivity(PlannerInfo *root, * column type. However, to make that work we will need to figure out * which staop to search for --- it's not necessarily the one we have at * hand! (For example, we might have a '<=' operator rather than the '<' - * operator that will appear in staop.) For now, assume that whatever - * appears in pg_statistic is sorted the same way our operator sorts, or - * the reverse way if isgt is true. + * operator that will appear in staop.) The collation might not agree + * either. For now, just assume that whatever appears in pg_statistic is + * sorted the same way our operator sorts, or the reverse way if isgt is + * true. This could result in a bogus estimate, but it still seems better + * than falling back to the default estimate. 
*/ if (HeapTupleIsValid(vardata->statsTuple) && statistic_proc_security_check(vardata, opproc->fn_oid) && @@ -908,6 +956,7 @@ ineq_histogram_selectivity(PlannerInfo *root, have_end = get_actual_variable_range(root, vardata, sslot.staop, + collation, &sslot.values[0], &sslot.values[1]); @@ -925,17 +974,19 @@ ineq_histogram_selectivity(PlannerInfo *root, have_end = get_actual_variable_range(root, vardata, sslot.staop, + collation, &sslot.values[0], NULL); else if (probe == sslot.nvalues - 1 && sslot.nvalues > 2) have_end = get_actual_variable_range(root, vardata, sslot.staop, + collation, NULL, &sslot.values[probe]); ltcmp = DatumGetBool(FunctionCall2Coll(opproc, - sslot.stacoll, + collation, sslot.values[probe], constval)); if (isgt) @@ -1020,7 +1071,7 @@ ineq_histogram_selectivity(PlannerInfo *root, * values to a uniform comparison scale, and do a linear * interpolation within this bin. */ - if (convert_to_scalar(constval, consttype, sslot.stacoll, + if (convert_to_scalar(constval, consttype, collation, &val, sslot.values[i - 1], sslot.values[i], vardata->vartype, @@ -1160,6 +1211,7 @@ scalarineqsel_wrapper(PG_FUNCTION_ARGS, bool isgt, bool iseq) Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); + Oid collation = PG_GET_COLLATION(); VariableStatData vardata; Node *other; bool varonleft; @@ -1212,7 +1264,7 @@ scalarineqsel_wrapper(PG_FUNCTION_ARGS, bool isgt, bool iseq) } /* The rest of the work is done by scalarineqsel(). */ - selec = scalarineqsel(root, operator, isgt, iseq, + selec = scalarineqsel(root, operator, isgt, iseq, collation, &vardata, constval, consttype); ReleaseVariableStats(vardata); @@ -1277,8 +1329,8 @@ boolvarsel(PlannerInfo *root, Node *arg, int varRelid) * A boolean variable V is equivalent to the clause V = 't', so we * compute the selectivity as if that is what we have. 
*/ - selec = var_eq_const(&vardata, BooleanEqualOperator, - BoolGetDatum(true), false, true, false); + selec = var_eq_const_ext(&vardata, BooleanEqualOperator, InvalidOid, + BoolGetDatum(true), false, true, false); } else { @@ -2003,6 +2055,7 @@ eqjoinsel(PG_FUNCTION_ARGS) JoinType jointype = (JoinType) PG_GETARG_INT16(3); #endif SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4); + Oid collation = PG_GET_COLLATION(); double selec; double selec_inner; VariableStatData vardata1; @@ -2053,7 +2106,7 @@ eqjoinsel(PG_FUNCTION_ARGS) } /* We need to compute the inner-join selectivity in all cases */ - selec_inner = eqjoinsel_inner(opfuncoid, + selec_inner = eqjoinsel_inner(opfuncoid, collation, &vardata1, &vardata2, nd1, nd2, isdefault1, isdefault2, @@ -2080,7 +2133,7 @@ eqjoinsel(PG_FUNCTION_ARGS) inner_rel = find_join_input_rel(root, sjinfo->min_righthand); if (!join_is_reversed) - selec = eqjoinsel_semi(opfuncoid, + selec = eqjoinsel_semi(opfuncoid, collation, &vardata1, &vardata2, nd1, nd2, isdefault1, isdefault2, @@ -2093,7 +2146,7 @@ eqjoinsel(PG_FUNCTION_ARGS) Oid commop = get_commutator(operator); Oid commopfuncoid = OidIsValid(commop) ? get_opcode(commop) : InvalidOid; - selec = eqjoinsel_semi(commopfuncoid, + selec = eqjoinsel_semi(commopfuncoid, collation, &vardata2, &vardata1, nd2, nd1, isdefault2, isdefault1, @@ -2141,7 +2194,7 @@ eqjoinsel(PG_FUNCTION_ARGS) * that it's worth trying to distinguish them here. */ static double -eqjoinsel_inner(Oid opfuncoid, +eqjoinsel_inner(Oid opfuncoid, Oid collation, VariableStatData *vardata1, VariableStatData *vardata2, double nd1, double nd2, bool isdefault1, bool isdefault2, @@ -2203,7 +2256,7 @@ eqjoinsel_inner(Oid opfuncoid, if (hasmatch2[j]) continue; if (DatumGetBool(FunctionCall2Coll(&eqproc, - sslot1->stacoll, + collation, sslot1->values[i], sslot2->values[j]))) { @@ -2321,7 +2374,7 @@ eqjoinsel_inner(Oid opfuncoid, * Unlike eqjoinsel_inner, we have to cope with opfuncoid being InvalidOid. 
*/ static double -eqjoinsel_semi(Oid opfuncoid, +eqjoinsel_semi(Oid opfuncoid, Oid collation, VariableStatData *vardata1, VariableStatData *vardata2, double nd1, double nd2, bool isdefault1, bool isdefault2, @@ -2415,7 +2468,7 @@ eqjoinsel_semi(Oid opfuncoid, if (hasmatch2[j]) continue; if (DatumGetBool(FunctionCall2Coll(&eqproc, - sslot1->stacoll, + collation, sslot1->values[i], sslot2->values[j]))) { @@ -2635,6 +2688,7 @@ mergejoinscansel(PlannerInfo *root, Node *clause, Oid op_lefttype; Oid op_righttype; Oid opno, + collation, lsortop, rsortop, lstatop, @@ -2659,6 +2713,7 @@ mergejoinscansel(PlannerInfo *root, Node *clause, if (!is_opclause(clause)) return; /* shouldn't happen */ opno = ((OpExpr *) clause)->opno; + collation = ((OpExpr *) clause)->inputcollid; left = get_leftop((Expr *) clause); right = get_rightop((Expr *) clause); if (!right) @@ -2792,20 +2847,20 @@ mergejoinscansel(PlannerInfo *root, Node *clause, /* Try to get ranges of both inputs */ if (!isgt) { - if (!get_variable_range(root, &leftvar, lstatop, + if (!get_variable_range(root, &leftvar, lstatop, collation, &leftmin, &leftmax)) goto fail; /* no range available from stats */ - if (!get_variable_range(root, &rightvar, rstatop, + if (!get_variable_range(root, &rightvar, rstatop, collation, &rightmin, &rightmax)) goto fail; /* no range available from stats */ } else { /* need to swap the max and min */ - if (!get_variable_range(root, &leftvar, lstatop, + if (!get_variable_range(root, &leftvar, lstatop, collation, &leftmax, &leftmin)) goto fail; /* no range available from stats */ - if (!get_variable_range(root, &rightvar, rstatop, + if (!get_variable_range(root, &rightvar, rstatop, collation, &rightmax, &rightmin)) goto fail; /* no range available from stats */ } @@ -2815,13 +2870,13 @@ mergejoinscansel(PlannerInfo *root, Node *clause, * fraction that's <= the right-side maximum value. But only believe * non-default estimates, else stick with our 1.0. 
*/ - selec = scalarineqsel(root, leop, isgt, true, &leftvar, + selec = scalarineqsel(root, leop, isgt, true, collation, &leftvar, rightmax, op_righttype); if (selec != DEFAULT_INEQ_SEL) *leftend = selec; /* And similarly for the right variable. */ - selec = scalarineqsel(root, revleop, isgt, true, &rightvar, + selec = scalarineqsel(root, revleop, isgt, true, collation, &rightvar, leftmax, op_lefttype); if (selec != DEFAULT_INEQ_SEL) *rightend = selec; @@ -2845,13 +2900,13 @@ mergejoinscansel(PlannerInfo *root, Node *clause, * minimum value. But only believe non-default estimates, else stick with * our own default. */ - selec = scalarineqsel(root, ltop, isgt, false, &leftvar, + selec = scalarineqsel(root, ltop, isgt, false, collation, &leftvar, rightmin, op_righttype); if (selec != DEFAULT_INEQ_SEL) *leftstart = selec; /* And similarly for the right variable. */ - selec = scalarineqsel(root, revltop, isgt, false, &rightvar, + selec = scalarineqsel(root, revltop, isgt, false, collation, &rightvar, leftmin, op_lefttype); if (selec != DEFAULT_INEQ_SEL) *rightstart = selec; @@ -5124,9 +5179,11 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault) * * sortop is the "<" comparison operator to use. This should generally * be "<" not ">", as only the former is likely to be found in pg_statistic. + * The collation must be specified too. */ static bool -get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, +get_variable_range(PlannerInfo *root, VariableStatData *vardata, + Oid sortop, Oid collation, Datum *min, Datum *max) { Datum tmin = 0; @@ -5146,7 +5203,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, * before enabling this. 
*/ #ifdef NOT_USED - if (get_actual_variable_range(root, vardata, sortop, min, max)) + if (get_actual_variable_range(root, vardata, sortop, collation, min, max)) return true; #endif @@ -5174,7 +5231,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, * * If there is a histogram that is sorted with some other operator than * the one we want, fail --- this suggests that there is data we can't - * use. + * use. XXX consider collation too. */ if (get_attstatsslot(&sslot, vardata->statsTuple, STATISTIC_KIND_HISTOGRAM, sortop, @@ -5221,14 +5278,14 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, continue; } if (DatumGetBool(FunctionCall2Coll(&opproc, - sslot.stacoll, + collation, sslot.values[i], tmin))) { tmin = sslot.values[i]; tmin_is_mcv = true; } if (DatumGetBool(FunctionCall2Coll(&opproc, - sslot.stacoll, + collation, tmax, sslot.values[i]))) { tmax = sslot.values[i]; @@ -5258,10 +5315,11 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, * If no data available, return false. * * sortop is the "<" comparison operator to use. + * collation is the required collation. */ static bool get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, - Oid sortop, + Oid sortop, Oid collation, Datum *min, Datum *max) { bool have_data = false; @@ -5301,9 +5359,11 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, continue; /* - * The first index column must match the desired variable and sort - * operator --- but we can use a descending-order index. + * The first index column must match the desired variable, sortop, and + * collation --- but we can use a descending-order index. 
*/ + if (collation != index->indexcollations[0]) + continue; /* test first 'cause it's cheapest */ if (!match_index_to_operand(vardata->var, 0, index)) continue; switch (get_op_opfamily_strategy(sortop, index->sortopfamily[0])) diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h index 85d9ecbfc6..521cd84130 100644 --- a/src/include/utils/selfuncs.h +++ b/src/include/utils/selfuncs.h @@ -143,17 +143,36 @@ extern double get_variable_numdistinct(VariableStatData *vardata, extern double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Datum constval, bool varonleft, double *sumcommonp); +extern double mcv_selectivity_ext(VariableStatData *vardata, + FmgrInfo *opproc, Oid collation, + Datum constval, bool varonleft, + double *sumcommonp); extern double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Datum constval, bool varonleft, int min_hist_size, int n_skip, int *hist_size); +extern double histogram_selectivity_ext(VariableStatData *vardata, + FmgrInfo *opproc, Oid collation, + Datum constval, bool varonleft, + int min_hist_size, int n_skip, + int *hist_size); extern double ineq_histogram_selectivity(PlannerInfo *root, VariableStatData *vardata, FmgrInfo *opproc, bool isgt, bool iseq, Datum constval, Oid consttype); +extern double ineq_histogram_selectivity_ext(PlannerInfo *root, + VariableStatData *vardata, + FmgrInfo *opproc, + bool isgt, bool iseq, + Oid collation, + Datum constval, Oid consttype); extern double var_eq_const(VariableStatData *vardata, Oid oproid, Datum constval, bool constisnull, bool varonleft, bool negate); +extern double var_eq_const_ext(VariableStatData *vardata, + Oid oproid, Oid collation, + Datum constval, bool constisnull, + bool varonleft, bool negate); extern double var_eq_non_const(VariableStatData *vardata, Oid oproid, Node *other, bool varonleft, bool negate);
pgsql-bugs by date: