Re: Explicit deterministic COLLATE fails with pattern matching operations on column with non-deterministic collation - Mailing list pgsql-bugs

From Tom Lane
Subject Re: Explicit deterministic COLLATE fails with pattern matching operations on column with non-deterministic collation
Date
Msg-id 1442668.1591320121@sss.pgh.pa.us
Whole thread Raw
In response to Re: Explicit deterministic COLLATE fails with pattern matching operations on column with non-deterministic collation  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-bugs
I wrote:
> 3. Hack things up so that the core code renames all these exposed
> functions to, say, ineq_histogram_selectivity_ext() and so on,
> allowing the additional arguments to exist, but the old names would
> still be there as ABI compatibility wrappers.

Here's a proposed patch for the v12 branch along those lines.

            regards, tom lane

diff --git a/contrib/ltree/ltree_op.c b/contrib/ltree/ltree_op.c
index 51545e0ef4..ec8e40626f 100644
--- a/contrib/ltree/ltree_op.c
+++ b/contrib/ltree/ltree_op.c
@@ -578,6 +578,7 @@ ltreeparentsel(PG_FUNCTION_ARGS)
     Oid            operator = PG_GETARG_OID(1);
     List       *args = (List *) PG_GETARG_POINTER(2);
     int            varRelid = PG_GETARG_INT32(3);
+    Oid            collation = PG_GET_COLLATION();
     VariableStatData vardata;
     Node       *other;
     bool        varonleft;
@@ -617,8 +618,9 @@ ltreeparentsel(PG_FUNCTION_ARGS)
         /*
          * Is the constant "<@" to any of the column's most common values?
          */
-        mcvsel = mcv_selectivity(&vardata, &contproc, constval, varonleft,
-                                 &mcvsum);
+        mcvsel = mcv_selectivity_ext(&vardata, &contproc, collation,
+                                     constval, varonleft,
+                                     &mcvsum);

         /*
          * If the histogram is large enough, see what fraction of it the
@@ -626,9 +628,9 @@ ltreeparentsel(PG_FUNCTION_ARGS)
          * non-MCV population.  Otherwise use the default selectivity for the
          * non-MCV population.
          */
-        selec = histogram_selectivity(&vardata, &contproc,
-                                      constval, varonleft,
-                                      10, 1, &hist_size);
+        selec = histogram_selectivity_ext(&vardata, &contproc, collation,
+                                          constval, varonleft,
+                                          10, 1, &hist_size);
         if (selec < 0)
         {
             /* Nope, fall back on default */
diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
index 77cc378196..6465c9edcc 100644
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -90,7 +90,9 @@ static Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
                                                   Selectivity *rest_selec);
 static Selectivity prefix_selectivity(PlannerInfo *root,
                                       VariableStatData *vardata,
-                                      Oid vartype, Oid opfamily, Const *prefixcon);
+                                      Oid vartype, Oid opfamily,
+                                      Oid collation,
+                                      Const *prefixcon);
 static Selectivity like_selectivity(const char *patt, int pattlen,
                                     bool case_insensitive);
 static Selectivity regex_selectivity(const char *patt, int pattlen,
@@ -586,8 +588,8 @@ patternsel_common(PlannerInfo *root,

         if (eqopr == InvalidOid)
             elog(ERROR, "no = operator for opfamily %u", opfamily);
-        result = var_eq_const(&vardata, eqopr, prefix->constvalue,
-                              false, true, false);
+        result = var_eq_const_ext(&vardata, eqopr, collation,
+                                  prefix->constvalue, false, true, false);
     }
     else
     {
@@ -618,8 +620,9 @@ patternsel_common(PlannerInfo *root,
             opfuncid = get_opcode(oprid);
         fmgr_info(opfuncid, &opproc);

-        selec = histogram_selectivity(&vardata, &opproc, constval, true,
-                                      10, 1, &hist_size);
+        selec = histogram_selectivity_ext(&vardata, &opproc, collation,
+                                          constval, true,
+                                          10, 1, &hist_size);

         /* If not at least 100 entries, use the heuristic method */
         if (hist_size < 100)
@@ -629,7 +632,7 @@ patternsel_common(PlannerInfo *root,

             if (pstatus == Pattern_Prefix_Partial)
                 prefixsel = prefix_selectivity(root, &vardata, vartype,
-                                               opfamily, prefix);
+                                               opfamily, collation, prefix);
             else
                 prefixsel = 1.0;
             heursel = prefixsel * rest_selec;
@@ -661,8 +664,9 @@ patternsel_common(PlannerInfo *root,
          * directly to the result selectivity.  Also add up the total fraction
          * represented by MCV entries.
          */
-        mcv_selec = mcv_selectivity(&vardata, &opproc, constval, true,
-                                    &sumcommon);
+        mcv_selec = mcv_selectivity_ext(&vardata, &opproc, collation,
+                                        constval, true,
+                                        &sumcommon);

         /*
          * Now merge the results from the MCV and histogram calculations,
@@ -1170,12 +1174,13 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
  */
 static Selectivity
 prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
-                   Oid vartype, Oid opfamily, Const *prefixcon)
+                   Oid vartype, Oid opfamily,
+                   Oid collation,
+                   Const *prefixcon)
 {
     Selectivity prefixsel;
     Oid            cmpopr;
     FmgrInfo    opproc;
-    AttStatsSlot sslot;
     Const       *greaterstrcon;
     Selectivity eq_sel;

@@ -1185,10 +1190,11 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
         elog(ERROR, "no >= operator for opfamily %u", opfamily);
     fmgr_info(get_opcode(cmpopr), &opproc);

-    prefixsel = ineq_histogram_selectivity(root, vardata,
-                                           &opproc, true, true,
-                                           prefixcon->constvalue,
-                                           prefixcon->consttype);
+    prefixsel = ineq_histogram_selectivity_ext(root, vardata,
+                                               &opproc, true, true,
+                                               collation,
+                                               prefixcon->constvalue,
+                                               prefixcon->consttype);

     if (prefixsel < 0.0)
     {
@@ -1196,33 +1202,24 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
         return DEFAULT_MATCH_SEL;
     }

-    /*-------
-     * If we can create a string larger than the prefix, say
-     * "x < greaterstr".  We try to generate the string referencing the
-     * collation of the var's statistics, but if that's not available,
-     * use DEFAULT_COLLATION_OID.
-     *-------
+    /*
+     * If we can create a string larger than the prefix, say "x < greaterstr".
      */
-    if (HeapTupleIsValid(vardata->statsTuple) &&
-        get_attstatsslot(&sslot, vardata->statsTuple,
-                         STATISTIC_KIND_HISTOGRAM, InvalidOid, 0))
-         /* sslot.stacoll is set up */ ;
-    else
-        sslot.stacoll = DEFAULT_COLLATION_OID;
     cmpopr = get_opfamily_member(opfamily, vartype, vartype,
                                  BTLessStrategyNumber);
     if (cmpopr == InvalidOid)
         elog(ERROR, "no < operator for opfamily %u", opfamily);
     fmgr_info(get_opcode(cmpopr), &opproc);
-    greaterstrcon = make_greater_string(prefixcon, &opproc, sslot.stacoll);
+    greaterstrcon = make_greater_string(prefixcon, &opproc, collation);
     if (greaterstrcon)
     {
         Selectivity topsel;

-        topsel = ineq_histogram_selectivity(root, vardata,
-                                            &opproc, false, false,
-                                            greaterstrcon->constvalue,
-                                            greaterstrcon->consttype);
+        topsel = ineq_histogram_selectivity_ext(root, vardata,
+                                                &opproc, false, false,
+                                                collation,
+                                                greaterstrcon->constvalue,
+                                                greaterstrcon->consttype);

         /* ineq_histogram_selectivity worked before, it shouldn't fail now */
         Assert(topsel >= 0.0);
@@ -1253,8 +1250,8 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
                                  BTEqualStrategyNumber);
     if (cmpopr == InvalidOid)
         elog(ERROR, "no = operator for opfamily %u", opfamily);
-    eq_sel = var_eq_const(vardata, cmpopr, prefixcon->constvalue,
-                          false, true, false);
+    eq_sel = var_eq_const_ext(vardata, cmpopr, collation, prefixcon->constvalue,
+                              false, true, false);

     prefixsel = Max(prefixsel, eq_sel);

diff --git a/src/backend/utils/adt/network_selfuncs.c b/src/backend/utils/adt/network_selfuncs.c
index 5e0f0614ee..89131ebab7 100644
--- a/src/backend/utils/adt/network_selfuncs.c
+++ b/src/backend/utils/adt/network_selfuncs.c
@@ -137,8 +137,9 @@ networksel(PG_FUNCTION_ARGS)
      * by MCV entries.
      */
     fmgr_info(get_opcode(operator), &proc);
-    mcv_selec = mcv_selectivity(&vardata, &proc, constvalue, varonleft,
-                                &sumcommon);
+    mcv_selec = mcv_selectivity_ext(&vardata, &proc, InvalidOid,
+                                    constvalue, varonleft,
+                                    &sumcommon);

     /*
      * If we have a histogram, use it to estimate the proportion of the
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index b67897da88..f9a2c96b0e 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -88,11 +88,7 @@
  * (if any) is passed using the standard fmgr mechanism, so that the estimator
  * function can fetch it with PG_GET_COLLATION().  Note, however, that all
  * statistics in pg_statistic are currently built using the relevant column's
- * collation.  Thus, in most cases where we are looking at statistics, we
- * should ignore the operator collation and use the stats entry's collation.
- * We expect that the error induced by doing this is usually not large enough
- * to justify complicating matters.  In any case, doing otherwise would yield
- * entirely garbage results for ordered stats data such as histograms.
+ * collation.
  *----------
  */

@@ -148,14 +144,14 @@ get_relation_stats_hook_type get_relation_stats_hook = NULL;
 get_index_stats_hook_type get_index_stats_hook = NULL;

 static double eqsel_internal(PG_FUNCTION_ARGS, bool negate);
-static double eqjoinsel_inner(Oid opfuncoid,
+static double eqjoinsel_inner(Oid opfuncoid, Oid collation,
                               VariableStatData *vardata1, VariableStatData *vardata2,
                               double nd1, double nd2,
                               bool isdefault1, bool isdefault2,
                               AttStatsSlot *sslot1, AttStatsSlot *sslot2,
                               Form_pg_statistic stats1, Form_pg_statistic stats2,
                               bool have_mcvs1, bool have_mcvs2);
-static double eqjoinsel_semi(Oid opfuncoid,
+static double eqjoinsel_semi(Oid opfuncoid, Oid collation,
                              VariableStatData *vardata1, VariableStatData *vardata2,
                              double nd1, double nd2,
                              bool isdefault1, bool isdefault2,
@@ -193,10 +189,11 @@ static double convert_timevalue_to_scalar(Datum value, Oid typid,
 static void examine_simple_variable(PlannerInfo *root, Var *var,
                                     VariableStatData *vardata);
 static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
-                               Oid sortop, Datum *min, Datum *max);
+                               Oid sortop, Oid collation,
+                               Datum *min, Datum *max);
 static bool get_actual_variable_range(PlannerInfo *root,
                                       VariableStatData *vardata,
-                                      Oid sortop,
+                                      Oid sortop, Oid collation,
                                       Datum *min, Datum *max);
 static bool get_actual_variable_endpoint(Relation heapRel,
                                          Relation indexRel,
@@ -234,6 +231,7 @@ eqsel_internal(PG_FUNCTION_ARGS, bool negate)
     Oid            operator = PG_GETARG_OID(1);
     List       *args = (List *) PG_GETARG_POINTER(2);
     int            varRelid = PG_GETARG_INT32(3);
+    Oid            collation = PG_GET_COLLATION();
     VariableStatData vardata;
     Node       *other;
     bool        varonleft;
@@ -267,10 +265,10 @@ eqsel_internal(PG_FUNCTION_ARGS, bool negate)
      * in the query.)
      */
     if (IsA(other, Const))
-        selec = var_eq_const(&vardata, operator,
-                             ((Const *) other)->constvalue,
-                             ((Const *) other)->constisnull,
-                             varonleft, negate);
+        selec = var_eq_const_ext(&vardata, operator, collation,
+                                 ((Const *) other)->constvalue,
+                                 ((Const *) other)->constisnull,
+                                 varonleft, negate);
     else
         selec = var_eq_non_const(&vardata, operator, other,
                                  varonleft, negate);
@@ -289,6 +287,16 @@ double
 var_eq_const(VariableStatData *vardata, Oid operator,
              Datum constval, bool constisnull,
              bool varonleft, bool negate)
+{
+    return var_eq_const_ext(vardata, operator, DEFAULT_COLLATION_OID,
+                            constval, constisnull,
+                            varonleft, negate);
+}
+
+double
+var_eq_const_ext(VariableStatData *vardata, Oid operator, Oid collation,
+                 Datum constval, bool constisnull,
+                 bool varonleft, bool negate)
 {
     double        selec;
     double        nullfrac = 0.0;
@@ -353,12 +361,12 @@ var_eq_const(VariableStatData *vardata, Oid operator,
                 /* be careful to apply operator right way 'round */
                 if (varonleft)
                     match = DatumGetBool(FunctionCall2Coll(&eqproc,
-                                                           sslot.stacoll,
+                                                           collation,
                                                            sslot.values[i],
                                                            constval));
                 else
                     match = DatumGetBool(FunctionCall2Coll(&eqproc,
-                                                           sslot.stacoll,
+                                                           collation,
                                                            constval,
                                                            sslot.values[i]));
                 if (match)
@@ -555,6 +563,7 @@ neqsel(PG_FUNCTION_ARGS)
  */
 static double
 scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq,
+              Oid collation,
               VariableStatData *vardata, Datum constval, Oid consttype)
 {
     Form_pg_statistic stats;
@@ -654,16 +663,17 @@ scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq,
      * to the result selectivity.  Also add up the total fraction represented
      * by MCV entries.
      */
-    mcv_selec = mcv_selectivity(vardata, &opproc, constval, true,
-                                &sumcommon);
+    mcv_selec = mcv_selectivity_ext(vardata, &opproc, collation, constval, true,
+                                    &sumcommon);

     /*
      * If there is a histogram, determine which bin the constant falls in, and
      * compute the resulting contribution to selectivity.
      */
-    hist_selec = ineq_histogram_selectivity(root, vardata,
-                                            &opproc, isgt, iseq,
-                                            constval, consttype);
+    hist_selec = ineq_histogram_selectivity_ext(root, vardata,
+                                                &opproc, isgt, iseq,
+                                                collation,
+                                                constval, consttype);

     /*
      * Now merge the results from the MCV and histogram calculations,
@@ -707,6 +717,15 @@ double
 mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
                 Datum constval, bool varonleft,
                 double *sumcommonp)
+{
+    return mcv_selectivity_ext(vardata, opproc, DEFAULT_COLLATION_OID,
+                               constval, varonleft, sumcommonp);
+}
+
+double
+mcv_selectivity_ext(VariableStatData *vardata, FmgrInfo *opproc, Oid collation,
+                    Datum constval, bool varonleft,
+                    double *sumcommonp)
 {
     double        mcv_selec,
                 sumcommon;
@@ -726,11 +745,11 @@ mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
         {
             if (varonleft ?
                 DatumGetBool(FunctionCall2Coll(opproc,
-                                               sslot.stacoll,
+                                               collation,
                                                sslot.values[i],
                                                constval)) :
                 DatumGetBool(FunctionCall2Coll(opproc,
-                                               sslot.stacoll,
+                                               collation,
                                                constval,
                                                sslot.values[i])))
                 mcv_selec += sslot.numbers[i];
@@ -780,6 +799,20 @@ histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
                       Datum constval, bool varonleft,
                       int min_hist_size, int n_skip,
                       int *hist_size)
+{
+    return histogram_selectivity_ext(vardata,
+                                     opproc, DEFAULT_COLLATION_OID,
+                                     constval, varonleft,
+                                     min_hist_size, n_skip,
+                                     hist_size);
+}
+
+double
+histogram_selectivity_ext(VariableStatData *vardata,
+                          FmgrInfo *opproc, Oid collation,
+                          Datum constval, bool varonleft,
+                          int min_hist_size, int n_skip,
+                          int *hist_size)
 {
     double        result;
     AttStatsSlot sslot;
@@ -804,11 +837,11 @@ histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
             {
                 if (varonleft ?
                     DatumGetBool(FunctionCall2Coll(opproc,
-                                                   sslot.stacoll,
+                                                   collation,
                                                    sslot.values[i],
                                                    constval)) :
                     DatumGetBool(FunctionCall2Coll(opproc,
-                                                   sslot.stacoll,
+                                                   collation,
                                                    constval,
                                                    sslot.values[i])))
                     nmatch++;
@@ -848,6 +881,19 @@ ineq_histogram_selectivity(PlannerInfo *root,
                            VariableStatData *vardata,
                            FmgrInfo *opproc, bool isgt, bool iseq,
                            Datum constval, Oid consttype)
+{
+    return ineq_histogram_selectivity_ext(root, vardata,
+                                          opproc, isgt, iseq,
+                                          DEFAULT_COLLATION_OID,
+                                          constval, consttype);
+}
+
+double
+ineq_histogram_selectivity_ext(PlannerInfo *root,
+                               VariableStatData *vardata,
+                               FmgrInfo *opproc, bool isgt, bool iseq,
+                               Oid collation,
+                               Datum constval, Oid consttype)
 {
     double        hist_selec;
     AttStatsSlot sslot;
@@ -860,9 +906,11 @@ ineq_histogram_selectivity(PlannerInfo *root,
      * column type.  However, to make that work we will need to figure out
      * which staop to search for --- it's not necessarily the one we have at
      * hand!  (For example, we might have a '<=' operator rather than the '<'
-     * operator that will appear in staop.)  For now, assume that whatever
-     * appears in pg_statistic is sorted the same way our operator sorts, or
-     * the reverse way if isgt is true.
+     * operator that will appear in staop.)  The collation might not agree
+     * either.  For now, just assume that whatever appears in pg_statistic is
+     * sorted the same way our operator sorts, or the reverse way if isgt is
+     * true.  This could result in a bogus estimate, but it still seems better
+     * than falling back to the default estimate.
      */
     if (HeapTupleIsValid(vardata->statsTuple) &&
         statistic_proc_security_check(vardata, opproc->fn_oid) &&
@@ -908,6 +956,7 @@ ineq_histogram_selectivity(PlannerInfo *root,
                 have_end = get_actual_variable_range(root,
                                                      vardata,
                                                      sslot.staop,
+                                                     collation,
                                                      &sslot.values[0],
                                                      &sslot.values[1]);

@@ -925,17 +974,19 @@ ineq_histogram_selectivity(PlannerInfo *root,
                     have_end = get_actual_variable_range(root,
                                                          vardata,
                                                          sslot.staop,
+                                                         collation,
                                                          &sslot.values[0],
                                                          NULL);
                 else if (probe == sslot.nvalues - 1 && sslot.nvalues > 2)
                     have_end = get_actual_variable_range(root,
                                                          vardata,
                                                          sslot.staop,
+                                                         collation,
                                                          NULL,
                                                          &sslot.values[probe]);

                 ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
-                                                       sslot.stacoll,
+                                                       collation,
                                                        sslot.values[probe],
                                                        constval));
                 if (isgt)
@@ -1020,7 +1071,7 @@ ineq_histogram_selectivity(PlannerInfo *root,
                  * values to a uniform comparison scale, and do a linear
                  * interpolation within this bin.
                  */
-                if (convert_to_scalar(constval, consttype, sslot.stacoll,
+                if (convert_to_scalar(constval, consttype, collation,
                                       &val,
                                       sslot.values[i - 1], sslot.values[i],
                                       vardata->vartype,
@@ -1160,6 +1211,7 @@ scalarineqsel_wrapper(PG_FUNCTION_ARGS, bool isgt, bool iseq)
     Oid            operator = PG_GETARG_OID(1);
     List       *args = (List *) PG_GETARG_POINTER(2);
     int            varRelid = PG_GETARG_INT32(3);
+    Oid            collation = PG_GET_COLLATION();
     VariableStatData vardata;
     Node       *other;
     bool        varonleft;
@@ -1212,7 +1264,7 @@ scalarineqsel_wrapper(PG_FUNCTION_ARGS, bool isgt, bool iseq)
     }

     /* The rest of the work is done by scalarineqsel(). */
-    selec = scalarineqsel(root, operator, isgt, iseq,
+    selec = scalarineqsel(root, operator, isgt, iseq, collation,
                           &vardata, constval, consttype);

     ReleaseVariableStats(vardata);
@@ -1277,8 +1329,8 @@ boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
          * A boolean variable V is equivalent to the clause V = 't', so we
          * compute the selectivity as if that is what we have.
          */
-        selec = var_eq_const(&vardata, BooleanEqualOperator,
-                             BoolGetDatum(true), false, true, false);
+        selec = var_eq_const_ext(&vardata, BooleanEqualOperator, InvalidOid,
+                                 BoolGetDatum(true), false, true, false);
     }
     else
     {
@@ -2003,6 +2055,7 @@ eqjoinsel(PG_FUNCTION_ARGS)
     JoinType    jointype = (JoinType) PG_GETARG_INT16(3);
 #endif
     SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
+    Oid            collation = PG_GET_COLLATION();
     double        selec;
     double        selec_inner;
     VariableStatData vardata1;
@@ -2053,7 +2106,7 @@ eqjoinsel(PG_FUNCTION_ARGS)
     }

     /* We need to compute the inner-join selectivity in all cases */
-    selec_inner = eqjoinsel_inner(opfuncoid,
+    selec_inner = eqjoinsel_inner(opfuncoid, collation,
                                   &vardata1, &vardata2,
                                   nd1, nd2,
                                   isdefault1, isdefault2,
@@ -2080,7 +2133,7 @@ eqjoinsel(PG_FUNCTION_ARGS)
             inner_rel = find_join_input_rel(root, sjinfo->min_righthand);

             if (!join_is_reversed)
-                selec = eqjoinsel_semi(opfuncoid,
+                selec = eqjoinsel_semi(opfuncoid, collation,
                                        &vardata1, &vardata2,
                                        nd1, nd2,
                                        isdefault1, isdefault2,
@@ -2093,7 +2146,7 @@ eqjoinsel(PG_FUNCTION_ARGS)
                 Oid            commop = get_commutator(operator);
                 Oid            commopfuncoid = OidIsValid(commop) ? get_opcode(commop) : InvalidOid;

-                selec = eqjoinsel_semi(commopfuncoid,
+                selec = eqjoinsel_semi(commopfuncoid, collation,
                                        &vardata2, &vardata1,
                                        nd2, nd1,
                                        isdefault2, isdefault1,
@@ -2141,7 +2194,7 @@ eqjoinsel(PG_FUNCTION_ARGS)
  * that it's worth trying to distinguish them here.
  */
 static double
-eqjoinsel_inner(Oid opfuncoid,
+eqjoinsel_inner(Oid opfuncoid, Oid collation,
                 VariableStatData *vardata1, VariableStatData *vardata2,
                 double nd1, double nd2,
                 bool isdefault1, bool isdefault2,
@@ -2203,7 +2256,7 @@ eqjoinsel_inner(Oid opfuncoid,
                 if (hasmatch2[j])
                     continue;
                 if (DatumGetBool(FunctionCall2Coll(&eqproc,
-                                                   sslot1->stacoll,
+                                                   collation,
                                                    sslot1->values[i],
                                                    sslot2->values[j])))
                 {
@@ -2321,7 +2374,7 @@ eqjoinsel_inner(Oid opfuncoid,
  * Unlike eqjoinsel_inner, we have to cope with opfuncoid being InvalidOid.
  */
 static double
-eqjoinsel_semi(Oid opfuncoid,
+eqjoinsel_semi(Oid opfuncoid, Oid collation,
                VariableStatData *vardata1, VariableStatData *vardata2,
                double nd1, double nd2,
                bool isdefault1, bool isdefault2,
@@ -2415,7 +2468,7 @@ eqjoinsel_semi(Oid opfuncoid,
                 if (hasmatch2[j])
                     continue;
                 if (DatumGetBool(FunctionCall2Coll(&eqproc,
-                                                   sslot1->stacoll,
+                                                   collation,
                                                    sslot1->values[i],
                                                    sslot2->values[j])))
                 {
@@ -2635,6 +2688,7 @@ mergejoinscansel(PlannerInfo *root, Node *clause,
     Oid            op_lefttype;
     Oid            op_righttype;
     Oid            opno,
+                collation,
                 lsortop,
                 rsortop,
                 lstatop,
@@ -2659,6 +2713,7 @@ mergejoinscansel(PlannerInfo *root, Node *clause,
     if (!is_opclause(clause))
         return;                    /* shouldn't happen */
     opno = ((OpExpr *) clause)->opno;
+    collation = ((OpExpr *) clause)->inputcollid;
     left = get_leftop((Expr *) clause);
     right = get_rightop((Expr *) clause);
     if (!right)
@@ -2792,20 +2847,20 @@ mergejoinscansel(PlannerInfo *root, Node *clause,
     /* Try to get ranges of both inputs */
     if (!isgt)
     {
-        if (!get_variable_range(root, &leftvar, lstatop,
+        if (!get_variable_range(root, &leftvar, lstatop, collation,
                                 &leftmin, &leftmax))
             goto fail;            /* no range available from stats */
-        if (!get_variable_range(root, &rightvar, rstatop,
+        if (!get_variable_range(root, &rightvar, rstatop, collation,
                                 &rightmin, &rightmax))
             goto fail;            /* no range available from stats */
     }
     else
     {
         /* need to swap the max and min */
-        if (!get_variable_range(root, &leftvar, lstatop,
+        if (!get_variable_range(root, &leftvar, lstatop, collation,
                                 &leftmax, &leftmin))
             goto fail;            /* no range available from stats */
-        if (!get_variable_range(root, &rightvar, rstatop,
+        if (!get_variable_range(root, &rightvar, rstatop, collation,
                                 &rightmax, &rightmin))
             goto fail;            /* no range available from stats */
     }
@@ -2815,13 +2870,13 @@ mergejoinscansel(PlannerInfo *root, Node *clause,
      * fraction that's <= the right-side maximum value.  But only believe
      * non-default estimates, else stick with our 1.0.
      */
-    selec = scalarineqsel(root, leop, isgt, true, &leftvar,
+    selec = scalarineqsel(root, leop, isgt, true, collation, &leftvar,
                           rightmax, op_righttype);
     if (selec != DEFAULT_INEQ_SEL)
         *leftend = selec;

     /* And similarly for the right variable. */
-    selec = scalarineqsel(root, revleop, isgt, true, &rightvar,
+    selec = scalarineqsel(root, revleop, isgt, true, collation, &rightvar,
                           leftmax, op_lefttype);
     if (selec != DEFAULT_INEQ_SEL)
         *rightend = selec;
@@ -2845,13 +2900,13 @@ mergejoinscansel(PlannerInfo *root, Node *clause,
      * minimum value.  But only believe non-default estimates, else stick with
      * our own default.
      */
-    selec = scalarineqsel(root, ltop, isgt, false, &leftvar,
+    selec = scalarineqsel(root, ltop, isgt, false, collation, &leftvar,
                           rightmin, op_righttype);
     if (selec != DEFAULT_INEQ_SEL)
         *leftstart = selec;

     /* And similarly for the right variable. */
-    selec = scalarineqsel(root, revltop, isgt, false, &rightvar,
+    selec = scalarineqsel(root, revltop, isgt, false, collation, &rightvar,
                           leftmin, op_lefttype);
     if (selec != DEFAULT_INEQ_SEL)
         *rightstart = selec;
@@ -5124,9 +5179,11 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
  *
  * sortop is the "<" comparison operator to use.  This should generally
  * be "<" not ">", as only the former is likely to be found in pg_statistic.
+ * The collation must be specified too.
  */
 static bool
-get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
+get_variable_range(PlannerInfo *root, VariableStatData *vardata,
+                   Oid sortop, Oid collation,
                    Datum *min, Datum *max)
 {
     Datum        tmin = 0;
@@ -5146,7 +5203,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
      * before enabling this.
      */
 #ifdef NOT_USED
-    if (get_actual_variable_range(root, vardata, sortop, min, max))
+    if (get_actual_variable_range(root, vardata, sortop, collation, min, max))
         return true;
 #endif

@@ -5174,7 +5231,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
      *
      * If there is a histogram that is sorted with some other operator than
      * the one we want, fail --- this suggests that there is data we can't
-     * use.
+     * use.  XXX we should also check that the histogram's collation matches.
      */
     if (get_attstatsslot(&sslot, vardata->statsTuple,
                          STATISTIC_KIND_HISTOGRAM, sortop,
@@ -5221,14 +5278,14 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
                 continue;
             }
             if (DatumGetBool(FunctionCall2Coll(&opproc,
-                                               sslot.stacoll,
+                                               collation,
                                                sslot.values[i], tmin)))
             {
                 tmin = sslot.values[i];
                 tmin_is_mcv = true;
             }
             if (DatumGetBool(FunctionCall2Coll(&opproc,
-                                               sslot.stacoll,
+                                               collation,
                                                tmax, sslot.values[i])))
             {
                 tmax = sslot.values[i];
@@ -5258,10 +5315,11 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
  *        If no data available, return false.
  *
  * sortop is the "<" comparison operator to use.
+ * collation is the required collation.
  */
 static bool
 get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
-                          Oid sortop,
+                          Oid sortop, Oid collation,
                           Datum *min, Datum *max)
 {
     bool        have_data = false;
@@ -5301,9 +5359,11 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
             continue;

         /*
-         * The first index column must match the desired variable and sort
-         * operator --- but we can use a descending-order index.
+         * The first index column must match the desired variable, sortop, and
+         * collation --- but we can use a descending-order index.
          */
+        if (collation != index->indexcollations[0])
+            continue;            /* test first 'cause it's cheapest */
         if (!match_index_to_operand(vardata->var, 0, index))
             continue;
         switch (get_op_opfamily_strategy(sortop, index->sortopfamily[0]))
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 85d9ecbfc6..521cd84130 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -143,17 +143,36 @@ extern double get_variable_numdistinct(VariableStatData *vardata,
 extern double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
                               Datum constval, bool varonleft,
                               double *sumcommonp);
+extern double mcv_selectivity_ext(VariableStatData *vardata,
+                                  FmgrInfo *opproc, Oid collation,
+                                  Datum constval, bool varonleft,
+                                  double *sumcommonp);
 extern double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
                                     Datum constval, bool varonleft,
                                     int min_hist_size, int n_skip,
                                     int *hist_size);
+extern double histogram_selectivity_ext(VariableStatData *vardata,
+                                        FmgrInfo *opproc, Oid collation,
+                                        Datum constval, bool varonleft,
+                                        int min_hist_size, int n_skip,
+                                        int *hist_size);
 extern double ineq_histogram_selectivity(PlannerInfo *root,
                                          VariableStatData *vardata,
                                          FmgrInfo *opproc, bool isgt, bool iseq,
                                          Datum constval, Oid consttype);
+extern double ineq_histogram_selectivity_ext(PlannerInfo *root,
+                                             VariableStatData *vardata,
+                                             FmgrInfo *opproc,
+                                             bool isgt, bool iseq,
+                                             Oid collation,
+                                             Datum constval, Oid consttype);
 extern double var_eq_const(VariableStatData *vardata, Oid oproid,
                            Datum constval, bool constisnull,
                            bool varonleft, bool negate);
+extern double var_eq_const_ext(VariableStatData *vardata,
+                               Oid oproid, Oid collation,
+                               Datum constval, bool constisnull,
+                               bool varonleft, bool negate);
 extern double var_eq_non_const(VariableStatData *vardata, Oid oproid,
                                Node *other,
                                bool varonleft, bool negate);

pgsql-bugs by date:

Previous
From: Peter Geoghegan
Date:
Subject: Re: Potential G2-item cycles under serializable isolation
Next
From: Peter Geoghegan
Date:
Subject: Re: Potential G2-item cycles under serializable isolation