Re: New design for FK-based join selectivity estimation - Mailing list pgsql-hackers

From Tom Lane
Subject Re: New design for FK-based join selectivity estimation
Date
Msg-id 15245.1466031608@sss.pgh.pa.us
Whole thread Raw
In response to Re: New design for FK-based join selectivity estimation  (Tomas Vondra <tomas.vondra@2ndquadrant.com>)
Responses Re: New design for FK-based join selectivity estimation  (Tomas Vondra <tomas.vondra@2ndquadrant.com>)
Re: New design for FK-based join selectivity estimation  (Andrew Gierth <andrew@tao11.riddles.org.uk>)
List pgsql-hackers
Tomas Vondra <tomas.vondra@2ndquadrant.com> writes:
> Attached is a reworked patch, mostly following the new design proposal
> from this thread.

I've whacked this around quite a bit and am now reasonably happy with it.
The issue of planning speed hit should be pretty much gone, although I've
not done testing to prove that.  I've rearranged the actual selectivity
calculation some too, because tests I did here did not look very good for
anything but the plain-inner-join case.  It may be that more work is
needed there, but at least there's reasonable commentary about what we're
doing and why.

I have not adopted the plan of ignoring single-column FKs.  While it would
only take a couple more lines to do so, I think the argument that there
will be a material planning speed hit no longer has much force.  And I see
a couple of arguments in favor of allowing this code to trigger on
single-column FKs.  First, it should work well even when pg_statistic data
is missing or out of date, which would be an improvement; and second, we
are likely not going to get much beta testing of the behavior if we
restrict it to multi-col FKs.  So I think we should ship it like this for
beta even if we end up adding a filter against single-column FKs for
release.

Comments and testing appreciated.

            regards, tom lane

diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 08ed990..8e5b33c 100644
*** a/src/backend/nodes/copyfuncs.c
--- b/src/backend/nodes/copyfuncs.c
***************
*** 27,32 ****
--- 27,33 ----
  #include "nodes/plannodes.h"
  #include "nodes/relation.h"
  #include "utils/datum.h"
+ #include "utils/rel.h"


  /*
*************** _copyValue(const Value *from)
*** 4252,4257 ****
--- 4253,4276 ----
      return newnode;
  }

+
+ static ForeignKeyCacheInfo *
+ _copyForeignKeyCacheInfo(const ForeignKeyCacheInfo *from)
+ {
+     ForeignKeyCacheInfo *newnode = makeNode(ForeignKeyCacheInfo);
+
+     COPY_SCALAR_FIELD(conrelid);
+     COPY_SCALAR_FIELD(confrelid);
+     COPY_SCALAR_FIELD(nkeys);
+     /* COPY_SCALAR_FIELD might work for these, but let's not assume that */
+     memcpy(newnode->conkey, from->conkey, sizeof(newnode->conkey));
+     memcpy(newnode->confkey, from->confkey, sizeof(newnode->confkey));
+     memcpy(newnode->conpfeqop, from->conpfeqop, sizeof(newnode->conpfeqop));
+
+     return newnode;
+ }
+
+
  /*
   * copyObject
   *
*************** copyObject(const void *from)
*** 5050,5055 ****
--- 5069,5081 ----
              retval = _copyRoleSpec(from);
              break;

+             /*
+              * MISCELLANEOUS NODES
+              */
+         case T_ForeignKeyCacheInfo:
+             retval = _copyForeignKeyCacheInfo(from);
+             break;
+
          default:
              elog(ERROR, "unrecognized node type: %d", (int) nodeTag(from));
              retval = 0;            /* keep compiler quiet */
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index c7b4153..eaaa370 100644
*** a/src/backend/nodes/outfuncs.c
--- b/src/backend/nodes/outfuncs.c
***************
*** 30,35 ****
--- 30,36 ----
  #include "nodes/plannodes.h"
  #include "nodes/relation.h"
  #include "utils/datum.h"
+ #include "utils/rel.h"


  /*
*************** _outPlannerInfo(StringInfo str, const Pl
*** 2048,2053 ****
--- 2049,2055 ----
      WRITE_NODE_FIELD(append_rel_list);
      WRITE_NODE_FIELD(rowMarks);
      WRITE_NODE_FIELD(placeholder_list);
+     WRITE_NODE_FIELD(fkey_list);
      WRITE_NODE_FIELD(query_pathkeys);
      WRITE_NODE_FIELD(group_pathkeys);
      WRITE_NODE_FIELD(window_pathkeys);
*************** _outIndexOptInfo(StringInfo str, const I
*** 2138,2143 ****
--- 2140,2174 ----
  }

  static void
+ _outForeignKeyOptInfo(StringInfo str, const ForeignKeyOptInfo *node)
+ {
+     int            i;
+
+     WRITE_NODE_TYPE("FOREIGNKEYOPTINFO");
+
+     WRITE_UINT_FIELD(con_relid);
+     WRITE_UINT_FIELD(ref_relid);
+     WRITE_INT_FIELD(nkeys);
+     appendStringInfoString(str, " :conkey");
+     for (i = 0; i < node->nkeys; i++)
+         appendStringInfo(str, " %d", node->conkey[i]);
+     appendStringInfoString(str, " :confkey");
+     for (i = 0; i < node->nkeys; i++)
+         appendStringInfo(str, " %d", node->confkey[i]);
+     appendStringInfoString(str, " :conpfeqop");
+     for (i = 0; i < node->nkeys; i++)
+         appendStringInfo(str, " %u", node->conpfeqop[i]);
+     WRITE_INT_FIELD(nmatched);
+     /* for compactness, just print the number of matches per column: */
+     appendStringInfoString(str, " :eclass");
+     for (i = 0; i < node->nkeys; i++)
+         appendStringInfo(str, " %d", (node->eclass[i] != NULL));
+     appendStringInfoString(str, " :rinfos");
+     for (i = 0; i < node->nkeys; i++)
+         appendStringInfo(str, " %d", list_length(node->rinfos[i]));
+ }
+
+ static void
  _outEquivalenceClass(StringInfo str, const EquivalenceClass *node)
  {
      /*
*************** _outConstraint(StringInfo str, const Con
*** 3207,3212 ****
--- 3238,3264 ----
      }
  }

+ static void
+ _outForeignKeyCacheInfo(StringInfo str, const ForeignKeyCacheInfo *node)
+ {
+     int            i;
+
+     WRITE_NODE_TYPE("FOREIGNKEYCACHEINFO");
+
+     WRITE_OID_FIELD(conrelid);
+     WRITE_OID_FIELD(confrelid);
+     WRITE_INT_FIELD(nkeys);
+     appendStringInfoString(str, " :conkey");
+     for (i = 0; i < node->nkeys; i++)
+         appendStringInfo(str, " %d", node->conkey[i]);
+     appendStringInfoString(str, " :confkey");
+     for (i = 0; i < node->nkeys; i++)
+         appendStringInfo(str, " %d", node->confkey[i]);
+     appendStringInfoString(str, " :conpfeqop");
+     for (i = 0; i < node->nkeys; i++)
+         appendStringInfo(str, " %u", node->conpfeqop[i]);
+ }
+

  /*
   * outNode -
*************** outNode(StringInfo str, const void *obj)
*** 3607,3612 ****
--- 3659,3667 ----
              case T_IndexOptInfo:
                  _outIndexOptInfo(str, obj);
                  break;
+             case T_ForeignKeyOptInfo:
+                 _outForeignKeyOptInfo(str, obj);
+                 break;
              case T_EquivalenceClass:
                  _outEquivalenceClass(str, obj);
                  break;
*************** outNode(StringInfo str, const void *obj)
*** 3783,3788 ****
--- 3838,3846 ----
              case T_XmlSerialize:
                  _outXmlSerialize(str, obj);
                  break;
+             case T_ForeignKeyCacheInfo:
+                 _outForeignKeyCacheInfo(str, obj);
+                 break;

              default:

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index e7f63f4..7ea6d57 100644
*** a/src/backend/optimizer/path/costsize.c
--- b/src/backend/optimizer/path/costsize.c
*************** static bool has_indexed_join_quals(NestP
*** 147,156 ****
--- 147,163 ----
  static double approx_tuple_count(PlannerInfo *root, JoinPath *path,
                     List *quals);
  static double calc_joinrel_size_estimate(PlannerInfo *root,
+                            RelOptInfo *outer_rel,
+                            RelOptInfo *inner_rel,
                             double outer_rows,
                             double inner_rows,
                             SpecialJoinInfo *sjinfo,
                             List *restrictlist);
+ static Selectivity get_foreign_key_join_selectivity(PlannerInfo *root,
+                                  Relids outer_relids,
+                                  Relids inner_relids,
+                                  SpecialJoinInfo *sjinfo,
+                                  List **restrictlist);
  static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
  static double relation_byte_size(double tuples, int width);
  static double page_size(double tuples, int width);
*************** set_joinrel_size_estimates(PlannerInfo *
*** 3837,3842 ****
--- 3844,3851 ----
                             List *restrictlist)
  {
      rel->rows = calc_joinrel_size_estimate(root,
+                                            outer_rel,
+                                            inner_rel,
                                             outer_rel->rows,
                                             inner_rel->rows,
                                             sjinfo,
*************** set_joinrel_size_estimates(PlannerInfo *
*** 3848,3855 ****
   *        Make a size estimate for a parameterized scan of a join relation.
   *
   * 'rel' is the joinrel under consideration.
!  * 'outer_rows', 'inner_rows' are the sizes of the (probably also
!  *        parameterized) join inputs under consideration.
   * 'sjinfo' is any SpecialJoinInfo relevant to this join.
   * 'restrict_clauses' lists the join clauses that need to be applied at the
   * join node (including any movable clauses that were moved down to this join,
--- 3857,3864 ----
   *        Make a size estimate for a parameterized scan of a join relation.
   *
   * 'rel' is the joinrel under consideration.
!  * 'outer_path', 'inner_path' are (probably also parameterized) Paths that
!  *        produce the relations being joined.
   * 'sjinfo' is any SpecialJoinInfo relevant to this join.
   * 'restrict_clauses' lists the join clauses that need to be applied at the
   * join node (including any movable clauses that were moved down to this join,
*************** set_joinrel_size_estimates(PlannerInfo *
*** 3860,3867 ****
   */
  double
  get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel,
!                                double outer_rows,
!                                double inner_rows,
                                 SpecialJoinInfo *sjinfo,
                                 List *restrict_clauses)
  {
--- 3869,3876 ----
   */
  double
  get_parameterized_joinrel_size(PlannerInfo *root, RelOptInfo *rel,
!                                Path *outer_path,
!                                Path *inner_path,
                                 SpecialJoinInfo *sjinfo,
                                 List *restrict_clauses)
  {
*************** get_parameterized_joinrel_size(PlannerIn
*** 3877,3884 ****
       * estimate for any pair with the same parameterization.
       */
      nrows = calc_joinrel_size_estimate(root,
!                                        outer_rows,
!                                        inner_rows,
                                         sjinfo,
                                         restrict_clauses);
      /* For safety, make sure result is not more than the base estimate */
--- 3886,3895 ----
       * estimate for any pair with the same parameterization.
       */
      nrows = calc_joinrel_size_estimate(root,
!                                        outer_path->parent,
!                                        inner_path->parent,
!                                        outer_path->rows,
!                                        inner_path->rows,
                                         sjinfo,
                                         restrict_clauses);
      /* For safety, make sure result is not more than the base estimate */
*************** get_parameterized_joinrel_size(PlannerIn
*** 3891,3905 ****
--- 3902,3923 ----
   * calc_joinrel_size_estimate
   *        Workhorse for set_joinrel_size_estimates and
   *        get_parameterized_joinrel_size.
+  *
+  * outer_rel/inner_rel are the relations being joined, but they should be
+  * assumed to have sizes outer_rows/inner_rows; those numbers might be less
+  * than what rel->rows says, when we are considering parameterized paths.
   */
  static double
  calc_joinrel_size_estimate(PlannerInfo *root,
+                            RelOptInfo *outer_rel,
+                            RelOptInfo *inner_rel,
                             double outer_rows,
                             double inner_rows,
                             SpecialJoinInfo *sjinfo,
                             List *restrictlist)
  {
      JoinType    jointype = sjinfo->jointype;
+     Selectivity fkselec;
      Selectivity jselec;
      Selectivity pselec;
      double        nrows;
*************** calc_joinrel_size_estimate(PlannerInfo *
*** 3910,3915 ****
--- 3928,3949 ----
       * double-counting them because they were not considered in estimating the
       * sizes of the component rels.
       *
+      * First, see whether any of the joinclauses can be matched to known FK
+      * constraints.  If so, drop those clauses from the restrictlist, and
+      * instead estimate their selectivity using FK semantics.  (We do this
+      * without regard to whether said clauses are local or "pushed down".
+      * Probably, an FK-matching clause could never be seen as pushed down at
+      * an outer join, since it would be strict and hence would be grounds for
+      * join strength reduction.)  fkselec gets the net selectivity for
+      * FK-matching clauses, or 1.0 if there are none.
+      */
+     fkselec = get_foreign_key_join_selectivity(root,
+                                                outer_rel->relids,
+                                                inner_rel->relids,
+                                                sjinfo,
+                                                &restrictlist);
+
+     /*
       * For an outer join, we have to distinguish the selectivity of the join's
       * own clauses (JOIN/ON conditions) from any clauses that were "pushed
       * down".  For inner joins we just count them all as joinclauses.
*************** calc_joinrel_size_estimate(PlannerInfo *
*** 3973,3988 ****
      switch (jointype)
      {
          case JOIN_INNER:
!             nrows = outer_rows * inner_rows * jselec;
              break;
          case JOIN_LEFT:
!             nrows = outer_rows * inner_rows * jselec;
              if (nrows < outer_rows)
                  nrows = outer_rows;
              nrows *= pselec;
              break;
          case JOIN_FULL:
!             nrows = outer_rows * inner_rows * jselec;
              if (nrows < outer_rows)
                  nrows = outer_rows;
              if (nrows < inner_rows)
--- 4007,4023 ----
      switch (jointype)
      {
          case JOIN_INNER:
!             nrows = outer_rows * inner_rows * fkselec * jselec;
!             /* pselec not used */
              break;
          case JOIN_LEFT:
!             nrows = outer_rows * inner_rows * fkselec * jselec;
              if (nrows < outer_rows)
                  nrows = outer_rows;
              nrows *= pselec;
              break;
          case JOIN_FULL:
!             nrows = outer_rows * inner_rows * fkselec * jselec;
              if (nrows < outer_rows)
                  nrows = outer_rows;
              if (nrows < inner_rows)
*************** calc_joinrel_size_estimate(PlannerInfo *
*** 3990,4000 ****
              nrows *= pselec;
              break;
          case JOIN_SEMI:
!             nrows = outer_rows * jselec;
              /* pselec not used */
              break;
          case JOIN_ANTI:
!             nrows = outer_rows * (1.0 - jselec);
              nrows *= pselec;
              break;
          default:
--- 4025,4035 ----
              nrows *= pselec;
              break;
          case JOIN_SEMI:
!             nrows = outer_rows * fkselec * jselec;
              /* pselec not used */
              break;
          case JOIN_ANTI:
!             nrows = outer_rows * (1.0 - fkselec * jselec);
              nrows *= pselec;
              break;
          default:
*************** calc_joinrel_size_estimate(PlannerInfo *
*** 4008,4013 ****
--- 4043,4261 ----
  }

  /*
+  * get_foreign_key_join_selectivity
+  *        Estimate join selectivity for foreign-key-related clauses.
+  *
+  * Remove any clauses that can be matched to FK constraints from *restrictlist,
+  * and return a substitute estimate of their selectivity.  1.0 is returned
+  * when there are no such clauses.
+  *
+  * The reason for treating such clauses specially is that we can get better
+  * estimates this way than by relying on clauselist_selectivity(), especially
+  * for multi-column FKs where that function's assumption that the clauses are
+  * independent falls down badly.  But even with single-column FKs, we may be
+  * able to get a better answer when the pg_statistic stats are missing or out
+  * of date.
+  */
+ static Selectivity
+ get_foreign_key_join_selectivity(PlannerInfo *root,
+                                  Relids outer_relids,
+                                  Relids inner_relids,
+                                  SpecialJoinInfo *sjinfo,
+                                  List **restrictlist)
+ {
+     Selectivity fkselec = 1.0;
+     JoinType    jointype = sjinfo->jointype;
+     List       *worklist = *restrictlist;
+     ListCell   *lc;
+
+     /* Consider each FK constraint that is known to match the query */
+     foreach(lc, root->fkey_list)
+     {
+         ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc);
+         bool        ref_is_outer;
+         List       *removedlist;
+         ListCell   *cell;
+         ListCell   *prev;
+         ListCell   *next;
+
+         /*
+          * This FK is not relevant unless it connects a baserel on one side of
+          * this join to a baserel on the other side.
+          */
+         if (bms_is_member(fkinfo->con_relid, outer_relids) &&
+             bms_is_member(fkinfo->ref_relid, inner_relids))
+             ref_is_outer = false;
+         else if (bms_is_member(fkinfo->ref_relid, outer_relids) &&
+                  bms_is_member(fkinfo->con_relid, inner_relids))
+             ref_is_outer = true;
+         else
+             continue;
+
+         /*
+          * Modify the restrictlist by removing clauses that match the FK (and
+          * putting them into removedlist instead).  It seems unsafe to modify
+          * the originally-passed List structure, so we make a shallow copy the
+          * first time through.
+          */
+         if (worklist == *restrictlist)
+             worklist = list_copy(worklist);
+
+         removedlist = NIL;
+         prev = NULL;
+         for (cell = list_head(worklist); cell; cell = next)
+         {
+             RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
+             bool        remove_it = false;
+             int            i;
+
+             next = lnext(cell);
+             /* Drop this clause if it matches any column of the FK */
+             for (i = 0; i < fkinfo->nkeys; i++)
+             {
+                 if (rinfo->parent_ec)
+                 {
+                     /*
+                      * EC-derived clauses can only match by EC.  Note that
+                      * checking parent_ec is a bit of a cheat because there
+                      * are EC-derived clauses that don't have parent_ec set;
+                      * but such clauses must compare expressions that aren't
+                      * just Vars, so they cannot match the FK anyway.
+                      */
+                     if (fkinfo->eclass[i] == rinfo->parent_ec)
+                     {
+                         remove_it = true;
+                         break;
+                     }
+                 }
+                 else
+                 {
+                     /* Otherwise, see if rinfo was previously matched to EC */
+                     if (list_member_ptr(fkinfo->rinfos[i], rinfo))
+                     {
+                         remove_it = true;
+                         break;
+                     }
+                 }
+             }
+             if (remove_it)
+             {
+                 worklist = list_delete_cell(worklist, cell, prev);
+                 removedlist = lappend(removedlist, rinfo);
+             }
+             else
+                 prev = cell;
+         }
+
+         /*
+          * If we failed to remove any matching clauses, chicken out and ignore
+          * this FK; applying its selectivity might result in double-counting.
+          * It's not obvious that this is correct, but remember that we already
+          * know the FK matches query quals someplace.  There are just two
+          * reasons we might fail to find matching clauses in this join:
+          *
+          * Some FK we matched earlier might have removed the relevant clause.
+          * This is particularly likely with EC matches, since equivclass.c
+          * provides only a single EC-derived clause per join.  But that means
+          * we have something like A.R1 and B.R2 on one side of the join that
+          * each reference C.PK on the other side, in which case applying the
+          * selectivities of both FK constraints would be double-counting.
+          *
+          * Otherwise, if we didn't find the matching clause in this join, it
+          * must be outerjoin-delayed and will be applied at some higher join
+          * level.  Applying the FK's selectivity anyway would result in
+          * underestimating both this join size and the higher join level's.
+          *
+          * For a multi-column FK, it's possible that we found matches to some
+          * columns but not all, implying that one of the above effects applied
+          * to just some of the columns.  For the moment, we go ahead and
+          * remove those clauses and apply the FK's selectivity anyway.  It
+          * might be better to put back the removed clauses and ignore the FK;
+          * but that amounts to betting on independence of the clauses, which
+          * doesn't seem like a good bet in such messy cases.
+          */
+         if (removedlist == NIL)
+             continue;
+
+         /*
+          * Finally we get to the payoff: estimate selectivity using the
+          * knowledge that each referencing row will match exactly one row in
+          * the referenced table.
+          *
+          * XXX that's not true in the presence of nulls in the referencing
+          * column(s), so in principle we should derate the estimate for those.
+          * However (1) if there are any strict restriction clauses for the
+          * referencing column(s) elsewhere in the query, derating here would
+          * be double-counting the null fraction, and (2) it's not very clear
+          * how to combine null fractions for multiple referencing columns.
+          *
+          * In the first branch of the logic below, null derating is done
+          * implicitly by relying on clause_selectivity(); in the other two
+          * paths, we do nothing for now about correcting for nulls.
+          *
+          * XXX another point here is that if either side of an FK constraint
+          * is an inheritance parent, we estimate as though the constraint
+          * covers all its children as well.  This is not an unreasonable
+          * assumption for a referencing table, ie the user probably applied
+          * identical constraints to all child tables (though perhaps we ought
+          * to check that).  But it's not possible to have done that for a
+          * referenced table.  Fortunately, precisely because that doesn't
+          * work, it is uncommon in practice to have an FK referencing a parent
+          * table.  So, at least for now, disregard inheritance here.
+          */
+         if (ref_is_outer && jointype != JOIN_INNER)
+         {
+             /*
+              * When the referenced table is on the outer side of a non-inner
+              * join, knowing that each inner row has exactly one match is not
+              * as useful as one could wish, since we really need to know the
+              * fraction of outer rows with a match.  Still, we can avoid the
+              * folly of multiplying the per-column estimates together.  Take
+              * the smallest per-column selectivity, instead.  (This should
+              * correspond to the FK column with the most nulls.)
+              */
+             Selectivity thisfksel = 1.0;
+
+             foreach(cell, removedlist)
+             {
+                 RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);
+                 Selectivity csel;
+
+                 csel = clause_selectivity(root, (Node *) rinfo,
+                                           0, jointype, sjinfo);
+                 thisfksel = Min(thisfksel, csel);
+             }
+             fkselec *= thisfksel;
+         }
+         else if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
+         {
+             /*
+              * For JOIN_SEMI and JOIN_ANTI, the selectivity is defined as the
+              * fraction of LHS rows that have matches.  If the referenced
+              * table is on the inner side, that means the selectivity is 1.0
+              * (modulo nulls, which we're ignoring for now).  We already
+              * covered the other case, so no work here.
+              */
+         }
+         else
+         {
+             /*
+              * Otherwise, selectivity is exactly 1/referenced-table-size; but
+              * guard against tuples == 0.  Note we should use the raw table
+              * tuple count, not any estimate of its filtered or joined size.
+              */
+             RelOptInfo *ref_rel = find_base_rel(root, fkinfo->ref_relid);
+             double        ref_tuples = Max(ref_rel->tuples, 1.0);
+
+             fkselec *= 1.0 / ref_tuples;
+         }
+     }
+
+     *restrictlist = worklist;
+     return fkselec;
+ }
+
+ /*
   * set_subquery_size_estimates
   *        Set the size estimates for a base relation that is a subquery.
   *
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index bfa0c65..0e50ad5 100644
*** a/src/backend/optimizer/path/equivclass.c
--- b/src/backend/optimizer/path/equivclass.c
*************** exprs_known_equal(PlannerInfo *root, Nod
*** 1926,1931 ****
--- 1926,2010 ----


  /*
+  * match_eclasses_to_foreign_key_col
+  *      See whether a foreign key column match is proven by any eclass.
+  *
+  * If the referenced and referencing Vars of the fkey's colno'th column are
+  * known equal due to any eclass, return that eclass; otherwise return NULL.
+  * (In principle there might be more than one matching eclass if multiple
+  * collations are involved, but since collation doesn't matter for equality,
+  * we ignore that fine point here.)  This is much like exprs_known_equal,
+  * except that we insist on the comparison operator matching the eclass, so
+  * that the result is definite not approximate.
+  */
+ EquivalenceClass *
+ match_eclasses_to_foreign_key_col(PlannerInfo *root,
+                                   ForeignKeyOptInfo *fkinfo,
+                                   int colno)
+ {
+     Index        var1varno = fkinfo->con_relid;
+     AttrNumber    var1attno = fkinfo->conkey[colno];
+     Index        var2varno = fkinfo->ref_relid;
+     AttrNumber    var2attno = fkinfo->confkey[colno];
+     Oid            eqop = fkinfo->conpfeqop[colno];
+     List       *opfamilies = NIL;        /* compute only if needed */
+     ListCell   *lc1;
+
+     foreach(lc1, root->eq_classes)
+     {
+         EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc1);
+         bool        item1member = false;
+         bool        item2member = false;
+         ListCell   *lc2;
+
+         /* Never match to a volatile EC */
+         if (ec->ec_has_volatile)
+             continue;
+         /* Note: it seems okay to match to "broken" eclasses here */
+
+         foreach(lc2, ec->ec_members)
+         {
+             EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2);
+             Var           *var;
+
+             if (em->em_is_child)
+                 continue;        /* ignore children here */
+
+             /* EM must be a Var, possibly with RelabelType */
+             var = (Var *) em->em_expr;
+             while (var && IsA(var, RelabelType))
+                 var = (Var *) ((RelabelType *) var)->arg;
+             if (!(var && IsA(var, Var)))
+                 continue;
+
+             /* Match? */
+             if (var->varno == var1varno && var->varattno == var1attno)
+                 item1member = true;
+             else if (var->varno == var2varno && var->varattno == var2attno)
+                 item2member = true;
+
+             /* Have we found both PK and FK column in this EC? */
+             if (item1member && item2member)
+             {
+                 /*
+                  * Succeed if eqop matches EC's opfamilies.  We could test
+                  * this before scanning the members, but it's probably cheaper
+                  * to test for member matches first.
+                  */
+                 if (opfamilies == NIL)    /* compute if we didn't already */
+                     opfamilies = get_mergejoin_opfamilies(eqop);
+                 if (equal(opfamilies, ec->ec_opfamilies))
+                     return ec;
+                 /* Otherwise, done with this EC, move on to the next */
+                 break;
+             }
+         }
+     }
+     return NULL;
+ }
+
+
+ /*
   * add_child_rel_equivalences
   *      Search for EC members that reference the parent_rel, and
   *      add transformed members referencing the child_rel.
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 3d305eb..e28a8dc 100644
*** a/src/backend/optimizer/plan/analyzejoins.c
--- b/src/backend/optimizer/plan/analyzejoins.c
*************** remove_rel_from_query(PlannerInfo *root,
*** 433,438 ****
--- 433,443 ----
              distribute_restrictinfo_to_rels(root, rinfo);
          }
      }
+
+     /*
+      * There may be references to the rel in root->fkey_list, but if so,
+      * match_foreign_keys_to_quals() will get rid of them.
+      */
  }

  /*
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index 1a1c26a..4d8cf9f 100644
*** a/src/backend/optimizer/plan/initsplan.c
--- b/src/backend/optimizer/plan/initsplan.c
*************** build_implied_join_equality(Oid opno,
*** 2306,2311 ****
--- 2306,2454 ----
  }


+ /*
+  * match_foreign_keys_to_quals
+  *        Match foreign-key constraints to equivalence classes and join quals
+  *
+  * The idea here is to see which query join conditions match equality
+  * constraints of a foreign-key relationship.  For such join conditions,
+  * we can use the FK semantics to make selectivity estimates that are more
+  * reliable than estimating from statistics, especially for multiple-column
+  * FKs, where the normal assumption of independent conditions tends to fail.
+  *
+  * In this function we annotate the ForeignKeyOptInfos in root->fkey_list
+  * with info about which eclasses and join qual clauses they match, and
+  * discard any ForeignKeyOptInfos that are irrelevant for the query.
+  */
+ void
+ match_foreign_keys_to_quals(PlannerInfo *root)
+ {
+     List       *newlist = NIL;
+     ListCell   *lc;
+
+     foreach(lc, root->fkey_list)
+     {
+         ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc);
+         RelOptInfo *con_rel = find_base_rel(root, fkinfo->con_relid);
+         RelOptInfo *ref_rel = find_base_rel(root, fkinfo->ref_relid);
+         int            colno;
+
+         /*
+          * Ignore FK unless both rels are baserels.  This gets rid of FKs that
+          * link to inheritance child rels (otherrels) and those that link to
+          * rels removed by join removal (dead rels).
+          */
+         if (con_rel->reloptkind != RELOPT_BASEREL ||
+             ref_rel->reloptkind != RELOPT_BASEREL)
+             continue;
+
+         /*
+          * Scan the columns and try to match them to eclasses and quals.
+          *
+          * Note: for simple inner joins, any match should be in an eclass.
+          * "Loose" quals that match an FK equality must have been rejected for
+          * EC status because they are outer-join quals or similar.  We still
+          * consider them to match the FK, though, since that produces better
+          * selectivity estimates than not matching.
+          */
+         for (colno = 0; colno < fkinfo->nkeys; colno++)
+         {
+             AttrNumber    con_attno,
+                         ref_attno;
+             Oid            fpeqop;
+             ListCell   *lc2;
+
+             fkinfo->eclass[colno] = match_eclasses_to_foreign_key_col(root,
+                                                                       fkinfo,
+                                                                       colno);
+             /* Don't bother looking for loose quals if we got an EC match */
+             if (fkinfo->eclass[colno] != NULL)
+             {
+                 fkinfo->nmatched++;
+                 continue;
+             }
+
+             /*
+              * Scan joininfo list for relevant clauses.  Either rel's joininfo
+              * list would do equally well; we use con_rel's.
+              */
+             con_attno = fkinfo->conkey[colno];
+             ref_attno = fkinfo->confkey[colno];
+             fpeqop = InvalidOid;    /* we'll look this up only if needed */
+
+             foreach(lc2, con_rel->joininfo)
+             {
+                 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc2);
+                 OpExpr       *clause = (OpExpr *) rinfo->clause;
+                 Var           *leftvar;
+                 Var           *rightvar;
+
+                 /* only binary OpExprs are useful for consideration */
+                 if (!IsA(clause, OpExpr) ||
+                     list_length(clause->args) != 2)
+                     continue;
+                 leftvar = (Var *) get_leftop((Expr *) clause);
+                 rightvar = (Var *) get_rightop((Expr *) clause);
+
+                 /* Operands must be Vars, possibly with RelabelType */
+                 while (leftvar && IsA(leftvar, RelabelType))
+                     leftvar = (Var *) ((RelabelType *) leftvar)->arg;
+                 if (!(leftvar && IsA(leftvar, Var)))
+                     continue;
+                 while (rightvar && IsA(rightvar, RelabelType))
+                     rightvar = (Var *) ((RelabelType *) rightvar)->arg;
+                 if (!(rightvar && IsA(rightvar, Var)))
+                     continue;
+
+                 /* Now try to match the vars to the current foreign key cols */
+                 if (fkinfo->ref_relid == leftvar->varno &&
+                     ref_attno == leftvar->varattno &&
+                     fkinfo->con_relid == rightvar->varno &&
+                     con_attno == rightvar->varattno)
+                 {
+                     /* Vars match, but is it the right operator? */
+                     if (clause->opno == fkinfo->conpfeqop[colno])
+                         fkinfo->rinfos[colno] = lappend(fkinfo->rinfos[colno],
+                                                         rinfo);
+                 }
+                 else if (fkinfo->ref_relid == rightvar->varno &&
+                          ref_attno == rightvar->varattno &&
+                          fkinfo->con_relid == leftvar->varno &&
+                          con_attno == leftvar->varattno)
+                 {
+                     /*
+                      * Reverse match, must check commutator operator.  Look it
+                      * up if we didn't already.  (In the worst case we might
+                      * do multiple lookups here, but that would require an FK
+                      * equality operator without commutator, which is
+                      * unlikely.)
+                      */
+                     if (!OidIsValid(fpeqop))
+                         fpeqop = get_commutator(fkinfo->conpfeqop[colno]);
+                     if (clause->opno == fpeqop)
+                         fkinfo->rinfos[colno] = lappend(fkinfo->rinfos[colno],
+                                                         rinfo);
+                 }
+             }
+             /* If we found any matching loose quals, count col as matched */
+             if (fkinfo->rinfos[colno])
+                 fkinfo->nmatched++;
+         }
+
+         /*
+          * Currently, we drop multicolumn FKs that aren't fully matched to the
+          * query.  Later we might figure out how to derive some sort of
+          * weakened estimate from them, in which case this test should be
+          * weakened to "if (fkinfo->nmatched > 0)".
+          */
+         if (fkinfo->nmatched == fkinfo->nkeys)
+             newlist = lappend(newlist, fkinfo);
+     }
+     /* Replace fkey_list, thereby discarding any useless entries */
+     root->fkey_list = newlist;
+ }
+
+
  /*****************************************************************************
   *
   *     CHECKS FOR MERGEJOINABLE AND HASHJOINABLE CLAUSES
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index edd95d8..27234ff 100644
*** a/src/backend/optimizer/plan/planmain.c
--- b/src/backend/optimizer/plan/planmain.c
*************** query_planner(PlannerInfo *root, List *t
*** 115,120 ****
--- 115,121 ----
      root->full_join_clauses = NIL;
      root->join_info_list = NIL;
      root->placeholder_list = NIL;
+     root->fkey_list = NIL;
      root->initial_rels = NIL;

      /*
*************** query_planner(PlannerInfo *root, List *t
*** 206,211 ****
--- 207,220 ----
      create_lateral_join_info(root);

      /*
+      * Match foreign keys to equivalence classes and join quals.  This must be
+      * done after finalizing equivalence classes, and it's useful to wait till
+      * after join removal so that we can skip processing foreign keys
+      * involving removed relations.
+      */
+     match_foreign_keys_to_quals(root);
+
+     /*
       * Look for join OR clauses that we can extract single-relation
       * restriction OR clauses from.
       */
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 6aa8192..b0658f2 100644
*** a/src/backend/optimizer/util/plancat.c
--- b/src/backend/optimizer/util/plancat.c
*************** int            constraint_exclusion = CONSTRAINT_
*** 52,57 ****
--- 52,59 ----
  get_relation_info_hook_type get_relation_info_hook = NULL;


+ static void get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel,
+                           Relation relation);
  static bool infer_collation_opclass_match(InferenceElem *elem, Relation idxRel,
                                List *idxExprs);
  static int32 get_rel_data_width(Relation rel, int32 *attr_widths);
*************** static List *build_index_tlist(PlannerIn
*** 77,82 ****
--- 79,86 ----
   *    pages        number of pages
   *    tuples        number of tuples
   *
+  * Also, add information about the relation's foreign keys to root->fkey_list.
+  *
   * Also, initialize the attr_needed[] and attr_widths[] arrays.  In most
   * cases these are left as zeroes, but sometimes we need to compute attr
   * widths here, and we may as well cache the results for costsize.c.
*************** get_relation_info(PlannerInfo *root, Oid
*** 403,408 ****
--- 407,415 ----
          rel->fdwroutine = NULL;
      }

+     /* Collect info about relation's foreign keys, if relevant */
+     get_relation_foreign_keys(root, rel, relation);
+
      heap_close(relation, NoLock);

      /*
*************** get_relation_info(PlannerInfo *root, Oid
*** 415,420 ****
--- 422,516 ----
  }

  /*
+  * get_relation_foreign_keys -
+  *      Retrieves foreign key information for a given relation.
+  *
+  * ForeignKeyOptInfos for relevant foreign keys are created and added to
+  * root->fkey_list.  We do this now while we have the relcache entry open.
+  * We could sometimes avoid making useless ForeignKeyOptInfos if we waited
+  * until all RelOptInfos have been built, but the cost of re-opening the
+  * relcache entries would probably exceed any savings.
+  */
+ static void
+ get_relation_foreign_keys(PlannerInfo *root, RelOptInfo *rel,
+                           Relation relation)
+ {
+     List       *rtable = root->parse->rtable;
+     List       *cachedfkeys;
+     ListCell   *lc;
+
+     /*
+      * If it's not a baserel, we don't care about its FKs.  Also, if the query
+      * references only a single relation, we can skip the lookup since no FKs
+      * could satisfy the requirements below.
+      */
+     if (rel->reloptkind != RELOPT_BASEREL ||
+         list_length(rtable) < 2)
+         return;
+
+     /*
+      * Extract data about relation's FKs from the relcache.  Note that this
+      * list belongs to the relcache and might disappear in a cache flush, so
+      * we must not do any further catalog access within this function.
+      */
+     cachedfkeys = RelationGetFKeyList(relation);
+
+     /*
+      * Figure out which FKs are of interest for this query, and create
+      * ForeignKeyOptInfos for them.  We want only FKs that reference some
+      * other RTE of the current query.  In queries containing self-joins,
+      * there might be more than one other RTE for a referenced table, and we
+      * should make a ForeignKeyOptInfo for each occurrence.
+      *
+      * Ideally, we would ignore RTEs that correspond to non-baserels, but it's
+      * too hard to identify those here, so we might end up making some useless
+      * ForeignKeyOptInfos.  If so, match_foreign_keys_to_quals() will remove
+      * them again.
+      */
+     foreach(lc, cachedfkeys)
+     {
+         ForeignKeyCacheInfo *cachedfk = (ForeignKeyCacheInfo *) lfirst(lc);
+         Index        rti;
+         ListCell   *lc2;
+
+         /* conrelid should always be that of the table we're considering */
+         Assert(cachedfk->conrelid == RelationGetRelid(relation));
+
+         /* Scan to find other RTEs matching confrelid */
+         rti = 0;
+         foreach(lc2, rtable)
+         {
+             RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc2);
+             ForeignKeyOptInfo *info;
+
+             rti++;
+             /* Ignore if not the correct table */
+             if (rte->rtekind != RTE_RELATION ||
+                 rte->relid != cachedfk->confrelid)
+                 continue;
+             /* Ignore self-referential FKs; we only care about joins */
+             if (rti == rel->relid)
+                 continue;
+
+             /* OK, let's make an entry */
+             info = makeNode(ForeignKeyOptInfo);
+             info->con_relid = rel->relid;
+             info->ref_relid = rti;
+             info->nkeys = cachedfk->nkeys;
+             memcpy(info->conkey, cachedfk->conkey, sizeof(info->conkey));
+             memcpy(info->confkey, cachedfk->confkey, sizeof(info->confkey));
+             memcpy(info->conpfeqop, cachedfk->conpfeqop, sizeof(info->conpfeqop));
+             /* zero out fields to be filled by match_foreign_keys_to_quals */
+             info->nmatched = 0;
+             memset(info->eclass, 0, sizeof(info->eclass));
+             memset(info->rinfos, 0, sizeof(info->rinfos));
+
+             root->fkey_list = lappend(root->fkey_list, info);
+         }
+     }
+ }
+
+ /*
   * infer_arbiter_indexes -
   *      Determine the unique indexes used to arbitrate speculative insertion.
   *
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index ba185ae..a0a284b 100644
*** a/src/backend/optimizer/util/relnode.c
--- b/src/backend/optimizer/util/relnode.c
*************** get_joinrel_parampathinfo(PlannerInfo *r
*** 1264,1271 ****

      /* Estimate the number of rows returned by the parameterized join */
      rows = get_parameterized_joinrel_size(root, joinrel,
!                                           outer_path->rows,
!                                           inner_path->rows,
                                            sjinfo,
                                            *restrict_clauses);

--- 1264,1271 ----

      /* Estimate the number of rows returned by the parameterized join */
      rows = get_parameterized_joinrel_size(root, joinrel,
!                                           outer_path,
!                                           inner_path,
                                            sjinfo,
                                            *restrict_clauses);

diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index c958758..8d2ad01 100644
*** a/src/backend/utils/cache/relcache.c
--- b/src/backend/utils/cache/relcache.c
*************** RelationBuildDesc(Oid targetRelId, bool
*** 1049,1054 ****
--- 1049,1058 ----
      else
          relation->rd_rsdesc = NULL;

+     /* foreign key data is not loaded till asked for */
+     relation->rd_fkeylist = NIL;
+     relation->rd_fkeyvalid = false;
+
      /*
       * if it's an index, initialize index-related information
       */
*************** RelationDestroyRelation(Relation relatio
*** 2030,2040 ****
          else
              FreeTupleDesc(relation->rd_att);
      }
      list_free(relation->rd_indexlist);
      bms_free(relation->rd_indexattr);
      bms_free(relation->rd_keyattr);
      bms_free(relation->rd_idattr);
-     FreeTriggerDesc(relation->trigdesc);
      if (relation->rd_options)
          pfree(relation->rd_options);
      if (relation->rd_indextuple)
--- 2034,2045 ----
          else
              FreeTupleDesc(relation->rd_att);
      }
+     FreeTriggerDesc(relation->trigdesc);
+     list_free_deep(relation->rd_fkeylist);
      list_free(relation->rd_indexlist);
      bms_free(relation->rd_indexattr);
      bms_free(relation->rd_keyattr);
      bms_free(relation->rd_idattr);
      if (relation->rd_options)
          pfree(relation->rd_options);
      if (relation->rd_indextuple)
*************** CheckConstraintCmp(const void *a, const
*** 3804,3809 ****
--- 3809,3955 ----
  }

  /*
+  * RelationGetFKeyList -- get a list of foreign key info for the relation
+  *
+  * Returns a list of ForeignKeyCacheInfo structs, one per FK constraining
+  * the given relation.  This data is a direct copy of relevant fields from
+  * pg_constraint.  The list items are in no particular order.
+  *
+  * CAUTION: the returned list is part of the relcache's data, and could
+  * vanish in a relcache entry reset.  Callers must inspect or copy it
+  * before doing anything that might trigger a cache flush, such as
+  * system catalog accesses.  copyObject() can be used if desired.
+  * (We define it this way because current callers want to filter and
+  * modify the list entries anyway, so copying would be a waste of time.)
+  */
+ List *
+ RelationGetFKeyList(Relation relation)
+ {
+     List       *result;
+     Relation    conrel;
+     SysScanDesc conscan;
+     ScanKeyData skey;
+     HeapTuple    htup;
+     List       *oldlist;
+     MemoryContext oldcxt;
+
+     /* Quick exit if we already computed the list. */
+     if (relation->rd_fkeyvalid)
+         return relation->rd_fkeylist;
+
+     /* Fast path: if it doesn't have any triggers, it can't have FKs */
+     if (!relation->rd_rel->relhastriggers)
+         return NIL;
+
+     /*
+      * We build the list we intend to return (in the caller's context) while
+      * doing the scan.  After successfully completing the scan, we copy that
+      * list into the relcache entry.  This avoids cache-context memory leakage
+      * if we get some sort of error partway through.
+      */
+     result = NIL;
+
+     /* Prepare to scan pg_constraint for entries having conrelid = this rel. */
+     ScanKeyInit(&skey,
+                 Anum_pg_constraint_conrelid,
+                 BTEqualStrategyNumber, F_OIDEQ,
+                 ObjectIdGetDatum(RelationGetRelid(relation)));
+
+     conrel = heap_open(ConstraintRelationId, AccessShareLock);
+     conscan = systable_beginscan(conrel, ConstraintRelidIndexId, true,
+                                  NULL, 1, &skey);
+
+     while (HeapTupleIsValid(htup = systable_getnext(conscan)))
+     {
+         Form_pg_constraint constraint = (Form_pg_constraint) GETSTRUCT(htup);
+         ForeignKeyCacheInfo *info;
+         Datum        adatum;
+         bool        isnull;
+         ArrayType  *arr;
+         int            nelem;
+
+         /* consider only foreign keys */
+         if (constraint->contype != CONSTRAINT_FOREIGN)
+             continue;
+
+         info = makeNode(ForeignKeyCacheInfo);
+         info->conrelid = constraint->conrelid;
+         info->confrelid = constraint->confrelid;
+
+         /* Extract data from conkey field */
+         adatum = fastgetattr(htup, Anum_pg_constraint_conkey,
+                              conrel->rd_att, &isnull);
+         if (isnull)
+             elog(ERROR, "null conkey for rel %s",
+                  RelationGetRelationName(relation));
+
+         arr = DatumGetArrayTypeP(adatum);        /* ensure not toasted */
+         nelem = ARR_DIMS(arr)[0];
+         if (ARR_NDIM(arr) != 1 ||
+             nelem < 1 ||
+             nelem > INDEX_MAX_KEYS ||
+             ARR_HASNULL(arr) ||
+             ARR_ELEMTYPE(arr) != INT2OID)
+             elog(ERROR, "conkey is not a 1-D smallint array");
+
+         info->nkeys = nelem;
+         memcpy(info->conkey, ARR_DATA_PTR(arr), nelem * sizeof(AttrNumber));
+
+         /* Likewise for confkey */
+         adatum = fastgetattr(htup, Anum_pg_constraint_confkey,
+                              conrel->rd_att, &isnull);
+         if (isnull)
+             elog(ERROR, "null confkey for rel %s",
+                  RelationGetRelationName(relation));
+
+         arr = DatumGetArrayTypeP(adatum);        /* ensure not toasted */
+         nelem = ARR_DIMS(arr)[0];
+         if (ARR_NDIM(arr) != 1 ||
+             nelem != info->nkeys ||
+             ARR_HASNULL(arr) ||
+             ARR_ELEMTYPE(arr) != INT2OID)
+             elog(ERROR, "confkey is not a 1-D smallint array");
+
+         memcpy(info->confkey, ARR_DATA_PTR(arr), nelem * sizeof(AttrNumber));
+
+         /* Likewise for conpfeqop */
+         adatum = fastgetattr(htup, Anum_pg_constraint_conpfeqop,
+                              conrel->rd_att, &isnull);
+         if (isnull)
+             elog(ERROR, "null conpfeqop for rel %s",
+                  RelationGetRelationName(relation));
+
+         arr = DatumGetArrayTypeP(adatum);        /* ensure not toasted */
+         nelem = ARR_DIMS(arr)[0];
+         if (ARR_NDIM(arr) != 1 ||
+             nelem != info->nkeys ||
+             ARR_HASNULL(arr) ||
+             ARR_ELEMTYPE(arr) != OIDOID)
+             elog(ERROR, "conpfeqop is not a 1-D OID array");
+
+         memcpy(info->conpfeqop, ARR_DATA_PTR(arr), nelem * sizeof(Oid));
+
+         /* Add FK's node to the result list */
+         result = lappend(result, info);
+     }
+
+     systable_endscan(conscan);
+     heap_close(conrel, AccessShareLock);
+
+     /* Now save a copy of the completed list in the relcache entry. */
+     oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
+     oldlist = relation->rd_fkeylist;
+     relation->rd_fkeylist = copyObject(result);
+     relation->rd_fkeyvalid = true;
+     MemoryContextSwitchTo(oldcxt);
+
+     /* Don't leak the old list, if there is one */
+     list_free_deep(oldlist);
+
+     return result;
+ }
+
+ /*
   * RelationGetIndexList -- get a list of OIDs of indexes on this relation
   *
   * The index list is created only if someone requests it.  We scan pg_index
*************** load_relcache_init_file(bool shared)
*** 4892,4898 ****
           * format is complex and subject to change).  They must be rebuilt if
           * needed by RelationCacheInitializePhase3.  This is not expected to
           * be a big performance hit since few system catalogs have such. Ditto
!          * for index expressions, predicates, exclusion info, and FDW info.
           */
          rel->rd_rules = NULL;
          rel->rd_rulescxt = NULL;
--- 5038,5045 ----
           * format is complex and subject to change).  They must be rebuilt if
           * needed by RelationCacheInitializePhase3.  This is not expected to
           * be a big performance hit since few system catalogs have such. Ditto
!          * for RLS policy data, index expressions, predicates, exclusion info,
!          * and FDW info.
           */
          rel->rd_rules = NULL;
          rel->rd_rulescxt = NULL;
*************** load_relcache_init_file(bool shared)
*** 4914,4919 ****
--- 5061,5068 ----
          else
              rel->rd_refcnt = 0;
          rel->rd_indexvalid = 0;
+         rel->rd_fkeylist = NIL;
+         rel->rd_fkeyvalid = false;
          rel->rd_indexlist = NIL;
          rel->rd_oidindex = InvalidOid;
          rel->rd_replidindex = InvalidOid;
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index c4b9c14..8f46091 100644
*** a/src/include/nodes/nodes.h
--- b/src/include/nodes/nodes.h
*************** typedef enum NodeTag
*** 223,228 ****
--- 223,229 ----
      T_PlannerGlobal,
      T_RelOptInfo,
      T_IndexOptInfo,
+     T_ForeignKeyOptInfo,
      T_ParamPathInfo,
      T_Path,
      T_IndexPath,
*************** typedef enum NodeTag
*** 478,484 ****
      T_InlineCodeBlock,            /* in nodes/parsenodes.h */
      T_FdwRoutine,                /* in foreign/fdwapi.h */
      T_IndexAmRoutine,            /* in access/amapi.h */
!     T_TsmRoutine                /* in access/tsmapi.h */
  } NodeTag;

  /*
--- 479,486 ----
      T_InlineCodeBlock,            /* in nodes/parsenodes.h */
      T_FdwRoutine,                /* in foreign/fdwapi.h */
      T_IndexAmRoutine,            /* in access/amapi.h */
!     T_TsmRoutine,                /* in access/tsmapi.h */
!     T_ForeignKeyCacheInfo        /* in utils/rel.h */
  } NodeTag;

  /*
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index a4892cb..4d0cb2e 100644
*** a/src/include/nodes/relation.h
--- b/src/include/nodes/relation.h
*************** typedef struct PlannerInfo
*** 251,256 ****
--- 251,258 ----

      List       *placeholder_list;        /* list of PlaceHolderInfos */

+     List       *fkey_list;        /* list of ForeignKeyOptInfos */
+
      List       *query_pathkeys; /* desired pathkeys for query_planner() */

      List       *group_pathkeys; /* groupClause pathkeys, if any */
*************** typedef struct IndexOptInfo
*** 622,627 ****
--- 624,657 ----
      void        (*amcostestimate) ();    /* AM's cost estimator */
  } IndexOptInfo;

+ /*
+  * ForeignKeyOptInfo
+  *        Per-foreign-key information for planning/optimization
+  *
+  * The per-FK-column arrays can be fixed-size because we allow at most
+  * INDEX_MAX_KEYS columns in a foreign key constraint.  Each array has
+  * nkeys valid entries.
+  */
+ typedef struct ForeignKeyOptInfo
+ {
+     NodeTag        type;
+
+     /* Basic data about the foreign key (fetched from catalogs): */
+     Index        con_relid;        /* RT index of the referencing table */
+     Index        ref_relid;        /* RT index of the referenced table */
+     int            nkeys;            /* number of columns in the foreign key */
+     AttrNumber    conkey[INDEX_MAX_KEYS]; /* cols in referencing table */
+     AttrNumber    confkey[INDEX_MAX_KEYS];        /* cols in referenced table */
+     Oid            conpfeqop[INDEX_MAX_KEYS];        /* PK = FK operator OIDs */
+
+     /* Derived info about whether FK's equality conditions match the query: */
+     int            nmatched;        /* number of column conditions matched */
+     /* Pointer to eclass matching each column's condition, if there is one */
+     struct EquivalenceClass *eclass[INDEX_MAX_KEYS];
+     /* List of non-EC RestrictInfos matching each column's condition */
+     List       *rinfos[INDEX_MAX_KEYS];
+ } ForeignKeyOptInfo;
+

  /*
   * EquivalenceClasses
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index f41f9e9..2a4df2f 100644
*** a/src/include/optimizer/cost.h
--- b/src/include/optimizer/cost.h
*************** extern double get_parameterized_baserel_
*** 167,174 ****
                                 List *param_clauses);
  extern double get_parameterized_joinrel_size(PlannerInfo *root,
                                 RelOptInfo *rel,
!                                double outer_rows,
!                                double inner_rows,
                                 SpecialJoinInfo *sjinfo,
                                 List *restrict_clauses);
  extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
--- 167,174 ----
                                 List *param_clauses);
  extern double get_parameterized_joinrel_size(PlannerInfo *root,
                                 RelOptInfo *rel,
!                                Path *outer_path,
!                                Path *inner_path,
                                 SpecialJoinInfo *sjinfo,
                                 List *restrict_clauses);
  extern void set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index f3b25e2..a0008ad 100644
*** a/src/include/optimizer/paths.h
--- b/src/include/optimizer/paths.h
*************** extern List *generate_join_implied_equal
*** 139,144 ****
--- 139,147 ----
                                           Relids outer_relids,
                                           RelOptInfo *inner_rel);
  extern bool exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2);
+ extern EquivalenceClass *match_eclasses_to_foreign_key_col(PlannerInfo *root,
+                                   ForeignKeyOptInfo *fkinfo,
+                                   int colno);
  extern void add_child_rel_equivalences(PlannerInfo *root,
                             AppendRelInfo *appinfo,
                             RelOptInfo *parent_rel,
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index a48400b..c529085 100644
*** a/src/include/optimizer/planmain.h
--- b/src/include/optimizer/planmain.h
*************** extern RestrictInfo *build_implied_join_
*** 95,100 ****
--- 95,101 ----
                              Expr *item2,
                              Relids qualscope,
                              Relids nullable_relids);
+ extern void match_foreign_keys_to_quals(PlannerInfo *root);

  /*
   * prototypes for plan/analyzejoins.c
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index fd858fd..ed14442 100644
*** a/src/include/utils/rel.h
--- b/src/include/utils/rel.h
*************** typedef struct RelationData
*** 90,95 ****
--- 90,99 ----
      /* use "struct" here to avoid needing to include rowsecurity.h: */
      struct RowSecurityDesc *rd_rsdesc;    /* row security policies, or NULL */

+     /* data managed by RelationGetFKeyList: */
+     List       *rd_fkeylist;    /* list of ForeignKeyCacheInfo (see below) */
+     bool        rd_fkeyvalid;    /* true if list has been computed */
+
      /* data managed by RelationGetIndexList: */
      List       *rd_indexlist;    /* list of OIDs of indexes on relation */
      Oid            rd_oidindex;    /* OID of unique index on OID, if any */
*************** typedef struct RelationData
*** 170,175 ****
--- 174,207 ----
      struct PgStat_TableStatus *pgstat_info;        /* statistics collection area */
  } RelationData;

+
+ /*
+  * ForeignKeyCacheInfo
+  *        Information the relcache can cache about foreign key constraints
+  *
+  * This is basically just an image of relevant columns from pg_constraint.
+  * We make it a subclass of Node so that copyObject() can be used on a list
+  * of these, but we also ensure it is a "flat" object without substructure,
+  * so that list_free_deep() is sufficient to free such a list.
+  * The per-FK-column arrays can be fixed-size because we allow at most
+  * INDEX_MAX_KEYS columns in a foreign key constraint.
+  *
+  * Currently, we only cache fields of interest to the planner, but the
+  * set of fields could be expanded in future.
+  */
+ typedef struct ForeignKeyCacheInfo
+ {
+     NodeTag        type;
+     Oid            conrelid;        /* relation constrained by the foreign key */
+     Oid            confrelid;        /* relation referenced by the foreign key */
+     int            nkeys;            /* number of columns in the foreign key */
+     /* these arrays each have nkeys valid entries: */
+     AttrNumber    conkey[INDEX_MAX_KEYS]; /* cols in referencing table */
+     AttrNumber    confkey[INDEX_MAX_KEYS];        /* cols in referenced table */
+     Oid            conpfeqop[INDEX_MAX_KEYS];        /* PK = FK operator OIDs */
+ } ForeignKeyCacheInfo;
+
+
  /*
   * StdRdOptions
   *        Standard contents of rd_options for heaps and generic indexes.
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h
index 1b48304..6ea7dd2 100644
*** a/src/include/utils/relcache.h
--- b/src/include/utils/relcache.h
*************** extern void RelationClose(Relation relat
*** 37,42 ****
--- 37,43 ----
  /*
   * Routines to compute/retrieve additional cached information
   */
+ extern List *RelationGetFKeyList(Relation relation);
  extern List *RelationGetIndexList(Relation relation);
  extern Oid    RelationGetOidIndex(Relation relation);
  extern Oid    RelationGetReplicaIndex(Relation relation);

pgsql-hackers by date:

Previous
From: Andres Freund
Date:
Subject: Re: Re: [COMMITTERS] pgsql: Avoid extra locks in GetSnapshotData if old_snapshot_threshold <
Next
From: Robbie Harwood
Date:
Subject: Re: [PATCH v12] GSSAPI encryption support