Re: pgsql: Avoid mislabeling of lateral references when pulling up a subque - Mailing list pgsql-committers

From Tom Lane
Subject Re: pgsql: Avoid mislabeling of lateral references when pulling up a subque
Date
Msg-id 590235.1732915884@sss.pgh.pa.us
Whole thread Raw
In response to Re: pgsql: Avoid mislabeling of lateral references when pulling up a subque  (Tom Lane <tgl@sss.pgh.pa.us>)
Responses Re: pgsql: Avoid mislabeling of lateral references when pulling up a subque
List pgsql-committers
I wrote:
> The ideas I'd been toying with last night involved a pre-scan over
> the join tree to calculate the potential nullingrels of each leaf RTE
> (same idea as RelOptInfo.nulling_relids, but of course we don't have
> any RelOptInfos yet).  That seems painful though because we'd have to
> update the data structure somehow after each subquery pullup.

I realized that we can make that work by doing the pre-calculation
at the start of each pull_up_simple_subquery.  We only have to do
it for subqueries marked LATERAL, so this doesn't seem too horribly
inefficient.  Draft patch attached.  I didn't spend effort on
devising test cases, but it works for your example.

            regards, tom lane

diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index 2e0d41a8d1..ff9c1cd24e 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -42,6 +42,17 @@
 #include "rewrite/rewriteManip.h"


+typedef struct nullingrel_info
+{
+    /*
+     * For each leaf RTE, nullingrels[rti] is the set of relids of outer joins
+     * that potentially null that RTE.
+     */
+    Relids       *nullingrels;
+    /* Length of range table (maximum index in nullingrels[]) */
+    int            rtlength;        /* used only for assertion checks */
+} nullingrel_info;
+
 typedef struct pullup_replace_vars_context
 {
     PlannerInfo *root;
@@ -49,6 +60,8 @@ typedef struct pullup_replace_vars_context
     RangeTblEntry *target_rte;    /* RTE of subquery */
     Relids        relids;            /* relids within subquery, as numbered after
                                  * pullup (set only if target_rte->lateral) */
+    nullingrel_info *nullinfo;    /* per-RTE nullingrel info (set only if
+                                 * target_rte->lateral) */
     bool       *outer_hasSubLinks;    /* -> outer query's hasSubLinks */
     int            varno;            /* varno of subquery */
     bool        wrap_non_vars;    /* do we need all non-Var outputs to be PHVs? */
@@ -142,6 +155,9 @@ static void substitute_phv_relids(Node *node,
 static void fix_append_rel_relids(PlannerInfo *root, int varno,
                                   Relids subrelids);
 static Node *find_jointree_node_for_rel(Node *jtnode, int relid);
+static nullingrel_info *get_nullingrels(Query *parse);
+static void get_nullingrels_recurse(Node *jtnode, Relids upper_nullingrels,
+                                    nullingrel_info *info);


 /*
@@ -1259,10 +1275,16 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
     rvcontext.targetlist = subquery->targetList;
     rvcontext.target_rte = rte;
     if (rte->lateral)
+    {
         rvcontext.relids = get_relids_in_jointree((Node *) subquery->jointree,
                                                   true, true);
-    else                        /* won't need relids */
+        rvcontext.nullinfo = get_nullingrels(parse);
+    }
+    else                        /* won't need these fields */
+    {
         rvcontext.relids = NULL;
+        rvcontext.nullinfo = NULL;
+    }
     rvcontext.outer_hasSubLinks = &parse->hasSubLinks;
     rvcontext.varno = varno;
     /* this flag will be set below, if needed */
@@ -1809,7 +1831,8 @@ pull_up_simple_values(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte)
     rvcontext.root = root;
     rvcontext.targetlist = tlist;
     rvcontext.target_rte = rte;
-    rvcontext.relids = NULL;
+    rvcontext.relids = NULL;    /* can't be any lateral references here */
+    rvcontext.nullinfo = NULL;
     rvcontext.outer_hasSubLinks = &parse->hasSubLinks;
     rvcontext.varno = varno;
     rvcontext.wrap_non_vars = false;
@@ -1971,9 +1994,10 @@ pull_up_constant_function(PlannerInfo *root, Node *jtnode,
     /*
      * Since this function was reduced to a Const, it doesn't contain any
      * lateral references, even if it's marked as LATERAL.  This means we
-     * don't need to fill relids.
+     * don't need to fill relids or nullinfo.
      */
     rvcontext.relids = NULL;
+    rvcontext.nullinfo = NULL;

     rvcontext.outer_hasSubLinks = &parse->hasSubLinks;
     rvcontext.varno = ((RangeTblRef *) jtnode)->rtindex;
@@ -2688,9 +2712,42 @@ pullup_replace_vars_callback(Var *var,
         {
             /*
              * There should be Vars/PHVs within the expression that we can
-             * modify.  Per above discussion, modify only Vars/PHVs of the
-             * subquery, not lateral references.
+             * modify.  Vars/PHVs of the subquery should have the full
+             * var->varnullingrels added to them, but if there are lateral
+             * references within the expression, those must be marked with
+             * only the nullingrels that potentially apply to them.  That
+             * could be different for different lateral relations, so we have
+             * to do this work for each one.
              */
+            if (rcon->target_rte->lateral)
+            {
+                nullingrel_info *nullinfo = rcon->nullinfo;
+                Relids        lvarnos;
+                int            lvarno;
+
+                /*
+                 * Identify lateral varnos used within newnode.  We must do
+                 * this before injecting var->varnullingrels into the tree.
+                 */
+                lvarnos = pull_varnos(rcon->root, newnode);
+                lvarnos = bms_del_members(lvarnos, rcon->relids);
+                /* For each one, add relevant nullingrels if any */
+                lvarno = -1;
+                while ((lvarno = bms_next_member(lvarnos, lvarno)) >= 0)
+                {
+                    Relids        lnullingrels;
+
+                    Assert(lvarno > 0 && lvarno <= nullinfo->rtlength);
+                    lnullingrels = bms_intersect(var->varnullingrels,
+                                                 nullinfo->nullingrels[lvarno]);
+                    if (!bms_is_empty(lnullingrels))
+                        newnode = add_nulling_relids(newnode,
+                                                     bms_make_singleton(lvarno),
+                                                     lnullingrels);
+                }
+            }
+
+            /* Finally, deal with Vars/PHVs of the subquery itself */
             newnode = add_nulling_relids(newnode,
                                          rcon->relids,
                                          var->varnullingrels);
@@ -4120,3 +4177,94 @@ find_jointree_node_for_rel(Node *jtnode, int relid)
              (int) nodeTag(jtnode));
     return NULL;
 }
+
+/*
+ * get_nullingrels: collect info about which outer joins null which relations
+ *
+ * The result struct contains, for each leaf relation used in the query,
+ * the set of relids of outer joins that potentially null that rel.
+ */
+static nullingrel_info *
+get_nullingrels(Query *parse)
+{
+    nullingrel_info *result = palloc_object(nullingrel_info);
+
+    result->rtlength = list_length(parse->rtable);
+    result->nullingrels = palloc0_array(Relids, result->rtlength + 1);
+    get_nullingrels_recurse((Node *) parse->jointree, NULL, result);
+    return result;
+}
+
+/*
+ * Recursive guts of get_nullingrels().
+ *
+ * Note: at any recursion level, the passed-down upper_nullingrels must be
+ * treated as a constant, but it can be stored directly into *info
+ * if we're at leaf level.  Upper recursion levels do not free their mutated
+ * copies of the nullingrels, because those are probably referenced by
+ * at least one leaf rel.
+ */
+static void
+get_nullingrels_recurse(Node *jtnode, Relids upper_nullingrels,
+                        nullingrel_info *info)
+{
+    if (jtnode == NULL)
+        return;
+    if (IsA(jtnode, RangeTblRef))
+    {
+        int            varno = ((RangeTblRef *) jtnode)->rtindex;
+
+        Assert(varno > 0 && varno <= info->rtlength);
+        info->nullingrels[varno] = upper_nullingrels;
+    }
+    else if (IsA(jtnode, FromExpr))
+    {
+        FromExpr   *f = (FromExpr *) jtnode;
+        ListCell   *l;
+
+        foreach(l, f->fromlist)
+        {
+            get_nullingrels_recurse(lfirst(l), upper_nullingrels, info);
+        }
+    }
+    else if (IsA(jtnode, JoinExpr))
+    {
+        JoinExpr   *j = (JoinExpr *) jtnode;
+        Relids        local_nullingrels;
+
+        switch (j->jointype)
+        {
+            case JOIN_INNER:
+                get_nullingrels_recurse(j->larg, upper_nullingrels, info);
+                get_nullingrels_recurse(j->rarg, upper_nullingrels, info);
+                break;
+            case JOIN_LEFT:
+            case JOIN_SEMI:
+            case JOIN_ANTI:
+                local_nullingrels = bms_add_member(bms_copy(upper_nullingrels),
+                                                   j->rtindex);
+                get_nullingrels_recurse(j->larg, upper_nullingrels, info);
+                get_nullingrels_recurse(j->rarg, local_nullingrels, info);
+                break;
+            case JOIN_FULL:
+                local_nullingrels = bms_add_member(bms_copy(upper_nullingrels),
+                                                   j->rtindex);
+                get_nullingrels_recurse(j->larg, local_nullingrels, info);
+                get_nullingrels_recurse(j->rarg, local_nullingrels, info);
+                break;
+            case JOIN_RIGHT:
+                local_nullingrels = bms_add_member(bms_copy(upper_nullingrels),
+                                                   j->rtindex);
+                get_nullingrels_recurse(j->larg, local_nullingrels, info);
+                get_nullingrels_recurse(j->rarg, upper_nullingrels, info);
+                break;
+            default:
+                elog(ERROR, "unrecognized join type: %d",
+                     (int) j->jointype);
+                break;
+        }
+    }
+    else
+        elog(ERROR, "unrecognized node type: %d",
+             (int) nodeTag(jtnode));
+}
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index b54428b38c..2d4c870423 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3665,6 +3665,7 @@ nodeitem
 normal_rand_fctx
 nsphash_hash
 ntile_context
+nullingrel_info
 numeric
 object_access_hook_type
 object_access_hook_type_str

pgsql-committers by date:

Previous
From: Tom Lane
Date:
Subject: Re: pgsql: Avoid mislabeling of lateral references when pulling up a subque
Next
From: Richard Guo
Date:
Subject: Re: pgsql: Avoid mislabeling of lateral references when pulling up a subque