Re: pgsql: Avoid mislabeling of lateral references when pulling up a subque - Mailing list pgsql-committers
From | Tom Lane |
---|---|
Subject | Re: pgsql: Avoid mislabeling of lateral references when pulling up a subque |
Date | |
Msg-id | 590235.1732915884@sss.pgh.pa.us Whole thread Raw |
In response to | Re: pgsql: Avoid mislabeling of lateral references when pulling up a subque (Tom Lane <tgl@sss.pgh.pa.us>) |
Responses |
Re: pgsql: Avoid mislabeling of lateral references when pulling up a subque
|
List | pgsql-committers |
I wrote: > The ideas I'd been toying with last night involved a pre-scan over > the join tree to calculate the potential nullingrels of each leaf RTE > (same idea as RelOptInfo.nulling_relids, but of course we don't have > any RelOptInfos yet). That seems painful though because we'd have to > update the data structure somehow after each subquery pullup. I realized that we can make that work by doing the pre-calculation at the start of each pull_up_simple_subquery. We only have to do it for subqueries marked LATERAL, so this doesn't seem too horribly inefficient. Draft patch attached. I didn't spend effort on devising test cases, but it works for your example. regards, tom lane diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 2e0d41a8d1..ff9c1cd24e 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -42,6 +42,17 @@ #include "rewrite/rewriteManip.h" +typedef struct nullingrel_info +{ + /* + * For each leaf RTE, nullingrels[rti] is the set of relids of outer joins + * that potentially null that RTE. + */ + Relids *nullingrels; + /* Length of range table (maximum index in nullingrels[]) */ + int rtlength; /* used only for assertion checks */ +} nullingrel_info; + typedef struct pullup_replace_vars_context { PlannerInfo *root; @@ -49,6 +60,8 @@ typedef struct pullup_replace_vars_context RangeTblEntry *target_rte; /* RTE of subquery */ Relids relids; /* relids within subquery, as numbered after * pullup (set only if target_rte->lateral) */ + nullingrel_info *nullinfo; /* per-RTE nullingrel info (set only if + * target_rte->lateral) */ bool *outer_hasSubLinks; /* -> outer query's hasSubLinks */ int varno; /* varno of subquery */ bool wrap_non_vars; /* do we need all non-Var outputs to be PHVs? */ @@ -142,6 +155,9 @@ static void substitute_phv_relids(Node *node, static void fix_append_rel_relids(PlannerInfo *root, int varno, Relids subrelids); static Node *find_jointree_node_for_rel(Node *jtnode, int relid); +static nullingrel_info *get_nullingrels(Query *parse); +static void get_nullingrels_recurse(Node *jtnode, Relids upper_nullingrels, + nullingrel_info *info); /* @@ -1259,10 +1275,16 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, rvcontext.targetlist = subquery->targetList; rvcontext.target_rte = rte; if (rte->lateral) + { rvcontext.relids = get_relids_in_jointree((Node *) subquery->jointree, true, true); - else /* won't need relids */ + rvcontext.nullinfo = get_nullingrels(parse); + } + else /* won't need these fields */ + { rvcontext.relids = NULL; + rvcontext.nullinfo = NULL; + } rvcontext.outer_hasSubLinks = &parse->hasSubLinks; rvcontext.varno = varno; /* this flag will be set below, if needed */ @@ -1809,7 +1831,8 @@ pull_up_simple_values(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte) rvcontext.root = root; rvcontext.targetlist = tlist; rvcontext.target_rte = rte; - rvcontext.relids = NULL; + rvcontext.relids = NULL; /* can't be any lateral references here */ + rvcontext.nullinfo = NULL; rvcontext.outer_hasSubLinks = &parse->hasSubLinks; rvcontext.varno = varno; rvcontext.wrap_non_vars = false; @@ -1971,9 +1994,10 @@ pull_up_constant_function(PlannerInfo *root, Node *jtnode, /* * Since this function was reduced to a Const, it doesn't contain any * lateral references, even if it's marked as LATERAL. This means we - * don't need to fill relids. + * don't need to fill relids or nullinfo. */ rvcontext.relids = NULL; + rvcontext.nullinfo = NULL; rvcontext.outer_hasSubLinks = &parse->hasSubLinks; rvcontext.varno = ((RangeTblRef *) jtnode)->rtindex; @@ -2688,9 +2712,42 @@ pullup_replace_vars_callback(Var *var, { /* * There should be Vars/PHVs within the expression that we can - * modify. Per above discussion, modify only Vars/PHVs of the - * subquery, not lateral references. + * modify. Vars/PHVs of the subquery should have the full + * var->varnullingrels added to them, but if there are lateral + * references within the expression, those must be marked with + * only the nullingrels that potentially apply to them. That + * could be different for different lateral relations, so we have + * to do this work for each one. */ + if (rcon->target_rte->lateral) + { + nullingrel_info *nullinfo = rcon->nullinfo; + Relids lvarnos; + int lvarno; + + /* + * Identify lateral varnos used within newnode. We must do + * this before injecting var->varnullingrels into the tree. + */ + lvarnos = pull_varnos(rcon->root, newnode); + lvarnos = bms_del_members(lvarnos, rcon->relids); + /* For each one, add relevant nullingrels if any */ + lvarno = -1; + while ((lvarno = bms_next_member(lvarnos, lvarno)) >= 0) + { + Relids lnullingrels; + + Assert(lvarno > 0 && lvarno <= nullinfo->rtlength); + lnullingrels = bms_intersect(var->varnullingrels, + nullinfo->nullingrels[lvarno]); + if (!bms_is_empty(lnullingrels)) + newnode = add_nulling_relids(newnode, + bms_make_singleton(lvarno), + lnullingrels); + } + } + + /* Finally, deal with Vars/PHVs of the subquery itself */ newnode = add_nulling_relids(newnode, rcon->relids, var->varnullingrels); @@ -4120,3 +4177,94 @@ find_jointree_node_for_rel(Node *jtnode, int relid) (int) nodeTag(jtnode)); return NULL; } + +/* + * get_nullingrels: collect info about which outer joins null which relations + * + * The result struct contains, for each leaf relation used in the query, + * the set of relids of outer joins that potentially null that rel. + */ +static nullingrel_info * +get_nullingrels(Query *parse) +{ + nullingrel_info *result = palloc_object(nullingrel_info); + + result->rtlength = list_length(parse->rtable); + result->nullingrels = palloc0_array(Relids, result->rtlength + 1); + get_nullingrels_recurse((Node *) parse->jointree, NULL, result); + return result; +} + +/* + * Recursive guts of get_nullingrels(). + * + * Note: at any recursion level, the passed-down upper_nullingrels must be + * treated as a constant, but it can be stored directly into *info + * if we're at leaf level. Upper recursion levels do not free their mutated + * copies of the nullingrels, because those are probably referenced by + * at least one leaf rel. + */ +static void +get_nullingrels_recurse(Node *jtnode, Relids upper_nullingrels, + nullingrel_info *info) +{ + if (jtnode == NULL) + return; + if (IsA(jtnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jtnode)->rtindex; + + Assert(varno > 0 && varno <= info->rtlength); + info->nullingrels[varno] = upper_nullingrels; + } + else if (IsA(jtnode, FromExpr)) + { + FromExpr *f = (FromExpr *) jtnode; + ListCell *l; + + foreach(l, f->fromlist) + { + get_nullingrels_recurse(lfirst(l), upper_nullingrels, info); + } + } + else if (IsA(jtnode, JoinExpr)) + { + JoinExpr *j = (JoinExpr *) jtnode; + Relids local_nullingrels; + + switch (j->jointype) + { + case JOIN_INNER: + get_nullingrels_recurse(j->larg, upper_nullingrels, info); + get_nullingrels_recurse(j->rarg, upper_nullingrels, info); + break; + case JOIN_LEFT: + case JOIN_SEMI: + case JOIN_ANTI: + local_nullingrels = bms_add_member(bms_copy(upper_nullingrels), + j->rtindex); + get_nullingrels_recurse(j->larg, upper_nullingrels, info); + get_nullingrels_recurse(j->rarg, local_nullingrels, info); + break; + case JOIN_FULL: + local_nullingrels = bms_add_member(bms_copy(upper_nullingrels), + j->rtindex); + get_nullingrels_recurse(j->larg, local_nullingrels, info); + get_nullingrels_recurse(j->rarg, local_nullingrels, info); + break; + case JOIN_RIGHT: + local_nullingrels = bms_add_member(bms_copy(upper_nullingrels), + j->rtindex); + get_nullingrels_recurse(j->larg, local_nullingrels, info); + get_nullingrels_recurse(j->rarg, upper_nullingrels, info); + break; + default: + elog(ERROR, "unrecognized join type: %d", + (int) j->jointype); + break; + } + } + else + elog(ERROR, "unrecognized node type: %d", + (int) nodeTag(jtnode)); +} diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index b54428b38c..2d4c870423 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3665,6 +3665,7 @@ nodeitem normal_rand_fctx nsphash_hash ntile_context +nullingrel_info numeric object_access_hook_type object_access_hook_type_str
pgsql-committers by date: