From 41273bc5db9785ba88038c15b762a1ffd367b762 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Tue, 11 Nov 2025 23:12:39 +0100 Subject: [PATCH v5 2/2] Allow dimensions with some join restrictions --- src/backend/optimizer/plan/analyzejoins.c | 154 +++++++++++++++++----- 1 file changed, 119 insertions(+), 35 deletions(-) diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 79a7f0c8608..bc19c2b537c 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -2782,7 +2782,8 @@ starjoin_match_to_foreign_key(PlannerInfo *root, RelOptInfo *rel) * with respect to the rels after it). */ static bool -starjoin_is_dimension(PlannerInfo *root, RangeTblRef *rtr) +starjoin_is_dimension(PlannerInfo *root, RangeTblRef *rtr, + bool allow_restrictions) { Index rti = rtr->rtindex; RangeTblEntry *rte = root->simple_rte_array[rti]; @@ -2815,7 +2816,7 @@ starjoin_is_dimension(PlannerInfo *root, RangeTblRef *rtr) * XXX This blocks the simplified planning for LEFT (or OUTER) joins, * because outer joins imply restrictions. */ - if (has_join_restriction(root, rel)) + if (!allow_restrictions && has_join_restriction(root, rel)) return false; /* @@ -2953,6 +2954,28 @@ starjoin_is_dimension(PlannerInfo *root, RangeTblRef *rtr) * to disable the optimization if needed, I think - don't collapse the * dimensions into the "group" join item. It would require changes to * the generic join search, to be aware of the new item type. + * + * The search for dimensions may perform multiple passes over the list, to + * allow treating some rels with restrictions as dimensions. Relations + * without restrictions can be moved to an arbitrary place in the join + * tree. We leverage that by moving them to the list of dimensions, which + * may skip over various other relations. + * + * Relations with join order restrictions do not allow these arbitrary moves. But we can + * allow treating them as dimensions in some cases. 
A join restriction does not + * imply we can't move the relation at all, otherwise we wouldn't be allowed + * to move any relations when there's a single relation with a restriction. + * It means we can't change the relative order of restricted relations. + * + * This means we can treat a relation with a restriction as a dimension, + * as long as it's the last in the current joinlist (after some relations + * were already moved to list of dimensions). + * + * To do this we walk the joinlist multiple times, and in each iteration + * we try to identify as many dimensions as possible. We walk the list in + * reverse, and we add dimensions to the beginning of the list. This way + * we preserve the original syntactic join order. If we find no dimensions + * in a loop, we're done. */ List * starjoin_adjust_joins(PlannerInfo *root, List *joinlist) @@ -2961,6 +2984,8 @@ starjoin_adjust_joins(PlannerInfo *root, List *joinlist) List *newlist = NIL; List *dimensions = NIL; int nlist = list_length(joinlist); + int nitems; + Node **items; /* Do nothing if starjoin optimization not enabled. */ if (!enable_starjoin_join_search) @@ -2978,6 +3003,15 @@ starjoin_adjust_joins(PlannerInfo *root, List *joinlist) (nlist == 1 && !IsA(linitial(joinlist), List))) return joinlist; + /* expand the list into an array, to make backwards processing easier */ + items = palloc_array(Node *, nlist); + + nitems = 0; + foreach(lc, joinlist) + { + items[nitems++] = (Node *) lfirst(lc); + } + /* * Process the current join problem - split the elements into dimensions * and non-dimensions. If there are dimensions, add them back at the end, @@ -2989,6 +3023,9 @@ starjoin_adjust_joins(PlannerInfo *root, List *joinlist) * to check if it's a dimension. Other types of elements are just added * back to the list as-is. * + * Walk the list backwards, to preserve syntactic join order. This allows + * tracking "last" relation. If we find no dimension, we're done. 
+ * * XXX I think we need to be careful to keep the order of the list (for * the non-dimension entries). The join_search_one_level() relies on that * when handling join order restrictions. @@ -2998,47 +3035,94 @@ starjoin_adjust_joins(PlannerInfo *root, List *joinlist) * something they don't need. A mutable iterator might be a way, but I'm * not sure how expensive this really is. */ - foreach(lc, joinlist) + for (;;) { - Node *item = (Node *) lfirst(lc); + bool found = false; /* found at least one dimension */ + bool last = true; /* is this the current last rel */ - /* a separate join search problem, handle it recursively */ - if (IsA(item, List)) + for (int i = (nitems - 1); i >= 0; i--) { - newlist = lappend(newlist, - starjoin_adjust_joins(root, (List *) item)); - continue; + Node *item = items[i]; + + /* skip empty items (already moved to dimensions) */ + if (item == NULL) + continue; + + /* do nothing about join subproblems, leave them in place */ + if (IsA(item, List)) + { + /* XXX do we need to reset "last" for a join subtree? */ + last = false; + continue; + } + + /* + * If it's not a List, it has to be a RangeTblRef - joinlists can't + * contain any other elements (see make_rel_from_joinlist). + */ + Assert(IsA(item, RangeTblRef)); + + /* + * Is it a dimension? + * + * An entry representing a baserel. If it's a dimension, save it + * in a separate list, and we'll add it at the "top" of the join + * at the end. Otherwise add it to the list just like other + * elements. + * + * We do this only when the joinlist has at least 3 items. For + * fewer rels the optimization does not matter, there's only a + * single join order anyway. That only skips the optimization on + * this level - we still do the recursion, and that might hit a + * larger join problem. + * + * XXX If we decide to treat the rel as a dimension, don't update + * the "last" flag. The next relation will be the last one. 
+ * + * XXX We might have a new GUC to customize the cutoff limit, but + * for now it seems good enough to do it whenever applicable. If + * we find it's not worth it for less than N rels, we can add it + * later. + */ + if ((nlist >= 3) && + starjoin_is_dimension(root, (RangeTblRef *) item, last)) + { + /* add it to the beginning of the list */ + dimensions = lcons(item, dimensions); + items[i] = NULL; + found = true; + continue; + } + + /* + * Not a dimension. Leave it in the array, but remember the next + * item (backwards) is no longer the last one. + * + * XXX Maybe we don't need to reset "last" if the item does not + * have join restrictions? + */ + last = false; } - /* - * If it's not a List, it has to be a RangeTblRef - jinlists can't - * contain any other elements (see make_rel_from_joinlist). - */ - Assert(IsA(item, RangeTblRef)); + /* terminate when a loop finds no dimension */ + if (!found) + break; + } - /* - * An entry representing a baserel. If it's a dimension, save it in a - * separate list, and we'll add it at the "top" of the join at the - * end. Otherwise add it to the list just like other elements. - * - * We do this only when the joinlist has at least 3 items. For fewer - * rels the optimization does not matter, there's only a single join - * order anyway. That only skips the optimization on this level - we - * still do the recursion, and that might hit a larger join problem. - * - * XXX We might have a new GUC to customize the cutoff limit, but for - * now it seems good enough to do it whenever applicable. If we find - * it's not worth it for less than N rels, we can add it later. - */ - if ((nlist >= 3) && - starjoin_is_dimension(root, (RangeTblRef *) item)) - { - dimensions = lappend(dimensions, item); + /* + * Add items remaining in the input array to the newlist. We need to do + * this every time, even without dimensions, because we need to recurse to + * the nested join problems. 
+ */ + for (int i = 0; i < nitems; i++) + { + if (items[i] == NULL) continue; - } - /* not a dimension, add it to the list directly */ - newlist = lappend(newlist, item); + if (IsA(items[i], List)) + items[i] = (Node *) starjoin_adjust_joins(root, (List *) items[i]); + + newlist = lappend(newlist, items[i]); } /* -- 2.51.1