Re: Even when the data is already ordered, MergeAppend still adds a Sort node - Mailing list pgsql-hackers

From feichanghong
Subject Re: Even when the data is already ordered, MergeAppend still adds a Sort node
Date
Msg-id tencent_690316E14D29E13D3323ABD3FCE3C2C49005@qq.com
Whole thread Raw
In response to Even when the data is already ordered, MergeAppend still adds a Sort node  (feichanghong <feichanghong@qq.com>)
List pgsql-hackers

On Jul 18, 2025, at 22:37, feichanghong <feichanghong@qq.com> wrote:

Should we retain the complete `pathkeys` in `Path->pathkeys` for use by the
upper layers of the subquery, rather than just keeping the portion trimmed by
`PlannerInfo->query_pathkeys`? I'm not sure if my understanding is correct.

Currently, a rough solution I can think of is: if the current query is not the
outermost Query, then pathkey_is_redundant should ignore the "EC contains a
constant" check. The specific fix is as follows:
```
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 8b04d40d36d..9fd1f3f8ff7 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -31,7 +31,7 @@
 /* Consider reordering of GROUP BY keys? */
 bool           enable_group_by_reordering = true;
 
-static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
+static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys, bool check_ec);
 static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
                                                                                         RelOptInfo *partrel,
                                                                                         int partkeycol);
@@ -104,7 +104,7 @@ make_canonical_pathkey(PlannerInfo *root,
  *             returns the updated 'target' list.
  */
 List *
-append_pathkeys(List *target, List *source)
+append_pathkeys(List *target, List *source, bool check_ec)
 {
        ListCell   *lc;
 
@@ -114,7 +114,7 @@ append_pathkeys(List *target, List *source)
        {
                PathKey    *pk = lfirst_node(PathKey, lc);
 
-               if (!pathkey_is_redundant(pk, target))
+               if (!pathkey_is_redundant(pk, target, check_ec))
                        target = lappend(target, pk);
        }
        return target;
@@ -156,13 +156,13 @@ append_pathkeys(List *target, List *source)
  * pointer comparison is enough to decide whether canonical ECs are the same.
  */
 static bool
-pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
+pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys, bool check_ec)
 {
        EquivalenceClass *new_ec = new_pathkey->pk_eclass;
        ListCell   *lc;
 
        /* Check for EC containing a constant --- unconditionally redundant */
-       if (EC_MUST_BE_REDUNDANT(new_ec))
+       if (check_ec && EC_MUST_BE_REDUNDANT(new_ec))
                return true;
 
        /* If same EC already used in list, then redundant */
@@ -798,7 +798,7 @@ build_index_pathkeys(PlannerInfo *root,
                         * We found the sort key in an EquivalenceClass, so it's relevant
                         * for this query.  Add it to list, unless it's redundant.
                         */
-                       if (!pathkey_is_redundant(cpathkey, retval))
+                       if (!pathkey_is_redundant(cpathkey, retval, root->query_level == 1))
                                retval = lappend(retval, cpathkey);
                }
                else
@@ -958,7 +958,7 @@ build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel,
                         * We found the sort key in an EquivalenceClass, so it's relevant
                         * for this query.  Add it to list, unless it's redundant.
                         */
-                       if (!pathkey_is_redundant(cpathkey, retval))
+                       if (!pathkey_is_redundant(cpathkey, retval, root->query_level == 1))
                                retval = lappend(retval, cpathkey);
                }
                else
@@ -1229,7 +1229,7 @@ convert_subquery_pathkeys(PlannerInfo *root, RelOptInfo *rel,
                 * Eliminate redundant ordering info; could happen if outer query
                 * equivalences subquery keys...
                 */
-               if (!pathkey_is_redundant(best_pathkey, retval))
+               if (!pathkey_is_redundant(best_pathkey, retval, root->query_level == 1))
                {
                        retval = lappend(retval, best_pathkey);
                        retvallen++;
@@ -1428,7 +1428,7 @@ make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
                }
 
                /* Canonical form eliminates redundant ordering keys */
-               if (!pathkey_is_redundant(pathkey, pathkeys))
+               if (!pathkey_is_redundant(pathkey, pathkeys, root->query_level == 1))
                        pathkeys = lappend(pathkeys, pathkey);
                else if (remove_redundant)
                        *sortclauses = foreach_delete_current(*sortclauses, l);
@@ -1823,7 +1823,7 @@ select_outer_pathkeys_for_merge(PlannerInfo *root,
                                                                                 COMPARE_LT,
                                                                                 false);
                /* can't be redundant because no duplicate ECs */
-               Assert(!pathkey_is_redundant(pathkey, pathkeys));
+               Assert(!pathkey_is_redundant(pathkey, pathkeys, root->query_level == 1));
                pathkeys = lappend(pathkeys, pathkey);
        }
 
@@ -1925,7 +1925,7 @@ make_inner_pathkeys_for_merge(PlannerInfo *root,
                 * reason, it certainly wouldn't match any available sort order for
                 * the input relation.
                 */
-               if (!pathkey_is_redundant(pathkey, pathkeys))
+               if (!pathkey_is_redundant(pathkey, pathkeys, root->query_level == 1))
                        pathkeys = lappend(pathkeys, pathkey);
        }
 
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 549aedcfa99..1dcc3ae29b9 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -3312,7 +3312,8 @@ adjust_group_pathkeys_for_groupagg(PlannerInfo *root)
                                /* include the GROUP BY pathkeys, if they exist */
                                if (grouppathkeys != NIL)
                                        currpathkeys = append_pathkeys(list_copy(grouppathkeys),
-                                                                                                  currpathkeys);
+                                                                                                  currpathkeys,
+                                                                                                  root->query_level == 1);
 
                                /* record that we found pathkeys for this aggregate */
                                aggindexes = bms_add_member(aggindexes, i);
@@ -3324,7 +3325,8 @@ adjust_group_pathkeys_for_groupagg(PlannerInfo *root)
                                /* include the GROUP BY pathkeys, if they exist */
                                if (grouppathkeys != NIL)
                                        pathkeys = append_pathkeys(list_copy(grouppathkeys),
-                                                                                          pathkeys);
+                                                                                          pathkeys,
+                                                                                          root->query_level == 1);
 
                                /* are 'pathkeys' compatible or better than 'currpathkeys'? */
                                switch (compare_pathkeys(currpathkeys, pathkeys))
@@ -6275,7 +6277,7 @@ make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
 
                /* Okay, make the combined pathkeys */
                if (window_pathkeys != NIL)
-                       window_pathkeys = append_pathkeys(window_pathkeys, orderby_pathkeys);
+                       window_pathkeys = append_pathkeys(window_pathkeys, orderby_pathkeys, root->query_level == 1);
                else
                        window_pathkeys = orderby_pathkeys;
        }
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 8410531f2d6..7766ae07cab 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -271,7 +271,7 @@ extern List *truncate_useless_pathkeys(PlannerInfo *root,
                                                                           RelOptInfo *rel,
                                                                           List *pathkeys);
 extern bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel);
-extern List *append_pathkeys(List *target, List *source);
+extern List *append_pathkeys(List *target, List *source, bool check_ec);
 extern PathKey *make_canonical_pathkey(PlannerInfo *root,
                                                                           EquivalenceClass *eclass, Oid opfamily,
                                                                           CompareType cmptype, bool nulls_first);
```

Looking forward to hearing everyone's suggestions.


Best Regards,
Fei Changhong

pgsql-hackers by date:

Previous
From: Peter Geoghegan
Date:
Subject: Re: index prefetching
Next
From: "David G. Johnston"
Date:
Subject: Re: Documenting inlining SQL functions