From f2fb47a38003e76aa06b5c6dd2afbeaedef36561 Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Tue, 6 Feb 2024 11:11:21 +0800 Subject: [PATCH v1] review diff --- src/backend/optimizer/path/pathkeys.c | 96 +++++++++----------------- src/backend/optimizer/plan/planner.c | 84 +--------------------- src/backend/optimizer/prep/prepunion.c | 77 +++++++++++++++++++-- src/backend/optimizer/util/pathnode.c | 11 --- src/include/optimizer/paths.h | 2 - 5 files changed, 104 insertions(+), 166 deletions(-) diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 2eb144f1df..e0281ef84c 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -985,71 +985,6 @@ build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel, return retval; } -/* - * build_setop_pathkeys - * Build and return a list of PathKeys, one for each non-junk target in - * 'tlist'. - */ -List * -build_setop_pathkeys(PlannerInfo *root, SetOperationStmt *op, - Relids relids, List *tlist) -{ - ListCell *lc; - ListCell *sgccell = list_head(op->groupClauses); - List *retval = NIL; - - foreach(lc, tlist) - { - TargetEntry *tle = lfirst_node(TargetEntry, lc); - SortGroupClause *sgc; - - Oid opfamily; - Oid opcintype; - int16 strategy; - PathKey *cpathkey; - - if (tle->resjunk) - continue; - - /* - * XXX query_is_distinct_for() is happy to Assert this, should this do - * that rather than ERROR? - */ - if (sgccell == NULL) - elog(ERROR, "too few group clauses"); - - sgc = lfirst_node(SortGroupClause, sgccell); - - /* Find the operator in pg_amop --- failure shouldn't happen */ - if (!get_ordering_op_properties(sgc->sortop, - &opfamily, &opcintype, &strategy)) - elog(ERROR, "operator %u is not a valid ordering operator", - sgc->eqop); - - cpathkey = make_pathkey_from_sortinfo(root, - tle->expr, - opfamily, - opcintype, - exprCollation((Node *) tle->expr), - false, - sgc->nulls_first, - 0, - relids, - true); - retval = lappend(retval, cpathkey); - sgccell = lnext(op->groupClauses, sgccell); - - /* - * There's no need to look for redundant pathkeys as set operations - * have no ability to have non-child constants in an EquivalenceClass. - * Let's just make sure that remains true. - */ - Assert(!EC_MUST_BE_REDUNDANT(cpathkey->pk_eclass)); - } - - return retval; -} - /* * build_expression_pathkey * Build a pathkeys list that describes an ordering by a single expression @@ -2251,6 +2186,34 @@ pathkeys_useful_for_grouping(PlannerInfo *root, List *pathkeys) return n; } +/* + * pathkeys_useful_for_setop + * Count the number of pathkeys that are useful for meeting the ordering + * requested by the query's set operation. + * + * Because we the have the possibility of incremental sort, a prefix list of + * keys is potentially useful for improving the performance of the set + * operation's ordering. Thus we return 0, if no valuable keys are found, or + * the number of leading keys shared by the list and the set operation's + * ordering.. + */ +static int +pathkeys_useful_for_setop(PlannerInfo *root, List *pathkeys) +{ + int n_common_pathkeys; + + if (root->setop_pathkeys == NIL) + return 0; /* no special setop ordering requested */ + + if (pathkeys == NIL) + return 0; /* unordered path */ + + (void) pathkeys_count_contained_in(root->setop_pathkeys, pathkeys, + &n_common_pathkeys); + + return n_common_pathkeys; +} + /* * truncate_useless_pathkeys * Shorten the given pathkey list to just the useful pathkeys. @@ -2268,6 +2231,9 @@ truncate_useless_pathkeys(PlannerInfo *root, if (nuseful2 > nuseful) nuseful = nuseful2; nuseful2 = pathkeys_useful_for_grouping(root, pathkeys); + if (nuseful2 > nuseful) + nuseful = nuseful2; + nuseful2 = pathkeys_useful_for_setop(root, pathkeys); if (nuseful2 > nuseful) nuseful = nuseful2; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 5cb140c049..47006edeae 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1769,13 +1769,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) foreach(lc, current_rel->pathlist) { Path *path = (Path *) lfirst(lc); - Path *input_path; - - /* - * Keep record of the unmodified path so that we can still tell which - * one is the cheapest_input_path. - */ - input_path = path; /* * If there is a FOR [KEY] UPDATE/SHARE clause, add the LockRows node. @@ -1804,82 +1797,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) } /* - * Create paths to suit final sort order required for setop_pathkeys. - * Here we'll sort the cheapest input path (if not sorted already) and - * incremental sort any paths which are partially sorted. We also - * include the cheapest path as-is so that the set operation can be - * cheaply implemented using a method which does not require the input - * to be sorted. - */ - if (root->setop_pathkeys != NIL) - { - Path *cheapest_input_path = current_rel->cheapest_total_path; - bool is_sorted; - int presorted_keys; - - is_sorted = pathkeys_count_contained_in(root->setop_pathkeys, - path->pathkeys, - &presorted_keys); - - if (!is_sorted) - { - /* - * Try at least sorting the cheapest path and also try - * incrementally sorting any path which is partially sorted - * already (no need to deal with paths which have presorted - * keys when incremental sort is disabled unless it's the - * cheapest input path). - */ - if (input_path != cheapest_input_path) - { - if (presorted_keys == 0 || !enable_incremental_sort) - continue; - } - else - { - /* - * If this is an INSERT/UPDATE/DELETE/MERGE, add a - * ModifyTable path. - */ - if (parse->commandType != CMD_SELECT) - path = build_final_modify_table_path(root, - final_rel, - path); - - /* - * The union planner might want to try a hash-based method - * of executing the set operation, so let's provide the - * cheapest input path so that it can do that as cheaply - * as possible. If the cheapest_input_path happens to be - * already correctly sorted then we'll add it to final_rel - * at the end of the loop. - */ - add_path(final_rel, path); - } - - /* - * We've no need to consider both a sort and incremental sort. - * We'll just do a sort if there are no presorted keys and an - * incremental sort when there are presorted keys. - */ - if (presorted_keys == 0 || !enable_incremental_sort) - path = (Path *) create_sort_path(root, - final_rel, - path, - root->setop_pathkeys, - limit_tuples); - else - path = (Path *) create_incremental_sort_path(root, - final_rel, - path, - root->setop_pathkeys, - presorted_keys, - limit_tuples); - } - } - - /* - * If this is an INSERT/UPDATE/DELETE/MERGE, add a ModifyTable path. + * If this is an INSERT/UPDATE/DELETE/MERGE, add the ModifyTable node. */ if (parse->commandType != CMD_SELECT) path = build_final_modify_table_path(root, final_rel, path); diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index dfdf2ec0a9..b0938e0110 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -588,6 +588,74 @@ build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel, { Path *subpath = (Path *) lfirst(lc); List *pathkeys; + Path *cheapest_input_path = final_rel->cheapest_total_path; + List *setop_pathkeys = rel->subroot->setop_pathkeys; + bool is_sorted; + int presorted_keys; + + /* + * Include the cheapest path as-is so that the set operation can be + * cheaply implemented using a method which does not require the input + * to be sorted. + */ + if (subpath == cheapest_input_path) + { + /* Convert subpath's pathkeys to outer representation */ + pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys, + make_tlist_from_pathtarget(subpath->pathtarget)); + + /* Generate outer path using this subpath */ + add_path(rel, (Path *) create_subqueryscan_path(root, + rel, + subpath, + trivial_tlist, + pathkeys, + NULL)); + } + + /* + * Create paths to suit final sort order required for setop_pathkeys. + * Here we'll sort the cheapest input path (if not sorted already) and + * incremental sort any paths which are partially sorted. + */ + is_sorted = pathkeys_count_contained_in(setop_pathkeys, + subpath->pathkeys, + &presorted_keys); + + if (!is_sorted) + { + double limittuples = rel->subroot->limit_tuples; + + /* + * Try at least sorting the cheapest path and also try + * incrementally sorting any path which is partially sorted + * already (no need to deal with paths which have presorted + * keys when incremental sort is disabled unless it's the + * cheapest input path). + */ + if (subpath != cheapest_input_path && + (presorted_keys == 0 || !enable_incremental_sort)) + continue; + + /* + * We've no need to consider both a sort and incremental sort. + * We'll just do a sort if there are no presorted keys and an + * incremental sort when there are presorted keys. + */ + if (presorted_keys == 0 || !enable_incremental_sort) + subpath = (Path *) create_sort_path(root, + final_rel, + subpath, + setop_pathkeys, + limittuples); + else + subpath = (Path *) create_incremental_sort_path(root, + final_rel, + subpath, + setop_pathkeys, + presorted_keys, + limittuples); + } /* Convert subpath's pathkeys to outer representation */ pathkeys = convert_subquery_pathkeys(root, rel, subpath->pathkeys, @@ -690,12 +758,11 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, if (try_sorted) { - /* determine the pathkeys for sorting by the whole target list */ - union_pathkeys = build_setop_pathkeys(root, - op, - bms_make_singleton(0), - tlist); + /* Identify the grouping semantics */ groupList = generate_setop_grouplist(op, tlist); + + /* Determine the pathkeys for sorting by the whole target list */ + union_pathkeys = make_pathkeys_for_sortclauses(root, groupList, tlist); } /* diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index e375cd52b5..64a2cd1045 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -587,22 +587,11 @@ add_path(RelOptInfo *parent_rel, Path *new_path) parent_rel->pathlist = foreach_delete_current(parent_rel->pathlist, p1); -#ifdef NOT_USED - - /* - * XXX fixme. When creating the final upper rel paths for queries - * which have setop_pathkey set, we'll at the cheapest path as-is - * also try sorting the cheapest path. If the costs of each are - * fuzzily the same then we might choose to pfree the cheapest - * path. That's bad as the sort uses that path. - */ - /* * Delete the data pointed-to by the deleted cell, if possible */ if (!IsA(old_path, IndexPath)) pfree(old_path); -#endif } else { diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 7dc3507e8d..1165e98c5b 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -222,8 +222,6 @@ extern List *build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index, ScanDirection scandir); extern List *build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel, ScanDirection scandir, bool *partialkeys); -extern List *build_setop_pathkeys(PlannerInfo *root, SetOperationStmt *op, - Relids relids, List *tlist); extern List *build_expression_pathkey(PlannerInfo *root, Expr *expr, Oid opno, Relids rel, bool create_it); -- 2.31.0