From efad6c39e247078c6d3cdf3cf8561bd5d35004e6 Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Fri, 23 Feb 2024 11:12:18 +0800 Subject: [PATCH v1 2/9] Introduce RelAggInfo structure to store info for grouped paths. This commit introduces RelAggInfo structure to store information needed to create grouped paths for base and join rels. It also revises the RelInfoList related structures and functions so that they can be used with RelAggInfos. --- src/backend/optimizer/util/relnode.c | 66 +++++++++++++++++-------- src/include/nodes/pathnodes.h | 73 ++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+), 21 deletions(-) diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 9e25750acd..c88da963db 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -36,13 +36,13 @@ /* - * An entry of a hash table that we use to make lookup for RelOptInfo - * structures more efficient. + * An entry of a hash table that we use to make lookup for RelOptInfo or + * RelAggInfo structures more efficient. */ typedef struct RelInfoEntry { Relids relids; /* hash key --- MUST BE FIRST */ - RelOptInfo *rel; + void *data; } RelInfoEntry; static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, @@ -477,7 +477,7 @@ find_base_rel_ignore_join(PlannerInfo *root, int relid) /* * build_rel_hash - * Construct the auxiliary hash table for relations. + * Construct the auxiliary hash table for relation specific data. 
*/ static void build_rel_hash(RelInfoList *list) @@ -497,19 +497,27 @@ build_rel_hash(RelInfoList *list) &hash_ctl, HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT); - /* Insert all the already-existing relations */ + /* Insert all the already-existing relation specific infos */ foreach(l, list->items) { - RelOptInfo *rel = (RelOptInfo *) lfirst(l); + void *item = lfirst(l); RelInfoEntry *hentry; bool found; + Relids relids; + + Assert(IsA(item, RelOptInfo) || IsA(item, RelAggInfo)); + + if (IsA(item, RelOptInfo)) + relids = ((RelOptInfo *) item)->relids; + else + relids = ((RelAggInfo *) item)->relids; hentry = (RelInfoEntry *) hash_search(hashtab, - &(rel->relids), + &relids, HASH_ENTER, &found); Assert(!found); - hentry->rel = rel; + hentry->data = item; } list->hash = hashtab; @@ -517,9 +525,9 @@ build_rel_hash(RelInfoList *list) /* * find_rel_info - * Find an RelOptInfo entry. + * Find an RelOptInfo or a RelAggInfo entry. */ -static RelOptInfo * +static void * find_rel_info(RelInfoList *list, Relids relids) { if (list == NULL) @@ -550,7 +558,7 @@ find_rel_info(RelInfoList *list, Relids relids) HASH_FIND, NULL); if (hentry) - return hentry->rel; + return hentry->data; } else { @@ -558,10 +566,18 @@ find_rel_info(RelInfoList *list, Relids relids) foreach(l, list->items) { - RelOptInfo *rel = (RelOptInfo *) lfirst(l); + void *item = lfirst(l); + Relids item_relids = NULL; + + Assert(IsA(item, RelOptInfo) || IsA(item, RelAggInfo)); - if (bms_equal(rel->relids, relids)) - return rel; + if (IsA(item, RelOptInfo)) + item_relids = ((RelOptInfo *) item)->relids; + else if (IsA(item, RelAggInfo)) + item_relids = ((RelAggInfo *) item)->relids; + + if (bms_equal(item_relids, relids)) + return item; } } @@ -576,32 +592,40 @@ find_rel_info(RelInfoList *list, Relids relids) RelOptInfo * find_join_rel(PlannerInfo *root, Relids relids) { - return find_rel_info(root->join_rel_list, relids); + return (RelOptInfo *) find_rel_info(root->join_rel_list, relids); } /* * 
add_rel_info - * Add given relation to the given list. Also add it to the auxiliary + * Add relation specific info to a list, and also add it to the auxiliary * hashtable if there is one. */ static void -add_rel_info(RelInfoList *list, RelOptInfo *rel) +add_rel_info(RelInfoList *list, void *data) { + Assert(IsA(data, RelOptInfo) || IsA(data, RelAggInfo)); + /* GEQO requires us to append the new relation to the end of the list! */ - list->items = lappend(list->items, rel); + list->items = lappend(list->items, data); /* store it into the auxiliary hashtable if there is one. */ if (list->hash) { + Relids relids; RelInfoEntry *hentry; bool found; + if (IsA(data, RelOptInfo)) + relids = ((RelOptInfo *) data)->relids; + else + relids = ((RelAggInfo *) data)->relids; + hentry = (RelInfoEntry *) hash_search(list->hash, - &(rel->relids), + &relids, HASH_ENTER, &found); Assert(!found); - hentry->rel = rel; + hentry->data = data; } } @@ -1496,7 +1520,7 @@ fetch_upper_rel(PlannerInfo *root, UpperRelationKind kind, Relids relids) /* If we already made this upperrel for the query, return it */ if (list) { - upperrel = find_rel_info(list, relids); + upperrel = (RelOptInfo *) find_rel_info(list, relids); if (upperrel) return upperrel; } diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index be51e2c652..d67f725ad6 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -1065,6 +1065,79 @@ typedef struct RelOptInfo ((rel)->part_scheme && (rel)->boundinfo && (rel)->nparts > 0 && \ (rel)->part_rels && (rel)->partexprs && (rel)->nullable_partexprs) +/* + * RelAggInfo + * Information needed to create grouped paths for base and join rels. + * + * "relids" is the set of relation identifiers (RT indexes), just like with + * RelOptInfo. + * + * "target" will be used as pathtarget if partial aggregation is applied to + * base relation or join. 
The same target will also --- if the relation is a + * join --- be used to join a grouped path to a non-grouped one. This target can + * contain plain-Var grouping expressions and Aggref nodes. + * + * Note: There's a convention that Aggref expressions are supposed to follow + * the other expressions of the target. Iterations of ->exprs may rely on this + * arrangement. + * + * "agg_input" contains Vars used either as grouping expressions or aggregate + * arguments. Paths providing the aggregation plan with input data should use + * this target. The only difference from reltarget of the non-grouped relation + * is that some items can have sortgroupref initialized. + * + * "input_rows" is the estimated number of input rows for AggPath. It's + * actually just a workspace for users of the structure, i.e. not initialized + * when an instance of the structure is created. + * + * "grouped_rows" is the estimated number of result rows of the AggPath. + * + * "group_clauses", "group_exprs" and "group_pathkeys" are lists of + * SortGroupClause, the corresponding grouping expressions and PathKey + * respectively. + * + * "agg_exprs" is a list of Aggref nodes for the aggregation of the relation's + * paths. 
+ */ +typedef struct RelAggInfo +{ + pg_node_attr(no_copy_equal, no_read, no_query_jumble) + + NodeTag type; + + /* + * the same as in RelOptInfo; set of base + OJ relids (rangetable indexes) + */ + Relids relids; + + /* + * the targetlist for Paths scanning this grouped rel; list of Vars/Exprs, + * cost, width + */ + struct PathTarget *target; + + /* + * the targetlist for Paths that generate input for the grouped paths + */ + struct PathTarget *agg_input; + + /* estimated number of input tuples for the grouped paths */ + Cardinality input_rows; + + /* estimated number of result tuples of the grouped relation*/ + Cardinality grouped_rows; + + /* a list of SortGroupClause's */ + List *group_clauses; + /* a list of grouping expressions */ + List *group_exprs; + /* a list of PathKeys */ + List *group_pathkeys; + + /* a list of Aggref nodes */ + List *agg_exprs; +} RelAggInfo; + /* * IndexOptInfo * Per-index information for planning/optimization -- 2.31.0