From b43aac217ba51854c5a22636f94f14e81bae3991 Mon Sep 17 00:00:00 2001 From: amitlan Date: Thu, 24 Mar 2022 22:47:03 +0900 Subject: [PATCH v7 2/4] Add [Merge]Append.partitioned_rels To record the RT indexes of all partitioned ancestors leading up to leaf partitions that are appended by the node. If a given [Merge]Append node is left out from the plan due to there being only one element in its list of child subplans, then its partitioned_rels set is added to PlannerGlobal.elidedAppendPartedRels, which is passed down to the executor through PlannedStmt. There are no users for partitioned_rels and elidedAppendPartedRels as of this commit, though a later commit will require the ability to extract the set of relations that must be locked to make a plan tree safe for execution by walking the plan tree itself, so having the partitioned tables also be present in the plan tree will be helpful. Note that currently the executor relies on the fact that the set of relations to be locked can be obtained by simply scanning the range table that's made available in PlannedStmt along with the plan tree. 
--- src/backend/nodes/copyfuncs.c | 3 +++ src/backend/nodes/outfuncs.c | 5 +++++ src/backend/nodes/readfuncs.c | 3 +++ src/backend/optimizer/path/joinrels.c | 9 ++++++++ src/backend/optimizer/plan/createplan.c | 18 +++++++++++++++- src/backend/optimizer/plan/planner.c | 8 +++++++ src/backend/optimizer/plan/setrefs.c | 28 +++++++++++++++++++++++++ src/backend/optimizer/util/inherit.c | 16 ++++++++++++++ src/backend/optimizer/util/relnode.c | 20 ++++++++++++++++++ src/include/nodes/pathnodes.h | 22 +++++++++++++++++++ src/include/nodes/plannodes.h | 17 +++++++++++++++ 11 files changed, 148 insertions(+), 1 deletion(-) diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 2cbd8aa0df..d4b5cc7e59 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -106,6 +106,7 @@ _copyPlannedStmt(const PlannedStmt *from) COPY_NODE_FIELD(invalItems); COPY_NODE_FIELD(paramExecTypes); COPY_NODE_FIELD(utilityStmt); + COPY_BITMAPSET_FIELD(elidedAppendPartedRels); COPY_LOCATION_FIELD(stmt_location); COPY_SCALAR_FIELD(stmt_len); @@ -253,6 +254,7 @@ _copyAppend(const Append *from) COPY_SCALAR_FIELD(nasyncplans); COPY_SCALAR_FIELD(first_partial_plan); COPY_NODE_FIELD(part_prune_info); + COPY_BITMAPSET_FIELD(partitioned_rels); return newnode; } @@ -281,6 +283,7 @@ _copyMergeAppend(const MergeAppend *from) COPY_POINTER_FIELD(collations, from->numCols * sizeof(Oid)); COPY_POINTER_FIELD(nullsFirst, from->numCols * sizeof(bool)); COPY_NODE_FIELD(part_prune_info); + COPY_BITMAPSET_FIELD(partitioned_rels); return newnode; } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index c25f0bd684..99056272f3 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -324,6 +324,7 @@ _outPlannedStmt(StringInfo str, const PlannedStmt *node) WRITE_NODE_FIELD(invalItems); WRITE_NODE_FIELD(paramExecTypes); WRITE_NODE_FIELD(utilityStmt); + WRITE_BITMAPSET_FIELD(elidedAppendPartedRels); 
WRITE_LOCATION_FIELD(stmt_location); WRITE_INT_FIELD(stmt_len); } @@ -443,6 +444,7 @@ _outAppend(StringInfo str, const Append *node) WRITE_INT_FIELD(nasyncplans); WRITE_INT_FIELD(first_partial_plan); WRITE_NODE_FIELD(part_prune_info); + WRITE_BITMAPSET_FIELD(partitioned_rels); } static void @@ -460,6 +462,7 @@ _outMergeAppend(StringInfo str, const MergeAppend *node) WRITE_OID_ARRAY(collations, node->numCols); WRITE_BOOL_ARRAY(nullsFirst, node->numCols); WRITE_NODE_FIELD(part_prune_info); + WRITE_BITMAPSET_FIELD(partitioned_rels); } static void @@ -2333,6 +2336,7 @@ _outPlannerGlobal(StringInfo str, const PlannerGlobal *node) WRITE_BOOL_FIELD(parallelModeOK); WRITE_BOOL_FIELD(parallelModeNeeded); WRITE_CHAR_FIELD(maxParallelHazard); + WRITE_BITMAPSET_FIELD(elidedAppendPartedRels); } static void @@ -2444,6 +2448,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node) WRITE_BOOL_FIELD(partbounds_merged); WRITE_BITMAPSET_FIELD(live_parts); WRITE_BITMAPSET_FIELD(all_partrels); + WRITE_BITMAPSET_FIELD(partitioned_rels); } static void diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index e0b3ad1ed2..7536f216bd 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1662,6 +1662,7 @@ _readPlannedStmt(void) READ_NODE_FIELD(invalItems); READ_NODE_FIELD(paramExecTypes); READ_NODE_FIELD(utilityStmt); + READ_BITMAPSET_FIELD(elidedAppendPartedRels); READ_LOCATION_FIELD(stmt_location); READ_INT_FIELD(stmt_len); @@ -1784,6 +1785,7 @@ _readAppend(void) READ_INT_FIELD(nasyncplans); READ_INT_FIELD(first_partial_plan); READ_NODE_FIELD(part_prune_info); + READ_BITMAPSET_FIELD(partitioned_rels); READ_DONE(); } @@ -1806,6 +1808,7 @@ _readMergeAppend(void) READ_OID_ARRAY(collations, local_node->numCols); READ_BOOL_ARRAY(nullsFirst, local_node->numCols); READ_NODE_FIELD(part_prune_info); + READ_BITMAPSET_FIELD(partitioned_rels); READ_DONE(); } diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c 
index 9da3ff2f9a..e74d40fee3 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -1549,6 +1549,15 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, populate_joinrel_with_paths(root, child_rel1, child_rel2, child_joinrel, child_sjinfo, child_restrictlist); + + /* + * A parent relation's partitioned_rels must be a superset of the sets + * of all its children, direct or indirect, so bubble up the child + * joinrel's set. + */ + joinrel->partitioned_rels = + bms_add_members(joinrel->partitioned_rels, + child_joinrel->partitioned_rels); } } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index fa069a217c..0026086591 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -26,10 +26,12 @@ #include "nodes/extensible.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "optimizer/appendinfo.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/optimizer.h" #include "optimizer/paramassign.h" +#include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/placeholder.h" #include "optimizer/plancat.h" @@ -1331,11 +1333,11 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) best_path->subpaths, prunequal); } - plan->appendplans = subplans; plan->nasyncplans = nasyncplans; plan->first_partial_plan = best_path->first_partial_path; plan->part_prune_info = partpruneinfo; + plan->partitioned_rels = bms_copy(rel->partitioned_rels); copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -1499,6 +1501,20 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, node->mergeplans = subplans; node->part_prune_info = partpruneinfo; + /* + * We need to explicitly add to the plan node the RT indexes of any + * partitioned tables whose partitions will be scanned by the nodes in + * 'subplans'. 
There can be multiple RT indexes in the set due to the + * partition tree being multi-level and/or this being a plan for UNION ALL + * over multiple partition trees. Along with scanrelids of leaf-level Scan + * nodes, this allows the executor to lock the full set of relations being + * scanned by this node. + * + * Note that 'apprelids' only contains the top-level base relation(s), so + * is not sufficient for the purpose. + */ + node->partitioned_rels = bms_copy(rel->partitioned_rels); + /* * If prepare_sort_from_pathkeys added sort columns, but we were told to * produce either the exact tlist or a narrow tlist, we should get rid of diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index bd09f85aea..374a9d9753 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -529,6 +529,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, result->paramExecTypes = glob->paramExecTypes; /* utilityStmt should be null, but we might as well copy it */ result->utilityStmt = parse->utilityStmt; + result->elidedAppendPartedRels = glob->elidedAppendPartedRels; result->stmt_location = parse->stmt_location; result->stmt_len = parse->stmt_len; @@ -7365,6 +7366,13 @@ create_partitionwise_grouping_paths(PlannerInfo *root, add_paths_to_append_rel(root, grouped_rel, grouped_live_children); } + + /* + * Input rel might be a partitioned appendrel, though grouped_rel has at + * this point taken its role as the appendrel owning the former's + * children, so copy the former's partitioned_rels set into the latter. 
+ */ + grouped_rel->partitioned_rels = bms_copy(input_rel->partitioned_rels); } /* diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index a7b11b7f03..dbdeb8ec9d 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -1512,6 +1512,10 @@ set_append_references(PlannerInfo *root, lfirst(l) = set_plan_refs(root, (Plan *) lfirst(l), rtoffset); } + /* Fix up partitioned_rels before possibly removing the Append below. */ + aplan->partitioned_rels = offset_relid_set(aplan->partitioned_rels, + rtoffset); + /* * See if it's safe to get rid of the Append entirely. For this to be * safe, there must be only one child plan and that child plan's parallel @@ -1522,8 +1526,17 @@ set_append_references(PlannerInfo *root, */ if (list_length(aplan->appendplans) == 1 && ((Plan *) linitial(aplan->appendplans))->parallel_aware == aplan->plan.parallel_aware) + { + /* + * Partitioned tables involved, if any, must be made known to the + * executor. + */ + root->glob->elidedAppendPartedRels = + bms_add_members(root->glob->elidedAppendPartedRels, + aplan->partitioned_rels); return clean_up_removed_plan_level((Plan *) aplan, (Plan *) linitial(aplan->appendplans)); + } /* * Otherwise, clean up the Append as needed. It's okay to do this after @@ -1584,6 +1597,12 @@ set_mergeappend_references(PlannerInfo *root, lfirst(l) = set_plan_refs(root, (Plan *) lfirst(l), rtoffset); } + /* + * Fix up partitioned_rels before possibly removing the MergeAppend below. + */ + mplan->partitioned_rels = offset_relid_set(mplan->partitioned_rels, + rtoffset); + /* * See if it's safe to get rid of the MergeAppend entirely. 
For this to * be safe, there must be only one child plan and that child plan's @@ -1594,8 +1613,17 @@ set_mergeappend_references(PlannerInfo *root, */ if (list_length(mplan->mergeplans) == 1 && ((Plan *) linitial(mplan->mergeplans))->parallel_aware == mplan->plan.parallel_aware) + { + /* + * Partitioned tables involved, if any, must be made known to the + * executor. + */ + root->glob->elidedAppendPartedRels = + bms_add_members(root->glob->elidedAppendPartedRels, + mplan->partitioned_rels); return clean_up_removed_plan_level((Plan *) mplan, (Plan *) linitial(mplan->mergeplans)); + } /* * Otherwise, clean up the MergeAppend as needed. It's okay to do this diff --git a/src/backend/optimizer/util/inherit.c b/src/backend/optimizer/util/inherit.c index 7e134822f3..56912e4101 100644 --- a/src/backend/optimizer/util/inherit.c +++ b/src/backend/optimizer/util/inherit.c @@ -406,6 +406,14 @@ expand_partitioned_rtentry(PlannerInfo *root, RelOptInfo *relinfo, childrte, childRTindex, childrel, top_parentrc, lockmode); + /* + * A parent relation's partitioned_rels must be a superset of the sets + * of all its children, direct or indirect, so bubble up the child + * rel's set. + */ + relinfo->partitioned_rels = bms_add_members(relinfo->partitioned_rels, + childrelinfo->partitioned_rels); + /* Close child relation, but keep locks */ table_close(childrel, NoLock); } @@ -737,6 +745,14 @@ expand_appendrel_subquery(PlannerInfo *root, RelOptInfo *rel, /* Child may itself be an inherited rel, either table or subquery. */ if (childrte->inh) expand_inherited_rtentry(root, childrel, childrte, childRTindex); + + /* + * A parent relation's partitioned_rels must be a superset of the sets + * of all its children, direct or indirect, so bubble up the child + * rel's set. 
+ */ + rel->partitioned_rels = bms_add_members(rel->partitioned_rels, + childrel->partitioned_rels); } } diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 520409f4ba..1d082a8fdd 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -361,6 +361,10 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) } } + /* A partitioned appendrel. */ + if (rel->part_scheme != NULL) + rel->partitioned_rels = bms_copy(rel->relids); + /* Save the finished struct in the query's simple_rel_array */ root->simple_rel_array[relid] = rel; @@ -729,6 +733,14 @@ build_join_rel(PlannerInfo *root, set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel, sjinfo, restrictlist); + /* + * The joinrel may get processed as an appendrel via partitionwise join + * if both outer and inner rels are partitioned, so set partitioned_rels + * appropriately. + */ + joinrel->partitioned_rels = bms_union(outer_rel->partitioned_rels, + inner_rel->partitioned_rels); + /* * Set the consider_parallel flag if this joinrel could potentially be * scanned within a parallel worker. If this flag is false for either @@ -897,6 +909,14 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel, sjinfo, restrictlist); + /* + * The joinrel may get processed as an appendrel via partitionwise join + * if both outer and inner rels are partitioned, so set partitioned_rels + * appropriately. + */ + joinrel->partitioned_rels = bms_union(outer_rel->partitioned_rels, + inner_rel->partitioned_rels); + /* We build the join only once. 
*/ Assert(!find_join_rel(root, joinrel->relids)); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 1f3845b3fe..5327d9ba8b 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -130,6 +130,11 @@ typedef struct PlannerGlobal char maxParallelHazard; /* worst PROPARALLEL hazard level */ PartitionDirectory partition_directory; /* partition descriptors */ + + Bitmapset *elidedAppendPartedRels; /* Combined partitioned_rels of all + * single-subplan [Merge]Append nodes + * that have been removed from the + * various plan trees. */ } PlannerGlobal; /* macro for fetching the Plan associated with a SubPlan node */ @@ -773,6 +778,23 @@ typedef struct RelOptInfo Relids all_partrels; /* Relids set of all partition relids */ List **partexprs; /* Non-nullable partition key expressions */ List **nullable_partexprs; /* Nullable partition key expressions */ + + /* + * For an appendrel parent relation (base, join, or upper) that is + * partitioned, this stores the RT indexes of all the partitioned ancestors + * including itself that lead up to the individual leaf partitions that + * will be scanned to produce this relation's output rows. The relid set + * is copied into the resulting Append or MergeAppend plan node for + * allowing the executor to take appropriate locks on those relations, + * unless the node is deemed useless in setrefs.c due to having a single + * leaf subplan and thus elided from the final plan, in which case, the set + * is added into PlannerGlobal.elidedAppendPartedRels. + * + * Note that 'apprelids' of those nodes only contains the top-level base + * relation(s), so is not sufficient for said purpose. 
+ */ + + Bitmapset *partitioned_rels; } RelOptInfo; /* diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 0b518ce6b2..bd87c35d6c 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -85,6 +85,11 @@ typedef struct PlannedStmt Node *utilityStmt; /* non-null if this is utility stmt */ + Bitmapset *elidedAppendPartedRels; /* Combined partitioned_rels of all + * single-subplan [Merge]Append nodes + * that have been removed from the + * various plan trees. */ + /* statement location in source string (copied from Query) */ int stmt_location; /* start location, or -1 if unknown */ int stmt_len; /* length in bytes; 0 means "rest of string" */ @@ -261,6 +266,12 @@ typedef struct Append /* Info for run-time subplan pruning; NULL if we're not doing that */ struct PartitionPruneInfo *part_prune_info; + + /* + * RT indexes of all partitioned parents whose partitions' plans are + * present in appendplans. + */ + Bitmapset *partitioned_rels; } Append; /* ---------------- @@ -281,6 +292,12 @@ typedef struct MergeAppend bool *nullsFirst; /* NULLS FIRST/LAST directions */ /* Info for run-time subplan pruning; NULL if we're not doing that */ struct PartitionPruneInfo *part_prune_info; + + /* + * RT indexes of all partitioned parents whose partitions' plans are + * present in mergeplans. + */ + Bitmapset *partitioned_rels; } MergeAppend; /* ---------------- -- 2.24.1