From 8b99146c9b8c4826e1434d3f006597681c24cd45 Mon Sep 17 00:00:00 2001 From: amitlan Date: Thu, 24 Mar 2022 22:47:03 +0900 Subject: [PATCH v8 2/4] Add [Merge]Append.partitioned_rels To record the RT indexes of all partitioned ancestors leading up to leaf partitions that are appended by the node. If a given [Merge]Append node is left out from the plan due to there being only one element in its list of child subplans, then its partitioned_rels set is added to PlannerGlobal.elidedAppendPartedRels that is passed down to the executor through PlannedStmt. There are no users for partitioned_rels and elidedAppendPartedRels as of this commit, though a later commit will require the ability to extract the set of relations that must be locked to make a plan tree safe for execution by walking the plan tree itself, so having the partitioned tables be also present in the plan tree will be helpful. Note that currently the executor relies on the fact that the set of relations to be locked can be obtained by simply scanning the range table that's made available in PlannedStmt along with the plan tree. 
--- src/backend/nodes/copyfuncs.c | 3 +++ src/backend/nodes/outfuncs.c | 5 +++++ src/backend/nodes/readfuncs.c | 3 +++ src/backend/optimizer/path/joinrels.c | 9 ++++++++ src/backend/optimizer/plan/createplan.c | 18 +++++++++++++++- src/backend/optimizer/plan/planner.c | 8 +++++++ src/backend/optimizer/plan/setrefs.c | 28 +++++++++++++++++++++++++ src/backend/optimizer/util/inherit.c | 16 ++++++++++++++ src/backend/optimizer/util/relnode.c | 20 ++++++++++++++++++ src/include/nodes/pathnodes.h | 22 +++++++++++++++++++ src/include/nodes/plannodes.h | 17 +++++++++++++++ 11 files changed, 148 insertions(+), 1 deletion(-) diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 56505557bf..29c515d7db 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -106,6 +106,7 @@ _copyPlannedStmt(const PlannedStmt *from) COPY_NODE_FIELD(invalItems); COPY_NODE_FIELD(paramExecTypes); COPY_NODE_FIELD(utilityStmt); + COPY_BITMAPSET_FIELD(elidedAppendPartedRels); COPY_LOCATION_FIELD(stmt_location); COPY_SCALAR_FIELD(stmt_len); @@ -254,6 +255,7 @@ _copyAppend(const Append *from) COPY_SCALAR_FIELD(nasyncplans); COPY_SCALAR_FIELD(first_partial_plan); COPY_NODE_FIELD(part_prune_info); + COPY_BITMAPSET_FIELD(partitioned_rels); return newnode; } @@ -282,6 +284,7 @@ _copyMergeAppend(const MergeAppend *from) COPY_POINTER_FIELD(collations, from->numCols * sizeof(Oid)); COPY_POINTER_FIELD(nullsFirst, from->numCols * sizeof(bool)); COPY_NODE_FIELD(part_prune_info); + COPY_BITMAPSET_FIELD(partitioned_rels); return newnode; } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 6e39590730..108ede9af9 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -324,6 +324,7 @@ _outPlannedStmt(StringInfo str, const PlannedStmt *node) WRITE_NODE_FIELD(invalItems); WRITE_NODE_FIELD(paramExecTypes); WRITE_NODE_FIELD(utilityStmt); + WRITE_BITMAPSET_FIELD(elidedAppendPartedRels); 
WRITE_LOCATION_FIELD(stmt_location); WRITE_INT_FIELD(stmt_len); } @@ -444,6 +445,7 @@ _outAppend(StringInfo str, const Append *node) WRITE_INT_FIELD(nasyncplans); WRITE_INT_FIELD(first_partial_plan); WRITE_NODE_FIELD(part_prune_info); + WRITE_BITMAPSET_FIELD(partitioned_rels); } static void @@ -461,6 +463,7 @@ _outMergeAppend(StringInfo str, const MergeAppend *node) WRITE_OID_ARRAY(collations, node->numCols); WRITE_BOOL_ARRAY(nullsFirst, node->numCols); WRITE_NODE_FIELD(part_prune_info); + WRITE_BITMAPSET_FIELD(partitioned_rels); } static void @@ -2404,6 +2407,7 @@ _outPlannerGlobal(StringInfo str, const PlannerGlobal *node) WRITE_BOOL_FIELD(parallelModeOK); WRITE_BOOL_FIELD(parallelModeNeeded); WRITE_CHAR_FIELD(maxParallelHazard); + WRITE_BITMAPSET_FIELD(elidedAppendPartedRels); } static void @@ -2515,6 +2519,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node) WRITE_BOOL_FIELD(partbounds_merged); WRITE_BITMAPSET_FIELD(live_parts); WRITE_BITMAPSET_FIELD(all_partrels); + WRITE_BITMAPSET_FIELD(partitioned_rels); } static void diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index c94b2561f0..ce146dd45e 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1794,6 +1794,7 @@ _readPlannedStmt(void) READ_NODE_FIELD(invalItems); READ_NODE_FIELD(paramExecTypes); READ_NODE_FIELD(utilityStmt); + READ_BITMAPSET_FIELD(elidedAppendPartedRels); READ_LOCATION_FIELD(stmt_location); READ_INT_FIELD(stmt_len); @@ -1917,6 +1918,7 @@ _readAppend(void) READ_INT_FIELD(nasyncplans); READ_INT_FIELD(first_partial_plan); READ_NODE_FIELD(part_prune_info); + READ_BITMAPSET_FIELD(partitioned_rels); READ_DONE(); } @@ -1939,6 +1941,7 @@ _readMergeAppend(void) READ_OID_ARRAY(collations, local_node->numCols); READ_BOOL_ARRAY(nullsFirst, local_node->numCols); READ_NODE_FIELD(part_prune_info); + READ_BITMAPSET_FIELD(partitioned_rels); READ_DONE(); } diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c 
index 9da3ff2f9a..e74d40fee3 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -1549,6 +1549,15 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, populate_joinrel_with_paths(root, child_rel1, child_rel2, child_joinrel, child_sjinfo, child_restrictlist); + + /* + * A parent relation's partitioned_rels must be a superset of the sets + * of all its children, direct or indirect, so bubble up the child + * joinrel's set. + */ + joinrel->partitioned_rels = + bms_add_members(joinrel->partitioned_rels, + child_joinrel->partitioned_rels); } } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 179c87c671..99868a1a79 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -26,10 +26,12 @@ #include "nodes/extensible.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "optimizer/appendinfo.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/optimizer.h" #include "optimizer/paramassign.h" +#include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/placeholder.h" #include "optimizer/plancat.h" @@ -1332,11 +1334,11 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) best_path->subpaths, prunequal); } - plan->appendplans = subplans; plan->nasyncplans = nasyncplans; plan->first_partial_plan = best_path->first_partial_path; plan->part_prune_info = partpruneinfo; + plan->partitioned_rels = bms_copy(rel->partitioned_rels); copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -1500,6 +1502,20 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, node->mergeplans = subplans; node->part_prune_info = partpruneinfo; + /* + * We need to explicitly add to the plan node the RT indexes of any + * partitioned tables whose partitions will be scanned by the nodes in + * 'subplans'. 
There can be multiple RT indexes in the set due to the + * partition tree being multi-level and/or this being a plan for UNION ALL + * over multiple partition trees. Along with scanrelids of leaf-level Scan + * nodes, this allows the executor to lock the full set of relations being + * scanned by this node. + * + * Note that 'apprelids' only contains the top-level base relation(s), so + * is not sufficient for the purpose. + */ + node->partitioned_rels = bms_copy(rel->partitioned_rels); + /* * If prepare_sort_from_pathkeys added sort columns, but we were told to * produce either the exact tlist or a narrow tlist, we should get rid of diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index b2569c5d0c..c769b4b4b9 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -529,6 +529,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, result->paramExecTypes = glob->paramExecTypes; /* utilityStmt should be null, but we might as well copy it */ result->utilityStmt = parse->utilityStmt; + result->elidedAppendPartedRels = glob->elidedAppendPartedRels; result->stmt_location = parse->stmt_location; result->stmt_len = parse->stmt_len; @@ -7534,6 +7535,13 @@ create_partitionwise_grouping_paths(PlannerInfo *root, add_paths_to_append_rel(root, grouped_rel, grouped_live_children); } + + /* + * Input rel might be a partitioned appendrel, though grouped_rel has at + * this point taken its role as the appendrel owning the former's + * children, so copy the former's partitioned_rels set into the latter.
+ */ + grouped_rel->partitioned_rels = bms_copy(input_rel->partitioned_rels); } /* diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index bf4c722c02..8214edec54 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -1574,6 +1574,10 @@ set_append_references(PlannerInfo *root, lfirst(l) = set_plan_refs(root, (Plan *) lfirst(l), rtoffset); } + /* Fix up partitioned_rels before possibly removing the Append below. */ + aplan->partitioned_rels = offset_relid_set(aplan->partitioned_rels, + rtoffset); + /* * See if it's safe to get rid of the Append entirely. For this to be * safe, there must be only one child plan and that child plan's parallel @@ -1584,8 +1588,17 @@ set_append_references(PlannerInfo *root, */ if (list_length(aplan->appendplans) == 1 && ((Plan *) linitial(aplan->appendplans))->parallel_aware == aplan->plan.parallel_aware) + { + /* + * Partitioned tables involved, if any, must be made known to the + * executor. + */ + root->glob->elidedAppendPartedRels = + bms_add_members(root->glob->elidedAppendPartedRels, + aplan->partitioned_rels); return clean_up_removed_plan_level((Plan *) aplan, (Plan *) linitial(aplan->appendplans)); + } /* * Otherwise, clean up the Append as needed. It's okay to do this after @@ -1646,6 +1659,12 @@ set_mergeappend_references(PlannerInfo *root, lfirst(l) = set_plan_refs(root, (Plan *) lfirst(l), rtoffset); } + /* + * Fix up partitioned_rels before possibly removing the MergeAppend below. + */ + mplan->partitioned_rels = offset_relid_set(mplan->partitioned_rels, + rtoffset); + /* * See if it's safe to get rid of the MergeAppend entirely.
For this to * be safe, there must be only one child plan and that child plan's @@ -1656,8 +1675,17 @@ set_mergeappend_references(PlannerInfo *root, */ if (list_length(mplan->mergeplans) == 1 && ((Plan *) linitial(mplan->mergeplans))->parallel_aware == mplan->plan.parallel_aware) + { + /* + * Partitioned tables involved, if any, must be made known to the + * executor. + */ + root->glob->elidedAppendPartedRels = + bms_add_members(root->glob->elidedAppendPartedRels, + mplan->partitioned_rels); return clean_up_removed_plan_level((Plan *) mplan, (Plan *) linitial(mplan->mergeplans)); + } /* * Otherwise, clean up the MergeAppend as needed. It's okay to do this diff --git a/src/backend/optimizer/util/inherit.c b/src/backend/optimizer/util/inherit.c index 7e134822f3..56912e4101 100644 --- a/src/backend/optimizer/util/inherit.c +++ b/src/backend/optimizer/util/inherit.c @@ -406,6 +406,14 @@ expand_partitioned_rtentry(PlannerInfo *root, RelOptInfo *relinfo, childrte, childRTindex, childrel, top_parentrc, lockmode); + /* + * A parent relation's partitioned_rels must be a superset of the sets + * of all its children, direct or indirect, so bubble up the child + * rel's set. + */ + relinfo->partitioned_rels = bms_add_members(relinfo->partitioned_rels, + childrelinfo->partitioned_rels); + /* Close child relation, but keep locks */ table_close(childrel, NoLock); } @@ -737,6 +745,14 @@ expand_appendrel_subquery(PlannerInfo *root, RelOptInfo *rel, /* Child may itself be an inherited rel, either table or subquery. */ if (childrte->inh) expand_inherited_rtentry(root, childrel, childrte, childRTindex); + + /* + * A parent relation's partitioned_rels must be a superset of the sets + * of all its children, direct or indirect, so bubble up the child + * rel's set. 
+ */ + rel->partitioned_rels = bms_add_members(rel->partitioned_rels, + childrel->partitioned_rels); } } diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 520409f4ba..1d082a8fdd 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -361,6 +361,10 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) } } + /* A partitioned appendrel. */ + if (rel->part_scheme != NULL) + rel->partitioned_rels = bms_copy(rel->relids); + /* Save the finished struct in the query's simple_rel_array */ root->simple_rel_array[relid] = rel; @@ -729,6 +733,14 @@ build_join_rel(PlannerInfo *root, set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel, sjinfo, restrictlist); + /* + * The joinrel may get processed as an appendrel via partitionwise join + * if both outer and inner rels are partitioned, so set partitioned_rels + * appropriately. + */ + joinrel->partitioned_rels = bms_union(outer_rel->partitioned_rels, + inner_rel->partitioned_rels); + /* * Set the consider_parallel flag if this joinrel could potentially be * scanned within a parallel worker. If this flag is false for either @@ -897,6 +909,14 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel, sjinfo, restrictlist); + /* + * The joinrel may get processed as an appendrel via partitionwise join + * if both outer and inner rels are partitioned, so set partitioned_rels + * appropriately. + */ + joinrel->partitioned_rels = bms_union(outer_rel->partitioned_rels, + inner_rel->partitioned_rels); + /* We build the join only once. 
*/ Assert(!find_join_rel(root, joinrel->relids)); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 6cbcb67bdf..ef9b54739a 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -130,6 +130,11 @@ typedef struct PlannerGlobal char maxParallelHazard; /* worst PROPARALLEL hazard level */ PartitionDirectory partition_directory; /* partition descriptors */ + + Bitmapset *elidedAppendPartedRels; /* Combined partitioned_rels of all + * single-subplan [Merge]Append nodes + * that have been removed from the + * various plan trees. */ } PlannerGlobal; /* macro for fetching the Plan associated with a SubPlan node */ @@ -773,6 +778,23 @@ typedef struct RelOptInfo Relids all_partrels; /* Relids set of all partition relids */ List **partexprs; /* Non-nullable partition key expressions */ List **nullable_partexprs; /* Nullable partition key expressions */ + + /* + * For an appendrel parent relation (base, join, or upper) that is + * partitioned, this stores the RT indexes of all the partitioned ancestors + * including itself that lead up to the individual leaf partitions that + * will be scanned to produce this relation's output rows. The relid set + * is copied into the resulting Append or MergeAppend plan node for + * allowing the executor to take appropriate locks on those relations, + * unless the node is deemed useless in setrefs.c due to having a single + * leaf subplan and thus elided from the final plan, in which case, the set + * is added into PlannerGlobal.elidedAppendPartedRels. + * + * Note that 'apprelids' of those nodes only contains the top-level base + * relation(s), so is not sufficient for said purpose.
+ */ + + Bitmapset *partitioned_rels; } RelOptInfo; /* diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 50ef3dda05..a823c7c20d 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -86,6 +86,11 @@ typedef struct PlannedStmt Node *utilityStmt; /* non-null if this is utility stmt */ + Bitmapset *elidedAppendPartedRels; /* Combined partitioned_rels of all + * single-subplan [Merge]Append nodes + * that have been removed from the + * various plan trees. */ + /* statement location in source string (copied from Query) */ int stmt_location; /* start location, or -1 if unknown */ int stmt_len; /* length in bytes; 0 means "rest of string" */ @@ -264,6 +269,12 @@ typedef struct Append /* Info for run-time subplan pruning; NULL if we're not doing that */ struct PartitionPruneInfo *part_prune_info; + + /* + * RT indexes of all partitioned parents whose partitions' plans are + * present in appendplans. + */ + Bitmapset *partitioned_rels; } Append; /* ---------------- @@ -284,6 +295,12 @@ typedef struct MergeAppend bool *nullsFirst; /* NULLS FIRST/LAST directions */ /* Info for run-time subplan pruning; NULL if we're not doing that */ struct PartitionPruneInfo *part_prune_info; + + /* + * RT indexes of all partitioned parents whose partitions' plans are + * present in mergeplans. + */ + Bitmapset *partitioned_rels; } MergeAppend; /* ---------------- -- 2.24.1