From ea9cd0625c7c8741f8655f08750ea80b81fac018 Mon Sep 17 00:00:00 2001 From: "dgrowley@gmail.com" Date: Fri, 6 Apr 2018 22:30:19 +1200 Subject: [PATCH v21 5/5] Improve planning speed of partitioned table UPDATE/DELETEs By making a call to grouping_planner for the complete parse of the query, we can make use of the faster partition pruning code used there. This identifies every partition which can be pruned by marking it as an IS_DUMMY_REL, and for each such partition we can skip the individual grouping_planner call inside inheritance_planner. This can improve planner performance significantly when there are many partitions. There may be a slight slowdown when no partitions can be pruned or when there are very few (1 or 2) partitions. However, it seems better to optimize the case where partitions are pruned rather than the case where they are not, since queries which cannot prune any partitions are less likely to be fast to execute anyway. The case of partitioned tables with just 1 or 2 leaf partitions does not seem worth worrying about too much; the measured regression with 1 partition was just 10% of overall planning time. This commit also implements run-time partition pruning for UPDATE/DELETE. 
--- src/backend/commands/explain.c | 4 +- src/backend/executor/execMerge.c | 6 +- src/backend/executor/execPartition.c | 18 +-- src/backend/executor/nodeModifyTable.c | 217 +++++++++++++++++++++----- src/backend/nodes/copyfuncs.c | 1 + src/backend/nodes/outfuncs.c | 1 + src/backend/nodes/readfuncs.c | 1 + src/backend/optimizer/plan/createplan.c | 36 ++++- src/backend/optimizer/plan/planner.c | 59 +++++++ src/backend/optimizer/util/partprune.c | 40 +++-- src/include/nodes/execnodes.h | 13 +- src/include/nodes/plannodes.h | 2 + src/include/optimizer/partprune.h | 3 +- src/test/regress/expected/partition_prune.out | 110 +++++++++++++ src/test/regress/sql/partition_prune.sql | 34 ++++ 15 files changed, 472 insertions(+), 73 deletions(-) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 549622da93..12c933056d 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -3028,14 +3028,14 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors, /* Should we explicitly label target relations? 
*/ labeltargets = (mtstate->mt_nplans > 1 || (mtstate->mt_nplans == 1 && - mtstate->resultRelInfo->ri_RangeTableIndex != node->nominalRelation)); + mtstate->resultRelInfos[0]->ri_RangeTableIndex != node->nominalRelation)); if (labeltargets) ExplainOpenGroup("Target Tables", "Target Tables", false, es); for (j = 0; j < mtstate->mt_nplans; j++) { - ResultRelInfo *resultRelInfo = mtstate->resultRelInfo + j; + ResultRelInfo *resultRelInfo = mtstate->resultRelInfos[j]; FdwRoutine *fdwroutine = resultRelInfo->ri_FdwRoutine; if (labeltargets) diff --git a/src/backend/executor/execMerge.c b/src/backend/executor/execMerge.c index d39ddd3034..7ff1bfc96b 100644 --- a/src/backend/executor/execMerge.c +++ b/src/backend/executor/execMerge.c @@ -203,8 +203,8 @@ ExecMergeMatched(ModifyTableState *mtstate, EState *estate, if (resultRelInfo == NULL) { resultRelInfo = ExecInitPartitionInfo(mtstate, - mtstate->resultRelInfo, - proute, estate, leaf_part_index); + mtstate->resultRelInfos[0], + proute, estate, leaf_part_index); Assert(resultRelInfo != NULL); } } @@ -500,7 +500,7 @@ ExecMergeNotMatched(ModifyTableState *mtstate, EState *estate, * the currently active result relation, which corresponds to the root * of the partition tree. 
*/ - resultRelInfo = mtstate->resultRelInfo; + resultRelInfo = mtstate->resultRelInfos[0]; /* * For INSERT actions, root relation's merge action is OK since the diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 6cfd026474..f76edd3523 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -101,7 +101,7 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel) if (is_update) { - update_rri = mtstate->resultRelInfo; + update_rri = mtstate->resultRelInfos[0]; num_update_rri = list_length(node->plans); proute->subplan_partition_offsets = palloc(num_update_rri * sizeof(int)); @@ -421,8 +421,8 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, List *wcoList; List *wcoExprs = NIL; ListCell *ll; - int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex; - Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc; + int firstVarno = mtstate->resultRelInfos[0]->ri_RangeTableIndex; + Relation firstResultRel = mtstate->resultRelInfos[0]->ri_RelationDesc; /* * In the case of INSERT on a partitioned table, there is only one @@ -477,8 +477,8 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, TupleTableSlot *slot; ExprContext *econtext; List *returningList; - int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex; - Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc; + int firstVarno = mtstate->resultRelInfos[0]->ri_RangeTableIndex; + Relation firstResultRel = mtstate->resultRelInfos[0]->ri_RelationDesc; /* See the comment above for WCO lists. 
*/ Assert((node->operation == CMD_INSERT && @@ -533,8 +533,8 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, if (node && node->onConflictAction != ONCONFLICT_NONE) { TupleConversionMap *map = proute->parent_child_tupconv_maps[partidx]; - int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex; - Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc; + int firstVarno = mtstate->resultRelInfos[0]->ri_RangeTableIndex; + Relation firstResultRel = mtstate->resultRelInfos[0]->ri_RelationDesc; TupleDesc partrelDesc = RelationGetDescr(partrel); ExprContext *econtext = mtstate->ps.ps_ExprContext; ListCell *lc; @@ -674,8 +674,8 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, { TupleDesc partrelDesc = RelationGetDescr(partrel); TupleConversionMap *map = proute->parent_child_tupconv_maps[partidx]; - int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex; - Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc; + int firstVarno = mtstate->resultRelInfos[0]->ri_RangeTableIndex; + Relation firstResultRel = mtstate->resultRelInfos[0]->ri_RelationDesc; /* * If the root parent and partition have the same tuple diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 0ebf37bd24..7f5d7a1bf4 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -54,6 +54,9 @@ #include "utils/rel.h" #include "utils/tqual.h" +/* Special values for mt_whichplan */ +#define WHICHPLAN_CHOOSE_PARTITIONS -1 +#define WHICHPLAN_NO_MATCHING_PARTITIONS -2 static bool ExecOnConflictUpdate(ModifyTableState *mtstate, ResultRelInfo *resultRelInfo, @@ -1214,12 +1217,12 @@ lreplace:; map_index = resultRelInfo->ri_PartitionLeafIndex; Assert(mtstate->rootResultRelInfo == NULL); tupconv_map = TupConvMapForLeaf(proute, - mtstate->resultRelInfo, - map_index); + mtstate->resultRelInfos[0], + map_index); } else { - map_index = resultRelInfo - mtstate->resultRelInfo; + map_index = 
mtstate->mt_whichplan; Assert(map_index >= 0 && map_index < mtstate->mt_nplans); tupconv_map = tupconv_map_for_subplan(mtstate, map_index); } @@ -1632,12 +1635,12 @@ static void fireBSTriggers(ModifyTableState *node) { ModifyTable *plan = (ModifyTable *) node->ps.plan; - ResultRelInfo *resultRelInfo = node->resultRelInfo; + ResultRelInfo *resultRelInfo = node->resultRelInfos[0]; /* * If the node modifies a partitioned table, we must fire its triggers. - * Note that in that case, node->resultRelInfo points to the first leaf - * partition, not the root table. + * Note that in that case, node->resultRelInfos[0] points to the first + * leaf partition, not the root table. */ if (node->rootResultRelInfo != NULL) resultRelInfo = node->rootResultRelInfo; @@ -1683,13 +1686,14 @@ static ResultRelInfo * getTargetResultRelInfo(ModifyTableState *node) { /* - * Note that if the node modifies a partitioned table, node->resultRelInfo - * points to the first leaf partition, not the root table. + * Note that if the node modifies a partitioned table, + * node->resultRelInfos[0] points to the first leaf partition, not the + * root table. 
*/ if (node->rootResultRelInfo != NULL) return node->rootResultRelInfo; else - return node->resultRelInfo; + return node->resultRelInfos[0]; } /* @@ -1910,7 +1914,7 @@ static void ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate) { ResultRelInfo *targetRelInfo = getTargetResultRelInfo(mtstate); - ResultRelInfo *resultRelInfos = mtstate->resultRelInfo; + ResultRelInfo **resultRelInfos = mtstate->resultRelInfos; TupleDesc outdesc; int numResultRelInfos = mtstate->mt_nplans; int i; @@ -1941,7 +1945,7 @@ ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate) for (i = 0; i < numResultRelInfos; ++i) { mtstate->mt_per_subplan_tupconv_maps[i] = - convert_tuples_by_name(RelationGetDescr(resultRelInfos[i].ri_RelationDesc), + convert_tuples_by_name(RelationGetDescr(resultRelInfos[i]->ri_RelationDesc), outdesc, gettext_noop("could not convert row type")); } @@ -2079,8 +2083,47 @@ ExecModifyTable(PlanState *pstate) node->fireBSTriggers = false; } + if (node->mt_whichplan < 0) + { + /* Handle choosing the valid subpartitions */ + if (node->mt_whichplan == WHICHPLAN_CHOOSE_PARTITIONS) + { + PartitionPruning *pprune = node->partition_pruning; + + /* There should always be at least one */ + Assert(node->mt_nplans > 0); + + /* + * When partition pruning is enabled and exec params match the + * partition key then determine the minimum set of matching + * subnodes. Otherwise we match to all subnodes. 
+ */ + if (pprune != NULL && !bms_is_empty(pprune->execparams)) + { + node->mt_valid_subplans = ExecFindMatchingSubPlans(pprune); + node->mt_whichplan = bms_next_member(node->mt_valid_subplans, -1); + + /* If no subplan matches these params then we're done */ + if (node->mt_whichplan < 0) + goto done; + } + else + { + node->mt_valid_subplans = bms_add_range(NULL, 0, + node->mt_nplans - 1); + node->mt_whichplan = 0; + } + } + + /* partition pruning determined that no partitions match */ + else if (node->mt_whichplan == WHICHPLAN_NO_MATCHING_PARTITIONS) + goto done; + else + elog(ERROR, "invalid subplan index: %d", node->mt_whichplan); + } + /* Preload local variables */ - resultRelInfo = node->resultRelInfo + node->mt_whichplan; + resultRelInfo = node->resultRelInfos[node->mt_whichplan]; subplanstate = node->mt_plans[node->mt_whichplan]; junkfilter = resultRelInfo->ri_junkFilter; @@ -2114,11 +2157,12 @@ ExecModifyTable(PlanState *pstate) if (TupIsNull(planSlot)) { /* advance to next subplan if any */ - node->mt_whichplan++; + node->mt_whichplan = bms_next_member(node->mt_valid_subplans, + node->mt_whichplan); - if (node->mt_whichplan < node->mt_nplans) + if (node->mt_whichplan >= 0) { - resultRelInfo++; + resultRelInfo = node->resultRelInfos[node->mt_whichplan]; subplanstate = node->mt_plans[node->mt_whichplan]; junkfilter = resultRelInfo->ri_junkFilter; estate->es_result_relation_info = resultRelInfo; @@ -2285,6 +2329,8 @@ ExecModifyTable(PlanState *pstate) /* Restore es_result_relation_info before exiting */ estate->es_result_relation_info = saved_resultRelInfo; +done: + /* * We're done, but fire AFTER STATEMENT triggers before exiting. 
*/ @@ -2309,9 +2355,11 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) ResultRelInfo *resultRelInfo; Plan *subplan; ListCell *l; - int i; + int i, + j; Relation rel; bool update_tuple_routing_needed = node->partColsUpdated; + Bitmapset *validsubplans; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); @@ -2327,9 +2375,73 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) mtstate->operation = operation; mtstate->canSetTag = node->canSetTag; mtstate->mt_done = false; + mtstate->mt_whichplan = WHICHPLAN_CHOOSE_PARTITIONS; + + /* If run-time partition pruning is enabled, then set that up now */ + if (node->part_prune_infos != NIL) + { + PartitionPruning *pprune; + + ExecAssignExprContext(estate, &mtstate->ps); + + pprune = ExecSetupPartitionPruning(&mtstate->ps, + node->part_prune_infos); + + /* + * When there are external params matching the partition key we may be + * able to prune away ModifyTable plans. + */ + if (!bms_is_empty(pprune->extparams)) + { + /* Determine which subplans match the external params */ + validsubplans = ExecFindInitialMatchingSubPlans(pprune, + list_length(node->plans)); + + /* + * If no plans match the given parameters then we must handle this + * case in a special way. The problem here is that code in + * explain.c requires a ModifyTable to have at least one plan in + * order for it to properly determine the Vars in that plan's + * targetlist. We sidestep this issue by just initializing the + * first subplan and setting a special value for mt_whichplan to cause + * it to never actually scan any subnodes. We can't simply set + * mt_done as we still need to fire any statement-level triggers. 
+ */ + if (bms_is_empty(validsubplans)) + { + mtstate->mt_whichplan = WHICHPLAN_NO_MATCHING_PARTITIONS; + + /* Mark the first as valid so that it's initialized below */ + validsubplans = bms_make_singleton(0); + } + + nplans = bms_num_members(validsubplans); + } + else + { + /* We'll need to initialize all subplans */ + nplans = list_length(node->plans); + validsubplans = bms_add_range(NULL, 0, nplans - 1); + } + + mtstate->partition_pruning = pprune; + } + else + { + nplans = list_length(node->plans); + + /* + * When run-time partition pruning is not enabled we can just mark all + * plans as valid, they must also all be initialized. + */ + validsubplans = bms_add_range(NULL, 0, nplans - 1); + mtstate->partition_pruning = NULL; + } + mtstate->mt_plans = (PlanState **) palloc0(sizeof(PlanState *) * nplans); - mtstate->resultRelInfo = estate->es_result_relations + node->resultRelIndex; + mtstate->resultRelInfos = (ResultRelInfo **) + palloc(sizeof(ResultRelInfo *) * nplans); /* If modifying a partitioned table, initialize the root table info */ if (node->rootResultRelIndex >= 0) @@ -2353,8 +2465,6 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) */ saved_resultRelInfo = estate->es_result_relation_info; - resultRelInfo = mtstate->resultRelInfo; - /* * mergeTargetRelation must be set if we're running MERGE and mustn't be * set if we're not. 
@@ -2362,13 +2472,20 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) Assert(operation != CMD_MERGE || node->mergeTargetRelation > 0); Assert(operation == CMD_MERGE || node->mergeTargetRelation == 0); - resultRelInfo->ri_mergeTargetRTI = node->mergeTargetRelation; - - i = 0; + j = i = 0; foreach(l, node->plans) { + if (!bms_is_member(i, validsubplans)) + { + i++; + continue; + } + subplan = (Plan *) lfirst(l); + resultRelInfo = estate->es_result_relations + node->resultRelIndex + i; + mtstate->resultRelInfos[j] = resultRelInfo; + /* Initialize the usesFdwDirectModify flag */ resultRelInfo->ri_usesFdwDirectModify = bms_is_member(i, node->fdwDirectModifyPlans); @@ -2405,7 +2522,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) /* Now init the plan for this result rel */ estate->es_result_relation_info = resultRelInfo; - mtstate->mt_plans[i] = ExecInitNode(subplan, estate, eflags); + mtstate->mt_plans[j] = ExecInitNode(subplan, estate, eflags); /* Also let FDWs init themselves for foreign-table result rels */ if (!resultRelInfo->ri_usesFdwDirectModify && @@ -2421,10 +2538,12 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) eflags); } - resultRelInfo++; i++; + j++; } + mtstate->resultRelInfos[0]->ri_mergeTargetRTI = node->mergeTargetRelation; + estate->es_result_relation_info = saved_resultRelInfo; /* Get the target relation */ @@ -2477,26 +2596,34 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) /* * Initialize any WITH CHECK OPTION constraints if needed. 
*/ - resultRelInfo = mtstate->resultRelInfo; - i = 0; + j = i = 0; foreach(l, node->withCheckOptionLists) { - List *wcoList = (List *) lfirst(l); + List *wcoList; List *wcoExprs = NIL; ListCell *ll; + if (!bms_is_member(i, validsubplans)) + { + i++; + continue; + } + + wcoList = (List *) lfirst(l); + foreach(ll, wcoList) { WithCheckOption *wco = (WithCheckOption *) lfirst(ll); ExprState *wcoExpr = ExecInitQual((List *) wco->qual, - mtstate->mt_plans[i]); + mtstate->mt_plans[j]); wcoExprs = lappend(wcoExprs, wcoExpr); } - + resultRelInfo = mtstate->resultRelInfos[j]; resultRelInfo->ri_WithCheckOptions = wcoList; resultRelInfo->ri_WithCheckOptionExprs = wcoExprs; - resultRelInfo++; + + j++; i++; } @@ -2526,15 +2653,25 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) /* * Build a projection for each result rel. */ - resultRelInfo = mtstate->resultRelInfo; + j = i = 0; foreach(l, node->returningLists) { - List *rlist = (List *) lfirst(l); + List *rlist; + + if (!bms_is_member(i, validsubplans)) + { + i++; + continue; + } + rlist = (List *) lfirst(l); + + resultRelInfo = mtstate->resultRelInfos[j]; resultRelInfo->ri_projectReturning = ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps, resultRelInfo->ri_RelationDesc->rd_att); - resultRelInfo++; + j++; + i++; } } else @@ -2545,12 +2682,10 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) */ mtstate->ps.plan->targetlist = NIL; ExecInitResultTupleSlotTL(estate, &mtstate->ps); - - mtstate->ps.ps_ExprContext = NULL; } /* Set the list of arbiter indexes if needed for ON CONFLICT */ - resultRelInfo = mtstate->resultRelInfo; + resultRelInfo = mtstate->resultRelInfos[0]; if (node->onConflictAction != ONCONFLICT_NONE) resultRelInfo->ri_onConflictArbiterIndexes = node->arbiterIndexes; @@ -2654,12 +2789,11 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) } } - resultRelInfo = mtstate->resultRelInfo; + resultRelInfo = mtstate->resultRelInfos[0]; if 
(mtstate->operation == CMD_MERGE) ExecInitMerge(mtstate, estate, resultRelInfo); /* select first subplan */ - mtstate->mt_whichplan = 0; subplan = (Plan *) linitial(node->plans); EvalPlanQualSetPlan(&mtstate->mt_epqstate, subplan, mtstate->mt_arowmarks[0]); @@ -2709,11 +2843,11 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) if (junk_filter_needed) { - resultRelInfo = mtstate->resultRelInfo; for (i = 0; i < nplans; i++) { JunkFilter *j; + resultRelInfo = mtstate->resultRelInfos[i]; subplan = mtstate->mt_plans[i]->plan; if (operation == CMD_INSERT || operation == CMD_UPDATE) @@ -2766,13 +2900,12 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) } resultRelInfo->ri_junkFilter = j; - resultRelInfo++; } } else { if (operation == CMD_INSERT) - ExecCheckPlanOutput(mtstate->resultRelInfo->ri_RelationDesc, + ExecCheckPlanOutput(mtstate->resultRelInfos[0]->ri_RelationDesc, subplan->targetlist); } } @@ -2819,7 +2952,7 @@ ExecEndModifyTable(ModifyTableState *node) */ for (i = 0; i < node->mt_nplans; i++) { - ResultRelInfo *resultRelInfo = node->resultRelInfo + i; + ResultRelInfo *resultRelInfo = node->resultRelInfos[i]; if (!resultRelInfo->ri_usesFdwDirectModify && resultRelInfo->ri_FdwRoutine != NULL && diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 57222d8f74..3cd8dcb4f1 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -225,6 +225,7 @@ _copyModifyTable(const ModifyTable *from) COPY_NODE_FIELD(exclRelTlist); COPY_NODE_FIELD(mergeSourceTargetList); COPY_NODE_FIELD(mergeActionList); + COPY_NODE_FIELD(part_prune_infos); return newnode; } diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 83e67627ad..02ec025953 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -393,6 +393,7 @@ _outModifyTable(StringInfo str, const ModifyTable *node) WRITE_NODE_FIELD(exclRelTlist); WRITE_NODE_FIELD(mergeSourceTargetList); 
WRITE_NODE_FIELD(mergeActionList); + WRITE_NODE_FIELD(part_prune_infos); } static void diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 1667881127..426e825229 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1640,6 +1640,7 @@ _readModifyTable(void) READ_NODE_FIELD(exclRelTlist); READ_NODE_FIELD(mergeSourceTargetList); READ_NODE_FIELD(mergeActionList); + READ_NODE_FIELD(part_prune_infos); READ_DONE(); } diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 093ceaa867..cd75c59496 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -295,7 +295,8 @@ static ModifyTable *make_modifytable(PlannerInfo *root, List *withCheckOptionLists, List *returningLists, List *rowMarks, OnConflictExpr *onconflict, List *mergeSourceTargetList, - List *mergeActionList, int epqParam); + List *mergeActionList, int epqParam, + List *partpruneinfos); static GatherMerge *create_gather_merge_plan(PlannerInfo *root, GatherMergePath *best_path); @@ -1108,7 +1109,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) */ if (prunequal != NIL) partpruneinfos = make_partition_pruneinfo(root, - best_path->partitioned_rels, + best_path->partitioned_rels, NIL, best_path->subpaths, prunequal); } @@ -1258,7 +1259,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path) */ if (prunequal != NIL) partpruneinfos = make_partition_pruneinfo(root, - best_path->partitioned_rels, + best_path->partitioned_rels, NIL, best_path->subpaths, prunequal); } @@ -2484,6 +2485,7 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) List *subplans = NIL; ListCell *subpaths, *subroots; + List *partpruneinfos = NIL; /* Build the plan for each input path */ forboth(subpaths, best_path->subpaths, @@ -2512,6 +2514,27 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) subplans = lappend(subplans, 
subplan); } + if (best_path->partitioned_rels != NIL) + { + int partrelid = linitial_int(best_path->partitioned_rels); + RelOptInfo *rel = root->simple_rel_array[partrelid]; + List *prunequal = NIL; + + prunequal = extract_actual_clauses(rel->baserestrictinfo, false); + + /* + * If any quals exist, then these may be useful to allow us to perform + * further partition pruning during execution. We'll generate a + * PartitionPruneInfo for each partitioned rel to store these quals + * and allow translation of partition indexes into subpath indexes. + */ + if (prunequal != NIL) + partpruneinfos = make_partition_pruneinfo(root, + best_path->partitioned_rels, + best_path->resultRelations, + best_path->subpaths, prunequal); + } + plan = make_modifytable(root, best_path->operation, best_path->canSetTag, @@ -2527,7 +2550,8 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) best_path->onconflict, best_path->mergeSourceTargetList, best_path->mergeActionList, - best_path->epqParam); + best_path->epqParam, + partpruneinfos); copy_generic_path_info(&plan->plan, &best_path->path); @@ -6600,7 +6624,8 @@ make_modifytable(PlannerInfo *root, List *withCheckOptionLists, List *returningLists, List *rowMarks, OnConflictExpr *onconflict, List *mergeSourceTargetList, - List *mergeActionList, int epqParam) + List *mergeActionList, int epqParam, + List *partpruneinfos) { ModifyTable *node = makeNode(ModifyTable); List *fdw_private_list; @@ -6662,6 +6687,7 @@ make_modifytable(PlannerInfo *root, node->mergeSourceTargetList = mergeSourceTargetList; node->mergeActionList = mergeActionList; node->epqParam = epqParam; + node->part_prune_infos = partpruneinfos; /* * For each result relation that is a foreign table, allow the FDW to diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 421dc79cc4..5cfc665347 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1197,6 +1197,7 @@ 
inheritance_planner(PlannerInfo *root) Query *parent_parse; Bitmapset *parent_relids = bms_make_singleton(top_parentRTindex); PlannerInfo **parent_roots = NULL; + PlannerInfo *partition_root = NULL; Assert(parse->commandType != CMD_INSERT); @@ -1274,6 +1275,32 @@ inheritance_planner(PlannerInfo *root) * the ModifyTable node, if one is needed at all. */ partitioned_relids = bms_make_singleton(top_parentRTindex); + + /* + * For partitioned tables, since we're able to determine the minimum + * set of partitions required much more easily than what we can do + * with an inheritance hierarchy, we invoke the grouping_planner on + * the entire given query in order to determine the minimum set of + * partitions which will be required below. This may mean that we + * invoke the grouping planner far fewer times, as otherwise we'd have + * to invoke it once for each partition. + */ + + /* + * Since the planner tends to scribble on the parse, we must make a + * copy of it. We also must make copies of the PlannerInfo and + * PlannerGlobal since these will also be modified from the call to + * grouping_planner. + */ + partition_root = makeNode(PlannerInfo); + partition_root->glob = makeNode(PlannerGlobal); + + memcpy(partition_root, root, sizeof(PlannerInfo)); + memcpy(partition_root->glob, root->glob, sizeof(PlannerGlobal)); + + partition_root->parse = copyObject(partition_root->parse); + + grouping_planner(partition_root, true, 0.0 /* retrieve all tuples */ ); } /* @@ -1304,6 +1331,21 @@ inheritance_planner(PlannerInfo *root) if (!bms_is_member(appinfo->parent_relid, parent_relids)) continue; + /* + * If the target rel is a partitioned table then skip any child + * partitions which were found to be dummies by the grouping_planner + * call performed above. 
+ */ + if (partition_root) + { + RelOptInfo *rel; + + rel = find_base_rel(partition_root, appinfo->child_relid); + + if (IS_DUMMY_REL(rel)) + continue; + } + /* * expand_inherited_rtentry() always processes a parent before any of * that parent's children, so the parent_root for this relation should @@ -1629,6 +1671,23 @@ inheritance_planner(PlannerInfo *root) Assert(list_length(partitioned_rels) >= 1); } + /* + * The individual grouping_planner calls per partition above performed no + * planning on the partitioned tables themselves. However, to allow + * partition pruning at run-time we must know the baserestrictinfo of each + * partitioned table. We therefore take the RelOptInfos from the initial + * full plan generated above and use them to replace the incomplete + * RelOptInfos which are stored in root. + */ + if (partition_root) + { + int i; + + i = -1; + while ((i = bms_next_member(partitioned_relids, i)) >= 0) + root->simple_rel_array[i] = partition_root->simple_rel_array[i]; + } + /* Create Path representing a ModifyTable to do the UPDATE/DELETE work */ add_path(final_rel, (Path *) create_modifytable_path(root, final_rel, diff --git a/src/backend/optimizer/util/partprune.c b/src/backend/optimizer/util/partprune.c index 390efdc7a8..34e3684d4a 100644 --- a/src/backend/optimizer/util/partprune.c +++ b/src/backend/optimizer/util/partprune.c @@ -244,7 +244,10 @@ generate_partition_pruning_steps(RelOptInfo *rel, List *clauses, * * Here we generate partition pruning steps for 'prunequal' and also build a * data stucture which allows mapping of partition indexes into 'subpaths' - * indexes. + * or 'resultRelations' indexes. + * + * If 'resultRelations' is non-NIL, then this List of relids is used to build + * the mapping structures. Otherwise the 'subpaths' List is used. * * If no Params were found to match the partition key in any of the * 'partitioned_rels', then we return NIL. 
In such a case run-time partition @@ -252,7 +255,8 @@ generate_partition_pruning_steps(RelOptInfo *rel, List *clauses, */ List * make_partition_pruneinfo(PlannerInfo *root, List *partition_rels, - List *subpaths, List *prunequal) + List *resultRelations, List *subpaths, + List *prunequal) { RelOptInfo *targetpart = NULL; ListCell *lc; @@ -269,16 +273,34 @@ make_partition_pruneinfo(PlannerInfo *root, List *partition_rels, relid_subnode_map = palloc0(sizeof(int) * root->simple_rel_array_size); relid_subpart_map = palloc0(sizeof(int) * root->simple_rel_array_size); - i = 1; - foreach(lc, subpaths) + /* + * If 'resultRelations' are present then map these, otherwise we map the + * 'subpaths' List. + */ + if (resultRelations != NIL) + { + i = 1; + foreach(lc, resultRelations) + { + int resultrel = lfirst_int(lc); + + Assert(resultrel < root->simple_rel_array_size); + relid_subnode_map[resultrel] = i++; + } + } + else { - Path *path = (Path *) lfirst(lc); - RelOptInfo *pathrel = path->parent; + i = 1; + foreach(lc, subpaths) + { + Path *path = (Path *) lfirst(lc); + RelOptInfo *pathrel = path->parent; - Assert(IS_SIMPLE_REL(pathrel)); - Assert(pathrel->relid < root->simple_rel_array_size); + Assert(IS_SIMPLE_REL(pathrel)); + Assert(pathrel->relid < root->simple_rel_array_size); - relid_subnode_map[pathrel->relid] = i++; + relid_subnode_map[pathrel->relid] = i++; + } } /* Likewise for the partition_rels */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 1986abaa9c..e37b5da0bf 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1027,6 +1027,8 @@ typedef struct EPQState } EPQState; +struct PartitionPruning; + /* ---------------- * ResultState information * ---------------- @@ -1083,7 +1085,7 @@ typedef struct ModifyTableState PlanState **mt_plans; /* subplans (one per target rel) */ int mt_nplans; /* number of plans in the array */ int mt_whichplan; /* which one is being executed (0..n-1) */ - ResultRelInfo 
*resultRelInfo; /* per-subplan target relations */ + ResultRelInfo **resultRelInfos; /* per-subplan target relations */ ResultRelInfo *rootResultRelInfo; /* root target relation (partitioned * table root) */ List **mt_arowmarks; /* per-subplan ExecAuxRowMark lists */ @@ -1109,6 +1111,14 @@ typedef struct ModifyTableState /* Flags showing which subcommands are present INS/UPD/DEL/DO NOTHING */ int mt_merge_subcommands; + + /* + * Details required to allow partitions to be eliminated from the scan, or + * NULL if not possible. + */ + struct PartitionPruning *partition_pruning; + Bitmapset *mt_valid_subplans; /* for runtime pruning, valid mt_plans + * indexes to scan. */ } ModifyTableState; /* ---------------- @@ -1130,7 +1140,6 @@ struct AppendState; typedef struct AppendState AppendState; struct ParallelAppendState; typedef struct ParallelAppendState ParallelAppendState; -struct PartitionPruning; struct AppendState { diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index a7dbd31466..c1b6c21f70 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -242,6 +242,8 @@ typedef struct ModifyTable List *exclRelTlist; /* tlist of the EXCLUDED pseudo relation */ List *mergeSourceTargetList; List *mergeActionList; /* actions for MERGE */ + List *part_prune_infos; /* Mapping details for run-time subplan + * pruning, one per partitioned_rels */ } ModifyTable; /* ---------------- diff --git a/src/include/optimizer/partprune.h b/src/include/optimizer/partprune.h index ff8d18847f..b7352d150c 100644 --- a/src/include/optimizer/partprune.h +++ b/src/include/optimizer/partprune.h @@ -21,6 +21,7 @@ extern List *generate_partition_pruning_steps(RelOptInfo *rel, List *clauses, bool *constfalse); extern List *make_partition_pruneinfo(PlannerInfo *root, List *partition_rels, - List *subpaths, List *prunequal); + List *resultRelations, List *subpaths, + List *prunequal); #endif /* PARTPRUNE_H */ diff --git 
a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out index 792924fe0b..86d0c9cab6 100644 --- a/src/test/regress/expected/partition_prune.out +++ b/src/test/regress/expected/partition_prune.out @@ -2795,4 +2795,114 @@ explain (analyze, costs off, summary off, timing off) select * from ma_test wher reset enable_seqscan; reset enable_sort; +-- +-- Test run-time pruning of ModifyTable subnodes +-- +-- Ensure only ma_test_p3 is scanned. +explain (analyze, costs off, summary off, timing off) delete from ma_test where a = (select 29); + QUERY PLAN +------------------------------------------------------ + Delete on ma_test (actual rows=0 loops=1) + Delete on ma_test_p1 + Delete on ma_test_p2 + Delete on ma_test_p3 + InitPlan 1 (returns $0) + -> Result (actual rows=1 loops=1) + -> Seq Scan on ma_test_p1 (never executed) + Filter: (a = $0) + -> Seq Scan on ma_test_p2 (never executed) + Filter: (a = $0) + -> Seq Scan on ma_test_p3 (actual rows=1 loops=1) + Filter: (a = $0) + Rows Removed by Filter: 9 +(13 rows) + +-- Ensure no partitions are scanned. +explain (analyze, costs off, summary off, timing off) delete from ma_test where a = (select 30); + QUERY PLAN +----------------------------------------------- + Delete on ma_test (actual rows=0 loops=1) + Delete on ma_test_p1 + Delete on ma_test_p2 + Delete on ma_test_p3 + InitPlan 1 (returns $0) + -> Result (actual rows=1 loops=1) + -> Seq Scan on ma_test_p1 (never executed) + Filter: (a = $0) + -> Seq Scan on ma_test_p2 (never executed) + Filter: (a = $0) + -> Seq Scan on ma_test_p3 (never executed) + Filter: (a = $0) +(12 rows) + +-- Ensure partition pruning works with an update of the partition key. 
+explain (analyze, costs off, summary off, timing off) update ma_test set a = 29 where a = (select 1); + QUERY PLAN +------------------------------------------------------ + Update on ma_test (actual rows=0 loops=1) + Update on ma_test_p1 + Update on ma_test_p2 + Update on ma_test_p3 + InitPlan 1 (returns $0) + -> Result (actual rows=1 loops=1) + -> Seq Scan on ma_test_p1 (actual rows=1 loops=1) + Filter: (a = $0) + Rows Removed by Filter: 9 + -> Seq Scan on ma_test_p2 (never executed) + Filter: (a = $0) + -> Seq Scan on ma_test_p3 (never executed) + Filter: (a = $0) +(13 rows) + +-- Verify the above command +select tableoid::regclass,a from ma_test where a = 29; + tableoid | a +------------+---- + ma_test_p3 | 29 +(1 row) + +truncate ma_test; +prepare mt_q1 (int) as +delete from ma_test where a > $1; +-- Execute query 5 times to allow choose_custom_plan +-- to start considering a generic plan. +execute mt_q1(0); +execute mt_q1(0); +execute mt_q1(0); +execute mt_q1(0); +execute mt_q1(0); +explain (analyze, costs off, summary off, timing off) execute mt_q1(15); + QUERY PLAN +------------------------------------------------------------------------------- + Delete on ma_test (actual rows=0 loops=1) + Delete on ma_test_p2 + Delete on ma_test_p3 + Subplans Pruned: 1 + -> Index Scan using ma_test_p2_a_idx on ma_test_p2 (actual rows=0 loops=1) + Index Cond: (a > $1) + -> Index Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=0 loops=1) + Index Cond: (a > $1) +(8 rows) + +explain (analyze, costs off, summary off, timing off) execute mt_q1(25); + QUERY PLAN +------------------------------------------------------------------------------- + Delete on ma_test (actual rows=0 loops=1) + Delete on ma_test_p3 + Subplans Pruned: 2 + -> Index Scan using ma_test_p3_a_idx on ma_test_p3 (actual rows=0 loops=1) + Index Cond: (a > $1) +(5 rows) + +-- Ensure ModifyTable behaves correctly when no subplans match exec params +explain (analyze, costs off, summary off, timing off) 
execute mt_q1(35); + QUERY PLAN +------------------------------------------------------------------------ + Delete on ma_test (actual rows=0 loops=1) + Delete on ma_test_p1 + Subplans Pruned: 2 + -> Index Scan using ma_test_p1_a_idx on ma_test_p1 (never executed) + Index Cond: (a > $1) +(5 rows) + drop table ma_test; diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql index 6f389bed1f..73dd6438a2 100644 --- a/src/test/regress/sql/partition_prune.sql +++ b/src/test/regress/sql/partition_prune.sql @@ -658,4 +658,38 @@ explain (analyze, costs off, summary off, timing off) select * from ma_test wher reset enable_seqscan; reset enable_sort; +-- +-- Test run-time pruning of ModifyTable subnodes +-- + +-- Ensure only ma_test_p3 is scanned. +explain (analyze, costs off, summary off, timing off) delete from ma_test where a = (select 29); + +-- Ensure no partitions are scanned. +explain (analyze, costs off, summary off, timing off) delete from ma_test where a = (select 30); + +-- Ensure partition pruning works with an update of the partition key. +explain (analyze, costs off, summary off, timing off) update ma_test set a = 29 where a = (select 1); + +-- Verify the above command +select tableoid::regclass,a from ma_test where a = 29; + +truncate ma_test; + +prepare mt_q1 (int) as +delete from ma_test where a > $1; + +-- Execute query 5 times to allow choose_custom_plan +-- to start considering a generic plan. +execute mt_q1(0); +execute mt_q1(0); +execute mt_q1(0); +execute mt_q1(0); +execute mt_q1(0); + +explain (analyze, costs off, summary off, timing off) execute mt_q1(15); +explain (analyze, costs off, summary off, timing off) execute mt_q1(25); +-- Ensure ModifyTable behaves correctly when no subplans match exec params +explain (analyze, costs off, summary off, timing off) execute mt_q1(35); + drop table ma_test; -- 2.16.2.windows.1