From 0fe7d6d606bbea6ec520bd960bdd9b6b90da50a1 Mon Sep 17 00:00:00 2001 From: "dgrowley@gmail.com" Date: Tue, 26 Jun 2018 23:49:31 +1200 Subject: [PATCH v1] Fix run-time partition pruning for UNION ALL parents The run-time partition pruning code added in 499be013d was unaware that the partitioned_rels list that's built during add_paths_to_append_rel() could be non-empty for relations other than just partitioned relations. It can also be set for UNION ALL parents where one or more union children are partitioned tables. This can cause the partition relids from multiple partition hierarchies to become mixed in the partitioned_rels list. This commit resolves that issue by never mixing the relids from different UNION ALL children. Instead we maintain a List of Lists containing the partitioned relids. This commit also adds all the new required code in both the planner and executor to allow run-time pruning to work for UNION ALL parents which query multiple partitioned tables. --- src/backend/executor/execPartition.c | 389 +++++++++++++++----------- src/backend/executor/nodeAppend.c | 4 +- src/backend/nodes/copyfuncs.c | 17 +- src/backend/nodes/outfuncs.c | 16 +- src/backend/nodes/readfuncs.c | 15 +- src/backend/optimizer/path/allpaths.c | 14 +- src/backend/optimizer/plan/createplan.c | 54 +++- src/backend/partitioning/partprune.c | 293 ++++++++++++++----- src/include/executor/execPartition.h | 25 +- src/include/nodes/nodes.h | 1 + src/include/nodes/plannodes.h | 39 ++- src/include/partitioning/partprune.h | 3 +- src/test/regress/expected/partition_prune.out | 90 ++++++ src/test/regress/sql/partition_prune.sql | 8 + 14 files changed, 706 insertions(+), 262 deletions(-) diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 7a4665cc4e..c8369fed81 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -48,8 +48,8 @@ static char *ExecBuildSlotPartitionKeyDescription(Relation rel, bool 
*isnull, int maxfieldlen); static List *adjust_partition_tlist(List *tlist, TupleConversionMap *map); -static void find_matching_subplans_recurse(PartitionPruneState *prunestate, - PartitionPruningData *pprune, +static void find_matching_subplans_recurse(PartitionPruningData *pprune, + PartitionedRelPruningData *prelprune, bool initial_prune, Bitmapset **validsubplans); @@ -1394,34 +1394,44 @@ adjust_partition_tlist(List *tlist, TupleConversionMap *map) * * 'planstate' is the parent plan node's execution state. * - * 'partitionpruneinfo' is a List of PartitionPruneInfos as generated by + * 'partitionpruneinfo' is a PartitionPruneInfo as generated by * make_partition_pruneinfo. Here we build a PartitionPruneState containing a - * PartitionPruningData for each item in that List. This data can be re-used - * each time we re-evaluate which partitions match the pruning steps provided - * in each PartitionPruneInfo. + * PartitionPruningData for each sublist of 'prune_infos' in 'partitionpruneinfo'; in + * turn, a PartitionedRelPruningData is created for each + * PartitionedRelPruneInfo stored in that sublist. This two-level system + * is required in order to support run-time pruning with UNION ALL parents + * containing one or more partitioned tables as children. The data stored in + * each PartitionedRelPruningData can be re-used each time we re-evaluate + * which partitions match the pruning steps provided in each + * PartitionedRelPruneInfo. 
*/ PartitionPruneState * -ExecCreatePartitionPruneState(PlanState *planstate, List *partitionpruneinfo) +ExecCreatePartitionPruneState(PlanState *planstate, PartitionPruneInfo *partitionpruneinfo) { PartitionPruneState *prunestate; PartitionPruningData *prunedata; ListCell *lc; - int i; + int n_part_hierarchies; + int i, + j; + + Assert(partitionpruneinfo != NULL); - Assert(partitionpruneinfo != NIL); + n_part_hierarchies = list_length(partitionpruneinfo->prune_infos); /* * Allocate the data structure */ prunestate = (PartitionPruneState *) palloc(sizeof(PartitionPruneState)); prunedata = (PartitionPruningData *) - palloc(sizeof(PartitionPruningData) * list_length(partitionpruneinfo)); + palloc(sizeof(PartitionPruningData) * n_part_hierarchies); prunestate->partprunedata = prunedata; - prunestate->num_partprunedata = list_length(partitionpruneinfo); + prunestate->num_partprunedata = n_part_hierarchies; prunestate->do_initial_prune = false; /* may be set below */ prunestate->do_exec_prune = false; /* may be set below */ prunestate->execparamids = NULL; + prunestate->other_subplans = bms_copy(partitionpruneinfo->other_subplans); /* * Create a short-term memory context which we'll use when making calls to @@ -1435,113 +1445,128 @@ ExecCreatePartitionPruneState(PlanState *planstate, List *partitionpruneinfo) ALLOCSET_DEFAULT_SIZES); i = 0; - foreach(lc, partitionpruneinfo) + foreach(lc, partitionpruneinfo->prune_infos) { - PartitionPruneInfo *pinfo = castNode(PartitionPruneInfo, lfirst(lc)); - PartitionPruningData *pprune = &prunedata[i]; - PartitionPruneContext *context = &pprune->context; - PartitionDesc partdesc; - PartitionKey partkey; - int partnatts; - int n_steps; ListCell *lc2; + List *partrelpruneinfos = lfirst_node(List, lc); + PartitionedRelPruningData *partrelprunedata; + int npartrelpruneinfos = list_length(partrelpruneinfos); - /* - * We must copy the subplan_map rather than pointing directly to the - * plan's version, as we may end up making 
modifications to it later. - */ - pprune->subplan_map = palloc(sizeof(int) * pinfo->nparts); - memcpy(pprune->subplan_map, pinfo->subplan_map, - sizeof(int) * pinfo->nparts); - - /* We can use the subpart_map verbatim, since we never modify it */ - pprune->subpart_map = pinfo->subpart_map; + partrelprunedata = palloc(sizeof(PartitionedRelPruningData) * + npartrelpruneinfos); + prunedata[i].partrelprunedata = partrelprunedata; + prunedata[i].num_partrelprunedata = npartrelpruneinfos; - /* present_parts is also subject to later modification */ - pprune->present_parts = bms_copy(pinfo->present_parts); - - /* - * We need to hold a pin on the partitioned table's relcache entry so - * that we can rely on its copies of the table's partition key and - * partition descriptor. We need not get a lock though; one should - * have been acquired already by InitPlan or - * ExecLockNonLeafAppendTables. - */ - context->partrel = relation_open(pinfo->reloid, NoLock); - - partkey = RelationGetPartitionKey(context->partrel); - partdesc = RelationGetPartitionDesc(context->partrel); - n_steps = list_length(pinfo->pruning_steps); - - context->strategy = partkey->strategy; - context->partnatts = partnatts = partkey->partnatts; - context->nparts = pinfo->nparts; - context->boundinfo = partdesc->boundinfo; - context->partcollation = partkey->partcollation; - context->partsupfunc = partkey->partsupfunc; - - /* We'll look up type-specific support functions as needed */ - context->stepcmpfuncs = (FmgrInfo *) - palloc0(sizeof(FmgrInfo) * n_steps * partnatts); - - context->ppccontext = CurrentMemoryContext; - context->planstate = planstate; - - /* Initialize expression state for each expression we need */ - context->exprstates = (ExprState **) - palloc0(sizeof(ExprState *) * n_steps * partnatts); - foreach(lc2, pinfo->pruning_steps) + j = 0; + foreach(lc2, partrelpruneinfos) { - PartitionPruneStepOp *step = (PartitionPruneStepOp *) lfirst(lc2); + PartitionedRelPruneInfo *pinfo = 
castNode(PartitionedRelPruneInfo, lfirst(lc2)); + PartitionedRelPruningData *prelprune = &partrelprunedata[j]; + PartitionPruneContext *context = &prelprune->context; + PartitionDesc partdesc; + PartitionKey partkey; + int partnatts; + int n_steps; ListCell *lc3; - int keyno; - /* not needed for other step kinds */ - if (!IsA(step, PartitionPruneStepOp)) - continue; + /* + * We must copy the subplan_map rather than pointing directly to + * the plan's version, as we may end up making modifications to it + * later. + */ + prelprune->subplan_map = palloc(sizeof(int) * pinfo->nparts); + memcpy(prelprune->subplan_map, pinfo->subplan_map, + sizeof(int) * pinfo->nparts); + + /* We can use the subpart_map verbatim, since we never modify it */ + prelprune->subpart_map = pinfo->subpart_map; - Assert(list_length(step->exprs) <= partnatts); + /* present_parts is also subject to later modification */ + prelprune->present_parts = bms_copy(pinfo->present_parts); - keyno = 0; - foreach(lc3, step->exprs) + /* + * We need to hold a pin on the partitioned table's relcache entry + * so that we can rely on its copies of the table's partition key + * and partition descriptor. We need not get a lock though; one + * should have been acquired already by InitPlan or + * ExecLockNonLeafAppendTables. 
+ */ + context->partrel = relation_open(pinfo->reloid, NoLock); + + partkey = RelationGetPartitionKey(context->partrel); + partdesc = RelationGetPartitionDesc(context->partrel); + n_steps = list_length(pinfo->pruning_steps); + + context->strategy = partkey->strategy; + context->partnatts = partnatts = partkey->partnatts; + context->nparts = pinfo->nparts; + context->boundinfo = partdesc->boundinfo; + context->partcollation = partkey->partcollation; + context->partsupfunc = partkey->partsupfunc; + + /* We'll look up type-specific support functions as needed */ + context->stepcmpfuncs = (FmgrInfo *) + palloc0(sizeof(FmgrInfo) * n_steps * partnatts); + + context->ppccontext = CurrentMemoryContext; + context->planstate = planstate; + + /* Initialize expression state for each expression we need */ + context->exprstates = (ExprState **) + palloc0(sizeof(ExprState *) * n_steps * partnatts); + foreach(lc3, pinfo->pruning_steps) { - Expr *expr = (Expr *) lfirst(lc3); + PartitionPruneStepOp *step = (PartitionPruneStepOp *) lfirst(lc3); + ListCell *lc4; + int keyno; + + /* not needed for other step kinds */ + if (!IsA(step, PartitionPruneStepOp)) + continue; + + Assert(list_length(step->exprs) <= partnatts); - /* not needed for Consts */ - if (!IsA(expr, Const)) + keyno = 0; + foreach(lc4, step->exprs) { - int stateidx = PruneCxtStateIdx(partnatts, - step->step.step_id, - keyno); + Expr *expr = (Expr *) lfirst(lc4); - context->exprstates[stateidx] = - ExecInitExpr(expr, context->planstate); + /* not needed for Consts */ + if (!IsA(expr, Const)) + { + int stateidx = PruneCxtStateIdx(partnatts, + step->step.step_id, + keyno); + + context->exprstates[stateidx] = + ExecInitExpr(expr, context->planstate); + } + keyno++; } - keyno++; } - } - /* Array is not modified at runtime, so just point to plan's copy */ - context->exprhasexecparam = pinfo->hasexecparam; + /* Array is not modified at runtime, so just point to plan's copy */ + context->exprhasexecparam = pinfo->hasexecparam; - 
pprune->pruning_steps = pinfo->pruning_steps; - pprune->do_initial_prune = pinfo->do_initial_prune; - pprune->do_exec_prune = pinfo->do_exec_prune; + prelprune->pruning_steps = pinfo->pruning_steps; + prelprune->do_initial_prune = pinfo->do_initial_prune; + prelprune->do_exec_prune = pinfo->do_exec_prune; - /* Record if pruning would be useful at any level */ - prunestate->do_initial_prune |= pinfo->do_initial_prune; - prunestate->do_exec_prune |= pinfo->do_exec_prune; + /* Record if pruning would be useful at any level */ + prunestate->do_initial_prune |= pinfo->do_initial_prune; + prunestate->do_exec_prune |= pinfo->do_exec_prune; - /* - * Accumulate the IDs of all PARAM_EXEC Params affecting the - * partitioning decisions at this plan node. - */ - prunestate->execparamids = bms_add_members(prunestate->execparamids, - pinfo->execparamids); + /* + * Accumulate the IDs of all PARAM_EXEC Params affecting the + * partitioning decisions at this plan node. + */ + prunestate->execparamids = bms_add_members(prunestate->execparamids, + pinfo->execparamids); + j++; + } i++; } - return prunestate; } @@ -1555,13 +1580,21 @@ ExecCreatePartitionPruneState(PlanState *planstate, List *partitionpruneinfo) void ExecDestroyPartitionPruneState(PartitionPruneState *prunestate) { - int i; + int i, + j; for (i = 0; i < prunestate->num_partprunedata; i++) { PartitionPruningData *pprune = &prunestate->partprunedata[i]; - relation_close(pprune->context.partrel, NoLock); + for (j = 0; j < pprune->num_partrelprunedata; j++) + { + PartitionedRelPruningData *prelprune; + + prelprune = &pprune->partrelprunedata[j]; + + relation_close(prelprune->context.partrel, NoLock); + } } } @@ -1581,31 +1614,42 @@ ExecDestroyPartitionPruneState(PartitionPruneState *prunestate) Bitmapset * ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate, int nsubplans) { - PartitionPruningData *pprune; MemoryContext oldcontext; Bitmapset *result = NULL; + int i; Assert(prunestate->do_initial_prune); - pprune 
= prunestate->partprunedata; - /* * Switch to a temp context to avoid leaking memory in the executor's * memory context. */ oldcontext = MemoryContextSwitchTo(prunestate->prune_context); - /* Perform pruning without using PARAM_EXEC Params */ - find_matching_subplans_recurse(prunestate, pprune, true, &result); + for (i = 0; i < prunestate->num_partprunedata; i++) + { + PartitionPruningData *pprune; + PartitionedRelPruningData *prelprune; + + pprune = &prunestate->partprunedata[i]; + prelprune = &pprune->partrelprunedata[0]; + + /* Perform pruning without using PARAM_EXEC Params */ + find_matching_subplans_recurse(pprune, prelprune, true, &result); + + /* Expression eval may have used space in node's ps_ExprContext too */ + ResetExprContext(prelprune->context.planstate->ps_ExprContext); + } MemoryContextSwitchTo(oldcontext); /* Copy result out of the temp context before we reset it */ result = bms_copy(result); + /* Add in any subplans which partition pruning didn't account for */ + result = bms_add_members(result, prunestate->other_subplans); + MemoryContextReset(prunestate->prune_context); - /* Expression eval may have used space in node's ps_ExprContext too */ - ResetExprContext(pprune->context.planstate->ps_ExprContext); /* * If any subplans were pruned, we must re-sequence the subplan indexes so @@ -1633,59 +1677,70 @@ ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate, int nsubplans) } /* - * Now we can update each PartitionPruneInfo's subplan_map with new - * subplan indexes. We must also recompute its present_parts bitmap. - * We perform this loop in back-to-front order so that we determine - * present_parts for the lowest-level partitioned tables first. This - * way we can tell whether a sub-partitioned table's partitions were - * entirely pruned so we can exclude that from 'present_parts'. + * Now we can update each PartitionedRelPruneInfo's subplan_map with + * new subplan indexes. We must also recompute its present_parts + * bitmap. 
We perform this loop in back-to-front order so that we + * determine present_parts for the lowest-level partitioned tables + * first. This way we can tell whether a sub-partitioned table's + * partitions were entirely pruned so we can exclude that from + * 'present_parts'. */ - for (i = prunestate->num_partprunedata - 1; i >= 0; i--) + + for (i = 0; i < prunestate->num_partprunedata; i++) { - int nparts; int j; + PartitionPruningData *prelpruneinfo; - pprune = &prunestate->partprunedata[i]; - nparts = pprune->context.nparts; - /* We just rebuild present_parts from scratch */ - bms_free(pprune->present_parts); - pprune->present_parts = NULL; + prelpruneinfo = &prunestate->partprunedata[i]; - for (j = 0; j < nparts; j++) + for (j = prelpruneinfo->num_partrelprunedata - 1; j >= 0; j--) { - int oldidx = pprune->subplan_map[j]; - int subidx; + PartitionedRelPruningData *pprune; + int nparts; + int k; - /* - * If this partition existed as a subplan then change the old - * subplan index to the new subplan index. The new index may - * become -1 if the partition was pruned above, or it may just - * come earlier in the subplan list due to some subplans being - * removed earlier in the list. If it's a subpartition, add - * it to present_parts unless it's entirely pruned. 
- */ - if (oldidx >= 0) - { - Assert(oldidx < nsubplans); - pprune->subplan_map[j] = new_subplan_indexes[oldidx]; + pprune = &prelpruneinfo->partrelprunedata[j]; + nparts = pprune->context.nparts; + /* We just rebuild present_parts from scratch */ + bms_free(pprune->present_parts); + pprune->present_parts = NULL; - if (new_subplan_indexes[oldidx] >= 0) - pprune->present_parts = - bms_add_member(pprune->present_parts, j); - } - else if ((subidx = pprune->subpart_map[j]) >= 0) + for (k = 0; k < nparts; k++) { - PartitionPruningData *subprune; + int oldidx = pprune->subplan_map[k]; + int subidx; - subprune = &prunestate->partprunedata[subidx]; + /* + * If this partition existed as a subplan then change the + * old subplan index to the new subplan index. The new + * index may become -1 if the partition was pruned above, + * or it may just come earlier in the subplan list due to + * some subplans being removed earlier in the list. If + * it's a subpartition, add it to present_parts unless + * it's entirely pruned. 
+ */ + if (oldidx >= 0) + { + Assert(oldidx < nsubplans); + pprune->subplan_map[k] = new_subplan_indexes[oldidx]; - if (!bms_is_empty(subprune->present_parts)) - pprune->present_parts = - bms_add_member(pprune->present_parts, j); + if (new_subplan_indexes[oldidx] >= 0) + pprune->present_parts = + bms_add_member(pprune->present_parts, k); + } + else if ((subidx = pprune->subpart_map[k]) >= 0) + { + PartitionedRelPruningData *subprune; + + subprune = &prelpruneinfo->partrelprunedata[subidx]; + + if (!bms_is_empty(subprune->present_parts)) + pprune->present_parts = + bms_add_member(pprune->present_parts, k); + } } } } - pfree(new_subplan_indexes); } @@ -1702,11 +1757,9 @@ ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate, int nsubplans) Bitmapset * ExecFindMatchingSubPlans(PartitionPruneState *prunestate) { - PartitionPruningData *pprune; MemoryContext oldcontext; Bitmapset *result = NULL; - - pprune = prunestate->partprunedata; + int i; /* * Switch to a temp context to avoid leaking memory in the executor's @@ -1714,16 +1767,29 @@ ExecFindMatchingSubPlans(PartitionPruneState *prunestate) */ oldcontext = MemoryContextSwitchTo(prunestate->prune_context); - find_matching_subplans_recurse(prunestate, pprune, false, &result); + for (i = 0; i < prunestate->num_partprunedata; i++) + { + PartitionPruningData *pprune; + PartitionedRelPruningData *prelprune; + + pprune = &prunestate->partprunedata[i]; + prelprune = &pprune->partrelprunedata[0]; + + find_matching_subplans_recurse(pprune, prelprune, false, &result); + + /* Expression eval may have used space in node's ps_ExprContext too */ + ResetExprContext(prelprune->context.planstate->ps_ExprContext); + } MemoryContextSwitchTo(oldcontext); /* Copy result out of the temp context before we reset it */ result = bms_copy(result); + /* Add in any subplans which partition pruning didn't account for */ + result = bms_add_members(result, prunestate->other_subplans); + MemoryContextReset(prunestate->prune_context); - /* 
Expression eval may have used space in node's ps_ExprContext too */ - ResetExprContext(pprune->context.planstate->ps_ExprContext); return result; } @@ -1736,8 +1802,8 @@ ExecFindMatchingSubPlans(PartitionPruneState *prunestate) * Adds valid (non-prunable) subplan IDs to *validsubplans */ static void -find_matching_subplans_recurse(PartitionPruneState *prunestate, - PartitionPruningData *pprune, +find_matching_subplans_recurse(PartitionPruningData *pprune, + PartitionedRelPruningData *prelprune, bool initial_prune, Bitmapset **validsubplans) { @@ -1748,15 +1814,16 @@ find_matching_subplans_recurse(PartitionPruneState *prunestate, check_stack_depth(); /* Only prune if pruning would be useful at this level. */ - if (initial_prune ? pprune->do_initial_prune : pprune->do_exec_prune) + if (initial_prune ? prelprune->do_initial_prune : + prelprune->do_exec_prune) { - PartitionPruneContext *context = &pprune->context; + PartitionPruneContext *context = &prelprune->context; /* Set whether we can evaluate PARAM_EXEC Params or not */ context->evalexecparams = !initial_prune; partset = get_matching_partitions(context, - pprune->pruning_steps); + prelprune->pruning_steps); } else { @@ -1764,23 +1831,23 @@ find_matching_subplans_recurse(PartitionPruneState *prunestate, * If no pruning is to be done, just include all partitions at this * level. 
*/ - partset = pprune->present_parts; + partset = prelprune->present_parts; } /* Translate partset into subplan indexes */ i = -1; while ((i = bms_next_member(partset, i)) >= 0) { - if (pprune->subplan_map[i] >= 0) + if (prelprune->subplan_map[i] >= 0) *validsubplans = bms_add_member(*validsubplans, - pprune->subplan_map[i]); + prelprune->subplan_map[i]); else { - int partidx = pprune->subpart_map[i]; + int partidx = prelprune->subpart_map[i]; if (partidx >= 0) - find_matching_subplans_recurse(prunestate, - &prunestate->partprunedata[partidx], + find_matching_subplans_recurse(pprune, + &pprune->partrelprunedata[partidx], initial_prune, validsubplans); else { diff --git a/src/backend/executor/nodeAppend.c b/src/backend/executor/nodeAppend.c index 5ce4fb43e1..97451ed820 100644 --- a/src/backend/executor/nodeAppend.c +++ b/src/backend/executor/nodeAppend.c @@ -129,7 +129,7 @@ ExecInitAppend(Append *node, EState *estate, int eflags) appendstate->as_whichplan = INVALID_SUBPLAN_INDEX; /* If run-time partition pruning is enabled, then set that up now */ - if (node->part_prune_infos != NIL) + if (node->part_prune_info) { PartitionPruneState *prunestate; @@ -138,7 +138,7 @@ ExecInitAppend(Append *node, EState *estate, int eflags) /* Create the working data structure for pruning. */ prunestate = ExecCreatePartitionPruneState(&appendstate->ps, - node->part_prune_infos); + node->part_prune_info); appendstate->as_prune_state = prunestate; /* Perform an initial partition prune, if required. 
*/ diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 1c12075b01..9c71cde14a 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -245,7 +245,7 @@ _copyAppend(const Append *from) COPY_NODE_FIELD(appendplans); COPY_SCALAR_FIELD(first_partial_plan); COPY_NODE_FIELD(partitioned_rels); - COPY_NODE_FIELD(part_prune_infos); + COPY_NODE_FIELD(part_prune_info); return newnode; } @@ -1181,6 +1181,18 @@ _copyPartitionPruneInfo(const PartitionPruneInfo *from) { PartitionPruneInfo *newnode = makeNode(PartitionPruneInfo); + COPY_NODE_FIELD(prune_infos); + COPY_BITMAPSET_FIELD(other_subplans); + + return newnode; +} + + +static PartitionedRelPruneInfo * +_copyPartitionedRelPruneInfo(const PartitionedRelPruneInfo *from) +{ + PartitionedRelPruneInfo *newnode = makeNode(PartitionedRelPruneInfo); + COPY_SCALAR_FIELD(reloid); COPY_NODE_FIELD(pruning_steps); COPY_BITMAPSET_FIELD(present_parts); @@ -4907,6 +4919,9 @@ copyObjectImpl(const void *from) case T_PartitionPruneInfo: retval = _copyPartitionPruneInfo(from); break; + case T_PartitionedRelPruneInfo: + retval = _copyPartitionedRelPruneInfo(from); + break; case T_PartitionPruneStepOp: retval = _copyPartitionPruneStepOp(from); break; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 979d523e00..ef599342a8 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -402,7 +402,7 @@ _outAppend(StringInfo str, const Append *node) WRITE_NODE_FIELD(appendplans); WRITE_INT_FIELD(first_partial_plan); WRITE_NODE_FIELD(partitioned_rels); - WRITE_NODE_FIELD(part_prune_infos); + WRITE_NODE_FIELD(part_prune_info); } static void @@ -1012,10 +1012,19 @@ _outPlanRowMark(StringInfo str, const PlanRowMark *node) static void _outPartitionPruneInfo(StringInfo str, const PartitionPruneInfo *node) +{ + WRITE_NODE_TYPE("PARTITIONPRUNEINFO"); + + WRITE_NODE_FIELD(prune_infos); + WRITE_BITMAPSET_FIELD(other_subplans); +} + +static void 
+_outPartitionedRelPruneInfo(StringInfo str, const PartitionedRelPruneInfo *node) { int i; - WRITE_NODE_TYPE("PARTITIONPRUNEINFO"); + WRITE_NODE_TYPE("PARTITIONEDRELPRUNEINFO"); WRITE_OID_FIELD(reloid); WRITE_NODE_FIELD(pruning_steps); @@ -3830,6 +3839,9 @@ outNode(StringInfo str, const void *obj) case T_PartitionPruneInfo: _outPartitionPruneInfo(str, obj); break; + case T_PartitionedRelPruneInfo: + _outPartitionedRelPruneInfo(str, obj); + break; case T_PartitionPruneStepOp: _outPartitionPruneStepOp(str, obj); break; diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 42aff7f57a..ea4e8df62e 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1612,7 +1612,7 @@ _readAppend(void) READ_NODE_FIELD(appendplans); READ_INT_FIELD(first_partial_plan); READ_NODE_FIELD(partitioned_rels); - READ_NODE_FIELD(part_prune_infos); + READ_NODE_FIELD(part_prune_info); READ_DONE(); } @@ -2328,6 +2328,17 @@ _readPartitionPruneInfo(void) { READ_LOCALS(PartitionPruneInfo); + READ_NODE_FIELD(prune_infos); + READ_BITMAPSET_FIELD(other_subplans); + + READ_DONE(); +} + +static PartitionedRelPruneInfo * +_readPartitionedRelPruneInfo(void) +{ + READ_LOCALS(PartitionedRelPruneInfo); + READ_OID_FIELD(reloid); READ_NODE_FIELD(pruning_steps); READ_BITMAPSET_FIELD(present_parts); @@ -2725,6 +2736,8 @@ parseNodeString(void) return_value = _readPlanRowMark(); else if (MATCH("PARTITIONPRUNEINFO", 18)) return_value = _readPartitionPruneInfo(); + else if (MATCH("PARTITIONEDRELPRUNEINFO", 23)) + return_value = _readPartitionedRelPruneInfo(); else if (MATCH("PARTITIONPRUNESTEPOP", 20)) return_value = _readPartitionPruneStepOp(); else if (MATCH("PARTITIONPRUNESTEPCOMBINE", 25)) diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 3ada379f8b..2adbebcd35 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -1455,14 +1455,22 @@ add_paths_to_append_rel(PlannerInfo 
*root, RelOptInfo *rel, /* * If we need to build partitioned_rels, accumulate the partitioned * rels for this child. We must ensure that parents are always listed - * before their child partitioned tables. + * before their child partitioned tables. For UNION ALL parents, so as + * not to mix different partition hierarchies, we store a List of + * lists containing the child relids. */ if (build_partitioned_rels) { List *cprels = childrel->partitioned_child_rels; - partitioned_rels = list_concat(partitioned_rels, - list_copy(cprels)); + if (rel->rtekind == RTE_SUBQUERY) + { + if (cprels != NIL) + partitioned_rels = lappend(partitioned_rels, cprels); + } + else + partitioned_rels = list_concat(partitioned_rels, + list_copy(cprels)); } /* diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index cf82b7052d..3a69baf9df 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -133,6 +133,7 @@ static BitmapHeapScan *create_bitmap_scan_plan(PlannerInfo *root, static Plan *create_bitmap_subplan(PlannerInfo *root, Path *bitmapqual, List **qual, List **indexqual, List **indexECs); static void bitmap_subplan_mark_shared(Plan *plan); +static List *flatten_partitioned_rels(List *partitioned_rels); static TidScan *create_tidscan_plan(PlannerInfo *root, TidPath *best_path, List *tlist, List *scan_clauses); static SubqueryScan *create_subqueryscan_plan(PlannerInfo *root, @@ -211,7 +212,8 @@ static NamedTuplestoreScan *make_namedtuplestorescan(List *qptlist, List *qpqual static WorkTableScan *make_worktablescan(List *qptlist, List *qpqual, Index scanrelid, int wtParam); static Append *make_append(List *appendplans, int first_partial_plan, - List *tlist, List *partitioned_rels, List *partpruneinfos); + List *tlist, List *partitioned_rels, + PartitionPruneInfo *partpruneinfo); static RecursiveUnion *make_recursive_union(List *tlist, Plan *lefttree, Plan *righttree, @@ -1039,7 +1041,7 @@ 
create_append_plan(PlannerInfo *root, AppendPath *best_path) List *subplans = NIL; ListCell *subpaths; RelOptInfo *rel = best_path->path.parent; - List *partpruneinfos = NIL; + PartitionPruneInfo *partpruneinfo = NULL; /* * The subpaths list could be empty, if every child was proven empty by @@ -1099,13 +1101,12 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) /* * If any quals exist, they may be useful to perform further partition - * pruning during execution. Generate a PartitionPruneInfo for each - * partitioned rel to store these quals and allow translation of - * partition indexes into subpath indexes. + * pruning during execution. Attempt to generate a PartitionPruneInfo + * object to allow further pruning to be done during execution. */ if (prunequal != NIL) - partpruneinfos = - make_partition_pruneinfo(root, + partpruneinfo = + make_partition_pruneinfo(root, rel, best_path->partitioned_rels, best_path->subpaths, prunequal); } @@ -1119,7 +1120,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path) plan = make_append(subplans, best_path->first_partial_path, tlist, best_path->partitioned_rels, - partpruneinfos); + partpruneinfo); copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -5074,6 +5075,35 @@ bitmap_subplan_mark_shared(Plan *plan) elog(ERROR, "unrecognized node type: %d", nodeTag(plan)); } +/* + * flatten_partitioned_rels + * Flatten up to a 2-deep List hierarchy of relids. 
+ */ +static List * +flatten_partitioned_rels(List *partitioned_rels) +{ + if (partitioned_rels == NIL) + return NIL; + else if (partitioned_rels->type == T_IntList) + return partitioned_rels; + else + { + ListCell *lc; + List *newlist = NIL; + + foreach(lc, partitioned_rels) + { + List *sublist = lfirst(lc); + + Assert(sublist->type == T_IntList); + + newlist = list_concat(newlist, list_copy(sublist)); + } + + return newlist; + } +} + /***************************************************************************** * * PLAN NODE BUILDING ROUTINES @@ -5417,7 +5447,7 @@ make_foreignscan(List *qptlist, static Append * make_append(List *appendplans, int first_partial_plan, List *tlist, List *partitioned_rels, - List *partpruneinfos) + PartitionPruneInfo *partpruneinfo) { Append *node = makeNode(Append); Plan *plan = &node->plan; @@ -5428,8 +5458,8 @@ make_append(List *appendplans, int first_partial_plan, plan->righttree = NULL; node->appendplans = appendplans; node->first_partial_plan = first_partial_plan; - node->partitioned_rels = partitioned_rels; - node->part_prune_infos = partpruneinfos; + node->partitioned_rels = flatten_partitioned_rels(partitioned_rels); + node->part_prune_info = partpruneinfo; return node; } @@ -6586,7 +6616,7 @@ make_modifytable(PlannerInfo *root, node->operation = operation; node->canSetTag = canSetTag; node->nominalRelation = nominalRelation; - node->partitioned_rels = partitioned_rels; + node->partitioned_rels = flatten_partitioned_rels(partitioned_rels); node->partColsUpdated = partColsUpdated; node->resultRelations = resultRelations; node->resultRelIndex = -1; /* will be set correctly in setrefs.c */ diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c index cdc61a8997..218cac71ef 100644 --- a/src/backend/partitioning/partprune.c +++ b/src/backend/partitioning/partprune.c @@ -111,7 +111,11 @@ typedef struct PruneStepResult bool scan_null; /* Scan the partition for NULL values? 
*/ } PruneStepResult; - +static List *make_partitionedrel_pruneinfo(PlannerInfo *root, + RelOptInfo *parentrel, + int *relid_subplan_map, + List *partitioned_rels, List *prunequal, + Bitmapset **matchedsubplans); static List *gen_partprune_steps(RelOptInfo *rel, List *clauses, bool *contradictory); static List *gen_partprune_steps_internal(GeneratePruningStepsContext *context, @@ -160,8 +164,8 @@ static PruneStepResult *get_matching_range_bounds(PartitionPruneContext *context FmgrInfo *partsupfunc, Bitmapset *nullkeys); static Bitmapset *pull_exec_paramids(Expr *expr); static bool pull_exec_paramids_walker(Node *node, Bitmapset **context); -static bool analyze_partkey_exprs(PartitionPruneInfo *pinfo, List *steps, - int partnatts); +static bool analyze_partkey_exprs(PartitionedRelPruneInfo *prelinfo, + List *steps, int partnatts); static PruneStepResult *perform_pruning_base_step(PartitionPruneContext *context, PartitionPruneStepOp *opstep); static PruneStepResult *perform_pruning_combine_step(PartitionPruneContext *context, @@ -176,38 +180,36 @@ static bool partkey_datum_from_expr(PartitionPruneContext *context, /* * make_partition_pruneinfo - * Build List of PartitionPruneInfos, one for each partitioned rel. - * These can be used in the executor to allow additional partition - * pruning to take place. + * Builds a PartitionPruneInfo which can be used in the executor to allow + * additional partition pruning to take place. Returns NULL when + * partition pruning would be useless. * - * Here we generate partition pruning steps for 'prunequal' and also build a - * data structure which allows mapping of partition indexes into 'subpaths' - * indexes. + * Here we build a PartitionedRelPruneInfo for each partitioned relation in + * 'partitioned_rels'. This list can either contain a list of relids of each + * partitioned relation, or contain a list of Lists which contain the relids. 
* - * If no non-Const expressions are being compared to the partition key in any - * of the 'partitioned_rels', then we return NIL to indicate no run-time - * pruning should be performed. Run-time pruning would be useless, since the - * pruning done during planning will have pruned everything that can be. + * Any subpaths which could not be matched to a partitioned rel are set in + * the returned PartitionPruneInfo's 'other_subplans'. Callers will likely + * want to ensure that subplans listed here are not pruned. */ -List * -make_partition_pruneinfo(PlannerInfo *root, List *partitioned_rels, - List *subpaths, List *prunequal) +PartitionPruneInfo * +make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, + List *partitioned_rels, List *subpaths, + List *prunequal) { - RelOptInfo *targetpart = NULL; - List *pinfolist = NIL; - bool doruntimeprune = false; + PartitionPruneInfo *pruneinfo; + Bitmapset *allmatchedsubplans = NULL; int *relid_subplan_map; - int *relid_subpart_map; ListCell *lc; + List *prunerelinfos; int i; /* - * Construct two temporary arrays to map from planner relids to subplan - * and sub-partition indexes. For convenience, we use 1-based indexes - * here, so that zero can represent an un-filled array entry. + * Construct a temporary array to map from planner relids to subplan + * indexes. For convenience, we use 1-based indexes here, so that zero + * can represent an un-filled array entry. */ relid_subplan_map = palloc0(sizeof(int) * root->simple_rel_array_size); - relid_subpart_map = palloc0(sizeof(int) * root->simple_rel_array_size); /* * relid_subplan_map maps relid of a leaf partition to the index in @@ -227,10 +229,132 @@ make_partition_pruneinfo(PlannerInfo *root, List *partitioned_rels, relid_subplan_map[pathrel->relid] = i++; } + /* + * We now build a PartitionedRelPruneInfo for each partitioned rel. 
For + * UNION ALL parents, the partitioned_rels will be a List of Lists, but + * when the parent is a partitioned table, this will just be a list of + * ints. + */ + if (partitioned_rels->type == T_IntList) + { + Bitmapset *matchedsubplans = NULL; + List *prelinfolist; + + prelinfolist = make_partitionedrel_pruneinfo(root, parentrel, + relid_subplan_map, + partitioned_rels, prunequal, + &matchedsubplans); + + /* Only record matchedsubplans if pruning will be performed */ + if (prelinfolist != NIL) + { + prunerelinfos = list_make1(prelinfolist); + allmatchedsubplans = matchedsubplans; + } + else + prunerelinfos = NIL; + } + else + { + Assert(partitioned_rels->type == T_List); + + prunerelinfos = NIL; + + foreach(lc, partitioned_rels) + { + List *rels = lfirst(lc); + List *prelinfolist; + Bitmapset *matchedsubplans = NULL; + + prelinfolist = make_partitionedrel_pruneinfo(root, parentrel, + relid_subplan_map, + rels, prunequal, + + &matchedsubplans); + + /* Only record matchedsubplans if pruning will be performed */ + if (prelinfolist != NIL) + { + prunerelinfos = lappend(prunerelinfos, prelinfolist); + allmatchedsubplans = bms_join(matchedsubplans, + allmatchedsubplans); + } + } + } + + pfree(relid_subplan_map); + + /* No run-time pruning required. */ + if (prunerelinfos == NIL) + return NULL; + + pruneinfo = makeNode(PartitionPruneInfo); + pruneinfo->prune_infos = prunerelinfos; + + /* + * Some subplans may not belong to any of the listed partitioned_rels. This + * can happen for UNION ALL queries which include a non-partitioned table. + * We record all of the subplans which we didn't build any + * PartitionedRelPruneInfo for so that callers can easily identify which + * subplans should not be pruned.
+ */ + if (bms_num_members(allmatchedsubplans) < list_length(subpaths)) + { + Bitmapset *other_subplans; + + /* Create an inverted set of allmatchedsubplans */ + other_subplans = bms_add_range(NULL, 0, list_length(subpaths) - 1); + other_subplans = bms_del_members(other_subplans, allmatchedsubplans); + + pruneinfo->other_subplans = other_subplans; + } + else + pruneinfo->other_subplans = NULL; + + return pruneinfo; +} + +/* + * make_partitionedrel_pruneinfo + * Build List of PartitionedRelPruneInfos, one for each partitioned rel. + * These can be used in the executor to allow additional partition + * pruning to take place. + * + * Here we generate partition pruning steps for 'prunequal' and also build a + * data structure which allows mapping of partition indexes into 'subpaths' + * indexes. + * + * If no non-Const expressions are being compared to the partition key in any + * of the 'partitioned_rels', then we return NIL to indicate no run-time + * pruning should be performed. Run-time pruning would be useless, since the + * pruning done during planning will have pruned everything that can be. + */ +static List * +make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, + int *relid_subplan_map, + List *partitioned_rels, List *prunequal, + Bitmapset **matchedsubplans) +{ + RelOptInfo *targetpart = NULL; + List *prelinfolist = NIL; + bool doruntimeprune = false; + bool hascontradictingquals = false; + ListCell *lc; + int *relid_subpart_map; + Bitmapset *subplansfound = NULL; + int i; + + /* + * Construct a temporary array to map from planner relids to index of the + * partitioned_rel. For convenience, we use 1-based indexes here, so that + * zero can represent an un-filled array entry. 
+ */ + relid_subpart_map = palloc0(sizeof(int) * root->simple_rel_array_size); + /* * relid_subpart_map maps relid of a non-leaf partition to the index in * 'partitioned_rels' of that rel (which will also be the index in the - * returned PartitionPruneInfo list of the info for that partition). + * returned PartitionedRelPruneInfo list of the info for that partition). */ i = 1; foreach(lc, partitioned_rels) @@ -246,12 +370,12 @@ make_partition_pruneinfo(PlannerInfo *root, List *partitioned_rels, relid_subpart_map[rti] = i++; } - /* We now build a PartitionPruneInfo for each partitioned rel */ + /* We now build a PartitionedRelPruneInfo for each partitioned rel */ foreach(lc, partitioned_rels) { Index rti = lfirst_int(lc); RelOptInfo *subpart = find_base_rel(root, rti); - PartitionPruneInfo *pinfo; + PartitionedRelPruneInfo *prelinfo; RangeTblEntry *rte; Bitmapset *present_parts; int nparts = subpart->nparts; @@ -269,6 +393,31 @@ make_partition_pruneinfo(PlannerInfo *root, List *partitioned_rels, if (!targetpart) { targetpart = subpart; + + /* + * When the first listed partitioned table is not the same rel as + * 'parentrel', then we must be dealing with a UNION ALL + * parentrel. We'd better translate the pruning qual so that it's + * compatible with the top-level partitioned table. We overwrite + * the input parameter here so that subsequent translations for + * sub-partitioned tables translate from the top-level partitioned + * table, rather than the UNION ALL parent. 
+ */ + if (parentrel != subpart) + { + int nappinfos; + AppendRelInfo **appinfos = find_appinfos_by_relids(root, + subpart->relids, + &nappinfos); + + prunequal = (List *) adjust_appendrel_attrs(root, (Node *) + prunequal, + nappinfos, + appinfos); + + pfree(appinfos); + } + partprunequal = prunequal; } else @@ -288,19 +437,7 @@ make_partition_pruneinfo(PlannerInfo *root, List *partitioned_rels, pruning_steps = gen_partprune_steps(subpart, partprunequal, &contradictory); - if (contradictory) - { - /* - * This shouldn't happen as the planner should have detected this - * earlier. However, we do use additional quals from parameterized - * paths here. These do only compare Params to the partition key, - * so this shouldn't cause the discovery of any new qual - * contradictions that were not previously discovered as the Param - * values are unknown during planning. Anyway, we'd better do - * something sane here, so let's just disable run-time pruning. - */ - return NIL; - } + hascontradictingquals |= contradictory; /* * Construct the subplan and subpart maps for this partitioning level. 
@@ -320,32 +457,56 @@ make_partition_pruneinfo(PlannerInfo *root, List *partitioned_rels, subplan_map[i] = subplanidx; subpart_map[i] = subpartidx; - if (subplanidx >= 0 || subpartidx >= 0) + if (subplanidx >= 0) + { + present_parts = bms_add_member(present_parts, i); + + /* Record finding this subplan */ + subplansfound = bms_add_member(subplansfound, subplanidx); + } + else if (subpartidx >= 0) present_parts = bms_add_member(present_parts, i); } + rte = root->simple_rte_array[subpart->relid]; - pinfo = makeNode(PartitionPruneInfo); - pinfo->reloid = rte->relid; - pinfo->pruning_steps = pruning_steps; - pinfo->present_parts = present_parts; - pinfo->nparts = nparts; - pinfo->subplan_map = subplan_map; - pinfo->subpart_map = subpart_map; + prelinfo = makeNode(PartitionedRelPruneInfo); + prelinfo->reloid = rte->relid; + prelinfo->pruning_steps = pruning_steps; + prelinfo->present_parts = present_parts; + prelinfo->nparts = nparts; + prelinfo->subplan_map = subplan_map; + prelinfo->subpart_map = subpart_map; /* Determine which pruning types should be enabled at this level */ - doruntimeprune |= analyze_partkey_exprs(pinfo, pruning_steps, + doruntimeprune |= analyze_partkey_exprs(prelinfo, pruning_steps, partnatts); - pinfolist = lappend(pinfolist, pinfo); + prelinfolist = lappend(prelinfolist, prelinfo); } - pfree(relid_subplan_map); pfree(relid_subpart_map); + *matchedsubplans = subplansfound; + + if (hascontradictingquals) + { + /* + * This shouldn't happen as the planner should have detected this + * earlier. However, we do use additional quals from parameterized + * paths here. These do only compare Params to the partition key, so + * this shouldn't cause the discovery of any new qual contradictions + * that were not previously discovered as the Param values are unknown + * during planning. Anyway, we'd better do something sane here, so + * let's just disable run-time pruning. 
+ */ + return NIL; + } + + if (doruntimeprune) - return pinfolist; + return prelinfolist; /* No run-time pruning required. */ return NIL; @@ -2752,10 +2913,11 @@ pull_exec_paramids_walker(Node *node, Bitmapset **context) * executor startup-time or executor run-time pruning. * * Returns true if any executor partition pruning should be attempted at this - * level. Also fills fields of *pinfo to record how to process each step. + * level. Also fills fields of *prelinfo to record how to process each step. */ static bool -analyze_partkey_exprs(PartitionPruneInfo *pinfo, List *steps, int partnatts) +analyze_partkey_exprs(PartitionedRelPruneInfo *prelinfo, List *steps, + int partnatts) { bool doruntimeprune = false; ListCell *lc; @@ -2765,11 +2927,12 @@ analyze_partkey_exprs(PartitionPruneInfo *pinfo, List *steps, int partnatts) * Otherwise, if their expressions aren't simple Consts, they require * startup-time pruning. */ - pinfo->nexprs = list_length(steps) * partnatts; - pinfo->hasexecparam = (bool *) palloc0(sizeof(bool) * pinfo->nexprs); - pinfo->do_initial_prune = false; - pinfo->do_exec_prune = false; - pinfo->execparamids = NULL; + prelinfo->nexprs = list_length(steps) * partnatts; + prelinfo->hasexecparam = (bool *) palloc0(sizeof(bool) * + prelinfo->nexprs); + prelinfo->do_initial_prune = false; + prelinfo->do_exec_prune = false; + prelinfo->execparamids = NULL; foreach(lc, steps) { @@ -2793,16 +2956,16 @@ analyze_partkey_exprs(PartitionPruneInfo *pinfo, List *steps, int partnatts) step->step.step_id, keyno); - Assert(stateidx < pinfo->nexprs); + Assert(stateidx < prelinfo->nexprs); hasexecparams = !bms_is_empty(execparamids); - pinfo->hasexecparam[stateidx] = hasexecparams; - pinfo->execparamids = bms_join(pinfo->execparamids, - execparamids); + prelinfo->hasexecparam[stateidx] = hasexecparams; + prelinfo->execparamids = bms_join(prelinfo->execparamids, + execparamids); if (hasexecparams) - pinfo->do_exec_prune = true; + prelinfo->do_exec_prune = true; else - 
pinfo->do_initial_prune = true; + prelinfo->do_initial_prune = true; doruntimeprune = true; } diff --git a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h index 862bf65060..c1bee3dd31 100644 --- a/src/include/executor/execPartition.h +++ b/src/include/executor/execPartition.h @@ -113,13 +113,13 @@ typedef struct PartitionTupleRouting } PartitionTupleRouting; /*----------------------- - * PartitionPruningData - Per-partitioned-table data for run-time pruning + * PartitionedRelPruningData - Per-partitioned-table data for run-time pruning * of partitions. For a multilevel partitioned table, we have one of these * for the topmost partition plus one for each non-leaf child partition, * ordered such that parents appear before their children. * * subplan_map[] and subpart_map[] have the same definitions as in - * PartitionPruneInfo (see plannodes.h); though note that here, + * PartitionedRelPruneInfo (see plannodes.h); though note that here, * subpart_map contains indexes into PartitionPruneState.partprunedata[]. * * subplan_map Subplan index by partition index, or -1. @@ -136,7 +136,7 @@ typedef struct PartitionTupleRouting * executor run (for this partitioning level). *----------------------- */ -typedef struct PartitionPruningData +typedef struct PartitionedRelPruningData { int *subplan_map; int *subpart_map; @@ -145,6 +145,17 @@ typedef struct PartitionPruningData List *pruning_steps; bool do_initial_prune; bool do_exec_prune; +} PartitionedRelPruningData; + +/* + * PartitionPruningData - Encapsulates an array of PartitionedRelPruningData + * which belong to a single partition hierarchy containing 1 or more + * partitions. + */ +typedef struct PartitionPruningData +{ + PartitionedRelPruningData *partrelprunedata; + int num_partrelprunedata; } PartitionPruningData; /*----------------------- @@ -170,6 +181,11 @@ typedef struct PartitionPruningData * any of the partprunedata structs. 
Pruning must be * done again each time the value of one of these * parameters changes. + * other_subplans Contains subplan indexes which don't belong to any + * 'partprunedata', e.g. UNION ALL children that are not + * partitioned tables or a partitioned table that the + * planner deemed run-time pruning to be useless for. + * These must not be pruned. * prune_context A short-lived memory context in which to execute the * partition pruning functions. *----------------------- @@ -181,6 +197,7 @@ typedef struct PartitionPruneState bool do_initial_prune; bool do_exec_prune; Bitmapset *execparamids; + Bitmapset *other_subplans; MemoryContext prune_context; } PartitionPruneState; @@ -209,7 +226,7 @@ extern HeapTuple ConvertPartitionTupleSlot(TupleConversionMap *map, extern void ExecCleanupTupleRouting(ModifyTableState *mtstate, PartitionTupleRouting *proute); extern PartitionPruneState *ExecCreatePartitionPruneState(PlanState *planstate, - List *partitionpruneinfo); + PartitionPruneInfo *partitionpruneinfo); extern void ExecDestroyPartitionPruneState(PartitionPruneState *prunestate); extern Bitmapset *ExecFindMatchingSubPlans(PartitionPruneState *prunestate); extern Bitmapset *ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate, diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 43f1552241..697d3d7a5f 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -88,6 +88,7 @@ typedef enum NodeTag T_NestLoopParam, T_PlanRowMark, T_PartitionPruneInfo, + T_PartitionedRelPruneInfo, T_PartitionPruneStepOp, T_PartitionPruneStepCombine, T_PlanInvalItem, diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 5201c6d4bc..b341aa7f35 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -241,6 +241,8 @@ typedef struct ModifyTable List *exclRelTlist; /* tlist of the EXCLUDED pseudo relation */ } ModifyTable; +struct PartitionPruneInfo; + /* ---------------- * Append node - * Generate
the concatenation of the results of sub-plans. @@ -260,8 +262,8 @@ typedef struct Append /* RT indexes of non-leaf tables in a partition tree */ List *partitioned_rels; - /* Info for run-time subplan pruning, one entry per partitioned_rels */ - List *part_prune_infos; /* List of PartitionPruneInfo */ + /* Info for run-time subplan pruning */ + struct PartitionPruneInfo *part_prune_info; } Append; /* ---------------- @@ -1051,18 +1053,34 @@ typedef struct PlanRowMark */ /* - * PartitionPruneInfo - Details required to allow the executor to prune + * PartitionPruneInfo- - Details required to allow the executor to prune * partitions. * + * prune_infos List of Lists containing PartitionedRelPruneInfo + * other_subplans Indexes of any subplans which are not accounted for + * by any of the PartitionedRelPruneInfo stored in + * 'prune_infos'. + */ +typedef struct PartitionPruneInfo +{ + NodeTag type; + List *prune_infos; + Bitmapset *other_subplans; +} PartitionPruneInfo; + +/* + * PartitionedRelPruneInfo - Details required to allow the executor to prune + * partitions for a single partitioned table. + * * Here we store mapping details to allow translation of a partitioned table's * index as returned by the partition pruning code into subplan indexes for * plan types which support arbitrary numbers of subplans, such as Append. * We also store various details to tell the executor when it should be * performing partition pruning. * - * Each PartitionPruneInfo describes the partitioning rules for a single + * Each PartitionedRelPruneInfo describes the partitioning rules for a single * partitioned table (a/k/a level of partitioning). For a multilevel - * partitioned table, we have a List of PartitionPruneInfos, where the + * partitioned table, we have a List of PartitionedRelPruneInfo, where the * first entry represents the topmost partitioned table and additional * entries represent non-leaf child partitions, ordered such that parents * appear before their children. 
@@ -1073,11 +1091,12 @@ typedef struct PlanRowMark * zero-based index of the partition's subplan in the parent plan's subplan * list; it is -1 if the partition is non-leaf or has been pruned. For a * non-leaf partition p, subpart_map[p] contains the zero-based index of - * that sub-partition's PartitionPruneInfo in the plan's PartitionPruneInfo - * list; it is -1 if the partition is a leaf or has been pruned. All these - * indexes are global across the whole partitioned table and Append plan node. + * that sub-partition's PartitionedRelPruneInfo in the plan's + * PartitionedRelPruneInfo list; it is -1 if the partition is a leaf or has + * been pruned. All these indexes are global across the whole partitioned + * table and the parenting plan node. */ -typedef struct PartitionPruneInfo +typedef struct PartitionedRelPruneInfo { NodeTag type; Oid reloid; /* OID of partition rel for this level */ @@ -1095,7 +1114,7 @@ typedef struct PartitionPruneInfo bool do_exec_prune; /* true if pruning should be performed during * executor run. 
*/ Bitmapset *execparamids; /* All PARAM_EXEC Param IDs in pruning_steps */ -} PartitionPruneInfo; +} PartitionedRelPruneInfo; /* * Abstract Node type for partition pruning steps (there are no concrete diff --git a/src/include/partitioning/partprune.h b/src/include/partitioning/partprune.h index 9944d2832f..df3bcb737d 100644 --- a/src/include/partitioning/partprune.h +++ b/src/include/partitioning/partprune.h @@ -74,7 +74,8 @@ typedef struct PartitionPruneContext #define PruneCxtStateIdx(partnatts, step_id, keyno) \ ((partnatts) * (step_id) + (keyno)) -extern List *make_partition_pruneinfo(PlannerInfo *root, +extern PartitionPruneInfo *make_partition_pruneinfo(PlannerInfo *root, + RelOptInfo *parentrel, List *partitioned_rels, List *subpaths, List *prunequal); extern Relids prune_append_rel_partitions(RelOptInfo *rel); diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out index 9059147e17..65b979eb6a 100644 --- a/src/test/regress/expected/partition_prune.out +++ b/src/test/regress/expected/partition_prune.out @@ -2326,6 +2326,96 @@ select * from ab where a = (select max(a) from lprt_a) and b = (select max(a)-1 Index Cond: (a = $0) (52 rows) +-- Test run-time partition pruning with UNION ALL parents +explain (analyze, costs off, summary off, timing off) +select * from (select * from ab where a = 1 union all select * from ab) ab where b = (select 1); + QUERY PLAN +------------------------------------------------------------------------------- + Append (actual rows=0 loops=1) + InitPlan 1 (returns $0) + -> Result (actual rows=1 loops=1) + -> Append (actual rows=0 loops=1) + -> Bitmap Heap Scan on ab_a1_b1 ab_a1_b1_1 (actual rows=0 loops=1) + Recheck Cond: (a = 1) + Filter: (b = $0) + -> Bitmap Index Scan on ab_a1_b1_a_idx (actual rows=0 loops=1) + Index Cond: (a = 1) + -> Bitmap Heap Scan on ab_a1_b2 ab_a1_b2_1 (never executed) + Recheck Cond: (a = 1) + Filter: (b = $0) + -> Bitmap Index Scan on ab_a1_b2_a_idx 
(never executed) + Index Cond: (a = 1) + -> Bitmap Heap Scan on ab_a1_b3 ab_a1_b3_1 (never executed) + Recheck Cond: (a = 1) + Filter: (b = $0) + -> Bitmap Index Scan on ab_a1_b3_a_idx (never executed) + Index Cond: (a = 1) + -> Seq Scan on ab_a1_b1 (actual rows=0 loops=1) + Filter: (b = $0) + -> Seq Scan on ab_a1_b2 (never executed) + Filter: (b = $0) + -> Seq Scan on ab_a1_b3 (never executed) + Filter: (b = $0) + -> Seq Scan on ab_a2_b1 (actual rows=0 loops=1) + Filter: (b = $0) + -> Seq Scan on ab_a2_b2 (never executed) + Filter: (b = $0) + -> Seq Scan on ab_a2_b3 (never executed) + Filter: (b = $0) + -> Seq Scan on ab_a3_b1 (actual rows=0 loops=1) + Filter: (b = $0) + -> Seq Scan on ab_a3_b2 (never executed) + Filter: (b = $0) + -> Seq Scan on ab_a3_b3 (never executed) + Filter: (b = $0) +(37 rows) + +-- A case containing a UNION ALL with a non-partitioned child. +explain (analyze, costs off, summary off, timing off) +select * from (select * from ab where a = 1 union all (values(10,5)) union all select * from ab) ab where b = (select 1); + QUERY PLAN +------------------------------------------------------------------------------- + Append (actual rows=0 loops=1) + InitPlan 1 (returns $0) + -> Result (actual rows=1 loops=1) + -> Append (actual rows=0 loops=1) + -> Bitmap Heap Scan on ab_a1_b1 ab_a1_b1_1 (actual rows=0 loops=1) + Recheck Cond: (a = 1) + Filter: (b = $0) + -> Bitmap Index Scan on ab_a1_b1_a_idx (actual rows=0 loops=1) + Index Cond: (a = 1) + -> Bitmap Heap Scan on ab_a1_b2 ab_a1_b2_1 (never executed) + Recheck Cond: (a = 1) + Filter: (b = $0) + -> Bitmap Index Scan on ab_a1_b2_a_idx (never executed) + Index Cond: (a = 1) + -> Bitmap Heap Scan on ab_a1_b3 ab_a1_b3_1 (never executed) + Recheck Cond: (a = 1) + Filter: (b = $0) + -> Bitmap Index Scan on ab_a1_b3_a_idx (never executed) + Index Cond: (a = 1) + -> Result (actual rows=0 loops=1) + One-Time Filter: (5 = $0) + -> Seq Scan on ab_a1_b1 (actual rows=0 loops=1) + Filter: (b = $0) + -> Seq Scan 
on ab_a1_b2 (never executed) + Filter: (b = $0) + -> Seq Scan on ab_a1_b3 (never executed) + Filter: (b = $0) + -> Seq Scan on ab_a2_b1 (actual rows=0 loops=1) + Filter: (b = $0) + -> Seq Scan on ab_a2_b2 (never executed) + Filter: (b = $0) + -> Seq Scan on ab_a2_b3 (never executed) + Filter: (b = $0) + -> Seq Scan on ab_a3_b1 (actual rows=0 loops=1) + Filter: (b = $0) + -> Seq Scan on ab_a3_b2 (never executed) + Filter: (b = $0) + -> Seq Scan on ab_a3_b3 (never executed) + Filter: (b = $0) +(39 rows) + deallocate ab_q1; deallocate ab_q2; deallocate ab_q3; diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql index 11b92bfada..c5203f1c95 100644 --- a/src/test/regress/sql/partition_prune.sql +++ b/src/test/regress/sql/partition_prune.sql @@ -526,6 +526,14 @@ reset max_parallel_workers_per_gather; explain (analyze, costs off, summary off, timing off) select * from ab where a = (select max(a) from lprt_a) and b = (select max(a)-1 from lprt_a); +-- Test run-time partition pruning with UNION ALL parents +explain (analyze, costs off, summary off, timing off) +select * from (select * from ab where a = 1 union all select * from ab) ab where b = (select 1); + +-- A case containing a UNION ALL with a non-partitioned child. +explain (analyze, costs off, summary off, timing off) +select * from (select * from ab where a = 1 union all (values(10,5)) union all select * from ab) ab where b = (select 1); + deallocate ab_q1; deallocate ab_q2; deallocate ab_q3; -- 2.16.2.windows.1