From 3c0c7f9f5f8bdf89c6afd06e26ba6d5490af9221 Mon Sep 17 00:00:00 2001 From: amitlan Date: Wed, 22 Dec 2021 16:55:17 +0900 Subject: [PATCH v13] Optimize AcquireExecutorLocks() to skip pruned partitions --- src/backend/commands/copyto.c | 2 +- src/backend/commands/createas.c | 2 +- src/backend/commands/explain.c | 7 +- src/backend/commands/extension.c | 2 +- src/backend/commands/matview.c | 2 +- src/backend/commands/prepare.c | 26 ++- src/backend/executor/README | 27 +++ src/backend/executor/execMain.c | 46 +++++ src/backend/executor/execParallel.c | 28 ++- src/backend/executor/execPartition.c | 238 ++++++++++++++++++++---- src/backend/executor/execUtils.c | 1 + src/backend/executor/functions.c | 2 +- src/backend/executor/nodeAppend.c | 16 +- src/backend/executor/nodeMergeAppend.c | 9 +- src/backend/executor/spi.c | 27 ++- src/backend/nodes/copyfuncs.c | 33 +++- src/backend/nodes/outfuncs.c | 36 +++- src/backend/nodes/readfuncs.c | 56 +++++- src/backend/optimizer/plan/createplan.c | 25 +-- src/backend/optimizer/plan/planner.c | 3 + src/backend/optimizer/plan/setrefs.c | 104 ++++++++--- src/backend/partitioning/partprune.c | 59 +++++- src/backend/tcop/postgres.c | 8 +- src/backend/tcop/pquery.c | 25 ++- src/backend/utils/cache/plancache.c | 184 +++++++++++++++--- src/backend/utils/mmgr/portalmem.c | 19 ++ src/include/commands/explain.h | 3 +- src/include/executor/execPartition.h | 12 +- src/include/executor/execdesc.h | 3 + src/include/executor/executor.h | 2 + src/include/nodes/execnodes.h | 30 +++ src/include/nodes/nodes.h | 4 + src/include/nodes/pathnodes.h | 15 ++ src/include/nodes/plannodes.h | 39 +++- src/include/partitioning/partprune.h | 8 +- src/include/utils/plancache.h | 3 +- src/include/utils/portal.h | 3 + 37 files changed, 942 insertions(+), 167 deletions(-) diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c index 55c38b04c4..d403eb2309 100644 --- a/src/backend/commands/copyto.c +++ b/src/backend/commands/copyto.c @@ -542,7 
+542,7 @@ BeginCopyTo(ParseState *pstate, ((DR_copy *) dest)->cstate = cstate; /* Create a QueryDesc requesting no output */ - cstate->queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext, + cstate->queryDesc = CreateQueryDesc(plan, NULL, pstate->p_sourcetext, GetActiveSnapshot(), InvalidSnapshot, dest, NULL, NULL, 0); diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c index 9abbb6b555..f6607f2454 100644 --- a/src/backend/commands/createas.c +++ b/src/backend/commands/createas.c @@ -325,7 +325,7 @@ ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt, UpdateActiveSnapshotCommandId(); /* Create a QueryDesc, redirecting output to our tuple receiver */ - queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext, + queryDesc = CreateQueryDesc(plan, NULL, pstate->p_sourcetext, GetActiveSnapshot(), InvalidSnapshot, dest, params, queryEnv, 0); diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 1e5701b8eb..7ba9852e51 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -407,7 +407,7 @@ ExplainOneQuery(Query *query, int cursorOptions, } /* run it (if needed) and produce output */ - ExplainOnePlan(plan, into, es, queryString, params, queryEnv, + ExplainOnePlan(plan, NULL, into, es, queryString, params, queryEnv, &planduration, (es->buffers ? &bufusage : NULL)); } } @@ -515,7 +515,8 @@ ExplainOneUtility(Node *utilityStmt, IntoClause *into, ExplainState *es, * to call it. 
*/ void -ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, +ExplainOnePlan(PlannedStmt *plannedstmt, PartitionPruneResult *part_prune_result, + IntoClause *into, ExplainState *es, const char *queryString, ParamListInfo params, QueryEnvironment *queryEnv, const instr_time *planduration, const BufferUsage *bufusage) @@ -563,7 +564,7 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es, dest = None_Receiver; /* Create a QueryDesc for the query */ - queryDesc = CreateQueryDesc(plannedstmt, queryString, + queryDesc = CreateQueryDesc(plannedstmt, part_prune_result, queryString, GetActiveSnapshot(), InvalidSnapshot, dest, params, queryEnv, instrument_option); diff --git a/src/backend/commands/extension.c b/src/backend/commands/extension.c index 1013790dbb..54734a3a93 100644 --- a/src/backend/commands/extension.c +++ b/src/backend/commands/extension.c @@ -776,7 +776,7 @@ execute_sql_string(const char *sql) { QueryDesc *qdesc; - qdesc = CreateQueryDesc(stmt, + qdesc = CreateQueryDesc(stmt, NULL, sql, GetActiveSnapshot(), NULL, dest, NULL, NULL, 0); diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c index 9ab248d25e..2be1782bc4 100644 --- a/src/backend/commands/matview.c +++ b/src/backend/commands/matview.c @@ -416,7 +416,7 @@ refresh_matview_datafill(DestReceiver *dest, Query *query, UpdateActiveSnapshotCommandId(); /* Create a QueryDesc, redirecting output to our tuple receiver */ - queryDesc = CreateQueryDesc(plan, queryString, + queryDesc = CreateQueryDesc(plan, NULL, queryString, GetActiveSnapshot(), InvalidSnapshot, dest, NULL, NULL, 0); diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c index 80738547ed..c7360712b1 100644 --- a/src/backend/commands/prepare.c +++ b/src/backend/commands/prepare.c @@ -155,6 +155,7 @@ ExecuteQuery(ParseState *pstate, PreparedStatement *entry; CachedPlan *cplan; List *plan_list; + List *part_prune_result_list; ParamListInfo paramLI = 
NULL; EState *estate = NULL; Portal portal; @@ -193,7 +194,10 @@ ExecuteQuery(ParseState *pstate, entry->plansource->query_string); /* Replan if needed, and increment plan refcount for portal */ - cplan = GetCachedPlan(entry->plansource, paramLI, NULL, NULL); + cplan = GetCachedPlan(entry->plansource, paramLI, NULL, NULL, + &part_prune_result_list); + Assert(list_length(cplan->stmt_list) == + list_length(part_prune_result_list)); plan_list = cplan->stmt_list; /* @@ -207,6 +211,9 @@ ExecuteQuery(ParseState *pstate, plan_list, cplan); + /* Copy PartitionPruneResults into the portal's context. */ + PortalStorePartitionPruneResults(portal, part_prune_result_list); + /* * For CREATE TABLE ... AS EXECUTE, we must verify that the prepared * statement is one that produces tuples. Currently we insist that it be @@ -576,7 +583,9 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es, const char *query_string; CachedPlan *cplan; List *plan_list; - ListCell *p; + List *part_prune_result_list; + ListCell *p, + *pp; ParamListInfo paramLI = NULL; EState *estate = NULL; instr_time planstart; @@ -619,7 +628,10 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es, /* Replan if needed, and acquire a transient refcount */ cplan = GetCachedPlan(entry->plansource, paramLI, - CurrentResourceOwner, queryEnv); + CurrentResourceOwner, queryEnv, + &part_prune_result_list); + Assert(list_length(cplan->stmt_list) == + list_length(part_prune_result_list)); INSTR_TIME_SET_CURRENT(planduration); INSTR_TIME_SUBTRACT(planduration, planstart); @@ -634,13 +646,15 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es, plan_list = cplan->stmt_list; /* Explain each query */ - foreach(p, plan_list) + forboth(p, plan_list, pp, part_prune_result_list) { PlannedStmt *pstmt = lfirst_node(PlannedStmt, p); + PartitionPruneResult *part_prune_result = lfirst_node(PartitionPruneResult, pp); if (pstmt->commandType != CMD_UTILITY) - 
ExplainOnePlan(pstmt, into, es, query_string, paramLI, queryEnv, - &planduration, (es->buffers ? &bufusage : NULL)); + ExplainOnePlan(pstmt, part_prune_result, into, es, query_string, + paramLI, queryEnv, &planduration, + (es->buffers ? &bufusage : NULL)); else ExplainOneUtility(pstmt->utilityStmt, into, es, query_string, paramLI, queryEnv); diff --git a/src/backend/executor/README b/src/backend/executor/README index 0b5183fc4a..e0802be723 100644 --- a/src/backend/executor/README +++ b/src/backend/executor/README @@ -65,6 +65,29 @@ found there. This currently only occurs for Append and MergeAppend nodes. In this case the non-required subplans are ignored and the executor state's subnode array will become out of sequence to the plan's subplan list. +Actually, the so-called execution time pruning may also occur even before the +execution has started. One case where that occurs is when a cached generic +plan is being validated for execution by plancache.c: GetCachedPlan(), which +proceeds by locking all the relations that will be scanned by that plan. If +the generic plan contains nodes that can perform execution time partition +pruning (that is, contain a PartitionPruneInfo), a subset of pruning steps +contained in the PartitionPruneInfos that do not depend on execution actually +having started (called "initial" pruning steps) are performed at this point +to figure out the minimal set of child subplans that satisfy those pruning +instructions. AcquireExecutorLocks() looking at a particular plan will then +lock only the relations scanned by those surviving subplans (along with those +present in PlannedStmt.minLockRelids), and ignore those scanned by the pruned +subplans, even though the pruned subplans themselves are not removed from the +plan tree. 
The result of pruning (that is, the set of indexes of surviving +subplans in their parent's list of child subplans) is saved as a list of +bitmapsets, with one element for every PartitionPruneInfo referenced in the +plan (PlannedStmt.partPruneInfos). The list is packaged into a +PartitionPruneResult node, which is passed along with the PlannedStmt to the +executor via the QueryDesc. It is imperative that the executor and any third +party code invoked by it that gets passed the plan tree look at the plan's +PartitionPruneResult to determine whether a particular child subplan of a +parent node that supports pruning is valid for a given execution. + Each Plan node may have expression trees associated with it, to represent its target list, qualification conditions, etc. These trees are also read-only to the executor, but the executor state for expression evaluation @@ -286,6 +309,10 @@ Query Processing Control Flow This is a sketch of control flow for full query processing: + [ ExecutorDoInitialPruning ] --- an optional step to perform initial + partition pruning on the plan tree the result of which is passed + to the executor via QueryDesc + CreateQueryDesc ExecutorStart diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index ef2fd46092..05cc99df8f 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -49,11 +49,13 @@ #include "commands/matview.h" #include "commands/trigger.h" #include "executor/execdebug.h" +#include "executor/execPartition.h" #include "executor/nodeSubplan.h" #include "foreign/fdwapi.h" #include "jit/jit.h" #include "mb/pg_wchar.h" #include "miscadmin.h" +#include "nodes/nodeFuncs.h" #include "parser/parsetree.h" #include "storage/bufmgr.h" #include "storage/lmgr.h" @@ -104,6 +106,47 @@ static void EvalPlanQualStart(EPQState *epqstate, Plan *planTree); /* end of local decls */ +/* ---------------------------------------------------------------- + * ExecutorDoInitialPruning + * + * Performs 
initial partition pruning to figure out the minimal set of + * subplans to be executed and the set of RT indexes of the corresponding + * leaf partitions + * + * Returned PartitionPruneResult must be subsequently passed to the executor + * so that it can reuse the result of pruning. It's important that the + * executor has the same view of which partitions are initially pruned (by not doing + * the pruning again itself) or otherwise it risks initializing subplans whose + * partitions would not have been locked. + * + * Note: Partitioned tables mentioned in PartitionedRelPruneInfo nodes that + * drive the pruning will be locked before doing the pruning. + */ +PartitionPruneResult * +ExecutorDoInitialPruning(PlannedStmt *plannedstmt, ParamListInfo params) +{ + PartitionPruneResult *result; + ListCell *lc; + + /* Only get here if there is any pruning to do. */ + Assert(plannedstmt->containsInitialPruning); + + result = makeNode(PartitionPruneResult); + foreach(lc, plannedstmt->partPruneInfos) + { + PartitionPruneInfo *pruneinfo = lfirst(lc); + Bitmapset *valid_subplan_offs; + + valid_subplan_offs = + ExecPartitionDoInitialPruning(plannedstmt, params, pruneinfo, + &result->scan_leafpart_rtis); + result->valid_subplan_offs_list = + lappend(result->valid_subplan_offs_list, + valid_subplan_offs); + } + + return result; +} /* ---------------------------------------------------------------- * ExecutorStart @@ -806,6 +849,7 @@ InitPlan(QueryDesc *queryDesc, int eflags) { CmdType operation = queryDesc->operation; PlannedStmt *plannedstmt = queryDesc->plannedstmt; + PartitionPruneResult *part_prune_result = queryDesc->part_prune_result; Plan *plan = plannedstmt->planTree; List *rangeTable = plannedstmt->rtable; EState *estate = queryDesc->estate; @@ -825,6 +869,8 @@ InitPlan(QueryDesc *queryDesc, int eflags) ExecInitRangeTable(estate, rangeTable); estate->es_plannedstmt = plannedstmt; + estate->es_part_prune_infos = plannedstmt->partPruneInfos; + estate->es_part_prune_result =
part_prune_result; /* * Next, build the ExecRowMark array from the PlanRowMark(s), if any. diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index 9a0d5d59ef..805f86c503 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -66,6 +66,7 @@ #define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xE000000000000008) #define PARALLEL_KEY_JIT_INSTRUMENTATION UINT64CONST(0xE000000000000009) #define PARALLEL_KEY_WAL_USAGE UINT64CONST(0xE00000000000000A) +#define PARALLEL_KEY_PARTITIONPRUNERESULT UINT64CONST(0xE00000000000000B) #define PARALLEL_TUPLE_QUEUE_SIZE 65536 @@ -182,7 +183,9 @@ ExecSerializePlan(Plan *plan, EState *estate) pstmt->transientPlan = false; pstmt->dependsOnRole = false; pstmt->parallelModeNeeded = false; + pstmt->containsInitialPruning = false; pstmt->planTree = plan; + pstmt->partPruneInfos = estate->es_part_prune_infos; pstmt->rtable = estate->es_range_table; pstmt->resultRelations = NIL; pstmt->appendRelations = NIL; @@ -596,12 +599,15 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, FixedParallelExecutorState *fpes; char *pstmt_data; char *pstmt_space; + char *part_prune_result_data; + char *part_prune_result_space; char *paramlistinfo_space; BufferUsage *bufusage_space; WalUsage *walusage_space; SharedExecutorInstrumentation *instrumentation = NULL; SharedJitInstrumentation *jit_instrumentation = NULL; int pstmt_len; + int part_prune_result_len; int paramlistinfo_len; int instrumentation_len = 0; int jit_instrumentation_len = 0; @@ -630,6 +636,7 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, /* Fix up and serialize plan to be sent to workers. */ pstmt_data = ExecSerializePlan(planstate->plan, estate); + part_prune_result_data = nodeToString(estate->es_part_prune_result); /* Create a parallel context. 
*/ pcxt = CreateParallelContext("postgres", "ParallelQueryMain", nworkers); @@ -656,6 +663,11 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, shm_toc_estimate_chunk(&pcxt->estimator, pstmt_len); shm_toc_estimate_keys(&pcxt->estimator, 1); + /* Estimate space for serialized PartitionPruneResult. */ + part_prune_result_len = strlen(part_prune_result_data) + 1; + shm_toc_estimate_chunk(&pcxt->estimator, part_prune_result_len); + shm_toc_estimate_keys(&pcxt->estimator, 1); + /* Estimate space for serialized ParamListInfo. */ paramlistinfo_len = EstimateParamListSpace(estate->es_param_list_info); shm_toc_estimate_chunk(&pcxt->estimator, paramlistinfo_len); @@ -750,6 +762,12 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, memcpy(pstmt_space, pstmt_data, pstmt_len); shm_toc_insert(pcxt->toc, PARALLEL_KEY_PLANNEDSTMT, pstmt_space); + /* Store serialized PartitionPruneResult */ + part_prune_result_space = shm_toc_allocate(pcxt->toc, part_prune_result_len); + memcpy(part_prune_result_space, part_prune_result_data, part_prune_result_len); + shm_toc_insert(pcxt->toc, PARALLEL_KEY_PARTITIONPRUNERESULT, + part_prune_result_space); + /* Store serialized ParamListInfo. */ paramlistinfo_space = shm_toc_allocate(pcxt->toc, paramlistinfo_len); shm_toc_insert(pcxt->toc, PARALLEL_KEY_PARAMLISTINFO, paramlistinfo_space); @@ -1231,8 +1249,10 @@ ExecParallelGetQueryDesc(shm_toc *toc, DestReceiver *receiver, int instrument_options) { char *pstmtspace; + char *part_prune_result_space; char *paramspace; PlannedStmt *pstmt; + PartitionPruneResult *part_prune_result; ParamListInfo paramLI; char *queryString; @@ -1243,12 +1263,18 @@ ExecParallelGetQueryDesc(shm_toc *toc, DestReceiver *receiver, pstmtspace = shm_toc_lookup(toc, PARALLEL_KEY_PLANNEDSTMT, false); pstmt = (PlannedStmt *) stringToNode(pstmtspace); + /* Reconstruct leader-supplied PartitionPruneResult. 
*/ + part_prune_result_space = + shm_toc_lookup(toc, PARALLEL_KEY_PARTITIONPRUNERESULT, false); + part_prune_result = (PartitionPruneResult *) + stringToNode(part_prune_result_space); + /* Reconstruct ParamListInfo. */ paramspace = shm_toc_lookup(toc, PARALLEL_KEY_PARAMLISTINFO, false); paramLI = RestoreParamList(&paramspace); /* Create a QueryDesc for the query. */ - return CreateQueryDesc(pstmt, + return CreateQueryDesc(pstmt, part_prune_result, queryString, GetActiveSnapshot(), InvalidSnapshot, receiver, paramLI, NULL, instrument_options); diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 615bd80973..3037742b8d 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -25,6 +25,7 @@ #include "mb/pg_wchar.h" #include "miscadmin.h" #include "nodes/makefuncs.h" +#include "parser/parsetree.h" #include "partitioning/partbounds.h" #include "partitioning/partdesc.h" #include "partitioning/partprune.h" @@ -185,7 +186,11 @@ static char *ExecBuildSlotPartitionKeyDescription(Relation rel, static List *adjust_partition_colnos(List *colnos, ResultRelInfo *leaf_part_rri); static List *adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap); static PartitionPruneState *CreatePartitionPruneState(PlanState *planstate, - PartitionPruneInfo *pruneinfo); + PartitionPruneInfo *pruneinfo, + bool consider_initial_steps, + bool consider_exec_steps, + List *rtable, ExprContext *econtext, + PartitionDirectory partdir); static void InitPartitionPruneContext(PartitionPruneContext *context, List *pruning_steps, PartitionDesc partdesc, @@ -198,7 +203,8 @@ static void PartitionPruneFixSubPlanMap(PartitionPruneState *prunestate, static void find_matching_subplans_recurse(PartitionPruningData *prunedata, PartitionedRelPruningData *pprune, bool initial_prune, - Bitmapset **validsubplans); + Bitmapset **validsubplans, + Bitmapset **scan_leafpart_rtis); /* @@ -1587,8 +1593,10 @@
adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap) * considered to be a stable expression, it can change value from one plan * node scan to the next during query execution. Stable comparison * expressions that don't involve such Params allow partition pruning to be - * done once during executor startup. Expressions that do involve such Params - * require us to prune separately for each scan of the parent plan node. + * done once during executor startup or during ExecutorDoInitialPruning() that + * runs as part of performing AcquireExecutorLocks() on a given plan tree. + * Expressions that do involve such Params require us to prune separately for + * each scan of the parent plan node. * * Note that pruning away unneeded subplans during executor startup has the * added benefit of not having to initialize the unneeded subplans at all. @@ -1605,6 +1613,13 @@ adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap) * account for initial pruning possibly having eliminated some of the * subplans. * + * ExecPartitionDoInitialPruning: + * Do initial pruning with the information contained in a given + * PartitionPruneInfo to determine the minimal set of child subplans + * to be executed of the parent plan node to which the PartitionPruneInfo + * belongs and also the set of the RT indexes of leaf partitions that will + * be scanned with those subplans. + * * ExecFindMatchingSubPlans: * Returns indexes of matching subplans after evaluating the expressions * that are safe to evaluate at a given point. This function is first @@ -1622,8 +1637,9 @@ adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap) * * On return, *initially_valid_subplans is assigned the set of indexes of * child subplans that must be initialized along with the parent plan node. - * Initial pruning is performed here if needed and in that case only the - * surviving subplans' indexes are added. 
+ * Initial pruning is performed here if needed (unless it has already been done + * by ExecutorDoInitialPruning()), and in that case only the surviving subplans' + * indexes are added. * * If subplans are indeed pruned, subplan_map arrays contained in the returned * PartitionPruneState are re-sequenced to not count those, though only if the @@ -1632,23 +1648,59 @@ adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap) PartitionPruneState * ExecInitPartitionPruning(PlanState *planstate, int n_total_subplans, - PartitionPruneInfo *pruneinfo, + int part_prune_index, Bitmapset **initially_valid_subplans) { - PartitionPruneState *prunestate; EState *estate = planstate->state; + PartitionPruneInfo *pruneinfo = list_nth(estate->es_part_prune_infos, + part_prune_index); + PartitionPruneResult *pruneresult = estate->es_part_prune_result; + PartitionPruneState *prunestate; + bool do_pruning = (pruneinfo->needs_init_pruning || + pruneinfo->needs_exec_pruning); - /* We may need an expression context to evaluate partition exprs */ - ExecAssignExprContext(estate, planstate); + /* + * No need to do initial pruning if it was done already by + * ExecutorDoInitialPruning(), which it would be if es_part_prune_result + * has been set. + */ + if (pruneresult) + do_pruning = pruneinfo->needs_exec_pruning; + + prunestate = NULL; + if (do_pruning) + { + /* We may need an expression context to evaluate partition exprs */ + ExecAssignExprContext(estate, planstate); - /* Create the working data structure for pruning */ - prunestate = CreatePartitionPruneState(planstate, pruneinfo); + /* For data reading, executor always omits detached partitions */ + if (estate->es_partition_directory == NULL) + estate->es_partition_directory = + CreatePartitionDirectory(estate->es_query_cxt, false); + + /* + * Create the working data structure for pruning. No need to consider + * initial pruning steps if we have a PartitionPruneResult.
+ */ + prunestate = CreatePartitionPruneState(planstate, pruneinfo, + pruneresult == NULL, true, + NIL, planstate->ps_ExprContext, + estate->es_partition_directory); + } /* * Perform an initial partition prune pass, if required. */ - if (prunestate->do_initial_prune) - *initially_valid_subplans = ExecFindMatchingSubPlans(prunestate, true); + if (pruneresult) + { + *initially_valid_subplans = + list_nth(pruneresult->valid_subplan_offs_list, part_prune_index); + } + else if (prunestate && prunestate->do_initial_prune) + { + *initially_valid_subplans = ExecFindMatchingSubPlans(prunestate, true, + NULL); + } else { /* No pruning, so we'll need to initialize all subplans */ @@ -1669,7 +1721,7 @@ ExecInitPartitionPruning(PlanState *planstate, * leaves invalid data in prunestate, because that data won't be * consulted again (cf initial Assert in ExecFindMatchingSubPlans). */ - if (prunestate->do_exec_prune) + if (prunestate && prunestate->do_exec_prune) PartitionPruneFixSubPlanMap(prunestate, *initially_valid_subplans, n_total_subplans); @@ -1678,11 +1730,72 @@ ExecInitPartitionPruning(PlanState *planstate, return prunestate; } +/* + * ExecPartitionDoInitialPruning + * Perform initial pruning using given PartitionPruneInfo to determine + * the minimal set of child subplans to be executed of the parent plan + * node to which the PartitionPruneInfo belongs and also the set of RT + * indexes of leaf partitions that will be scanned with those subplans. + */ +Bitmapset * +ExecPartitionDoInitialPruning(PlannedStmt *plannedstmt, ParamListInfo params, + PartitionPruneInfo *pruneinfo, + Bitmapset **scan_leafpart_rtis) +{ + List *rtable = plannedstmt->rtable; + ExprContext *econtext; + PartitionDirectory pdir; + MemoryContext oldcontext, + tmpcontext; + PartitionPruneState *prunestate; + Bitmapset *valid_subplan_offs; + + /* + * A temporary context to allocate stuff needed to run the pruning steps.
+ */ + tmpcontext = AllocSetContextCreate(CurrentMemoryContext, + "initial pruning working data", + ALLOCSET_DEFAULT_SIZES); + oldcontext = MemoryContextSwitchTo(tmpcontext); + + /* + * PartitionDirectory to look up partition descriptors, which omits + * detached partitions, just like in the executor proper. + */ + pdir = CreatePartitionDirectory(CurrentMemoryContext, false); + + /* + * We don't yet have a PlanState for the parent plan node, so must create + * a standalone ExprContext to evaluate pruning expressions, equipped with + * the information about the EXTERN parameters that the caller passed us. + * Note that that's okay because the initial pruning steps do not contain + * anything that requires the execution to have started. + */ + econtext = CreateStandaloneExprContext(); + econtext->ecxt_param_list_info = params; + prunestate = CreatePartitionPruneState(NULL, pruneinfo, true, false, + rtable, econtext, pdir); + MemoryContextSwitchTo(oldcontext); + + /* Do the initial pruning. */ + valid_subplan_offs = ExecFindMatchingSubPlans(prunestate, true, + scan_leafpart_rtis); + + FreeExprContext(econtext, true); + DestroyPartitionDirectory(pdir); + MemoryContextDelete(tmpcontext); + + return valid_subplan_offs; +} + /* * CreatePartitionPruneState * Build the data structure required for calling ExecFindMatchingSubPlans * - * 'planstate' is the parent plan node's execution state. + * 'planstate', if not NULL, is the parent plan node's execution state. It + * can be NULL if being called before ExecutorStart(), in which case, + * 'rtable' (range table), 'econtext', and 'partdir' must be explicitly + * provided. * * 'pruneinfo' is a PartitionPruneInfo as generated by * make_partition_pruneinfo. Here we build a PartitionPruneState containing a @@ -1696,19 +1809,21 @@ ExecInitPartitionPruning(PlanState *planstate, * PartitionedRelPruneInfo. 
*/ static PartitionPruneState * -CreatePartitionPruneState(PlanState *planstate, PartitionPruneInfo *pruneinfo) +CreatePartitionPruneState(PlanState *planstate, + PartitionPruneInfo *pruneinfo, + bool consider_initial_steps, + bool consider_exec_steps, + List *rtable, ExprContext *econtext, + PartitionDirectory partdir) { - EState *estate = planstate->state; + EState *estate = planstate ? planstate->state : NULL; PartitionPruneState *prunestate; int n_part_hierarchies; ListCell *lc; int i; - ExprContext *econtext = planstate->ps_ExprContext; - /* For data reading, executor always omits detached partitions */ - if (estate->es_partition_directory == NULL) - estate->es_partition_directory = - CreatePartitionDirectory(estate->es_query_cxt, false); + Assert((estate != NULL) || + (partdir != NULL && econtext != NULL && rtable != NIL)); n_part_hierarchies = list_length(pruneinfo->prune_infos); Assert(n_part_hierarchies > 0); @@ -1759,19 +1874,48 @@ CreatePartitionPruneState(PlanState *planstate, PartitionPruneInfo *pruneinfo) PartitionedRelPruneInfo *pinfo = lfirst_node(PartitionedRelPruneInfo, lc2); PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j]; Relation partrel; + bool close_partrel = false; PartitionDesc partdesc; PartitionKey partkey; /* - * We can rely on the copies of the partitioned table's partition - * key and partition descriptor appearing in its relcache entry, - * because that entry will be held open and locked for the - * duration of this executor run. + * Must open the relation by ourselves when called before the + * execution has started, such as, when called during + * ExecutorDoInitialPruning() on a cached plan. In that case, + * sub-partitions must be locked, because AcquirePlannerLocks() + * would not have seen them. (1st relation in a partrelpruneinfos + * list is always the root partitioned table appearing in the + * query, which AcquirePlannerLocks() would have locked; the + * Assert in relation_open() guards that assumption.) 
+ */ + if (estate == NULL) + { + RangeTblEntry *rte = rt_fetch(pinfo->rtindex, rtable); + int lockmode = (j == 0) ? NoLock : rte->rellockmode; + + partrel = table_open(rte->relid, lockmode); + close_partrel = true; + } + else + partrel = ExecGetRangeTableRelation(estate, pinfo->rtindex); + + /* + * We can rely on the copy of the partitioned table's partition + * key in its relcache entry, because it can't change (or + * get destroyed) as long as the relation is locked. Partition + * descriptor is taken from the PartitionDirectory associated with + * the table that is held open long enough for the descriptor to + * remain valid while it's used to perform the pruning steps. */ - partrel = ExecGetRangeTableRelation(estate, pinfo->rtindex); partkey = RelationGetPartitionKey(partrel); - partdesc = PartitionDirectoryLookup(estate->es_partition_directory, - partrel); + partdesc = PartitionDirectoryLookup(partdir, partrel); + + /* + * Must close partrel, keeping the lock taken, if we're not using + * EState's entry. + */ + if (close_partrel) + table_close(partrel, NoLock); /* * Initialize the subplan_map and subpart_map.
@@ -1785,6 +1929,7 @@ CreatePartitionPruneState(PlanState *planstate, PartitionPruneInfo *pruneinfo) Assert(partdesc->nparts >= pinfo->nparts); pprune->nparts = partdesc->nparts; pprune->subplan_map = palloc(sizeof(int) * partdesc->nparts); + pprune->rti_map = palloc(sizeof(Index) * partdesc->nparts); if (partdesc->nparts == pinfo->nparts) { /* @@ -1795,6 +1940,8 @@ CreatePartitionPruneState(PlanState *planstate, PartitionPruneInfo *pruneinfo) pprune->subpart_map = pinfo->subpart_map; memcpy(pprune->subplan_map, pinfo->subplan_map, sizeof(int) * pinfo->nparts); + memcpy(pprune->rti_map, pinfo->rti_map, + sizeof(int) * pinfo->nparts); /* * Double-check that the list of unpruned relations has not @@ -1845,6 +1992,8 @@ CreatePartitionPruneState(PlanState *planstate, PartitionPruneInfo *pruneinfo) pinfo->subplan_map[pd_idx]; pprune->subpart_map[pp_idx] = pinfo->subpart_map[pd_idx]; + pprune->rti_map[pp_idx] = + pinfo->rti_map[pd_idx]; pd_idx++; } else @@ -1852,6 +2001,7 @@ CreatePartitionPruneState(PlanState *planstate, PartitionPruneInfo *pruneinfo) /* this partdesc entry is not in the plan */ pprune->subplan_map[pp_idx] = -1; pprune->subpart_map[pp_idx] = -1; + pprune->rti_map[pp_idx] = 0; } } @@ -1873,7 +2023,7 @@ CreatePartitionPruneState(PlanState *planstate, PartitionPruneInfo *pruneinfo) * Initialize pruning contexts as needed. 
*/ pprune->initial_pruning_steps = pinfo->initial_pruning_steps; - if (pinfo->initial_pruning_steps) + if (consider_initial_steps && pinfo->initial_pruning_steps) { InitPartitionPruneContext(&pprune->initial_context, pinfo->initial_pruning_steps, @@ -1883,7 +2033,7 @@ CreatePartitionPruneState(PlanState *planstate, PartitionPruneInfo *pruneinfo) prunestate->do_initial_prune = true; } pprune->exec_pruning_steps = pinfo->exec_pruning_steps; - if (pinfo->exec_pruning_steps) + if (consider_exec_steps && pinfo->exec_pruning_steps) { InitPartitionPruneContext(&pprune->exec_context, pinfo->exec_pruning_steps, @@ -2111,10 +2261,14 @@ PartitionPruneFixSubPlanMap(PartitionPruneState *prunestate, * Pass initial_prune if PARAM_EXEC Params cannot yet be evaluated. This * differentiates the initial executor-time pruning step from later * runtime pruning. + * + * RT indexes of leaf partitions scanned by the chosen subplans are added to + * *scan_leafpart_rtis if the pointer is non-NULL. */ Bitmapset * ExecFindMatchingSubPlans(PartitionPruneState *prunestate, - bool initial_prune) + bool initial_prune, + Bitmapset **scan_leafpart_rtis) { Bitmapset *result = NULL; MemoryContext oldcontext; @@ -2149,7 +2303,7 @@ ExecFindMatchingSubPlans(PartitionPruneState *prunestate, */ pprune = &prunedata->partrelprunedata[0]; find_matching_subplans_recurse(prunedata, pprune, initial_prune, - &result); + &result, scan_leafpart_rtis); /* Expression eval may have used space in ExprContext too */ if (pprune->exec_pruning_steps) @@ -2163,6 +2317,8 @@ ExecFindMatchingSubPlans(PartitionPruneState *prunestate, /* Copy result out of the temp context before we reset it */ result = bms_copy(result); + if (scan_leafpart_rtis) + *scan_leafpart_rtis = bms_copy(*scan_leafpart_rtis); MemoryContextReset(prunestate->prune_context); @@ -2173,13 +2329,15 @@ ExecFindMatchingSubPlans(PartitionPruneState *prunestate, * find_matching_subplans_recurse * Recursive worker function for ExecFindMatchingSubPlans * - * Adds 
valid (non-prunable) subplan IDs to *validsubplans + * Adds valid (non-prunable) subplan IDs to *validsubplans and RT indexes of + * the corresponding leaf partitions to *scan_leafpart_rtis (if asked for). */ static void find_matching_subplans_recurse(PartitionPruningData *prunedata, PartitionedRelPruningData *pprune, bool initial_prune, - Bitmapset **validsubplans) + Bitmapset **validsubplans, + Bitmapset **scan_leafpart_rtis) { Bitmapset *partset; int i; @@ -2206,8 +2364,13 @@ find_matching_subplans_recurse(PartitionPruningData *prunedata, while ((i = bms_next_member(partset, i)) >= 0) { if (pprune->subplan_map[i] >= 0) + { *validsubplans = bms_add_member(*validsubplans, pprune->subplan_map[i]); + if (scan_leafpart_rtis && pprune->rti_map[i] > 0) + *scan_leafpart_rtis = bms_add_member(*scan_leafpart_rtis, + pprune->rti_map[i]); + } else { int partidx = pprune->subpart_map[i]; @@ -2215,7 +2378,8 @@ find_matching_subplans_recurse(PartitionPruningData *prunedata, if (partidx >= 0) find_matching_subplans_recurse(prunedata, &prunedata->partrelprunedata[partidx], - initial_prune, validsubplans); + initial_prune, validsubplans, + scan_leafpart_rtis); else { /* diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 9df1f81ea8..639145abe9 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -119,6 +119,7 @@ CreateExecutorState(void) estate->es_relations = NULL; estate->es_rowmarks = NULL; estate->es_plannedstmt = NULL; + estate->es_part_prune_result = NULL; estate->es_junkFilter = NULL; diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c index f9460ae506..a2182a6b1f 100644 --- a/src/backend/executor/functions.c +++ b/src/backend/executor/functions.c @@ -844,7 +844,7 @@ postquel_start(execution_state *es, SQLFunctionCachePtr fcache) else dest = None_Receiver; - es->qd = CreateQueryDesc(es->stmt, + es->qd = CreateQueryDesc(es->stmt, NULL, fcache->src, GetActiveSnapshot(), 
InvalidSnapshot, diff --git a/src/backend/executor/nodeAppend.c b/src/backend/executor/nodeAppend.c index 357e10a1d7..09f26658e2 100644 --- a/src/backend/executor/nodeAppend.c +++ b/src/backend/executor/nodeAppend.c @@ -94,6 +94,7 @@ static bool ExecAppendAsyncRequest(AppendState *node, TupleTableSlot **result); static void ExecAppendAsyncEventWait(AppendState *node); static void classify_matching_subplans(AppendState *node); + /* ---------------------------------------------------------------- * ExecInitAppend * @@ -134,7 +135,7 @@ ExecInitAppend(Append *node, EState *estate, int eflags) appendstate->as_begun = false; /* If run-time partition pruning is enabled, then set that up now */ - if (node->part_prune_info != NULL) + if (node->part_prune_index >= 0) { PartitionPruneState *prunestate; @@ -145,7 +146,7 @@ ExecInitAppend(Append *node, EState *estate, int eflags) */ prunestate = ExecInitPartitionPruning(&appendstate->ps, list_length(node->appendplans), - node->part_prune_info, + node->part_prune_index, &validsubplans); appendstate->as_prune_state = prunestate; nplans = bms_num_members(validsubplans); @@ -155,7 +156,8 @@ ExecInitAppend(Append *node, EState *estate, int eflags) * subplan, we can fill as_valid_subplans immediately, preventing * later calls to ExecFindMatchingSubPlans. 
*/ - if (!prunestate->do_exec_prune && nplans > 0) + if (appendstate->as_prune_state == NULL || + (!appendstate->as_prune_state->do_exec_prune && nplans > 0)) appendstate->as_valid_subplans = bms_add_range(NULL, 0, nplans - 1); } else @@ -577,7 +579,7 @@ choose_next_subplan_locally(AppendState *node) } else if (node->as_valid_subplans == NULL) node->as_valid_subplans = - ExecFindMatchingSubPlans(node->as_prune_state, false); + ExecFindMatchingSubPlans(node->as_prune_state, false, NULL); whichplan = -1; } @@ -642,7 +644,7 @@ choose_next_subplan_for_leader(AppendState *node) if (node->as_valid_subplans == NULL) { node->as_valid_subplans = - ExecFindMatchingSubPlans(node->as_prune_state, false); + ExecFindMatchingSubPlans(node->as_prune_state, false, NULL); /* * Mark each invalid plan as finished to allow the loop below to @@ -717,7 +719,7 @@ choose_next_subplan_for_worker(AppendState *node) else if (node->as_valid_subplans == NULL) { node->as_valid_subplans = - ExecFindMatchingSubPlans(node->as_prune_state, false); + ExecFindMatchingSubPlans(node->as_prune_state, false, NULL); mark_invalid_subplans_as_finished(node); } @@ -868,7 +870,7 @@ ExecAppendAsyncBegin(AppendState *node) if (node->as_valid_subplans == NULL) { node->as_valid_subplans = - ExecFindMatchingSubPlans(node->as_prune_state, false); + ExecFindMatchingSubPlans(node->as_prune_state, false, NULL); classify_matching_subplans(node); } diff --git a/src/backend/executor/nodeMergeAppend.c b/src/backend/executor/nodeMergeAppend.c index ecf9052e03..7708cfffda 100644 --- a/src/backend/executor/nodeMergeAppend.c +++ b/src/backend/executor/nodeMergeAppend.c @@ -82,7 +82,7 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags) mergestate->ps.ExecProcNode = ExecMergeAppend; /* If run-time partition pruning is enabled, then set that up now */ - if (node->part_prune_info != NULL) + if (node->part_prune_index >= 0) { PartitionPruneState *prunestate; @@ -93,7 +93,7 @@ ExecInitMergeAppend(MergeAppend *node, 
EState *estate, int eflags) */ prunestate = ExecInitPartitionPruning(&mergestate->ps, list_length(node->mergeplans), - node->part_prune_info, + node->part_prune_index, &validsubplans); mergestate->ms_prune_state = prunestate; nplans = bms_num_members(validsubplans); @@ -103,7 +103,8 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags) * subplan, we can fill as_valid_subplans immediately, preventing * later calls to ExecFindMatchingSubPlans. */ - if (!prunestate->do_exec_prune && nplans > 0) + if (mergestate->ms_prune_state == NULL || + (!mergestate->ms_prune_state->do_exec_prune && nplans > 0)) mergestate->ms_valid_subplans = bms_add_range(NULL, 0, nplans - 1); } else @@ -218,7 +219,7 @@ ExecMergeAppend(PlanState *pstate) */ if (node->ms_valid_subplans == NULL) node->ms_valid_subplans = - ExecFindMatchingSubPlans(node->ms_prune_state, false); + ExecFindMatchingSubPlans(node->ms_prune_state, false, NULL); /* * First time through: pull the first tuple from each valid subplan, diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c index 042a5f8b0a..729e2fd7b2 100644 --- a/src/backend/executor/spi.c +++ b/src/backend/executor/spi.c @@ -1578,6 +1578,7 @@ SPI_cursor_open_internal(const char *name, SPIPlanPtr plan, CachedPlanSource *plansource; CachedPlan *cplan; List *stmt_list; + List *part_prune_result_list; char *query_string; Snapshot snapshot; MemoryContext oldcontext; @@ -1657,7 +1658,10 @@ SPI_cursor_open_internal(const char *name, SPIPlanPtr plan, */ /* Replan if needed, and increment plan refcount for portal */ - cplan = GetCachedPlan(plansource, paramLI, NULL, _SPI_current->queryEnv); + cplan = GetCachedPlan(plansource, paramLI, NULL, _SPI_current->queryEnv, + &part_prune_result_list); + Assert(list_length(cplan->stmt_list) == + list_length(part_prune_result_list)); stmt_list = cplan->stmt_list; if (!plan->saved) @@ -1685,6 +1689,9 @@ SPI_cursor_open_internal(const char *name, SPIPlanPtr plan, stmt_list, cplan); + /* Copy 
PartitionPruneResults into the portal's context. */ + PortalStorePartitionPruneResults(portal, part_prune_result_list); + /* * Set up options for portal. Default SCROLL type is chosen the same way * as PerformCursorOpen does it. @@ -2092,7 +2099,8 @@ SPI_plan_get_cached_plan(SPIPlanPtr plan) /* Get the generic plan for the query */ cplan = GetCachedPlan(plansource, NULL, plan->saved ? CurrentResourceOwner : NULL, - _SPI_current->queryEnv); + _SPI_current->queryEnv, + NULL /* Not interested in PartitionPruneResults */); Assert(cplan == plansource->gplan); /* Pop the error context stack */ @@ -2473,7 +2481,9 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options, { CachedPlanSource *plansource = (CachedPlanSource *) lfirst(lc1); List *stmt_list; - ListCell *lc2; + List *part_prune_result_list; + ListCell *lc2, + *lc3; spicallbackarg.query = plansource->query_string; @@ -2549,8 +2559,10 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options, * plan, the refcount must be backed by the plan_owner. 
*/ cplan = GetCachedPlan(plansource, options->params, - plan_owner, _SPI_current->queryEnv); - + plan_owner, _SPI_current->queryEnv, + &part_prune_result_list); + Assert(list_length(cplan->stmt_list) == + list_length(part_prune_result_list)); stmt_list = cplan->stmt_list; /* @@ -2589,9 +2601,10 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options, } } - foreach(lc2, stmt_list) + forboth(lc2, stmt_list, lc3, part_prune_result_list) { PlannedStmt *stmt = lfirst_node(PlannedStmt, lc2); + PartitionPruneResult *part_prune_result = lfirst_node(PartitionPruneResult, lc3); bool canSetTag = stmt->canSetTag; DestReceiver *dest; @@ -2663,7 +2676,7 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options, else snap = InvalidSnapshot; - qdesc = CreateQueryDesc(stmt, + qdesc = CreateQueryDesc(stmt, part_prune_result, plansource->query_string, snap, crosscheck_snapshot, dest, diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 46a1943d97..9642e74ef1 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -96,7 +96,10 @@ _copyPlannedStmt(const PlannedStmt *from) COPY_SCALAR_FIELD(parallelModeNeeded); COPY_SCALAR_FIELD(jitFlags); COPY_NODE_FIELD(planTree); + COPY_NODE_FIELD(partPruneInfos); + COPY_SCALAR_FIELD(containsInitialPruning); COPY_NODE_FIELD(rtable); + COPY_BITMAPSET_FIELD(minLockRelids); COPY_NODE_FIELD(resultRelations); COPY_NODE_FIELD(appendRelations); COPY_NODE_FIELD(subplans); @@ -253,7 +256,7 @@ _copyAppend(const Append *from) COPY_NODE_FIELD(appendplans); COPY_SCALAR_FIELD(nasyncplans); COPY_SCALAR_FIELD(first_partial_plan); - COPY_NODE_FIELD(part_prune_info); + COPY_SCALAR_FIELD(part_prune_index); return newnode; } @@ -281,7 +284,7 @@ _copyMergeAppend(const MergeAppend *from) COPY_POINTER_FIELD(sortOperators, from->numCols * sizeof(Oid)); COPY_POINTER_FIELD(collations, from->numCols * sizeof(Oid)); COPY_POINTER_FIELD(nullsFirst, from->numCols * sizeof(bool)); - 
COPY_NODE_FIELD(part_prune_info); + COPY_SCALAR_FIELD(part_prune_index); return newnode; } @@ -1280,6 +1283,8 @@ _copyPartitionPruneInfo(const PartitionPruneInfo *from) PartitionPruneInfo *newnode = makeNode(PartitionPruneInfo); COPY_NODE_FIELD(prune_infos); + COPY_SCALAR_FIELD(needs_init_pruning); + COPY_SCALAR_FIELD(needs_exec_pruning); COPY_BITMAPSET_FIELD(other_subplans); return newnode; @@ -1296,6 +1301,7 @@ _copyPartitionedRelPruneInfo(const PartitionedRelPruneInfo *from) COPY_POINTER_FIELD(subplan_map, from->nparts * sizeof(int)); COPY_POINTER_FIELD(subpart_map, from->nparts * sizeof(int)); COPY_POINTER_FIELD(relid_map, from->nparts * sizeof(Oid)); + COPY_POINTER_FIELD(rti_map, from->nparts * sizeof(Index)); COPY_NODE_FIELD(initial_pruning_steps); COPY_NODE_FIELD(exec_pruning_steps); COPY_BITMAPSET_FIELD(execparamids); @@ -5469,6 +5475,21 @@ _copyExtensibleNode(const ExtensibleNode *from) return newnode; } +/* **************************************************************** + * execnodes.h copy functions + * **************************************************************** + */ +static PartitionPruneResult * +_copyPartitionPruneResult(const PartitionPruneResult *from) +{ + PartitionPruneResult *newnode = makeNode(PartitionPruneResult); + + COPY_NODE_FIELD(valid_subplan_offs_list); + COPY_BITMAPSET_FIELD(scan_leafpart_rtis); + + return newnode; +} + /* **************************************************************** * value.h copy functions * **************************************************************** @@ -5523,7 +5544,6 @@ _copyBitString(const BitString *from) return newnode; } - static ForeignKeyCacheInfo * _copyForeignKeyCacheInfo(const ForeignKeyCacheInfo *from) { @@ -6565,6 +6585,13 @@ copyObjectImpl(const void *from) retval = _copyPublicationTable(from); break; + /* + * EXECUTION NODES + */ + case T_PartitionPruneResult: + retval = _copyPartitionPruneResult(from); + break; + /* * MISCELLANEOUS NODES */ diff --git a/src/backend/nodes/outfuncs.c 
b/src/backend/nodes/outfuncs.c index 13e1643530..0cbcbc8ed4 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -314,7 +314,10 @@ _outPlannedStmt(StringInfo str, const PlannedStmt *node) WRITE_BOOL_FIELD(parallelModeNeeded); WRITE_INT_FIELD(jitFlags); WRITE_NODE_FIELD(planTree); + WRITE_NODE_FIELD(partPruneInfos); + WRITE_BOOL_FIELD(containsInitialPruning); WRITE_NODE_FIELD(rtable); + WRITE_BITMAPSET_FIELD(minLockRelids); WRITE_NODE_FIELD(resultRelations); WRITE_NODE_FIELD(appendRelations); WRITE_NODE_FIELD(subplans); @@ -443,7 +446,7 @@ _outAppend(StringInfo str, const Append *node) WRITE_NODE_FIELD(appendplans); WRITE_INT_FIELD(nasyncplans); WRITE_INT_FIELD(first_partial_plan); - WRITE_NODE_FIELD(part_prune_info); + WRITE_INT_FIELD(part_prune_index); } static void @@ -460,7 +463,7 @@ _outMergeAppend(StringInfo str, const MergeAppend *node) WRITE_OID_ARRAY(sortOperators, node->numCols); WRITE_OID_ARRAY(collations, node->numCols); WRITE_BOOL_ARRAY(nullsFirst, node->numCols); - WRITE_NODE_FIELD(part_prune_info); + WRITE_INT_FIELD(part_prune_index); } static void @@ -1006,6 +1009,8 @@ _outPartitionPruneInfo(StringInfo str, const PartitionPruneInfo *node) WRITE_NODE_TYPE("PARTITIONPRUNEINFO"); WRITE_NODE_FIELD(prune_infos); + WRITE_BOOL_FIELD(needs_init_pruning); + WRITE_BOOL_FIELD(needs_exec_pruning); WRITE_BITMAPSET_FIELD(other_subplans); } @@ -1020,6 +1025,7 @@ _outPartitionedRelPruneInfo(StringInfo str, const PartitionedRelPruneInfo *node) WRITE_INT_ARRAY(subplan_map, node->nparts); WRITE_INT_ARRAY(subpart_map, node->nparts); WRITE_OID_ARRAY(relid_map, node->nparts); + WRITE_INDEX_ARRAY(rti_map, node->nparts); WRITE_NODE_FIELD(initial_pruning_steps); WRITE_NODE_FIELD(exec_pruning_steps); WRITE_BITMAPSET_FIELD(execparamids); @@ -2420,6 +2426,9 @@ _outPlannerGlobal(StringInfo str, const PlannerGlobal *node) WRITE_NODE_FIELD(finalrowmarks); WRITE_NODE_FIELD(resultRelations); WRITE_NODE_FIELD(appendRelations); + 
WRITE_NODE_FIELD(partPruneInfos); + WRITE_BOOL_FIELD(containsInitialPruning); + WRITE_BITMAPSET_FIELD(minLockRelids); WRITE_NODE_FIELD(relationOids); WRITE_NODE_FIELD(invalItems); WRITE_NODE_FIELD(paramExecTypes); @@ -2487,6 +2496,7 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node) WRITE_BITMAPSET_FIELD(curOuterRels); WRITE_NODE_FIELD(curOuterParams); WRITE_BOOL_FIELD(partColsUpdated); + WRITE_NODE_FIELD(partPruneInfos); } static void @@ -2840,6 +2850,21 @@ _outExtensibleNode(StringInfo str, const ExtensibleNode *node) methods->nodeOut(str, node); } +/***************************************************************************** + * + * Stuff from execnodes.h + * + *****************************************************************************/ + +static void +_outPartitionPruneResult(StringInfo str, const PartitionPruneResult *node) +{ + WRITE_NODE_TYPE("PARTITIONPRUNERESULT"); + + WRITE_NODE_FIELD(valid_subplan_offs_list); + WRITE_BITMAPSET_FIELD(scan_leafpart_rtis); +} + /***************************************************************************** * * Stuff from parsenodes.h. 
@@ -4748,6 +4773,13 @@ outNode(StringInfo str, const void *obj) _outJsonTableSibling(str, obj); break; + /* + * EXECUTION NODES + */ + case T_PartitionPruneResult: + _outPartitionPruneResult(str, obj); + break; + default: /* diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 48f7216c9e..25e1df7068 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -164,6 +164,11 @@ token = pg_strtok(&length); /* skip :fldname */ \ local_node->fldname = readIntCols(len) +/* Read an Index array */ +#define READ_INDEX_ARRAY(fldname, len) \ + token = pg_strtok(&length); /* skip :fldname */ \ + local_node->fldname = readIndexCols(len) + /* Read a bool array */ #define READ_BOOL_ARRAY(fldname, len) \ token = pg_strtok(&length); /* skip :fldname */ \ @@ -1814,7 +1819,10 @@ _readPlannedStmt(void) READ_BOOL_FIELD(parallelModeNeeded); READ_INT_FIELD(jitFlags); READ_NODE_FIELD(planTree); + READ_NODE_FIELD(partPruneInfos); + READ_BOOL_FIELD(containsInitialPruning); READ_NODE_FIELD(rtable); + READ_BITMAPSET_FIELD(minLockRelids); READ_NODE_FIELD(resultRelations); READ_NODE_FIELD(appendRelations); READ_NODE_FIELD(subplans); @@ -1946,7 +1954,7 @@ _readAppend(void) READ_NODE_FIELD(appendplans); READ_INT_FIELD(nasyncplans); READ_INT_FIELD(first_partial_plan); - READ_NODE_FIELD(part_prune_info); + READ_INT_FIELD(part_prune_index); READ_DONE(); } @@ -1968,7 +1976,7 @@ _readMergeAppend(void) READ_OID_ARRAY(sortOperators, local_node->numCols); READ_OID_ARRAY(collations, local_node->numCols); READ_BOOL_ARRAY(nullsFirst, local_node->numCols); - READ_NODE_FIELD(part_prune_info); + READ_INT_FIELD(part_prune_index); READ_DONE(); } @@ -2763,6 +2771,8 @@ _readPartitionPruneInfo(void) READ_LOCALS(PartitionPruneInfo); READ_NODE_FIELD(prune_infos); + READ_BOOL_FIELD(needs_init_pruning); + READ_BOOL_FIELD(needs_exec_pruning); READ_BITMAPSET_FIELD(other_subplans); READ_DONE(); @@ -2779,6 +2789,7 @@ _readPartitionedRelPruneInfo(void) 
READ_INT_ARRAY(subplan_map, local_node->nparts); READ_INT_ARRAY(subpart_map, local_node->nparts); READ_OID_ARRAY(relid_map, local_node->nparts); + READ_INDEX_ARRAY(rti_map, local_node->nparts); READ_NODE_FIELD(initial_pruning_steps); READ_NODE_FIELD(exec_pruning_steps); READ_BITMAPSET_FIELD(execparamids); @@ -2932,6 +2943,21 @@ _readPartitionRangeDatum(void) READ_DONE(); } + +/* + * _readPartitionPruneResult + */ +static PartitionPruneResult * +_readPartitionPruneResult(void) +{ + READ_LOCALS(PartitionPruneResult); + + READ_NODE_FIELD(valid_subplan_offs_list); + READ_BITMAPSET_FIELD(scan_leafpart_rtis); + + READ_DONE(); +} + /* * parseNodeString * @@ -3229,6 +3255,8 @@ parseNodeString(void) return_value = _readJsonTableParent(); else if (MATCH("JSONTABSNODE", 12)) return_value = _readJsonTableSibling(); + else if (MATCH("PARTITIONPRUNERESULT", 20)) + return_value = _readPartitionPruneResult(); else { elog(ERROR, "badly formatted node string \"%.32s\"...", token); @@ -3372,6 +3400,30 @@ readIntCols(int numCols) return int_vals; } +/* + * readIndexCols + */ +Index * +readIndexCols(int numCols) +{ + int tokenLength, + i; + const char *token; + Index *index_vals; + + if (numCols <= 0) + return NULL; + + index_vals = (Index *) palloc(numCols * sizeof(Index)); + for (i = 0; i < numCols; i++) + { + token = pg_strtok(&tokenLength); + index_vals[i] = atoui(token); + } + + return index_vals; +} + /* * readBoolCols */ diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 51591bb812..e7f977fb96 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -1183,7 +1183,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) ListCell *subpaths; int nasyncplans = 0; RelOptInfo *rel = best_path->path.parent; - PartitionPruneInfo *partpruneinfo = NULL; + int part_prune_index = -1; int nodenumsortkeys = 0; AttrNumber *nodeSortColIdx = NULL; Oid *nodeSortOperators = NULL; @@ 
-1357,16 +1357,17 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) } if (prunequal != NIL) - partpruneinfo = - make_partition_pruneinfo(root, rel, - best_path->subpaths, - prunequal); + part_prune_index= make_partition_pruneinfo(root, rel, + best_path->subpaths, + prunequal); } plan->appendplans = subplans; plan->nasyncplans = nasyncplans; plan->first_partial_plan = best_path->first_partial_path; - plan->part_prune_info = partpruneinfo; + + /* Will be updated later in set_plan_references(). */ + plan->part_prune_index = part_prune_index; copy_generic_path_info(&plan->plan, (Path *) best_path); @@ -1406,7 +1407,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, List *subplans = NIL; ListCell *subpaths; RelOptInfo *rel = best_path->path.parent; - PartitionPruneInfo *partpruneinfo = NULL; + int part_prune_index = -1; /* * We don't have the actual creation of the MergeAppend node split out @@ -1522,13 +1523,15 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, } if (prunequal != NIL) - partpruneinfo = make_partition_pruneinfo(root, rel, - best_path->subpaths, - prunequal); + part_prune_index= make_partition_pruneinfo(root, rel, + best_path->subpaths, + prunequal); } node->mergeplans = subplans; - node->part_prune_info = partpruneinfo; + + /* Will be updated later in set_plan_references(). 
*/ + node->part_prune_index = part_prune_index; /* * If prepare_sort_from_pathkeys added sort columns, but we were told to diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index b2569c5d0c..2aa051d862 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -518,7 +518,10 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, result->dependsOnRole = glob->dependsOnRole; result->parallelModeNeeded = glob->parallelModeNeeded; result->planTree = top_plan; + result->partPruneInfos = glob->partPruneInfos; + result->containsInitialPruning = glob->containsInitialPruning; result->rtable = glob->finalrtable; + result->minLockRelids = glob->minLockRelids; result->resultRelations = glob->resultRelations; result->appendRelations = glob->appendRelations; result->subplans = glob->subplans; diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 7519723081..fc66986e1c 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -251,7 +251,7 @@ set_plan_references(PlannerInfo *root, Plan *plan) Plan *result; PlannerGlobal *glob = root->glob; int rtoffset = list_length(glob->finalrtable); - ListCell *lc; + ListCell *lc; /* * Add all the query's RTEs to the flattened rangetable. The live ones @@ -260,6 +260,16 @@ set_plan_references(PlannerInfo *root, Plan *plan) */ add_rtes_to_flat_rtable(root, false); + /* + * Add the query's adjusted range of RT indexes to glob->minLockRelids. + * The adjusted RT indexes of prunable relations will be deleted from the + * set below where PartitionPruneInfos are processed. 
+ */ + glob->minLockRelids = + bms_add_range(glob->minLockRelids, + rtoffset + 1, + rtoffset + list_length(root->parse->rtable)); + /* * Adjust RT indexes of PlanRowMarks and add to final rowmarks list */ @@ -338,6 +348,56 @@ set_plan_references(PlannerInfo *root, Plan *plan) } } + /* Also fix up the information in PartitionPruneInfos. */ + foreach (lc, root->partPruneInfos) + { + PartitionPruneInfo *pruneinfo = lfirst(lc); + Bitmapset *leafpart_rtis = NULL; + ListCell *l; + + foreach(l, pruneinfo->prune_infos) + { + List *prune_infos = lfirst(l); + ListCell *l2; + + foreach(l2, prune_infos) + { + PartitionedRelPruneInfo *pinfo = lfirst(l2); + int i; + + /* RT index of the partitioned table. */ + pinfo->rtindex += rtoffset; + + /* And also those of the leaf partitions. */ + for (i = 0; i < pinfo->nparts; i++) + { + if (pinfo->rti_map[i] > 0) + { + pinfo->rti_map[i] += rtoffset; + leafpart_rtis = bms_add_member(leafpart_rtis, + pinfo->rti_map[i]); + } + } + } + } + + if (pruneinfo->needs_init_pruning) + { + glob->containsInitialPruning = true; + + /* + * Delete the leaf partition RTIs from the global set of relations + * to be locked before executing the plan. AcquireExecutorLocks() + * will find the ones to add to the set after performing initial + * pruning. + */ + glob->minLockRelids = bms_del_members(glob->minLockRelids, + leafpart_rtis); + } + + glob->partPruneInfos = lappend(glob->partPruneInfos, pruneinfo); + } + return result; } @@ -1610,21 +1670,12 @@ set_append_references(PlannerInfo *root, aplan->apprelids = offset_relid_set(aplan->apprelids, rtoffset); - if (aplan->part_prune_info) - { - foreach(l, aplan->part_prune_info->prune_infos) - { - List *prune_infos = lfirst(l); - ListCell *l2; - - foreach(l2, prune_infos) - { - PartitionedRelPruneInfo *pinfo = lfirst(l2); - - pinfo->rtindex += rtoffset; - } - } - } + /* + * PartitionPruneInfos will be added to a list in PlannerGlobal, so update + * the index. 
+ */ + if (aplan->part_prune_index >= 0) + aplan->part_prune_index += list_length(root->glob->partPruneInfos); /* We don't need to recurse to lefttree or righttree ... */ Assert(aplan->plan.lefttree == NULL); @@ -1682,21 +1733,12 @@ set_mergeappend_references(PlannerInfo *root, mplan->apprelids = offset_relid_set(mplan->apprelids, rtoffset); - if (mplan->part_prune_info) - { - foreach(l, mplan->part_prune_info->prune_infos) - { - List *prune_infos = lfirst(l); - ListCell *l2; - - foreach(l2, prune_infos) - { - PartitionedRelPruneInfo *pinfo = lfirst(l2); - - pinfo->rtindex += rtoffset; - } - } - } + /* + * PartitionPruneInfos will be added to a list in PlannerGlobal, so update + * the index. + */ + if (mplan->part_prune_index >= 0) + mplan->part_prune_index += list_length(root->glob->partPruneInfos); /* We don't need to recurse to lefttree or righttree ... */ Assert(mplan->plan.lefttree == NULL); diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c index 9d3c05aed3..5a5f5dee46 100644 --- a/src/backend/partitioning/partprune.c +++ b/src/backend/partitioning/partprune.c @@ -144,7 +144,9 @@ static List *make_partitionedrel_pruneinfo(PlannerInfo *root, List *prunequal, Bitmapset *partrelids, int *relid_subplan_map, - Bitmapset **matchedsubplans); + Bitmapset **matchedsubplans, + bool *needs_init_pruning, + bool *needs_exec_pruning); static void gen_partprune_steps(RelOptInfo *rel, List *clauses, PartClauseTarget target, GeneratePruningStepsContext *context); @@ -209,16 +211,20 @@ static void partkey_datum_from_expr(PartitionPruneContext *context, /* * make_partition_pruneinfo - * Builds a PartitionPruneInfo which can be used in the executor to allow - * additional partition pruning to take place. Returns NULL when - * partition pruning would be useless. 
+ * Checks if the given set of quals can be used to build pruning steps + * that the executor will use to prune useless ones from given set of + * child paths, and if so builds a PartitionPruneInfo that will allow the + * executor to do so, and appends it to root->partPruneInfos. + * + * Return value is 0-based index of the added PartitionPruneInfo or -1 if one + * was not built after all. * * 'parentrel' is the RelOptInfo for an appendrel, and 'subpaths' is the list * of scan paths for its child rels. * 'prunequal' is a list of potential pruning quals (i.e., restriction * clauses that are applicable to the appendrel). */ -PartitionPruneInfo * +int make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, List *subpaths, List *prunequal) @@ -230,6 +236,8 @@ make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, int *relid_subplan_map; ListCell *lc; int i; + bool needs_init_pruning = false; + bool needs_exec_pruning = false; /* * Scan the subpaths to see which ones are scans of partition child @@ -309,12 +317,16 @@ make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, Bitmapset *partrelids = (Bitmapset *) lfirst(lc); List *pinfolist; Bitmapset *matchedsubplans = NULL; + bool partrel_needs_init_pruning; + bool partrel_needs_exec_pruning; pinfolist = make_partitionedrel_pruneinfo(root, parentrel, prunequal, partrelids, relid_subplan_map, - &matchedsubplans); + &matchedsubplans, + &partrel_needs_init_pruning, + &partrel_needs_exec_pruning); /* When pruning is possible, record the matched subplans */ if (pinfolist != NIL) @@ -323,6 +335,10 @@ make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, allmatchedsubplans = bms_join(matchedsubplans, allmatchedsubplans); } + if (!needs_init_pruning) + needs_init_pruning = partrel_needs_init_pruning; + if (!needs_exec_pruning) + needs_exec_pruning = partrel_needs_exec_pruning; } pfree(relid_subplan_map); @@ -332,11 +348,13 @@ make_partition_pruneinfo(PlannerInfo *root, RelOptInfo 
*parentrel, * quals, then we can just not bother with run-time pruning. */ if (prunerelinfos == NIL) - return NULL; + return -1; /* Else build the result data structure */ pruneinfo = makeNode(PartitionPruneInfo); pruneinfo->prune_infos = prunerelinfos; + pruneinfo->needs_init_pruning = needs_init_pruning; + pruneinfo->needs_exec_pruning = needs_exec_pruning; /* * Some subplans may not belong to any of the identified partitioned rels. @@ -358,7 +376,9 @@ make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, else pruneinfo->other_subplans = NULL; - return pruneinfo; + root->partPruneInfos = lappend(root->partPruneInfos, pruneinfo); + + return list_length(root->partPruneInfos) - 1; } /* @@ -435,13 +455,18 @@ add_part_relids(List *allpartrelids, Bitmapset *partrelids) * If we cannot find any useful run-time pruning steps, return NIL. * However, on success, each rel identified in partrelids will have * an element in the result list, even if some of them are useless. + * *needs_init_pruning and *needs_exec_pruning are set to indicate that the + * returned PartitionedRelPruneInfos contains pruning steps that can be + * performed before and after execution begins, respectively. */ static List * make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, List *prunequal, Bitmapset *partrelids, int *relid_subplan_map, - Bitmapset **matchedsubplans) + Bitmapset **matchedsubplans, + bool *needs_init_pruning, + bool *needs_exec_pruning) { RelOptInfo *targetpart = NULL; List *pinfolist = NIL; @@ -452,6 +477,10 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, int rti; int i; + /* Will find out below. 
+ */ + *needs_init_pruning = false; + *needs_exec_pruning = false; + /* * Examine each partitioned rel, constructing a temporary array to map * from planner relids to index of the partitioned rel, and building a @@ -539,6 +568,9 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, * executor per-scan pruning steps. This first pass creates startup * pruning steps and detects whether there's any possibly-useful quals * that would require per-scan pruning. + * + * In the first pass, we determine whether the 2nd pass is necessary + * by noting the presence of EXEC parameters. */ gen_partprune_steps(subpart, partprunequal, PARTTARGET_INITIAL, &context); @@ -613,6 +645,11 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, pinfo->execparamids = execparamids; /* Remaining fields will be filled in the next loop */ + if (!*needs_init_pruning) + *needs_init_pruning = (initial_pruning_steps != NIL); + if (!*needs_exec_pruning) + *needs_exec_pruning = (exec_pruning_steps != NIL); + pinfolist = lappend(pinfolist, pinfo); } @@ -640,6 +677,7 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, int *subplan_map; int *subpart_map; Oid *relid_map; + Index *rti_map; /* * Construct the subplan and subpart maps for this partitioning level. 
@@ -652,6 +690,7 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, subpart_map = (int *) palloc(nparts * sizeof(int)); memset(subpart_map, -1, nparts * sizeof(int)); relid_map = (Oid *) palloc0(nparts * sizeof(Oid)); + rti_map = (Index *) palloc0(nparts * sizeof(Index)); present_parts = NULL; i = -1; @@ -666,6 +705,7 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, subplan_map[i] = subplanidx = relid_subplan_map[partrel->relid] - 1; subpart_map[i] = subpartidx = relid_subpart_map[partrel->relid] - 1; relid_map[i] = planner_rt_fetch(partrel->relid, root)->relid; + rti_map[i] = partrel->relid; if (subplanidx >= 0) { present_parts = bms_add_member(present_parts, i); @@ -690,6 +730,7 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, pinfo->subplan_map = subplan_map; pinfo->subpart_map = subpart_map; pinfo->relid_map = relid_map; + pinfo->rti_map = rti_map; } pfree(relid_subpart_map); diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 95dc2e2c83..8dc52a158f 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -1603,6 +1603,7 @@ exec_bind_message(StringInfo input_message) int16 *rformats = NULL; CachedPlanSource *psrc; CachedPlan *cplan; + List *part_prune_result_list; Portal portal; char *query_string; char *saved_stmt_name; @@ -1978,7 +1979,9 @@ exec_bind_message(StringInfo input_message) * will be generated in MessageContext. The plan refcount will be * assigned to the Portal, so it will be released at portal destruction. */ - cplan = GetCachedPlan(psrc, params, NULL, NULL); + cplan = GetCachedPlan(psrc, params, NULL, NULL, &part_prune_result_list); + Assert(list_length(cplan->stmt_list) == + list_length(part_prune_result_list)); /* * Now we can define the portal. @@ -1993,6 +1996,9 @@ exec_bind_message(StringInfo input_message) cplan->stmt_list, cplan); + /* Copy PartitionPruneResults into the portal's context. 
*/ + PortalStorePartitionPruneResults(portal, part_prune_result_list); + /* Done with the snapshot used for parameter I/O and parsing/planning */ if (snapshot_set) PopActiveSnapshot(); diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index 5aa5a350f3..a627448a5a 100644 --- a/src/backend/tcop/pquery.c +++ b/src/backend/tcop/pquery.c @@ -35,7 +35,7 @@ Portal ActivePortal = NULL; -static void ProcessQuery(PlannedStmt *plan, +static void ProcessQuery(PlannedStmt *plan, PartitionPruneResult *part_prune_result, const char *sourceText, ParamListInfo params, QueryEnvironment *queryEnv, @@ -65,6 +65,7 @@ static void DoPortalRewind(Portal portal); */ QueryDesc * CreateQueryDesc(PlannedStmt *plannedstmt, + PartitionPruneResult *part_prune_result, const char *sourceText, Snapshot snapshot, Snapshot crosscheck_snapshot, @@ -77,6 +78,8 @@ CreateQueryDesc(PlannedStmt *plannedstmt, qd->operation = plannedstmt->commandType; /* operation */ qd->plannedstmt = plannedstmt; /* plan */ + qd->part_prune_result = part_prune_result; /* ExecutorDoInitialPruning() + * output for plan */ qd->sourceText = sourceText; /* query text */ qd->snapshot = RegisterSnapshot(snapshot); /* snapshot */ /* RI check snapshot */ @@ -122,6 +125,7 @@ FreeQueryDesc(QueryDesc *qdesc) * PORTAL_ONE_RETURNING, or PORTAL_ONE_MOD_WITH portal * * plan: the plan tree for the query + * part_prune_result: ExecutorDoInitialPruning() output for the plan tree * sourceText: the source text of the query * params: any parameters needed * dest: where to send results @@ -134,6 +138,7 @@ FreeQueryDesc(QueryDesc *qdesc) */ static void ProcessQuery(PlannedStmt *plan, + PartitionPruneResult *part_prune_result, const char *sourceText, ParamListInfo params, QueryEnvironment *queryEnv, @@ -145,7 +150,7 @@ ProcessQuery(PlannedStmt *plan, /* * Create the QueryDesc object */ - queryDesc = CreateQueryDesc(plan, sourceText, + queryDesc = CreateQueryDesc(plan, part_prune_result, sourceText, GetActiveSnapshot(), 
InvalidSnapshot, dest, params, queryEnv, 0); @@ -491,8 +496,13 @@ PortalStart(Portal portal, ParamListInfo params, /* * Create QueryDesc in portal's context; for the moment, set * the destination to DestNone. + * + * There is no PartitionPruneResult unless the PlannedStmt is + * from a CachedPlan. */ queryDesc = CreateQueryDesc(linitial_node(PlannedStmt, portal->stmts), + portal->part_prune_results == NIL ? NULL : + linitial(portal->part_prune_results), portal->sourceText, GetActiveSnapshot(), InvalidSnapshot, @@ -1194,6 +1204,7 @@ PortalRunMulti(Portal portal, { bool active_snapshot_set = false; ListCell *stmtlist_item; + int i; /* * If the destination is DestRemoteExecute, change to DestNone. The @@ -1214,9 +1225,15 @@ PortalRunMulti(Portal portal, * Loop to handle the individual queries generated from a single parsetree * by analysis and rewrite. */ + i = 0; foreach(stmtlist_item, portal->stmts) { PlannedStmt *pstmt = lfirst_node(PlannedStmt, stmtlist_item); + PartitionPruneResult *part_prune_result = portal->part_prune_results ? 
+ list_nth(portal->part_prune_results, i) : + NULL; + + i++; /* * If we got a cancel signal in prior command, quit @@ -1274,7 +1291,7 @@ PortalRunMulti(Portal portal, if (pstmt->canSetTag) { /* statement can set tag string */ - ProcessQuery(pstmt, + ProcessQuery(pstmt, part_prune_result, portal->sourceText, portal->portalParams, portal->queryEnv, @@ -1283,7 +1300,7 @@ PortalRunMulti(Portal portal, else { /* stmt added by rewrite cannot set tag */ - ProcessQuery(pstmt, + ProcessQuery(pstmt, part_prune_result, portal->sourceText, portal->portalParams, portal->queryEnv, diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c index 4cf6db504f..6cb473f2f4 100644 --- a/src/backend/utils/cache/plancache.c +++ b/src/backend/utils/cache/plancache.c @@ -99,14 +99,19 @@ static dlist_head cached_expression_list = DLIST_STATIC_INIT(cached_expression_l static void ReleaseGenericPlan(CachedPlanSource *plansource); static List *RevalidateCachedQuery(CachedPlanSource *plansource, QueryEnvironment *queryEnv); -static bool CheckCachedPlan(CachedPlanSource *plansource); +static bool CheckCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, + List **part_prune_result_list); static CachedPlan *BuildCachedPlan(CachedPlanSource *plansource, List *qlist, - ParamListInfo boundParams, QueryEnvironment *queryEnv); + ParamListInfo boundParams, QueryEnvironment *queryEnv, + List **part_prune_result_list); static bool choose_custom_plan(CachedPlanSource *plansource, ParamListInfo boundParams); static double cached_plan_cost(CachedPlan *plan, bool include_planner); static Query *QueryListGetPrimaryStmt(List *stmts); -static void AcquireExecutorLocks(List *stmt_list, bool acquire); +static void AcquireExecutorLocks(List *stmt_list, ParamListInfo boundParams, + List **part_prune_result_list, + List **lockedRelids_per_stmt); +static void ReleaseExecutorLocks(List *stmt_list, List *lockedRelids_per_stmt); static void AcquirePlannerLocks(List *stmt_list, 
bool acquire); static void ScanQueryForLocks(Query *parsetree, bool acquire); static bool ScanQueryWalker(Node *node, bool *acquire); @@ -790,15 +795,20 @@ RevalidateCachedQuery(CachedPlanSource *plansource, * * On a "true" return, we have acquired the locks needed to run the plan. * (We must do this for the "true" result to be race-condition-free.) + * + * See GetCachedPlan()'s comment for a description of part_prune_result_list. */ static bool -CheckCachedPlan(CachedPlanSource *plansource) +CheckCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, + List **part_prune_result_list) { CachedPlan *plan = plansource->gplan; /* Assert that caller checked the querytree */ Assert(plansource->is_valid); + *part_prune_result_list = NIL; + /* If there's no generic plan, just say "false" */ if (!plan) return false; @@ -820,13 +830,21 @@ CheckCachedPlan(CachedPlanSource *plansource) */ if (plan->is_valid) { + List *lockedRelids_per_stmt; + /* * Plan must have positive refcount because it is referenced by * plansource; so no need to fear it disappears under us here. */ Assert(plan->refcount > 0); - AcquireExecutorLocks(plan->stmt_list, true); + /* + * Lock relations scanned by the plan. This is where the pruning + * happens if needed. + */ + AcquireExecutorLocks(plan->stmt_list, boundParams, + part_prune_result_list, + &lockedRelids_per_stmt); /* * If plan was transient, check to see if TransactionXmin has @@ -848,7 +866,14 @@ CheckCachedPlan(CachedPlanSource *plansource) } /* Oops, the race case happened. Release useless locks. */ - AcquireExecutorLocks(plan->stmt_list, false); + ReleaseExecutorLocks(plan->stmt_list, lockedRelids_per_stmt); + + /* + * The output list and any objects therein have been allocated in the + * caller's hopefully short-lived context, so will not remain leaked + * for long, though reset to avoid its accidentally being looked at. 
+ */ + *part_prune_result_list = NIL; } /* @@ -874,10 +899,15 @@ CheckCachedPlan(CachedPlanSource *plansource) * Planning work is done in the caller's memory context. The finished plan * is in a child memory context, which typically should get reparented * (unless this is a one-shot plan, in which case we don't copy the plan). + * + * A list of NULLs is returned in *part_prune_result_list, meaning that no + * PartitionPruneResult nodes have yet been created for the plans in + * stmt_list. */ static CachedPlan * BuildCachedPlan(CachedPlanSource *plansource, List *qlist, - ParamListInfo boundParams, QueryEnvironment *queryEnv) + ParamListInfo boundParams, QueryEnvironment *queryEnv, + List **part_prune_result_list) { CachedPlan *plan; List *plist; @@ -1007,6 +1037,17 @@ BuildCachedPlan(CachedPlanSource *plansource, List *qlist, MemoryContextSwitchTo(oldcxt); + /* + * No actual PartitionPruneResults yet to add, though must initialize + * the list to have the same number of elements as the list of + * PlannedStmts. + */ + *part_prune_result_list = NIL; + foreach(lc, plist) + { + *part_prune_result_list = lappend(*part_prune_result_list, NULL); + } + return plan; } @@ -1126,6 +1167,17 @@ cached_plan_cost(CachedPlan *plan, bool include_planner) * plan or a custom plan for the given parameters: the caller does not know * which it will get. * + * For every PlannedStmt found in the returned CachedPlan, an element that + * is either a PartitionPruneResult or a NULL is added to + * *part_prune_result_list if needed. The former if the PlannedStmt is from + * the existing CachedPlan that is otherwise valid and contains at least one + * PartitionPruneInfo that has "initial" pruning steps. Those steps are + * performed by calling ExecutorDoInitialPruning() to determine only those + * leaf partitions that need to be locked by AcquireExecutorLocks() by pruning + * away subplans that don't match the pruning conditions. 
The + * PartitionPruneResult contains a list of bitmapsets of the indexes of + * matching subplans, one for each PartitionPruneInfo. + * * On return, the plan is valid and we have sufficient locks to begin * execution. * @@ -1139,11 +1191,13 @@ cached_plan_cost(CachedPlan *plan, bool include_planner) */ CachedPlan * GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, - ResourceOwner owner, QueryEnvironment *queryEnv) + ResourceOwner owner, QueryEnvironment *queryEnv, + List **part_prune_result_list) { CachedPlan *plan = NULL; List *qlist; bool customplan; + List *my_part_prune_result_list; /* Assert caller is doing things in a sane order */ Assert(plansource->magic == CACHEDPLANSOURCE_MAGIC); @@ -1160,7 +1214,8 @@ GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, if (!customplan) { - if (CheckCachedPlan(plansource)) + if (CheckCachedPlan(plansource, boundParams, + &my_part_prune_result_list)) { /* We want a generic plan, and we already have a valid one */ plan = plansource->gplan; @@ -1169,7 +1224,8 @@ GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, else { /* Build a new generic plan */ - plan = BuildCachedPlan(plansource, qlist, NULL, queryEnv); + plan = BuildCachedPlan(plansource, qlist, NULL, queryEnv, + &my_part_prune_result_list); /* Just make real sure plansource->gplan is clear */ ReleaseGenericPlan(plansource); /* Link the new generic plan into the plansource */ @@ -1214,7 +1270,8 @@ GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, if (customplan) { /* Build a custom plan */ - plan = BuildCachedPlan(plansource, qlist, boundParams, queryEnv); + plan = BuildCachedPlan(plansource, qlist, boundParams, queryEnv, + &my_part_prune_result_list); /* Accumulate total costs of custom plans */ plansource->total_custom_cost += cached_plan_cost(plan, true); @@ -1246,6 +1303,9 @@ GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, plan->is_saved = true; } + if 
(part_prune_result_list) + *part_prune_result_list = my_part_prune_result_list; + return plan; } @@ -1737,17 +1797,29 @@ QueryListGetPrimaryStmt(List *stmts) /* * AcquireExecutorLocks: acquire locks needed for execution of a cached plan; - * or release them if acquire is false. + * + * See GetCachedPlan()'s comment for a description of part_prune_result_list. + * + * On return, *lockedRelids_per_stmt will contain a bitmapset for every + * PlannedStmt in stmt_list, containing the RT indexes of relation entries + * in its range table that were actually locked, or NULL if the PlannedStmt + * contains a utility statement. */ static void -AcquireExecutorLocks(List *stmt_list, bool acquire) +AcquireExecutorLocks(List *stmt_list, ParamListInfo boundParams, + List **part_prune_result_list, + List **lockedRelids_per_stmt) { ListCell *lc1; + *part_prune_result_list = *lockedRelids_per_stmt = NIL; foreach(lc1, stmt_list) { PlannedStmt *plannedstmt = lfirst_node(PlannedStmt, lc1); - ListCell *lc2; + PartitionPruneResult *part_prune_result = NULL; + Bitmapset *allLockRelids; + Bitmapset *lockedRelids = NULL; + int rti; if (plannedstmt->commandType == CMD_UTILITY) { @@ -1761,13 +1833,37 @@ AcquireExecutorLocks(List *stmt_list, bool acquire) Query *query = UtilityContainsQuery(plannedstmt->utilityStmt); if (query) - ScanQueryForLocks(query, acquire); + ScanQueryForLocks(query, true); + /* Keep both output lists in step with stmt_list. */ + *part_prune_result_list = lappend(*part_prune_result_list, NULL); + *lockedRelids_per_stmt = lappend(*lockedRelids_per_stmt, NULL); continue; } - foreach(lc2, plannedstmt->rtable) + /* + * Figure out the set of relations that would need to be locked + * before executing the plan. + */ + if (plannedstmt->containsInitialPruning) { - RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc2); + /* + * Obtain the set of partitions to be locked from the + * PartitionPruneInfos by considering the result of performing + * initial partition pruning. 
+ */ + part_prune_result = + ExecutorDoInitialPruning(plannedstmt, boundParams); + + allLockRelids = bms_union(plannedstmt->minLockRelids, + part_prune_result->scan_leafpart_rtis); + } + else + allLockRelids = plannedstmt->minLockRelids; + + rti = -1; + while ((rti = bms_next_member(allLockRelids, rti)) > 0) + { + RangeTblEntry *rte = rt_fetch(rti, plannedstmt->rtable); if (rte->rtekind != RTE_RELATION) continue; @@ -1778,10 +1874,58 @@ AcquireExecutorLocks(List *stmt_list, bool acquire) * fail if it's been dropped entirely --- we'll just transiently * acquire a non-conflicting lock. */ - if (acquire) - LockRelationOid(rte->relid, rte->rellockmode); - else - UnlockRelationOid(rte->relid, rte->rellockmode); + LockRelationOid(rte->relid, rte->rellockmode); + lockedRelids = bms_add_member(lockedRelids, rti); + } + + *part_prune_result_list = lappend(*part_prune_result_list, + part_prune_result); + *lockedRelids_per_stmt = lappend(*lockedRelids_per_stmt, lockedRelids); + } +} + +/* + * ReleaseExecutorLocks + * Release locks that would've been acquired by an earlier call to + * AcquireExecutorLocks() + */ +static void +ReleaseExecutorLocks(List *stmt_list, List *lockedRelids_per_stmt) +{ + ListCell *lc1, + *lc2; + + forboth(lc1, stmt_list, lc2, lockedRelids_per_stmt) + { + PlannedStmt *plannedstmt = lfirst_node(PlannedStmt, lc1); + Bitmapset *lockedRelids = lfirst(lc2); + int rti; + + if (plannedstmt->commandType == CMD_UTILITY) + { + /* + * Ignore utility statements, except those (such as EXPLAIN) that + * contain a parsed-but-not-planned query. Note: it's okay to use + * ScanQueryForLocks, even though the query hasn't been through + * rule rewriting, because rewriting doesn't change the query + * representation. 
+ */ + Query *query = UtilityContainsQuery(plannedstmt->utilityStmt); + + if (query) + ScanQueryForLocks(query, false); + continue; + } + + rti = -1; + while ((rti = bms_next_member(lockedRelids, rti)) >= 0) + { + RangeTblEntry *rte = rt_fetch(rti, plannedstmt->rtable); + + Assert(rte->rtekind == RTE_RELATION); + + /* See the comment in AcquireExecutorLocks(). */ + UnlockRelationOid(rte->relid, rte->rellockmode); } } } diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c index d549f66d4a..1bbe6b704b 100644 --- a/src/backend/utils/mmgr/portalmem.c +++ b/src/backend/utils/mmgr/portalmem.c @@ -303,6 +303,25 @@ PortalDefineQuery(Portal portal, portal->status = PORTAL_DEFINED; } +/* + * PortalStorePartitionPruneResults + * Copy the given list of PartitionPruneResults into the portal's + * context + * + * This allows the caller to ensure that the list exists as long as the portal + * does. + */ +void +PortalStorePartitionPruneResults(Portal portal, List *part_prune_results) +{ + MemoryContext oldcxt; + + AssertArg(PortalIsValid(portal)); + oldcxt = MemoryContextSwitchTo(portal->portalContext); + portal->part_prune_results = copyObject(part_prune_results); + MemoryContextSwitchTo(oldcxt); +} + /* * PortalReleaseCachedPlan * Release a portal's reference to its cached plan, if any. 
diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h index 666977fb1f..34975c69ee 100644 --- a/src/include/commands/explain.h +++ b/src/include/commands/explain.h @@ -87,7 +87,8 @@ extern void ExplainOneUtility(Node *utilityStmt, IntoClause *into, ExplainState *es, const char *queryString, ParamListInfo params, QueryEnvironment *queryEnv); -extern void ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, +extern void ExplainOnePlan(PlannedStmt *plannedstmt, PartitionPruneResult *part_prune_result, + IntoClause *into, ExplainState *es, const char *queryString, ParamListInfo params, QueryEnvironment *queryEnv, const instr_time *planduration, diff --git a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h index 708435e952..bd8776402e 100644 --- a/src/include/executor/execPartition.h +++ b/src/include/executor/execPartition.h @@ -45,6 +45,7 @@ extern void ExecCleanupTupleRouting(ModifyTableState *mtstate, * nparts Length of subplan_map[] and subpart_map[]. * subplan_map Subplan index by partition index, or -1. * subpart_map Subpart index by partition index, or -1. + * rti_map Range table index by partition index, or 0. * present_parts A Bitmapset of the partition indexes that we * have subplans or subparts for. 
* initial_pruning_steps List of PartitionPruneSteps used to @@ -61,6 +62,7 @@ typedef struct PartitionedRelPruningData int nparts; int *subplan_map; int *subpart_map; + Index *rti_map; Bitmapset *present_parts; List *initial_pruning_steps; List *exec_pruning_steps; @@ -123,9 +125,13 @@ typedef struct PartitionPruneState extern PartitionPruneState *ExecInitPartitionPruning(PlanState *planstate, int n_total_subplans, - PartitionPruneInfo *pruneinfo, + int part_prune_index, Bitmapset **initially_valid_subplans); extern Bitmapset *ExecFindMatchingSubPlans(PartitionPruneState *prunestate, - bool initial_prune); - + bool initial_prune, + Bitmapset **scan_leafpart_rtis); +extern Bitmapset *ExecPartitionDoInitialPruning(PlannedStmt *plannedstmt, + ParamListInfo params, + PartitionPruneInfo *pruneinfo, + Bitmapset **scan_leafpart_rtis); #endif /* EXECPARTITION_H */ diff --git a/src/include/executor/execdesc.h b/src/include/executor/execdesc.h index e79e2c001f..60d5644908 100644 --- a/src/include/executor/execdesc.h +++ b/src/include/executor/execdesc.h @@ -35,6 +35,8 @@ typedef struct QueryDesc /* These fields are provided by CreateQueryDesc */ CmdType operation; /* CMD_SELECT, CMD_UPDATE, etc. 
*/ PlannedStmt *plannedstmt; /* planner's output (could be utility, too) */ + PartitionPruneResult *part_prune_result; /* ExecutorDoInitialPruning()'s + * output for plannedstmt */ const char *sourceText; /* source text of the query */ Snapshot snapshot; /* snapshot to use for query */ Snapshot crosscheck_snapshot; /* crosscheck for RI update/delete */ @@ -57,6 +59,7 @@ typedef struct QueryDesc /* in pquery.c */ extern QueryDesc *CreateQueryDesc(PlannedStmt *plannedstmt, + PartitionPruneResult *part_prune_result, const char *sourceText, Snapshot snapshot, Snapshot crosscheck_snapshot, diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 873772f188..57dc0e8077 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -185,6 +185,8 @@ ExecGetJunkAttribute(TupleTableSlot *slot, AttrNumber attno, bool *isNull) /* * prototypes from functions in execMain.c */ +extern PartitionPruneResult *ExecutorDoInitialPruning(PlannedStmt *plannedstmt, + ParamListInfo params); extern void ExecutorStart(QueryDesc *queryDesc, int eflags); extern void standard_ExecutorStart(QueryDesc *queryDesc, int eflags); extern void ExecutorRun(QueryDesc *queryDesc, diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index cbbcff81d2..3de4df1b05 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -596,6 +596,8 @@ typedef struct EState struct ExecRowMark **es_rowmarks; /* Array of per-range-table-entry * ExecRowMarks, or NULL if none */ PlannedStmt *es_plannedstmt; /* link to top of plan tree */ + List *es_part_prune_infos; /* PlannedStmt.partPruneInfos */ + struct PartitionPruneResult *es_part_prune_result; /* QueryDesc.part_prune_result */ const char *es_sourceText; /* Source text from QueryDesc */ JunkFilter *es_junkFilter; /* top-level junk filter, if any */ @@ -984,6 +986,34 @@ typedef struct DomainConstraintState */ typedef TupleTableSlot *(*ExecProcNodeMtd) (struct PlanState 
*pstate); +/*---------------- + * PartitionPruneResult + * + * The result of an ExecutorDoInitialPruning() invocation on a given + * PlannedStmt. + * + * Contains a list of Bitmapsets of the indexes of the subplans remaining after + * performing initial pruning by calling ExecFindMatchingSubPlans() for every + * PartitionPruneInfo found in PlannedStmt.partPruneInfos. RT indexes of the + * leaf partitions scanned by those subplans across all PartitionPruneInfos + * are added into scan_leafpart_rtis. + * + * This is used by GetCachedPlan() to inform its callers of the pruning + * decisions made when performing AcquireExecutorLocks() on a given cached + * PlannedStmt, which the callers then pass on to the executor. The + * executor refers to this node, when available, while initializing the plan + * nodes to which those PartitionPruneInfos apply so that the same set of + * qualifying subplans is initialized, rather than deriving that set again by + * redoing initial pruning. 
+ */ +typedef struct PartitionPruneResult +{ + NodeTag type; + + List *valid_subplan_offs_list; + Bitmapset *scan_leafpart_rtis; +} PartitionPruneResult; + /* ---------------- * PlanState node * diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 300824258e..de312b9215 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -97,6 +97,9 @@ typedef enum NodeTag T_PartitionPruneStepCombine, T_PlanInvalItem, + /* TAGS FOR EXECUTOR PREP NODES (execnodes.h) */ + T_PartitionPruneResult, + /* * TAGS FOR PLAN STATE NODES (execnodes.h) * @@ -673,6 +676,7 @@ extern struct Bitmapset *readBitmapset(void); extern uintptr_t readDatum(bool typbyval); extern bool *readBoolCols(int numCols); extern int *readIntCols(int numCols); +extern Index *readIndexCols(int numCols); extern Oid *readOidCols(int numCols); extern int16 *readAttrNumberCols(int numCols); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 6cbcb67bdf..d9c482e08b 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -107,6 +107,18 @@ typedef struct PlannerGlobal List *appendRelations; /* "flat" list of AppendRelInfos */ + List *partPruneInfos; /* List of PartitionPruneInfo contained in + * the plan */ + + bool containsInitialPruning; /* Do any of those PartitionPruneInfos + * have initial (pre-exec) pruning + * steps in them? */ + + Bitmapset *minLockRelids; /* Indexes of all range table entries minus + * indexes of range table entries of the leaf + * partitions scanned by prunable subplans; + * see AcquireExecutorLocks() */ + List *relationOids; /* OIDs of relations the plan depends on */ List *invalItems; /* other dependencies, as PlanInvalItems */ @@ -377,6 +389,9 @@ struct PlannerInfo /* Does this query modify any partition key columns? */ bool partColsUpdated; + + /* PartitionPruneInfos added in this query's plan. 
*/ + List *partPruneInfos; }; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 10dd35f011..44997d595d 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -64,8 +64,20 @@ typedef struct PlannedStmt struct Plan *planTree; /* tree of Plan nodes */ + List *partPruneInfos; /* List of PartitionPruneInfo contained in + * the plan */ + + bool containsInitialPruning; /* Do any of those PartitionPruneInfos + * have initial (pre-exec) pruning + * steps in them? */ + List *rtable; /* list of RangeTblEntry nodes */ + Bitmapset *minLockRelids; /* Indexes of all range table entries minus + * indexes of range table entries of the leaf + * partitions scanned by prunable subplans; + * see AcquireExecutorLocks() */ + /* rtable indexes of target relations for INSERT/UPDATE/DELETE */ List *resultRelations; /* integer list of RT indexes, or NIL */ @@ -262,8 +274,12 @@ typedef struct Append */ int first_partial_plan; - /* Info for run-time subplan pruning; NULL if we're not doing that */ - struct PartitionPruneInfo *part_prune_info; + /* + * Index of this plan's PartitionPruneInfo in PlannedStmt.partPruneInfos + * to be used for run-time subplan pruning; -1 if run-time pruning is + * not needed. + */ + int part_prune_index; } Append; /* ---------------- @@ -282,8 +298,13 @@ typedef struct MergeAppend Oid *sortOperators; /* OIDs of operators to sort them by */ Oid *collations; /* OIDs of collations */ bool *nullsFirst; /* NULLS FIRST/LAST directions */ - /* Info for run-time subplan pruning; NULL if we're not doing that */ - struct PartitionPruneInfo *part_prune_info; + + /* + * Index of this plan's PartitionPruneInfo in PlannedStmt.partPruneInfos + * to be used for run-time subplan pruning; -1 if run-time pruning is + * not needed. 
+ */ + int part_prune_index; } MergeAppend; /* ---------------- @@ -1187,6 +1208,13 @@ typedef struct PlanRowMark * prune_infos List of Lists containing PartitionedRelPruneInfo nodes, * one sublist per run-time-prunable partition hierarchy * appearing in the parent plan node's subplans. + * + * needs_init_pruning Does any of the PartitionedRelPruneInfos in + * prune_infos have its initial_pruning_steps set? + * + * needs_exec_pruning Does any of the PartitionedRelPruneInfos in + * prune_infos have its exec_pruning_steps set? + * * other_subplans Indexes of any subplans that are not accounted for * by any of the PartitionedRelPruneInfo nodes in * "prune_infos". These subplans must not be pruned. @@ -1195,6 +1223,8 @@ typedef struct PartitionPruneInfo { NodeTag type; List *prune_infos; + bool needs_init_pruning; + bool needs_exec_pruning; Bitmapset *other_subplans; } PartitionPruneInfo; @@ -1225,6 +1255,7 @@ typedef struct PartitionedRelPruneInfo int *subplan_map; /* subplan index by partition index, or -1 */ int *subpart_map; /* subpart index by partition index, or -1 */ Oid *relid_map; /* relation OID by partition index, or 0 */ + Index *rti_map; /* Range table index by partition index, or 0. 
*/ /* * initial_pruning_steps shows how to prune during executor startup (i.e., diff --git a/src/include/partitioning/partprune.h b/src/include/partitioning/partprune.h index 90684efa25..ebf0dcff8c 100644 --- a/src/include/partitioning/partprune.h +++ b/src/include/partitioning/partprune.h @@ -70,10 +70,10 @@ typedef struct PartitionPruneContext #define PruneCxtStateIdx(partnatts, step_id, keyno) \ ((partnatts) * (step_id) + (keyno)) -extern PartitionPruneInfo *make_partition_pruneinfo(struct PlannerInfo *root, - struct RelOptInfo *parentrel, - List *subpaths, - List *prunequal); +extern int make_partition_pruneinfo(struct PlannerInfo *root, + struct RelOptInfo *parentrel, + List *subpaths, + List *prunequal); extern Bitmapset *prune_append_rel_partitions(struct RelOptInfo *rel); extern Bitmapset *get_matching_partitions(PartitionPruneContext *context, List *pruning_steps); diff --git a/src/include/utils/plancache.h b/src/include/utils/plancache.h index 95b99e3d25..449200b949 100644 --- a/src/include/utils/plancache.h +++ b/src/include/utils/plancache.h @@ -220,7 +220,8 @@ extern List *CachedPlanGetTargetList(CachedPlanSource *plansource, extern CachedPlan *GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, ResourceOwner owner, - QueryEnvironment *queryEnv); + QueryEnvironment *queryEnv, + List **part_prune_result_list); extern void ReleaseCachedPlan(CachedPlan *plan, ResourceOwner owner); extern bool CachedPlanAllowsSimpleValidityCheck(CachedPlanSource *plansource, diff --git a/src/include/utils/portal.h b/src/include/utils/portal.h index aeddbdafe5..9f7727a837 100644 --- a/src/include/utils/portal.h +++ b/src/include/utils/portal.h @@ -138,6 +138,7 @@ typedef struct PortalData QueryCompletion qc; /* command completion data for executed query */ List *stmts; /* list of PlannedStmts */ CachedPlan *cplan; /* CachedPlan, if stmts are from one */ + List *part_prune_results; /* list of PartitionPruneResults */ ParamListInfo portalParams; /* params 
to pass to query */ QueryEnvironment *queryEnv; /* environment for query */ @@ -242,6 +243,8 @@ extern void PortalDefineQuery(Portal portal, CommandTag commandTag, List *stmts, CachedPlan *cplan); +extern void PortalStorePartitionPruneResults(Portal portal, + List *part_prune_result_list); extern PlannedStmt *PortalGetPrimaryStmt(Portal portal); extern void PortalCreateHoldStore(Portal portal); extern void PortalHashTableDeleteAll(void); -- 2.24.1