From 0522447f5816211ac3e32ebc6920d7f7805718d6 Mon Sep 17 00:00:00 2001 From: amitlan Date: Thu, 26 Jan 2023 10:52:24 +0900 Subject: [PATCH v31 3/3] Move CachedPlan validation locking to ExecutorStart() --- src/backend/executor/execMain.c | 163 +++++++++++++++++++++++-- src/backend/executor/execParallel.c | 38 +++++- src/backend/executor/execPartition.c | 90 +++++++++++--- src/backend/executor/execUtils.c | 8 +- src/backend/executor/nodeAppend.c | 11 +- src/backend/executor/nodeMergeAppend.c | 5 +- src/backend/optimizer/plan/setrefs.c | 36 ++++++ src/backend/utils/cache/plancache.c | 146 +++++++--------------- src/include/executor/execPartition.h | 8 +- src/include/executor/execdesc.h | 6 + src/include/executor/executor.h | 2 + src/include/nodes/execnodes.h | 1 + src/include/nodes/pathnodes.h | 4 +- src/include/nodes/plannodes.h | 31 ++++- src/include/utils/plancache.h | 1 + 15 files changed, 404 insertions(+), 146 deletions(-) diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 45c999bcdb..68743d5f66 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -49,6 +49,7 @@ #include "commands/matview.h" #include "commands/trigger.h" #include "executor/execdebug.h" +#include "executor/execPartition.h" #include "executor/nodeSubplan.h" #include "foreign/fdwapi.h" #include "jit/jit.h" @@ -64,6 +65,7 @@ #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/partcache.h" +#include "utils/plancache.h" #include "utils/rls.h" #include "utils/ruleutils.h" #include "utils/snapmgr.h" @@ -79,7 +81,12 @@ ExecutorEnd_hook_type ExecutorEnd_hook = NULL; ExecutorCheckPerms_hook_type ExecutorCheckPerms_hook = NULL; /* decls for local routines only used within this module */ -static void InitPlan(QueryDesc *queryDesc, int eflags); +static void InitPlan(QueryDesc *queryDesc, int eflags, bool *replan); +static void ExecLockRelationsIfNeeded(QueryDesc *queryDesc, bool *replan); +static Bitmapset 
*ExecDoInitialPartitionPruning(PlannedStmt *stmt, + EState *estate); +static void AcquireExecutorLocks(Bitmapset *lockRelids, EState *estate, + bool acquire); static void CheckValidRowMarkRel(Relation rel, RowMarkType markType); static void ExecPostprocessPlan(EState *estate); static void ExecEndPlan(PlanState *planstate, EState *estate); @@ -270,7 +277,7 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags, bool *replan) /* * Initialize the plan state tree */ - InitPlan(queryDesc, eflags); + InitPlan(queryDesc, eflags, replan); MemoryContextSwitchTo(oldcontext); } @@ -801,7 +808,7 @@ ExecCheckXactReadOnly(PlannedStmt *plannedstmt) * ---------------------------------------------------------------- */ static void -InitPlan(QueryDesc *queryDesc, int eflags) +InitPlan(QueryDesc *queryDesc, int eflags, bool *replan) { CmdType operation = queryDesc->operation; PlannedStmt *plannedstmt = queryDesc->plannedstmt; @@ -814,19 +821,26 @@ InitPlan(QueryDesc *queryDesc, int eflags) int i; /* - * Do permissions checks and save the list for later use. - */ - ExecCheckPermissions(rangeTable, plannedstmt->permInfos, true); - estate->es_rteperminfos = plannedstmt->permInfos; - - /* - * initialize the node's execution state + * Initialize es_range_table and es_relations. */ ExecInitRangeTable(estate, rangeTable); estate->es_plannedstmt = plannedstmt; estate->es_part_prune_infos = plannedstmt->partPruneInfos; + /* + * Acquire locks on relations referenced in the plan if it comes + * from a CachedPlan after performing "initial" partition pruning. + * Results of pruning, if any, are saved in es_part_prune_results. + */ + ExecLockRelationsIfNeeded(queryDesc, replan); + + /* + * Do permissions checks and save the list for later use. + */ + ExecCheckPermissions(rangeTable, plannedstmt->permInfos, true); + estate->es_rteperminfos = plannedstmt->permInfos; + /* * Next, build the ExecRowMark array from the PlanRowMark(s), if any. 
*/ @@ -982,6 +996,133 @@ InitPlan(QueryDesc *queryDesc, int eflags) queryDesc->planstate = planstate; } +/* + * ExecLockRelationsIfNeeded + * Lock relations that a query's plan depends on if the plan comes + * from a CachedPlan + * + * On return, we have acquired all the locks needed to run the plan. + * Also *replan is set to true if acquiring those locks would have invalidated + * the CachedPlan. + */ +static void +ExecLockRelationsIfNeeded(QueryDesc *queryDesc, bool *replan) +{ + PlannedStmt *plannedstmt = queryDesc->plannedstmt; + EState *estate = queryDesc->estate; + CachedPlan *cplan = queryDesc->cplan; + Bitmapset *allLockRelids; + + /* Nothing to do if the plan tree is not cached. */ + if (cplan == NULL || cplan->is_oneshot) + return; + + Assert(plannedstmt); + Assert(replan); + *replan = false; + + /* + * Temporarily signal to ExecGetRangeTableRelation() that it must + * take a lock. This is needed for ExecCreatePartitionPruneState() to be + * able to open parent partitioned tables using + * ExecGetRangeTableRelation(). + */ + estate->es_top_eflags |= EXEC_FLAG_GET_LOCKS; + + allLockRelids = plannedstmt->minLockRelids; + if (plannedstmt->containsInitialPruning) + { + Bitmapset *partRelids = ExecDoInitialPartitionPruning(plannedstmt, + estate); + + allLockRelids = bms_add_members(allLockRelids, partRelids); + } + + /* Done with it. */ + estate->es_top_eflags &= ~EXEC_FLAG_GET_LOCKS; + + /* Acquire locks. */ + AcquireExecutorLocks(allLockRelids, estate, true); + + /* Check if acquiring those locks has invalidated the plan. */ + *replan = !CachedPlanStillValid(cplan); + + /* Release useless locks if needed. */ + if (*replan) + AcquireExecutorLocks(allLockRelids, estate, false); +} + +/* + * ExecDoInitialPartitionPruning + * Perform initial partition pruning if needed by the plan + * + * The return value is the set of RT indexes of surviving partitions.
+ * A list of PartitionPruneResult with an element for each in + * plannedstmt->partPruneInfos is saved in estate->es_part_prune_results. + */ +static Bitmapset * +ExecDoInitialPartitionPruning(PlannedStmt *plannedstmt, EState *estate) +{ + ListCell *lc; + Bitmapset *lockPartRelids = NULL; + + Assert(plannedstmt->containsInitialPruning); + Assert(plannedstmt->partPruneInfos); + + foreach(lc, plannedstmt->partPruneInfos) + { + PartitionPruneInfo *pruneinfo = lfirst_node(PartitionPruneInfo, lc); + PartitionPruneState *prunestate; + PartitionPruneResult *pruneresult; + Bitmapset *validsubplans; + + /* No PlanState here; unnecessary for "initial" pruning. */ + prunestate = ExecCreatePartitionPruneState(NULL, estate, pruneinfo, + true, false); + validsubplans = ExecFindMatchingSubPlans(prunestate, true, + &lockPartRelids); + + pruneresult = makeNode(PartitionPruneResult); + pruneresult->root_parent_relids = bms_copy(pruneinfo->root_parent_relids); + pruneresult->validsubplans = bms_copy(validsubplans); + estate->es_part_prune_results = lappend(estate->es_part_prune_results, + pruneresult); + } + + return lockPartRelids; +} + +/* + * AcquireExecutorLocks: acquire locks needed for execution of a cached plan; + * or release them if acquire is false. + */ +static void +AcquireExecutorLocks(Bitmapset *lockRelids, EState *estate, bool acquire) +{ + int rti; + + rti = -1; + while ((rti = bms_next_member(lockRelids, rti)) > 0) + { + RangeTblEntry *rte = exec_rt_fetch(rti, estate); + + if (!(rte->rtekind == RTE_RELATION || + (rte->rtekind == RTE_SUBQUERY && OidIsValid(rte->relid)))) + continue; + + /* + * Acquire the appropriate type of lock on each relation OID. Note + * that we don't actually try to open the rel, and hence will not + * fail if it's been dropped entirely --- we'll just transiently + * acquire a non-conflicting lock. 
+ */ + if (acquire) + LockRelationOid(rte->relid, rte->rellockmode); + else + UnlockRelationOid(rte->relid, rte->rellockmode); + } +} + /* * Check that a proposed result relation is a legal target for the operation * @@ -1396,7 +1537,7 @@ ExecGetAncestorResultRels(EState *estate, ResultRelInfo *resultRelInfo) /* * All ancestors up to the root target relation must have been - * locked by the planner or AcquireExecutorLocks(). + * locked by the planner. */ ancRel = table_open(ancOid, NoLock); rInfo = makeNode(ResultRelInfo); diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index 1f5d6d4d64..5c967451ce 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -66,6 +66,7 @@ #define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xE000000000000008) #define PARALLEL_KEY_JIT_INSTRUMENTATION UINT64CONST(0xE000000000000009) #define PARALLEL_KEY_WAL_USAGE UINT64CONST(0xE00000000000000A) +#define PARALLEL_KEY_PARTITION_PRUNE_RESULTS UINT64CONST(0xE00000000000000B) #define PARALLEL_TUPLE_QUEUE_SIZE 65536 @@ -599,12 +600,15 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, FixedParallelExecutorState *fpes; char *pstmt_data; char *pstmt_space; + char *part_prune_results_data; + char *part_prune_results_space; char *paramlistinfo_space; BufferUsage *bufusage_space; WalUsage *walusage_space; SharedExecutorInstrumentation *instrumentation = NULL; SharedJitInstrumentation *jit_instrumentation = NULL; int pstmt_len; + int part_prune_results_len; int paramlistinfo_len; int instrumentation_len = 0; int jit_instrumentation_len = 0; @@ -633,6 +637,7 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, /* Fix up and serialize plan to be sent to workers. */ pstmt_data = ExecSerializePlan(planstate->plan, estate); + part_prune_results_data = nodeToString(estate->es_part_prune_results); /* Create a parallel context. 
*/ pcxt = CreateParallelContext("postgres", "ParallelQueryMain", nworkers); @@ -659,6 +664,11 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, shm_toc_estimate_chunk(&pcxt->estimator, pstmt_len); shm_toc_estimate_keys(&pcxt->estimator, 1); + /* Estimate space for serialized List of PartitionPruneResult. */ + part_prune_results_len = strlen(part_prune_results_data) + 1; + shm_toc_estimate_chunk(&pcxt->estimator, part_prune_results_len); + shm_toc_estimate_keys(&pcxt->estimator, 1); + /* Estimate space for serialized ParamListInfo. */ paramlistinfo_len = EstimateParamListSpace(estate->es_param_list_info); shm_toc_estimate_chunk(&pcxt->estimator, paramlistinfo_len); @@ -753,6 +763,12 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate, memcpy(pstmt_space, pstmt_data, pstmt_len); shm_toc_insert(pcxt->toc, PARALLEL_KEY_PLANNEDSTMT, pstmt_space); + /* Store serialized List of PartitionPruneResult */ + part_prune_results_space = shm_toc_allocate(pcxt->toc, part_prune_results_len); + memcpy(part_prune_results_space, part_prune_results_data, part_prune_results_len); + shm_toc_insert(pcxt->toc, PARALLEL_KEY_PARTITION_PRUNE_RESULTS, + part_prune_results_space); + /* Store serialized ParamListInfo. */ paramlistinfo_space = shm_toc_allocate(pcxt->toc, paramlistinfo_len); shm_toc_insert(pcxt->toc, PARALLEL_KEY_PARAMLISTINFO, paramlistinfo_space); @@ -1234,8 +1250,11 @@ ExecParallelGetQueryDesc(shm_toc *toc, DestReceiver *receiver, int instrument_options) { char *pstmtspace; + char *part_prune_results_space; char *paramspace; PlannedStmt *pstmt; + QueryDesc *queryDesc; + List *part_prune_results; ParamListInfo paramLI; char *queryString; @@ -1246,6 +1265,11 @@ ExecParallelGetQueryDesc(shm_toc *toc, DestReceiver *receiver, pstmtspace = shm_toc_lookup(toc, PARALLEL_KEY_PLANNEDSTMT, false); pstmt = (PlannedStmt *) stringToNode(pstmtspace); + /* Reconstruct leader-supplied PartitionPruneResult. 
*/ + part_prune_results_space = + shm_toc_lookup(toc, PARALLEL_KEY_PARTITION_PRUNE_RESULTS, false); + part_prune_results = (List *) stringToNode(part_prune_results_space); + /* Reconstruct ParamListInfo. */ paramspace = shm_toc_lookup(toc, PARALLEL_KEY_PARAMLISTINFO, false); paramLI = RestoreParamList(¶mspace); @@ -1255,11 +1279,15 @@ ExecParallelGetQueryDesc(shm_toc *toc, DestReceiver *receiver, * here even if the containing plan tree may have come from one in the * leader. */ - return CreateQueryDesc(pstmt, - NULL, - queryString, - GetActiveSnapshot(), InvalidSnapshot, - receiver, paramLI, NULL, instrument_options); + queryDesc = CreateQueryDesc(pstmt, + NULL, + queryString, + GetActiveSnapshot(), InvalidSnapshot, + receiver, paramLI, NULL, instrument_options); + + queryDesc->part_prune_results = part_prune_results; + + return queryDesc; } /* diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 4b91bb7403..09e0d7aa9c 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -196,7 +196,8 @@ static void PartitionPruneFixSubPlanMap(PartitionPruneState *prunestate, static void find_matching_subplans_recurse(PartitionPruningData *prunedata, PartitionedRelPruningData *pprune, bool initial_prune, - Bitmapset **validsubplans); + Bitmapset **validsubplans, + Bitmapset **scan_leafpart_rtis); /* @@ -1782,8 +1783,10 @@ adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap) * * On return, *initially_valid_subplans is assigned the set of indexes of * child subplans that must be initialized along with the parent plan node. - * Initial pruning is performed here if needed and in that case only the - * surviving subplans' indexes are added. 
+ * That set is computed by either performing the "initial pruning" here or + * reusing the one present in EState.es_part_prune_results[part_prune_index] + * if it has been set, which it would be if ExecDoInitialPartitionPruning() + * would have done the initial pruning. * * If subplans are indeed pruned, subplan_map arrays contained in the returned * PartitionPruneState are re-sequenced to not count those, though only if the @@ -1796,9 +1799,10 @@ ExecInitPartitionPruning(PlanState *planstate, Bitmapset *root_parent_relids, Bitmapset **initially_valid_subplans) { - PartitionPruneState *prunestate; + PartitionPruneState *prunestate = NULL; EState *estate = planstate->state; PartitionPruneInfo *pruneinfo; + PartitionPruneResult *pruneresult = NULL; /* Obtain the pruneinfo we need, and make sure it's the right one */ pruneinfo = list_nth(estate->es_part_prune_infos, part_prune_index); @@ -1814,22 +1818,56 @@ ExecInitPartitionPruning(PlanState *planstate, /* We may need an expression context to evaluate partition exprs */ ExecAssignExprContext(estate, planstate); - /* Create the working data structure for pruning */ - prunestate = ExecCreatePartitionPruneState(planstate, estate, pruneinfo, - pruneinfo->needs_init_pruning, - pruneinfo->needs_exec_pruning); + /* Initial pruning already done if es_part_prune_results has been set. 
*/ + if (estate->es_part_prune_results) + { + pruneresult = list_nth_node(PartitionPruneResult, + estate->es_part_prune_results, + part_prune_index); + if (!bms_equal(root_parent_relids, pruneinfo->root_parent_relids)) + ereport(ERROR, + errcode(ERRCODE_INTERNAL_ERROR), + errmsg_internal("mismatching PartitionPruneInfo and PartitionPruneResult at part_prune_index %d", + part_prune_index), + errdetail_internal("prunresult relids %s, pruneinfo relids %s", + bmsToString(pruneresult->root_parent_relids), + bmsToString(pruneinfo->root_parent_relids))); + } + + if (pruneresult == NULL || pruneinfo->needs_exec_pruning) + { + /* We may need an expression context to evaluate partition exprs */ + ExecAssignExprContext(estate, planstate); + + /* + * Create the working data structure for pruning. No need to consider + * initial pruning steps if we have a PartitionPruneResult. + */ + prunestate = ExecCreatePartitionPruneState(planstate, estate, + pruneinfo, + pruneresult == NULL, + pruneinfo->needs_exec_pruning); + } /* * Perform an initial partition prune pass, if required. */ - if (prunestate->do_initial_prune) - *initially_valid_subplans = ExecFindMatchingSubPlans(prunestate, true); + if (pruneresult) + { + *initially_valid_subplans = bms_copy(pruneresult->validsubplans); + } + else if (prunestate && prunestate->do_initial_prune) + { + *initially_valid_subplans = ExecFindMatchingSubPlans(prunestate, true, + NULL); + } else { - /* No pruning, so we'll need to initialize all subplans */ + /* No initial pruning, so we'll need to initialize all subplans */ Assert(n_total_subplans > 0); *initially_valid_subplans = bms_add_range(NULL, 0, n_total_subplans - 1); + return prunestate; } /* @@ -1837,7 +1875,8 @@ ExecInitPartitionPruning(PlanState *planstate, * that were removed above due to initial pruning. No need to do this if * no steps were removed. 
*/ - if (bms_num_members(*initially_valid_subplans) < n_total_subplans) + if (prunestate && + bms_num_members(*initially_valid_subplans) < n_total_subplans) { /* * We can safely skip this when !do_exec_prune, even though that @@ -2295,10 +2334,14 @@ PartitionPruneFixSubPlanMap(PartitionPruneState *prunestate, * Pass initial_prune if PARAM_EXEC Params cannot yet be evaluated. This * differentiates the initial executor-time pruning step from later * runtime pruning. + * + * RT indexes of leaf partitions scanned by the chosen subplans are added to + * *scan_leafpart_rtis if the pointer is non-NULL. */ Bitmapset * ExecFindMatchingSubPlans(PartitionPruneState *prunestate, - bool initial_prune) + bool initial_prune, + Bitmapset **scan_leafpart_rtis) { Bitmapset *result = NULL; MemoryContext oldcontext; @@ -2333,10 +2376,10 @@ ExecFindMatchingSubPlans(PartitionPruneState *prunestate, */ pprune = &prunedata->partrelprunedata[0]; find_matching_subplans_recurse(prunedata, pprune, initial_prune, - &result); + &result, scan_leafpart_rtis); /* Expression eval may have used space in ExprContext too */ - if (pprune->exec_pruning_steps) + if (pprune->exec_pruning_steps && !initial_prune) ResetExprContext(pprune->exec_context.exprcontext); } @@ -2347,6 +2390,8 @@ ExecFindMatchingSubPlans(PartitionPruneState *prunestate, /* Copy result out of the temp context before we reset it */ result = bms_copy(result); + if (scan_leafpart_rtis) + *scan_leafpart_rtis = bms_copy(*scan_leafpart_rtis); MemoryContextReset(prunestate->prune_context); @@ -2357,13 +2402,15 @@ ExecFindMatchingSubPlans(PartitionPruneState *prunestate, * find_matching_subplans_recurse * Recursive worker function for ExecFindMatchingSubPlans * - * Adds valid (non-prunable) subplan IDs to *validsubplans + * Adds valid (non-prunable) subplan IDs to *validsubplans and RT indexes of + * the corresponding leaf partitions to *scan_leafpart_rtis (if asked for).
*/ static void find_matching_subplans_recurse(PartitionPruningData *prunedata, PartitionedRelPruningData *pprune, bool initial_prune, - Bitmapset **validsubplans) + Bitmapset **validsubplans, + Bitmapset **scan_leafpart_rtis) { Bitmapset *partset; int i; @@ -2390,8 +2437,14 @@ find_matching_subplans_recurse(PartitionPruningData *prunedata, while ((i = bms_next_member(partset, i)) >= 0) { if (pprune->subplan_map[i] >= 0) + { *validsubplans = bms_add_member(*validsubplans, pprune->subplan_map[i]); + Assert(pprune->rti_map[i] > 0); + if (scan_leafpart_rtis) + *scan_leafpart_rtis = bms_add_member(*scan_leafpart_rtis, + pprune->rti_map[i]); + } else { int partidx = pprune->subpart_map[i]; @@ -2399,7 +2452,8 @@ find_matching_subplans_recurse(PartitionPruningData *prunedata, if (partidx >= 0) find_matching_subplans_recurse(prunedata, &prunedata->partrelprunedata[partidx], - initial_prune, validsubplans); + initial_prune, validsubplans, + scan_leafpart_rtis); else { /* diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index c33a3c0bec..035ed8a872 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -140,6 +140,7 @@ CreateExecutorState(void) estate->es_param_exec_vals = NULL; estate->es_queryEnv = NULL; + estate->es_part_prune_results = NIL; estate->es_query_cxt = qcontext; @@ -800,7 +801,12 @@ ExecGetRangeTableRelation(EState *estate, Index rti) Assert(rte->rtekind == RTE_RELATION); - if (!IsParallelWorker()) + /* + * Must take a lock on the relation if we got here by way of + * ExecLockRelationsIfNeeded(). 
+ */ + if (!IsParallelWorker() && + (estate->es_top_eflags & EXEC_FLAG_GET_LOCKS) == 0) { /* * In a normal query, we should already have the appropriate lock, diff --git a/src/backend/executor/nodeAppend.c b/src/backend/executor/nodeAppend.c index cb25499b3f..2f585793da 100644 --- a/src/backend/executor/nodeAppend.c +++ b/src/backend/executor/nodeAppend.c @@ -156,7 +156,8 @@ ExecInitAppend(Append *node, EState *estate, int eflags) * subplan, we can fill as_valid_subplans immediately, preventing * later calls to ExecFindMatchingSubPlans. */ - if (!prunestate->do_exec_prune && nplans > 0) + if (appendstate->as_prune_state == NULL || + (!appendstate->as_prune_state->do_exec_prune && nplans > 0)) appendstate->as_valid_subplans = bms_add_range(NULL, 0, nplans - 1); } else @@ -578,7 +579,7 @@ choose_next_subplan_locally(AppendState *node) } else if (node->as_valid_subplans == NULL) node->as_valid_subplans = - ExecFindMatchingSubPlans(node->as_prune_state, false); + ExecFindMatchingSubPlans(node->as_prune_state, false, NULL); whichplan = -1; } @@ -643,7 +644,7 @@ choose_next_subplan_for_leader(AppendState *node) if (node->as_valid_subplans == NULL) { node->as_valid_subplans = - ExecFindMatchingSubPlans(node->as_prune_state, false); + ExecFindMatchingSubPlans(node->as_prune_state, false, NULL); /* * Mark each invalid plan as finished to allow the loop below to @@ -718,7 +719,7 @@ choose_next_subplan_for_worker(AppendState *node) else if (node->as_valid_subplans == NULL) { node->as_valid_subplans = - ExecFindMatchingSubPlans(node->as_prune_state, false); + ExecFindMatchingSubPlans(node->as_prune_state, false, NULL); mark_invalid_subplans_as_finished(node); } @@ -869,7 +870,7 @@ ExecAppendAsyncBegin(AppendState *node) if (node->as_valid_subplans == NULL) { node->as_valid_subplans = - ExecFindMatchingSubPlans(node->as_prune_state, false); + ExecFindMatchingSubPlans(node->as_prune_state, false, NULL); classify_matching_subplans(node); } diff --git 
a/src/backend/executor/nodeMergeAppend.c b/src/backend/executor/nodeMergeAppend.c index 399b39c598..c653084515 100644 --- a/src/backend/executor/nodeMergeAppend.c +++ b/src/backend/executor/nodeMergeAppend.c @@ -104,7 +104,8 @@ ExecInitMergeAppend(MergeAppend *node, EState *estate, int eflags) * subplan, we can fill ms_valid_subplans immediately, preventing * later calls to ExecFindMatchingSubPlans. */ - if (!prunestate->do_exec_prune && nplans > 0) + if (mergestate->ms_prune_state == NULL || + (!mergestate->ms_prune_state->do_exec_prune && nplans > 0)) mergestate->ms_valid_subplans = bms_add_range(NULL, 0, nplans - 1); } else @@ -219,7 +220,7 @@ ExecMergeAppend(PlanState *pstate) */ if (node->ms_valid_subplans == NULL) node->ms_valid_subplans = - ExecFindMatchingSubPlans(node->ms_prune_state, false); + ExecFindMatchingSubPlans(node->ms_prune_state, false, NULL); /* * First time through: pull the first tuple from each valid subplan, diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index b4fa8d90bc..ff363be811 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -372,6 +372,7 @@ set_plan_references(PlannerInfo *root, Plan *plan) { PartitionPruneInfo *pruneinfo = lfirst(lc); ListCell *l; + Bitmapset *leafpart_rtis = NULL; pruneinfo->root_parent_relids = offset_relid_set(pruneinfo->root_parent_relids, rtoffset); @@ -383,17 +384,52 @@ set_plan_references(PlannerInfo *root, Plan *plan) foreach(l2, prune_infos) { PartitionedRelPruneInfo *pinfo = lfirst(l2); + int i; /* RT index of the table to which the pinfo belongs. */ pinfo->rtindex += rtoffset; + + /* Also of the leaf partitions that might be scanned. 
*/ + for (i = 0; i < pinfo->nparts; i++) + { + if (pinfo->rti_map[i] > 0 && pinfo->subplan_map[i] >= 0) + { + pinfo->rti_map[i] += rtoffset; + leafpart_rtis = bms_add_member(leafpart_rtis, + pinfo->rti_map[i]); + } + } } } + if (pruneinfo->needs_init_pruning) + { + glob->containsInitialPruning = true; + + /* + * Delete the leaf partition RTIs from the set of relations to be + * locked by AcquireExecutorLocks(). The actual set of leaf + * partitions to be locked is computed by + * ExecLockRelationsIfNeeded(). + */ + glob->minLockRelids = bms_del_members(glob->minLockRelids, + leafpart_rtis); + } + glob->partPruneInfos = lappend(glob->partPruneInfos, pruneinfo); glob->containsInitialPruning |= pruneinfo->needs_init_pruning; } + /* + * It seems worth doing a bms_copy() on glob->minLockRelids if we deleted + * bits from it above to get rid of any empty tail bits. It seems better + * for the loop over this set in AcquireExecutorLocks() to not have to go + * through those useless bit words. + */ + if (glob->containsInitialPruning) + glob->minLockRelids = bms_copy(glob->minLockRelids); + return result; } diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c index f113170140..af5e9b1609 100644 --- a/src/backend/utils/cache/plancache.c +++ b/src/backend/utils/cache/plancache.c @@ -100,13 +100,13 @@ static void ReleaseGenericPlan(CachedPlanSource *plansource); static List *RevalidateCachedQuery(CachedPlanSource *plansource, QueryEnvironment *queryEnv); static bool CheckCachedPlan(CachedPlanSource *plansource); +static bool GenericPlanIsValid(CachedPlan *cplan); static CachedPlan *BuildCachedPlan(CachedPlanSource *plansource, List *qlist, ParamListInfo boundParams, QueryEnvironment *queryEnv); static bool choose_custom_plan(CachedPlanSource *plansource, ParamListInfo boundParams); static double cached_plan_cost(CachedPlan *plan, bool include_planner); static Query *QueryListGetPrimaryStmt(List *stmts); -static void AcquireExecutorLocks(List 
*stmt_list, bool acquire); static void AcquirePlannerLocks(List *stmt_list, bool acquire); static void ScanQueryForLocks(Query *parsetree, bool acquire); static bool ScanQueryWalker(Node *node, bool *acquire); @@ -787,9 +787,6 @@ RevalidateCachedQuery(CachedPlanSource *plansource, * * Caller must have already called RevalidateCachedQuery to verify that the * querytree is up to date. - * - * On a "true" return, we have acquired the locks needed to run the plan. - * (We must do this for the "true" result to be race-condition-free.) */ static bool CheckCachedPlan(CachedPlanSource *plansource) @@ -803,60 +800,69 @@ CheckCachedPlan(CachedPlanSource *plansource) if (!plan) return false; - Assert(plan->magic == CACHEDPLAN_MAGIC); - /* Generic plans are never one-shot */ - Assert(!plan->is_oneshot); + if (GenericPlanIsValid(plan)) + return true; /* - * If plan isn't valid for current role, we can't use it. + * Plan has been invalidated, so unlink it from the parent and release it. */ - if (plan->is_valid && plan->dependsOnRole && - plan->planRoleId != GetUserId()) - plan->is_valid = false; + ReleaseGenericPlan(plansource); - /* - * If it appears valid, acquire locks and recheck; this is much the same - * logic as in RevalidateCachedQuery, but for a plan. - */ - if (plan->is_valid) + return false; +} + +/* + * GenericPlanIsValid + * Is a generic plan still valid? + * + * It may have gone stale due to concurrent schema modifications of relations + * mentioned in the plan or a couple of other things mentioned below. + */ +static bool +GenericPlanIsValid(CachedPlan *cplan) +{ + Assert(cplan != NULL); + Assert(cplan->magic == CACHEDPLAN_MAGIC); + /* Generic plans are never one-shot */ + Assert(!cplan->is_oneshot); + + if (cplan->is_valid) { /* * Plan must have positive refcount because it is referenced by * plansource; so no need to fear it disappears under us here. 
*/ - Assert(plan->refcount > 0); - - AcquireExecutorLocks(plan->stmt_list, true); + Assert(cplan->refcount > 0); /* - * If plan was transient, check to see if TransactionXmin has - * advanced, and if so invalidate it. + * If plan isn't valid for current role, we can't use it. */ - if (plan->is_valid && - TransactionIdIsValid(plan->saved_xmin) && - !TransactionIdEquals(plan->saved_xmin, TransactionXmin)) - plan->is_valid = false; + if (cplan->dependsOnRole && cplan->planRoleId != GetUserId()) + cplan->is_valid = false; /* - * By now, if any invalidation has happened, the inval callback - * functions will have marked the plan invalid. + * If plan was transient, check to see if TransactionXmin has + * advanced, and if so invalidate it. */ - if (plan->is_valid) - { - /* Successfully revalidated and locked the query. */ - return true; - } - - /* Oops, the race case happened. Release useless locks. */ - AcquireExecutorLocks(plan->stmt_list, false); + if (TransactionIdIsValid(cplan->saved_xmin) && + !TransactionIdEquals(cplan->saved_xmin, TransactionXmin)) + cplan->is_valid = false; } - /* - * Plan has been invalidated, so unlink it from the parent and release it. - */ - ReleaseGenericPlan(plansource); + return cplan->is_valid; +} - return false; +/* + * CachedPlanStillValid + * Returns if a cached generic plan is still valid + * + * Called by the executor after it has finished taking locks on a plan tree + * in a CachedPlan. + */ +bool +CachedPlanStillValid(CachedPlan *cplan) +{ + return GenericPlanIsValid(cplan); } /* @@ -1126,9 +1132,6 @@ cached_plan_cost(CachedPlan *plan, bool include_planner) * plan or a custom plan for the given parameters: the caller does not know * which it will get. * - * On return, the plan is valid and we have sufficient locks to begin - * execution. - * * On return, the refcount of the plan has been incremented; a later * ReleaseCachedPlan() call is expected. 
If "owner" is not NULL then * the refcount has been reported to that ResourceOwner (note that this @@ -1362,6 +1365,7 @@ CachedPlanAllowsSimpleValidityCheck(CachedPlanSource *plansource, /* * Reject if AcquireExecutorLocks would have anything to do. This is * probably unnecessary given the previous check, but let's be safe. + * XXX - maybe remove? */ foreach(lc, plan->stmt_list) { @@ -1735,62 +1739,6 @@ QueryListGetPrimaryStmt(List *stmts) return NULL; } -/* - * AcquireExecutorLocks: acquire locks needed for execution of a cached plan; - * or release them if acquire is false. - */ -static void -AcquireExecutorLocks(List *stmt_list, bool acquire) -{ - ListCell *lc1; - - foreach(lc1, stmt_list) - { - PlannedStmt *plannedstmt = lfirst_node(PlannedStmt, lc1); - Bitmapset *allLockRelids; - int rti; - - if (plannedstmt->commandType == CMD_UTILITY) - { - /* - * Ignore utility statements, except those (such as EXPLAIN) that - * contain a parsed-but-not-planned query. Note: it's okay to use - * ScanQueryForLocks, even though the query hasn't been through - * rule rewriting, because rewriting doesn't change the query - * representation. - */ - Query *query = UtilityContainsQuery(plannedstmt->utilityStmt); - - Assert(plannedstmt->minLockRelids == NULL); - if (query) - ScanQueryForLocks(query, acquire); - continue; - } - - allLockRelids = plannedstmt->minLockRelids; - rti = -1; - while ((rti = bms_next_member(allLockRelids, rti)) > 0) - { - RangeTblEntry *rte = rt_fetch(rti, plannedstmt->rtable); - - if (!(rte->rtekind == RTE_RELATION || - (rte->rtekind == RTE_SUBQUERY && OidIsValid(rte->relid)))) - continue; - - /* - * Acquire the appropriate type of lock on each relation OID. Note - * that we don't actually try to open the rel, and hence will not - * fail if it's been dropped entirely --- we'll just transiently - * acquire a non-conflicting lock. 
- */ - if (acquire) - LockRelationOid(rte->relid, rte->rellockmode); - else - UnlockRelationOid(rte->relid, rte->rellockmode); - } - } -} - /* * AcquirePlannerLocks: acquire locks needed for planning of a querytree list; * or release them if acquire is false. diff --git a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h index 21d85a7809..526f5781da 100644 --- a/src/include/executor/execPartition.h +++ b/src/include/executor/execPartition.h @@ -133,5 +133,11 @@ extern PartitionPruneState *ExecCreatePartitionPruneState(PlanState *planstate, bool consider_initial_steps, bool consider_exec_steps); extern Bitmapset *ExecFindMatchingSubPlans(PartitionPruneState *prunestate, - bool initial_prune); + bool initial_prune, + Bitmapset **scan_leafpart_rtis); +extern PartitionPruneState *ExecCreatePartitionPruneState(PlanState *planstate, + EState *estate, + PartitionPruneInfo *pruneinfo, + bool consider_initial_steps, + bool consider_exec_steps); #endif /* EXECPARTITION_H */ diff --git a/src/include/executor/execdesc.h b/src/include/executor/execdesc.h index 4b7368a0dc..595297df6c 100644 --- a/src/include/executor/execdesc.h +++ b/src/include/executor/execdesc.h @@ -46,6 +46,12 @@ typedef struct QueryDesc QueryEnvironment *queryEnv; /* query environment passed in */ int instrument_options; /* OR of InstrumentOption flags */ + /* + * Used by ExecParallelGetQueryDesc() to save the result of initial + * partition pruning performed by the leader. 
+ */ + List *part_prune_results; /* list of PartitionPruneResult */ + /* These fields are set by ExecutorStart */ TupleDesc tupDesc; /* descriptor for result tuples */ EState *estate; /* executor's query-wide state */ diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 63f3d09804..755e231675 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -59,6 +59,8 @@ #define EXEC_FLAG_MARK 0x0008 /* need mark/restore */ #define EXEC_FLAG_SKIP_TRIGGERS 0x0010 /* skip AfterTrigger calls */ #define EXEC_FLAG_WITH_NO_DATA 0x0020 /* rel scannability doesn't matter */ +#define EXEC_FLAG_GET_LOCKS 0x0400 /* should ExecGetRangeTableRelation() + * lock relations? */ /* Hook for plugins to get control in ExecutorStart() */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 20f4c8b35f..b361592e2d 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -620,6 +620,7 @@ typedef struct EState List *es_rteperminfos; /* List of RTEPermissionInfo */ PlannedStmt *es_plannedstmt; /* link to top of plan tree */ List *es_part_prune_infos; /* PlannedStmt.partPruneInfos */ + List *es_part_prune_results; /* QueryDesc.part_prune_results */ const char *es_sourceText; /* Source text from QueryDesc */ JunkFilter *es_junkFilter; /* top-level junk filter, if any */ diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index d00b5dcb03..83e5c665c7 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -134,8 +134,8 @@ typedef struct PlannerGlobal bool containsInitialPruning; /* - * Indexes of all range table entries; for AcquireExecutorLocks()'s - * perusal. + * Indexes of all range table entries except those of leaf partitions + * scanned by prunable subplans; for AcquireExecutorLocks() perusal. 
*/ Bitmapset *minLockRelids; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 7b53f990e0..e76e945c8c 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -82,8 +82,9 @@ typedef struct PlannedStmt List *permInfos; /* list of RTEPermissionInfo nodes for rtable * entries needing one */ - Bitmapset *minLockRelids; /* Indexes of all range table entries; for - * AcquireExecutorLocks()'s perusal */ + Bitmapset *minLockRelids; /* Indexes of all range table entries except + * those of leaf partitions scanned by + * prunable subplans */ /* rtable indexes of target relations for INSERT/UPDATE/DELETE/MERGE */ List *resultRelations; /* integer list of RT indexes, or NIL */ @@ -1575,6 +1576,32 @@ typedef struct PartitionPruneStepCombine List *source_stepids; } PartitionPruneStepCombine; +/*---------------- + * PartitionPruneResult + * + * The result of performing ExecPartitionDoInitialPruning() on a given + * PartitionPruneInfo. + * + * root_parent_relids is same as PartitionPruneInfo.root_parent_relids. It's + * there for cross-checking in ExecInitPartitionPruning() that the + * PartitionPruneResult and the PartitionPruneInfo at a given index in + * EState.es_part_prune_results and EState.es_part_prune_infos, respectively, + * belong to the same parent plan node. + * + * validsubplans contains the indexes of subplans remaining after performing + * initial pruning by calling ExecFindMatchingSubPlans() on the + * PartitionPruneInfo. + * + * This is used to store the result of initial partition pruning that is + * peformed in ExecDoInitialPartitionPruning(). 
+ */ +typedef struct PartitionPruneResult +{ + NodeTag type; + + Bitmapset *root_parent_relids; + Bitmapset *validsubplans; +} PartitionPruneResult; /* * Plan invalidation info diff --git a/src/include/utils/plancache.h b/src/include/utils/plancache.h index a443181d41..7c664bad35 100644 --- a/src/include/utils/plancache.h +++ b/src/include/utils/plancache.h @@ -221,6 +221,7 @@ extern CachedPlan *GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, ResourceOwner owner, QueryEnvironment *queryEnv); +extern bool CachedPlanStillValid(CachedPlan *cplan); extern void ReleaseCachedPlan(CachedPlan *plan, ResourceOwner owner); extern bool CachedPlanAllowsSimpleValidityCheck(CachedPlanSource *plansource, -- 2.35.3