From 648b9f5c89069692bbb46cf579576be50a9147f2 Mon Sep 17 00:00:00 2001 From: Amit Langote Date: Thu, 26 Mar 2026 18:15:39 +0900 Subject: [PATCH v10 3/5] Use pruning-aware locking in cached plans Extend GetCachedPlan()'s lock acquisition to perform initial partition pruning via ExecutorPrep(), then lock only the surviving partitions. This avoids unnecessary locking of pruned partitions when reusing a generic cached plan. Introduce CachedPlanPrepData to carry the EState created by ExecutorPrep() through the plan caching layer. The prep_estate field is populated when GetCachedPlan() prepares a reused single-statement generic plan. Adjust call sites in SPI, portals, and EXPLAIN to propagate this to ExecutorStart(). Disable pruning-aware locking for multi-statement CachedPlans, which arise from rule rewriting. PortalRunMulti() executes such statements sequentially with CommandCounterIncrement() between them, so later statements' pruning expressions may see different results depending on when they are evaluated. Evaluating all statements' pruning upfront during GetCachedPlan() would produce stale results for later statements. Additionally, PortalRunMulti() calls MemoryContextDeleteChildren(portalContext) between statements, which would destroy EStates prepared for later statements. The fallback to locking all partitions is safe and sufficient here; multi-statement plans from rule rewriting are uncommon. Partition pruning expressions may call PL functions that require an active snapshot (e.g., via EnsurePortalSnapshotExists()). AcquireExecutorLocksUnpruned() establishes one before calling ExecutorPrep() if needed, ensuring these expressions can execute correctly during plan cache validation. To maintain correctness when all target partitions are pruned, also reinstate the firstResultRel locking behavior lost in commit 28317de72. That commit required the first ModifyTable target to remain initialized for executor assumptions to hold. 
We now explicitly track these relids in PlannerGlobal and PlannedStmt so they are locked even if pruned, preserving that rule across cached plan reuse. Regression tests are included to verify: - Only surviving partitions are locked when pruning is enabled, and all partitions are locked when it is disabled (pg_locks inspection). - Multiple ModifyTable nodes (via writable CTEs) handle the case where all target partitions are pruned, exercising firstResultRels. - Plan invalidation during pruning-aware lock setup (DDL triggered by a pruning expression) discards the prep state and replans cleanly. - Multi-statement CachedPlans (from rule rewriting) fall back to locking all partitions, avoiding stale pruning and use-after-free. Note for extension authors: code that accesses partition relations through EState must check that the RT index is a member of es_unpruned_relids before opening the relation. Previously this was an optimization (avoid processing pruned partitions); it is now a correctness requirement, because pruned partitions may not be locked. ExecGetRangeTableRelation() already enforces this with an error when called on a pruned relation. 
--- src/backend/commands/prepare.c | 19 +- src/backend/executor/execMain.c | 4 + src/backend/executor/functions.c | 1 + src/backend/executor/nodeModifyTable.c | 5 +- src/backend/executor/spi.c | 24 +- src/backend/optimizer/plan/planner.c | 1 + src/backend/optimizer/plan/setrefs.c | 18 ++ src/backend/tcop/postgres.c | 8 +- src/backend/tcop/pquery.c | 1 + src/backend/utils/cache/plancache.c | 246 +++++++++++++++++- src/include/nodes/pathnodes.h | 3 + src/include/nodes/plannodes.h | 10 + src/include/utils/plancache.h | 38 ++- src/test/regress/expected/partition_prune.out | 184 +++++++++++++ src/test/regress/expected/plancache.out | 63 +++++ src/test/regress/sql/partition_prune.sql | 116 +++++++++ src/test/regress/sql/plancache.sql | 52 ++++ 17 files changed, 769 insertions(+), 24 deletions(-) diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c index c24d97f7e5a..621fd30fd5e 100644 --- a/src/backend/commands/prepare.c +++ b/src/backend/commands/prepare.c @@ -156,6 +156,7 @@ ExecuteQuery(ParseState *pstate, { PreparedStatement *entry; CachedPlan *cplan; + CachedPlanPrepData cprep = {0}; List *plan_list; ParamListInfo paramLI = NULL; EState *estate = NULL; @@ -195,8 +196,11 @@ ExecuteQuery(ParseState *pstate, entry->plansource->query_string); /* Replan if needed, and increment plan refcount for portal */ - cplan = GetCachedPlan(entry->plansource, paramLI, NULL, NULL); + cprep.context = portal->portalContext; + cprep.owner = portal->resowner; + cplan = GetCachedPlan(entry->plansource, paramLI, NULL, NULL, &cprep); plan_list = cplan->stmt_list; + Assert(cprep.prep_estate == NULL || list_length(plan_list) == 1); /* * DO NOT add any logic that could possibly throw an error between @@ -207,7 +211,7 @@ ExecuteQuery(ParseState *pstate, query_string, entry->plansource->commandTag, plan_list, - NULL, + cprep.prep_estate, cplan); /* @@ -577,6 +581,7 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es, PreparedStatement *entry; 
const char *query_string; CachedPlan *cplan; + CachedPlanPrepData cprep = {0}; List *plan_list; ListCell *p; ParamListInfo paramLI = NULL; @@ -633,8 +638,13 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es, } /* Replan if needed, and acquire a transient refcount */ + cprep.context = CurrentMemoryContext; + cprep.owner = CurrentResourceOwner; + if (es->generic) + cprep.eflags = EXEC_FLAG_EXPLAIN_GENERIC; cplan = GetCachedPlan(entry->plansource, paramLI, - CurrentResourceOwner, pstate->p_queryEnv); + CurrentResourceOwner, pstate->p_queryEnv, + &cprep); INSTR_TIME_SET_CURRENT(planduration); INSTR_TIME_SUBTRACT(planduration, planstart); @@ -655,12 +665,13 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es, plan_list = cplan->stmt_list; /* Explain each query */ + Assert(cprep.prep_estate == NULL || list_length(plan_list) == 1); foreach(p, plan_list) { PlannedStmt *pstmt = lfirst_node(PlannedStmt, p); if (pstmt->commandType != CMD_UTILITY) - ExplainOnePlan(pstmt, NULL, + ExplainOnePlan(pstmt, cprep.prep_estate, into, es, query_string, paramLI, pstate->p_queryEnv, &planduration, (es->buffers ? &bufusage : NULL), es->memory ? &mem_counters : NULL); diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index cc7794f58db..051b5d7bfcf 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -334,6 +334,10 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags) * * Returns an EState that the caller must either pass to ExecutorStart() * for reuse or free via FreeExecutorState() if execution will not proceed. + * GetCachedPlan() uses this to determine, based on initial pruning + * results, which partitions to lock; if the resulting EState is not + * delivered to ExecutorStart(), the executor would operate on unlocked + * relations. See the assert checks in standard_ExecutorStart(). 
*/ EState * ExecutorPrep(PlannedStmt *pstmt, ParamListInfo params, ResourceOwner owner, diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c index 952a784c924..c0ca72b38dd 100644 --- a/src/backend/executor/functions.c +++ b/src/backend/executor/functions.c @@ -699,6 +699,7 @@ init_execution_state(SQLFunctionCachePtr fcache) fcache->cplan = GetCachedPlan(plansource, fcache->paramLI, fcache->cowner, + NULL, NULL); /* diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 4cd5e262e0f..9230f2b554f 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -4865,8 +4865,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) * as a reference for building the ResultRelInfo of the target partition. * In either case, it doesn't matter which result relation is kept, so we * just keep the first one, if all others have been pruned. See also, - * ExecDoInitialPruning(), which ensures that this first result relation - * has been locked. + * AcquireExecutorLocksUnpruned(), which ensures that this first result + * relation has been locked. 
*/ i = 0; foreach(l, node->resultRelations) @@ -4880,6 +4880,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) /* all result relations pruned; keep the first one */ keep_rel = true; rti = linitial_int(node->resultRelations); + Assert(list_member_int(estate->es_plannedstmt->firstResultRels, rti)); i = 0; } diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c index 32c9d987c59..eb9552f85db 100644 --- a/src/backend/executor/spi.c +++ b/src/backend/executor/spi.c @@ -1580,6 +1580,7 @@ SPI_cursor_open_internal(const char *name, SPIPlanPtr plan, { CachedPlanSource *plansource; CachedPlan *cplan; + CachedPlanPrepData cprep = {0}; List *stmt_list; char *query_string; Snapshot snapshot; @@ -1660,8 +1661,12 @@ SPI_cursor_open_internal(const char *name, SPIPlanPtr plan, */ /* Replan if needed, and increment plan refcount for portal */ - cplan = GetCachedPlan(plansource, paramLI, NULL, _SPI_current->queryEnv); + cprep.context = portal->portalContext; + cprep.owner = portal->resowner; + cplan = GetCachedPlan(plansource, paramLI, NULL, _SPI_current->queryEnv, + &cprep); stmt_list = cplan->stmt_list; + Assert(cprep.prep_estate == NULL || list_length(stmt_list) == 1); if (!plan->saved) { @@ -1670,7 +1675,10 @@ SPI_cursor_open_internal(const char *name, SPIPlanPtr plan, * so must copy the plan into the portal's context. An error here * will result in leaking our refcount on the plan, but it doesn't * matter because the plan is unsaved and hence transient anyway. + * + * Unsaved plans use custom plans, so prep should be a no-op. 
*/ + Assert(cprep.prep_estate == NULL); oldcontext = MemoryContextSwitchTo(portal->portalContext); stmt_list = copyObject(stmt_list); MemoryContextSwitchTo(oldcontext); @@ -1686,7 +1694,7 @@ SPI_cursor_open_internal(const char *name, SPIPlanPtr plan, query_string, plansource->commandTag, stmt_list, - NULL, + cprep.prep_estate, cplan); /* @@ -2104,7 +2112,8 @@ SPI_plan_get_cached_plan(SPIPlanPtr plan) /* Get the generic plan for the query */ cplan = GetCachedPlan(plansource, NULL, plan->saved ? CurrentResourceOwner : NULL, - _SPI_current->queryEnv); + _SPI_current->queryEnv, + NULL); Assert(cplan == plansource->gplan); /* Pop the error context stack */ @@ -2501,6 +2510,7 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options, CachedPlanSource *plansource = (CachedPlanSource *) lfirst(lc1); List *stmt_list; ListCell *lc2; + CachedPlanPrepData cprep = {0}; spicallbackarg.query = plansource->query_string; @@ -2575,8 +2585,11 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options, * Replan if needed, and increment plan refcount. If it's a saved * plan, the refcount must be backed by the plan_owner. */ + cprep.context = CurrentMemoryContext; + cprep.owner = CurrentResourceOwner; cplan = GetCachedPlan(plansource, options->params, - plan_owner, _SPI_current->queryEnv); + plan_owner, _SPI_current->queryEnv, + &cprep); stmt_list = cplan->stmt_list; @@ -2616,6 +2629,7 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options, } } + Assert(cprep.prep_estate == NULL || list_length(stmt_list) == 1); foreach(lc2, stmt_list) { PlannedStmt *stmt = lfirst_node(PlannedStmt, lc2); @@ -2697,7 +2711,7 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options, options->params, _SPI_current->queryEnv, 0, - NULL); + cprep.prep_estate); res = _SPI_pquery(qdesc, fire_triggers, canSetTag ? 
options->tcount : 0); FreeQueryDesc(qdesc); diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 42604a0f75c..afa61d357c5 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -657,6 +657,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, result->permInfos = glob->finalrteperminfos; result->subrtinfos = glob->subrtinfos; result->resultRelations = glob->resultRelations; + result->firstResultRels = glob->firstResultRels; result->appendRelations = glob->appendRelations; result->subplans = glob->subplans; result->rewindPlanIDs = glob->rewindPlanIDs; diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 1b5b9b5ed9c..8c9956e687e 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -384,6 +384,24 @@ set_plan_references(PlannerInfo *root, Plan *plan) } } + /* + * Record the first result relation if it belongs to the set of + * initially prunable relations. We use bms_next_member() to get + * the lowest-numbered leaf result rel, which matches + * linitial_int(ModifyTable.resultRelations) because partition + * expansion preserves RT index order. ExecInitModifyTable() asserts + * that the recorded index matches what it actually needs. 
+ */ + if (root->leaf_result_relids) + { + Index firstResultRel = bms_next_member(root->leaf_result_relids, -1); + + firstResultRel += rtoffset; + if (bms_is_member(firstResultRel, root->glob->prunableRelids)) + root->glob->firstResultRels = + lappend_int(root->glob->firstResultRels, firstResultRel); + } + return result; } diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index ccdb6c01071..487258641a5 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -1637,6 +1637,7 @@ exec_bind_message(StringInfo input_message) int16 *rformats = NULL; CachedPlanSource *psrc; CachedPlan *cplan; + CachedPlanPrepData cprep = {0}; Portal portal; char *query_string; char *saved_stmt_name; @@ -2018,7 +2019,10 @@ exec_bind_message(StringInfo input_message) * will be generated in MessageContext. The plan refcount will be * assigned to the Portal, so it will be released at portal destruction. */ - cplan = GetCachedPlan(psrc, params, NULL, NULL); + cprep.context = portal->portalContext; + cprep.owner = portal->resowner; + cplan = GetCachedPlan(psrc, params, NULL, NULL, &cprep); + Assert(cprep.prep_estate == NULL || list_length(cplan->stmt_list) == 1); /* * Now we can define the portal. @@ -2031,7 +2035,7 @@ exec_bind_message(StringInfo input_message) query_string, psrc->commandTag, cplan->stmt_list, - NULL, + cprep.prep_estate, cplan); /* Portal is defined, set the plan ID based on its contents. */ diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c index 42ef3e82f82..b52c4c619ee 100644 --- a/src/backend/tcop/pquery.c +++ b/src/backend/tcop/pquery.c @@ -1214,6 +1214,7 @@ PortalRunMulti(Portal portal, * Loop to handle the individual queries generated from a single parsetree * by analysis and rewrite. 
*/ + Assert(portal->prep_estate == NULL || list_length(portal->stmts) == 1); foreach(stmtlist_item, portal->stmts) { PlannedStmt *pstmt = lfirst_node(PlannedStmt, stmtlist_item); diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c index 698e7c1aa22..b0c4d62564d 100644 --- a/src/backend/utils/cache/plancache.c +++ b/src/backend/utils/cache/plancache.c @@ -93,14 +93,17 @@ static bool StmtPlanRequiresRevalidation(CachedPlanSource *plansource); static bool BuildingPlanRequiresSnapshot(CachedPlanSource *plansource); static List *RevalidateCachedQuery(CachedPlanSource *plansource, QueryEnvironment *queryEnv); -static bool CheckCachedPlan(CachedPlanSource *plansource); +static bool CheckCachedPlan(CachedPlanSource *plansource, CachedPlanPrepData *cprep); static CachedPlan *BuildCachedPlan(CachedPlanSource *plansource, List *qlist, ParamListInfo boundParams, QueryEnvironment *queryEnv); static bool choose_custom_plan(CachedPlanSource *plansource, ParamListInfo boundParams); static double cached_plan_cost(CachedPlan *plan, bool include_planner); static Query *QueryListGetPrimaryStmt(List *stmts); -static void AcquireExecutorLocks(List *stmt_list, bool acquire); +static void AcquireExecutorLocksAll(List *stmt_list, bool acquire); +static void AcquireExecutorLocksUnpruned(List *stmt_list, bool acquire, + CachedPlanPrepData *cprep); +static void CachedPlanPrepCleanup(CachedPlanPrepData *cprep); static void AcquirePlannerLocks(List *stmt_list, bool acquire); static void ScanQueryForLocks(Query *parsetree, bool acquire); static bool ScanQueryWalker(Node *node, bool *acquire); @@ -942,6 +945,12 @@ RevalidateCachedQuery(CachedPlanSource *plansource, /* * CheckCachedPlan: see if the CachedPlanSource's generic plan is valid. 
* + * If 'cprep' is not NULL and the generic plan contains only a single + * statement, ExecutorPrep() is applied to that PlannedStmt to compute the set + * of partitions that survive initial runtime pruning in order to only lock + * them. The EState is saved in cprep.prep_estate, which must be passed to + * ExecutorStart() for reuse. + * * Caller must have already called RevalidateCachedQuery to verify that the * querytree is up to date. * @@ -949,7 +958,7 @@ RevalidateCachedQuery(CachedPlanSource *plansource, * (We must do this for the "true" result to be race-condition-free.) */ static bool -CheckCachedPlan(CachedPlanSource *plansource) +CheckCachedPlan(CachedPlanSource *plansource, CachedPlanPrepData *cprep) { CachedPlan *plan = plansource->gplan; @@ -983,7 +992,19 @@ CheckCachedPlan(CachedPlanSource *plansource) */ Assert(plan->refcount > 0); - AcquireExecutorLocks(plan->stmt_list, true); + /* + * Multi-statement CachedPlans (from rule rewriting) must not + * use pruning-aware locking, because later statements' pruning + * expressions could see stale results if evaluated before + * earlier statements have executed. + */ + if (cprep && list_length(plan->stmt_list) > 1) + cprep = NULL; + + if (cprep) + AcquireExecutorLocksUnpruned(plan->stmt_list, true, cprep); + else + AcquireExecutorLocksAll(plan->stmt_list, true); /* * If plan was transient, check to see if TransactionXmin has @@ -1005,7 +1026,13 @@ CheckCachedPlan(CachedPlanSource *plansource) } /* Oops, the race case happened. Release useless locks. */ - AcquireExecutorLocks(plan->stmt_list, false); + if (cprep) + AcquireExecutorLocksUnpruned(plan->stmt_list, false, cprep); + else + AcquireExecutorLocksAll(plan->stmt_list, false); + + /* Also clean up ExecutorPrep() state, if necessary. */ + CachedPlanPrepCleanup(cprep); } /* @@ -1285,6 +1312,16 @@ cached_plan_cost(CachedPlan *plan, bool include_planner) * On return, the plan is valid and we have sufficient locks to begin * execution. 
* + * If 'cprep' is not NULL and a single-statement generic plan is reused, + * the function performs initial pruning via ExecutorPrep() and locks only + * the surviving partitions. The resulting EState is stored in + * cprep->prep_estate and must be delivered to ExecutorStart() via + * QueryDesc->estate (or the equivalent portal/SPI path). Failure + * to do so means the executor will operate on relations for which + * locks were never acquired. Passing NULL for cprep is always safe; + * all partitions are locked as before. Multi-statement plans also + * fall back to locking all partitions. + * * On return, the refcount of the plan has been incremented; a later * ReleaseCachedPlan() call is expected. If "owner" is not NULL then * the refcount has been reported to that ResourceOwner (note that this @@ -1295,7 +1332,8 @@ cached_plan_cost(CachedPlan *plan, bool include_planner) */ CachedPlan * GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, - ResourceOwner owner, QueryEnvironment *queryEnv) + ResourceOwner owner, QueryEnvironment *queryEnv, + CachedPlanPrepData *cprep) { CachedPlan *plan = NULL; List *qlist; @@ -1317,7 +1355,9 @@ GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, if (!customplan) { - if (CheckCachedPlan(plansource)) + if (cprep) + cprep->params = boundParams; + if (CheckCachedPlan(plansource, cprep)) { /* We want a generic plan, and we already have a valid one */ plan = plansource->gplan; @@ -1904,11 +1944,13 @@ QueryListGetPrimaryStmt(List *stmts) } /* - * AcquireExecutorLocks: acquire locks needed for execution of a cached plan; - * or release them if acquire is false. + * AcquireExecutorLocksAll: acquire locks needed for execution of a cached + * plan; or release them if acquire is false. + * + * This locks all relations in a given PlannedStmt's range table. 
*/ static void -AcquireExecutorLocks(List *stmt_list, bool acquire) +AcquireExecutorLocksAll(List *stmt_list, bool acquire) { ListCell *lc1; @@ -1955,6 +1997,190 @@ AcquireExecutorLocks(List *stmt_list, bool acquire) } } +/* + * LockRelids + * Acquire or release locks on the specified relids, which reference + * entries in the provided range table. + * + * Helper for AcquireExecutorLocksUnpruned(). + */ +static void +LockRelids(List *rtable, Bitmapset *relids, bool acquire) +{ + int rtindex = -1; + + while ((rtindex = bms_next_member(relids, rtindex)) >= 0) + { + RangeTblEntry *rte = list_nth_node(RangeTblEntry, rtable, rtindex - 1); + + Assert(rte->rtekind == RTE_RELATION || + (rte->rtekind == RTE_SUBQUERY && OidIsValid(rte->relid))); + + /* + * Acquire the appropriate type of lock on each relation OID. Note + * that we don't actually try to open the rel, and hence will not + * fail if it's been dropped entirely --- we'll just transiently + * acquire a non-conflicting lock. + */ + if (acquire) + LockRelationOid(rte->relid, rte->rellockmode); + else + UnlockRelationOid(rte->relid, rte->rellockmode); + } +} + +/* + * AcquireExecutorLocksUnpruned + * Acquire or release execution locks for only unpruned relations + * referenced by the given single-statement PlannedStmt list. + * + * On acquire, this: + * - locks unprunable rels listed in PlannedStmt.unprunableRelids + * - runs ExecutorPrep() to perform initial runtime pruning + * - locks the surviving partitions reported in the prep estate + * - stores the EState in cprep->prep_estate + * + * On release, it: + * - uses the EState in cprep->prep_estate to determine which + * relids to unlock + * + * Memory allocation for the EState happens in cprep->context. + * Locks are acquired using cprep->owner. 
+ */ +static void +AcquireExecutorLocksUnpruned(List *stmt_list, bool acquire, + CachedPlanPrepData *cprep) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(cprep->context); + ListCell *lc1; + EState *prep_estate; + + Assert(cprep); + + /* + * When releasing locks, use the EState created during acquisition to + * determine which relids to unlock. + */ + prep_estate = cprep->prep_estate; + Assert(!acquire || prep_estate == NULL); + foreach(lc1, stmt_list) + { + PlannedStmt *plannedstmt = lfirst_node(PlannedStmt, lc1); + + if (plannedstmt->commandType == CMD_UTILITY) + { + /* Same as AcquireExecutorLocksAll(). */ + Query *query = UtilityContainsQuery(plannedstmt->utilityStmt); + + if (query) + ScanQueryForLocks(query, acquire); + continue; + } + + /* + * Lock tables mentioned in the original query and other unprunable + * relations that were added to the plan via inheritance expansion. + */ + LockRelids(plannedstmt->rtable, plannedstmt->unprunableRelids, acquire); + + /* Lock partitions surviving runtime initial pruning. */ + if (acquire) + { + /* + * Pruning expressions may call PL functions that require an active + * snapshot (e.g., via EnsurePortalSnapshotExists()). Establish one + * if needed. + */ + bool snap_pushed = false; + + if (!ActiveSnapshotSet()) + { + PushActiveSnapshot(GetTransactionSnapshot()); + snap_pushed = true; + } + + prep_estate = ExecutorPrep(plannedstmt, cprep->params, + cprep->owner, cprep->eflags); + Assert(prep_estate); + cprep->prep_estate = prep_estate; + + if (snap_pushed) + PopActiveSnapshot(); + } + + if (prep_estate) + { + /* + * es_unpruned_relids includes plannedstmt->unprunableRelids, + * which we've already locked. Filter them out to avoid double-locking. 
+ */ + Bitmapset *lock_relids = bms_difference(prep_estate->es_unpruned_relids, + plannedstmt->unprunableRelids); + + /* + * We must always include the first result relation of each + * ModifyTable node in the plan, that is, the one mentioned in + * plannedstmt->firstResultRels in the set of relations to be + * locked to satisfy executor assumptions described + * in ExecInitModifyTable(). This can be wasteful, because we + * may not need to use the first result relation at all if other + * result relations are unpruned and thus sufficient for the + * ModifyTable node's needs. Unfortunately, we don't have per-node + * unpruned_relids set to determine that other result relations + * are included. + */ + if (plannedstmt->resultRelations) + { + ListCell *lc2; + + foreach(lc2, plannedstmt->firstResultRels) + { + Index firstResultRel = lfirst_int(lc2); + + if (!bms_is_member(firstResultRel, lock_relids)) + lock_relids = bms_add_member(lock_relids, firstResultRel); + } + } + + LockRelids(plannedstmt->rtable, lock_relids, acquire); + bms_free(lock_relids); + } + } + + MemoryContextSwitchTo(oldcontext); +} + +/* + * CachedPlanPrepCleanup + * Dispose of EState built during pruning-aware lock acquisition. + * + * This is used when CheckCachedPlan() discovers that a CachedPlan has + * become invalid after AcquireExecutorLocksUnpruned() has already run. + * The execution locks have already been released by that point; this + * function frees the EState that the executor will never see. + */ +static void +CachedPlanPrepCleanup(CachedPlanPrepData *cprep) +{ + EState *prep_estate; + ResourceOwner oldowner; + + if (cprep == NULL) + return; + + /* Switch to owner that ExecutorPrep() would have used. 
*/ + oldowner = CurrentResourceOwner; + CurrentResourceOwner = cprep->owner; + + prep_estate = cprep->prep_estate; + Assert(prep_estate); + ExecCloseRangeTableRelations(prep_estate); + FreeExecutorState(prep_estate); + CurrentResourceOwner = oldowner; + + cprep->prep_estate = NULL; +} + /* * AcquirePlannerLocks: acquire locks needed for planning of a querytree list; * or release them if acquire is false. diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 27758ec16fe..4fd9d9bcc56 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -217,6 +217,9 @@ typedef struct PlannerGlobal /* "flat" list of integer RT indexes */ List *resultRelations; + /* "flat" list of integer RT indexes (one per ModifyTable node) */ + List *firstResultRels; + /* "flat" list of AppendRelInfos */ List *appendRelations; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index b6185825fcb..55279cbbda8 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -121,6 +121,16 @@ typedef struct PlannedStmt /* integer list of RT indexes, or NIL */ List *resultRelations; + /* + * rtable indexes of first target relation in each ModifyTable node in the + * plan for INSERT/UPDATE/DELETE/MERGE. NIL if resultRelations is NIL. + * + * These are used by AcquireExecutorLocksUnpruned() to ensure that the + * first result rel for each ModifyTable remains locked even if pruned; + * see ExecInitModifyTable() for the executor side assumptions. 
+ */ + List *firstResultRels; + /* list of AppendRelInfo nodes */ List *appendRelations; diff --git a/src/include/utils/plancache.h b/src/include/utils/plancache.h index 7a4a85c8038..1a153b816eb 100644 --- a/src/include/utils/plancache.h +++ b/src/include/utils/plancache.h @@ -27,6 +27,9 @@ typedef struct Query Query; typedef struct RawStmt RawStmt; +/* to avoid including execnodes.h */ +typedef struct EState EState; + /* possible values for plan_cache_mode */ typedef enum { @@ -196,6 +199,38 @@ typedef struct CachedExpression dlist_node node; /* link in global list of CachedExpressions */ } CachedExpression; +/* + * CachedPlanPrepData + * Carries ExecutorPrep results for a CachedPlan's PlannedStmt, + * along with context and owner information needed to allocate them. + * + * prep_estate is populated when GetCachedPlan() prepares a reused + * single-statement generic plan. Multi-statement plans (from rule + * rewriting) fall back to locking all partitions and leave this NULL. + * If the plan is found invalid after locking, the EState is freed + * by CachedPlanPrepCleanup() before retrying. + * + * ExecutorPrep state is allocated in 'context' and owned by 'owner'. + * + * eflags controls ExecutorPrep() behavior during initial pruning. + * Normally zero; set EXEC_FLAG_EXPLAIN_GENERIC to suppress pruning + * in EXPLAIN (GENERIC_PLAN). Need not match the eflags later passed + * to ExecutorStart(). + * + * prep_estate must reach ExecutorStart() to be adopted for execution. + * If the plan is invalidated before that happens, CachedPlanPrepCleanup() + * frees it instead. The EState is allocated in 'context' and its + * resources tracked under 'owner', which the caller sets to match the + * execution environment (e.g., portal context and resowner). 
+ */ +typedef struct CachedPlanPrepData +{ + EState *prep_estate; /* EState for the PlannedStmt */ + ParamListInfo params; /* params visible to ExecutorPrep */ + MemoryContext context; /* where to allocate EState and its fields */ + ResourceOwner owner; /* ResourceOwner for ExecutorPrep state */ + int eflags; /* executor flags to control ExecutorPrep */ +} CachedPlanPrepData; extern void InitPlanCache(void); extern void ResetPlanCache(void); @@ -240,7 +275,8 @@ extern List *CachedPlanGetTargetList(CachedPlanSource *plansource, extern CachedPlan *GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams, ResourceOwner owner, - QueryEnvironment *queryEnv); + QueryEnvironment *queryEnv, + CachedPlanPrepData *cprep); extern void ReleaseCachedPlan(CachedPlan *plan, ResourceOwner owner); extern bool CachedPlanAllowsSimpleValidityCheck(CachedPlanSource *plansource, diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out index deacdd75807..61781389d2f 100644 --- a/src/test/regress/expected/partition_prune.out +++ b/src/test/regress/expected/partition_prune.out @@ -4824,3 +4824,187 @@ select min(a) over (partition by a order by a) from part_abc where a >= stable_o drop view part_abc_view; drop table part_abc; +-- +-- Verify that pruning-aware locking skips pruned partitions +-- when reusing a generic cached plan. 
+-- +set plan_cache_mode to force_generic_plan; +create table prunelock_p (a int) partition by list (a); +create table prunelock_p1 partition of prunelock_p for values in (1); +create table prunelock_p2 partition of prunelock_p for values in (2); +create table prunelock_p3 partition of prunelock_p for values in (3); +prepare prunelock_q (int) as select * from prunelock_p where a = $1; +-- Force generic plan creation +explain (costs off) execute prunelock_q(1); + QUERY PLAN +---------------------------------------------- + Append + Subplans Removed: 2 + -> Seq Scan on prunelock_p1 prunelock_p_1 + Filter: (a = $1) +(4 rows) + +-- Execute and check which child partitions are locked +begin; +execute prunelock_q(1); + a +--- +(0 rows) + +select c.relname + from pg_locks l + join pg_class c on c.oid = l.relation + where l.pid = pg_backend_pid() + and c.relname like 'prunelock_p_' + order by c.relname; + relname +-------------- + prunelock_p1 +(1 row) + +commit; +deallocate prunelock_q; +-- Turn pruning off +set enable_partition_pruning to off; +prepare prunelock_q (int) as select * from prunelock_p where a = $1; +-- Force generic plan creation +explain (costs off) execute prunelock_q(1); + QUERY PLAN +---------------------------------------------- + Append + -> Seq Scan on prunelock_p1 prunelock_p_1 + Filter: (a = $1) + -> Seq Scan on prunelock_p2 prunelock_p_2 + Filter: (a = $1) + -> Seq Scan on prunelock_p3 prunelock_p_3 + Filter: (a = $1) +(7 rows) + +-- Execute and check which child partitions are locked +begin; +execute prunelock_q(1); + a +--- +(0 rows) + +select c.relname + from pg_locks l + join pg_class c on c.oid = l.relation + where l.pid = pg_backend_pid() + and c.relname like 'prunelock_p_' + order by c.relname; + relname +-------------- + prunelock_p1 + prunelock_p2 + prunelock_p3 +(3 rows) + +commit; +deallocate prunelock_q; +reset enable_partition_pruning; +-- +-- Verify firstResultRels handling with multiple ModifyTable nodes +-- (writable CTEs) 
targeting a partitioned table. When a pruning +-- parameter matches no partition, all result relations are pruned +-- and the executor must still find a usable first result relation +-- for each ModifyTable node. +-- +prepare prunelock_mt_q (int, int) as + with upd1 as (update prunelock_p set a = a), + upd2 as (update prunelock_p set a = a where a = $2) + update prunelock_p set a = a where a = $1; +-- Force generic plan creation +explain (costs off) execute prunelock_mt_q(1, 2); + QUERY PLAN +------------------------------------------------------------ + Update on prunelock_p + Update on prunelock_p1 prunelock_p_1 + CTE upd1 + -> Update on prunelock_p prunelock_p_3 + Update on prunelock_p1 prunelock_p_4 + Update on prunelock_p2 prunelock_p_5 + Update on prunelock_p3 prunelock_p_6 + -> Append + -> Seq Scan on prunelock_p1 prunelock_p_4 + -> Seq Scan on prunelock_p2 prunelock_p_5 + -> Seq Scan on prunelock_p3 prunelock_p_6 + CTE upd2 + -> Update on prunelock_p prunelock_p_7 + Update on prunelock_p2 prunelock_p_8 + -> Append + Subplans Removed: 2 + -> Seq Scan on prunelock_p2 prunelock_p_8 + Filter: (a = $2) + -> Append + Subplans Removed: 2 + -> Seq Scan on prunelock_p1 prunelock_p_1 + Filter: (a = $1) +(22 rows) + +-- All partitions pruned: value 4 matches no partition, so each +-- ModifyTable must still initialize correctly with no matching +-- result relations. 
+explain (costs off) execute prunelock_mt_q(4, 5); + QUERY PLAN +------------------------------------------------------------ + Update on prunelock_p + CTE upd1 + -> Update on prunelock_p prunelock_p_2 + Update on prunelock_p1 prunelock_p_3 + Update on prunelock_p2 prunelock_p_4 + Update on prunelock_p3 prunelock_p_5 + -> Append + -> Seq Scan on prunelock_p1 prunelock_p_3 + -> Seq Scan on prunelock_p2 prunelock_p_4 + -> Seq Scan on prunelock_p3 prunelock_p_5 + CTE upd2 + -> Update on prunelock_p prunelock_p_6 + -> Append + Subplans Removed: 3 + -> Append + Subplans Removed: 3 +(16 rows) + +deallocate prunelock_mt_q; +drop table prunelock_p; +-- +-- Verify that pruning-aware locking falls back to locking all +-- partitions for multi-statement CachedPlans. Rule rewriting can +-- expand a single statement into multiple PlannedStmts, and later +-- statements must not have their pruning evaluated before earlier +-- ones have executed, since CCI between statements can change what +-- pruning expressions see. 
+-- +create table prune_config (val int); +insert into prune_config values (1); +create table multistmt_pt (a int, b int) partition by list (a); +create table multistmt_pt_1 partition of multistmt_pt for values in (1); +create table multistmt_pt_2 partition of multistmt_pt for values in (2); +insert into multistmt_pt values (1, 0), (2, 0); +create function get_prune_val() returns int as $$ + select val from prune_config; +$$ language sql stable; +create rule config_upd_rule as on update to multistmt_pt + do also update prune_config set val = 2; +set plan_cache_mode to force_generic_plan; +prepare multi_q as update multistmt_pt set b = b + 1 where a = get_prune_val(); +-- first execute creates the generic plan +execute multi_q; +-- reset for the real test +update prune_config set val = 1; +update multistmt_pt set b = 0; +-- second execute reuses the plan; pruning-aware locking kicks in +execute multi_q; +select * from multistmt_pt order by a; + a | b +---+--- + 1 | 0 + 2 | 1 +(2 rows) + +deallocate multi_q; +drop rule config_upd_rule on multistmt_pt; +drop function get_prune_val; +drop table multistmt_pt, prune_config; +reset plan_cache_mode; diff --git a/src/test/regress/expected/plancache.out b/src/test/regress/expected/plancache.out index 4e59188196c..3043dbfac2d 100644 --- a/src/test/regress/expected/plancache.out +++ b/src/test/regress/expected/plancache.out @@ -398,3 +398,66 @@ select name, generic_plans, custom_plans from pg_prepared_statements (1 row) drop table test_mode; +-- This exercises the CachedPlanPrepCleanup() path, which must free +-- the EState created by ExecutorPrep() when the plan is invalidated +-- before execution begins. The pruning expression uses a stable SQL +-- function that calls a volatile plpgsql function. That function +-- performs DDL on a partition when a separate "signal" table says to +-- do so. The second EXECUTE should replan cleanly after the DDL. 
+set plan_cache_mode to force_generic_plan; +create table inval_during_pruning_p (a int) partition by list (a); +create table inval_during_pruning_p1 partition of inval_during_pruning_p for values in (1); +create table inval_during_pruning_p2 partition of inval_during_pruning_p for values in (2); +insert into inval_during_pruning_p values (1), (2); +create table inval_during_pruning_signal (create_idx bool not null); +insert into inval_during_pruning_signal values (false); +create or replace function invalidate_plancache_func() returns int +as $$ +declare + create_index bool; +begin + -- Perform DDL on a partition if asked to + select create_idx into create_index from inval_during_pruning_signal for update; + if create_index = true then + raise notice 'creating index on partition inval_during_pruning_p1'; + create index on inval_during_pruning_p1 (a); + update inval_during_pruning_signal set create_idx = false; + end if; + -- value that pruning will match against partition bounds + return 1; +end; +$$ language plpgsql volatile; +create or replace function stable_pruning_val() returns int as $$ + select invalidate_plancache_func(); +$$ language sql stable; +prepare inval_during_pruning_q as select * from inval_during_pruning_p where a = stable_pruning_val(); +-- Build a generic plan and run pruning once, but don't set the signal +-- for invalidate_plancache_func() to perform the DDL. +explain (verbose, costs off) execute inval_during_pruning_q; + QUERY PLAN +--------------------------------------------------------------------------- + Append + Subplans Removed: 1 + -> Seq Scan on public.inval_during_pruning_p1 inval_during_pruning_p_1 + Output: inval_during_pruning_p_1.a + Filter: (inval_during_pruning_p_1.a = stable_pruning_val()) +(5 rows) + +-- Reuse the generic plan. Make invalidate_plancache_func() perform DDL +-- during this execution, which should force replanning without errors. 
+update inval_during_pruning_signal set create_idx = true; +explain (verbose, costs off) execute inval_during_pruning_q; +NOTICE: creating index on partition inval_during_pruning_p1 + QUERY PLAN +--------------------------------------------------------------------------- + Append + Subplans Removed: 1 + -> Seq Scan on public.inval_during_pruning_p1 inval_during_pruning_p_1 + Output: inval_during_pruning_p_1.a + Filter: (inval_during_pruning_p_1.a = stable_pruning_val()) +(5 rows) + +deallocate inval_during_pruning_q; +drop table inval_during_pruning_p, inval_during_pruning_signal; +drop function invalidate_plancache_func, stable_pruning_val; +reset plan_cache_mode; diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql index d93c0c03bab..692415a8d9f 100644 --- a/src/test/regress/sql/partition_prune.sql +++ b/src/test/regress/sql/partition_prune.sql @@ -1447,3 +1447,119 @@ select min(a) over (partition by a order by a) from part_abc where a >= stable_o drop view part_abc_view; drop table part_abc; + +-- +-- Verify that pruning-aware locking skips pruned partitions +-- when reusing a generic cached plan. 
+-- +set plan_cache_mode to force_generic_plan; + +create table prunelock_p (a int) partition by list (a); +create table prunelock_p1 partition of prunelock_p for values in (1); +create table prunelock_p2 partition of prunelock_p for values in (2); +create table prunelock_p3 partition of prunelock_p for values in (3); + +prepare prunelock_q (int) as select * from prunelock_p where a = $1; + +-- Force generic plan creation +explain (costs off) execute prunelock_q(1); + +-- Execute and check which child partitions are locked +begin; +execute prunelock_q(1); + +select c.relname + from pg_locks l + join pg_class c on c.oid = l.relation + where l.pid = pg_backend_pid() + and c.relname like 'prunelock_p_' + order by c.relname; +commit; + +deallocate prunelock_q; + +-- Turn pruning off +set enable_partition_pruning to off; + +prepare prunelock_q (int) as select * from prunelock_p where a = $1; + +-- Force generic plan creation +explain (costs off) execute prunelock_q(1); + +-- Execute and check which child partitions are locked +begin; +execute prunelock_q(1); + +select c.relname + from pg_locks l + join pg_class c on c.oid = l.relation + where l.pid = pg_backend_pid() + and c.relname like 'prunelock_p_' + order by c.relname; +commit; + +deallocate prunelock_q; +reset enable_partition_pruning; + +-- +-- Verify firstResultRels handling with multiple ModifyTable nodes +-- (writable CTEs) targeting a partitioned table. When a pruning +-- parameter matches no partition, all result relations are pruned +-- and the executor must still find a usable first result relation +-- for each ModifyTable node. 
+-- +prepare prunelock_mt_q (int, int) as + with upd1 as (update prunelock_p set a = a), + upd2 as (update prunelock_p set a = a where a = $2) + update prunelock_p set a = a where a = $1; + +-- Force generic plan creation +explain (costs off) execute prunelock_mt_q(1, 2); + +-- All partitions pruned: value 4 matches no partition, so each +-- ModifyTable must still initialize correctly with no matching +-- result relations. +explain (costs off) execute prunelock_mt_q(4, 5); + +deallocate prunelock_mt_q; +drop table prunelock_p; + +-- +-- Verify that pruning-aware locking falls back to locking all +-- partitions for multi-statement CachedPlans. Rule rewriting can +-- expand a single statement into multiple PlannedStmts, and later +-- statements must not have their pruning evaluated before earlier +-- ones have executed, since CCI between statements can change what +-- pruning expressions see. +-- +create table prune_config (val int); +insert into prune_config values (1); + +create table multistmt_pt (a int, b int) partition by list (a); +create table multistmt_pt_1 partition of multistmt_pt for values in (1); +create table multistmt_pt_2 partition of multistmt_pt for values in (2); +insert into multistmt_pt values (1, 0), (2, 0); + +create function get_prune_val() returns int as $$ + select val from prune_config; +$$ language sql stable; + +create rule config_upd_rule as on update to multistmt_pt + do also update prune_config set val = 2; + +set plan_cache_mode to force_generic_plan; +prepare multi_q as update multistmt_pt set b = b + 1 where a = get_prune_val(); +-- first execute creates the generic plan +execute multi_q; +-- reset for the real test +update prune_config set val = 1; +update multistmt_pt set b = 0; +-- second execute reuses the plan; pruning-aware locking kicks in +execute multi_q; +select * from multistmt_pt order by a; + +deallocate multi_q; +drop rule config_upd_rule on multistmt_pt; +drop function get_prune_val; +drop table multistmt_pt, 
prune_config; +reset plan_cache_mode; diff --git a/src/test/regress/sql/plancache.sql b/src/test/regress/sql/plancache.sql index 4b2f11dcc64..6a8b8787de6 100644 --- a/src/test/regress/sql/plancache.sql +++ b/src/test/regress/sql/plancache.sql @@ -223,3 +223,55 @@ select name, generic_plans, custom_plans from pg_prepared_statements where name = 'test_mode_pp'; drop table test_mode; + +-- This exercises the CachedPlanPrepCleanup() path, which must free +-- the EState created by ExecutorPrep() when the plan is invalidated +-- before execution begins. The pruning expression uses a stable SQL +-- function that calls a volatile plpgsql function. That function +-- performs DDL on a partition when a separate "signal" table says to +-- do so. The second EXECUTE should replan cleanly after the DDL. +set plan_cache_mode to force_generic_plan; +create table inval_during_pruning_p (a int) partition by list (a); +create table inval_during_pruning_p1 partition of inval_during_pruning_p for values in (1); +create table inval_during_pruning_p2 partition of inval_during_pruning_p for values in (2); +insert into inval_during_pruning_p values (1), (2); + +create table inval_during_pruning_signal (create_idx bool not null); +insert into inval_during_pruning_signal values (false); +create or replace function invalidate_plancache_func() returns int +as $$ +declare + create_index bool; +begin + -- Perform DDL on a partition if asked to + select create_idx into create_index from inval_during_pruning_signal for update; + if create_index = true then + raise notice 'creating index on partition inval_during_pruning_p1'; + create index on inval_during_pruning_p1 (a); + update inval_during_pruning_signal set create_idx = false; + end if; + -- value that pruning will match against partition bounds + return 1; +end; +$$ language plpgsql volatile; + +create or replace function stable_pruning_val() returns int as $$ + select invalidate_plancache_func(); +$$ language sql stable; + +prepare 
inval_during_pruning_q as select * from inval_during_pruning_p where a = stable_pruning_val(); + +-- Build a generic plan and run pruning once, but don't set the signal +-- for invalidate_plancache_func() to perform the DDL. +explain (verbose, costs off) execute inval_during_pruning_q; + +-- Reuse the generic plan. Make invalidate_plancache_func() perform DDL +-- during this execution, which should force replanning without errors. +update inval_during_pruning_signal set create_idx = true; +explain (verbose, costs off) execute inval_during_pruning_q; + +deallocate inval_during_pruning_q; +drop table inval_during_pruning_p, inval_during_pruning_signal; +drop function invalidate_plancache_func, stable_pruning_val; + +reset plan_cache_mode; -- 2.47.3