From dacc87da678bd159624be097cc00070e885fd4f3 Mon Sep 17 00:00:00 2001 From: Amit Langote Date: Thu, 13 Mar 2025 21:07:01 +0900 Subject: [PATCH v3] Ensure first result relation is included even if pruned This started as an investigation into crashes and misbehavior in MERGE statements when all target partitions were pruned. These issues became possible after commit cbc127917e, which introduced tracking of unpruned relids to avoid processing pruned relations, and changed ExecInitModifyTable() to process only unpruned result relations. Some executor code paths rely on ModifyTableState.resultRelInfo[0] being present and initialized, even when no result relations remain after pruning. For example, ExecMerge() and ExecMergeNotMatched() use the first resultRelInfo to determine the appropriate action. Similarly, ExecInitPartitionInfo() assumes that at least one result relation exists. To preserve these assumptions, ExecDoInitialPruning() now ensures that the first result relation of each ModifyTable node in the plan is locked, even if it was pruned. ExecInitModifyTable() then includes it in the initialized result relation list. To support this, PlannedStmt now includes a list of RT indexes identifying the first result relation of each ModifyTable node in the plan. This avoids crashes and undefined behavior while still allowing pruning of other result relations to proceed as before. Bug: #18830 Reported-by: Robins Tharakan Diagnozed-by: Tender Wang Diagnozed-by: Dean Rasheed Suggested-by: Dean Rasheed Reviewed-by: Dean Rasheed Reviewed-by: Tender Wang Discussion: https://postgr.es/m/18830-1f31ea1dc930d444%40postgresql.org --- src/backend/commands/explain.c | 11 ++- src/backend/executor/execMain.c | 2 +- src/backend/executor/execPartition.c | 25 +++++- src/backend/executor/execUtils.c | 18 ++-- src/backend/executor/nodeModifyTable.c | 41 +++++++++- src/backend/optimizer/plan/planner.c | 1 + src/backend/optimizer/plan/setrefs.c | 3 + src/include/executor/execPartition.h | 4 +- src/include/executor/executor.h | 2 +- src/include/nodes/pathnodes.h | 3 + src/include/nodes/plannodes.h | 7 ++ src/test/regress/expected/partition_prune.out | 82 +++++++++++++++++++ src/test/regress/sql/partition_prune.sql | 32 ++++++++ 13 files changed, 214 insertions(+), 17 deletions(-) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 19ffcc2cacb..f1da1325b2f 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -4695,10 +4695,17 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors, break; } - /* Should we explicitly label target relations? */ + /* + * Should we explicitly label target relations? + * + * Do not show it in the nrels == 1 case if it is a pruned relation added + * by ExecInitModifyTable() in certain cases. + */ labeltargets = (mtstate->mt_nrels > 1 || (mtstate->mt_nrels == 1 && - mtstate->resultRelInfo[0].ri_RangeTableIndex != node->nominalRelation)); + mtstate->resultRelInfo[0].ri_RangeTableIndex != node->nominalRelation && + bms_is_member(mtstate->resultRelInfo[0].ri_RangeTableIndex, + mtstate->ps.state->es_unpruned_relids))); if (labeltargets) ExplainOpenGroup("Target Tables", "Target Tables", false, es); diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 0493b7d5365..e9bd98c7738 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -1006,7 +1006,7 @@ InitPlan(QueryDesc *queryDesc, int eflags) case ROW_MARK_SHARE: case ROW_MARK_KEYSHARE: case ROW_MARK_REFERENCE: - relation = ExecGetRangeTableRelation(estate, rc->rti); + relation = ExecGetRangeTableRelation(estate, rc->rti, false); break; case ROW_MARK_COPY: /* no physical table access is required */ diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 5cd5e2eeb80..d0557299032 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -1819,6 +1819,7 @@ adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap) void ExecDoInitialPruning(EState *estate) { + PlannedStmt *stmt = estate->es_plannedstmt; ListCell *lc; List *locked_relids = NIL; @@ -1867,6 +1868,28 @@ ExecDoInitialPruning(EState *estate) validsubplans); } + /* + * Lock the first result relation even if it was pruned. Some executor + * paths (e.g., in nodeModifyTable.c and execPartition.c) expect this + * relation to be locked regardless of pruning. + */ + if (stmt->resultRelations && ExecShouldLockRelations(estate)) + { + foreach(lc, stmt->firstResultRels) + { + Index firstResultRel = lfirst_int(lc); + + if (!bms_is_member(firstResultRel, estate->es_unpruned_relids)) + { + RangeTblEntry *rte = exec_rt_fetch(firstResultRel, estate); + + Assert(rte->rtekind == RTE_RELATION && rte->rellockmode != NoLock); + LockRelationOid(rte->relid, rte->rellockmode); + locked_relids = lappend_int(locked_relids, firstResultRel); + } + } + } + /* * Release the useless locks if the plan won't be executed. This is the * same as what CheckCachedPlan() in plancache.c does. @@ -2076,7 +2099,7 @@ CreatePartitionPruneState(EState *estate, PartitionPruneInfo *pruneinfo, * because that entry will be held open and locked for the * duration of this executor run. */ - partrel = ExecGetRangeTableRelation(estate, pinfo->rtindex); + partrel = ExecGetRangeTableRelation(estate, pinfo->rtindex, false); /* Remember for InitExecPartitionPruneContext(). */ pprune->partrel = partrel; diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 39d6f4d819e..55ab18fb826 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -746,7 +746,7 @@ ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags) Relation rel; /* Open the relation. */ - rel = ExecGetRangeTableRelation(estate, scanrelid); + rel = ExecGetRangeTableRelation(estate, scanrelid, false); /* * Complain if we're attempting a scan of an unscannable relation, except @@ -815,18 +815,22 @@ ExecInitRangeTable(EState *estate, List *rangeTable, List *permInfos, * * The Relations will be closed in ExecEndPlan(). * - * Note: The caller must ensure that 'rti' refers to an unpruned relation - * (i.e., it is a member of estate->es_unpruned_relids) before calling this - * function. Attempting to open a pruned relation will result in an error. + * If isResultRel is true, the relation is being used as a result relation. + * Such a relation might have been pruned, which is OK for result relations, + * but not for scan relations; see the details in ExecInitModifyTable(). If + * isResultRel is false, the caller must ensure that 'rti' refers to an + * unpruned relation (i.e., it is a member of estate->es_unpruned_relids) + * before calling this function. Attempting to open a pruned relation for + * scanning will result in an error. */ Relation -ExecGetRangeTableRelation(EState *estate, Index rti) +ExecGetRangeTableRelation(EState *estate, Index rti, bool isResultRel) { Relation rel; Assert(rti > 0 && rti <= estate->es_range_table_size); - if (!bms_is_member(rti, estate->es_unpruned_relids)) + if (!isResultRel && !bms_is_member(rti, estate->es_unpruned_relids)) elog(ERROR, "trying to open a pruned relation"); rel = estate->es_relations[rti - 1]; @@ -880,7 +884,7 @@ ExecInitResultRelation(EState *estate, ResultRelInfo *resultRelInfo, { Relation resultRelationDesc; - resultRelationDesc = ExecGetRangeTableRelation(estate, rti); + resultRelationDesc = ExecGetRangeTableRelation(estate, rti, true); InitResultRelInfo(resultRelInfo, resultRelationDesc, rti, diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index b0fe50075ad..97a1993f7c1 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -4471,6 +4471,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) ModifyTableState *mtstate; Plan *subplan = outerPlan(node); CmdType operation = node->operation; + int total_nrels = list_length(node->resultRelations); int nrels; List *resultRelations = NIL; List *withCheckOptionLists = NIL; @@ -4536,8 +4537,42 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) } i++; } + nrels = list_length(resultRelations); + /* + * We must avoid pruning every result relation. This is important for + * MERGE, since even if every result relation is pruned from the subplan, + * there might still be NOT MATCHED rows, for which there may be INSERT + * actions to perform. To allow these actions to be found, at least one + * result relation must be kept. In addition, when inserting into a + * partitioned table, ExecInitPartitionInfo() needs a ResultRelInfo struct + * as a reference for building the ResultRelInfo of the target partition. + * In either case, it doesn't matter which result relation is kept, so we + * just keep the first one, if all others have been pruned. + */ + if (nrels == 0) + { + nrels = 1; + resultRelations = lappend_int(resultRelations, + linitial_int(node->resultRelations)); + if (node->withCheckOptionLists) + withCheckOptionLists = lappend(withCheckOptionLists, + linitial(node->withCheckOptionLists)); + if (node->returningLists) + returningLists = lappend(returningLists, + linitial(node->returningLists)); + if (node->updateColnosLists) + updateColnosLists = lappend(updateColnosLists, + linitial(node->updateColnosLists)); + if (node->mergeActionLists) + mergeActionLists = lappend(mergeActionLists, + linitial(node->mergeActionLists)); + if (node->mergeJoinConditions) + mergeJoinConditions = lappend(mergeJoinConditions, + linitial(node->mergeJoinConditions)); + } + /* * create state structure */ @@ -4735,7 +4770,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) */ mtstate->mt_resultOidAttno = ExecFindJunkAttributeInTlist(subplan->targetlist, "tableoid"); - Assert(AttributeNumberIsValid(mtstate->mt_resultOidAttno) || nrels == 1); + Assert(AttributeNumberIsValid(mtstate->mt_resultOidAttno) || total_nrels == 1); mtstate->mt_lastResultOid = InvalidOid; /* force lookup at first tuple */ mtstate->mt_lastResultIndex = 0; /* must be zero if no such attr */ @@ -4832,7 +4867,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) if (node->onConflictAction != ONCONFLICT_NONE) { /* insert may only have one relation, inheritance is not expanded */ - Assert(nrels == 1); + Assert(total_nrels == 1); resultRelInfo->ri_onConflictArbiterIndexes = node->arbiterIndexes; } @@ -4979,7 +5014,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) if (operation == CMD_INSERT) { /* insert may only have one relation, inheritance is not expanded */ - Assert(nrels == 1); + Assert(total_nrels == 1); resultRelInfo = mtstate->resultRelInfo; if (!resultRelInfo->ri_usesFdwDirectModify && resultRelInfo->ri_FdwRoutine != NULL && diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index a4d523dcb0f..141177e7413 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -562,6 +562,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, glob->prunableRelids); result->permInfos = glob->finalrteperminfos; result->resultRelations = glob->resultRelations; + result->firstResultRels = glob->firstResultRels; result->appendRelations = glob->appendRelations; result->subplans = glob->subplans; result->rewindPlanIDs = glob->rewindPlanIDs; diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 999a5a8ab5a..150e9f060ee 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -1248,6 +1248,9 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) lappend_int(root->glob->resultRelations, splan->rootRelation); } + root->glob->firstResultRels = + lappend_int(root->glob->firstResultRels, + linitial_int(splan->resultRelations)); } break; case T_Append: diff --git a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h index 626613012f9..3b3f46aced0 100644 --- a/src/include/executor/execPartition.h +++ b/src/include/executor/execPartition.h @@ -43,8 +43,8 @@ extern void ExecCleanupTupleRouting(ModifyTableState *mtstate, * subpart_map contains indexes into PartitionPruningData.partrelprunedata[]. * * partrel Partitioned table Relation; obtained by - * ExecGetRangeTableRelation(estate, rti), where - * rti is PartitionedRelPruneInfo.rtindex. + * ExecGetRangeTableRelation(estate, rti, false), + * where rti is PartitionedRelPruneInfo.rtindex. * nparts Length of subplan_map[] and subpart_map[]. * subplan_map Subplan index by partition index, or -1. * subpart_map Subpart index by partition index, or -1. diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 0d2ffabda68..3255bccb038 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -680,7 +680,7 @@ exec_rt_fetch(Index rti, EState *estate) return (RangeTblEntry *) list_nth(estate->es_range_table, rti - 1); } -extern Relation ExecGetRangeTableRelation(EState *estate, Index rti); +extern Relation ExecGetRangeTableRelation(EState *estate, Index rti, bool result_rel); extern void ExecInitResultRelation(EState *estate, ResultRelInfo *resultRelInfo, Index rti); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index fbf05322c75..f8e5671726e 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -138,6 +138,9 @@ typedef struct PlannerGlobal /* "flat" list of integer RT indexes */ List *resultRelations; + /* "flat" list of integer RT indexes */ + List *firstResultRels; + /* "flat" list of AppendRelInfos */ List *appendRelations; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 22841211f48..f78bffd90cf 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -102,6 +102,13 @@ typedef struct PlannedStmt /* integer list of RT indexes, or NIL */ List *resultRelations; + /* + * rtable indexes of first target relation in each ModifyTable node in the + * plan for INSERT/UPDATE/DELETE/MERGE + */ + /* integer list of RT indexes, or NIL */ + List *firstResultRels; + /* list of AppendRelInfo nodes */ List *appendRelations; diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out index 8097f4e9282..0bf35260b46 100644 --- a/src/test/regress/expected/partition_prune.out +++ b/src/test/regress/expected/partition_prune.out @@ -4662,6 +4662,88 @@ table part_abc_view; 2 | c | t (1 row) +-- MERGE ... INSERT when all pruned from MERGE source. +begin; +explain (costs off) +merge into part_abc_view pt +using (select stable_one() + 1 as pid) as q join part_abc_2 pt2 on (q.pid = pt2.a) on pt.a = stable_one() + 2 +when not matched then insert values (1, 'd', false) returning pt.a; + QUERY PLAN +------------------------------------------------ + Merge on part_abc + -> Nested Loop Left Join + -> Seq Scan on part_abc_2 pt2 + Filter: ((stable_one() + 1) = a) + -> Materialize + -> Append + Subplans Removed: 2 +(7 rows) + +merge into part_abc_view pt +using (select stable_one() + 1 as pid) as q join part_abc_2 pt2 on (q.pid = pt2.a) on pt.a = stable_one() + 2 +when not matched then insert values (1, 'd', false) returning pt.a; + a +--- + 1 +(1 row) + +table part_abc_view; + a | b | c +---+---+--- + 1 | d | f + 2 | c | t +(2 rows) + +rollback; +-- A case with multiple ModifyTable nodes. +begin; +create table part_abc_log (action text, a int, b text, c bool); +explain (costs off) +with t as ( + merge into part_abc_view pt + using (select stable_one() + 1 as pid) as q join part_abc_2 pt2 on (q.pid = pt2.a) on pt.a = stable_one() + 2 + when not matched then insert values (1, 'd', false) returning merge_action(), pt.* +) +insert into part_abc_log select * from t returning *; + QUERY PLAN +-------------------------------------------------------- + Insert on part_abc_log + CTE t + -> Merge on part_abc + -> Nested Loop Left Join + -> Seq Scan on part_abc_2 pt2 + Filter: ((stable_one() + 1) = a) + -> Materialize + -> Append + Subplans Removed: 2 + -> CTE Scan on t +(10 rows) + +with t as ( + merge into part_abc_view pt + using (select stable_one() + 1 as pid) as q join part_abc_2 pt2 on (q.pid = pt2.a) on pt.a = stable_one() + 2 + when not matched then insert values (1, 'd', false) returning merge_action(), pt.* +) +insert into part_abc_log select * from t returning *; + action | a | b | c +--------+---+---+--- + INSERT | 1 | d | f +(1 row) + +table part_abc_view; + a | b | c +---+---+--- + 1 | d | f + 2 | c | t +(2 rows) + +table part_abc_log; + action | a | b | c +--------+---+---+--- + INSERT | 1 | d | f +(1 row) + +rollback; -- A case with nested MergeAppend with its own PartitionPruneInfo. create index on part_abc (a); alter table part_abc add d int; diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql index 4a2c74b0899..f6db9479f54 100644 --- a/src/test/regress/sql/partition_prune.sql +++ b/src/test/regress/sql/partition_prune.sql @@ -1401,6 +1401,38 @@ using (select stable_one() + 2 as pid) as q join part_abc_1 pt1 on (q.pid = pt1. when matched then delete returning pt.a; table part_abc_view; +-- MERGE ... INSERT when all pruned from MERGE source. +begin; +explain (costs off) +merge into part_abc_view pt +using (select stable_one() + 1 as pid) as q join part_abc_2 pt2 on (q.pid = pt2.a) on pt.a = stable_one() + 2 +when not matched then insert values (1, 'd', false) returning pt.a; +merge into part_abc_view pt +using (select stable_one() + 1 as pid) as q join part_abc_2 pt2 on (q.pid = pt2.a) on pt.a = stable_one() + 2 +when not matched then insert values (1, 'd', false) returning pt.a; +table part_abc_view; +rollback; + +-- A case with multiple ModifyTable nodes. +begin; +create table part_abc_log (action text, a int, b text, c bool); +explain (costs off) +with t as ( + merge into part_abc_view pt + using (select stable_one() + 1 as pid) as q join part_abc_2 pt2 on (q.pid = pt2.a) on pt.a = stable_one() + 2 + when not matched then insert values (1, 'd', false) returning merge_action(), pt.* +) +insert into part_abc_log select * from t returning *; +with t as ( + merge into part_abc_view pt + using (select stable_one() + 1 as pid) as q join part_abc_2 pt2 on (q.pid = pt2.a) on pt.a = stable_one() + 2 + when not matched then insert values (1, 'd', false) returning merge_action(), pt.* +) +insert into part_abc_log select * from t returning *; +table part_abc_view; +table part_abc_log; +rollback; + -- A case with nested MergeAppend with its own PartitionPruneInfo. create index on part_abc (a); alter table part_abc add d int; -- 2.43.0