Re: BUG #17800: ON CONFLICT DO UPDATE fails to detect incompatible fields that leads to a server crash - Mailing list pgsql-bugs

From Tom Lane
Subject Re: BUG #17800: ON CONFLICT DO UPDATE fails to detect incompatible fields that leads to a server crash
Date
Msg-id 586533.1677178414@sss.pgh.pa.us
Whole thread Raw
In response to Re: BUG #17800: ON CONFLICT DO UPDATE fails to detect incompatible fields that leads to a server crash  (Tom Lane <tgl@sss.pgh.pa.us>)
Responses Re: BUG #17800: ON CONFLICT DO UPDATE fails to detect incompatible fields that leads to a server crash
List pgsql-bugs
I wrote:
> Hmm ... this doesn't look very much like what I was imagining.  Let
> me draft a prototype and we can compare.

Here's what I was thinking about.  I didn't bother adding regression
test cases yet, but it fixes both of the symptoms Alexander found.

This looks pretty workable to me, and in particular I think it'd be
safe to backpatch (with some fields moved to the end of their structs
to satisfy ABI worries).  We could then revert 3f7323cbb et al
in the back branches.

            regards, tom lane

diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index 812ead95bc..23f2dbfef3 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -133,6 +133,7 @@ ExecInitExpr(Expr *node, PlanState *parent)
     /* Initialize ExprState with empty step list */
     state = makeNode(ExprState);
     state->expr = node;
+    state->last_param = -1;
     state->parent = parent;
     state->ext_params = NULL;

@@ -170,6 +171,7 @@ ExecInitExprWithParams(Expr *node, ParamListInfo ext_params)
     /* Initialize ExprState with empty step list */
     state = makeNode(ExprState);
     state->expr = node;
+    state->last_param = -1;
     state->parent = NULL;
     state->ext_params = ext_params;

@@ -222,6 +224,7 @@ ExecInitQual(List *qual, PlanState *parent)

     state = makeNode(ExprState);
     state->expr = (Expr *) qual;
+    state->last_param = -1;
     state->parent = parent;
     state->ext_params = NULL;

@@ -367,6 +370,7 @@ ExecBuildProjectionInfo(List *targetList,
     projInfo->pi_state.type = T_ExprState;
     state = &projInfo->pi_state;
     state->expr = (Expr *) targetList;
+    state->last_param = -1;
     state->parent = parent;
     state->ext_params = NULL;

@@ -538,6 +542,7 @@ ExecBuildUpdateProjection(List *targetList,
         state->expr = (Expr *) targetList;
     else
         state->expr = NULL;        /* not used */
+    state->last_param = -1;
     state->parent = parent;
     state->ext_params = NULL;

@@ -874,18 +879,46 @@ ExecCheck(ExprState *state, ExprContext *econtext)
 /*
  * Prepare a compiled expression for execution.  This has to be called for
  * every ExprState before it can be executed.
- *
- * NB: While this currently only calls ExecReadyInterpretedExpr(),
- * this will likely get extended to further expression evaluation methods.
- * Therefore this should be used instead of directly calling
- * ExecReadyInterpretedExpr().
  */
 static void
 ExecReadyExpr(ExprState *state)
 {
+    /*
+     * We need one last fixup in the steps list: if there are any subplan
+     * nodes with private output arrays, their associated PARAM_EXEC params
+     * need to be given pointers to those arrays.
+     */
+    if (state->parent != NULL)
+    {
+        ListCell   *lc;
+
+        foreach(lc, state->parent->subPlan)
+        {
+            SubPlanState *sstate = lfirst_node(SubPlanState, lc);
+            List       *setParam;
+            int            idx;
+
+            if (sstate->private_exec_vals == NULL)
+                continue;        /* nothing to do here */
+
+            setParam = sstate->subplan->setParam;
+            idx = state->last_param;
+            while (idx >= 0)
+            {
+                ExprEvalStep *as = &state->steps[idx];
+
+                if (list_member_int(setParam, as->d.param.paramid))
+                    as->d.param.private_exec_vals = sstate->private_exec_vals;
+                idx = as->d.param.prev_param;
+            }
+        }
+    }
+
+    /* Now see if JIT wants to compile it */
     if (jit_compile_expr(state))
         return;

+    /* Nope, use the default interpreted-expression implementation */
     ExecReadyInterpretedExpr(state);
 }

@@ -993,7 +1026,12 @@ ExecInitExprRec(Expr *node, ExprState *state,
                         scratch.opcode = EEOP_PARAM_EXEC;
                         scratch.d.param.paramid = param->paramid;
                         scratch.d.param.paramtype = param->paramtype;
+                        /* For now, assume it's not a MULTIEXPR output Param */
+                        scratch.d.param.private_exec_vals = NULL;
+                        /* ...but link it into a list so we can fix it later */
+                        scratch.d.param.prev_param = state->last_param;
                         ExprEvalPushStep(state, &scratch);
+                        state->last_param = state->steps_len - 1;
                         break;
                     case PARAM_EXTERN:

@@ -1622,6 +1660,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
                  */
                 elemstate = makeNode(ExprState);
                 elemstate->expr = acoerce->elemexpr;
+                elemstate->last_param = -1;
                 elemstate->parent = state->parent;
                 elemstate->ext_params = state->ext_params;

@@ -3270,6 +3309,7 @@ ExecBuildAggTrans(AggState *aggstate, AggStatePerPhase phase,
     LastAttnumInfo deform = {0, 0, 0};

     state->expr = (Expr *) aggstate;
+    state->last_param = -1;
     state->parent = parent;

     scratch.resvalue = &state->resvalue;
@@ -3739,6 +3779,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc,

     state->expr = NULL;
     state->flags = EEO_FLAG_IS_QUAL;
+    state->last_param = -1;
     state->parent = parent;

     scratch.resvalue = &state->resvalue;
@@ -3889,6 +3930,7 @@ ExecBuildParamSetEqual(TupleDesc desc,

     state->expr = NULL;
     state->flags = EEO_FLAG_IS_QUAL;
+    state->last_param = -1;
     state->parent = parent;

     scratch.resvalue = &state->resvalue;
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 19351fe34b..268c7fde70 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -2413,15 +2413,19 @@ ExecEvalFuncExprStrictFusage(ExprState *state, ExprEvalStep *op,
 /*
  * Evaluate a PARAM_EXEC parameter.
  *
- * PARAM_EXEC params (internal executor parameters) are stored in the
- * ecxt_param_exec_vals array, and can be accessed by array index.
+ * Most PARAM_EXEC params (internal executor parameters) are stored in the
+ * ecxt_param_exec_vals array, but ones representing MULTIEXPR subplan outputs
+ * may be in a private array.  In either case the paramid is the array index.
  */
 void
 ExecEvalParamExec(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
 {
+    ParamExecData *param_array = op->d.param.private_exec_vals;
     ParamExecData *prm;

-    prm = &(econtext->ecxt_param_exec_vals[op->d.param.paramid]);
+    if (param_array == NULL)
+        param_array = econtext->ecxt_param_exec_vals;
+    prm = &(param_array[op->d.param.paramid]);
     if (unlikely(prm->execPlan != NULL))
     {
         /* Parameter not evaluated yet, so go do it */
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index 64af36a187..79d8f1a33e 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -255,20 +255,30 @@ ExecScanSubPlan(SubPlanState *node,
      * same underlying subplan.  However, that fails to hold for MULTIEXPRs
      * because they can have non-empty args lists, and the "same" args might
      * have mutated into different forms in different parts of a plan tree.
-     * There is currently no problem because MULTIEXPR can appear only in an
-     * UPDATE's top-level target list, so it won't get duplicated anyplace.
-     * Postgres versions before v14 had to make concrete efforts to avoid
-     * sharing output parameters across different clones of a MULTIEXPR, and
-     * the problem could recur someday.
+     * Therefore, MULTIEXPR SubPlans that are not initplans (i.e., have
+     * non-empty args lists) are given private output arrays that are
+     * allocated when the containing targetlist expression is compiled.  This
+     * solution works because the referencing Params must be part of the same
+     * targetlist expression, so we can fix them up to find the private output
+     * array during expression compilation.
      */
     if (subLinkType == MULTIEXPR_SUBLINK)
     {
-        EState       *estate = node->parent->state;
+        ParamExecData *param_array;
+
+        /*
+         * If subplan has a private output array, use that; otherwise use
+         * es_param_exec_vals.
+         */
+        if (node->private_exec_vals)
+            param_array = node->private_exec_vals;
+        else
+            param_array = node->parent->state->es_param_exec_vals;

         foreach(l, subplan->setParam)
         {
             int            paramid = lfirst_int(l);
-            ParamExecData *prm = &(estate->es_param_exec_vals[paramid]);
+            ParamExecData *prm = &(param_array[paramid]);

             prm->execPlan = node;
         }
@@ -830,6 +840,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
     /*
      * initialize my state
      */
+    sstate->private_exec_vals = NULL;
     sstate->curTuple = NULL;
     sstate->curArray = PointerGetDatum(NULL);
     sstate->projLeft = NULL;
@@ -861,12 +872,36 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
      */
     if (subplan->setParam != NIL && subplan->subLinkType != CTE_SUBLINK)
     {
+        ParamExecData *param_array = estate->es_param_exec_vals;
         ListCell   *lst;

+        /*
+         * If this is a MULTIEXPR subplan (and not an initplan), it needs a
+         * private ParamExecData output array.  We index this array with the
+         * same Param IDs used for the query-global es_param_exec_vals array,
+         * so there will be some wasted space, but not enough to sweat over.
+         */
+        if (subplan->subLinkType == MULTIEXPR_SUBLINK &&
+            subplan->parParam != NIL)
+        {
+            int            max_paramid = 0;
+
+            foreach(lst, subplan->setParam)
+            {
+                int            paramid = lfirst_int(lst);
+
+                max_paramid = Max(max_paramid, paramid);
+            }
+
+            param_array = palloc0_array(ParamExecData, max_paramid + 1);
+            sstate->private_exec_vals = param_array;
+        }
+
+        /* Now set the execPlan links within the correct output array. */
         foreach(lst, subplan->setParam)
         {
             int            paramid = lfirst_int(lst);
-            ParamExecData *prm = &(estate->es_param_exec_vals[paramid]);
+            ParamExecData *prm = &(param_array[paramid]);

             prm->execPlan = sstate;
         }
@@ -1057,8 +1092,8 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
  * Note that this routine MUST clear the execPlan fields of the plan's
  * output parameters after evaluating them!
  *
- * The results of this function are stored in the EState associated with the
- * ExprContext (particularly, its ecxt_param_exec_vals); any pass-by-ref
+ * The results of this function are usually stored in the es_param_exec_vals
+ * array of the EState associated with the ExprContext; any pass-by-ref
  * result Datums are allocated in the EState's per-query memory.  The passed
  * econtext can be any ExprContext belonging to that EState; which one is
  * important only to the extent that the ExprContext's per-tuple memory
@@ -1067,6 +1102,11 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
  * that data isn't needed after we return.  In practice, because initplan
  * parameters are never more complex than Vars, Aggrefs, etc, evaluating them
  * currently never leaks any memory anyway.)
+ *
+ * However, for MULTIEXPR subplans we instead store the output in a private
+ * output array.  This avoids confusion when several "mutated" versions of
+ * the same MULTIEXPR subplan appear in a query, which is possible in an
+ * UPDATE on a partitioned table or inheritance tree.
  * ----------------------------------------------------------------
  */
 void
@@ -1077,6 +1117,7 @@ ExecSetParamPlan(SubPlanState *node, ExprContext *econtext)
     SubLinkType subLinkType = subplan->subLinkType;
     EState       *estate = planstate->state;
     ScanDirection dir = estate->es_direction;
+    ParamExecData *output_array;
     MemoryContext oldcontext;
     TupleTableSlot *slot;
     ListCell   *pvar;
@@ -1126,6 +1167,17 @@ ExecSetParamPlan(SubPlanState *node, ExprContext *econtext)
         planstate->chgParam = bms_add_member(planstate->chgParam, paramid);
     }

+    /*
+     * Decide which ParamExecData array receives the output.  Typically it's
+     * the ecxt_param_exec_vals array, but not if the subplan has a private
+     * output array.  (That's currently only possible for MULTIEXPR subplans,
+     * but it's simplest to support it for all cases.)
+     */
+    if (node->private_exec_vals)
+        output_array = node->private_exec_vals;
+    else
+        output_array = econtext->ecxt_param_exec_vals;
+
     /*
      * Run the plan.  (If it needs to be rescanned, the first ExecProcNode
      * call will take care of that.)
@@ -1141,7 +1193,7 @@ ExecSetParamPlan(SubPlanState *node, ExprContext *econtext)
         {
             /* There can be only one setParam... */
             int            paramid = linitial_int(subplan->setParam);
-            ParamExecData *prm = &(econtext->ecxt_param_exec_vals[paramid]);
+            ParamExecData *prm = &(output_array[paramid]);

             prm->execPlan = NULL;
             prm->value = BoolGetDatum(true);
@@ -1191,7 +1243,7 @@ ExecSetParamPlan(SubPlanState *node, ExprContext *econtext)
         foreach(l, subplan->setParam)
         {
             int            paramid = lfirst_int(l);
-            ParamExecData *prm = &(econtext->ecxt_param_exec_vals[paramid]);
+            ParamExecData *prm = &(output_array[paramid]);

             prm->execPlan = NULL;
             prm->value = heap_getattr(node->curTuple, i, tdesc,
@@ -1204,7 +1256,7 @@ ExecSetParamPlan(SubPlanState *node, ExprContext *econtext)
     {
         /* There can be only one setParam... */
         int            paramid = linitial_int(subplan->setParam);
-        ParamExecData *prm = &(econtext->ecxt_param_exec_vals[paramid]);
+        ParamExecData *prm = &(output_array[paramid]);

         /*
          * We build the result array in query context so it won't disappear;
@@ -1226,7 +1278,7 @@ ExecSetParamPlan(SubPlanState *node, ExprContext *econtext)
         {
             /* There can be only one setParam... */
             int            paramid = linitial_int(subplan->setParam);
-            ParamExecData *prm = &(econtext->ecxt_param_exec_vals[paramid]);
+            ParamExecData *prm = &(output_array[paramid]);

             prm->execPlan = NULL;
             prm->value = BoolGetDatum(false);
@@ -1238,7 +1290,7 @@ ExecSetParamPlan(SubPlanState *node, ExprContext *econtext)
             foreach(l, subplan->setParam)
             {
                 int            paramid = lfirst_int(l);
-                ParamExecData *prm = &(econtext->ecxt_param_exec_vals[paramid]);
+                ParamExecData *prm = &(output_array[paramid]);

                 prm->execPlan = NULL;
                 prm->value = (Datum) 0;
@@ -1302,6 +1354,8 @@ ExecReScanSetParamPlan(SubPlanState *node, PlanState *parent)
         elog(ERROR, "setParam list of initplan is empty");
     if (bms_is_empty(planstate->plan->extParam))
         elog(ERROR, "extParam set of initplan is empty");
+    if (node->private_exec_vals != NULL)
+        elog(ERROR, "initplan should not have private output area");

     /*
      * Don't actually re-scan: it'll happen inside ExecSetParamPlan if needed.
diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h
index 06c3adc0a1..c43bdcea2b 100644
--- a/src/include/executor/execExpr.h
+++ b/src/include/executor/execExpr.h
@@ -379,6 +379,11 @@ typedef struct ExprEvalStep
         {
             int            paramid;    /* numeric ID for parameter */
             Oid            paramtype;    /* OID of parameter's datatype */
+            /* These fields are used for EEOP_PARAM_EXEC only: */
+            /* Param's value is to be sought at private_exec_vals[paramid], */
+            /* or ecxt_param_exec_vals[paramid] if private_exec_vals is NULL */
+            ParamExecData *private_exec_vals;
+            int            prev_param; /* previous PARAM_EXEC's index, or -1 */
         }            param;

         /* for EEOP_PARAM_CALLBACK */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 20f4c8b35f..2ae9cc702e 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -119,8 +119,9 @@ typedef struct ExprState

     int            steps_len;        /* number of steps currently */
     int            steps_alloc;    /* allocated length of steps array */
+    int            last_param;        /* index of last EEOP_PARAM_EXEC step, or -1 */

-#define FIELDNO_EXPRSTATE_PARENT 11
+#define FIELDNO_EXPRSTATE_PARENT 12
     struct PlanState *parent;    /* parent PlanState node, if any */
     ParamListInfo ext_params;    /* for compiling PARAM_EXTERN nodes */

@@ -948,6 +949,7 @@ typedef struct SubPlanState
     struct PlanState *parent;    /* parent plan node's state tree */
     ExprState  *testexpr;        /* state of combining expression */
     List       *args;            /* states of argument expression(s) */
+    ParamExecData *private_exec_vals;    /* if not NULL, output values go here */
     HeapTuple    curTuple;        /* copy of most recent tuple from subplan */
     Datum        curArray;        /* most recent array from ARRAY() subplan */
     /* these are used when hashing the subselect's output: */
diff --git a/src/include/nodes/params.h b/src/include/nodes/params.h
index ad23113a61..90ddbaffed 100644
--- a/src/include/nodes/params.h
+++ b/src/include/nodes/params.h
@@ -132,14 +132,16 @@ typedef struct ParamListInfoData
  *      ParamExecData entries are used for executor internal parameters
  *      (that is, values being passed into or out of a sub-query).  The
  *      paramid of a PARAM_EXEC Param is a (zero-based) index into an
- *      array of ParamExecData records, which is referenced through
- *      es_param_exec_vals or ecxt_param_exec_vals.
+ *      array of ParamExecData records, which is typically found via
+ *      es_param_exec_vals or ecxt_param_exec_vals.  But Params that
+ *      reference the output of a MULTIEXPR subplan will reference a
+ *      private ParamExecData array for that specific subplan.
  *
  *      If execPlan is not NULL, it points to a SubPlanState node that needs
  *      to be executed to produce the value.  (This is done so that we can have
  *      lazy evaluation of InitPlans: they aren't executed until/unless a
- *      result value is needed.)    Otherwise the value is assumed to be valid
- *      when needed.
+ *      result value is needed.  MULTIEXPR subplans also use this mechanism.)
+ *      Otherwise the value is assumed to be valid when needed.
  * ----------------
  */


pgsql-bugs by date:

Previous
From: Dean Rasheed
Date:
Subject: Re: BUG #17803: Rule "ALSO INSERT ... SELECT ..." fails to substitute default values
Next
From: Tom Lane
Date:
Subject: Re: Clause accidentally pushed down ( Possible bug in Making Vars outer-join aware)