Re: BUG #18497: Heap-use-after-free in plpgsql - Mailing list pgsql-bugs

From Tom Lane
Subject Re: BUG #18497: Heap-use-after-free in plpgsql
Date
Msg-id 1694260.1718234686@sss.pgh.pa.us
Whole thread Raw
In response to BUG #18497: Heap-use-after-free in plpgsql  (PG Bug reporting form <noreply@postgresql.org>)
List pgsql-bugs
PG Bug reporting form <noreply@postgresql.org> writes:
> When building postgresql on REL_16_STABLE tag with ASAN assertion error:

Thanks for the report!

> Two sql files are required:

This can actually be reproduced more simply: you don't need two
sessions, because it's purely straight-line behavior.  You don't
need ASAN either, as a plain old --enable-cassert (or more
specifically -DCLOBBER_FREED_MEMORY) build will show it too --
or at least it does on HEAD:

regression=# select f1();
ERROR:  unexpected plan node type: 328
CONTEXT:  PL/pgSQL function f1() line 4 at assignment
regression=# select f1();
ERROR:  unrecognized node type: 2139062143
CONTEXT:  PL/pgSQL function f1() line 4 at assignment

It appears to me that there are two independent bugs here.

One is that exec_eval_simple_expr() assumes, when revalidating
a simple-expression plan, that it doesn't have to recheck the
"purely syntactic" tests made in exec_simple_check_plan().
I think that's mostly correct, but it's wrong at least with
respect to checking query->hasTargetSRFs: this test case shows
how to break that.  You could probably cause query->hasAggs to
change in the same way.  On the whole it seems safest to
refactor so that we recheck all of those things.

Right now, since we aren't rechecking that, we end up trying to
apply exec_save_simple_expr to a plan tree containing a ProjectSet
node, which it isn't expecting, leading to the "unexpected plan
node type" error.  And then we have the second bug: longjmp'ing
out of this leaves behind some dangling pointers, which is what
causes the "unrecognized node type: 2139062143" error (or the ASAN
complaint) during the second call.  So we need to be more careful
about leaving a clean state in case we fail out of here.

The attached seems to be enough to fix this.  I verified the
second aspect by leaving the new exec_is_simple_query(expr) test
out of exec_eval_simple_expr, in which case you still get the
"unexpected plan node type" error but things are OK on retries.

            regards, tom lane

diff --git a/src/pl/plpgsql/src/expected/plpgsql_simple.out b/src/pl/plpgsql/src/expected/plpgsql_simple.out
index e1f5d670fb..7b22e60f19 100644
--- a/src/pl/plpgsql/src/expected/plpgsql_simple.out
+++ b/src/pl/plpgsql/src/expected/plpgsql_simple.out
@@ -89,3 +89,32 @@ select simplecaller();
             4
 (1 row)

+-- Check case where called function changes from non-SRF to SRF (bug #18497)
+create or replace function simplecaller() returns int language plpgsql
+as $$
+declare x int;
+begin
+  x := simplesql();
+  return x;
+end$$;
+select simplecaller();
+ simplecaller
+--------------
+            4
+(1 row)
+
+drop function simplesql();
+create function simplesql() returns setof int language sql
+as $$select 22 + 22$$;
+select simplecaller();
+ simplecaller
+--------------
+           44
+(1 row)
+
+select simplecaller();
+ simplecaller
+--------------
+           44
+(1 row)
+
diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c
index 6947575b94..d1958812b7 100644
--- a/src/pl/plpgsql/src/pl_exec.c
+++ b/src/pl/plpgsql/src/pl_exec.c
@@ -339,6 +339,7 @@ static void exec_eval_cleanup(PLpgSQL_execstate *estate);
 static void exec_prepare_plan(PLpgSQL_execstate *estate,
                               PLpgSQL_expr *expr, int cursorOptions);
 static void exec_simple_check_plan(PLpgSQL_execstate *estate, PLpgSQL_expr *expr);
+static bool exec_is_simple_query(PLpgSQL_expr *expr);
 static void exec_save_simple_expr(PLpgSQL_expr *expr, CachedPlan *cplan);
 static void exec_check_rw_parameter(PLpgSQL_expr *expr);
 static void exec_check_assignable(PLpgSQL_execstate *estate, int dno);
@@ -6088,18 +6089,26 @@ exec_eval_simple_expr(PLpgSQL_execstate *estate,
     {
         /* Need to replan */
         CachedPlan *cplan;
+        List       *plansources;
+        CachedPlanSource *plansource;

         /*
          * If we have a valid refcount on some previous version of the plan,
          * release it, so we don't leak plans intra-transaction.
          */
         if (expr->expr_simple_plan_lxid == curlxid)
-        {
             ReleaseCachedPlan(expr->expr_simple_plan,
                               estate->simple_eval_resowner);
-            expr->expr_simple_plan = NULL;
-            expr->expr_simple_plan_lxid = InvalidLocalTransactionId;
-        }
+
+        /*
+         * Reset to "not simple" to leave sane state (with no dangling
+         * pointers) in case we fail while replanning.
+         */
+        expr->expr_simple_expr = NULL;
+        expr->expr_rw_param = NULL;
+        expr->expr_simple_plansource = NULL;
+        expr->expr_simple_plan = NULL;
+        expr->expr_simple_plan_lxid = InvalidLocalTransactionId;

         /* Do the replanning work in the eval_mcontext */
         oldcontext = MemoryContextSwitchTo(get_eval_mcontext(estate));
@@ -6114,16 +6123,24 @@ exec_eval_simple_expr(PLpgSQL_execstate *estate,
          */
         Assert(cplan != NULL);

+        /* However, maybe the single CachedPlanSource is now different? */
+        plansources = SPI_plan_get_plan_sources(expr->plan);
+        plansource = (CachedPlanSource *) linitial(plansources);
+
         /*
-         * This test probably can't fail either, but if it does, cope by
-         * declaring the plan to be non-simple.  On success, we'll acquire a
-         * refcount on the new plan, stored in simple_eval_resowner.
+         * Recheck exec_is_simple_query, which could now report false in
+         * edge-case scenarios such as a non-SRF having been replaced with a
+         * SRF.  Also recheck CachedPlanAllowsSimpleValidityCheck, just to be
+         * sure.  If either test fails, cope by declaring the plan to be
+         * non-simple.  On success, we'll acquire a refcount on the new plan,
+         * stored in simple_eval_resowner.
          */
-        if (CachedPlanAllowsSimpleValidityCheck(expr->expr_simple_plansource,
-                                                cplan,
+        if (exec_is_simple_query(expr) &&
+            CachedPlanAllowsSimpleValidityCheck(plansource, cplan,
                                                 estate->simple_eval_resowner))
         {
             /* Remember that we have the refcount */
+            expr->expr_simple_plansource = plansource;
             expr->expr_simple_plan = cplan;
             expr->expr_simple_plan_lxid = curlxid;
         }
@@ -6131,9 +6148,6 @@ exec_eval_simple_expr(PLpgSQL_execstate *estate,
         {
             /* Release SPI_plan_get_cached_plan's refcount */
             ReleaseCachedPlan(cplan, CurrentResourceOwner);
-            /* Mark expression as non-simple, and fail */
-            expr->expr_simple_expr = NULL;
-            expr->expr_rw_param = NULL;
             return false;
         }

@@ -7974,7 +7988,6 @@ exec_simple_check_plan(PLpgSQL_execstate *estate, PLpgSQL_expr *expr)
 {
     List       *plansources;
     CachedPlanSource *plansource;
-    Query       *query;
     CachedPlan *cplan;
     MemoryContext oldcontext;

@@ -7990,31 +8003,88 @@ exec_simple_check_plan(PLpgSQL_execstate *estate, PLpgSQL_expr *expr)
      * called immediately after creating the CachedPlanSource, we need not
      * worry about the query being stale.
      */
+    if (!exec_is_simple_query(expr))
+        return;
+
+    /* exec_is_simple_query verified that there's just one CachedPlanSource */
+    plansources = SPI_plan_get_plan_sources(expr->plan);
+    plansource = (CachedPlanSource *) linitial(plansources);
+
+    /*
+     * Get the generic plan for the query.  If replanning is needed, do that
+     * work in the eval_mcontext.  (Note that replanning could throw an error,
+     * in which case the expr is left marked "not simple", which is fine.)
+     */
+    oldcontext = MemoryContextSwitchTo(get_eval_mcontext(estate));
+    cplan = SPI_plan_get_cached_plan(expr->plan);
+    MemoryContextSwitchTo(oldcontext);
+
+    /* Can't fail, because we checked for a single CachedPlanSource above */
+    Assert(cplan != NULL);
+
+    /*
+     * Verify that plancache.c thinks the plan is simple enough to use
+     * CachedPlanIsSimplyValid.  Given the restrictions above, it's unlikely
+     * that this could fail, but if it does, just treat plan as not simple. On
+     * success, save a refcount on the plan in the simple-expression resowner.
+     */
+    if (CachedPlanAllowsSimpleValidityCheck(plansource, cplan,
+                                            estate->simple_eval_resowner))
+    {
+        /* Remember that we have the refcount */
+        expr->expr_simple_plansource = plansource;
+        expr->expr_simple_plan = cplan;
+        expr->expr_simple_plan_lxid = MyProc->vxid.lxid;
+
+        /* Share the remaining work with the replan code path */
+        exec_save_simple_expr(expr, cplan);
+    }
+
+    /*
+     * Release the plan refcount obtained by SPI_plan_get_cached_plan.  (This
+     * refcount is held by the wrong resowner, so we can't just repurpose it.)
+     */
+    ReleaseCachedPlan(cplan, CurrentResourceOwner);
+}
+
+/*
+ * exec_is_simple_query - precheck a query tree to see if it might be simple
+ *
+ * Check the analyzed-and-rewritten form of a query to see if we will be
+ * able to treat it as a simple expression.  It is caller's responsibility
+ * that the CachedPlanSource be up-to-date.
+ */
+static bool
+exec_is_simple_query(PLpgSQL_expr *expr)
+{
+    List       *plansources;
+    CachedPlanSource *plansource;
+    Query       *query;

     /*
-     * We can only test queries that resulted in exactly one CachedPlanSource
+     * We can only test queries that resulted in exactly one CachedPlanSource.
      */
     plansources = SPI_plan_get_plan_sources(expr->plan);
     if (list_length(plansources) != 1)
-        return;
+        return false;
     plansource = (CachedPlanSource *) linitial(plansources);

     /*
      * 1. There must be one single querytree.
      */
     if (list_length(plansource->query_list) != 1)
-        return;
+        return false;
     query = (Query *) linitial(plansource->query_list);

     /*
-     * 2. It must be a plain SELECT query without any input tables
+     * 2. It must be a plain SELECT query without any input tables.
      */
     if (!IsA(query, Query))
-        return;
+        return false;
     if (query->commandType != CMD_SELECT)
-        return;
+        return false;
     if (query->rtable != NIL)
-        return;
+        return false;

     /*
      * 3. Can't have any subplans, aggregates, qual clauses either.  (These
@@ -8038,51 +8108,18 @@ exec_simple_check_plan(PLpgSQL_execstate *estate, PLpgSQL_expr *expr)
         query->limitOffset ||
         query->limitCount ||
         query->setOperations)
-        return;
+        return false;

     /*
-     * 4. The query must have a single attribute as result
+     * 4. The query must have a single attribute as result.
      */
     if (list_length(query->targetList) != 1)
-        return;
+        return false;

     /*
      * OK, we can treat it as a simple plan.
-     *
-     * Get the generic plan for the query.  If replanning is needed, do that
-     * work in the eval_mcontext.  (Note that replanning could throw an error,
-     * in which case the expr is left marked "not simple", which is fine.)
      */
-    oldcontext = MemoryContextSwitchTo(get_eval_mcontext(estate));
-    cplan = SPI_plan_get_cached_plan(expr->plan);
-    MemoryContextSwitchTo(oldcontext);
-
-    /* Can't fail, because we checked for a single CachedPlanSource above */
-    Assert(cplan != NULL);
-
-    /*
-     * Verify that plancache.c thinks the plan is simple enough to use
-     * CachedPlanIsSimplyValid.  Given the restrictions above, it's unlikely
-     * that this could fail, but if it does, just treat plan as not simple. On
-     * success, save a refcount on the plan in the simple-expression resowner.
-     */
-    if (CachedPlanAllowsSimpleValidityCheck(plansource, cplan,
-                                            estate->simple_eval_resowner))
-    {
-        /* Remember that we have the refcount */
-        expr->expr_simple_plansource = plansource;
-        expr->expr_simple_plan = cplan;
-        expr->expr_simple_plan_lxid = MyProc->vxid.lxid;
-
-        /* Share the remaining work with the replan code path */
-        exec_save_simple_expr(expr, cplan);
-    }
-
-    /*
-     * Release the plan refcount obtained by SPI_plan_get_cached_plan.  (This
-     * refcount is held by the wrong resowner, so we can't just repurpose it.)
-     */
-    ReleaseCachedPlan(cplan, CurrentResourceOwner);
+    return true;
 }

 /*
diff --git a/src/pl/plpgsql/src/sql/plpgsql_simple.sql b/src/pl/plpgsql/src/sql/plpgsql_simple.sql
index 57020d22d6..143bf09dce 100644
--- a/src/pl/plpgsql/src/sql/plpgsql_simple.sql
+++ b/src/pl/plpgsql/src/sql/plpgsql_simple.sql
@@ -80,3 +80,25 @@ create or replace function simplesql() returns int language sql
 as $$select 2 + 2$$;

 select simplecaller();
+
+
+-- Check case where called function changes from non-SRF to SRF (bug #18497)
+
+create or replace function simplecaller() returns int language plpgsql
+as $$
+declare x int;
+begin
+  x := simplesql();
+  return x;
+end$$;
+
+select simplecaller();
+
+drop function simplesql();
+
+create function simplesql() returns setof int language sql
+as $$select 22 + 22$$;
+
+select simplecaller();
+
+select simplecaller();

pgsql-bugs by date:

Previous
From: Thomas Munro
Date:
Subject: Re: BUG #18503: Reproducible 'Segmentation fault' in 16.3 on ARM64
Next
From: Tender Wang
Date:
Subject: Re: BUG #18377: Assert false in "partdesc->nparts >= pinfo->nparts", fileName="execPartition.c", lineNumber=1943