Re: Possible bug (or at least unexpected behavior) - Mailing list pgsql-sql

From Tom Lane
Subject Re: Possible bug (or at least unexpected behavior)
Date
Msg-id 84934.1660079457@sss.pgh.pa.us
Whole thread Raw
In response to Re: Possible bug (or at least unexpected behavior)  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-sql
I wrote:
> ... It looks like memory management
> in SQL functions is not coping well with expanded arrays, but I'm
> not quite sure where it's going off the rails.

It doesn't seem to be a memory management problem, but rather that
SQL functions are being careless with arguments that are read/write
expanded Datums.  A function that is passed such an argument is
allowed to modify it in-place, and array_append does so to reduce
the expense of repeatedly appending to an array value.  If you
have two array_append's referencing the same parameter in a SQL
function, and that parameter is passed in as a R/W datum, you
get the wrong answer: the second array_append will see the effects
of the first one.  Also, if the SQL function does array_append
first and array_cardinality second, array_cardinality reports the
wrong result.

Now, it doesn't look like your example function does either of those
things, but it turns out that it does after function inlining.  The
planner effectively flattens out one level of the recursion, creating
a plan in which we do have these hazards.

The only practical fix I can see is to force SQL function parameter
values to read-only.  We could do better if we knew which parameters
are actually multiply referenced in the function, but we don't have
the infrastructure needed to detect that.  I'm not convinced that
it'd be appropriate to expend a lot of effort here --- non-inlined
execution of a SQL function is a pretty low-performance situation in
any case.  So I've just gone for the simplest possible fix in the
attached.

            regards, tom lane

diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c
index 076226868f..e134a82ff7 100644
--- a/src/backend/executor/functions.c
+++ b/src/backend/executor/functions.c
@@ -939,6 +939,7 @@ postquel_sub_params(SQLFunctionCachePtr fcache,
     if (nargs > 0)
     {
         ParamListInfo paramLI;
+        Oid           *argtypes = fcache->pinfo->argtypes;

         if (fcache->paramLI == NULL)
         {
@@ -955,10 +956,24 @@ postquel_sub_params(SQLFunctionCachePtr fcache,
         {
             ParamExternData *prm = ¶mLI->params[i];

-            prm->value = fcinfo->args[i].value;
+            /*
+             * If an incoming parameter value is a R/W expanded datum, we
+             * force it to R/O.  We'd be perfectly entitled to scribble on it,
+             * but the problem is that if the parameter is referenced more
+             * than once in the function, earlier references might mutate the
+             * value seen by later references, which won't do at all.  We
+             * could do better if we could be sure of the number of Param
+             * nodes in the function's plans; but we might not have planned
+             * all the statements yet, nor do we have plan tree walker
+             * infrastructure.  (Examining the parse trees is not good enough,
+             * because of possible function inlining during planning.)
+             */
             prm->isnull = fcinfo->args[i].isnull;
+            prm->value = MakeExpandedObjectReadOnly(fcinfo->args[i].value,
+                                                    prm->isnull,
+                                                    get_typlen(argtypes[i]));
             prm->pflags = 0;
-            prm->ptype = fcache->pinfo->argtypes[i];
+            prm->ptype = argtypes[i];
         }
     }
     else
diff --git a/src/test/regress/expected/create_function_sql.out b/src/test/regress/expected/create_function_sql.out
index a31daffbf3..50aca5940f 100644
--- a/src/test/regress/expected/create_function_sql.out
+++ b/src/test/regress/expected/create_function_sql.out
@@ -666,6 +666,22 @@ SELECT * FROM voidtest5(3);
 -----------
 (0 rows)

+-- Regression tests for bugs:
+-- Check that arguments that are R/W expanded datums aren't corrupted by
+-- multiple uses.  This test knows that array_append() returns a R/W datum
+-- and will modify a R/W array input in-place.  We use SETOF to prevent
+-- inlining of the SQL function.
+CREATE FUNCTION double_append(anyarray, anyelement) RETURNS SETOF anyarray
+LANGUAGE SQL IMMUTABLE AS
+$$ SELECT array_append($1, $2) || array_append($1, $2) $$;
+SELECT double_append(array_append(ARRAY[q1], q2), q3)
+  FROM (VALUES(1,2,3), (4,5,6)) v(q1,q2,q3);
+ double_append
+---------------
+ {1,2,3,1,2,3}
+ {4,5,6,4,5,6}
+(2 rows)
+
 -- Things that shouldn't work:
 CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL
     AS 'SELECT ''not an integer'';';
@@ -692,7 +708,7 @@ CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL
 ERROR:  only one AS item needed for language "sql"
 -- Cleanup
 DROP SCHEMA temp_func_test CASCADE;
-NOTICE:  drop cascades to 29 other objects
+NOTICE:  drop cascades to 30 other objects
 DETAIL:  drop cascades to function functest_a_1(text,date)
 drop cascades to function functest_a_2(text[])
 drop cascades to function functest_a_3()
@@ -722,5 +738,6 @@ drop cascades to function voidtest2(integer,integer)
 drop cascades to function voidtest3(integer)
 drop cascades to function voidtest4(integer)
 drop cascades to function voidtest5(integer)
+drop cascades to function double_append(anyarray,anyelement)
 DROP USER regress_unpriv_user;
 RESET search_path;
diff --git a/src/test/regress/sql/create_function_sql.sql b/src/test/regress/sql/create_function_sql.sql
index cc0ccd8db1..89e9af3a49 100644
--- a/src/test/regress/sql/create_function_sql.sql
+++ b/src/test/regress/sql/create_function_sql.sql
@@ -385,6 +385,19 @@ CREATE FUNCTION voidtest5(a int) RETURNS SETOF VOID LANGUAGE SQL AS
 $$ SELECT generate_series(1, a) $$ STABLE;
 SELECT * FROM voidtest5(3);

+-- Regression tests for bugs:
+
+-- Check that arguments that are R/W expanded datums aren't corrupted by
+-- multiple uses.  This test knows that array_append() returns a R/W datum
+-- and will modify a R/W array input in-place.  We use SETOF to prevent
+-- inlining of the SQL function.
+CREATE FUNCTION double_append(anyarray, anyelement) RETURNS SETOF anyarray
+LANGUAGE SQL IMMUTABLE AS
+$$ SELECT array_append($1, $2) || array_append($1, $2) $$;
+
+SELECT double_append(array_append(ARRAY[q1], q2), q3)
+  FROM (VALUES(1,2,3), (4,5,6)) v(q1,q2,q3);
+
 -- Things that shouldn't work:

 CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL

pgsql-sql by date:

Previous
From: Pierre Chevalier
Date:
Subject: Re: select items based on 2 columns
Next
From: "John Ericson"
Date:
Subject: Alternative to "AT TIME ZONE" that is less of a foot-gun?