Thread: BUG #15990: PROCEDURE throws "SQL Error [XX000]: ERROR: no known snapshots" with PostGIS geometries

The following bug has been logged on the website:

Bug reference:      15990
Logged by:          Andreas Wicht
Email address:      a.wicht@gmail.com
PostgreSQL version: 11.5
Operating system:   Ubuntu 18.04
Description:

Hi there,

I am not sure where to place this problem, here or at the PostGIS mailing
list. I'd like to start here though.
I have a function which needs a commit after each loop (inserting a result
into a target table). So far I worked around this requirement with dblink.
When the new procedures were implemented I tried to port the function to a
procedure, greatly reducing the complexity.
While testing I started to get the above mentioned error. 
I could dumb the procedure down to the very basics to reproduce the error.

Note that the procedure fails as soon as the geometry column is part of the
SELECT statement defining the FOR loop.
Researching this error did not yield any useful information to me (at least
none which is evident to me).

Steps to reproduce:
CREATE EXTENSION postgis;
CREATE SCHEMA temp;

wget
https://www.statistik-berlin-brandenburg.de/opendata/RBS_OD_ORT_2016_12.zip
unzip RBS_OD_ORT_2016_12.zip
shp2pgsql -I -g geom -s 25833 RBS_OD_ORT_2016_12.shp temp.test | psql -h XXX
-p XXX -d XXX -U XXX

CREATE TABLE temp.mytable (gid integer, geom geometry);

CREATE OR REPLACE PROCEDURE temp.testprocedure(polygon_tbl regclass)
AS $$
DECLARE
    _poly_tbl   ALIAS FOR $1;
    _rcd        RECORD;
BEGIN
    FOR _rcd IN
        EXECUTE format ('SELECT gid, geom FROM %s', _poly_tbl)
    LOOP
        INSERT INTO temp.mytable (gid, geom) VALUES (_rcd.gid, _rcd.geom);
        COMMIT;
    END LOOP;
END;
$$
LANGUAGE plpgsql;

CALL temp.testprocedure('temp.test');

---------
PostGIS version:
POSTGIS="2.5.2 r17328" [EXTENSION] PGSQL="110" GEOS="3.6.2-CAPI-1.10.2
4d2925d6" PROJ="Rel. 4.9.3, 15 August 2016" GDAL="GDAL 2.2.3, released
2017/11/20" LIBXML="2.9.4" LIBJSON="0.12.1" LIBPROTOBUF="1.2.1" TOPOLOGY
RASTER

PostgreSQL version:
PostgreSQL 11.5 (Ubuntu 11.5-1.pgdg18.04+1) on x86_64-pc-linux-gnu, compiled
by gcc (Ubuntu 7.4.0-1ubuntu1~18.04.1) 7.4.0, 64-bit

Greetings
Andreas


Hi,

On 2019-09-04 10:06:16 +0000, PG Bug reporting form wrote:
> Note that the procedure fails as soon as the geometry column is part of the
> SELECT statement defining the FOR loop.
> Researching this error did not yield any useful information to me (at least
> none which is evident to me).

The error is from:
static void
init_toast_snapshot(Snapshot toast_snapshot)
{
    Snapshot    snapshot = GetOldestSnapshot();

    if (snapshot == NULL)
        elog(ERROR, "no known snapshots");

    InitToastSnapshot(*toast_snapshot, snapshot->lsn, snapshot->whenTaken);
}


> CREATE OR REPLACE PROCEDURE temp.testprocedure(polygon_tbl regclass)
> AS $$
> DECLARE
>     _poly_tbl   ALIAS FOR $1;
>     _rcd        RECORD;
> BEGIN
>     FOR _rcd IN
>         EXECUTE format ('SELECT gid, geom FROM %s', _poly_tbl)
>     LOOP
>         INSERT INTO temp.mytable (gid, geom) VALUES (_rcd.gid, _rcd.geom);
>         COMMIT;
>     END LOOP;
> END;
> $$
> LANGUAGE plpgsql;

Hm. I don't immediately see anything here that could really be postgis
specific. I assume it's just because the geom datum is large and gets
toasted.  A bit of playing shows that it can be reproduced without:

CREATE TABLE toasted(id serial primary key, data text);
INSERT INTO toasted(data) VALUES((SELECT string_agg(random()::text, ':') FROM generate_series(1, 1000)));
INSERT INTO toasted(data) VALUES((SELECT string_agg(random()::text, ':') FROM generate_series(1, 1000)));
INSERT 0 1

DO $$ DECLARE v_r record; BEGIN FOR v_r in SELECT data FROM toasted LOOP INSERT INTO toasted(data)
VALUES(v_r.data);COMMIT;END LOOP;END;$$;
 

ERROR:  XX000: no known snapshots
CONTEXT:  PL/pgSQL function inline_code_block line 1 at FOR over SELECT rows
LOCATION:  init_toast_snapshot, tuptoaster.c:2416

Note that there are no errors if there's only one row already in the table,
nor if all the data is inserted without being sourced from a table.

This looks like it might be a procedure related bug to me. Peter?

The backtrace in my lightly modified tree is:

#0  init_toast_snapshot (toast_snapshot=0x7ffd5dc53280)
    at /home/andres/src/postgresql/src/backend/access/heap/tuptoaster.c:2416
#1  0x000055ee5a7fc0ef in toast_fetch_datum (attr=0x55ee5d155a78)
    at /home/andres/src/postgresql/src/backend/access/heap/tuptoaster.c:1930
#2  0x000055ee5a7f8bb2 in heap_tuple_fetch_attr (attr=0x55ee5d155a78)
    at /home/andres/src/postgresql/src/backend/access/heap/tuptoaster.c:108
#3  0x000055ee5a7fad29 in toast_flatten_tuple (tup=0x55ee5d155a48,
    tupleDesc=0x55ee5d14f510)
    at /home/andres/src/postgresql/src/backend/access/heap/tuptoaster.c:1110
#4  0x000055ee5ac77d32 in expanded_record_set_tuple (erh=0x55ee5d14f3f8,
    tuple=0x55ee5d155a48, copy=true, expand_external=true)
    at /home/andres/src/postgresql/src/backend/utils/adt/expandedrecord.c:473
#5  0x00007f4450307cef in exec_for_query (estate=0x7ffd5dc57920, stmt=0x55ee5d154b00,
    portal=0x55ee5d09e040, prefetch_ok=true)
    at /home/andres/src/postgresql/src/pl/plpgsql/src/pl_exec.c:5970
#6  0x00007f4450301984 in exec_stmt_fors (estate=0x7ffd5dc57920, stmt=0x55ee5d154b00)
    at /home/andres/src/postgresql/src/pl/plpgsql/src/pl_exec.c:2791
#7  0x00007f44502ffedc in exec_stmt (estate=0x7ffd5dc57920, stmt=0x55ee5d154b00)
    at /home/andres/src/postgresql/src/pl/plpgsql/src/pl_exec.c:1997
#8  0x00007f44502ffc94 in exec_stmts (estate=0x7ffd5dc57920, stmts=0x55ee5d154ef0)
    at /home/andres/src/postgresql/src/pl/plpgsql/src/pl_exec.c:1924
#9  0x00007f44502ffb40 in exec_stmt_block (estate=0x7ffd5dc57920, block=0x55ee5d154f28)
    at /home/andres/src/postgresql/src/pl/plpgsql/src/pl_exec.c:1865
#10 0x00007f44502ffdce in exec_stmt (estate=0x7ffd5dc57920, stmt=0x55ee5d154f28)
    at /home/andres/src/postgresql/src/pl/plpgsql/src/pl_exec.c:1957
#11 0x00007f44502fd542 in plpgsql_exec_function (func=0x55ee5d149a98,
    fcinfo=0x7ffd5dc57b60, simple_eval_estate=0x55ee5d125448, atomic=false)
    at /home/andres/src/postgresql/src/pl/plpgsql/src/pl_exec.c:589
#12 0x00007f44502f7d58 in plpgsql_inline_handler (fcinfo=0x7ffd5dc57c40)
    at /home/andres/src/postgresql/src/pl/plpgsql/src/pl_handler.c:339
#13 0x000055ee5adab248 in FunctionCall1Coll (flinfo=0x7ffd5dc57ca0, collation=0, arg1=94482251264896) at
/home/andres/src/postgresql/src/backend/utils/fmgr/fmgr.c:1140
#14 0x000055ee5adabde6 in OidFunctionCall1Coll (functionId=13404, collation=0, arg1=94482251264896) at
/home/andres/src/postgresql/src/backend/utils/fmgr/fmgr.c:1418
#15 0x000055ee5a981ab1 in ExecuteDoStmt (stmt=0x55ee5d037070, atomic=false) at
/home/andres/src/postgresql/src/backend/commands/functioncmds.c:2266
#16 0x000055ee5ac265e8 in standard_ProcessUtility (pstmt=0x55ee5d037370,
    queryString=0x55ee5d0363a8 "DO $$ DECLARE v_r record; BEGIN FOR v_r in SELECT data FROM toasted LOOP INSERT INTO
toasted(data)VALUES(v_r.data);COMMIT;END LOOP;END;$$;",
 
    context=PROCESS_UTILITY_TOPLEVEL, params=0x0, queryEnv=0x0, dest=0x55ee5d037440, completionTag=0x7ffd5dc58140 "")
at/home/andres/src/postgresql/src/backend/tcop/utility.c:523
 
#17 0x000055ee5ac26123 in ProcessUtility (pstmt=0x55ee5d037370,
    queryString=0x55ee5d0363a8 "DO $$ DECLARE v_r record; BEGIN FOR v_r in SELECT data FROM toasted LOOP INSERT INTO
toasted(data)VALUES(v_r.data);COMMIT;END LOOP;END;$$;",
 
    context=PROCESS_UTILITY_TOPLEVEL, params=0x0, queryEnv=0x0, dest=0x55ee5d037440, completionTag=0x7ffd5dc58140 "")
at/home/andres/src/postgresql/src/backend/tcop/utility.c:360
 
#18 0x000055ee5ac24f8a in PortalRunUtility (portal=0x55ee5d09df28, pstmt=0x55ee5d037370, isTopLevel=true,
setHoldSnapshot=false,dest=0x55ee5d037440,
 
    completionTag=0x7ffd5dc58140 "") at /home/andres/src/postgresql/src/backend/tcop/pquery.c:1175
#19 0x000055ee5ac251ae in PortalRunMulti (portal=0x55ee5d09df28, isTopLevel=true, setHoldSnapshot=false,
dest=0x55ee5d037440,altdest=0x55ee5d037440,
 
    completionTag=0x7ffd5dc58140 "") at /home/andres/src/postgresql/src/backend/tcop/pquery.c:1321
#20 0x000055ee5ac246ba in PortalRun (portal=0x55ee5d09df28, count=9223372036854775807, isTopLevel=true, run_once=true,
dest=0x55ee5d037440,altdest=0x55ee5d037440,
 
    completionTag=0x7ffd5dc58140 "") at /home/andres/src/postgresql/src/backend/tcop/pquery.c:796
#21 0x000055ee5ac1e0b8 in exec_simple_query (
    query_string=0x55ee5d0363a8 "DO $$ DECLARE v_r record; BEGIN FOR v_r in SELECT data FROM toasted LOOP INSERT INTO
toasted(data)VALUES(v_r.data);COMMIT;END LOOP;END;$$;")
 
    at /home/andres/src/postgresql/src/backend/tcop/postgres.c:1231
#22 0x000055ee5ac2276a in PostgresMain (argc=1, argv=0x55ee5d05c758, dbname=0x55ee5d05c6a0 "postgres",
username=0x55ee5d032918"andres")
 
    at /home/andres/src/postgresql/src/backend/tcop/postgres.c:4256
#23 0x000055ee5ab72e74 in BackendRun (port=0x55ee5d057b00) at
/home/andres/src/postgresql/src/backend/postmaster/postmaster.c:4446
#24 0x000055ee5ab725ce in BackendStartup (port=0x55ee5d057b00) at
/home/andres/src/postgresql/src/backend/postmaster/postmaster.c:4137
#25 0x000055ee5ab6e702 in ServerLoop () at /home/andres/src/postgresql/src/backend/postmaster/postmaster.c:1704
#26 0x000055ee5ab6df34 in PostmasterMain (argc=37, argv=0x55ee5d030290) at
/home/andres/src/postgresql/src/backend/postmaster/postmaster.c:1377
#27 0x000055ee5aa7bb76 in main (argc=37, argv=0x55ee5d030290) at
/home/andres/src/postgresql/src/backend/main/main.c:210


Which seems to suggest that the snapshot management for procedures
(possibly not even just plpgsql), isn't quite right.

Greetings,

Andres Freund



Andres Freund <andres@anarazel.de> writes:
> This looks like it might be a procedure related bug to me. Peter?

Somebody reported this same bug again today [1] ... why has it still
not been dealt with?

After an admittedly cursory look-around, it seems like the problem
can be stated as "init_toast_snapshot expects that there already
is a transaction snapshot, which there is not because we just
committed and nothing has re-established a transaction snapshot".
So the question is, where shall we force a new transaction snapshot
to be created after a COMMIT/ROLLBACK inside a procedure?

The most localized fix would be to let init_toast_snapshot itself
do that, but that seems like a bit of a layering violation; plus
I'm not quite convinced that's the only place with the issue.
(However, it *is* the only caller of GetOldestSnapshot() AFAICS.
So maybe everyplace else is calling GetTransactionSnapshot() to
begin with, in which case this could do likewise I should think.)

It really seems to me like _SPI_commit() should have started a
new transaction --- why is it okay to delay that?

BTW, why is this not caught by the plpgsql-toast.spec isolation test?
Seems like that's doing almost exactly the same thing as the trouble
reports.

            regards, tom lane

[1] https://www.postgresql.org/message-id/dae29212-ad31-8701-ef16-dd7420bfaa56%40perfexpert.ch



[ Roping Robert into this, as committer of 3e2f3c2e4 ]

I wrote:
> After an admittedly cursory look-around, it seems like the problem
> can be stated as "init_toast_snapshot expects that there already
> is a transaction snapshot, which there is not because we just
> committed and nothing has re-established a transaction snapshot".
> So the question is, where shall we force a new transaction snapshot
> to be created after a COMMIT/ROLLBACK inside a procedure?

> The most localized fix would be to let init_toast_snapshot itself
> do that, but that seems like a bit of a layering violation; plus
> I'm not quite convinced that's the only place with the issue.

I tried this, which leads to a nicely small patch and seems to resolve
the existing reports, but now I'm not sure that it's actually safe.
I think the bigger-picture question is, if we're trying to detoast
as the first step in a new transaction of a procedure, where's the
guarantee that the TOAST data still exists to be fetched?  For sure
we aren't holding any locks that would stop VACUUM from reclaiming
recently-dead TOAST rows.

In a recent discussion at [1], Konstantin Knizhnik reasoned that the
problem is that plpgsql is holding rows that it's prefetched but not
yet detoasted, and proposed disabling prefetch to solve this.  I think
he's probably right, although his patch strikes me as both overcomplicated
and wrong.  I suspect we must disable prefetch in any non-atomic
execution context, because we can't know whether a COMMIT will be executed
by some called procedure.

I'm still wondering why plpgsql-toast.spec is failing to show the
problem, too.

            regards, tom lane

[1] https://www.postgresql.org/message-id/flat/03644c0e6bb82132ac783982b6abffdf%40postgrespro.ru

diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c
index 730cd04a2d..386c5bda2b 100644
--- a/src/backend/access/common/toast_internals.c
+++ b/src/backend/access/common/toast_internals.c
@@ -638,8 +638,12 @@ init_toast_snapshot(Snapshot toast_snapshot)
 {
     Snapshot    snapshot = GetOldestSnapshot();

+    /*
+     * It is possible to get here when no snapshot has yet been established in
+     * the current transaction.  If so, just create a transaction snapshot.
+     */
     if (snapshot == NULL)
-        elog(ERROR, "no known snapshots");
+        snapshot = GetTransactionSnapshot();

     InitToastSnapshot(*toast_snapshot, snapshot->lsn, snapshot->whenTaken);
 }

I wrote:
> I think the bigger-picture question is, if we're trying to detoast
> as the first step in a new transaction of a procedure, where's the
> guarantee that the TOAST data still exists to be fetched?  For sure
> we aren't holding any locks that would stop VACUUM from reclaiming
> recently-dead TOAST rows.

Yeah.  So here's a patch I actually believe in to some extent, based
on Konstantin's idea.

> I'm still wondering why plpgsql-toast.spec is failing to show the
> problem, too.

The answer to that is that it's not testing commit-inside-a-cursor-loop,
and if it were, it'd still fail to show the problem because its test
table contains just one row.

Interestingly, if you try the test case added here without adding the
code patch, you get a "missing chunk number ... for toast value ..."
error, not "no known snapshots".  I think that's because the test
case has additional commands after the COMMIT, causing the transaction
snapshot to get re-established.

            regards, tom lane

diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c
index b4c70aaa7f..27d6ea7551 100644
--- a/src/pl/plpgsql/src/pl_exec.c
+++ b/src/pl/plpgsql/src/pl_exec.c
@@ -5826,6 +5826,17 @@ exec_for_query(PLpgSQL_execstate *estate, PLpgSQL_stmt_forq *stmt,
      */
     PinPortal(portal);

+    /*
+     * In a non-atomic context, we dare not prefetch, even if it would
+     * otherwise be safe.  Aside from any semantic hazards that that might
+     * create, if we prefetch toasted data and then the user commits the
+     * transaction, the toast references could turn into dangling pointers.
+     * (Rows we haven't yet fetched from the cursor are safe, because the
+     * PersistHoldablePortal mechanism handles this scenario.)
+     */
+    if (!estate->atomic)
+        prefetch_ok = false;
+
     /*
      * Fetch the initial tuple(s).  If prefetching is allowed then we grab a
      * few more rows to avoid multiple trips through executor startup
diff --git a/src/test/isolation/expected/plpgsql-toast.out b/src/test/isolation/expected/plpgsql-toast.out
index fc557da5e7..4f216b94b6 100644
--- a/src/test/isolation/expected/plpgsql-toast.out
+++ b/src/test/isolation/expected/plpgsql-toast.out
@@ -192,3 +192,46 @@ pg_advisory_unlock
 t
 s1: NOTICE:  length(r) = 6002
 step assign5: <... completed>
+
+starting permutation: lock assign6 vacuum unlock
+pg_advisory_unlock_all
+
+
+pg_advisory_unlock_all
+
+
+step lock:
+    SELECT pg_advisory_lock(1);
+
+pg_advisory_lock
+
+
+step assign6:
+do $$
+  declare
+    r record;
+  begin
+    insert into test1 values (2, repeat('bar', 3000));
+    insert into test1 values (3, repeat('baz', 4000));
+    for r in select test1.b from test1 loop
+      delete from test1;
+      commit;
+      perform pg_advisory_lock(1);
+      raise notice 'length(r) = %', length(r::text);
+    end loop;
+  end;
+$$;
+ <waiting ...>
+step vacuum:
+    VACUUM test1;
+
+step unlock:
+    SELECT pg_advisory_unlock(1);
+
+pg_advisory_unlock
+
+t
+s1: NOTICE:  length(r) = 6002
+s1: NOTICE:  length(r) = 9002
+s1: NOTICE:  length(r) = 12002
+step assign6: <... completed>
diff --git a/src/test/isolation/specs/plpgsql-toast.spec b/src/test/isolation/specs/plpgsql-toast.spec
index fe7090addb..bd2948b3d3 100644
--- a/src/test/isolation/specs/plpgsql-toast.spec
+++ b/src/test/isolation/specs/plpgsql-toast.spec
@@ -112,6 +112,25 @@ do $$
 $$;
 }

+# committing inside a cursor loop presents its own hazards
+step "assign6"
+{
+do $$
+  declare
+    r record;
+  begin
+    insert into test1 values (2, repeat('bar', 3000));
+    insert into test1 values (3, repeat('baz', 4000));
+    for r in select test1.b from test1 loop
+      delete from test1;
+      commit;
+      perform pg_advisory_lock(1);
+      raise notice 'length(r) = %', length(r::text);
+    end loop;
+  end;
+$$;
+}
+
 session "s2"
 setup
 {
@@ -135,3 +154,4 @@ permutation "lock" "assign2" "vacuum" "unlock"
 permutation "lock" "assign3" "vacuum" "unlock"
 permutation "lock" "assign4" "vacuum" "unlock"
 permutation "lock" "assign5" "vacuum" "unlock"
+permutation "lock" "assign6" "vacuum" "unlock"

... okay, now I'm roping Alvaro into this thread, because the attached
test case (extracted from [1]) shows that there's still a problem,
and this time it seems like we are dropping the ball on snapshot
management.

The sequence of events here is that after the first COMMIT inside the
loop, we call _SPI_execute_plan to execute the "select txt into t from
test2 where i=r.i;".  It does what it's supposed to, i.e.

            PushActiveSnapshot(GetTransactionSnapshot());
            ... run query ...
            PopActiveSnapshot();

and then hands back a tuple that includes a toasted datum.  plpgsql
knows it must detoast that value before storing it into "t", but
when it calls the toaster, GetOldestSnapshot returns NULL because
we have neither any "active" nor any "registered" snapshots.

ISTM there are two ways we could look at this:

1. COMMIT is dropping the ball by not forcing there to be any
registered transaction-level snapshot afterward.  (Maybe it's
not exactly COMMIT that must do this, but in any case the
snapshot situation after COMMIT is clearly different from
normal running, and that seems highly bug-prone.)

2. GetOldestSnapshot ought to be willing to fall back to
CurrentSnapshot if FirstSnapshotSet is true but there are
no active or registered snapshots.  But it's not clear how
its promises about returning the "oldest" snapshot would apply.

Thoughts?

            regards, tom lane

[1] https://www.postgresql.org/message-id/65424747-42ed-43d5-4cca-6b03481409a4%40perfexpert.ch

drop table if exists test2;
CREATE TABLE test2(i integer, txt text);
alter table test2 alter column txt set storage external;
insert into test2 values (1, lpad('x', 3000));
insert into test2 values (2, lpad('x', 3000));

DO $$
DECLARE
    r record;
    t text;
BEGIN
  FOR r in (SELECT i FROM test2)
     LOOP
       select txt into t from test2 where i=r.i;
       COMMIT;
    END LOOP;
END;
$$;

Andres Freund <andres@anarazel.de> writes:
> On 2021-05-12 11:37:46 -0400, Tom Lane wrote:
>> ISTM there are two ways we could look at this:
>> 
>> 1. COMMIT is dropping the ball by not forcing there to be any
>> registered transaction-level snapshot afterward.  (Maybe it's
>> not exactly COMMIT that must do this, but in any case the
>> snapshot situation after COMMIT is clearly different from
>> normal running, and that seems highly bug-prone.)
>> 
>> 2. GetOldestSnapshot ought to be willing to fall back to
>> CurrentSnapshot if FirstSnapshotSet is true but there are
>> no active or registered snapshots.  But it's not clear how
>> its promises about returning the "oldest" snapshot would apply.

> FirstSnapshotSet doesn't indicate the snapshot is usable, unless
> IsolationUsesXactSnapshot() - in which case it also registers the
> snapshot. We don't maintain MyProc->xmin outside of that, which I think
> means we can't rely on the snapshot at all?  Or am I missing something?

Yeah, on further thought, the real question to be asking here is
"what's protecting that in-flight datum from becoming a dangling
pointer?".  AFAICT, the answer right now is "nothing".  Therefore,
it is *never* okay for plpgsql to be executing without a registered
transaction snapshot; and it seems quite unlikely that it'd be any
safer for any other PL.

I'm batting the ball back into Peter's court.

            regards, tom lane



I wrote:
> Yeah, on further thought, the real question to be asking here is
> "what's protecting that in-flight datum from becoming a dangling
> pointer?".  AFAICT, the answer right now is "nothing".  Therefore,
> it is *never* okay for plpgsql to be executing without a registered
> transaction snapshot; and it seems quite unlikely that it'd be any
> safer for any other PL.

Here's a draft patch for this issue.

I concluded that the reason things work for a normal DO block
or plpgsql procedure (without internal transaction management)
is that pquery.c -- in particular, PortalRunUtility -- establishes
a snapshot for the duration of execution of the portal that is
running the CALL or DO.  That allows us to detoast the results of
a SPI query even though SPI has released whatever statement-level
snapshot it was using.  Hence, what we should do to fix this is
to try to restore that state of affairs after an internal commit.
We can't just make a snapshot instantly, because it may be that
the user wants to issue SET command(s) to change the transaction
mode and/or LOCK commands to acquire locks before a new
transaction snapshot is taken.  This means that _SPI_execute_plan
needs to deal with re-creating the Portal-level snapshot, but not
till just before executing a command that has to have a snapshot.

(Of course, fixing it in _SPI_execute_plan only fixes things for
PLs that go through SPI.  But if you don't, you were pretty much
on your own before, and you still are.)

So the attached patch removes PortalRunUtility's local state in
favor of adding a Portal field that tracks the snapshot provided
by the Portal, if any.  Then when we make a new snapshot that
logically substitutes for the portal's original snapshot, we can
store it in that field and it'll be released when we finally
exit the portal.

There's a loose end that this doesn't really resolve, which is
that a procedure could return output data that involves a toast
pointer.  If the procedure's last action was COMMIT, we'll
come back to ExecuteCallStmt with no snapshot that protects the
validity of that toast pointer.  For the moment I just added a
comment that says PLs mustn't do that.  I think that is safe
for plpgsql, because it treats output parameters like local
variables, and it already knows it mustn't store toast pointers
in procedure local variables.  It's probably all right for our
other PLs too, because they don't really deal in direct use of
toasted datums anyway.  But it still seems scary, because there is
really no good way to detect a violation.

A couple of other bits of unfinished business:

* This basically moves all of the snapshot management logic into
_SPI_execute_plan, making "no_snapshots" quite a misnomer.
It kind of was already, because of its contribution to whether
we pass down PROCESS_UTILITY_QUERY_NONATOMIC, but now it's a
flat out lie.  We still need a flag to control what happens with
PROCESS_UTILITY_QUERY_NONATOMIC, so I renamed it to
allow_nonatomic.  For the moment I just did that renaming locally
in _SPI_execute_plan.  In HEAD, we ought to propagate that name
into SPIExecuteOptions, but I haven't done that here.

* I think we could remove the PLPGSQL_STMT_SET infrastructure
and go back to treating SET/RESET the same as other utility
commands so far as plpgsql is concerned.  I haven't done that
here either, because it wouldn't be appropriate to do in the
back branches (if only for fear of breaking pldebugger).

Note that we still need the other patch to discourage plpgsql
from leaving not-yet-detoasted datums hanging around in a
FOR loop's fetch queue.

            regards, tom lane

diff --git a/src/backend/commands/functioncmds.c b/src/backend/commands/functioncmds.c
index 9548287217..4c12aa33df 100644
--- a/src/backend/commands/functioncmds.c
+++ b/src/backend/commands/functioncmds.c
@@ -64,6 +64,7 @@
 #include "parser/parse_func.h"
 #include "parser/parse_type.h"
 #include "pgstat.h"
+#include "tcop/pquery.h"
 #include "tcop/utility.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
@@ -2319,6 +2320,20 @@ ExecuteCallStmt(CallStmt *stmt, ParamListInfo params, bool atomic, DestReceiver
         if (fcinfo->isnull)
             elog(ERROR, "procedure returned null record");

+        /*
+         * Ensure there's an active snapshot whilst we execute whatever's
+         * involved here.  Note that this is *not* sufficient to make the
+         * world safe for TOAST pointers to be included in the returned data:
+         * the referenced data could have gone away while we didn't hold a
+         * snapshot.  Hence, it's incumbent on PLs that can do COMMIT/ROLLBACK
+         * to not return TOAST pointers, unless those pointers were fetched
+         * after the last COMMIT/ROLLBACK in the procedure.
+         *
+         * XXX that is a really nasty, hard-to-test requirement.  Is there a
+         * way to remove it?
+         */
+        EnsurePortalSnapshotExists();
+
         td = DatumGetHeapTupleHeader(retval);
         tupType = HeapTupleHeaderGetTypeId(td);
         tupTypmod = HeapTupleHeaderGetTypMod(td);
diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c
index 00aa78ea53..0912967fe3 100644
--- a/src/backend/executor/spi.c
+++ b/src/backend/executor/spi.c
@@ -66,7 +66,7 @@ static void _SPI_prepare_oneshot_plan(const char *src, SPIPlanPtr plan);

 static int    _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI,
                               Snapshot snapshot, Snapshot crosscheck_snapshot,
-                              bool read_only, bool no_snapshots,
+                              bool read_only, bool allow_nonatomic,
                               bool fire_triggers, uint64 tcount,
                               DestReceiver *caller_dest,
                               ResourceOwner plan_owner);
@@ -260,12 +260,8 @@ _SPI_commit(bool chain)
     /* Start the actual commit */
     _SPI_current->internal_xact = true;

-    /*
-     * Before committing, pop all active snapshots to avoid error about
-     * "snapshot %p still active".
-     */
-    while (ActiveSnapshotSet())
-        PopActiveSnapshot();
+    /* Release snapshots associated with portals */
+    ForgetPortalSnapshots();

     if (chain)
         SaveTransactionCharacteristics();
@@ -322,6 +318,9 @@ _SPI_rollback(bool chain)
     /* Start the actual rollback */
     _SPI_current->internal_xact = true;

+    /* Release snapshots associated with portals */
+    ForgetPortalSnapshots();
+
     if (chain)
         SaveTransactionCharacteristics();

@@ -2264,7 +2263,7 @@ _SPI_prepare_oneshot_plan(const char *src, SPIPlanPtr plan)
  *        behavior of taking a new snapshot for each query.
  * crosscheck_snapshot: for RI use, all others pass InvalidSnapshot
  * read_only: true for read-only execution (no CommandCounterIncrement)
- * no_snapshots: true to skip snapshot management
+ * allow_nonatomic: true to allow nonatomic CALL/DO execution
  * fire_triggers: true to fire AFTER triggers at end of query (normal case);
  *        false means any AFTER triggers are postponed to end of outer query
  * tcount: execution tuple-count limit, or 0 for none
@@ -2275,7 +2274,7 @@ _SPI_prepare_oneshot_plan(const char *src, SPIPlanPtr plan)
 static int
 _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI,
                   Snapshot snapshot, Snapshot crosscheck_snapshot,
-                  bool read_only, bool no_snapshots,
+                  bool read_only, bool allow_nonatomic,
                   bool fire_triggers, uint64 tcount,
                   DestReceiver *caller_dest, ResourceOwner plan_owner)
 {
@@ -2318,11 +2317,12 @@ _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI,
      * In the first two cases, we can just push the snap onto the stack once
      * for the whole plan list.
      *
-     * But if no_snapshots is true, then don't manage snapshots at all here.
-     * The caller must then take care of that.
+     * Note that snapshot != InvalidSnapshot implies an atomic execution
+     * context.
      */
-    if (snapshot != InvalidSnapshot && !no_snapshots)
+    if (snapshot != InvalidSnapshot)
     {
+        Assert(!allow_nonatomic);
         if (read_only)
         {
             PushActiveSnapshot(snapshot);
@@ -2408,15 +2408,39 @@ _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI,
         stmt_list = cplan->stmt_list;

         /*
-         * In the default non-read-only case, get a new snapshot, replacing
-         * any that we pushed in a previous cycle.
+         * If we weren't given a specific snapshot to use, and the statement
+         * list requires a snapshot, set that up.
          */
-        if (snapshot == InvalidSnapshot && !read_only && !no_snapshots)
+        if (snapshot == InvalidSnapshot &&
+            (list_length(stmt_list) > 1 ||
+             (list_length(stmt_list) == 1 &&
+              PlannedStmtRequiresSnapshot(linitial_node(PlannedStmt,
+                                                        stmt_list)))))
         {
-            if (pushed_active_snap)
-                PopActiveSnapshot();
-            PushActiveSnapshot(GetTransactionSnapshot());
-            pushed_active_snap = true;
+            /*
+             * First, ensure there's a Portal-level snapshot.  This back-fills
+             * the snapshot stack in case the previous operation was a COMMIT
+             * or ROLLBACK inside a procedure or DO block.  (We can't put back
+             * the Portal snapshot any sooner, or we'd break cases like doing
+             * SET or LOCK just after COMMIT.)  It's enough to check once per
+             * statement list, since COMMIT/ROLLBACK/CALL/DO can't appear
+             * within a multi-statement list.
+             */
+            EnsurePortalSnapshotExists();
+
+            /*
+             * In the default non-read-only case, get a new per-statement-list
+             * snapshot, replacing any that we pushed in a previous cycle.
+             * Skip it when doing non-atomic execution, though (we rely
+             * entirely on the Portal snapshot in that case).
+             */
+            if (!read_only && !allow_nonatomic)
+            {
+                if (pushed_active_snap)
+                    PopActiveSnapshot();
+                PushActiveSnapshot(GetTransactionSnapshot());
+                pushed_active_snap = true;
+            }
         }

         foreach(lc2, stmt_list)
@@ -2434,6 +2458,7 @@ _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI,
             _SPI_current->processed = 0;
             _SPI_current->tuptable = NULL;

+            /* Check for unsupported cases. */
             if (stmt->utilityStmt)
             {
                 if (IsA(stmt->utilityStmt, CopyStmt))
@@ -2462,9 +2487,10 @@ _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI,

             /*
              * If not read-only mode, advance the command counter before each
-             * command and update the snapshot.
+             * command and update the snapshot.  (But skip it if the snapshot
+             * isn't under our control.)
              */
-            if (!read_only && !no_snapshots)
+            if (!read_only && pushed_active_snap)
             {
                 CommandCounterIncrement();
                 UpdateActiveSnapshotCommandId();
@@ -2507,13 +2533,11 @@ _SPI_execute_plan(SPIPlanPtr plan, ParamListInfo paramLI,
                 QueryCompletion qc;

                 /*
-                 * If the SPI context is atomic, or we are asked to manage
-                 * snapshots, then we are in an atomic execution context.
-                 * Conversely, to propagate a nonatomic execution context, the
-                 * caller must be in a nonatomic SPI context and manage
-                 * snapshots itself.
+                 * If the SPI context is atomic, or we were not told to allow
+                 * nonatomic operations, tell ProcessUtility this is an atomic
+                 * execution context.
                  */
-                if (_SPI_current->atomic || !no_snapshots)
+                if (_SPI_current->atomic || !allow_nonatomic)
                     context = PROCESS_UTILITY_QUERY;
                 else
                     context = PROCESS_UTILITY_QUERY_NONATOMIC;
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 1432554d5a..d92a512839 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -349,6 +349,13 @@ create_estate_for_relation(LogicalRepRelMapEntry *rel,
     EState       *estate;
     RangeTblEntry *rte;

+    /*
+     * Input functions may need an active snapshot, as may AFTER triggers
+     * invoked during finish_estate.  For safety, ensure an active snapshot
+     * exists throughout all our usage of the executor.
+     */
+    PushActiveSnapshot(GetTransactionSnapshot());
+
     estate = CreateExecutorState();

     rte = makeNode(RangeTblEntry);
@@ -400,6 +407,7 @@ finish_estate(EState *estate)
     /* Cleanup. */
     ExecResetTupleTable(estate->es_tupleTable, false);
     FreeExecutorState(estate);
+    PopActiveSnapshot();
 }

 /*
@@ -1212,9 +1220,6 @@ apply_handle_insert(StringInfo s)
                                         RelationGetDescr(rel->localrel),
                                         &TTSOpsVirtual);

-    /* Input functions may need an active snapshot, so get one */
-    PushActiveSnapshot(GetTransactionSnapshot());
-
     /* Process and store remote tuple in the slot */
     oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
     slot_store_data(remoteslot, rel, &newtup);
@@ -1229,8 +1234,6 @@ apply_handle_insert(StringInfo s)
         apply_handle_insert_internal(resultRelInfo, estate,
                                      remoteslot);

-    PopActiveSnapshot();
-
     finish_estate(estate);

     logicalrep_rel_close(rel, NoLock);
@@ -1358,8 +1361,6 @@ apply_handle_update(StringInfo s)
     /* Also populate extraUpdatedCols, in case we have generated columns */
     fill_extraUpdatedCols(target_rte, rel->localrel);

-    PushActiveSnapshot(GetTransactionSnapshot());
-
     /* Build the search tuple. */
     oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
     slot_store_data(remoteslot, rel,
@@ -1374,8 +1375,6 @@ apply_handle_update(StringInfo s)
         apply_handle_update_internal(resultRelInfo, estate,
                                      remoteslot, &newtup, rel);

-    PopActiveSnapshot();
-
     finish_estate(estate);

     logicalrep_rel_close(rel, NoLock);
@@ -1482,8 +1481,6 @@ apply_handle_delete(StringInfo s)
                                         RelationGetDescr(rel->localrel),
                                         &TTSOpsVirtual);

-    PushActiveSnapshot(GetTransactionSnapshot());
-
     /* Build the search tuple. */
     oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
     slot_store_data(remoteslot, rel, &oldtup);
@@ -1497,8 +1494,6 @@ apply_handle_delete(StringInfo s)
         apply_handle_delete_internal(resultRelInfo, estate,
                                      remoteslot, &rel->remoterel);

-    PopActiveSnapshot();
-
     finish_estate(estate);

     logicalrep_rel_close(rel, NoLock);
diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c
index 44f5fe8fc9..f7f08e6c05 100644
--- a/src/backend/tcop/pquery.c
+++ b/src/backend/tcop/pquery.c
@@ -476,6 +476,13 @@ PortalStart(Portal portal, ParamListInfo params,
                 else
                     PushActiveSnapshot(GetTransactionSnapshot());

+                /*
+                 * We could remember the snapshot in portal->portalSnapshot,
+                 * but presently there seems no need to, as this code path
+                 * cannot be used for non-atomic execution.  Hence there can't
+                 * be any commit/abort that might destroy the snapshot.
+                 */
+
                 /*
                  * Create QueryDesc in portal's context; for the moment, set
                  * the destination to DestNone.
@@ -1116,45 +1123,26 @@ PortalRunUtility(Portal portal, PlannedStmt *pstmt,
                  bool isTopLevel, bool setHoldSnapshot,
                  DestReceiver *dest, QueryCompletion *qc)
 {
-    Node       *utilityStmt = pstmt->utilityStmt;
-    Snapshot    snapshot;
-
     /*
-     * Set snapshot if utility stmt needs one.  Most reliable way to do this
-     * seems to be to enumerate those that do not need one; this is a short
-     * list.  Transaction control, LOCK, and SET must *not* set a snapshot
-     * since they need to be executable at the start of a transaction-snapshot
-     * mode transaction without freezing a snapshot.  By extension we allow
-     * SHOW not to set a snapshot.  The other stmts listed are just efficiency
-     * hacks.  Beware of listing anything that can modify the database --- if,
-     * say, it has to update an index with expressions that invoke
-     * user-defined functions, then it had better have a snapshot.
+     * Set snapshot if utility stmt needs one.
      */
-    if (!(IsA(utilityStmt, TransactionStmt) ||
-          IsA(utilityStmt, LockStmt) ||
-          IsA(utilityStmt, VariableSetStmt) ||
-          IsA(utilityStmt, VariableShowStmt) ||
-          IsA(utilityStmt, ConstraintsSetStmt) ||
-    /* efficiency hacks from here down */
-          IsA(utilityStmt, FetchStmt) ||
-          IsA(utilityStmt, ListenStmt) ||
-          IsA(utilityStmt, NotifyStmt) ||
-          IsA(utilityStmt, UnlistenStmt) ||
-          IsA(utilityStmt, CheckPointStmt)))
+    if (PlannedStmtRequiresSnapshot(pstmt))
     {
-        snapshot = GetTransactionSnapshot();
+        Snapshot    snapshot = GetTransactionSnapshot();
+
         /* If told to, register the snapshot we're using and save in portal */
         if (setHoldSnapshot)
         {
             snapshot = RegisterSnapshot(snapshot);
             portal->holdSnapshot = snapshot;
         }
+        /* In any case, make the snapshot active and remember it in portal */
         PushActiveSnapshot(snapshot);
         /* PushActiveSnapshot might have copied the snapshot */
-        snapshot = GetActiveSnapshot();
+        portal->portalSnapshot = GetActiveSnapshot();
     }
     else
-        snapshot = NULL;
+        portal->portalSnapshot = NULL;

     ProcessUtility(pstmt,
                    portal->sourceText,
@@ -1168,13 +1156,17 @@ PortalRunUtility(Portal portal, PlannedStmt *pstmt,
     MemoryContextSwitchTo(portal->portalContext);

     /*
-     * Some utility commands may pop the ActiveSnapshot stack from under us,
-     * so be careful to only pop the stack if our snapshot is still at the
-     * top.
+     * Some utility commands (e.g., VACUUM) pop the ActiveSnapshot stack from
+     * under us, so don't complain if it's now empty.  Otherwise, our snapshot
+     * should be the top one; pop it.  Note that this could be a different
+     * snapshot from the one we made above; see EnsurePortalSnapshotExists.
      */
-    if (snapshot != NULL && ActiveSnapshotSet() &&
-        snapshot == GetActiveSnapshot())
+    if (portal->portalSnapshot != NULL && ActiveSnapshotSet())
+    {
+        Assert(portal->portalSnapshot == GetActiveSnapshot());
         PopActiveSnapshot();
+    }
+    portal->portalSnapshot = NULL;
 }

 /*
@@ -1256,6 +1248,12 @@ PortalRunMulti(Portal portal,
                  * from what holdSnapshot has.)
                  */
                 PushCopiedSnapshot(snapshot);
+
+                /*
+                 * As for PORTAL_ONE_SELECT portals, it does not seem
+                 * necessary to maintain portal->portalSnapshot here.
+                 */
+
                 active_snapshot_set = true;
             }
             else
@@ -1692,3 +1690,78 @@ DoPortalRewind(Portal portal)
     portal->atEnd = false;
     portal->portalPos = 0;
 }
+
+/*
+ * PlannedStmtRequiresSnapshot - does this statement need an active snapshot?
+ */
+bool
+PlannedStmtRequiresSnapshot(PlannedStmt *pstmt)
+{
+    Node       *utilityStmt = pstmt->utilityStmt;
+
+    /* If it's not a utility statement, it definitely needs a snapshot */
+    if (utilityStmt == NULL)
+        return true;
+
+    /*
+     * Most utility statements need a snapshot, and the default presumption
+     * about new ones should be that they do too.  Hence, enumerate those that
+     * do not need one.
+     *
+     * Transaction control, LOCK, and SET must *not* set a snapshot, since
+     * they need to be executable at the start of a transaction-snapshot-mode
+     * transaction without freezing a snapshot.  By extension we allow SHOW
+     * not to set a snapshot.  The other stmts listed are just efficiency
+     * hacks.  Beware of listing anything that can modify the database --- if,
+     * say, it has to update an index with expressions that invoke
+     * user-defined functions, then it had better have a snapshot.
+     */
+    if (IsA(utilityStmt, TransactionStmt) ||
+        IsA(utilityStmt, LockStmt) ||
+        IsA(utilityStmt, VariableSetStmt) ||
+        IsA(utilityStmt, VariableShowStmt) ||
+        IsA(utilityStmt, ConstraintsSetStmt) ||
+    /* efficiency hacks from here down */
+        IsA(utilityStmt, FetchStmt) ||
+        IsA(utilityStmt, ListenStmt) ||
+        IsA(utilityStmt, NotifyStmt) ||
+        IsA(utilityStmt, UnlistenStmt) ||
+        IsA(utilityStmt, CheckPointStmt))
+        return false;
+
+    return true;
+}
+
+/*
+ * EnsurePortalSnapshotExists - recreate Portal-level snapshot, if needed
+ *
+ * Generally, we will have an active snapshot whenever we are executing
+ * inside a Portal, unless the Portal's query is one of the utility
+ * statements exempted from that rule (see PlannedStmtRequiresSnapshot).
+ * However, procedures and DO blocks can commit or abort the transaction,
+ * and thereby destroy all snapshots.  This function can be called to
+ * re-establish the Portal-level snapshot when none exists.
+ */
+void
+EnsurePortalSnapshotExists(void)
+{
+    Portal        portal;
+
+    /*
+     * Nothing to do if a snapshot is set.  (We take it on faith that the
+     * outermost active snapshot belongs to some Portal; or if there is no
+     * Portal, it's somebody else's responsibility to manage things.)
+     */
+    if (ActiveSnapshotSet())
+        return;
+
+    /* Otherwise, we'd better be executing inside some active Portal */
+    portal = ActivePortal;
+    Assert(portal != NULL);
+    Assert(portal->portalSnapshot == NULL);
+
+    /* Push a fresh transaction snapshot and record it as the Portal's */
+    PushActiveSnapshot(GetTransactionSnapshot());
+    /* PushActiveSnapshot might have copied the snapshot */
+    portal->portalSnapshot = GetActiveSnapshot();
+}
diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c
index 66e3181815..5c30e141f5 100644
--- a/src/backend/utils/mmgr/portalmem.c
+++ b/src/backend/utils/mmgr/portalmem.c
@@ -502,6 +502,9 @@ PortalDrop(Portal portal, bool isTopCommit)
         portal->cleanup = NULL;
     }

+    /* There shouldn't be an active snapshot anymore, except after error */
+    Assert(portal->portalSnapshot == NULL || !isTopCommit);
+
     /*
      * Remove portal from hash table.  Because we do this here, we will not
      * come back to try to remove the portal again if there's any error in the
@@ -709,6 +712,8 @@ PreCommit_Portals(bool isPrepare)
                 portal->holdSnapshot = NULL;
             }
             portal->resowner = NULL;
+            /* Clear portalSnapshot too, for cleanliness */
+            portal->portalSnapshot = NULL;
             continue;
         }

@@ -1278,3 +1283,54 @@ HoldPinnedPortals(void)
         }
     }
 }
+
+/*
+ * Drop the outer active snapshots for all portals, so that no snapshots
+ * remain active.
+ *
+ * Like HoldPinnedPortals, this must be called when initiating a COMMIT or
+ * ROLLBACK inside a procedure.  This has to be separate from that since it
+ * should not be run until we're done with steps that are likely to fail.
+ *
+ * It's tempting to fold this into PreCommit_Portals, but to do so, we'd
+ * need to clean up snapshot management in VACUUM and perhaps other places.
+ */
+void
+ForgetPortalSnapshots(void)
+{
+    HASH_SEQ_STATUS status;
+    PortalHashEnt *hentry;
+    int            numPortalSnaps = 0;
+    int            numActiveSnaps = 0;
+
+    /* First, scan all live portals and clear their portalSnapshot fields */
+    hash_seq_init(&status, PortalHashTable);
+
+    while ((hentry = (PortalHashEnt *) hash_seq_search(&status)) != NULL)
+    {
+        Portal        portal = hentry->portal;
+
+        if (portal->portalSnapshot != NULL)
+        {
+            portal->portalSnapshot = NULL;
+            numPortalSnaps++;
+        }
+        /* portal->holdSnapshot will be cleaned up in PreCommit_Portals */
+    }
+
+    /*
+     * Now, pop all the active snapshots, which should be just those that were
+     * portal snapshots.  Ideally we'd drive this directly off the portal
+     * scan, but there's no good way to visit the portals in the correct
+     * order.  So just cross-check after the fact.
+     */
+    while (ActiveSnapshotSet())
+    {
+        PopActiveSnapshot();
+        numActiveSnaps++;
+    }
+
+    if (numPortalSnaps != numActiveSnaps)
+        elog(ERROR, "portal snapshots (%d) did not account for all active snapshots (%d)",
+             numPortalSnaps, numActiveSnaps);
+}
diff --git a/src/include/tcop/pquery.h b/src/include/tcop/pquery.h
index ed2c4d374b..2318f04ff0 100644
--- a/src/include/tcop/pquery.h
+++ b/src/include/tcop/pquery.h
@@ -17,6 +17,8 @@
 #include "nodes/parsenodes.h"
 #include "utils/portal.h"

+struct PlannedStmt;                /* avoid including plannodes.h here */
+

 extern PGDLLIMPORT Portal ActivePortal;

@@ -42,4 +44,8 @@ extern uint64 PortalRunFetch(Portal portal,
                              long count,
                              DestReceiver *dest);

+extern bool PlannedStmtRequiresSnapshot(struct PlannedStmt *pstmt);
+
+extern void EnsurePortalSnapshotExists(void);
+
 #endif                            /* PQUERY_H */
diff --git a/src/include/utils/portal.h b/src/include/utils/portal.h
index 3c17b039cc..2e5bbdd0ec 100644
--- a/src/include/utils/portal.h
+++ b/src/include/utils/portal.h
@@ -160,6 +160,14 @@ typedef struct PortalData
     /* and these are the format codes to use for the columns: */
     int16       *formats;        /* a format code for each column */

+    /*
+     * Outermost ActiveSnapshot for execution of the portal's queries.  For
+     * all but a few utility commands, we require such a snapshot to exist.
+     * This ensures that TOAST references in query results can be detoasted,
+     * and helps to reduce thrashing of the process's exposed xmin.
+     */
+    Snapshot    portalSnapshot; /* active snapshot, or NULL if none */
+
     /*
      * Where we store tuples for a held cursor or a PORTAL_ONE_RETURNING or
      * PORTAL_UTIL_SELECT query.  (A cursor held past the end of its
@@ -237,5 +245,6 @@ extern void PortalCreateHoldStore(Portal portal);
 extern void PortalHashTableDeleteAll(void);
 extern bool ThereAreNoReadyPortals(void);
 extern void HoldPinnedPortals(void);
+extern void ForgetPortalSnapshots(void);

 #endif                            /* PORTAL_H */
diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c
index b4c70aaa7f..5b0f698ac7 100644
--- a/src/pl/plpgsql/src/pl_exec.c
+++ b/src/pl/plpgsql/src/pl_exec.c
@@ -2159,7 +2159,6 @@ exec_stmt_call(PLpgSQL_execstate *estate, PLpgSQL_stmt_call *stmt)
     PLpgSQL_expr *expr = stmt->expr;
     LocalTransactionId before_lxid;
     LocalTransactionId after_lxid;
-    bool        pushed_active_snap = false;
     ParamListInfo paramLI;
     SPIExecuteOptions options;
     int            rc;
@@ -2189,22 +2188,13 @@ exec_stmt_call(PLpgSQL_execstate *estate, PLpgSQL_stmt_call *stmt)

     before_lxid = MyProc->lxid;

-    /*
-     * The procedure call could end transactions, which would upset the
-     * snapshot management in SPI_execute*, so handle snapshots here instead.
-     * Set a snapshot only for non-read-only procedures, similar to SPI
-     * behavior.
-     */
-    if (!estate->readonly_func)
-    {
-        PushActiveSnapshot(GetTransactionSnapshot());
-        pushed_active_snap = true;
-    }
-
     /*
      * If we have a procedure-lifespan resowner, use that to hold the refcount
      * for the plan.  This avoids refcount leakage complaints if the called
      * procedure ends the current transaction.
+     *
+     * Also, disable SPI's snapshot management; this is a hacky way of telling
+     * it to allow non-atomicness to propagate to the callee.
      */
     memset(&options, 0, sizeof(options));
     options.params = paramLI;
@@ -2220,17 +2210,7 @@ exec_stmt_call(PLpgSQL_execstate *estate, PLpgSQL_stmt_call *stmt)

     after_lxid = MyProc->lxid;

-    if (before_lxid == after_lxid)
-    {
-        /*
-         * If we are still in the same transaction after the call, pop the
-         * snapshot that we might have pushed.  (If it's a new transaction,
-         * then all the snapshots are gone already.)
-         */
-        if (pushed_active_snap)
-            PopActiveSnapshot();
-    }
-    else
+    if (before_lxid != after_lxid)
     {
         /*
          * If we are in a new transaction after the call, we need to build new
@@ -4954,6 +4934,7 @@ exec_stmt_rollback(PLpgSQL_execstate *estate, PLpgSQL_stmt_rollback *stmt)
  *
  * We just parse and execute the statement normally, but we have to do it
  * without setting a snapshot, for things like SET TRANSACTION.
+ * XXX spi.c now handles this correctly, so we no longer need a special case.
  */
 static int
 exec_stmt_set(PLpgSQL_execstate *estate, PLpgSQL_stmt_set *stmt)
@@ -4967,7 +4948,6 @@ exec_stmt_set(PLpgSQL_execstate *estate, PLpgSQL_stmt_set *stmt)

     memset(&options, 0, sizeof(options));
     options.read_only = estate->readonly_func;
-    options.no_snapshots = true;    /* disable SPI's snapshot management */

     rc = SPI_execute_plan_extended(expr->plan, &options);

diff --git a/src/test/isolation/expected/plpgsql-toast.out b/src/test/isolation/expected/plpgsql-toast.out
index fc557da5e7..61a33c113c 100644
--- a/src/test/isolation/expected/plpgsql-toast.out
+++ b/src/test/isolation/expected/plpgsql-toast.out
@@ -192,3 +192,30 @@ pg_advisory_unlock
 t
 s1: NOTICE:  length(r) = 6002
 step assign5: <... completed>
+
+starting permutation: fetch-after-commit
+pg_advisory_unlock_all
+
+
+pg_advisory_unlock_all
+
+
+s1: NOTICE:  length(t) = 6000
+s1: NOTICE:  length(t) = 9000
+s1: NOTICE:  length(t) = 12000
+step fetch-after-commit:
+do $$
+  declare
+    r record;
+    t text;
+  begin
+    insert into test1 values (2, repeat('bar', 3000));
+    insert into test1 values (3, repeat('baz', 4000));
+    for r in select test1.a from test1 loop
+      commit;
+      select b into t from test1 where a = r.a;
+      raise notice 'length(t) = %', length(t);
+    end loop;
+  end;
+$$;
+
diff --git a/src/test/isolation/specs/plpgsql-toast.spec b/src/test/isolation/specs/plpgsql-toast.spec
index fe7090addb..8806e3e6dd 100644
--- a/src/test/isolation/specs/plpgsql-toast.spec
+++ b/src/test/isolation/specs/plpgsql-toast.spec
@@ -112,6 +112,26 @@ do $$
 $$;
 }

+# Check that the results of a query can be detoasted just after committing
+# (there's no interaction with VACUUM here)
+step "fetch-after-commit"
+{
+do $$
+  declare
+    r record;
+    t text;
+  begin
+    insert into test1 values (2, repeat('bar', 3000));
+    insert into test1 values (3, repeat('baz', 4000));
+    for r in select test1.a from test1 loop
+      commit;
+      select b into t from test1 where a = r.a;
+      raise notice 'length(t) = %', length(t);
+    end loop;
+  end;
+$$;
+}
+
 session "s2"
 setup
 {
@@ -135,3 +155,4 @@ permutation "lock" "assign2" "vacuum" "unlock"
 permutation "lock" "assign3" "vacuum" "unlock"
 permutation "lock" "assign4" "vacuum" "unlock"
 permutation "lock" "assign5" "vacuum" "unlock"
+permutation "fetch-after-commit"