Re: Fwd: Core dump with nested CREATE TEMP TABLE - Mailing list pgsql-hackers

From Tom Lane
Subject Re: Fwd: Core dump with nested CREATE TEMP TABLE
Date
Msg-id 28085.1441335851@sss.pgh.pa.us
Whole thread Raw
In response to Re: Fwd: Core dump with nested CREATE TEMP TABLE  (Tom Lane <tgl@sss.pgh.pa.us>)
Responses Re: Fwd: Core dump with nested CREATE TEMP TABLE  (Michael Paquier <michael.paquier@gmail.com>)
Re: Fwd: Core dump with nested CREATE TEMP TABLE  (Noah Misch <noah@leadboat.com>)
List pgsql-hackers
I wrote:
> Hmm.  I am not feeling especially comfortable about this: it's not clear
> that there's anything preventing a suspended portal from containing a
> dangerous reference.  However, a quick trial did not show that it was
> possible to break it --- in the cases I tried, we detected that a cached
> plan was no longer valid, tried to rebuild it, and noticed the missing
> object at that point.  So maybe it's OK.

After further thought I concluded that this probably is safe.  The
portal's original query was created and planned when it was opened,
so it cannot contain any references to objects of the failed
subtransaction.  We have a hazard from queries within functions,
but if the portal is suspended then no such functions are in progress.

Attached is an updated patch with comments to this effect and some
minor other code cleanup (mainly, not assuming that CurrentResourceOwner
is the right thing to use in AtSubAbort_Portals).

            regards, tom lane

diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index b53d95f..ae9e850 100644
*** a/src/backend/access/transam/xact.c
--- b/src/backend/access/transam/xact.c
*************** AbortSubTransaction(void)
*** 4585,4590 ****
--- 4585,4591 ----
          AfterTriggerEndSubXact(false);
          AtSubAbort_Portals(s->subTransactionId,
                             s->parent->subTransactionId,
+                            s->curTransactionOwner,
                             s->parent->curTransactionOwner);
          AtEOSubXact_LargeObject(false, s->subTransactionId,
                                  s->parent->subTransactionId);
diff --git a/src/backend/commands/portalcmds.c b/src/backend/commands/portalcmds.c
index 2794537..327b2a5 100644
*** a/src/backend/commands/portalcmds.c
--- b/src/backend/commands/portalcmds.c
*************** PersistHoldablePortal(Portal portal)
*** 335,345 ****
      /*
       * Check for improper portal use, and mark portal active.
       */
!     if (portal->status != PORTAL_READY)
!         ereport(ERROR,
!                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
!                  errmsg("portal \"%s\" cannot be run", portal->name)));
!     portal->status = PORTAL_ACTIVE;

      /*
       * Set up global portal context pointers.
--- 335,341 ----
      /*
       * Check for improper portal use, and mark portal active.
       */
!     MarkPortalActive(portal);

      /*
       * Set up global portal context pointers.
diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c
index 9c14e8a..0df86a2 100644
*** a/src/backend/tcop/pquery.c
--- b/src/backend/tcop/pquery.c
*************** PortalRun(Portal portal, long count, boo
*** 734,744 ****
      /*
       * Check for improper portal use, and mark portal active.
       */
!     if (portal->status != PORTAL_READY)
!         ereport(ERROR,
!                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
!                  errmsg("portal \"%s\" cannot be run", portal->name)));
!     portal->status = PORTAL_ACTIVE;

      /*
       * Set up global portal context pointers.
--- 734,740 ----
      /*
       * Check for improper portal use, and mark portal active.
       */
!     MarkPortalActive(portal);

      /*
       * Set up global portal context pointers.
*************** PortalRunFetch(Portal portal,
*** 1398,1408 ****
      /*
       * Check for improper portal use, and mark portal active.
       */
!     if (portal->status != PORTAL_READY)
!         ereport(ERROR,
!                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
!                  errmsg("portal \"%s\" cannot be run", portal->name)));
!     portal->status = PORTAL_ACTIVE;

      /*
       * Set up global portal context pointers.
--- 1394,1400 ----
      /*
       * Check for improper portal use, and mark portal active.
       */
!     MarkPortalActive(portal);

      /*
       * Set up global portal context pointers.
diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c
index 7530498..44a87f7 100644
*** a/src/backend/utils/mmgr/portalmem.c
--- b/src/backend/utils/mmgr/portalmem.c
*************** CreatePortal(const char *name, bool allo
*** 232,237 ****
--- 232,238 ----
      portal->status = PORTAL_NEW;
      portal->cleanup = PortalCleanup;
      portal->createSubid = GetCurrentSubTransactionId();
+     portal->activeSubid = portal->createSubid;
      portal->strategy = PORTAL_MULTI_QUERY;
      portal->cursorOptions = CURSOR_OPT_NO_SCROLL;
      portal->atStart = true;
*************** UnpinPortal(Portal portal)
*** 403,408 ****
--- 404,428 ----
  }

  /*
+  * MarkPortalActive
+  *        Transition a portal from READY to ACTIVE state.
+  *
+  * NOTE: never set portal->status = PORTAL_ACTIVE directly; call this instead.
+  */
+ void
+ MarkPortalActive(Portal portal)
+ {
+     /* For safety, this is a runtime test not just an Assert */
+     if (portal->status != PORTAL_READY)
+         ereport(ERROR,
+                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                  errmsg("portal \"%s\" cannot be run", portal->name)));
+     /* Perform the state transition */
+     portal->status = PORTAL_ACTIVE;
+     portal->activeSubid = GetCurrentSubTransactionId();
+ }
+
+ /*
   * MarkPortalDone
   *        Transition a portal from ACTIVE to DONE state.
   *
*************** PreCommit_Portals(bool isPrepare)
*** 690,695 ****
--- 710,716 ----
               * not belonging to this transaction.
               */
              portal->createSubid = InvalidSubTransactionId;
+             portal->activeSubid = InvalidSubTransactionId;

              /* Report we changed state */
              result = true;
*************** AtCleanup_Portals(void)
*** 836,843 ****
  /*
   * Pre-subcommit processing for portals.
   *
!  * Reassign the portals created in the current subtransaction to the parent
!  * subtransaction.
   */
  void
  AtSubCommit_Portals(SubTransactionId mySubid,
--- 857,864 ----
  /*
   * Pre-subcommit processing for portals.
   *
!  * Reassign portals created or used in the current subtransaction to the
!  * parent subtransaction.
   */
  void
  AtSubCommit_Portals(SubTransactionId mySubid,
*************** AtSubCommit_Portals(SubTransactionId myS
*** 859,872 ****
              if (portal->resowner)
                  ResourceOwnerNewParent(portal->resowner, parentXactOwner);
          }
      }
  }

  /*
   * Subtransaction abort handling for portals.
   *
!  * Deactivate portals created during the failed subtransaction.
!  * Note that per AtSubCommit_Portals, this will catch portals created
   * in descendants of the subtransaction too.
   *
   * We don't destroy any portals here; that's done in AtSubCleanup_Portals.
--- 880,895 ----
              if (portal->resowner)
                  ResourceOwnerNewParent(portal->resowner, parentXactOwner);
          }
+         if (portal->activeSubid == mySubid)
+             portal->activeSubid = parentSubid;
      }
  }

  /*
   * Subtransaction abort handling for portals.
   *
!  * Deactivate portals created or used during the failed subtransaction.
!  * Note that per AtSubCommit_Portals, this will catch portals created/used
   * in descendants of the subtransaction too.
   *
   * We don't destroy any portals here; that's done in AtSubCleanup_Portals.
*************** AtSubCommit_Portals(SubTransactionId myS
*** 874,879 ****
--- 897,903 ----
  void
  AtSubAbort_Portals(SubTransactionId mySubid,
                     SubTransactionId parentSubid,
+                    ResourceOwner myXactOwner,
                     ResourceOwner parentXactOwner)
  {
      HASH_SEQ_STATUS status;
*************** AtSubAbort_Portals(SubTransactionId mySu
*** 885,900 ****
      {
          Portal        portal = hentry->portal;

          if (portal->createSubid != mySubid)
              continue;

          /*
           * Force any live portals of my own subtransaction into FAILED state.
           * We have to do this because they might refer to objects created or
!          * changed in the failed subtransaction, leading to crashes if
!          * execution is resumed, or even if we just try to run ExecutorEnd.
!          * (Note we do NOT do this to upper-level portals, since they cannot
!          * have such references and hence may be able to continue.)
           */
          if (portal->status == PORTAL_READY ||
              portal->status == PORTAL_ACTIVE)
--- 909,966 ----
      {
          Portal        portal = hentry->portal;

+         /* Was it created in this subtransaction? */
          if (portal->createSubid != mySubid)
+         {
+             /* No, but maybe it was used in this subtransaction? */
+             if (portal->activeSubid == mySubid)
+             {
+                 /* Maintain activeSubid until the portal is removed */
+                 portal->activeSubid = parentSubid;
+
+                 /*
+                  * Upper-level portals that failed while running in this
+                  * subtransaction must be forced into FAILED state, for the
+                  * same reasons discussed below.
+                  *
+                  * We assume we can get away without forcing upper-level READY
+                  * portals to fail, even if they were run and then suspended.
+                  * In theory a suspended upper-level portal could have
+                  * acquired some references to objects that are about to be
+                  * destroyed, but there should be sufficient defenses against
+                  * such cases: the portal's original query cannot contain such
+                  * references, and any references within, say, cached plans of
+                  * PL/pgSQL functions are not from active queries and should
+                  * be protected by revalidation logic.
+                  */
+                 if (portal->status == PORTAL_ACTIVE)
+                     MarkPortalFailed(portal);
+
+                 /*
+                  * Also, if we failed it during the current subtransaction
+                  * (either just above, or earlier), reattach its resource
+                  * owner to the current subtransaction's resource owner, so
+                  * that any resources it still holds will be released while
+                  * cleaning up this subtransaction.  This prevents some corner
+                  * cases wherein we might get Asserts or worse while cleaning
+                  * up objects created during the current subtransaction
+                  * (because they're still referenced within this portal).
+                  */
+                 if (portal->status == PORTAL_FAILED && portal->resowner)
+                 {
+                     ResourceOwnerNewParent(portal->resowner, myXactOwner);
+                     portal->resowner = NULL;
+                 }
+             }
+             /* Done if it wasn't created in this subtransaction */
              continue;
+         }

          /*
           * Force any live portals of my own subtransaction into FAILED state.
           * We have to do this because they might refer to objects created or
!          * changed in the failed subtransaction, leading to crashes within
!          * ExecutorEnd when portalcmds.c tries to close down the portal.
           */
          if (portal->status == PORTAL_READY ||
              portal->status == PORTAL_ACTIVE)
diff --git a/src/include/utils/portal.h b/src/include/utils/portal.h
index 1267b9e..c369107 100644
*** a/src/include/utils/portal.h
--- b/src/include/utils/portal.h
*************** typedef struct PortalData
*** 119,130 ****
      MemoryContext heap;            /* subsidiary memory for portal */
      ResourceOwner resowner;        /* resources owned by portal */
      void        (*cleanup) (Portal portal);        /* cleanup hook */
-     SubTransactionId createSubid;        /* the ID of the creating subxact */

      /*
!      * if createSubid is InvalidSubTransactionId, the portal is held over from
!      * a previous transaction
       */

      /* The query or queries the portal will execute */
      const char *sourceText;        /* text of query (as of 8.4, never NULL) */
--- 119,134 ----
      MemoryContext heap;            /* subsidiary memory for portal */
      ResourceOwner resowner;        /* resources owned by portal */
      void        (*cleanup) (Portal portal);        /* cleanup hook */

      /*
!      * State data for remembering which subtransaction(s) the portal was
!      * created or used in.  If the portal is held over from a previous
!      * transaction, both subxids are InvalidSubTransactionId.  Otherwise,
!      * createSubid is the creating subxact and activeSubid is the last subxact
!      * in which we ran the portal.
       */
+     SubTransactionId createSubid;        /* the creating subxact */
+     SubTransactionId activeSubid;        /* the last subxact with activity */

      /* The query or queries the portal will execute */
      const char *sourceText;        /* text of query (as of 8.4, never NULL) */
*************** extern void AtSubCommit_Portals(SubTrans
*** 201,212 ****
--- 205,218 ----
                      ResourceOwner parentXactOwner);
  extern void AtSubAbort_Portals(SubTransactionId mySubid,
                     SubTransactionId parentSubid,
+                    ResourceOwner myXactOwner,
                     ResourceOwner parentXactOwner);
  extern void AtSubCleanup_Portals(SubTransactionId mySubid);
  extern Portal CreatePortal(const char *name, bool allowDup, bool dupSilent);
  extern Portal CreateNewPortal(void);
  extern void PinPortal(Portal portal);
  extern void UnpinPortal(Portal portal);
+ extern void MarkPortalActive(Portal portal);
  extern void MarkPortalDone(Portal portal);
  extern void MarkPortalFailed(Portal portal);
  extern void PortalDrop(Portal portal, bool isTopCommit);
diff --git a/src/test/regress/expected/transactions.out b/src/test/regress/expected/transactions.out
index 5d70863..d9b702d 100644
*** a/src/test/regress/expected/transactions.out
--- b/src/test/regress/expected/transactions.out
*************** fetch from foo;
*** 613,618 ****
--- 613,664 ----
  (1 row)

  abort;
+ -- Test for proper cleanup after a failure in a cursor portal
+ -- that was created in an outer subtransaction
+ CREATE FUNCTION invert(x float8) RETURNS float8 LANGUAGE plpgsql AS
+ $$ begin return 1/x; end $$;
+ CREATE FUNCTION create_temp_tab() RETURNS text
+ LANGUAGE plpgsql AS $$
+ BEGIN
+   CREATE TEMP TABLE new_table (f1 float8);
+   -- case of interest is that we fail while holding an open
+   -- relcache reference to new_table
+   INSERT INTO new_table SELECT invert(0.0);
+   RETURN 'foo';
+ END $$;
+ BEGIN;
+ DECLARE ok CURSOR FOR SELECT * FROM int8_tbl;
+ DECLARE ctt CURSOR FOR SELECT create_temp_tab();
+ FETCH ok;
+  q1  | q2
+ -----+-----
+  123 | 456
+ (1 row)
+
+ SAVEPOINT s1;
+ FETCH ok;  -- should work
+  q1  |        q2
+ -----+------------------
+  123 | 4567890123456789
+ (1 row)
+
+ FETCH ctt; -- error occurs here
+ ERROR:  division by zero
+ CONTEXT:  PL/pgSQL function invert(double precision) line 1 at RETURN
+ SQL statement "INSERT INTO new_table SELECT invert(0.0)"
+ PL/pgSQL function create_temp_tab() line 6 at SQL statement
+ ROLLBACK TO s1;
+ FETCH ok;  -- should work
+         q1        | q2
+ ------------------+-----
+  4567890123456789 | 123
+ (1 row)
+
+ FETCH ctt; -- must be rejected
+ ERROR:  portal "ctt" cannot be run
+ COMMIT;
+ DROP FUNCTION create_temp_tab();
+ DROP FUNCTION invert(x float8);
  -- Test for successful cleanup of an aborted transaction at session exit.
  -- THIS MUST BE THE LAST TEST IN THIS FILE.
  begin;
diff --git a/src/test/regress/sql/transactions.sql b/src/test/regress/sql/transactions.sql
index 9fac4a3..bf9cb05 100644
*** a/src/test/regress/sql/transactions.sql
--- b/src/test/regress/sql/transactions.sql
*************** fetch from foo;
*** 387,392 ****
--- 387,424 ----

  abort;

+
+ -- Test for proper cleanup after a failure in a cursor portal
+ -- that was created in an outer subtransaction
+ CREATE FUNCTION invert(x float8) RETURNS float8 LANGUAGE plpgsql AS
+ $$ begin return 1/x; end $$;
+
+ CREATE FUNCTION create_temp_tab() RETURNS text
+ LANGUAGE plpgsql AS $$
+ BEGIN
+   CREATE TEMP TABLE new_table (f1 float8);
+   -- case of interest is that we fail while holding an open
+   -- relcache reference to new_table
+   INSERT INTO new_table SELECT invert(0.0);
+   RETURN 'foo';
+ END $$;
+
+ BEGIN;
+ DECLARE ok CURSOR FOR SELECT * FROM int8_tbl;
+ DECLARE ctt CURSOR FOR SELECT create_temp_tab();
+ FETCH ok;
+ SAVEPOINT s1;
+ FETCH ok;  -- should work
+ FETCH ctt; -- error occurs here
+ ROLLBACK TO s1;
+ FETCH ok;  -- should work
+ FETCH ctt; -- must be rejected
+ COMMIT;
+
+ DROP FUNCTION create_temp_tab();
+ DROP FUNCTION invert(x float8);
+
+
  -- Test for successful cleanup of an aborted transaction at session exit.
  -- THIS MUST BE THE LAST TEST IN THIS FILE.


pgsql-hackers by date:

Previous
From: Alvaro Herrera
Date:
Subject: Re: BRIN INDEX value
Next
From: "Shulgin, Oleksandr"
Date:
Subject: Re: On-demand running query plans using auto_explain and signals