Re: smgrsettransient mechanism is full of bugs - Mailing list pgsql-hackers

From Tom Lane
Subject Re: smgrsettransient mechanism is full of bugs
Date
Msg-id 3832.1350329930@sss.pgh.pa.us
Whole thread Raw
In response to smgrsettransient mechanism is full of bugs  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-hackers
I wrote:
> I got a bit suspicious of the transient-file mechanism introduced in
> commit fba105b1099f4f5fa7283bb17cba6fed2baa8d0c after noticing that
> ...
> I believe that we probably ought to revert this mechanism entirely, and
> build a new implementation based on these concepts:
> * An SMgrRelation is transient if and only if it doesn't have an
> "owning" relcache entry.  Keep a list of all such SmgrRelations, and
> close them all at transaction end.  (Obviously, an SMgrRelation gets
> removed from the list if it acquires an owner mid-transaction.)
> * There's no such concept as FD_XACT_TRANSIENT at the fd.c level.
> Rather, we close and delete the VFD entry when told to by SmgrRelation
> closure.

Attached is a draft patch for that, presented in two parts: the first
part just reverts commit fba105b1099f4f5fa7283bb17cba6fed2baa8d0c,
and the second part installs the new mechanism.  I'm reasonably pleased
with the way this turned out; I think it's cleaner as well as more
reliable than the previous patch.

The list-management code could possibly be replaced with slist once that
patch goes in, but since this needs to be back-patched, I didn't
consider that for the moment.

Comments?

            regards, tom lane

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 56095b32501efef651b0e650c5938de9b16d46f9..bdcbe47ac9e9f5ce0db5f90b8ddb567bf34ade00 100644
*** a/src/backend/storage/buffer/bufmgr.c
--- b/src/backend/storage/buffer/bufmgr.c
*************** BufferGetTag(Buffer buffer, RelFileNode
*** 1882,1891 ****
   * written.)
   *
   * If the caller has an smgr reference for the buffer's relation, pass it
!  * as the second parameter.  If not, pass NULL.  In the latter case, the
!  * relation will be marked as "transient" so that the corresponding
!  * kernel-level file descriptors are closed when the current transaction ends,
!  * if any.
   */
  static void
  FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
--- 1882,1888 ----
   * written.)
   *
   * If the caller has an smgr reference for the buffer's relation, pass it
!  * as the second parameter.  If not, pass NULL.
   */
  static void
  FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
*************** FlushBuffer(volatile BufferDesc *buf, SM
*** 1909,1920 ****
      errcontext.previous = error_context_stack;
      error_context_stack = &errcontext;

!     /* Find smgr relation for buffer, and mark it as transient */
      if (reln == NULL)
-     {
          reln = smgropen(buf->tag.rnode, InvalidBackendId);
-         smgrsettransient(reln);
-     }

      TRACE_POSTGRESQL_BUFFER_FLUSH_START(buf->tag.forkNum,
                                          buf->tag.blockNum,
--- 1906,1914 ----
      errcontext.previous = error_context_stack;
      error_context_stack = &errcontext;

!     /* Find smgr relation for buffer */
      if (reln == NULL)
          reln = smgropen(buf->tag.rnode, InvalidBackendId);

      TRACE_POSTGRESQL_BUFFER_FLUSH_START(buf->tag.forkNum,
                                          buf->tag.blockNum,
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index fed25fd7e7cdda9aaa2c5b646f2e40cf50f000ed..ecb62ba01aeb968aedab3260d95ce07a44aa61fb 100644
*** a/src/backend/storage/file/fd.c
--- b/src/backend/storage/file/fd.c
*************** int            max_safe_fds = 32;    /* default if n
*** 126,133 ****
  /* these are the assigned bits in fdstate below: */
  #define FD_TEMPORARY        (1 << 0)    /* T = delete when closed */
  #define FD_XACT_TEMPORARY    (1 << 1)    /* T = delete at eoXact */
- #define FD_XACT_TRANSIENT    (1 << 2)    /* T = close (not delete) at aoXact,
-                                          * but keep VFD */

  typedef struct vfd
  {
--- 126,131 ----
*************** static Size SizeVfdCache = 0;
*** 158,165 ****
   */
  static int    nfile = 0;

! /* True if there are files to close/delete at end of transaction */
! static bool have_pending_fd_cleanup = false;

  /*
   * Tracks the total size of all temporary files.  Note: when temp_file_limit
--- 156,166 ----
   */
  static int    nfile = 0;

! /*
!  * Flag to tell whether it's worth scanning VfdCache looking for temp files
!  * to close
!  */
! static bool have_xact_temporary_files = false;

  /*
   * Tracks the total size of all temporary files.  Note: when temp_file_limit
*************** LruDelete(File file)
*** 607,613 ****
      Vfd           *vfdP;

      Assert(file != 0);
-     Assert(!FileIsNotOpen(file));

      DO_DB(elog(LOG, "LruDelete %d (%s)",
                 file, VfdCache[file].fileName));
--- 608,613 ----
*************** OpenTemporaryFile(bool interXact)
*** 971,977 ****
          VfdCache[file].resowner = CurrentResourceOwner;

          /* ensure cleanup happens at eoxact */
!         have_pending_fd_cleanup = true;
      }

      return file;
--- 971,977 ----
          VfdCache[file].resowner = CurrentResourceOwner;

          /* ensure cleanup happens at eoxact */
!         have_xact_temporary_files = true;
      }

      return file;
*************** OpenTemporaryFileInTablespace(Oid tblspc
*** 1045,1069 ****
  }

  /*
-  * Set the transient flag on a file
-  *
-  * This flag tells CleanupTempFiles to close the kernel-level file descriptor
-  * (but not the VFD itself) at end of transaction.
-  */
- void
- FileSetTransient(File file)
- {
-     Vfd           *vfdP;
-
-     Assert(FileIsValid(file));
-
-     vfdP = &VfdCache[file];
-     vfdP->fdstate |= FD_XACT_TRANSIENT;
-
-     have_pending_fd_cleanup = true;
- }
-
- /*
   * close a file when done with it
   */
  void
--- 1045,1050 ----
*************** AtEOSubXact_Files(bool isCommit, SubTran
*** 1863,1871 ****
   * particularly care which).  All still-open per-transaction temporary file
   * VFDs are closed, which also causes the underlying files to be deleted
   * (although they should've been closed already by the ResourceOwner
!  * cleanup). Transient files have their kernel file descriptors closed.
!  * Furthermore, all "allocated" stdio files are closed. We also forget any
!  * transaction-local temp tablespace list.
   */
  void
  AtEOXact_Files(void)
--- 1844,1851 ----
   * particularly care which).  All still-open per-transaction temporary file
   * VFDs are closed, which also causes the underlying files to be deleted
   * (although they should've been closed already by the ResourceOwner
!  * cleanup). Furthermore, all "allocated" stdio files are closed. We also
!  * forget any transaction-local temp tablespace list.
   */
  void
  AtEOXact_Files(void)
*************** AtProcExit_Files(int code, Datum arg)
*** 1888,1897 ****
  }

  /*
!  * General cleanup routine for fd.c.
!  *
!  * Temporary files are closed, and their underlying files deleted.
!  * Transient files are closed.
   *
   * isProcExit: if true, this is being called as the backend process is
   * exiting. If that's the case, we should remove all temporary files; if
--- 1868,1874 ----
  }

  /*
!  * Close temporary files and delete their underlying files.
   *
   * isProcExit: if true, this is being called as the backend process is
   * exiting. If that's the case, we should remove all temporary files; if
*************** CleanupTempFiles(bool isProcExit)
*** 1908,1958 ****
       * Careful here: at proc_exit we need extra cleanup, not just
       * xact_temporary files.
       */
!     if (isProcExit || have_pending_fd_cleanup)
      {
          Assert(FileIsNotOpen(0));        /* Make sure ring not corrupted */
          for (i = 1; i < SizeVfdCache; i++)
          {
              unsigned short fdstate = VfdCache[i].fdstate;

!             if (VfdCache[i].fileName != NULL)
              {
!                 if (fdstate & FD_TEMPORARY)
!                 {
!                     /*
!                      * If we're in the process of exiting a backend process,
!                      * close all temporary files. Otherwise, only close
!                      * temporary files local to the current transaction. They
!                      * should be closed by the ResourceOwner mechanism
!                      * already, so this is just a debugging cross-check.
!                      */
!                     if (isProcExit)
!                         FileClose(i);
!                     else if (fdstate & FD_XACT_TEMPORARY)
!                     {
!                         elog(WARNING,
!                         "temporary file %s not closed at end-of-transaction",
!                              VfdCache[i].fileName);
!                         FileClose(i);
!                     }
!                 }
!                 else if (fdstate & FD_XACT_TRANSIENT)
                  {
!                     /*
!                      * Close the FD, and remove the entry from the LRU ring,
!                      * but also remove the flag from the VFD.  This is to
!                      * ensure that if the VFD is reused in the future for
!                      * non-transient access, we don't close it inappropriately
!                      * then.
!                      */
!                     if (!FileIsNotOpen(i))
!                         LruDelete(i);
!                     VfdCache[i].fdstate &= ~FD_XACT_TRANSIENT;
                  }
              }
          }

!         have_pending_fd_cleanup = false;
      }

      /* Clean up "allocated" stdio files and dirs. */
--- 1885,1919 ----
       * Careful here: at proc_exit we need extra cleanup, not just
       * xact_temporary files.
       */
!     if (isProcExit || have_xact_temporary_files)
      {
          Assert(FileIsNotOpen(0));        /* Make sure ring not corrupted */
          for (i = 1; i < SizeVfdCache; i++)
          {
              unsigned short fdstate = VfdCache[i].fdstate;

!             if ((fdstate & FD_TEMPORARY) && VfdCache[i].fileName != NULL)
              {
!                 /*
!                  * If we're in the process of exiting a backend process, close
!                  * all temporary files. Otherwise, only close temporary files
!                  * local to the current transaction. They should be closed by
!                  * the ResourceOwner mechanism already, so this is just a
!                  * debugging cross-check.
!                  */
!                 if (isProcExit)
!                     FileClose(i);
!                 else if (fdstate & FD_XACT_TEMPORARY)
                  {
!                     elog(WARNING,
!                          "temporary file %s not closed at end-of-transaction",
!                          VfdCache[i].fileName);
!                     FileClose(i);
                  }
              }
          }

!         have_xact_temporary_files = false;
      }

      /* Clean up "allocated" stdio files and dirs. */
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 97742b92bb4ffa33db3554658511911955f47550..3f4ab49dab4a5a8d92c9673120f2d4964c29fdac 100644
*** a/src/backend/storage/smgr/md.c
--- b/src/backend/storage/smgr/md.c
*************** mdcreate(SMgrRelation reln, ForkNumber f
*** 300,308 ****

      pfree(path);

-     if (reln->smgr_transient)
-         FileSetTransient(fd);
-
      reln->md_fd[forkNum] = _fdvec_alloc();

      reln->md_fd[forkNum]->mdfd_vfd = fd;
--- 300,305 ----
*************** mdopen(SMgrRelation reln, ForkNumber for
*** 585,593 ****

      pfree(path);

-     if (reln->smgr_transient)
-         FileSetTransient(fd);
-
      reln->md_fd[forknum] = mdfd = _fdvec_alloc();

      mdfd->mdfd_vfd = fd;
--- 582,587 ----
*************** _mdfd_openseg(SMgrRelation reln, ForkNum
*** 1680,1688 ****
      if (fd < 0)
          return NULL;

-     if (reln->smgr_transient)
-         FileSetTransient(fd);
-
      /* allocate an mdfdvec entry for it */
      v = _fdvec_alloc();

--- 1674,1679 ----
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 0cec1477f3faf6551ad141b5107da1ea70b8b54b..6319d1e8589138be32e498e77cc4ce484211c1a1 100644
*** a/src/backend/storage/smgr/smgr.c
--- b/src/backend/storage/smgr/smgr.c
*************** smgropen(RelFileNode rnode, BackendId ba
*** 163,196 ****
          reln->smgr_targblock = InvalidBlockNumber;
          reln->smgr_fsm_nblocks = InvalidBlockNumber;
          reln->smgr_vm_nblocks = InvalidBlockNumber;
-         reln->smgr_transient = false;
          reln->smgr_which = 0;    /* we only have md.c at present */

          /* mark it not open */
          for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
              reln->md_fd[forknum] = NULL;
      }
-     else
-         /* if it was transient before, it no longer is */
-         reln->smgr_transient = false;

      return reln;
  }

  /*
-  * smgrsettransient() -- mark an SMgrRelation object as transaction-bound
-  *
-  * The main effect of this is that all opened files are marked to be
-  * kernel-level closed (but not necessarily VFD-closed) when the current
-  * transaction ends.
-  */
- void
- smgrsettransient(SMgrRelation reln)
- {
-     reln->smgr_transient = true;
- }
-
- /*
   * smgrsetowner() -- Establish a long-lived reference to an SMgrRelation object
   *
   * There can be only one owner at a time; this is sufficient since currently
--- 163,179 ----
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index bad9f10c62ec84351bf191eecde7ad99d433d867..849bb1025d9d483916784378e7beb8370e647e51 100644
*** a/src/include/storage/fd.h
--- b/src/include/storage/fd.h
*************** extern int    max_safe_fds;
*** 66,72 ****
  /* Operations on virtual Files --- equivalent to Unix kernel file ops */
  extern File PathNameOpenFile(FileName fileName, int fileFlags, int fileMode);
  extern File OpenTemporaryFile(bool interXact);
- extern void FileSetTransient(File file);
  extern void FileClose(File file);
  extern int    FilePrefetch(File file, off_t offset, int amount);
  extern int    FileRead(File file, char *buffer, int amount);
--- 66,71 ----
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index 3560d539076da83a3f2897f3109fcabc4b72d5d0..92b9198d5428c1f51617f595d062da1bc8af5106 100644
*** a/src/include/storage/smgr.h
--- b/src/include/storage/smgr.h
*************** typedef struct SMgrRelationData
*** 60,66 ****
       * submodules.    Do not touch them from elsewhere.
       */
      int            smgr_which;        /* storage manager selector */
-     bool        smgr_transient; /* T if files are to be closed at EOXact */

      /* for md.c; NULL for forks that are not open */
      struct _MdfdVec *md_fd[MAX_FORKNUM + 1];
--- 60,65 ----
*************** typedef SMgrRelationData *SMgrRelation;
*** 73,79 ****

  extern void smgrinit(void);
  extern SMgrRelation smgropen(RelFileNode rnode, BackendId backend);
- extern void smgrsettransient(SMgrRelation reln);
  extern bool smgrexists(SMgrRelation reln, ForkNumber forknum);
  extern void smgrsetowner(SMgrRelation *owner, SMgrRelation reln);
  extern void smgrclose(SMgrRelation reln);
--- 72,77 ----
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index e7a6606ec3b33d285ac032c18fb64e557abdf821..c24df3f38c2bbf2e0becfe0394cb92fd6b6566d1 100644
*** a/src/backend/access/transam/xact.c
--- b/src/backend/access/transam/xact.c
*************** CommitTransaction(void)
*** 1949,1955 ****
      AtEOXact_SPI(true);
      AtEOXact_on_commit_actions(true);
      AtEOXact_Namespace(true);
!     /* smgrcommit already done */
      AtEOXact_Files();
      AtEOXact_ComboCid();
      AtEOXact_HashTables(true);
--- 1949,1955 ----
      AtEOXact_SPI(true);
      AtEOXact_on_commit_actions(true);
      AtEOXact_Namespace(true);
!     AtEOXact_SMgr();
      AtEOXact_Files();
      AtEOXact_ComboCid();
      AtEOXact_HashTables(true);
*************** PrepareTransaction(void)
*** 2202,2208 ****
      AtEOXact_SPI(true);
      AtEOXact_on_commit_actions(true);
      AtEOXact_Namespace(true);
!     /* smgrcommit already done */
      AtEOXact_Files();
      AtEOXact_ComboCid();
      AtEOXact_HashTables(true);
--- 2202,2208 ----
      AtEOXact_SPI(true);
      AtEOXact_on_commit_actions(true);
      AtEOXact_Namespace(true);
!     AtEOXact_SMgr();
      AtEOXact_Files();
      AtEOXact_ComboCid();
      AtEOXact_HashTables(true);
*************** AbortTransaction(void)
*** 2348,2353 ****
--- 2348,2354 ----
          AtEOXact_SPI(false);
          AtEOXact_on_commit_actions(false);
          AtEOXact_Namespace(false);
+         AtEOXact_SMgr();
          AtEOXact_Files();
          AtEOXact_ComboCid();
          AtEOXact_HashTables(false);
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index 748fd85edb0d7175e5ffd99288b92eddd8706d20..709ccf1f2561907f96f1ae2bf8ddc47e326efbb1 100644
*** a/src/backend/postmaster/bgwriter.c
--- b/src/backend/postmaster/bgwriter.c
*************** BackgroundWriterMain(void)
*** 183,188 ****
--- 183,189 ----
                               false, true);
          /* we needn't bother with the other ResourceOwnerRelease phases */
          AtEOXact_Buffers(false);
+         AtEOXact_SMgr();
          AtEOXact_Files();
          AtEOXact_HashTables(false);

diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index c5f32059a792bc407bd01e1fab26c180148ebf3d..18e6a4e8c4be1c1e6041cd854a55f367ffd4ba6e 100644
*** a/src/backend/postmaster/checkpointer.c
--- b/src/backend/postmaster/checkpointer.c
*************** CheckpointerMain(void)
*** 291,296 ****
--- 291,297 ----
                               false, true);
          /* we needn't bother with the other ResourceOwnerRelease phases */
          AtEOXact_Buffers(false);
+         AtEOXact_SMgr();
          AtEOXact_Files();
          AtEOXact_HashTables(false);

diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c
index 43139017c276648ec292a81a4684e605701e2205..c3e15ef7595de7373e319540691304285672c178 100644
*** a/src/backend/postmaster/walwriter.c
--- b/src/backend/postmaster/walwriter.c
*************** WalWriterMain(void)
*** 188,193 ****
--- 188,194 ----
                               false, true);
          /* we needn't bother with the other ResourceOwnerRelease phases */
          AtEOXact_Buffers(false);
+         AtEOXact_SMgr();
          AtEOXact_Files();
          AtEOXact_HashTables(false);

diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 6319d1e8589138be32e498e77cc4ce484211c1a1..5dff8b3702ee64c355a050aa76242b99b6eea151 100644
*** a/src/backend/storage/smgr/smgr.c
--- b/src/backend/storage/smgr/smgr.c
*************** static const int NSmgr = lengthof(smgrsw
*** 76,86 ****
--- 76,90 ----

  /*
   * Each backend has a hashtable that stores all extant SMgrRelation objects.
+  * In addition, "unowned" SMgrRelation objects are chained together in a list.
   */
  static HTAB *SMgrRelationHash = NULL;

+ static SMgrRelation first_unowned_reln = NULL;
+
  /* local function prototypes */
  static void smgrshutdown(int code, Datum arg);
+ static void remove_from_unowned_list(SMgrRelation reln);


  /*
*************** smgrshutdown(int code, Datum arg)
*** 124,130 ****
  /*
   *    smgropen() -- Return an SMgrRelation object, creating it if need be.
   *
!  *        This does not attempt to actually open the object.
   */
  SMgrRelation
  smgropen(RelFileNode rnode, BackendId backend)
--- 128,134 ----
  /*
   *    smgropen() -- Return an SMgrRelation object, creating it if need be.
   *
!  *        This does not attempt to actually open the underlying file.
   */
  SMgrRelation
  smgropen(RelFileNode rnode, BackendId backend)
*************** smgropen(RelFileNode rnode, BackendId ba
*** 144,149 ****
--- 148,154 ----
          ctl.hash = tag_hash;
          SMgrRelationHash = hash_create("smgr relation table", 400,
                                         &ctl, HASH_ELEM | HASH_FUNCTION);
+         first_unowned_reln = NULL;
      }

      /* Look up or create an entry */
*************** smgropen(RelFileNode rnode, BackendId ba
*** 168,173 ****
--- 173,182 ----
          /* mark it not open */
          for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
              reln->md_fd[forknum] = NULL;
+
+         /* place it at head of unowned list (to make smgrsetowner cheap) */
+         reln->next_unowned_reln = first_unowned_reln;
+         first_unowned_reln = reln;
      }

      return reln;
*************** smgropen(RelFileNode rnode, BackendId ba
*** 182,195 ****
--- 191,212 ----
  void
  smgrsetowner(SMgrRelation *owner, SMgrRelation reln)
  {
+     /* We don't currently support "disowning" an SMgrRelation here */
+     Assert(owner != NULL);
+
      /*
       * First, unhook any old owner.  (Normally there shouldn't be any, but it
       * seems possible that this can happen during swap_relation_files()
       * depending on the order of processing.  It's ok to close the old
       * relcache entry early in that case.)
+      *
+      * If there isn't an old owner, then the reln should be in the unowned
+      * list, and we need to remove it.
       */
      if (reln->smgr_owner)
          *(reln->smgr_owner) = NULL;
+     else
+         remove_from_unowned_list(reln);

      /* Now establish the ownership relationship. */
      reln->smgr_owner = owner;
*************** smgrsetowner(SMgrRelation *owner, SMgrRe
*** 197,202 ****
--- 214,251 ----
  }

  /*
+  * remove_from_unowned_list -- unlink an SMgrRelation from the unowned list
+  *
+  * If the reln is not present in the list, nothing happens.  Typically this
+  * would be caller error, but there seems no reason to throw an error.
+  *
+  * In the worst case this could be rather slow; but in all the cases that seem
+  * likely to be performance-critical, the reln being sought will actually be
+  * first in the list.  Furthermore, the number of unowned relns touched in any
+  * one transaction shouldn't be all that high typically.  So it doesn't seem
+  * worth expending the additional space and management logic needed for a
+  * doubly-linked list.
+  */
+ static void
+ remove_from_unowned_list(SMgrRelation reln)
+ {
+     SMgrRelation *link;
+     SMgrRelation cur;
+
+     for (link = &first_unowned_reln, cur = *link;
+          cur != NULL;
+          link = &cur->next_unowned_reln, cur = *link)
+     {
+         if (cur == reln)
+         {
+             *link = cur->next_unowned_reln;
+             cur->next_unowned_reln = NULL;
+             break;
+         }
+     }
+ }
+
+ /*
   *    smgrexists() -- Does the underlying file for a fork exist?
   */
  bool
*************** smgrclose(SMgrRelation reln)
*** 219,224 ****
--- 268,276 ----

      owner = reln->smgr_owner;

+     if (!owner)
+         remove_from_unowned_list(reln);
+
      if (hash_search(SMgrRelationHash,
                      (void *) &(reln->smgr_rnode),
                      HASH_REMOVE, NULL) == NULL)
*************** smgrpostckpt(void)
*** 600,602 ****
--- 652,680 ----
              (*(smgrsw[i].smgr_post_ckpt)) ();
      }
  }
+
+ /*
+  * AtEOXact_SMgr
+  *
+  * This routine is called during transaction commit or abort (it doesn't
+  * particularly care which).  All transient SMgrRelation objects are closed.
+  *
+  * We do this as a compromise between wanting transient SMgrRelations to
+  * live awhile (to amortize the costs of blind writes of multiple blocks)
+  * and needing them to not live forever (since we're probably holding open
+  * a kernel file descriptor for the underlying file, and we need to ensure
+  * that gets closed reasonably soon if the file gets deleted).
+  */
+ void
+ AtEOXact_SMgr(void)
+ {
+     /*
+      * Zap all unowned SMgrRelations.  We rely on smgrclose() to remove each
+      * one from the list.
+      */
+     while (first_unowned_reln != NULL)
+     {
+         Assert(first_unowned_reln->smgr_owner == NULL);
+         smgrclose(first_unowned_reln);
+     }
+ }
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index 92b9198d5428c1f51617f595d062da1bc8af5106..9eccf7671ed99be4c068e3df50a75c21c1ce9e51 100644
*** a/src/include/storage/smgr.h
--- b/src/include/storage/smgr.h
***************
*** 33,38 ****
--- 33,41 ----
   * without having to make the smgr explicitly aware of relcache.  There
   * can't be more than one "owner" pointer per SMgrRelation, but that's
   * all we need.
+  *
+  * SMgrRelations that do not have an "owner" are considered to be transient,
+  * and are deleted at end of transaction.
   */
  typedef struct SMgrRelationData
  {
*************** typedef struct SMgrRelationData
*** 63,68 ****
--- 66,74 ----

      /* for md.c; NULL for forks that are not open */
      struct _MdfdVec *md_fd[MAX_FORKNUM + 1];
+
+     /* if unowned, list link in list of all unowned SMgrRelations */
+     struct SMgrRelationData *next_unowned_reln;
  } SMgrRelationData;

  typedef SMgrRelationData *SMgrRelation;
*************** extern void smgrimmedsync(SMgrRelation r
*** 95,100 ****
--- 101,107 ----
  extern void smgrpreckpt(void);
  extern void smgrsync(void);
  extern void smgrpostckpt(void);
+ extern void AtEOXact_SMgr(void);


  /* internals: move me elsewhere -- ay 7/94 */

pgsql-hackers by date:

Previous
From: Dimitri Fontaine
Date:
Subject: Re: Truncate if exists
Next
From: Christopher Browne
Date:
Subject: Re: Potential autovacuum optimization: new tables