Re: [HACKERS] Unportable implementation of background worker start - Mailing list pgsql-hackers

From Tom Lane
Subject Re: [HACKERS] Unportable implementation of background worker start
Date
Msg-id 8322.1493240739@sss.pgh.pa.us
Whole thread Raw
In response to Re: [HACKERS] Unportable implementation of background worker start  (Andres Freund <andres@anarazel.de>)
Responses Re: [HACKERS] Unportable implementation of background worker start  (Andres Freund <andres@anarazel.de>)
List pgsql-hackers
Andres Freund <andres@anarazel.de> writes:
> I'd still like to get something like your CLOEXEC patch applied
> independently however.

Here's an updated version of that, which makes use of our previous
conclusion that F_SETFD/FD_CLOEXEC are available everywhere except
Windows, and fixes some sloppy thinking about the EXEC_BACKEND case.

I went ahead and changed the call to epoll_create into epoll_create1.
I'm not too concerned about loss of portability there --- it seems
unlikely that many people are still using ten-year-old glibc, and
even less likely that any of them would be interested in running
current Postgres on their stable-unto-death platform.  We could add
a configure test for epoll_create1 if you feel one's needed, but
I think it'd just be a waste of cycles.

I propose to push this into HEAD and 9.6 too.

            regards, tom lane

diff --git a/src/backend/storage/ipc/latch.c b/src/backend/storage/ipc/latch.c
index 0d0701a..946fbff 100644
*** a/src/backend/storage/ipc/latch.c
--- b/src/backend/storage/ipc/latch.c
*************** static volatile sig_atomic_t waiting = f
*** 118,123 ****
--- 118,126 ----
  static int    selfpipe_readfd = -1;
  static int    selfpipe_writefd = -1;

+ /* Process owning the self-pipe --- needed for checking purposes */
+ static int    selfpipe_owner_pid = 0;
+
  /* Private function prototypes */
  static void sendSelfPipeByte(void);
  static void drainSelfPipe(void);
*************** InitializeLatchSupport(void)
*** 146,152 ****
  #ifndef WIN32
      int            pipefd[2];

!     Assert(selfpipe_readfd == -1);

      /*
       * Set up the self-pipe that allows a signal handler to wake up the
--- 149,189 ----
  #ifndef WIN32
      int            pipefd[2];

!     if (IsUnderPostmaster)
!     {
!         /*
!          * We might have inherited connections to a self-pipe created by the
!          * postmaster.  It's critical that child processes create their own
!          * self-pipes, of course, and we really want them to close the
!          * inherited FDs for safety's sake.
!          */
!         if (selfpipe_owner_pid != 0)
!         {
!             /* Assert we go through here but once in a child process */
!             Assert(selfpipe_owner_pid != MyProcPid);
!             /* Release postmaster's pipe FDs; ignore any error */
!             (void) close(selfpipe_readfd);
!             (void) close(selfpipe_writefd);
!             /* Clean up, just for safety's sake; we'll set these below */
!             selfpipe_readfd = selfpipe_writefd = -1;
!             selfpipe_owner_pid = 0;
!         }
!         else
!         {
!             /*
!              * Postmaster didn't create a self-pipe ... or else we're in an
!              * EXEC_BACKEND build, in which case it doesn't matter since the
!              * postmaster's pipe FDs were closed by the action of FD_CLOEXEC.
!              */
!             Assert(selfpipe_readfd == -1);
!         }
!     }
!     else
!     {
!         /* In postmaster or standalone backend, assert we do this but once */
!         Assert(selfpipe_readfd == -1);
!         Assert(selfpipe_owner_pid == 0);
!     }

      /*
       * Set up the self-pipe that allows a signal handler to wake up the
*************** InitializeLatchSupport(void)
*** 154,176 ****
       * that SetLatch won't block if the event has already been set many times
       * filling the kernel buffer. Make the read-end non-blocking too, so that
       * we can easily clear the pipe by reading until EAGAIN or EWOULDBLOCK.
       */
      if (pipe(pipefd) < 0)
          elog(FATAL, "pipe() failed: %m");
      if (fcntl(pipefd[0], F_SETFL, O_NONBLOCK) == -1)
!         elog(FATAL, "fcntl() failed on read-end of self-pipe: %m");
      if (fcntl(pipefd[1], F_SETFL, O_NONBLOCK) == -1)
!         elog(FATAL, "fcntl() failed on write-end of self-pipe: %m");

      selfpipe_readfd = pipefd[0];
      selfpipe_writefd = pipefd[1];
  #else
      /* currently, nothing to do here for Windows */
  #endif
  }

  /*
!  * Initialize a backend-local latch.
   */
  void
  InitLatch(volatile Latch *latch)
--- 191,220 ----
       * that SetLatch won't block if the event has already been set many times
       * filling the kernel buffer. Make the read-end non-blocking too, so that
       * we can easily clear the pipe by reading until EAGAIN or EWOULDBLOCK.
+      * Also, make both FDs close-on-exec, since we surely do not want any
+      * child processes messing with them.
       */
      if (pipe(pipefd) < 0)
          elog(FATAL, "pipe() failed: %m");
      if (fcntl(pipefd[0], F_SETFL, O_NONBLOCK) == -1)
!         elog(FATAL, "fcntl(F_SETFL) failed on read-end of self-pipe: %m");
      if (fcntl(pipefd[1], F_SETFL, O_NONBLOCK) == -1)
!         elog(FATAL, "fcntl(F_SETFL) failed on write-end of self-pipe: %m");
!     if (fcntl(pipefd[0], F_SETFD, FD_CLOEXEC) == -1)
!         elog(FATAL, "fcntl(F_SETFD) failed on read-end of self-pipe: %m");
!     if (fcntl(pipefd[1], F_SETFD, FD_CLOEXEC) == -1)
!         elog(FATAL, "fcntl(F_SETFD) failed on write-end of self-pipe: %m");

      selfpipe_readfd = pipefd[0];
      selfpipe_writefd = pipefd[1];
+     selfpipe_owner_pid = MyProcPid;
  #else
      /* currently, nothing to do here for Windows */
  #endif
  }

  /*
!  * Initialize a process-local latch.
   */
  void
  InitLatch(volatile Latch *latch)
*************** InitLatch(volatile Latch *latch)
*** 181,187 ****

  #ifndef WIN32
      /* Assert InitializeLatchSupport has been called in this process */
!     Assert(selfpipe_readfd >= 0);
  #else
      latch->event = CreateEvent(NULL, TRUE, FALSE, NULL);
      if (latch->event == NULL)
--- 225,231 ----

  #ifndef WIN32
      /* Assert InitializeLatchSupport has been called in this process */
!     Assert(selfpipe_readfd >= 0 && selfpipe_owner_pid == MyProcPid);
  #else
      latch->event = CreateEvent(NULL, TRUE, FALSE, NULL);
      if (latch->event == NULL)
*************** InitLatch(volatile Latch *latch)
*** 199,204 ****
--- 243,252 ----
   * containing the latch with ShmemInitStruct. (The Unix implementation
   * doesn't actually require that, but the Windows one does.) Because of
   * this restriction, we have no concurrency issues to worry about here.
+  *
+  * Note that other handles created in this module are never marked as
+  * inheritable.  Thus we do not need to worry about cleaning up child
+  * process references to postmaster-private latches or WaitEventSets.
   */
  void
  InitSharedLatch(volatile Latch *latch)
*************** OwnLatch(volatile Latch *latch)
*** 244,250 ****

  #ifndef WIN32
      /* Assert InitializeLatchSupport has been called in this process */
!     Assert(selfpipe_readfd >= 0);
  #endif

      if (latch->owner_pid != 0)
--- 292,298 ----

  #ifndef WIN32
      /* Assert InitializeLatchSupport has been called in this process */
!     Assert(selfpipe_readfd >= 0 && selfpipe_owner_pid == MyProcPid);
  #endif

      if (latch->owner_pid != 0)
*************** DisownLatch(volatile Latch *latch)
*** 277,283 ****
   * is incurred when WL_TIMEOUT is given, so avoid using a timeout if possible.
   *
   * The latch must be owned by the current process, ie. it must be a
!  * backend-local latch initialized with InitLatch, or a shared latch
   * associated with the current process by calling OwnLatch.
   *
   * Returns bit mask indicating which condition(s) caused the wake-up. Note
--- 325,331 ----
   * is incurred when WL_TIMEOUT is given, so avoid using a timeout if possible.
   *
   * The latch must be owned by the current process, ie. it must be a
!  * process-local latch initialized with InitLatch, or a shared latch
   * associated with the current process by calling OwnLatch.
   *
   * Returns bit mask indicating which condition(s) caused the wake-up. Note
*************** CreateWaitEventSet(MemoryContext context
*** 517,525 ****
      set->nevents_space = nevents;

  #if defined(WAIT_USE_EPOLL)
!     set->epoll_fd = epoll_create(nevents);
      if (set->epoll_fd < 0)
!         elog(ERROR, "epoll_create failed: %m");
  #elif defined(WAIT_USE_WIN32)

      /*
--- 565,573 ----
      set->nevents_space = nevents;

  #if defined(WAIT_USE_EPOLL)
!     set->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
      if (set->epoll_fd < 0)
!         elog(ERROR, "epoll_create1 failed: %m");
  #elif defined(WAIT_USE_WIN32)

      /*
*************** CreateWaitEventSet(MemoryContext context
*** 540,545 ****
--- 588,599 ----

  /*
   * Free a previously created WaitEventSet.
+  *
+  * Note: preferably, this shouldn't have to free any resources that could be
+  * inherited across an exec().  If it did, we'd likely leak those resources in
+  * many scenarios.  For the epoll case, we ensure that by setting FD_CLOEXEC
+  * when the FD is created.  For the Windows case, we assume that the handles
+  * involved are non-inheritable.
   */
  void
  FreeWaitEventSet(WaitEventSet *set)
*************** FreeWaitEventSet(WaitEventSet *set)
*** 586,592 ****
   * used to modify previously added wait events using ModifyWaitEvent().
   *
   * In the WL_LATCH_SET case the latch must be owned by the current process,
!  * i.e. it must be a backend-local latch initialized with InitLatch, or a
   * shared latch associated with the current process by calling OwnLatch.
   *
   * In the WL_SOCKET_READABLE/WRITEABLE case, EOF and error conditions are
--- 640,646 ----
   * used to modify previously added wait events using ModifyWaitEvent().
   *
   * In the WL_LATCH_SET case the latch must be owned by the current process,
!  * i.e. it must be a process-local latch initialized with InitLatch, or a
   * shared latch associated with the current process by calling OwnLatch.
   *
   * In the WL_SOCKET_READABLE/WRITEABLE case, EOF and error conditions are

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

pgsql-hackers by date:

Previous
From: Robert Haas
Date:
Subject: Re: [HACKERS] [POC] hash partitioning
Next
From: Petr Jelinek
Date:
Subject: Re: [HACKERS] Logical replication in the same cluster