Re: Multiplexing SUGUSR1 - Mailing list pgsql-hackers

From Heikki Linnakangas
Subject Re: Multiplexing SUGUSR1
Date
Msg-id 493F9DAE.5090305@enterprisedb.com
Whole thread Raw
In response to Re: Multiplexing SUGUSR1  ("Fujii Masao" <masao.fujii@gmail.com>)
List pgsql-hackers
Fujii Masao wrote:
> Hi,
>
> On Wed, Dec 10, 2008 at 1:43 AM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
>> Heikki Linnakangas <heikki.linnakangas@enterprisedb.com> writes:
>>> I'm surprised you feel that way. You suggested earlier
>>> (http://archives.postgresql.org/message-id/28487.1221147665@sss.pgh.pa.us)
>>> that a solution that only works for processes attached to shared memory
>>> would probably suffice for now.
>> Well, I wasn't complaining about the dependence on being attached to
>> shared memory.  What I'm complaining about is the dependence on the
>> rather complex PGPROC data structure.
>>
>>> That seems hard, considering that we also want it to work without
>>> locking. Hmm, I presume we can use spinlocks in a signal handler?
>>> Perhaps some sort of a hash table protected by a spinlock would work.
>> No, locks are right out if the postmaster is supposed to be able to use
>> it.  What I was thinking of is a simple linear array of PIDs and
>> sig_atomic_t flags.  The slots could be assigned on the basis of
>> backendid, but callers trying to send a signal would have to scan the
>> array looking for the matching PID.  (This doesn't seem outlandishly
>> expensive considering that one is about to do a kernel call anyway.
>> You might be able to save a few cycles by having the PID array separate
>> from the flag array, which should improve the cache friendliness of the
>> scan.)  Also, for those callers who do have access to a PGPROC, there
>> could be a separate entry point that passes backendid instead of PID to
>> eliminate the search.
>
> Thanks for the comment!
> I updated the patch so. Is this patch ready to apply?

Looks like we stepped on each others toes, I was just about to send a
similar patch. Attached is my version, it's essentially the same as yours.

My version doesn't have support for auxiliary processes. Does the
synchronous replication patch need that?

This comment is wrong, though the code below it is right:

> *** base/src/backend/bootstrap/bootstrap.c    2008-12-10 16:29:10.000000000 +0900
> --- new/src/backend/bootstrap/bootstrap.c    2008-12-10 17:16:23.000000000 +0900
> ***************
> *** 389,394 ****
> --- 389,403 ----
>           InitAuxiliaryProcess();
>   #endif
>
> +         /*
> +          * Assign backend ID to auxiliary processes like backends, in order to
> +          * allow multiplexing signal to auxiliary processes. Since backends use
> +          * ID in the range from 1 to MaxBackends, we assign auxiliary processes
> +          * with MaxBackends + AuxProcType as an unique ID.
> +          */
> +         MyBackendId = MaxBackends + auxType;
> +         MyProc->backendId = MyBackendId;
> +
>           /* finish setting up bufmgr.c */
>           InitBufferPoolBackend();

Backends use IDs in the range 0 to MaxBackends-1, inclusive.

--
   Heikki Linnakangas
   EnterpriseDB   http://www.enterprisedb.com
*** src/backend/postmaster/autovacuum.c
--- src/backend/postmaster/autovacuum.c
***************
*** 91,96 ****
--- 91,97 ----
  #include "storage/pmsignal.h"
  #include "storage/proc.h"
  #include "storage/procarray.h"
+ #include "storage/procsignal.h"
  #include "storage/sinvaladt.h"
  #include "tcop/tcopprot.h"
  #include "utils/dynahash.h"
***************
*** 1477,1483 **** AutoVacWorkerMain(int argc, char *argv[])
      pqsignal(SIGALRM, handle_sig_alarm);

      pqsignal(SIGPIPE, SIG_IGN);
!     pqsignal(SIGUSR1, CatchupInterruptHandler);
      /* We don't listen for async notifies */
      pqsignal(SIGUSR2, SIG_IGN);
      pqsignal(SIGFPE, FloatExceptionHandler);
--- 1478,1484 ----
      pqsignal(SIGALRM, handle_sig_alarm);

      pqsignal(SIGPIPE, SIG_IGN);
!     pqsignal(SIGUSR1, procsignal_sigusr1_handler);
      /* We don't listen for async notifies */
      pqsignal(SIGUSR2, SIG_IGN);
      pqsignal(SIGFPE, FloatExceptionHandler);
*** src/backend/storage/ipc/Makefile
--- src/backend/storage/ipc/Makefile
***************
*** 15,21 **** override CFLAGS+= -fno-inline
  endif
  endif

! OBJS = ipc.o ipci.o pmsignal.o procarray.o shmem.o shmqueue.o \
      sinval.o sinvaladt.o

  include $(top_srcdir)/src/backend/common.mk
--- 15,21 ----
  endif
  endif

! OBJS = ipc.o ipci.o pmsignal.o procarray.o procsignal.o shmem.o shmqueue.o \
      sinval.o sinvaladt.o

  include $(top_srcdir)/src/backend/common.mk
*** src/backend/storage/ipc/ipci.c
--- src/backend/storage/ipc/ipci.c
***************
*** 30,35 ****
--- 30,36 ----
  #include "storage/pg_shmem.h"
  #include "storage/pmsignal.h"
  #include "storage/procarray.h"
+ #include "storage/procsignal.h"
  #include "storage/sinvaladt.h"
  #include "storage/spin.h"

***************
*** 111,116 **** CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
--- 112,118 ----
          size = add_size(size, SInvalShmemSize());
          size = add_size(size, BgWriterShmemSize());
          size = add_size(size, AutoVacuumShmemSize());
+         size = add_size(size, ProcSignalShmemSize());
          size = add_size(size, BTreeShmemSize());
          size = add_size(size, SyncScanShmemSize());
  #ifdef EXEC_BACKEND
***************
*** 207,212 **** CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
--- 209,215 ----
      PMSignalInit();
      BgWriterShmemInit();
      AutoVacuumShmemInit();
+     ProcSignalShmemInit();

      /*
       * Set up other modules that need some shared memory space
*** /dev/null
--- src/backend/storage/ipc/procsignal.c
***************
*** 0 ****
--- 1,214 ----
+ /*-------------------------------------------------------------------------
+  *
+  * procsignal.c
+  *      routines for signaling processes
+  *
+  *
+  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * IDENTIFICATION
+  *      $PostgreSQL$
+  *
+  *-------------------------------------------------------------------------
+  */
+ #include "postgres.h"
+
+ #include <signal.h>
+ #include <unistd.h>
+
+ #include "miscadmin.h"
+ #include "storage/backendid.h"
+ #include "storage/ipc.h"
+ #include "storage/procsignal.h"
+ #include "storage/shmem.h"
+ #include "storage/sinval.h"
+
+ /*
+  * SIGUSR1 signal is multiplexed for multiple events. The specific reason
+  * is communicated via flags in shared memory. We keep a boolean flag for
+  * each possible "reason", so that different reasons can be signaled by
+  * different backends at the same time.    (However, if the same reason is
+  * signaled more than once simultaneously, the process will observe it only
+  * once.)
+  *
+  * Each process that wants to receive signals registers its process ID
+  * in the ProcSignalSlots array. The array is indexed by backend ID to make
+  * slot allocation simple, and to avoid having to search the array when you
+  * know the backend ID of the process you're signaling.
+  *
+  * The flags are actually declared as "volatile sig_atomic_t" for maximum
+  * portability.  This should ensure that loads and stores of the flag
+  * values are atomic, allowing us to dispense with any explicit locking.
+  */
+ typedef struct {
+     pid_t        pss_pid;
+     sig_atomic_t pss_signalFlags[NUM_PROCSIGNALS];
+ } ProcSignalSlot;
+
+ static ProcSignalSlot *ProcSignalSlots;
+
+ static bool CheckProcSignal(ProcSignalReason reason);
+ static void CleanupProcSignalState(int status, Datum arg);
+
+ /*
+  * ProcSignalShmemInit - initialize during shared-memory creation
+  */
+ Size
+ ProcSignalShmemSize(void)
+ {
+     return MaxBackends * sizeof(ProcSignalSlot);
+ }
+
+
+ /*
+  * ProcSignalShmemInit - initialize during shared-memory creation
+  */
+ void
+ ProcSignalShmemInit(void)
+ {
+     bool        found;
+
+     ProcSignalSlots = (ProcSignalSlot *)
+         ShmemInitStruct("ProcSignalSlots", ProcSignalShmemSize(),
+                         &found);
+
+     if (!found)
+         MemSet(ProcSignalSlots, 0, ProcSignalShmemSize());
+ }
+
+ /*
+  * ProcSignalInit - register a process to the procsignal array
+  */
+ void
+ ProcSignalInit(void)
+ {
+     volatile ProcSignalSlot *slot;
+
+     Assert(MyBackendId >= 0 && MyBackendId < MaxBackends);
+
+     slot = &ProcSignalSlots[MyBackendId];
+     MemSet(slot->pss_signalFlags, 0, NUM_PROCSIGNALS * sizeof(sig_atomic_t));
+     slot->pss_pid = MyProcPid;
+
+     on_shmem_exit(CleanupProcSignalState, Int32GetDatum(MyBackendId));
+ }
+
+
+ /*
+  * CleanupProcSignalState
+  *        Remove current process from ProcSignalSlots
+  *
+  * This function is called via on_shmem_exit() during backend shutdown.
+  *
+  * arg is really of type "BackendId".
+  */
+ static void
+ CleanupProcSignalState(int status, Datum arg)
+ {
+     BackendId    backendId = (BackendId) DatumGetInt32(arg);
+     volatile ProcSignalSlot *slot;
+
+     slot = &ProcSignalSlots[backendId];
+
+     /* sanity check */
+     if (slot->pss_pid != MyProcPid)
+     {
+         /*
+          * don't ERROR here. We're exiting anyway, and don't want to
+          * get into infinite loop trying to exit
+          */
+         elog(WARNING, "current process not in ProcSignalSlots array");
+         return;
+     }
+     slot->pss_pid = 0;
+ }
+
+ /*
+  * SendProcSignal - signal a process with a reason code
+  *
+  * Providing backendId is optional, but it will speed up the operation.
+  *
+  * Not to be confused with ProcSendSignal
+  */
+ void
+ SendProcSignal(pid_t pid, ProcSignalReason reason, BackendId backendId)
+ {
+     volatile ProcSignalSlot *slot;
+
+     if (backendId != InvalidBackendId)
+     {
+         slot = &ProcSignalSlots[backendId];
+
+         /*
+          * Note: Since there's no locking, it's possible that the target
+          * process detaches from shared memory and exits right after this
+          * test, before we set the flag and send signal. And the signal slot
+          * might even be recycled by a new process, so it's remotely possible
+          * that we set a flag for a wrong process. That's OK, all the signals
+          * are such that no harm is done if they're mistakenly fired.
+          */
+         if (slot->pss_pid == pid)
+         {
+             /* Atomically set the proper flag */
+             slot->pss_signalFlags[reason] = true;
+             /* Send signal */
+             kill(pid, SIGUSR1);
+         }
+     }
+     else
+     {
+         /* same as above, but linear search the array using pid */
+         int i;
+         for (i = 0; i < MaxBackends; i++)
+         {
+             slot = &ProcSignalSlots[i];
+             if (slot->pss_pid == pid)
+             {
+                 /* the above note about race conditions applies here too */
+
+                 /* Atomically set the proper flag */
+                 slot->pss_signalFlags[reason] = true;
+                 /* Send signal */
+                 kill(pid, SIGUSR1);
+
+                 break;
+             }
+         }
+     }
+ }
+
+ /*
+  * CheckProcSignal - check to see if a particular reason has been
+  * signaled, and clear the signal flag.  Should be called after receiving
+  * SIGUSR1.
+  */
+ static bool
+ CheckProcSignal(ProcSignalReason reason)
+ {
+     volatile ProcSignalSlot *slot;
+
+     slot = &ProcSignalSlots[MyBackendId];
+
+     /* Careful here --- don't clear flag if we haven't seen it set */
+     if (slot->pss_signalFlags[reason])
+     {
+         slot->pss_signalFlags[reason] = false;
+         return true;
+     }
+     return false;
+ }
+
+ /*
+  * procsignal_sigusr1_handler - handle SIGUSR1 signal.
+  */
+ void
+ procsignal_sigusr1_handler(SIGNAL_ARGS)
+ {
+     int        save_errno = errno;
+
+     if (CheckProcSignal(PROCSIG_CATCHUP_INTERRUPT))
+         HandleCatchupInterrupt();
+
+     errno = save_errno;
+ }
*** src/backend/storage/ipc/sinval.c
--- src/backend/storage/ipc/sinval.c
***************
*** 26,33 ****
   * Because backends sitting idle will not be reading sinval events, we
   * need a way to give an idle backend a swift kick in the rear and make
   * it catch up before the sinval queue overflows and forces it to go
!  * through a cache reset exercise.    This is done by sending SIGUSR1
!  * to any backend that gets too far behind.
   *
   * State for catchup events consists of two flags: one saying whether
   * the signal handler is currently allowed to call ProcessCatchupEvent
--- 26,33 ----
   * Because backends sitting idle will not be reading sinval events, we
   * need a way to give an idle backend a swift kick in the rear and make
   * it catch up before the sinval queue overflows and forces it to go
!  * through a cache reset exercise.    This is done by sending
!  * PROCSIG_CATHCUP_INTERRUPT to any backend that gets too far behind.
   *
   * State for catchup events consists of two flags: one saying whether
   * the signal handler is currently allowed to call ProcessCatchupEvent
***************
*** 144,152 **** ReceiveSharedInvalidMessages(


  /*
!  * CatchupInterruptHandler
   *
!  * This is the signal handler for SIGUSR1.
   *
   * If we are idle (catchupInterruptEnabled is set), we can safely
   * invoke ProcessCatchupEvent directly.  Otherwise, just set a flag
--- 144,152 ----


  /*
!  * HandleCatchupInterrupt
   *
!  * This is called when PROCSIG_CATCHUP_INTERRUPT signal is received.
   *
   * If we are idle (catchupInterruptEnabled is set), we can safely
   * invoke ProcessCatchupEvent directly.  Otherwise, just set a flag
***************
*** 156,168 **** ReceiveSharedInvalidMessages(
   * since there's no longer any reason to do anything.)
   */
  void
! CatchupInterruptHandler(SIGNAL_ARGS)
  {
-     int            save_errno = errno;
-
      /*
!      * Note: this is a SIGNAL HANDLER.    You must be very wary what you do
!      * here.
       */

      /* Don't joggle the elbow of proc_exit */
--- 156,166 ----
   * since there's no longer any reason to do anything.)
   */
  void
! HandleCatchupInterrupt(void)
  {
      /*
!      * Note: this called by a SIGNAL HANDLER.  You must be very wary what
!      * you do here.
       */

      /* Don't joggle the elbow of proc_exit */
***************
*** 216,223 **** CatchupInterruptHandler(SIGNAL_ARGS)
           */
          catchupInterruptOccurred = 1;
      }
-
-     errno = save_errno;
  }

  /*
--- 214,219 ----
***************
*** 289,295 **** DisableCatchupInterrupt(void)
  /*
   * ProcessCatchupEvent
   *
!  * Respond to a catchup event (SIGUSR1) from another backend.
   *
   * This is called either directly from the SIGUSR1 signal handler,
   * or the next time control reaches the outer idle loop (assuming
--- 285,292 ----
  /*
   * ProcessCatchupEvent
   *
!  * Respond to a catchup event (PROCSIG_CATCHUP_INTERRUPT) from another
!  * backend.
   *
   * This is called either directly from the SIGUSR1 signal handler,
   * or the next time control reaches the outer idle loop (assuming
*** src/backend/storage/ipc/sinvaladt.c
--- src/backend/storage/ipc/sinvaladt.c
***************
*** 21,26 ****
--- 21,27 ----
  #include "storage/backendid.h"
  #include "storage/ipc.h"
  #include "storage/proc.h"
+ #include "storage/procsignal.h"
  #include "storage/shmem.h"
  #include "storage/sinvaladt.h"
  #include "storage/spin.h"
***************
*** 138,143 **** typedef struct ProcState
--- 139,145 ----
  {
      /* procPid is zero in an inactive ProcState array entry. */
      pid_t        procPid;        /* PID of backend, for signaling */
+     BackendId    backendId;        /* backend ID, for signaling */
      /* nextMsgNum is meaningless if procPid == 0 or resetState is true. */
      int            nextMsgNum;        /* next message number to read */
      bool        resetState;        /* backend needs to reset its state */
***************
*** 236,241 **** CreateSharedInvalidationState(void)
--- 238,244 ----
      for (i = 0; i < shmInvalBuffer->maxBackends; i++)
      {
          shmInvalBuffer->procState[i].procPid = 0;            /* inactive */
+         shmInvalBuffer->procState[i].backendId = InvalidBackendId;
          shmInvalBuffer->procState[i].nextMsgNum = 0;        /* meaningless */
          shmInvalBuffer->procState[i].resetState = false;
          shmInvalBuffer->procState[i].signaled = false;
***************
*** 304,309 **** SharedInvalBackendInit(void)
--- 307,313 ----

      /* mark myself active, with all extant messages already read */
      stateP->procPid = MyProcPid;
+     stateP->backendId = MyBackendId;
      stateP->nextMsgNum = segP->maxMsgNum;
      stateP->resetState = false;
      stateP->signaled = false;
***************
*** 342,347 **** CleanupInvalidationState(int status, Datum arg)
--- 346,352 ----

      /* Mark myself inactive */
      stateP->procPid = 0;
+     stateP->backendId = InvalidBackendId;
      stateP->nextMsgNum = 0;
      stateP->resetState = false;
      stateP->signaled = false;
***************
*** 644,661 **** SICleanupQueue(bool callerHasWriteLock, int minFree)
          segP->nextThreshold = (numMsgs / CLEANUP_QUANTUM + 1) * CLEANUP_QUANTUM;

      /*
!      * Lastly, signal anyone who needs a catchup interrupt.  Since kill()
!      * might not be fast, we don't want to hold locks while executing it.
       */
      if (needSig)
      {
!         pid_t    his_pid = needSig->procPid;

          needSig->signaled = true;
          LWLockRelease(SInvalReadLock);
          LWLockRelease(SInvalWriteLock);
          elog(DEBUG4, "sending sinval catchup signal to PID %d", (int) his_pid);
!         kill(his_pid, SIGUSR1);
          if (callerHasWriteLock)
              LWLockAcquire(SInvalWriteLock, LW_EXCLUSIVE);
      }
--- 649,668 ----
          segP->nextThreshold = (numMsgs / CLEANUP_QUANTUM + 1) * CLEANUP_QUANTUM;

      /*
!      * Lastly, signal anyone who needs a catchup interrupt.  Since
!      * SendProcSignal() might not be fast, we don't want to hold locks while
!      * executing it.
       */
      if (needSig)
      {
!         pid_t his_pid = needSig->procPid;
!         BackendId his_backendId = needSig->backendId;

          needSig->signaled = true;
          LWLockRelease(SInvalReadLock);
          LWLockRelease(SInvalWriteLock);
          elog(DEBUG4, "sending sinval catchup signal to PID %d", (int) his_pid);
!         SendProcSignal(his_pid, PROCSIG_CATCHUP_INTERRUPT, his_backendId);
          if (callerHasWriteLock)
              LWLockAcquire(SInvalWriteLock, LW_EXCLUSIVE);
      }
*** src/backend/tcop/postgres.c
--- src/backend/tcop/postgres.c
***************
*** 59,64 ****
--- 59,65 ----
  #include "storage/bufmgr.h"
  #include "storage/ipc.h"
  #include "storage/proc.h"
+ #include "storage/procsignal.h"
  #include "storage/sinval.h"
  #include "tcop/fastpath.h"
  #include "tcop/pquery.h"
***************
*** 3180,3186 **** PostgresMain(int argc, char *argv[], const char *username)
       * of output during who-knows-what operation...
       */
      pqsignal(SIGPIPE, SIG_IGN);
!     pqsignal(SIGUSR1, CatchupInterruptHandler);
      pqsignal(SIGUSR2, NotifyInterruptHandler);
      pqsignal(SIGFPE, FloatExceptionHandler);

--- 3181,3187 ----
       * of output during who-knows-what operation...
       */
      pqsignal(SIGPIPE, SIG_IGN);
!     pqsignal(SIGUSR1, procsignal_sigusr1_handler);
      pqsignal(SIGUSR2, NotifyInterruptHandler);
      pqsignal(SIGFPE, FloatExceptionHandler);

*** src/backend/utils/init/postinit.c
--- src/backend/utils/init/postinit.c
***************
*** 39,44 ****
--- 39,45 ----
  #include "storage/lmgr.h"
  #include "storage/proc.h"
  #include "storage/procarray.h"
+ #include "storage/procsignal.h"
  #include "storage/sinvaladt.h"
  #include "storage/smgr.h"
  #include "utils/acl.h"
***************
*** 445,450 **** InitPostgres(const char *in_dbname, Oid dboid, const char *username,
--- 446,453 ----
      if (MyBackendId > MaxBackends || MyBackendId <= 0)
          elog(FATAL, "bad backend id: %d", MyBackendId);

+     ProcSignalInit();
+
      /*
       * bufmgr needs another initialization call too
       */
*** /dev/null
--- src/include/storage/procsignal.h
***************
*** 0 ****
--- 1,42 ----
+ /*-------------------------------------------------------------------------
+  *
+  * procsignal.h
+  *      routines for signaling processes XXX
+  *
+  *
+  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * $PostgreSQL$
+  *
+  *-------------------------------------------------------------------------
+  */
+ #ifndef PROCSIGNAL_H
+ #define PROCSIGNAL_H
+
+
+ /*
+  * Reasons for signaling a process (a backend or an auxiliary process, like
+  * autovacuum worker). We can cope with simultaneous signals for different
+  * reasons. If the same reason is signaled multiple times in quick succession,
+  * however, the process is likely to observe only one notification of it.
+  * This is okay for the present uses.
+  */
+ typedef enum
+ {
+        PROCSIG_CATCHUP_INTERRUPT,      /* catchup interrupt */
+        NUM_PROCSIGNALS                         /* Must be last value of enum! */
+ } ProcSignalReason;
+
+ /*
+  * prototypes for functions in procsignal.c
+  */
+ extern Size ProcSignalShmemSize(void);
+ extern void ProcSignalShmemInit(void);
+ extern void ProcSignalInit(void);
+ extern void SendProcSignal(pid_t pid, ProcSignalReason reason,
+                            BackendId backendId);
+
+ extern void procsignal_sigusr1_handler(SIGNAL_ARGS);
+
+ #endif   /* PROCSIGNAL_H */
*** src/include/storage/sinval.h
--- src/include/storage/sinval.h
***************
*** 89,96 **** extern void ReceiveSharedInvalidMessages(
                        void (*invalFunction) (SharedInvalidationMessage *msg),
                               void (*resetFunction) (void));

! /* signal handler for catchup events (SIGUSR1) */
! extern void CatchupInterruptHandler(SIGNAL_ARGS);

  /*
   * enable/disable processing of catchup events directly from signal handler.
--- 89,96 ----
                        void (*invalFunction) (SharedInvalidationMessage *msg),
                               void (*resetFunction) (void));

! /* signal handler for catchup events (PROCSIG_CATCHUP_INTERRUPT) */
! extern void HandleCatchupInterrupt(void);

  /*
   * enable/disable processing of catchup events directly from signal handler.

pgsql-hackers by date:

Previous
From: "Fujii Masao"
Date:
Subject: Re: Multiplexing SUGUSR1
Next
From: Heikki Linnakangas
Date:
Subject: Re: Multiplexing SUGUSR1