Re: Non-robustness in pmsignal.c - Mailing list pgsql-hackers

From Tom Lane
Subject Re: Non-robustness in pmsignal.c
Date
Msg-id 3654052.1665249307@sss.pgh.pa.us
Whole thread Raw
In response to Re: Non-robustness in pmsignal.c  (Tom Lane <tgl@sss.pgh.pa.us>)
Responses Re: Non-robustness in pmsignal.c  (Andres Freund <andres@anarazel.de>)
List pgsql-hackers
I wrote:
> Andres Freund <andres@anarazel.de> writes:
>> Only PM_CHILD_ACTIVE and PM_CHILD_WALSENDER though. We could afford another
>> MaxLivePostmasterChildren() sized array...

> Oh, I see what you mean --- one private and one public array.
> Maybe that makes more sense than what I did, not sure.

Yeah, that's definitely a better way.  I'll push this after the
release freeze lifts.

            regards, tom lane

diff --git a/src/backend/storage/ipc/pmsignal.c b/src/backend/storage/ipc/pmsignal.c
index 3f0ec5e6b8..c85521d364 100644
--- a/src/backend/storage/ipc/pmsignal.c
+++ b/src/backend/storage/ipc/pmsignal.c
@@ -26,6 +26,7 @@
 #include "replication/walsender.h"
 #include "storage/pmsignal.h"
 #include "storage/shmem.h"
+#include "utils/memutils.h"


 /*
@@ -75,12 +76,21 @@ struct PMSignalData
     QuitSignalReason sigquit_reason;    /* why SIGQUIT was sent */
     /* per-child-process flags */
     int            num_child_flags;    /* # of entries in PMChildFlags[] */
-    int            next_child_flag;    /* next slot to try to assign */
     sig_atomic_t PMChildFlags[FLEXIBLE_ARRAY_MEMBER];
 };

+/* PMSignalState pointer is valid in both postmaster and child processes */
 NON_EXEC_STATIC volatile PMSignalData *PMSignalState = NULL;

+/*
+ * These static variables are valid only in the postmaster.  We keep a
+ * duplicative private array so that we can trust its state even if some
+ * failing child has clobbered the PMSignalData struct in shared memory.
+ */
+static int    num_child_inuse;    /* # of entries in PMChildInUse[] */
+static int    next_child_inuse;    /* next slot to try to assign */
+static bool *PMChildInUse;        /* true if i'th flag slot is assigned */
+
 /*
  * Signal handler to be notified if postmaster dies.
  */
@@ -142,7 +152,25 @@ PMSignalShmemInit(void)
     {
         /* initialize all flags to zeroes */
         MemSet(unvolatize(PMSignalData *, PMSignalState), 0, PMSignalShmemSize());
-        PMSignalState->num_child_flags = MaxLivePostmasterChildren();
+        num_child_inuse = MaxLivePostmasterChildren();
+        PMSignalState->num_child_flags = num_child_inuse;
+
+        /*
+         * Also allocate postmaster's private PMChildInUse[] array.  We
+         * might've already done that in a previous shared-memory creation
+         * cycle, in which case free the old array to avoid a leak.  (Do it
+         * like this to support the possibility that MaxLivePostmasterChildren
+         * changed.)  In a standalone backend, we do not need this.
+         */
+        if (PostmasterContext != NULL)
+        {
+            if (PMChildInUse)
+                pfree(PMChildInUse);
+            PMChildInUse = (bool *)
+                MemoryContextAllocZero(PostmasterContext,
+                                       num_child_inuse * sizeof(bool));
+        }
+        next_child_inuse = 0;
     }
 }

@@ -218,21 +246,24 @@ GetQuitSignalReason(void)
 int
 AssignPostmasterChildSlot(void)
 {
-    int            slot = PMSignalState->next_child_flag;
+    int            slot = next_child_inuse;
     int            n;

     /*
-     * Scan for a free slot.  We track the last slot assigned so as not to
-     * waste time repeatedly rescanning low-numbered slots.
+     * Scan for a free slot.  Notice that we trust nothing about the contents
+     * of PMSignalState, but use only postmaster-local data for this decision.
+     * We track the last slot assigned so as not to waste time repeatedly
+     * rescanning low-numbered slots.
      */
-    for (n = PMSignalState->num_child_flags; n > 0; n--)
+    for (n = num_child_inuse; n > 0; n--)
     {
         if (--slot < 0)
-            slot = PMSignalState->num_child_flags - 1;
-        if (PMSignalState->PMChildFlags[slot] == PM_CHILD_UNUSED)
+            slot = num_child_inuse - 1;
+        if (!PMChildInUse[slot])
         {
+            PMChildInUse[slot] = true;
             PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED;
-            PMSignalState->next_child_flag = slot;
+            next_child_inuse = slot;
             return slot + 1;
         }
     }
@@ -254,7 +285,7 @@ ReleasePostmasterChildSlot(int slot)
 {
     bool        result;

-    Assert(slot > 0 && slot <= PMSignalState->num_child_flags);
+    Assert(slot > 0 && slot <= num_child_inuse);
     slot--;

     /*
@@ -264,17 +295,18 @@ ReleasePostmasterChildSlot(int slot)
      */
     result = (PMSignalState->PMChildFlags[slot] == PM_CHILD_ASSIGNED);
     PMSignalState->PMChildFlags[slot] = PM_CHILD_UNUSED;
+    PMChildInUse[slot] = false;
     return result;
 }

 /*
  * IsPostmasterChildWalSender - check if given slot is in use by a
- * walsender process.
+ * walsender process.  This is called only by the postmaster.
  */
 bool
 IsPostmasterChildWalSender(int slot)
 {
-    Assert(slot > 0 && slot <= PMSignalState->num_child_flags);
+    Assert(slot > 0 && slot <= num_child_inuse);
     slot--;

     if (PMSignalState->PMChildFlags[slot] == PM_CHILD_WALSENDER)

pgsql-hackers by date:

Previous
From: Nathan Bossart
Date:
Subject: Re: use has_privs_of_role() for pg_hba.conf
Next
From: Andres Freund
Date:
Subject: Re: Non-robustness in pmsignal.c