Re: testing HS/SR - 1 vs 2 performance - Mailing list pgsql-hackers

From Heikki Linnakangas
Subject Re: testing HS/SR - 1 vs 2 performance
Date
Msg-id 4BC82002.3010307@enterprisedb.com
Whole thread Raw
In response to Re: testing HS/SR - 1 vs 2 performance  (Simon Riggs <simon@2ndQuadrant.com>)
Responses Re: testing HS/SR - 1 vs 2 performance  (Simon Riggs <simon@2ndQuadrant.com>)
List pgsql-hackers
Simon Riggs wrote:
> On Tue, 2010-04-13 at 21:09 +0300, Heikki Linnakangas wrote:
>> A quick fix would be to check if there's any entries in the hash table
>> before scanning it. That would eliminate the overhead when there's no
>> in-progress transactions in the master. But as soon as there's even one,
>> the overhead comes back.
>
> Any fix should be fairly quick because of the way its modularised - with
> something like this in mind.
>
> I'll try a circular buffer implementation, with fastpath.

I started experimenting with a sorted array based implementation on
Tuesday but got carried away with other stuff. I now got back to that
and cleaned it up.

How does the attached patch look like? It's probably similar to what you
had in mind.

--
  Heikki Linnakangas
  EnterpriseDB   http://www.enterprisedb.com
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 88169b4..4a04051 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -64,8 +64,10 @@ typedef struct ProcArrayStruct
     int            numProcs;        /* number of valid procs entries */
     int            maxProcs;        /* allocated size of procs array */

-    int            numKnownAssignedXids;    /* current number of known assigned
-                                         * xids */
+    int            firstKnownAssigned;        /* location of first valid known
+                                         * assigned xid in the array */
+    int            lastKnownAssigned;        /* location of last valid known
+                                         * assigned xid in the array */
     int            maxKnownAssignedXids;    /* allocated size of known assigned
                                          * xids */

@@ -87,7 +89,8 @@ static ProcArrayStruct *procArray;
 /*
  * Bookkeeping for tracking emulated transactions in recovery
  */
-static HTAB *KnownAssignedXidsHash;
+static TransactionId *KnownAssignedXidsArray;
+static bool *KnownAssignedXidsValidArray;
 static TransactionId latestObservedXid = InvalidTransactionId;

 /*
@@ -201,28 +204,33 @@ CreateSharedProcArray(void)
         /* Normal processing */
         procArray->numProcs = 0;
         procArray->maxProcs = PROCARRAY_MAXPROCS;
-        procArray->numKnownAssignedXids = 0;
         procArray->maxKnownAssignedXids = TOTAL_MAX_CACHED_SUBXIDS;
+        procArray->firstKnownAssignedXid = 0;
+        procArray->lastKnownAssignedXid = 0;
         procArray->lastOverflowedXid = InvalidTransactionId;
     }

     if (XLogRequestRecoveryConnections)
     {
-        /* Create or attach to the KnownAssignedXids hash table */
-        HASHCTL        info;
-
-        MemSet(&info, 0, sizeof(info));
-        info.keysize = sizeof(TransactionId);
-        info.entrysize = sizeof(TransactionId);
-        info.hash = tag_hash;
-
-        KnownAssignedXidsHash = ShmemInitHash("KnownAssignedXids Hash",
-                                              TOTAL_MAX_CACHED_SUBXIDS,
-                                              TOTAL_MAX_CACHED_SUBXIDS,
-                                              &info,
-                                              HASH_ELEM | HASH_FUNCTION);
-        if (!KnownAssignedXidsHash)
-            elog(FATAL, "could not initialize known assigned xids hash table");
+        Size size;
+        /* Create or attach to the KnownAssignedXids arrays */
+        size = mul_size(sizeof(TransactionId), TOTAL_MAX_CACHED_SUBXIDS);
+        KnownAssignedXidsArray = ShmemInitStruct("KnownAssignedXidsArray",
+                                                 size,
+                                                 &found);
+        if (!KnownAssignedXidsArray)
+            elog(FATAL, "could not initialize known assigned xids array");
+        if (!found)
+            MemSet(KnownAssignedXidsArray, 0, size);
+
+        size = mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS);
+        KnownAssignedXidsValidArray = ShmemInitStruct("KnownAssignedXidsValidArray",
+                                                      size,
+                                                      &found);
+        if (!KnownAssignedXidsValidArray)
+            elog(FATAL, "could not initialize known assigned xids used array");
+        if (!found)
+            MemSet(KnownAssignedXidsValidArray, 0, size);
     }
 }

@@ -2162,7 +2170,7 @@ DisplayXidCache(void)
  *
  * During recovery we do not fret too much about the distinction between
  * top-level xids and subtransaction xids. We hold both together in
- * a hash table called KnownAssignedXids. In backends, this is copied into
+ * an array called KnownAssignedXids. In backends, this is copied into
  * snapshots in GetSnapshotData(), taking advantage
  * of the fact that XidInMVCCSnapshot() doesn't care about the distinction
  * either. Subtransaction xids are effectively treated as top-level xids
@@ -2207,7 +2215,7 @@ RecordKnownAssignedTransactionIds(TransactionId xid)
      * We can see WAL records before the running-xacts snapshot that contain
      * XIDs that are not in the running-xacts snapshot, but that we know to
      * have finished before the running-xacts snapshot was taken. Don't waste
-     * precious shared memory by keeping them in the hash table.
+     * precious shared memory by keeping them in the array.
      *
      * We can also see WAL records before the running-xacts snapshot that
      * contain XIDs that are not in the running-xacts snapshot for a different
@@ -2330,24 +2338,30 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid)
 /*
  * Private module functions to manipulate KnownAssignedXids
  *
- * There are 3 main users of the KnownAssignedXids data structure:
+ * We use a fixed-size sorted array in shared memory to keep track of XIDs
+ * that we consider as in-progress in the master at this time. This data
+ * structure is called KnownAssignedXids, and it has 4 main users:
  *
- *     * backends taking snapshots
+ *     * backends taking snapshots, copying all XIDs in the array
+ *     * backends checking if an XID exists in the array
  *     * startup process adding new knownassigned xids
  *     * startup process removing xids as transactions end
  *
- * If we make KnownAssignedXids a simple sorted array then the first two
- * operations are fast, but the last one is at least O(N). If we make
- * KnownAssignedXids a hash table then the last two operations are fast,
- * though we have to do more work at snapshot time. Doing more work at
- * commit could slow down taking snapshots anyway because of lwlock
- * contention. Scanning the hash table is O(N) on the max size of the array,
- * so performs poorly in comparison when we have very low numbers of
- * write transactions to process. But at least it is constant overhead
- * and a sequential memory scan will utilise hardware memory readahead
- * to give much improved performance. In any case the emphasis must be on
- * having the standby process changes quickly so that it can provide
- * high availability. So we choose to implement as a hash table.
+ * Typically, there is only a few entries in the array, compared to the
+ * maximum size, so we keep track of the first and last valid entry in the
+ * array to make scanning it quick. That also makes it quick to add entries
+ * to the end. XIDs are assigned in monotonically increasing order, so new
+ * entries always go to the end.
+ *
+ * When an entry is removed, it's merely marked as invalid, but left in
+ * place in the array. There is a second array of booleans,
+ * KnownAssignedXidsValidArray, which keeps track of which entries in the
+ * KnownAssignedXidsArray are valid.
+ *
+ * Because we leave the entry in place when an XID is marked as removed, the
+ * array will eventually fill up. When an entry needs to be added and there
+ * is no room for it, the array is compacted by copying all valid entries to
+ * the beginning of the array, removing all invalid entries.
  */

 /*
@@ -2358,41 +2372,49 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid)
 static void
 KnownAssignedXidsAdd(TransactionId *xids, int nxids)
 {
-    TransactionId *result;
-    bool        found;
     int            i;

     for (i = 0; i < nxids; i++)
     {
-        Assert(TransactionIdIsValid(xids[i]));
+        TransactionId xid = xids[i];

-        elog(trace_recovery(DEBUG4), "adding KnownAssignedXid %u", xids[i]);
+        Assert(TransactionIdIsValid(xid));

-        procArray->numKnownAssignedXids++;
-        if (procArray->numKnownAssignedXids > procArray->maxKnownAssignedXids)
-        {
-            KnownAssignedXidsDisplay(LOG);
-            LWLockRelease(ProcArrayLock);
-            elog(ERROR, "too many KnownAssignedXids (%u)", procArray->maxKnownAssignedXids);
-        }
+        elog(trace_recovery(DEBUG4), "adding KnownAssignedXid %u", xid);

-        result = (TransactionId *) hash_search(KnownAssignedXidsHash, &xids[i], HASH_ENTER,
-                                               &found);
+        Assert(procArray->lastKnownAssignedXid == 0 ||
+               TransactionIdFollows(xid, KnownAssignedXidsArray[procArray->lastKnownAssignedXid - 1]));

-        if (!result)
+        /* Compact if necessary */
+        if (procArray->lastKnownAssignedXid == procArray->maxKnownAssignedXids)
         {
-            LWLockRelease(ProcArrayLock);
-            ereport(ERROR,
-                    (errcode(ERRCODE_OUT_OF_MEMORY),
-                     errmsg("out of shared memory")));
-        }
+            int j;
+            int k;

-        if (found)
-        {
-            KnownAssignedXidsDisplay(LOG);
-            LWLockRelease(ProcArrayLock);
-            elog(ERROR, "found duplicate KnownAssignedXid %u", xids[i]);
+            k = 0;
+            for (j = procArray->firstKnownAssignedXid; j < procArray->lastKnownAssignedXid; j++)
+            {
+                if (KnownAssignedXidsValidArray[j])
+                {
+                    KnownAssignedXidsArray[k] = KnownAssignedXidsArray[j];
+                    KnownAssignedXidsValidArray[k] = true;
+                    k++;
+                }
+            }
+            procArray->firstKnownAssignedXid = 0;
+            procArray->lastKnownAssignedXid = k;
+
+            if (procArray->lastKnownAssignedXid == procArray->maxKnownAssignedXids)
+            {
+                KnownAssignedXidsDisplay(LOG);
+                LWLockRelease(ProcArrayLock);
+                elog(ERROR, "too many KnownAssignedXids (%u)", procArray->maxKnownAssignedXids);
+            }
         }
+
+        KnownAssignedXidsArray[procArray->lastKnownAssignedXid] = xid;
+        KnownAssignedXidsValidArray[procArray->lastKnownAssignedXid] = true;
+        procArray->lastKnownAssignedXid++;
     }
 }

@@ -2404,10 +2426,21 @@ KnownAssignedXidsAdd(TransactionId *xids, int nxids)
 static bool
 KnownAssignedXidsExist(TransactionId xid)
 {
-    bool        found;
+    int low = procArray->firstKnownAssignedXid;
+    int high = procArray->lastKnownAssignedXid - 1;

-    (void) hash_search(KnownAssignedXidsHash, &xid, HASH_FIND, &found);
-    return found;
+    while (low <= high)
+    {
+        int middle = low + (high - low) / 2;
+
+        if (xid == KnownAssignedXidsArray[middle])
+            return true;
+        else if (xid > KnownAssignedXidsArray[middle])
+            low = middle + 1;
+        else
+            high = middle - 1;
+    }
+    return false;
 }

 /*
@@ -2418,17 +2451,37 @@ KnownAssignedXidsExist(TransactionId xid)
 static void
 KnownAssignedXidsRemove(TransactionId xid)
 {
-    bool        found;
+    int low = procArray->firstKnownAssignedXid;
+    int high = procArray->lastKnownAssignedXid - 1;

     Assert(TransactionIdIsValid(xid));

     elog(trace_recovery(DEBUG4), "remove KnownAssignedXid %u", xid);

-    (void) hash_search(KnownAssignedXidsHash, &xid, HASH_REMOVE, &found);
-
-    if (found)
-        procArray->numKnownAssignedXids--;
-    Assert(procArray->numKnownAssignedXids >= 0);
+    while (low <= high)
+    {
+        int middle = low + (high - low) / 2;
+
+        if (xid == KnownAssignedXidsArray[middle])
+        {
+            KnownAssignedXidsValidArray[middle] = false;
+            if (middle == procArray->lastKnownAssignedXid - 1)
+            {
+                while (procArray->lastKnownAssignedXid >= 0 &&
!KnownAssignedXidsValidArray[procArray->lastKnownAssignedXid- 1]) 
+                    procArray->lastKnownAssignedXid--;
+            }
+            if (middle == procArray->firstKnownAssignedXid)
+            {
+                while (procArray->firstKnownAssignedXid < procArray->lastKnownAssignedXid &&
!KnownAssignedXidsValidArray[procArray->firstKnownAssignedXid])
+                    procArray->firstKnownAssignedXid++;
+            }
+            return;
+        }
+        else if (xid > KnownAssignedXidsArray[middle])
+            low = middle + 1;
+        else
+            high = middle - 1;
+    }

     /*
      * We can fail to find an xid if the xid came from a subtransaction that
@@ -2466,26 +2519,30 @@ static int
 KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin,
                                TransactionId xmax)
 {
-    HASH_SEQ_STATUS status;
-    TransactionId *knownXid;
     int            count = 0;
+    int            i;

-    hash_seq_init(&status, KnownAssignedXidsHash);
-    while ((knownXid = (TransactionId *) hash_seq_search(&status)) != NULL)
+    for (i = procArray->firstKnownAssignedXid; i < procArray->lastKnownAssignedXid; i++)
     {
+        TransactionId xid = KnownAssignedXidsArray[i];
+
+        if (!KnownAssignedXidsValidArray[i])
+            continue;
+
         /*
-         * Filter out anything higher than xmax
+         * Filter out anything higher than xmax. The array is sorted so
+         * we can stop as soon as we find one that's too big
          */
-        if (TransactionIdPrecedes(xmax, *knownXid))
-            continue;
+        if (TransactionIdPrecedes(xmax, xid))
+            break;

-        *xarray = *knownXid;
+        *xarray = xid;
         xarray++;
         count++;

         /* update xmin if required */
-        if (TransactionIdPrecedes(*knownXid, *xmin))
-            *xmin = *knownXid;
+        if (TransactionIdPrecedes(xid, *xmin))
+            *xmin = xid;
     }

     return count;
@@ -2500,34 +2557,34 @@ KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin,
 static void
 KnownAssignedXidsRemoveMany(TransactionId xid, bool keepPreparedXacts)
 {
-    TransactionId *knownXid;
-    HASH_SEQ_STATUS status;
+    int i;

-    if (TransactionIdIsValid(xid))
-        elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to %u", xid);
-    else
+    if (!TransactionIdIsValid(xid))
+    {
         elog(trace_recovery(DEBUG4), "removing all KnownAssignedXids");
+        procArray->firstKnownAssignedXid = procArray->lastKnownAssignedXid = 0;
+        return;
+    }
+    elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to %u", xid);

-    hash_seq_init(&status, KnownAssignedXidsHash);
-    while ((knownXid = (TransactionId *) hash_seq_search(&status)) != NULL)
+    for (i = procArray->firstKnownAssignedXid; i < procArray->lastKnownAssignedXid; i++)
     {
-        TransactionId removeXid = *knownXid;
-        bool        found;
+        TransactionId removeXid = KnownAssignedXidsArray[i];
+        if (!KnownAssignedXidsValidArray[i])
+            continue;

         if (!TransactionIdIsValid(xid) || TransactionIdPrecedes(removeXid, xid))
         {
             if (keepPreparedXacts && StandbyTransactionIdIsPrepared(removeXid))
                 continue;
-            else
-            {
-                (void) hash_search(KnownAssignedXidsHash, &removeXid,
-                                   HASH_REMOVE, &found);
-                if (found)
-                    procArray->numKnownAssignedXids--;
-                Assert(procArray->numKnownAssignedXids >= 0);
-            }
+            KnownAssignedXidsValidArray[i] = false;
         }
     }
+    while (procArray->lastKnownAssignedXid >= 0 && !KnownAssignedXidsValidArray[procArray->lastKnownAssignedXid - 1])
+        procArray->lastKnownAssignedXid--;
+
+    while (procArray->firstKnownAssignedXid < procArray->lastKnownAssignedXid &&
!KnownAssignedXidsValidArray[procArray->firstKnownAssignedXid])
+        procArray->firstKnownAssignedXid++;
 }

 /*
@@ -2538,26 +2595,21 @@ KnownAssignedXidsRemoveMany(TransactionId xid, bool keepPreparedXacts)
 static void
 KnownAssignedXidsDisplay(int trace_level)
 {
-    HASH_SEQ_STATUS status;
-    TransactionId *knownXid;
     StringInfoData buf;
-    TransactionId *xids;
     int            nxids;
     int            i;

-    xids = palloc(sizeof(TransactionId) * TOTAL_MAX_CACHED_SUBXIDS);
-    nxids = 0;
-
-    hash_seq_init(&status, KnownAssignedXidsHash);
-    while ((knownXid = (TransactionId *) hash_seq_search(&status)) != NULL)
-        xids[nxids++] = *knownXid;
-
-    qsort(xids, nxids, sizeof(TransactionId), xidComparator);
-
     initStringInfo(&buf);

-    for (i = 0; i < nxids; i++)
-        appendStringInfo(&buf, "%u ", xids[i]);
+    nxids = 0;
+    for (i = procArray->firstKnownAssignedXid; i < procArray->lastKnownAssignedXid; i++)
+    {
+        if (KnownAssignedXidsValidArray[i])
+        {
+            appendStringInfo(&buf, "%u ", KnownAssignedXidsArray[i]);
+            nxids++;
+        }
+    }

     elog(trace_level, "%d KnownAssignedXids %s", nxids, buf.data);

diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h
index c5c1228..eee0d58 100644
--- a/src/include/pg_config_manual.h
+++ b/src/include/pg_config_manual.h
@@ -203,7 +203,7 @@
  * Enable debugging print statements for WAL-related operations; see
  * also the wal_debug GUC var.
  */
-/* #define WAL_DEBUG */
+#define WAL_DEBUG

 /*
  * Enable tracing of resource consumption during sort operations;

pgsql-hackers by date:

Previous
From: Peter Eisentraut
Date:
Subject: Re: pgindent and tabs in comments
Next
From: Heikki Linnakangas
Date:
Subject: Re: Remaining Streaming Replication Open Items