Re: Protect syscache from bloating with negative cache entries - Mailing list pgsql-hackers

From Kyotaro Horiguchi
Subject Re: Protect syscache from bloating with negative cache entries
Date
Msg-id 20201109.183447.202900641623450560.horikyota.ntt@gmail.com
Whole thread Raw
In response to Re: Protect syscache from bloating with negative cache entries  (Heikki Linnakangas <hlinnaka@iki.fi>)
Responses Re: Protect syscache from bloating with negative cache entries  (Heikki Linnakangas <hlinnaka@iki.fi>)
List pgsql-hackers
At Fri, 6 Nov 2020 10:42:15 +0200, Heikki Linnakangas <hlinnaka@iki.fi> wrote in 
> Do you need the "ntaccess == 2" test? You could always increment the
> counter, and in the code that uses ntaccess to decide what to evict,
> treat all values >= 2 the same.
> 
> Need to handle integer overflow somehow. Or maybe not: integer
> overflow is so infrequent that even if a hot syscache entry gets
> evicted prematurely because its ntaccess count wrapped around to 0, it
> will happen so rarely that it won't make any difference in practice.

That relaxing simplifies the code significantly, but a significant
degradation by about 5% still exists.

(SearchCatCacheInternal())
 +     ct->naccess++;
!+     ct->lastaccess = catcacheclock;

If I removed the second line above, the degradation disappears
(-0.7%). However, I don't find the corresponding numbers in the output
of perf. The sum of the numbers for the removed instructions is (0.02
+ 0.28 = 0.3%).  I don't think the degradation as the whole doesn't
always reflect to the instruction level profiling, but I'm stuck here,
anyway.

     % samples
master  p2    patched    (p2 = patched - "ct->lastaccess = catcacheclock)
=============================================================================
 0.47 | 0.27 |  0.17 |       mov   %rbx,0x8(%rbp)
      |      |       |     SearchCatCacheInternal():
      |      |       |     ct->naccess++;
      |      |       |     ct->lastaccess = catcacheclock;
----- |----- |  0.02 |10f:   mov   catcacheclock,%rax
      |      |       |     ct->naccess++;
----- | 0.96 |  1.00 |       addl  $0x1,0x14(%rbx)
      |      |       |     return NULL;
----- | 0.11 |  0.16 |       xor   %ebp,%ebp
      |      |       |     if (!ct->negative)
 0.27 | 0.30 |  0.03 |       cmpb  $0x0,0x21(%rbx)
      |      |       |     ct->lastaccess = catcacheclock;
----- | ---- |  0.28 |       mov   %rax,0x18(%rbx)
      |      |       |     if (!ct->negative)
 0.34 | 0.08 |  0.59 |     ↓ jne   149




For your information, the same table for a bit wider range follows.

     % samples
master  p2    patched    (p2 = patched - "ct->lastaccess = catcacheclock)
=============================================================================
      |      |       |     dlist_foreach(iter, bucket)
 6.91 | 7.06 |  5.89 |       mov   0x8(%rbp),%rbx
 0.78 | 0.73 |  0.81 |       test  %rbx,%rbx
      |      |       |     ↓ je    160
      |      |       |       cmp   %rbx,%rbp
 0.46 | 0.52 |  0.39 |     ↓ jne   9d
      |      |       |     ↓ jmpq  160
      |      |       |       nop
 5.68 | 5.54 |  6.03 | 90:   mov   0x8(%rbx),%rbx
 1.44 | 1.42 |  1.43 |       cmp   %rbx,%rbp
      |      |       |     ↓ je    160
      |      |       |     {
      |      |       |     ct = dlist_container(CatCTup, cache_elem, iter.cur);
      |      |       |
      |      |       |     if (ct->dead)
30.36 |30.97 | 31.48 | 9d:   cmpb  $0x0,0x20(%rbx)
 2.63 | 2.60 |  2.69 |     ↑ jne   90
      |      |       |     continue;                       /* ignore dead entries */
      |      |       |
      |      |       |     if (ct->hash_value != hashValue)
 1.41 | 1.37 |  1.35 |       cmp   -0x24(%rbx),%edx
 3.19 | 2.97 |  2.87 |     ↑ jne   90
 7.17 | 5.53 |  6.89 |       mov   %r13,%rsi
 0.02 | 0.04 |  0.04 |       xor   %r12d,%r12d
 3.00 | 2.98 |  2.95 |     ↓ jmp   b5
 0.15 | 0.61 |  0.20 | b0:   mov   0x10(%rsp,%r12,1),%rsi
 6.58 | 5.04 |  5.95 | b5:   mov   %ecx,0xc(%rsp)
      |      |       |     CatalogCacheCompareTuple():
      |      |       |     if (!(cc_fastequal[i]) (cachekeys[i], searchkeys[i]))
 1.51 | 0.92 |  1.66 |       mov   -0x20(%rbx,%r12,1),%rdi
 0.54 | 1.64 |  0.58 |       mov   %edx,0x8(%rsp)
 3.78 | 3.11 |  3.86 |     → callq *0x38(%r14,%r12,1)
 0.43 | 2.30 |  0.34 |       mov   0x8(%rsp),%edx
 0.20 | 0.94 |  0.25 |       mov   0xc(%rsp),%ecx
 0.44 | 0.41 |  0.44 |       test  %al,%al
      |      |       |     ↑ je    90
      |      |       |     for (i = 0; i < nkeys; i++)
 2.28 | 1.07 |  2.26 |       add   $0x8,%r12
 0.08 | 0.23 |  0.07 |       cmp   $0x18,%r12
 0.11 | 0.64 |  0.10 |     ↑ jne   b0
      |      |       |     dlist_move_head():
      |      |       |     */
      |      |       |     static inline void
      |      |       |     dlist_move_head(dlist_head *head, dlist_node *node)
      |      |       |     {
      |      |       |     /* fast path if it's already at the head */
      |      |       |     if (head->head.next == node)
 0.08 | 0.61 |  0.04 |       cmp   0x8(%rbp),%rbx
 0.02 | 0.10 |  0.00 |     ↓ je    10f
      |      |       |     return;
      |      |       |
      |      |       |     dlist_delete(node);
 0.01 | 0.20 |  0.06 |       mov   0x8(%rbx),%rax
      |      |       |     dlist_delete():
      |      |       |     node->prev->next = node->next;
 0.75 | 0.13 |  0.72 |       mov   (%rbx),%rdx
 2.89 | 3.42 |  2.22 |       mov   %rax,0x8(%rdx)
      |      |       |     node->next->prev = node->prev;
 0.01 | 0.09 |  0.00 |       mov   (%rbx),%rdx
 0.04 | 0.62 |  0.58 |       mov   %rdx,(%rax)
      |      |       |     dlist_push_head():
      |      |       |     if (head->head.next == NULL)    /* convert NULL header to circular */
 0.31 | 0.08 |  0.28 |       mov   0x8(%rbp),%rax
 0.55 | 0.44 |  0.28 |       test  %rax,%rax
      |      |       |     ↓ je    180
      |      |       |     node->next = head->head.next;
 0.00 | 0.08 |  0.06 |101:   mov   %rax,0x8(%rbx)
      |      |       |     node->prev = &head->head;
 0.17 | 0.73 |  0.37 |       mov   %rbp,(%rbx)
      |      |       |     node->next->prev = node;
 0.34 | 0.08 |  1.13 |       mov   %rbx,(%rax)
      |      |       |     head->head.next = node;
 0.47 | 0.27 |  0.17 |       mov   %rbx,0x8(%rbp)
      |      |       |     SearchCatCacheInternal():
      |      |       |     ct->naccess++;
      |      |       |     ct->lastaccess = catcacheclock;
----- |----- |  0.02 |10f:   mov   catcacheclock,%rax
      |      |       |     ct->naccess++;
----- | 0.96 |  1.00 |       addl  $0x1,0x14(%rbx)
      |      |       |     return NULL;
----- | 0.11 |  0.16 |       xor   %ebp,%ebp
      |      |       |     if (!ct->negative)
 0.27 | 0.30 |  0.03 |       cmpb  $0x0,0x21(%rbx)
      |      |       |     ct->lastaccess = catcacheclock;
----- | ---- |  0.28 |       mov   %rax,0x18(%rbx)
      |      |       |     if (!ct->negative)
 0.34 | 0.08 |  0.59 |     ↓ jne   149

-- 
Kyotaro Horiguchi
NTT Open Source Software Center
From 498a55ff07f19646ca09034dfdc4c68459a74855 Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horikyoga.ntt@gmail.com>
Date: Fri, 6 Nov 2020 17:27:18 +0900
Subject: [PATCH v5] CatCache expiration feature

---
 src/backend/access/transam/xact.c  |   3 +
 src/backend/utils/cache/catcache.c | 118 +++++++++++++++++++++++++++++
 src/backend/utils/misc/guc.c       |  12 +++
 src/include/utils/catcache.h       |  20 +++++
 4 files changed, 153 insertions(+)

diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index af6afcebb1..a246fcc4c0 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -1086,6 +1086,9 @@ static void
 AtStart_Cache(void)
 {
     AcceptInvalidationMessages();
+
+    if (xactStartTimestamp != 0)
+        SetCatCacheClock(xactStartTimestamp);
 }
 
 /*
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index 3613ae5f44..b457fed7ab 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -38,6 +38,7 @@
 #include "utils/rel.h"
 #include "utils/resowner_private.h"
 #include "utils/syscache.h"
+#include "utils/timestamp.h"
 
 
  /* #define CACHEDEBUG */    /* turns DEBUG elogs on */
@@ -60,9 +61,18 @@
 #define CACHE_elog(...)
 #endif
 
+/*
+ * GUC variable to define the minimum age of entries that will be considered
+ * to be evicted in seconds. -1 to disable the feature.
+ */
+int catalog_cache_prune_min_age = -1;
+
 /* Cache management header --- pointer is NULL until created */
 static CatCacheHeader *CacheHdr = NULL;
 
+/* Clock for the last accessed time of a catcache entry. */
+TimestampTz    catcacheclock = 0;
+
 static inline HeapTuple SearchCatCacheInternal(CatCache *cache,
                                                int nkeys,
                                                Datum v1, Datum v2,
@@ -74,6 +84,7 @@ static pg_noinline HeapTuple SearchCatCacheMiss(CatCache *cache,
                                                 Index hashIndex,
                                                 Datum v1, Datum v2,
                                                 Datum v3, Datum v4);
+static bool CatCacheCleanupOldEntries(CatCache *cp);
 
 static uint32 CatalogCacheComputeHashValue(CatCache *cache, int nkeys,
                                            Datum v1, Datum v2, Datum v3, Datum v4);
@@ -99,6 +110,12 @@ static void CatCacheFreeKeys(TupleDesc tupdesc, int nkeys, int *attnos,
 static void CatCacheCopyKeys(TupleDesc tupdesc, int nkeys, int *attnos,
                              Datum *srckeys, Datum *dstkeys);
 
+/* GUC assign function */
+void
+assign_catalog_cache_prune_min_age(int newval, void *extra)
+{
+    catalog_cache_prune_min_age = newval;
+}
 
 /*
  *                    internal support functions
@@ -863,6 +880,10 @@ RehashCatCache(CatCache *cp)
     int            newnbuckets;
     int            i;
 
+    /* try removing old entries before expanding hash */
+    if (CatCacheCleanupOldEntries(cp))
+        return;
+
     elog(DEBUG1, "rehashing catalog cache id %d for %s; %d tups, %d buckets",
          cp->id, cp->cc_relname, cp->cc_ntup, cp->cc_nbuckets);
 
@@ -1264,6 +1285,16 @@ SearchCatCacheInternal(CatCache *cache,
          */
         dlist_move_head(bucket, &ct->cache_elem);
 
+        /*
+         * Prolong life of this entry. Since we want run as less instructions
+         * as possible and want the branch be stable for performance reasons,
+         * we don't care of wrap-around and possible false-negative for old
+         * entries. The window is quite narrow and the counter doesn't gets so
+         * large while expiration is active.
+         */
+        ct->naccess++;
+        ct->lastaccess = catcacheclock;
+
         /*
          * If it's a positive entry, bump its refcount and return it. If it's
          * negative, we can report failure to the caller.
@@ -1425,6 +1456,91 @@ SearchCatCacheMiss(CatCache *cache,
     return &ct->tuple;
 }
 
+/*
+ * CatCacheCleanupOldEntries - Remove infrequently-used entries
+ *
+ * Catcache entries happen to be left unused for a long time for several
+ * reasons. Remove such entries to prevent catcache from bloating. It is based
+ * on the similar algorithm with buffer eviction. Entries that are accessed
+ * several times in a certain period live longer than those that have had less
+ * access in the same duration.
+ */
+static bool
+CatCacheCleanupOldEntries(CatCache *cp)
+{
+    int        nremoved = 0;
+    int        i;
+    long    oldest_ts = catcacheclock;
+    long    age;
+    int        us;
+
+    /* Return immediately if disabled */
+    if (likely(catalog_cache_prune_min_age < 0))
+        return false;
+
+    /* Don't scan the hash when we know we don't have prunable entries */
+    TimestampDifference(cp->cc_oldest_ts, catcacheclock, &age, &us);
+    if (age < catalog_cache_prune_min_age)
+        return false;
+
+    /* Scan over the whole hash to find entries to remove */
+    for (i = 0 ; i < cp->cc_nbuckets ; i++)
+    {
+        dlist_mutable_iter    iter;
+
+        dlist_foreach_modify(iter, &cp->cc_bucket[i])
+        {
+            CatCTup    *ct = dlist_container(CatCTup, cache_elem, iter.cur);
+
+            /* Don't remove referenced entries */
+            if (ct->refcount == 0 &&
+                (ct->c_list == NULL || ct->c_list->refcount == 0))
+            {
+                /*
+                 * Calculate the duration from the time from the last access
+                 * to the "current" time. catcacheclock is updated
+                 * per-statement basis and additionaly udpated periodically
+                 * during a long running query.
+                 */
+                TimestampDifference(ct->lastaccess, catcacheclock, &age, &us);
+
+                if (age > catalog_cache_prune_min_age)
+                {
+                    /*
+                     * Entries that are not accessed after the last pruning
+                     * are removed in that seconds, and their lives are
+                     * prolonged according to how many times they are accessed
+                     * up to three times of the duration. We don't try shrink
+                     * buckets since pruning effectively caps catcache
+                     * expansion in the long term.
+                     */
+                    ct->naccess = Min(2, ct->naccess);
+                    if (--ct->naccess == 0)
+                    {
+                        CatCacheRemoveCTup(cp, ct);
+                        nremoved++;
+
+                        /* don't update oldest_ts by removed entry */
+                        continue;
+                    }
+                }
+            }
+
+            /* update oldest timestamp if the entry remains alive */
+            if (ct->lastaccess < oldest_ts)
+                oldest_ts = ct->lastaccess;
+        }
+    }
+
+    cp->cc_oldest_ts = oldest_ts;
+
+    if (nremoved > 0)
+        elog(DEBUG1, "pruning catalog cache id=%d for %s: removed %d / %d",
+             cp->id, cp->cc_relname, nremoved, cp->cc_ntup + nremoved);
+
+    return nremoved > 0;
+}
+
 /*
  *    ReleaseCatCache
  *
@@ -1888,6 +2004,8 @@ CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, Datum *arguments,
     ct->dead = false;
     ct->negative = negative;
     ct->hash_value = hashValue;
+    ct->naccess = 1;
+    ct->lastaccess = catcacheclock;
 
     dlist_push_head(&cache->cc_bucket[hashIndex], &ct->cache_elem);
 
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index bb34630e8e..95213853aa 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -88,6 +88,7 @@
 #include "utils/acl.h"
 #include "utils/builtins.h"
 #include "utils/bytea.h"
+#include "utils/catcache.h"
 #include "utils/float.h"
 #include "utils/guc_tables.h"
 #include "utils/memutils.h"
@@ -3399,6 +3400,17 @@ static struct config_int ConfigureNamesInt[] =
         check_huge_page_size, NULL, NULL
     },
 
+    {
+        {"catalog_cache_prune_min_age", PGC_USERSET, RESOURCES_MEM,
+            gettext_noop("System catalog cache entries that are living unused more than this seconds are considered
forremoval."),
 
+            gettext_noop("The value of -1 turns off pruning."),
+            GUC_UNIT_S
+        },
+        &catalog_cache_prune_min_age,
+        -1, -1, INT_MAX,
+        NULL, assign_catalog_cache_prune_min_age, NULL
+    },
+
     /* End-of-list marker */
     {
         {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h
index f4aa316604..a11736f767 100644
--- a/src/include/utils/catcache.h
+++ b/src/include/utils/catcache.h
@@ -22,6 +22,7 @@
 
 #include "access/htup.h"
 #include "access/skey.h"
+#include "datatype/timestamp.h"
 #include "lib/ilist.h"
 #include "utils/relcache.h"
 
@@ -61,6 +62,7 @@ typedef struct catcache
     slist_node    cc_next;        /* list link */
     ScanKeyData cc_skey[CATCACHE_MAXKEYS];    /* precomputed key info for heap
                                              * scans */
+    TimestampTz    cc_oldest_ts;    /* timestamp of the oldest tuple in the hash */
 
     /*
      * Keep these at the end, so that compiling catcache.c with CATCACHE_STATS
@@ -119,6 +121,8 @@ typedef struct catctup
     bool        dead;            /* dead but not yet removed? */
     bool        negative;        /* negative cache entry? */
     HeapTupleData tuple;        /* tuple management header */
+    unsigned int naccess;        /* # of access to this entry */
+    TimestampTz    lastaccess;        /* timestamp of the last usage */
 
     /*
      * The tuple may also be a member of at most one CatCList.  (If a single
@@ -189,6 +193,22 @@ typedef struct catcacheheader
 /* this extern duplicates utils/memutils.h... */
 extern PGDLLIMPORT MemoryContext CacheMemoryContext;
 
+
+/* for guc.c, not PGDLLPMPORT'ed */
+extern int catalog_cache_prune_min_age;
+
+/* source clock for access timestamp of catcache entries */
+extern TimestampTz catcacheclock;
+
+/* SetCatCacheClock - set catcache timestamp source clodk */
+static inline void
+SetCatCacheClock(TimestampTz ts)
+{
+    catcacheclock = ts;
+}
+
+extern void assign_catalog_cache_prune_min_age(int newval, void *extra);
+
 extern void CreateCacheMemoryContext(void);
 
 extern CatCache *InitCatCache(int id, Oid reloid, Oid indexoid,
-- 
2.18.4


pgsql-hackers by date:

Previous
From: Dilip Kumar
Date:
Subject: Re: logical streaming of xacts via test_decoding is broken
Next
From: Magnus Hagander
Date:
Subject: Re: Useless string ouput in error message