Re: Protect syscache from bloating with negative cache entries - Mailing list pgsql-hackers
From | Kyotaro Horiguchi |
---|---|
Subject | Re: Protect syscache from bloating with negative cache entries |
Date | |
Msg-id | 20201109.183447.202900641623450560.horikyota.ntt@gmail.com Whole thread Raw |
In response to | Re: Protect syscache from bloating with negative cache entries (Heikki Linnakangas <hlinnaka@iki.fi>) |
Responses |
Re: Protect syscache from bloating with negative cache entries
|
List | pgsql-hackers |
At Fri, 6 Nov 2020 10:42:15 +0200, Heikki Linnakangas <hlinnaka@iki.fi> wrote in > Do you need the "ntaccess == 2" test? You could always increment the > counter, and in the code that uses ntaccess to decide what to evict, > treat all values >= 2 the same. > > Need to handle integer overflow somehow. Or maybe not: integer > overflow is so infrequent that even if a hot syscache entry gets > evicted prematurely because its ntaccess count wrapped around to 0, it > will happen so rarely that it won't make any difference in practice. That relaxing simplifies the code significantly, but a significant degradation by about 5% still exists. (SearchCatCacheInternal()) + ct->naccess++; !+ ct->lastaccess = catcacheclock; If I removed the second line above, the degradation disappears (-0.7%). However, I don't find the corresponding numbers in the output of perf. The sum of the numbers for the removed instructions is (0.02 + 0.28 = 0.3%). I don't think the degradation as the whole doesn't always reflect to the instruction level profiling, but I'm stuck here, anyway. % samples master p2 patched (p2 = patched - "ct->lastaccess = catcacheclock) ============================================================================= 0.47 | 0.27 | 0.17 | mov %rbx,0x8(%rbp) | | | SearchCatCacheInternal(): | | | ct->naccess++; | | | ct->lastaccess = catcacheclock; ----- |----- | 0.02 |10f: mov catcacheclock,%rax | | | ct->naccess++; ----- | 0.96 | 1.00 | addl $0x1,0x14(%rbx) | | | return NULL; ----- | 0.11 | 0.16 | xor %ebp,%ebp | | | if (!ct->negative) 0.27 | 0.30 | 0.03 | cmpb $0x0,0x21(%rbx) | | | ct->lastaccess = catcacheclock; ----- | ---- | 0.28 | mov %rax,0x18(%rbx) | | | if (!ct->negative) 0.34 | 0.08 | 0.59 | ↓ jne 149 For your information, the same table for a bit wider range follows. % samples master p2 patched (p2 = patched - "ct->lastaccess = catcacheclock) ============================================================================= | | | dlist_foreach(iter, bucket) 6.91 | 7.06 | 5.89 | mov 0x8(%rbp),%rbx 0.78 | 0.73 | 0.81 | test %rbx,%rbx | | | ↓ je 160 | | | cmp %rbx,%rbp 0.46 | 0.52 | 0.39 | ↓ jne 9d | | | ↓ jmpq 160 | | | nop 5.68 | 5.54 | 6.03 | 90: mov 0x8(%rbx),%rbx 1.44 | 1.42 | 1.43 | cmp %rbx,%rbp | | | ↓ je 160 | | | { | | | ct = dlist_container(CatCTup, cache_elem, iter.cur); | | | | | | if (ct->dead) 30.36 |30.97 | 31.48 | 9d: cmpb $0x0,0x20(%rbx) 2.63 | 2.60 | 2.69 | ↑ jne 90 | | | continue; /* ignore dead entries */ | | | | | | if (ct->hash_value != hashValue) 1.41 | 1.37 | 1.35 | cmp -0x24(%rbx),%edx 3.19 | 2.97 | 2.87 | ↑ jne 90 7.17 | 5.53 | 6.89 | mov %r13,%rsi 0.02 | 0.04 | 0.04 | xor %r12d,%r12d 3.00 | 2.98 | 2.95 | ↓ jmp b5 0.15 | 0.61 | 0.20 | b0: mov 0x10(%rsp,%r12,1),%rsi 6.58 | 5.04 | 5.95 | b5: mov %ecx,0xc(%rsp) | | | CatalogCacheCompareTuple(): | | | if (!(cc_fastequal[i]) (cachekeys[i], searchkeys[i])) 1.51 | 0.92 | 1.66 | mov -0x20(%rbx,%r12,1),%rdi 0.54 | 1.64 | 0.58 | mov %edx,0x8(%rsp) 3.78 | 3.11 | 3.86 | → callq *0x38(%r14,%r12,1) 0.43 | 2.30 | 0.34 | mov 0x8(%rsp),%edx 0.20 | 0.94 | 0.25 | mov 0xc(%rsp),%ecx 0.44 | 0.41 | 0.44 | test %al,%al | | | ↑ je 90 | | | for (i = 0; i < nkeys; i++) 2.28 | 1.07 | 2.26 | add $0x8,%r12 0.08 | 0.23 | 0.07 | cmp $0x18,%r12 0.11 | 0.64 | 0.10 | ↑ jne b0 | | | dlist_move_head(): | | | */ | | | static inline void | | | dlist_move_head(dlist_head *head, dlist_node *node) | | | { | | | /* fast path if it's already at the head */ | | | if (head->head.next == node) 0.08 | 0.61 | 0.04 | cmp 0x8(%rbp),%rbx 0.02 | 0.10 | 0.00 | ↓ je 10f | | | return; | | | | | | dlist_delete(node); 0.01 | 0.20 | 0.06 | mov 0x8(%rbx),%rax | | | dlist_delete(): | | | node->prev->next = node->next; 0.75 | 0.13 | 0.72 | mov (%rbx),%rdx 2.89 | 3.42 | 2.22 | mov %rax,0x8(%rdx) | | | node->next->prev = node->prev; 0.01 | 0.09 | 0.00 | mov (%rbx),%rdx 0.04 | 0.62 | 0.58 | mov %rdx,(%rax) | | | dlist_push_head(): | | | if (head->head.next == NULL) /* convert NULL header to circular */ 0.31 | 0.08 | 0.28 | mov 0x8(%rbp),%rax 0.55 | 0.44 | 0.28 | test %rax,%rax | | | ↓ je 180 | | | node->next = head->head.next; 0.00 | 0.08 | 0.06 |101: mov %rax,0x8(%rbx) | | | node->prev = &head->head; 0.17 | 0.73 | 0.37 | mov %rbp,(%rbx) | | | node->next->prev = node; 0.34 | 0.08 | 1.13 | mov %rbx,(%rax) | | | head->head.next = node; 0.47 | 0.27 | 0.17 | mov %rbx,0x8(%rbp) | | | SearchCatCacheInternal(): | | | ct->naccess++; | | | ct->lastaccess = catcacheclock; ----- |----- | 0.02 |10f: mov catcacheclock,%rax | | | ct->naccess++; ----- | 0.96 | 1.00 | addl $0x1,0x14(%rbx) | | | return NULL; ----- | 0.11 | 0.16 | xor %ebp,%ebp | | | if (!ct->negative) 0.27 | 0.30 | 0.03 | cmpb $0x0,0x21(%rbx) | | | ct->lastaccess = catcacheclock; ----- | ---- | 0.28 | mov %rax,0x18(%rbx) | | | if (!ct->negative) 0.34 | 0.08 | 0.59 | ↓ jne 149 -- Kyotaro Horiguchi NTT Open Source Software Center From 498a55ff07f19646ca09034dfdc4c68459a74855 Mon Sep 17 00:00:00 2001 From: Kyotaro Horiguchi <horikyoga.ntt@gmail.com> Date: Fri, 6 Nov 2020 17:27:18 +0900 Subject: [PATCH v5] CatCache expiration feature --- src/backend/access/transam/xact.c | 3 + src/backend/utils/cache/catcache.c | 118 +++++++++++++++++++++++++++++ src/backend/utils/misc/guc.c | 12 +++ src/include/utils/catcache.h | 20 +++++ 4 files changed, 153 insertions(+) diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index af6afcebb1..a246fcc4c0 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -1086,6 +1086,9 @@ static void AtStart_Cache(void) { AcceptInvalidationMessages(); + + if (xactStartTimestamp != 0) + SetCatCacheClock(xactStartTimestamp); } /* diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index 3613ae5f44..b457fed7ab 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -38,6 +38,7 @@ #include "utils/rel.h" #include "utils/resowner_private.h" #include "utils/syscache.h" +#include "utils/timestamp.h" /* #define CACHEDEBUG */ /* turns DEBUG elogs on */ @@ -60,9 +61,18 @@ #define CACHE_elog(...) #endif +/* + * GUC variable to define the minimum age of entries that will be considered + * to be evicted in seconds. -1 to disable the feature. + */ +int catalog_cache_prune_min_age = -1; + /* Cache management header --- pointer is NULL until created */ static CatCacheHeader *CacheHdr = NULL; +/* Clock for the last accessed time of a catcache entry. */ +TimestampTz catcacheclock = 0; + static inline HeapTuple SearchCatCacheInternal(CatCache *cache, int nkeys, Datum v1, Datum v2, @@ -74,6 +84,7 @@ static pg_noinline HeapTuple SearchCatCacheMiss(CatCache *cache, Index hashIndex, Datum v1, Datum v2, Datum v3, Datum v4); +static bool CatCacheCleanupOldEntries(CatCache *cp); static uint32 CatalogCacheComputeHashValue(CatCache *cache, int nkeys, Datum v1, Datum v2, Datum v3, Datum v4); @@ -99,6 +110,12 @@ static void CatCacheFreeKeys(TupleDesc tupdesc, int nkeys, int *attnos, static void CatCacheCopyKeys(TupleDesc tupdesc, int nkeys, int *attnos, Datum *srckeys, Datum *dstkeys); +/* GUC assign function */ +void +assign_catalog_cache_prune_min_age(int newval, void *extra) +{ + catalog_cache_prune_min_age = newval; +} /* * internal support functions @@ -863,6 +880,10 @@ RehashCatCache(CatCache *cp) int newnbuckets; int i; + /* try removing old entries before expanding hash */ + if (CatCacheCleanupOldEntries(cp)) + return; + elog(DEBUG1, "rehashing catalog cache id %d for %s; %d tups, %d buckets", cp->id, cp->cc_relname, cp->cc_ntup, cp->cc_nbuckets); @@ -1264,6 +1285,16 @@ SearchCatCacheInternal(CatCache *cache, */ dlist_move_head(bucket, &ct->cache_elem); + /* + * Prolong life of this entry. Since we want run as less instructions + * as possible and want the branch be stable for performance reasons, + * we don't care of wrap-around and possible false-negative for old + * entries. The window is quite narrow and the counter doesn't gets so + * large while expiration is active. + */ + ct->naccess++; + ct->lastaccess = catcacheclock; + /* * If it's a positive entry, bump its refcount and return it. If it's * negative, we can report failure to the caller. @@ -1425,6 +1456,91 @@ SearchCatCacheMiss(CatCache *cache, return &ct->tuple; } +/* + * CatCacheCleanupOldEntries - Remove infrequently-used entries + * + * Catcache entries happen to be left unused for a long time for several + * reasons. Remove such entries to prevent catcache from bloating. It is based + * on the similar algorithm with buffer eviction. Entries that are accessed + * several times in a certain period live longer than those that have had less + * access in the same duration. + */ +static bool +CatCacheCleanupOldEntries(CatCache *cp) +{ + int nremoved = 0; + int i; + long oldest_ts = catcacheclock; + long age; + int us; + + /* Return immediately if disabled */ + if (likely(catalog_cache_prune_min_age < 0)) + return false; + + /* Don't scan the hash when we know we don't have prunable entries */ + TimestampDifference(cp->cc_oldest_ts, catcacheclock, &age, &us); + if (age < catalog_cache_prune_min_age) + return false; + + /* Scan over the whole hash to find entries to remove */ + for (i = 0 ; i < cp->cc_nbuckets ; i++) + { + dlist_mutable_iter iter; + + dlist_foreach_modify(iter, &cp->cc_bucket[i]) + { + CatCTup *ct = dlist_container(CatCTup, cache_elem, iter.cur); + + /* Don't remove referenced entries */ + if (ct->refcount == 0 && + (ct->c_list == NULL || ct->c_list->refcount == 0)) + { + /* + * Calculate the duration from the time from the last access + * to the "current" time. catcacheclock is updated + * per-statement basis and additionaly udpated periodically + * during a long running query. + */ + TimestampDifference(ct->lastaccess, catcacheclock, &age, &us); + + if (age > catalog_cache_prune_min_age) + { + /* + * Entries that are not accessed after the last pruning + * are removed in that seconds, and their lives are + * prolonged according to how many times they are accessed + * up to three times of the duration. We don't try shrink + * buckets since pruning effectively caps catcache + * expansion in the long term. + */ + ct->naccess = Min(2, ct->naccess); + if (--ct->naccess == 0) + { + CatCacheRemoveCTup(cp, ct); + nremoved++; + + /* don't update oldest_ts by removed entry */ + continue; + } + } + } + + /* update oldest timestamp if the entry remains alive */ + if (ct->lastaccess < oldest_ts) + oldest_ts = ct->lastaccess; + } + } + + cp->cc_oldest_ts = oldest_ts; + + if (nremoved > 0) + elog(DEBUG1, "pruning catalog cache id=%d for %s: removed %d / %d", + cp->id, cp->cc_relname, nremoved, cp->cc_ntup + nremoved); + + return nremoved > 0; +} + /* * ReleaseCatCache * @@ -1888,6 +2004,8 @@ CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, Datum *arguments, ct->dead = false; ct->negative = negative; ct->hash_value = hashValue; + ct->naccess = 1; + ct->lastaccess = catcacheclock; dlist_push_head(&cache->cc_bucket[hashIndex], &ct->cache_elem); diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index bb34630e8e..95213853aa 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -88,6 +88,7 @@ #include "utils/acl.h" #include "utils/builtins.h" #include "utils/bytea.h" +#include "utils/catcache.h" #include "utils/float.h" #include "utils/guc_tables.h" #include "utils/memutils.h" @@ -3399,6 +3400,17 @@ static struct config_int ConfigureNamesInt[] = check_huge_page_size, NULL, NULL }, + { + {"catalog_cache_prune_min_age", PGC_USERSET, RESOURCES_MEM, + gettext_noop("System catalog cache entries that are living unused more than this seconds are considered forremoval."), + gettext_noop("The value of -1 turns off pruning."), + GUC_UNIT_S + }, + &catalog_cache_prune_min_age, + -1, -1, INT_MAX, + NULL, assign_catalog_cache_prune_min_age, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h index f4aa316604..a11736f767 100644 --- a/src/include/utils/catcache.h +++ b/src/include/utils/catcache.h @@ -22,6 +22,7 @@ #include "access/htup.h" #include "access/skey.h" +#include "datatype/timestamp.h" #include "lib/ilist.h" #include "utils/relcache.h" @@ -61,6 +62,7 @@ typedef struct catcache slist_node cc_next; /* list link */ ScanKeyData cc_skey[CATCACHE_MAXKEYS]; /* precomputed key info for heap * scans */ + TimestampTz cc_oldest_ts; /* timestamp of the oldest tuple in the hash */ /* * Keep these at the end, so that compiling catcache.c with CATCACHE_STATS @@ -119,6 +121,8 @@ typedef struct catctup bool dead; /* dead but not yet removed? */ bool negative; /* negative cache entry? */ HeapTupleData tuple; /* tuple management header */ + unsigned int naccess; /* # of access to this entry */ + TimestampTz lastaccess; /* timestamp of the last usage */ /* * The tuple may also be a member of at most one CatCList. (If a single @@ -189,6 +193,22 @@ typedef struct catcacheheader /* this extern duplicates utils/memutils.h... */ extern PGDLLIMPORT MemoryContext CacheMemoryContext; + +/* for guc.c, not PGDLLPMPORT'ed */ +extern int catalog_cache_prune_min_age; + +/* source clock for access timestamp of catcache entries */ +extern TimestampTz catcacheclock; + +/* SetCatCacheClock - set catcache timestamp source clodk */ +static inline void +SetCatCacheClock(TimestampTz ts) +{ + catcacheclock = ts; +} + +extern void assign_catalog_cache_prune_min_age(int newval, void *extra); + extern void CreateCacheMemoryContext(void); extern CatCache *InitCatCache(int id, Oid reloid, Oid indexoid, -- 2.18.4
pgsql-hackers by date: