diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 8bd57f3..13ccf1d 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1661,6 +1661,66 @@ include_dir 'conf.d' + + catalog_cache_prune_min_age (integer) + + catalog_cache_prune_min_age configuration + parameter + + + + + Specifies the minimum amount of unused time, in seconds, after which a + system catalog cache entry is removed. -1 indicates that this feature + is disabled. The value defaults to 300 seconds (5 + minutes). Catalog cache entries that are not used for + this duration can be removed to prevent the cache from filling up with + useless entries. This behaviour is muted until the size of a catalog + cache exceeds catalog_cache_memory_target. + + + + + + catalog_cache_memory_target (integer) + + catalog_cache_memory_target configuration + parameter + + + + + Specifies the maximum amount of memory, in kilobytes, to which a system catalog cache + can expand without pruning. The value defaults to 0, + indicating that age-based pruning is always considered. After + exceeding this size, the catalog cache starts pruning according to + catalog_cache_prune_min_age. If you need to keep + a certain amount of catalog cache entries with intermittent usage, try + increasing this setting. + + + + + + catalog_cache_max_size (integer) + + catalog_cache_max_size configuration + parameter + + + + + Specifies the maximum total amount of memory allowed for all system + catalog caches in kilobytes. The value defaults to 0, indicating that + pruning by this parameter is disabled. After the amount of + memory used by all catalog caches exceeds this size, creating a new cache entry + removes one or more not-recently-used cache entries. This + means that frequent creation of new cache entries may lead to a slight + slowdown of queries. 
+ + + + max_stack_depth (integer) diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 8b4d94c..4e6505e 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -71,6 +71,7 @@ #include "tcop/pquery.h" #include "tcop/tcopprot.h" #include "tcop/utility.h" +#include "utils/catcache.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/ps_status.h" @@ -2584,6 +2585,7 @@ start_xact_command(void) * not desired, the timeout has to be disabled explicitly. */ enable_statement_timeout(); + SetCatCacheClock(GetCurrentStatementStartTimestamp()); } static void @@ -3159,6 +3161,9 @@ ProcessInterrupts(void) if (ParallelMessagePending) HandleParallelMessages(); + + if (CatcacheClockTimeoutPending) + CatcacheClockTimeoutPending = false; } diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index 78dd571..2b0806f 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -39,6 +39,7 @@ #include "utils/rel.h" #include "utils/resowner_private.h" #include "utils/syscache.h" +#include "utils/timeout.h" /* #define CACHEDEBUG */ /* turns DEBUG elogs on */ @@ -61,9 +62,36 @@ #define CACHE_elog(...) #endif +/* GUC variable to define the minimum age, in seconds, of entries that will be + * considered for eviction. This variable is shared among various cache + * mechanisms. + */ +int catalog_cache_prune_min_age = 300; + +/* + * GUC variable to define the minimum cache size, in kilobytes, at which entry eviction is considered. + * This variable is shared among various cache mechanisms. + */ +int catalog_cache_memory_target = 0; + +/* + * GUC variable to limit the total memory used by all catalog caches. Entries are removed when the total + * goes above catalog_cache_max_size in kilobytes; 0 disables this limit. + */ +int catalog_cache_max_size = 0; + +/* + * Minimum interval between two successive moves of a cache entry in LRU list, + * in microseconds. 
+ */ +#define MIN_LRU_UPDATE_INTERVAL 100000 /* 100ms */ + /* Cache management header --- pointer is NULL until created */ static CatCacheHeader *CacheHdr = NULL; +/* Clock used to record the last accessed time of a catcache record. */ +TimestampTz catcacheclock = 0; + static inline HeapTuple SearchCatCacheInternal(CatCache *cache, int nkeys, Datum v1, Datum v2, @@ -469,6 +497,7 @@ CatCacheRemoveCTup(CatCache *cache, CatCTup *ct) /* delink from linked list */ dlist_delete(&ct->cache_elem); + dlist_delete(&ct->lru_node); /* * Free keys when we're dealing with a negative entry, normal entries just @@ -478,6 +507,9 @@ CatCacheRemoveCTup(CatCache *cache, CatCTup *ct) CatCacheFreeKeys(cache->cc_tupdesc, cache->cc_nkeys, cache->cc_keyno, ct->keys); + cache->cc_memusage -= ct->size; + CacheHdr->ch_global_size -= ct->size; + pfree(ct); --cache->cc_ntup; @@ -767,6 +799,7 @@ InitCatCache(int id, MemoryContext oldcxt; size_t sz; int i; + size_t base_size; /* * nbuckets is the initial number of hash buckets to use in this catcache. 
@@ -798,6 +831,8 @@ InitCatCache(int id,
 		CacheHdr = (CatCacheHeader *) palloc(sizeof(CatCacheHeader));
 		slist_init(&CacheHdr->ch_caches);
 		CacheHdr->ch_ntup = 0;
+		dlist_init(&CacheHdr->ch_lru_list);
+		CacheHdr->ch_global_size = 0;
 #ifdef CATCACHE_STATS
 		/* set up to dump stats at backend exit */
 		on_proc_exit(CatCachePrintStats, 0);
@@ -809,8 +844,12 @@ InitCatCache(int id,
 	 *
 	 * Note: we rely on zeroing to initialize all the dlist headers correctly
 	 */
+	base_size = MemoryContextGetUsedspace(CacheMemoryContext);
 	sz = sizeof(CatCache) + PG_CACHE_LINE_SIZE;
 	cp = (CatCache *) CACHELINEALIGN(palloc0(sz));
+	cp->cc_head_alloc_size =
+		MemoryContextGetUsedspace(CacheMemoryContext) - base_size;
+
 	cp->cc_bucket = palloc0(nbuckets * sizeof(dlist_head));
 
 	/*
@@ -830,6 +869,11 @@ InitCatCache(int id,
 	for (i = 0; i < nkeys; ++i)
 		cp->cc_keyno[i] = key[i];
 
+	/* cc_head_alloc_size + consumed size for cc_bucket */
+	cp->cc_memusage =
+		MemoryContextGetUsedspace(CacheMemoryContext) - base_size;
+	CacheHdr->ch_global_size += cp->cc_memusage;
+
 	/*
 	 * new cache is initialized as far as we can go for now. print some
 	 * debugging information, if appropriate.
@@ -846,9 +890,200 @@ InitCatCache(int id,
 	 */
 	MemoryContextSwitchTo(oldcxt);
 
+	/* initialize catcache reference clock if haven't done yet */
+	if (catcacheclock == 0)
+		catcacheclock = GetCurrentTimestamp();
+
 	return cp;
 }
 
+/*
+ * CatCacheCheckByAge - helper for CatCacheCleanupOldEntries
+ *
+ * Decide whether "ct" should be evicted because it has been unused for at
+ * least catalog_cache_prune_min_age seconds.  Only entries that belong to
+ * "cp" are considered.  *prune_by_age is the caller's flag telling whether
+ * age-based pruning is still in progress; it is cleared once an entry of
+ * "cp" that is still too young is found.
+ *
+ * Returns true if the entry should be removed.
+ */
+static bool
+CatCacheCheckByAge(CatCache *cp, CatCTup *ct, bool *prune_by_age)
+{
+	long		entry_age;
+	int			us;
+
+	/*
+	 * Test the flag itself, not the pointer to it: the caller passes the
+	 * address of a local, so testing the pointer would let age-based
+	 * removal run even when age-based pruning is turned off.
+	 */
+	if (!*prune_by_age || ct->my_cache != cp)
+		return false;
+
+	/*
+	 * Calculate the duration from the time of the last access to the
+	 * "current" time. Since catcacheclock is not advanced within a
+	 * transaction, the entries that are accessed within the current
+	 * transaction won't be pruned.
+	 */
+	TimestampDifference(ct->lastaccess, catcacheclock, &entry_age, &us);
+
+	/* this entry is still too young; pruning by age is done */
+	if (entry_age < catalog_cache_prune_min_age)
+	{
+		*prune_by_age = false;
+		return false;
+	}
+
+	/*
+	 * An entry whose access counter is zero is removed.  Otherwise the
+	 * counter is decremented and the entry is kept for now, so an entry
+	 * that has been accessed repeatedly survives more pruning passes than
+	 * one that has not.  We don't try to shrink buckets since pruning
+	 * effectively caps catcache expansion in the long term.
+	 */
+	if (ct->naccess > 0)
+	{
+		ct->naccess--;
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * CatCacheCheckBySize - helper for CatCacheCleanupOldEntries
+ *
+ * Returns true while the total size of all catalog caches is still at or
+ * above catalog_cache_max_size kilobytes, meaning more entries have to be
+ * removed.  Otherwise clears *prune_by_size and returns false.  "ct" is
+ * not examined.
+ */
+static bool
+CatCacheCheckBySize(CatCTup *ct, bool *prune_by_size)
+{
+	if (CacheHdr->ch_global_size >= (Size) catalog_cache_max_size * 1024)
+		return true;
+
+	/* we're satisfied */
+	*prune_by_size = false;
+	return false;
+}
+
+
+/*
+ * CatCacheCleanupOldEntries - Remove infrequently-used entries
+ *
+ * Catcache entries can be left alone for several reasons. We remove them if
+ * they are not accessed for a certain time to prevent catcache from
+ * bloating. The eviction uses an access counter, similarly to buffer
+ * eviction: entries that are accessed several times can live longer than
+ * those that have had less access in the same duration.
+ *
+ * Two independent criteria are applied while scanning the global LRU list.
+ * Age-based pruning removes old entries of "cp" only; it is active when
+ * catalog_cache_prune_min_age >= 0 and the memory usage of "cp" exceeds
+ * catalog_cache_memory_target.  Size-based pruning removes entries of any
+ * cache; it is active when catalog_cache_max_size > 0 and the total size
+ * of all catalog caches has reached it.  Entries that are currently
+ * referenced are never removed.
+ *
+ * Returns true if at least one entry was removed.
+ */
+static bool
+CatCacheCleanupOldEntries(CatCache *cp)
+{
+	static TimestampTz prev_warn_emit = 0;
+	int			nremoved = 0;
+	int			nelems_before = cp->cc_ntup;
+	Size		size_before = CacheHdr->ch_global_size;
+	bool		prune_by_age = false;
+	bool		prune_by_size = false;
+	dlist_mutable_iter iter;
+
+	if (catalog_cache_prune_min_age >= 0 &&
+		cp->cc_memusage > (Size) catalog_cache_memory_target * 1024L)
+		prune_by_age = true;
+
+	if (catalog_cache_max_size > 0 &&
+		CacheHdr->ch_global_size >= (Size) catalog_cache_max_size * 1024)
+		prune_by_size = true;
+
+	/* Return immediately if no pruning is wanted */
+	if (!prune_by_age && !prune_by_size)
+		return false;
+
+	/* Scan over LRU to find entries to remove */
+	dlist_foreach_modify(iter, &CacheHdr->ch_lru_list)
+	{
+		CatCTup    *ct = dlist_container(CatCTup, lru_node, iter.cur);
+		bool		remove_this = false;
+
+		/* We don't remove referenced entry */
+		if (ct->refcount != 0 ||
+			(ct->c_list && ct->c_list->refcount != 0))
+			continue;
+
+		/* check against age. prune within this cache */
+		remove_this = CatCacheCheckByAge(cp, ct, &prune_by_age);
+
+		/* check against global size. removes from all cache */
+		if (prune_by_size && !remove_this)
+			remove_this = CatCacheCheckBySize(ct, &prune_by_size);
+
+		/* exit immediately if all finished */
+		if (!prune_by_age && !prune_by_size)
+			break;
+
+		if (!remove_this)
+			continue;
+
+		/* do the work */
+		CatCacheRemoveCTup(ct->my_cache, ct);
+		nremoved++;
+	}
+
+	if (nremoved > 0)
+		elog(DEBUG1, "pruning catalog cache id=%d for %s: removed entries %d / %d"
+			 ", removed size (kB) %d / %d",
+			 cp->id, cp->cc_relname, nremoved, nelems_before,
+			 (int) ((size_before - CacheHdr->ch_global_size) / 1024),
+			 (int) (size_before / 1024));
+
+	/*
+	 * If prune_by_size still remains true, we couldn't prune cache.
+	 * Warn of too small setting of catalog_cache_max_size. Take 5 seconds
+	 * between messages, using statement start timestamp to avoid frequent
+	 * gettimeofday().
+	 */
+	if (prune_by_size)
+	{
+		if (prev_warn_emit == 0 ||
+			GetCurrentStatementStartTimestamp() - prev_warn_emit > 5000000)
+		{
+			ErrorContextCallback *oldcb;
+
+			/* cancel error context callbacks */
+			oldcb = error_context_stack;
+			error_context_stack = NULL;
+
+			ereport(LOG, (
+				errmsg ("cannot reduce cache size to %d kilobytes, reduced to %d kilobytes",
+						catalog_cache_max_size, (int)(CacheHdr->ch_global_size / 1024)),
+				errdetail ("Consider increasing the configuration parameter \"catalog_cache_max_size\"."),
+				errhidecontext(true),
+				errhidestmt(true)));
+
+			error_context_stack = oldcb;
+
+			prev_warn_emit = GetCurrentStatementStartTimestamp();
+		}
+	}
+	return nremoved > 0;
+}
+
 /*
  * Enlarge a catcache, doubling the number of buckets.
  */
@@ -858,6 +1093,7 @@ RehashCatCache(CatCache *cp)
 	dlist_head *newbucket;
 	int			newnbuckets;
 	int			i;
+	Size		base_size = MemoryContextGetUsedspace(CacheMemoryContext);
 
 	elog(DEBUG1, "rehashing catalog cache id %d for %s; %d tups, %d buckets",
 		 cp->id, cp->cc_relname, cp->cc_ntup, cp->cc_nbuckets);
@@ -866,6 +1102,11 @@ RehashCatCache(CatCache *cp)
 	newnbuckets = cp->cc_nbuckets * 2;
 	newbucket = (dlist_head *) MemoryContextAllocZero(CacheMemoryContext, newnbuckets * sizeof(dlist_head));
 
+	/* recalculate memory usage from the first */
+	CacheHdr->ch_global_size -= cp->cc_memusage;
+	cp->cc_memusage = cp->cc_head_alloc_size +
+		MemoryContextGetUsedspace(CacheMemoryContext) - base_size;
+
 	/* Move all entries from old hash table to new. */
 	for (i = 0; i < cp->cc_nbuckets; i++)
 	{
@@ -878,9 +1119,12 @@ RehashCatCache(CatCache *cp)
 
 			dlist_delete(iter.cur);
 			dlist_push_head(&newbucket[hashIndex], &ct->cache_elem);
+			cp->cc_memusage += ct->size;
 		}
 	}
 
+	CacheHdr->ch_global_size += cp->cc_memusage;
+
 	/* Switch to the new array. 
*/ pfree(cp->cc_bucket); cp->cc_nbuckets = newnbuckets; @@ -1260,6 +1472,21 @@ SearchCatCacheInternal(CatCache *cache, */ dlist_move_head(bucket, &ct->cache_elem); + /* Update access information for pruning */ + if (ct->naccess < 2) + ct->naccess++; + + /* + * We don't want to update the LRU too frequently. + * catalog_cache_prune_min_age can be changed on-session so we + * need to maintain the LRU regardless of catalog_cache_prune_min_age. + */ + if (catcacheclock - ct->lastaccess > MIN_LRU_UPDATE_INTERVAL) + { + ct->lastaccess = catcacheclock; + dlist_move_tail(&CacheHdr->ch_lru_list, &ct->lru_node); + } + /* * If it's a positive entry, bump its refcount and return it. If it's * negative, we can report failure to the caller. @@ -1695,6 +1922,11 @@ SearchCatCacheList(CatCache *cache, /* Now we can build the CatCList entry. */ oldcxt = MemoryContextSwitchTo(CacheMemoryContext); nmembers = list_length(ctlist); + + /* + * Don't spend time counting the list in catcache memory usage, + * since it doesn't live long. 
+ */ cl = (CatCList *) palloc(offsetof(CatCList, members) + nmembers * sizeof(CatCTup *)); @@ -1805,11 +2037,13 @@ CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, Datum *arguments, CatCTup *ct; HeapTuple dtp; MemoryContext oldcxt; + uint64 base_size = MemoryContextGetUsedspace(CacheMemoryContext); /* negative entries have no tuple associated */ if (ntp) { int i; + int tupsize; Assert(!negative); @@ -1828,8 +2062,8 @@ CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, Datum *arguments, /* Allocate memory for CatCTup and the cached tuple in one go */ oldcxt = MemoryContextSwitchTo(CacheMemoryContext); - ct = (CatCTup *) palloc(sizeof(CatCTup) + - MAXIMUM_ALIGNOF + dtp->t_len); + tupsize = sizeof(CatCTup) + MAXIMUM_ALIGNOF + dtp->t_len; + ct = (CatCTup *) palloc(tupsize); ct->tuple.t_len = dtp->t_len; ct->tuple.t_self = dtp->t_self; ct->tuple.t_tableOid = dtp->t_tableOid; @@ -1863,7 +2097,6 @@ CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, Datum *arguments, Assert(negative); oldcxt = MemoryContextSwitchTo(CacheMemoryContext); ct = (CatCTup *) palloc(sizeof(CatCTup)); - /* * Store keys - they'll point into separately allocated memory if not * by-value. @@ -1884,18 +2117,37 @@ CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, Datum *arguments, ct->dead = false; ct->negative = negative; ct->hash_value = hashValue; + ct->naccess = 0; + ct->lastaccess = catcacheclock; + dlist_push_tail(&CacheHdr->ch_lru_list, &ct->lru_node); dlist_push_head(&cache->cc_bucket[hashIndex], &ct->cache_elem); cache->cc_ntup++; CacheHdr->ch_ntup++; + ct->size = MemoryContextGetUsedspace(CacheMemoryContext) - base_size; + cache->cc_memusage += ct->size; + CacheHdr->ch_global_size += ct->size; + + /* increase refcount so that this survives pruning */ + ct->refcount++; + /* - * If the hash table has become too full, enlarge the buckets array. Quite - * arbitrarily, we enlarge when fill factor > 2. 
+ * If the hash table has become too full, try cleanup by removing + * infrequently used entries to make a room for the new entry. If it + * failed, enlarge the bucket array instead. Quite arbitrarily, we try + * this when fill factor > 2. */ - if (cache->cc_ntup > cache->cc_nbuckets * 2) + if (cache->cc_ntup > cache->cc_nbuckets * 2 && + !CatCacheCleanupOldEntries(cache)) RehashCatCache(cache); + /* we may still want to prune by total cache size, check it */ + else if (catalog_cache_max_size > 0 && + CacheHdr->ch_global_size > (Size) catalog_cache_max_size * 1024) + CatCacheCleanupOldEntries(cache); + + ct->refcount--; return ct; } diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c index fd51934..0e8b972 100644 --- a/src/backend/utils/init/globals.c +++ b/src/backend/utils/init/globals.c @@ -32,6 +32,7 @@ volatile sig_atomic_t QueryCancelPending = false; volatile sig_atomic_t ProcDiePending = false; volatile sig_atomic_t ClientConnectionLost = false; volatile sig_atomic_t IdleInTransactionSessionTimeoutPending = false; +volatile sig_atomic_t CatcacheClockTimeoutPending = false; volatile sig_atomic_t ConfigReloadPending = false; volatile uint32 InterruptHoldoffCount = 0; volatile uint32 QueryCancelHoldoffCount = 0; diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 156d147..a43132b 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -81,6 +81,7 @@ #include "tsearch/ts_cache.h" #include "utils/builtins.h" #include "utils/bytea.h" +#include "utils/catcache.h" #include "utils/guc_tables.h" #include "utils/float.h" #include "utils/memutils.h" @@ -2205,6 +2206,39 @@ static struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, + { + {"catalog_cache_prune_min_age", PGC_USERSET, RESOURCES_MEM, + gettext_noop("Sets the minimum unused duration of cache entries before removal."), + gettext_noop("Catalog cache entries that live unused for longer than this seconds are considered to be 
removed."), + GUC_UNIT_S + }, + &catalog_cache_prune_min_age, + 300, -1, INT_MAX, + NULL, NULL, NULL + }, + + { + {"catalog_cache_memory_target", PGC_USERSET, RESOURCES_MEM, + gettext_noop("Sets the minimum syscache size to keep."), + gettext_noop("Time-based cache pruning starts working after exceeding this size."), + GUC_UNIT_KB + }, + &catalog_cache_memory_target, + 0, 0, MAX_KILOBYTES, + NULL, NULL, NULL + }, + + { + {"catalog_cache_max_size", PGC_USERSET, RESOURCES_MEM, + gettext_noop("Sets the maximum size of catalog cache in kilobytes."), + NULL, + GUC_UNIT_KB + }, + &catalog_cache_max_size, + 0, 0, MAX_KILOBYTES, + NULL, NULL, NULL + }, + /* * We use the hopefully-safely-small value of 100kB as the compiled-in * default for max_stack_depth. InitializeGUCOptions will increase it if diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 194f312..2f3f98d 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -128,6 +128,9 @@ #work_mem = 4MB # min 64kB #maintenance_work_mem = 64MB # min 1MB #autovacuum_work_mem = -1 # min 1MB, or -1 to use maintenance_work_mem +#catalog_cache_memory_target = 0kB # in kB +#catalog_cache_prune_min_age = 300s # -1 disables pruning +#catalog_cache_max_size = 0kB # in kB #max_stack_depth = 2MB # min 100kB #shared_memory_type = mmap # the default is the first option # supported by the operating system: diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index c9e3500..33b800e 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -82,6 +82,7 @@ extern PGDLLIMPORT volatile sig_atomic_t InterruptPending; extern PGDLLIMPORT volatile sig_atomic_t QueryCancelPending; extern PGDLLIMPORT volatile sig_atomic_t ProcDiePending; extern PGDLLIMPORT volatile sig_atomic_t IdleInTransactionSessionTimeoutPending; +extern PGDLLIMPORT volatile sig_atomic_t CatcacheClockTimeoutPending; extern PGDLLIMPORT 
volatile sig_atomic_t ConfigReloadPending; extern PGDLLIMPORT volatile sig_atomic_t ClientConnectionLost; diff --git a/src/include/nodes/memnodes.h b/src/include/nodes/memnodes.h index 34babde..c1c0977 100644 --- a/src/include/nodes/memnodes.h +++ b/src/include/nodes/memnodes.h @@ -84,10 +84,10 @@ typedef struct MemoryContextData MemoryContext firstchild; /* head of linked list of children */ MemoryContext prevchild; /* previous child of same parent */ MemoryContext nextchild; /* next child of same parent */ + Size usedspace; /* accumulates consumed memory size */ const char *name; /* context name (just for debugging) */ const char *ident; /* context ID if any (just for debugging) */ MemoryContextCallback *reset_cbs; /* list of reset/delete callbacks */ - Size usedspace; /* accumulates consumed memory size */ } MemoryContextData; /* utils/palloc.h contains typedef struct MemoryContextData *MemoryContext */ diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h index 65d816a..9cfd8b1 100644 --- a/src/include/utils/catcache.h +++ b/src/include/utils/catcache.h @@ -22,6 +22,7 @@ #include "access/htup.h" #include "access/skey.h" +#include "datatype/timestamp.h" #include "lib/ilist.h" #include "utils/relcache.h" @@ -61,6 +62,10 @@ typedef struct catcache slist_node cc_next; /* list link */ ScanKeyData cc_skey[CATCACHE_MAXKEYS]; /* precomputed key info for heap * scans */ + int cc_head_alloc_size;/* consumed memory to allocate this struct */ + int cc_memusage; /* memory usage of this catcache (including + * header part); NOTE(review): int, but compared with Size values */ + int cc_nfreeent; /* # of entries currently not referenced; NOTE(review): not used in the visible code */ /* * Keep these at the end, so that compiling catcache.c with CATCACHE_STATS @@ -119,7 +124,10 @@ typedef struct catctup bool dead; /* dead but not yet removed? */ bool negative; /* negative cache entry? */ HeapTupleData tuple; /* tuple management header */ - + int naccess; /* # of access to this entry, up to 2 */ + TimestampTz lastaccess; /* approx. 
timestamp of the last usage */ + dlist_node lru_node; /* LRU node */ + int size; /* palloc'ed size of this tuple */ /* * The tuple may also be a member of at most one CatCList. (If a single * catcache is list-searched with varying numbers of keys, we may have to @@ -183,12 +191,34 @@ typedef struct catcacheheader { slist_head ch_caches; /* head of list of CatCache structs */ int ch_ntup; /* # of tuples in all caches */ + dlist_head ch_lru_list; /* LRU list of all caches */ + Size ch_global_size; /* sum of memory usage of all caches */ } CatCacheHeader; /* this extern duplicates utils/memutils.h... */ extern PGDLLIMPORT MemoryContext CacheMemoryContext; +/* for guc.c, not PGDLLIMPORT'ed */ +extern int catalog_cache_prune_min_age; +extern int catalog_cache_memory_target; +extern int catalog_cache_max_size; + +/* to use as access timestamp of catcache entries */ +extern TimestampTz catcacheclock; + + +/* + * SetCatCacheClock - set the timestamp used to record catcache accesses. + * It only stores ts into catcacheclock; no timer is started here. + */ +static inline void +SetCatCacheClock(TimestampTz ts) +{ + catcacheclock = ts; +} + +extern void assign_catalog_cache_prune_min_age(int newval, void *extra); extern void CreateCacheMemoryContext(void); extern CatCache *InitCatCache(int id, Oid reloid, Oid indexoid,