Re: Protect syscache from bloating with negative cache entries - Mailing list pgsql-hackers

From Kyotaro Horiguchi
Subject Re: Protect syscache from bloating with negative cache entries
Date
Msg-id 20210127.100747.1621298895510907596.horikyota.ntt@gmail.com
Whole thread Raw
In response to Re: Protect syscache from bloating with negative cache entries  (Heikki Linnakangas <hlinnaka@iki.fi>)
List pgsql-hackers
At Tue, 26 Jan 2021 11:43:21 +0200, Heikki Linnakangas <hlinnaka@iki.fi> wrote in 
> Hi,
> 
> On 19/11/2020 07:25, Kyotaro Horiguchi wrote:
> > Performance measurement on the attached showed better result about
> > searching but maybe worse for cache entry creation.  Each time number
> > is the mean of 10 runs.
> > # Cacache (negative) entry creation
> >             :  time(ms) (% to master)
> > master     :  3965.61    (100.0)
> > patched-off:  4040.93    (101.9)
> > patched-on :  4032.22    (101.7)
> > # Searching negative cache entries
> > master     :  8173.46    (100.0)
> > patched-off:  7983.43    ( 97.7)
> > patched-on :  8049.88    ( 98.5)
> > # Creation, searching and expiration
> > master     :  6393.23    (100.0)
> > patched-off:  6527.94    (102.1)
> > patched-on : 15880.01    (248.4)
> > That is, catcache searching gets faster by 2-3% but creation gets
> > slower by about 2%. If I moved the condition of 2 further up to
> > CatalogCacheCreateEntry(), that degradation reduced to 0.6%.
> > # Cacache (negative) entry creation
> > master      :  3967.45   (100.0)
> > patched-off :  3990.43   (100.6)
> > patched-on  :  4108.96   (103.6)
> > # Searching negative cache entries
> > master      :  8106.53   (100.0)
> > patched-off :  8036.61   ( 99.1)
> > patched-on  :  8058.18   ( 99.4)
> > # Creation, searching and expiration
> > master      :  6395.00   (100.0)
> > patched-off :  6416.57   (100.3)
> > patched-on  : 15830.91   (247.6)
> 
> Can you share the exact script or steps to reproduce these numbers? I
> presume these are from the catcachebench extension, but I can't figure
> out which scenario above corresponds to which catcachebench
> test. Also, catcachebench seems to depend on a bunch of tables being
> created in schema called "test"; what tables did you use for the above
> numbers?

gen_tbl.pl to generate the tables, then run2.sh to run the
benchmark. sumlog.pl to summarize the result of run2.sh.

$ ./gen_tbl.pl | psql postgres
$ ./run2.sh | tee rawresult.txt | ./sumlog.pl

(I found a bug in a benchmark-aid function
(CatalogCacheFlushCatalog2), I repost an updated version soon.)

Simple explanation follows since the scripts are a kind of crappy..

run2.sh:
  LOOPS    : # of execution of catcachebench() in a single run
  USES     : Take the average of this number of fastest executions in a
             single run.
  BINROOT  : Common parent directory of target binaries.
  DATADIR  : Data directory. (shared by all binaries)
  PREC     : FP format for time and percentage in a result.
  TESTS    : comma-separated numbers given to catcachebench.
  
 The "run" function spec

   run "binary-label" <binary-path> <A> <B> <C>
     where A, B and C are the value for catalog_cache_prune_min_age. ""
     means no setting (used for master binary). Currently only C is in
     effect but all the three should be non-empty string to make it
     effective.

 The result output is:

   test |   version   |  n  |    r     | stddev  
  ------+-------------+-----+----------+---------
      1 | patched-off | 1/3 | 14211.96 |  261.19

   test   : # of catcachebench(#)
   version: binary label given to the run function
   n      : USES / LOOPS
   r      : result average time of catcachebench() in milliseconds
   stddev : stddev of

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center

#! /usr/bin/perl
$collist = "";
foreach $i (0..1000) {
    $collist .= sprintf(", c%05d int", $i);
}
$collist = substr($collist, 2);

printf "drop schema if exists test cascade;\n";
printf "create schema test;\n";
foreach $i (0..2999) {
    printf "create table test.t%04d ($collist);\n", $i;
}
#!/bin/bash
LOOPS=3
USES=1
TESTS=1,2,3
BINROOT=/home/horiguti/bin
DATADIR=/home/horiguti/data/data_work
PREC="numeric(10,2)"

/usr/bin/killall postgres
/usr/bin/sleep 3

run() {
    local BINARY=$1
    local PGCTL=$2/bin/pg_ctl
    local PGSQL=$2/bin/postgres
    local PSQL=$2/bin/psql

    if [ "$3" != "" ]; then
      local SETTING1="set catalog_cache_prune_min_age to \"$3\";"
      local SETTING2="set catalog_cache_prune_min_age to \"$4\";"
      local SETTING3="set catalog_cache_prune_min_age to \"$5\";"
    fi

#    ($PGSQL -D $DATADIR 2>&1 > /dev/null)&
    ($PGSQL -D $DATADIR 2>&1 > /dev/null | /usr/bin/sed -e 's/^/# /')&
    /usr/bin/sleep 3
    ${PSQL} postgres <<EOF
create extension if not exists catcachebench;
select catcachebench(0);

$SETTING3

select distinct * from unnest(ARRAY[${TESTS}]) as test,
LATERAL 
  (select '${BINARY}' as version,
          '${USES}/' || (count(r) OVER())::text as n,
          (avg(r) OVER ())::${PREC},
          (stddev(r) OVER ())::${PREC}
   from (select catcachebench(test) as r
         from generate_series(1, ${LOOPS})) r
   order by r limit ${USES}) r

EOF
    $PGCTL --pgdata=$DATADIR stop 2>&1 > /dev/null | /usr/bin/sed -e 's/^/# /'

#    oreport > $BINARY_perf.txt
}

for i in $(seq 0 2); do
run "patched-off" $BINROOT/pgsql_catexp "-1" "-1" "-1"
run "patched-on" $BINROOT/pgsql_catexp "0" "0" "0"
run "master" $BINROOT/pgsql_master_o2 "" "" ""
done

#! /usr/bin/perl

while (<STDIN>) {
#    if (/^\s+([0-9])\s*\|\s*(\w+)\s*\|\s*(\S+)\s*\|\s*([\d.]+)\s*\|\s*(\w+)\s*$/) {
    if (/^\s+([0-9])\s*\|\s*(\S+)\s*\|\s*(\S+)\s*\|\s*([\d.]+)\s*\|\s*([\d.]+)\s*$/) {
        $test = $1;
        $bin = $2;
        $time = $4;
        if (defined $sum{$test}{$bin}) {
            $sum{$test}{$bin} += $time;
            $num{$test}{$bin}++;
        } else {
            $sum{$test}{$bin} = 0;
            $num{$test}{$bin} = 0;
        }
    }
}

foreach $t (sort {$a cmp $b} keys %sum) {
    $master{$t} = $sum{$t}{master} / $num{$t}{master};
}

foreach $t (sort {$a cmp $b} keys %sum) {
    foreach $b (sort {$a cmp $b} keys %{$sum{$t}}) {
        $mean = $sum{$t}{$b} / $num{$t}{$b};
        $ratio = 100.0 * $mean / $master{$t};
        printf("%-13s : %8.2f   (%5.1f) (%d)\n", "$t:$b", $mean, $ratio, $num{$t}{$b});
    }
}
diff --git a/contrib/catcachebench/Makefile b/contrib/catcachebench/Makefile
new file mode 100644
index 0000000000..0478818b25
--- /dev/null
+++ b/contrib/catcachebench/Makefile
@@ -0,0 +1,17 @@
+MODULE_big = catcachebench
+OBJS = catcachebench.o
+
+EXTENSION = catcachebench
+DATA = catcachebench--0.0.sql
+PGFILEDESC = "catcachebench - benchmark for catcache pruning feature"
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/catcachebench
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/catcachebench/catcachebench--0.0.sql b/contrib/catcachebench/catcachebench--0.0.sql
new file mode 100644
index 0000000000..ea9cd62abb
--- /dev/null
+++ b/contrib/catcachebench/catcachebench--0.0.sql
@@ -0,0 +1,14 @@
+/* contrib/catcachebench/catcachebench--0.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION catcachebench" to load this file. \quit
+
+CREATE FUNCTION catcachebench(IN type int)
+RETURNS double precision
+AS 'MODULE_PATHNAME', 'catcachebench'
+LANGUAGE C STRICT VOLATILE;
+
+CREATE FUNCTION catcachereadstats(OUT catid int, OUT reloid oid, OUT searches bigint, OUT hits bigint, OUT neg_hits
bigint)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'catcachereadstats'
+LANGUAGE C STRICT VOLATILE;
diff --git a/contrib/catcachebench/catcachebench.c b/contrib/catcachebench/catcachebench.c
new file mode 100644
index 0000000000..f93d60e721
--- /dev/null
+++ b/contrib/catcachebench/catcachebench.c
@@ -0,0 +1,338 @@
+/*
+ * catcachebench: test code for cache pruning feature
+ */
+/* #define CATCACHE_STATS */
+#include "postgres.h"
+#include "catalog/pg_type.h"
+#include "catalog/pg_statistic.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+#include "libpq/pqsignal.h"
+#include "utils/catcache.h"
+#include "utils/syscache.h"
+#include "utils/timestamp.h"
+
+Oid        tableoids[10000];
+int        ntables = 0;
+int16    attnums[1000];
+int        natts = 0;
+
+PG_MODULE_MAGIC;
+
+double catcachebench1(void);
+double catcachebench2(void);
+double catcachebench3(void);
+void collectinfo(void);
+void catcachewarmup(void);
+
+PG_FUNCTION_INFO_V1(catcachebench);
+PG_FUNCTION_INFO_V1(catcachereadstats);
+
+extern void CatalogCacheFlushCatalog2(Oid catId);
+extern int64 catcache_called;
+extern CatCache *SysCache[];
+
+typedef struct catcachestatsstate
+{
+    TupleDesc tupd;
+    int          catId;
+} catcachestatsstate;
+
+Datum
+catcachereadstats(PG_FUNCTION_ARGS)
+{
+    catcachestatsstate *state_data = NULL;
+    FuncCallContext *fctx;
+
+    if (SRF_IS_FIRSTCALL())
+    {
+        TupleDesc    tupdesc;
+        MemoryContext mctx;
+
+        fctx = SRF_FIRSTCALL_INIT();
+        mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
+
+        state_data = palloc(sizeof(catcachestatsstate));
+
+        /* Build a tuple descriptor for our result type */
+        if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+            elog(ERROR, "return type must be a row type");
+
+        state_data->tupd = tupdesc;
+        state_data->catId = 0;
+
+        fctx->user_fctx = state_data;
+
+        MemoryContextSwitchTo(mctx);
+    }
+
+    fctx = SRF_PERCALL_SETUP();
+    state_data = fctx->user_fctx;
+
+    if (state_data->catId < SysCacheSize)
+    {
+        Datum    values[5];
+        bool    nulls[5];
+        HeapTuple    resulttup;
+        Datum    result;
+        int        catId = state_data->catId++;
+
+        memset(nulls, 0, sizeof(nulls));
+        memset(values, 0, sizeof(values));
+        values[0] = Int16GetDatum(catId);
+        values[1] = ObjectIdGetDatum(SysCache[catId]->cc_reloid);
+#ifdef CATCACHE_STATS        
+        values[2] = Int64GetDatum(SysCache[catId]->cc_searches);
+        values[3] = Int64GetDatum(SysCache[catId]->cc_hits);
+        values[4] = Int64GetDatum(SysCache[catId]->cc_neg_hits);
+#endif
+        resulttup = heap_form_tuple(state_data->tupd, values, nulls);
+        result = HeapTupleGetDatum(resulttup);
+
+        SRF_RETURN_NEXT(fctx, result);
+    }
+
+    SRF_RETURN_DONE(fctx);
+}
+
+Datum
+catcachebench(PG_FUNCTION_ARGS)
+{
+    int        testtype = PG_GETARG_INT32(0);
+    double    ms;
+
+    collectinfo();
+
+    /* flush the catalog -- safe? don't mind. */
+    CatalogCacheFlushCatalog2(StatisticRelationId);
+
+    switch (testtype)
+    {
+    case 0:
+        catcachewarmup(); /* prewarm of syscatalog */
+        PG_RETURN_NULL();
+    case 1:
+        ms = catcachebench1(); break;
+    case 2:
+        ms = catcachebench2(); break;
+    case 3:
+        ms = catcachebench3(); break;
+    default:
+        elog(ERROR, "Invalid test type: %d", testtype);
+    }
+
+    PG_RETURN_DATUM(Float8GetDatum(ms));
+}
+
+/*
+ * fetch all attribute entires of all tables.
+ */
+double
+catcachebench1(void)
+{
+    int t, a;
+    instr_time    start,
+                duration;
+
+    PG_SETMASK(&BlockSig);
+    INSTR_TIME_SET_CURRENT(start);
+    for (t = 0 ; t < ntables ; t++)
+    {
+        for (a = 0 ; a < natts ; a++)
+        {
+            HeapTuple tup;
+
+            tup = SearchSysCache3(STATRELATTINH,
+                                  ObjectIdGetDatum(tableoids[t]),
+                                  Int16GetDatum(attnums[a]),
+                                  BoolGetDatum(false));
+            /* should be null, but.. */
+            if (HeapTupleIsValid(tup))
+                ReleaseSysCache(tup);
+        }
+    }
+    INSTR_TIME_SET_CURRENT(duration);
+    INSTR_TIME_SUBTRACT(duration, start);
+    PG_SETMASK(&UnBlockSig);
+
+    return INSTR_TIME_GET_MILLISEC(duration);
+};
+
+/*
+ * fetch all attribute entires of a table 6000 times.
+ */
+double
+catcachebench2(void)
+{
+    int t, a;
+    instr_time    start,
+                duration;
+
+    PG_SETMASK(&BlockSig);
+    INSTR_TIME_SET_CURRENT(start);
+    for (t = 0 ; t < 240000 ; t++)
+    {
+        for (a = 0 ; a < natts ; a++)
+        {
+            HeapTuple tup;
+
+            tup = SearchSysCache3(STATRELATTINH,
+                                  ObjectIdGetDatum(tableoids[0]),
+                                  Int16GetDatum(attnums[a]),
+                                  BoolGetDatum(false));
+            /* should be null, but.. */
+            if (HeapTupleIsValid(tup))
+                ReleaseSysCache(tup);
+        }
+    }
+    INSTR_TIME_SET_CURRENT(duration);
+    INSTR_TIME_SUBTRACT(duration, start);
+    PG_SETMASK(&UnBlockSig);
+
+    return INSTR_TIME_GET_MILLISEC(duration);
+};
+
+/* SetCatCacheClock - set catcache timestamp source clock */
+uint64 catcacheclock;
+static inline void
+SetCatCacheClock(TimestampTz ts)
+{
+    catcacheclock = (uint64) ts;
+}
+
+/*
+ * fetch all attribute entires of all tables twice with having expiration
+ * happen.
+ */
+double
+catcachebench3(void)
+{
+    const int clock_step = 1000;
+    int i, t, a;
+    instr_time    start,
+                duration;
+
+    PG_SETMASK(&BlockSig);
+    INSTR_TIME_SET_CURRENT(start);
+    for (i = 0 ; i < 4 ; i++)
+    {
+        int ct = clock_step;
+
+        for (t = 0 ; t < ntables ; t++)
+        {
+            /*
+             * catcacheclock is updated by transaction timestamp, so needs to
+             * be updated by other means for this test to work. Here I choosed
+             * to update the clock every 1000 tables scan.
+             */
+            if (--ct < 0)
+            {
+                SetCatCacheClock(GetCurrentTimestamp());
+                ct = clock_step;
+            }
+            for (a = 0 ; a < natts ; a++)
+            {
+                HeapTuple tup;
+
+                tup = SearchSysCache3(STATRELATTINH,
+                                      ObjectIdGetDatum(tableoids[t]),
+                                      Int16GetDatum(attnums[a]),
+                                      BoolGetDatum(false));
+                /* should be null, but.. */
+                if (HeapTupleIsValid(tup))
+                    ReleaseSysCache(tup);
+            }
+        }
+    }
+    INSTR_TIME_SET_CURRENT(duration);
+    INSTR_TIME_SUBTRACT(duration, start);
+    PG_SETMASK(&UnBlockSig);
+
+    return INSTR_TIME_GET_MILLISEC(duration);
+};
+
+void
+catcachewarmup(void)
+{
+    int t, a;
+
+    /* load up catalog tables */
+    for (t = 0 ; t < ntables ; t++)
+    {
+        for (a = 0 ; a < natts ; a++)
+        {
+            HeapTuple tup;
+
+            tup = SearchSysCache3(STATRELATTINH,
+                                  ObjectIdGetDatum(tableoids[t]),
+                                  Int16GetDatum(attnums[a]),
+                                  BoolGetDatum(false));
+            /* should be null, but.. */
+            if (HeapTupleIsValid(tup))
+                ReleaseSysCache(tup);
+        }
+    }
+}
+
+void
+collectinfo(void)
+{
+    int ret;
+    Datum    values[10000];
+    bool    nulls[10000];
+    Oid        types0[] = {OIDOID};
+    int i;
+
+    ntables = 0;
+    natts = 0;
+
+    SPI_connect();
+    /* collect target tables */
+    ret = SPI_execute("select oid from pg_class where relnamespace = (select oid from pg_namespace where nspname =
\'test\')",
+                      true, 0);
+    if (ret != SPI_OK_SELECT)
+        elog(ERROR, "Failed 1");
+    if (SPI_processed == 0)
+        elog(ERROR, "no relation found in schema \"test\"");
+    if (SPI_processed > 10000)
+        elog(ERROR, "too many relation found in schema \"test\"");
+
+    for (i = 0 ; i < SPI_processed ; i++)
+    {
+        heap_deform_tuple(SPI_tuptable->vals[i], SPI_tuptable->tupdesc,
+                          values, nulls);
+        if (nulls[0])
+            elog(ERROR, "Failed 2");
+
+        tableoids[ntables++] = DatumGetObjectId(values[0]);
+    }
+    SPI_finish();
+    elog(DEBUG1, "%d tables found", ntables);
+
+    values[0] = ObjectIdGetDatum(tableoids[0]);
+    nulls[0] = false;
+    SPI_connect();
+    ret = SPI_execute_with_args("select attnum from pg_attribute where attrelid = (select oid from pg_class where oid
=$1)",
 
+                                1, types0, values, NULL, true, 0);
+    if (SPI_processed == 0)
+        elog(ERROR, "no attribute found in table %d", tableoids[0]);
+    if (SPI_processed > 10000)
+        elog(ERROR, "too many relation found in table %d", tableoids[0]);
+    
+    /* collect target attributes. assuming all tables have the same attnums */
+    for (i = 0 ; i < SPI_processed ; i++)
+    {
+        int16 attnum;
+
+        heap_deform_tuple(SPI_tuptable->vals[i], SPI_tuptable->tupdesc,
+                          values, nulls);
+        if (nulls[0])
+            elog(ERROR, "Failed 3");
+        attnum = DatumGetInt16(values[0]);
+
+        if (attnum > 0)
+            attnums[natts++] = attnum;
+    }
+    SPI_finish();
+    elog(DEBUG1, "%d attributes found", natts);
+}
diff --git a/contrib/catcachebench/catcachebench.control b/contrib/catcachebench/catcachebench.control
new file mode 100644
index 0000000000..3fc9d2e420
--- /dev/null
+++ b/contrib/catcachebench/catcachebench.control
@@ -0,0 +1,6 @@
+# catcachebench
+
+comment = 'benchmark for catcache pruning'
+default_version = '0.0'
+module_pathname = '$libdir/catcachebench'
+relocatable = true
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index fa2b49c676..11b94504af 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -740,6 +740,41 @@ CatalogCacheFlushCatalog(Oid catId)
     CACHE_elog(DEBUG2, "end of CatalogCacheFlushCatalog call");
 }
 
+
+/* FUNCTION FOR BENCHMARKING */
+void
+CatalogCacheFlushCatalog2(Oid catId)
+{
+    slist_iter    iter;
+
+    CACHE_elog(DEBUG2, "CatalogCacheFlushCatalog called for %u", catId);
+
+    slist_foreach(iter, &CacheHdr->ch_caches)
+    {
+        CatCache   *cache = slist_container(CatCache, cc_next, iter.cur);
+
+        /* Does this cache store tuples of the target catalog? */
+        if (cache->cc_reloid == catId)
+        {
+            /* Yes, so flush all its contents */
+            ResetCatalogCache(cache);
+
+            /* Tell inval.c to call syscache callbacks for this cache */
+            CallSyscacheCallbacks(cache->id, 0);
+
+            cache->cc_nbuckets = 128;
+            pfree(cache->cc_bucket);
+            cache->cc_bucket =
+                (dlist_head *) MemoryContextAllocZero(CacheMemoryContext,
+                                  cache->cc_nbuckets * sizeof(dlist_head));
+            elog(LOG, "Catcache reset");
+        }
+    }
+
+    CACHE_elog(DEBUG2, "end of CatalogCacheFlushCatalog call");
+}
+/* END: FUNCTION FOR BENCHMARKING */
+
 /*
  *        InitCatCache
  *
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index e4dc4ee34e..b60416ec63 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -994,7 +994,7 @@ static const struct cachedesc cacheinfo[] = {
     }
 };
 
-static CatCache *SysCache[SysCacheSize];
+CatCache *SysCache[SysCacheSize];
 
 static bool CacheInitialized = false;


pgsql-hackers by date:

Previous
From: Bharath Rupireddy
Date:
Subject: Re: [PATCH] postgres_fdw connection caching - cause remote sessions linger till the local session exit
Next
From: Kyotaro Horiguchi
Date:
Subject: Re: Protect syscache from bloating with negative cache entries