Re: Display Pg buffer cache (WIP) - Mailing list pgsql-patches

From Mark Kirkwood
Subject Re: Display Pg buffer cache (WIP)
Date
Msg-id 42297D6E.3000505@coretech.co.nz
Whole thread Raw
In response to Re: Display Pg buffer cache (WIP)  (Neil Conway <neilc@samurai.com>)
Responses Re: Display Pg buffer cache (WIP)
Re: Display Pg buffer cache (WIP)
List pgsql-patches
Neil Conway wrote:
> If you do decide to hold the BufMappingLock, it might make
> sense to:
>
> 1. allocate an array of NBuffers elements
> 2. acquire BufferMappingLock in share mode
> 3. sequentially scan through the buffer pool, copying data into the array
> 4. release the lock
> 5. on each subsequent call to the SRF, format and return an element of
> the array
>
> Which should reduce the time the lock is held. This will require
> allocating NBuffers * size_of_stats memory (where size_of_stats will be
> something like 16 bytes).
>

That is a better approach, so I've used it in this new iteration.

In addition to holding the BufMappingLock, each buffer header is (spin)
locked before examining it, hopefully this is correct - BTW, I like the
new buffer lock design.

I'm still using BuildTupleFromCStrings, so there is considerable use of
sprintf conversion and "temporary" char * stuff. I would like this to be
a bit cleaner, so any suggestions welcome.

regards

Mark


diff -Naur pgsql.orig/src/backend/catalog/system_views.sql pgsql/src/backend/catalog/system_views.sql
--- pgsql.orig/src/backend/catalog/system_views.sql    Fri Mar  4 14:23:09 2005
+++ pgsql/src/backend/catalog/system_views.sql    Fri Mar  4 14:21:33 2005
@@ -277,3 +277,9 @@
     DO INSTEAD NOTHING;

 GRANT SELECT, UPDATE ON pg_settings TO PUBLIC;
+
+-- System view over the pg_cache_dump() set-returning function: one row per
+-- shared buffer, giving the relation/tablespace/database of the cached page
+-- (NULL for unused buffers), whether the page is dirty, and its pin count.
+CREATE VIEW pg_cache_dump AS
+    SELECT D.* FROM pg_cache_dump() AS D
+    (bufferid integer, relfilenode oid, reltablespace oid, reldatabase oid,
+     isdirty bool, refcount int4);
+
diff -Naur pgsql.orig/src/backend/utils/adt/cachedump.c pgsql/src/backend/utils/adt/cachedump.c
--- pgsql.orig/src/backend/utils/adt/cachedump.c    Thu Jan  1 12:00:00 1970
+++ pgsql/src/backend/utils/adt/cachedump.c    Sat Mar  5 20:21:45 2005
@@ -0,0 +1,221 @@
+/*-------------------------------------------------------------------------
+ *
+ * cachedump.c
+ *    display some contents of the buffer cache
+ *
+ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *      $PostgreSQL$
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+#include "funcapi.h"
+#include "catalog/pg_type.h"
+#include "storage/buf_internals.h"
+#include "storage/bufmgr.h"
+#include "utils/relcache.h"
+#include "utils/builtins.h"
+
+
+#define NUM_CACHE_DUMP_ELEM    6
+
+/*
+ * Record structure holding the buffer cache data to be exposed: one
+ * snapshot of a single shared buffer's header, captured while the buffer
+ * header spinlock is held.
+ */
+typedef struct
+{
+    uint32        bufferid;        /* buffer number (1-based Buffer value) */
+    Oid            relfilenode;    /* file node of the cached relation page */
+    Oid            reltablespace;    /* tablespace OID of that relation */
+    Oid            reldatabase;    /* database OID of that relation */
+    bool        isdirty;        /* was BM_DIRTY set at snapshot time? */
+    uint32        refcount;        /* buffer pin count at snapshot time */
+    BlockNumber    blocknum;        /* block number; InvalidBlockNumber marks
+                                 * an unused buffer (not output directly) */
+
+} CacheDumpRec;
+
+
+/*
+ * Function context for data persisting over repeated calls of the SRF.
+ * Allocated in the multi-call memory context on the first call.
+ */
+typedef struct
+{
+    AttInMetadata    *attinmeta;    /* metadata for BuildTupleFromCStrings */
+    CacheDumpRec    *record;    /* NBuffers-element snapshot of the pool */
+    char            *values[NUM_CACHE_DUMP_ELEM];    /* reusable string
+                                                     * buffers for one row */
+} CacheDumpContext;
+
+
+/*
+ * cache_dump - set-returning function exposing the shared buffer cache.
+ *
+ * Returns one row per shared buffer: buffer number, relation
+ * node/tablespace/database, dirty indicator and refcount.  Unused buffers
+ * (blocknum == InvalidBlockNumber) yield NULL for every column except
+ * bufferid.
+ *
+ * On the first call the whole buffer pool is copied into a local array
+ * while BufMappingLock is held in share mode (each buffer header is also
+ * spinlocked while it is read), so the lock is held only for the copy;
+ * each subsequent call merely formats and returns one saved element.
+ */
+Datum
+cache_dump(PG_FUNCTION_ARGS)
+{
+    FuncCallContext        *funcctx;
+    Datum                result;
+    MemoryContext        oldcontext;
+    CacheDumpContext    *fctx;        /* User function context. */
+    TupleDesc            tupledesc;
+    HeapTuple            tuple;
+
+    if (SRF_IS_FIRSTCALL())
+    {
+        RelFileNode    rnode;
+        uint32        i;
+        BufferDesc    *bufHdr;
+
+
+        funcctx = SRF_FIRSTCALL_INIT();
+
+        /* Switch context when allocating stuff to be used in later calls */
+        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+        /* construct a tuple descriptor matching the view's column list */
+        tupledesc = CreateTemplateTupleDesc(NUM_CACHE_DUMP_ELEM, false);
+        TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
+                                    INT4OID, -1, 0);
+        TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
+                                    OIDOID, -1, 0);
+        TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
+                                    OIDOID, -1, 0);
+        TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
+                                    OIDOID, -1, 0);
+        TupleDescInitEntry(tupledesc, (AttrNumber) 5, "isdirty",
+                                    BOOLOID, -1, 0);
+        TupleDescInitEntry(tupledesc, (AttrNumber) 6, "refcount",
+                                    INT4OID, -1, 0);
+
+        /* Generate attribute metadata needed later to produce tuples */
+        funcctx->attinmeta = TupleDescGetAttInMetadata(tupledesc);
+
+        /*
+         * Create a function context for cross-call persistence
+         * and initialize the buffer counters.  One SRF call will be
+         * made per shared buffer.
+         */
+        fctx = (CacheDumpContext *) palloc(sizeof(CacheDumpContext));
+        funcctx->max_calls = NBuffers;
+        funcctx->user_fctx = fctx;
+
+
+        /* Allocate NBuffers worth of CacheDumpRec records. */
+        fctx->record = (CacheDumpRec *) palloc(sizeof(CacheDumpRec) * NBuffers);
+
+        /*
+         * allocate the strings for tuple formation; 3 chars per byte is
+         * ample for a uint32 printed in decimal, plus the trailing NUL
+         */
+        fctx->values[0] = (char *) palloc(3 * sizeof(uint32) + 1);
+        fctx->values[1] = (char *) palloc(3 * sizeof(uint32) + 1);
+        fctx->values[2] = (char *) palloc(3 * sizeof(uint32) + 1);
+        fctx->values[3] = (char *) palloc(3 * sizeof(uint32) + 1);
+        fctx->values[4] = (char *) palloc(10);
+        fctx->values[5] = (char *) palloc(3 * sizeof(uint32) + 1);
+
+
+        /* Return to original context when allocating transient memory */
+        MemoryContextSwitchTo(oldcontext);
+
+
+        /*
+         * Lock Buffer map and scan through all the buffers, saving the
+         * relevant fields in the fctx->record structure.  Holding
+         * BufMappingLock in share mode prevents buffer tags from
+         * changing underneath us during the copy.
+         */
+        LWLockAcquire(BufMappingLock, LW_SHARED);
+
+        for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++)
+        {
+            /* Lock each buffer header before inspecting. */
+            LockBufHdr(bufHdr);
+
+            rnode = bufHdr->tag.rnode;
+
+            fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr);
+            fctx->record[i].relfilenode = rnode.relNode;
+            fctx->record[i].reltablespace = rnode.spcNode;
+            fctx->record[i].reldatabase = rnode.dbNode;
+            fctx->record[i].refcount = bufHdr->refcount;
+            fctx->record[i].blocknum = bufHdr->tag.blockNum;
+            if ( bufHdr->flags & BM_DIRTY)
+            {
+                fctx->record[i].isdirty = true;
+            }
+            else
+            {
+                fctx->record[i].isdirty = false;
+            }
+
+            UnlockBufHdr(bufHdr);
+
+        }
+
+        /* Release Buffer map. */
+        LWLockRelease(BufMappingLock);
+    }
+
+    funcctx = SRF_PERCALL_SETUP();
+
+    /* Get the saved state */
+    fctx = funcctx->user_fctx;
+
+
+    if (funcctx->call_cntr < funcctx->max_calls)
+    {
+        uint32         i = funcctx->call_cntr;
+        char        *values[NUM_CACHE_DUMP_ELEM];
+        int            j;
+
+        /*
+         * Use a temporary values array, initially pointing to
+         * fctx->values, so it can be reassigned w/o losing the storage
+         * for subsequent calls (some entries are set to NULL below).
+         */
+        for (j = 0; j < NUM_CACHE_DUMP_ELEM; j++)
+        {
+            values[j] = fctx->values[j];
+        }
+
+
+        /*
+         * An invalid block number marks an unused buffer: report only
+         * the buffer id and NULL for everything else.
+         */
+        if (fctx->record[i].blocknum == InvalidBlockNumber)
+        {
+
+            sprintf(values[0], "%u", fctx->record[i].bufferid);
+            values[1] = NULL;
+            values[2] = NULL;
+            values[3] = NULL;
+            values[4] = NULL;
+            values[5] = NULL;
+
+        }
+        else
+        {
+
+            sprintf(values[0], "%u", fctx->record[i].bufferid);
+            sprintf(values[1], "%u", fctx->record[i].relfilenode);
+            sprintf(values[2], "%u", fctx->record[i].reltablespace);
+            sprintf(values[3], "%u", fctx->record[i].reldatabase);
+            if (fctx->record[i].isdirty)
+            {
+                strcpy(values[4], "true");
+            }
+            else
+            {
+                strcpy(values[4], "false");
+            }
+            sprintf(values[5], "%u", fctx->record[i].refcount);
+
+        }
+
+
+        /* Build and return the tuple. */
+        tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+        result = HeapTupleGetDatum(tuple);
+
+
+        SRF_RETURN_NEXT(funcctx, result);
+    }
+    else
+        SRF_RETURN_DONE(funcctx);
+}
+
+
diff -Naur pgsql.orig/src/include/catalog/pg_proc.h pgsql/src/include/catalog/pg_proc.h
--- pgsql.orig/src/include/catalog/pg_proc.h    Fri Mar  4 14:24:20 2005
+++ pgsql/src/include/catalog/pg_proc.h    Fri Mar  4 14:21:56 2005
@@ -3615,6 +3615,8 @@
 DATA(insert OID = 2558 ( int4                   PGNSP PGUID 12 f f t f i 1  23 "16" _null_    bool_int4 - _null_ ));
 DESCR("convert boolean to int4");

+/* cache dump */
+DATA(insert OID = 2510 (  pg_cache_dump PGNSP PGUID 12 f f t t v 0 2249 "" _null_ cache_dump - _null_ ));

 /*
  * Symbolic values for provolatile column: these indicate whether the result
diff -Naur pgsql.orig/src/include/utils/builtins.h pgsql/src/include/utils/builtins.h
--- pgsql.orig/src/include/utils/builtins.h    Fri Mar  4 14:24:31 2005
+++ pgsql/src/include/utils/builtins.h    Fri Mar  4 14:22:08 2005
@@ -823,4 +823,7 @@
 /* catalog/pg_conversion.c */
 extern Datum pg_convert_using(PG_FUNCTION_ARGS);

+/* cache dump */
+extern Datum cache_dump(PG_FUNCTION_ARGS);
+
 #endif   /* BUILTINS_H */



pgsql-patches by date:

Previous
From: Neil Conway
Date:
Subject: fork() refactoring
Next
From: Tom Lane
Date:
Subject: Re: fork() refactoring