From a24f17114aa9119dbf899166128d48aaf4106ca7 Mon Sep 17 00:00:00 2001 From: Ashutosh Bapat Date: Mon, 25 Aug 2025 19:23:50 +0530 Subject: [PATCH 1/4] Add a view to read contents of shared buffer lookup table The view exposes the contents of the shared buffer lookup table for debugging, testing and investigation. This helped me in debugging issues where the buffer descriptor array and buffer lookup table were out of sync; either the buffer lookup table had a mapping page->buffer which wasn't present in the buffer descriptor array or a page in the buffer descriptor array didn't have corresponding entry in the buffer lookup table. pg_buffercache doesn't help with those kind of issues. Also doing that under the debugger in very painful. I intend to keep this patch while the rest of the code matures. If it is found useful as a debugging tool, we may consider make it committable and commit it. Author: Ashutosh Bapat --- .../expected/pg_buffercache.out | 39 ++++++++ .../pg_buffercache--1.5--1.6.sql | 24 +++++ contrib/pg_buffercache/pg_buffercache_pages.c | 18 ++++ contrib/pg_buffercache/sql/pg_buffercache.sql | 20 +++++ doc/src/sgml/system-views.sgml | 89 +++++++++++++++++++ src/backend/storage/buffer/buf_table.c | 58 ++++++++++++ src/include/storage/buf_internals.h | 2 + 7 files changed, 250 insertions(+) diff --git a/contrib/pg_buffercache/expected/pg_buffercache.out b/contrib/pg_buffercache/expected/pg_buffercache.out index 9a9216dc7b1..2f27bf34637 100644 --- a/contrib/pg_buffercache/expected/pg_buffercache.out +++ b/contrib/pg_buffercache/expected/pg_buffercache.out @@ -23,6 +23,26 @@ SELECT count(*) > 0 FROM pg_buffercache_usage_counts() WHERE buffers >= 0; t (1 row) +-- Test the buffer lookup table function and count is <= shared_buffers +select count(*) <= (select setting::bigint + from pg_settings + where name = 'shared_buffers') +from pg_buffercache_lookup_table_entries(); + ?column? +---------- + t +(1 row) + +-- Check that pg_buffercache_lookup_table view works and count is <= shared_buffers +select count(*) <= (select setting::bigint + from pg_settings + where name = 'shared_buffers') +from pg_buffercache_lookup_table; + ?column? +---------- + t +(1 row) + -- Check that the functions / views can't be accessed by default. To avoid -- having to create a dedicated user, use the pg_database_owner pseudo-role. SET ROLE pg_database_owner; @@ -34,6 +54,10 @@ SELECT * FROM pg_buffercache_summary(); ERROR: permission denied for function pg_buffercache_summary SELECT * FROM pg_buffercache_usage_counts(); ERROR: permission denied for function pg_buffercache_usage_counts +SELECT * FROM pg_buffercache_lookup_table_entries(); +ERROR: permission denied for function pg_buffercache_lookup_table_entries +SELECT * FROM pg_buffercache_lookup_table; +ERROR: permission denied for view pg_buffercache_lookup_table RESET role; -- Check that pg_monitor is allowed to query view / function SET ROLE pg_monitor; @@ -55,6 +79,21 @@ SELECT count(*) > 0 FROM pg_buffercache_usage_counts(); t (1 row) +RESET role; +-- Check that pg_read_all_stats is allowed to query buffer lookup table +SET ROLE pg_read_all_stats; +SELECT count(*) >= 0 FROM pg_buffercache_lookup_table_entries(); + ?column? +---------- + t +(1 row) + +SELECT count(*) >= 0 FROM pg_buffercache_lookup_table; + ?column? +---------- + t +(1 row) + RESET role; ------ ---- Test pg_buffercache_evict* functions diff --git a/contrib/pg_buffercache/pg_buffercache--1.5--1.6.sql b/contrib/pg_buffercache/pg_buffercache--1.5--1.6.sql index 458f054a691..9bf58567878 100644 --- a/contrib/pg_buffercache/pg_buffercache--1.5--1.6.sql +++ b/contrib/pg_buffercache/pg_buffercache--1.5--1.6.sql @@ -44,3 +44,27 @@ CREATE FUNCTION pg_buffercache_evict_all( OUT buffers_skipped int4) AS 'MODULE_PATHNAME', 'pg_buffercache_evict_all' LANGUAGE C PARALLEL SAFE VOLATILE; + +-- Add the buffer lookup table function +CREATE FUNCTION pg_buffercache_lookup_table_entries( + OUT tablespace oid, + OUT database oid, + OUT relfilenode oid, + OUT forknum int2, + OUT blocknum int8, + OUT bufferid int4) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'pg_buffercache_lookup_table_entries' +LANGUAGE C PARALLEL SAFE VOLATILE; + +-- Create a view for convenient access. +CREATE VIEW pg_buffercache_lookup_table AS + SELECT * FROM pg_buffercache_lookup_table_entries(); + +-- Don't want these to be available to public. +REVOKE ALL ON FUNCTION pg_buffercache_lookup_table_entries() FROM PUBLIC; +REVOKE ALL ON pg_buffercache_lookup_table FROM PUBLIC; + +-- Grant access to monitoring role. +GRANT EXECUTE ON FUNCTION pg_buffercache_lookup_table_entries() TO pg_read_all_stats; +GRANT SELECT ON pg_buffercache_lookup_table TO pg_read_all_stats; diff --git a/contrib/pg_buffercache/pg_buffercache_pages.c b/contrib/pg_buffercache/pg_buffercache_pages.c index ab790533ff6..fe9af45febe 100644 --- a/contrib/pg_buffercache/pg_buffercache_pages.c +++ b/contrib/pg_buffercache/pg_buffercache_pages.c @@ -16,6 +16,7 @@ #include "storage/buf_internals.h" #include "storage/bufmgr.h" #include "utils/rel.h" +#include "utils/tuplestore.h" #define NUM_BUFFERCACHE_PAGES_MIN_ELEM 8 @@ -100,6 +101,7 @@ PG_FUNCTION_INFO_V1(pg_buffercache_usage_counts); PG_FUNCTION_INFO_V1(pg_buffercache_evict); PG_FUNCTION_INFO_V1(pg_buffercache_evict_relation); PG_FUNCTION_INFO_V1(pg_buffercache_evict_all); +PG_FUNCTION_INFO_V1(pg_buffercache_lookup_table_entries); /* Only need to touch memory once per backend process lifetime */ @@ -776,3 +778,19 @@ pg_buffercache_evict_all(PG_FUNCTION_ARGS) PG_RETURN_DATUM(result); } + +/* + * Return lookup table content as a set of records. + */ +Datum +pg_buffercache_lookup_table_entries(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + + InitMaterializedSRF(fcinfo, 0); + + /* Fill the tuplestore */ + BufTableGetContents(rsinfo->setResult, rsinfo->setDesc); + + return (Datum) 0; +} diff --git a/contrib/pg_buffercache/sql/pg_buffercache.sql b/contrib/pg_buffercache/sql/pg_buffercache.sql index 47cca1907c7..569b28aebb9 100644 --- a/contrib/pg_buffercache/sql/pg_buffercache.sql +++ b/contrib/pg_buffercache/sql/pg_buffercache.sql @@ -12,6 +12,18 @@ from pg_buffercache_summary(); SELECT count(*) > 0 FROM pg_buffercache_usage_counts() WHERE buffers >= 0; +-- Test the buffer lookup table function and count is <= shared_buffers +select count(*) <= (select setting::bigint + from pg_settings + where name = 'shared_buffers') +from pg_buffercache_lookup_table_entries(); + +-- Check that pg_buffercache_lookup_table view works and count is <= shared_buffers +select count(*) <= (select setting::bigint + from pg_settings + where name = 'shared_buffers') +from pg_buffercache_lookup_table; + -- Check that the functions / views can't be accessed by default. To avoid -- having to create a dedicated user, use the pg_database_owner pseudo-role. SET ROLE pg_database_owner; @@ -19,6 +31,8 @@ SELECT * FROM pg_buffercache; SELECT * FROM pg_buffercache_pages() AS p (wrong int); SELECT * FROM pg_buffercache_summary(); SELECT * FROM pg_buffercache_usage_counts(); +SELECT * FROM pg_buffercache_lookup_table_entries(); +SELECT * FROM pg_buffercache_lookup_table; RESET role; -- Check that pg_monitor is allowed to query view / function @@ -28,6 +42,12 @@ SELECT buffers_used + buffers_unused > 0 FROM pg_buffercache_summary(); SELECT count(*) > 0 FROM pg_buffercache_usage_counts(); RESET role; +-- Check that pg_read_all_stats is allowed to query buffer lookup table +SET ROLE pg_read_all_stats; +SELECT count(*) >= 0 FROM pg_buffercache_lookup_table_entries(); +SELECT count(*) >= 0 FROM pg_buffercache_lookup_table; +RESET role; + ------ ---- Test pg_buffercache_evict* functions diff --git a/doc/src/sgml/system-views.sgml b/doc/src/sgml/system-views.sgml index 7971498fe75..8f3e2741051 100644 --- a/doc/src/sgml/system-views.sgml +++ b/doc/src/sgml/system-views.sgml @@ -71,6 +71,11 @@ backend memory contexts + + pg_buffer_lookup_table + shared buffer lookup table + + pg_config compile-time configuration parameters @@ -901,6 +906,90 @@ AND c1.path[c2.level] = c2.path[c2.level]; + + <structname>pg_buffer_lookup_table</structname> + + pg_buffer_lookup_table + + + The pg_buffer_lookup_table view exposes the current + contents of the shared buffer lookup table. Each row represents an entry in + the lookup table mapping a relation page to the ID of buffer in which it is + cached. The shared buffer lookup table is locked for a short duration while + reading so as to ensure consistency. This may affect performance if this view + is queried very frequently. + + + <structname>pg_buffer_lookup_table</structname> View + + + + + Column Type + + + Description + + + + + + + tablespace oid + + + OID of the tablespace containing the relation + + + + + database oid + + + OID of the database containing the relation (zero for shared relations) + + + + + relfilenode oid + + + relfilenode identifying the relation + + + + + forknum int2 + + + Fork number within the relation (see ) + + + + + blocknum int8 + + + Block number within the relation + + + + + bufferid int4 + + + ID of the buffer caching the page + + + + +
+ + Access to this view is restricted to members of the + pg_read_all_stats role by default. + +
+ <structname>pg_config</structname> diff --git a/src/backend/storage/buffer/buf_table.c b/src/backend/storage/buffer/buf_table.c index 9d256559bab..f0c39ec2822 100644 --- a/src/backend/storage/buffer/buf_table.c +++ b/src/backend/storage/buffer/buf_table.c @@ -21,7 +21,12 @@ */ #include "postgres.h" +#include "fmgr.h" +#include "funcapi.h" #include "storage/buf_internals.h" +#include "storage/lwlock.h" +#include "utils/rel.h" +#include "utils/builtins.h" /* entry for buffer lookup hashtable */ typedef struct @@ -159,3 +164,56 @@ BufTableDelete(BufferTag *tagPtr, uint32 hashcode) if (!result) /* shouldn't happen */ elog(ERROR, "shared buffer hash table corrupted"); } + +/* + * BufTableGetContents + * Fill the given tuplestore with contents of the shared buffer lookup table + * + * This function is used by pg_buffercache extension to expose buffer lookup + * table contents via SQL. The caller is responsible for setting up the + * tuplestore and result set info. + */ +void +BufTableGetContents(Tuplestorestate *tupstore, TupleDesc tupdesc) +{ +/* Expected number of attributes of the buffer lookup table entry. */ +#define BUFTABLE_CONTENTS_COLS 6 + + HASH_SEQ_STATUS hstat; + BufferLookupEnt *ent; + Datum values[BUFTABLE_CONTENTS_COLS]; + bool nulls[BUFTABLE_CONTENTS_COLS]; + int i; + + memset(nulls, 0, sizeof(nulls)); + + Assert(tupdesc->natts == BUFTABLE_CONTENTS_COLS); + + /* + * Lock all buffer mapping partitions to ensure a consistent view of the + * hash table during the scan. Must grab LWLocks in partition-number order + * to avoid LWLock deadlock. + */ + for (i = 0; i < NUM_BUFFER_PARTITIONS; i++) + LWLockAcquire(BufMappingPartitionLockByIndex(i), LW_SHARED); + + hash_seq_init(&hstat, SharedBufHash); + while ((ent = (BufferLookupEnt *) hash_seq_search(&hstat)) != NULL) + { + values[0] = ObjectIdGetDatum(ent->key.spcOid); + values[1] = ObjectIdGetDatum(ent->key.dbOid); + values[2] = ObjectIdGetDatum(ent->key.relNumber); + values[3] = ObjectIdGetDatum(ent->key.forkNum); + values[4] = Int64GetDatum(ent->key.blockNum); + values[5] = Int32GetDatum(ent->id); + + tuplestore_putvalues(tupstore, tupdesc, values, nulls); + } + + /* + * Release all buffer mapping partition locks in the reverse order so as + * to avoid LWLock deadlock. + */ + for (i = NUM_BUFFER_PARTITIONS - 1; i >= 0; i--) + LWLockRelease(BufMappingPartitionLockByIndex(i)); +} diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index 5400c56a965..519692702a0 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -28,6 +28,7 @@ #include "storage/spin.h" #include "utils/relcache.h" #include "utils/resowner.h" +#include "utils/tuplestore.h" /* * Buffer state is a single 32-bit variable where following data is combined. @@ -520,6 +521,7 @@ extern uint32 BufTableHashCode(BufferTag *tagPtr); extern int BufTableLookup(BufferTag *tagPtr, uint32 hashcode); extern int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id); extern void BufTableDelete(BufferTag *tagPtr, uint32 hashcode); +extern void BufTableGetContents(Tuplestorestate *tupstore, TupleDesc tupdesc); /* localbuf.c */ extern bool PinLocalBuffer(BufferDesc *buf_hdr, bool adjust_usagecount); base-commit: df53fa1c1ebf9bb3e8c17217f7cc1435107067fb -- 2.34.1