From 397777ca5d1512a233d3f0ba8954b0a32421ad4f Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Wed, 29 Sep 2021 15:39:45 -0400 Subject: [PATCH v12 3/4] Add system view tracking accesses to buffers Add pg_stat_buffers, a system view which tracks the number of buffers of a particular type (e.g. shared, local) allocated, written, fsync'd, and extended by each backend type. Some of these should always be zero. For example, a checkpointer backend will not use a BufferAccessStrategy (currently), so buffer type "strategy" for checkpointer will be 0 for all buffer access types (alloc, write, fsync, and extend). All backends increment a counter in their PgBackendStatus when performing a buffer access. On exit, backends send these stats to the stats collector to be persisted. When stats are reset, the backend sending the reset message will loop through and collect all of the live backends' buffer access counters, sending a reset message for each backend type containing its buffer access stats. When receiving this message, the stats collector will 1) save these reset values in an array of "resets" and 2) zero out the exited backends' saved buffer access counters. This is required for accurate stats after a reset without writing to other backends' PgBackendStatus. When the pg_stat_buffers view is queried, sum live backends' stats with saved stats from exited backends and subtract saved reset stats, returning the total. Each row of the view is for a particular backend type and a particular buffer type (e.g. shared buffer accesses by checkpointer) and each column in the view is the total number of buffers of each kind of buffer access (e.g. written). So a cell in the view would be, for example, the number of shared buffers written by checkpointer since the last stats reset. Note that this commit does not add code to increment buffer accesses for all types of buffers. It includes all possible combinations in the stats view but doesn't populate all of them. 
A separate proposed patch [1] which would add wrappers for smgrwrite() and extend() would provide a good location to call pgstat_inc_buffer_access_type() for unbuffered IO and avoid regressions for future users of these functions. TODO: - Remove pg_stats test I added - When finished, catalog bump [1] https://www.postgresql.org/message-id/CAAKRu_aw72w70X1P%3Dba20K8iGUvSkyz7Yk03wPPh3f9WgmcJ3g%40mail.gmail.com Discussion: https://www.postgresql.org/message-id/flat/20210415235954.qcypb4urtovzkat5%40alap3.anarazel.de#724d5cce4bcb587f9167b80a5824bc5c --- doc/src/sgml/monitoring.sgml | 116 ++++++++++++++- src/backend/catalog/system_views.sql | 11 ++ src/backend/postmaster/checkpointer.c | 1 + src/backend/postmaster/pgstat.c | 153 +++++++++++++++++++- src/backend/storage/buffer/bufmgr.c | 26 +++- src/backend/storage/buffer/freelist.c | 23 ++- src/backend/utils/activity/backend_status.c | 64 +++++++- src/backend/utils/adt/pgstatfuncs.c | 116 +++++++++++++++ src/include/catalog/pg_proc.dat | 9 ++ src/include/miscadmin.h | 2 + src/include/pgstat.h | 54 +++++++ src/include/storage/buf_internals.h | 4 +- src/include/utils/backend_status.h | 75 ++++++++++ src/test/regress/expected/rules.out | 8 + src/test/regress/sql/stats.sql | 4 + 15 files changed, 647 insertions(+), 19 deletions(-) diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 2cd8920645..75753c3339 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -444,6 +444,15 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser + + pg_stat_bufferspg_stat_buffers + A row for each buffer type for each backend type showing + statistics about backend buffer activity. See + + pg_stat_buffers for details. + + + pg_stat_walpg_stat_wal One row only, showing statistics about WAL activity. 
See @@ -3478,6 +3487,101 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i + + <structname>pg_stat_buffers</structname> + + + pg_stat_buffers + + + + The pg_stat_buffers view has a row for each buffer + type for each backend type, containing global data for the cluster for that + backend and buffer type. + + + + <structname>pg_stat_buffers</structname> View + + + + + Column Type + + + Description + + + + + + + backend_type text + + + Type of backend (e.g. background worker, autovacuum worker). + + + + + + buffer_type text + + + Type of buffer accessed (e.g. shared). + + + + + + alloc bigint + + + Number of buffers allocated. + + + + + + extend bigint + + + Number of buffers extended. + + + + + + fsync bigint + + + Number of buffers fsynced. + + + + + + write bigint + + + Number of buffers written. + + + + + + stats_reset timestamp with time zone + + + Time at which these statistics were last reset. + + + + +
+ +
+ <structname>pg_stat_wal</structname> @@ -5074,12 +5178,14 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i Resets some cluster-wide statistics counters to zero, depending on the - argument. The argument can be bgwriter to reset - all the counters shown in - the pg_stat_bgwriter + argument. The argument can be bgwriter to reset all + the counters shown in the pg_stat_bgwriter view, archiver to reset all the counters shown in - the pg_stat_archiver view or wal - to reset all the counters shown in the pg_stat_wal view. + the pg_stat_archiver view, + wal to reset all the counters shown in the + pg_stat_wal view, or + buffers to reset all the counters shown in the + pg_stat_buffers view. This function is restricted to superusers by default, but other users diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 55f6e3711d..30280d520b 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1072,6 +1072,17 @@ CREATE VIEW pg_stat_bgwriter AS pg_stat_get_buf_alloc() AS buffers_alloc, pg_stat_get_bgwriter_stat_reset_time() AS stats_reset; +CREATE VIEW pg_stat_buffers AS +SELECT + b.backend_type, + b.buffer_type, + b.alloc, + b.extend, + b.fsync, + b.write, + b.stats_reset +FROM pg_stat_get_buffers_accesses() b; + CREATE VIEW pg_stat_wal AS SELECT w.wal_records, diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index be7366379d..931bdcaa59 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -1104,6 +1104,7 @@ ForwardSyncRequest(const FileTag *ftag, SyncRequestType type) */ if (!AmBackgroundWriterProcess()) CheckpointerShmem->num_backend_fsync++; + pgstat_inc_buffer_access_type(BA_Fsync, Buf_Shared); LWLockRelease(CheckpointerCommLock); return false; } diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index b7d0fbaefd..3673b34f50 100644 --- 
a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -124,9 +124,12 @@ char *pgstat_stat_filename = NULL; char *pgstat_stat_tmpname = NULL; /* - * BgWriter and WAL global statistics counters. - * Stored directly in a stats message structure so they can be sent - * without needing to copy things around. We assume these init to zeroes. + * BgWriter, Checkpointer, WAL, and I/O global statistics counters. I/O global + * statistics on various buffer actions are tracked in PgBackendStatus while a + * backend is alive and then sent to stats collector before a backend exits in + * a PgStat_MsgBufferTypeAccesses. + * All others are stored directly in a stats message structure so they can be + * sent without needing to copy things around. We assume these init to zeroes. */ PgStat_MsgBgWriter PendingBgWriterStats; PgStat_MsgCheckpointer PendingCheckpointerStats; @@ -362,6 +365,7 @@ static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len); static void pgstat_recv_archiver(PgStat_MsgArchiver *msg, int len); static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len); static void pgstat_recv_checkpointer(PgStat_MsgCheckpointer *msg, int len); +static void pgstat_recv_buffer_type_accesses(PgStat_MsgBufferTypeAccesses *msg, int len); static void pgstat_recv_wal(PgStat_MsgWal *msg, int len); static void pgstat_recv_slru(PgStat_MsgSLRU *msg, int len); static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len); @@ -974,6 +978,7 @@ pgstat_report_stat(bool disconnect) /* Now, send function statistics */ pgstat_send_funcstats(); + /* Send WAL statistics */ pgstat_send_wal(true); @@ -1452,6 +1457,8 @@ pgstat_reset_shared_counters(const char *target) msg.m_resettarget = RESET_ARCHIVER; else if (strcmp(target, "bgwriter") == 0) msg.m_resettarget = RESET_BGWRITER; + else if (strcmp(target, "buffers") == 0) + msg.m_resettarget = RESET_BUFFERS; else if (strcmp(target, "wal") == 0) msg.m_resettarget = RESET_WAL; else @@ -1461,7 +1468,25 @@ 
pgstat_reset_shared_counters(const char *target) errhint("Target must be \"archiver\", \"bgwriter\", or \"wal\"."))); pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETSHAREDCOUNTER); - pgstat_send(&msg, sizeof(msg)); + + if (msg.m_resettarget == RESET_BUFFERS) + { + int backend_type; + PgStatBufferTypeAccesses accesses[BACKEND_NUM_TYPES]; + + memset(accesses, 0, sizeof(accesses)); + pgstat_report_live_backend_accesses(accesses); + + for (backend_type = 1; backend_type < BACKEND_NUM_TYPES; backend_type++) + { + msg.m_backend_resets.backend_type = backend_type; + memcpy(&msg.m_backend_resets.bta, &accesses[backend_type], sizeof(msg.m_backend_resets.bta)); + pgstat_send(&msg, sizeof(msg)); + } + } + else + pgstat_send(&msg, sizeof(msg)); + } /* ---------- @@ -2760,6 +2785,20 @@ pgstat_twophase_postabort(TransactionId xid, uint16 info, rec->tuples_inserted + rec->tuples_updated; } +/* + * + * Support function for SQL-callable pgstat* functions. Returns a pointer to + * the BackendAccesses structure tracking buffer access statistics for both + * exited backends and reset arithmetic. + */ +PgStat_BackendAccesses * +pgstat_fetch_exited_backend_buffers(void) +{ + backend_read_statsfile(); + + return &globalStats.buffers; +} + /* ---------- * pgstat_fetch_stat_dbentry() - @@ -2999,6 +3038,14 @@ pgstat_shutdown_hook(int code, Datum arg) { Assert(!pgstat_is_shutdown); + /* + * Only need to send stats on buffer accesses when a process exits, as + * pg_stat_get_buffers_accesses() will read from live backends' + * PgBackendStatus and then sum this with totals from exited backends + * persisted by the stats collector. + */ + pgstat_send_buffers(); + /* * If we got as far as discovering our own database ID, we can report what * we did to the collector. Otherwise, we'd be sending an invalid @@ -3092,6 +3139,31 @@ pgstat_send(void *msg, int len) #endif } +/* + * Add live buffer access stats for all buffer types (e.g. 
shared, local) to + * those in the equivalent stats structure for exited backends. Note that this + * adds and doesn't set, so the destination buffer access stats should be + * zeroed out by the caller initially. This would commonly be used to transfer + * all buffer access stats for all buffer types for a particular backend type + * to the pgstats structure. + */ +void +pgstat_add_buffer_type_accesses(PgStatBufferAccesses *dest, PgBufferAccesses *src, int buffer_num_types) +{ + int buffer_type; + + for (buffer_type = 0; buffer_type < buffer_num_types; buffer_type++) + { + dest->allocs += pg_atomic_read_u64(&src->allocs); + dest->extends += pg_atomic_read_u64(&src->extends); + dest->fsyncs += pg_atomic_read_u64(&src->fsyncs); + dest->writes += pg_atomic_read_u64(&src->writes); + dest++; + src++; + } + +} + /* ---------- * pgstat_send_archiver() - * @@ -3148,6 +3220,32 @@ pgstat_send_bgwriter(void) MemSet(&PendingBgWriterStats, 0, sizeof(PendingBgWriterStats)); } +/* + * Before exiting, a backend sends its buffer access statistics to the + * collector so that they may be persisted + */ +void +pgstat_send_buffers(void) +{ + PgStat_MsgBufferTypeAccesses msg; + + PgBackendStatus *beentry = MyBEEntry; + + if (!beentry) + return; + + memset(&msg, 0, sizeof(msg)); + msg.backend_type = beentry->st_backendType; + + pgstat_add_buffer_type_accesses(msg.bta.bt_accesses, + (PgBufferAccesses *) &beentry->buffer_access_stats, + BUFFER_NUM_TYPES); + + pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_BUFFER_ACTIONS); + pgstat_send(&msg, sizeof(msg)); +} + + /* ---------- * pgstat_send_checkpointer() - * @@ -3522,6 +3620,10 @@ PgstatCollectorMain(int argc, char *argv[]) pgstat_recv_checkpointer(&msg.msg_checkpointer, len); break; + case PGSTAT_MTYPE_BUFFER_ACTIONS: + pgstat_recv_buffer_type_accesses(&msg.msg_buffer_accesses, len); + break; + case PGSTAT_MTYPE_WAL: pgstat_recv_wal(&msg.msg_wal, len); break; @@ -5221,10 +5323,30 @@ pgstat_recv_resetsharedcounter(PgStat_MsgResetsharedcounter 
*msg, int len) { if (msg->m_resettarget == RESET_BGWRITER) { - /* Reset the global, bgwriter and checkpointer statistics for the cluster. */ - memset(&globalStats, 0, sizeof(globalStats)); + /* + * Reset the global, bgwriter and checkpointer statistics for the + * cluster. + */ + memset(&globalStats.checkpointer, 0, sizeof(globalStats.checkpointer)); + memset(&globalStats.bgwriter, 0, sizeof(globalStats.bgwriter)); globalStats.bgwriter.stat_reset_timestamp = GetCurrentTimestamp(); } + else if (msg->m_resettarget == RESET_BUFFERS) + { + BackendType backend_type = msg->m_backend_resets.backend_type; + + /* + * Though globalStats.buffers only needs to be reset once, doing so + * for every message is less brittle and the extra cost is + * irrelevant-- given how often stats are reset. + */ + memset(&globalStats.buffers.accesses, 0, sizeof(globalStats.buffers.accesses)); + globalStats.buffers.stat_reset_timestamp = GetCurrentTimestamp(); + + memcpy(&globalStats.buffers.resets[backend_type], + &msg->m_backend_resets.bta.bt_accesses, sizeof(msg->m_backend_resets.bta.bt_accesses)); + + } else if (msg->m_resettarget == RESET_ARCHIVER) { /* Reset the archiver statistics for the cluster. 
*/ @@ -5512,6 +5634,25 @@ pgstat_recv_checkpointer(PgStat_MsgCheckpointer *msg, int len) globalStats.checkpointer.buf_fsync_backend += msg->m_buf_fsync_backend; } +static void +pgstat_recv_buffer_type_accesses(PgStat_MsgBufferTypeAccesses *msg, int len) +{ + int buffer_type; + PgStatBufferAccesses *src_buffer_accesses = msg->bta.bt_accesses; + PgStatBufferAccesses *dest_buffer_accesses = globalStats.buffers.accesses[msg->backend_type].bt_accesses; + + for (buffer_type = 0; buffer_type < BUFFER_NUM_TYPES; buffer_type++) + { + PgStatBufferAccesses *src = &src_buffer_accesses[buffer_type]; + PgStatBufferAccesses *dest = &dest_buffer_accesses[buffer_type]; + + dest->allocs += src->allocs; + dest->extends += src->extends; + dest->fsyncs += src->fsyncs; + dest->writes += src->writes; + } +} + /* ---------- * pgstat_recv_wal() - * diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index e88e4e918b..58bf60425b 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -972,6 +972,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, if (isExtend) { + pgstat_inc_buffer_access_type(BA_Extend, Buf_Shared); /* new buffers are zero-filled */ MemSet((char *) bufBlock, 0, BLCKSZ); /* don't set checksum for all-zero page */ @@ -1172,6 +1173,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, /* Loop here in case we have to try another victim buffer */ for (;;) { + bool from_ring; + /* * Ensure, while the spinlock's not yet held, that there's a free * refcount entry. @@ -1182,7 +1185,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, * Select a victim buffer. The buffer is returned with its header * spinlock still held! 
*/ - buf = StrategyGetBuffer(strategy, &buf_state); + buf = StrategyGetBuffer(strategy, &buf_state, &from_ring); Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0); @@ -1219,6 +1222,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, if (LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED)) { + BufferType buftype; + /* * If using a nondefault strategy, and writing the buffer * would require a WAL flush, let the strategy decide whether @@ -1236,7 +1241,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, UnlockBufHdr(buf, buf_state); if (XLogNeedsFlush(lsn) && - StrategyRejectBuffer(strategy, buf)) + StrategyRejectBuffer(strategy, buf, &from_ring)) { /* Drop lock/pin and loop around for another buffer */ LWLockRelease(BufferDescriptorGetContentLock(buf)); @@ -1245,6 +1250,21 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, } } + /* + * When a strategy is in use, if the dirty buffer was selected + * from the strategy ring and we did not bother checking the + * freelist or doing a clock sweep to look for a clean shared + * buffer to use, the write will be counted as a strategy + * write. However, if the dirty buffer was obtained from the + * freelist or a clock sweep, it is counted as a regular + * write. When a strategy is not in use, at this point, the + * write can only be a "regular" write of a dirty buffer. + */ + + buftype = from_ring ? Buf_Strategy : Buf_Shared; + pgstat_inc_buffer_access_type(BA_Write, buftype); + + /* OK, do the I/O */ TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_START(forkNum, blockNum, smgr->smgr_rnode.node.spcNode, @@ -2552,6 +2572,8 @@ SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context) * Pin it, share-lock it, write it. (FlushBuffer will do nothing if the * buffer is clean by the time we've locked it.) 
*/ + + pgstat_inc_buffer_access_type(BA_Write, Buf_Shared); PinBuffer_Locked(bufHdr); LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED); diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index 6be80476db..574965212b 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -19,6 +19,7 @@ #include "storage/buf_internals.h" #include "storage/bufmgr.h" #include "storage/proc.h" +#include "utils/backend_status.h" #define INT_ACCESS_ONCE(var) ((int)(*((volatile int *)&(var)))) @@ -198,7 +199,7 @@ have_free_buffer(void) * return the buffer with the buffer header spinlock still held. */ BufferDesc * -StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state) +StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_ring) { BufferDesc *buf; int bgwprocno; @@ -212,7 +213,8 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state) if (strategy != NULL) { buf = GetBufferFromRing(strategy, buf_state); - if (buf != NULL) + *from_ring = buf != NULL; + if (*from_ring) return buf; } @@ -247,6 +249,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state) * the rate of buffer consumption. Note that buffers recycled by a * strategy object are intentionally not counted here. */ + pgstat_inc_buffer_access_type(BA_Alloc, Buf_Shared); pg_atomic_fetch_add_u32(&StrategyControl->numBufferAllocs, 1); /* @@ -683,8 +686,14 @@ AddBufferToRing(BufferAccessStrategy strategy, BufferDesc *buf) * if this buffer should be written and re-used. */ bool -StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf) +StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool *from_ring) { + /* + * If we decide to use the dirty buffer selected by StrategyGetBuffer(), + * then ensure that we count it as such in pg_stat_buffers view. 
+ */ + *from_ring = true; + /* We only do this in bulkread mode */ if (strategy->btype != BAS_BULKREAD) return false; @@ -700,5 +709,13 @@ StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf) */ strategy->buffers[strategy->current] = InvalidBuffer; + /* + * Since we will not be writing out a dirty buffer from the ring, set + * from_ring to false so that the caller does not count this write as a + * "strategy write" and can do proper bookkeeping for pg_stat_buffers. + */ + *from_ring = false; + + return true; } diff --git a/src/backend/utils/activity/backend_status.c b/src/backend/utils/activity/backend_status.c index 7229598822..d02326423a 100644 --- a/src/backend/utils/activity/backend_status.c +++ b/src/backend/utils/activity/backend_status.c @@ -236,6 +236,24 @@ CreateSharedBackendStatus(void) #endif } +const char * +GetBufferTypeDesc(BufferType bufferType) +{ + + switch (bufferType) + { + case Buf_Direct: + return "direct"; + case Buf_Local: + return "local"; + case Buf_Shared: + return "shared"; + case Buf_Strategy: + return "strategy"; + } + return "unknown buffer type"; +} + /* * Initialize pgstats backend activity state, and set up our on-proc-exit * hook. Called from InitPostgres and AuxiliaryProcessMain. For auxiliary @@ -279,7 +297,7 @@ pgstat_beinit(void) * pgstat_bestart() - * * Initialize this backend's entry in the PgBackendStatus array. - * Called from InitPostgres. 
+ * Called from InitPostgres and AuxiliaryProcessMain * * Apart from auxiliary processes, MyBackendId, MyDatabaseId, * session userid, and application_name must be set for a @@ -293,6 +311,7 @@ pgstat_bestart(void) { volatile PgBackendStatus *vbeentry = MyBEEntry; PgBackendStatus lbeentry; + int buffer_type; #ifdef USE_SSL PgBackendSSLStatus lsslstatus; #endif @@ -399,6 +418,15 @@ pgstat_bestart(void) lbeentry.st_progress_command = PROGRESS_COMMAND_INVALID; lbeentry.st_progress_command_target = InvalidOid; lbeentry.st_query_id = UINT64CONST(0); + for (buffer_type = 0; buffer_type < BUFFER_NUM_TYPES; buffer_type++) + { + PgBufferAccesses *accesses = &lbeentry.buffer_access_stats[buffer_type]; + + pg_atomic_init_u64(&accesses->allocs, 0); + pg_atomic_init_u64(&accesses->extends, 0); + pg_atomic_init_u64(&accesses->fsyncs, 0); + pg_atomic_init_u64(&accesses->writes, 0); + } /* * we don't zero st_progress_param here to save cycles; nobody should @@ -621,6 +649,34 @@ pgstat_report_activity(BackendState state, const char *cmd_str) PGSTAT_END_WRITE_ACTIVITY(beentry); } +/* + * Iterate through BackendStatusArray and capture live backends' buffer access + * stats, adding them to that backend type's member of the backend_accesses + * structure. + */ +void +pgstat_report_live_backend_accesses(PgStatBufferTypeAccesses *backend_accesses) +{ + int i; + PgBackendStatus *beentry = BackendStatusArray; + + /* + * Loop through live backends and capture reset values + */ + for (i = 0; i < MaxBackends + NUM_AUXPROCTYPES; i++) + { + beentry++; + /* Don't count dead backends. 
They should already be counted */ + if (beentry->st_procpid == 0) + continue; + + pgstat_add_buffer_type_accesses(backend_accesses[beentry->st_backendType].bt_accesses, + (PgBufferAccesses *) beentry->buffer_access_stats, + BUFFER_NUM_TYPES); + + } +} + /* -------- * pgstat_report_query_id() - * @@ -1046,6 +1102,12 @@ pgstat_get_my_query_id(void) } +PgBackendStatus * +pgstat_fetch_backend_statuses(void) +{ + return BackendStatusArray; +} + /* ---------- * pgstat_fetch_stat_beentry() - * diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 513f5aecf6..674a2167ec 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1781,6 +1781,122 @@ pg_stat_get_buf_alloc(PG_FUNCTION_ARGS) PG_RETURN_INT64(pgstat_fetch_stat_bgwriter()->buf_alloc); } +Datum +pg_stat_get_buffers_accesses(PG_FUNCTION_ARGS) +{ +#define NROWS ((BACKEND_NUM_TYPES - 1) * BUFFER_NUM_TYPES) + PgStat_BackendAccesses *backend_accesses; + int i; + int buffer_type, backend_type; + Datum reset_time; + PgBackendStatus *beentry; + TupleDesc tupdesc; + + Tuplestorestate *tupstore = pg_stat_make_tuplestore(fcinfo, &tupdesc); + + /* + * When adding a new column to the pg_stat_buffers view, add a new enum + * value here above COLUMN_LENGTH. + */ + enum + { + COLUMN_BACKEND_TYPE, + COLUMN_BUFFER_TYPE, + COLUMN_ALLOCS, + COLUMN_EXTENDS, + COLUMN_FSYNCS, + COLUMN_WRITES, + COLUMN_RESET_TIME, + COLUMN_LENGTH, + }; + + Datum all_values[NROWS][COLUMN_LENGTH]; + bool all_nulls[NROWS][COLUMN_LENGTH]; + + memset(all_values, 0, sizeof(all_values)); + memset(all_nulls, 0, sizeof(all_nulls)); + + /* + * Loop through all live backends and count their buffer accesses for each + * buffer type + */ + beentry = pgstat_fetch_backend_statuses(); + + for (i = 0; i < MaxBackends + NUM_AUXPROCTYPES; i++) + { + PgBufferAccesses *buffer_accesses; + + beentry++; + /* Don't count dead backends. 
They should already be counted */ + if (beentry->st_procpid == 0) + continue; + + buffer_accesses = beentry->buffer_access_stats; + + for (buffer_type = 0; buffer_type < BUFFER_NUM_TYPES; buffer_type++) + { + int rownum = (beentry->st_backendType - 1) * BUFFER_NUM_TYPES + buffer_type; + Datum *values = all_values[rownum]; + + /* + * COLUMN_RESET_TIME, COLUMN_BACKEND_TYPE, and COLUMN_BUFFER_TYPE + * will all be set when looping through exited backends array + */ + values[COLUMN_ALLOCS] += pg_atomic_read_u64(&buffer_accesses->allocs); + values[COLUMN_EXTENDS] += pg_atomic_read_u64(&buffer_accesses->extends); + values[COLUMN_FSYNCS] += pg_atomic_read_u64(&buffer_accesses->fsyncs); + values[COLUMN_WRITES] += pg_atomic_read_u64(&buffer_accesses->writes); + buffer_accesses++; + } + } + + /* Add stats from all exited backends */ + backend_accesses = pgstat_fetch_exited_backend_buffers(); + + reset_time = TimestampTzGetDatum(backend_accesses->stat_reset_timestamp); + + /* 0 is not a valid BackendType */ + for (backend_type = 1; backend_type < BACKEND_NUM_TYPES; backend_type++) + { + PgStatBufferAccesses *buffer_accesses = backend_accesses->accesses[backend_type].bt_accesses; + PgStatBufferAccesses *resets = backend_accesses->resets[backend_type].bt_accesses; + + Datum backend_type_desc = CStringGetTextDatum(GetBackendTypeDesc(backend_type)); + + for (buffer_type = 0; buffer_type < BUFFER_NUM_TYPES; buffer_type++) + { + /* + * Subtract 1 from backend_type to avoid having rows for B_INVALID + * BackendType + */ + Datum *values = all_values[(backend_type - 1) * BUFFER_NUM_TYPES + buffer_type]; + + values[COLUMN_BACKEND_TYPE] = backend_type_desc; + values[COLUMN_BUFFER_TYPE] = CStringGetTextDatum(GetBufferTypeDesc(buffer_type)); + values[COLUMN_ALLOCS] = values[COLUMN_ALLOCS] + buffer_accesses->allocs - resets->allocs; + values[COLUMN_EXTENDS] = values[COLUMN_EXTENDS] + buffer_accesses->extends - resets->extends; + values[COLUMN_FSYNCS] = values[COLUMN_FSYNCS] + 
buffer_accesses->fsyncs - resets->fsyncs; + values[COLUMN_WRITES] = values[COLUMN_WRITES] + buffer_accesses->writes - resets->writes; + values[COLUMN_RESET_TIME] = reset_time; + buffer_accesses++; + resets++; + } + } + + for (i = 0; i < NROWS; i++) + { + Datum *values = all_values[i]; + bool *nulls = all_nulls[i]; + + tuplestore_putvalues(tupstore, tupdesc, values, nulls); + } + + /* clean up and return the tuplestore */ + tuplestore_donestoring(tupstore); + + return (Datum) 0; +} + /* * Returns statistics of WAL activity */ diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index d068d6532e..54661e2b5f 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5642,6 +5642,15 @@ proname => 'pg_stat_get_buf_alloc', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => '', prosrc => 'pg_stat_get_buf_alloc' }, +{ oid => '8459', descr => 'statistics: counts of various types of accesses of buffers done by each backend type', + proname => 'pg_stat_get_buffers_accesses', provolatile => 's', proisstrict => 'f', + prorows => '52', proretset => 't', + proparallel => 'r', prorettype => 'record', proargtypes => '', + proallargtypes => '{text,text,int8,int8,int8,int8,timestamptz}', + proargmodes => '{o,o,o,o,o,o,o}', + proargnames => '{backend_type,buffer_type,alloc,extend,fsync,write,stats_reset}', + prosrc => 'pg_stat_get_buffers_accesses' }, + { oid => '1136', descr => 'statistics: information about WAL activity', proname => 'pg_stat_get_wal', proisstrict => 'f', provolatile => 's', proparallel => 'r', prorettype => 'record', proargtypes => '', diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 90a3016065..6785fb3813 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -338,6 +338,8 @@ typedef enum BackendType B_LOGGER, } BackendType; +#define BACKEND_NUM_TYPES (B_LOGGER + 1) + extern BackendType MyBackendType; extern const char *GetBackendTypeDesc(BackendType 
backendType); diff --git a/src/include/pgstat.h b/src/include/pgstat.h index bcd3588ea2..2e3dfcc01d 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -72,6 +72,7 @@ typedef enum StatMsgType PGSTAT_MTYPE_ARCHIVER, PGSTAT_MTYPE_BGWRITER, PGSTAT_MTYPE_CHECKPOINTER, + PGSTAT_MTYPE_BUFFER_ACTIONS, PGSTAT_MTYPE_WAL, PGSTAT_MTYPE_SLRU, PGSTAT_MTYPE_FUNCSTAT, @@ -138,6 +139,7 @@ typedef enum PgStat_Shared_Reset_Target { RESET_ARCHIVER, RESET_BGWRITER, + RESET_BUFFERS, RESET_WAL } PgStat_Shared_Reset_Target; @@ -331,6 +333,51 @@ typedef struct PgStat_MsgDropdb } PgStat_MsgDropdb; +/* + * Structure for counting all types of buffer accesses in the stats collector + */ +typedef struct PgStatBufferAccesses +{ + PgStat_Counter allocs; + PgStat_Counter extends; + PgStat_Counter fsyncs; + PgStat_Counter writes; +} PgStatBufferAccesses; + +/* + * Structure for counting all buffer accesses of all types of buffers. + */ +typedef struct PgStatBufferTypeAccesses +{ + PgStatBufferAccesses bt_accesses[BUFFER_NUM_TYPES]; +} PgStatBufferTypeAccesses; + +/* + * Sent by a backend to the stats collector to report all buffer accesses of + * all types of buffers for a given type of a backend. This will happen when + * the backend exits or when stats are reset. + */ +typedef struct PgStat_MsgBufferTypeAccesses +{ + PgStat_MsgHdr m_hdr; + + BackendType backend_type; + PgStatBufferTypeAccesses bta; +} PgStat_MsgBufferTypeAccesses; + +/* + * Structure used by stats collector to keep track of all types of exited + * backends' buffer accesses for all types of buffers as well as all stats from + * live backends at the time of stats reset. resets is populated using a reset + * message sent to the stats collector. 
+ */ +typedef struct PgStat_BackendAccesses +{ + TimestampTz stat_reset_timestamp; + PgStatBufferTypeAccesses accesses[BACKEND_NUM_TYPES]; + PgStatBufferTypeAccesses resets[BACKEND_NUM_TYPES]; +} PgStat_BackendAccesses; + /* ---------- * PgStat_MsgResetcounter Sent by the backend to tell the collector * to reset counters @@ -351,6 +398,7 @@ typedef struct PgStat_MsgResetsharedcounter { PgStat_MsgHdr m_hdr; PgStat_Shared_Reset_Target m_resettarget; + PgStat_MsgBufferTypeAccesses m_backend_resets; } PgStat_MsgResetsharedcounter; /* ---------- @@ -703,6 +751,7 @@ typedef union PgStat_Msg PgStat_MsgArchiver msg_archiver; PgStat_MsgBgWriter msg_bgwriter; PgStat_MsgCheckpointer msg_checkpointer; + PgStat_MsgBufferTypeAccesses msg_buffer_accesses; PgStat_MsgWal msg_wal; PgStat_MsgSLRU msg_slru; PgStat_MsgFuncstat msg_funcstat; @@ -879,6 +928,7 @@ typedef struct PgStat_GlobalStats PgStat_CheckpointerStats checkpointer; PgStat_BgWriterStats bgwriter; + PgStat_BackendAccesses buffers; } PgStat_GlobalStats; /* @@ -1116,8 +1166,11 @@ extern void pgstat_twophase_postcommit(TransactionId xid, uint16 info, extern void pgstat_twophase_postabort(TransactionId xid, uint16 info, void *recdata, uint32 len); +extern void pgstat_add_buffer_type_accesses(PgStatBufferAccesses *dest, + PgBufferAccesses *src, int buffer_num_types); extern void pgstat_send_archiver(const char *xlog, bool failed); extern void pgstat_send_bgwriter(void); +extern void pgstat_send_buffers(void); extern void pgstat_send_checkpointer(void); extern void pgstat_send_wal(bool force); @@ -1126,6 +1179,7 @@ extern void pgstat_send_wal(bool force); * generate the pgstat* views. 
* ---------- */ +extern PgStat_BackendAccesses *pgstat_fetch_exited_backend_buffers(void); extern PgStat_StatDBEntry *pgstat_fetch_stat_dbentry(Oid dbid); extern PgStat_StatTabEntry *pgstat_fetch_stat_tabentry(Oid relid); extern PgStat_StatFuncEntry *pgstat_fetch_stat_funcentry(Oid funcid); diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index 33fcaf5c9a..7e385135db 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -310,10 +310,10 @@ extern void ScheduleBufferTagForWriteback(WritebackContext *context, BufferTag * /* freelist.c */ extern BufferDesc *StrategyGetBuffer(BufferAccessStrategy strategy, - uint32 *buf_state); + uint32 *buf_state, bool *from_ring); extern void StrategyFreeBuffer(BufferDesc *buf); extern bool StrategyRejectBuffer(BufferAccessStrategy strategy, - BufferDesc *buf); + BufferDesc *buf, bool *from_ring); extern int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc); extern void StrategyNotifyBgWriter(int bgwprocno); diff --git a/src/include/utils/backend_status.h b/src/include/utils/backend_status.h index 8042b817df..eb134d82f1 100644 --- a/src/include/utils/backend_status.h +++ b/src/include/utils/backend_status.h @@ -13,6 +13,7 @@ #include "datatype/timestamp.h" #include "libpq/pqcomm.h" #include "miscadmin.h" /* for BackendType */ +#include "port/atomics.h" #include "utils/backend_progress.h" @@ -31,12 +32,48 @@ typedef enum BackendState STATE_DISABLED } BackendState; +/* ---------- + * IO Stats reporting utility types + * ---------- + */ + +typedef enum BufferAccessType +{ + BA_Alloc, + BA_Extend, + BA_Fsync, + BA_Write, +} BufferAccessType; + +#define BUFFER_ACCESS_NUM_TYPES (BA_Write + 1) + +typedef enum BufferType +{ + Buf_Direct, + Buf_Local, + Buf_Shared, + Buf_Strategy, +} BufferType; + +#define BUFFER_NUM_TYPES (Buf_Strategy + 1) + /* ---------- * Shared-memory data structures * ---------- */ +/* + * Structure for counting all types of 
buffer accesses for a live backend. + */ +typedef struct PgBufferAccesses +{ + pg_atomic_uint64 allocs; /* incremented for BA_Alloc */ + pg_atomic_uint64 extends; /* incremented for BA_Extend */ + pg_atomic_uint64 fsyncs; /* incremented for BA_Fsync */ + pg_atomic_uint64 writes; /* incremented for BA_Write */ +} PgBufferAccesses; + /* * PgBackendSSLStatus * @@ -168,6 +205,7 @@ typedef struct PgBackendStatus /* query identifier, optionally computed using post_parse_analyze_hook */ uint64 st_query_id; + PgBufferAccesses buffer_access_stats[BUFFER_NUM_TYPES]; /* one set of counters per BufferType, indexed by the enum value */ } PgBackendStatus; @@ -289,6 +327,10 @@ extern void CreateSharedBackendStatus(void); * ---------- */ +/* Utility functions */ +extern const char *GetBufferTypeDesc(BufferType bufferType); + + /* Initialization functions */ extern void pgstat_beinit(void); extern void pgstat_bestart(void); @@ -296,7 +338,39 @@ extern void pgstat_bestart(void); extern void pgstat_clear_backend_activity_snapshot(void); /* Activity reporting functions */ +typedef struct PgStatBufferTypeAccesses PgStatBufferTypeAccesses; + /* Add one to the counter in MyBEEntry selected by buf_type (which counter set) and ba_type (which field). */ +static inline void +pgstat_inc_buffer_access_type(BufferAccessType ba_type, BufferType buf_type) +{ + PgBufferAccesses *accesses; + PgBackendStatus *beentry = MyBEEntry; + + Assert(beentry); /* checked only in assert-enabled builds; NOTE(review): confirm every caller runs after MyBEEntry is set */ + + accesses = &beentry->buffer_access_stats[buf_type]; /* buf_type is used directly as the array index, with no range check */ + switch (ba_type) /* no default case: a value outside the four cases changes nothing */ + { + case BA_Alloc: + pg_atomic_write_u64(&accesses->allocs, + pg_atomic_read_u64(&accesses->allocs) + 1); /* a read followed by a write, not one atomic add; NOTE(review): presumably safe because only the owning backend writes its entry (confirm) */ + break; + case BA_Extend: + pg_atomic_write_u64(&accesses->extends, + pg_atomic_read_u64(&accesses->extends) + 1); + break; + case BA_Fsync: + pg_atomic_write_u64(&accesses->fsyncs, + pg_atomic_read_u64(&accesses->fsyncs) + 1); + break; + case BA_Write: + pg_atomic_write_u64(&accesses->writes, + pg_atomic_read_u64(&accesses->writes) + 1); + break; + } +} extern void pgstat_report_activity(BackendState state, const char *cmd_str); +extern void pgstat_report_live_backend_accesses(PgStatBufferTypeAccesses *backend_accesses); extern void pgstat_report_query_id(uint64 query_id, bool force); extern void pgstat_report_tempfile(size_t filesize); extern void
pgstat_report_appname(const char *appname); @@ -312,6 +386,7 @@ extern uint64 pgstat_get_my_query_id(void); * generate the pgstat* views. * ---------- */ +extern PgBackendStatus *pgstat_fetch_backend_statuses(void); /* NOTE(review): the name suggests this exposes the PgBackendStatus array so live counters can be read (confirm in backend_status.c) */ extern int pgstat_fetch_stat_numbackends(void); extern PgBackendStatus *pgstat_fetch_stat_beentry(int beid); extern LocalPgBackendStatus *pgstat_fetch_stat_local_beentry(int beid); diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 2fa00a3c29..9172b0fcd2 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1828,6 +1828,14 @@ pg_stat_bgwriter| SELECT pg_stat_get_bgwriter_timed_checkpoints() AS checkpoints pg_stat_get_buf_fsync_backend() AS buffers_backend_fsync, pg_stat_get_buf_alloc() AS buffers_alloc, pg_stat_get_bgwriter_stat_reset_time() AS stats_reset; +pg_stat_buffers| SELECT b.backend_type, + b.buffer_type, + b.alloc, + b.extend, + b.fsync, + b.write, + b.stats_reset + FROM pg_stat_get_buffers_accesses() b(backend_type, buffer_type, alloc, extend, fsync, write, stats_reset); pg_stat_database| SELECT d.oid AS datid, d.datname, CASE diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql index feaaee6326..4ad672b35a 100644 --- a/src/test/regress/sql/stats.sql +++ b/src/test/regress/sql/stats.sql @@ -176,4 +176,8 @@ FROM prevstats AS pr; DROP TABLE trunc_stats_test, trunc_stats_test1, trunc_stats_test2, trunc_stats_test3, trunc_stats_test4; DROP TABLE prevstats; +SELECT * FROM pg_stat_buffers; /* NOTE(review): prints raw counters, so the expected output is unlikely to be stable across runs (confirm) */ +SELECT pg_stat_reset_shared('buffers'); +SELECT pg_sleep(2); /* NOTE(review): fixed two second wait, presumably so the reset reaches the stats collector (confirm); the commit message lists this test for removal */ +SELECT * FROM pg_stat_buffers; -- End of Stats Test -- 2.27.0