From 629683d8fa63064ec55da2d65794d5e5af251407 Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Thu, 6 Oct 2022 12:23:25 -0400 Subject: [PATCH v35 2/5] Track IO operation statistics locally Introduce "IOOp", an IO operation done by a backend, and "IOContext", the IO source, target, or type done by a backend. For example, the checkpointer may write a shared buffer out. This would be counted as an IOOp "written" on an IOContext IOCONTEXT_SHARED by BackendType "checkpointer". Each IOOp (evict, freelist acquisition, reject, repossess, reuse, read, write, extend, and fsync) is counted per IOContext (bulkread, bulkwrite, local, shared, or vacuum) through a call to pgstat_count_io_op(). The primary concern of these statistics is IO operations on data blocks during the course of normal database operations. IO operations done by, for example, the archiver or syslogger are not counted in these statistics. WAL IO, temporary file IO, and IO done directly through smgr* functions (such as when building an index) are not yet counted but would be useful future additions. IOCONTEXT_LOCAL and IOCONTEXT_SHARED IOContexts concern operations on local and shared buffers. The IOCONTEXT_BULKREAD, IOCONTEXT_BULKWRITE, and IOCONTEXT_VACUUM IOContexts concern IO operations on buffers as part of a BufferAccessStrategy. IOOP_FREELIST_ACQUIRE and IOOP_EVICT IOOps are counted in IOCONTEXT_SHARED and IOCONTEXT_LOCAL IOContexts when a buffer is acquired or allocated through [Local]BufferAlloc() and no BufferAccessStrategy is in use. When a BufferAccessStrategy is in use, shared buffers added to the strategy ring are counted as IOOP_FREELIST_ACQUIRE or IOOP_EVICT IOOps in the IOCONTEXT_[BULKREAD|BULKWRITE|VACUUM] IOContext. When one of these buffers is reused, it is counted as an IOOP_REUSE IOOp in the corresponding strategy IOContext. IOOP_WRITE IOOps are counted in the BufferAccessStrategy IOContexts whenever the reused dirty buffer is written out. 
Stats on IOOps in all IOContexts for a given backend are counted in a backend's local memory. A subsequent commit will expose functions for aggregating and viewing these stats. Author: Melanie Plageman Reviewed-by: Andres Freund Reviewed-by: Justin Pryzby Reviewed-by: Kyotaro Horiguchi Reviewed-by: Maciek Sakrejda Reviewed-by: Lukas Fittl Discussion: https://www.postgresql.org/message-id/flat/20200124195226.lth52iydq2n2uilq%40alap3.anarazel.de --- src/backend/postmaster/checkpointer.c | 13 ++ src/backend/storage/buffer/bufmgr.c | 84 ++++++- src/backend/storage/buffer/freelist.c | 51 ++++- src/backend/storage/buffer/localbuf.c | 6 + src/backend/storage/sync/sync.c | 2 + src/backend/utils/activity/Makefile | 1 + src/backend/utils/activity/meson.build | 1 + src/backend/utils/activity/pgstat_io_ops.c | 255 +++++++++++++++++++++ src/include/pgstat.h | 68 ++++++ src/include/storage/buf_internals.h | 2 +- src/include/storage/bufmgr.h | 7 +- src/tools/pgindent/typedefs.list | 4 + 12 files changed, 481 insertions(+), 13 deletions(-) create mode 100644 src/backend/utils/activity/pgstat_io_ops.c diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index 5fc076fc14..4ea4e6a298 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -1116,6 +1116,19 @@ ForwardSyncRequest(const FileTag *ftag, SyncRequestType type) if (!AmBackgroundWriterProcess()) CheckpointerShmem->num_backend_fsync++; LWLockRelease(CheckpointerCommLock); + + /* + * We have no way of knowing if the current IOContext is + * IOCONTEXT_SHARED or IOCONTEXT_[BULKREAD, BULKWRITE, VACUUM] at this + * point, so count the fsync as being in the IOCONTEXT_SHARED + * IOContext. This is probably okay, because the number of backend + * fsyncs doesn't say anything about the efficacy of the + * BufferAccessStrategy. 
And counting both fsyncs done in + * IOCONTEXT_SHARED and IOCONTEXT_[BULKREAD, BULKWRITE, VACUUM] under + * IOCONTEXT_SHARED is likely clearer when investigating the number of + * backend fsyncs. + */ + pgstat_count_io_op(IOOP_FSYNC, IOCONTEXT_SHARED); return false; } diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 4e7b0b31bb..9f25c5ce32 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -482,7 +482,7 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr); -static void FlushBuffer(BufferDesc *buf, SMgrRelation reln); +static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOContext io_context); static void FindAndDropRelationBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, @@ -823,6 +823,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BufferDesc *bufHdr; Block bufBlock; bool found; + IOContext io_context; bool isExtend; bool isLocalBuf = SmgrIsTemp(smgr); @@ -833,6 +834,11 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, isExtend = (blockNum == P_NEW); + if (isLocalBuf) + io_context = IOCONTEXT_LOCAL; + else + io_context = IOContextForStrategy(strategy); + TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum, smgr->smgr_rlocator.locator.spcOid, smgr->smgr_rlocator.locator.dbOid, @@ -990,6 +996,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, if (isExtend) { + pgstat_count_io_op(IOOP_EXTEND, io_context); /* new buffers are zero-filled */ MemSet((char *) bufBlock, 0, BLCKSZ); /* don't set checksum for all-zero page */ @@ -1015,6 +1022,9 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, instr_time io_start, io_time; + pgstat_count_io_op(IOOP_READ, io_context); + + if (track_io_timing) INSTR_TIME_SET_CURRENT(io_start); @@ -1121,6 +1131,7 @@ 
BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BufferAccessStrategy strategy, bool *foundPtr) { + bool from_ring; BufferTag newTag; /* identity of requested block */ uint32 newHash; /* hash value for newTag */ LWLock *newPartitionLock; /* buffer partition lock for it */ @@ -1190,6 +1201,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, /* Loop here in case we have to try another victim buffer */ for (;;) { + /* * Ensure, while the spinlock's not yet held, that there's a free * refcount entry. @@ -1200,7 +1212,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, * Select a victim buffer. The buffer is returned with its header * spinlock still held! */ - buf = StrategyGetBuffer(strategy, &buf_state); + buf = StrategyGetBuffer(strategy, &buf_state, &from_ring); Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0); @@ -1237,6 +1249,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, if (LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED)) { + IOContext io_context; + /* * If using a nondefault strategy, and writing the buffer * would require a WAL flush, let the strategy decide whether @@ -1263,13 +1277,36 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, } } + /* + * When a strategy is in use, only flushes of dirty buffers + * already in the strategy ring are counted as strategy writes + * (IOCONTEXT [BULKREAD|BULKWRITE|VACUUM] IOOP_WRITE) for the + * purpose of IO operation statistics tracking. + * + * If a shared buffer initially added to the ring must be + * flushed before being used, this is counted as an + * IOCONTEXT_SHARED IOOP_WRITE. 
+ * + * If a shared buffer added to the ring later because the + * current strategy buffer is pinned or in use or because all + * strategy buffers were dirty and rejected (for BAS_BULKREAD + * operations only) requires flushing, this is counted as an + * IOCONTEXT_SHARED IOOP_WRITE (from_ring will be false). + * + * When a strategy is not in use, the write can only be a + * "regular" write of a dirty shared buffer (IOCONTEXT_SHARED + * IOOP_WRITE). + */ + + io_context = IOContextForStrategy(strategy); + /* OK, do the I/O */ TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_START(forkNum, blockNum, smgr->smgr_rlocator.locator.spcOid, smgr->smgr_rlocator.locator.dbOid, smgr->smgr_rlocator.locator.relNumber); - FlushBuffer(buf, NULL); + FlushBuffer(buf, NULL, io_context); LWLockRelease(BufferDescriptorGetContentLock(buf)); ScheduleBufferTagForWriteback(&BackendWritebackContext, @@ -1441,6 +1478,31 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, UnlockBufHdr(buf, buf_state); + if (oldFlags & BM_VALID) + { + /* + * When a BufferAccessStrategy is in use, evictions adding a + * shared buffer to the strategy ring are counted in the + * corresponding strategy's context. This includes the evictions + * done to add buffers to the ring initially as well as those + * done to add a new shared buffer to the ring when current + * buffer is pinned or otherwise in use. + * + * Blocks evicted from buffers already in the strategy ring are counted + * as IOCONTEXT_BULKREAD, IOCONTEXT_BULKWRITE, or IOCONTEXT_VACUUM + * reuses. + * + * We wait until this point to count reuses and evictions in order to + * avoid incorrectly counting a buffer as reused or evicted when it was + * released because it was concurrently pinned or in use or counting it + * as reused when it was rejected or when we errored out. 
+ */ + if (from_ring) + pgstat_count_io_op(IOOP_REUSE, IOContextForStrategy(strategy)); + else + pgstat_count_io_op(IOOP_EVICT, IOCONTEXT_SHARED); + } + if (oldPartitionLock != NULL) { BufTableDelete(&oldTag, oldHash); @@ -2570,7 +2632,7 @@ SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context) PinBuffer_Locked(bufHdr); LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED); - FlushBuffer(bufHdr, NULL); + FlushBuffer(bufHdr, NULL, IOCONTEXT_SHARED); LWLockRelease(BufferDescriptorGetContentLock(bufHdr)); @@ -2820,7 +2882,7 @@ BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, * as the second parameter. If not, pass NULL. */ static void -FlushBuffer(BufferDesc *buf, SMgrRelation reln) +FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOContext io_context) { XLogRecPtr recptr; ErrorContextCallback errcallback; @@ -2900,6 +2962,8 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln) */ bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum); + pgstat_count_io_op(IOOP_WRITE, io_context); + if (track_io_timing) INSTR_TIME_SET_CURRENT(io_start); @@ -3551,6 +3615,8 @@ FlushRelationBuffers(Relation rel) localpage, false); + pgstat_count_io_op(IOOP_WRITE, IOCONTEXT_LOCAL); + buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED); pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state); @@ -3586,7 +3652,7 @@ FlushRelationBuffers(Relation rel) { PinBuffer_Locked(bufHdr); LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED); - FlushBuffer(bufHdr, RelationGetSmgr(rel)); + FlushBuffer(bufHdr, RelationGetSmgr(rel), IOCONTEXT_SHARED); LWLockRelease(BufferDescriptorGetContentLock(bufHdr)); UnpinBuffer(bufHdr); } @@ -3684,7 +3750,7 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels) { PinBuffer_Locked(bufHdr); LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED); - FlushBuffer(bufHdr, srelent->srel); + FlushBuffer(bufHdr, srelent->srel, IOCONTEXT_SHARED); 
LWLockRelease(BufferDescriptorGetContentLock(bufHdr)); UnpinBuffer(bufHdr); } @@ -3894,7 +3960,7 @@ FlushDatabaseBuffers(Oid dbid) { PinBuffer_Locked(bufHdr); LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED); - FlushBuffer(bufHdr, NULL); + FlushBuffer(bufHdr, NULL, IOCONTEXT_SHARED); LWLockRelease(BufferDescriptorGetContentLock(bufHdr)); UnpinBuffer(bufHdr); } @@ -3921,7 +3987,7 @@ FlushOneBuffer(Buffer buffer) Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr))); - FlushBuffer(bufHdr, NULL); + FlushBuffer(bufHdr, NULL, IOCONTEXT_SHARED); } /* diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index 64728bd7ce..6eb2e00ae2 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -15,6 +15,7 @@ */ #include "postgres.h" +#include "pgstat.h" #include "port/atomics.h" #include "storage/buf_internals.h" #include "storage/bufmgr.h" @@ -192,13 +193,15 @@ have_free_buffer(void) * return the buffer with the buffer header spinlock still held. */ BufferDesc * -StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state) +StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_ring) { BufferDesc *buf; int bgwprocno; int trycounter; uint32 local_buf_state; /* to avoid repeated (de-)referencing */ + *from_ring = false; + /* * If given a strategy object, see whether it can select a buffer. We * assume strategy objects don't need buffer_strategy_lock. 
@@ -207,7 +210,10 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state) { buf = GetBufferFromRing(strategy, buf_state); if (buf != NULL) + { + *from_ring = true; return buf; + } } /* @@ -299,6 +305,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state) if (strategy != NULL) AddBufferToRing(strategy, buf); *buf_state = local_buf_state; + return buf; } UnlockBufHdr(buf, local_buf_state); @@ -331,6 +338,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state) if (strategy != NULL) AddBufferToRing(strategy, buf); *buf_state = local_buf_state; + return buf; } } @@ -596,7 +604,7 @@ FreeAccessStrategy(BufferAccessStrategy strategy) /* * GetBufferFromRing -- returns a buffer from the ring, or NULL if the - * ring is empty. + * ring is empty / not usable. * * The bufhdr spin lock is held on the returned buffer. */ @@ -643,7 +651,13 @@ GetBufferFromRing(BufferAccessStrategy strategy, uint32 *buf_state) /* * Tell caller to allocate a new buffer with the normal allocation * strategy. He'll then replace this ring element via AddBufferToRing. + * + * This counts as a "repossession" for the purposes of IO operation + * statistic tracking, since the reason that we no longer consider the + * current buffer to be part of the ring is that the block in it is in use + * outside of the ring, preventing us from reusing the buffer. */ + pgstat_count_io_op(IOOP_REPOSSESS, IOContextForStrategy(strategy)); return NULL; } @@ -659,6 +673,37 @@ AddBufferToRing(BufferAccessStrategy strategy, BufferDesc *buf) strategy->buffers[strategy->current] = BufferDescriptorGetBuffer(buf); } +/* + * Utility function returning the IOContext of a given BufferAccessStrategy's + * strategy ring. 
+ */ +IOContext +IOContextForStrategy(BufferAccessStrategy strategy) +{ + if (!strategy) + return IOCONTEXT_SHARED; + + switch (strategy->btype) + { + case BAS_NORMAL: + /* + * Currently, GetAccessStrategy() returns NULL for + * BufferAccessStrategyType BAS_NORMAL, so this case is + * unreachable. + */ + pg_unreachable(); + return IOCONTEXT_SHARED; + case BAS_BULKREAD: + return IOCONTEXT_BULKREAD; + case BAS_BULKWRITE: + return IOCONTEXT_BULKWRITE; + case BAS_VACUUM: + return IOCONTEXT_VACUUM; + } + + elog(ERROR, "unrecognized BufferAccessStrategyType: %d", strategy->btype); +} + /* * StrategyRejectBuffer -- consider rejecting a dirty buffer * @@ -688,5 +733,7 @@ StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_r */ strategy->buffers[strategy->current] = InvalidBuffer; + pgstat_count_io_op(IOOP_REJECT, IOContextForStrategy(strategy)); + return true; } diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 30d67d1c40..cb9685564f 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -18,6 +18,7 @@ #include "access/parallel.h" #include "catalog/catalog.h" #include "executor/instrument.h" +#include "pgstat.h" #include "storage/buf_internals.h" #include "storage/bufmgr.h" #include "utils/guc_hooks.h" @@ -196,6 +197,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, LocalRefCount[b]++; ResourceOwnerRememberBuffer(CurrentResourceOwner, BufferDescriptorGetBuffer(bufHdr)); + break; } } @@ -226,6 +228,8 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, localpage, false); + pgstat_count_io_op(IOOP_WRITE, IOCONTEXT_LOCAL); + /* Mark not-dirty now in case we error out below */ buf_state &= ~BM_DIRTY; pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state); @@ -256,6 +260,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, ClearBufferTag(&bufHdr->tag); buf_state &= 
~(BM_VALID | BM_TAG_VALID); pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state); + pgstat_count_io_op(IOOP_EVICT, IOCONTEXT_LOCAL); } hresult = (LocalBufferLookupEnt *) @@ -275,6 +280,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state); *foundPtr = false; + return bufHdr; } diff --git a/src/backend/storage/sync/sync.c b/src/backend/storage/sync/sync.c index 9d6a9e9109..5718b52fb5 100644 --- a/src/backend/storage/sync/sync.c +++ b/src/backend/storage/sync/sync.c @@ -432,6 +432,8 @@ ProcessSyncRequests(void) total_elapsed += elapsed; processed++; + pgstat_count_io_op(IOOP_FSYNC, IOCONTEXT_SHARED); + if (log_checkpoints) elog(DEBUG1, "checkpoint sync: number=%d file=%s time=%.3f ms", processed, diff --git a/src/backend/utils/activity/Makefile b/src/backend/utils/activity/Makefile index a2e8507fd6..0098785089 100644 --- a/src/backend/utils/activity/Makefile +++ b/src/backend/utils/activity/Makefile @@ -22,6 +22,7 @@ OBJS = \ pgstat_checkpointer.o \ pgstat_database.o \ pgstat_function.o \ + pgstat_io_ops.o \ pgstat_relation.o \ pgstat_replslot.o \ pgstat_shmem.o \ diff --git a/src/backend/utils/activity/meson.build b/src/backend/utils/activity/meson.build index 5b3b558a67..1038324c32 100644 --- a/src/backend/utils/activity/meson.build +++ b/src/backend/utils/activity/meson.build @@ -7,6 +7,7 @@ backend_sources += files( 'pgstat_checkpointer.c', 'pgstat_database.c', 'pgstat_function.c', + 'pgstat_io_ops.c', 'pgstat_relation.c', 'pgstat_replslot.c', 'pgstat_shmem.c', diff --git a/src/backend/utils/activity/pgstat_io_ops.c b/src/backend/utils/activity/pgstat_io_ops.c new file mode 100644 index 0000000000..6f9c250907 --- /dev/null +++ b/src/backend/utils/activity/pgstat_io_ops.c @@ -0,0 +1,255 @@ +/* ------------------------------------------------------------------------- + * + * pgstat_io_ops.c + * Implementation of IO operation statistics. 
+ * + * This file contains the implementation of IO operation statistics. It is kept + * separate from pgstat.c to enforce the line between the statistics access / + * storage implementation and the details about individual types of + * statistics. + * + * Copyright (c) 2021-2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/activity/pgstat_io_ops.c + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "utils/pgstat_internal.h" + +static PgStat_IOContextOps pending_IOOpStats; + +void +pgstat_count_io_op(IOOp io_op, IOContext io_context) +{ + PgStat_IOOpCounters *pending_counters; + + Assert(io_context < IOCONTEXT_NUM_TYPES); + Assert(io_op < IOOP_NUM_TYPES); + Assert(pgstat_expect_io_op(MyBackendType, io_context, io_op)); + + pending_counters = &pending_IOOpStats.data[io_context]; + + switch (io_op) + { + case IOOP_EVICT: + pending_counters->evictions++; + break; + case IOOP_EXTEND: + pending_counters->extends++; + break; + case IOOP_FSYNC: + pending_counters->fsyncs++; + break; + case IOOP_READ: + pending_counters->reads++; + break; + case IOOP_REPOSSESS: + pending_counters->repossessions++; + break; + case IOOP_REJECT: + pending_counters->rejections++; + break; + case IOOP_REUSE: + pending_counters->reuses++; + break; + case IOOP_WRITE: + pending_counters->writes++; + break; + } + +} + +const char * +pgstat_io_context_desc(IOContext io_context) +{ + switch (io_context) + { + case IOCONTEXT_BULKREAD: + return "bulkread"; + case IOCONTEXT_BULKWRITE: + return "bulkwrite"; + case IOCONTEXT_LOCAL: + return "local"; + case IOCONTEXT_SHARED: + return "shared"; + case IOCONTEXT_VACUUM: + return "vacuum"; + } + + elog(ERROR, "unrecognized IOContext value: %d", io_context); +} + +const char * +pgstat_io_op_desc(IOOp io_op) +{ + switch (io_op) + { + case IOOP_EVICT: + return "evicted"; + case IOOP_EXTEND: + return "extended"; + case IOOP_FSYNC: + return "files synced"; + 
case IOOP_READ: + return "read"; + case IOOP_REPOSSESS: + return "repossessed"; + case IOOP_REJECT: + return "rejected"; + case IOOP_REUSE: + return "reused"; + case IOOP_WRITE: + return "written"; + } + + elog(ERROR, "unrecognized IOOp value: %d", io_op); +} + +/* +* IO Operation statistics are not collected for all BackendTypes. +* +* The following BackendTypes do not participate in the cumulative stats +* subsystem or do not do IO operations worth reporting statistics on: +* - Syslogger because it is not connected to shared memory +* - Archiver because most relevant archiving IO is delegated to a +* specialized command or module +* - WAL Receiver and WAL Writer IO is not tracked in pg_stat_io for now +* +* Function returns true if BackendType participates in the cumulative stats +* subsystem for IO Operations and false if it does not. +*/ +bool +pgstat_io_op_stats_collected(BackendType bktype) +{ + return bktype != B_INVALID && bktype != B_ARCHIVER && bktype != B_LOGGER && + bktype != B_WAL_RECEIVER && bktype != B_WAL_WRITER; +} + +/* + * Some BackendTypes do not perform IO operations in certain IOContexts. Check + * that the given BackendType is expected to do IO in the given IOContext. + */ +bool +pgstat_bktype_io_context_valid(BackendType bktype, IOContext io_context) +{ + bool no_local; + + /* + * In core Postgres, only regular backends and WAL Sender processes + * executing queries should use local buffers. Parallel workers will not + * use local buffers (see InitLocalBuffers()); however, extensions + * leveraging background workers have no such limitation, so track IO + * Operations in IOCONTEXT_LOCAL for BackendType B_BG_WORKER. 
+ */ + no_local = bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER || bktype + == B_CHECKPOINTER || bktype == B_AUTOVAC_WORKER || bktype == + B_STANDALONE_BACKEND || bktype == B_STARTUP; + + if (io_context == IOCONTEXT_LOCAL && no_local) + return false; + + /* + * Some BackendTypes do not currently perform any IO operations in certain + * IOContexts, and, while it may not be inherently incorrect for them to + * do so, excluding those rows from the view makes the view easier to use. + */ + if ((io_context == IOCONTEXT_BULKREAD || io_context == IOCONTEXT_BULKWRITE + || io_context == IOCONTEXT_VACUUM) && (bktype == B_CHECKPOINTER + || bktype == B_BG_WRITER)) + return false; + + if (io_context == IOCONTEXT_VACUUM && bktype == B_AUTOVAC_LAUNCHER) + return false; + + if (io_context == IOCONTEXT_BULKWRITE && (bktype == B_AUTOVAC_WORKER || + bktype == B_AUTOVAC_LAUNCHER)) + return false; + + return true; +} + +/* + * Some BackendTypes will never do certain IOOps and some IOOps should not + * occur in certain IOContexts. Check that the given IOOp is valid for the + * given BackendType in the given IOContext. Note that there are currently no + * cases of an IOOp being invalid for a particular BackendType only within a + * certain IOContext. + */ +bool +pgstat_io_op_valid(BackendType bktype, IOContext io_context, IOOp io_op) +{ + bool strategy_io_context; + + /* + * Some BackendTypes should never track IO Operation statistics. + */ + Assert(pgstat_io_op_stats_collected(bktype)); + + /* + * Some BackendTypes will not do certain IOOps. + */ + if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) && + (io_op == IOOP_READ || io_op == IOOP_EVICT)) + return false; + + if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER || bktype == + B_CHECKPOINTER) && io_op == IOOP_EXTEND) + return false; + + /* + * Some IOOps are not valid in certain IOContexts and some IOOps are only + * valid in certain contexts. 
+ */ + if (io_context == IOCONTEXT_BULKREAD && io_op == IOOP_EXTEND) + return false; + + /* + * Only BAS_BULKREAD will reject strategy buffers + */ + if (io_context != IOCONTEXT_BULKREAD && io_op == IOOP_REJECT) + return false; + + + strategy_io_context = io_context == IOCONTEXT_BULKREAD || io_context == + IOCONTEXT_BULKWRITE || io_context == IOCONTEXT_VACUUM; + + /* + * IOOP_REPOSSESS and IOOP_REUSE are only relevant when a + * BufferAccessStrategy is in use. + */ + if (!strategy_io_context && (io_op == IOOP_REJECT || io_op == + IOOP_REPOSSESS || io_op == IOOP_REUSE)) + return false; + + /* + * Temporary tables using local buffers are not logged and thus do not + * require fsync'ing. + * + * IOOP_FSYNC IOOps done by a backend using a BufferAccessStrategy are + * counted in the IOCONTEXT_SHARED IOContext. See comment in + * ForwardSyncRequest() for more details. + */ + if ((io_context == IOCONTEXT_LOCAL || strategy_io_context) && + io_op == IOOP_FSYNC) + return false; + + return true; +} + +bool +pgstat_expect_io_op(BackendType bktype, IOContext io_context, IOOp io_op) +{ + if (!pgstat_io_op_stats_collected(bktype)) + return false; + + if (!pgstat_bktype_io_context_valid(bktype, io_context)) + return false; + + if (!(pgstat_io_op_valid(bktype, io_context, io_op))) + return false; + + return true; +} diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 9e2ce6f011..5883aafe9c 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -14,6 +14,7 @@ #include "datatype/timestamp.h" #include "portability/instr_time.h" #include "postmaster/pgarch.h" /* for MAX_XFN_CHARS */ +#include "storage/buf.h" #include "utils/backend_progress.h" /* for backward compatibility */ #include "utils/backend_status.h" /* for backward compatibility */ #include "utils/relcache.h" @@ -276,6 +277,55 @@ typedef struct PgStat_CheckpointerStats PgStat_Counter buf_fsync_backend; } PgStat_CheckpointerStats; +/* + * Types related to counting IO Operations for various IO Contexts + * 
When adding a new value, ensure that the proper assertions are added to + * pgstat_io_context_ops_assert_zero() and pgstat_io_op_assert_zero() (though + * the compiler will remind you about the latter) + */ + +typedef enum IOOp +{ + IOOP_EVICT = 0, + IOOP_EXTEND, + IOOP_FSYNC, + IOOP_READ, + IOOP_REJECT, + IOOP_REPOSSESS, + IOOP_REUSE, + IOOP_WRITE, +} IOOp; + +#define IOOP_NUM_TYPES (IOOP_WRITE + 1) + +typedef enum IOContext +{ + IOCONTEXT_BULKREAD = 0, + IOCONTEXT_BULKWRITE, + IOCONTEXT_LOCAL, + IOCONTEXT_SHARED, + IOCONTEXT_VACUUM, +} IOContext; + +#define IOCONTEXT_NUM_TYPES (IOCONTEXT_VACUUM + 1) + +typedef struct PgStat_IOOpCounters +{ + PgStat_Counter evictions; + PgStat_Counter extends; + PgStat_Counter fsyncs; + PgStat_Counter reads; + PgStat_Counter rejections; + PgStat_Counter reuses; + PgStat_Counter repossessions; + PgStat_Counter writes; +} PgStat_IOOpCounters; + +typedef struct PgStat_IOContextOps +{ + PgStat_IOOpCounters data[IOCONTEXT_NUM_TYPES]; +} PgStat_IOContextOps; + typedef struct PgStat_StatDBEntry { PgStat_Counter n_xact_commit; @@ -453,6 +503,24 @@ extern void pgstat_report_checkpointer(void); extern PgStat_CheckpointerStats *pgstat_fetch_stat_checkpointer(void); +/* + * Functions in pgstat_io_ops.c + */ + +extern void pgstat_count_io_op(IOOp io_op, IOContext io_context); +extern const char *pgstat_io_context_desc(IOContext io_context); +extern const char *pgstat_io_op_desc(IOOp io_op); + +/* Validation functions in pgstat_io_ops.c */ +extern bool pgstat_io_op_stats_collected(BackendType bktype); +extern bool pgstat_bktype_io_context_valid(BackendType bktype, IOContext io_context); +extern bool pgstat_io_op_valid(BackendType bktype, IOContext io_context, IOOp io_op); +extern bool pgstat_expect_io_op(BackendType bktype, IOContext io_context, IOOp io_op); + +/* IO stats translation function in freelist.c */ +extern IOContext IOContextForStrategy(BufferAccessStrategy bas); + + /* * Functions in pgstat_database.c */ diff --git 
a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index b75481450d..7b67250747 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -392,7 +392,7 @@ extern void ScheduleBufferTagForWriteback(WritebackContext *context, BufferTag * /* freelist.c */ extern BufferDesc *StrategyGetBuffer(BufferAccessStrategy strategy, - uint32 *buf_state); + uint32 *buf_state, bool *from_ring); extern void StrategyFreeBuffer(BufferDesc *buf); extern bool StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring); diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 6f4dfa0960..d0eed71f63 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -23,7 +23,12 @@ typedef void *Block; -/* Possible arguments for GetAccessStrategy() */ +/* + * Possible arguments for GetAccessStrategy(). + * + * If adding a new BufferAccessStrategyType, also add a new IOContext so + * statistics on IO operations using this strategy are tracked. + */ typedef enum BufferAccessStrategyType { BAS_NORMAL, /* Normal random access */ diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 2f02cc8f42..b080367073 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1106,7 +1106,9 @@ ID INFIX INT128 INTERFACE_INFO +IOContext IOFuncSelector +IOOp IPCompareMethod ITEM IV @@ -2026,6 +2028,8 @@ PgStat_FetchConsistency PgStat_FunctionCallUsage PgStat_FunctionCounts PgStat_HashKey +PgStat_IOContextOps +PgStat_IOOpCounters PgStat_Kind PgStat_KindInfo PgStat_LocalState -- 2.34.1