From cb632a7feca9162b486d8a7a90581fd45db8865c Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Thu, 3 Nov 2022 12:10:07 -0400 Subject: [PATCH v37 2/5] Track IO operation statistics locally Introduce "IOOp", an IO operation done by a backend, "IOObject", the target object of the IO, and "IOContext", the context or location of the IO operations on that object. For example, the checkpointer may write a shared buffer out. This would be counted as an IOOp "written" on an IOObject IOOBJECT_RELATION in IOContext IOCONTEXT_BUFFER_POOL by BackendType "checkpointer". Each IOOp (evict, reuse, read, write, extend, and fsync) is counted per IOObject (relation, temp relation) per IOContext (bulkread, bulkwrite, buffer pool, or vacuum) through a call to pgstat_count_io_op(). The primary concern of these statistics is IO operations on data blocks during the course of normal database operations. IO operations done by, for example, the archiver or syslogger are not counted in these statistics. WAL IO, temporary file IO, and IO done directly through smgr* functions (such as when building an index) are not yet counted but would be useful future additions. IOContext IOCONTEXT_BUFFER_POOL concerns operations on local and shared buffers. The IOCONTEXT_BULKREAD, IOCONTEXT_BULKWRITE, and IOCONTEXT_VACUUM IOContexts concern IO operations on buffers as part of a BufferAccessStrategy. IOOP_EVICT IOOps are counted in IOCONTEXT_BUFFER_POOL when a buffer is acquired or allocated through [Local]BufferAlloc() and no BufferAccessStrategy is in use. When a BufferAccessStrategy is in use, shared buffers added to the strategy ring are counted as IOOP_EVICT IOOps in the IOCONTEXT_[BULKREAD|BULKWRITE|VACUUM] IOContext. When one of these buffers is reused, it is counted as an IOOP_REUSE IOOp in the corresponding strategy IOContext. IOOP_WRITE IOOps are counted in the BufferAccessStrategy IOContexts whenever the reused dirty buffer is written out. 
Stats on IOOps in all IOContexts for a given backend are counted in a backend's local memory. A subsequent commit will expose functions for aggregating and viewing these stats. Author: Melanie Plageman Reviewed-by: Andres Freund Reviewed-by: Justin Pryzby Reviewed-by: Kyotaro Horiguchi Reviewed-by: Maciek Sakrejda Reviewed-by: Lukas Fittl Discussion: https://www.postgresql.org/message-id/flat/20200124195226.lth52iydq2n2uilq%40alap3.anarazel.de --- src/backend/postmaster/checkpointer.c | 13 + src/backend/storage/buffer/bufmgr.c | 95 +++++++- src/backend/storage/buffer/freelist.c | 43 +++- src/backend/storage/buffer/localbuf.c | 6 + src/backend/storage/sync/sync.c | 2 + src/backend/utils/activity/Makefile | 1 + src/backend/utils/activity/meson.build | 1 + src/backend/utils/activity/pgstat_io_ops.c | 265 +++++++++++++++++++++ src/include/pgstat.h | 80 +++++++ src/include/storage/buf_internals.h | 2 +- src/include/storage/bufmgr.h | 7 +- src/tools/pgindent/typedefs.list | 6 + 12 files changed, 508 insertions(+), 13 deletions(-) create mode 100644 src/backend/utils/activity/pgstat_io_ops.c diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index 5fc076fc14..04a8f89637 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -1116,6 +1116,19 @@ ForwardSyncRequest(const FileTag *ftag, SyncRequestType type) if (!AmBackgroundWriterProcess()) CheckpointerShmem->num_backend_fsync++; LWLockRelease(CheckpointerCommLock); + + /* + * We have no way of knowing if the current IOContext is + * IOCONTEXT_BUFFER_POOL or IOCONTEXT_[BULKREAD, BULKWRITE, VACUUM] at this + * point, so count the fsync as being in the IOCONTEXT_BUFFER_POOL + * IOContext. This is probably okay, because the number of backend + * fsyncs doesn't say anything about the efficacy of the + * BufferAccessStrategy. 
And counting both fsyncs done in + * IOCONTEXT_BUFFER_POOL and IOCONTEXT_[BULKREAD, BULKWRITE, VACUUM] under + * IOCONTEXT_BUFFER_POOL is likely clearer when investigating the number of + * backend fsyncs. + */ + pgstat_count_io_op(IOOP_FSYNC, IOOBJECT_RELATION, IOCONTEXT_BUFFER_POOL); return false; } diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 82cdec0eb1..a494d7148e 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -482,7 +482,8 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr, BlockNumber blockNum, BufferAccessStrategy strategy, bool *foundPtr); -static void FlushBuffer(BufferDesc *buf, SMgrRelation reln); +static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, + IOContext io_context, IOObject io_object); static void FindAndDropRelationBuffers(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber nForkBlock, @@ -823,6 +824,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BufferDesc *bufHdr; Block bufBlock; bool found; + IOContext io_context; + IOObject io_object; bool isExtend; bool isLocalBuf = SmgrIsTemp(smgr); @@ -833,6 +836,22 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, isExtend = (blockNum == P_NEW); + if (isLocalBuf) + { + /* + * Though a strategy object may be passed in, no strategy is employed + * when using local buffers. This could happen when doing, for example, + * CREATE TEMPORARY TABLE AS ... 
+ */ + io_context = IOCONTEXT_BUFFER_POOL; + io_object = IOOBJECT_TEMP_RELATION; + } + else + { + io_context = IOContextForStrategy(strategy); + io_object = IOOBJECT_RELATION; + } + TRACE_POSTGRESQL_BUFFER_READ_START(forkNum, blockNum, smgr->smgr_rlocator.locator.spcOid, smgr->smgr_rlocator.locator.dbOid, @@ -990,6 +1009,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, if (isExtend) { + pgstat_count_io_op(IOOP_EXTEND, io_object, io_context); /* new buffers are zero-filled */ MemSet((char *) bufBlock, 0, BLCKSZ); /* don't set checksum for all-zero page */ @@ -1015,6 +1035,9 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, instr_time io_start, io_time; + pgstat_count_io_op(IOOP_READ, io_object, io_context); + + if (track_io_timing) INSTR_TIME_SET_CURRENT(io_start); @@ -1121,6 +1144,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BufferAccessStrategy strategy, bool *foundPtr) { + bool from_ring; + IOContext io_context; BufferTag newTag; /* identity of requested block */ uint32 newHash; /* hash value for newTag */ LWLock *newPartitionLock; /* buffer partition lock for it */ @@ -1187,9 +1212,12 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, */ LWLockRelease(newPartitionLock); + io_context = IOContextForStrategy(strategy); + /* Loop here in case we have to try another victim buffer */ for (;;) { + /* * Ensure, while the spinlock's not yet held, that there's a free * refcount entry. @@ -1200,7 +1228,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, * Select a victim buffer. The buffer is returned with its header * spinlock still held! 
*/ - buf = StrategyGetBuffer(strategy, &buf_state); + buf = StrategyGetBuffer(strategy, &buf_state, &from_ring); Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0); @@ -1263,13 +1291,34 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, } } + /* + * When a strategy is in use, only flushes of dirty buffers + * already in the strategy ring are counted as strategy writes + * (IOCONTEXT [BULKREAD|BULKWRITE|VACUUM] IOOP_WRITE) for the + * purpose of IO operation statistics tracking. + * + * If a shared buffer initially added to the ring must be + * flushed before being used, this is counted as an + * IOCONTEXT_BUFFER_POOL IOOP_WRITE. + * + * If a shared buffer added to the ring later because the + * current strategy buffer is pinned or in use or because all + * strategy buffers were dirty and rejected (for BAS_BULKREAD + * operations only) requires flushing, this is counted as an + * IOCONTEXT_BUFFER_POOL IOOP_WRITE (from_ring will be false). + * + * When a strategy is not in use, the write can only be a + * "regular" write of a dirty shared buffer (IOCONTEXT_BUFFER_POOL + * IOOP_WRITE). + */ + /* OK, do the I/O */ TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_START(forkNum, blockNum, smgr->smgr_rlocator.locator.spcOid, smgr->smgr_rlocator.locator.dbOid, smgr->smgr_rlocator.locator.relNumber); - FlushBuffer(buf, NULL); + FlushBuffer(buf, NULL, io_context, IOOBJECT_RELATION); LWLockRelease(BufferDescriptorGetContentLock(buf)); ScheduleBufferTagForWriteback(&BackendWritebackContext, @@ -1441,6 +1490,30 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, UnlockBufHdr(buf, buf_state); + if (oldFlags & BM_VALID) + { + /* + * When a BufferAccessStrategy is in use, evictions adding a + * shared buffer to the strategy ring are counted in the + * corresponding strategy's context. 
This includes the evictions + * done to add buffers to the ring initially as well as those + * done to add a new shared buffer to the ring when current + * buffer is pinned or otherwise in use. + * + * Blocks evicted from buffers already in the strategy ring are counted + * as IOCONTEXT_BULKREAD, IOCONTEXT_BULKWRITE, or IOCONTEXT_VACUUM + * reuses. + * + * We wait until this point to count reuses and evictions in order to + * avoid incorrectly counting a buffer as reused or evicted when it was + * released because it was concurrently pinned or in use or counting it + * as reused when it was rejected or when we errored out. + */ + IOOp io_op = from_ring ? IOOP_REUSE : IOOP_EVICT; + + pgstat_count_io_op(io_op, IOOBJECT_RELATION, io_context); + } + if (oldPartitionLock != NULL) { BufTableDelete(&oldTag, oldHash); @@ -2570,7 +2643,7 @@ SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context) PinBuffer_Locked(bufHdr); LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED); - FlushBuffer(bufHdr, NULL); + FlushBuffer(bufHdr, NULL, IOCONTEXT_BUFFER_POOL, IOOBJECT_RELATION); LWLockRelease(BufferDescriptorGetContentLock(bufHdr)); @@ -2820,7 +2893,7 @@ BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, * as the second parameter. If not, pass NULL. 
*/ static void -FlushBuffer(BufferDesc *buf, SMgrRelation reln) +FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOContext io_context, IOObject io_object) { XLogRecPtr recptr; ErrorContextCallback errcallback; @@ -2900,6 +2973,8 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln) */ bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum); + pgstat_count_io_op(IOOP_WRITE, IOOBJECT_RELATION, io_context); + if (track_io_timing) INSTR_TIME_SET_CURRENT(io_start); @@ -3551,6 +3626,8 @@ FlushRelationBuffers(Relation rel) localpage, false); + pgstat_count_io_op(IOOP_WRITE, IOOBJECT_TEMP_RELATION, IOCONTEXT_BUFFER_POOL); + buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED); pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state); @@ -3586,7 +3663,7 @@ FlushRelationBuffers(Relation rel) { PinBuffer_Locked(bufHdr); LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED); - FlushBuffer(bufHdr, RelationGetSmgr(rel)); + FlushBuffer(bufHdr, RelationGetSmgr(rel), IOCONTEXT_BUFFER_POOL, IOOBJECT_RELATION); LWLockRelease(BufferDescriptorGetContentLock(bufHdr)); UnpinBuffer(bufHdr); } @@ -3684,7 +3761,7 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels) { PinBuffer_Locked(bufHdr); LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED); - FlushBuffer(bufHdr, srelent->srel); + FlushBuffer(bufHdr, srelent->srel, IOCONTEXT_BUFFER_POOL, IOOBJECT_RELATION); LWLockRelease(BufferDescriptorGetContentLock(bufHdr)); UnpinBuffer(bufHdr); } @@ -3894,7 +3971,7 @@ FlushDatabaseBuffers(Oid dbid) { PinBuffer_Locked(bufHdr); LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED); - FlushBuffer(bufHdr, NULL); + FlushBuffer(bufHdr, NULL, IOCONTEXT_BUFFER_POOL, IOOBJECT_RELATION); LWLockRelease(BufferDescriptorGetContentLock(bufHdr)); UnpinBuffer(bufHdr); } @@ -3921,7 +3998,7 @@ FlushOneBuffer(Buffer buffer) Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr))); - FlushBuffer(bufHdr, NULL); + FlushBuffer(bufHdr, NULL, IOCONTEXT_BUFFER_POOL, 
IOOBJECT_RELATION); } /* diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index 64728bd7ce..937c674a7a 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -15,6 +15,7 @@ */ #include "postgres.h" +#include "pgstat.h" #include "port/atomics.h" #include "storage/buf_internals.h" #include "storage/bufmgr.h" @@ -192,13 +193,15 @@ have_free_buffer(void) * return the buffer with the buffer header spinlock still held. */ BufferDesc * -StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state) +StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_ring) { BufferDesc *buf; int bgwprocno; int trycounter; uint32 local_buf_state; /* to avoid repeated (de-)referencing */ + *from_ring = false; + /* * If given a strategy object, see whether it can select a buffer. We * assume strategy objects don't need buffer_strategy_lock. @@ -207,7 +210,10 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state) { buf = GetBufferFromRing(strategy, buf_state); if (buf != NULL) + { + *from_ring = true; return buf; + } } /* @@ -299,6 +305,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state) if (strategy != NULL) AddBufferToRing(strategy, buf); *buf_state = local_buf_state; + return buf; } UnlockBufHdr(buf, local_buf_state); @@ -331,6 +338,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state) if (strategy != NULL) AddBufferToRing(strategy, buf); *buf_state = local_buf_state; + return buf; } } @@ -596,7 +604,7 @@ FreeAccessStrategy(BufferAccessStrategy strategy) /* * GetBufferFromRing -- returns a buffer from the ring, or NULL if the - * ring is empty. + * ring is empty / not usable. * * The bufhdr spin lock is held on the returned buffer. 
*/ @@ -659,6 +667,37 @@ AddBufferToRing(BufferAccessStrategy strategy, BufferDesc *buf) strategy->buffers[strategy->current] = BufferDescriptorGetBuffer(buf); } +/* + * Utility function returning the IOContext of a given BufferAccessStrategy's + * strategy ring. + */ +IOContext +IOContextForStrategy(BufferAccessStrategy strategy) +{ + if (!strategy) + return IOCONTEXT_BUFFER_POOL; + + switch (strategy->btype) + { + case BAS_NORMAL: + /* + * Currently, GetAccessStrategy() returns NULL for + * BufferAccessStrategyType BAS_NORMAL, so this case is + * unreachable. + */ + pg_unreachable(); + return IOCONTEXT_BUFFER_POOL; + case BAS_BULKREAD: + return IOCONTEXT_BULKREAD; + case BAS_BULKWRITE: + return IOCONTEXT_BULKWRITE; + case BAS_VACUUM: + return IOCONTEXT_VACUUM; + } + + elog(ERROR, "unrecognized BufferAccessStrategyType: %d", strategy->btype); +} + /* * StrategyRejectBuffer -- consider rejecting a dirty buffer * diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 30d67d1c40..6361041f7a 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -18,6 +18,7 @@ #include "access/parallel.h" #include "catalog/catalog.h" #include "executor/instrument.h" +#include "pgstat.h" #include "storage/buf_internals.h" #include "storage/bufmgr.h" #include "utils/guc_hooks.h" @@ -196,6 +197,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, LocalRefCount[b]++; ResourceOwnerRememberBuffer(CurrentResourceOwner, BufferDescriptorGetBuffer(bufHdr)); + break; } } @@ -226,6 +228,8 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, localpage, false); + pgstat_count_io_op(IOOP_WRITE, IOOBJECT_TEMP_RELATION, IOCONTEXT_BUFFER_POOL); + /* Mark not-dirty now in case we error out below */ buf_state &= ~BM_DIRTY; pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state); @@ -256,6 +260,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber 
blockNum, ClearBufferTag(&bufHdr->tag); buf_state &= ~(BM_VALID | BM_TAG_VALID); pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state); + pgstat_count_io_op(IOOP_EVICT, IOOBJECT_TEMP_RELATION, IOCONTEXT_BUFFER_POOL); } hresult = (LocalBufferLookupEnt *) @@ -275,6 +280,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state); *foundPtr = false; + return bufHdr; } diff --git a/src/backend/storage/sync/sync.c b/src/backend/storage/sync/sync.c index 9d6a9e9109..a1bb1cef54 100644 --- a/src/backend/storage/sync/sync.c +++ b/src/backend/storage/sync/sync.c @@ -432,6 +432,8 @@ ProcessSyncRequests(void) total_elapsed += elapsed; processed++; + pgstat_count_io_op(IOOP_FSYNC, IOOBJECT_RELATION, IOCONTEXT_BUFFER_POOL); + if (log_checkpoints) elog(DEBUG1, "checkpoint sync: number=%d file=%s time=%.3f ms", processed, diff --git a/src/backend/utils/activity/Makefile b/src/backend/utils/activity/Makefile index a2e8507fd6..0098785089 100644 --- a/src/backend/utils/activity/Makefile +++ b/src/backend/utils/activity/Makefile @@ -22,6 +22,7 @@ OBJS = \ pgstat_checkpointer.o \ pgstat_database.o \ pgstat_function.o \ + pgstat_io_ops.o \ pgstat_relation.o \ pgstat_replslot.o \ pgstat_shmem.o \ diff --git a/src/backend/utils/activity/meson.build b/src/backend/utils/activity/meson.build index 5b3b558a67..1038324c32 100644 --- a/src/backend/utils/activity/meson.build +++ b/src/backend/utils/activity/meson.build @@ -7,6 +7,7 @@ backend_sources += files( 'pgstat_checkpointer.c', 'pgstat_database.c', 'pgstat_function.c', + 'pgstat_io_ops.c', 'pgstat_relation.c', 'pgstat_replslot.c', 'pgstat_shmem.c', diff --git a/src/backend/utils/activity/pgstat_io_ops.c b/src/backend/utils/activity/pgstat_io_ops.c new file mode 100644 index 0000000000..9e192f404a --- /dev/null +++ b/src/backend/utils/activity/pgstat_io_ops.c @@ -0,0 +1,265 @@ +/* ------------------------------------------------------------------------- + * + 
* pgstat_io_ops.c + * Implementation of IO operation statistics. + * + * This file contains the implementation of IO operation statistics. It is kept + * separate from pgstat.c to enforce the line between the statistics access / + * storage implementation and the details about individual types of + * statistics. + * + * Copyright (c) 2021-2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/activity/pgstat_io_ops.c + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "utils/pgstat_internal.h" + +static PgStat_IOContextOps pending_IOOpStats; + +void +pgstat_count_io_op(IOOp io_op, IOObject io_object, IOContext io_context) +{ + PgStat_IOOpCounters *pending_counters; + + Assert(io_context < IOCONTEXT_NUM_TYPES); + Assert(io_op < IOOP_NUM_TYPES); + Assert(pgstat_expect_io_op(MyBackendType, io_context, io_object, io_op)); + + pending_counters = &pending_IOOpStats.data[io_context].data[io_object]; + + switch (io_op) + { + case IOOP_EVICT: + pending_counters->evictions++; + break; + case IOOP_EXTEND: + pending_counters->extends++; + break; + case IOOP_FSYNC: + pending_counters->fsyncs++; + break; + case IOOP_READ: + pending_counters->reads++; + break; + case IOOP_REUSE: + pending_counters->reuses++; + break; + case IOOP_WRITE: + pending_counters->writes++; + break; + } + +} + +const char * +pgstat_io_context_desc(IOContext io_context) +{ + switch (io_context) + { + case IOCONTEXT_BULKREAD: + return "bulkread"; + case IOCONTEXT_BULKWRITE: + return "bulkwrite"; + case IOCONTEXT_BUFFER_POOL: + return "buffer pool"; + case IOCONTEXT_VACUUM: + return "vacuum"; + } + + elog(ERROR, "unrecognized IOContext value: %d", io_context); +} + +const char * +pgstat_io_object_desc(IOObject io_object) +{ + switch(io_object) + { + case IOOBJECT_RELATION: + return "relation"; + case IOOBJECT_TEMP_RELATION: + return "temp relation"; + } + + elog(ERROR, "unrecognized IOObject value: %d", 
io_object); +} + +const char * +pgstat_io_op_desc(IOOp io_op) +{ + switch (io_op) + { + case IOOP_EVICT: + return "evicted"; + case IOOP_EXTEND: + return "extended"; + case IOOP_FSYNC: + return "files synced"; + case IOOP_READ: + return "read"; + case IOOP_REUSE: + return "reused"; + case IOOP_WRITE: + return "written"; + } + + elog(ERROR, "unrecognized IOOp value: %d", io_op); +} + +/* +* IO Operation statistics are not collected for all BackendTypes. +* +* The following BackendTypes do not participate in the cumulative stats +* subsystem or do not do IO operations worth reporting statistics on: +* - Syslogger because it is not connected to shared memory +* - Archiver because most relevant archiving IO is delegated to a +* specialized command or module +* - WAL Receiver and WAL Writer IO is not tracked in pg_stat_io for now +* +* Function returns true if BackendType participates in the cumulative stats +* subsystem for IO Operations and false if it does not. +*/ +bool +pgstat_io_op_stats_collected(BackendType bktype) +{ + return bktype != B_INVALID && bktype != B_ARCHIVER && bktype != B_LOGGER && + bktype != B_WAL_RECEIVER && bktype != B_WAL_WRITER; +} + + +/* + * Some BackendTypes do not perform IO operations in certain IOContexts. Some + * IOObjects are never operated on in some IOContexts. Check that the given + * BackendType is expected to do IO in the given IOContext and that the given + * IOObject is expected to be operated on in the given IOContext. + */ +bool +pgstat_bktype_io_context_io_object_valid(BackendType bktype, + IOContext io_context, IOObject io_object) +{ + bool no_temp_rel; + + /* + * Currently, IO operations on temporary relations can only occur in the + * IOCONTEXT_BUFFER_POOL IOContext. 
+ */ + if (io_context != IOCONTEXT_BUFFER_POOL && + io_object == IOOBJECT_TEMP_RELATION) + return false; + + /* + * In core Postgres, only regular backends and WAL Sender processes + * executing queries will use local buffers and operate on temporary + * relations. Parallel workers will not use local buffers (see + * InitLocalBuffers()); however, extensions leveraging background workers + * have no such limitation, so track IO Operations on + * IOOBJECT_TEMP_RELATION for BackendType B_BG_WORKER. + */ + no_temp_rel = bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER || bktype + == B_CHECKPOINTER || bktype == B_AUTOVAC_WORKER || bktype == + B_STANDALONE_BACKEND || bktype == B_STARTUP; + + if (no_temp_rel && io_context == IOCONTEXT_BUFFER_POOL && io_object == + IOOBJECT_TEMP_RELATION) + return false; + + /* + * Some BackendTypes do not currently perform any IO operations in certain + * IOContexts, and, while it may not be inherently incorrect for them to + * do so, excluding those rows from the view makes the view easier to use. + */ + if ((bktype == B_CHECKPOINTER || bktype == B_BG_WRITER) && + (io_context == IOCONTEXT_BULKREAD || io_context == + IOCONTEXT_BULKWRITE || io_context == IOCONTEXT_VACUUM)) + return false; + + if (bktype == B_AUTOVAC_LAUNCHER && io_context == IOCONTEXT_VACUUM) + return false; + + if ((bktype == B_AUTOVAC_WORKER || bktype == B_AUTOVAC_LAUNCHER) && + io_context == IOCONTEXT_BULKWRITE) + return false; + + return true; +} + +/* + * Some BackendTypes will never do certain IOOps and some IOOps should not + * occur in certain IOContexts. Check that the given IOOp is valid for the + * given BackendType in the given IOContext. Note that there are currently no + * cases of an IOOp being invalid for a particular BackendType only within a + * certain IOContext. 
+ */ +bool +pgstat_io_op_valid(BackendType bktype, IOContext io_context, IOObject io_object, IOOp io_op) +{ + bool strategy_io_context; + + /* + * Some BackendTypes should never track IO Operation statistics. + */ + Assert(pgstat_io_op_stats_collected(bktype)); + + /* + * Some BackendTypes will not do certain IOOps. + */ + if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) && + (io_op == IOOP_READ || io_op == IOOP_EVICT)) + return false; + + if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER || bktype == + B_CHECKPOINTER) && io_op == IOOP_EXTEND) + return false; + + /* + * Some IOOps are not valid in certain IOContexts and some IOOps are only + * valid in certain contexts. + */ + if (io_context == IOCONTEXT_BULKREAD && io_op == IOOP_EXTEND) + return false; + + strategy_io_context = io_context == IOCONTEXT_BULKREAD || io_context == + IOCONTEXT_BULKWRITE || io_context == IOCONTEXT_VACUUM; + + /* + * IOOP_REUSE is only relevant when a BufferAccessStrategy is in use. + */ + if (!strategy_io_context && io_op == IOOP_REUSE) + return false; + + /* + * IOOP_FSYNC IOOps done by a backend using a BufferAccessStrategy are + * counted in the IOCONTEXT_BUFFER_POOL IOContext. See comment in + * ForwardSyncRequest() for more details. + */ + if (strategy_io_context && io_op == IOOP_FSYNC) + return false; + + /* + * Temporary tables are not logged and thus do not require fsync'ing. 
+ */ + if (io_context == IOCONTEXT_BUFFER_POOL && io_object == + IOOBJECT_TEMP_RELATION && io_op == IOOP_FSYNC) + return false; + + return true; +} + +bool +pgstat_expect_io_op(BackendType bktype, IOContext io_context, IOObject io_object, IOOp io_op) +{ + if (!pgstat_io_op_stats_collected(bktype)) + return false; + + if (!pgstat_bktype_io_context_io_object_valid(bktype, io_context, io_object)) + return false; + + if (!(pgstat_io_op_valid(bktype, io_context, io_object, io_op))) + return false; + + return true; +} diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 9e2ce6f011..e2beafb9b2 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -14,6 +14,7 @@ #include "datatype/timestamp.h" #include "portability/instr_time.h" #include "postmaster/pgarch.h" /* for MAX_XFN_CHARS */ +#include "storage/buf.h" #include "utils/backend_progress.h" /* for backward compatibility */ #include "utils/backend_status.h" /* for backward compatibility */ #include "utils/relcache.h" @@ -276,6 +277,63 @@ typedef struct PgStat_CheckpointerStats PgStat_Counter buf_fsync_backend; } PgStat_CheckpointerStats; +/* + * Types related to counting IO Operations for various IO Contexts + * When adding a new value, ensure that the proper assertions are added to + * pgstat_io_context_ops_assert_zero() and pgstat_io_op_assert_zero() (though + * the compiler will remind you about the latter) + */ + +typedef enum IOOp +{ + IOOP_EVICT, + IOOP_EXTEND, + IOOP_FSYNC, + IOOP_READ, + IOOP_REUSE, + IOOP_WRITE, +} IOOp; + +#define IOOP_NUM_TYPES (IOOP_WRITE + 1) + +typedef enum IOObject +{ + IOOBJECT_RELATION, + IOOBJECT_TEMP_RELATION, +} IOObject; + +#define IOOBJECT_NUM_TYPES (IOOBJECT_TEMP_RELATION + 1) + +typedef enum IOContext +{ + IOCONTEXT_BULKREAD, + IOCONTEXT_BULKWRITE, + IOCONTEXT_BUFFER_POOL, + IOCONTEXT_VACUUM, +} IOContext; + +#define IOCONTEXT_NUM_TYPES (IOCONTEXT_VACUUM + 1) + +typedef struct PgStat_IOOpCounters +{ + PgStat_Counter evictions; + PgStat_Counter extends; + 
PgStat_Counter fsyncs; + PgStat_Counter reads; + PgStat_Counter reuses; + PgStat_Counter writes; +} PgStat_IOOpCounters; + +typedef struct PgStat_IOObjectOps +{ + PgStat_IOOpCounters data[IOOBJECT_NUM_TYPES]; +} PgStat_IOObjectOps; + +typedef struct PgStat_IOContextOps +{ + PgStat_IOObjectOps data[IOCONTEXT_NUM_TYPES]; +} PgStat_IOContextOps; + typedef struct PgStat_StatDBEntry { PgStat_Counter n_xact_commit; @@ -453,6 +511,28 @@ extern void pgstat_report_checkpointer(void); extern PgStat_CheckpointerStats *pgstat_fetch_stat_checkpointer(void); +/* + * Functions in pgstat_io_ops.c + */ + +extern void pgstat_count_io_op(IOOp io_op, IOObject io_object, IOContext io_context); +extern const char *pgstat_io_context_desc(IOContext io_context); +extern const char * pgstat_io_object_desc(IOObject io_object); +extern const char *pgstat_io_op_desc(IOOp io_op); + +/* Validation functions in pgstat_io_ops.c */ +extern bool pgstat_io_op_stats_collected(BackendType bktype); +extern bool pgstat_bktype_io_context_io_object_valid(BackendType bktype, + IOContext io_context, IOObject io_object); +extern bool pgstat_io_op_valid(BackendType bktype, IOContext io_context, + IOObject io_object, IOOp io_op); +extern bool pgstat_expect_io_op(BackendType bktype, + IOContext io_context, IOObject io_object, IOOp io_op); + +/* IO stats translation function in freelist.c */ +extern IOContext IOContextForStrategy(BufferAccessStrategy bas); + + /* * Functions in pgstat_database.c */ diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index b75481450d..7b67250747 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -392,7 +392,7 @@ extern void ScheduleBufferTagForWriteback(WritebackContext *context, BufferTag * /* freelist.c */ extern BufferDesc *StrategyGetBuffer(BufferAccessStrategy strategy, - uint32 *buf_state); + uint32 *buf_state, bool *from_ring); extern void StrategyFreeBuffer(BufferDesc *buf); extern bool 
StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring); diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index e1bd22441b..206f4c0b3e 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -23,7 +23,12 @@ typedef void *Block; -/* Possible arguments for GetAccessStrategy() */ +/* + * Possible arguments for GetAccessStrategy(). + * + * If adding a new BufferAccessStrategyType, also add a new IOContext so + * statistics on IO operations using this strategy are tracked. + */ typedef enum BufferAccessStrategyType { BAS_NORMAL, /* Normal random access */ diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 9683b0a88e..6088c44842 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1106,7 +1106,10 @@ ID INFIX INT128 INTERFACE_INFO +IOContext IOFuncSelector +IOObject +IOOp IPCompareMethod ITEM IV @@ -2026,6 +2029,9 @@ PgStat_FetchConsistency PgStat_FunctionCallUsage PgStat_FunctionCounts PgStat_HashKey +PgStat_IOContextOps +PgStat_IOObjectOps +PgStat_IOOpCounters PgStat_Kind PgStat_KindInfo PgStat_LocalState -- 2.38.1