From 9fd9fafe1245bad0802772810f3450e651f7de63 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Mon, 5 Dec 2022 19:25:44 -0500 Subject: [PATCH v42 2/4] pgstat: Infrastructure to track IO operations Introduce "IOOp", an IO operation done by a backend, "IOObject", the target object of the IO, and "IOContext", the context or location of the IO operations on that object. For example, the checkpointer may write a shared buffer out. This would be considered an IOOp "written" on an IOObject IOOBJECT_RELATION in IOContext IOCONTEXT_NORMAL by BackendType "checkpointer". Each IOOp (evict, extend, fsync, read, reuse, and write) can be counted per IOObject (relation, temp relation) per IOContext (normal, bulkread, bulkwrite, or vacuum) through a call to pgstat_count_io_op(). Note that this commit introduces the infrastructure to count IO Operation statistics. A subsequent commit will add calls to pgstat_count_io_op() in the appropriate locations. IOContext IOCONTEXT_NORMAL concerns operations on local and shared buffers, while IOCONTEXT_BULKREAD, IOCONTEXT_BULKWRITE, and IOCONTEXT_VACUUM IOContexts concern IO operations on buffers as part of a BufferAccessStrategy. IOObject IOOBJECT_TEMP_RELATION concerns IO Operations on buffers containing temporary table data, while IOObject IOOBJECT_RELATION concerns IO Operations on buffers containing permanent relation data. Stats on IOOps on all IOObjects in all IOContexts for a given backend are first counted in a backend's local memory and then flushed to shared memory and accumulated with those from all other backends, exited and live. Some BackendTypes will not flush their pending statistics at regular intervals and instead explicitly call pgstat_flush_io() during the course of normal operations to flush their backend-local IO operation statistics to shared memory in a timely manner. 
Because not all BackendType, IOOp, IOObject, IOContext combinations are valid, the validity of the stats is checked before flushing pending stats and before reading in the existing stats file to shared memory. The aggregated stats in shared memory could be extended in the future with per-backend stats -- useful for per connection IO statistics and monitoring. Author: Melanie Plageman Reviewed-by: Andres Freund Reviewed-by: Justin Pryzby Reviewed-by: Kyotaro Horiguchi Reviewed-by: Maciek Sakrejda Reviewed-by: Lukas Fittl Discussion: https://www.postgresql.org/message-id/flat/20200124195226.lth52iydq2n2uilq%40alap3.anarazel.de --- doc/src/sgml/monitoring.sgml | 2 + src/backend/utils/activity/Makefile | 1 + src/backend/utils/activity/meson.build | 1 + src/backend/utils/activity/pgstat.c | 38 ++ src/backend/utils/activity/pgstat_bgwriter.c | 7 +- .../utils/activity/pgstat_checkpointer.c | 7 +- src/backend/utils/activity/pgstat_io.c | 446 ++++++++++++++++++ src/backend/utils/activity/pgstat_relation.c | 15 +- src/backend/utils/activity/pgstat_shmem.c | 4 + src/backend/utils/activity/pgstat_wal.c | 4 +- src/backend/utils/adt/pgstatfuncs.c | 4 +- src/include/miscadmin.h | 2 + src/include/pgstat.h | 126 +++++ src/include/utils/pgstat_internal.h | 34 ++ src/tools/pgindent/typedefs.list | 8 + 15 files changed, 693 insertions(+), 6 deletions(-) create mode 100644 src/backend/utils/activity/pgstat_io.c diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 5bcba0fdec..710bd2c52e 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -5403,6 +5403,8 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i the pg_stat_bgwriter view, archiver to reset all the counters shown in the pg_stat_archiver view, + io to reset all the counters shown in the + pg_stat_io view, wal to reset all the counters shown in the pg_stat_wal view or recovery_prefetch to reset all the counters shown diff --git 
a/src/backend/utils/activity/Makefile b/src/backend/utils/activity/Makefile index a80eda3cf4..7d7482dde0 100644 --- a/src/backend/utils/activity/Makefile +++ b/src/backend/utils/activity/Makefile @@ -22,6 +22,7 @@ OBJS = \ pgstat_checkpointer.o \ pgstat_database.o \ pgstat_function.o \ + pgstat_io.o \ pgstat_relation.o \ pgstat_replslot.o \ pgstat_shmem.o \ diff --git a/src/backend/utils/activity/meson.build b/src/backend/utils/activity/meson.build index a2b872c24b..518ee3f798 100644 --- a/src/backend/utils/activity/meson.build +++ b/src/backend/utils/activity/meson.build @@ -9,6 +9,7 @@ backend_sources += files( 'pgstat_checkpointer.c', 'pgstat_database.c', 'pgstat_function.c', + 'pgstat_io.c', 'pgstat_relation.c', 'pgstat_replslot.c', 'pgstat_shmem.c', diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index 0fa5370bcd..8451be0617 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -72,6 +72,7 @@ * - pgstat_checkpointer.c * - pgstat_database.c * - pgstat_function.c + * - pgstat_io.c * - pgstat_relation.c * - pgstat_replslot.c * - pgstat_slru.c @@ -359,6 +360,15 @@ static const PgStat_KindInfo pgstat_kind_infos[PGSTAT_NUM_KINDS] = { .snapshot_cb = pgstat_checkpointer_snapshot_cb, }, + [PGSTAT_KIND_IO] = { + .name = "io_ops", + + .fixed_amount = true, + + .reset_all_cb = pgstat_io_reset_all_cb, + .snapshot_cb = pgstat_io_snapshot_cb, + }, + [PGSTAT_KIND_SLRU] = { .name = "slru", @@ -582,6 +592,7 @@ pgstat_report_stat(bool force) /* Don't expend a clock check if nothing to do */ if (dlist_is_empty(&pgStatPending) && + !have_iostats && !have_slrustats && !pgstat_have_pending_wal()) { @@ -628,6 +639,9 @@ pgstat_report_stat(bool force) /* flush database / relation / function / ... 
stats */ partial_flush |= pgstat_flush_pending_entries(nowait); + /* flush IO stats */ + partial_flush |= pgstat_flush_io(nowait); + /* flush wal stats */ partial_flush |= pgstat_flush_wal(nowait); @@ -1322,6 +1336,15 @@ pgstat_write_statsfile(void) pgstat_build_snapshot_fixed(PGSTAT_KIND_CHECKPOINTER); write_chunk_s(fpout, &pgStatLocal.snapshot.checkpointer); + /* + * Write IO stats struct + */ + pgstat_build_snapshot_fixed(PGSTAT_KIND_IO); + write_chunk_s(fpout, &pgStatLocal.snapshot.io.stat_reset_timestamp); + for (BackendType bktype = B_INVALID + 1; bktype < BACKEND_NUM_TYPES; + bktype++) + write_chunk_s(fpout, &pgStatLocal.snapshot.io.stats[bktype]); + /* * Write SLRU stats struct */ @@ -1496,6 +1519,21 @@ pgstat_read_statsfile(void) if (!read_chunk_s(fpin, &shmem->checkpointer.stats)) goto error; + /* + * Read IO stats struct + */ + if (!read_chunk_s(fpin, &shmem->io.stat_reset_timestamp)) + goto error; + + for (BackendType bktype = B_INVALID + 1; bktype < BACKEND_NUM_TYPES; + bktype++) + { + Assert(pgstat_bktype_io_stats_valid(&shmem->io.stats[bktype], + bktype)); + if (!read_chunk_s(fpin, &shmem->io.stats[bktype].data)) + goto error; + } + /* * Read SLRU stats struct */ diff --git a/src/backend/utils/activity/pgstat_bgwriter.c b/src/backend/utils/activity/pgstat_bgwriter.c index 9247f2dda2..92be384b0d 100644 --- a/src/backend/utils/activity/pgstat_bgwriter.c +++ b/src/backend/utils/activity/pgstat_bgwriter.c @@ -24,7 +24,7 @@ PgStat_BgWriterStats PendingBgWriterStats = {0}; /* - * Report bgwriter statistics + * Report bgwriter and IO statistics */ void pgstat_report_bgwriter(void) @@ -56,6 +56,11 @@ pgstat_report_bgwriter(void) * Clear out the statistics buffer, so it can be re-used. 
*/ MemSet(&PendingBgWriterStats, 0, sizeof(PendingBgWriterStats)); + + /* + * Report IO statistics + */ + pgstat_flush_io(false); } /* diff --git a/src/backend/utils/activity/pgstat_checkpointer.c b/src/backend/utils/activity/pgstat_checkpointer.c index 3e9ab45103..26dec112f6 100644 --- a/src/backend/utils/activity/pgstat_checkpointer.c +++ b/src/backend/utils/activity/pgstat_checkpointer.c @@ -24,7 +24,7 @@ PgStat_CheckpointerStats PendingCheckpointerStats = {0}; /* - * Report checkpointer statistics + * Report checkpointer and IO statistics */ void pgstat_report_checkpointer(void) @@ -62,6 +62,11 @@ pgstat_report_checkpointer(void) * Clear out the statistics buffer, so it can be re-used. */ MemSet(&PendingCheckpointerStats, 0, sizeof(PendingCheckpointerStats)); + + /* + * Report IO statistics + */ + pgstat_flush_io(false); } /* diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c new file mode 100644 index 0000000000..981372c24c --- /dev/null +++ b/src/backend/utils/activity/pgstat_io.c @@ -0,0 +1,446 @@ +/* ------------------------------------------------------------------------- + * + * pgstat_io.c + * Implementation of IO statistics. + * + * This file contains the implementation of IO statistics. It is kept separate + * from pgstat.c to enforce the line between the statistics access / storage + * implementation and the details about individual types of statistics. 
+ * + * Copyright (c) 2021-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/activity/pgstat_io.c + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "utils/pgstat_internal.h" + + +static PgStat_IOContextOps pending_IOOpStats; +bool have_iostats = false; + + +void +pgstat_count_io_op(IOOp io_op, IOObject io_object, IOContext io_context) +{ + PgStat_IOOpCounters *pending_counters; + + Assert(io_context < IOCONTEXT_NUM_TYPES); + Assert(io_object < IOOBJECT_NUM_TYPES); + Assert(io_op < IOOP_NUM_TYPES); + Assert(pgstat_tracks_io_op(MyBackendType, io_context, io_object, io_op)); + + pending_counters = &pending_IOOpStats.data[io_context].data[io_object]; + + switch (io_op) + { + case IOOP_EVICT: + pending_counters->evictions++; + break; + case IOOP_EXTEND: + pending_counters->extends++; + break; + case IOOP_FSYNC: + pending_counters->fsyncs++; + break; + case IOOP_READ: + pending_counters->reads++; + break; + case IOOP_REUSE: + pending_counters->reuses++; + break; + case IOOP_WRITE: + pending_counters->writes++; + break; + } + + have_iostats = true; +} + +PgStat_IO * +pgstat_fetch_stat_io(void) +{ + pgstat_snapshot_fixed(PGSTAT_KIND_IO); + + return &pgStatLocal.snapshot.io; +} + +/* + * Flush out locally pending IO statistics + * + * If no stats have been recorded, this function returns false. + * + * If nowait is true, this function returns true if the lock could not be + * acquired. Otherwise, return false. 
+ */ +bool +pgstat_flush_io(bool nowait) +{ + LWLock *bktype_lock; + PgStat_IOContextOps *bktype_shstats; + + if (!have_iostats) + return false; + + bktype_lock = &pgStatLocal.shmem->io.locks[MyBackendType]; + bktype_shstats = + &pgStatLocal.shmem->io.stats[MyBackendType]; + + if (!nowait) + LWLockAcquire(bktype_lock, LW_EXCLUSIVE); + else if (!LWLockConditionalAcquire(bktype_lock, LW_EXCLUSIVE)) + return true; + + for (IOContext io_context = IOCONTEXT_BULKREAD; + io_context < IOCONTEXT_NUM_TYPES; io_context++) + { + PgStat_IOObjectOps *shared_objs = &bktype_shstats->data[io_context]; + PgStat_IOObjectOps *pending_objs = &pending_IOOpStats.data[io_context]; + + for (IOObject io_object = IOOBJECT_RELATION; + io_object < IOOBJECT_NUM_TYPES; io_object++) + { + PgStat_IOOpCounters *sharedent = &shared_objs->data[io_object]; + PgStat_IOOpCounters *pendingent = &pending_objs->data[io_object]; + +#define IO_ACC(fld) sharedent->fld += pendingent->fld + IO_ACC(evictions); + IO_ACC(extends); + IO_ACC(fsyncs); + IO_ACC(reads); + IO_ACC(reuses); + IO_ACC(writes); +#undef IO_ACC + } + } + + Assert(pgstat_bktype_io_stats_valid(bktype_shstats, MyBackendType)); + + LWLockRelease(bktype_lock); + + memset(&pending_IOOpStats, 0, sizeof(pending_IOOpStats)); + + have_iostats = false; + + return false; +} + +const char * +pgstat_get_io_context_name(IOContext io_context) +{ + switch (io_context) + { + case IOCONTEXT_BULKREAD: + return "bulkread"; + case IOCONTEXT_BULKWRITE: + return "bulkwrite"; + case IOCONTEXT_NORMAL: + return "normal"; + case IOCONTEXT_VACUUM: + return "vacuum"; + } + + elog(ERROR, "unrecognized IOContext value: %d", io_context); + pg_unreachable(); +} + +const char * +pgstat_get_io_object_name(IOObject io_object) +{ + switch (io_object) + { + case IOOBJECT_RELATION: + return "relation"; + case IOOBJECT_TEMP_RELATION: + return "temp relation"; + } + + elog(ERROR, "unrecognized IOObject value: %d", io_object); + pg_unreachable(); +} + +const char * 
+pgstat_get_io_op_name(IOOp io_op) +{ + switch (io_op) + { + case IOOP_EVICT: + return "evicted"; + case IOOP_EXTEND: + return "extended"; + case IOOP_FSYNC: + return "files synced"; + case IOOP_READ: + return "read"; + case IOOP_REUSE: + return "reused"; + case IOOP_WRITE: + return "written"; + } + + elog(ERROR, "unrecognized IOOp value: %d", io_op); + pg_unreachable(); +} + +void +pgstat_io_reset_all_cb(TimestampTz ts) +{ + for (int i = 0; i < BACKEND_NUM_TYPES; i++) + { + LWLock *bktype_lock = &pgStatLocal.shmem->io.locks[i]; + PgStat_IOContextOps *bktype_shstats = &pgStatLocal.shmem->io.stats[i]; + + LWLockAcquire(bktype_lock, LW_EXCLUSIVE); + + /* + * Use the lock in the first BackendType's PgStat_IOContextOps to + * protect the reset timestamp as well. + */ + if (i == 0) + pgStatLocal.shmem->io.stat_reset_timestamp = ts; + + memset(bktype_shstats, 0, sizeof(*bktype_shstats)); + LWLockRelease(bktype_lock); + } +} + +void +pgstat_io_snapshot_cb(void) +{ + for (int i = 0; i < BACKEND_NUM_TYPES; i++) + { + LWLock *bktype_lock = &pgStatLocal.shmem->io.locks[i]; + PgStat_IOContextOps *bktype_shstats = &pgStatLocal.shmem->io.stats[i]; + PgStat_IOContextOps *bktype_snap = &pgStatLocal.snapshot.io.stats[i]; + + LWLockAcquire(bktype_lock, LW_SHARED); + + /* + * Use the lock in the first BackendType's PgStat_IOContextOps to + * protect the reset timestamp as well. + */ + if (i == 0) + pgStatLocal.snapshot.io.stat_reset_timestamp = + pgStatLocal.shmem->io.stat_reset_timestamp; + + /* using struct assignment due to better type safety */ + *bktype_snap = *bktype_shstats; + LWLockRelease(bktype_lock); + } +} + +/* +* IO statistics are not collected for all BackendTypes. 
+* +* The following BackendTypes do not participate in the cumulative stats +* subsystem or do not perform IO that we currently track: +* - Syslogger because it is not connected to shared memory +* - Archiver because most relevant archiving IO is delegated to a +* specialized command or module +* - WAL Receiver and WAL Writer because their IO is not tracked in pg_stat_io for now +* +* Function returns true if BackendType participates in the cumulative stats +* subsystem for IO and false if it does not. +*/ +bool +pgstat_tracks_io_bktype(BackendType bktype) +{ + /* + * List every type so that new backend types trigger a warning about + * needing to adjust this switch. + */ + switch (bktype) + { + case B_INVALID: + case B_ARCHIVER: + case B_LOGGER: + case B_WAL_RECEIVER: + case B_WAL_WRITER: + return false; + + case B_AUTOVAC_LAUNCHER: + case B_AUTOVAC_WORKER: + case B_BACKEND: + case B_BG_WORKER: + case B_BG_WRITER: + case B_CHECKPOINTER: + case B_STANDALONE_BACKEND: + case B_STARTUP: + case B_WAL_SENDER: + return true; + } + + return false; +} + +/* + * Some BackendTypes do not perform IO in certain IOContexts. Some IOObjects + * are never operated on in some IOContexts. Check that the given BackendType + * is expected to do IO in the given IOContext and that the given IOObject is + * expected to be operated on in the given IOContext. + */ +bool +pgstat_tracks_io_object(BackendType bktype, IOContext io_context, + IOObject io_object) +{ + bool no_temp_rel; + + /* + * Some BackendTypes should never track IO statistics. + */ + if (!pgstat_tracks_io_bktype(bktype)) + return false; + + /* + * Currently, IO on temporary relations can only occur in the + * IOCONTEXT_NORMAL IOContext. + */ + if (io_context != IOCONTEXT_NORMAL && + io_object == IOOBJECT_TEMP_RELATION) + return false; + + /* + * In core Postgres, only regular backends and WAL Sender processes + * executing queries will use local buffers and operate on temporary + * relations. 
Parallel workers will not use local buffers (see + * InitLocalBuffers()); however, extensions leveraging background workers + * have no such limitation, so track IO on IOOBJECT_TEMP_RELATION for + * BackendType B_BG_WORKER. + */ + no_temp_rel = bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER || + bktype == B_CHECKPOINTER || bktype == B_AUTOVAC_WORKER || + bktype == B_STANDALONE_BACKEND || bktype == B_STARTUP; + + if (no_temp_rel && io_context == IOCONTEXT_NORMAL && + io_object == IOOBJECT_TEMP_RELATION) + return false; + + /* + * Some BackendTypes do not currently perform any IO in certain + * IOContexts, and, while it may not be inherently incorrect for them to + * do so, excluding those rows from the view makes the view easier to use. + */ + if ((bktype == B_CHECKPOINTER || bktype == B_BG_WRITER) && + (io_context == IOCONTEXT_BULKREAD || + io_context == IOCONTEXT_BULKWRITE || + io_context == IOCONTEXT_VACUUM)) + return false; + + if (bktype == B_AUTOVAC_LAUNCHER && io_context == IOCONTEXT_VACUUM) + return false; + + if ((bktype == B_AUTOVAC_WORKER || bktype == B_AUTOVAC_LAUNCHER) && + io_context == IOCONTEXT_BULKWRITE) + return false; + + return true; +} + +/* + * Some BackendTypes will never do certain IOOps and some IOOps should not + * occur in certain IOContexts. Check that the given IOOp is valid for the + * given BackendType in the given IOContext. Note that there are currently no + * cases of an IOOp being invalid for a particular BackendType only within a + * certain IOContext. + */ +bool +pgstat_tracks_io_op(BackendType bktype, IOContext io_context, + IOObject io_object, IOOp io_op) +{ + bool strategy_io_context; + + /* if (io_context, io_object) will never collect stats, we're done */ + if (!pgstat_tracks_io_object(bktype, io_context, io_object)) + return false; + + /* + * Some BackendTypes will not do certain IOOps. 
+ */ + if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) && + (io_op == IOOP_READ || io_op == IOOP_EVICT)) + return false; + + if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER || + bktype == B_CHECKPOINTER) && io_op == IOOP_EXTEND) + return false; + + /* + * Some IOOps are not valid in certain IOContexts and some IOOps are only + * valid in certain contexts. + */ + if (io_context == IOCONTEXT_BULKREAD && io_op == IOOP_EXTEND) + return false; + + strategy_io_context = io_context == IOCONTEXT_BULKREAD || + io_context == IOCONTEXT_BULKWRITE || io_context == IOCONTEXT_VACUUM; + + /* + * IOOP_REUSE is only relevant when a BufferAccessStrategy is in use. + */ + if (!strategy_io_context && io_op == IOOP_REUSE) + return false; + + /* + * IOOP_FSYNC IOOps done by a backend using a BufferAccessStrategy are + * counted in the IOCONTEXT_NORMAL IOContext. See comment in + * ForwardSyncRequest() for more details. + */ + if (strategy_io_context && io_op == IOOP_FSYNC) + return false; + + /* + * Temporary tables are not logged and thus do not require fsync'ing. + */ + if (io_context == IOCONTEXT_NORMAL && + io_object == IOOBJECT_TEMP_RELATION && io_op == IOOP_FSYNC) + return false; + + return true; +} + +/* + * Check that stats have not been counted for any combination of IOContext, + * IOObject, and IOOp which are not tracked for the passed-in BackendType. The + * passed-in PgStat_IOContextOps must contain stats from the + * BackendType specified by the second parameter. Caller is responsible for + * locking of the passed-in PgStat_IOContextOps, if needed. 
+ */ +bool +pgstat_bktype_io_stats_valid(PgStat_IOContextOps *context_ops, + BackendType bktype) +{ + bool bktype_tracked = pgstat_tracks_io_bktype(bktype); + + for (IOContext io_context = IOCONTEXT_BULKREAD; + io_context < IOCONTEXT_NUM_TYPES; io_context++) + { + PgStat_IOObjectOps *context = &context_ops->data[io_context]; + + for (IOObject io_object = IOOBJECT_RELATION; + io_object < IOOBJECT_NUM_TYPES; io_object++) + { + PgStat_IOOpCounters *object = &context->data[io_object]; + + if (!bktype_tracked || + !pgstat_tracks_io_object(bktype, io_context, + io_object)) + { + if (!pgstat_iszero_io_object(object)) + return false; + continue; + } + + for (IOOp io_op = IOOP_EVICT; io_op < IOOP_NUM_TYPES; io_op++) + { + if (!pgstat_tracks_io_op(bktype, io_context, io_object, io_op) && + !pgstat_iszero_io_op(object, io_op)) + return false; + } + } + } + + return true; +} diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c index 2e20b93c20..f793ac1516 100644 --- a/src/backend/utils/activity/pgstat_relation.c +++ b/src/backend/utils/activity/pgstat_relation.c @@ -206,7 +206,7 @@ pgstat_drop_relation(Relation rel) } /* - * Report that the table was just vacuumed. + * Report that the table was just vacuumed and flush IO statistics. */ void pgstat_report_vacuum(Oid tableoid, bool shared, @@ -258,10 +258,18 @@ pgstat_report_vacuum(Oid tableoid, bool shared, } pgstat_unlock_entry(entry_ref); + + /* + * Flush IO statistics now. pgstat_report_stat() will flush IO stats, + * however this will not be called until after an entire autovacuum cycle + * is done -- which will likely vacuum many relations -- or until the + * VACUUM command has processed all tables and committed. + */ + pgstat_flush_io(false); } /* - * Report that the table was just analyzed. + * Report that the table was just analyzed and flush IO statistics. 
* * Caller must provide new live- and dead-tuples estimates, as well as a * flag indicating whether to reset the mod_since_analyze counter. @@ -341,6 +349,9 @@ pgstat_report_analyze(Relation rel, } pgstat_unlock_entry(entry_ref); + + /* see pgstat_report_vacuum() */ + pgstat_flush_io(false); } /* diff --git a/src/backend/utils/activity/pgstat_shmem.c b/src/backend/utils/activity/pgstat_shmem.c index c1506b53d0..09fffd0e82 100644 --- a/src/backend/utils/activity/pgstat_shmem.c +++ b/src/backend/utils/activity/pgstat_shmem.c @@ -202,6 +202,10 @@ StatsShmemInit(void) LWLockInitialize(&ctl->checkpointer.lock, LWTRANCHE_PGSTATS_DATA); LWLockInitialize(&ctl->slru.lock, LWTRANCHE_PGSTATS_DATA); LWLockInitialize(&ctl->wal.lock, LWTRANCHE_PGSTATS_DATA); + + for (int i = 0; i < BACKEND_NUM_TYPES; i++) + LWLockInitialize(&ctl->io.locks[i], + LWTRANCHE_PGSTATS_DATA); } else { diff --git a/src/backend/utils/activity/pgstat_wal.c b/src/backend/utils/activity/pgstat_wal.c index e7a82b5fed..e8598b2f4e 100644 --- a/src/backend/utils/activity/pgstat_wal.c +++ b/src/backend/utils/activity/pgstat_wal.c @@ -34,7 +34,7 @@ static WalUsage prevWalUsage; /* * Calculate how much WAL usage counters have increased and update - * shared statistics. + * shared WAL and IO statistics. * * Must be called by processes that generate WAL, that do not call * pgstat_report_stat(), like walwriter. 
@@ -43,6 +43,8 @@ void pgstat_report_wal(bool force) { pgstat_flush_wal(force); + + pgstat_flush_io(force); } /* diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 58bd1360b9..42b890b806 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1593,6 +1593,8 @@ pg_stat_reset_shared(PG_FUNCTION_ARGS) pgstat_reset_of_kind(PGSTAT_KIND_BGWRITER); pgstat_reset_of_kind(PGSTAT_KIND_CHECKPOINTER); } + else if (strcmp(target, "io") == 0) + pgstat_reset_of_kind(PGSTAT_KIND_IO); else if (strcmp(target, "recovery_prefetch") == 0) XLogPrefetchResetStats(); else if (strcmp(target, "wal") == 0) @@ -1601,7 +1603,7 @@ pg_stat_reset_shared(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("unrecognized reset target: \"%s\"", target), - errhint("Target must be \"archiver\", \"bgwriter\", \"recovery_prefetch\", or \"wal\"."))); + errhint("Target must be \"archiver\", \"io\", \"bgwriter\", \"recovery_prefetch\", or \"wal\"."))); PG_RETURN_VOID(); } diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 0ffeefc437..0aaf600a78 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -331,6 +331,8 @@ typedef enum BackendType B_WAL_WRITER, } BackendType; +#define BACKEND_NUM_TYPES (B_WAL_WRITER + 1) + extern PGDLLIMPORT BackendType MyBackendType; extern const char *GetBackendTypeDesc(BackendType backendType); diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 5e3326a3b9..859442f69b 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -48,6 +48,7 @@ typedef enum PgStat_Kind PGSTAT_KIND_ARCHIVER, PGSTAT_KIND_BGWRITER, PGSTAT_KIND_CHECKPOINTER, + PGSTAT_KIND_IO, PGSTAT_KIND_SLRU, PGSTAT_KIND_WAL, } PgStat_Kind; @@ -276,6 +277,71 @@ typedef struct PgStat_CheckpointerStats PgStat_Counter buf_fsync_backend; } PgStat_CheckpointerStats; + +/* + * Types related to counting IO for various IO Contexts. 
When adding a new + * value, ensure that the proper paths are added to pgstat_iszero_io_object() + * and pgstat_iszero_io_op() (though the compiler will remind you about the + * latter). + */ + +typedef enum IOContext +{ + IOCONTEXT_BULKREAD, + IOCONTEXT_BULKWRITE, + IOCONTEXT_NORMAL, + IOCONTEXT_VACUUM, +} IOContext; + +#define IOCONTEXT_NUM_TYPES (IOCONTEXT_VACUUM + 1) + +typedef enum IOObject +{ + IOOBJECT_RELATION, + IOOBJECT_TEMP_RELATION, +} IOObject; + +#define IOOBJECT_NUM_TYPES (IOOBJECT_TEMP_RELATION + 1) + +typedef enum IOOp +{ + IOOP_EVICT, + IOOP_EXTEND, + IOOP_FSYNC, + IOOP_READ, + IOOP_REUSE, + IOOP_WRITE, +} IOOp; + +#define IOOP_NUM_TYPES (IOOP_WRITE + 1) + +typedef struct PgStat_IOOpCounters +{ + PgStat_Counter evictions; + PgStat_Counter extends; + PgStat_Counter fsyncs; + PgStat_Counter reads; + PgStat_Counter reuses; + PgStat_Counter writes; +} PgStat_IOOpCounters; + +typedef struct PgStat_IOObjectOps +{ + PgStat_IOOpCounters data[IOOBJECT_NUM_TYPES]; +} PgStat_IOObjectOps; + +typedef struct PgStat_IOContextOps +{ + PgStat_IOObjectOps data[IOCONTEXT_NUM_TYPES]; +} PgStat_IOContextOps; + +typedef struct PgStat_IO +{ + TimestampTz stat_reset_timestamp; + PgStat_IOContextOps stats[BACKEND_NUM_TYPES]; +} PgStat_IO; + + typedef struct PgStat_StatDBEntry { PgStat_Counter xact_commit; @@ -453,6 +519,66 @@ extern void pgstat_report_checkpointer(void); extern PgStat_CheckpointerStats *pgstat_fetch_stat_checkpointer(void); +/* + * Functions in pgstat_io.c + */ + +extern void pgstat_count_io_op(IOOp io_op, IOObject io_object, IOContext io_context); +extern PgStat_IO *pgstat_fetch_stat_io(void); +extern const char *pgstat_get_io_context_name(IOContext io_context); +extern const char *pgstat_get_io_object_name(IOObject io_object); +extern const char *pgstat_get_io_op_name(IOOp io_op); + +extern bool pgstat_tracks_io_bktype(BackendType bktype); +extern bool pgstat_tracks_io_object(BackendType bktype, + IOContext io_context, IOObject io_object); +extern bool 
pgstat_tracks_io_op(BackendType bktype, IOContext io_context, + IOObject io_object, IOOp io_op); + +/* + * Functions to check if counters are zero. + */ +static inline bool +pgstat_iszero_io_object(const PgStat_IOOpCounters *counters) +{ + return + counters->evictions == 0 && + counters->extends == 0 && + counters->fsyncs == 0 && + counters->reads == 0 && + counters->reuses == 0 && + counters->writes == 0; +} + +static inline PgStat_Counter +pgstat_get_io_op_value(const PgStat_IOOpCounters *counters, IOOp io_op) +{ + switch (io_op) + { + case IOOP_EVICT: + return counters->evictions; + case IOOP_EXTEND: + return counters->extends; + case IOOP_FSYNC: + return counters->fsyncs; + case IOOP_READ: + return counters->reads; + case IOOP_REUSE: + return counters->reuses; + case IOOP_WRITE: + return counters->writes; + } + + pg_unreachable(); +} + +static inline bool +pgstat_iszero_io_op(const PgStat_IOOpCounters *counters, IOOp io_op) +{ + return pgstat_get_io_op_value(counters, io_op) == 0; +} + + /* * Functions in pgstat_database.c */ diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h index 12fd51f1ae..8f4f9b760c 100644 --- a/src/include/utils/pgstat_internal.h +++ b/src/include/utils/pgstat_internal.h @@ -329,6 +329,19 @@ typedef struct PgStatShared_Checkpointer PgStat_CheckpointerStats reset_offset; } PgStatShared_Checkpointer; +/* shared version of PgStat_IO */ +typedef struct PgStatShared_IO +{ + /* + * locks[i] protects ->stats[i]. locks[0] also protects + * ->stat_reset_timestamp. 
+ */ + LWLock locks[BACKEND_NUM_TYPES]; + + TimestampTz stat_reset_timestamp; + PgStat_IOContextOps stats[BACKEND_NUM_TYPES]; +} PgStatShared_IO; + typedef struct PgStatShared_SLRU { /* lock protects ->stats */ @@ -419,6 +432,7 @@ typedef struct PgStat_ShmemControl PgStatShared_Archiver archiver; PgStatShared_BgWriter bgwriter; PgStatShared_Checkpointer checkpointer; + PgStatShared_IO io; PgStatShared_SLRU slru; PgStatShared_Wal wal; } PgStat_ShmemControl; @@ -442,6 +456,8 @@ typedef struct PgStat_Snapshot PgStat_CheckpointerStats checkpointer; + PgStat_IO io; + PgStat_SLRUStats slru[SLRU_NUM_ELEMENTS]; PgStat_WalStats wal; @@ -549,6 +565,17 @@ extern void pgstat_database_reset_timestamp_cb(PgStatShared_Common *header, Time extern bool pgstat_function_flush_cb(PgStat_EntryRef *entry_ref, bool nowait); +/* + * Functions in pgstat_io.c + */ + +extern void pgstat_io_reset_all_cb(TimestampTz ts); +extern void pgstat_io_snapshot_cb(void); +extern bool pgstat_flush_io(bool nowait); +extern bool pgstat_bktype_io_stats_valid(PgStat_IOContextOps *context_ops, + BackendType bktype); + + /* * Functions in pgstat_relation.c */ @@ -643,6 +670,13 @@ extern void pgstat_create_transactional(PgStat_Kind kind, Oid dboid, Oid objoid) extern PGDLLIMPORT PgStat_LocalState pgStatLocal; +/* + * Variables in pgstat_io.c + */ + +extern PGDLLIMPORT bool have_iostats; + + /* * Variables in pgstat_slru.c */ diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 50d86cb01b..9336bf9796 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1106,7 +1106,10 @@ ID INFIX INT128 INTERFACE_INFO +IOContext IOFuncSelector +IOObject +IOOp IPCompareMethod ITEM IV @@ -2010,6 +2013,7 @@ PgStatShared_Common PgStatShared_Database PgStatShared_Function PgStatShared_HashEntry +PgStatShared_IO PgStatShared_Relation PgStatShared_ReplSlot PgStatShared_SLRU @@ -2027,6 +2031,10 @@ PgStat_FetchConsistency PgStat_FunctionCallUsage PgStat_FunctionCounts 
PgStat_HashKey +PgStat_IO +PgStat_IOContextOps +PgStat_IOObjectOps +PgStat_IOOpCounters PgStat_Kind PgStat_KindInfo PgStat_LocalState -- 2.38.1