From f4f1534c105766c64534810b62eba7fea674c2a5 Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Mon, 20 Mar 2023 19:26:41 -0700 Subject: [PATCH v6 2/2] pgstat: io: increment pgstat_io hackery --- src/include/pgstat.h | 15 ++--- src/backend/storage/buffer/bufmgr.c | 21 ++++--- src/backend/storage/buffer/localbuf.c | 5 +- src/backend/storage/smgr/md.c | 12 ++-- src/backend/utils/activity/pgstat.c | 77 +------------------------- src/backend/utils/activity/pgstat_io.c | 70 ++++++++++++++++++++--- 6 files changed, 90 insertions(+), 110 deletions(-) diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 288a8c7c0ef..4adbf29fdfd 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -305,11 +305,7 @@ typedef enum IOOp typedef struct PgStat_BktypeIO { PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]; - union - { - instr_time pending_times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]; - PgStat_Counter times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]; - }; + PgStat_Counter times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]; } PgStat_BktypeIO; typedef struct PgStat_IO @@ -471,11 +467,6 @@ extern TimestampTz pgstat_get_stat_snapshot_timestamp(bool *have_snapshot); extern PgStat_Kind pgstat_get_kind_from_str(char *kind_str); extern bool pgstat_have_entry(PgStat_Kind kind, Oid dboid, Oid objoid); -extern instr_time pgstat_io_start(void); - -extern void pgstat_io_end(instr_time io_start, IOObject io_object, - IOContext io_context, IOOp io_op); - /* * Functions in pgstat_archiver.c @@ -507,8 +498,10 @@ extern PgStat_CheckpointerStats *pgstat_fetch_stat_checkpointer(void); extern bool pgstat_bktype_io_stats_valid(PgStat_BktypeIO *context_ops, BackendType bktype); +extern instr_time pgstat_prepare_io_time(void); extern void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op); -extern void pgstat_count_io_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time time); +extern void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time time); + extern PgStat_IO *pgstat_fetch_stat_io(void); extern const char *pgstat_get_io_context_name(IOContext io_context); extern const char *pgstat_get_io_object_name(IOObject io_object); diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 64857f1ff68..532c7af992d 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -1005,12 +1005,13 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, /* new buffers are zero-filled */ MemSet((char *) bufBlock, 0, BLCKSZ); - io_start = pgstat_io_start(); + io_start = pgstat_prepare_io_time(); /* don't set checksum for all-zero page */ smgrextend(smgr, forkNum, blockNum, bufBlock, false); - pgstat_io_end(io_start, io_object, io_context, IOOP_EXTEND); + pgstat_count_io_op_time(io_object, io_context, + IOOP_EXTEND, io_start); /* * NB: we're *not* doing a ScheduleBufferTagForWriteback here; @@ -1031,11 +1032,12 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, { instr_time io_start; - io_start = pgstat_io_start(); + io_start = pgstat_prepare_io_time(); smgrread(smgr, forkNum, blockNum, bufBlock); - pgstat_io_end(io_start, io_object, io_context, IOOP_READ); + pgstat_count_io_op_time(io_object, io_context, + IOOP_READ, io_start); /* check for garbage data */ if (!PageIsVerifiedExtended((Page) bufBlock, blockNum, @@ -2942,7 +2944,7 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, */ bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum); - io_start = pgstat_io_start(); + io_start = pgstat_prepare_io_time(); /* * bufToWrite is either the shared buffer or a copy, as appropriate. @@ -2971,7 +2973,8 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, * When a strategy is not in use, the write can only be a "regular" write * of a dirty shared buffer (IOCONTEXT_NORMAL IOOP_WRITE). */ - pgstat_io_end(io_start, IOOBJECT_RELATION, io_context, IOOP_WRITE); + pgstat_count_io_op_time(IOOBJECT_RELATION, io_context, + IOOP_WRITE, io_start); /* * Mark the buffer as clean (unless BM_JUST_DIRTIED has become set) and @@ -3601,7 +3604,7 @@ FlushRelationBuffers(Relation rel) * TODO: is it okay that this formerly didn't count * pgBufferUsage or pgStatBlockReadTime */ - io_start = pgstat_io_start(); + io_start = pgstat_prepare_io_time(); smgrwrite(RelationGetSmgr(rel), BufTagGetForkNum(&bufHdr->tag), @@ -3612,7 +3615,9 @@ FlushRelationBuffers(Relation rel) buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED); pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state); - pgstat_io_end(io_start, IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE); + pgstat_count_io_op_time(IOOBJECT_TEMP_RELATION, + IOCONTEXT_NORMAL, IOOP_WRITE, + io_start); /* Pop the error context stack */ error_context_stack = errcallback.previous; diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index af45a72934a..23f2e2fb759 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -229,7 +229,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, PageSetChecksumInplace(localpage, bufHdr->tag.blockNum); - io_start = pgstat_io_start(); + io_start = pgstat_prepare_io_time(); /* And write... */ smgrwrite(oreln, @@ -242,7 +242,8 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, buf_state &= ~BM_DIRTY; pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state); - pgstat_io_end(io_start, IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE); + pgstat_count_io_op_time(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, + IOOP_WRITE, io_start); } /* diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 2440211f03d..51a9134d57f 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -1030,11 +1030,13 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false /* retryOnError */ )) { - instr_time io_start = pgstat_io_start(); + instr_time io_start; ereport(DEBUG1, (errmsg_internal("could not forward fsync request because request queue is full"))); + io_start = pgstat_prepare_io_time(); + if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0) ereport(data_sync_elevel(ERROR), (errcode_for_file_access(), @@ -1052,7 +1054,8 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) * IOCONTEXT_NORMAL is likely clearer when investigating the number of * backend fsyncs. */ - pgstat_io_end(io_start, IOOBJECT_RELATION, IOCONTEXT_NORMAL, IOOP_FSYNC); + pgstat_count_io_op_time(IOOBJECT_RELATION, IOCONTEXT_NORMAL, + IOOP_FSYNC, io_start); } } @@ -1428,7 +1431,7 @@ mdsyncfiletag(const FileTag *ftag, char *path) need_to_close = true; } - io_start = pgstat_io_start(); + io_start = pgstat_prepare_io_time(); /* Sync the file. */ result = FileSync(file, WAIT_EVENT_DATA_FILE_SYNC); @@ -1437,7 +1440,8 @@ mdsyncfiletag(const FileTag *ftag, char *path) if (need_to_close) FileClose(file); - pgstat_io_end(io_start, IOOBJECT_RELATION, IOCONTEXT_NORMAL, IOOP_FSYNC); + pgstat_count_io_op_time(IOOBJECT_RELATION, IOCONTEXT_NORMAL, + IOOP_FSYNC, io_start); errno = save_errno; return result; diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index 9f5f041d7c2..b125802b215 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -95,11 +95,9 @@ #include "access/transam.h" #include "access/xact.h" -#include "executor/instrument.h" #include "lib/dshash.h" #include "pgstat.h" #include "port/atomics.h" -#include "storage/bufmgr.h" #include "storage/fd.h" #include "storage/ipc.h" #include "storage/lwlock.h" @@ -110,6 +108,7 @@ #include "utils/pgstat_internal.h" #include "utils/timestamp.h" + /* ---------- * Timer definitions. * @@ -907,80 +906,6 @@ pgstat_have_entry(PgStat_Kind kind, Oid dboid, Oid objoid) return pgstat_get_entry_ref(kind, dboid, objoid, false, NULL) != NULL; } -instr_time -pgstat_io_start(void) -{ - instr_time io_start; - - if (track_io_timing) - INSTR_TIME_SET_CURRENT(io_start); - else - INSTR_TIME_SET_ZERO(io_start); - - return io_start; -} - -void -pgstat_io_end(instr_time io_start, IOObject io_object, - IOContext io_context, IOOp io_op) -{ - instr_time io_time; - - if (track_io_timing) - { - INSTR_TIME_SET_CURRENT(io_time); - INSTR_TIME_SUBTRACT(io_time, io_start); - pgstat_count_io_time(io_object, io_context, io_op, io_time); - - if (io_op == IOOP_WRITE) - { - if (io_object == IOOBJECT_RELATION) - { - /* TODO: AFAICT, pgstat_count_buffer_write_time is only called */ - /* for shared buffers whereas pgstat_count_buffer_read_time is */ - /* called for temp relations and shared buffers. */ - /* - * is this intentional and should I match current behavior or - * not? - */ - pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time)); - INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time); - } - } - else if (io_op == IOOP_READ) - { - pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time)); - if (io_object == IOOBJECT_RELATION) - { - INSTR_TIME_ADD(pgBufferUsage.blk_read_time, io_time); - } - } - } - - pgstat_count_io_op(io_object, io_context, io_op); - - if (io_op == IOOP_WRITE) - { - if (io_object == IOOBJECT_RELATION) - pgBufferUsage.shared_blks_written++; - else if (io_object == IOOBJECT_TEMP_RELATION) - pgBufferUsage.local_blks_written++; - } - - /* - * TODO: this is normally done later in ReadBuffer_common() is it okay to - * do here? - */ - else if (io_op == IOOP_READ) - { - if (io_object == IOOBJECT_RELATION) - pgBufferUsage.shared_blks_read++; - else if (io_object == IOOBJECT_TEMP_RELATION) - pgBufferUsage.local_blks_read++; - } -} - - /* * Ensure snapshot for fixed-numbered 'kind' exists. * diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c index fe09cbf16d4..08888fa0c5b 100644 --- a/src/backend/utils/activity/pgstat_io.c +++ b/src/backend/utils/activity/pgstat_io.c @@ -16,10 +16,19 @@ #include "postgres.h" +#include "executor/instrument.h" +#include "storage/bufmgr.h" #include "utils/pgstat_internal.h" -static PgStat_BktypeIO PendingIOStats; +typedef struct PgStat_PendingIO +{ + PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]; + instr_time pending_times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES]; +} PgStat_PendingIO; + + +static PgStat_PendingIO PendingIOStats; bool have_iostats = false; @@ -73,6 +82,19 @@ pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io, return true; } +instr_time +pgstat_prepare_io_time(void) +{ + instr_time io_start; + + if (track_io_timing) + INSTR_TIME_SET_CURRENT(io_start); + else + INSTR_TIME_SET_ZERO(io_start); + + return io_start; +} + void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op) { @@ -86,18 +108,48 @@ pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op) have_iostats = true; } +/* + * Like pgstat_count_io_op() except it also accumulates time. + */ void -pgstat_count_io_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time time) +pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, + instr_time start_time) { - Assert(io_object < IOOBJECT_NUM_TYPES); - Assert(io_context < IOCONTEXT_NUM_TYPES); - Assert(io_op < IOOP_NUM_TYPES); - Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op)); - Assert(pgstat_tracks_io_time(io_op) != -1); + if (track_io_timing) + { + instr_time io_time; - INSTR_TIME_ADD(PendingIOStats.pending_times[io_object][io_context][io_op], time); + INSTR_TIME_SET_CURRENT(io_time); + INSTR_TIME_SUBTRACT(io_time, start_time); - have_iostats = true; + if (io_op == IOOP_WRITE) + { + if (io_object == IOOBJECT_RELATION) + { + /* TODO: AFAICT, pgstat_count_buffer_write_time is only called */ + /* for shared buffers whereas pgstat_count_buffer_read_time is */ + /* called for temp relations and shared buffers. */ + /* + * is this intentional and should I match current behavior or + * not? + */ + pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time)); + INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time); + } + } + else if (io_op == IOOP_READ) + { + pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time)); + if (io_object == IOOBJECT_RELATION) + { + INSTR_TIME_ADD(pgBufferUsage.blk_read_time, io_time); + } + } + + INSTR_TIME_ADD(PendingIOStats.pending_times[io_object][io_context][io_op], io_time); + } + + pgstat_count_io_op(io_object, io_context, io_op); } PgStat_IO * -- 2.38.0