Re: Track IO times in pg_stat_io - Mailing list pgsql-hackers

From Andres Freund
Subject Re: Track IO times in pg_stat_io
Date
Msg-id 20230405005922.2ymbhbvondupyz7m@awork3.anarazel.de
Whole thread Raw
In response to Re: Track IO times in pg_stat_io  (Melanie Plageman <melanieplageman@gmail.com>)
Responses Re: Track IO times in pg_stat_io  (Melanie Plageman <melanieplageman@gmail.com>)
List pgsql-hackers
Hi,

On 2023-03-31 15:44:58 -0400, Melanie Plageman wrote:
> From 789d4bf1fb749a26523dbcd2c69795916b711c68 Mon Sep 17 00:00:00 2001
> From: Melanie Plageman <melanieplageman@gmail.com>
> Date: Tue, 21 Mar 2023 16:00:55 -0400
> Subject: [PATCH v8 1/4] Count IO time for temp relation writes
>
> Both pgstat_database and pgBufferUsage write times failed to count
> timing for flushes of dirty local buffers when acquiring a new local
> buffer for a temporary relation block.

I think it'd be worth mentioning here that we do count read time? Otherwise
it'd not be as clear that adding tracking increases consistency...



> From f4e0db5c833f33b30d4c0b4bebec1096a1745d81 Mon Sep 17 00:00:00 2001
> From: Melanie Plageman <melanieplageman@gmail.com>
> Date: Tue, 21 Mar 2023 18:20:44 -0400
> Subject: [PATCH v8 2/4] FlushRelationBuffers() counts temp relation IO timing
>
> Add pgstat_database and pgBufferUsage IO timing counting to
> FlushRelationBuffers() for writes of temporary relations.
> ---
>  src/backend/storage/buffer/bufmgr.c | 18 ++++++++++++++++++
>  1 file changed, 18 insertions(+)
>
> diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
> index b3adbbe7d2..05e98d5994 100644
> --- a/src/backend/storage/buffer/bufmgr.c
> +++ b/src/backend/storage/buffer/bufmgr.c
> @@ -3571,6 +3571,8 @@ FlushRelationBuffers(Relation rel)
>  {
>      int            i;
>      BufferDesc *bufHdr;
> +    instr_time    io_start,
> +                io_time;
>
>      if (RelationUsesLocalBuffers(rel))
>      {
> @@ -3596,17 +3598,33 @@ FlushRelationBuffers(Relation rel)
>
>                  PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
>
> +                if (track_io_timing)
> +                    INSTR_TIME_SET_CURRENT(io_start);
> +                else
> +                    INSTR_TIME_SET_ZERO(io_start);
> +
>                  smgrwrite(RelationGetSmgr(rel),
>                            BufTagGetForkNum(&bufHdr->tag),
>                            bufHdr->tag.blockNum,
>                            localpage,
>                            false);
>
> +

Spurious newline.


>                  buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
>                  pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
>
>                  pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE);
>
> +                if (track_io_timing)
> +                {
> +                    INSTR_TIME_SET_CURRENT(io_time);
> +                    INSTR_TIME_SUBTRACT(io_time, io_start);
> +                    pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
> +                    INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
> +                }
> +
> +                pgBufferUsage.local_blks_written++;
> +
>                  /* Pop the error context stack */
>                  error_context_stack = errcallback.previous;
>              }
> --
> 2.37.2
>


> From 2bdad725133395ded199ecc726096e052d6e654b Mon Sep 17 00:00:00 2001
> From: Melanie Plageman <melanieplageman@gmail.com>
> Date: Fri, 31 Mar 2023 15:32:36 -0400
> Subject: [PATCH v8 3/4] Track IO times in pg_stat_io
>
> Add IO timing for reads, writes, extends, and fsyncs to pg_stat_io.
>
> Reviewed-by: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
> Reviewed-by: Andres Freund <andres@anarazel.de>
> Discussion:
https://www.postgresql.org/message-id/flat/CAAKRu_ay5iKmnbXZ3DsauViF3eMxu4m1oNnJXqV_HyqYeg55Ww%40mail.gmail.com
> ---

> -static PgStat_BktypeIO PendingIOStats;
> +typedef struct PgStat_PendingIO
> +{
> +    PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
> +    instr_time    pending_times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
> +}            PgStat_PendingIO;

Probably will look less awful after adding the typedef to typedefs.list.


> +                /* we do track it */
> +                if (pgstat_tracks_io_op(bktype, io_object, io_context, io_op))
> +                {
> +                    /* ensure that if IO times are non-zero, counts are > 0 */
> +                    if (backend_io->times[io_object][io_context][io_op] != 0 &&
> +                        backend_io->counts[io_object][io_context][io_op] <= 0)
> +                        return false;
> +
>                      continue;
> +                }
>
> -                /* There are stats and there shouldn't be */
> -                if (!bktype_tracked ||
> -                    !pgstat_tracks_io_op(bktype, io_object, io_context, io_op))
> +                /* we don't track it, and it is not 0 */
> +                if (backend_io->counts[io_object][io_context][io_op] != 0)
>                      return false;
> +
> +                /* we don't track this IOOp, so make sure its IO time is zero */
> +                if (pgstat_tracks_io_time(io_op) > -1)
> +                {
> +                    if (backend_io->times[io_object][io_context][io_op] != 0)
> +                        return false;
> +                }

I'm somehow doubtful it's worth having pgstat_tracks_io_time, what kind of
error would be caught by this check?


> +/*
> + * Get the number of the column containing IO times for the specified IOOp. If
> + * the specified IOOp is one for which IO time is not tracked, return -1. Note
> + * that this function assumes that IO time for an IOOp is displayed in the view
> + * in the column directly after the IOOp counts.
> + */
> +static io_stat_col
> +pgstat_get_io_time_index(IOOp io_op)
> +{
> +    if (pgstat_tracks_io_time(io_op) == -1)
> +        return -1;

That seems dangerous - won't it just lead to accessing something from before
the start of the array? Probably should just assert.



> @@ -1363,20 +1389,32 @@ pg_stat_get_io(PG_FUNCTION_ARGS)
>
>                  for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
>                  {
> -                    int            col_idx = pgstat_get_io_op_index(io_op);
> +                    PgStat_Counter count = bktype_stats->counts[io_obj][io_context][io_op];
> +                    int            i = pgstat_get_io_op_index(io_op);
>
>                      /*
>                       * Some combinations of BackendType and IOOp, of IOContext
>                       * and IOOp, and of IOObject and IOOp are not tracked. Set
>                       * these cells in the view NULL.
>                       */
> -                    nulls[col_idx] = !pgstat_tracks_io_op(bktype, io_obj, io_context, io_op);
> +                    if (pgstat_tracks_io_op(bktype, io_obj, io_context, io_op))
> +                        values[i] = Int64GetDatum(count);
> +                    else
> +                        nulls[i] = true;
> +                }
>
> -                    if (nulls[col_idx])
> +                for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
> +                {
> +                    PgStat_Counter time = bktype_stats->times[io_obj][io_context][io_op];
> +                    int            i = pgstat_get_io_time_index(io_op);
> +
> +                    if (i == -1)
>                          continue;
>
> -                    values[col_idx] =
> -                        Int64GetDatum(bktype_stats->data[io_obj][io_context][io_op]);
> +                    if (!nulls[pgstat_get_io_op_index(io_op)])
> +                        values[i] = Float8GetDatum(pg_stat_micro_to_millisecs(time));
> +                    else
> +                        nulls[i] = true;
>                  }

Why two loops?

Greetings,

Andres Freund



pgsql-hackers by date:

Previous
From: Kirk Wolak
Date:
Subject: Re: [PATCH] Add function to_oct
Next
From: Amit Langote
Date:
Subject: Re: on placeholder entries in view rule action query's range table