Re: Report checkpoint progress with pg_stat_progress_checkpoint (was: Report checkpoint progress in server logs) - Mailing list pgsql-hackers

From Andres Freund
Subject Re: Report checkpoint progress with pg_stat_progress_checkpoint (was: Report checkpoint progress in server logs)
Date
Msg-id 20221115201811.6tair4cyb3skayc4@awork3.anarazel.de
Whole thread Raw
In response to Re: Report checkpoint progress with pg_stat_progress_checkpoint (was: Report checkpoint progress in server logs)  ("Drouvot, Bertrand" <bertranddrouvot.pg@gmail.com>)
List pgsql-hackers
Hi,

On 2022-11-04 09:25:52 +0100, Drouvot, Bertrand wrote:
>  
> @@ -7023,29 +7048,63 @@ static void
>  CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
>  {
>      CheckPointRelationMap();
> +
> +    pgstat_progress_update_param(PROGRESS_CHECKPOINT_PHASE,
> +                                 PROGRESS_CHECKPOINT_PHASE_REPLI_SLOTS);
>      CheckPointReplicationSlots();
> +
> +    pgstat_progress_update_param(PROGRESS_CHECKPOINT_PHASE,
> +                                 PROGRESS_CHECKPOINT_PHASE_SNAPSHOTS);
>      CheckPointSnapBuild();
> +
> +    pgstat_progress_update_param(PROGRESS_CHECKPOINT_PHASE,
> +                                 PROGRESS_CHECKPOINT_PHASE_LOGICAL_REWRITE_MAPPINGS);
>      CheckPointLogicalRewriteHeap();
> +
> +    pgstat_progress_update_param(PROGRESS_CHECKPOINT_PHASE,
> +                                 PROGRESS_CHECKPOINT_PHASE_REPLI_ORIGIN);
>      CheckPointReplicationOrigin();
>  
>      /* Write out all dirty data in SLRUs and the main buffer pool */
>      TRACE_POSTGRESQL_BUFFER_CHECKPOINT_START(flags);
>      CheckpointStats.ckpt_write_t = GetCurrentTimestamp();
> +
> +    pgstat_progress_update_param(PROGRESS_CHECKPOINT_PHASE,
> +                                 PROGRESS_CHECKPOINT_PHASE_CLOG_PAGES);
>      CheckPointCLOG();
> +
> +    pgstat_progress_update_param(PROGRESS_CHECKPOINT_PHASE,
> +                                 PROGRESS_CHECKPOINT_PHASE_COMMITTS_PAGES);
>      CheckPointCommitTs();
> +
> +    pgstat_progress_update_param(PROGRESS_CHECKPOINT_PHASE,
> +                                 PROGRESS_CHECKPOINT_PHASE_SUBTRANS_PAGES);
>      CheckPointSUBTRANS();
> +
> +    pgstat_progress_update_param(PROGRESS_CHECKPOINT_PHASE,
> +                                 PROGRESS_CHECKPOINT_PHASE_MULTIXACT_PAGES);
>      CheckPointMultiXact();
> +
> +    pgstat_progress_update_param(PROGRESS_CHECKPOINT_PHASE,
> +                                 PROGRESS_CHECKPOINT_PHASE_PREDICATE_LOCK_PAGES);
>      CheckPointPredicate();
> +
> +    pgstat_progress_update_param(PROGRESS_CHECKPOINT_PHASE,
> +                                 PROGRESS_CHECKPOINT_PHASE_BUFFERS);
>      CheckPointBuffers(flags);
>  
>      /* Perform all queued up fsyncs */
>      TRACE_POSTGRESQL_BUFFER_CHECKPOINT_SYNC_START();
>      CheckpointStats.ckpt_sync_t = GetCurrentTimestamp();
> +    pgstat_progress_update_param(PROGRESS_CHECKPOINT_PHASE,
> +                                 PROGRESS_CHECKPOINT_PHASE_SYNC_FILES);
>      ProcessSyncRequests();
>      CheckpointStats.ckpt_sync_end_t = GetCurrentTimestamp();
>      TRACE_POSTGRESQL_BUFFER_CHECKPOINT_DONE();
>  
>      /* We deliberately delay 2PC checkpointing as long as possible */
> +    pgstat_progress_update_param(PROGRESS_CHECKPOINT_PHASE,
> +                                 PROGRESS_CHECKPOINT_PHASE_TWO_PHASE);
>      CheckPointTwoPhase(checkPointRedo);
>  }

This is quite the code bloat. Can we make this less duplicative?


> +CREATE VIEW pg_stat_progress_checkpoint AS
> +    SELECT
> +        S.pid AS pid,
> +        CASE S.param1 WHEN 1 THEN 'checkpoint'
> +                      WHEN 2 THEN 'restartpoint'
> +                      END AS type,
> +        ( CASE WHEN (S.param2 & 4) > 0 THEN 'immediate ' ELSE '' END ||
> +          CASE WHEN (S.param2 & 8) > 0 THEN 'force ' ELSE '' END ||
> +          CASE WHEN (S.param2 & 16) > 0 THEN 'flush-all ' ELSE '' END ||
> +          CASE WHEN (S.param2 & 32) > 0 THEN 'wait ' ELSE '' END ||
> +          CASE WHEN (S.param2 & 128) > 0 THEN 'wal ' ELSE '' END ||
> +          CASE WHEN (S.param2 & 256) > 0 THEN 'time ' ELSE '' END
> +        ) AS flags,
> +        ( '0/0'::pg_lsn +
> +          ((CASE
> +                WHEN S.param3 < 0 THEN pow(2::numeric, 64::numeric)::numeric
> +                ELSE 0::numeric
> +            END) +
> +           S.param3::numeric)
> +        ) AS start_lsn,

I don't think we should embed this much complexity in the view
defintions. It's hard to read, bloats the catalog, we can't fix them once
released.  This stuff seems like it should be in a helper function.

I don't have any iea what that pow stuff is supposed to be doing.


> +        to_timestamp(946684800 + (S.param4::float8 / 1000000)) AS start_time,

I don't think this is a reasonable path - embedding way too much low-level
details about the timestamp format in the view definition. Why do we need to
do this?



Greetings,

Andres Freund



pgsql-hackers by date:

Previous
From: Jacob Champion
Date:
Subject: Re: Moving forward with TDE
Next
From: Pavel Stehule
Date:
Subject: Re: Schema variables - new implementation for Postgres 15