From d03c53fca689f5303a5f4f8e8d142687ed2c44d9 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Sat, 8 Mar 2025 19:34:50 +0100 Subject: [PATCH v20250310c 4/5] make progress reporting work - Splits the progress status by worker type - one row for launcher, one row for checksum worker (might be more with parallel workers). - The launcher only updates database counters, the workers only set relation/block counters. - Also reworks the columns in the system view a bit, discards the "current" fields (we still know the database for each worker). - Issue: Not sure what to do about relation forks, at this point it tracks only blocks for MAIN fork. --- src/backend/catalog/system_views.sql | 36 ++++---- src/backend/postmaster/datachecksumsworker.c | 91 +++++++++++++++++--- src/include/commands/progress.h | 27 +++--- src/test/regress/expected/rules.out | 29 ++++--- 4 files changed, 131 insertions(+), 52 deletions(-) diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 0d62976dc1f..4330d0ad656 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1335,25 +1335,25 @@ CREATE VIEW pg_stat_progress_copy AS LEFT JOIN pg_database D ON S.datid = D.oid; CREATE VIEW pg_stat_progress_data_checksums AS - SELECT - S.pid AS pid, S.datid AS datid, D.datname AS datname, - CASE S.param1 WHEN 0 THEN 'enabling' + SELECT + S.pid AS pid, S.datid, D.datname AS datname, + CASE S.param1 WHEN 0 THEN 'enabling' WHEN 1 THEN 'disabling' - WHEN 2 THEN 'waiting' - WHEN 3 THEN 'waiting on backends' - WHEN 4 THEN 'waiting on temporary tables' - WHEN 5 THEN 'done' - END AS phase, - CASE S.param2 WHEN -1 THEN NULL ELSE S.param2 END AS databases_total, - CASE S.param3 WHEN -1 THEN NULL ELSE S.param3 END AS relations_total, - S.param4 AS databases_processed, - S.param5 AS relations_processed, - S.param6 AS databases_current, - S.param7 AS relation_current, - S.param8 AS relation_current_blocks, - S.param9 AS relation_current_blocks_processed - FROM pg_stat_get_progress_info('DATACHECKSUMS') AS S - LEFT JOIN pg_database D ON S.datid = D.oid; + WHEN 2 THEN 'waiting' + WHEN 3 THEN 'waiting on backends' + WHEN 4 THEN 'waiting on temporary tables' + WHEN 5 THEN 'waiting on checkpoint' + WHEN 6 THEN 'done' + END AS phase, + CASE S.param2 WHEN -1 THEN NULL ELSE S.param2 END AS databases_total, + S.param3 AS databases_done, + CASE S.param4 WHEN -1 THEN NULL ELSE S.param4 END AS relations_total, + CASE S.param5 WHEN -1 THEN NULL ELSE S.param5 END AS relations_done, + CASE S.param6 WHEN -1 THEN NULL ELSE S.param6 END AS blocks_total, + CASE S.param7 WHEN -1 THEN NULL ELSE S.param7 END AS blocks_done + FROM pg_stat_get_progress_info('DATACHECKSUMS') AS S + LEFT JOIN pg_database D ON S.datid = D.oid + ORDER BY S.datid; -- return the launcher process first CREATE VIEW pg_user_mappings AS SELECT diff --git a/src/backend/postmaster/datachecksumsworker.c b/src/backend/postmaster/datachecksumsworker.c index 6df92684a3b..bbbce61cfa6 100644 --- a/src/backend/postmaster/datachecksumsworker.c +++ b/src/backend/postmaster/datachecksumsworker.c @@ -409,6 +409,11 @@ ProcessSingleRelationFork(Relation reln, ForkNumber forkNum, BufferAccessStrateg relns, RelationGetRelationName(reln), forkNames[forkNum], numblocks); pgstat_report_activity(STATE_RUNNING, activity); + /* XXX only do this for main forks, maybe we should do it for all? */ + if (forkNum == MAIN_FORKNUM) + pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_BLOCKS_TOTAL, + numblocks); + /* * We are looping over the blocks which existed at the time of process * start, which is safe since new blocks are created with checksums set @@ -450,6 +455,11 @@ ProcessSingleRelationFork(Relation reln, ForkNumber forkNum, BufferAccessStrateg if (abort_requested) return false; + /* XXX only do this for main forks, maybe we should do it for all? */ + if (forkNum == MAIN_FORKNUM) + pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_BLOCKS_DONE, + (blknum + 1)); + vacuum_delay_point(false); } @@ -798,6 +808,8 @@ again: pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_PHASE, PROGRESS_DATACHECKSUMS_PHASE_WAITING_BACKENDS); + /* XXX isn't it weird there's no wait between the phase updates? */ + /* * Set the state to inprogress-on and wait on the procsignal barrier. */ @@ -908,8 +920,29 @@ ProcessAllDatabases(bool immediate_checkpoint) * columns for processed databases is instead increased such that it can * be compared against the total. */ - pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_TOTAL_DB, - list_length(DatabaseList)); + { + const int index[] = { + PROGRESS_DATACHECKSUMS_DBS_TOTAL, + PROGRESS_DATACHECKSUMS_DBS_DONE, + PROGRESS_DATACHECKSUMS_RELS_TOTAL, + PROGRESS_DATACHECKSUMS_RELS_DONE, + PROGRESS_DATACHECKSUMS_BLOCKS_TOTAL, + PROGRESS_DATACHECKSUMS_BLOCKS_DONE, + }; + + int64 vals[6]; + + vals[0] = list_length(DatabaseList); + vals[1] = 0; + + /* translated to NULL */ + vals[2] = -1; + vals[3] = -1; + vals[4] = -1; + vals[5] = -1; + + pgstat_progress_update_multi_param(6, index, vals); + } while (true) { @@ -921,14 +954,6 @@ ProcessAllDatabases(bool immediate_checkpoint) DataChecksumsWorkerResultEntry *entry; bool found; - /* - * Indicate which database is being processed set the number of - * relations to -1 to clear field from previous values. -1 will - * translate to NULL in the progress view. - */ - pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_CUR_DB, db->dboid); - pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_TOTAL_REL, -1); - /* * Check if this database has been processed already, and if so * whether it should be retried or skipped. @@ -957,6 +982,12 @@ ProcessAllDatabases(bool immediate_checkpoint) result = ProcessDatabase(db); processed_databases++; + /* + * Update the number of processed databases in the progress report. + */ + pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_DBS_DONE, + processed_databases); + if (result == DATACHECKSUMSWORKER_SUCCESSFUL) { /* @@ -1050,6 +1081,13 @@ ProcessAllDatabases(bool immediate_checkpoint) errhint("The server log might have more information on the cause of the error."))); } + /* + * When enabling checksums, we have to wait for a checkpoint for the + * checksums to e. + */ + pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_PHASE, + PROGRESS_DATACHECKSUMS_PHASE_WAITING_CHECKPOINT); + /* * Force a checkpoint to get everything out to disk. The use of immediate * checkpoints is for running tests, as they would otherwise not execute @@ -1258,6 +1296,7 @@ DataChecksumsWorkerMain(Datum arg) List *InitialTempTableList = NIL; BufferAccessStrategy strategy; bool aborted = false; + int64 rels_done; enabling_checksums = true; @@ -1271,6 +1310,10 @@ DataChecksumsWorkerMain(Datum arg) BackgroundWorkerInitializeConnectionByOid(dboid, InvalidOid, BGWORKER_BYPASS_ALLOWCONN); + /* worker will have a separate entry in pg_stat_progress_data_checksums */ + pgstat_progress_start_command(PROGRESS_COMMAND_DATACHECKSUMS, + InvalidOid); + /* * Get a list of all temp tables present as we start in this database. We * need to wait until they are all gone until we are done, since we cannot @@ -1297,6 +1340,24 @@ DataChecksumsWorkerMain(Datum arg) RelationList = BuildRelationList(false, DataChecksumsWorkerShmem->process_shared_catalogs); + + /* Update the total number of relations to be processed in this DB. */ + { + const int index[] = { + PROGRESS_DATACHECKSUMS_RELS_TOTAL, + PROGRESS_DATACHECKSUMS_RELS_DONE + }; + + int64 vals[2]; + + vals[0] = list_length(RelationList); + vals[1] = 0; + + pgstat_progress_update_multi_param(2, index, vals); + } + + /* process the relations */ + rels_done = 0; foreach_oid(reloid, RelationList) { if (!ProcessSingleRelationByOid(reloid, strategy)) @@ -1304,6 +1365,9 @@ DataChecksumsWorkerMain(Datum arg) aborted = true; break; } + + pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_RELS_DONE, + ++rels_done); } list_free(RelationList); @@ -1316,6 +1380,10 @@ DataChecksumsWorkerMain(Datum arg) return; } + /* The worker is about to wait for temporary tables to go away. */ + pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_PHASE, + PROGRESS_DATACHECKSUMS_PHASE_WAITING_TEMPREL); + /* * Wait for all temp tables that existed when we started to go away. This * is necessary since we cannot "reach" them to enable checksums. Any temp @@ -1372,5 +1440,8 @@ DataChecksumsWorkerMain(Datum arg) list_free(InitialTempTableList); + /* worker done */ + pgstat_progress_end_command(); + DataChecksumsWorkerShmem->success = DATACHECKSUMSWORKER_SUCCESSFUL; } diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h index 1ebd0c792b4..94b478a6cc9 100644 --- a/src/include/commands/progress.h +++ b/src/include/commands/progress.h @@ -158,21 +158,20 @@ #define PROGRESS_COPY_TYPE_CALLBACK 4 /* Progress parameters for PROGRESS_DATACHECKSUMS */ -#define PROGRESS_DATACHECKSUMS_PHASE 0 -#define PROGRESS_DATACHECKSUMS_TOTAL_DB 1 -#define PROGRESS_DATACHECKSUMS_TOTAL_REL 2 -#define PROGRESS_DATACHECKSUMS_PROCESSED_DB 3 -#define PROGRESS_DATACHECKSUMS_PROCESSED_REL 4 -#define PROGRESS_DATACHECKSUMS_CUR_DB 5 -#define PROGRESS_DATACHECKSUMS_CUR_REL 6 -#define PROGRESS_DATACHECKSUMS_CUR_REL_TOTAL_BLOCKS 7 -#define PROGRESS_DATACHECKSUMS_CUR_REL_PROCESSED_BLOCKS 8 +#define PROGRESS_DATACHECKSUMS_PHASE 0 +#define PROGRESS_DATACHECKSUMS_DBS_TOTAL 1 +#define PROGRESS_DATACHECKSUMS_DBS_DONE 2 +#define PROGRESS_DATACHECKSUMS_RELS_TOTAL 3 +#define PROGRESS_DATACHECKSUMS_RELS_DONE 4 +#define PROGRESS_DATACHECKSUMS_BLOCKS_TOTAL 5 +#define PROGRESS_DATACHECKSUMS_BLOCKS_DONE 6 /* Phases of datachecksumsworker operation */ -#define PROGRESS_DATACHECKSUMS_PHASE_ENABLING 0 -#define PROGRESS_DATACHECKSUMS_PHASE_DISABLING 1 -#define PROGRESS_DATACHECKSUMS_PHASE_WAITING_BACKENDS 2 -#define PROGRESS_DATACHECKSUMS_PHASE_WAITING_TEMPREL 3 -#define PROGRESS_DATACHECKSUMS_DONE 4 +#define PROGRESS_DATACHECKSUMS_PHASE_ENABLING 0 +#define PROGRESS_DATACHECKSUMS_PHASE_DISABLING 1 +#define PROGRESS_DATACHECKSUMS_PHASE_WAITING 2 +#define PROGRESS_DATACHECKSUMS_PHASE_WAITING_BACKENDS 3 +#define PROGRESS_DATACHECKSUMS_PHASE_WAITING_TEMPREL 4 +#define PROGRESS_DATACHECKSUMS_PHASE_WAITING_CHECKPOINT 5 #endif diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 36bed4b168d..2cfea837554 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -2050,25 +2050,34 @@ pg_stat_progress_data_checksums| SELECT s.pid, WHEN 2 THEN 'waiting'::text WHEN 3 THEN 'waiting on backends'::text WHEN 4 THEN 'waiting on temporary tables'::text - WHEN 5 THEN 'done'::text + WHEN 5 THEN 'waiting on checkpoint'::text + WHEN 6 THEN 'done'::text ELSE NULL::text END AS phase, CASE s.param2 WHEN '-1'::integer THEN NULL::bigint ELSE s.param2 END AS databases_total, - CASE s.param3 + s.param3 AS databases_done, + CASE s.param4 WHEN '-1'::integer THEN NULL::bigint - ELSE s.param3 + ELSE s.param4 END AS relations_total, - s.param4 AS databases_processed, - s.param5 AS relations_processed, - s.param6 AS databases_current, - s.param7 AS relation_current, - s.param8 AS relation_current_blocks, - s.param9 AS relation_current_blocks_processed + CASE s.param5 + WHEN '-1'::integer THEN NULL::bigint + ELSE s.param5 + END AS relations_done, + CASE s.param6 + WHEN '-1'::integer THEN NULL::bigint + ELSE s.param6 + END AS blocks_total, + CASE s.param7 + WHEN '-1'::integer THEN NULL::bigint + ELSE s.param7 + END AS blocks_done FROM (pg_stat_get_progress_info('DATACHECKSUMS'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20) - LEFT JOIN pg_database d ON ((s.datid = d.oid))); + LEFT JOIN pg_database d ON ((s.datid = d.oid))) + ORDER BY s.datid; pg_stat_progress_vacuum| SELECT s.pid, s.datid, d.datname, -- 2.48.1