From 9dbb19a857862168d6168377389dd86a595962c9 Mon Sep 17 00:00:00 2001 From: Sami Imseih Date: Mon, 10 Nov 2025 00:03:41 -0600 Subject: [PATCH v1 1/2] pgstat: support custom serialization files and callbacks Allow custom statistics kinds to serialize and deserialize extra per-entry data, supporting kinds with variable auxiliary data that cannot fit in the fixed shared-memory layout. Add optional to_serialized_extra and from_serialized_extra callbacks to PgStat_KindInfo, along with num_serialized_extra_files. Both callbacks must be provided together, and num_serialized_extra_files must be > 0. The to_serialized_extra callback writes extra data to disk, and from_serialized_extra reads it back into the entry. The callbacks receive the entry key, header, core stats file descriptor, and a list of extra file descriptors, up to the number specified by num_serialized_extra_files. Introduce PgStat_SerializeFiles to track temporary and permanent file paths and descriptors. pgstat_allocate_files opens all files for read or write, and pgstat_cleanup_files centralizes cleanup and unlinking. Rename write_chunk/read_chunk helpers to pgstat_write_chunk and pgstat_read_chunk, along with the *_s convenience macros, and make these routines globally available for use by plug-ins. --- src/backend/utils/activity/pgstat.c | 412 +++++++++++++++++++++------- src/include/utils/pgstat_internal.h | 21 ++ 2 files changed, 340 insertions(+), 93 deletions(-) diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index 7ef06150df7..a5d0a475db0 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -154,6 +154,26 @@ typedef struct PgStat_SnapshotEntry void *data; /* the stats data itself */ } PgStat_SnapshotEntry; +/* + * Struct used to track the set of serialization files managed by the + * pgstat infrastructure. This includes both the core statistics file and + * any additional files defined by the stats kind. The core file must be + * listed at index 0. + * + * The structure maintains three parallel lists: temporary file paths, + * statistics file paths, and their corresponding file descriptors. + * + * Custom callbacks are provided the list of file desciptors that + * belong to their stats kind. + */ +typedef struct PgStat_SerializeFiles +{ + char **tmpfiles; + char **statfiles; + FILE **fd; + int num_files; +} PgStat_SerializeFiles; + /* ---------- * Backend-local Hash Table Definitions @@ -1525,6 +1545,34 @@ pgstat_register_kind(PgStat_Kind kind, const PgStat_KindInfo *kind_info) errdetail("Existing cumulative statistics with ID %u has the same name.", existing_kind))); } + /* + * Ensure that both serialization and deserialization callbacks are + * registered together or not at all, and only for custom stats with a + * variable number of extra files. + */ + if ((kind_info->to_serialized_extra && !kind_info->from_serialized_extra) || + (!kind_info->to_serialized_extra && kind_info->from_serialized_extra)) + { + ereport(ERROR, + (errmsg("could not register custom cumulative statistics \"%s\" with ID %u", + kind_info->name, kind), + errdetail("Both to_serialized_extra and from_serialized_extra must be provided."))); + + } + + /* + * If extra serialization callbacks are set, num_serialized_extra_files + * must be greater than 0. + */ + if (kind_info->to_serialized_extra && + kind_info->num_serialized_extra_files < 1) + { + ereport(ERROR, + (errmsg("could not register custom cumulative statistics \"%s\" with ID %u", + kind_info->name, kind), + errdetail("Extra serialization callbacks were specified, but num_serialized_extra_files is zero"))); + } + /* Register it */ pgstat_kind_custom_infos[idx] = kind_info; ereport(LOG, @@ -1552,8 +1600,8 @@ pgstat_assert_is_up(void) */ /* helpers for pgstat_write_statsfile() */ -static void -write_chunk(FILE *fpout, void *ptr, size_t len) +void +pgstat_write_chunk(FILE *fpout, void *ptr, size_t len) { int rc; @@ -1563,7 +1611,178 @@ write_chunk(FILE *fpout, void *ptr, size_t len) (void) rc; } -#define write_chunk_s(fpout, ptr) write_chunk(fpout, ptr, sizeof(*ptr)) +/* helpers for pgstat_read_statsfile() */ +bool +pgstat_read_chunk(FILE *fpin, void *ptr, size_t len) +{ + return fread(ptr, 1, len, fpin) == len; +} + +/* + * Close and remove all files recorded in a PgStat_SerializeFiles array. + * Depending on the is_temporary flag, this will remove either the temporary + * or the permanent filenames associated with each file descriptor. + * + * Errors encountered while closing files are logged, but cleanup continues + * for all remaining files. This function assumes the structure passed in + * was allocated by pgstat_allocate_files() and contains valid file descriptors + * for each opened stats file. + * + * NB: The array includes an extra slot at files[PGSTAT_KIND_MIN-1] for the + * core stats file. The loop below iterates from 0 to PGSTAT_KIND_CUSTOM_SIZE, + * processing all allocated slots, including the extra one. + */ +static void +pgstat_cleanup_files(PgStat_SerializeFiles *files, bool is_temporary) +{ + for (int i = 0; i < PGSTAT_KIND_CUSTOM_SIZE + 1; i++) + { + for (int j = 0; j < files[i].num_files; j++) + { + FILE *fd = files[i].fd[j]; + + const char *filename = is_temporary ? + files[i].tmpfiles[j] : + files[i].statfiles[j]; + const char *type_str = is_temporary ? "temporary" : "permanent"; + + elog(DEBUG2, "removing %s stats file \"%s\"", type_str, filename); + + if (fd && FreeFile(fd) < 0) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not close %s statistics file \"%s\": %m", + type_str, filename))); + } + + unlink(filename); + + /* free per-file strings */ + pfree(files[i].tmpfiles[j]); + pfree(files[i].statfiles[j]); + } + + /* free each file arrays */ + if (files[i].fd) + pfree(files[i].fd); + + if (files[i].tmpfiles) + pfree(files[i].tmpfiles); + + if (files[i].statfiles) + pfree(files[i].statfiles); + } + + /* finally free top-level array */ + pfree(files); +} + +/* + * Allocate and open all statistics serialization files for reading or writing. + * + * The returned array contains the core permanent stats file at index 0, + * followed by any per-kind extra files at indices PGSTAT_KIND_CUSTOM_MIN + * to PGSTAT_KIND_CUSTOM_MAX. + * + * In write mode, temporary files are opened. If any file cannot be opened, + * all partially created temporary files are removed using pgstat_cleanup_files(), + * and NULL is returned. + * + * In read mode, permanent files are opened. If the core file cannot be opened, + * all stats are reset and NULL is returned. If a per-kind extra file cannot be + * opened, we proceed to the next extra file. + * + * Returns an array of PgStat_SerializeFiles on success, or NULL on failure + * after cleanup or stats reset as described above. + * + * NB: pgstat_cleanup_files is responsible for pfree'ing the PgStat_SerializeFiles + * array. + */ +static PgStat_SerializeFiles * +pgstat_allocate_files(bool is_read) +{ + const char *mode = is_read ? PG_BINARY_R : PG_BINARY_W; + const char *action = is_read ? "reading" : "writing"; + const char *core_path; + PgStat_SerializeFiles *core; + PgStat_SerializeFiles *files = palloc0(sizeof(PgStat_SerializeFiles) * (PGSTAT_KIND_CUSTOM_SIZE + 1)); + + /* --- Core pgstat file setup --- */ + core = &files[PGSTAT_KIND_MIN - 1]; + core->num_files = 1; + core->tmpfiles = palloc(sizeof(char *)); + core->statfiles = palloc(sizeof(char *)); + core->fd = palloc(sizeof(FILE *)); + + core->tmpfiles[0] = pstrdup(PGSTAT_STAT_PERMANENT_TMPFILE); + core->statfiles[0] = pstrdup(PGSTAT_STAT_PERMANENT_FILENAME); + + elog(DEBUG2, "%s stats file \"%s\"", action, core->statfiles[0]); + + core_path = is_read ? core->statfiles[0] : core->tmpfiles[0]; + core->fd[0] = AllocateFile(core_path, mode); + if (core->fd[0] == NULL) + { + if (!is_read || errno != ENOENT) + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not open %s statistics file \"%s\": %m", + is_read ? "permanent" : "temporary", core_path))); + + if (!is_read) + pgstat_cleanup_files(files, true); + else + pgstat_reset_after_failure(); + + return NULL; + } + + /* --- Per-kind extra pgstat files --- */ + for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++) + { + int nfiles; + int index = (kind - PGSTAT_KIND_CUSTOM_MIN) + 1; + PgStat_SerializeFiles *extra; + const PgStat_KindInfo *info = pgstat_get_kind_info(kind); + + if (!info || info->num_serialized_extra_files < 1) + continue; + + nfiles = info->num_serialized_extra_files; + extra = &files[index]; + + extra->num_files = nfiles; + extra->tmpfiles = palloc(sizeof(char *) * nfiles); + extra->statfiles = palloc(sizeof(char *) * nfiles); + extra->fd = palloc(sizeof(FILE *) * nfiles); + + for (int i = 0; i < nfiles; i++) + { + const char *path; + + extra->tmpfiles[i] = psprintf("%s/pgstat.%d.%d.tmp", + PGSTAT_STAT_PERMANENT_DIRECTORY, kind, i); + extra->statfiles[i] = psprintf("%s/pgstat.%d.%d.stat", + PGSTAT_STAT_PERMANENT_DIRECTORY, kind, i); + + path = is_read ? extra->statfiles[i] : extra->tmpfiles[i]; + elog(DEBUG2, "%s stats file \"%s\"", action, extra->statfiles[i]); + + extra->fd[i] = AllocateFile(path, mode); + if (extra->fd[i] == NULL) + { + if (!is_read || errno != ENOENT) + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not open %s statistics file \"%s\": %m", + is_read ? "permanent" : "temporary ", path))); + } + } + } + + return files; +} /* * This function is called in the last process that is accessing the shared @@ -1574,10 +1793,9 @@ pgstat_write_statsfile(void) { FILE *fpout; int32 format_id; - const char *tmpfile = PGSTAT_STAT_PERMANENT_TMPFILE; - const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME; dshash_seq_status hstat; PgStatShared_HashEntry *ps; + PgStat_SerializeFiles *files = NULL; pgstat_assert_is_up(); @@ -1587,26 +1805,17 @@ pgstat_write_statsfile(void) /* we're shutting down, so it's ok to just override this */ pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE; - elog(DEBUG2, "writing stats file \"%s\"", statfile); - - /* - * Open the statistics temp file to write out the current values. - */ - fpout = AllocateFile(tmpfile, PG_BINARY_W); - if (fpout == NULL) - { - ereport(LOG, - (errcode_for_file_access(), - errmsg("could not open temporary statistics file \"%s\": %m", - tmpfile))); + files = pgstat_allocate_files(false); + if (files == NULL) return; - } + + fpout = files[PGSTAT_KIND_MIN - 1].fd[0]; /* * Write the file header --- currently just a format ID. */ format_id = PGSTAT_FILE_FORMAT_ID; - write_chunk_s(fpout, &format_id); + pgstat_write_chunk_s(fpout, &format_id); /* Write various stats structs for fixed number of objects */ for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++) @@ -1631,8 +1840,8 @@ pgstat_write_statsfile(void) ptr = pgStatLocal.snapshot.custom_data[kind - PGSTAT_KIND_CUSTOM_MIN]; fputc(PGSTAT_FILE_ENTRY_FIXED, fpout); - write_chunk_s(fpout, &kind); - write_chunk(fpout, ptr, info->shared_data_len); + pgstat_write_chunk_s(fpout, &kind); + pgstat_write_chunk(fpout, ptr, info->shared_data_len); } /* @@ -1686,7 +1895,7 @@ pgstat_write_statsfile(void) { /* normal stats entry, identified by PgStat_HashKey */ fputc(PGSTAT_FILE_ENTRY_HASH, fpout); - write_chunk_s(fpout, &ps->key); + pgstat_write_chunk_s(fpout, &ps->key); } else { @@ -1696,57 +1905,73 @@ pgstat_write_statsfile(void) kind_info->to_serialized_name(&ps->key, shstats, &name); fputc(PGSTAT_FILE_ENTRY_NAME, fpout); - write_chunk_s(fpout, &ps->key.kind); - write_chunk_s(fpout, &name); + pgstat_write_chunk_s(fpout, &ps->key.kind); + pgstat_write_chunk_s(fpout, &name); + } + + /* A plug-in is saving extra data */ + if (kind_info->to_serialized_extra) + { + int index = (ps->key.kind - PGSTAT_KIND_CUSTOM_MIN) + 1; + + Assert(files[index].fd); + + kind_info->to_serialized_extra(&ps->key, shstats, fpout, files[index].fd); } /* Write except the header part of the entry */ - write_chunk(fpout, - pgstat_get_entry_data(ps->key.kind, shstats), - pgstat_get_entry_len(ps->key.kind)); + pgstat_write_chunk(fpout, + pgstat_get_entry_data(ps->key.kind, shstats), + pgstat_get_entry_len(ps->key.kind)); + } dshash_seq_term(&hstat); /* * No more output to be done. Close the temp file and replace the old * pgstat.stat with it. The ferror() check replaces testing for error - * after each individual fputc or fwrite (in write_chunk()) above. + * after each individual fputc or fwrite (in pgstat_write_chunk()) above. */ fputc(PGSTAT_FILE_ENTRY_END, fpout); - if (ferror(fpout)) - { - ereport(LOG, - (errcode_for_file_access(), - errmsg("could not write temporary statistics file \"%s\": %m", - tmpfile))); - FreeFile(fpout); - unlink(tmpfile); - } - else if (FreeFile(fpout) < 0) - { - ereport(LOG, - (errcode_for_file_access(), - errmsg("could not close temporary statistics file \"%s\": %m", - tmpfile))); - unlink(tmpfile); - } - else if (durable_rename(tmpfile, statfile, LOG) < 0) + for (int i = 0; i < PGSTAT_KIND_CUSTOM_SIZE + 1; i++) { - /* durable_rename already emitted log message */ - unlink(tmpfile); + for (int j = 0; j < files[i].num_files; j++) + { + const char *tmpfile; + const char *statfile; + FILE *fd = NULL; + + tmpfile = files[i].tmpfiles[j]; + statfile = files[i].statfiles[j]; + fd = files[i].fd[j]; + + if (ferror(fd)) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not write temporary statistics file \"%s\": %m", + tmpfile))); + FreeFile(fd); + unlink(tmpfile); + } + else if (FreeFile(fd) < 0) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not close temporary statistics file \"%s\": %m", + tmpfile))); + unlink(tmpfile); + } + else if (durable_rename(tmpfile, statfile, LOG) < 0) + { + /* durable_rename already emitted log message */ + unlink(tmpfile); + } + } } } -/* helpers for pgstat_read_statsfile() */ -static bool -read_chunk(FILE *fpin, void *ptr, size_t len) -{ - return fread(ptr, 1, len, fpin) == len; -} - -#define read_chunk_s(fpin, ptr) read_chunk(fpin, ptr, sizeof(*ptr)) - /* * Reads in existing statistics file into memory. * @@ -1761,36 +1986,21 @@ pgstat_read_statsfile(void) bool found; const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME; PgStat_ShmemControl *shmem = pgStatLocal.shmem; + PgStat_SerializeFiles *files = NULL; /* shouldn't be called from postmaster */ Assert(IsUnderPostmaster || !IsPostmasterEnvironment); - elog(DEBUG2, "reading stats file \"%s\"", statfile); - - /* - * Try to open the stats file. If it doesn't exist, the backends simply - * returns zero for anything and statistics simply starts from scratch - * with empty counters. - * - * ENOENT is a possibility if stats collection was previously disabled or - * has not yet written the stats file for the first time. Any other - * failure condition is suspicious. - */ - if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL) - { - if (errno != ENOENT) - ereport(LOG, - (errcode_for_file_access(), - errmsg("could not open statistics file \"%s\": %m", - statfile))); - pgstat_reset_after_failure(); + files = pgstat_allocate_files(true); + if (files == NULL) return; - } + + fpin = files[PGSTAT_KIND_MIN - 1].fd[0]; /* * Verify it's of the expected format. */ - if (!read_chunk_s(fpin, &format_id)) + if (!pgstat_read_chunk_s(fpin, &format_id)) { elog(WARNING, "could not read format ID"); goto error; @@ -1820,7 +2030,7 @@ pgstat_read_statsfile(void) char *ptr; /* entry for fixed-numbered stats */ - if (!read_chunk_s(fpin, &kind)) + if (!pgstat_read_chunk_s(fpin, &kind)) { elog(WARNING, "could not read stats kind for entry of type %c", t); goto error; @@ -1860,7 +2070,7 @@ pgstat_read_statsfile(void) info->shared_data_off; } - if (!read_chunk(fpin, ptr, info->shared_data_len)) + if (!pgstat_read_chunk(fpin, ptr, info->shared_data_len)) { elog(WARNING, "could not read data of stats kind %u for entry of type %c with size %u", kind, t, info->shared_data_len); @@ -1875,13 +2085,14 @@ pgstat_read_statsfile(void) PgStat_HashKey key; PgStatShared_HashEntry *p; PgStatShared_Common *header; + const PgStat_KindInfo *kind_info = NULL; CHECK_FOR_INTERRUPTS(); if (t == PGSTAT_FILE_ENTRY_HASH) { /* normal stats entry, identified by PgStat_HashKey */ - if (!read_chunk_s(fpin, &key)) + if (!pgstat_read_chunk_s(fpin, &key)) { elog(WARNING, "could not read key for entry of type %c", t); goto error; @@ -1894,8 +2105,8 @@ pgstat_read_statsfile(void) key.objid, t); goto error; } - - if (!pgstat_get_kind_info(key.kind)) + kind_info = pgstat_get_kind_info(key.kind); + if (!kind_info) { elog(WARNING, "could not find information of kind for entry %u/%u/%" PRIu64 " of type %c", key.kind, key.dboid, @@ -1906,16 +2117,15 @@ pgstat_read_statsfile(void) else { /* stats entry identified by name on disk (e.g. slots) */ - const PgStat_KindInfo *kind_info = NULL; PgStat_Kind kind; NameData name; - if (!read_chunk_s(fpin, &kind)) + if (!pgstat_read_chunk_s(fpin, &kind)) { elog(WARNING, "could not read stats kind for entry of type %c", t); goto error; } - if (!read_chunk_s(fpin, &name)) + if (!pgstat_read_chunk_s(fpin, &name)) { elog(WARNING, "could not read name of stats kind %u for entry of type %c", kind, t); @@ -1990,9 +2200,9 @@ pgstat_read_statsfile(void) key.objid, t); } - if (!read_chunk(fpin, - pgstat_get_entry_data(key.kind, header), - pgstat_get_entry_len(key.kind))) + if (!pgstat_read_chunk(fpin, + pgstat_get_entry_data(key.kind, header), + pgstat_get_entry_len(key.kind))) { elog(WARNING, "could not read data for entry %u/%u/%" PRIu64 " of type %c", key.kind, key.dboid, @@ -2000,6 +2210,25 @@ pgstat_read_statsfile(void) goto error; } + /* + * A plug-in is reading extra data. If reading fails, the + * corresponding file is closed, the error is logged, and + * processing continues. + */ + if (kind_info->from_serialized_extra) + { + int index = (key.kind - PGSTAT_KIND_CUSTOM_MIN) + 1; + + Assert(files[index].fd); + + if (!kind_info->from_serialized_extra(&key, header, fpin, files[index].fd)) + { + elog(WARNING, "could not read extra data for entry %u/%u/%" PRIu64 " of type %c", + key.kind, key.dboid, + key.objid, t); + } + } + break; } case PGSTAT_FILE_ENTRY_END: @@ -2023,10 +2252,7 @@ pgstat_read_statsfile(void) } done: - FreeFile(fpin); - - elog(DEBUG2, "removing permanent stats file \"%s\"", statfile); - unlink(statfile); + pgstat_cleanup_files(files, false); return; diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h index 4d2b8aa6081..be39c4518f8 100644 --- a/src/include/utils/pgstat_internal.h +++ b/src/include/utils/pgstat_internal.h @@ -303,6 +303,16 @@ typedef struct PgStat_KindInfo const PgStatShared_Common *header, NameData *name); bool (*from_serialized_name) (const NameData *name, PgStat_HashKey *key); + /* + * For custom, variable-numbered stats, serialize/deserialize extra data + * per entry. Optional. + */ + bool (*from_serialized_extra) (PgStat_HashKey *key, + const PgStatShared_Common *header, FILE *statfile, FILE **extra_files); + void (*to_serialized_extra) (const PgStat_HashKey *key, + const PgStatShared_Common *header, FILE *statfile, FILE **extra_files); + int num_serialized_extra_files; + /* * For fixed-numbered statistics: Initialize shared memory state. * @@ -984,4 +994,15 @@ pgstat_get_custom_snapshot_data(PgStat_Kind kind) return pgStatLocal.snapshot.custom_data[idx]; } +/* ------------------------------------------------------------ + * reading and writing of on-disk stats file + * ------------------------------------------------------------ + */ + +/* helpers for pgstat_write_statsfile() */ +extern void pgstat_write_chunk(FILE *fpout, void *ptr, size_t len); +extern bool pgstat_read_chunk(FILE *fpin, void *ptr, size_t len); +#define pgstat_read_chunk_s(fpin, ptr) pgstat_read_chunk(fpin, ptr, sizeof(*ptr)) +#define pgstat_write_chunk_s(fpout, ptr) pgstat_write_chunk(fpout, ptr, sizeof(*ptr)) + #endif /* PGSTAT_INTERNAL_H */ -- 2.43.0