From b739ca004ccdede260c541ba8c8dc1aa4e3ea15d Mon Sep 17 00:00:00 2001 From: Lukas Fittl Date: Sun, 29 Mar 2026 11:47:27 -0700 Subject: [PATCH vnocfbot-2] Make pg_stash_advice dump advice to disk when changed, load after restart --- contrib/pg_stash_advice/pg_stash_advice.c | 533 +++++++++++++++++++++- doc/src/sgml/pgstashadvice.sgml | 31 +- 2 files changed, 559 insertions(+), 5 deletions(-) diff --git a/contrib/pg_stash_advice/pg_stash_advice.c b/contrib/pg_stash_advice/pg_stash_advice.c index 22122236694..61fb12a1888 100644 --- a/contrib/pg_stash_advice/pg_stash_advice.c +++ b/contrib/pg_stash_advice/pg_stash_advice.c @@ -18,22 +18,37 @@ */ #include "postgres.h" +#include <unistd.h> + #include "common/hashfn.h" #include "common/string.h" #include "fmgr.h" #include "funcapi.h" +#include "miscadmin.h" #include "lib/dshash.h" #include "nodes/queryjumble.h" #include "pg_plan_advice.h" +#include "postmaster/bgworker.h" +#include "postmaster/interrupt.h" #include "storage/dsm_registry.h" +#include "storage/fd.h" +#include "storage/ipc.h" +#include "storage/latch.h" #include "storage/lwlock.h" +#include "storage/proc.h" +#include "storage/procsignal.h" +#include "utils/backend_status.h" #include "utils/builtins.h" #include "utils/guc.h" #include "utils/memutils.h" #include "utils/tuplestore.h" +#define PGSA_DUMP_FILE "pg_stash_advice.tsv" + PG_MODULE_MAGIC; +PGDLLEXPORT void pgsa_stash_advice_main(Datum main_arg); + PG_FUNCTION_INFO_V1(pg_create_advice_stash); PG_FUNCTION_INFO_V1(pg_drop_advice_stash); PG_FUNCTION_INFO_V1(pg_get_advice_stash_contents); @@ -50,6 +65,8 @@ typedef struct pgsa_shared_state dsa_handle area; dshash_table_handle stash_hash; dshash_table_handle entry_hash; + ProcNumber bgworker_proc; + bool dump_requested; } pgsa_shared_state; typedef struct pgsa_stash @@ -131,8 +148,9 @@ static dshash_parameters pgsa_entry_dshash_parameters = { LWTRANCHE_INVALID /* gets set at runtime */ }; -/* GUC variable */ +/* GUC variables */ static char *pg_stash_advice_stash_name 
= ""; +static bool pg_stash_advice_save = true; /* Other global variables */ static MemoryContext pg_stash_advice_mcxt; @@ -154,6 +172,13 @@ static void pgsa_init_shared_state(void *ptr, void *arg); static uint64 pgsa_lookup_stash_id(char *stash_name); static void pgsa_set_advice_string(char *stash_name, int64 queryId, char *advice_string); +static void pgsa_dump_to_file(void); +static void pgsa_load_from_file(void); +static void pgsa_request_dump(void); +static void pgsa_detach_shmem(int code, Datum arg); +static void pgsa_start_bgworker(void); +static char *pgsa_escape_string(char *str); +static char *pgsa_read_next_field(char **pp); /* * Initialize this module. @@ -178,6 +203,28 @@ _PG_init(void) NULL, NULL); + if (process_shared_preload_libraries_in_progress) + { + /* can't define PGC_POSTMASTER variable after startup */ + DefineCustomBoolVariable("pg_stash_advice.save", + "Save and restore advice stash contents across restarts.", + NULL, + &pg_stash_advice_save, + true, + PGC_POSTMASTER, + 0, + NULL, + NULL, + NULL); + + /* + * Register background worker for dumping entries to recover on + * restart, if enabled. + */ + if (pg_stash_advice_save) + pgsa_start_bgworker(); + } + MarkGUCPrefixReserved("pg_stash_advice"); /* Tell pg_plan_advice that we want to provide advice strings. 
*/ @@ -201,6 +248,7 @@ pg_create_advice_stash(PG_FUNCTION_ARGS) LWLockAcquire(&pgsa_state->lock, LW_EXCLUSIVE); pgsa_create_stash(stash_name); LWLockRelease(&pgsa_state->lock); + pgsa_request_dump(); PG_RETURN_VOID(); } @@ -218,6 +266,7 @@ pg_drop_advice_stash(PG_FUNCTION_ARGS) LWLockAcquire(&pgsa_state->lock, LW_EXCLUSIVE); pgsa_drop_stash(stash_name); LWLockRelease(&pgsa_state->lock); + pgsa_request_dump(); PG_RETURN_VOID(); } @@ -441,6 +490,7 @@ pg_set_stashed_advice(PG_FUNCTION_ARGS) pgsa_set_advice_string(stash_name, queryId, advice_string); } + pgsa_request_dump(); PG_RETURN_VOID(); } @@ -796,6 +846,8 @@ pgsa_init_shared_state(void *ptr, void *arg) state->area = DSA_HANDLE_INVALID; state->stash_hash = DSHASH_HANDLE_INVALID; state->entry_hash = DSHASH_HANDLE_INVALID; + state->bgworker_proc = INVALID_PROC_NUMBER; + state->dump_requested = false; } /* @@ -898,3 +950,482 @@ pgsa_set_advice_string(char *stash_name, int64 queryId, char *advice_string) dsa_free(pgsa_dsa_area, old_dp); LWLockRelease(&pgsa_state->lock); } + +/* + * Background worker entry point. + * + * This worker loads the dump file on startup, then waits for dump requests + * from backends. On shutdown, it performs a final dump. + */ +void +pgsa_stash_advice_main(Datum main_arg) +{ + /* Establish signal handlers; once that's done, unblock signals. */ + pqsignal(SIGTERM, SignalHandlerForShutdownRequest); + pqsignal(SIGHUP, SignalHandlerForConfigReload); + pqsignal(SIGUSR1, procsignal_sigusr1_handler); + BackgroundWorkerUnblockSignals(); + + /* Set up a session user so pgstat_bestart_final() can report it. */ + InitializeSessionUserIdStandalone(); + + /* Report this worker in pg_stat_activity. */ + pgstat_beinit(); + pgstat_bestart_initial(); + pgstat_bestart_final(); + + /* Attach to shared memory structures. */ + pgsa_attach(); + + /* + * Set on-detach hook so that our proc number will be cleared on exit. 
+ * + * NB: pg_stash_advice's state is stored in a DSM segment, and DSM + * segments are detached before calling the on_shmem_exit callbacks, so we + * must put pgsa_detach_shmem in the before_shmem_exit callback list. + */ + before_shmem_exit(pgsa_detach_shmem, 0); + + /* + * Store our proc number in shared memory, unless there's already another + * worker running. + */ + LWLockAcquire(&pgsa_state->lock, LW_EXCLUSIVE); + if (pgsa_state->bgworker_proc != INVALID_PROC_NUMBER) + { + LWLockRelease(&pgsa_state->lock); + ereport(LOG, + (errmsg("pg_stash_advice worker is already running under PID %d", + (int) GetPGProcByNumber(pgsa_state->bgworker_proc)->pid))); + return; + } + pgsa_state->bgworker_proc = MyProcNumber; + LWLockRelease(&pgsa_state->lock); + + /* Load previously saved stash data from disk. */ + pgsa_load_from_file(); + + /* Dump when requested, until shutdown. */ + while (!ShutdownRequestPending) + { + bool dump_requested = false; + + /* In case of a SIGHUP, just reload the configuration. */ + if (ConfigReloadPending) + { + ConfigReloadPending = false; + ProcessConfigFile(PGC_SIGHUP); + } + + /* Check whether a dump has been requested. */ + LWLockAcquire(&pgsa_state->lock, LW_EXCLUSIVE); + if (pgsa_state->dump_requested) + { + pgsa_state->dump_requested = false; + dump_requested = true; + } + LWLockRelease(&pgsa_state->lock); + + if (dump_requested) + pgsa_dump_to_file(); + + /* + * Sleep for up to 60 seconds before checking again. This ensures we + * coalesce multiple rapid changes into a single dump. + */ + (void) WaitLatch(MyLatch, + WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, + 60000L, + PG_WAIT_EXTENSION); + + ResetLatch(MyLatch); + } + + /* Perform a final dump before exiting. */ + pgsa_dump_to_file(); +} + +/* + * Request a dump of stash data to disk by setting a flag the worker polls. 
+ */ +static void +pgsa_request_dump(void) +{ + LWLockAcquire(&pgsa_state->lock, LW_EXCLUSIVE); + pgsa_state->dump_requested = true; + LWLockRelease(&pgsa_state->lock); +} + +/* + * Clear our proc number from shared memory on exit. + */ +static void +pgsa_detach_shmem(int code, Datum arg) +{ + LWLockAcquire(&pgsa_state->lock, LW_EXCLUSIVE); + if (pgsa_state->bgworker_proc == MyProcNumber) + pgsa_state->bgworker_proc = INVALID_PROC_NUMBER; + LWLockRelease(&pgsa_state->lock); +} + +/* + * Register the background worker. + */ +static void +pgsa_start_bgworker(void) +{ + BackgroundWorker worker = {0}; + + worker.bgw_flags = BGWORKER_SHMEM_ACCESS; + worker.bgw_start_time = BgWorkerStart_ConsistentState; + strcpy(worker.bgw_library_name, "pg_stash_advice"); + strcpy(worker.bgw_function_name, "pgsa_stash_advice_main"); + strcpy(worker.bgw_name, "pg_stash_advice worker"); + strcpy(worker.bgw_type, "pg_stash_advice worker"); + + RegisterBackgroundWorker(&worker); +} + +/* + * Dump all advice stash data to a file. + * + * The file format is a simple TSV with a line-type prefix: + * stash\tstash_name + * entry\tstash_name\tquery_id\tadvice_string + * + * Stash names and advice strings are backslash-escaped where needed. + */ +static void +pgsa_dump_to_file(void) +{ + FILE *file; + char transient_dump_file_path[MAXPGPATH]; + dshash_seq_status iter; + pgsa_stash *stash; + pgsa_entry *entry; + pgsa_stash_name_table_hash *nhash; + int ret = 0; + MemoryContext tmpcxt; + MemoryContext oldcxt; + + Assert(pgsa_entry_dshash != NULL); + + /* Use a temporary context so all allocations are freed at the end. */ + tmpcxt = AllocSetContextCreate(CurrentMemoryContext, + "pg_stash_advice dump", + ALLOCSET_DEFAULT_SIZES); + oldcxt = MemoryContextSwitchTo(tmpcxt); + + /* Open a temporary file for writing. 
*/ + snprintf(transient_dump_file_path, MAXPGPATH, "%s.tmp", PGSA_DUMP_FILE); + file = AllocateFile(transient_dump_file_path, "w"); + if (!file) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", + transient_dump_file_path))); + + /* Build an ID->name lookup table for writing entry lines. */ + nhash = pgsa_stash_name_table_create(tmpcxt, 64, NULL); + + /* Write stash lines. */ + dshash_seq_init(&iter, pgsa_stash_dshash, true); + while ((stash = dshash_seq_next(&iter)) != NULL) + { + pgsa_stash_name *n; + bool found; + + n = pgsa_stash_name_table_insert(nhash, stash->pgsa_stash_id, &found); + Assert(!found); + n->name = pstrdup(stash->name); + ret = fprintf(file, "stash\t%s\n", pgsa_escape_string(n->name)); + if (ret < 0) + break; + } + dshash_seq_term(&iter); + + /* Write entry lines. */ + if (ret >= 0) + { + dshash_seq_init(&iter, pgsa_entry_dshash, true); + while ((entry = dshash_seq_next(&iter)) != NULL) + { + pgsa_stash_name *n; + char *advice_string; + + if (entry->advice_string == InvalidDsaPointer) + continue; + + n = pgsa_stash_name_table_lookup(nhash, entry->key.pgsa_stash_id); + if (n == NULL) + continue; /* orphan entry, skip */ + + advice_string = dsa_get_address(pgsa_dsa_area, + entry->advice_string); + ret = fprintf(file, "entry\t%s\t%" PRId64 "\t%s\n", + pgsa_escape_string(n->name), + entry->key.queryId, + pgsa_escape_string(advice_string)); + if (ret < 0) + break; + } + dshash_seq_term(&iter); + } + + /* Handle any write error. */ + if (ret < 0) + { + int save_errno = errno; + + FreeFile(file); + unlink(transient_dump_file_path); + errno = save_errno; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to file \"%s\": %m", + transient_dump_file_path))); + } + + /* Close the file and rename it into place atomically. 
*/ + ret = FreeFile(file); + if (ret != 0) + { + int save_errno = errno; + + unlink(transient_dump_file_path); + errno = save_errno; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not close file \"%s\": %m", + transient_dump_file_path))); + } + + (void) durable_rename(transient_dump_file_path, PGSA_DUMP_FILE, ERROR); + + MemoryContextSwitchTo(oldcxt); + MemoryContextDelete(tmpcxt); +} + +/* + * Load advice stash data from the dump file. + * + * This is called by the background worker when it starts up (e.g. after a + * server restart or crash recovery), to restore the previously saved stash + * contents. + * + * Failure to open the file and malformed lines are reported as warnings, so + * that a corrupt dump file does not stop the worker from running. + */ +static void +pgsa_load_from_file(void) +{ + FILE *file; + int num_stashes = 0; + int num_entries = 0; + int num_malformed = 0; + char *line; + + file = AllocateFile(PGSA_DUMP_FILE, "r"); + if (!file) + { + if (errno == ENOENT) + return; /* no dump file, nothing to load */ + ereport(WARNING, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", PGSA_DUMP_FILE))); + return; + } + + /* Read lines until EOF. */ + while ((line = pg_get_line(file, NULL)) != NULL) + { + char *p = line; + char *line_type; + + /* Strip the trailing newline. */ + pg_strip_crlf(line); + + /* Split off the line type prefix (unescaped, plain keyword). */ + line_type = pgsa_read_next_field(&p); + if (line_type == NULL) + { + num_malformed++; + pfree(line); + continue; + } + + if (strcmp(line_type, "stash") == 0) + { + char *name = pgsa_read_next_field(&p); + + if (name != NULL) + { + /* + * Skip duplicates rather than ERRORing like + * pgsa_create_stash. 
+ */ + if (pgsa_lookup_stash_id(name) == 0) + { + LWLockAcquire(&pgsa_state->lock, LW_EXCLUSIVE); + pgsa_create_stash(name); + LWLockRelease(&pgsa_state->lock); + } + num_stashes++; + pfree(name); + } + else + num_malformed++; + } + else if (strcmp(line_type, "entry") == 0) + { + char *stash_name; + char *queryid_str; + char *advice_string; + int64 queryId; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + stash_name = pgsa_read_next_field(&p); + queryid_str = pgsa_read_next_field(&p); + advice_string = pgsa_read_next_field(&p); + + if (stash_name == NULL || queryid_str == NULL || + advice_string == NULL) + { + num_malformed++; + if (stash_name) + pfree(stash_name); + if (queryid_str) + pfree(queryid_str); + if (advice_string) + pfree(advice_string); + pfree(line_type); + pfree(line); + continue; + } + + queryId = pg_strtoint64_safe(queryid_str, (Node *) &escontext); + if (!SOFT_ERROR_OCCURRED(&escontext)) + { + pgsa_set_advice_string(stash_name, queryId, advice_string); + num_entries++; + } + else + num_malformed++; + + pfree(stash_name); + pfree(queryid_str); + pfree(advice_string); + } + else + { + num_malformed++; + } + pfree(line_type); + pfree(line); + } + + FreeFile(file); + + if (num_malformed > 0) + ereport(WARNING, + errmsg("skipped %d malformed advice lines on load", + num_malformed)); + + ereport(LOG, + errmsg("loaded %d advice stashes with %d entries", + num_stashes, num_entries)); +} + +/* + * Backslash-escape the string so it can be written to a tab-separated file. + * + * The escaped characters are backslash, tab, newline, and carriage return. 
+ */ +static char * +pgsa_escape_string(char *str) +{ + StringInfoData buf; + + if (!strpbrk(str, "\\\t\n\r")) + return str; /* nothing to escape */ + + initStringInfo(&buf); + for (const char *p = str; *p; p++) + { + switch (*p) + { + case '\\': + appendStringInfoString(&buf, "\\\\"); + break; + case '\t': + appendStringInfoString(&buf, "\\t"); + break; + case '\n': + appendStringInfoString(&buf, "\\n"); + break; + case '\r': + appendStringInfoString(&buf, "\\r"); + break; + default: + appendStringInfoChar(&buf, *p); + break; + } + } + + return buf.data; +} + +/* + * Read the next tab-delimited field from *pp, unescaping backslash sequences + * as we go. Advances *pp past the tab delimiter (or to end of string). + * + * Returns a palloc'd string with the unescaped field value, or NULL if *pp + * already points to '\0' (so an empty final field is reported as missing). + */ +static char * +pgsa_read_next_field(char **pp) +{ + StringInfoData buf; + const char *p = *pp; + + if (*p == '\0') + return NULL; + + initStringInfo(&buf); + while (*p != '\0' && *p != '\t') + { + if (*p == '\\' && p[1] != '\0') + { + p++; + switch (*p) + { + case '\\': + appendStringInfoChar(&buf, '\\'); + break; + case 't': + appendStringInfoChar(&buf, '\t'); + break; + case 'n': + appendStringInfoChar(&buf, '\n'); + break; + case 'r': + appendStringInfoChar(&buf, '\r'); + break; + default: + /* Unrecognized escape; drop the backslash, keep the character. */ + appendStringInfoChar(&buf, *p); + break; + } + } + else + appendStringInfoChar(&buf, *p); + p++; + } + + /* Skip the tab delimiter if present. */ + if (*p == '\t') + p++; + + *pp = (char *) p; + return buf.data; +} diff --git a/doc/src/sgml/pgstashadvice.sgml b/doc/src/sgml/pgstashadvice.sgml index 089fc66446f..937d31e557b 100644 --- a/doc/src/sgml/pgstashadvice.sgml +++ b/doc/src/sgml/pgstashadvice.sgml @@ -15,10 +15,12 @@ query identifiers to plan advice strings. 
Whenever a session is asked to plan a query whose query ID appears in the relevant advice stash, the plan advice string is automatically applied - to guide planning. Note that advice stashes exist purely in memory. This - means both that it is important to be mindful of memory consumption when - deciding how much plan advice to stash, and also that advice stashes must - be recreated and repopulated whenever the server is restarted. + to guide planning. Advice stashes are held in memory, so it is important + to be mindful of memory consumption when deciding how much plan advice to + stash. When pg_stash_advice.save is enabled, a background worker + periodically saves modified stash contents to a file called + pg_stash_advice.tsv, and reloads them when it starts + after a server restart (including after a crash). @@ -203,6 +205,27 @@ + + + pg_stash_advice.save (boolean) + + pg_stash_advice.save configuration parameter + + + + + + Specifies whether to save advice stash contents to disk so that they + can be restored after a server restart (including after a crash). + When enabled, a background worker checks at least every 60 seconds for + changes and writes stash contents to a file called + pg_stash_advice.tsv in the data directory. + The default value is on. This parameter can only be set at server start, + and only takes effect when the module is loaded via shared_preload_libraries. + + + + -- 2.47.1