From 6c02fc9318ef19c15b00ad5376b7717bfa92725f Mon Sep 17 00:00:00 2001 From: Lukas Fittl Date: Sun, 29 Mar 2026 11:47:27 -0700 Subject: [PATCH vnocfbot] Make pg_stash_advice dump advice to disk when changed, load after restart --- contrib/pg_stash_advice/pg_stash_advice.c | 356 ++++++++++++++++++++++ doc/src/sgml/pgstashadvice.sgml | 10 +- 2 files changed, 362 insertions(+), 4 deletions(-) diff --git a/contrib/pg_stash_advice/pg_stash_advice.c b/contrib/pg_stash_advice/pg_stash_advice.c index 22122236694..7dc52485617 100644 --- a/contrib/pg_stash_advice/pg_stash_advice.c +++ b/contrib/pg_stash_advice/pg_stash_advice.c @@ -18,6 +18,8 @@ */ #include "postgres.h" +#include <unistd.h> + #include "common/hashfn.h" #include "common/string.h" #include "fmgr.h" @@ -26,12 +28,15 @@ #include "nodes/queryjumble.h" #include "pg_plan_advice.h" #include "storage/dsm_registry.h" +#include "storage/fd.h" #include "storage/lwlock.h" #include "utils/builtins.h" #include "utils/guc.h" #include "utils/memutils.h" #include "utils/tuplestore.h" +#define PGSA_DUMP_FILE "pg_stash_advice.entries" + PG_MODULE_MAGIC; PG_FUNCTION_INFO_V1(pg_create_advice_stash); @@ -50,6 +55,7 @@ typedef struct pgsa_shared_state dsa_handle area; dshash_table_handle stash_hash; dshash_table_handle entry_hash; + bool file_loaded; } pgsa_shared_state; typedef struct pgsa_stash @@ -154,6 +160,10 @@ static void pgsa_init_shared_state(void *ptr, void *arg); static uint64 pgsa_lookup_stash_id(char *stash_name); static void pgsa_set_advice_string(char *stash_name, int64 queryId, char *advice_string); +static void pgsa_dump_to_file(void); +static void pgsa_load_from_file(void); +static char *pgsa_escape_string(char *str); +static char *pgsa_unescape_string(char *str); /* * Initialize this module. 
@@ -201,6 +211,7 @@ pg_create_advice_stash(PG_FUNCTION_ARGS) LWLockAcquire(&pgsa_state->lock, LW_EXCLUSIVE); pgsa_create_stash(stash_name); LWLockRelease(&pgsa_state->lock); + pgsa_dump_to_file(); PG_RETURN_VOID(); } @@ -218,6 +229,7 @@ pg_drop_advice_stash(PG_FUNCTION_ARGS) LWLockAcquire(&pgsa_state->lock, LW_EXCLUSIVE); pgsa_drop_stash(stash_name); LWLockRelease(&pgsa_state->lock); + pgsa_dump_to_file(); PG_RETURN_VOID(); } @@ -441,6 +453,7 @@ pg_set_stashed_advice(PG_FUNCTION_ARGS) pgsa_set_advice_string(stash_name, queryId, advice_string); } + pgsa_dump_to_file(); PG_RETURN_VOID(); } @@ -614,6 +627,21 @@ pgsa_attach(void) /* Restore previous memory context. */ MemoryContextSwitchTo(oldcontext); + + /* + * If the shared state was just created (i.e. after a server restart or + * crash), try to restore stash data from the dump file. Use the + * file_loaded flag under the lock to ensure only one backend does this. + */ + LWLockAcquire(&pgsa_state->lock, LW_EXCLUSIVE); + if (!pgsa_state->file_loaded) + { + pgsa_state->file_loaded = true; + LWLockRelease(&pgsa_state->lock); + pgsa_load_from_file(); + } + else + LWLockRelease(&pgsa_state->lock); } /* @@ -796,6 +824,7 @@ pgsa_init_shared_state(void *ptr, void *arg) state->area = DSA_HANDLE_INVALID; state->stash_hash = DSHASH_HANDLE_INVALID; state->entry_hash = DSHASH_HANDLE_INVALID; + state->file_loaded = false; } /* @@ -898,3 +927,330 @@ pgsa_set_advice_string(char *stash_name, int64 queryId, char *advice_string) dsa_free(pgsa_dsa_area, old_dp); LWLockRelease(&pgsa_state->lock); } + +/* + * Dump all advice stash data to a file. + * + * The file format is a simple text format: + * Line 1: <<num_stashes>> + * Next num_stashes lines: one stash name per line + * Remaining lines: stash_name\tquery_id\tadvice_string + * + * Stash names and advice strings are backslash-escaped where needed. 
+ */ +static void +pgsa_dump_to_file(void) +{ + FILE *file; + char transient_dump_file_path[MAXPGPATH]; + dshash_seq_status iter; + pgsa_stash *stash; + pgsa_entry *entry; + pgsa_stash_name_table_hash *nhash; + int num_stashes = 0; + int ret; + + Assert(pgsa_entry_dshash != NULL); + + /* Write to a ".tmp" sibling file; it is renamed into place at the end. */ + snprintf(transient_dump_file_path, MAXPGPATH, "%s.tmp", PGSA_DUMP_FILE); + file = AllocateFile(transient_dump_file_path, "w"); + if (!file) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", + transient_dump_file_path))); + + /* + * Build an ID->name lookup table. We also use this to count stashes (for + * the header) and to write stash names after the header. + */ + nhash = pgsa_stash_name_table_create(CurrentMemoryContext, 64, NULL); + dshash_seq_init(&iter, pgsa_stash_dshash, true); + while ((stash = dshash_seq_next(&iter)) != NULL) + { + pgsa_stash_name *n; + bool found; + + n = pgsa_stash_name_table_insert(nhash, stash->pgsa_stash_id, &found); + Assert(!found); + n->name = pstrdup(stash->name); + num_stashes++; + } + dshash_seq_term(&iter); + + /* Write the "<<count>>" header, then one escaped stash name per line. */ + ret = fprintf(file, "<<%d>>\n", num_stashes); + if (ret >= 0) + { + pgsa_stash_name_table_iterator i; + + pgsa_stash_name_table_start_iterate(nhash, &i); + while (ret >= 0) + { + pgsa_stash_name *n = pgsa_stash_name_table_iterate(nhash, &i); + + if (n == NULL) + break; + ret = fprintf(file, "%s\n", pgsa_escape_string(n->name)); + } + } + + /* Write entries: escaped_stash_name\tquery_id\tescaped_advice. 
+ */ + if (ret >= 0) + { + dshash_seq_init(&iter, pgsa_entry_dshash, true); + while ((entry = dshash_seq_next(&iter)) != NULL) + { + pgsa_stash_name *n; + char *advice_string; + + if (entry->advice_string == InvalidDsaPointer) + continue; + + n = pgsa_stash_name_table_lookup(nhash, entry->key.pgsa_stash_id); + if (n == NULL) + continue; /* stash ID not seen in the scan above; skip */ + + advice_string = dsa_get_address(pgsa_dsa_area, + entry->advice_string); + ret = fprintf(file, "%s\t%" PRId64 "\t%s\n", + pgsa_escape_string(n->name), + entry->key.queryId, + pgsa_escape_string(advice_string)); + if (ret < 0) + break; + } + dshash_seq_term(&iter); + } + + /* On a write error, remove the partial temp file, keeping errno for %m. */ + if (ret < 0) + { + int save_errno = errno; + + FreeFile(file); + unlink(transient_dump_file_path); + errno = save_errno; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to file \"%s\": %m", + transient_dump_file_path))); + } + + /* Close the temp file, then rename it over the final dump file. */ + ret = FreeFile(file); + if (ret != 0) + { + int save_errno = errno; + + unlink(transient_dump_file_path); + errno = save_errno; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not close file \"%s\": %m", + transient_dump_file_path))); + } + + (void) durable_rename(transient_dump_file_path, PGSA_DUMP_FILE, ERROR); +} + +/* + * Load advice stash data from the dump file. + * + * This is called once when the shared memory state is first initialized + * (i.e. after a server restart or crash recovery), to restore the previously + * saved stash contents. + * + * Errors during loading are reported as warnings so that a corrupt dump file + * does not prevent the server from starting. 
+ */ +static void +pgsa_load_from_file(void) +{ + FILE *file; + int num_stashes; + int num_entries = 0; + int i; + StringInfoData buf; + + file = AllocateFile(PGSA_DUMP_FILE, "r"); + if (!file) + { + if (errno == ENOENT) + return; /* no dump file, nothing to load */ + ereport(WARNING, + (errcode_for_file_access(), + errmsg("could not open file \"%s\": %m", PGSA_DUMP_FILE))); + return; + } + + /* Read the header; a negative stash count means the file is corrupt. */ + if (fscanf(file, "<<%d>>\n", &num_stashes) != 1 || num_stashes < 0) + { + FreeFile(file); + ereport(WARNING, + errmsg("pg_stash_advice dump file has corrupted header")); + return; + } + + initStringInfo(&buf); + + /* Read and create stash names. */ + for (i = 0; i < num_stashes; i++) + { + if (!pg_get_line_buf(file, &buf)) + { + FreeFile(file); + pfree(buf.data); + ereport(WARNING, + errmsg("pg_stash_advice dump file is truncated at stash %d", + i + 1)); + return; + } + + /* Strip the trailing newline. 
+ */ + if (buf.len > 0 && buf.data[buf.len - 1] == '\n') + buf.data[--buf.len] = '\0'; + { + char *name = pgsa_unescape_string(buf.data); + + /* Skip duplicates rather than ERRORing like pgsa_create_stash. */ + if (pgsa_lookup_stash_id(name) == 0) + { + LWLockAcquire(&pgsa_state->lock, LW_EXCLUSIVE); + pgsa_create_stash(name); + LWLockRelease(&pgsa_state->lock); + } + } + } + + /* Read and restore entries until EOF. */ + while (pg_get_line_buf(file, &buf)) + { + char *stash_name; + char *queryid_str; + char *advice_string; + int64 queryId; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + /* Strip the trailing newline. */ + if (buf.len > 0 && buf.data[buf.len - 1] == '\n') + buf.data[--buf.len] = '\0'; + + /* Parse: stash_name\tquery_id\tadvice_string */ + stash_name = buf.data; + queryid_str = strchr(stash_name, '\t'); + if (queryid_str == NULL) + goto malformed; + *queryid_str++ = '\0'; + + advice_string = strchr(queryid_str, '\t'); + if (advice_string == NULL) + goto malformed; + *advice_string++ = '\0'; + + queryId = pg_strtoint64_safe(queryid_str, (Node *) &escontext); + if (SOFT_ERROR_OCCURRED(&escontext)) + goto malformed; + + pgsa_set_advice_string(pgsa_unescape_string(stash_name), + queryId, + pgsa_unescape_string(advice_string)); + num_entries++; + continue; + +malformed: + ereport(WARNING, + errmsg("pg_stash_advice dump file has malformed entry, skipping")); + } + + FreeFile(file); + pfree(buf.data); + + ereport(LOG, + errmsg("pg_stash_advice: loaded %d stashes with %d entries from \"%s\"", + num_stashes, num_entries, PGSA_DUMP_FILE)); +} + +/* + * Backslash-escape the string so it can be written to a tab-separated file. + * + * The escaped characters are backslash, tab, newline, and carriage return. + */ +static char * +pgsa_escape_string(char *str) +{ + StringInfoData buf; + + if (!strpbrk(str, "\\\t\n\r")) + return str; + + initStringInfo(&buf); + for (const char *p = str; *p; p++) + { + switch (*p) + { + case '\\': + appendStringInfoString(&buf, "\\\\"); + break; + case '\t': + appendStringInfoString(&buf, "\\t"); + break; + case '\n': + appendStringInfoString(&buf, "\\n"); + break; + case '\r': + appendStringInfoString(&buf, "\\r"); + break; + default: + appendStringInfoChar(&buf, *p); + break; + } + } + + return buf.data; +} + +/* + * Unescape a string that was escaped for serializing to the on-disk file. 
+ */ +static char * +pgsa_unescape_string(char *str) +{ + StringInfoData buf; + + if (!strchr(str, '\\')) + return pstrdup(str); + + initStringInfo(&buf); + for (const char *p = str; *p; p++) + { + if (*p == '\\' && p[1] != '\0') + { + p++; + switch (*p) + { + case '\\': + appendStringInfoChar(&buf, '\\'); + break; + case 't': + appendStringInfoChar(&buf, '\t'); + break; + case 'n': + appendStringInfoChar(&buf, '\n'); + break; + case 'r': + appendStringInfoChar(&buf, '\r'); + break; + default: + /* Unrecognized escape; drop the backslash, keep the char. */ + appendStringInfoChar(&buf, *p); + break; + } + } + else + appendStringInfoChar(&buf, *p); + } + return buf.data; +} diff --git a/doc/src/sgml/pgstashadvice.sgml b/doc/src/sgml/pgstashadvice.sgml index 089fc66446f..ce2c8ec3ab9 100644 --- a/doc/src/sgml/pgstashadvice.sgml +++ b/doc/src/sgml/pgstashadvice.sgml @@ -15,10 +15,12 @@ query identifiers to plan advice strings. Whenever a session is asked to plan a query whose query ID appears in the relevant advice stash, the plan advice string is automatically applied - to guide planning. Note that advice stashes exist purely in memory. This - means both that it is important to be mindful of memory consumption when - deciding how much plan advice to stash, and also that advice stashes must - be recreated and repopulated whenever the server is restarted. + to guide planning. Advice stashes are held in memory, so it is important + to be mindful of memory consumption when deciding how much plan advice to + stash. The contents are automatically saved to a file called + pg_stash_advice.entries whenever they are modified, + and restored when the first session attaches after a server restart + (including after a crash). -- 2.47.1