From 5bb567474f6a3ea8f728d45a1bb7a610299cf20b Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Fri, 8 Dec 2023 16:32:46 -0500 Subject: [PATCH v2 07/10] Opportunistically freeze pages unlikely to be modified While vacuuming, we will opportunistically freeze a page if its age means it is unlikely to be modified before target_freeze_duration has elapsed. Each time an all-visible page is modified, i.e. its all-visible bit is unset, and that modification is considered "early", the duration (in LSNs) that the page spent all-visible is entered into the early unsets accumulator. At the beginning of vacuuming a relation, we will extract the mean and standard deviation from this accumulated data. We then calculate the youngest a page can be and still have a 95% likelihood of remaining unmodified for target_freeze_duration. Pages older than this are frozen by vacuum. This commit includes a GUC, opp_freeze_algo, which is for development only and lets us compare different heuristics. --- src/backend/access/heap/vacuumlazy.c | 58 ++++++++++++++-- src/backend/utils/activity/pgstat_relation.c | 68 +++++++++++++++++-- src/backend/utils/init/globals.c | 1 + src/backend/utils/misc/guc_tables.c | 10 +++ src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/miscadmin.h | 3 + src/include/pgstat.h | 2 +- 7 files changed, 134 insertions(+), 9 deletions(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index c788e7e5ca..6bd64b1599 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -210,6 +210,14 @@ typedef struct LVRelState int64 live_tuples; /* # live tuples remaining */ int64 recently_dead_tuples; /* # dead, but not yet removable */ int64 missed_dead_tuples; /* # removable, but not removed */ + + /* + * The youngest page we predict will stay unmodified for + * target_freeze_duration. We will not opportunistically freeze pages + * younger than this threshold. 
This is calculated at the beginning of + * vacuuming a relation. + */ + XLogRecPtr frz_threshold_min; } LVRelState; /* @@ -250,6 +258,7 @@ static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, bool sharelock, Buffer vmbuffer); static void lazy_scan_prune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, + XLogRecPtr page_lsn, LVPagePruneState *prunestate); static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, @@ -287,6 +296,9 @@ static void update_vacuum_error_info(LVRelState *vacrel, static void restore_vacuum_error_info(LVRelState *vacrel, const LVSavedErrInfo *saved_vacrel); +static bool vacuum_opp_freeze(LVRelState *vacrel, XLogRecPtr page_lsn, + bool all_visible_all_frozen, + bool prune_emitted_fpi); /* * heap_vacuum_rel() -- perform VACUUM for one heap relation @@ -489,7 +501,8 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, vacrel->relname))); } - pgstat_refresh_frz_dur(RelationGetRelid(rel), rel->rd_rel->relisshared); + vacrel->frz_threshold_min = pgstat_refresh_frz_stats(RelationGetRelid(rel), + rel->rd_rel->relisshared); /* * Allocate dead_items array memory using dead_items_alloc. This handles @@ -855,6 +868,7 @@ lazy_scan_heap(LVRelState *vacrel) { Buffer buf; Page page; + XLogRecPtr page_lsn = InvalidXLogRecPtr; bool all_visible_according_to_vm; LVPagePruneState prunestate; @@ -959,6 +973,7 @@ lazy_scan_heap(LVRelState *vacrel) buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL, vacrel->bstrategy); page = BufferGetPage(buf); + page_lsn = PageGetLSN(page); if (!ConditionalLockBufferForCleanup(buf)) { bool hastup, @@ -1021,7 +1036,7 @@ lazy_scan_heap(LVRelState *vacrel) * were pruned some time earlier. Also considers freezing XIDs in the * tuple headers of remaining items with storage. 
*/ - lazy_scan_prune(vacrel, buf, blkno, page, &prunestate); + lazy_scan_prune(vacrel, buf, blkno, page, page_lsn, &prunestate); Assert(!prunestate.all_visible || !prunestate.has_lpdead_items); @@ -1545,6 +1560,7 @@ lazy_scan_prune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, + XLogRecPtr page_lsn, LVPagePruneState *prunestate) { Relation rel = vacrel->rel; @@ -1786,8 +1802,9 @@ lazy_scan_prune(LVRelState *vacrel, * page all-frozen afterwards (might not happen until final heap pass). */ if (pagefrz.freeze_required || tuples_frozen == 0 || - (prunestate->all_visible && prunestate->all_frozen && - fpi_before != pgWalUsage.wal_fpi)) + vacuum_opp_freeze(vacrel, page_lsn, + prunestate->all_visible && prunestate->all_frozen, + fpi_before != pgWalUsage.wal_fpi)) { /* * We're freezing the page. Our final NewRelfrozenXid doesn't need to @@ -3509,3 +3526,36 @@ restore_vacuum_error_info(LVRelState *vacrel, vacrel->offnum = saved_vacrel->offnum; vacrel->phase = saved_vacrel->phase; } + +/* + * Determine whether or not vacuum should opportunistically freeze a page. + * Given freeze statistics about the relation contained in LVRelState, whether + * or not the page will be able to be marked all visible and all frozen, and + * whether or not pruning emitted an FPI, return whether or not the page should + * be frozen. The LVRelState should not be modified. 
+ */ +static bool +vacuum_opp_freeze(LVRelState *vacrel, XLogRecPtr page_lsn, + bool all_visible_all_frozen, + bool prune_emitted_fpi) +{ + int64 page_age; + + if (!all_visible_all_frozen) + return false; + + page_age = GetInsertRecPtr() - page_lsn; + page_age = Max(page_age, 0); + + if (opp_freeze_algo == 0) + return prune_emitted_fpi; + + if (opp_freeze_algo == 4) + { + if (vacrel->frz_threshold_min == InvalidXLogRecPtr) + return true; + return page_age > vacrel->frz_threshold_min; + } + + return false; +} diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c index ae8d200c1e..52cd37f91d 100644 --- a/src/backend/utils/activity/pgstat_relation.c +++ b/src/backend/utils/activity/pgstat_relation.c @@ -44,6 +44,16 @@ typedef struct TwoPhasePgStatRecord bool truncdropped; /* was the relation truncated/dropped? */ } TwoPhasePgStatRecord; +/* + * The Z-score used to calculate the freeze threshold from the distribution of + * early unsets. See: + * + * https://en.wikipedia.org/wiki/Standard_normal_table#Cumulative_(less_than_Z) + * + * This Z-score has a cumulative probability (from negative infinity) of + * approximately 0.94950, or 94.950%. + */ +static const double FRZ_THRESHOLD_ZSCORE = 1.64; static PgStat_TableStatus *pgstat_prep_relation_pending(Oid rel_id, bool isshared); static void add_tabstat_xact_level(PgStat_TableStatus *pgstat_info, int nest_level); @@ -215,8 +225,8 @@ pgstat_drop_relation(Relation rel) * want to refresh this translated value periodically. Doing so at the start of * each table vacuum is convenient. */ -void -pgstat_refresh_frz_dur(Oid tableoid, bool shared) +XLogRecPtr +pgstat_refresh_frz_stats(Oid tableoid, bool shared) { PgStat_EntryRef *entry_ref; PgStat_StatTabEntry *tabentry; @@ -226,10 +236,14 @@ pgstat_refresh_frz_dur(Oid tableoid, bool shared) TimestampTz target_time; XLogRecPtr target_lsn; uint64 target_dur_usecs; + double mean; + double stddev; + double n; Oid dboid = (shared ? 
InvalidOid : MyDatabaseId); + XLogRecPtr frz_threshold_min; if (!pgstat_track_counts) - return; + return InvalidXLogRecPtr; target_dur_usecs = target_freeze_duration * USECS_PER_SEC; @@ -281,6 +295,52 @@ pgstat_refresh_frz_dur(Oid tableoid, bool shared) */ tabentry->target_frz_dur_lsns = cur_lsn - target_lsn; + /* + * Calculate the mean and standard deviation of the distribution of early + * unsets. + * + * Each time an all-visible page is modified, i.e. its all-visible bit is + * unset, and that modification is considered "early", the duration (in + * LSNs) that that page spent all-visible is entered into the early unsets + * accumulator. Here, the data collected in that accumulator is extracted + * into the parameters of a normal distribution (mean and standard + * deviation). + */ + accumulator_calculate(&tabentry->vm_unset.early_unsets, &mean, &stddev); + + /* + * Calculate the age of the youngest page that should be opportunistically + * frozen. + * + * We'll opportunistically freeze a page if the probability that it will + * be early unset is less than approximately 5%. This threshold occurs + * when the cumulative distribution function of the early unsets + * distribution exceeds 95%. We assume that if a page has survived past + * the age when 95% of early unsets have occurred, then it's safe to + * freeze. + * + * If we couldn't produce a distribution from the accumulator, or the + * standard deviation of that distribution is infinite, then err on the + * side of freezing everything. + */ + n = mean + FRZ_THRESHOLD_ZSCORE * stddev; + if (isnan(n) || isinf(n)) + frz_threshold_min = InvalidXLogRecPtr; + else + frz_threshold_min = n; + + /* + * If the number of entries in the accumulator is small, then the mean and + * standard deviation extracted from it may be unreliable. We can probably + * devise a way to represent low confidence using a modifier. 
For example, + * we could skew the mean and standard deviation to favor more freezing + * (perhaps using standard error). A common rule of thumb is that a sample + * size of roughly 30 or more is needed for the central limit theorem to + * apply. So, until we have 30 early unsets, just freeze everything on the + * given vacuum. + */ + if (tabentry->vm_unset.early_unsets.n < 30) + frz_threshold_min = InvalidXLogRecPtr; + pgstat_unlock_entry(entry_ref); /* @@ -290,7 +350,7 @@ pgstat_refresh_frz_dur(Oid tableoid, bool shared) * do this? */ - return; + return frz_threshold_min; } diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c index 89bad73720..0f5e5077da 100644 --- a/src/backend/utils/init/globals.c +++ b/src/backend/utils/init/globals.c @@ -149,6 +149,7 @@ int VacuumCostPageMiss = 2; int VacuumCostPageDirty = 20; int VacuumCostLimit = 200; double VacuumCostDelay = 0; +int opp_freeze_algo = 0; int target_freeze_duration = 1; int64 VacuumPageHit = 0; diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 4cc1970bec..25ab5b3f7a 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -2465,6 +2465,16 @@ struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, + { + {"opp_freeze_algo", PGC_USERSET, AUTOVACUUM, + gettext_noop("algorithm used to determine whether or not to freeze a page during vacuum"), + NULL + }, + &opp_freeze_algo, + 0, 0, 10000, + NULL, NULL, NULL + }, + { {"target_freeze_duration", PGC_USERSET, AUTOVACUUM, gettext_noop("minimum amount of time in seconds that a page should stay frozen."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 1a9adcc8f1..26d6acb941 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -664,6 +664,7 @@ #autovacuum_vacuum_cost_limit = -1 # default vacuum cost limit for # autovacuum, -1 means use # vacuum_cost_limit 
+#opp_freeze_algo = 0 # development only; 0 (default) keeps master's behavior of freezing only when pruning emitted an FPI #target_freeze_duration = 1 # desired time for page to stay frozen in seconds diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 70bad41505..6391170156 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -279,6 +279,9 @@ extern PGDLLIMPORT int VacuumCostPageMiss; extern PGDLLIMPORT int VacuumCostPageDirty; extern PGDLLIMPORT int VacuumCostLimit; extern PGDLLIMPORT double VacuumCostDelay; + +/* opp_freeze_algo is only used for development */ +extern PGDLLIMPORT int opp_freeze_algo; extern PGDLLIMPORT int target_freeze_duration; extern PGDLLIMPORT int64 VacuumPageHit; diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 5434d181fd..034a596f99 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -742,7 +742,7 @@ extern void pgstat_report_analyze(Relation rel, PgStat_Counter livetuples, PgStat_Counter deadtuples, bool resetcounter); -extern void pgstat_refresh_frz_dur(Oid tableoid, bool shared); +extern XLogRecPtr pgstat_refresh_frz_stats(Oid tableoid, bool shared); extern void pgstat_count_vm_unset(Relation relation, XLogRecPtr page_lsn, XLogRecPtr current_lsn, uint8 old_vmbits); -- 2.37.2