From 0a546a538b75ee1a736ff9a09a31412c0b323082 Mon Sep 17 00:00:00 2001 From: Melanie Plageman Date: Sat, 25 Mar 2023 14:14:55 -0400 Subject: [PATCH v8 4/4] Improve autovacuum worker cost balancing Before the prior commit, an autovacuum worker's wi_cost_limit was set only at the beginning of vacuuming a table, after reloading the config file. Therefore, at the time that autovac_balance_cost() is called, workers vacuuming tables with no table options could still have different values for their wi_cost_limit_base and wi_cost_delay. Now that the cost parameters can be updated while vacuuming a table, workers will (within some margin of error) have no reason to have different values for cost limit and cost delay (in the absence of table options). This removes the rationale for keeping cost limit and cost delay in shared memory. Balancing the cost limit requires only the number of active autovacuum workers vacuuming a table with no cost-based table options. --- src/backend/commands/vacuum.c | 18 ++- src/backend/postmaster/autovacuum.c | 237 ++++++++++++---------------- src/include/postmaster/autovacuum.h | 4 +- 3 files changed, 112 insertions(+), 147 deletions(-) diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index cb32078c19..54ad76a729 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -2257,12 +2257,13 @@ vacuum_delay_point(void) * VacuumCostLimit and VacuumCostDelay in case they were overwritten * by reload. */ - AutoVacuumUpdateCosts(); - AutoVacuumOverrideCosts(); + AutoVacuumUpdateDelay(); + AutoVacuumUpdateLimit(); /* * If configuration changes are allowed to impact VacuumCostInactive, - * make sure it is updated. + * make sure it is updated. Autovacuum workers will have already done + * this in AutoVacuumUpdateDelay() */ if (VacuumCostInactive == VACUUM_COST_INACTIVE_AND_LOCKED) return; @@ -2314,12 +2315,13 @@ vacuum_delay_point(void) VacuumCostBalance = 0; /* - * For autovacuum workers, someone may have called - * autovac_balance_cost() since they last updated their - * VacuumCostLimit above. Do so again now to ensure they have a - * current value. + * Update limit values for autovacuum workers. We must always do this + * in case the autovacuum launcher or another autovacuum worker has + * recalculated the number of workers across which we must balance the + * limit. This is done by the launcher when launching a new worker and + * by workers before vacuuming each table. */ - AutoVacuumOverrideCosts(); + AutoVacuumUpdateLimit(); /* Might have gotten an interrupt while sleeping */ CHECK_FOR_INTERRUPTS(); diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 8ac14a44c8..0c20442fb1 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -139,6 +139,9 @@ int Log_autovacuum_min_duration = 600000; static bool am_autovacuum_launcher = false; static bool am_autovacuum_worker = false; +static double av_table_option_cost_delay = -1; +static int av_table_option_cost_limit = 0; + /* Flags set by signal handlers */ static volatile sig_atomic_t got_SIGUSR2 = false; @@ -189,8 +192,8 @@ typedef struct autovac_table { Oid at_relid; VacuumParams at_params; - double at_vacuum_cost_delay; - int at_vacuum_cost_limit; + double at_table_option_vac_cost_delay; + int at_table_option_vac_cost_limit; bool at_dobalance; bool at_sharedrel; char *at_relname; @@ -209,7 +212,7 @@ typedef struct autovac_table * wi_sharedrel flag indicating whether table is marked relisshared * wi_proc pointer to PGPROC of the running worker, NULL if not started * wi_launchtime Time at which this worker was launched - * wi_cost_* Vacuum cost-based delay parameters current in this worker + * wi_dobalance Whether this worker should be included in balance calculations * * All fields are protected by AutovacuumLock, except for wi_tableoid and * wi_sharedrel which are protected by AutovacuumScheduleLock (note these @@ -225,9 +228,6 @@ typedef struct WorkerInfoData TimestampTz wi_launchtime; bool wi_dobalance; bool wi_sharedrel; - double wi_cost_delay; - int wi_cost_limit; - int wi_cost_limit_base; } WorkerInfoData; typedef struct WorkerInfoData *WorkerInfo; @@ -273,6 +273,8 @@ typedef struct AutoVacuumWorkItem * av_startingWorker pointer to WorkerInfo currently being started (cleared by * the worker itself as soon as it's up and running) * av_workItems work item array + * av_nworkers_for_balance the number of autovacuum workers to use when + * calculating the per worker cost limit * * This struct is protected by AutovacuumLock, except for av_signal and parts * of the worker list (see above). @@ -286,6 +288,7 @@ typedef struct dlist_head av_runningWorkers; WorkerInfo av_startingWorker; AutoVacuumWorkItem av_workItems[NUM_WORKITEMS]; + pg_atomic_uint32 av_nworkers_for_balance; } AutoVacuumShmemStruct; static AutoVacuumShmemStruct *AutoVacuumShmem; @@ -820,7 +823,7 @@ HandleAutoVacLauncherInterrupts(void) AutoVacLauncherShutdown(); /* rebalance in case the default cost parameters changed */ - LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE); + LWLockAcquire(AutovacuumLock, LW_SHARED); autovac_balance_cost(); LWLockRelease(AutovacuumLock); @@ -1756,9 +1759,6 @@ FreeWorkerInfo(int code, Datum arg) MyWorkerInfo->wi_proc = NULL; MyWorkerInfo->wi_launchtime = 0; MyWorkerInfo->wi_dobalance = false; - MyWorkerInfo->wi_cost_delay = 0; - MyWorkerInfo->wi_cost_limit = 0; - MyWorkerInfo->wi_cost_limit_base = 0; dlist_push_head(&AutoVacuumShmem->av_freeWorkers, &MyWorkerInfo->wi_links); /* not mine anymore */ @@ -1773,123 +1773,114 @@ FreeWorkerInfo(int code, Datum arg) } } + /* - * Update the cost-based delay parameters, so that multiple workers consume - * each a fraction of the total available I/O. + * Update VacuumCostDelay with the correct value for an autovacuum worker, + * given the value of other relevant cost-based delay parameters. Autovacuum + * workers should call this after every config reload, in case VacuumCostDelay + * was overwritten. */ void -AutoVacuumOverrideCosts(void) +AutoVacuumUpdateDelay(void) { - if (MyWorkerInfo) + if (!am_autovacuum_worker) + return; + + if (av_table_option_cost_delay >= 0) + VacuumCostDelay = av_table_option_cost_delay; + else if (autovacuum_vac_cost_delay >= 0) + VacuumCostDelay = autovacuum_vac_cost_delay; + + /* + * If configuration changes are allowed to impact VacuumCostInactive, make + * sure it is updated. + */ + if (VacuumCostInactive == VACUUM_COST_INACTIVE_AND_LOCKED) + return; + + if (VacuumCostDelay > 0) + VacuumCostInactive = VACUUM_COST_ACTIVE; + else { - VacuumCostDelay = MyWorkerInfo->wi_cost_delay; - VacuumCostLimit = MyWorkerInfo->wi_cost_limit; + VacuumCostInactive = VACUUM_COST_INACTIVE_AND_UNLOCKED; + VacuumCostBalance = 0; } } + /* - * Caller must not already hold the AutovacuumLock + * Update VacuumCostLimit with the correct value for an autovacuum worker, + * given the value of other relevant cost limit parameters and the number of + * workers across which the limit must be balanced. Autovacuum workers must + * call this regularly in case av_nworkers_for_balance has been updated by + * another worker or by the autovacuum launcher. They also must call this after + * every config reload, in case VacuumCostLimit was overwritten. */ void -AutoVacuumUpdateCosts(void) +AutoVacuumUpdateLimit(void) { + if (!am_autovacuum_worker) + return; + /* - * Even though this autovacuum worker may be vacuuming a table with a cost - * limit table option and not a cost delay table option, we still don't - * refresh the cost delay value. + * note: in cost_limit, zero also means use value from elsewhere, because + * zero is not a valid value. */ - if (!MyWorkerInfo || !MyWorkerInfo->wi_dobalance) - return; + if (av_table_option_cost_limit > 0) + VacuumCostLimit = av_table_option_cost_limit; + else + { + /* There is at least 1 autovac worker (this worker). */ + int nworkers_for_balance = Max(pg_atomic_read_u32( + &AutoVacuumShmem->av_nworkers_for_balance), 1); - LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE); - MyWorkerInfo->wi_cost_delay = autovacuum_vac_cost_delay >= 0 ? - autovacuum_vac_cost_delay : VacuumCostDelay; - MyWorkerInfo->wi_cost_limit_base = autovacuum_vac_cost_limit > 0 ? + int vac_cost_limit = autovacuum_vac_cost_limit > 0 ? autovacuum_vac_cost_limit : VacuumCostLimit; - autovac_balance_cost(); - LWLockRelease(AutovacuumLock); + + int balanced_cost_limit = vac_cost_limit / nworkers_for_balance; + + VacuumCostLimit = Max(Min(balanced_cost_limit, vac_cost_limit), 1); + } } + /* * autovac_balance_cost - * Recalculate the cost limit setting for each active worker. + * Recalculate the number of workers to consider, given table options and + * the current number of active workers. * - * Caller must hold the AutovacuumLock in exclusive mode. + * Caller must hold the AutovacuumLock in at least shared mode. */ static void autovac_balance_cost(void) { - /* - * The idea here is that we ration out I/O equally. The amount of I/O - * that a worker can consume is determined by cost_limit/cost_delay, so we - * try to equalize those ratios rather than the raw limit settings. - * - * note: in cost_limit, zero also means use value from elsewhere, because - * zero is not a valid value. - */ - int vac_cost_limit = (autovacuum_vac_cost_limit > 0 ? - autovacuum_vac_cost_limit : VacuumCostLimit); - double vac_cost_delay = (autovacuum_vac_cost_delay >= 0 ? - autovacuum_vac_cost_delay : VacuumCostDelay); - double cost_total; - double cost_avail; dlist_iter iter; + int orig_nworkers_for_balance; + int nworkers_for_balance = 0; - /* not set? nothing to do */ - if (vac_cost_limit <= 0 || vac_cost_delay <= 0) + if (autovacuum_vac_cost_delay == 0 || + (autovacuum_vac_cost_delay == -1 && VacuumCostDelay == 0)) return; - /* calculate the total base cost limit of participating active workers */ - cost_total = 0.0; - dlist_foreach(iter, &AutoVacuumShmem->av_runningWorkers) - { - WorkerInfo worker = dlist_container(WorkerInfoData, wi_links, iter.cur); - - if (worker->wi_proc != NULL && - worker->wi_dobalance && - worker->wi_cost_limit_base > 0 && worker->wi_cost_delay > 0) - cost_total += - (double) worker->wi_cost_limit_base / worker->wi_cost_delay; - } - - /* there are no cost limits -- nothing to do */ - if (cost_total <= 0) + if (autovacuum_vac_cost_limit <= 0 && VacuumCostLimit <= 0) return; - /* - * Adjust cost limit of each active worker to balance the total of cost - * limit to autovacuum_vacuum_cost_limit. - */ - cost_avail = (double) vac_cost_limit / vac_cost_delay; + orig_nworkers_for_balance = + pg_atomic_read_u32(&AutoVacuumShmem->av_nworkers_for_balance); + dlist_foreach(iter, &AutoVacuumShmem->av_runningWorkers) { WorkerInfo worker = dlist_container(WorkerInfoData, wi_links, iter.cur); - if (worker->wi_proc != NULL && - worker->wi_dobalance && - worker->wi_cost_limit_base > 0 && worker->wi_cost_delay > 0) - { - int limit = (int) - (cost_avail * worker->wi_cost_limit_base / cost_total); - - /* - * We put a lower bound of 1 on the cost_limit, to avoid division- - * by-zero in the vacuum code. Also, in case of roundoff trouble - * in these calculations, let's be sure we don't ever set - * cost_limit to more than the base value. - */ - worker->wi_cost_limit = Max(Min(limit, - worker->wi_cost_limit_base), - 1); - } + if (worker->wi_proc == NULL || !worker->wi_dobalance) + continue; - if (worker->wi_proc != NULL) - elog(DEBUG2, "autovac_balance_cost(pid=%d db=%u, rel=%u, dobalance=%s cost_limit=%d, cost_limit_base=%d, cost_delay=%g)", - worker->wi_proc->pid, worker->wi_dboid, worker->wi_tableoid, - worker->wi_dobalance ? "yes" : "no", - worker->wi_cost_limit, worker->wi_cost_limit_base, - worker->wi_cost_delay); + nworkers_for_balance++; } + + if (nworkers_for_balance != orig_nworkers_for_balance) + pg_atomic_write_u32(&AutoVacuumShmem->av_nworkers_for_balance, + nworkers_for_balance); } /* @@ -2335,8 +2326,6 @@ do_autovacuum(void) autovac_table *tab; bool isshared; bool skipit; - double stdVacuumCostDelay; - int stdVacuumCostLimit; dlist_iter iter; CHECK_FOR_INTERRUPTS(); @@ -2442,32 +2431,18 @@ do_autovacuum(void) continue; } - /* - * Remember the prevailing values of the vacuum cost GUCs. We have to - * restore these at the bottom of the loop, else we'll compute wrong - * values in the next iteration of autovac_balance_cost(). - */ - stdVacuumCostDelay = VacuumCostDelay; - stdVacuumCostLimit = VacuumCostLimit; + av_table_option_cost_limit = tab->at_table_option_vac_cost_limit; + av_table_option_cost_delay = tab->at_table_option_vac_cost_delay; /* Must hold AutovacuumLock while mucking with cost balance info */ LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE); - - /* advertise my cost delay parameters for the balancing algorithm */ MyWorkerInfo->wi_dobalance = tab->at_dobalance; - MyWorkerInfo->wi_cost_delay = tab->at_vacuum_cost_delay; - MyWorkerInfo->wi_cost_limit = tab->at_vacuum_cost_limit; - MyWorkerInfo->wi_cost_limit_base = tab->at_vacuum_cost_limit; - - /* do a balance */ autovac_balance_cost(); - - /* set the active cost parameters from the result of that */ - AutoVacuumOverrideCosts(); - - /* done */ LWLockRelease(AutovacuumLock); + AutoVacuumUpdateDelay(); + AutoVacuumUpdateLimit(); + /* clean up memory before each iteration */ MemoryContextResetAndDeleteChildren(PortalContext); @@ -2551,19 +2526,15 @@ deleted: /* * Remove my info from shared memory. We could, but intentionally - * don't, clear wi_cost_limit and friends --- this is on the - * assumption that we probably have more to do with similar cost - * settings, so we don't want to give up our share of I/O for a very - * short interval and thereby thrash the global balance. + * don't, set wi_dobalance to false on the assumption that we are more + * likely than not to vacuum a table with no table options next, so we + * don't want to give up our share of I/O for a very short interval + * and thereby thrash the global balance. */ LWLockAcquire(AutovacuumScheduleLock, LW_EXCLUSIVE); MyWorkerInfo->wi_tableoid = InvalidOid; MyWorkerInfo->wi_sharedrel = false; LWLockRelease(AutovacuumScheduleLock); - - /* restore vacuum cost GUCs for the next iteration */ - VacuumCostDelay = stdVacuumCostDelay; - VacuumCostLimit = stdVacuumCostLimit; } /* @@ -2595,6 +2566,8 @@ deleted: { ConfigReloadPending = false; ProcessConfigFile(PGC_SIGHUP); + AutoVacuumUpdateDelay(); + AutoVacuumUpdateLimit(); } LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE); @@ -2827,8 +2800,6 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, int freeze_table_age; int multixact_freeze_min_age; int multixact_freeze_table_age; - int vac_cost_limit; - double vac_cost_delay; int log_min_duration; /* @@ -2838,20 +2809,6 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, * defaults, autovacuum's own first and plain vacuum second. */ - /* -1 in autovac setting means use plain vacuum_cost_delay */ - vac_cost_delay = (avopts && avopts->vacuum_cost_delay >= 0) - ? avopts->vacuum_cost_delay - : (autovacuum_vac_cost_delay >= 0) - ? autovacuum_vac_cost_delay - : VacuumCostDelay; - - /* 0 or -1 in autovac setting means use plain vacuum_cost_limit */ - vac_cost_limit = (avopts && avopts->vacuum_cost_limit > 0) - ? avopts->vacuum_cost_limit - : (autovacuum_vac_cost_limit > 0) - ? autovacuum_vac_cost_limit - : VacuumCostLimit; - /* -1 in autovac setting means use log_autovacuum_min_duration */ log_min_duration = (avopts && avopts->log_min_duration >= 0) ? avopts->log_min_duration @@ -2907,8 +2864,10 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, tab->at_params.multixact_freeze_table_age = multixact_freeze_table_age; tab->at_params.is_wraparound = wraparound; tab->at_params.log_min_duration = log_min_duration; - tab->at_vacuum_cost_limit = vac_cost_limit; - tab->at_vacuum_cost_delay = vac_cost_delay; + tab->at_table_option_vac_cost_limit = avopts ? + avopts->vacuum_cost_limit : 0; + tab->at_table_option_vac_cost_delay = avopts ? + avopts->vacuum_cost_delay : -1; tab->at_relname = NULL; tab->at_nspname = NULL; tab->at_datname = NULL; @@ -3400,10 +3359,14 @@ AutoVacuumShmemInit(void) worker = (WorkerInfo) ((char *) AutoVacuumShmem + MAXALIGN(sizeof(AutoVacuumShmemStruct))); + /* initialize the WorkerInfo free list */ for (i = 0; i < autovacuum_max_workers; i++) dlist_push_head(&AutoVacuumShmem->av_freeWorkers, &worker[i].wi_links); + + pg_atomic_init_u32(&AutoVacuumShmem->av_nworkers_for_balance, 0); + } else Assert(found); diff --git a/src/include/postmaster/autovacuum.h b/src/include/postmaster/autovacuum.h index ee48e7123d..7b462866c9 100644 --- a/src/include/postmaster/autovacuum.h +++ b/src/include/postmaster/autovacuum.h @@ -64,8 +64,8 @@ extern int StartAutoVacWorker(void); extern void AutoVacWorkerFailed(void); /* autovacuum cost-delay balancer */ -extern void AutoVacuumOverrideCosts(void); -extern void AutoVacuumUpdateCosts(void); +extern void AutoVacuumUpdateDelay(void); +extern void AutoVacuumUpdateLimit(void); #ifdef EXEC_BACKEND extern void AutoVacLauncherMain(int argc, char *argv[]) pg_attribute_noreturn(); -- 2.37.2