diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 0f2f2bf..d11a093 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1354,6 +1354,67 @@ include 'filename' + + WAL Rate Limiting + + + During the execution of VACUUM FULL, + CLUSTER, ALTER TABLE, and + CREATE INDEX commands, the system maintains an + internal counter that keeps track of the volume of WAL written. + When the accumulated WAL volume reaches a multiple of 64kB + the process performing + the operation will sleep for a short period of time, as specified by + wal_rate_limit_delay. It will then continue + execution until the next multiple of 64kB is reached. + + + + The intent of this feature is to allow administrators to reduce + the impact of these commands on concurrent database activity + and replication. There are many situations where it is not + important that maintenance commands finish quickly; + however, it is usually very important that these + commands do not significantly interfere with the ability of the + system to perform other database operations. WAL Rate Limiting + provides a way for administrators to achieve this. + + + + This feature is disabled by default. To enable it, set the + wal_rate_limit_delay variable to a nonzero + value, when wal_level is set to + archive or higher. + + + + + wal_rate_limit_delay (integer) + + wal_rate_limit_delay configuration parameter + + + + The length of time, in milliseconds, that the process will sleep + each time another 64kB of WAL has been written. + The default value is zero, which disables the WAL Rate Limiting + feature. Positive values enable WAL Rate Limiting. + Note that on many systems, the effective resolution + of sleep delays is 10 milliseconds; setting + wal_rate_limit_delay to a value that is + not a multiple of 10 might have the same results as setting it + to the next higher multiple of 10. 
+ + + + When using WAL Rate Limiting, appropriate values for + wal_rate_limit_delay are usually quite small, perhaps + 10 or 20 milliseconds. + + + + + Cost-based Vacuum Delay diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index 5e6f627..61e02db 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -279,6 +279,8 @@ ginBuildCallback(Relation index, HeapTuple htup, Datum *values, { /* there could be many entries, so be willing to abort here */ CHECK_FOR_INTERRUPTS(); + if (RelationNeedsWAL(index)) + wal_rate_limit_delay_point(); ginEntryInsert(&buildstate->ginstate, attnum, key, category, list, nlist, &buildstate->buildStats); } diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c index 1832687..a1e4454 100644 --- a/src/backend/access/gist/gistbuild.c +++ b/src/backend/access/gist/gistbuild.c @@ -563,6 +563,9 @@ gistProcessItup(GISTBuildState *buildstate, IndexTuple itup, CHECK_FOR_INTERRUPTS(); + if (RelationNeedsWAL(indexrel)) + wal_rate_limit_delay_point(); + /* * Loop until we reach a leaf page (level == 0) or a level with buffers * (not including the level we start at, because we would otherwise make diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 504eb71..47a425b 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -460,6 +460,9 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) */ CHECK_FOR_INTERRUPTS(); + if (wstate->btws_use_wal) + wal_rate_limit_delay_point(); + npage = state->btps_page; nblkno = state->btps_blkno; last_off = state->btps_lastoff; diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c index 2f6a878..0e71782 100644 --- a/src/backend/access/spgist/spgdoinsert.c +++ b/src/backend/access/spgist/spgdoinsert.c @@ -1916,6 +1916,9 @@ spgdoinsert(Relation index, SpGistState *state, */ 
CHECK_FOR_INTERRUPTS(); + if (RelationNeedsWAL(index)) + wal_rate_limit_delay_point(); + if (current.blkno == InvalidBlockNumber) { /* diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 0487be1..22adedd 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -183,6 +183,12 @@ static TransactionStateData TopTransactionStateData = { }; /* + * Track total volume of WAL written by current top-level transaction + * to allow tracking, reporting and control of writing WAL. + */ +static uint64 currentTransactionWALVolume; + +/* * unreportedXids holds XIDs of all subtransactions that have not yet been * reported in a XLOG_XACT_ASSIGNMENT record. */ @@ -397,17 +403,26 @@ GetCurrentTransactionIdIfAny(void) } /* - * MarkCurrentTransactionIdLoggedIfAny + * ReportTransactionInsertedWAL * - * Remember that the current xid - if it is assigned - now has been wal logged. + * Count the inserted WAL bytes and mark the xid, if assigned, as WAL-logged. */ void -MarkCurrentTransactionIdLoggedIfAny(void) +ReportTransactionInsertedWAL(uint32 insertedWALVolume) { + currentTransactionWALVolume += insertedWALVolume; if (TransactionIdIsValid(CurrentTransactionState->transactionId)) CurrentTransactionState->didLogXid = true; } +/* + * GetCurrentTransactionWALVolume - WAL volume inserted by this transaction + */ +uint64 +GetCurrentTransactionWALVolume(void) +{ + return currentTransactionWALVolume; +} /* * GetStableLatestTransactionId @@ -1772,6 +1787,7 @@ StartTransaction(void) /* * initialize reported xid accounting */ + currentTransactionWALVolume = 0; nUnreportedXids = 0; s->didLogXid = false; diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index b53ae87..7255aa0 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -1192,6 +1192,7 @@ begin:; * already exactly at the beginning of a segment, so there was no need * to do anything. 
*/ + write_len = 0; } /* @@ -1199,7 +1200,7 @@ begin:; */ WALInsertSlotRelease(); - MarkCurrentTransactionIdLoggedIfAny(); + ReportTransactionInsertedWAL(write_len); END_CRIT_SECTION(); diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 8eae43d..be1f143 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -2163,6 +2163,8 @@ IndexBuildHeapScan(Relation heapRelation, OldestXmin = GetOldestXmin(heapRelation->rd_rel->relisshared, true); } + init_wal_rate_limit(); + scan = heap_beginscan_strat(heapRelation, /* relation */ snapshot, /* snapshot */ 0, /* number of keys */ diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index e15f042..1098f3b 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -926,6 +926,8 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, get_namespace_name(RelationGetNamespace(OldHeap)), RelationGetRelationName(OldHeap)))); + init_wal_rate_limit(); + /* * Scan through the OldHeap, either in OldIndex order or sequentially; * copy each tuple into the NewHeap, or transiently to the tuplesort @@ -940,6 +942,9 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose, CHECK_FOR_INTERRUPTS(); + if (use_wal) + wal_rate_limit_delay_point(); + if (indexScan != NULL) { tuple = index_getnext(indexScan, ForwardScanDirection); diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 466d757..7cb101e 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -3821,6 +3821,8 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) dropped_attrs = lappend_int(dropped_attrs, i); } + init_wal_rate_limit(); + /* * Scan through the rows, generating a new row if needed and then * checking all the constraints. 
@@ -3929,6 +3931,9 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) ResetExprContext(econtext); CHECK_FOR_INTERRUPTS(); + + if (!(hi_options & HEAP_INSERT_SKIP_WAL)) /* throttle only if WAL is written */ + wal_rate_limit_delay_point(); } MemoryContextSwitchTo(oldCxt); @@ -9120,11 +9125,16 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst, nblocks = smgrnblocks(src, forkNum); + init_wal_rate_limit(); + for (blkno = 0; blkno < nblocks; blkno++) { /* If we got a cancel signal during the copy of the data, quit */ CHECK_FOR_INTERRUPTS(); + if (use_wal) + wal_rate_limit_delay_point(); + smgrread(src, forkNum, blkno, buf); if (!PageIsVerified(page, blkno)) diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 3455a0b..97ded4d 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -60,7 +60,7 @@ int vacuum_freeze_table_age; /* A few variables that don't seem worth passing around as parameters */ static MemoryContext vac_context = NULL; static BufferAccessStrategy vac_strategy; - +static uint64 prevWALchunks = 0; /* non-export function prototypes */ static List *get_rel_oids(Oid relid, const RangeVar *vacrel); @@ -1293,3 +1293,51 @@ vacuum_delay_point(void) CHECK_FOR_INTERRUPTS(); } } + +/* + * WAL Rate Limiting will wait after each "chunk" of WAL. + */ +#define WALRateLimitBytesPerChunk 65536 +#define GetCurrentTransactionChunks() \ + (GetCurrentTransactionWALVolume() / WALRateLimitBytesPerChunk) +/* + * init_wal_rate_limit - record the WAL chunk count at the start of a command + */ +void +init_wal_rate_limit(void) +{ + prevWALchunks = GetCurrentTransactionChunks(); +} + +/* + * wal_rate_limit_delay_point --- sleep after each new chunk of WAL written. + * + * This should be called in each major loop of maintenance command processing, + * typically once per page processed when generating large amounts of WAL. + * + * Listed in miscadmin.h for general use. 
+ */ +void +wal_rate_limit_delay_point(void) +{ + uint64 currWALchunks; + + if (WALRateLimitDelay == 0) + return; + + /* + * Find out how many chunks of WAL have been written + */ + currWALchunks = GetCurrentTransactionChunks(); + + /* Nap if at least one more chunk was written since the last check */ + if (currWALchunks > prevWALchunks) + { + prevWALchunks = currWALchunks; + + pg_usleep(WALRateLimitDelay * 1000L); + + /* Might have gotten an interrupt while sleeping */ + CHECK_FOR_INTERRUPTS(); + } +} diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c index 63c951e..cf3159e 100644 --- a/src/backend/utils/init/globals.c +++ b/src/backend/utils/init/globals.c @@ -115,6 +115,7 @@ int VacuumCostPageMiss = 10; int VacuumCostPageDirty = 20; int VacuumCostLimit = 200; int VacuumCostDelay = 0; +int WALRateLimitDelay = 0; int VacuumPageHit = 0; int VacuumPageMiss = 0; diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 1217098..1c11745 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -544,6 +544,8 @@ const char *const config_group_names[] = gettext_noop("Resource Usage / Disk"), /* RESOURCES_KERNEL */ gettext_noop("Resource Usage / Kernel Resources"), + /* RESOURCES_WAL_RATE_LIMIT */ + gettext_noop("Resource Usage / WAL Rate Limiting"), /* RESOURCES_VACUUM_DELAY */ gettext_noop("Resource Usage / Cost-Based Vacuum Delay"), /* RESOURCES_BGWRITER */ @@ -1842,6 +1844,16 @@ static struct config_int ConfigureNamesInt[] = }, { + {"wal_rate_limit_delay", PGC_USERSET, RESOURCES_WAL_RATE_LIMIT, + gettext_noop("WAL rate limit delay in milliseconds."), + NULL, + GUC_UNIT_MS + }, + &WALRateLimitDelay, + 0, 0, 100, + NULL, NULL, NULL + }, + { {"autovacuum_vacuum_cost_delay", PGC_SIGHUP, AUTOVACUUM, gettext_noop("Vacuum cost delay in milliseconds, for autovacuum."), NULL, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 27791cc..9b953e4 100644 --- 
a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -145,6 +145,10 @@ # (change requires restart) #shared_preload_libraries = '' # (change requires restart) +# - WAL Rate Limiting - + +#wal_rate_limit_delay = 0 # 0-100 milliseconds + # - Cost-Based Vacuum Delay - #vacuum_cost_delay = 0 # 0-100 milliseconds diff --git a/src/include/access/xact.h b/src/include/access/xact.h index 634f5b2..31ca1ba 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -215,7 +215,8 @@ extern TransactionId GetCurrentTransactionId(void); extern TransactionId GetCurrentTransactionIdIfAny(void); extern TransactionId GetStableLatestTransactionId(void); extern SubTransactionId GetCurrentSubTransactionId(void); -extern void MarkCurrentTransactionIdLoggedIfAny(void); +extern void ReportTransactionInsertedWAL(uint32 insertedWALVolume); +extern uint64 GetCurrentTransactionWALVolume(void); extern bool SubTransactionIsActive(SubTransactionId subxid); extern CommandId GetCurrentCommandId(bool used); extern TimestampTz GetCurrentTransactionStartTimestamp(void); diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index b145a19..444eea8 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -226,6 +226,10 @@ extern bool allowSystemTableMods; extern PGDLLIMPORT int work_mem; extern PGDLLIMPORT int maintenance_work_mem; +extern void init_wal_rate_limit(void); +extern void wal_rate_limit_delay_point(void); +extern int WALRateLimitDelay; + extern int VacuumCostPageHit; extern int VacuumCostPageMiss; extern int VacuumCostPageDirty; diff --git a/src/include/utils/guc_tables.h b/src/include/utils/guc_tables.h index 47ff880..ded38c9 100644 --- a/src/include/utils/guc_tables.h +++ b/src/include/utils/guc_tables.h @@ -61,6 +61,7 @@ enum config_group RESOURCES_MEM, RESOURCES_DISK, RESOURCES_KERNEL, + RESOURCES_WAL_RATE_LIMIT, RESOURCES_VACUUM_DELAY, RESOURCES_BGWRITER, RESOURCES_ASYNCHRONOUS,