From 91106a6cbfc010416c97d040daa87f125c24e24b Mon Sep 17 00:00:00 2001 From: Daniel Gustafsson Date: Tue, 17 Mar 2026 22:16:06 +0100 Subject: [PATCH v20260317 3/3] review_comments --- src/backend/access/rmgrdesc/xlogdesc.c | 10 +- src/backend/access/transam/xlog.c | 144 +++++++----------- src/backend/postmaster/auxprocess.c | 2 +- src/backend/postmaster/datachecksum_state.c | 108 ++++++++----- src/backend/postmaster/postmaster.c | 2 +- src/backend/storage/page/bufpage.c | 7 + src/backend/utils/init/postinit.c | 2 +- src/backend/utils/misc/guc_tables.c | 4 +- src/bin/pg_controldata/pg_controldata.c | 2 +- src/include/access/rmgrlist.h | 2 +- src/include/access/xlog.h | 6 +- src/include/access/xlog_internal.h | 2 +- src/include/catalog/pg_control.h | 4 +- src/include/storage/checksum.h | 16 +- .../modules/test_checksums/t/002_restarts.pl | 2 +- .../modules/test_checksums/t/004_offline.pl | 6 +- src/tools/pgindent/typedefs.list | 2 +- 17 files changed, 165 insertions(+), 156 deletions(-) diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c index 2986a1c9996..47c3c8363ed 100644 --- a/src/backend/access/rmgrdesc/xlogdesc.c +++ b/src/backend/access/rmgrdesc/xlogdesc.c @@ -56,15 +56,15 @@ get_wal_level_string(int wal_level) } static const char * -get_checksum_version_string(ChecksumType checksum) +get_checksum_version_string(ChecksumStateType checksum) { switch (checksum) { case PG_DATA_CHECKSUM_VERSION: return "on"; - case PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION: + case PG_DATA_CHECKSUM_INPROGRESS_OFF: return "inprogress-off"; - case PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION: + case PG_DATA_CHECKSUM_INPROGRESS_ON: return "inprogress-on"; case PG_DATA_CHECKSUM_OFF: return "off"; @@ -85,7 +85,7 @@ xlog2_desc(StringInfo buf, XLogReaderState *record) xl_checksum_state xlrec; memcpy(&xlrec, rec, sizeof(xl_checksum_state)); - appendStringInfoString(buf, get_checksum_version_string(xlrec.new_checksumtype)); + appendStringInfoString(buf, get_checksum_version_string(xlrec.new_checksum_state)); } } @@ -124,7 +124,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record) checkpoint->oldestCommitTsXid, checkpoint->newestCommitTsXid, checkpoint->oldestActiveXid, - get_checksum_version_string(checkpoint->dataChecksumVersion), + get_checksum_version_string(checkpoint->dataChecksumState), (info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online"); } else if (info == XLOG_NEXTOID) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 389a2054cd6..883904d390f 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -661,11 +661,11 @@ static bool updateMinRecoveryPoint = true; * avoid locking for interrogating the data checksum state. Possible values * are the data checksum versions defined in storage/checksum.h. */ -static ChecksumType LocalDataChecksumVersion = 0; +static ChecksumStateType LocalDataChecksumState = 0; /* - * Variable backing the GUC, keep it in sync with LocalDataChecksumVersion. - * See SetLocalDataChecksumVersion(). + * Variable backing the GUC, keep it in sync with LocalDataChecksumState. + * See SetLocalDataChecksumState(). */ int data_checksums = 0; @@ -853,9 +853,8 @@ XLogInsertRecord(XLogRecData *rdata, * this case and fast otherwise. * * Also check to see if fullPageWrites was just turned on, there's a - * running backup or if checksums are enabled (all of which forces - * full-page writes); if we weren't already doing full-page writes - * then go back and recompute. + * running backup (which forces full-page writes); if we weren't + * already doing full-page writes then go back and recompute. * * If we aren't doing full-page writes then RedoRecPtr doesn't * actually affect the contents of the XLOG record, so we'll update @@ -4654,7 +4653,7 @@ GetMockAuthenticationNonce(void) * * Returns true iff data checksums are enabled or are in the process of being * enabled. During "inprogress-on" and "inprogress-off" states checksums must - * be written even though they are not verified (see datachecksumsworker.c for + * be written even though they are not verified (see datachecksum_state.c for * a longer discussion). * * This function is intended for callsites which are about to write a data page @@ -4665,9 +4664,9 @@ GetMockAuthenticationNonce(void) bool DataChecksumsNeedWrite(void) { - return (LocalDataChecksumVersion == PG_DATA_CHECKSUM_VERSION || - LocalDataChecksumVersion == PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION || - LocalDataChecksumVersion == PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION); + return (LocalDataChecksumState == PG_DATA_CHECKSUM_VERSION || + LocalDataChecksumState == PG_DATA_CHECKSUM_INPROGRESS_ON || + LocalDataChecksumState == PG_DATA_CHECKSUM_INPROGRESS_OFF); } /* @@ -4676,7 +4675,7 @@ DataChecksumsNeedWrite(void) * * Data checksums are only verified if they are fully enabled in the cluster. * During the "inprogress-on" and "inprogress-off" states they are only - * updated, not verified (see datachecksumsworker.c for a longer discussion). + * updated, not verified (see datachecksum_state.c for a longer discussion). * * This function is intended for callsites which have read data and are about * to perform checksum validation based on the result of this. Calling this @@ -4687,38 +4686,7 @@ DataChecksumsNeedWrite(void) bool DataChecksumsNeedVerify(void) { - return (LocalDataChecksumVersion == PG_DATA_CHECKSUM_VERSION); -} - -/* - * DataChecksumsOnInProgress - * Returns whether data checksums are being enabled - * - * Most operations don't need to worry about the "inprogress" states, and - * should use DataChecksumsNeedVerify() or DataChecksumsNeedWrite(). The - * "inprogress-on" state for enabling checksums is used when the checksum - * worker is setting checksums on all pages, it can thus be used to check for - * aborted checksum processing which need to be restarted. - */ -bool -DataChecksumsOnInProgress(void) -{ - return (LocalDataChecksumVersion == PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION); -} - -/* - * DataChecksumsOffInProgress - * Returns whether data checksums are being disabled - * - * The "inprogress-off" state for disabling checksums is used for when the - * worker resets the catalog state. DataChecksumsNeedVerify() or - * DataChecksumsNeedWrite() should be used for deciding whether to read/write - * checksums. - */ -bool -DataChecksumsOffInProgress(void) -{ - return (LocalDataChecksumVersion == PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION); + return (LocalDataChecksumState == PG_DATA_CHECKSUM_VERSION); } /* @@ -4745,10 +4713,10 @@ SetDataChecksumsOnInProgress(void) START_CRIT_SECTION(); MyProc->delayChkptFlags |= DELAY_CHKPT_START; - XLogChecksums(PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION); + XLogChecksums(PG_DATA_CHECKSUM_INPROGRESS_ON); SpinLockAcquire(&XLogCtl->info_lck); - XLogCtl->data_checksum_version = PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION; + XLogCtl->data_checksum_version = PG_DATA_CHECKSUM_INPROGRESS_ON; SpinLockRelease(&XLogCtl->info_lck); barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_ON); @@ -4761,7 +4729,7 @@ SetDataChecksumsOnInProgress(void) * shutdown while waiting we want to come back up with checksums enabled. */ LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); - ControlFile->data_checksum_version = PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION; + ControlFile->data_checksum_version = PG_DATA_CHECKSUM_INPROGRESS_ON; UpdateControlFile(); LWLockRelease(ControlFileLock); @@ -4781,7 +4749,7 @@ SetDataChecksumsOnInProgress(void) * set the state to "inprogress-on" (done by SetDataChecksumsOnInProgress()) * and the second one to set the state to "on" (done here). Below is a short * description of the processing, a more detailed write-up can be found in - * datachecksumsworker.c. + * datachecksum_state.c. * * To start the process of enabling data checksums in a running cluster the * data_checksum_version state must be changed to "inprogress-on". This state @@ -4790,7 +4758,7 @@ SetDataChecksumsOnInProgress(void) * validation during the process. When all existing pages are guaranteed to * have checksums, and all new pages will be initiated with checksums, the * state can be changed to "on". Once the state is "on" checksums will be both - * written and verified. See datachecksumsworker.c for a longer discussion on + * written and verified. See datachecksum_state.c for a longer discussion on * how data checksums can be enabled in a running cluster. * * This function blocks until all backends in the cluster have acknowledged the @@ -4809,7 +4777,7 @@ SetDataChecksumsOn(void) * The only allowed state transition to "on" is from "inprogress-on" since * that state ensures that all pages will have data checksums written. */ - if (XLogCtl->data_checksum_version != PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION) + if (XLogCtl->data_checksum_version != PG_DATA_CHECKSUM_INPROGRESS_ON) { SpinLockRelease(&XLogCtl->info_lck); elog(PANIC, "checksums not in \"inprogress-on\" mode"); @@ -4894,10 +4862,10 @@ SetDataChecksumsOff(void) START_CRIT_SECTION(); MyProc->delayChkptFlags |= DELAY_CHKPT_START; - XLogChecksums(PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION); + XLogChecksums(PG_DATA_CHECKSUM_INPROGRESS_OFF); SpinLockAcquire(&XLogCtl->info_lck); - XLogCtl->data_checksum_version = PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION; + XLogCtl->data_checksum_version = PG_DATA_CHECKSUM_INPROGRESS_OFF; SpinLockRelease(&XLogCtl->info_lck); barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_OFF); @@ -4960,7 +4928,7 @@ SetDataChecksumsOff(void) } /* - * InitLocalControlData + * InitLocalDataChecksumState * * Set up backend local caches of controldata variables which may change at * any point during runtime and thus require special cased locking. So far @@ -4968,17 +4936,17 @@ SetDataChecksumsOff(void) * purpose enough to handle future cases. */ void -InitLocalDataChecksumVersion(void) +InitLocalDataChecksumState(void) { SpinLockAcquire(&XLogCtl->info_lck); - SetLocalDataChecksumVersion(XLogCtl->data_checksum_version); + SetLocalDataChecksumState(XLogCtl->data_checksum_version); SpinLockRelease(&XLogCtl->info_lck); } void -SetLocalDataChecksumVersion(uint32 data_checksum_version) +SetLocalDataChecksumState(uint32 data_checksum_version) { - LocalDataChecksumVersion = data_checksum_version; + LocalDataChecksumState = data_checksum_version; data_checksums = data_checksum_version; } @@ -4987,13 +4955,13 @@ SetLocalDataChecksumVersion(uint32 data_checksum_version) const char * show_data_checksums(void) { - switch (LocalDataChecksumVersion) + switch (LocalDataChecksumState) { case PG_DATA_CHECKSUM_VERSION: return "on"; - case PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION: + case PG_DATA_CHECKSUM_INPROGRESS_ON: return "inprogress-on"; - case PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION: + case PG_DATA_CHECKSUM_INPROGRESS_OFF: return "inprogress-off"; case PG_DATA_CHECKSUM_OFF: return "off"; @@ -5298,7 +5266,7 @@ LocalProcessControlFile(bool reset) Assert(reset || ControlFile == NULL); ControlFile = palloc_object(ControlFileData); ReadControlFile(); - SetLocalDataChecksumVersion(ControlFile->data_checksum_version); + SetLocalDataChecksumState(ControlFile->data_checksum_version); } /* @@ -5471,7 +5439,7 @@ XLOGShmemInit(void) /* Use the checksum info from control file */ XLogCtl->data_checksum_version = ControlFile->data_checksum_version; - SetLocalDataChecksumVersion(XLogCtl->data_checksum_version); + SetLocalDataChecksumState(XLogCtl->data_checksum_version); SpinLockInit(&XLogCtl->Insert.insertpos_lck); SpinLockInit(&XLogCtl->info_lck); @@ -5547,7 +5515,7 @@ BootStrapXLOG(uint32 data_checksum_version) checkPoint.newestCommitTsXid = InvalidTransactionId; checkPoint.time = (pg_time_t) time(NULL); checkPoint.oldestActiveXid = InvalidTransactionId; - checkPoint.dataChecksumVersion = data_checksum_version; + checkPoint.dataChecksumState = data_checksum_version; TransamVariables->nextXid = checkPoint.nextXid; TransamVariables->nextOid = checkPoint.nextOid; @@ -6635,18 +6603,18 @@ StartupXLOG(void) * restart here since we cannot launch a dynamic background worker * directly from here (it has to be from a regular backend). */ - if (XLogCtl->data_checksum_version == PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION) + if (XLogCtl->data_checksum_version == PG_DATA_CHECKSUM_INPROGRESS_ON) { XLogChecksums(PG_DATA_CHECKSUM_OFF); SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->data_checksum_version = 0; - SetLocalDataChecksumVersion(XLogCtl->data_checksum_version); + SetLocalDataChecksumState(XLogCtl->data_checksum_version); SpinLockRelease(&XLogCtl->info_lck); ereport(WARNING, - errmsg("data checksums state has been set to off"), - errhint("If checksums were being enabled during shutdown then processing must be manually restarted.")); + errmsg("enabling data checksums was interrupted"), + errhint("Data checksum processing must be manually restarted for checksums to be enabled")); } /* @@ -6655,13 +6623,13 @@ StartupXLOG(void) * checksums and we can move to off instead of prompting the user to * perform any action. */ - if (XLogCtl->data_checksum_version == PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION) + if (XLogCtl->data_checksum_version == PG_DATA_CHECKSUM_INPROGRESS_OFF) { XLogChecksums(PG_DATA_CHECKSUM_OFF); SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->data_checksum_version = 0; - SetLocalDataChecksumVersion(XLogCtl->data_checksum_version); + SetLocalDataChecksumState(XLogCtl->data_checksum_version); SpinLockRelease(&XLogCtl->info_lck); } @@ -7552,7 +7520,7 @@ CreateCheckPoint(int flags) * Get the current data_checksum_version value from xlogctl, valid at the * time of the checkpoint. */ - checkPoint.dataChecksumVersion = XLogCtl->data_checksum_version; + checkPoint.dataChecksumState = XLogCtl->data_checksum_version; if (shutdown) { @@ -7674,7 +7642,7 @@ CreateCheckPoint(int flags) LWLockRelease(OidGenLock); SpinLockAcquire(&XLogCtl->info_lck); - checkPoint.dataChecksumVersion = XLogCtl->data_checksum_version; + checkPoint.dataChecksumState = XLogCtl->data_checksum_version; SpinLockRelease(&XLogCtl->info_lck); checkPoint.logicalDecodingEnabled = IsLogicalDecodingEnabled(); @@ -7827,7 +7795,7 @@ CreateCheckPoint(int flags) ControlFile->minRecoveryPointTLI = 0; /* make sure we start with the checksum version as of the checkpoint */ - ControlFile->data_checksum_version = checkPoint.dataChecksumVersion; + ControlFile->data_checksum_version = checkPoint.dataChecksumState; /* * Persist unloggedLSN value. It's reset on crash recovery, so this goes @@ -8321,7 +8289,7 @@ CreateRestartPoint(int flags) } /* we shall start with the latest checksum version */ - ControlFile->data_checksum_version = lastCheckPoint.dataChecksumVersion; + ControlFile->data_checksum_version = lastCheckPoint.dataChecksumState; UpdateControlFile(); } @@ -8770,7 +8738,7 @@ XLogChecksums(uint32 new_type) xl_checksum_state xlrec; XLogRecPtr recptr; - xlrec.new_checksumtype = new_type; + xlrec.new_checksum_state = new_type; XLogBeginInsert(); XLogRegisterData((char *) &xlrec, sizeof(xl_checksum_state)); @@ -8906,8 +8874,8 @@ xlog_redo(XLogReaderState *record) checkPoint.oldestMultiDB); SpinLockAcquire(&XLogCtl->info_lck); - XLogCtl->data_checksum_version = checkPoint.dataChecksumVersion; - SetLocalDataChecksumVersion(checkPoint.dataChecksumVersion); + XLogCtl->data_checksum_version = checkPoint.dataChecksumState; + SetLocalDataChecksumState(checkPoint.dataChecksumState); SpinLockRelease(&XLogCtl->info_lck); /* @@ -8969,7 +8937,7 @@ xlog_redo(XLogReaderState *record) /* ControlFile->checkPointCopy always tracks the latest ckpt XID */ LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); ControlFile->checkPointCopy.nextXid = checkPoint.nextXid; - ControlFile->data_checksum_version = checkPoint.dataChecksumVersion; + ControlFile->data_checksum_version = checkPoint.dataChecksumState; LWLockRelease(ControlFileLock); /* @@ -9038,7 +9006,7 @@ xlog_redo(XLogReaderState *record) LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); ControlFile->checkPointCopy.nextXid = checkPoint.nextXid; old_state = ControlFile->data_checksum_version; - ControlFile->data_checksum_version = checkPoint.dataChecksumVersion; + ControlFile->data_checksum_version = checkPoint.dataChecksumState; LWLockRelease(ControlFileLock); /* TLI should not change in an on-line checkpoint */ @@ -9055,21 +9023,21 @@ xlog_redo(XLogReaderState *record) * refactoring to be pretty (as well as in the _REDO case */ SpinLockAcquire(&XLogCtl->info_lck); - XLogCtl->data_checksum_version = checkPoint.dataChecksumVersion; - if (checkPoint.dataChecksumVersion != old_state) + XLogCtl->data_checksum_version = checkPoint.dataChecksumState; + if (checkPoint.dataChecksumState != old_state) new_state = true; SpinLockRelease(&XLogCtl->info_lck); if (new_state) { - switch (checkPoint.dataChecksumVersion) + switch (checkPoint.dataChecksumState) { - case PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION: + case PG_DATA_CHECKSUM_INPROGRESS_ON: barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_ON); WaitForProcSignalBarrier(barrier); break; - case PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION: + case PG_DATA_CHECKSUM_INPROGRESS_OFF: barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_OFF); WaitForProcSignalBarrier(barrier); break; @@ -9263,12 +9231,12 @@ xlog_redo(XLogReaderState *record) { switch (redo_rec.data_checksum_version) { - case PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION: + case PG_DATA_CHECKSUM_INPROGRESS_ON: barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_ON); WaitForProcSignalBarrier(barrier); break; - case PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION: + case PG_DATA_CHECKSUM_INPROGRESS_OFF: barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_OFF); WaitForProcSignalBarrier(barrier); break; @@ -9355,7 +9323,7 @@ xlog2_redo(XLogReaderState *record) * like that), could that confuse the instance? */ SpinLockAcquire(&XLogCtl->info_lck); - XLogCtl->data_checksum_version = state.new_checksumtype; + XLogCtl->data_checksum_version = state.new_checksum_state; SpinLockRelease(&XLogCtl->info_lck); /* @@ -9363,14 +9331,14 @@ xlog2_redo(XLogReaderState *record) * change to checksum status. Once the barrier has been passed we can * initiate the corresponding processing. */ - switch (state.new_checksumtype) + switch (state.new_checksum_state) { - case PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION: + case PG_DATA_CHECKSUM_INPROGRESS_ON: barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_ON); WaitForProcSignalBarrier(barrier); break; - case PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION: + case PG_DATA_CHECKSUM_INPROGRESS_OFF: barrier = EmitProcSignalBarrier(PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_OFF); WaitForProcSignalBarrier(barrier); break; diff --git a/src/backend/postmaster/auxprocess.c b/src/backend/postmaster/auxprocess.c index 9803ece0792..8fdc518b3a1 100644 --- a/src/backend/postmaster/auxprocess.c +++ b/src/backend/postmaster/auxprocess.c @@ -86,7 +86,7 @@ AuxiliaryProcessMainCommon(void) * NB: Even if the postmaster handled barriers, the value might still be * stale, as it might have changed after this process forked. */ - InitLocalDataChecksumVersion(); + InitLocalDataChecksumState(); /* * Auxiliary processes don't run transactions, but they may need a diff --git a/src/backend/postmaster/datachecksum_state.c b/src/backend/postmaster/datachecksum_state.c index 2384be7f657..2b6c412b70a 100644 --- a/src/backend/postmaster/datachecksum_state.c +++ b/src/backend/postmaster/datachecksum_state.c @@ -4,7 +4,7 @@ * Background worker for enabling or disabling data checksums online as * well as functionality for manipulating data checksum state * - * When enabling data checksums on a database at initdb time or when shut down + * When enabling data checksums on a cluster at initdb time or when shut down * with pg_checksums, no extra process is required as each page is checksummed, * and verified, when accessed. When enabling checksums on an already running * cluster, this worker will ensure that all pages are checksummed before @@ -21,18 +21,20 @@ * "inprogress-on" which signals that write operations MUST compute and write * the checksum on the data page, but during reading the checksum SHALL NOT be * verified. This ensures that all objects created during when checksums are - * being enabled will have checksums set, but reads wont fail due to missing or + * being enabled will have checksums set, but reads won't fail due to missing or * invalid checksums. Invalid checksums can be present in case the cluster had * checksums enabled, then disabled them and updated the page while they were * disabled. * - * The DataChecksumsWorker will compile a list of databases which exist at the - * start of checksumming, and once all are processed will regenerate the list - * and start over processing any new entries. Once there are no new entries on - * the list, processing will end. All databases MUST BE successfully processed - * in order for data checksums to be enabled, the only exception are databases - * which are dropped before having been processed. - + * The DataChecksumsWorker will compile a list of all databases at the start, + * and once all are processed will regenerate the list and start over + * processing any new entries. Once there are no new entries on the list, + * processing will end. The regenerated list is required since databases can + * be created concurrently with data checksum processing, using a template + * database which has yet to be processed. All databases MUST BE successfully + * processed in order for data checksums to be enabled, the only exception are + * databases which are dropped before having been processed. + * * Any new relation in a processed database, created during processing, will * see the in-progress state and will automatically be checksummed. * @@ -46,13 +48,13 @@ * 2. Disabling checksums * ---------------------- * When disabling checksums, data_checksums will be set to "inprogress-off" - * which signals that checksums are written but no longer verified. This ensure - * that backends which have yet to move from the "on" state will still be able - * to process data checksum validation. + * which signals that checksums are written but no longer need to be verified. + * This ensures that backends which have not yet transitioned to the + * "inprogress-off" state will still see valid checksums on pages. * * 3. Synchronization and Correctness * ---------------------------------- - * The processes involved in enabling, or disabling, data checksums in an + * The processes involved in enabling or disabling data checksums in an * online cluster must be properly synchronized with the normal backends * serving concurrent queries to ensure correctness. Correctness is defined * as the following: @@ -74,8 +76,8 @@ * latter with ensuring that any concurrent activity cannot break the data * checksum contract during processing. * - * Synchronizing the state change is done with procsignal barriers, before - * updating the controlfile with the state all other backends must absorb the + * Synchronizing the state change is done with procsignal barriers. Before + * updating the data_checksums state in the control file, all other backends must absorb the * barrier. Barrier absorption will happen during interrupt processing, which * means that connected backends will change state at different times. If * waiting for a barrier is done during startup, for example during replay, it @@ -90,8 +92,9 @@ * failing to validate the data checksum on the page when reading it. * * When processing starts all backends belong to one of the below sets, with - * one set being empty: + * one if Bd and Bi being empty: * + * Bg: Backend updating the global state and emitting the procsignalbarrier * Bd: Backends in "off" state * Bi: Backends in "inprogress-on" state * @@ -173,6 +176,9 @@ * * Restartability (not necessarily with page granularity). * * Avoid processing databases which were created during inprogress-on. * Right now all databases are processed regardless to be safe. + * * Teach CREATE DATABASE to calculate checksums for databases created + * during inprogress-on with a template database which has yet to be + * processed. * * * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group @@ -229,8 +235,7 @@ * must not be in. * * The reason for this explicit checking is to ensure that processing cannot - * be started such that it breaks the assumptions of the state machine. See - * datachecksumsworker.c for a lengthy discussion on these states. + * be started such that it breaks the assumptions of the state machine. * * MAX_BARRIER_CONDITIONS must match largest number of sets in barrier_eq and * barrier_ne in the below checksum_barriers definition. @@ -252,10 +257,41 @@ typedef struct ChecksumBarrierCondition static const ChecksumBarrierCondition checksum_barriers[4] = { - {PG_DATA_CHECKSUM_OFF, {PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION, PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION}, 2, {PG_DATA_CHECKSUM_VERSION}, 1}, - {PG_DATA_CHECKSUM_VERSION, {PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION}, 1, {0}, 0}, - {PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION, {0}, 0, {PG_DATA_CHECKSUM_VERSION}, 1}, - {PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION, {PG_DATA_CHECKSUM_VERSION}, 1, {0}, 0}, + /* + * When disabling checksums, either inprogress state is Ok but checksums + * must not be in the enabled state. + */ + { + .target = PG_DATA_CHECKSUM_OFF, + .barrier_eq = {PG_DATA_CHECKSUM_INPROGRESS_ON, PG_DATA_CHECKSUM_INPROGRESS_OFF}, + .barrier_eq_sz = 2, + .barrier_ne = {PG_DATA_CHECKSUM_VERSION}, + .barrier_ne_sz = 1 + }, + /* When enabling the current state must be inprogress-on */ + { + .target = PG_DATA_CHECKSUM_VERSION, + .barrier_eq = {PG_DATA_CHECKSUM_INPROGRESS_ON}, + .barrier_eq_sz = 1, + {0}, 0 + }, + + /* + * When moving to inprogress-on the current state cannot enabled, but when + * moving to inprogress-off the current state must be enabled. + */ + { + .target = PG_DATA_CHECKSUM_INPROGRESS_ON, + {0}, 0, + .barrier_ne = {PG_DATA_CHECKSUM_VERSION}, + .barrier_ne_sz = 1 + }, + { + .target = PG_DATA_CHECKSUM_INPROGRESS_OFF, + .barrier_eq = {PG_DATA_CHECKSUM_VERSION}, + .barrier_eq_sz = 1, + {0}, 0 + }, }; /* @@ -399,13 +435,13 @@ AbsorbDataChecksumsBarrier(ProcSignalBarrierType barrier) switch (barrier) { case PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_ON: - target_state = PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION; + target_state = PG_DATA_CHECKSUM_INPROGRESS_ON; break; case PROCSIGNAL_BARRIER_CHECKSUM_ON: target_state = PG_DATA_CHECKSUM_VERSION; break; case PROCSIGNAL_BARRIER_CHECKSUM_INPROGRESS_OFF: - target_state = PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION; + target_state = PG_DATA_CHECKSUM_INPROGRESS_OFF; break; case PROCSIGNAL_BARRIER_CHECKSUM_OFF: target_state = PG_DATA_CHECKSUM_OFF; @@ -427,7 +463,7 @@ AbsorbDataChecksumsBarrier(ProcSignalBarrierType barrier) */ if (RecoveryInProgress()) { - SetLocalDataChecksumVersion(target_state); + SetLocalDataChecksumState(target_state); return true; } @@ -480,7 +516,7 @@ AbsorbDataChecksumsBarrier(ProcSignalBarrierType barrier) errmsg("incorrect data checksum state %i for target state %i", current, target_state)); - SetLocalDataChecksumVersion(target_state); + SetLocalDataChecksumState(target_state); return true; } @@ -644,7 +680,7 @@ ProcessSingleRelationFork(Relation reln, ForkNumber forkNum, BufferAccessStrateg return false; /* Report the current relation to pgstat_activity */ - snprintf(activity, sizeof(activity) - 1, "processing: %s.%s (%s, %dblocks)", + snprintf(activity, sizeof(activity) - 1, "processing: %s.%s (%s, %u blocks)", relns, RelationGetRelationName(reln), forkNames[forkNum], numblocks); pgstat_report_activity(STATE_RUNNING, activity); pgstat_progress_update_param(PROGRESS_DATACHECKSUMS_BLOCKS_TOTAL, numblocks); @@ -658,7 +694,7 @@ ProcessSingleRelationFork(Relation reln, ForkNumber forkNum, BufferAccessStrateg { Buffer buf = ReadBufferExtended(reln, forkNum, blknum, RBM_NORMAL, strategy); - /* Need to get an exclusive lock before we can flag as dirty */ + /* Need to get an exclusive lock to mark the buffer as dirty */ LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); /* @@ -666,7 +702,7 @@ ProcessSingleRelationFork(Relation reln, ForkNumber forkNum, BufferAccessStrateg * re-write the page to WAL even if the checksum hasn't changed, * because if there is a replica it might have a slightly different * version of the page with an invalid checksum, caused by unlogged - * changes (e.g. hintbits) on the master happening while checksums + * changes (e.g. hintbits) on the primary happening while checksums * were off. This can happen if there was a valid checksum on the page * at one point in the past, so only when checksums are first on, then * off, and then turned on again. TODO: investigate if this could be @@ -682,8 +718,7 @@ ProcessSingleRelationFork(Relation reln, ForkNumber forkNum, BufferAccessStrateg /* * This is the only place where we check if we are asked to abort, the - * abortion will bubble up from here. It's safe to check this without - * a lock, because if we miss it being set, we will try again soon. + * abortion will bubble up from here. */ Assert(operation == ENABLE_DATACHECKSUMS); LWLockAcquire(DataChecksumsWorkerLock, LW_SHARED); @@ -827,10 +862,9 @@ ProcessDatabase(DataChecksumsWorkerDatabase *db) /* * If the postmaster crashed we cannot end up with a processed database so * we have no alternative other than exiting. When enabling checksums we - * won't at this time have changed the pg_control version to enabled so - * when the cluster comes back up processing will have to be restarted. - * When disabling, the pg_control version will be set to off before this - * so when the cluster comes up checksums will be off as expected. + * won't at this time have changed the data checksums state in pg_control + * to enabled so when the cluster comes back up processing will have to be + * restarted. */ if (status == BGWH_POSTMASTER_DIED) ereport(FATAL, @@ -879,6 +913,8 @@ ProcessDatabase(DataChecksumsWorkerDatabase *db) static void launcher_exit(int code, Datum arg) { + abort_requested = false; + if (launcher_running) { LWLockAcquire(DataChecksumsWorkerLock, LW_EXCLUSIVE); @@ -1325,7 +1361,7 @@ ProcessAllDatabases(void) * Get a fresh list of databases to detect the second case where the * database was dropped before we had started processing it. If a database * still exists, but enabling checksums failed then we fail the entire - * checksumming process and exit with an error. + * checksum enablement process and exit with an error. */ DatabaseList = BuildDatabaseList(); diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index c898c53b623..04a3ba04332 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -2999,7 +2999,7 @@ PostmasterStateMachine(void) B_INVALID, B_STANDALONE_BACKEND); - /* also add checksumming processes */ + /* also add data checksums processes */ remainMask = btmask_add(remainMask, B_DATACHECKSUMSWORKER_LAUNCHER, B_DATACHECKSUMSWORKER_WORKER); diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index 8da8f8fecf1..4941d8e224f 100644 --- a/src/backend/storage/page/bufpage.c +++ b/src/backend/storage/page/bufpage.c @@ -107,6 +107,13 @@ PageIsVerified(PageData *page, BlockNumber blkno, int flags, bool *checksum_fail */ if (!PageIsNew(page)) { + /* + * There shouldn't be any check for interrupt calls happening in this + * codepath, but just to be on the safe side we hold interrupts since + * if they did happen the data checksum state could change during + * verifying checksums, which could lead to incorrect verification + * results. + */ HOLD_INTERRUPTS(); if (DataChecksumsNeedVerify()) { diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index edd429e83cb..18f77bfb5b4 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -774,7 +774,7 @@ InitPostgres(const char *in_dbname, Oid dboid, * NB: Even if the postmaster handled barriers, the value might still be * stale, as it might have changed after this process forked. */ - InitLocalDataChecksumVersion(); + InitLocalDataChecksumState(); /* * Also set up timeout handlers needed for backend operation. We need diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 661b8757979..5e58ca1345f 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -504,8 +504,8 @@ static const struct config_enum_entry file_extend_method_options[] = { static const struct config_enum_entry data_checksums_options[] = { {"on", PG_DATA_CHECKSUM_VERSION, true}, {"off", PG_DATA_CHECKSUM_OFF, true}, - {"inprogress-on", PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION, true}, - {"inprogress-off", PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION, true}, + {"inprogress-on", PG_DATA_CHECKSUM_INPROGRESS_ON, true}, + {"inprogress-off", PG_DATA_CHECKSUM_INPROGRESS_OFF, true}, {NULL, 0, false} }; diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index 47afa82d9c5..fe5fc5ec133 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ b/src/bin/pg_controldata/pg_controldata.c @@ -288,7 +288,7 @@ main(int argc, char *argv[]) printf(_("Latest checkpoint's newestCommitTsXid:%u\n"), ControlFile->checkPointCopy.newestCommitTsXid); printf(_("Latest checkpoint's data_checksum_version:%u\n"), - ControlFile->checkPointCopy.dataChecksumVersion); + ControlFile->checkPointCopy.dataChecksumState); printf(_("Time of latest checkpoint: %s\n"), ckpttime_str); printf(_("Fake LSN counter for unlogged rels: %X/%08X\n"), diff --git a/src/include/access/rmgrlist.h b/src/include/access/rmgrlist.h index 7cec6301769..ae32ef16d67 100644 --- a/src/include/access/rmgrlist.h +++ b/src/include/access/rmgrlist.h @@ -26,7 +26,6 @@ /* symbol name, textual name, redo, desc, identify, startup, cleanup, mask, decode */ PG_RMGR(RM_XLOG_ID, "XLOG", xlog_redo, xlog_desc, xlog_identify, NULL, NULL, NULL, xlog_decode) -PG_RMGR(RM_XLOG2_ID, "XLOG2", xlog2_redo, xlog2_desc, xlog2_identify, NULL, NULL, NULL, xlog2_decode) PG_RMGR(RM_XACT_ID, "Transaction", xact_redo, xact_desc, xact_identify, NULL, NULL, NULL, xact_decode) PG_RMGR(RM_SMGR_ID, "Storage", smgr_redo, smgr_desc, smgr_identify, NULL, NULL, NULL, NULL) PG_RMGR(RM_CLOG_ID, "CLOG", clog_redo, clog_desc, clog_identify, NULL, NULL, NULL, NULL) @@ -48,3 +47,4 @@ PG_RMGR(RM_COMMIT_TS_ID, "CommitTs", commit_ts_redo, commit_ts_desc, commit_ts_i PG_RMGR(RM_REPLORIGIN_ID, "ReplicationOrigin", replorigin_redo, replorigin_desc, replorigin_identify, NULL, NULL, NULL, NULL) PG_RMGR(RM_GENERIC_ID, "Generic", generic_redo, generic_desc, generic_identify, NULL, NULL, generic_mask, NULL) PG_RMGR(RM_LOGICALMSG_ID, "LogicalMessage", logicalmsg_redo, logicalmsg_desc, logicalmsg_identify, NULL, NULL, NULL, logicalmsg_decode) +PG_RMGR(RM_XLOG2_ID, "XLOG2", xlog2_redo, xlog2_desc, xlog2_identify, NULL, NULL, NULL, xlog2_decode) diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 256f3499481..684c1a9b20a 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -249,14 +249,12 @@ extern uint64 GetSystemIdentifier(void); extern char *GetMockAuthenticationNonce(void); extern bool DataChecksumsNeedWrite(void); extern bool DataChecksumsNeedVerify(void); -extern bool DataChecksumsOnInProgress(void); -extern bool DataChecksumsOffInProgress(void); extern void SetDataChecksumsOnInProgress(void); extern void SetDataChecksumsOn(void); extern void SetDataChecksumsOff(void); extern const char *show_data_checksums(void); -extern void InitLocalDataChecksumVersion(void); -extern void SetLocalDataChecksumVersion(uint32 data_checksum_version); +extern void InitLocalDataChecksumState(void); +extern void SetLocalDataChecksumState(uint32 data_checksum_version); extern bool GetDefaultCharSignedness(void); extern XLogRecPtr GetFakeLSNForUnloggedRel(void); extern Size XLOGShmemSize(void); diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index 8fb5b978f51..dfa0251d55e 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -291,7 +291,7 @@ typedef struct xl_restore_point /* Information logged when data checksum level is changed */ typedef struct xl_checksum_state { - ChecksumType new_checksumtype; + ChecksumStateType new_checksum_state; } xl_checksum_state; /* Overwrite of prior contrecord */ diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index 5ac74d7ef1c..146c06cc8ef 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -65,7 +65,7 @@ typedef struct CheckPoint TransactionId oldestActiveXid; /* data checksums state at the time of the checkpoint */ - uint32 dataChecksumVersion; + uint32 dataChecksumState; } CheckPoint; /* XLOG info values for XLOG rmgr */ @@ -229,7 +229,7 @@ typedef struct ControlFileData bool float8ByVal; /* float8, int8, etc pass-by-value? */ /* Are data pages protected by checksums? Zero if no checksum version */ - uint32 data_checksum_version; /* persistent */ + uint32 data_checksum_version; /* * True if the default signedness of char is "signed" on a platform where diff --git a/src/include/storage/checksum.h b/src/include/storage/checksum.h index b6bbba82949..fe5d30b4349 100644 --- a/src/include/storage/checksum.h +++ b/src/include/storage/checksum.h @@ -16,18 +16,18 @@ #include "storage/block.h" /* - * Checksum version 0 is used for when data checksums are disabled (OFF). - * PG_DATA_CHECKSUM_VERSION defines that data checksums are enabled in the - * cluster and PG_DATA_CHECKSUM_INPROGRESS_{ON|OFF}_VERSION defines that data - * checksums are either currently being enabled or disabled. + * Checksum state 0 is used for when data checksums are disabled (OFF). + * PG_DATA_CHECKSUM_INPROGRESS_{ON|OFF} defines that data checksums are either + * currently being enabled or disabled, and PG_DATA_CHECKSUM_VERSION defines + * that data checksums are enabled. */ -typedef enum ChecksumType +typedef enum ChecksumStateType { PG_DATA_CHECKSUM_OFF = 0, PG_DATA_CHECKSUM_VERSION, - PG_DATA_CHECKSUM_INPROGRESS_ON_VERSION, - PG_DATA_CHECKSUM_INPROGRESS_OFF_VERSION -} ChecksumType; + PG_DATA_CHECKSUM_INPROGRESS_OFF, + PG_DATA_CHECKSUM_INPROGRESS_ON, +} ChecksumStateType; /* * Compute the checksum for a Postgres page. The page must be aligned on a diff --git a/src/test/modules/test_checksums/t/002_restarts.pl b/src/test/modules/test_checksums/t/002_restarts.pl index a4d35e9f95c..2b36e5ee59a 100644 --- a/src/test/modules/test_checksums/t/002_restarts.pl +++ b/src/test/modules/test_checksums/t/002_restarts.pl @@ -36,7 +36,7 @@ SKIP: if (!$ENV{PG_TEST_EXTRA} || $ENV{PG_TEST_EXTRA} !~ /\bchecksum_extended\b/); - # Create a barrier for checksumming to block on, in this case a pre- + # Create a barrier for checksum enablement to block on, in this case a pre- # existing temporary table which is kept open while processing is started. # We can accomplish this by setting up an interactive psql process which # keeps the temporary table created as we enable checksums in another psql diff --git a/src/test/modules/test_checksums/t/004_offline.pl b/src/test/modules/test_checksums/t/004_offline.pl index f33e278de28..afe6538faab 100644 --- a/src/test/modules/test_checksums/t/004_offline.pl +++ b/src/test/modules/test_checksums/t/004_offline.pl @@ -48,9 +48,9 @@ $node->start; # Ensure that checksums are disabled test_checksum_state($node, 'off'); -# Create a barrier for checksumming to block on, in this case a pre-existing -# temporary table which is kept open while processing is started. We can -# accomplish this by setting up an interactive psql process which keeps the +# Create a barrier for checksum enablement to block on, in this case a pre- +# existing temporary table which is kept open while processing is started. We +# can accomplish this by setting up an interactive psql process which keeps the # temporary table created as we enable checksums in another psql process. my $bsession = $node->background_psql('postgres'); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index c2f9a2a9b9a..15d32a7768b 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -432,7 +432,7 @@ CheckpointStatsData CheckpointerRequest CheckpointerShmemStruct ChecksumBarrierCondition -ChecksumType +ChecksumStateType Chromosome CkptSortItem CkptTsStatus -- 2.39.3 (Apple Git-146)