From a63818a32d661dba563cedfdb85731e522b3c6a9 Mon Sep 17 00:00:00 2001
From: Thomas Munro
Date: Thu, 24 Nov 2022 13:28:22 +1300
Subject: [PATCH 2/2] Try to tolerate concurrent reads and writes of control file.

Various frontend programs and SQL-callable backend functions read the
control file without any kind of interlocking against concurrent writes.
Linux ext4 doesn't implement the atomicity required by POSIX here, so a
concurrent reader can see only partial effects of an in-progress write.

Tolerate this by retrying until we get two reads in a row with the same
checksum, after an idea from Tom Lane.

Reported-by: Andres Freund
Discussion: https://postgr.es/m/20221123014224.xisi44byq3cf5psi%40awork3.anarazel.de
---
 src/common/controldata_utils.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/src/common/controldata_utils.c b/src/common/controldata_utils.c
index 2d1f35bbd1..200d24df02 100644
--- a/src/common/controldata_utils.c
+++ b/src/common/controldata_utils.c
@@ -56,12 +56,19 @@ get_controlfile(const char *DataDir, bool *crc_ok_p)
 	char		ControlFilePath[MAXPGPATH];
 	pg_crc32c	crc;
 	int			r;
+	bool		first_try;
+	pg_crc32c	last_crc;
 
 	Assert(crc_ok_p);
 
 	ControlFile = palloc_object(ControlFileData);
 	snprintf(ControlFilePath, MAXPGPATH, "%s/global/pg_control", DataDir);
 
+	first_try = true;
+	INIT_CRC32C(last_crc);
+
+retry:
+
 #ifndef FRONTEND
 	if ((fd = OpenTransientFile(ControlFilePath, O_RDONLY | PG_BINARY)) == -1)
 		ereport(ERROR,
@@ -117,6 +124,24 @@ get_controlfile(const char *DataDir, bool *crc_ok_p)
 
 	*crc_ok_p = EQ_CRC32C(crc, ControlFile->crc);
 
+	/*
+	 * With unlucky timing on filesystems that don't implement atomicity of
+	 * concurrent reads and writes (such as Linux ext4), we might have seen
+	 * garbage if the server was writing to the file at the same time. Keep
+	 * retrying until we see the same CRC twice.
+	 */
+	if (!*crc_ok_p && (first_try || !EQ_CRC32C(crc, last_crc)))
+	{
+		first_try = false;
+		last_crc = crc;
+		pg_usleep(10000);
+
+#ifndef FRONTEND
+		CHECK_FOR_INTERRUPTS();
+#endif
+		goto retry;
+	}
+
 	/* Make sure the control file is valid byte order. */
 	if (ControlFile->pg_control_version % 65536 == 0 &&
 		ControlFile->pg_control_version / 65536 != 0)
-- 
2.35.1