From 7c40600581799b12eeb8550aa095385e7adfb5a9 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Thu, 24 Nov 2022 13:28:22 +1300 Subject: [PATCH v5 4/4] Try to tolerate torn reads of control file in frontend. Some of our src/bin tools read the control file without any kind of interlocking against concurrent writes. In the backend we avoid this problem with ControlFileLock, but we can't do that from a stand-alone program. Tolerate the torn read that can occur on some systems (ext4, ntfs) by retrying if the checksum fails, until we get two reads in a row with the same checksum. This is only a last-ditch effort and is not guaranteed to reach the right conclusion with extremely unlucky scheduling, but it seems at least very likely to. Thanks to Tom Lane for this suggestion. Back-patch to all supported releases. Reviewed-by: Anton A. Melnikov Discussion: https://postgr.es/m/20221123014224.xisi44byq3cf5psi%40awork3.anarazel.de --- src/common/controldata_utils.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/common/controldata_utils.c b/src/common/controldata_utils.c index 9723587466..8b1786512f 100644 --- a/src/common/controldata_utils.c +++ b/src/common/controldata_utils.c @@ -56,12 +56,22 @@ get_controlfile(const char *DataDir, bool *crc_ok_p) char ControlFilePath[MAXPGPATH]; pg_crc32c crc; int r; +#ifdef FRONTEND + pg_crc32c last_crc; + int retries = 0; +#endif Assert(crc_ok_p); ControlFile = palloc_object(ControlFileData); snprintf(ControlFilePath, MAXPGPATH, "%s/global/pg_control", DataDir); +#ifdef FRONTEND + INIT_CRC32C(last_crc); + +retry: +#endif + #ifndef FRONTEND if ((fd = OpenTransientFile(ControlFilePath, O_RDONLY | PG_BINARY)) == -1) ereport(ERROR, @@ -117,6 +127,26 @@ get_controlfile(const char *DataDir, bool *crc_ok_p) *crc_ok_p = EQ_CRC32C(crc, ControlFile->crc); +#ifdef FRONTEND + + /* + * With unlucky timing on filesystems that don't implement atomicity of + * concurrent reads and writes, we might have seen garbage if the 
server + * was writing to the file at the same time. Keep retrying until we see + * the same CRC twice in a row (or run out of retries), with a tiny sleep + * to give a concurrent writer a good chance of making progress. + */ + if (!*crc_ok_p && + (retries == 0 || !EQ_CRC32C(crc, last_crc)) && + retries < 10) + { + retries++; + last_crc = crc; + pg_usleep(10000); + goto retry; + } +#endif + /* Make sure the control file is valid byte order. */ if (ControlFile->pg_control_version % 65536 == 0 && ControlFile->pg_control_version / 65536 != 0) -- 2.39.2