From 7c40600581799b12eeb8550aa095385e7adfb5a9 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Thu, 24 Nov 2022 13:28:22 +1300
Subject: [PATCH v5 4/4] Try to tolerate torn reads of control file in
 frontend.

Some of our src/bin tools read the control file without any kind of
interlocking against concurrent writes.  In the backend we avoid this
problem with ControlFileLock, but we can't do that from a stand-alone
program.

Tolerate the torn read that can occur on some systems (ext4, NTFS) by
retrying if the checksum fails, until we get two reads in a row with
the same checksum or run out of retries.  This is only a last-ditch
effort and is not guaranteed to reach the right conclusion with
extremely unlucky scheduling, but it seems very likely to do so in
practice.  Thanks to Tom Lane for this suggestion.

Back-patch to all supported releases.

Reviewed-by: Anton A. Melnikov <aamelnikov@inbox.ru>
Discussion: https://postgr.es/m/20221123014224.xisi44byq3cf5psi%40awork3.anarazel.de
---
 src/common/controldata_utils.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/src/common/controldata_utils.c b/src/common/controldata_utils.c
index 9723587466..8b1786512f 100644
--- a/src/common/controldata_utils.c
+++ b/src/common/controldata_utils.c
@@ -56,12 +56,22 @@ get_controlfile(const char *DataDir, bool *crc_ok_p)
 	char		ControlFilePath[MAXPGPATH];
 	pg_crc32c	crc;
 	int			r;
+#ifdef FRONTEND
+	pg_crc32c	last_crc;
+	int			retries = 0;
+#endif
 
 	Assert(crc_ok_p);
 
 	ControlFile = palloc_object(ControlFileData);
 	snprintf(ControlFilePath, MAXPGPATH, "%s/global/pg_control", DataDir);
 
+#ifdef FRONTEND
+	INIT_CRC32C(last_crc);
+
+retry:
+#endif
+
 #ifndef FRONTEND
 	if ((fd = OpenTransientFile(ControlFilePath, O_RDONLY | PG_BINARY)) == -1)
 		ereport(ERROR,
@@ -117,6 +127,26 @@ get_controlfile(const char *DataDir, bool *crc_ok_p)
 
 	*crc_ok_p = EQ_CRC32C(crc, ControlFile->crc);
 
+#ifdef FRONTEND
+
+	/*
+	 * With unlucky timing on filesystems that don't implement atomicity of
+	 * concurrent reads and writes, we might have seen garbage if the server
+	 * was writing to the file at the same time.  Keep retrying until we see
+	 * the same CRC twice in a row or exhaust our retries, with a tiny sleep
+	 * to give a concurrent writer a good chance of making progress.
+	 */
+	if (!*crc_ok_p &&
+		(retries == 0 || !EQ_CRC32C(crc, last_crc)) &&
+		retries < 10)
+	{
+		retries++;
+		last_crc = crc;
+		pg_usleep(10000);
+		goto retry;
+	}
+#endif
+
 	/* Make sure the control file is valid byte order. */
 	if (ControlFile->pg_control_version % 65536 == 0 &&
 		ControlFile->pg_control_version / 65536 != 0)
-- 
2.39.2

