From 1afc1225ce3e49b1da3d97ada50fa01444bdafc4 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Wed, 19 Feb 2025 09:14:51 -0600
Subject: [PATCH v8 2/4] initdb: Add --no-sync-data-files.

This new option instructs initdb to skip synchronizing any files
in database directories and the database directories themselves,
i.e., everything in the base/ subdirectory and any other
tablespace directories.  Other files, such as those in pg_wal/ and
pg_xact/, will still be synchronized unless --no-sync is also
specified.  --no-sync-data-files is primarily intended for internal
use by tools that separately ensure the skipped files are
synchronized to disk.  A follow-up commit will use this to help
optimize pg_upgrade's file transfer step.

Discussion: https://postgr.es/m/Zyvop-LxLXBLrZil%40nathan
---
 doc/src/sgml/ref/initdb.sgml                | 27 +++++++
 src/bin/initdb/initdb.c                     | 10 ++-
 src/bin/initdb/t/001_initdb.pl              |  1 +
 src/bin/pg_basebackup/pg_basebackup.c       |  2 +-
 src/bin/pg_checksums/pg_checksums.c         |  2 +-
 src/bin/pg_combinebackup/pg_combinebackup.c |  2 +-
 src/bin/pg_rewind/file_ops.c                |  2 +-
 src/common/file_utils.c                     | 85 +++++++++++++--------
 src/include/common/file_utils.h             |  2 +-
 9 files changed, 96 insertions(+), 37 deletions(-)

diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index 0026318485a..2f1f9a42f90 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -527,6 +527,33 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry id="app-initdb-option-no-sync-data-files">
+      <term><option>--no-sync-data-files</option></term>
+      <listitem>
+       <para>
+        By default, <command>initdb</command> safely writes all database files
+        to disk.  This option instructs <command>initdb</command> to skip
+        synchronizing all files in the individual database directories, the
+        database directories themselves, and the tablespace directories, i.e.,
+        everything in the <filename>base</filename> subdirectory and any other
+        tablespace directories.  Other files, such as those in
+        <literal>pg_wal</literal> and <literal>pg_xact</literal>, will still be
+        synchronized unless the <option>--no-sync</option> option is also
+        specified.
+       </para>
+       <para>
+        Note that if <option>--no-sync-data-files</option> is used in
+        conjunction with <option>--sync-method=syncfs</option>, some or all of
+        the aforementioned files and directories will be synchronized because
+        <literal>syncfs</literal> processes entire file systems.
+       </para>
+       <para>
+        This option is primarily intended for internal use by tools that
+        separately ensure the skipped files are synchronized to disk.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="app-initdb-option-no-instructions">
       <term><option>--no-instructions</option></term>
       <listitem>
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 21a0fe3ecd9..22b7d31b165 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -168,6 +168,7 @@ static bool data_checksums = true;
 static char *xlog_dir = NULL;
 static int	wal_segment_size_mb = (DEFAULT_XLOG_SEG_SIZE) / (1024 * 1024);
 static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
+static bool sync_data_files = true;
 
 
 /* internal vars */
@@ -2566,6 +2567,7 @@ usage(const char *progname)
 	printf(_("  -L DIRECTORY              where to find the input files\n"));
 	printf(_("  -n, --no-clean            do not clean up after errors\n"));
 	printf(_("  -N, --no-sync             do not wait for changes to be written safely to disk\n"));
+	printf(_("      --no-sync-data-files  do not sync files within database directories\n"));
 	printf(_("      --no-instructions     do not print instructions for next steps\n"));
 	printf(_("  -s, --show                show internal settings, then exit\n"));
 	printf(_("      --sync-method=METHOD  set method for syncing files to disk\n"));
@@ -3208,6 +3210,7 @@ main(int argc, char *argv[])
 		{"icu-rules", required_argument, NULL, 18},
 		{"sync-method", required_argument, NULL, 19},
 		{"no-data-checksums", no_argument, NULL, 20},
+		{"no-sync-data-files", no_argument, NULL, 21},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -3402,6 +3405,9 @@ main(int argc, char *argv[])
 			case 20:
 				data_checksums = false;
 				break;
+			case 21:
+				sync_data_files = false;
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -3453,7 +3459,7 @@ main(int argc, char *argv[])
 
 		fputs(_("syncing data to disk ... "), stdout);
 		fflush(stdout);
-		sync_pgdata(pg_data, PG_VERSION_NUM, sync_method);
+		sync_pgdata(pg_data, PG_VERSION_NUM, sync_method, sync_data_files);
 		check_ok();
 		return 0;
 	}
@@ -3516,7 +3522,7 @@ main(int argc, char *argv[])
 	{
 		fputs(_("syncing data to disk ... "), stdout);
 		fflush(stdout);
-		sync_pgdata(pg_data, PG_VERSION_NUM, sync_method);
+		sync_pgdata(pg_data, PG_VERSION_NUM, sync_method, sync_data_files);
 		check_ok();
 	}
 	else
diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl
index 01cc4a1602b..15dd10ce40a 100644
--- a/src/bin/initdb/t/001_initdb.pl
+++ b/src/bin/initdb/t/001_initdb.pl
@@ -76,6 +76,7 @@ command_like(
 	'checksums are enabled in control file');
 
 command_ok([ 'initdb', '--sync-only', $datadir ], 'sync only');
+command_ok([ 'initdb', '--sync-only', '--no-sync-data-files', $datadir ], '--no-sync-data-files');
 command_fails([ 'initdb', $datadir ], 'existing data directory');
 
 if ($supports_syncfs)
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index d4b4e334014..1da4bfc2351 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -2310,7 +2310,7 @@ BaseBackup(char *compression_algorithm, char *compression_detail,
 		}
 		else
 		{
-			(void) sync_pgdata(basedir, serverVersion, sync_method);
+			(void) sync_pgdata(basedir, serverVersion, sync_method, true);
 		}
 	}
 
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 867aeddc601..f20be82862a 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -633,7 +633,7 @@ main(int argc, char *argv[])
 		if (do_sync)
 		{
 			pg_log_info("syncing data directory");
-			sync_pgdata(DataDir, PG_VERSION_NUM, sync_method);
+			sync_pgdata(DataDir, PG_VERSION_NUM, sync_method, true);
 		}
 
 		pg_log_info("updating control file");
diff --git a/src/bin/pg_combinebackup/pg_combinebackup.c b/src/bin/pg_combinebackup/pg_combinebackup.c
index d480dc74436..050260ee832 100644
--- a/src/bin/pg_combinebackup/pg_combinebackup.c
+++ b/src/bin/pg_combinebackup/pg_combinebackup.c
@@ -424,7 +424,7 @@ main(int argc, char *argv[])
 		else
 		{
 			pg_log_debug("recursively fsyncing \"%s\"", opt.output);
-			sync_pgdata(opt.output, version * 10000, opt.sync_method);
+			sync_pgdata(opt.output, version * 10000, opt.sync_method, true);
 		}
 	}
 
diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c
index 467845419ed..55659ce201f 100644
--- a/src/bin/pg_rewind/file_ops.c
+++ b/src/bin/pg_rewind/file_ops.c
@@ -296,7 +296,7 @@ sync_target_dir(void)
 	if (!do_sync || dry_run)
 		return;
 
-	sync_pgdata(datadir_target, PG_VERSION_NUM, sync_method);
+	sync_pgdata(datadir_target, PG_VERSION_NUM, sync_method, true);
 }
 
 
diff --git a/src/common/file_utils.c b/src/common/file_utils.c
index 0e3cfede935..78e272916f5 100644
--- a/src/common/file_utils.c
+++ b/src/common/file_utils.c
@@ -50,7 +50,8 @@ static int	pre_sync_fname(const char *fname, bool isdir);
 #endif
 static void walkdir(const char *path,
 					int (*action) (const char *fname, bool isdir),
-					bool process_symlinks);
+					bool process_symlinks,
+					const char *exclude_dir);
 
 #ifdef HAVE_SYNCFS
 
@@ -93,11 +94,15 @@ do_syncfs(const char *path)
  * syncing, and might not have privileges to write at all.
  *
  * serverVersion indicates the version of the server to be sync'd.
+ *
+ * If sync_data_files is false, this function skips syncing "base/" and any
+ * other tablespace directories.
  */
 void
 sync_pgdata(const char *pg_data,
 			int serverVersion,
-			DataDirSyncMethod sync_method)
+			DataDirSyncMethod sync_method,
+			bool sync_data_files)
 {
 	bool		xlog_is_symlink;
 	char		pg_wal[MAXPGPATH];
@@ -147,30 +152,33 @@ sync_pgdata(const char *pg_data,
 				do_syncfs(pg_data);
 
 				/* If any tablespaces are configured, sync each of those. */
-				dir = opendir(pg_tblspc);
-				if (dir == NULL)
-					pg_log_error("could not open directory \"%s\": %m",
-								 pg_tblspc);
-				else
+				if (sync_data_files)
 				{
-					while (errno = 0, (de = readdir(dir)) != NULL)
+					dir = opendir(pg_tblspc);
+					if (dir == NULL)
+						pg_log_error("could not open directory \"%s\": %m",
+									 pg_tblspc);
+					else
 					{
-						char		subpath[MAXPGPATH * 2];
+						while (errno = 0, (de = readdir(dir)) != NULL)
+						{
+							char		subpath[MAXPGPATH * 2];
 
-						if (strcmp(de->d_name, ".") == 0 ||
-							strcmp(de->d_name, "..") == 0)
-							continue;
+							if (strcmp(de->d_name, ".") == 0 ||
+								strcmp(de->d_name, "..") == 0)
+								continue;
 
-						snprintf(subpath, sizeof(subpath), "%s/%s",
-								 pg_tblspc, de->d_name);
-						do_syncfs(subpath);
-					}
+							snprintf(subpath, sizeof(subpath), "%s/%s",
+									 pg_tblspc, de->d_name);
+							do_syncfs(subpath);
+						}
 
-					if (errno)
-						pg_log_error("could not read directory \"%s\": %m",
-									 pg_tblspc);
+						if (errno)
+							pg_log_error("could not read directory \"%s\": %m",
+										 pg_tblspc);
 
-					(void) closedir(dir);
+						(void) closedir(dir);
+					}
 				}
 
 				/* If pg_wal is a symlink, process that too. */
@@ -182,15 +190,21 @@ sync_pgdata(const char *pg_data,
 
 		case DATA_DIR_SYNC_METHOD_FSYNC:
 			{
+				char	   *exclude_dir = NULL;
+
+				if (!sync_data_files)
+					exclude_dir = psprintf("%s/base", pg_data);
+
 				/*
 				 * If possible, hint to the kernel that we're soon going to
 				 * fsync the data directory and its contents.
 				 */
 #ifdef PG_FLUSH_DATA_WORKS
-				walkdir(pg_data, pre_sync_fname, false);
+				walkdir(pg_data, pre_sync_fname, false, exclude_dir);
 				if (xlog_is_symlink)
-					walkdir(pg_wal, pre_sync_fname, false);
-				walkdir(pg_tblspc, pre_sync_fname, true);
+					walkdir(pg_wal, pre_sync_fname, false, NULL);
+				if (sync_data_files)
+					walkdir(pg_tblspc, pre_sync_fname, true, NULL);
 #endif
 
 				/*
@@ -203,10 +217,14 @@ sync_pgdata(const char *pg_data,
 				 * get fsync'd twice. That's not an expected case so we don't
 				 * worry about optimizing it.
 				 */
-				walkdir(pg_data, fsync_fname, false);
+				walkdir(pg_data, fsync_fname, false, exclude_dir);
 				if (xlog_is_symlink)
-					walkdir(pg_wal, fsync_fname, false);
-				walkdir(pg_tblspc, fsync_fname, true);
+					walkdir(pg_wal, fsync_fname, false, NULL);
+				if (sync_data_files)
+					walkdir(pg_tblspc, fsync_fname, true, NULL);
+
+				if (exclude_dir)
+					pfree(exclude_dir);
 			}
 			break;
 	}
@@ -245,10 +263,10 @@ sync_dir_recurse(const char *dir, DataDirSyncMethod sync_method)
 				 * fsync the data directory and its contents.
 				 */
 #ifdef PG_FLUSH_DATA_WORKS
-				walkdir(dir, pre_sync_fname, false);
+				walkdir(dir, pre_sync_fname, false, NULL);
 #endif
 
-				walkdir(dir, fsync_fname, false);
+				walkdir(dir, fsync_fname, false, NULL);
 			}
 			break;
 	}
@@ -264,6 +282,9 @@ sync_dir_recurse(const char *dir, DataDirSyncMethod sync_method)
  * ignored in subdirectories, ie we intentionally don't pass down the
  * process_symlinks flag to recursive calls.
  *
+ * If exclude_dir is not NULL, it specifies a directory path to skip
+ * processing.
+ *
  * Errors are reported but not considered fatal.
  *
  * See also walkdir in fd.c, which is a backend version of this logic.
@@ -271,11 +292,15 @@ sync_dir_recurse(const char *dir, DataDirSyncMethod sync_method)
 static void
 walkdir(const char *path,
 		int (*action) (const char *fname, bool isdir),
-		bool process_symlinks)
+		bool process_symlinks,
+		const char *exclude_dir)
 {
 	DIR		   *dir;
 	struct dirent *de;
 
+	if (exclude_dir && strcmp(exclude_dir, path) == 0)
+		return;
+
 	dir = opendir(path);
 	if (dir == NULL)
 	{
@@ -299,7 +324,7 @@ walkdir(const char *path,
 				(*action) (subpath, false);
 				break;
 			case PGFILETYPE_DIR:
-				walkdir(subpath, action, false);
+				walkdir(subpath, action, false, exclude_dir);
 				break;
 			default:
 
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index a832210adc1..8274bc877ab 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -35,7 +35,7 @@ struct iovec;					/* avoid including port/pg_iovec.h here */
 #ifdef FRONTEND
 extern int	fsync_fname(const char *fname, bool isdir);
 extern void sync_pgdata(const char *pg_data, int serverVersion,
-						DataDirSyncMethod sync_method);
+						DataDirSyncMethod sync_method, bool sync_data_files);
 extern void sync_dir_recurse(const char *dir, DataDirSyncMethod sync_method);
 extern int	durable_rename(const char *oldfile, const char *newfile);
 extern int	fsync_parent_path(const char *fname);
-- 
2.39.5 (Apple Git-154)