From 6716e7b16a795911f55432dfd6d3c246aa8fd9fe Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Wed, 19 Feb 2025 09:14:51 -0600 Subject: [PATCH v4 1/3] initdb: Add --no-sync-data-files. This new option instructs initdb to skip synchronizing any files in database directories and the database directories themselves, i.e., everything in the base/ subdirectory and any other tablespace directories. Other files, such as those in pg_wal/ and pg_xact/, will still be synchronized unless --no-sync is also specified. --no-sync-data-files is primarily intended for internal use by tools that separately ensure the skipped files are synchronized to disk. A follow-up commit will use this to help optimize pg_upgrade's file transfer step. Discussion: https://postgr.es/m/Zyvop-LxLXBLrZil%40nathan --- doc/src/sgml/ref/initdb.sgml | 20 +++++ src/bin/initdb/initdb.c | 10 ++- src/bin/initdb/t/001_initdb.pl | 1 + src/bin/pg_basebackup/pg_basebackup.c | 2 +- src/bin/pg_checksums/pg_checksums.c | 2 +- src/bin/pg_combinebackup/pg_combinebackup.c | 2 +- src/bin/pg_rewind/file_ops.c | 2 +- src/common/file_utils.c | 85 +++++++++++++-------- src/include/common/file_utils.h | 2 +- 9 files changed, 89 insertions(+), 37 deletions(-) diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml index 0026318485a..14c401b9a99 100644 --- a/doc/src/sgml/ref/initdb.sgml +++ b/doc/src/sgml/ref/initdb.sgml @@ -527,6 +527,26 @@ PostgreSQL documentation + + + + + By default, initdb safely writes all database files + to disk. This option instructs initdb to skip + synchronizing all files in the individual database directories and the + database directories themselves, i.e., everything in the + base subdirectory and any other tablespace + directories. Other files, such as those in pg_wal + and pg_xact, will still be synchronized unless the + --no-sync option is also specified. 
+ + + This option is primarily intended for internal use by tools that + separately ensure the skipped files are synchronized to disk. + + + + diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 21a0fe3ecd9..22b7d31b165 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -168,6 +168,7 @@ static bool data_checksums = true; static char *xlog_dir = NULL; static int wal_segment_size_mb = (DEFAULT_XLOG_SEG_SIZE) / (1024 * 1024); static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC; +static bool sync_data_files = true; /* internal vars */ @@ -2566,6 +2567,7 @@ usage(const char *progname) printf(_(" -L DIRECTORY where to find the input files\n")); printf(_(" -n, --no-clean do not clean up after errors\n")); printf(_(" -N, --no-sync do not wait for changes to be written safely to disk\n")); + printf(_(" --no-sync-data-files do not sync files within database directories\n")); printf(_(" --no-instructions do not print instructions for next steps\n")); printf(_(" -s, --show show internal settings, then exit\n")); printf(_(" --sync-method=METHOD set method for syncing files to disk\n")); @@ -3208,6 +3210,7 @@ main(int argc, char *argv[]) {"icu-rules", required_argument, NULL, 18}, {"sync-method", required_argument, NULL, 19}, {"no-data-checksums", no_argument, NULL, 20}, + {"no-sync-data-files", no_argument, NULL, 21}, {NULL, 0, NULL, 0} }; @@ -3402,6 +3405,9 @@ main(int argc, char *argv[]) case 20: data_checksums = false; break; + case 21: + sync_data_files = false; + break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -3453,7 +3459,7 @@ main(int argc, char *argv[]) fputs(_("syncing data to disk ... 
"), stdout); fflush(stdout); - sync_pgdata(pg_data, PG_VERSION_NUM, sync_method); + sync_pgdata(pg_data, PG_VERSION_NUM, sync_method, sync_data_files); check_ok(); return 0; } @@ -3516,7 +3522,7 @@ main(int argc, char *argv[]) { fputs(_("syncing data to disk ... "), stdout); fflush(stdout); - sync_pgdata(pg_data, PG_VERSION_NUM, sync_method); + sync_pgdata(pg_data, PG_VERSION_NUM, sync_method, sync_data_files); check_ok(); } else diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl index 01cc4a1602b..15dd10ce40a 100644 --- a/src/bin/initdb/t/001_initdb.pl +++ b/src/bin/initdb/t/001_initdb.pl @@ -76,6 +76,7 @@ command_like( 'checksums are enabled in control file'); command_ok([ 'initdb', '--sync-only', $datadir ], 'sync only'); +command_ok([ 'initdb', '--sync-only', '--no-sync-data-files', $datadir ], '--no-sync-data-files'); command_fails([ 'initdb', $datadir ], 'existing data directory'); if ($supports_syncfs) diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index dc0c805137a..bc94c114d27 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -2310,7 +2310,7 @@ BaseBackup(char *compression_algorithm, char *compression_detail, } else { - (void) sync_pgdata(basedir, serverVersion, sync_method); + (void) sync_pgdata(basedir, serverVersion, sync_method, true); } } diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c index 867aeddc601..f20be82862a 100644 --- a/src/bin/pg_checksums/pg_checksums.c +++ b/src/bin/pg_checksums/pg_checksums.c @@ -633,7 +633,7 @@ main(int argc, char *argv[]) if (do_sync) { pg_log_info("syncing data directory"); - sync_pgdata(DataDir, PG_VERSION_NUM, sync_method); + sync_pgdata(DataDir, PG_VERSION_NUM, sync_method, true); } pg_log_info("updating control file"); diff --git a/src/bin/pg_combinebackup/pg_combinebackup.c b/src/bin/pg_combinebackup/pg_combinebackup.c index 5864ec574fb..c0ec09485c3 100644 --- 
a/src/bin/pg_combinebackup/pg_combinebackup.c +++ b/src/bin/pg_combinebackup/pg_combinebackup.c @@ -420,7 +420,7 @@ main(int argc, char *argv[]) else { pg_log_debug("recursively fsyncing \"%s\"", opt.output); - sync_pgdata(opt.output, version * 10000, opt.sync_method); + sync_pgdata(opt.output, version * 10000, opt.sync_method, true); } } diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c index 467845419ed..55659ce201f 100644 --- a/src/bin/pg_rewind/file_ops.c +++ b/src/bin/pg_rewind/file_ops.c @@ -296,7 +296,7 @@ sync_target_dir(void) if (!do_sync || dry_run) return; - sync_pgdata(datadir_target, PG_VERSION_NUM, sync_method); + sync_pgdata(datadir_target, PG_VERSION_NUM, sync_method, true); } diff --git a/src/common/file_utils.c b/src/common/file_utils.c index 0e3cfede935..78e272916f5 100644 --- a/src/common/file_utils.c +++ b/src/common/file_utils.c @@ -50,7 +50,8 @@ static int pre_sync_fname(const char *fname, bool isdir); #endif static void walkdir(const char *path, int (*action) (const char *fname, bool isdir), - bool process_symlinks); + bool process_symlinks, + const char *exclude_dir); #ifdef HAVE_SYNCFS @@ -93,11 +94,15 @@ do_syncfs(const char *path) * syncing, and might not have privileges to write at all. * * serverVersion indicates the version of the server to be sync'd. + * + * If sync_data_files is false, this function skips syncing "base/" and any + * other tablespace directories. */ void sync_pgdata(const char *pg_data, int serverVersion, - DataDirSyncMethod sync_method) + DataDirSyncMethod sync_method, + bool sync_data_files) { bool xlog_is_symlink; char pg_wal[MAXPGPATH]; @@ -147,30 +152,33 @@ sync_pgdata(const char *pg_data, do_syncfs(pg_data); /* If any tablespaces are configured, sync each of those. 
*/ - dir = opendir(pg_tblspc); - if (dir == NULL) - pg_log_error("could not open directory \"%s\": %m", - pg_tblspc); - else + if (sync_data_files) { - while (errno = 0, (de = readdir(dir)) != NULL) + dir = opendir(pg_tblspc); + if (dir == NULL) + pg_log_error("could not open directory \"%s\": %m", + pg_tblspc); + else { - char subpath[MAXPGPATH * 2]; + while (errno = 0, (de = readdir(dir)) != NULL) + { + char subpath[MAXPGPATH * 2]; - if (strcmp(de->d_name, ".") == 0 || - strcmp(de->d_name, "..") == 0) - continue; + if (strcmp(de->d_name, ".") == 0 || + strcmp(de->d_name, "..") == 0) + continue; - snprintf(subpath, sizeof(subpath), "%s/%s", - pg_tblspc, de->d_name); - do_syncfs(subpath); - } + snprintf(subpath, sizeof(subpath), "%s/%s", + pg_tblspc, de->d_name); + do_syncfs(subpath); + } - if (errno) - pg_log_error("could not read directory \"%s\": %m", - pg_tblspc); + if (errno) + pg_log_error("could not read directory \"%s\": %m", + pg_tblspc); - (void) closedir(dir); + (void) closedir(dir); + } } /* If pg_wal is a symlink, process that too. */ @@ -182,15 +190,21 @@ sync_pgdata(const char *pg_data, case DATA_DIR_SYNC_METHOD_FSYNC: { + char *exclude_dir = NULL; + + if (!sync_data_files) + exclude_dir = psprintf("%s/base", pg_data); + /* * If possible, hint to the kernel that we're soon going to * fsync the data directory and its contents. */ #ifdef PG_FLUSH_DATA_WORKS - walkdir(pg_data, pre_sync_fname, false); + walkdir(pg_data, pre_sync_fname, false, exclude_dir); if (xlog_is_symlink) - walkdir(pg_wal, pre_sync_fname, false); - walkdir(pg_tblspc, pre_sync_fname, true); + walkdir(pg_wal, pre_sync_fname, false, NULL); + if (sync_data_files) + walkdir(pg_tblspc, pre_sync_fname, true, NULL); #endif /* @@ -203,10 +217,14 @@ sync_pgdata(const char *pg_data, * get fsync'd twice. That's not an expected case so we don't * worry about optimizing it. 
*/ - walkdir(pg_data, fsync_fname, false); + walkdir(pg_data, fsync_fname, false, exclude_dir); if (xlog_is_symlink) - walkdir(pg_wal, fsync_fname, false); - walkdir(pg_tblspc, fsync_fname, true); + walkdir(pg_wal, fsync_fname, false, NULL); + if (sync_data_files) + walkdir(pg_tblspc, fsync_fname, true, NULL); + + if (exclude_dir) + pfree(exclude_dir); } break; } @@ -245,10 +263,10 @@ sync_dir_recurse(const char *dir, DataDirSyncMethod sync_method) * fsync the data directory and its contents. */ #ifdef PG_FLUSH_DATA_WORKS - walkdir(dir, pre_sync_fname, false); + walkdir(dir, pre_sync_fname, false, NULL); #endif - walkdir(dir, fsync_fname, false); + walkdir(dir, fsync_fname, false, NULL); } break; } @@ -264,6 +282,9 @@ sync_dir_recurse(const char *dir, DataDirSyncMethod sync_method) * ignored in subdirectories, ie we intentionally don't pass down the * process_symlinks flag to recursive calls. * + * If exclude_dir is not NULL, it specifies a directory path to skip + * processing. + * * Errors are reported but not considered fatal. * * See also walkdir in fd.c, which is a backend version of this logic. 
@@ -271,11 +292,15 @@ sync_dir_recurse(const char *dir, DataDirSyncMethod sync_method) static void walkdir(const char *path, int (*action) (const char *fname, bool isdir), - bool process_symlinks) + bool process_symlinks, + const char *exclude_dir) { DIR *dir; struct dirent *de; + if (exclude_dir && strcmp(exclude_dir, path) == 0) + return; + dir = opendir(path); if (dir == NULL) { @@ -299,7 +324,7 @@ walkdir(const char *path, (*action) (subpath, false); break; case PGFILETYPE_DIR: - walkdir(subpath, action, false); + walkdir(subpath, action, false, exclude_dir); break; default: diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h index a832210adc1..8274bc877ab 100644 --- a/src/include/common/file_utils.h +++ b/src/include/common/file_utils.h @@ -35,7 +35,7 @@ struct iovec; /* avoid including port/pg_iovec.h here */ #ifdef FRONTEND extern int fsync_fname(const char *fname, bool isdir); extern void sync_pgdata(const char *pg_data, int serverVersion, - DataDirSyncMethod sync_method); + DataDirSyncMethod sync_method, bool sync_data_files); extern void sync_dir_recurse(const char *dir, DataDirSyncMethod sync_method); extern int durable_rename(const char *oldfile, const char *newfile); extern int fsync_parent_path(const char *fname); -- 2.39.5 (Apple Git-154)