From 3781795f9b4e448df6bdd24d5cd7c0743b5e2944 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <bossartn@amazon.com>
Date: Wed, 10 Nov 2021 18:35:14 +0000
Subject: [PATCH v8 1/2] Move WAL segment creation logic to its own function.

---
 src/backend/access/transam/xlog.c | 103 +--------------------------
 src/backend/storage/file/fd.c     | 114 ++++++++++++++++++++++++++++++
 src/include/storage/fd.h          |   1 +
 3 files changed, 116 insertions(+), 102 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index a7814d4019..87d71e2008 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2918,11 +2918,9 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
 					 bool *added, char *path)
 {
 	char		tmppath[MAXPGPATH];
-	PGAlignedXLogBlock zbuffer;
 	XLogSegNo	installed_segno;
 	XLogSegNo	max_segno;
 	int			fd;
-	int			save_errno;
 
 	Assert(logtli != 0);
 
@@ -2952,106 +2950,7 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
 	elog(DEBUG2, "creating and filling new WAL file");
 
 	snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
-
-	unlink(tmppath);
-
-	/* do not use get_sync_bit() here --- want to fsync only at end of fill */
-	fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
-	if (fd < 0)
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not create file \"%s\": %m", tmppath)));
-
-	memset(zbuffer.data, 0, XLOG_BLCKSZ);
-
-	pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
-	save_errno = 0;
-	if (wal_init_zero)
-	{
-		struct iovec iov[PG_IOV_MAX];
-		int			blocks;
-
-		/*
-		 * Zero-fill the file.  With this setting, we do this the hard way to
-		 * ensure that all the file space has really been allocated.  On
-		 * platforms that allow "holes" in files, just seeking to the end
-		 * doesn't allocate intermediate space.  This way, we know that we
-		 * have all the space and (after the fsync below) that all the
-		 * indirect blocks are down on disk.  Therefore, fdatasync(2) or
-		 * O_DSYNC will be sufficient to sync future writes to the log file.
-		 */
-
-		/* Prepare to write out a lot of copies of our zero buffer at once. */
-		for (int i = 0; i < lengthof(iov); ++i)
-		{
-			iov[i].iov_base = zbuffer.data;
-			iov[i].iov_len = XLOG_BLCKSZ;
-		}
-
-		/* Loop, writing as many blocks as we can for each system call. */
-		blocks = wal_segment_size / XLOG_BLCKSZ;
-		for (int i = 0; i < blocks;)
-		{
-			int			iovcnt = Min(blocks - i, lengthof(iov));
-			off_t		offset = i * XLOG_BLCKSZ;
-
-			if (pg_pwritev_with_retry(fd, iov, iovcnt, offset) < 0)
-			{
-				save_errno = errno;
-				break;
-			}
-
-			i += iovcnt;
-		}
-	}
-	else
-	{
-		/*
-		 * Otherwise, seeking to the end and writing a solitary byte is
-		 * enough.
-		 */
-		errno = 0;
-		if (pg_pwrite(fd, zbuffer.data, 1, wal_segment_size - 1) != 1)
-		{
-			/* if write didn't set errno, assume no disk space */
-			save_errno = errno ? errno : ENOSPC;
-		}
-	}
-	pgstat_report_wait_end();
-
-	if (save_errno)
-	{
-		/*
-		 * If we fail to make the file, delete it to release disk space
-		 */
-		unlink(tmppath);
-
-		close(fd);
-
-		errno = save_errno;
-
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not write to file \"%s\": %m", tmppath)));
-	}
-
-	pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC);
-	if (pg_fsync(fd) != 0)
-	{
-		int			save_errno = errno;
-
-		close(fd);
-		errno = save_errno;
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not fsync file \"%s\": %m", tmppath)));
-	}
-	pgstat_report_wait_end();
-
-	if (close(fd) != 0)
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not close file \"%s\": %m", tmppath)));
+	CreateEmptyWalSegment(tmppath);
 
 	/*
 	 * Now move the segment into place with its final name.  Cope with
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 14b77f2861..4efc46460e 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -3891,3 +3891,117 @@ pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset)
 
 	return sum;
 }
+
+/*
+ * CreateEmptyWalSegment
+ *
+ * Create a new file at "path" that can be used as a new WAL segment.  Any
+ * existing file at "path" is removed first.  If wal_init_zero is set, the
+ * whole segment is written out as zeroes; otherwise only its last byte is
+ * written.  The file is then fsync'd and closed.  The caller is responsible
+ * for installing the new file in pg_wal.
+ *
+ * Failures are reported with ereport(ERROR).  If writing fails, the
+ * partially-written file is unlinked before the error is raised.
+ */
+void
+CreateEmptyWalSegment(const char *path)
+{
+	PGAlignedXLogBlock zbuffer;
+	int			fd;
+	int			save_errno;
+
+	unlink(path);
+
+	/* no sync flags in the open mode --- want to fsync only at end of fill */
+	fd = BasicOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
+	if (fd < 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not create file \"%s\": %m", path)));
+
+	memset(zbuffer.data, 0, XLOG_BLCKSZ);
+
+	pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
+	save_errno = 0;
+	if (wal_init_zero)
+	{
+		struct iovec iov[PG_IOV_MAX];
+		int			blocks;
+
+		/*
+		 * Zero-fill the file.  With this setting, we do this the hard way to
+		 * ensure that all the file space has really been allocated.  On
+		 * platforms that allow "holes" in files, just seeking to the end
+		 * doesn't allocate intermediate space.  This way, we know that we
+		 * have all the space and (after the fsync below) that all the
+		 * indirect blocks are down on disk.  Therefore, fdatasync(2) or
+		 * O_DSYNC will be sufficient to sync future writes to the log file.
+		 */
+
+		/* Prepare to write out a lot of copies of our zero buffer at once. */
+		for (int i = 0; i < lengthof(iov); ++i)
+		{
+			iov[i].iov_base = zbuffer.data;
+			iov[i].iov_len = XLOG_BLCKSZ;
+		}
+
+		/* Loop, writing as many blocks as we can for each system call. */
+		blocks = wal_segment_size / XLOG_BLCKSZ;
+		for (int i = 0; i < blocks;)
+		{
+			int			iovcnt = Min(blocks - i, lengthof(iov));
+			off_t		offset = i * XLOG_BLCKSZ;
+
+			if (pg_pwritev_with_retry(fd, iov, iovcnt, offset) < 0)
+			{
+				save_errno = errno;
+				break;
+			}
+
+			i += iovcnt;
+		}
+	}
+	else
+	{
+		/*
+		 * Otherwise, seeking to the end and writing a solitary byte is
+		 * enough.
+		 */
+		errno = 0;
+		if (pg_pwrite(fd, zbuffer.data, 1, wal_segment_size - 1) != 1)
+		{
+			/* if write didn't set errno, assume no disk space */
+			save_errno = errno ? errno : ENOSPC;
+		}
+	}
+	pgstat_report_wait_end();
+
+	if (save_errno)
+	{
+		/* If we fail to make the file, delete it to release disk space */
+		unlink(path);
+		close(fd);
+		errno = save_errno;		/* restore the write error for %m */
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not write to file \"%s\": %m", path)));
+	}
+
+	pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC);
+	if (pg_fsync(fd) != 0)
+	{
+		save_errno = errno;		/* close() below may clobber errno */
+		close(fd);
+		errno = save_errno;
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not fsync file \"%s\": %m", path)));
+	}
+	pgstat_report_wait_end();
+
+	if (close(fd) != 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not close file \"%s\": %m", path)));
+}
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 69549b000f..6bb9e3525b 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -190,6 +190,7 @@ extern int	durable_unlink(const char *fname, int loglevel);
 extern int	durable_rename_excl(const char *oldfile, const char *newfile, int loglevel);
 extern void SyncDataDirectory(void);
 extern int	data_sync_elevel(int elevel);
+extern void CreateEmptyWalSegment(const char *path);
 
 /* Filename components */
 #define PG_TEMP_FILES_DIR "pgsql_tmp"
-- 
2.25.1

