From a416c09f33c429aea0fd2b3cd1fa03878c520d8d Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Mon, 21 Feb 2022 15:41:23 -0800
Subject: [PATCH v1 2/4] WIP: AssertFileNotDeleted(fd).

Author:
Reviewed-By:
Discussion: https://postgr.es/m/
Backpatch:
---
 src/include/storage/fd.h              |  1 +
 src/backend/access/transam/slru.c     |  2 +
 src/backend/access/transam/xlog.c     |  2 +
 src/backend/replication/walreceiver.c |  2 +
 src/backend/storage/file/fd.c         | 83 +++++++++++++++++++++++++++
 5 files changed, 90 insertions(+)

diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 29209e27243..3bb8f669b64 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -191,6 +191,7 @@ extern int	durable_rename_excl(const char *oldfile, const char *newfile, int log
 extern void SyncDataDirectory(void);
 extern int	data_sync_elevel(int elevel);
 
+extern void AssertFileNotDeleted(int fd);
 /* Filename components */
 #define PG_TEMP_FILES_DIR "pgsql_tmp"
 #define PG_TEMP_FILE_PREFIX "pgsql_tmp"
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index 30a476ed5dc..e4907ea5356 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -871,6 +871,8 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruWriteAll fdata)
 		}
 	}
 
+	AssertFileNotDeleted(fd);
+
 	errno = 0;
 	pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE);
 	if (pg_pwrite(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 0d2bd7a3576..e1225bc0627 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2198,6 +2198,8 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 				if (track_wal_io_timing)
 					INSTR_TIME_SET_CURRENT(start);
 
+				AssertFileNotDeleted(openLogFile);
+
 				pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE);
 				written = pg_pwrite(openLogFile, from, nleft, startoffset);
 				pgstat_report_wait_end();
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index ceaff097b97..23bf982b545 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -912,6 +912,8 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr, TimeLineID tli)
 		else
 			segbytes = nbytes;
 
+		AssertFileNotDeleted(recvFile);
+
 		/* OK to write the logs */
 		errno = 0;
 
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 14b77f28617..123815e4a80 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -93,6 +93,7 @@
 #include "common/file_perm.h"
 #include "common/file_utils.h"
 #include "common/pg_prng.h"
+#include "common/string.h"
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "port/pg_iovec.h"
@@ -2073,6 +2074,8 @@ FilePrefetch(File file, off_t offset, int amount, uint32 wait_event_info)
 	if (returnCode < 0)
 		return returnCode;
 
+	AssertFileNotDeleted(VfdCache[file].fd);
+
 	pgstat_report_wait_start(wait_event_info);
 	returnCode = posix_fadvise(VfdCache[file].fd, offset, amount,
 							   POSIX_FADV_WILLNEED);
@@ -2103,6 +2106,11 @@ FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
 	if (returnCode < 0)
 		return;
 
+	/*
+	 * XXX: can't assert that the file isn't deleted right now, as
+	 * ScheduleBufferTagForWriteback can end up writing at a later time.
+	 */
+
 	pgstat_report_wait_start(wait_event_info);
 	pg_flush_data(VfdCache[file].fd, offset, nbytes);
 	pgstat_report_wait_end();
@@ -2128,6 +2136,8 @@ FileRead(File file, char *buffer, int amount, off_t offset,
 
 	vfdP = &VfdCache[file];
 
+	AssertFileNotDeleted(vfdP->fd);
+
 retry:
 	pgstat_report_wait_start(wait_event_info);
 	returnCode = pg_pread(vfdP->fd, buffer, amount, offset);
@@ -2184,6 +2194,8 @@ FileWrite(File file, char *buffer, int amount, off_t offset,
 
 	vfdP = &VfdCache[file];
 
+	AssertFileNotDeleted(vfdP->fd);
+
 	/*
 	 * If enforcing temp_file_limit and it's a temp file, check to see if the
 	 * write would overrun temp_file_limit, and throw error if so.  Note: it's
@@ -2276,6 +2288,8 @@ FileSync(File file, uint32 wait_event_info)
 	if (returnCode < 0)
 		return returnCode;
 
+	AssertFileNotDeleted(VfdCache[file].fd);
+
 	pgstat_report_wait_start(wait_event_info);
 	returnCode = pg_fsync(VfdCache[file].fd);
 	pgstat_report_wait_end();
@@ -2297,6 +2311,8 @@ FileSize(File file)
 			return (off_t) -1;
 	}
 
+	AssertFileNotDeleted(VfdCache[file].fd);
+
 	return lseek(VfdCache[file].fd, 0, SEEK_END);
 }
 
@@ -2314,6 +2330,8 @@ FileTruncate(File file, off_t offset, uint32 wait_event_info)
 	if (returnCode < 0)
 		return returnCode;
 
+	AssertFileNotDeleted(VfdCache[file].fd);
+
 	pgstat_report_wait_start(wait_event_info);
 	returnCode = ftruncate(VfdCache[file].fd, offset);
 	pgstat_report_wait_end();
@@ -3828,6 +3846,71 @@ data_sync_elevel(int elevel)
 	return data_sync_retry ? elevel : PANIC;
 }
 
+/*
+ * AssertFileNotDeleted --- PANIC if fd refers to an already-deleted file.
+ *
+ * Debugging aid: deletion is detected via fstat()'s st_nlink == 0.  On Linux
+ * the PANIC message also names the file, if /proc/self/fd can be read.
+ */
+void
+AssertFileNotDeleted(int fd)
+{
+	struct stat statbuf;
+	char		deleted_filename[MAXPGPATH];
+	bool		have_filename = false;
+
+	/*
+	 * fstat shouldn't fail, so it seems ok to error out, even if it's
+	 * just a debugging aid.
+	 *
+	 * XXX: Figure out which operating systems this works on.
+	 */
+	if (fstat(fd, &statbuf) != 0)
+		elog(ERROR, "fstat failed: %m");
+
+	/*
+	 * On several operating systems st_nlink == 0 indicates that the file has
+	 * been deleted. On some OS/filesystem combinations a deleted file may
+	 * still show up with nlink > 0, but nlink == 0 shouldn't be returned
+	 * spuriously. Hardlinks obviously can prevent this from working, but we
+	 * don't expect any, so that's fine.
+	 */
+	if (statbuf.st_nlink > 0)
+		return;
+
+#if defined(__linux__)
+	{
+		char		path[MAXPGPATH];
+		const char *const deleted_suffix = " (deleted)";
+		ssize_t		len;
+
+		/*
+		 * On Linux, /proc/self/fd/N is a symlink to the file's former name.
+		 * The name is only used in the message below, so if readlink() fails
+		 * we simply report the deleted file without it.
+		 */
+		snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
+		len = readlink(path, deleted_filename, sizeof(deleted_filename) - 1);
+		if (len >= 0)
+		{
+			/* readlink doesn't null terminate */
+			deleted_filename[len] = '\0';
+			have_filename = true;
+
+			/* chop off the " (deleted)" */
+			if (pg_str_endswith(deleted_filename, deleted_suffix))
+				deleted_filename[len - strlen(deleted_suffix)] = '\0';
+		}
+	}
+#endif
+
+	if (have_filename)
+		elog(PANIC, "file descriptor %d for file %s is of a deleted file",
+			 fd, deleted_filename);
+	else
+		elog(PANIC, "file descriptor %d is of a deleted file", fd);
+}
+
 /*
  * A convenience wrapper for pg_pwritev() that retries on partial write.  If an
  * error is returned, it is unspecified how much has been written.
-- 
2.34.0

