From a58a6bb70785a557a150680b64cd8ce78ce1b73a Mon Sep 17 00:00:00 2001
From: Nathan Bossart <bossartn@amazon.com>
Date: Sun, 5 Dec 2021 22:02:40 -0800
Subject: [PATCH v6 5/6] Move removal of old serialized snapshots to custodian.

This was only done during checkpoints because it was a convenient
place to put it.  However, if there are many snapshots to remove,
it can significantly extend checkpoint time.  To avoid this, move
this work to the newly-introduced custodian process.
---
 src/backend/access/transam/xlog.c           |  6 ++++--
 src/backend/postmaster/custodian.c          | 12 ++++++++++++
 src/backend/replication/logical/snapbuild.c |  9 ++++-----
 src/include/postmaster/custodian.h          |  3 ++-
 src/include/replication/snapbuild.h         |  2 +-
 5 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 8764084e21..621bda0844 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -75,13 +75,13 @@
 #include "port/atomics.h"
 #include "port/pg_iovec.h"
 #include "postmaster/bgwriter.h"
+#include "postmaster/custodian.h"
 #include "postmaster/startup.h"
 #include "postmaster/walwriter.h"
 #include "replication/basebackup.h"
 #include "replication/logical.h"
 #include "replication/origin.h"
 #include "replication/slot.h"
-#include "replication/snapbuild.h"
 #include "replication/walreceiver.h"
 #include "replication/walsender.h"
 #include "storage/bufmgr.h"
@@ -6840,10 +6840,12 @@ CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
 {
 	CheckPointRelationMap();
 	CheckPointReplicationSlots();
-	CheckPointSnapBuild();
 	CheckPointLogicalRewriteHeap();
 	CheckPointReplicationOrigin();
 
+	/* tasks offloaded to custodian */
+	RequestCustodian(CUSTODIAN_REMOVE_SERIALIZED_SNAPSHOTS);
+
 	/* Write out all dirty data in SLRUs and the main buffer pool */
 	TRACE_POSTGRESQL_BUFFER_CHECKPOINT_START(flags);
 	CheckpointStats.ckpt_write_t = GetCurrentTimestamp();
diff --git a/src/backend/postmaster/custodian.c b/src/backend/postmaster/custodian.c
index a0ec94ea5c..861de882c6 100644
--- a/src/backend/postmaster/custodian.c
+++ b/src/backend/postmaster/custodian.c
@@ -31,6 +31,7 @@
 #include "pgstat.h"
 #include "postmaster/custodian.h"
 #include "postmaster/interrupt.h"
+#include "replication/snapbuild.h"
 #include "storage/bufmgr.h"
 #include "storage/condition_variable.h"
 #include "storage/fd.h"
@@ -210,6 +211,17 @@ CustodianMain(void)
 		if (flags & CUSTODIAN_REMOVE_TEMP_FILES)
 			RemovePgTempFiles(false, false);
 
+		/*
+		 * Remove serialized snapshots that are no longer required by any
+		 * logical replication slot.
+		 *
+		 * It is not important for these to be removed in single-user mode, so
+		 * we don't need any extra handling outside of the custodian process for
+		 * this.
+		 */
+		if (flags & CUSTODIAN_REMOVE_SERIALIZED_SNAPSHOTS)
+			RemoveOldSerializedSnapshots();
+
 		/* Calculate how long to sleep */
 		end_time = (pg_time_t) time(NULL);
 		elapsed_secs = end_time - start_time;
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index 1119a12db9..42eb064bd8 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -1911,14 +1911,13 @@ snapshot_not_interesting:
 
 /*
  * Remove all serialized snapshots that are not required anymore because no
- * slot can need them. This doesn't actually have to run during a checkpoint,
- * but it's a convenient point to schedule this.
+ * slot can need them.
  *
- * NB: We run this during checkpoints even if logical decoding is disabled so
- * we cleanup old slots at some point after it got disabled.
+ * NB: We run this even if logical decoding is disabled so we cleanup old slots
+ * at some point after it got disabled.
  */
 void
-CheckPointSnapBuild(void)
+RemoveOldSerializedSnapshots(void)
 {
 	XLogRecPtr	cutoff;
 	XLogRecPtr	redo;
diff --git a/src/include/postmaster/custodian.h b/src/include/postmaster/custodian.h
index f6dcd9ddef..769c07f2c9 100644
--- a/src/include/postmaster/custodian.h
+++ b/src/include/postmaster/custodian.h
@@ -18,6 +18,7 @@ extern void CustodianShmemInit(void);
 extern void RequestCustodian(int flags);
 
 /* flags for RequestCustodian() */
-#define CUSTODIAN_REMOVE_TEMP_FILES		0x0001
+#define CUSTODIAN_REMOVE_TEMP_FILES				0x0001
+#define CUSTODIAN_REMOVE_SERIALIZED_SNAPSHOTS	0x0002
 
 #endif						/* _CUSTODIAN_H */
diff --git a/src/include/replication/snapbuild.h b/src/include/replication/snapbuild.h
index d179251aad..55a2beb434 100644
--- a/src/include/replication/snapbuild.h
+++ b/src/include/replication/snapbuild.h
@@ -57,7 +57,7 @@ struct ReorderBuffer;
 struct xl_heap_new_cid;
 struct xl_running_xacts;
 
-extern void CheckPointSnapBuild(void);
+extern void RemoveOldSerializedSnapshots(void);
 
 extern SnapBuild *AllocateSnapshotBuilder(struct ReorderBuffer *cache,
 										  TransactionId xmin_horizon, XLogRecPtr start_lsn,
-- 
2.25.1

