From df5c0fb71f5487a7683f627be9b259882e5d0487 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nathan@postgresql.org>
Date: Tue, 27 Aug 2024 10:55:14 -0500
Subject: [PATCH v1 1/1] remove adaptive spins_per_delay code

---
 src/backend/storage/buffer/bufmgr.c |  3 -
 src/backend/storage/lmgr/lwlock.c   |  1 -
 src/backend/storage/lmgr/proc.c     | 17 ------
 src/backend/storage/lmgr/s_lock.c   | 95 +++--------------------------
 src/include/storage/proc.h          |  2 -
 src/include/storage/s_lock.h        |  7 ---
 6 files changed, 10 insertions(+), 115 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index fb38c7bdd4..e05e8fe424 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -5785,7 +5785,6 @@ LockBufHdr(BufferDesc *desc)
 			break;
 		perform_spin_delay(&delayStatus);
 	}
-	finish_spin_delay(&delayStatus);
 	return old_buf_state | BM_LOCKED;
 }
 
@@ -5812,8 +5811,6 @@ WaitBufHdrUnlocked(BufferDesc *buf)
 		buf_state = pg_atomic_read_u32(&buf->state);
 	}
 
-	finish_spin_delay(&delayStatus);
-
 	return buf_state;
 }
 
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index e765754d80..ac8a1f2c1d 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -883,7 +883,6 @@ LWLockWaitListLock(LWLock *lock)
 #ifdef LWLOCK_STATS
 			delays += delayStatus.delays;
 #endif
-			finish_spin_delay(&delayStatus);
 		}
 
 		/*
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index ac66da8638..0ed8e1a222 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -171,7 +171,6 @@ InitProcGlobal(void)
 	/*
 	 * Initialize the data structures.
 	 */
-	ProcGlobal->spins_per_delay = DEFAULT_SPINS_PER_DELAY;
 	dlist_init(&ProcGlobal->freeProcs);
 	dlist_init(&ProcGlobal->autovacFreeProcs);
 	dlist_init(&ProcGlobal->bgworkerFreeProcs);
@@ -321,14 +320,9 @@ InitProcess(void)
 	/*
 	 * Try to get a proc struct from the appropriate free list.  If this
 	 * fails, we must be out of PGPROC structures (not to mention semaphores).
-	 *
-	 * While we are holding the ProcStructLock, also copy the current shared
-	 * estimate of spins_per_delay to local storage.
 	 */
 	SpinLockAcquire(ProcStructLock);
 
-	set_spins_per_delay(ProcGlobal->spins_per_delay);
-
 	if (!dlist_is_empty(procgloballist))
 	{
 		MyProc = dlist_container(PGPROC, links, dlist_pop_head_node(procgloballist));
@@ -539,14 +533,9 @@ InitAuxiliaryProcess(void)
 	/*
 	 * We use the ProcStructLock to protect assignment and releasing of
 	 * AuxiliaryProcs entries.
-	 *
-	 * While we are holding the ProcStructLock, also copy the current shared
-	 * estimate of spins_per_delay to local storage.
 	 */
 	SpinLockAcquire(ProcStructLock);
 
-	set_spins_per_delay(ProcGlobal->spins_per_delay);
-
 	/*
 	 * Find a free auxproc ... *big* trouble if there isn't one ...
 	 */
@@ -942,9 +931,6 @@ ProcKill(int code, Datum arg)
 		dlist_push_tail(procgloballist, &proc->links);
 	}
 
-	/* Update shared estimate of spins_per_delay */
-	ProcGlobal->spins_per_delay = update_spins_per_delay(ProcGlobal->spins_per_delay);
-
 	SpinLockRelease(ProcStructLock);
 
 	/*
@@ -1008,9 +994,6 @@ AuxiliaryProcKill(int code, Datum arg)
 	proc->vxid.procNumber = INVALID_PROC_NUMBER;
 	proc->vxid.lxid = InvalidTransactionId;
 
-	/* Update shared estimate of spins_per_delay */
-	ProcGlobal->spins_per_delay = update_spins_per_delay(ProcGlobal->spins_per_delay);
-
 	SpinLockRelease(ProcStructLock);
 }
 
diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c
index 69549a65db..2a8d07b79f 100644
--- a/src/backend/storage/lmgr/s_lock.c
+++ b/src/backend/storage/lmgr/s_lock.c
@@ -5,18 +5,13 @@
  *
  * When waiting for a contended spinlock we loop tightly for awhile, then
  * delay using pg_usleep() and try again.  Preferably, "awhile" should be a
- * small multiple of the maximum time we expect a spinlock to be held.  100
- * iterations seems about right as an initial guess.  However, on a
- * uniprocessor the loop is a waste of cycles, while in a multi-CPU scenario
- * it's usually better to spin a bit longer than to call the kernel, so we try
- * to adapt the spin loop count depending on whether we seem to be in a
- * uniprocessor or multiprocessor.
- *
- * Note: you might think MIN_SPINS_PER_DELAY should be just 1, but you'd
- * be wrong; there are platforms where that can result in a "stuck
- * spinlock" failure.  This has been seen particularly on Alphas; it seems
- * that the first TAS after returning from kernel space will always fail
- * on that hardware.
+ * small multiple of the maximum time we expect a spinlock to be held.  1000
+ * iterations has historically worked well for multiprocessors, where it's
+ * usually better to spin a bit longer than to call the kernel.  We used to
+ * try to adapt the spin loop count depending on whether we seem to be in a
+ * uniprocessor or multiprocessor (since the loop is a waste of cycles on a
+ * uniprocessor) but we no longer anticipate any serious usage on
+ * uniprocessor systems.
  *
  * Once we do decide to block, we use randomly increasing pg_usleep()
  * delays. The first delay is 1 msec, then the delay randomly increases to
@@ -55,8 +50,7 @@
 #include "storage/s_lock.h"
 #include "utils/wait_event.h"
 
-#define MIN_SPINS_PER_DELAY 10
-#define MAX_SPINS_PER_DELAY 1000
+#define SPINS_PER_DELAY		1000
 #define NUM_DELAYS			1000
 #define MIN_DELAY_USEC		1000L
 #define MAX_DELAY_USEC		1000000L
@@ -70,8 +64,6 @@ static uint32 local_my_wait_event_info;
 uint32	   *my_wait_event_info = &local_my_wait_event_info;
 #endif
 
-static int	spins_per_delay = DEFAULT_SPINS_PER_DELAY;
-
 
 /*
  * s_lock_stuck() - complain about a stuck spinlock
@@ -107,8 +99,6 @@ s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
 		perform_spin_delay(&delayStatus);
 	}
 
-	finish_spin_delay(&delayStatus);
-
 	return delayStatus.delays;
 }
 
@@ -129,8 +119,8 @@ perform_spin_delay(SpinDelayStatus *status)
 	/* CPU-specific delay each time through the loop */
 	SPIN_DELAY();
 
-	/* Block the process every spins_per_delay tries */
-	if (++(status->spins) >= spins_per_delay)
+	/* Block the process every SPINS_PER_DELAY tries */
+	if (++(status->spins) >= SPINS_PER_DELAY)
 	{
 		if (++(status->delays) > NUM_DELAYS)
 			s_lock_stuck(status->file, status->line, status->func);
@@ -166,71 +156,6 @@ perform_spin_delay(SpinDelayStatus *status)
 	}
 }
 
-/*
- * After acquiring a spinlock, update estimates about how long to loop.
- *
- * If we were able to acquire the lock without delaying, it's a good
- * indication we are in a multiprocessor.  If we had to delay, it's a sign
- * (but not a sure thing) that we are in a uniprocessor. Hence, we
- * decrement spins_per_delay slowly when we had to delay, and increase it
- * rapidly when we didn't.  It's expected that spins_per_delay will
- * converge to the minimum value on a uniprocessor and to the maximum
- * value on a multiprocessor.
- *
- * Note: spins_per_delay is local within our current process. We want to
- * average these observations across multiple backends, since it's
- * relatively rare for this function to even get entered, and so a single
- * backend might not live long enough to converge on a good value.  That
- * is handled by the two routines below.
- */
-void
-finish_spin_delay(SpinDelayStatus *status)
-{
-	if (status->cur_delay == 0)
-	{
-		/* we never had to delay */
-		if (spins_per_delay < MAX_SPINS_PER_DELAY)
-			spins_per_delay = Min(spins_per_delay + 100, MAX_SPINS_PER_DELAY);
-	}
-	else
-	{
-		if (spins_per_delay > MIN_SPINS_PER_DELAY)
-			spins_per_delay = Max(spins_per_delay - 1, MIN_SPINS_PER_DELAY);
-	}
-}
-
-/*
- * Set local copy of spins_per_delay during backend startup.
- *
- * NB: this has to be pretty fast as it is called while holding a spinlock
- */
-void
-set_spins_per_delay(int shared_spins_per_delay)
-{
-	spins_per_delay = shared_spins_per_delay;
-}
-
-/*
- * Update shared estimate of spins_per_delay during backend exit.
- *
- * NB: this has to be pretty fast as it is called while holding a spinlock
- */
-int
-update_spins_per_delay(int shared_spins_per_delay)
-{
-	/*
-	 * We use an exponential moving average with a relatively slow adaption
-	 * rate, so that noise in any one backend's result won't affect the shared
-	 * value too much.  As long as both inputs are within the allowed range,
-	 * the result must be too, so we need not worry about clamping the result.
-	 *
-	 * We deliberately truncate rather than rounding; this is so that single
-	 * adjustments inside a backend can affect the shared estimate (see the
-	 * asymmetric adjustment rules above).
-	 */
-	return (shared_spins_per_delay * 15 + spins_per_delay) / 16;
-}
-
 
 /*****************************************************************************/
 #if defined(S_LOCK_TEST)
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index deeb06c9e0..f001e73f6e 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -417,8 +417,6 @@ typedef struct PROC_HDR
 	Latch	   *walwriterLatch;
 	/* Checkpointer process's latch */
 	Latch	   *checkpointerLatch;
-	/* Current shared estimate of appropriate spins_per_delay value */
-	int			spins_per_delay;
 	/* Buffer id of the buffer that Startup process waits for pin on, or -1 */
 	int			startupBufferPinWaitBufId;
 } PROC_HDR;
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index e94ed5f48b..7573631731 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -710,12 +710,6 @@ extern int	tas(volatile slock_t *lock);		/* in port/.../tas.s, or
  */
 extern int s_lock(volatile slock_t *lock, const char *file, int line, const char *func);
 
-/* Support for dynamic adjustment of spins_per_delay */
-#define DEFAULT_SPINS_PER_DELAY  100
-
-extern void set_spins_per_delay(int shared_spins_per_delay);
-extern int	update_spins_per_delay(int shared_spins_per_delay);
-
 /*
  * Support for spin delay which is useful in various places where
  * spinlock-like procedures take place.
@@ -744,6 +738,5 @@ init_spin_delay(SpinDelayStatus *status,
 
 #define init_local_spin_delay(status) init_spin_delay(status, __FILE__, __LINE__, __func__)
 extern void perform_spin_delay(SpinDelayStatus *status);
-extern void finish_spin_delay(SpinDelayStatus *status);
 
 #endif	 /* S_LOCK_H */
-- 
2.39.3 (Apple Git-146)