From df5c0fb71f5487a7683f627be9b259882e5d0487 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Tue, 27 Aug 2024 10:55:14 -0500 Subject: [PATCH v1 1/1] remove adaptive spins_per_delay code --- src/backend/storage/buffer/bufmgr.c | 3 - src/backend/storage/lmgr/lwlock.c | 1 - src/backend/storage/lmgr/proc.c | 17 ------ src/backend/storage/lmgr/s_lock.c | 95 +++-------------------------- src/include/storage/proc.h | 2 - src/include/storage/s_lock.h | 7 --- 6 files changed, 10 insertions(+), 115 deletions(-) diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index fb38c7bdd4..e05e8fe424 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -5785,7 +5785,6 @@ LockBufHdr(BufferDesc *desc) break; perform_spin_delay(&delayStatus); } - finish_spin_delay(&delayStatus); return old_buf_state | BM_LOCKED; } @@ -5812,8 +5811,6 @@ WaitBufHdrUnlocked(BufferDesc *buf) buf_state = pg_atomic_read_u32(&buf->state); } - finish_spin_delay(&delayStatus); - return buf_state; } diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index e765754d80..ac8a1f2c1d 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -883,7 +883,6 @@ LWLockWaitListLock(LWLock *lock) #ifdef LWLOCK_STATS delays += delayStatus.delays; #endif - finish_spin_delay(&delayStatus); } /* diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index ac66da8638..0ed8e1a222 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -171,7 +171,6 @@ InitProcGlobal(void) /* * Initialize the data structures. */ - ProcGlobal->spins_per_delay = DEFAULT_SPINS_PER_DELAY; dlist_init(&ProcGlobal->freeProcs); dlist_init(&ProcGlobal->autovacFreeProcs); dlist_init(&ProcGlobal->bgworkerFreeProcs); @@ -321,14 +320,9 @@ InitProcess(void) /* * Try to get a proc struct from the appropriate free list. If this * fails, we must be out of PGPROC structures (not to mention semaphores). - * - * While we are holding the ProcStructLock, also copy the current shared - * estimate of spins_per_delay to local storage. */ SpinLockAcquire(ProcStructLock); - set_spins_per_delay(ProcGlobal->spins_per_delay); - if (!dlist_is_empty(procgloballist)) { MyProc = dlist_container(PGPROC, links, dlist_pop_head_node(procgloballist)); @@ -539,14 +533,9 @@ InitAuxiliaryProcess(void) /* * We use the ProcStructLock to protect assignment and releasing of * AuxiliaryProcs entries. - * - * While we are holding the ProcStructLock, also copy the current shared - * estimate of spins_per_delay to local storage. */ SpinLockAcquire(ProcStructLock); - set_spins_per_delay(ProcGlobal->spins_per_delay); - /* * Find a free auxproc ... *big* trouble if there isn't one ... */ @@ -942,9 +931,6 @@ ProcKill(int code, Datum arg) dlist_push_tail(procgloballist, &proc->links); } - /* Update shared estimate of spins_per_delay */ - ProcGlobal->spins_per_delay = update_spins_per_delay(ProcGlobal->spins_per_delay); - SpinLockRelease(ProcStructLock); /* @@ -1008,9 +994,6 @@ AuxiliaryProcKill(int code, Datum arg) proc->vxid.procNumber = INVALID_PROC_NUMBER; proc->vxid.lxid = InvalidTransactionId; - /* Update shared estimate of spins_per_delay */ - ProcGlobal->spins_per_delay = update_spins_per_delay(ProcGlobal->spins_per_delay); - SpinLockRelease(ProcStructLock); } diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c index 69549a65db..2a8d07b79f 100644 --- a/src/backend/storage/lmgr/s_lock.c +++ b/src/backend/storage/lmgr/s_lock.c @@ -5,18 +5,13 @@ * * When waiting for a contended spinlock we loop tightly for awhile, then * delay using pg_usleep() and try again. Preferably, "awhile" should be a - * small multiple of the maximum time we expect a spinlock to be held. 100 - * iterations seems about right as an initial guess. However, on a - * uniprocessor the loop is a waste of cycles, while in a multi-CPU scenario - * it's usually better to spin a bit longer than to call the kernel, so we try - * to adapt the spin loop count depending on whether we seem to be in a - * uniprocessor or multiprocessor. - * - * Note: you might think MIN_SPINS_PER_DELAY should be just 1, but you'd - * be wrong; there are platforms where that can result in a "stuck - * spinlock" failure. This has been seen particularly on Alphas; it seems - * that the first TAS after returning from kernel space will always fail - * on that hardware. + * small multiple of the maximum time we expect a spinlock to be held. 1000 + * iterations has historically worked well for multiprocessors, where it's + * usually better to spin a bit longer than to call the kernel. We used to + * try to adapt the spin loop count depending on whether we seem to be in a + * uniprocessor or multiprocessor (since the loop is a waste of cycles on a + * uniprocessor) but we no longer anticipate any serious usage on + * uniprocessor systems. * * Once we do decide to block, we use randomly increasing pg_usleep() * delays. The first delay is 1 msec, then the delay randomly increases to @@ -55,8 +50,7 @@ #include "storage/s_lock.h" #include "utils/wait_event.h" -#define MIN_SPINS_PER_DELAY 10 -#define MAX_SPINS_PER_DELAY 1000 +#define SPINS_PER_DELAY 1000 #define NUM_DELAYS 1000 #define MIN_DELAY_USEC 1000L #define MAX_DELAY_USEC 1000000L @@ -70,8 +64,6 @@ static uint32 local_my_wait_event_info; uint32 *my_wait_event_info = &local_my_wait_event_info; #endif -static int spins_per_delay = DEFAULT_SPINS_PER_DELAY; - /* * s_lock_stuck() - complain about a stuck spinlock @@ -107,8 +99,6 @@ s_lock(volatile slock_t *lock, const char *file, int line, const char *func) perform_spin_delay(&delayStatus); } - finish_spin_delay(&delayStatus); - return delayStatus.delays; } @@ -129,8 +119,8 @@ perform_spin_delay(SpinDelayStatus *status) /* CPU-specific delay each time through the loop */ SPIN_DELAY(); - /* Block the process every spins_per_delay tries */ - if (++(status->spins) >= spins_per_delay) + /* Block the process every SPINS_PER_DELAY tries */ + if (++(status->spins) >= SPINS_PER_DELAY) { if (++(status->delays) > NUM_DELAYS) s_lock_stuck(status->file, status->line, status->func); @@ -166,71 +156,6 @@ perform_spin_delay(SpinDelayStatus *status) } } -/* - * After acquiring a spinlock, update estimates about how long to loop. - * - * If we were able to acquire the lock without delaying, it's a good - * indication we are in a multiprocessor. If we had to delay, it's a sign - * (but not a sure thing) that we are in a uniprocessor. Hence, we - * decrement spins_per_delay slowly when we had to delay, and increase it - * rapidly when we didn't. It's expected that spins_per_delay will - * converge to the minimum value on a uniprocessor and to the maximum - * value on a multiprocessor. - * - * Note: spins_per_delay is local within our current process. We want to - * average these observations across multiple backends, since it's - * relatively rare for this function to even get entered, and so a single - * backend might not live long enough to converge on a good value. That - * is handled by the two routines below. - */ -void -finish_spin_delay(SpinDelayStatus *status) -{ - if (status->cur_delay == 0) - { - /* we never had to delay */ - if (spins_per_delay < MAX_SPINS_PER_DELAY) - spins_per_delay = Min(spins_per_delay + 100, MAX_SPINS_PER_DELAY); - } - else - { - if (spins_per_delay > MIN_SPINS_PER_DELAY) - spins_per_delay = Max(spins_per_delay - 1, MIN_SPINS_PER_DELAY); - } -} - -/* - * Set local copy of spins_per_delay during backend startup. - * - * NB: this has to be pretty fast as it is called while holding a spinlock - */ -void -set_spins_per_delay(int shared_spins_per_delay) -{ - spins_per_delay = shared_spins_per_delay; -} - -/* - * Update shared estimate of spins_per_delay during backend exit. - * - * NB: this has to be pretty fast as it is called while holding a spinlock - */ -int -update_spins_per_delay(int shared_spins_per_delay) -{ - /* - * We use an exponential moving average with a relatively slow adaption - * rate, so that noise in any one backend's result won't affect the shared - * value too much. As long as both inputs are within the allowed range, - * the result must be too, so we need not worry about clamping the result. - * - * We deliberately truncate rather than rounding; this is so that single - * adjustments inside a backend can affect the shared estimate (see the - * asymmetric adjustment rules above). - */ - return (shared_spins_per_delay * 15 + spins_per_delay) / 16; -} - /*****************************************************************************/ #if defined(S_LOCK_TEST) diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index deeb06c9e0..f001e73f6e 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -417,8 +417,6 @@ typedef struct PROC_HDR Latch *walwriterLatch; /* Checkpointer process's latch */ Latch *checkpointerLatch; - /* Current shared estimate of appropriate spins_per_delay value */ - int spins_per_delay; /* Buffer id of the buffer that Startup process waits for pin on, or -1 */ int startupBufferPinWaitBufId; } PROC_HDR; diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h index e94ed5f48b..7573631731 100644 --- a/src/include/storage/s_lock.h +++ b/src/include/storage/s_lock.h @@ -710,12 +710,6 @@ extern int tas(volatile slock_t *lock); /* in port/.../tas.s, or */ extern int s_lock(volatile slock_t *lock, const char *file, int line, const char *func); -/* Support for dynamic adjustment of spins_per_delay */ -#define DEFAULT_SPINS_PER_DELAY 100 - -extern void set_spins_per_delay(int shared_spins_per_delay); -extern int update_spins_per_delay(int shared_spins_per_delay); - /* * Support for spin delay which is useful in various places where * spinlock-like procedures take place. @@ -744,6 +738,5 @@ init_spin_delay(SpinDelayStatus *status, #define init_local_spin_delay(status) init_spin_delay(status, __FILE__, __LINE__, __func__) extern void perform_spin_delay(SpinDelayStatus *status); -extern void finish_spin_delay(SpinDelayStatus *status); #endif /* S_LOCK_H */ -- 2.39.3 (Apple Git-146)