From 9dcf4a45b6ed7d8fca1a1cd6782f11dff3a84406 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Mon, 17 Jul 2023 09:20:05 -0700
Subject: [PATCH v1] Caller specified wait events for LWLockWaitForVar(),
 LWLockAcquireOrWait()

Reporting waits within those functions as LWLock wait events is misleading, as
we do not actually wait for the lock themselves.

Author:
Reviewed-by:
Discussion: https://postgr.es/m/
Backpatch:
---
 src/include/storage/lwlock.h                  |  6 ++++--
 src/backend/access/transam/xlog.c             |  6 ++++--
 src/backend/storage/lmgr/lwlock.c             | 19 +++++++++++++------
 .../utils/activity/wait_event_names.txt       |  2 ++
 4 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index 34169e5889e..0f345076f41 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -127,7 +127,8 @@ extern PGDLLIMPORT bool Trace_lwlocks;
 
 extern bool LWLockAcquire(LWLock *lock, LWLockMode mode);
 extern bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode);
-extern bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode);
+extern bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode,
+								uint32 wait_event_info);
 extern void LWLockRelease(LWLock *lock);
 extern void LWLockReleaseClearVar(LWLock *lock, uint64 *valptr, uint64 val);
 extern void LWLockReleaseAll(void);
@@ -135,7 +136,8 @@ extern bool LWLockHeldByMe(LWLock *lock);
 extern bool LWLockAnyHeldByMe(LWLock *lock, int nlocks, size_t stride);
 extern bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode);
 
-extern bool LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval);
+extern bool LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval,
+							 uint32 wait_event_info);
 extern void LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 val);
 
 extern Size LWLockShmemSize(void);
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 8b0710abe60..e406634c1cf 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -1498,7 +1498,8 @@ WaitXLogInsertionsToFinish(XLogRecPtr upto)
 			 */
 			if (LWLockWaitForVar(&WALInsertLocks[i].l.lock,
 								 &WALInsertLocks[i].l.insertingAt,
-								 insertingat, &insertingat))
+								 insertingat, &insertingat,
+								 WAIT_EVENT_WAL_WAIT_INSERT))
 			{
 				/* the lock was free, so no insertion in progress */
 				insertingat = InvalidXLogRecPtr;
@@ -2586,7 +2587,8 @@ XLogFlush(XLogRecPtr record)
 		 * helps to maintain a good rate of group committing when the system
 		 * is bottlenecked by the speed of fsyncing.
 		 */
-		if (!LWLockAcquireOrWait(WALWriteLock, LW_EXCLUSIVE))
+		if (!LWLockAcquireOrWait(WALWriteLock, LW_EXCLUSIVE,
+								 WAIT_EVENT_WAL_WAIT_WRITE))
 		{
 			/*
 			 * The lock is now free, but we didn't acquire it yet. Before we
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 01d738f306b..549b061b533 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -1418,9 +1418,12 @@ LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
  * backends as a side-effect.  Those other backends need to wait until the
  * flush finishes, but don't need to acquire the lock anymore.  They can just
  * wake up, observe that their records have already been flushed, and return.
+ *
+ * As this is not used to *wait* for lwlock acquisition, the caller has to
+ * provide a wait event to be reported.
  */
 bool
-LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
+LWLockAcquireOrWait(LWLock *lock, LWLockMode mode, uint32 wait_event_info)
 {
 	PGPROC	   *proc = MyProc;
 	bool		mustwait;
@@ -1470,7 +1473,7 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
 			lwstats->block_count++;
 #endif
 
-			LWLockReportWaitStart(lock);
+			pgstat_report_wait_start(wait_event_info);
 			if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
 				TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);
 
@@ -1492,7 +1495,7 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
 #endif
 			if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
 				TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
-			LWLockReportWaitEnd();
+			pgstat_report_wait_end();
 
 			LOG_LWDEBUG("LWLockAcquireOrWait", lock, "awakened");
 		}
@@ -1605,9 +1608,13 @@ LWLockConflictsWithVar(LWLock *lock,
  *
  * Note: this function ignores shared lock holders; if the lock is held
  * in shared mode, returns 'true'.
+ *
+ * As this is not used to wait for lwlocks themselves, the caller has to
+ * provide a wait event to be reported.
  */
 bool
-LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval)
+LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval,
+				 uint32 wait_event_info)
 {
 	PGPROC	   *proc = MyProc;
 	int			extraWaits = 0;
@@ -1686,7 +1693,7 @@ LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval)
 		lwstats->block_count++;
 #endif
 
-		LWLockReportWaitStart(lock);
+		pgstat_report_wait_start(wait_event_info);
 		if (TRACE_POSTGRESQL_LWLOCK_WAIT_START_ENABLED())
 			TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), LW_EXCLUSIVE);
 
@@ -1709,7 +1716,7 @@ LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval)
 
 		if (TRACE_POSTGRESQL_LWLOCK_WAIT_DONE_ENABLED())
 			TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), LW_EXCLUSIVE);
-		LWLockReportWaitEnd();
+		pgstat_report_wait_end();
 
 		LOG_LWDEBUG("LWLockWaitForVar", lock, "awakened");
 
diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt
index 3fabad96d96..a9358538721 100644
--- a/src/backend/utils/activity/wait_event_names.txt
+++ b/src/backend/utils/activity/wait_event_names.txt
@@ -141,6 +141,8 @@ WAIT_EVENT_SYNC_REP	SyncRep	"Waiting for confirmation from a remote server durin
 WAIT_EVENT_WAL_RECEIVER_EXIT	WalReceiverExit	"Waiting for the WAL receiver to exit."
 WAIT_EVENT_WAL_RECEIVER_WAIT_START	WalReceiverWaitStart	"Waiting for startup process to send initial data for streaming replication."
 WAIT_EVENT_XACT_GROUP_UPDATE	XactGroupUpdate	"Waiting for the group leader to update transaction status at end of a parallel operation."
+WAIT_EVENT_WAL_WAIT_INSERT	WALWaitInsert	"Waiting for WAL record to be copied into buffers."
+WAIT_EVENT_WAL_WAIT_WRITE	WALWaitWrite	"Waiting for WAL buffers to be written or flushed to disk."
 
 
 #
-- 
2.38.0

