From 2f47436931848e63b1ca26658441cafe891bfb0e Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Tue, 11 Mar 2025 02:53:54 +0900 Subject: [PATCH v9] Add GUC option to log lock acquisition failures This commit introduces a new GUC, log_lock_failure, which controls whether a log message is generated when a lock acquisition fails. Currently, it supports logging lock failures caused by SELECT ... NOWAIT. The log message includes details about all lock holders and waiters for the lock that couldn't be acquired, helping users analyze and diagnose the causes of lock acquisition failures. --- doc/src/sgml/config.sgml | 19 +++ src/backend/access/heap/heapam.c | 29 ++-- src/backend/access/heap/heapam_handler.c | 4 +- src/backend/storage/lmgr/lmgr.c | 33 +++-- src/backend/storage/lmgr/lock.c | 47 ++++++- src/backend/storage/lmgr/proc.c | 129 +++++++++++------- src/backend/utils/misc/guc_tables.c | 9 ++ src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/storage/lmgr.h | 4 +- src/include/storage/lock.h | 4 +- src/include/storage/proc.h | 4 + 11 files changed, 198 insertions(+), 85 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index d2fa5f7d1a9..dd14bac6a4d 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -7678,6 +7678,25 @@ log_line_prefix = '%m [%p] %q%u@%d/%a ' + + log_lock_failure (boolean) + + log_lock_failure configuration parameter + + + + + Controls whether a log message is produced when a lock acquisition + fails. This is useful for analyzing the causes of lock failures. + Currently, only lock failures due to SELECT NOWAIT + is supported. The default is off. Only superusers + and users with the appropriate SET privilege can change + this setting. + + + + + log_recovery_conflict_waits (boolean) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index fa7935a0ed3..06aa5e1eafa 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -98,7 +98,7 @@ static void MultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 in Relation rel, ItemPointer ctid, XLTW_Oper oper, int *remaining); static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status, - uint16 infomask, Relation rel, int *remaining); + uint16 infomask, Relation rel, int *remaining, bool logLockFailure); static void index_delete_sort(TM_IndexDeleteOp *delstate); static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate); static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup); @@ -162,8 +162,8 @@ static const struct LockTuple((rel), (tup), tupleLockExtraInfo[mode].hwlock) #define UnlockTupleTuplock(rel, tup, mode) \ UnlockTuple((rel), (tup), tupleLockExtraInfo[mode].hwlock) -#define ConditionalLockTupleTuplock(rel, tup, mode) \ - ConditionalLockTuple((rel), (tup), tupleLockExtraInfo[mode].hwlock) +#define ConditionalLockTupleTuplock(rel, tup, mode, log) \ + ConditionalLockTuple((rel), (tup), tupleLockExtraInfo[mode].hwlock, (log)) #ifdef USE_PREFETCH /* @@ -4894,7 +4894,7 @@ l3: case LockWaitSkip: if (!ConditionalMultiXactIdWait((MultiXactId) xwait, status, infomask, relation, - NULL)) + NULL, false)) { result = TM_WouldBlock; /* recovery code expects to have buffer lock held */ @@ -4905,7 +4905,7 @@ l3: case LockWaitError: if (!ConditionalMultiXactIdWait((MultiXactId) xwait, status, infomask, relation, - NULL)) + NULL, log_lock_failure)) ereport(ERROR, (errcode(ERRCODE_LOCK_NOT_AVAILABLE), errmsg("could not obtain lock on row in relation \"%s\"", @@ -4934,7 +4934,7 @@ l3: XLTW_Lock); break; case LockWaitSkip: - if (!ConditionalXactLockTableWait(xwait)) + if (!ConditionalXactLockTableWait(xwait, false)) { result = TM_WouldBlock; /* recovery code expects to have buffer lock held */ @@ -4943,7 +4943,7 @@ l3: } break; case LockWaitError: - if (!ConditionalXactLockTableWait(xwait)) + if (!ConditionalXactLockTableWait(xwait, log_lock_failure)) ereport(ERROR, (errcode(ERRCODE_LOCK_NOT_AVAILABLE), errmsg("could not obtain lock on row in relation \"%s\"", @@ -5203,12 +5203,12 @@ heap_acquire_tuplock(Relation relation, ItemPointer tid, LockTupleMode mode, break; case LockWaitSkip: - if (!ConditionalLockTupleTuplock(relation, tid, mode)) + if (!ConditionalLockTupleTuplock(relation, tid, mode, false)) return false; break; case LockWaitError: - if (!ConditionalLockTupleTuplock(relation, tid, mode)) + if (!ConditionalLockTupleTuplock(relation, tid, mode, log_lock_failure)) ereport(ERROR, (errcode(ERRCODE_LOCK_NOT_AVAILABLE), errmsg("could not obtain lock on row in relation \"%s\"", @@ -7602,7 +7602,7 @@ static bool Do_MultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, bool nowait, Relation rel, ItemPointer ctid, XLTW_Oper oper, - int *remaining) + int *remaining, bool logLockFailure) { bool result = true; MultiXactMember *members; @@ -7649,7 +7649,7 @@ Do_MultiXactIdWait(MultiXactId multi, MultiXactStatus status, */ if (nowait) { - result = ConditionalXactLockTableWait(memxid); + result = ConditionalXactLockTableWait(memxid, logLockFailure); if (!result) break; } @@ -7682,7 +7682,7 @@ MultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, int *remaining) { (void) Do_MultiXactIdWait(multi, status, infomask, false, - rel, ctid, oper, remaining); + rel, ctid, oper, remaining, false); } /* @@ -7700,10 +7700,11 @@ MultiXactIdWait(MultiXactId multi, MultiXactStatus status, uint16 infomask, */ static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status, - uint16 infomask, Relation rel, int *remaining) + uint16 infomask, Relation rel, int *remaining, + bool logLockFailure) { return Do_MultiXactIdWait(multi, status, infomask, true, - rel, NULL, XLTW_None, remaining); + rel, NULL, XLTW_None, remaining, logLockFailure); } /* diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index e78682c3cef..ca776bea539 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -455,12 +455,12 @@ tuple_lock_retry: XLTW_FetchUpdated); break; case LockWaitSkip: - if (!ConditionalXactLockTableWait(SnapshotDirty.xmax)) + if (!ConditionalXactLockTableWait(SnapshotDirty.xmax, false)) /* skip instead of waiting */ return TM_WouldBlock; break; case LockWaitError: - if (!ConditionalXactLockTableWait(SnapshotDirty.xmax)) + if (!ConditionalXactLockTableWait(SnapshotDirty.xmax, log_lock_failure)) ereport(ERROR, (errcode(ERRCODE_LOCK_NOT_AVAILABLE), errmsg("could not obtain lock on row in relation \"%s\"", diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index 68271cb6408..f50962983c3 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -112,7 +112,8 @@ LockRelationOid(Oid relid, LOCKMODE lockmode) SetLocktagRelationOid(&tag, relid); - res = LockAcquireExtended(&tag, lockmode, false, false, true, &locallock); + res = LockAcquireExtended(&tag, lockmode, false, false, true, &locallock, + false); /* * Now that we have the lock, check for invalidation messages, so that we @@ -155,7 +156,8 @@ ConditionalLockRelationOid(Oid relid, LOCKMODE lockmode) SetLocktagRelationOid(&tag, relid); - res = LockAcquireExtended(&tag, lockmode, false, true, true, &locallock); + res = LockAcquireExtended(&tag, lockmode, false, true, true, &locallock, + false); if (res == LOCKACQUIRE_NOT_AVAIL) return false; @@ -188,7 +190,8 @@ LockRelationId(LockRelId *relid, LOCKMODE lockmode) SET_LOCKTAG_RELATION(tag, relid->dbId, relid->relId); - res = LockAcquireExtended(&tag, lockmode, false, false, true, &locallock); + res = LockAcquireExtended(&tag, lockmode, false, false, true, &locallock, + false); /* * Now that we have the lock, check for invalidation messages; see notes @@ -250,7 +253,8 @@ LockRelation(Relation relation, LOCKMODE lockmode) relation->rd_lockInfo.lockRelId.dbId, relation->rd_lockInfo.lockRelId.relId); - res = LockAcquireExtended(&tag, lockmode, false, false, true, &locallock); + res = LockAcquireExtended(&tag, lockmode, false, false, true, &locallock, + false); /* * Now that we have the lock, check for invalidation messages; see notes @@ -281,7 +285,8 @@ ConditionalLockRelation(Relation relation, LOCKMODE lockmode) relation->rd_lockInfo.lockRelId.dbId, relation->rd_lockInfo.lockRelId.relId); - res = LockAcquireExtended(&tag, lockmode, false, true, true, &locallock); + res = LockAcquireExtended(&tag, lockmode, false, true, true, &locallock, + false); if (res == LOCKACQUIRE_NOT_AVAIL) return false; @@ -574,7 +579,8 @@ LockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode) * Returns true iff the lock was acquired. */ bool -ConditionalLockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode) +ConditionalLockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode, + bool logLockFailure) { LOCKTAG tag; @@ -584,7 +590,8 @@ ConditionalLockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode) ItemPointerGetBlockNumber(tid), ItemPointerGetOffsetNumber(tid)); - return (LockAcquire(&tag, lockmode, false, true) != LOCKACQUIRE_NOT_AVAIL); + return (LockAcquireExtended(&tag, lockmode, false, true, true, NULL, + logLockFailure) != LOCKACQUIRE_NOT_AVAIL); } /* @@ -726,7 +733,7 @@ XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, * Returns true if the lock was acquired. */ bool -ConditionalXactLockTableWait(TransactionId xid) +ConditionalXactLockTableWait(TransactionId xid, bool logLockFailure) { LOCKTAG tag; bool first = true; @@ -738,7 +745,9 @@ ConditionalXactLockTableWait(TransactionId xid) SET_LOCKTAG_TRANSACTION(tag, xid); - if (LockAcquire(&tag, ShareLock, false, true) == LOCKACQUIRE_NOT_AVAIL) + if (LockAcquireExtended(&tag, ShareLock, false, true, true, NULL, + logLockFailure) + == LOCKACQUIRE_NOT_AVAIL) return false; LockRelease(&tag, ShareLock, false); @@ -1027,7 +1036,8 @@ ConditionalLockDatabaseObject(Oid classid, Oid objid, uint16 objsubid, objid, objsubid); - res = LockAcquireExtended(&tag, lockmode, false, true, true, &locallock); + res = LockAcquireExtended(&tag, lockmode, false, true, true, &locallock, + false); if (res == LOCKACQUIRE_NOT_AVAIL) return false; @@ -1106,7 +1116,8 @@ ConditionalLockSharedObject(Oid classid, Oid objid, uint16 objsubid, objid, objsubid); - res = LockAcquireExtended(&tag, lockmode, false, true, true, &locallock); + res = LockAcquireExtended(&tag, lockmode, false, true, true, &locallock, + false); if (res == LOCKACQUIRE_NOT_AVAIL) return false; diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index ccfe6b69bf5..c4aeba915b9 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -39,6 +39,7 @@ #include "access/xlogutils.h" #include "miscadmin.h" #include "pg_trace.h" +#include "storage/lmgr.h" #include "storage/proc.h" #include "storage/procarray.h" #include "storage/spin.h" @@ -48,8 +49,9 @@ #include "utils/resowner.h" -/* This configuration variable is used to set the lock table size */ -int max_locks_per_xact; /* set by guc.c */ +/* GUC variables */ +int max_locks_per_xact; /* used to set the lock table size */ +bool log_lock_failure = false; #define NLOCKENTS() \ mul_size(max_locks_per_xact, add_size(MaxBackends, max_prepared_xacts)) @@ -806,7 +808,7 @@ LockAcquire(const LOCKTAG *locktag, bool dontWait) { return LockAcquireExtended(locktag, lockmode, sessionLock, dontWait, - true, NULL); + true, NULL, false); } /* @@ -829,7 +831,8 @@ LockAcquireExtended(const LOCKTAG *locktag, bool sessionLock, bool dontWait, bool reportMemoryError, - LOCALLOCK **locallockp) + LOCALLOCK **locallockp, + bool logLockFailure) { LOCKMETHODID lockmethodid = locktag->locktag_lockmethodid; LockMethod lockMethodTable; @@ -1145,6 +1148,42 @@ LockAcquireExtended(const LOCKTAG *locktag, if (dontWait) { + if (logLockFailure) + { + StringInfoData buf, + lock_waiters_sbuf, + lock_holders_sbuf; + const char *modename; + int lockHoldersNum = 0; + + initStringInfo(&buf); + initStringInfo(&lock_waiters_sbuf); + initStringInfo(&lock_holders_sbuf); + + DescribeLockTag(&buf, &locallock->tag.lock); + modename = GetLockmodeName(locallock->tag.lock.locktag_lockmethodid, + lockmode); + + /* Gather a list of all lock holders and waiters */ + LWLockAcquire(partitionLock, LW_SHARED); + GetLockHoldersAndWaiters(locallock, &lock_holders_sbuf, + &lock_waiters_sbuf, &lockHoldersNum); + LWLockRelease(partitionLock); + + ereport(LOG, + (errmsg("process %d could not obtain %s on %s", + MyProcPid, modename, buf.data), + errdetail_log_plural( + "Process holding the lock: %s, Wait queue: %s.", + "Processes holding the lock: %s, Wait queue: %s.", + lockHoldersNum, + lock_holders_sbuf.data, + lock_waiters_sbuf.data))); + + pfree(buf.data); + pfree(lock_holders_sbuf.data); + pfree(lock_waiters_sbuf.data); + } if (locallockp) *locallockp = NULL; return LOCKACQUIRE_NOT_AVAIL; diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 749a79d48ef..e4ca861a8e6 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -1514,10 +1514,6 @@ ProcSleep(LOCALLOCK *locallock) long secs; int usecs; long msecs; - dlist_iter proc_iter; - PROCLOCK *curproclock; - bool first_holder = true, - first_waiter = true; int lockHoldersNum = 0; initStringInfo(&buf); @@ -1533,54 +1529,10 @@ ProcSleep(LOCALLOCK *locallock) msecs = secs * 1000 + usecs / 1000; usecs = usecs % 1000; - /* - * we loop over the lock's procLocks to gather a list of all - * holders and waiters. Thus we will be able to provide more - * detailed information for lock debugging purposes. - * - * lock->procLocks contains all processes which hold or wait for - * this lock. - */ - + /* Gather a list of all lock holders and waiters */ LWLockAcquire(partitionLock, LW_SHARED); - - dlist_foreach(proc_iter, &lock->procLocks) - { - curproclock = - dlist_container(PROCLOCK, lockLink, proc_iter.cur); - - /* - * we are a waiter if myProc->waitProcLock == curproclock; we - * are a holder if it is NULL or something different - */ - if (curproclock->tag.myProc->waitProcLock == curproclock) - { - if (first_waiter) - { - appendStringInfo(&lock_waiters_sbuf, "%d", - curproclock->tag.myProc->pid); - first_waiter = false; - } - else - appendStringInfo(&lock_waiters_sbuf, ", %d", - curproclock->tag.myProc->pid); - } - else - { - if (first_holder) - { - appendStringInfo(&lock_holders_sbuf, "%d", - curproclock->tag.myProc->pid); - first_holder = false; - } - else - appendStringInfo(&lock_holders_sbuf, ", %d", - curproclock->tag.myProc->pid); - - lockHoldersNum++; - } - } - + GetLockHoldersAndWaiters(locallock, &lock_holders_sbuf, + &lock_waiters_sbuf, &lockHoldersNum); LWLockRelease(partitionLock); if (deadlock_state == DS_SOFT_DEADLOCK) @@ -1885,6 +1837,81 @@ CheckDeadLockAlert(void) errno = save_errno; } +/* + * GetLockHoldersAndWaiters - get lock holders and waiters for a lock + * + * Fill lock_holders_sbuf and lock_waiters_sbuf with the PIDs of processes holding + * and waiting for the lock, and set lockHoldersNum to the number of lock holders. + * + * The lock table's partition lock must be held on entry and remains held on exit. + */ +void +GetLockHoldersAndWaiters(LOCALLOCK *locallock, StringInfo lock_holders_sbuf, + StringInfo lock_waiters_sbuf, int *lockHoldersNum) +{ + dlist_iter proc_iter; + PROCLOCK *curproclock; + LOCK *lock = locallock->lock; + bool first_holder = true, + first_waiter = true; + +#ifdef USE_ASSERT_CHECKING + { + uint32 hashcode = locallock->hashcode; + LWLock *partitionLock = LockHashPartitionLock(hashcode); + + Assert(LWLockHeldByMe(partitionLock)); + } +#endif + + *lockHoldersNum = 0; + + /* + * Loop over the lock's procLocks to gather a list of all holders and + * waiters. Thus we will be able to provide more detailed information for + * lock debugging purposes. + * + * lock->procLocks contains all processes which hold or wait for this + * lock. + */ + dlist_foreach(proc_iter, &lock->procLocks) + { + curproclock = + dlist_container(PROCLOCK, lockLink, proc_iter.cur); + + /* + * We are a waiter if myProc->waitProcLock == curproclock; we are a + * holder if it is NULL or something different. + */ + if (curproclock->tag.myProc->waitProcLock == curproclock) + { + if (first_waiter) + { + appendStringInfo(lock_waiters_sbuf, "%d", + curproclock->tag.myProc->pid); + first_waiter = false; + } + else + appendStringInfo(lock_waiters_sbuf, ", %d", + curproclock->tag.myProc->pid); + } + else + { + if (first_holder) + { + appendStringInfo(lock_holders_sbuf, "%d", + curproclock->tag.myProc->pid); + first_holder = false; + } + else + appendStringInfo(lock_holders_sbuf, ", %d", + curproclock->tag.myProc->pid); + + (*lockHoldersNum)++; + } + } +} + /* * ProcWaitForSignal - wait for a signal from another backend. * diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index ad25cbb39c5..c0b28bbe783 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -1594,6 +1594,15 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { + {"log_lock_failure", PGC_SUSET, LOGGING_WHAT, + gettext_noop("Logs lock failures."), + NULL + }, + &log_lock_failure, + false, + NULL, NULL, NULL + }, { {"log_recovery_conflict_waits", PGC_SIGHUP, LOGGING_WHAT, gettext_noop("Logs standby recovery conflict waits."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 2d1de9c37bd..47aedde8cf5 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -608,6 +608,7 @@ # %% = '%' # e.g. '<%u%%%d> ' #log_lock_waits = off # log lock waits >= deadlock_timeout +#log_lock_failure = off # log lock failures #log_recovery_conflict_waits = off # log standby recovery conflict waits # >= deadlock_timeout #log_parameter_max_length = -1 # when logging statements, limit logged diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h index 728260c1cc0..902e6154fae 100644 --- a/src/include/storage/lmgr.h +++ b/src/include/storage/lmgr.h @@ -73,7 +73,7 @@ extern void UnlockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode); /* Lock a tuple (see heap_lock_tuple before assuming you understand this) */ extern void LockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode); extern bool ConditionalLockTuple(Relation relation, ItemPointer tid, - LOCKMODE lockmode); + LOCKMODE lockmode, bool logLockFailure); extern void UnlockTuple(Relation relation, ItemPointer tid, LOCKMODE lockmode); /* Lock an XID (used to wait for a transaction to finish) */ @@ -81,7 +81,7 @@ extern void XactLockTableInsert(TransactionId xid); extern void XactLockTableDelete(TransactionId xid); extern void XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, XLTW_Oper oper); -extern bool ConditionalXactLockTableWait(TransactionId xid); +extern bool ConditionalXactLockTableWait(TransactionId xid, bool logLockFailure); /* Lock VXIDs, specified by conflicting locktags */ extern void WaitForLockers(LOCKTAG heaplocktag, LOCKMODE lockmode, bool progress); diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index 1076995518f..5db3809c673 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -30,6 +30,7 @@ typedef struct PGPROC PGPROC; /* GUC variables */ extern PGDLLIMPORT int max_locks_per_xact; +extern PGDLLIMPORT bool log_lock_failure; #ifdef LOCK_DEBUG extern PGDLLIMPORT int Trace_lock_oidmin; @@ -560,7 +561,8 @@ extern LockAcquireResult LockAcquireExtended(const LOCKTAG *locktag, bool sessionLock, bool dontWait, bool reportMemoryError, - LOCALLOCK **locallockp); + LOCALLOCK **locallockp, + bool logLockFailure); extern void AbortStrongLockAcquire(void); extern void MarkLockClear(LOCALLOCK *locallock); extern bool LockRelease(const LOCKTAG *locktag, diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 114eb1f8f76..0750ec3c474 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -489,6 +489,10 @@ extern void ProcWakeup(PGPROC *proc, ProcWaitStatus waitStatus); extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock); extern void CheckDeadLockAlert(void); extern void LockErrorCleanup(void); +extern void GetLockHoldersAndWaiters(LOCALLOCK *locallock, + StringInfo lock_holders_sbuf, + StringInfo lock_waiters_sbuf, + int *lockHoldersNum); extern void ProcWaitForSignal(uint32 wait_event_info); extern void ProcSendSignal(ProcNumber procNumber); -- 2.48.1