doc/src/sgml/config.sgml | 22 ++++
doc/src/sgml/high-availability.sgml | 7 ++
src/backend/storage/buffer/bufmgr.c | 10 ++
src/backend/storage/ipc/standby.c | 164 ++++++++++++++++++++++----
src/backend/storage/lmgr/proc.c | 22 ++++
src/backend/utils/misc/guc.c | 10 +-
src/backend/utils/misc/postgresql.conf.sample | 2 +
src/include/storage/standby.h | 3 +
8 files changed, 219 insertions(+), 21 deletions(-)
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index ee914740cc..9d7d409951 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -6839,6 +6839,28 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
+
+ log_recovery_conflict_waits (boolean)
+
+ log_recovery_conflict_waits configuration parameter
+
+
+
+
+ Controls whether a log message is produced when the startup process
+ is waiting longer than deadlock_timeout
+ for recovery conflicts. This is useful in determining if recovery
+ conflicts prevents the recovery from applying WAL.
+
+
+
+ The default is off. This parameter can only be set
+ in the postgresql.conf file or on the server
+ command line.
+
+
+
+
log_parameter_max_length (integer)
diff --git a/doc/src/sgml/high-availability.sgml b/doc/src/sgml/high-availability.sgml
index 86da84fce7..d7b1c9854d 100644
--- a/doc/src/sgml/high-availability.sgml
+++ b/doc/src/sgml/high-availability.sgml
@@ -1972,6 +1972,13 @@ if (!triggered)
queries that conflict with to-be-applied WAL records.
+
+ You can control whether a log message is produced when the startup process
+ is waiting longer than deadlock_timeout for recovery
+ conflicts. This is controled by the
+ parameter.
+
+
An example of the problem situation is an administrator on the primary
server running DROP TABLE on a table that is currently being
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index e549fa1d30..b49512b5e2 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -3808,6 +3808,7 @@ LockBufferForCleanup(Buffer buffer)
{
BufferDesc *bufHdr;
char *new_status = NULL;
+ TimestampTz waitStart = 0;
Assert(BufferIsPinned(buffer));
Assert(PinCountWaitBuf == NULL);
@@ -3829,6 +3830,9 @@ LockBufferForCleanup(Buffer buffer)
bufHdr = GetBufferDescriptor(buffer - 1);
+ if (log_recovery_conflict_waits)
+ waitStart = GetCurrentTimestamp();
+
for (;;)
{
uint32 buf_state;
@@ -3881,6 +3885,12 @@ LockBufferForCleanup(Buffer buffer)
new_status[len] = '\0'; /* truncate off " waiting" */
}
+ if (waitStart > 0 &&
+ TimestampDifferenceExceeds(waitStart, GetCurrentTimestamp(),
+ DeadlockTimeout))
+ LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
+ waitStart, NULL);
+
/* Publish the bufid that Startup process waits on */
SetStartupBufferPinWaitBufId(buffer - 1);
/* Set alarm and then wait to be signaled by UnpinBuffer() */
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index 52b2809dac..1cc8b324e1 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -39,6 +39,7 @@
int vacuum_defer_cleanup_age;
int max_standby_archive_delay = 30 * 1000;
int max_standby_streaming_delay = 30 * 1000;
+bool log_recovery_conflict_waits = false;
static HTAB *RecoveryLockLists;
@@ -49,6 +50,7 @@ static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlis
static void SendRecoveryConflictWithBufferPin(ProcSignalReason reason);
static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts);
static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks);
+static const char *get_recovery_conflict_desc(ProcSignalReason reason);
/*
* Keep track of all the locks owned by a given transaction.
@@ -215,15 +217,71 @@ WaitExceedsMaxStandbyDelay(uint32 wait_event_info)
return false;
}
+/*
+ * Log the recovery conflict.
+ *
+ * waitStart is the timestamp when the caller started to wait. This function also
+ * reports the details about the conflicting process ids if *waitlist is not NULL.
+ */
+void
+LogRecoveryConflict(ProcSignalReason reason, TimestampTz waitStart,
+ VirtualTransactionId *waitlist)
+{
+ long secs;
+ int usecs;
+ long msecs;
+
+ TimestampDifference(waitStart, GetCurrentTimestamp(), &secs, &usecs);
+ msecs = secs * 1000 + usecs / 1000;
+ usecs = usecs % 1000;
+
+ if (waitlist)
+ {
+ VirtualTransactionId *vxids;
+ StringInfoData buf;
+ int nprocs = 0;
+
+ initStringInfo(&buf);
+
+ /* construct a string of list of the conflicted processes */
+ vxids = waitlist;
+ while (VirtualTransactionIdIsValid(*vxids))
+ {
+ PGPROC *proc = BackendIdGetProc(vxids->backendId);
+
+ if (nprocs == 0)
+ appendStringInfo(&buf, "%d", proc->pid);
+ else
+ appendStringInfo(&buf, ", %d", proc->pid);
+
+ nprocs++;
+ vxids++;
+ }
+
+ ereport(LOG,
+ (errmsg("%s after %ld.%03d ms",
+ get_recovery_conflict_desc(reason), msecs, usecs),
+ (errdetail_log_plural("Conflicting process: %s.",
+ "Conflicting processes: %s.",
+ nprocs, buf.data))));
+ pfree(buf.data);
+ }
+ else
+ ereport(LOG,
+ (errmsg("%s after %ld.%03d ms",
+ get_recovery_conflict_desc(reason), msecs, usecs)));
+}
+
/*
* This is the main executioner for any query backend that conflicts with
* recovery processing. Judgement has already been passed on it within
* a specific rmgr. Here we just issue the orders to the procs. The procs
* then throw the required error as instructed.
*
- * If report_waiting is true, "waiting" is reported in PS display if necessary.
- * If the caller has already reported that, report_waiting should be false.
- * Otherwise, "waiting" is reported twice unexpectedly.
+ * If report_waiting is true, "waiting" is reported in PS display and log
+ * the waits for the recovery conflict if necessary. If the caller has already
+ * reported that, report_waiting should be false. Otherwise, both are reported
+ * twice unexpectedly.
*/
static void
ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
@@ -232,6 +290,7 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
{
TimestampTz waitStart = 0;
char *new_status;
+ bool logged = false;
/* Fast exit, to avoid a kernel call if there's no work to be done. */
if (!VirtualTransactionIdIsValid(*waitlist))
@@ -249,13 +308,28 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
/* wait until the virtual xid is gone */
while (!VirtualXactLock(*waitlist, false))
{
+ TimestampTz ts = 0;
+
+ if (report_waiting && (!logged || new_status == NULL))
+ ts = GetCurrentTimestamp();
+
+ /*
+ * Log the recovery conflict. To prevent to log the same report
+ * multiple times, we log the recovery conflict only once.
+ */
+ if (log_recovery_conflict_waits && !logged & report_waiting &&
+ TimestampDifferenceExceeds(waitStart, ts, DeadlockTimeout))
+ {
+ LogRecoveryConflict(reason, waitStart, waitlist);
+ logged = true;
+ }
+
/*
* Report via ps if we have been waiting for more than 500 msec
* (should that be configurable?)
*/
if (update_process_title && new_status == NULL && report_waiting &&
- TimestampDifferenceExceeds(waitStart, GetCurrentTimestamp(),
- 500))
+ TimestampDifferenceExceeds(waitStart, ts, 500))
{
const char *old_status;
int len;
@@ -408,7 +482,35 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag)
ltime = GetStandbyLimitTime();
- if (GetCurrentTimestamp() >= ltime)
+ if (ltime == 0 || GetCurrentTimestamp() < ltime)
+ {
+ EnableTimeoutParams timeouts[2];
+ int cnt = 0;
+
+ /*
+ * Wait (or wait again) until ltime
+ */
+ if (ltime != 0)
+ {
+ timeouts[cnt].id = STANDBY_LOCK_TIMEOUT;
+ timeouts[cnt].type = TMPARAM_AT;
+ timeouts[cnt].fin_time = ltime;
+ cnt++;
+ }
+
+ /* Also, set deadlock timeout for logging purpose if necessary */
+ if (log_recovery_conflict_waits)
+ {
+ timeouts[cnt].id = STANDBY_TIMEOUT;
+ timeouts[cnt].type = TMPARAM_AFTER;
+ timeouts[cnt].delay_ms = DeadlockTimeout;
+ cnt++;
+ }
+
+ if (cnt > 0)
+ enable_timeouts(timeouts, cnt);
+ }
+ else
{
/*
* We're already behind, so clear a path as quickly as possible.
@@ -419,26 +521,15 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag)
/*
* Prevent ResolveRecoveryConflictWithVirtualXIDs() from reporting
- * "waiting" in PS display by disabling its argument report_waiting
- * because the caller, WaitOnLock(), has already reported that.
+ * "waiting" in PS display and logging recovery conflict by disabling
+ * its argument report_waiting because the callers have already reported
+ * that.
*/
ResolveRecoveryConflictWithVirtualXIDs(backends,
PROCSIG_RECOVERY_CONFLICT_LOCK,
PG_WAIT_LOCK | locktag.locktag_type,
false);
}
- else
- {
- /*
- * Wait (or wait again) until ltime
- */
- EnableTimeoutParams timeouts[1];
-
- timeouts[0].id = STANDBY_LOCK_TIMEOUT;
- timeouts[0].type = TMPARAM_AT;
- timeouts[0].fin_time = ltime;
- enable_timeouts(timeouts, 1);
- }
/* Wait to be signaled by the release of the Relation Lock */
ProcWaitForSignal(PG_WAIT_LOCK | locktag.locktag_type);
@@ -1116,3 +1207,36 @@ LogStandbyInvalidations(int nmsgs, SharedInvalidationMessage *msgs,
nmsgs * sizeof(SharedInvalidationMessage));
XLogInsert(RM_STANDBY_ID, XLOG_INVALIDATIONS);
}
+
+/* Return the description of recovery conflict */
+static const char *
+get_recovery_conflict_desc(ProcSignalReason reason)
+{
+ const char *reasonDesc = gettext_noop("unknown reason");
+
+ switch (reason)
+ {
+ case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
+ reasonDesc = gettext_noop("recovery is still waiting recovery conflict on buffer pin");
+ break;
+ case PROCSIG_RECOVERY_CONFLICT_LOCK:
+ reasonDesc = gettext_noop("recovery is still waiting recovery conflict on lock");
+ break;
+ case PROCSIG_RECOVERY_CONFLICT_TABLESPACE:
+ reasonDesc = gettext_noop("recovery is still waiting recovery conflict on tablespace");
+ break;
+ case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
+ reasonDesc = gettext_noop("recovery is still waiting recovery conflict on snapshot");
+ break;
+ case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+ reasonDesc = gettext_noop("recovery is still waiting recovery conflict on buffer deadlock");
+ break;
+ case PROCSIG_RECOVERY_CONFLICT_DATABASE:
+ reasonDesc = gettext_noop("recovery is still waiting recovery conflict on database");
+ break;
+ default:
+ break;
+ }
+
+ return reasonDesc;
+}
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 88566bd9fa..66d156d8e1 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -1068,6 +1068,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
ProcWaitStatus myWaitStatus;
PGPROC *proc;
PGPROC *leader = MyProc->lockGroupLeader;
+ TimestampTz standbyWaitStart = 0;
int i;
/*
@@ -1260,6 +1261,8 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
else
enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout);
}
+ else
+ standbyWaitStart = GetCurrentTimestamp();
/*
* If somebody wakes us between LWLockRelease and WaitLatch, the latch
@@ -1281,6 +1284,25 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
{
/* Set a timer and wait for that or for the Lock to be granted */
ResolveRecoveryConflictWithLock(locallock->tag.lock);
+
+ if (log_recovery_conflict_waits &&
+ TimestampDifferenceExceeds(standbyWaitStart, GetCurrentTimestamp(),
+ DeadlockTimeout))
+ {
+ VirtualTransactionId *vxids;
+ int cnt;
+
+ vxids = GetLockConflicts(&locallock->tag.lock,
+ AccessExclusiveLock, &cnt);
+
+ /*
+ * Log the recovery conflict if there is still virtual transaction
+ * conflicting with the lock.
+ */
+ if (cnt > 0)
+ LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK,
+ standbyWaitStart, vxids);
+ }
}
else
{
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 596bcb7b84..e55a28b1cd 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1570,7 +1570,15 @@ static struct config_bool ConfigureNamesBool[] =
false,
NULL, NULL, NULL
},
-
+ {
+ {"log_recovery_conflict_waits", PGC_SIGHUP, LOGGING_WHAT,
+ gettext_noop("Logs standby recovery conflict waits."),
+ NULL
+ },
+ &log_recovery_conflict_waits,
+ false,
+ NULL, NULL, NULL
+ },
{
{"log_hostname", PGC_SIGHUP, LOGGING_WHAT,
gettext_noop("Logs the host name in the connection logs."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 9cb571f7cc..5625681e8a 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -553,6 +553,8 @@
# %% = '%'
# e.g. '<%u%%%d> '
#log_lock_waits = off # log lock waits >= deadlock_timeout
+#log_recovery_conflict_waits = off # log standby recovery conflict waits
+ # >= deadlock_timeout
#log_parameter_max_length = -1 # when logging statements, limit logged
# bind-parameter values to N bytes;
# -1 means print in full, 0 disables
diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h
index faaf1d3817..7df0077a8e 100644
--- a/src/include/storage/standby.h
+++ b/src/include/storage/standby.h
@@ -23,6 +23,7 @@
extern int vacuum_defer_cleanup_age;
extern int max_standby_archive_delay;
extern int max_standby_streaming_delay;
+extern bool log_recovery_conflict_waits;
extern void InitRecoveryTransactionEnvironment(void);
extern void ShutdownRecoveryTransactionEnvironment(void);
@@ -38,6 +39,8 @@ extern void CheckRecoveryConflictDeadlock(void);
extern void StandbyDeadLockHandler(void);
extern void StandbyTimeoutHandler(void);
extern void StandbyLockTimeoutHandler(void);
+extern void LogRecoveryConflict(ProcSignalReason reason, TimestampTz waitStart,
+ VirtualTransactionId *waitlist);
/*
* Standby Rmgr (RM_STANDBY_ID)