From 24d88cb5d6baf840f981913f224d89c792903462 Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvherre@alvh.no-ip.org>
Date: Mon, 23 Dec 2013 16:07:51 -0300
Subject: [PATCH] Optimize locking a tuple already locked by another subxact

The original coding tried to avoid locking tuples twice when other
subtransactions of the current top transaction already held the
requested lock, but the coding was bogus and some cases were slow, as
reported by Oskari Saarenmaa in bug #8470.

In order to make it work, HeapTupleSatisfiesUpdate is changed so that it
always returns HeapTupleBeingUpdated when the tuple is locked, even if
the locker is the current transaction or another subtransaction of the
current transaction; that case previously returned HeapTupleMayBeUpdated
and was handled specially by callers, but it turns out to be less
error-prone to instead have them check whether the reported lock is held
by the current transaction.  That way, more optimizations are enabled in
compute_new_xmax_infomask.

The reported test case is still slower than 9.2, but it's only 2x slower
not 56x.
---
 contrib/pgrowlocks/pgrowlocks.c        |    9 +--
 src/backend/access/heap/heapam.c       |  107 +++++++++++++++++++++-----------
 src/backend/access/transam/multixact.c |   33 ----------
 src/backend/utils/time/tqual.c         |    6 +-
 src/include/access/multixact.h         |    1 -
 5 files changed, 77 insertions(+), 79 deletions(-)

diff --git a/contrib/pgrowlocks/pgrowlocks.c b/contrib/pgrowlocks/pgrowlocks.c
index 075d781..9ef90cd 100644
--- a/contrib/pgrowlocks/pgrowlocks.c
+++ b/contrib/pgrowlocks/pgrowlocks.c
@@ -138,14 +138,9 @@ pgrowlocks(PG_FUNCTION_ARGS)
 		infomask = tuple->t_data->t_infomask;
 
 		/*
-		 * a tuple is locked if HTSU returns BeingUpdated, and if it returns
-		 * MayBeUpdated but the Xmax is valid and pointing at us.
+		 * A tuple is locked if HTSU returns BeingUpdated.
 		 */
-		if (htsu == HeapTupleBeingUpdated ||
-			(htsu == HeapTupleMayBeUpdated &&
-			 !(infomask & HEAP_XMAX_INVALID) &&
-			 !(infomask & HEAP_XMAX_IS_MULTI) &&
-			 (xmax == GetCurrentTransactionIdIfAny())))
+		if (htsu == HeapTupleBeingUpdated)
 		{
 			char	  **values;
 
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index b5977da..523d216 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2596,6 +2596,30 @@ l1:
 		TransactionId xwait;
 		uint16		infomask;
 
+		/*
+		 * If any subtransaction of the current top transaction already holds
+		 * a lock on this tuple, (and no one else does,) we must skip sleeping
+		 * on the xwait; that would raise an assert about sleeping on our own
+		 * Xid (or sleep indefinitely in a non-asserts-enabled build.)
+		 *
+		 * Note we don't need to do anything about this when the Xmax is a
+		 * multixact, because that code is already prepared to ignore
+		 * subtransactions of the current top transaction; also, trying to do
+		 * anything other than sleeping during a delete when someone else is
+		 * holding a lock on this tuple would be wrong.
+		 *
+		 * This must be done before acquiring our tuple lock, to avoid
+		 * deadlocks with other transaction that are already waiting on the
+		 * lock we hold.
+		 */
+		if (!(tp.t_data->t_infomask & HEAP_XMAX_IS_MULTI) &&
+			TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tp.t_data)))
+		{
+			Assert(HEAP_XMAX_IS_LOCKED_ONLY(tp.t_data->t_infomask));
+
+			goto continue_deletion;
+		}
+
 		/* must copy state data before unlocking buffer */
 		xwait = HeapTupleHeaderGetRawXmax(tp.t_data);
 		infomask = tp.t_data->t_infomask;
@@ -2669,6 +2693,7 @@ l1:
 			UpdateXmaxHintBits(tp.t_data, buffer, xwait);
 		}
 
+continue_deletion:
 		/*
 		 * We may overwrite if previous xmax aborted, or if it committed but
 		 * only locked the tuple without updating it.
@@ -3089,6 +3114,30 @@ l2:
 		 * heap_update directly.
 		 */
 
+		/*
+		 * If any subtransaction of the current top transaction already holds
+		 * a lock on this tuple, (and no one else does,) we must skip sleeping
+		 * on the xwait; that would raise an assert about sleeping on our own
+		 * Xid (or sleep indefinitely in a non-assertion enabled build.)
+		 *
+		 * Note we don't need to do anything about this when the Xmax is a
+		 * multixact, because that code is prepared to ignore subtransactions
+		 * of the current top transaction.
+		 *
+		 * This must be done before acquiring our tuple lock, to avoid
+		 * deadlocks with other transaction that are already waiting on the
+		 * lock we hold.
+		 */
+		if (!(oldtup.t_data->t_infomask & HEAP_XMAX_IS_MULTI) &&
+			TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(oldtup.t_data)))
+		{
+			Assert(HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask));
+
+			can_continue = true;
+			locker_remains = true;
+			goto continue_updating;
+		}
+
 		/* must copy state data before unlocking buffer */
 		xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data);
 		infomask = oldtup.t_data->t_infomask;
@@ -3223,6 +3272,7 @@ l2:
 			}
 		}
 
+continue_updating:
 		result = can_continue ? HeapTupleMayBeUpdated : HeapTupleUpdated;
 	}
 
@@ -3945,7 +3995,7 @@ l3:
 		TransactionId xwait;
 		uint16		infomask;
 		uint16		infomask2;
-		bool		require_sleep;
+		bool		require_sleep = true;
 		ItemPointerData t_ctid;
 
 		/* must copy state data before unlocking buffer */
@@ -3997,6 +4047,22 @@ l3:
 
 			pfree(members);
 		}
+		else if (HEAP_XMAX_IS_LOCKED_ONLY(infomask) &&
+				 TransactionIdIsCurrentTransactionId(xwait))
+		{
+			/*
+			 * If we already hold a lock on this tuple, we can fall through
+			 * to perhaps create a new MultiXact including the previous lock
+			 * and ours, or perhaps just keep the old lock.
+			 */
+			LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
+			/* If xmax changed while we weren't looking, start over */
+			if ((tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ||
+				!TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data),
+									 xwait))
+				goto l3;
+			goto continue_locking;
+		}
 
 		/*
 		 * Acquire tuple lock to establish our priority for the tuple.
@@ -4330,6 +4396,8 @@ l3:
 
 		/* By here, we're certain that we hold buffer exclusive lock again */
 
+continue_locking:
+
 		/*
 		 * We may lock if previous xmax aborted, or if it committed but only
 		 * locked the tuple without updating it; or if we didn't have to wait
@@ -4365,37 +4433,6 @@ failed:
 	old_infomask = tuple->t_data->t_infomask;
 
 	/*
-	 * We might already hold the desired lock (or stronger), possibly under a
-	 * different subtransaction of the current top transaction.  If so, there
-	 * is no need to change state or issue a WAL record.  We already handled
-	 * the case where this is true for xmax being a MultiXactId, so now check
-	 * for cases where it is a plain TransactionId.
-	 *
-	 * Note in particular that this covers the case where we already hold
-	 * exclusive lock on the tuple and the caller only wants key share or
-	 * share lock. It would certainly not do to give up the exclusive lock.
-	 */
-	if (!(old_infomask & (HEAP_XMAX_INVALID |
-						  HEAP_XMAX_COMMITTED |
-						  HEAP_XMAX_IS_MULTI)) &&
-		(mode == LockTupleKeyShare ?
-		 (HEAP_XMAX_IS_KEYSHR_LOCKED(old_infomask) ||
-		  HEAP_XMAX_IS_SHR_LOCKED(old_infomask) ||
-		  HEAP_XMAX_IS_EXCL_LOCKED(old_infomask)) :
-		 mode == LockTupleShare ?
-		 (HEAP_XMAX_IS_SHR_LOCKED(old_infomask) ||
-		  HEAP_XMAX_IS_EXCL_LOCKED(old_infomask)) :
-		 (HEAP_XMAX_IS_EXCL_LOCKED(old_infomask))) &&
-		TransactionIdIsCurrentTransactionId(xmax))
-	{
-		LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
-		/* Probably can't hold tuple lock here, but may as well check */
-		if (have_tuple_lock)
-			UnlockTupleTuplock(relation, tid, mode);
-		return HeapTupleMayBeUpdated;
-	}
-
-	/*
 	 * If this is the first possibly-multixact-able operation in the current
 	 * transaction, set my per-backend OldestMemberMXactId setting. We can be
 	 * certain that the transaction will never become a member of any older
@@ -4622,9 +4659,9 @@ l5:
 		 */
 		if (!MultiXactIdIsRunning(xmax))
 		{
-			if (HEAP_XMAX_IS_LOCKED_ONLY(old_infomask) ||
-				TransactionIdDidAbort(MultiXactIdGetUpdateXid(xmax,
-															  old_infomask)))
+			if ((HEAP_XMAX_IS_LOCKED_ONLY(old_infomask) ||
+				 !TransactionIdDidCommit(MultiXactIdGetUpdateXid(xmax,
+																 old_infomask))))
 			{
 				/*
 				 * Reset these bits and restart; otherwise fall through to
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 03581be..55a8ca7 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -1275,39 +1275,6 @@ retry:
 }
 
 /*
- * MultiXactHasRunningRemoteMembers
- * 		Does the given multixact have still-live members from
- * 		transactions other than our own?
- */
-bool
-MultiXactHasRunningRemoteMembers(MultiXactId multi)
-{
-	MultiXactMember *members;
-	int			nmembers;
-	int			i;
-
-	nmembers = GetMultiXactIdMembers(multi, &members, true);
-	if (nmembers <= 0)
-		return false;
-
-	for (i = 0; i < nmembers; i++)
-	{
-		/* not interested in our own members */
-		if (TransactionIdIsCurrentTransactionId(members[i].xid))
-			continue;
-
-		if (TransactionIdIsInProgress(members[i].xid))
-		{
-			pfree(members);
-			return true;
-		}
-	}
-
-	pfree(members);
-	return false;
-}
-
-/*
  * mxactMemberComparator
  *		qsort comparison function for MultiXactMember
  *
diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c
index 44b4ddc..7f876ad 100644
--- a/src/backend/utils/time/tqual.c
+++ b/src/backend/utils/time/tqual.c
@@ -701,7 +701,7 @@ HeapTupleSatisfiesUpdate(HeapTupleHeader tuple, CommandId curcid,
 
 				if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
 				{
-					if (MultiXactHasRunningRemoteMembers(xmax))
+					if (MultiXactIdIsRunning(xmax))
 						return HeapTupleBeingUpdated;
 					else
 						return HeapTupleMayBeUpdated;
@@ -729,7 +729,7 @@ HeapTupleSatisfiesUpdate(HeapTupleHeader tuple, CommandId curcid,
 				/* updating subtransaction must have aborted */
 				if (!TransactionIdIsCurrentTransactionId(xmax))
 				{
-					if (MultiXactHasRunningRemoteMembers(HeapTupleHeaderGetRawXmax(tuple)))
+					if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple)))
 						return HeapTupleBeingUpdated;
 					return HeapTupleMayBeUpdated;
 				}
@@ -846,7 +846,7 @@ HeapTupleSatisfiesUpdate(HeapTupleHeader tuple, CommandId curcid,
 	if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
 	{
 		if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
-			return HeapTupleMayBeUpdated;
+			return HeapTupleBeingUpdated;
 		if (HeapTupleHeaderGetCmax(tuple) >= curcid)
 			return HeapTupleSelfUpdated;		/* updated after scan started */
 		else
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index 5f82907..0e3b273 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -89,7 +89,6 @@ extern bool MultiXactIdIsRunning(MultiXactId multi);
 extern void MultiXactIdSetOldestMember(void);
 extern int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **xids,
 					  bool allow_old);
-extern bool MultiXactHasRunningRemoteMembers(MultiXactId multi);
 extern bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2);
 extern bool MultiXactIdPrecedesOrEquals(MultiXactId multi1,
 							MultiXactId multi2);
-- 
1.7.10.4

