*** a/src/backend/access/transam/twophase.c
--- b/src/backend/access/transam/twophase.c
***************
*** 1719,1724 **** PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
--- 1719,1814 ----
  }
  
  /*
+  * StandbyRecoverPreparedTransactions
+  *
+  * Scan the pg_twophase directory and setup all the required information to
+  * allow standby queries to treat prepared transactions as still active.
+  * This is never called at the end of recovery - we use
+  * RecoverPreparedTransactions() at that point.
+  *
+  * Currently we simply call SubTransSetParent() for any subxids of prepared
+  * transactions.
+  *
+  * can_overwrite is passed through unchanged to SubTransSetParent().
+  *
+  * State files whose XID has already committed or aborted, or that fail
+  * validation, are removed with a WARNING.
+  */
+ void
+ StandbyRecoverPreparedTransactions(bool can_overwrite)
+ {
+ 	DIR		   *cldir;
+ 	struct dirent *clde;
+ 
+ 	cldir = AllocateDir(TWOPHASE_DIR);
+ 	while ((clde = ReadDir(cldir, TWOPHASE_DIR)) != NULL)
+ 	{
+ 		if (strlen(clde->d_name) == 8 &&
+ 			strspn(clde->d_name, "0123456789ABCDEF") == 8)
+ 		{
+ 			TransactionId xid;
+ 			char	   *buf;
+ 			TwoPhaseFileHeader *hdr;
+ 			TransactionId *subxids;
+ 			int			i;
+ 
+ 			xid = (TransactionId) strtoul(clde->d_name, NULL, 16);
+ 
+ 			/* Already processed? */
+ 			if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid))
+ 			{
+ 				ereport(WARNING,
+ 						(errmsg("removing stale two-phase state file \"%s\"",
+ 								clde->d_name)));
+ 				RemoveTwoPhaseFile(xid, true);
+ 				continue;
+ 			}
+ 
+ 			/* Read and validate file */
+ 			buf = ReadTwoPhaseFile(xid, true);
+ 			if (buf == NULL)
+ 			{
+ 				ereport(WARNING,
+ 					  (errmsg("removing corrupt two-phase state file \"%s\"",
+ 							  clde->d_name)));
+ 				RemoveTwoPhaseFile(xid, true);
+ 				continue;
+ 			}
+ 
+ 			/* Deconstruct header */
+ 			hdr = (TwoPhaseFileHeader *) buf;
+ 			if (!TransactionIdEquals(hdr->xid, xid))
+ 			{
+ 				ereport(WARNING,
+ 					  (errmsg("removing corrupt two-phase state file \"%s\"",
+ 							  clde->d_name)));
+ 				RemoveTwoPhaseFile(xid, true);
+ 				pfree(buf);
+ 				continue;
+ 			}
+ 
+ 			/*
+ 			 * Examine subtransaction XIDs ... they should all follow main
+ 			 * XID.  Register each one's parent via SubTransSetParent().
+ 			 */
+ 			subxids = (TransactionId *)
+ 				(buf + MAXALIGN(sizeof(TwoPhaseFileHeader)));
+ 			for (i = 0; i < hdr->nsubxacts; i++)
+ 			{
+ 				TransactionId subxid = subxids[i];
+ 
+ 				Assert(TransactionIdFollows(subxid, xid));
+ 				SubTransSetParent(xid, subxid, can_overwrite);
+ 			}
+ 
+ 			/* Done with the file image; release it as the error path above does */
+ 			pfree(buf);
+ 		}
+ 	}
+ 	FreeDir(cldir);
+ }
+ 
+ /*
   * RecoverPreparedTransactions
   *
   * Scan the pg_twophase directory and reload shared-memory state for each
*** a/src/backend/access/transam/xlog.c
--- b/src/backend/access/transam/xlog.c
***************
*** 494,499 **** static XLogRecPtr minRecoveryPoint;		/* local copy of
--- 494,501 ----
  										 * ControlFile->minRecoveryPoint */
  static bool updateMinRecoveryPoint = true;
  
+ static bool reachedMinRecoveryPoint = false;
+ 
  static bool InRedo = false;
  
  /*
***************
*** 547,552 **** static void ValidateXLOGDirectoryStructure(void);
--- 549,555 ----
  static void CleanupBackupHistory(void);
  static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force);
  static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt);
+ static void CheckRecoveryConsistency(void);
  static bool ValidXLOGHeader(XLogPageHeader hdr, int emode);
  static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt);
  static List *readTimeLineHistory(TimeLineID targetTLI);
***************
*** 5470,5476 **** StartupXLOG(void)
  	uint32		freespace;
  	TransactionId oldestActiveXID;
  	bool		bgwriterLaunched = false;
- 	bool		backendsAllowed = false;
  
  	/*
  	 * Read control file and check XLOG status looks valid.
--- 5473,5478 ----
***************
*** 5718,5723 **** StartupXLOG(void)
--- 5720,5728 ----
  	{
  		int			rmid;
  
+ 		/* use volatile pointer to prevent code rearrangement */
+ 		volatile XLogCtlData *xlogctl = XLogCtl;
+ 
  		/*
  		 * Update pg_control to show that we are recovering and to show the
  		 * selected checkpoint as the place we are starting from. We also mark
***************
*** 5809,5814 **** StartupXLOG(void)
--- 5814,5846 ----
  			StartupMultiXact();
  
  			ProcArrayInitRecoveryInfo(oldestActiveXID);
+ 
+ 			/*
+ 			 * If we're beginning at a shutdown checkpoint, we know that
+ 			 * nothing was running on the master at this point. So fake-up
+ 			 * an empty running-xacts record and use that here and now.
+ 			 * Recover additional standby state for prepared transactions.
+ 			 */
+ 			if (wasShutdown)
+ 			{
+ 				RunningTransactionsData running;
+ 
+ 				/*
+ 				 * Construct a RunningTransactions snapshot representing a shut
+ 				 * down server, with only prepared transactions still alive.
+ 				 * We're never overflowed at this point because all subxids
+ 				 * are listed with their parent prepared transactions.
+ 				 */
+ 				running.xcnt = nxids;
+ 				running.subxid_overflow = false;
+ 				running.nextXid = checkPoint.nextXid;
+ 				running.oldestRunningXid = oldestActiveXID;
+ 				running.xids = xids;
+ 
+ 				ProcArrayApplyRecoveryInfo(&running);
+ 
+ 				StandbyRecoverPreparedTransactions(false);
+ 			}
  		}
  
  		/* Initialize resource managers */
***************
*** 5818,5823 **** StartupXLOG(void)
--- 5850,5885 ----
  				RmgrTable[rmid].rm_startup();
  		}
  
+ 		/* initialize shared replayEndRecPtr and recoveryLastRecPtr */
+ 		SpinLockAcquire(&xlogctl->info_lck);
+ 		xlogctl->replayEndRecPtr = ReadRecPtr;
+ 		xlogctl->recoveryLastRecPtr = ReadRecPtr;
+ 		SpinLockRelease(&xlogctl->info_lck);
+ 
+ 		/*
+ 		 * Let postmaster know we've started redo now, so that it can
+ 		 * launch bgwriter to perform restartpoints.  We don't bother
+ 		 * during crash recovery as restartpoints can only be performed
+ 		 * during archive recovery.  And we'd like to keep crash recovery
+ 		 * simple, to avoid introducing bugs that could stop you from
+ 		 * recovering after crash.
+ 		 *
+ 		 * After this point, we can no longer assume that we're the only
+ 		 * process in addition to postmaster!  Also, fsync requests are
+ 		 * subsequently to be handled by the bgwriter, not locally.
+ 		 */
+ 		if (InArchiveRecovery && IsUnderPostmaster)
+ 		{
+ 			SetForwardFsyncRequests();
+ 			SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);
+ 			bgwriterLaunched = true;
+ 		}
+ 
+ 		/*
+ 		 * Allow read-only connections immediately if we're consistent already.
+ 		 */
+ 		CheckRecoveryConsistency();
+ 
  		/*
  		 * Find the first record that logically follows the checkpoint --- it
  		 * might physically precede it, though.
***************
*** 5837,5854 **** StartupXLOG(void)
  		{
  			bool		recoveryContinue = true;
  			bool		recoveryApply = true;
- 			bool		reachedMinRecoveryPoint = false;
  			ErrorContextCallback errcontext;
  
- 			/* use volatile pointer to prevent code rearrangement */
- 			volatile XLogCtlData *xlogctl = XLogCtl;
- 
- 			/* initialize shared replayEndRecPtr and recoveryLastRecPtr */
- 			SpinLockAcquire(&xlogctl->info_lck);
- 			xlogctl->replayEndRecPtr = ReadRecPtr;
- 			xlogctl->recoveryLastRecPtr = ReadRecPtr;
- 			SpinLockRelease(&xlogctl->info_lck);
- 
  			InRedo = true;
  
  			ereport(LOG,
--- 5899,5906 ----
***************
*** 5856,5880 **** StartupXLOG(void)
  							ReadRecPtr.xlogid, ReadRecPtr.xrecoff)));
  
  			/*
- 			 * Let postmaster know we've started redo now, so that it can
- 			 * launch bgwriter to perform restartpoints.  We don't bother
- 			 * during crash recovery as restartpoints can only be performed
- 			 * during archive recovery.  And we'd like to keep crash recovery
- 			 * simple, to avoid introducing bugs that could you from
- 			 * recovering after crash.
- 			 *
- 			 * After this point, we can no longer assume that we're the only
- 			 * process in addition to postmaster!  Also, fsync requests are
- 			 * subsequently to be handled by the bgwriter, not locally.
- 			 */
- 			if (InArchiveRecovery && IsUnderPostmaster)
- 			{
- 				SetForwardFsyncRequests();
- 				SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);
- 				bgwriterLaunched = true;
- 			}
- 
- 			/*
  			 * main redo apply loop
  			 */
  			do
--- 5908,5913 ----
***************
*** 5903,5934 **** StartupXLOG(void)
  				/* Handle interrupt signals of startup process */
  				HandleStartupProcInterrupts();
  
! 				/*
! 				 * Have we passed our safe starting point?
! 				 */
! 				if (!reachedMinRecoveryPoint &&
! 					XLByteLE(minRecoveryPoint, EndRecPtr) &&
! 					XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
! 				{
! 					reachedMinRecoveryPoint = true;
! 					ereport(LOG,
! 						(errmsg("consistent recovery state reached at %X/%X",
! 								EndRecPtr.xlogid, EndRecPtr.xrecoff)));
! 				}
! 
! 				/*
! 				 * Have we got a valid starting snapshot that will allow
! 				 * queries to be run? If so, we can tell postmaster that the
! 				 * database is consistent now, enabling connections.
! 				 */
! 				if (standbyState == STANDBY_SNAPSHOT_READY &&
! 					!backendsAllowed &&
! 					reachedMinRecoveryPoint &&
! 					IsUnderPostmaster)
! 				{
! 					backendsAllowed = true;
! 					SendPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT);
! 				}
  
  				/*
  				 * Have we reached our recovery target?
--- 5936,5943 ----
  				/* Handle interrupt signals of startup process */
  				HandleStartupProcInterrupts();
  
! 				/* Allow read-only connections if we're consistent now */
! 				CheckRecoveryConsistency();
  
  				/*
  				 * Have we reached our recovery target?
***************
*** 6278,6283 **** StartupXLOG(void)
--- 6287,6330 ----
  }
  
  /*
+  * Checks if recovery has reached a consistent state: replay has passed
+  * minRecoveryPoint and no backupStartPoint is pending.  Once that holds and a
+  * standby snapshot is ready, tell postmaster, once, to allow connections.
+  */
+ static void
+ CheckRecoveryConsistency(void)
+ {
+ 	static bool		backendsAllowed = false;	/* PMSIGNAL_RECOVERY_CONSISTENT sent? */
+ 
+ 	/*
+ 	 * Have we passed our safe starting point?  Skipped once the flag is set.
+ 	 */
+ 	if (!reachedMinRecoveryPoint &&
+ 		XLByteLE(minRecoveryPoint, EndRecPtr) &&
+ 		XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
+ 	{
+ 		reachedMinRecoveryPoint = true;
+ 		ereport(LOG,
+ 				(errmsg("consistent recovery state reached at %X/%X",
+ 						EndRecPtr.xlogid, EndRecPtr.xrecoff)));
+ 	}
+ 
+ 	/*
+ 	 * Have we got a valid starting snapshot that will allow
+ 	 * queries to be run? If so, we can tell postmaster that the
+ 	 * database is consistent now, enabling connections.
+ 	 */
+ 	if (standbyState == STANDBY_SNAPSHOT_READY &&
+ 		!backendsAllowed &&
+ 		reachedMinRecoveryPoint &&
+ 		IsUnderPostmaster)
+ 	{
+ 		backendsAllowed = true;
+ 		SendPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT);
+ 	}
+ }
+ 
+ /*
   * Is the system still in recovery?
   *
   * Unlike testing InRecovery, this works in any process that's connected to
***************
*** 7521,7533 **** xlog_redo(XLogRecPtr lsn, XLogRecord *record)
  		if (standbyState != STANDBY_DISABLED)
  			CheckRequiredParameterValues(checkPoint);
  
  		if (standbyState >= STANDBY_INITIALIZED)
  		{
  			/*
! 			 * Remove stale transactions, if any.
  			 */
! 			ExpireOldKnownAssignedTransactionIds(checkPoint.nextXid);
! 			StandbyReleaseOldLocks(checkPoint.nextXid);
  		}
  
  		/* ControlFile->checkPointCopy always tracks the latest ckpt XID */
--- 7568,7601 ----
  		if (standbyState != STANDBY_DISABLED)
  			CheckRequiredParameterValues(checkPoint);
  
+ 		/*
+ 		 * If we see a shutdown checkpoint, we know that
+ 		 * nothing was running on the master at this point. So fake-up
+ 		 * an empty running-xacts record and use that here and now.
+ 		 * Recover additional standby state for prepared transactions.
+ 		 */
  		if (standbyState >= STANDBY_INITIALIZED)
  		{
+ 			TransactionId *xids;
+ 			int			nxids;
+ 			TransactionId oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
+ 			RunningTransactionsData running;
+ 
  			/*
! 			 * Construct a RunningTransactions snapshot representing a shut
! 			 * down server, with only prepared transactions still alive.
! 			 * We're never overflowed at this point because all subxids
! 			 * are listed with their parent prepared transactions.
  			 */
! 			running.xcnt = nxids;
! 			running.subxid_overflow = false;
! 			running.nextXid = checkPoint.nextXid;
! 			running.oldestRunningXid = oldestActiveXID;
! 			running.xids = xids;
! 
! 			ProcArrayApplyRecoveryInfo(&running);
! 
! 			StandbyRecoverPreparedTransactions(true);
  		}
  
  		/* ControlFile->checkPointCopy always tracks the latest ckpt XID */
*** a/src/include/access/twophase.h
--- b/src/include/access/twophase.h
***************
*** 44,49 **** extern bool StandbyTransactionIdIsPrepared(TransactionId xid);
--- 44,50 ----
  
  extern TransactionId PrescanPreparedTransactions(TransactionId **xids_p,
  							int *nxids_p);
+ extern void StandbyRecoverPreparedTransactions(bool can_overwrite);
  extern void RecoverPreparedTransactions(void);
  
  extern void RecreateTwoPhaseFile(TransactionId xid, void *content, int len);