From 585f2ce9e87455ac7ed7f879f002968810b33ab6 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Thu, 30 Jan 2025 14:51:11 +0900 Subject: [PATCH] Fix set of issues with 2PC transaction handling during recovery This addresses two issues: - CLOG lookups could happen at a too early stage of recovery, where these may not be consistent. These are delayed until the end of recovery. - Legit 2PC transactions found as already committed or aborted when scanning the shared memory data could cause inconsistencies in TwoPhaseState, leaving around dangling entries for transactions that should not be there. --- src/include/access/multixact.h | 9 +- src/include/access/twophase.h | 12 +- src/include/access/twophase_rmgr.h | 4 +- src/include/pgstat.h | 5 +- src/include/storage/lock.h | 11 +- src/include/storage/predicate.h | 7 +- src/backend/access/transam/multixact.c | 16 +- src/backend/access/transam/twophase.c | 327 +++++++++++-------- src/backend/access/transam/xact.c | 13 +- src/backend/storage/lmgr/lock.c | 20 +- src/backend/storage/lmgr/predicate.c | 11 +- src/backend/utils/activity/pgstat_relation.c | 4 +- src/test/recovery/t/009_twophase.pl | 140 ++++++++ 13 files changed, 389 insertions(+), 190 deletions(-) diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h index 7ffd256c744..7f24cdbc348 100644 --- a/src/include/access/multixact.h +++ b/src/include/access/multixact.h @@ -11,6 +11,7 @@ #ifndef MULTIXACT_H #define MULTIXACT_H +#include "access/transam.h" #include "access/xlogreader.h" #include "lib/stringinfo.h" #include "storage/sync.h" @@ -119,7 +120,7 @@ extern int multixactmemberssyncfiletag(const FileTag *ftag, char *path); extern void AtEOXact_MultiXact(void); extern void AtPrepare_MultiXact(void); -extern void PostPrepare_MultiXact(TransactionId xid); +extern void PostPrepare_MultiXact(FullTransactionId fxid); extern Size MultiXactShmemSize(void); extern void MultiXactShmemInit(void); @@ -145,11 +146,11 @@ extern void MultiXactAdvanceNextMXact(MultiXactId minMulti, extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB); extern int MultiXactMemberFreezeThreshold(void); -extern void multixact_twophase_recover(TransactionId xid, uint16 info, +extern void multixact_twophase_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); -extern void multixact_twophase_postcommit(TransactionId xid, uint16 info, +extern void multixact_twophase_postcommit(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); -extern void multixact_twophase_postabort(TransactionId xid, uint16 info, +extern void multixact_twophase_postabort(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); extern void multixact_redo(XLogReaderState *record); diff --git a/src/include/access/twophase.h b/src/include/access/twophase.h index 56248c00063..93c5caf8309 100644 --- a/src/include/access/twophase.h +++ b/src/include/access/twophase.h @@ -14,6 +14,7 @@ #ifndef TWOPHASE_H #define TWOPHASE_H +#include "access/transam.h" #include "access/xact.h" #include "access/xlogdefs.h" #include "datatype/timestamp.h" @@ -36,10 +37,10 @@ extern void PostPrepare_Twophase(void); extern TransactionId TwoPhaseGetXidByVirtualXID(VirtualTransactionId vxid, bool *have_more); -extern PGPROC *TwoPhaseGetDummyProc(TransactionId xid, bool lock_held); -extern int TwoPhaseGetDummyProcNumber(TransactionId xid, bool lock_held); +extern PGPROC *TwoPhaseGetDummyProc(FullTransactionId fxid, bool lock_held); +extern int TwoPhaseGetDummyProcNumber(FullTransactionId fxid, bool lock_held); -extern GlobalTransaction MarkAsPreparing(TransactionId xid, const char *gid, +extern GlobalTransaction MarkAsPreparing(FullTransactionId fxid, const char *gid, TimestampTz prepared_at, Oid owner, Oid databaseid); @@ -56,8 +57,9 @@ extern void CheckPointTwoPhase(XLogRecPtr redo_horizon); extern void FinishPreparedTransaction(const char *gid, bool isCommit); -extern void PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, - XLogRecPtr end_lsn, RepOriginId origin_id); +extern void PrepareRedoAdd(FullTransactionId fxid, char *buf, + XLogRecPtr start_lsn, XLogRecPtr end_lsn, + RepOriginId origin_id); extern void PrepareRedoRemove(TransactionId xid, bool giveWarning); extern void restoreTwoPhaseData(void); extern bool LookupGXact(const char *gid, XLogRecPtr prepare_end_lsn, diff --git a/src/include/access/twophase_rmgr.h b/src/include/access/twophase_rmgr.h index bdd2cb7b339..64db7bade6f 100644 --- a/src/include/access/twophase_rmgr.h +++ b/src/include/access/twophase_rmgr.h @@ -14,7 +14,9 @@ #ifndef TWOPHASE_RMGR_H #define TWOPHASE_RMGR_H -typedef void (*TwoPhaseCallback) (TransactionId xid, uint16 info, +#include "access/transam.h" + +typedef void (*TwoPhaseCallback) (FullTransactionId fxid, uint16 info, void *recdata, uint32 len); typedef uint8 TwoPhaseRmgrId; diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 2136239710e..f60824de31c 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -11,6 +11,7 @@ #ifndef PGSTAT_H #define PGSTAT_H +#include "access/transam.h" #include "datatype/timestamp.h" #include "portability/instr_time.h" #include "postmaster/pgarch.h" /* for MAX_XFN_CHARS */ @@ -649,9 +650,9 @@ extern void pgstat_count_heap_delete(Relation rel); extern void pgstat_count_truncate(Relation rel); extern void pgstat_update_heap_dead_tuples(Relation rel, int delta); -extern void pgstat_twophase_postcommit(TransactionId xid, uint16 info, +extern void pgstat_twophase_postcommit(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); -extern void pgstat_twophase_postabort(TransactionId xid, uint16 info, +extern void pgstat_twophase_postabort(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); extern PgStat_StatTabEntry *pgstat_fetch_stat_tabentry(Oid relid); diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index cc1f6e78c39..b196b8a1f7f 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -18,6 +18,7 @@ #error "lock.h may not be included from frontend code" #endif +#include "access/transam.h" #include "lib/ilist.h" #include "storage/lockdefs.h" #include "storage/lwlock.h" @@ -577,7 +578,7 @@ extern bool LockHasWaiters(const LOCKTAG *locktag, extern VirtualTransactionId *GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode, int *countp); extern void AtPrepare_Locks(void); -extern void PostPrepare_Locks(TransactionId xid); +extern void PostPrepare_Locks(FullTransactionId fxid); extern bool LockCheckConflicts(LockMethod lockMethodTable, LOCKMODE lockmode, LOCK *lock, PROCLOCK *proclock); @@ -591,13 +592,13 @@ extern BlockedProcsData *GetBlockerStatusData(int blocked_pid); extern xl_standby_lock *GetRunningTransactionLocks(int *nlocks); extern const char *GetLockmodeName(LOCKMETHODID lockmethodid, LOCKMODE mode); -extern void lock_twophase_recover(TransactionId xid, uint16 info, +extern void lock_twophase_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); -extern void lock_twophase_postcommit(TransactionId xid, uint16 info, +extern void lock_twophase_postcommit(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); -extern void lock_twophase_postabort(TransactionId xid, uint16 info, +extern void lock_twophase_postabort(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); -extern void lock_twophase_standby_recover(TransactionId xid, uint16 info, +extern void lock_twophase_standby_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); extern DeadLockState DeadLockCheck(PGPROC *proc); diff --git a/src/include/storage/predicate.h b/src/include/storage/predicate.h index 14ee9b94a2f..bdd5cdd3dbf 100644 --- a/src/include/storage/predicate.h +++ b/src/include/storage/predicate.h @@ -14,6 +14,7 @@ #ifndef PREDICATE_H #define PREDICATE_H +#include "access/transam.h" #include "storage/lock.h" #include "utils/relcache.h" #include "utils/snapshot.h" @@ -71,9 +72,9 @@ extern void PreCommit_CheckForSerializationFailure(void); /* two-phase commit support */ extern void AtPrepare_PredicateLocks(void); -extern void PostPrepare_PredicateLocks(TransactionId xid); -extern void PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit); -extern void predicatelock_twophase_recover(TransactionId xid, uint16 info, +extern void PostPrepare_PredicateLocks(FullTransactionId fxid); +extern void PredicateLockTwoPhaseFinish(FullTransactionId xid, bool isCommit); +extern void predicatelock_twophase_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); /* parallel query support */ diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index b7b47ef076a..b51221cd1da 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -1839,7 +1839,7 @@ AtPrepare_MultiXact(void) * Clean up after successful PREPARE TRANSACTION */ void -PostPrepare_MultiXact(TransactionId xid) +PostPrepare_MultiXact(FullTransactionId fxid) { MultiXactId myOldestMember; @@ -1850,7 +1850,7 @@ PostPrepare_MultiXact(TransactionId xid) myOldestMember = OldestMemberMXactId[MyProcNumber]; if (MultiXactIdIsValid(myOldestMember)) { - ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(xid, false); + ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, false); /* * Even though storing MultiXactId is atomic, acquire lock to make @@ -1888,10 +1888,10 @@ PostPrepare_MultiXact(TransactionId xid) * Recover the state of a prepared transaction at startup */ void -multixact_twophase_recover(TransactionId xid, uint16 info, +multixact_twophase_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { - ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(xid, false); + ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, false); MultiXactId oldestMember; /* @@ -1909,10 +1909,10 @@ multixact_twophase_recover(TransactionId xid, uint16 info, * Similar to AtEOXact_MultiXact but for COMMIT PREPARED */ void -multixact_twophase_postcommit(TransactionId xid, uint16 info, +multixact_twophase_postcommit(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { - ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(xid, true); + ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, true); Assert(len == sizeof(MultiXactId)); @@ -1924,10 +1924,10 @@ multixact_twophase_postcommit(TransactionId xid, uint16 info, * This is actually just the same as the COMMIT case. */ void -multixact_twophase_postabort(TransactionId xid, uint16 info, +multixact_twophase_postabort(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { - multixact_twophase_postcommit(xid, info, recdata, len); + multixact_twophase_postcommit(fxid, info, recdata, len); } /* diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 4f78c1dc579..20b0756c742 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -159,7 +159,7 @@ typedef struct GlobalTransactionData */ XLogRecPtr prepare_start_lsn; /* XLOG offset of prepare record start */ XLogRecPtr prepare_end_lsn; /* XLOG offset of prepare record end */ - TransactionId xid; /* The GXACT id */ + FullTransactionId fxid; /* The GXACT full xid */ Oid owner; /* ID of user that executed the xact */ ProcNumber locking_backend; /* backend currently working on the xact */ @@ -197,6 +197,7 @@ static GlobalTransaction MyLockedGxact = NULL; static bool twophaseExitRegistered = false; +static void PrepareRedoRemoveFull(FullTransactionId fxid, bool giveWarning); static void RecordTransactionCommitPrepared(TransactionId xid, int nchildren, TransactionId *children, @@ -216,19 +217,19 @@ static void RecordTransactionAbortPrepared(TransactionId xid, int nstats, xl_xact_stats_item *stats, const char *gid); -static void ProcessRecords(char *bufptr, TransactionId xid, +static void ProcessRecords(char *bufptr, FullTransactionId fxid, const TwoPhaseCallback callbacks[]); static void RemoveGXact(GlobalTransaction gxact); static void XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len); -static char *ProcessTwoPhaseBuffer(TransactionId xid, +static char *ProcessTwoPhaseBuffer(FullTransactionId fxid, XLogRecPtr prepare_start_lsn, bool fromdisk, bool setParent, bool setNextXid); -static void MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, +static void MarkAsPreparingGuts(GlobalTransaction gxact, FullTransactionId fxid, const char *gid, TimestampTz prepared_at, Oid owner, Oid databaseid); -static void RemoveTwoPhaseFile(TransactionId xid, bool giveWarning); -static void RecreateTwoPhaseFile(TransactionId xid, void *content, int len); +static void RemoveTwoPhaseFile(FullTransactionId fxid, bool giveWarning); +static void RecreateTwoPhaseFile(FullTransactionId fxid, void *content, int len); /* * Initialization of shared memory @@ -356,7 +357,7 @@ PostPrepare_Twophase(void) * Reserve the GID for the given transaction. */ GlobalTransaction -MarkAsPreparing(TransactionId xid, const char *gid, +MarkAsPreparing(FullTransactionId fxid, const char *gid, TimestampTz prepared_at, Oid owner, Oid databaseid) { GlobalTransaction gxact; @@ -407,7 +408,7 @@ MarkAsPreparing(TransactionId xid, const char *gid, gxact = TwoPhaseState->freeGXacts; TwoPhaseState->freeGXacts = gxact->next; - MarkAsPreparingGuts(gxact, xid, gid, prepared_at, owner, databaseid); + MarkAsPreparingGuts(gxact, fxid, gid, prepared_at, owner, databaseid); gxact->ondisk = false; @@ -430,11 +431,13 @@ MarkAsPreparing(TransactionId xid, const char *gid, * Note: This function should be called with appropriate locks held. */ static void -MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid, - TimestampTz prepared_at, Oid owner, Oid databaseid) +MarkAsPreparingGuts(GlobalTransaction gxact, FullTransactionId fxid, + const char *gid, TimestampTz prepared_at, Oid owner, + Oid databaseid) { PGPROC *proc; int i; + TransactionId xid = XidFromFullTransactionId(fxid); Assert(LWLockHeldByMeInMode(TwoPhaseStateLock, LW_EXCLUSIVE)); @@ -479,7 +482,7 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid, proc->subxidStatus.count = 0; gxact->prepared_at = prepared_at; - gxact->xid = xid; + gxact->fxid = fxid; gxact->owner = owner; gxact->locking_backend = MyProcNumber; gxact->valid = false; @@ -797,12 +800,12 @@ pg_prepared_xact(PG_FUNCTION_ARGS) * caller had better hold it. */ static GlobalTransaction -TwoPhaseGetGXact(TransactionId xid, bool lock_held) +TwoPhaseGetGXact(FullTransactionId fxid, bool lock_held) { GlobalTransaction result = NULL; int i; - static TransactionId cached_xid = InvalidTransactionId; + static FullTransactionId cached_fxid = {0}; static GlobalTransaction cached_gxact = NULL; Assert(!lock_held || LWLockHeldByMe(TwoPhaseStateLock)); @@ -811,7 +814,7 @@ TwoPhaseGetGXact(TransactionId xid, bool lock_held) * During a recovery, COMMIT PREPARED, or ABORT PREPARED, we'll be called * repeatedly for the same XID. We can save work with a simple cache. */ - if (xid == cached_xid) + if (FullTransactionIdEquals(fxid, cached_fxid)) return cached_gxact; if (!lock_held) @@ -821,7 +824,7 @@ TwoPhaseGetGXact(TransactionId xid, bool lock_held) { GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; - if (gxact->xid == xid) + if (FullTransactionIdEquals(gxact->fxid, fxid)) { result = gxact; break; @@ -832,9 +835,10 @@ TwoPhaseGetGXact(TransactionId xid, bool lock_held) LWLockRelease(TwoPhaseStateLock); if (result == NULL) /* should not happen */ - elog(ERROR, "failed to find GlobalTransaction for xid %u", xid); + elog(ERROR, "failed to find GlobalTransaction for xid %u", + XidFromFullTransactionId(fxid)); - cached_xid = xid; + cached_fxid = fxid; cached_gxact = result; return result; @@ -881,7 +885,7 @@ TwoPhaseGetXidByVirtualXID(VirtualTransactionId vxid, *have_more = true; break; } - result = gxact->xid; + result = XidFromFullTransactionId(gxact->fxid); } } @@ -892,7 +896,7 @@ TwoPhaseGetXidByVirtualXID(VirtualTransactionId vxid, /* * TwoPhaseGetDummyProcNumber - * Get the dummy proc number for prepared transaction specified by XID + * Get the dummy proc number for prepared transaction * * Dummy proc numbers are similar to proc numbers of real backends. They * start at MaxBackends, and are unique across all currently active real @@ -900,24 +904,24 @@ TwoPhaseGetXidByVirtualXID(VirtualTransactionId vxid, * TwoPhaseStateLock will not be taken, so the caller had better hold it. */ ProcNumber -TwoPhaseGetDummyProcNumber(TransactionId xid, bool lock_held) +TwoPhaseGetDummyProcNumber(FullTransactionId fxid, bool lock_held) { - GlobalTransaction gxact = TwoPhaseGetGXact(xid, lock_held); + GlobalTransaction gxact = TwoPhaseGetGXact(fxid, lock_held); return gxact->pgprocno; } /* * TwoPhaseGetDummyProc - * Get the PGPROC that represents a prepared transaction specified by XID + * Get the PGPROC that represents a prepared transaction * * If lock_held is set to true, TwoPhaseStateLock will not be taken, so the * caller had better hold it. */ PGPROC * -TwoPhaseGetDummyProc(TransactionId xid, bool lock_held) +TwoPhaseGetDummyProc(FullTransactionId fxid, bool lock_held) { - GlobalTransaction gxact = TwoPhaseGetGXact(xid, lock_held); + GlobalTransaction gxact = TwoPhaseGetGXact(fxid, lock_held); return GetPGProcByNumber(gxact->pgprocno); } @@ -942,10 +946,8 @@ AdjustToFullTransactionId(TransactionId xid) } static inline int -TwoPhaseFilePath(char *path, TransactionId xid) +TwoPhaseFilePath(char *path, FullTransactionId fxid) { - FullTransactionId fxid = AdjustToFullTransactionId(xid); - return snprintf(path, MAXPGPATH, TWOPHASE_DIR "/%08X%08X", EpochFromFullTransactionId(fxid), XidFromFullTransactionId(fxid)); @@ -1049,7 +1051,7 @@ void StartPrepare(GlobalTransaction gxact) { PGPROC *proc = GetPGProcByNumber(gxact->pgprocno); - TransactionId xid = gxact->xid; + TransactionId xid = XidFromFullTransactionId(gxact->fxid); TwoPhaseFileHeader hdr; TransactionId *children; RelFileLocator *commitrels; @@ -1281,10 +1283,11 @@ RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info, * If it looks OK (has a valid magic number and CRC), return the palloc'd * contents of the file, issuing an error when finding corrupted data. If * missing_ok is true, which indicates that missing files can be safely - * ignored, then return NULL. This state can be reached when doing recovery. + * ignored, then return NULL. This state can be reached when doing recovery + * after discarding two-phase files from frozen epochs. */ static char * -ReadTwoPhaseFile(TransactionId xid, bool missing_ok) +ReadTwoPhaseFile(FullTransactionId fxid, bool missing_ok) { char path[MAXPGPATH]; char *buf; @@ -1296,7 +1299,7 @@ ReadTwoPhaseFile(TransactionId xid, bool missing_ok) file_crc; int r; - TwoPhaseFilePath(path, xid); + TwoPhaseFilePath(path, fxid); fd = OpenTransientFile(path, O_RDONLY | PG_BINARY); if (fd < 0) @@ -1461,6 +1464,7 @@ StandbyTransactionIdIsPrepared(TransactionId xid) char *buf; TwoPhaseFileHeader *hdr; bool result; + FullTransactionId fxid; Assert(TransactionIdIsValid(xid)); @@ -1468,7 +1472,8 @@ StandbyTransactionIdIsPrepared(TransactionId xid) return false; /* nothing to do */ /* Read and validate file */ - buf = ReadTwoPhaseFile(xid, true); + fxid = FullTransactionIdFromAllowableAt(TransamVariables->nextXid, xid); + buf = ReadTwoPhaseFile(fxid, true); if (buf == NULL) return false; @@ -1488,6 +1493,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit) { GlobalTransaction gxact; PGPROC *proc; + FullTransactionId fxid; TransactionId xid; bool ondisk; char *buf; @@ -1509,7 +1515,8 @@ FinishPreparedTransaction(const char *gid, bool isCommit) */ gxact = LockGXact(gid, GetUserId()); proc = GetPGProcByNumber(gxact->pgprocno); - xid = gxact->xid; + fxid = gxact->fxid; + xid = XidFromFullTransactionId(fxid); /* * Read and validate 2PC state data. State data will typically be stored @@ -1517,7 +1524,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit) * to disk if for some reason they have lived for a long time. */ if (gxact->ondisk) - buf = ReadTwoPhaseFile(xid, false); + buf = ReadTwoPhaseFile(fxid, false); else XlogReadTwoPhaseData(gxact->prepare_start_lsn, &buf, NULL); @@ -1636,11 +1643,11 @@ FinishPreparedTransaction(const char *gid, bool isCommit) /* And now do the callbacks */ if (isCommit) - ProcessRecords(bufptr, xid, twophase_postcommit_callbacks); + ProcessRecords(bufptr, fxid, twophase_postcommit_callbacks); else - ProcessRecords(bufptr, xid, twophase_postabort_callbacks); + ProcessRecords(bufptr, fxid, twophase_postabort_callbacks); - PredicateLockTwoPhaseFinish(xid, isCommit); + PredicateLockTwoPhaseFinish(fxid, isCommit); /* * Read this value while holding the two-phase lock, as the on-disk 2PC @@ -1664,7 +1671,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit) * And now we can clean up any files we may have left. */ if (ondisk) - RemoveTwoPhaseFile(xid, true); + RemoveTwoPhaseFile(fxid, true); MyLockedGxact = NULL; @@ -1677,7 +1684,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit) * Scan 2PC state data in memory and call the indicated callbacks for each 2PC record. */ static void -ProcessRecords(char *bufptr, TransactionId xid, +ProcessRecords(char *bufptr, FullTransactionId fxid, const TwoPhaseCallback callbacks[]) { for (;;) @@ -1691,7 +1698,7 @@ ProcessRecords(char *bufptr, TransactionId xid, bufptr += MAXALIGN(sizeof(TwoPhaseRecordOnDisk)); if (callbacks[record->rmid] != NULL) - callbacks[record->rmid] (xid, record->info, + callbacks[record->rmid] (fxid, record->info, (void *) bufptr, record->len); bufptr += MAXALIGN(record->len); @@ -1699,17 +1706,21 @@ ProcessRecords(char *bufptr, TransactionId xid, } /* - * Remove the 2PC file for the specified XID. + * Remove the 2PC file. * * If giveWarning is false, do not complain about file-not-present; * this is an expected case during WAL replay. + * + * This routine is used at early stages at recovery where future and + * past orphaned files are checked, hence the FullTransactionId to build + * a complete file name fit for the removal. */ static void -RemoveTwoPhaseFile(TransactionId xid, bool giveWarning) +RemoveTwoPhaseFile(FullTransactionId fxid, bool giveWarning) { char path[MAXPGPATH]; - TwoPhaseFilePath(path, xid); + TwoPhaseFilePath(path, fxid); if (unlink(path)) if (errno != ENOENT || giveWarning) ereport(WARNING, @@ -1724,7 +1735,7 @@ RemoveTwoPhaseFile(TransactionId xid, bool giveWarning) * Note: content and len don't include CRC. */ static void -RecreateTwoPhaseFile(TransactionId xid, void *content, int len) +RecreateTwoPhaseFile(FullTransactionId fxid, void *content, int len) { char path[MAXPGPATH]; pg_crc32c statefile_crc; @@ -1735,7 +1746,7 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len) COMP_CRC32C(statefile_crc, content, len); FIN_CRC32C(statefile_crc); - TwoPhaseFilePath(path, xid); + TwoPhaseFilePath(path, fxid); fd = OpenTransientFile(path, O_CREAT | O_TRUNC | O_WRONLY | PG_BINARY); @@ -1847,7 +1858,7 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon) int len; XlogReadTwoPhaseData(gxact->prepare_start_lsn, &buf, &len); - RecreateTwoPhaseFile(gxact->xid, buf, len); + RecreateTwoPhaseFile(gxact->fxid, buf, len); gxact->ondisk = true; gxact->prepare_start_lsn = InvalidXLogRecPtr; gxact->prepare_end_lsn = InvalidXLogRecPtr; @@ -1883,13 +1894,16 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon) * Scan pg_twophase and fill TwoPhaseState depending on the on-disk data. * This is called once at the beginning of recovery, saving any extra * lookups in the future. Two-phase files that are newer than the - * minimum XID horizon are discarded on the way. + * minimum XID horizon are discarded on the way, as much as files that + * are older than the oldest XID horizon. */ void restoreTwoPhaseData(void) { DIR *cldir; struct dirent *clde; + FullTransactionId nextXid = TransamVariables->nextXid; + FullTransactionId oldestXid = AdjustToFullTransactionId(TransamVariables->oldestXid); LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); cldir = AllocateDir(TWOPHASE_DIR); @@ -1898,19 +1912,33 @@ restoreTwoPhaseData(void) if (strlen(clde->d_name) == 16 && strspn(clde->d_name, "0123456789ABCDEF") == 16) { - TransactionId xid; FullTransactionId fxid; char *buf; fxid = FullTransactionIdFromU64(strtou64(clde->d_name, NULL, 16)); - xid = XidFromFullTransactionId(fxid); - buf = ProcessTwoPhaseBuffer(xid, InvalidXLogRecPtr, - true, false, false); - if (buf == NULL) + /* Reject XID if too new or too old */ + if (FullTransactionIdFollowsOrEquals(fxid, nextXid) || + FullTransactionIdPrecedes(fxid, oldestXid)) + { + if (FullTransactionIdFollowsOrEquals(fxid, nextXid)) + ereport(WARNING, + (errmsg("removing future two-phase state file for transaction %u of epoch %u", + XidFromFullTransactionId(fxid), + EpochFromFullTransactionId(fxid)))); + else + ereport(WARNING, + (errmsg("removing past two-phase state file for transaction %u of epoch %u", + XidFromFullTransactionId(fxid), + EpochFromFullTransactionId(fxid)))); + RemoveTwoPhaseFile(fxid, true); continue; + } - PrepareRedoAdd(buf, InvalidXLogRecPtr, + buf = ProcessTwoPhaseBuffer(fxid, InvalidXLogRecPtr, + true, false, false); + + PrepareRedoAdd(fxid, buf, InvalidXLogRecPtr, InvalidXLogRecPtr, InvalidRepOriginId); } } @@ -1969,15 +1997,11 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p) Assert(gxact->inredo); - xid = gxact->xid; - - buf = ProcessTwoPhaseBuffer(xid, + xid = XidFromFullTransactionId(gxact->fxid); + buf = ProcessTwoPhaseBuffer(gxact->fxid, gxact->prepare_start_lsn, gxact->ondisk, false, true); - if (buf == NULL) - continue; - /* * OK, we think this file is valid. Incorporate xid into the * running-minimum result. @@ -2037,19 +2061,15 @@ StandbyRecoverPreparedTransactions(void) LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); for (i = 0; i < TwoPhaseState->numPrepXacts; i++) { - TransactionId xid; char *buf; GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; Assert(gxact->inredo); - xid = gxact->xid; - - buf = ProcessTwoPhaseBuffer(xid, + buf = ProcessTwoPhaseBuffer(gxact->fxid, gxact->prepare_start_lsn, gxact->ondisk, true, false); - if (buf != NULL) - pfree(buf); + pfree(buf); } LWLockRelease(TwoPhaseStateLock); } @@ -2074,19 +2094,51 @@ void RecoverPreparedTransactions(void) { int i; + FullTransactionId *remove_fxids; + int remove_fxids_cnt; LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); + + /* + * Track XIDs candidate for removal if found as already committed or + * aborted, once the first scan through TwoPhaseState is done. This + * cannot happen while going through the entries in TwoPhaseState as + * PrepareRedoRemove() manipulates it. + */ + remove_fxids_cnt = 0; + remove_fxids = (FullTransactionId *) palloc(TwoPhaseState->numPrepXacts * + sizeof(FullTransactionId)); + for (i = 0; i < TwoPhaseState->numPrepXacts; i++) { - TransactionId xid; char *buf; GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; + FullTransactionId fxid = gxact->fxid; char *bufptr; TwoPhaseFileHeader *hdr; TransactionId *subxids; const char *gid; - xid = gxact->xid; + /* + * Is this transaction already aborted or committed? If yes, mark it + * for removal. + * + * Checking CLOGs if these transactions have been already aborted or + * committed is safe at this stage; we are at the end of recovery and + * all WAL has been replayed, all 2PC transactions are reinstated and + * should be tracked in TwoPhaseState. + */ + if (TransactionIdDidCommit(XidFromFullTransactionId(fxid)) || + TransactionIdDidAbort(XidFromFullTransactionId(fxid))) + { + /* + * Track this transaction ID for its removal from the shared + * memory state at the end. + */ + remove_fxids[remove_fxids_cnt] = fxid; + remove_fxids_cnt++; + continue; + } /* * Reconstruct subtrans state for the transaction --- needed because @@ -2097,17 +2149,18 @@ RecoverPreparedTransactions(void) * SubTransSetParent has been set before, if the prepared transaction * generated xid assignment records. */ - buf = ProcessTwoPhaseBuffer(xid, + buf = ProcessTwoPhaseBuffer(gxact->fxid, gxact->prepare_start_lsn, gxact->ondisk, true, false); - if (buf == NULL) - continue; ereport(LOG, - (errmsg("recovering prepared transaction %u from shared memory", xid))); + (errmsg("recovering prepared transaction %u of epoch %u from shared memory", + XidFromFullTransactionId(gxact->fxid), + EpochFromFullTransactionId(gxact->fxid)))); hdr = (TwoPhaseFileHeader *) buf; - Assert(TransactionIdEquals(hdr->xid, xid)); + Assert(TransactionIdEquals(hdr->xid, + XidFromFullTransactionId(gxact->fxid))); bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader)); gid = (const char *) bufptr; bufptr += MAXALIGN(hdr->gidlen); @@ -2123,7 +2176,7 @@ RecoverPreparedTransactions(void) * Recreate its GXACT and dummy PGPROC. But, check whether it was * added in redo and already has a shmem entry for it. */ - MarkAsPreparingGuts(gxact, xid, gid, + MarkAsPreparingGuts(gxact, gxact->fxid, gid, hdr->prepared_at, hdr->owner, hdr->database); @@ -2138,7 +2191,7 @@ RecoverPreparedTransactions(void) /* * Recover other state (notably locks) using resource managers. */ - ProcessRecords(bufptr, xid, twophase_recover_callbacks); + ProcessRecords(bufptr, fxid, twophase_recover_callbacks); /* * Release locks held by the standby process after we process each @@ -2146,7 +2199,7 @@ RecoverPreparedTransactions(void) * additional locks at any one time. */ if (InHotStandby) - StandbyReleaseLockTree(xid, hdr->nsubxacts, subxids); + StandbyReleaseLockTree(hdr->xid, hdr->nsubxacts, subxids); /* * We're done with recovering this transaction. Clear MyLockedGxact, @@ -2159,13 +2212,25 @@ RecoverPreparedTransactions(void) LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); } + for (i = 0; i < remove_fxids_cnt; i++) + { + FullTransactionId fxid = remove_fxids[i]; + + ereport(WARNING, + (errmsg("removing stale two-phase state from memory for transaction %u of epoch %u", + XidFromFullTransactionId(fxid), + EpochFromFullTransactionId(fxid)))); + + PrepareRedoRemoveFull(fxid, true); + } + LWLockRelease(TwoPhaseStateLock); } /* * ProcessTwoPhaseBuffer * - * Given a transaction id, read it either from disk or read it directly + * Given a FullTransactionId, read it either from disk or read it directly * via shmem xlog record pointer using the provided "prepare_start_lsn". * * If setParent is true, set up subtransaction parent linkages. @@ -2174,13 +2239,11 @@ RecoverPreparedTransactions(void) * value scanned. */ static char * -ProcessTwoPhaseBuffer(TransactionId xid, +ProcessTwoPhaseBuffer(FullTransactionId fxid, XLogRecPtr prepare_start_lsn, bool fromdisk, bool setParent, bool setNextXid) { - FullTransactionId nextXid = TransamVariables->nextXid; - TransactionId origNextXid = XidFromFullTransactionId(nextXid); TransactionId *subxids; char *buf; TwoPhaseFileHeader *hdr; @@ -2191,50 +2254,10 @@ ProcessTwoPhaseBuffer(TransactionId xid, if (!fromdisk) Assert(prepare_start_lsn != InvalidXLogRecPtr); - /* Already processed? */ - if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid)) - { - if (fromdisk) - { - ereport(WARNING, - (errmsg("removing stale two-phase state file for transaction %u", - xid))); - RemoveTwoPhaseFile(xid, true); - } - else - { - ereport(WARNING, - (errmsg("removing stale two-phase state from memory for transaction %u", - xid))); - PrepareRedoRemove(xid, true); - } - return NULL; - } - - /* Reject XID if too new */ - if (TransactionIdFollowsOrEquals(xid, origNextXid)) - { - if (fromdisk) - { - ereport(WARNING, - (errmsg("removing future two-phase state file for transaction %u", - xid))); - RemoveTwoPhaseFile(xid, true); - } - else - { - ereport(WARNING, - (errmsg("removing future two-phase state from memory for transaction %u", - xid))); - PrepareRedoRemove(xid, true); - } - return NULL; - } - if (fromdisk) { /* Read and validate file */ - buf = ReadTwoPhaseFile(xid, false); + buf = ReadTwoPhaseFile(fxid, false); } else { @@ -2244,18 +2267,20 @@ ProcessTwoPhaseBuffer(TransactionId xid, /* Deconstruct header */ hdr = (TwoPhaseFileHeader *) buf; - if (!TransactionIdEquals(hdr->xid, xid)) + if (!TransactionIdEquals(hdr->xid, XidFromFullTransactionId(fxid))) { if (fromdisk) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("corrupted two-phase state file for transaction %u", - xid))); + errmsg("corrupted two-phase state file for transaction %u of epoch %u", + XidFromFullTransactionId(fxid), + EpochFromFullTransactionId(fxid)))); else ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("corrupted two-phase state in memory for transaction %u", - xid))); + errmsg("corrupted two-phase state in memory for transaction %u of epoch %u", + XidFromFullTransactionId(fxid), + EpochFromFullTransactionId(fxid)))); } /* @@ -2269,14 +2294,14 @@ ProcessTwoPhaseBuffer(TransactionId xid, { TransactionId subxid = subxids[i]; - Assert(TransactionIdFollows(subxid, xid)); + Assert(TransactionIdFollows(subxid, XidFromFullTransactionId(fxid))); /* update nextXid if needed */ if (setNextXid) AdvanceNextFullTransactionIdPastXid(subxid); if (setParent) - SubTransSetParent(subxid, xid); + SubTransSetParent(subxid, XidFromFullTransactionId(fxid)); } return buf; @@ -2467,8 +2492,9 @@ RecordTransactionAbortPrepared(TransactionId xid, * data, the entry is marked as located on disk. */ void -PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, - XLogRecPtr end_lsn, RepOriginId origin_id) +PrepareRedoAdd(FullTransactionId fxid, char *buf, + XLogRecPtr start_lsn, XLogRecPtr end_lsn, + RepOriginId origin_id) { TwoPhaseFileHeader *hdr = (TwoPhaseFileHeader *) buf; char *bufptr; @@ -2478,6 +2504,10 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, Assert(LWLockHeldByMeInMode(TwoPhaseStateLock, LW_EXCLUSIVE)); Assert(RecoveryInProgress()); + if (!FullTransactionIdIsValid(fxid)) + fxid = FullTransactionIdFromAllowableAt(TransamVariables->nextXid, + hdr->xid); + bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader)); gid = (const char *) bufptr; @@ -2506,7 +2536,8 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, { char path[MAXPGPATH]; - TwoPhaseFilePath(path, hdr->xid); + Assert(InRecovery); + TwoPhaseFilePath(path, fxid); if (access(path, F_OK) == 0) { @@ -2537,7 +2568,7 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, gxact->prepared_at = hdr->prepared_at; gxact->prepare_start_lsn = start_lsn; gxact->prepare_end_lsn = end_lsn; - gxact->xid = hdr->xid; + gxact->fxid = fxid; gxact->owner = hdr->owner; gxact->locking_backend = INVALID_PROC_NUMBER; gxact->valid = false; @@ -2556,11 +2587,13 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, false /* backward */ , false /* WAL */ ); } - elog(DEBUG2, "added 2PC data in shared memory for transaction %u", gxact->xid); + elog(DEBUG2, "added 2PC data in shared memory for transaction %u of epoch %u", + XidFromFullTransactionId(gxact->fxid), + EpochFromFullTransactionId(gxact->fxid)); } /* - * PrepareRedoRemove + * PrepareRedoRemoveFull * * Remove the corresponding gxact entry from TwoPhaseState. Also remove * the 2PC file if a prepared transaction was saved via an earlier checkpoint. @@ -2568,8 +2601,8 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, * Caller must hold TwoPhaseStateLock in exclusive mode, because TwoPhaseState * is updated. */ -void -PrepareRedoRemove(TransactionId xid, bool giveWarning) +static void +PrepareRedoRemoveFull(FullTransactionId fxid, bool giveWarning) { GlobalTransaction gxact = NULL; int i; @@ -2582,7 +2615,7 @@ PrepareRedoRemove(TransactionId xid, bool giveWarning) { gxact = TwoPhaseState->prepXacts[i]; - if (gxact->xid == xid) + if (FullTransactionIdEquals(gxact->fxid, fxid)) { Assert(gxact->inredo); found = true; @@ -2599,12 +2632,28 @@ PrepareRedoRemove(TransactionId xid, bool giveWarning) /* * And now we can clean up any files we may have left. */ - elog(DEBUG2, "removing 2PC data for transaction %u", xid); + elog(DEBUG2, "removing 2PC data for transaction %u of epoch %u ", + XidFromFullTransactionId(fxid), + EpochFromFullTransactionId(fxid)); + if (gxact->ondisk) - RemoveTwoPhaseFile(xid, giveWarning); + RemoveTwoPhaseFile(fxid, giveWarning); + RemoveGXact(gxact); } +/* + * Wrapper of PrepareRedoRemoveFull(), for TransactionIds. + */ +void +PrepareRedoRemove(TransactionId xid, bool giveWarning) +{ + FullTransactionId fxid = + FullTransactionIdFromAllowableAt(TransamVariables->nextXid, xid); + + PrepareRedoRemoveFull(fxid, giveWarning); +} + /* * LookupGXact * Check if the prepared transaction with the given GID, lsn and timestamp @@ -2649,7 +2698,7 @@ LookupGXact(const char *gid, XLogRecPtr prepare_end_lsn, * between publisher and subscriber. */ if (gxact->ondisk) - buf = ReadTwoPhaseFile(gxact->xid, false); + buf = ReadTwoPhaseFile(gxact->fxid, false); else { Assert(gxact->prepare_start_lsn); diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 4cecf630060..89ed5b726c8 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -2460,7 +2460,7 @@ static void PrepareTransaction(void) { TransactionState s = CurrentTransactionState; - TransactionId xid = GetCurrentTransactionId(); + FullTransactionId fxid = GetCurrentFullTransactionId(); GlobalTransaction gxact; TimestampTz prepared_at; @@ -2589,7 +2589,7 @@ PrepareTransaction(void) * Reserve the GID for this transaction. This could fail if the requested * GID is invalid or already in use. */ - gxact = MarkAsPreparing(xid, prepareGID, prepared_at, + gxact = MarkAsPreparing(fxid, prepareGID, prepared_at, GetUserId(), MyDatabaseId); prepareGID = NULL; @@ -2639,7 +2639,7 @@ PrepareTransaction(void) * ProcArrayClearTransaction(). Otherwise, a GetLockConflicts() would * conclude "xact already committed or aborted" for our locks. */ - PostPrepare_Locks(xid); + PostPrepare_Locks(fxid); /* * Let others know about no transaction in progress by me. This has to be @@ -2678,9 +2678,9 @@ PrepareTransaction(void) PostPrepare_smgr(); - PostPrepare_MultiXact(xid); + PostPrepare_MultiXact(fxid); - PostPrepare_PredicateLocks(xid); + PostPrepare_PredicateLocks(fxid); ResourceOwnerRelease(TopTransactionResourceOwner, RESOURCE_RELEASE_LOCKS, @@ -6358,7 +6358,8 @@ xact_redo(XLogReaderState *record) * gxact entry. */ LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); - PrepareRedoAdd(XLogRecGetData(record), + PrepareRedoAdd(InvalidFullTransactionId, + XLogRecGetData(record), record->ReadRecPtr, record->EndRecPtr, XLogRecGetOrigin(record)); diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index e5e7ab55716..7e49c8d7d8a 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -3397,9 +3397,9 @@ AtPrepare_Locks(void) * but that probably costs more cycles. */ void -PostPrepare_Locks(TransactionId xid) +PostPrepare_Locks(FullTransactionId fxid) { - PGPROC *newproc = TwoPhaseGetDummyProc(xid, false); + PGPROC *newproc = TwoPhaseGetDummyProc(fxid, false); HASH_SEQ_STATUS status; LOCALLOCK *locallock; LOCK *lock; @@ -4171,11 +4171,11 @@ DumpAllLocks(void) * and PANIC anyway. */ void -lock_twophase_recover(TransactionId xid, uint16 info, +lock_twophase_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { TwoPhaseLockRecord *rec = (TwoPhaseLockRecord *) recdata; - PGPROC *proc = TwoPhaseGetDummyProc(xid, false); + PGPROC *proc = TwoPhaseGetDummyProc(fxid, false); LOCKTAG *locktag; LOCKMODE lockmode; LOCKMETHODID lockmethodid; @@ -4352,7 +4352,7 @@ lock_twophase_recover(TransactionId xid, uint16 info, * starting up into hot standby mode. */ void -lock_twophase_standby_recover(TransactionId xid, uint16 info, +lock_twophase_standby_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { TwoPhaseLockRecord *rec = (TwoPhaseLockRecord *) recdata; @@ -4371,7 +4371,7 @@ lock_twophase_standby_recover(TransactionId xid, uint16 info, if (lockmode == AccessExclusiveLock && locktag->locktag_type == LOCKTAG_RELATION) { - StandbyAcquireAccessExclusiveLock(xid, + StandbyAcquireAccessExclusiveLock(XidFromFullTransactionId(fxid), locktag->locktag_field1 /* dboid */ , locktag->locktag_field2 /* reloid */ ); } @@ -4384,11 +4384,11 @@ lock_twophase_standby_recover(TransactionId xid, uint16 info, * Find and release the lock indicated by the 2PC record. */ void -lock_twophase_postcommit(TransactionId xid, uint16 info, +lock_twophase_postcommit(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { TwoPhaseLockRecord *rec = (TwoPhaseLockRecord *) recdata; - PGPROC *proc = TwoPhaseGetDummyProc(xid, true); + PGPROC *proc = TwoPhaseGetDummyProc(fxid, true); LOCKTAG *locktag; LOCKMETHODID lockmethodid; LockMethod lockMethodTable; @@ -4410,10 +4410,10 @@ lock_twophase_postcommit(TransactionId xid, uint16 info, * This is actually just the same as the COMMIT case. */ void -lock_twophase_postabort(TransactionId xid, uint16 info, +lock_twophase_postabort(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { - lock_twophase_postcommit(xid, info, recdata, len); + lock_twophase_postcommit(fxid, info, recdata, len); } /* diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index 2c87273e17e..82a58f54901 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -191,7 +191,7 @@ * AtPrepare_PredicateLocks(void); * PostPrepare_PredicateLocks(TransactionId xid); * PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit); - * predicatelock_twophase_recover(TransactionId xid, uint16 info, + * predicatelock_twophase_recover(FullTransactionId fxid, uint16 info, * void *recdata, uint32 len); */ @@ -4846,7 +4846,7 @@ AtPrepare_PredicateLocks(void) * anyway. We only need to clean up our local state. */ void -PostPrepare_PredicateLocks(TransactionId xid) +PostPrepare_PredicateLocks(FullTransactionId fxid) { if (MySerializableXact == InvalidSerializableXact) return; @@ -4869,12 +4869,12 @@ PostPrepare_PredicateLocks(TransactionId xid) * commits or aborts. */ void -PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit) +PredicateLockTwoPhaseFinish(FullTransactionId fxid, bool isCommit) { SERIALIZABLEXID *sxid; SERIALIZABLEXIDTAG sxidtag; - sxidtag.xid = xid; + sxidtag.xid = XidFromFullTransactionId(fxid); LWLockAcquire(SerializableXactHashLock, LW_SHARED); sxid = (SERIALIZABLEXID *) @@ -4896,10 +4896,11 @@ PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit) * Re-acquire a predicate lock belonging to a transaction that was prepared. */ void -predicatelock_twophase_recover(TransactionId xid, uint16 info, +predicatelock_twophase_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { TwoPhasePredicateRecord *record; + TransactionId xid = XidFromFullTransactionId(fxid); Assert(len == sizeof(TwoPhasePredicateRecord)); diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c index 8a3f7d434cf..54500633be5 100644 --- a/src/backend/utils/activity/pgstat_relation.c +++ b/src/backend/utils/activity/pgstat_relation.c @@ -730,7 +730,7 @@ PostPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state) * Load the saved counts into our local pgstats state. */ void -pgstat_twophase_postcommit(TransactionId xid, uint16 info, +pgstat_twophase_postcommit(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata; @@ -766,7 +766,7 @@ pgstat_twophase_postcommit(TransactionId xid, uint16 info, * as aborted. */ void -pgstat_twophase_postabort(TransactionId xid, uint16 info, +pgstat_twophase_postabort(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata; diff --git a/src/test/recovery/t/009_twophase.pl b/src/test/recovery/t/009_twophase.pl index 4b3e0f77dc0..c08d7676d66 100644 --- a/src/test/recovery/t/009_twophase.pl +++ b/src/test/recovery/t/009_twophase.pl @@ -5,6 +5,7 @@ use strict; use warnings FATAL => 'all'; +use File::Copy; use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; use Test::More; @@ -28,6 +29,15 @@ sub configure_and_reload return; } +sub twophase_file_name +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my $epoch = shift; + my $xid = shift; + return sprintf("%08X%08X", $epoch, $xid); +} + # Set up two nodes, which will alternately be primary and replication standby. # Setup london node @@ -572,4 +582,134 @@ my $nsubtrans = $cur_primary->safe_psql('postgres', ); isnt($osubtrans, $nsubtrans, "contents of pg_subtrans/ have changed"); +############################################################################### +# Check handling of already committed or aborted 2PC files at recovery. +# This test does a manual copy of 2PC files created in a running server, +# to cheaply emulate situations that could be found in base backups. +############################################################################### + +# Issue a set of transactions that will be used for this portion of the test: +# - One transaction to hold on the minimum xid horizon at bay. +# - One transaction that will be found as already committed at recovery. +# - One transaction that will be fonnd as already rollbacked at recovery. +$cur_primary->psql( + 'postgres', " + BEGIN; + INSERT INTO t_009_tbl VALUES (40, 'transaction: xid horizon'); + PREPARE TRANSACTION 'xact_009_40'; + BEGIN; + INSERT INTO t_009_tbl VALUES (41, 'transaction: commit-prepared'); + PREPARE TRANSACTION 'xact_009_41'; + BEGIN; + INSERT INTO t_009_tbl VALUES (42, 'transaction: rollback-prepared'); + PREPARE TRANSACTION 'xact_009_42';"); + +# Issue a checkpoint, fixing the XID horizon based on the first transaction, +# flushing to disk the two files to use. +$cur_primary->psql('postgres', "CHECKPOINT"); + +# Get the transaction IDs of the ones to 2PC files to manipulate. +my $commit_prepared_xid = int( + $cur_primary->safe_psql( + 'postgres', + "SELECT transaction FROM pg_prepared_xacts WHERE gid = 'xact_009_41'") +); +my $abort_prepared_xid = int( + $cur_primary->safe_psql( + 'postgres', + "SELECT transaction FROM pg_prepared_xacts WHERE gid = 'xact_009_42'") +); + +# Copy the two-phase files that will be put back later. Assume an +# epoch of 0. +my $commit_prepared_name = twophase_file_name(0, $commit_prepared_xid); +my $abort_prepared_name = twophase_file_name(0, $abort_prepared_xid); + +my $twophase_tmpdir = $PostgreSQL::Test::Utils::tmp_check . '/' . "2pc_files"; +mkdir($twophase_tmpdir); +my $primary_twophase_folder = $cur_primary->data_dir . '/pg_twophase/'; +copy("$primary_twophase_folder/$commit_prepared_name", $twophase_tmpdir); +copy("$primary_twophase_folder/$abort_prepared_name", $twophase_tmpdir); + +# Issue abort/commit prepared. +$cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_41'"); +$cur_primary->psql('postgres', "ROLLBACK PREPARED 'xact_009_42'"); + +# Again checkpoint, to advance the LSN past the point where the two previous +# transaction records would be replayed. +$cur_primary->psql('postgres', "CHECKPOINT"); + +# Take down node. +$cur_primary->teardown_node; + +# Move back the two twophase files. +copy("$twophase_tmpdir/$commit_prepared_name", $primary_twophase_folder); +copy("$twophase_tmpdir/$abort_prepared_name", $primary_twophase_folder); + +# Grab location in logs of primary +my $log_offset = -s $cur_primary->logfile; + +# Start node and check that the two previous files are removed by checking the +# server logs, following the CLOG lookup done at the end of recovery. +$cur_primary->start; + +$cur_primary->log_check( + "two-phase files of committed transactions removed at recovery", + $log_offset, + log_like => [ + qr/removing stale two-phase state from memory for transaction $commit_prepared_xid of epoch 0/, + qr/removing stale two-phase state from memory for transaction $abort_prepared_xid of epoch 0/ + ]); + +# Commit the first transaction. +$cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_40'"); +# After replay, there should be no 2PC transactions. +$cur_primary->psql( + 'postgres', + "SELECT * FROM pg_prepared_xact", + stdout => \$psql_out); +is($psql_out, qq{}, "Check expected pg_prepared_xact data on primary"); +# Data from transactions should be around. +$cur_primary->psql( + 'postgres', + "SELECT * FROM t_009_tbl WHERE id IN (40, 41, 42);", + stdout => \$psql_out); +is( $psql_out, qq{40|transaction: xid horizon +41|transaction: commit-prepared}, + "Check expected table data on primary"); + +############################################################################### +# Check handling of orphaned 2PC files at recovery. +############################################################################### + +$cur_standby->teardown_node; +$cur_primary->teardown_node; + +# Grab location in logs of primary +$log_offset = -s $cur_primary->logfile; + +# Create fake files with a transaction ID large or low enough to be in the +# future or the past, in different epochs, then check that the primary is able +# to start and remove these files at recovery. + +# First bump the epoch with pg_resetwal. +$cur_primary->command_ok( + [ 'pg_resetwal', '-e', 256, '-f', $cur_primary->data_dir ], + 'bump epoch of primary'); + +my $future_2pc_file = + $cur_primary->data_dir . '/pg_twophase/000001FF00000FFF'; +append_to_file $future_2pc_file, ""; +my $past_2pc_file = $cur_primary->data_dir . '/pg_twophase/000000EE00000FFF'; +append_to_file $past_2pc_file, ""; + +$cur_primary->start; +$cur_primary->log_check( + "two-phase files removed at recovery", + $log_offset, + log_like => [ + qr/removing past two-phase state file for transaction 4095 of epoch 238/, + qr/removing future two-phase state file for transaction 4095 of epoch 511/ + ]); + done_testing(); -- 2.47.2