From fc1b0f10e6622f3f6e7f0c19212f06db2c553fb3 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Fri, 31 Jan 2025 09:17:43 +0900 Subject: [PATCH v2] Fix issues with 2PC file handling at recovery This addresses two issues: - Avoid CLOG file lookups until we are sure that this is safe. This is now done at the end of recovery. - Avoid mishandling of 2PC shmem state data. Tests are added to show the problems possible. Backpatch-through: 13 --- src/backend/access/transam/twophase.c | 119 +++++++++++++---------- src/test/recovery/t/009_twophase.pl | 131 ++++++++++++++++++++++++++ 2 files changed, 198 insertions(+), 52 deletions(-) diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 95aa8be9c53..4a34e7697e5 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -1877,13 +1877,17 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon) * Scan pg_twophase and fill TwoPhaseState depending on the on-disk data. * This is called once at the beginning of recovery, saving any extra * lookups in the future. Two-phase files that are newer than the - * minimum XID horizon are discarded on the way. + * minimum XID horizon are discarded on the way, as much as files that + * are older than the oldest XID horizon. */ void restoreTwoPhaseData(void) { DIR *cldir; struct dirent *clde; + FullTransactionId nextXid = ShmemVariableCache->nextXid; + TransactionId origNextXid = XidFromFullTransactionId(nextXid); + TransactionId oldestXid = ShmemVariableCache->oldestXid; LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); cldir = AllocateDir(TWOPHASE_DIR); @@ -1897,10 +1901,24 @@ restoreTwoPhaseData(void) xid = (TransactionId) strtoul(clde->d_name, NULL, 16); + /* Reject XID if too new or too old */ + if (TransactionIdFollowsOrEquals(xid, origNextXid) || + TransactionIdPrecedes(xid, oldestXid)) + { + if (TransactionIdFollowsOrEquals(xid, origNextXid)) + ereport(WARNING, + (errmsg("removing future two-phase state file for transaction %u", + xid))); + else + ereport(WARNING, + (errmsg("removing past two-phase state file for transaction %u", + xid))); + RemoveTwoPhaseFile(xid, true); + continue; + } + buf = ProcessTwoPhaseBuffer(xid, InvalidXLogRecPtr, true, false, false); - if (buf == NULL) - continue; PrepareRedoAdd(buf, InvalidXLogRecPtr, InvalidXLogRecPtr, InvalidRepOriginId); @@ -1967,9 +1985,6 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p) gxact->prepare_start_lsn, gxact->ondisk, false, true); - if (buf == NULL) - continue; - /* * OK, we think this file is valid. Incorporate xid into the * running-minimum result. @@ -2040,8 +2055,7 @@ StandbyRecoverPreparedTransactions(void) buf = ProcessTwoPhaseBuffer(xid, gxact->prepare_start_lsn, gxact->ondisk, true, false); - if (buf != NULL) - pfree(buf); + pfree(buf); } LWLockRelease(TwoPhaseStateLock); } @@ -2066,8 +2080,21 @@ void RecoverPreparedTransactions(void) { int i; + TransactionId *remove_xids; + int remove_xids_cnt; LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); + + /* + * Track XIDs candidate for removal if found as already committed or + * aborted, once the first scan through TwoPhaseState is done. This + * cannot happen while going through the entries in TwoPhaseState as + * PrepareRedoRemove() manipulates it. + */ + remove_xids_cnt = 0; + remove_xids = (TransactionId *) palloc(TwoPhaseState->numPrepXacts * + sizeof(TransactionId)); + for (i = 0; i < TwoPhaseState->numPrepXacts; i++) { TransactionId xid; @@ -2080,6 +2107,26 @@ RecoverPreparedTransactions(void) xid = gxact->xid; + /* + * Is this transaction already aborted or committed? If yes, mark it + * for removal. + * + * Checking CLOGs if these transactions have been already aborted or + * committed is safe at this stage; we are at the end of recovery and + * all WAL has been replayed, all 2PC transactions are reinstated and + * should be tracked in TwoPhaseState. + */ + if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid)) + { + /* + * Track this transaction ID for its removal from the shared + * memory state at the end. + */ + remove_xids[remove_xids_cnt] = xid; + remove_xids_cnt++; + continue; + } + /* * Reconstruct subtrans state for the transaction --- needed because * pg_subtrans is not preserved over a restart. Note that we are @@ -2092,8 +2139,6 @@ RecoverPreparedTransactions(void) buf = ProcessTwoPhaseBuffer(xid, gxact->prepare_start_lsn, gxact->ondisk, true, false); - if (buf == NULL) - continue; ereport(LOG, (errmsg("recovering prepared transaction %u from shared memory", xid))); @@ -2151,7 +2196,19 @@ RecoverPreparedTransactions(void) LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); } + for (i = 0; i < remove_xids_cnt; i++) + { + TransactionId xid = remove_xids[i]; + + ereport(WARNING, + (errmsg("removing stale two-phase state from memory for transaction %u", + xid))); + PrepareRedoRemove(xid, true); + } + LWLockRelease(TwoPhaseStateLock); + + pfree(remove_xids); } /* @@ -2171,8 +2228,6 @@ ProcessTwoPhaseBuffer(TransactionId xid, bool fromdisk, bool setParent, bool setNextXid) { - FullTransactionId nextXid = ShmemVariableCache->nextXid; - TransactionId origNextXid = XidFromFullTransactionId(nextXid); TransactionId *subxids; char *buf; TwoPhaseFileHeader *hdr; @@ -2183,46 +2238,6 @@ ProcessTwoPhaseBuffer(TransactionId xid, if (!fromdisk) Assert(prepare_start_lsn != InvalidXLogRecPtr); - /* Already processed? */ - if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid)) - { - if (fromdisk) - { - ereport(WARNING, - (errmsg("removing stale two-phase state file for transaction %u", - xid))); - RemoveTwoPhaseFile(xid, true); - } - else - { - ereport(WARNING, - (errmsg("removing stale two-phase state from memory for transaction %u", - xid))); - PrepareRedoRemove(xid, true); - } - return NULL; - } - - /* Reject XID if too new */ - if (TransactionIdFollowsOrEquals(xid, origNextXid)) - { - if (fromdisk) - { - ereport(WARNING, - (errmsg("removing future two-phase state file for transaction %u", - xid))); - RemoveTwoPhaseFile(xid, true); - } - else - { - ereport(WARNING, - (errmsg("removing future two-phase state from memory for transaction %u", - xid))); - PrepareRedoRemove(xid, true); - } - return NULL; - } - if (fromdisk) { /* Read and validate file */ diff --git a/src/test/recovery/t/009_twophase.pl b/src/test/recovery/t/009_twophase.pl index fe7e8e79802..790f1a1d2ec 100644 --- a/src/test/recovery/t/009_twophase.pl +++ b/src/test/recovery/t/009_twophase.pl @@ -5,6 +5,7 @@ use strict; use warnings; +use File::Copy; use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; use Test::More; @@ -28,6 +29,14 @@ sub configure_and_reload return; } +sub twophase_file_name +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my $xid = shift; + return sprintf("%08X", $xid); +} + # Set up two nodes, which will alternately be primary and replication standby. # Setup london node @@ -528,4 +537,126 @@ is( $psql_out, qq{27|issued to paris}, "Check expected t_009_tbl2 data on standby"); +############################################################################### +# Check handling of already committed or aborted 2PC files at recovery. +# This test does a manual copy of 2PC files created in a running server, +# to cheaply emulate situations that could be found in base backups. +############################################################################### + +# Issue a set of transactions that will be used for this portion of the test: +# - One transaction to hold on the minimum xid horizon at bay. +# - One transaction that will be found as already committed at recovery. +# - One transaction that will be fonnd as already rollbacked at recovery. +$cur_primary->psql( + 'postgres', " + BEGIN; + INSERT INTO t_009_tbl VALUES (40, 'transaction: xid horizon'); + PREPARE TRANSACTION 'xact_009_40'; + BEGIN; + INSERT INTO t_009_tbl VALUES (41, 'transaction: commit-prepared'); + PREPARE TRANSACTION 'xact_009_41'; + BEGIN; + INSERT INTO t_009_tbl VALUES (42, 'transaction: rollback-prepared'); + PREPARE TRANSACTION 'xact_009_42';"); + +# Issue a checkpoint, fixing the XID horizon based on the first transaction, +# flushing to disk the two files to use. +$cur_primary->psql('postgres', "CHECKPOINT"); + +# Get the transaction IDs of the ones to 2PC files to manipulate. +my $commit_prepared_xid = int( + $cur_primary->safe_psql( + 'postgres', + "SELECT transaction FROM pg_prepared_xacts WHERE gid = 'xact_009_41'") +); +my $abort_prepared_xid = int( + $cur_primary->safe_psql( + 'postgres', + "SELECT transaction FROM pg_prepared_xacts WHERE gid = 'xact_009_42'") +); + +# Copy the two-phase files that will be put back later. +my $commit_prepared_name = twophase_file_name($commit_prepared_xid); +my $abort_prepared_name = twophase_file_name($abort_prepared_xid); + +my $twophase_tmpdir = $PostgreSQL::Test::Utils::tmp_check . '/' . "2pc_files"; +mkdir($twophase_tmpdir); +my $primary_twophase_folder = $cur_primary->data_dir . '/pg_twophase/'; +copy("$primary_twophase_folder/$commit_prepared_name", $twophase_tmpdir); +copy("$primary_twophase_folder/$abort_prepared_name", $twophase_tmpdir); + +# Issue abort/commit prepared. +$cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_41'"); +$cur_primary->psql('postgres', "ROLLBACK PREPARED 'xact_009_42'"); + +# Again checkpoint, to advance the LSN past the point where the two previous +# transaction records would be replayed. +$cur_primary->psql('postgres', "CHECKPOINT"); + +# Take down node. +$cur_primary->teardown_node; + +# Move back the two twophase files. +copy("$twophase_tmpdir/$commit_prepared_name", $primary_twophase_folder); +copy("$twophase_tmpdir/$abort_prepared_name", $primary_twophase_folder); + +# Grab location in logs of primary +my $log_offset = -s $cur_primary->logfile; + +# Start node and check that the two previous files are removed by checking the +# server logs, following the CLOG lookup done at the end of recovery. +$cur_primary->start; + +$cur_primary->log_check( + "two-phase files of committed transactions removed at recovery", + $log_offset, + log_like => [ + qr/removing stale two-phase state from memory for transaction $commit_prepared_xid/, + qr/removing stale two-phase state from memory for transaction $abort_prepared_xid/ + ]); + +# Commit the first transaction. +$cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_40'"); +# After replay, there should be no 2PC transactions. +$cur_primary->psql( + 'postgres', + "SELECT * FROM pg_prepared_xact", + stdout => \$psql_out); +is($psql_out, qq{}, "Check expected pg_prepared_xact data on primary"); +# Data from transactions should be around. +$cur_primary->psql( + 'postgres', + "SELECT * FROM t_009_tbl WHERE id IN (40, 41, 42);", + stdout => \$psql_out); +is( $psql_out, qq{40|transaction: xid horizon +41|transaction: commit-prepared}, + "Check expected table data on primary"); + +############################################################################### +# Check handling of orphaned 2PC files at recovery. +############################################################################### + +$cur_primary->teardown_node; + +# Grab location in logs of primary +$log_offset = -s $cur_primary->logfile; + +# Create fake files with a transaction ID large or low enough to be in the +# future or the past, then check that the primary is able to start and remove +# these files at recovery. + +my $future_2pc_file = $cur_primary->data_dir . '/pg_twophase/00FFFFFF'; +append_to_file $future_2pc_file, ""; +my $past_2pc_file = $cur_primary->data_dir . '/pg_twophase/000000FF'; +append_to_file $past_2pc_file, ""; + +$cur_primary->start; +$cur_primary->log_check( + "fake two-phase files removed at recovery", + $log_offset, + log_like => [ + qr/removing future two-phase state file for transaction 16777215/, + qr/removing past two-phase state file for transaction 255/ + ]); + done_testing(); -- 2.47.2