From 5bbc09abe3e0ec7c5f198dc1ed5ae7037e343ba1 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Thu, 30 Jan 2025 12:04:36 +0900 Subject: [PATCH] Fix set of issues with 2PC transaction handling during recovery This addresses two issues: - CLOG lookups could happen at a too early stage of recovery, where these may not be consistent. These are delayed until the end of recovery. - Legit 2PC transactions found as already committed or aborted when scanning the shared memory data could cause inconsistencies in TwoPhaseState, leaving around dangling entries for transactions that should not be there. --- src/backend/access/transam/twophase.c | 119 +++++++++++++---------- src/test/recovery/t/009_twophase.pl | 135 +++++++++++++++++++++++++- 2 files changed, 201 insertions(+), 53 deletions(-) diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 0a0932cff44..49c760dc19d 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -1863,13 +1863,17 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon) * Scan pg_twophase and fill TwoPhaseState depending on the on-disk data. * This is called once at the beginning of recovery, saving any extra * lookups in the future. Two-phase files that are newer than the - * minimum XID horizon are discarded on the way. + * minimum XID horizon are discarded on the way, as much as files that + * are older than the oldest XID horizon. */ void restoreTwoPhaseData(void) { DIR *cldir; struct dirent *clde; + FullTransactionId nextFullXid = ShmemVariableCache->nextFullXid; + TransactionId origNextXid = XidFromFullTransactionId(nextFullXid); + TransactionId oldestXid = ShmemVariableCache->oldestXid; LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); cldir = AllocateDir(TWOPHASE_DIR); @@ -1883,10 +1887,24 @@ restoreTwoPhaseData(void) xid = (TransactionId) strtoul(clde->d_name, NULL, 16); + /* Reject XID if too new or too old */ + if (TransactionIdFollowsOrEquals(xid, origNextXid) || + TransactionIdPrecedes(xid, oldestXid)) + { + if (TransactionIdFollowsOrEquals(xid, origNextXid)) + ereport(WARNING, + (errmsg("removing future two-phase state file for transaction %u", + xid))); + else + ereport(WARNING, + (errmsg("removing past two-phase state file for transaction %u", + xid))); + RemoveTwoPhaseFile(xid, true); + continue; + } + buf = ProcessTwoPhaseBuffer(xid, InvalidXLogRecPtr, true, false, false); - if (buf == NULL) - continue; PrepareRedoAdd(buf, InvalidXLogRecPtr, InvalidXLogRecPtr, InvalidRepOriginId); @@ -1953,9 +1971,6 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p) gxact->prepare_start_lsn, gxact->ondisk, false, true); - if (buf == NULL) - continue; - /* * OK, we think this file is valid. Incorporate xid into the * running-minimum result. @@ -2026,8 +2041,7 @@ StandbyRecoverPreparedTransactions(void) buf = ProcessTwoPhaseBuffer(xid, gxact->prepare_start_lsn, gxact->ondisk, true, false); - if (buf != NULL) - pfree(buf); + pfree(buf); } LWLockRelease(TwoPhaseStateLock); } @@ -2052,8 +2066,21 @@ void RecoverPreparedTransactions(void) { int i; + TransactionId *remove_xids; + int remove_xids_cnt; LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); + + /* + * Track XIDs candidate for removal if found as already committed or + * aborted, once the first scan through TwoPhaseState is done. This + * cannot happen while going through the entries in TwoPhaseState as + * PrepareRedoRemove() manipulates it. + */ + remove_xids_cnt = 0; + remove_xids = (TransactionId *) palloc(TwoPhaseState->numPrepXacts * + sizeof(TransactionId)); + for (i = 0; i < TwoPhaseState->numPrepXacts; i++) { TransactionId xid; @@ -2066,6 +2093,26 @@ RecoverPreparedTransactions(void) xid = gxact->xid; + /* + * Is this transaction already aborted or committed? If yes, mark it + * for removal. + * + * Checking CLOGs if these transactions have been already aborted or + * committed is safe at this stage; we are at the end of recovery and + * all WAL has been replayed, all 2PC transactions are reinstated and + * should be tracked in TwoPhaseState. + */ + if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid)) + { + /* + * Track this transaction ID for its removal from the shared + * memory state at the end. + */ + remove_xids[remove_xids_cnt] = xid; + remove_xids_cnt++; + continue; + } + /* * Reconstruct subtrans state for the transaction --- needed because * pg_subtrans is not preserved over a restart. Note that we are @@ -2078,8 +2125,6 @@ RecoverPreparedTransactions(void) buf = ProcessTwoPhaseBuffer(xid, gxact->prepare_start_lsn, gxact->ondisk, true, false); - if (buf == NULL) - continue; ereport(LOG, (errmsg("recovering prepared transaction %u from shared memory", xid))); @@ -2135,7 +2180,19 @@ RecoverPreparedTransactions(void) LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); } + for (i = 0; i < remove_xids_cnt; i++) + { + TransactionId xid = remove_xids[i]; + + ereport(WARNING, + (errmsg("removing stale two-phase state from memory for transaction %u", + xid))); + PrepareRedoRemove(xid, true); + } + LWLockRelease(TwoPhaseStateLock); + + pfree(remove_xids); } /* @@ -2155,8 +2212,6 @@ ProcessTwoPhaseBuffer(TransactionId xid, bool fromdisk, bool setParent, bool setNextXid) { - FullTransactionId nextFullXid = ShmemVariableCache->nextFullXid; - TransactionId origNextXid = XidFromFullTransactionId(nextFullXid); TransactionId *subxids; char *buf; TwoPhaseFileHeader *hdr; @@ -2167,46 +2222,6 @@ ProcessTwoPhaseBuffer(TransactionId xid, if (!fromdisk) Assert(prepare_start_lsn != InvalidXLogRecPtr); - /* Already processed? */ - if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid)) - { - if (fromdisk) - { - ereport(WARNING, - (errmsg("removing stale two-phase state file for transaction %u", - xid))); - RemoveTwoPhaseFile(xid, true); - } - else - { - ereport(WARNING, - (errmsg("removing stale two-phase state from memory for transaction %u", - xid))); - PrepareRedoRemove(xid, true); - } - return NULL; - } - - /* Reject XID if too new */ - if (TransactionIdFollowsOrEquals(xid, origNextXid)) - { - if (fromdisk) - { - ereport(WARNING, - (errmsg("removing future two-phase state file for transaction %u", - xid))); - RemoveTwoPhaseFile(xid, true); - } - else - { - ereport(WARNING, - (errmsg("removing future two-phase state from memory for transaction %u", - xid))); - PrepareRedoRemove(xid, true); - } - return NULL; - } - if (fromdisk) { /* Read and validate file */ diff --git a/src/test/recovery/t/009_twophase.pl b/src/test/recovery/t/009_twophase.pl index 15bb28627f9..de9acd5e43a 100644 --- a/src/test/recovery/t/009_twophase.pl +++ b/src/test/recovery/t/009_twophase.pl @@ -2,9 +2,10 @@ use strict; use warnings; +use File::Copy; use PostgresNode; use TestLib; -use Test::More tests => 27; +use Test::More tests => 33; my $psql_out = ''; my $psql_rc = ''; @@ -25,6 +26,14 @@ sub configure_and_reload return; } +sub twophase_file_name +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my $xid = shift; + return sprintf("%08X", $xid); +} + # Set up two nodes, which will alternately be master and replication standby. # Setup london node @@ -523,3 +532,127 @@ $cur_standby->psql( is( $psql_out, qq{27|issued to paris}, "Check expected t_009_tbl2 data on standby"); + +############################################################################### +# Check handling of already committed or aborted 2PC files at recovery. +# This test does a manual copy of 2PC files created in a running server, +# to cheaply emulate situations that could be found in base backups. +############################################################################### + +# Issue a set of transactions that will be used for this portion of the test: +# - One transaction to hold on the minimum xid horizon at bay. +# - One transaction that will be found as already committed at recovery. +# - One transaction that will be fonnd as already rollbacked at recovery. +$cur_master->psql( + 'postgres', " + BEGIN; + INSERT INTO t_009_tbl VALUES (40, 'transaction: xid horizon'); + PREPARE TRANSACTION 'xact_009_40'; + BEGIN; + INSERT INTO t_009_tbl VALUES (41, 'transaction: commit-prepared'); + PREPARE TRANSACTION 'xact_009_41'; + BEGIN; + INSERT INTO t_009_tbl VALUES (42, 'transaction: rollback-prepared'); + PREPARE TRANSACTION 'xact_009_42';"); + +# Issue a checkpoint, fixing the XID horizon based on the first transaction, +# flushing to disk the two files to use. +$cur_master->psql('postgres', "CHECKPOINT"); + +# Get the transaction IDs of the ones to 2PC files to manipulate. +my $commit_prepared_xid = int( + $cur_master->safe_psql( + 'postgres', + "SELECT transaction FROM pg_prepared_xacts WHERE gid = 'xact_009_41'") +); +my $abort_prepared_xid = int( + $cur_master->safe_psql( + 'postgres', + "SELECT transaction FROM pg_prepared_xacts WHERE gid = 'xact_009_42'") +); + +# Copy the two-phase files that will be put back later. +my $commit_prepared_name = twophase_file_name($commit_prepared_xid); +my $abort_prepared_name = twophase_file_name($abort_prepared_xid); + +my $twophase_tmpdir = $PostgreSQL::Test::Utils::tmp_check . '/' . "2pc_files"; +mkdir($twophase_tmpdir); +my $primary_twophase_folder = $cur_master->data_dir . '/pg_twophase/'; +copy("$primary_twophase_folder/$commit_prepared_name", $twophase_tmpdir); +copy("$primary_twophase_folder/$abort_prepared_name", $twophase_tmpdir); + +# Issue abort/commit prepared. +$cur_master->psql('postgres', "COMMIT PREPARED 'xact_009_41'"); +$cur_master->psql('postgres', "ROLLBACK PREPARED 'xact_009_42'"); + +# Again checkpoint, to advance the LSN past the point where the two previous +# transaction records would be replayed. +$cur_master->psql('postgres', "CHECKPOINT"); + +# Take down node. +$cur_master->teardown_node; + +# Move back the two twophase files. +copy("$twophase_tmpdir/$commit_prepared_name", $primary_twophase_folder); +copy("$twophase_tmpdir/$abort_prepared_name", $primary_twophase_folder); + +# Grab location in logs of primary +my $log_offset = -s $cur_master->logfile; + +# Start node and check that the two previous files are removed by checking the +# server logs, following the CLOG lookup done at the end of recovery. +$cur_master->start; + +ok($cur_master->log_contains( + qr/removing stale two-phase state from memory for transaction $commit_prepared_xid/, + $log_offset), + "two-phase file of committed transaction removed at recovery"); +ok($cur_master->log_contains( + qr/removing stale two-phase state from memory for transaction $abort_prepared_xid/, + $log_offset), + "two-phase file of aborted transaction removed at recovery"); + +# Commit the first transaction. +$cur_master->psql('postgres', "COMMIT PREPARED 'xact_009_40'"); +# After replay, there should be no 2PC transactions. +$cur_master->psql( + 'postgres', + "SELECT * FROM pg_prepared_xact", + stdout => \$psql_out); +is($psql_out, qq{}, "Check expected pg_prepared_xact data on primary"); +# Data from transactions should be around. +$cur_master->psql( + 'postgres', + "SELECT * FROM t_009_tbl WHERE id IN (40, 41, 42);", + stdout => \$psql_out); +is( $psql_out, qq{40|transaction: xid horizon +41|transaction: commit-prepared}, + "Check expected table data on primary"); + +############################################################################### +# Check handling of orphaned 2PC files at recovery. +############################################################################### + +$cur_master->teardown_node; + +# Grab location in logs of primary +$log_offset = -s $cur_master->logfile; + +# Create fake files with a transaction ID large or low enough to be in the +# future or the past, then check that the primary is able to start and remove +# these files at recovery. + +my $future_2pc_file = $cur_master->data_dir . '/pg_twophase/00FFFFFF'; +append_to_file $future_2pc_file, ""; +my $past_2pc_file = $cur_master->data_dir . '/pg_twophase/000000FF'; +append_to_file $past_2pc_file, ""; + +$cur_master->start; +ok($cur_master->log_contains( + qr/removing future two-phase state file for transaction 16777215/, + $log_offset), + "removed future two-phase state file"); +ok($cur_master->log_contains( + qr/removing past two-phase state file for transaction 255/, + $log_offset), + "removed past two-phase state file"); -- 2.47.2