From 71c23ab9a87817eab41b3d31516ceddfff04ed66 Mon Sep 17 00:00:00 2001 From: Hou Zhijie Date: Mon, 31 Mar 2025 16:28:57 +0800 Subject: [PATCH v2] Disallow enabling failover for a replication slot that enables two-phase decoding This commit fixes a bug for slot synchronization with logical replication slots that enabled two_phase decoding. As it stands, transactions prepared before two-phase decoding is enabled may fail to replicate to the subscriber after being committed on a promoted standby following a failover. The issue arises because the two_phase_at field of a slot, which tracks the LSN from which two-phase decoding starts, is not synchronized to standby servers. Without this field, the logical decoding might incorrectly identify prepared transaction as already replicated to the subscriber, causing them to be skipped. To address the issue on HEAD, this commit makes the two_phase_at field of the slot visible in the pg_replication_slots view and enables the slot synchronization to copy this value to the corresponding synced slot on the standby server. The bug has been present since the introduction of slot synchronization in PostgreSQL 17. However, due to the need for catalog changes, backpatching this fix is not feasible. Instead, to prevent the risk of losing prepared transactions in prior versions, we now disallow enabling failover and two-phase decoding together for a replication slot. --- contrib/test_decoding/expected/slot.out | 2 ++ contrib/test_decoding/sql/slot.sql | 1 + src/backend/commands/subscriptioncmds.c | 11 +++++++++ src/backend/replication/slot.c | 27 ++++++++++++++++++++++ src/test/regress/expected/subscription.out | 3 +++ src/test/regress/sql/subscription.sql | 4 ++++ 6 files changed, 48 insertions(+) diff --git a/contrib/test_decoding/expected/slot.out b/contrib/test_decoding/expected/slot.out index 7de03c79f6f..347b9c11467 100644 --- a/contrib/test_decoding/expected/slot.out +++ b/contrib/test_decoding/expected/slot.out @@ -427,6 +427,8 @@ SELECT 'init' FROM pg_create_logical_replication_slot('failover_default_slot', ' SELECT 'init' FROM pg_create_logical_replication_slot('failover_true_temp_slot', 'test_decoding', true, false, true); ERROR: cannot enable failover for a temporary replication slot +SELECT 'init' FROM pg_create_logical_replication_slot('failover_twophase_true_slot', 'test_decoding', false, true, true); +ERROR: cannot enable failover for a replication slot that enables two-phase decoding SELECT 'init' FROM pg_create_physical_replication_slot('physical_slot'); ?column? ---------- diff --git a/contrib/test_decoding/sql/slot.sql b/contrib/test_decoding/sql/slot.sql index 580e3ae3bef..a89fe712ff6 100644 --- a/contrib/test_decoding/sql/slot.sql +++ b/contrib/test_decoding/sql/slot.sql @@ -182,6 +182,7 @@ SELECT 'init' FROM pg_create_logical_replication_slot('failover_true_slot', 'tes SELECT 'init' FROM pg_create_logical_replication_slot('failover_false_slot', 'test_decoding', false, false, false); SELECT 'init' FROM pg_create_logical_replication_slot('failover_default_slot', 'test_decoding', false, false); SELECT 'init' FROM pg_create_logical_replication_slot('failover_true_temp_slot', 'test_decoding', true, false, true); +SELECT 'init' FROM pg_create_logical_replication_slot('failover_twophase_true_slot', 'test_decoding', false, true, true); SELECT 'init' FROM pg_create_physical_replication_slot('physical_slot'); SELECT slot_name, slot_type, failover FROM pg_replication_slots; diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c index 9467f58a23d..8308ccaad5a 100644 --- a/src/backend/commands/subscriptioncmds.c +++ b/src/backend/commands/subscriptioncmds.c @@ -648,6 +648,17 @@ CreateSubscription(ParseState *pstate, CreateSubscriptionStmt *stmt, errmsg("password_required=false is superuser-only"), errhint("Subscriptions with the password_required option set to false may only be created or modified by the superuser."))); + /* + * Do not allow users to enable failover and two_phase option together. + * + * See comments atop the similar check in ReplicationSlotCreate() for + * detailed reasons. + */ + if (opts.twophase && opts.failover) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot enable failover option when two_phase option is enabled")); + /* * If built with appropriate switch, whine when regression-testing * conventions for subscription names are violated. diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index 780d22afbca..5b349b3c3af 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -343,6 +343,21 @@ ReplicationSlotCreate(const char *name, bool db_specific, ereport(ERROR, errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot enable failover for a temporary replication slot")); + + /* + * Do not allow users to enable failover for slots that enable + * two-phase decoding. + * + * This is because the two_phase_at field of a slot, which tracks the + * LSN from which two-phase decoding starts, is not synchronized to + * standby servers. Without this field, the logical decoding might + * incorrectly identify prepared transaction as already replicated to + * the subscriber, causing them to be skipped. + */ + if (two_phase) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot enable failover for a replication slot that enables two-phase decoding")); } /* @@ -848,6 +863,18 @@ ReplicationSlotAlter(const char *name, bool failover) errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot enable failover for a temporary replication slot")); + /* + * Do not allow users to enable failover for slots that enable two-phase + * decoding. + * + * See comments atop the similar check in ReplicationSlotCreate() for + * detailed reasons. + */ + if (failover && MyReplicationSlot->data.two_phase) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot enable failover for a replication slot that enables two-phase decoding")); + if (MyReplicationSlot->data.failover != failover) { SpinLockAcquire(&MyReplicationSlot->mutex); diff --git a/src/test/regress/expected/subscription.out b/src/test/regress/expected/subscription.out index 0f2a25cdc19..ee3603b67ef 100644 --- a/src/test/regress/expected/subscription.out +++ b/src/test/regress/expected/subscription.out @@ -479,6 +479,9 @@ COMMIT; ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE); DROP SUBSCRIPTION regress_testsub; RESET SESSION AUTHORIZATION; +-- fail - cannot enable two_phase and failover together +CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUBLICATION testpub WITH (connect = false, two_phase = true, failover = true); +ERROR: cannot enable failover option when two_phase option is enabled DROP ROLE regress_subscription_user; DROP ROLE regress_subscription_user2; DROP ROLE regress_subscription_user3; diff --git a/src/test/regress/sql/subscription.sql b/src/test/regress/sql/subscription.sql index 3e5ba4cb8c6..47fc1e5329b 100644 --- a/src/test/regress/sql/subscription.sql +++ b/src/test/regress/sql/subscription.sql @@ -342,6 +342,10 @@ ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE); DROP SUBSCRIPTION regress_testsub; RESET SESSION AUTHORIZATION; + +-- fail - cannot enable two_phase and failover together +CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUBLICATION testpub WITH (connect = false, two_phase = true, failover = true); + DROP ROLE regress_subscription_user; DROP ROLE regress_subscription_user2; DROP ROLE regress_subscription_user3; -- 2.31.1