From 0474d91ee46f5974fa2e814036b7ac92819f615c Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Fri, 15 Sep 2017 09:41:55 -0400 Subject: [PATCH] Fix DROP SUBSCRIPTION hang When ALTER SUBSCRIPTION DISABLE is run in the same transaction before DROP SUBSCRIPTION, the latter will hang because workers will still be running, not having seen the DISABLE committed, and DROP SUBSCRIPTION will wait until the workers have vacated the replication origin slots. To fix, kill the workers also if dropping a subscription that is disabled. This will address this specific scenario. If the subscription was already disabled before the current transaction, then there shouldn't be any workers left and this won't make a difference. Reported-by: Arseny Sher Discussion: https://www.postgresql.org/message-id/flat/87mv6av84w.fsf%40ars-thinkpad --- src/backend/commands/subscriptioncmds.c | 18 ++++++++++-- src/test/subscription/t/007_ddl.pl | 51 +++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 3 deletions(-) create mode 100644 src/test/subscription/t/007_ddl.pl diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c index 2ef414e084..9f85ae937e 100644 --- a/src/backend/commands/subscriptioncmds.c +++ b/src/backend/commands/subscriptioncmds.c @@ -816,6 +816,7 @@ DropSubscription(DropSubscriptionStmt *stmt, bool isTopLevel) char *subname; char *conninfo; char *slotname; + bool subenabled; List *subworkers; ListCell *lc; char originname[NAMEDATALEN]; @@ -886,6 +887,9 @@ DropSubscription(DropSubscriptionStmt *stmt, bool isTopLevel) else slotname = NULL; + /* Get subenabled */ + subenabled = ((Form_pg_subscription) GETSTRUCT(tup))->subenabled; + /* * Since dropping a replication slot is not transactional, the replication * slot stays dropped even if the transaction rolls back. So we cannot @@ -910,8 +914,16 @@ DropSubscription(DropSubscriptionStmt *stmt, bool isTopLevel) /* * If we are dropping the replication slot, stop all the subscription - * workers immediately, so that the slot becomes accessible. Otherwise - * just schedule the stopping for the end of the transaction. + * workers immediately, so that the slot becomes accessible. + * + * Also, if the subscription is disabled, stop the workers. This is + * necessary if the subscription was disabled in the same transaction, in + * which case the workers haven't seen that yet and will still be running, + * leading to hangs later when we want to drop the replication origin. If + * the subscription was disabled before this transaction, then there + * shouldn't be any workers left, so this won't make a difference. + * + * Otherwise just schedule the stopping for the end of the transaction. * * New workers won't be started because we hold an exclusive lock on the * subscription till the end of the transaction. @@ -923,7 +935,7 @@ DropSubscription(DropSubscriptionStmt *stmt, bool isTopLevel) { LogicalRepWorker *w = (LogicalRepWorker *) lfirst(lc); - if (slotname) + if (slotname || !subenabled) logicalrep_worker_stop(w->subid, w->relid); else logicalrep_worker_stop_at_commit(w->subid, w->relid); diff --git a/src/test/subscription/t/007_ddl.pl b/src/test/subscription/t/007_ddl.pl new file mode 100644 index 0000000000..3f36238840 --- /dev/null +++ b/src/test/subscription/t/007_ddl.pl @@ -0,0 +1,51 @@ +# Test some logical replication DDL behavior +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More tests => 1; + +sub wait_for_caught_up +{ + my ($node, $appname) = @_; + + $node->poll_query_until('postgres', +"SELECT pg_current_wal_lsn() <= replay_lsn FROM pg_stat_replication WHERE application_name = '$appname';" + ) or die "Timed out while waiting for subscriber to catch up"; +} + +my $node_publisher = get_new_node('publisher'); +$node_publisher->init(allows_streaming => 'logical'); +$node_publisher->start; + +my $node_subscriber = get_new_node('subscriber'); +$node_subscriber->init(allows_streaming => 'logical'); +$node_subscriber->start; + +my $ddl = "CREATE TABLE test1 (a int, b text);"; +$node_publisher->safe_psql('postgres', $ddl); +$node_subscriber->safe_psql('postgres', $ddl); + +my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres'; +my $appname = 'replication_test'; + +$node_publisher->safe_psql('postgres', + "CREATE PUBLICATION mypub FOR ALL TABLES;"); +$node_subscriber->safe_psql('postgres', +"CREATE SUBSCRIPTION mysub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION mypub;" +); + +wait_for_caught_up($node_publisher, $appname); + +$node_subscriber->safe_psql('postgres', q{ +BEGIN; +ALTER SUBSCRIPTION mysub DISABLE; +ALTER SUBSCRIPTION mysub SET (slot_name = NONE); +DROP SUBSCRIPTION mysub; +COMMIT; +}); + +pass "subscription disable and drop in same transaction did not hang"; + +$node_subscriber->stop; +$node_publisher->stop; -- 2.11.0 (Apple Git-81)