diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c index 08daf26fdf..9f76261d99 100644 --- a/contrib/postgres_fdw/connection.c +++ b/contrib/postgres_fdw/connection.c @@ -108,6 +108,7 @@ PGconn * GetConnection(UserMapping *user, bool will_prep_stmt) { bool found; + bool need_reset_conn = false; ConnCacheEntry *entry; ConnCacheKey key; @@ -159,23 +160,25 @@ GetConnection(UserMapping *user, bool will_prep_stmt) /* Reject further use of connections which failed abort cleanup. */ pgfdw_reject_incomplete_xact_state_change(entry); +reset: /* * If the connection needs to be remade due to invalidation, disconnect as - * soon as we're out of all transactions. + * soon as we're out of all transactions. Also if previous attempt to start + * new remote transaction failed on the cached connection, disconnect + * it to reestablish new connection. */ - if (entry->conn != NULL && entry->invalidated && entry->xact_depth == 0) + if ((entry->conn != NULL && entry->invalidated && entry->xact_depth == 0) || + need_reset_conn) { - elog(DEBUG3, "closing connection %p for option changes to take effect", - entry->conn); + if (need_reset_conn) + elog(DEBUG3, "closing connection %p to reestablish connection", + entry->conn); + else + elog(DEBUG3, "closing connection %p for option changes to take effect", + entry->conn); disconnect_pg_server(entry); } - /* - * We don't check the health of cached connection here, because it would - * require some overhead. Broken connection will be detected when the - * connection is actually used. - */ - /* * If cache entry doesn't have a connection, we have to establish a new * connection. (If connect_pg_server throws an error, the cache entry @@ -206,9 +209,31 @@ GetConnection(UserMapping *user, bool will_prep_stmt) } /* - * Start a new transaction or subtransaction if needed. + * We check the health of the cached connection here, while the remote + * xact is going to begin. Broken connection will be detected and the + * connection is retried. We do this only once during each begin remote + * xact call, if the connection is still broken after the retry attempt, + * then the query will fail. */ - begin_remote_xact(entry); + PG_TRY(); + { + /* Start a new transaction or subtransaction if needed. */ + begin_remote_xact(entry); + need_reset_conn = false; + } + PG_CATCH(); + { + if (!entry->conn || + PQstatus(entry->conn) != CONNECTION_BAD || + entry->xact_depth > 0 || + need_reset_conn) + PG_RE_THROW(); + need_reset_conn = true; + } + PG_END_TRY(); + + if (need_reset_conn) + goto reset; /* Remember if caller will prepare statements */ entry->have_prep_stmt |= will_prep_stmt; diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 10e23d02ed..d72b11a3ac 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -8987,3 +8987,58 @@ PREPARE TRANSACTION 'fdw_tpc'; ERROR: cannot PREPARE a transaction that has operated on postgres_fdw foreign tables ROLLBACK; WARNING: there is no transaction in progress +-- Retry cached connections at the beginning of the remote xact +-- in case remote backend is killed. +-- Let's use a different application name for remote connection, +-- so that this test will not kill other backends wrongly. +ALTER SERVER loopback OPTIONS (application_name 'fdw_retry_check'); +-- Generate a connection to remote. Local backend will cache it. +SELECT * FROM ft1 LIMIT 1; + c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 +------+----+----+----+----+----+------------+---- + 1111 | 2 | | | | | ft1 | +(1 row) + +-- Retrieve pid of remote backend with application name fdw_retry_check +-- and kill it intentionally here. Note that, local backend still has +-- the remote connection/backend info in it's cache. +SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE +backend_type = 'client backend' AND application_name = 'fdw_retry_check'; + pg_terminate_backend +---------------------- + t +(1 row) + +-- Next query using the same foreign server should succeed if connection +-- retry works. +SELECT * FROM ft1 LIMIT 1; + c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 +------+----+----+----+----+----+------------+---- + 1111 | 2 | | | | | ft1 | +(1 row) + +-- Subtransaction - remote backend is killed in the middle of a remote +-- xact, and we don't do retry connection, hence the subsequent query +-- using the same foreign server should fail. +BEGIN; +DECLARE c CURSOR FOR SELECT * FROM ft1 LIMIT 1; +FETCH c; + c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 +------+----+----+----+----+----+------------+---- + 1111 | 2 | | | | | ft1 | +(1 row) + +SAVEPOINT s; +SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE +backend_type = 'client backend' AND application_name = 'fdw_retry_check'; + pg_terminate_backend +---------------------- + t +(1 row) + +SELECT * FROM ft1 LIMIT 1; -- should fail +ERROR: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. +CONTEXT: remote SQL command: SAVEPOINT s2 +COMMIT; diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 78156d10b4..7a1b4c78bd 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -2653,3 +2653,34 @@ SELECT count(*) FROM ft1; -- error here PREPARE TRANSACTION 'fdw_tpc'; ROLLBACK; + +-- Retry cached connections at the beginning of the remote xact +-- in case remote backend is killed. +-- Let's use a different application name for remote connection, +-- so that this test will not kill other backends wrongly. +ALTER SERVER loopback OPTIONS (application_name 'fdw_retry_check'); + +-- Generate a connection to remote. Local backend will cache it. +SELECT * FROM ft1 LIMIT 1; + +-- Retrieve pid of remote backend with application name fdw_retry_check +-- and kill it intentionally here. Note that, local backend still has +-- the remote connection/backend info in it's cache. +SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE +backend_type = 'client backend' AND application_name = 'fdw_retry_check'; + +-- Next query using the same foreign server should succeed if connection +-- retry works. +SELECT * FROM ft1 LIMIT 1; + +-- Subtransaction - remote backend is killed in the middle of a remote +-- xact, and we don't do retry connection, hence the subsequent query +-- using the same foreign server should fail. +BEGIN; +DECLARE c CURSOR FOR SELECT * FROM ft1 LIMIT 1; +FETCH c; +SAVEPOINT s; +SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE +backend_type = 'client backend' AND application_name = 'fdw_retry_check'; +SELECT * FROM ft1 LIMIT 1; -- should fail +COMMIT;