Author: Noah Misch Commit: Noah Misch [not for commit] Demonstrate bugs that WAL_LOG-chkpt-interleave fixes. Components that might be separate patches if committing: - Allow injection points in critical sections - BackgroundPsql: add feature to wait for stderr content, not just stdout diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c index d5b8ca2..943bbb4 100644 --- a/src/backend/backup/basebackup.c +++ b/src/backend/backup/basebackup.c @@ -45,6 +45,7 @@ #include "storage/reinit.h" #include "utils/builtins.h" #include "utils/guc.h" +#include "utils/injection_point.h" #include "utils/ps_status.h" #include "utils/relcache.h" #include "utils/resowner.h" @@ -994,6 +995,8 @@ SendBaseBackup(BaseBackupCmd *cmd, IncrementalBackupInfo *ib) bbsink *sink; SessionBackupState status = get_backup_status(); + INJECTION_POINT("begin-SendBaseBackup"); + if (status == SESSION_BACKUP_RUNNING) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 0a97a11..f9bc53c 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -65,6 +65,7 @@ #include "utils/builtins.h" #include "utils/fmgroids.h" #include "utils/guc.h" +#include "utils/injection_point.h" #include "utils/pg_locale.h" #include "utils/relmapper.h" #include "utils/snapmgr.h" @@ -486,6 +487,8 @@ CreateDirAndVersionFile(char *dbpath, Oid dbid, Oid tsid, bool isRedo) sizeof(xl_dbase_create_wal_log_rec)); lsn = XLogInsert(RM_DBASE_ID, XLOG_DBASE_CREATE_WAL_LOG); + /* at merge conflict w/ fix patch, keep this with the XLogInsert() */ + INJECTION_POINT("after-XLOG_DBASE_CREATE_WAL_LOG"); /* As always, WAL must hit the disk before the data update does. */ XLogFlush(lsn); diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 1a34bd3..226814e 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -72,6 +72,7 @@ #include "tcop/tcopprot.h" #include "tcop/utility.h" #include "utils/guc_hooks.h" +#include "utils/injection_point.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/ps_status.h" diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c index 58807f7..f5c70b7 100644 --- a/src/backend/utils/cache/relmapper.c +++ b/src/backend/utils/cache/relmapper.c @@ -53,6 +53,7 @@ #include "pgstat.h" #include "storage/fd.h" #include "storage/lwlock.h" +#include "utils/injection_point.h" #include "utils/inval.h" #include "utils/relmapper.h" @@ -962,6 +963,7 @@ write_relmap_file(RelMapFile *newmap, bool write_wal, bool send_sinval, XLogRegisterData((char *) newmap, sizeof(RelMapFile)); lsn = XLogInsert(RM_RELMAP_ID, XLOG_RELMAP_UPDATE); + INJECTION_POINT("after-XLOG_RELMAP_UPDATE"); /* As always, WAL must hit the disk before the data update does */ XLogFlush(lsn); diff --git a/src/backend/utils/misc/injection_point.c b/src/backend/utils/misc/injection_point.c index 0cf4d51..1fd63ea 100644 --- a/src/backend/utils/misc/injection_point.c +++ b/src/backend/utils/misc/injection_point.c @@ -232,6 +232,26 @@ InjectionPointAttach(const char *name, } /* + * Probe for an existing injection point. + */ +bool +InjectionPointAttached(const char *name) +{ +#ifdef USE_INJECTION_POINTS + bool found; + + LWLockAcquire(InjectionPointLock, LW_SHARED); + (void) hash_search(InjectionPointHash, name, + HASH_FIND, &found); + LWLockRelease(InjectionPointLock); + + return found; +#else + elog(ERROR, "Injection points are not supported by this build"); +#endif +} + +/* * Detach an existing injection point. */ void @@ -262,10 +282,17 @@ void InjectionPointRun(const char *name) { #ifdef USE_INJECTION_POINTS + bool reset_allow = false; InjectionPointEntry *entry_by_name; bool found; InjectionPointCallback injection_callback; + if (CritSectionCount > 0 && !MemoryContextAllowAllInCriticalSection) + { + reset_allow = true; + MemoryContextAllowAllInCriticalSection = true; + } + LWLockAcquire(InjectionPointLock, LW_SHARED); entry_by_name = (InjectionPointEntry *) hash_search(InjectionPointHash, name, @@ -279,7 +306,7 @@ InjectionPointRun(const char *name) if (!found) { injection_point_cache_remove(name); - return; + goto out; } /* @@ -311,6 +338,11 @@ InjectionPointRun(const char *name) } injection_callback(name); + +out: + /* elog(ERROR) would have become PANIC, so we never miss this reset */ + if (reset_allow) + MemoryContextAllowAllInCriticalSection = false; #else elog(ERROR, "Injection points are not supported by this build"); #endif diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index 1336944..3a57c6a 100644 --- a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -149,6 +149,17 @@ MemoryContext CurTransactionContext = NULL; /* This is a transient link to the active portal's memory context: */ MemoryContext PortalContext = NULL; +/* + * If true, suppress all assertions about allocations in critical sections. + * Behave as though MemoryContextAllowInCriticalSection() had been called for + * every context, and permit new contexts. Caller is responsible for + * restoring value on error. The intended use case is debugging aids that + * can't use a single-purpose context. For example, LockAcquire() allocates + * in TopMemoryContext, so a debugging aid that calls it has no clean way to + * redirect that allocation. + */ +bool MemoryContextAllowAllInCriticalSection = false; + static void MemoryContextCallResetCallbacks(MemoryContext context); static void MemoryContextStatsInternal(MemoryContext context, int level, bool print, int max_children, @@ -164,7 +175,8 @@ static void MemoryContextStatsPrint(MemoryContext context, void *passthru, * rule, the allocation functions Assert that. */ #define AssertNotInCriticalSection(context) \ - Assert(CritSectionCount == 0 || (context)->allowInCritSection) + Assert(CritSectionCount == 0 || (context)->allowInCritSection || \ + MemoryContextAllowAllInCriticalSection) /* * Call the given function in the MemoryContextMethods for the memory context @@ -976,8 +988,7 @@ MemoryContextCreate(MemoryContext node, MemoryContext parent, const char *name) { - /* Creating new memory contexts is not allowed in a critical section */ - Assert(CritSectionCount == 0); + Assert(CritSectionCount == 0 || MemoryContextAllowAllInCriticalSection); /* Initialize all standard fields of memory context header */ node->type = tag; diff --git a/src/include/utils/injection_point.h b/src/include/utils/injection_point.h index 55524b5..57d9f3a 100644 --- a/src/include/utils/injection_point.h +++ b/src/include/utils/injection_point.h @@ -31,6 +31,7 @@ extern void InjectionPointShmemInit(void); extern void InjectionPointAttach(const char *name, const char *library, const char *function); +extern bool InjectionPointAttached(const char *name); extern void InjectionPointRun(const char *name); extern void InjectionPointDetach(const char *name); diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h index ca7858d..ab46aaf 100644 --- a/src/include/utils/memutils.h +++ b/src/include/utils/memutils.h @@ -90,6 +90,8 @@ extern void MemoryContextStatsDetail(MemoryContext context, int max_children, extern void MemoryContextAllowInCriticalSection(MemoryContext context, bool allow); +extern PGDLLIMPORT bool MemoryContextAllowAllInCriticalSection; + #ifdef MEMORY_CONTEXT_CHECKING extern void MemoryContextCheck(MemoryContext context); #endif diff --git a/src/test/modules/injection_points/Makefile b/src/test/modules/injection_points/Makefile index 2cbbae4..3ca6859 100644 --- a/src/test/modules/injection_points/Makefile +++ b/src/test/modules/injection_points/Makefile @@ -8,6 +8,8 @@ PGFILEDESC = "injection_points - facility for injection points" REGRESS = injection_points +TAP_TESTS = 1 + ifdef USE_PGXS PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) diff --git a/src/test/modules/injection_points/injection_points.c b/src/test/modules/injection_points/injection_points.c index e843e65..feff3f1 100644 --- a/src/test/modules/injection_points/injection_points.c +++ b/src/test/modules/injection_points/injection_points.c @@ -18,6 +18,7 @@ #include "postgres.h" #include "fmgr.h" +#include "miscadmin.h" #include "storage/lwlock.h" #include "storage/shmem.h" #include "utils/builtins.h" @@ -28,10 +29,23 @@ PG_MODULE_MAGIC; extern PGDLLEXPORT void injection_error(const char *name); extern PGDLLEXPORT void injection_notice(const char *name); +extern PGDLLEXPORT void injection_await_detach(const char *name); /* Set of callbacks available to be attached to an injection point. */ void +injection_await_detach(const char *name) +{ + elog(WARNING, "injection point waiting for detach of %s", name); + while (InjectionPointAttached(name)) + { + CHECK_FOR_INTERRUPTS(); + pg_usleep(10 * 1000); + } + elog(WARNING, "injection point observed detach of %s", name); +} + +void injection_error(const char *name) { elog(ERROR, "error triggered for injection point %s", name); @@ -54,7 +68,9 @@ injection_points_attach(PG_FUNCTION_ARGS) char *action = text_to_cstring(PG_GETARG_TEXT_PP(1)); char *function; - if (strcmp(action, "error") == 0) + if (strcmp(action, "await-detach") == 0) + function = "injection_await_detach"; + else if (strcmp(action, "error") == 0) function = "injection_error"; else if (strcmp(action, "notice") == 0) function = "injection_notice"; diff --git a/src/test/modules/injection_points/meson.build b/src/test/modules/injection_points/meson.build index ee37573..1e2adbc 100644 --- a/src/test/modules/injection_points/meson.build +++ b/src/test/modules/injection_points/meson.build @@ -34,4 +34,9 @@ tests += { 'injection_points', ], }, + 'tap': { + 'tests': [ + 't/001_create_database.pl', + ], + }, } diff --git a/src/test/modules/injection_points/t/001_create_database.pl b/src/test/modules/injection_points/t/001_create_database.pl new file mode 100644 index 0000000..47d1299 --- /dev/null +++ b/src/test/modules/injection_points/t/001_create_database.pl @@ -0,0 +1,70 @@ +# Copyright (c) 2021-2024, PostgreSQL Global Development Group + +# Test usability of a base backup taken after a particular WAL record. +use strict; +use warnings FATAL => 'all'; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +foreach + my $point ('after-XLOG_RELMAP_UPDATE', 'after-XLOG_DBASE_CREATE_WAL_LOG') +{ + # create a base backup at $point, a post-XLogInsert injection point + my $origin = PostgreSQL::Test::Cluster->new('origin-' . $point); + $origin->init(has_archiving => 1, allows_streaming => 1); + $origin->start; + my $creator = $origin->background_psql('postgres'); + my $injector = $origin->background_psql('postgres'); + $injector->query_safe(<backup_async($point, backup_options => ['--wal-method=none']); + # FIXME replace sleep w/ watch for "injection point waiting", perhaps by + # introducing "->backup(until => qr/regexp/)". + sleep 1; + # Run CREATE DATABASE, which stops at $point. RelationMappingLock blocks + # InitPostgres(), so start all backends before here. We used + # --wal-method=none above since the default would start an additional + # connection too late. (The archive makes it superfluous to acquire WAL + # through pg_basebackup.) + $creator->query_until_stderr(qr/waiting.*$point/, + "CREATE DATABASE regress_created;\n"); + $injector->query_safe( + "SELECT injection_points_detach('begin-SendBaseBackup');"); + # FIXME While this is an effective test of XLOG_DBASE_CREATE_WAL_LOG, the + # XLOG_RELMAP_UPDATE makes the backup's checkpoint hang waiting for + # RelationMappingLock. To have a test that both fails with the bug and + # passes with its fix, we'd need a procedure like this: + # + # while (backup client still not done) + # { + # run backup client until it's waiting on a lock CREATE DATABASE holds; + # unpause CREATE DATABASE and re-pause it after its next lock release; + # } + $backup_client->finish; + $injector->query_safe("SELECT injection_points_detach('$point');"); + $injector->quit; + $creator->quit; + $origin->stop; + + # replay backup to the end; confirm new db accepts connections + my $replica = PostgreSQL::Test::Cluster->new('replica-' . $point); + $replica->init_from_backup( + $origin, $point, + has_restoring => 1, + standby => 0); + $replica->start; + $replica->poll_query_until('postgres', + "SELECT pg_is_in_recovery() = 'f';") + or die "Timed out while waiting for recovery to end"; + my $rc = $replica->psql('regress_created', 'SELECT 1'); + is($rc, 0, "backup @ $point usable"); + $replica->stop; +} + +done_testing(); diff --git a/src/test/perl/PostgreSQL/Test/BackgroundPsql.pm b/src/test/perl/PostgreSQL/Test/BackgroundPsql.pm index 4091c31..2b9f26d 100644 --- a/src/test/perl/PostgreSQL/Test/BackgroundPsql.pm +++ b/src/test/perl/PostgreSQL/Test/BackgroundPsql.pm @@ -288,6 +288,29 @@ sub query_until return $ret; } +# FIXME perhaps make "query_until" monitor both stdout and stderr, then remove +# this fork. +sub query_until_stderr +{ + my ($self, $until, $query) = @_; + my $ret; + local $Test::Builder::Level = $Test::Builder::Level + 1; + + $self->{timeout}->start() if (defined($self->{query_timer_restart})); + $self->{stdin} .= $query; + + pump_until($self->{run}, $self->{timeout}, \$self->{stderr}, $until); + + die "psql query timed out" if $self->{timeout}->is_expired; + + $ret = $self->{stderr}; + + # clear out output for the next query + $self->{stderr} = ''; + + return $ret; +} + =pod =item $session->set_query_timer_restart() diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm index e2e70d0..3ff0df9 100644 --- a/src/test/perl/PostgreSQL/Test/Cluster.pm +++ b/src/test/perl/PostgreSQL/Test/Cluster.pm @@ -739,6 +739,27 @@ sub backup return; } +# FIXME +sub backup_async +{ + my ($self, $backup_name, %params) = @_; + my $backup_path = $self->backup_dir . '/' . $backup_name; + my $name = $self->name; + + local %ENV = $self->_get_env(); + + print "# Taking pg_basebackup $backup_name from node \"$name\"\n"; + return IPC::Run::start( + [ + 'pg_basebackup', '-D', + $backup_path, '-h', + $self->host, '-p', + $self->port, '--checkpoint', + 'fast', '--no-sync', + @{ $params{backup_options} } + ]); +} + =item $node->backup_fs_cold(backup_name) Create a backup with a filesystem level copy in subdirectory B of