Author: Noah Misch Commit: Noah Misch [not for commit] demonstrate datfrozenxid overtaking relfrozenxid Components that might be separate patches if committing: - Allow injection points in critical sections - Emit DEBUG1 before waiting on injection point, so TAP test can poll for it - BackgroundPsql: add feature to wait for stderr content, not just stdout diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index d7e417f..c4d18ad 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -6344,6 +6344,7 @@ heap_inplace_update_finish(void *state, HeapTuple tuple) memcpy((char *) htup + htup->t_hoff, (char *) tuple->t_data + tuple->t_data->t_hoff, newlen); + INJECTION_POINT("inplace-after-mempcy"); /*---------- * XXX A crash here can allow datfrozenxid() to get ahead of relfrozenxid: diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index d299a25..c4020e3 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -37,6 +37,7 @@ #include "catalog/index.h" #include "catalog/namespace.h" #include "catalog/pg_database.h" +#include "catalog/pg_enum_d.h" #include "catalog/pg_inherits.h" #include "commands/cluster.h" #include "commands/defrem.h" @@ -56,6 +57,7 @@ #include "utils/fmgroids.h" #include "utils/guc.h" #include "utils/guc_hooks.h" +#include "utils/injection_point.h" #include "utils/memutils.h" #include "utils/snapmgr.h" #include "utils/syscache.h" @@ -1575,7 +1577,7 @@ vac_update_datfrozenxid(void) { HeapTuple tuple; Form_pg_database dbform; - Relation relation; + Relation relation, dbrelation; SysScanDesc scan; HeapTuple classTup; TransactionId newFrozenXid; @@ -1629,11 +1631,24 @@ vac_update_datfrozenxid(void) scan = systable_beginscan(relation, InvalidOid, false, NULL, 0, NULL); + /* + * This may process invals that need pg_class buffer locks, so get it out + * of the way. + */ + dbrelation = table_open(DatabaseRelationId, RowExclusiveLock); + while ((classTup = systable_getnext(scan)) != NULL) { volatile FormData_pg_class *classForm = (Form_pg_class) GETSTRUCT(classTup); - TransactionId relfrozenxid = classForm->relfrozenxid; - TransactionId relminmxid = classForm->relminmxid; + TransactionId relfrozenxid; + TransactionId relminmxid; + +#ifdef USE_INJECTION_POINTS + if (classForm->oid == EnumRelationId) + INJECTION_POINT("update_datfrozenxid-before-fetch-relfrozenxid"); +#endif + relfrozenxid = classForm->relfrozenxid; + relminmxid = classForm->relminmxid; /* * Only consider relations able to hold unfrozen XIDs (anything else @@ -1706,7 +1721,8 @@ vac_update_datfrozenxid(void) Assert(MultiXactIdIsValid(newMinMulti)); /* Now fetch the pg_database tuple we need to update. */ - relation = table_open(DatabaseRelationId, RowExclusiveLock); + /* relation = table_open(DatabaseRelationId, RowExclusiveLock); */ + relation = dbrelation; /* * Fetch a copy of the tuple to scribble on. We could check the syscache diff --git a/src/backend/utils/misc/injection_point.c b/src/backend/utils/misc/injection_point.c index 5c2a0d2..64720f3 100644 --- a/src/backend/utils/misc/injection_point.c +++ b/src/backend/utils/misc/injection_point.c @@ -291,11 +291,18 @@ void InjectionPointRun(const char *name) { #ifdef USE_INJECTION_POINTS + bool reset_allow = false; InjectionPointEntry *entry_by_name; bool found; InjectionPointCallback injection_callback; const void *private_data; + if (CritSectionCount > 0 && !MemoryContextAllowAllInCriticalSection) + { + reset_allow = true; + MemoryContextAllowAllInCriticalSection = true; + } + LWLockAcquire(InjectionPointLock, LW_SHARED); entry_by_name = (InjectionPointEntry *) hash_search(InjectionPointHash, name, @@ -309,7 +316,7 @@ InjectionPointRun(const char *name) if (!found) { injection_point_cache_remove(name); - return; + goto out; } /* @@ -344,6 +351,11 @@ InjectionPointRun(const char *name) /* Now loaded, so get it. */ injection_callback = injection_point_cache_get(name, &private_data); injection_callback(name, private_data); + +out: + /* elog(ERROR) would have become PANIC, so we never miss this reset */ + if (reset_allow) + MemoryContextAllowAllInCriticalSection = false; #else elog(ERROR, "Injection points are not supported by this build"); #endif diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index b42ecec..dda3dcb 100644 --- a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -157,6 +157,17 @@ MemoryContext CurTransactionContext = NULL; /* This is a transient link to the active portal's memory context: */ MemoryContext PortalContext = NULL; +/* + * If true, suppress all assertions about allocations in critical sections. + * Behave as though MemoryContextAllowInCriticalSection() had been called for + * every context, and permit new contexts. Caller is responsible for + * restoring value on error. The intended use case is debugging aids that + * can't use a single-purpose context. For example, LockAcquire() allocates + * in TopMemoryContext, so a debugging aid that calls it has no clean way to + * redirect that allocation. + */ +bool MemoryContextAllowAllInCriticalSection = false; + static void MemoryContextDeleteOnly(MemoryContext context); static void MemoryContextCallResetCallbacks(MemoryContext context); static void MemoryContextStatsInternal(MemoryContext context, int level, @@ -173,7 +184,8 @@ static void MemoryContextStatsPrint(MemoryContext context, void *passthru, * rule, the allocation functions Assert that. */ #define AssertNotInCriticalSection(context) \ - Assert(CritSectionCount == 0 || (context)->allowInCritSection) + Assert(CritSectionCount == 0 || (context)->allowInCritSection || \ + MemoryContextAllowAllInCriticalSection) /* * Call the given function in the MemoryContextMethods for the memory context @@ -1103,8 +1115,7 @@ MemoryContextCreate(MemoryContext node, MemoryContext parent, const char *name) { - /* Creating new memory contexts is not allowed in a critical section */ - Assert(CritSectionCount == 0); + Assert(CritSectionCount == 0 || MemoryContextAllowAllInCriticalSection); /* Initialize all standard fields of memory context header */ node->type = tag; diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h index cd9596f..9889225 100644 --- a/src/include/utils/memutils.h +++ b/src/include/utils/memutils.h @@ -93,6 +93,8 @@ extern void MemoryContextStatsDetail(MemoryContext context, extern void MemoryContextAllowInCriticalSection(MemoryContext context, bool allow); +extern PGDLLIMPORT bool MemoryContextAllowAllInCriticalSection; + #ifdef MEMORY_CONTEXT_CHECKING extern void MemoryContextCheck(MemoryContext context); #endif diff --git a/src/test/modules/injection_points/Makefile b/src/test/modules/injection_points/Makefile index 2ffd2f7..4b75d27 100644 --- a/src/test/modules/injection_points/Makefile +++ b/src/test/modules/injection_points/Makefile @@ -10,6 +10,7 @@ REGRESS = injection_points REGRESS_OPTS = --dlpath=$(top_builddir)/src/test/regress ISOLATION = inplace +TAP_TESTS = 1 # The injection points are cluster-wide, so disable installcheck NO_INSTALLCHECK = 1 diff --git a/src/test/modules/injection_points/injection_points.c b/src/test/modules/injection_points/injection_points.c index 1b695a1..1b1cb56 100644 --- a/src/test/modules/injection_points/injection_points.c +++ b/src/test/modules/injection_points/injection_points.c @@ -238,6 +238,8 @@ injection_wait(const char *name, const void *private_data) elog(ERROR, "could not find free slot for wait of injection point %s ", name); + elog(DEBUG1, "waiting at injection point %s", name); + /* And sleep.. */ ConditionVariablePrepareToSleep(&inj_state->wait_point); for (;;) diff --git a/src/test/modules/injection_points/meson.build b/src/test/modules/injection_points/meson.build index 3c23c14..86639c5 100644 --- a/src/test/modules/injection_points/meson.build +++ b/src/test/modules/injection_points/meson.build @@ -42,4 +42,9 @@ tests += { 'inplace', ], }, + 'tap': { + 'tests': [ + 't/001_inplace.pl', + ], + }, } diff --git a/src/test/modules/injection_points/t/001_inplace.pl b/src/test/modules/injection_points/t/001_inplace.pl new file mode 100644 index 0000000..fc9380d --- /dev/null +++ b/src/test/modules/injection_points/t/001_inplace.pl @@ -0,0 +1,129 @@ +# Copyright (c) 2024, PostgreSQL Global Development Group + +# Test race between vac_update_datfrozenxid() pg_class scan and inplace update +# of relfrozenxid: +# +# ["D" is a VACUUM (ONLY_DATABASE_STATS)] +# ["R" is a VACUUM tbl] +# D: vac_update_datfrozenid() -> systable_beginscan(pg_class) +# D: systable_getnext() returns pg_class tuple of tbl +# R: memcpy() into pg_class tuple of tbl +# D: raise pg_database.datfrozenxid, XLogInsert(), finish +# [crash] +# [recovery restores datfrozenxid w/o relfrozenxid] +use strict; +use warnings FATAL => 'all'; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +my $node = PostgreSQL::Test::Cluster->new('only'); +# We won't use a replica, but this is handy for (a) pg_waldump on the archive +# and (b) pg_waldump showing the logged invalidations. +$node->init(has_archiving => 1, allows_streaming => 1); +$node->start; + +# Choice of table doesn't greatly matter, but injection point triggers on this +# one. Using a catalog gives the injection point a trivial OID-based test. +my $frozenxid_table = 'pg_enum'; +my $dat_inj = 'update_datfrozenxid-before-fetch-relfrozenxid'; +my $inplace_inj = 'inplace-after-mempcy'; + +# session locks one table, as a way to VACUUM all database tables but this one +my $lock_table = $node->background_psql('postgres'); +# session waits in vac_update_datfrozenxid() +my $update_dat = $node->background_psql('postgres'); +# session waits in inplace update of vac_update_relstats() +my $update_rel = $node->background_psql('postgres'); +# session detaches another session +my $detacher = $node->background_psql('postgres', on_error_stop => 0); + +my $start_id; +$node->psql( + 'postgres', qq( + SELECT relfrozenxid, datfrozenxid + FROM pg_class, pg_database + WHERE relname = '$frozenxid_table' AND datname = current_catalog; +), stdout => \$start_id); +print STDOUT "relfrozenxid, datfrozenxid: $start_id\n"; + +# populate syscaches; query fails, but that's fine +$detacher->query( + qq( + CREATE EXTENSION injection_points; + SELECT injection_points_detach('$dat_inj'); +)); + +# marker in WAL stream, for pg_waldump convenience +$node->safe_psql( + 'postgres', qq( + SELECT pg_logical_emit_message(false, 'before', '.', true); +)); + +$lock_table->query_safe( + "BEGIN; LOCK TABLE $frozenxid_table IN SHARE UPDATE EXCLUSIVE MODE"); +$update_rel->query_safe("SELECT txid_current()"); +# update relfrozenxid for all tables but $frozenxid_table +$update_rel->query_safe("VACUUM (FREEZE, SKIP_LOCKED, SKIP_DATABASE_STATS);"); +# start ONLY_DATABASE_STATS and wait for it to reach injection point +$update_dat->query_until( + qr/at injection point/, qq( + SELECT injection_points_set_local(); + SELECT injection_points_attach('$dat_inj', 'wait'); + SET client_min_messages = debug1; + VACUUM (ONLY_DATABASE_STATS); +)); + +# start one-table VACUUM and wait for it to reach injection point +$lock_table->quit; +$update_rel->query_until( + qr/at injection point/, qq( + SELECT injection_points_set_local(); + SELECT injection_points_attach('$inplace_inj', 'wait'); + SET client_min_messages = debug1; + VACUUM (FREEZE, SKIP_DATABASE_STATS) $frozenxid_table; +)); + +# complete ONLY_DATABASE_STATS +$detacher->query( + qq( + SELECT injection_points_detach('$dat_inj'); + SELECT injection_points_wakeup('$dat_inj'); +)); +$detacher->quit; +# flush WAL, since VACUUM did an asynchronous commit +$update_dat->query( + qq( + SELECT pg_logical_emit_message(false, 'after', '.', true); +)); +$update_dat->quit; + +# crash with $inplace_inj still waiting +$node->stop('immediate'); +# don't quit update_rel, which would suffer EPIPE +#$update_rel->quit; +$node->start; + +# check for corruption +my $stdout; +$node->psql( + 'postgres', q( + SELECT pg_class.oid::regclass + FROM pg_class, pg_database + WHERE age(relfrozenxid) > age(datfrozenxid) + AND relkind = 'r' AND datname = current_catalog; +), stdout => \$stdout); +is($stdout, '', 'datfrozenxid younger than every relfrozenxid'); + +# print more detail +$node->psql( + 'postgres', qq( + SELECT relfrozenxid, datfrozenxid + FROM pg_class, pg_database + WHERE relname = '$frozenxid_table' AND datname = current_catalog; +), stdout => \$stdout); +isnt($stdout, $start_id, 'frozenxid values changed'); +print STDOUT "relfrozenxid, datfrozenxid: $stdout\n"; + +done_testing(); diff --git a/src/test/perl/PostgreSQL/Test/BackgroundPsql.pm b/src/test/perl/PostgreSQL/Test/BackgroundPsql.pm index 4091c31..c3f55bb 100644 --- a/src/test/perl/PostgreSQL/Test/BackgroundPsql.pm +++ b/src/test/perl/PostgreSQL/Test/BackgroundPsql.pm @@ -260,10 +260,10 @@ sub query_safe =item $session->query_until(until, query) -Issue C and wait for C appearing in the query output rather than -waiting for query completion. C needs to end with newline and semicolon -(if applicable, interactive psql input may not require it) for psql to process -the input. +Issue C and wait for C appearing in the query stdout or stderr +rather than waiting for query completion. C needs to end with newline +and semicolon (if applicable, interactive psql input may not require it) for +psql to process the input. =cut @@ -276,7 +276,8 @@ sub query_until $self->{timeout}->start() if (defined($self->{query_timer_restart})); $self->{stdin} .= $query; - pump_until($self->{run}, $self->{timeout}, \$self->{stdout}, $until); + pump_until($self->{run}, $self->{timeout}, + [ \$self->{stdout}, \$self->{stderr} ], $until); die "psql query timed out" if $self->{timeout}->is_expired; diff --git a/src/test/perl/PostgreSQL/Test/Utils.pm b/src/test/perl/PostgreSQL/Test/Utils.pm index 022b44b..d1791ee 100644 --- a/src/test/perl/PostgreSQL/Test/Utils.pm +++ b/src/test/perl/PostgreSQL/Test/Utils.pm @@ -427,29 +427,39 @@ sub run_command =item pump_until(proc, timeout, stream, until) -Pump until string is matched on the specified stream, or timeout occurs. +Pump until string is matched on the specified stream(s), or timeout occurs. +The stream argument can be a scalar ref like, \$stdout, or a ref to an array +of scalar refs, like [\$stdout, \$stderr]. =cut sub pump_until { - my ($proc, $timeout, $stream, $until) = @_; + my ($proc, $timeout, $stream_spec, $until) = @_; $proc->pump_nb(); - while (1) + $stream_spec = [$stream_spec] if ref $stream_spec ne 'ARRAY'; + OUTER: while (1) { - last if $$stream =~ /$until/; + foreach my $stream (@$stream_spec) + { + last OUTER if $$stream =~ /$until/; + } if ($timeout->is_expired) { diag( - "pump_until: timeout expired when searching for \"$until\" with stream: \"$$stream\"" - ); + "pump_until: timeout expired when searching for \"$until\" with stream(s): " + . '"' + . join('", "', map { $$_ } @$stream_spec) + . '"'); return 0; } if (not $proc->pumpable()) { diag( - "pump_until: process terminated unexpectedly when searching for \"$until\" with stream: \"$$stream\"" - ); + "pump_until: process terminated unexpectedly when searching for \"$until\" with stream(s): " + . '"' + . join('", "', map { $$_ } @$stream_spec) + . '"'); return 0; } $proc->pump();