Author: Noah Misch Commit: Noah Misch At update of non-LP_NORMAL TID, fail instead of corrupting page header. The right mix of DDL and VACUUM could corrupt a page header such that PageIsVerified() durably fails, requiring a restore from backup. One of the test permutations shows a variant not yet fixed. This makes !TransactionIdIsValid(TM_FailureData.xmax) possible with TM_Deleted. Core and PGXN appear indifferent to that. Per bug #17821 from Alexander Lakhin. Back-patch to v13 (all supported versions). The test case is v17+, since it uses INJECTION_POINT. Reviewed by FIXME. Discussion: https://postgr.es/m/17821-dd8c334263399284@postgresql.org diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index b634995..9d00808 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -49,8 +49,10 @@ #include "storage/predicate.h" #include "storage/procarray.h" #include "utils/datum.h" +#include "utils/injection_point.h" #include "utils/inval.h" #include "utils/spccache.h" +#include "utils/syscache.h" static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, @@ -3254,6 +3256,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, interesting_attrs = bms_add_members(interesting_attrs, id_attrs); block = ItemPointerGetBlockNumber(otid); + INJECTION_POINT("heap_update-before-pin"); buffer = ReadBuffer(relation, block); page = BufferGetPage(buffer); @@ -3269,7 +3272,51 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); lp = PageGetItemId(page, ItemPointerGetOffsetNumber(otid)); - Assert(ItemIdIsNormal(lp)); + + /* + * Usually, a buffer pin and/or snapshot blocks pruning of otid, ensuring + * we see LP_NORMAL here. When the otid origin is a syscache, we may have + * neither a pin nor a snapshot. Hence, we may see other LP_ states, each + * of which indicates concurrent pruning. + * + * Failing with TM_Updated would be most accurate. 
However, unlike other + * TM_Updated scenarios, we don't know the successor ctid in LP_UNUSED and + * LP_DEAD cases. While the distinction between TM_Updated and TM_Deleted + * does matter to SQL statements UPDATE and MERGE, those SQL statements + * hold a snapshot that ensures LP_NORMAL. Hence, the choice between + * TM_Updated and TM_Deleted affects only the wording of error messages. + * Settle on TM_Deleted, for two reasons. First, it avoids complicating + * the specification of when tmfd->ctid is valid. Second, it creates + * error log evidence that we took this branch. + * + * Since it's possible to see LP_UNUSED at otid, it's also possible to see + * LP_NORMAL for a tuple that replaced LP_UNUSED. If it's a tuple for an + * unrelated row, we'll fail with "duplicate key value violates unique". + * XXX if otid is the live, newer version of the newtup row, we'll discard + * changes originating in versions of this catalog row after the version + * the caller got from syscache. See syscache-update-pruned.spec. 
+ */ + if (!ItemIdIsNormal(lp)) + { + Assert(RelationSupportsSysCache(RelationGetRelid(relation))); + + UnlockReleaseBuffer(buffer); + Assert(!have_tuple_lock); + if (vmbuffer != InvalidBuffer) + ReleaseBuffer(vmbuffer); + tmfd->ctid = *otid; + tmfd->xmax = InvalidTransactionId; + tmfd->cmax = InvalidCommandId; + *update_indexes = TU_None; + + bms_free(hot_attrs); + bms_free(sum_attrs); + bms_free(key_attrs); + bms_free(id_attrs); + /* modified_attrs not yet initialized */ + bms_free(interesting_attrs); + return TM_Deleted; + } /* * Fill in enough data in oldtup for HeapDetermineColumnsInfo to work diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index f41d314..32cf28b 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -123,6 +123,7 @@ #include "storage/sinval.h" #include "storage/smgr.h" #include "utils/catcache.h" +#include "utils/injection_point.h" #include "utils/inval.h" #include "utils/memdebug.h" #include "utils/memutils.h" @@ -1134,6 +1135,8 @@ AtEOXact_Inval(bool isCommit) /* Must be at top of stack */ Assert(transInvalInfo->my_level == 1 && transInvalInfo->parent == NULL); + INJECTION_POINT("AtEOXact_Inval-with-transInvalInfo"); + if (isCommit) { /* diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 09b9b39..131c050 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -136,7 +136,8 @@ typedef enum TU_UpdateIndexes * * xmax is the outdating transaction's XID. If the caller wants to visit the * replacement tuple, it must check that this matches before believing the - * replacement is really a match. + * replacement is really a match. This is InvalidTransactionId if the target + * was !LP_NORMAL (expected only for a TID retrieved from syscache). 
* * cmax is the outdating command's CID, but only when the failure code is * TM_SelfModified (i.e., something in the current transaction outdated the diff --git a/src/test/modules/injection_points/Makefile b/src/test/modules/injection_points/Makefile index 0753a9d..4f0161f 100644 --- a/src/test/modules/injection_points/Makefile +++ b/src/test/modules/injection_points/Makefile @@ -5,7 +5,8 @@ OBJS = \ $(WIN32RES) \ injection_points.o \ injection_stats.o \ - injection_stats_fixed.o + injection_stats_fixed.o \ + regress_injection.o EXTENSION = injection_points DATA = injection_points--1.0.sql PGFILEDESC = "injection_points - facility for injection points" @@ -13,7 +14,7 @@ PGFILEDESC = "injection_points - facility for injection points" REGRESS = injection_points reindex_conc REGRESS_OPTS = --dlpath=$(top_builddir)/src/test/regress -ISOLATION = basic inplace +ISOLATION = basic inplace syscache-update-pruned TAP_TESTS = 1 diff --git a/src/test/modules/injection_points/expected/syscache-update-pruned.out b/src/test/modules/injection_points/expected/syscache-update-pruned.out new file mode 100644 index 0000000..5dc5a1d --- /dev/null +++ b/src/test/modules/injection_points/expected/syscache-update-pruned.out @@ -0,0 +1,87 @@ +Parsed test spec with 4 sessions + +starting permutation: cachefill1 at2 waitprunable4 vac4 grant1 wakeinval4 wakegrant4 +step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); +step at2: + CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 + FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); + +step waitprunable4: CALL vactest.wait_prunable(); +step vac4: VACUUM pg_class; +step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; +step wakeinval4: + SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); + SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); + +step at2: <... completed> +step wakeinval4: <... 
completed> +step wakegrant4: + SELECT FROM injection_points_detach('heap_update-before-pin'); + SELECT FROM injection_points_wakeup('heap_update-before-pin'); + +step grant1: <... completed> +ERROR: tuple concurrently deleted +step wakegrant4: <... completed> + +starting permutation: cachefill1 at2 waitprunable4 vac4 grant1 wakeinval4 mkrels4 wakegrant4 +step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); +step at2: + CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 + FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); + +step waitprunable4: CALL vactest.wait_prunable(); +step vac4: VACUUM pg_class; +step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; +step wakeinval4: + SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); + SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); + +step at2: <... completed> +step wakeinval4: <... completed> +step mkrels4: + SELECT FROM vactest.mkrels('intruder', 1, 100); -- repopulate LP_UNUSED + +step wakegrant4: + SELECT FROM injection_points_detach('heap_update-before-pin'); + SELECT FROM injection_points_wakeup('heap_update-before-pin'); + +step grant1: <... completed> +ERROR: duplicate key value violates unique constraint "pg_class_oid_index" +step wakegrant4: <... 
completed> + +starting permutation: snap3 cachefill1 at2 mkrels4 r3 waitprunable4 vac4 grant1 wakeinval4 at4 wakegrant4 inspect4 +step snap3: BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT; +step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); +step at2: + CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 + FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); + +step mkrels4: + SELECT FROM vactest.mkrels('intruder', 1, 100); -- repopulate LP_UNUSED + +step r3: ROLLBACK; +step waitprunable4: CALL vactest.wait_prunable(); +step vac4: VACUUM pg_class; +step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; +step wakeinval4: + SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); + SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); + +step at2: <... completed> +step wakeinval4: <... completed> +step at4: ALTER TABLE vactest.child50 INHERIT vactest.orig50; +step wakegrant4: + SELECT FROM injection_points_detach('heap_update-before-pin'); + SELECT FROM injection_points_wakeup('heap_update-before-pin'); + +step grant1: <... completed> +step wakegrant4: <... 
completed> +step inspect4: + SELECT relhastriggers, relhassubclass FROM pg_class + WHERE oid = 'vactest.orig50'::regclass; + +relhastriggers|relhassubclass +--------------+-------------- +f |f +(1 row) + diff --git a/src/test/modules/injection_points/expected/syscache-update-pruned_1.out b/src/test/modules/injection_points/expected/syscache-update-pruned_1.out new file mode 100644 index 0000000..b18857c --- /dev/null +++ b/src/test/modules/injection_points/expected/syscache-update-pruned_1.out @@ -0,0 +1,86 @@ +Parsed test spec with 4 sessions + +starting permutation: cachefill1 at2 waitprunable4 vac4 grant1 wakeinval4 wakegrant4 +step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); +step at2: + CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 + FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); + +step waitprunable4: CALL vactest.wait_prunable(); +step vac4: VACUUM pg_class; +step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; +step wakeinval4: + SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); + SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); + +step at2: <... completed> +step wakeinval4: <... completed> +step wakegrant4: + SELECT FROM injection_points_detach('heap_update-before-pin'); + SELECT FROM injection_points_wakeup('heap_update-before-pin'); + +step grant1: <... completed> +step wakegrant4: <... 
completed> + +starting permutation: cachefill1 at2 waitprunable4 vac4 grant1 wakeinval4 mkrels4 wakegrant4 +step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); +step at2: + CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 + FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); + +step waitprunable4: CALL vactest.wait_prunable(); +step vac4: VACUUM pg_class; +step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; +step wakeinval4: + SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); + SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); + +step at2: <... completed> +step wakeinval4: <... completed> +step mkrels4: + SELECT FROM vactest.mkrels('intruder', 1, 100); -- repopulate LP_UNUSED + +step wakegrant4: + SELECT FROM injection_points_detach('heap_update-before-pin'); + SELECT FROM injection_points_wakeup('heap_update-before-pin'); + +step grant1: <... completed> +step wakegrant4: <... completed> + +starting permutation: snap3 cachefill1 at2 mkrels4 r3 waitprunable4 vac4 grant1 wakeinval4 at4 wakegrant4 inspect4 +step snap3: BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT; +step cachefill1: SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); +step at2: + CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 + FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); + +step mkrels4: + SELECT FROM vactest.mkrels('intruder', 1, 100); -- repopulate LP_UNUSED + +step r3: ROLLBACK; +step waitprunable4: CALL vactest.wait_prunable(); +step vac4: VACUUM pg_class; +step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; +step wakeinval4: + SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); + SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); + +step at2: <... completed> +step wakeinval4: <... 
completed> +step at4: ALTER TABLE vactest.child50 INHERIT vactest.orig50; +step wakegrant4: + SELECT FROM injection_points_detach('heap_update-before-pin'); + SELECT FROM injection_points_wakeup('heap_update-before-pin'); + +step grant1: <... completed> +ERROR: tuple concurrently updated +step wakegrant4: <... completed> +step inspect4: + SELECT relhastriggers, relhassubclass FROM pg_class + WHERE oid = 'vactest.orig50'::regclass; + +relhastriggers|relhassubclass +--------------+-------------- +t |t +(1 row) + diff --git a/src/test/modules/injection_points/injection_points--1.0.sql b/src/test/modules/injection_points/injection_points--1.0.sql index 6c81d55..c445bf6 100644 --- a/src/test/modules/injection_points/injection_points--1.0.sql +++ b/src/test/modules/injection_points/injection_points--1.0.sql @@ -97,3 +97,11 @@ CREATE FUNCTION injection_points_stats_fixed(OUT numattach int8, RETURNS record AS 'MODULE_PATHNAME', 'injection_points_stats_fixed' LANGUAGE C STRICT; + +-- +-- regress_injection.c functions +-- +CREATE FUNCTION removable_cutoff(rel regclass) +RETURNS xid8 +AS 'MODULE_PATHNAME' +LANGUAGE C CALLED ON NULL INPUT; diff --git a/src/test/modules/injection_points/meson.build b/src/test/modules/injection_points/meson.build index ebe79fe..259045e 100644 --- a/src/test/modules/injection_points/meson.build +++ b/src/test/modules/injection_points/meson.build @@ -8,6 +8,7 @@ injection_points_sources = files( 'injection_points.c', 'injection_stats.c', 'injection_stats_fixed.c', + 'regress_injection.c', ) if host_system == 'windows' @@ -44,8 +45,9 @@ tests += { 'specs': [ 'basic', 'inplace', + 'syscache-update-pruned', ], - 'runningcheck': false, # align with GNU make build system + 'runningcheck': false, # see syscache-update-pruned }, 'tap': { 'env': { diff --git a/src/test/modules/injection_points/regress_injection.c b/src/test/modules/injection_points/regress_injection.c new file mode 100644 index 0000000..422f416 --- /dev/null +++ 
b/src/test/modules/injection_points/regress_injection.c @@ -0,0 +1,71 @@ +/*-------------------------------------------------------------------------- + * + * regress_injection.c + * Functions supporting test-specific subject matter. + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/test/modules/injection_points/regress_injection.c + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/table.h" +#include "fmgr.h" +#include "miscadmin.h" +#include "storage/procarray.h" +#include "utils/xid8.h" + +/* + * removable_cutoff - for syscache-update-pruned.spec + * + * Wrapper around GetOldestNonRemovableTransactionId(). In general, this can + * move backward. runningcheck=false isolation tests can reasonably prevent + * that. For the causes of backward movement, see + * postgr.es/m/CAEze2Wj%2BV0kTx86xB_YbyaqTr5hnE_igdWAwuhSyjXBYscf5-Q%40mail.gmail.com + * and the header comment for ComputeXidHorizons(). One can assume this + * doesn't move backward if one arranges for concurrent activity not to reach + * AbortTransaction() and not to allocate an XID while connected to another + * database. Non-runningcheck tests can control most concurrent activity, + * except autovacuum and the isolationtester control connection. Neither + * allocates XIDs, and AbortTransaction() in those would justify test failure. + */ +PG_FUNCTION_INFO_V1(removable_cutoff); +Datum +removable_cutoff(PG_FUNCTION_ARGS) +{ + Relation rel = NULL; + TransactionId xid; + FullTransactionId next_fxid_before, + next_fxid; + + /* could take other relkinds callee takes, but we've not yet needed it */ + if (!PG_ARGISNULL(0)) + rel = table_open(PG_GETARG_OID(0), AccessShareLock); + + /* + * No lock or snapshot necessarily prevents oldestXid from advancing past + * "xid" while this function runs. 
That concerns us only in that we must + * not ascribe "xid" to the wrong epoch. (That may never arise in + * isolation testing, but let's set a good example.) As a crude solution, + * retry until nextXid doesn't change. + */ + next_fxid = ReadNextFullTransactionId(); + do + { + CHECK_FOR_INTERRUPTS(); + next_fxid_before = next_fxid; + xid = GetOldestNonRemovableTransactionId(rel); + next_fxid = ReadNextFullTransactionId(); + } while (!FullTransactionIdEquals(next_fxid, next_fxid_before)); + + if (rel) + table_close(rel, AccessShareLock); + + PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromAllowableAt(next_fxid, + xid)); +} diff --git a/src/test/modules/injection_points/specs/syscache-update-pruned.spec b/src/test/modules/injection_points/specs/syscache-update-pruned.spec new file mode 100644 index 0000000..b48e897 --- /dev/null +++ b/src/test/modules/injection_points/specs/syscache-update-pruned.spec @@ -0,0 +1,179 @@ +# Test race conditions involving: +# - s1: heap_update($FROM_SYSCACHE), without a snapshot or pin +# - s2: CREATE TRIGGER making $FROM_SYSCACHE a dead tuple +# - s4: "VACUUM pg_class" making $FROM_SYSCACHE become LP_UNUSED + +# This is a derivative work of inplace.spec, which exercises the corresponding +# race condition for inplace updates. + +# Despite local injection points, this is incompatible with runningcheck. +# First, removable_cutoff() could move backward, per its header comment. +# Second, other activity could trigger sinval queue overflow, negating our +# efforts to delay inval. Third, this deadlock emerges: +# +# - step at2 waits at an injection point, with interrupts held +# - an unrelated backend waits for at2 to do PROCSIGNAL_BARRIER_SMGRRELEASE +# - step waitprunable4 waits for the unrelated backend to release its xmin + +# The alternative expected output is for -DCATCACHE_FORCE_RELEASE, a setting +# that thwarts testing the race conditions this spec seeks. + + +# Need s2 to make a non-HOT update. 
Otherwise, "VACUUM pg_class" would leave +# an LP_REDIRECT that persists. To get non-HOT, make rels so the pg_class row +# for vactest.orig50 is on a filled page (assuming BLCKSZ=8192). Just to save +# on filesystem syscalls, use relkind=c for every other rel. +setup +{ + CREATE EXTENSION injection_points; + CREATE SCHEMA vactest; + -- Ensure a leader RELOID catcache entry. PARALLEL RESTRICTED since a + -- parallel worker running pg_relation_filenode() would lack that effect. + CREATE FUNCTION vactest.reloid_catcache_set(regclass) RETURNS int + LANGUAGE sql PARALLEL RESTRICTED + AS 'SELECT 0 FROM pg_relation_filenode($1)'; + CREATE FUNCTION vactest.mkrels(text, int, int) RETURNS void + LANGUAGE plpgsql SET search_path = vactest AS $$ + DECLARE + tname text; + BEGIN + FOR i in $2 .. $3 LOOP + tname := $1 || i; + EXECUTE FORMAT('CREATE TYPE ' || tname || ' AS ()'); + RAISE DEBUG '% at %', tname, ctid + FROM pg_class WHERE oid = tname::regclass; + END LOOP; + END + $$; + CREATE PROCEDURE vactest.wait_prunable() LANGUAGE plpgsql AS $$ + DECLARE + barrier xid8; + cutoff xid8; + BEGIN + barrier := pg_current_xact_id(); + -- autovacuum worker RelationCacheInitializePhase3() or the + -- isolationtester control connection might hold a snapshot that + -- limits pruning. Sleep until that clears. + LOOP + ROLLBACK; -- release MyProc->xmin, which could be the oldest + cutoff := removable_cutoff('pg_class'); + EXIT WHEN cutoff >= barrier; + RAISE LOG 'removable cutoff %; waiting for %', cutoff, barrier; + PERFORM pg_sleep(.1); + END LOOP; + END + $$; +} +setup { CALL vactest.wait_prunable(); -- maximize next two VACUUMs } +setup { VACUUM FULL pg_class; -- reduce free space } +setup { VACUUM FREEZE pg_class; -- populate fsm etc. 
} +setup +{ + SELECT FROM vactest.mkrels('orig', 1, 49); + CREATE TABLE vactest.orig50 (c int) WITH (autovacuum_enabled = off); + CREATE TABLE vactest.child50 (c int) WITH (autovacuum_enabled = off); + SELECT FROM vactest.mkrels('orig', 51, 100); +} +teardown +{ + DROP SCHEMA vactest CASCADE; + DROP EXTENSION injection_points; +} + +# Wait during GRANT. Disable debug_discard_caches, since we're here to +# exercise an outcome that happens under permissible cache staleness. +session s1 +setup { + SET debug_discard_caches = 0; + SELECT FROM injection_points_set_local(); + SELECT FROM injection_points_attach('heap_update-before-pin', 'wait'); +} +step cachefill1 { SELECT FROM vactest.reloid_catcache_set('vactest.orig50'); } +step grant1 { GRANT SELECT ON vactest.orig50 TO PUBLIC; } + +# Update of the tuple that grant1 will update. Wait before sending invals, so +# s1 will not get a cache miss. Choose the commands for making such updates +# from among those whose heavyweight locking does not conflict with GRANT's +# heavyweight locking. (GRANT will see our XID as committed, so observing +# that XID in the tuple xmax also won't block GRANT.) +session s2 +setup { + SELECT FROM injection_points_set_local(); + SELECT FROM + injection_points_attach('AtEOXact_Inval-with-transInvalInfo', 'wait'); +} +step at2 { + CREATE TRIGGER to_set_relhastriggers BEFORE UPDATE ON vactest.orig50 + FOR EACH ROW EXECUTE PROCEDURE suppress_redundant_updates_trigger(); +} + +# Hold snapshot to block pruning. +session s3 +step snap3 { BEGIN ISOLATION LEVEL REPEATABLE READ; SELECT; } +step r3 { ROLLBACK; } + +# Non-blocking actions. +session s4 +step waitprunable4 { CALL vactest.wait_prunable(); } +step vac4 { VACUUM pg_class; } +# Reuse the lp that s1 is waiting to change. I've observed reuse at the 1st +# or 18th CREATE, so create excess. 
+step mkrels4 { + SELECT FROM vactest.mkrels('intruder', 1, 100); -- repopulate LP_UNUSED +} +step wakegrant4 { + SELECT FROM injection_points_detach('heap_update-before-pin'); + SELECT FROM injection_points_wakeup('heap_update-before-pin'); +} +step at4 { ALTER TABLE vactest.child50 INHERIT vactest.orig50; } +step wakeinval4 { + SELECT FROM injection_points_detach('AtEOXact_Inval-with-transInvalInfo'); + SELECT FROM injection_points_wakeup('AtEOXact_Inval-with-transInvalInfo'); +} +# Witness effects of steps at2 and/or at4. +step inspect4 { + SELECT relhastriggers, relhassubclass FROM pg_class + WHERE oid = 'vactest.orig50'::regclass; +} + +# TID from syscache becomes LP_UNUSED. Before the bug fix, this permutation +# made s1 fail with "attempted to update invisible tuple" or an assert. +# However, suppose a pd_lsn value such that (pd_lsn.xlogid, pd_lsn.xrecoff) +# passed for (xmin, xmax) with xmin known-committed and xmax known-aborted. +# Persistent page header corruption ensued. For example, s1 overwrote +# pd_lower, pd_upper, and pd_special as though they were t_ctid. +permutation + cachefill1 # reads pg_class tuple T0, xmax invalid + at2 # T0 dead, T1 live + waitprunable4 # T0 prunable + vac4 # T0 becomes LP_UNUSED + grant1 # pauses at heap_update(T0) + wakeinval4(at2) # at2 sends inval message + wakegrant4(grant1) # s1 wakes: "tuple concurrently deleted" + +# add mkrels4: LP_UNUSED becomes a different rel's row +permutation + cachefill1 # reads pg_class tuple T0, xmax invalid + at2 # T0 dead, T1 live + waitprunable4 # T0 prunable + vac4 # T0 becomes LP_UNUSED + grant1 # pauses at heap_update(T0) + wakeinval4(at2) # at2 sends inval message + mkrels4 # T0 becomes a new rel + wakegrant4(grant1) # s1 wakes: "duplicate key value violates unique" + +# TID from syscache becomes LP_UNUSED, then becomes a newer version of the +# original rel's row. 
+permutation + snap3 # sets MyProc->xmin + cachefill1 # reads pg_class tuple T0, xmax invalid + at2 # T0 dead, T1 live + mkrels4 # T1's page becomes full + r3 # clears MyProc->xmin + waitprunable4 # T0 prunable + vac4 # T0 becomes LP_UNUSED + grant1 # pauses at heap_update(T0) + wakeinval4(at2) # at2 sends inval message + at4 # T1 dead, T0 live + wakegrant4(grant1) # s1 wakes: T0 dead, T2 live + inspect4 # observe loss of at2+at4 changes XXX is an extant bug