Author: Noah Misch Commit: Noah Misch diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 485525f..fed1279 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -49,6 +49,7 @@ #include "storage/predicate.h" #include "storage/procarray.h" #include "utils/datum.h" +#include "utils/injection_point.h" #include "utils/inval.h" #include "utils/spccache.h" @@ -3236,6 +3237,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, interesting_attrs = bms_add_members(interesting_attrs, id_attrs); block = ItemPointerGetBlockNumber(otid); + INJECTION_POINT("heap_update-before-pin"); buffer = ReadBuffer(relation, block); page = BufferGetPage(buffer); @@ -6375,6 +6377,9 @@ heap_inplace_update_and_unlock(Relation relation, */ PreInplace_Inval(); + INJECTION_POINT_LOAD("SIInsertDataEntries2"); + INJECTION_POINT_LOAD("SIInsertDataEntries3"); + /*---------- * NO EREPORT(ERROR) from here till changes are complete * diff --git a/src/backend/storage/ipc/sinvaladt.c b/src/backend/storage/ipc/sinvaladt.c index 2da9173..ed0f173 100644 --- a/src/backend/storage/ipc/sinvaladt.c +++ b/src/backend/storage/ipc/sinvaladt.c @@ -25,6 +25,7 @@ #include "storage/shmem.h" #include "storage/sinvaladt.h" #include "storage/spin.h" +#include "utils/injection_point.h" /* * Conceptually, the shared cache invalidation messages are stored in an @@ -386,6 +387,17 @@ SIInsertDataEntries(const SharedInvalidationMessage *data, int n) int max; int i; + /* + * CacheInvalidateSmgr skips the injection point, since this + * facilitates a test that needs this only for delaying pg_class + * syscache invals and relcache invals. + */ + if (n != 1 || data->sm.id != SHAREDINVALSMGR_ID) + { + INJECTION_POINT("SIInsertDataEntries2"); + INJECTION_POINT("SIInsertDataEntries3"); + } + n -= nthistime; LWLockAcquire(SInvalWriteLock, LW_EXCLUSIVE); diff --git a/src/test/modules/injection_points/Makefile b/src/test/modules/injection_points/Makefile index 0753a9d..802ea8c 100644 --- a/src/test/modules/injection_points/Makefile +++ b/src/test/modules/injection_points/Makefile @@ -13,7 +13,8 @@ PGFILEDESC = "injection_points - facility for injection points" REGRESS = injection_points reindex_conc REGRESS_OPTS = --dlpath=$(top_builddir)/src/test/regress -ISOLATION = basic inplace +EXTRA_INSTALL = contrib/pageinspect contrib/pg_buffercache +ISOLATION = basic syscache-update-pruned inplace TAP_TESTS = 1 diff --git a/src/test/modules/injection_points/expected/syscache-update-pruned.out b/src/test/modules/injection_points/expected/syscache-update-pruned.out new file mode 100644 index 0000000..2125dd2 --- /dev/null +++ b/src/test/modules/injection_points/expected/syscache-update-pruned.out @@ -0,0 +1,73 @@ +Parsed test spec with 4 sessions + +starting permutation: xids4 cachefill1 at2 vac3 grant1 wakegrant4 wakeinval4 validate4 +debug_assertions +---------------- +on +(1 row) + +mkrels +------ + +(1 row) + +injection_points_attach +----------------------- + +(1 row) + +injection_points_attach +----------------------- + +(1 row) + +injection_points_attach +----------------------- + +(1 row) + +step xids4: + DO $$ + DECLARE + goal pg_lsn := '1/1'; + diff bigint := pg_wal_lsn_diff(goal, pg_current_wal_insert_lsn()); + BEGIN + IF diff < 0 THEN + RAISE EXCEPTION 'WAL is too far ahead by %; use a lower pg_resetwal argument', pg_size_pretty(-diff); + END IF; + RAISE LOG 'closing WAL gap of %', pg_size_pretty(diff); + WHILE pg_wal_lsn_diff(goal, pg_current_wal_insert_lsn()) > 0 LOOP + PERFORM pg_logical_emit_message(true, '', ''); + ROLLBACK; -- one aborted XID per iteration + END LOOP; + RAISE LOG 'closing WAL stopped at %', pg_current_wal_insert_lsn(); + END + $$; + +step cachefill1: SELECT pg_relation_filenode('vactest.orig50') > 0; +?column? +-------- +t +(1 row) + +step at2: ALTER TABLE vactest.orig50 SET (fillfactor = 50); +step vac3: VACUUM pg_class; +step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; +step wakegrant4: + SELECT injection_points_detach('heap_update-before-pin'); + SELECT injection_points_wakeup('heap_update-before-pin'); + +injection_points_detach +----------------------- + +(1 row) + +injection_points_wakeup +----------------------- + +(1 row) + +PQconsumeInput failed: server closed the connection unexpectedly + This probably means the server terminated abnormally + before or while processing the request. + diff --git a/src/test/modules/injection_points/expected/syscache-update-pruned_1.out b/src/test/modules/injection_points/expected/syscache-update-pruned_1.out new file mode 100644 index 0000000..1f89721 --- /dev/null +++ b/src/test/modules/injection_points/expected/syscache-update-pruned_1.out @@ -0,0 +1,125 @@ +Parsed test spec with 4 sessions + +starting permutation: xids4 cachefill1 at2 vac3 grant1 wakegrant4 wakeinval4 validate4 +debug_assertions +---------------- +off +(1 row) + +mkrels +------ + +(1 row) + +injection_points_attach +----------------------- + +(1 row) + +injection_points_attach +----------------------- + +(1 row) + +injection_points_attach +----------------------- + +(1 row) + +step xids4: + DO $$ + DECLARE + goal pg_lsn := '1/1'; + diff bigint := pg_wal_lsn_diff(goal, pg_current_wal_insert_lsn()); + BEGIN + IF diff < 0 THEN + RAISE EXCEPTION 'WAL is too far ahead by %; use a lower pg_resetwal argument', pg_size_pretty(-diff); + END IF; + RAISE LOG 'closing WAL gap of %', pg_size_pretty(diff); + WHILE pg_wal_lsn_diff(goal, pg_current_wal_insert_lsn()) > 0 LOOP + PERFORM pg_logical_emit_message(true, '', ''); + ROLLBACK; -- one aborted XID per iteration + END LOOP; + RAISE LOG 'closing WAL stopped at %', pg_current_wal_insert_lsn(); + END + $$; + +step cachefill1: SELECT pg_relation_filenode('vactest.orig50') > 0; +?column? +-------- +t +(1 row) + +step at2: ALTER TABLE vactest.orig50 SET (fillfactor = 50); +step vac3: VACUUM pg_class; +step grant1: GRANT SELECT ON vactest.orig50 TO PUBLIC; +step wakegrant4: + SELECT injection_points_detach('heap_update-before-pin'); + SELECT injection_points_wakeup('heap_update-before-pin'); + +injection_points_detach +----------------------- + +(1 row) + +injection_points_wakeup +----------------------- + +(1 row) + +step grant1: <... completed> +step wakeinval4: + SELECT injection_points_detach('SIInsertDataEntries2'); + SELECT injection_points_detach('SIInsertDataEntries3'); + SELECT injection_points_wakeup('SIInsertDataEntries2'); + SELECT injection_points_wakeup('SIInsertDataEntries3'); + +injection_points_detach +----------------------- + +(1 row) + +injection_points_detach +----------------------- + +(1 row) + +injection_points_wakeup +----------------------- + +(1 row) + +injection_points_wakeup +----------------------- + +(1 row) + +step at2: <... completed> +step vac3: <... completed> +step validate4: + SELECT * FROM page_header(get_raw_page('pg_class', 11)); + CREATE EXTENSION pg_buffercache; + SELECT count(pg_buffercache_evict(bufferid)) > 0 FROM pg_buffercache; + DROP EXTENSION pg_buffercache; + REINDEX INDEX pg_class_oid_index; + +lsn |checksum|flags|lower|upper|special|pagesize|version|prune_xid +------+--------+-----+-----+-----+-------+--------+-------+--------- +1/6AF8| 0| 0| 0| 11| 38| 8192| 4| 0 +(1 row) + +?column? +-------- +t +(1 row) + +ERROR: invalid page in block 11 of relation base/16384/16462 +teardown failed: ERROR: invalid page in block 11 of relation base/16384/16462 + +starting permutation: cachefill1 at2 vac3 grant1 mkrels4 wakegrant4 wakeinval4 +debug_assertions +---------------- +off +(1 row) + +setup failed: ERROR: extension "pageinspect" already exists diff --git a/src/test/modules/injection_points/meson.build b/src/test/modules/injection_points/meson.build index 989b4db..a5e881f 100644 --- a/src/test/modules/injection_points/meson.build +++ b/src/test/modules/injection_points/meson.build @@ -43,6 +43,7 @@ tests += { 'isolation': { 'specs': [ 'basic', + 'syscache-update-pruned', 'inplace', ], }, diff --git a/src/test/modules/injection_points/specs/syscache-update-pruned.spec b/src/test/modules/injection_points/specs/syscache-update-pruned.spec new file mode 100644 index 0000000..bde61c5 --- /dev/null +++ b/src/test/modules/injection_points/specs/syscache-update-pruned.spec @@ -0,0 +1,162 @@ +# Test race conditions involving: +# - s1: heap_update($FROM_SYSCACHE), without a snapshot or pin +# - s2: ALTER TABLE making $FROM_SYSCACHE a dead tuple +# - s3: "VACUUM pg_class" making $FROM_SYSCACHE become LP_UNUSED + +# This is a derivative work of inplace.spec. When introduced, that spec +# reached basically the same race condition, for inplace updates. Commit +# a07e03f fixed the inplace update case. + +# All sessions disable debug_parallel_query. The most prominent +# incompatibility between this test and parallel query involves the two +# SIInsertDataEntries injection point waits. We hold RelCacheInitLock during +# those waits, so parallel workers could hang in startup. + +setup { SHOW debug_assertions; -- distinguish the two expected outputs } +# Need s2 to make a non-HOT update. Otherwise, "VACUUM pg_class" would leave +# an LP_REDIRECT that persists. To get non-HOT, make rels so the pg_class row +# for vactest.orig50 is on a filled page (assuming BLCKSZ=8192). Just to save +# on filesystem syscalls, use relkind=c for every other rel. +setup +{ + SET debug_parallel_query = off; + CREATE EXTENSION pageinspect; + CREATE EXTENSION injection_points; + CREATE SCHEMA vactest; + CREATE FUNCTION vactest.mkrels(text, int, int) RETURNS void + LANGUAGE plpgsql SET search_path = vactest AS $$ + DECLARE + tname text; + BEGIN + FOR i in $2 .. $3 LOOP + tname := $1 || i; + EXECUTE FORMAT('CREATE TYPE ' || tname || ' AS ()'); + RAISE DEBUG '% at %', tname, ctid + FROM pg_class WHERE oid = tname::regclass; + END LOOP; + END + $$; +} +setup { VACUUM FULL pg_class; -- reduce free space } +setup +{ + SELECT vactest.mkrels('orig', 1, 49); + CREATE TABLE vactest.orig50 (); + SELECT vactest.mkrels('orig', 51, 100); +} +teardown +{ + DROP SCHEMA vactest CASCADE; + DROP EXTENSION injection_points; + DROP EXTENSION pageinspect; +} + +# Wait during GRANT. +session s1 +setup { + SET debug_parallel_query = off; + SELECT injection_points_set_local(); + SELECT injection_points_attach('heap_update-before-pin', 'wait'); +} +step cachefill1 { SELECT pg_relation_filenode('vactest.orig50') > 0; } +step grant1 { GRANT SELECT ON vactest.orig50 TO PUBLIC; } + +# Update of the tuple grant1 will update. Wait before sending invals, so s1 +# will not get a cache miss. Choose the command for making that update from +# among those whose heavyweight locking does not conflict with GRANT's +# heavyweight locking. (GRANT will see our XID as committed, so observing +# that XID in the tuple xmax also won't block GRANT.) +session s2 +setup { + SET debug_parallel_query = off; + SELECT injection_points_set_local(); + SELECT injection_points_attach('SIInsertDataEntries2', 'wait'); +} +step at2 { ALTER TABLE vactest.orig50 SET (fillfactor = 50); } + +# VACUUM to achieve LP_UNUSED. Again delay any s1 cache miss. +session s3 +setup { + SET debug_parallel_query = off; + SELECT injection_points_set_local(); + SELECT injection_points_attach('SIInsertDataEntries3', 'wait'); +} +step vac3 { VACUUM pg_class; } + +# Non-blocking actions. +session s4 +setup { SET debug_parallel_query = off; } +# Part of the mission is tricking heap_update() into treating lp_off=0 as an +# "old tuple". For that to work, the page header needs to look enough like a +# tuple header. Specifically, (pd_lsn.xlogid, pd_lsn.xrecoff) needs to +# suffice as a plausible (xmin, xmax). xmin should be known-committed, and +# xmax should be known-aborted. +# +# Our caller used pg_resetwal to leave the WAL position somewhat lower than +# 1/1. Advance it to 1/1, achieving xlogid = 1 = BootstrapTransactionId. In +# passing, burn XIDs so xrecoff will be a known-aborted XID in CLOG. +step xids4 { + DO $$ + DECLARE + goal pg_lsn := '1/1'; + diff bigint := pg_wal_lsn_diff(goal, pg_current_wal_insert_lsn()); + BEGIN + IF diff < 0 THEN + RAISE EXCEPTION 'WAL is too far ahead by %; use a lower pg_resetwal argument', pg_size_pretty(-diff); + END IF; + RAISE LOG 'closing WAL gap of %', pg_size_pretty(diff); + WHILE pg_wal_lsn_diff(goal, pg_current_wal_insert_lsn()) > 0 LOOP + PERFORM pg_logical_emit_message(true, '', ''); + ROLLBACK; -- one aborted XID per iteration + END LOOP; + RAISE LOG 'closing WAL stopped at %', pg_current_wal_insert_lsn(); + END + $$; +} +# Reuse the lp that s1 is waiting to change. I've observed reuse at the 1st +# or 18th CREATE, so create excess. +step mkrels4 { + SELECT vactest.mkrels('intruder', 1, 100); -- repopulate LP_UNUSED +} +step wakegrant4 { + SELECT injection_points_detach('heap_update-before-pin'); + SELECT injection_points_wakeup('heap_update-before-pin'); +} +step wakeinval4 { + SELECT injection_points_detach('SIInsertDataEntries2'); + SELECT injection_points_detach('SIInsertDataEntries3'); + SELECT injection_points_wakeup('SIInsertDataEntries2'); + SELECT injection_points_wakeup('SIInsertDataEntries3'); +} +# The bug corrupts pd_lower, pd_upper, and pd_special. That causes the page +# to fail validation when re-reading from disk. +step validate4 { + SELECT * FROM page_header(get_raw_page('pg_class', 11)); + CREATE EXTENSION pg_buffercache; + SELECT count(pg_buffercache_evict(bufferid)) > 0 FROM pg_buffercache; + DROP EXTENSION pg_buffercache; + REINDEX INDEX pg_class_oid_index; +} + +# tuple $FROM_SYSCACHE becomes LP_UNUSED. This permutation must come first, +# because it relies on a particular WAL position that pg_resetwal set before +# this spec began. +permutation + xids4 + cachefill1 # reads pg_class tuple T0 for vactest.orig50, xmax invalid + at2(wakeinval4) # T0 becomes eligible for pruning, T1 is successor + vac3(at2) # T0 becomes LP_UNUSED + grant1(wakegrant4) # pauses at heap_update(T0) + wakegrant4 # s1 wakes, assertion failure or page header corruption + wakeinval4 + validate4 + +# add mkrels4: tuple $FROM_SYSCACHE changes from LP_UNUSED to a successor +permutation + cachefill1 # reads pg_class tuple T0 for vactest.orig50, xmax invalid + at2(wakeinval4) # T0 becomes eligible for pruning, T1 is successor + vac3(at2) # T0 becomes LP_UNUSED + grant1(wakegrant4) # pauses at heap_update(T0) + mkrels4 # T0 becomes a new rel + wakegrant4 # s1 wakes, "duplicate key value violates unique" + wakeinval4 diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c index cbef6d4..e1a6b06 100644 --- a/src/test/regress/pg_regress.c +++ b/src/test/regress/pg_regress.c @@ -2475,6 +2475,24 @@ regression_main(int argc, char *argv[], } /* + * FIXME want this for syscache-update-pruned.spec only. That test + * needs to reach wal position 1/1, and this beats writing 12GB of WAL + * to get there. + */ + snprintf(buf, sizeof(buf), + "\"%s%spg_resetwal\" -D \"%s/data\" " + "--wal-segsize=16 --next-wal-file=0000000100000000000000FF", + bindir ? bindir : "", + bindir ? "/" : "", + temp_instance); + fflush(NULL); + if (system(buf)) + { + bail("pg_resetwal failed\n" + "# Command was: %s", buf); + } + + /* * Start the temp postmaster */ snprintf(buf, sizeof(buf),