From 0cdf4982f4a0a4211752fc0228b80b4d89ef07ad Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Mon, 15 Nov 2021 19:55:10 +0900
Subject: [PATCH v1] Fix locking of toast relations with REINDEX CONC

---
 src/backend/access/common/toast_internals.c   |  4 +-
 src/backend/commands/indexcmds.c              | 86 +++++++++++++++++--
 .../expected/reindex-concurrently-toast.out   | 85 ++++++++++++++++++
 src/test/isolation/isolation_schedule         |  1 +
 .../specs/reindex-concurrently-toast.spec     | 55 ++++++++++++
 5 files changed, 220 insertions(+), 11 deletions(-)
 create mode 100644 src/test/isolation/expected/reindex-concurrently-toast.out
 create mode 100644 src/test/isolation/specs/reindex-concurrently-toast.spec

diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c
index c7b9ade574..0a946631bb 100644
--- a/src/backend/access/common/toast_internals.c
+++ b/src/backend/access/common/toast_internals.c
@@ -361,8 +361,8 @@ toast_save_datum(Relation rel, Datum value,
 	/*
 	 * Done - close toast relation and its indexes
 	 */
-	toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
-	table_close(toastrel, RowExclusiveLock);
+	toast_close_indexes(toastidxs, num_indexes, NoLock);
+	table_close(toastrel, NoLock);
 
 	/*
 	 * Create the TOAST pointer value that we'll return
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index c14ca27c5e..8370d1ab6d 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -3262,14 +3262,7 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 				 * toast indexes.
 				 */
 				Relation	heapRelation;
-
-				/* Save the list of relation OIDs in private context */
-				oldcontext = MemoryContextSwitchTo(private_context);
-
-				/* Track this relation for session locks */
-				heapRelationIds = lappend_oid(heapRelationIds, relationOid);
-
-				MemoryContextSwitchTo(oldcontext);
+				Oid			relLockedOid = relationOid;
 
 				if (IsCatalogRelationOid(relationOid))
 					ereport(ERROR,
@@ -3296,6 +3289,46 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 							 errmsg("cannot move system relation \"%s\"",
 									RelationGetRelationName(heapRelation))));
 
+				/*
+				 * If querying a toast relation, locks are taken on its
+				 * parent table instead.
+				 */
+				relLockedOid = relationOid;
+				if (relkind == RELKIND_TOASTVALUE)
+				{
+					Relation classRel;
+					TableScanDesc relScan;
+					ScanKeyData key;
+					HeapTuple   tuple;
+					Form_pg_class classForm;
+
+					classRel = table_open(RelationRelationId, AccessShareLock);
+					ScanKeyInit(&key,
+								Anum_pg_class_reltoastrelid,
+								BTEqualStrategyNumber, F_OIDEQ,
+								ObjectIdGetDatum(relationOid));
+
+					relScan = table_beginscan_catalog(classRel, 1, &key);
+					tuple = heap_getnext(relScan, ForwardScanDirection);
+
+					if (!HeapTupleIsValid(tuple))
+						elog(ERROR, "could not find toast tuple for relation %u",
+							relationOid);
+
+					classForm = (Form_pg_class) GETSTRUCT(tuple);
+					relLockedOid = classForm->oid;
+					table_endscan(relScan);
+					table_close(classRel, AccessShareLock);
+				}
+
+				/* Save the list of relation OIDs in private context */
+				oldcontext = MemoryContextSwitchTo(private_context);
+
+				/* Track this relation for session locks */
+				heapRelationIds = lappend_oid(heapRelationIds, relLockedOid);
+
+				MemoryContextSwitchTo(oldcontext);
+
 				/* Add all the valid indexes of relation to list */
 				foreach(lc, RelationGetIndexList(heapRelation))
 				{
@@ -3394,6 +3427,7 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 													  (params->options & REINDEXOPT_MISSING_OK) != 0);
 				Relation	heapRelation;
 				ReindexIndexInfo *idx;
+				Oid			relLockedOid;
 
 				/* if relation is missing, leave */
 				if (!OidIsValid(heapId))
@@ -3442,11 +3476,45 @@ ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
 
 				table_close(heapRelation, NoLock);
 
+				/*
+				 * If this is a toast index, save the parent table instead
+				 * of the toast relation itself for the session locks.  This
+				 * looks backwards at pg_class.reltoastrelid to know which
+				 * table this toast relation links to.
+				 */
+				relLockedOid = heapId;
+				if (IsToastNamespace(get_rel_namespace(relationOid)))
+				{
+					Relation classRel;
+					TableScanDesc relScan;
+					ScanKeyData key;
+					HeapTuple   tuple;
+					Form_pg_class classForm;
+
+					classRel = table_open(RelationRelationId, AccessShareLock);
+					ScanKeyInit(&key,
+								Anum_pg_class_reltoastrelid,
+								BTEqualStrategyNumber, F_OIDEQ,
+								ObjectIdGetDatum(heapId));
+
+					relScan = table_beginscan_catalog(classRel, 1, &key);
+
+					tuple = heap_getnext(relScan, ForwardScanDirection);
+					if (!HeapTupleIsValid(tuple))
+						elog(ERROR, "could not find toast tuple for relation %u",
+							relationOid);
+
+					classForm = (Form_pg_class) GETSTRUCT(tuple);
+					relLockedOid = classForm->oid;
+					table_endscan(relScan);
+					table_close(classRel, AccessShareLock);
+				}
+
 				/* Save the list of relation OIDs in private context */
 				oldcontext = MemoryContextSwitchTo(private_context);
 
 				/* Track the heap relation of this index for session locks */
-				heapRelationIds = list_make1_oid(heapId);
+				heapRelationIds = list_make1_oid(relLockedOid);
 
 				/*
 				 * Save the list of relation OIDs in private context.  Note
diff --git a/src/test/isolation/expected/reindex-concurrently-toast.out b/src/test/isolation/expected/reindex-concurrently-toast.out
new file mode 100644
index 0000000000..3b9023728a
--- /dev/null
+++ b/src/test/isolation/expected/reindex-concurrently-toast.out
@@ -0,0 +1,85 @@
+Parsed test spec with 2 sessions
+
+starting permutation: lock1 ins1 retab2 end1 sel2
+step lock1: lock TABLE reind_con_wide in ROW EXCLUSIVE MODE;
+step ins1: INSERT INTO reind_con_wide SELECT 3, repeat('3', 11) || string_agg(g.i::text || random()::text, '') FROM generate_series(1, 500) g(i);
+step retab2: REINDEX TABLE CONCURRENTLY pg_toast.reind_con_toast; <waiting ...>
+step end1: COMMIT;
+step retab2: <... completed>
+step sel2: SELECT id, substr(data, 1, 10) FROM reind_con_wide ORDER BY id;
+id|    substr
+--+----------
+ 1|1111111111
+ 2|2222222222
+ 3|3333333333
+(3 rows)
+
+
+starting permutation: lock1 ins1 reind2 end1 sel2
+step lock1: lock TABLE reind_con_wide in ROW EXCLUSIVE MODE;
+step ins1: INSERT INTO reind_con_wide SELECT 3, repeat('3', 11) || string_agg(g.i::text || random()::text, '') FROM generate_series(1, 500) g(i);
+step reind2: REINDEX INDEX CONCURRENTLY pg_toast.reind_con_toast_idx; <waiting ...>
+step end1: COMMIT;
+step reind2: <... completed>
+step sel2: SELECT id, substr(data, 1, 10) FROM reind_con_wide ORDER BY id;
+id|    substr
+--+----------
+ 1|1111111111
+ 2|2222222222
+ 3|3333333333
+(3 rows)
+
+
+starting permutation: lock1 upd1 retab2 end1 sel2
+step lock1: lock TABLE reind_con_wide in ROW EXCLUSIVE MODE;
+step upd1: UPDATE reind_con_wide SET data = (SELECT repeat('4', 11) || string_agg(g.i::text || random()::text, '') FROM generate_series(1, 500) g(i)) WHERE id = 1;
+step retab2: REINDEX TABLE CONCURRENTLY pg_toast.reind_con_toast; <waiting ...>
+step end1: COMMIT;
+step retab2: <... completed>
+step sel2: SELECT id, substr(data, 1, 10) FROM reind_con_wide ORDER BY id;
+id|    substr
+--+----------
+ 1|4444444444
+ 2|2222222222
+(2 rows)
+
+
+starting permutation: lock1 upd1 reind2 end1 sel2
+step lock1: lock TABLE reind_con_wide in ROW EXCLUSIVE MODE;
+step upd1: UPDATE reind_con_wide SET data = (SELECT repeat('4', 11) || string_agg(g.i::text || random()::text, '') FROM generate_series(1, 500) g(i)) WHERE id = 1;
+step reind2: REINDEX INDEX CONCURRENTLY pg_toast.reind_con_toast_idx; <waiting ...>
+step end1: COMMIT;
+step reind2: <... completed>
+step sel2: SELECT id, substr(data, 1, 10) FROM reind_con_wide ORDER BY id;
+id|    substr
+--+----------
+ 1|4444444444
+ 2|2222222222
+(2 rows)
+
+
+starting permutation: lock1 del1 retab2 end1 sel2
+step lock1: lock TABLE reind_con_wide in ROW EXCLUSIVE MODE;
+step del1: DELETE FROM reind_con_wide WHERE id = 2;
+step retab2: REINDEX TABLE CONCURRENTLY pg_toast.reind_con_toast; <waiting ...>
+step end1: COMMIT;
+step retab2: <... completed>
+step sel2: SELECT id, substr(data, 1, 10) FROM reind_con_wide ORDER BY id;
+id|    substr
+--+----------
+ 1|1111111111
+(1 row)
+
+
+starting permutation: lock1 del1 reind2 end1 sel2
+step lock1: lock TABLE reind_con_wide in ROW EXCLUSIVE MODE;
+step del1: DELETE FROM reind_con_wide WHERE id = 2;
+step reind2: REINDEX INDEX CONCURRENTLY pg_toast.reind_con_toast_idx; <waiting ...>
+step end1: COMMIT;
+step reind2: <... completed>
+step sel2: SELECT id, substr(data, 1, 10) FROM reind_con_wide ORDER BY id;
+id|    substr
+--+----------
+ 1|1111111111
+(1 row)
+
diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule
index f4c01006fc..99c23b16ff 100644
--- a/src/test/isolation/isolation_schedule
+++ b/src/test/isolation/isolation_schedule
@@ -53,6 +53,7 @@ test: lock-committed-update
 test: lock-committed-keyupdate
 test: update-locked-tuple
 test: reindex-concurrently
+test: reindex-concurrently-toast
 test: reindex-schema
 test: propagate-lock-delete
 test: tuplelock-conflict
diff --git a/src/test/isolation/specs/reindex-concurrently-toast.spec b/src/test/isolation/specs/reindex-concurrently-toast.spec
new file mode 100644
index 0000000000..616396a8f1
--- /dev/null
+++ b/src/test/isolation/specs/reindex-concurrently-toast.spec
@@ -0,0 +1,55 @@
+# REINDEX CONCURRENTLY with toast relations
+#
+# Ensure that concurrent operations work correctly when a REINDEX is performed
+# concurrently on toast relations.  Toast relation names are not deterministic,
+# so this abuses of allow_system_table_mods to change the names of toast
+# tables and its indexes so as they can be executed with REINDEX CONCURRENTLY,
+# which cannot be launched in a transaction context.
+
+# Create a table, with deterministic names for its toast relation and indexes.
+# Fortunately ALTER TABLE is transactional, making the renaming of toast
+# relations possible with allow_system_table_mods.
+setup
+{
+    CREATE TABLE reind_con_wide(id int primary key, data text);
+    INSERT INTO reind_con_wide
+      SELECT 1, repeat('1', 11) || string_agg(g.i::text || random()::text, '') FROM generate_series(1, 500) g(i);
+    INSERT INTO reind_con_wide
+      SELECT 2, repeat('2', 11) || string_agg(g.i::text || random()::text, '') FROM generate_series(1, 500) g(i);
+    SET allow_system_table_mods TO true;
+    DO $$DECLARE r record;
+    BEGIN
+    SELECT INTO r reltoastrelid::regclass::text AS table_name FROM pg_class
+      WHERE oid = 'reind_con_wide'::regclass;
+    EXECUTE 'ALTER TABLE ' || r.table_name || ' RENAME TO reind_con_toast;';
+    SELECT INTO r indexrelid::regclass::text AS index_name FROM pg_index
+        WHERE indrelid = (SELECT oid FROM pg_class where relname = 'reind_con_toast');
+    EXECUTE 'ALTER INDEX ' || r.index_name || ' RENAME TO reind_con_toast_idx;';
+    END$$;
+}
+
+teardown
+{
+    DROP TABLE reind_con_wide;
+}
+
+session s1
+setup { BEGIN; }
+step lock1 { lock TABLE reind_con_wide in ROW EXCLUSIVE MODE; }
+step ins1 { INSERT INTO reind_con_wide SELECT 3, repeat('3', 11) || string_agg(g.i::text || random()::text, '') FROM generate_series(1, 500) g(i); }
+step upd1 { UPDATE reind_con_wide SET data = (SELECT repeat('4', 11) || string_agg(g.i::text || random()::text, '') FROM generate_series(1, 500) g(i)) WHERE id = 1; }
+step del1 { DELETE FROM reind_con_wide WHERE id = 2; }
+step end1 { COMMIT; }
+
+session s2
+step retab2 { REINDEX TABLE CONCURRENTLY pg_toast.reind_con_toast; }
+step reind2 { REINDEX INDEX CONCURRENTLY pg_toast.reind_con_toast_idx; }
+step sel2 { SELECT id, substr(data, 1, 10) FROM reind_con_wide ORDER BY id; }
+
+# Test REINDEX CONCURRENTLY on toast table and its index.
+permutation lock1 ins1 retab2 end1 sel2
+permutation lock1 ins1 reind2 end1 sel2
+permutation lock1 upd1 retab2 end1 sel2
+permutation lock1 upd1 reind2 end1 sel2
+permutation lock1 del1 retab2 end1 sel2
+permutation lock1 del1 reind2 end1 sel2
-- 
2.33.1

