Author: Noah Misch Commit: Noah Misch Require tuple locks for heap_update() of RELKIND_INDEX pg_class rows. [To be squashed into inplace120-locktag if we're keeping it] diff --git a/src/backend/access/heap/README.tuplock b/src/backend/access/heap/README.tuplock index 95828ce..b69a0e4 100644 --- a/src/backend/access/heap/README.tuplock +++ b/src/backend/access/heap/README.tuplock @@ -168,18 +168,17 @@ using locktags as follows. While DDL code is the main audience, the executor follows these rules to make e.g. "MERGE INTO pg_class" safer. Locking rules are per-catalog: - pg_class heap_inplace_update_scan() callers: before the call, acquire a lock - on the relation in mode ShareUpdateExclusiveLock or stricter. If the update - targets a row of RELKIND_INDEX (but not RELKIND_PARTITIONED_INDEX), that - lock must be on the table. Locking the index rel is not necessary. (This - allows VACUUM to overwrite per-index pg_class while holding a lock on the - table alone.) heap_inplace_update_scan() acquires and releases LOCKTAG_TUPLE - in InplaceUpdateTupleLock, an alias for ExclusiveLock, on each tuple it - overwrites. + pg_class heap_inplace_update_scan() callers: if the pg_class row pertains to + an index (but not RELKIND_PARTITIONED_INDEX), no lock is required. + Otherwise, before the call, acquire a lock on the relation in mode + ShareUpdateExclusiveLock or stricter. heap_inplace_update_scan() acquires + and releases LOCKTAG_TUPLE in InplaceUpdateTupleLock, an alias for + ExclusiveLock, on each tuple it overwrites. - pg_class heap_update() callers: before copying the tuple to modify, take a - lock on the tuple, a ShareUpdateExclusiveLock on the relation, or a - ShareRowExclusiveLock or stricter on the relation. + pg_class heap_update() callers: acquire a lock before copying the tuple to + modify. If the pg_class row pertains to an index, lock the tuple. + Otherwise, lock the tuple, get a ShareUpdateExclusiveLock on the relation, + or get a ShareRowExclusiveLock or stricter on the relation. SearchSysCacheLocked1() is one convenient way to acquire the tuple lock. Most heap_update() callers already hold a suitable lock on the relation for diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 7de60c1..1e8bd02 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -4131,19 +4131,10 @@ check_lock_if_inplace_updateable_rel(Relation relation, dbid = InvalidOid; else dbid = MyDatabaseId; - - if (classForm->relkind == RELKIND_INDEX) - { - Relation irel = index_open(relid, AccessShareLock); - - SET_LOCKTAG_RELATION(tag, dbid, irel->rd_index->indrelid); - index_close(irel, AccessShareLock); - } - else - SET_LOCKTAG_RELATION(tag, dbid, relid); - - if (!LockHeldByMe(&tag, ShareUpdateExclusiveLock, false) && - !LockHeldByMe(&tag, ShareRowExclusiveLock, true)) + SET_LOCKTAG_RELATION(tag, dbid, relid); + if (classForm->relkind == RELKIND_INDEX || + (!LockHeldByMe(&tag, ShareUpdateExclusiveLock, false) && + !LockHeldByMe(&tag, ShareRowExclusiveLock, true))) elog(WARNING, "missing lock for relation \"%s\" (OID %u, relkind %c) @ TID (%u,%u)", NameStr(classForm->relname), @@ -4181,21 +4172,14 @@ check_inplace_rel_lock(HeapTuple oldtup) Oid dbid; LOCKTAG tag; + if (classForm->relkind == RELKIND_INDEX) + return; + if (IsSharedRelation(relid)) dbid = InvalidOid; else dbid = MyDatabaseId; - - if (classForm->relkind == RELKIND_INDEX) - { - Relation irel = index_open(relid, AccessShareLock); - - SET_LOCKTAG_RELATION(tag, dbid, irel->rd_index->indrelid); - index_close(irel, AccessShareLock); - } - else - SET_LOCKTAG_RELATION(tag, dbid, relid); - + SET_LOCKTAG_RELATION(tag, dbid, relid); if (!LockHeldByMe(&tag, ShareUpdateExclusiveLock, true)) elog(WARNING, "missing lock for relation \"%s\" (OID %u, relkind %c) @ TID (%u,%u)", diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index e4608b9..579cc0d 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -1558,6 +1558,8 @@ index_concurrently_swap(Oid newIndexId, Oid oldIndexId, const char *oldName) newClassTuple; Form_pg_class oldClassForm, newClassForm; + ItemPointerData oldClassTid, + newClassTid; HeapTuple oldIndexTuple, newIndexTuple; Form_pg_index oldIndexForm, @@ -1569,6 +1571,10 @@ index_concurrently_swap(Oid newIndexId, Oid oldIndexId, const char *oldName) /* * Take a necessary lock on the old and new index before swapping them. + * Since the caller holds session-level locks, this shouldn't deadlock. + * The tuple locks come next, and deadlock is possible there. There's no + * good use case for altering the temporary index of a REINDEX + * CONCURRENTLY, so don't put effort into avoiding said deadlock. */ oldClassRel = relation_open(oldIndexId, ShareUpdateExclusiveLock); newClassRel = relation_open(newIndexId, ShareUpdateExclusiveLock); @@ -1576,15 +1582,17 @@ index_concurrently_swap(Oid newIndexId, Oid oldIndexId, const char *oldName) /* Now swap names and dependencies of those indexes */ pg_class = table_open(RelationRelationId, RowExclusiveLock); - oldClassTuple = SearchSysCacheCopy1(RELOID, - ObjectIdGetDatum(oldIndexId)); + oldClassTuple = SearchSysCacheLockedCopy1(RELOID, + ObjectIdGetDatum(oldIndexId)); if (!HeapTupleIsValid(oldClassTuple)) elog(ERROR, "could not find tuple for relation %u", oldIndexId); - newClassTuple = SearchSysCacheCopy1(RELOID, - ObjectIdGetDatum(newIndexId)); + newClassTuple = SearchSysCacheLockedCopy1(RELOID, + ObjectIdGetDatum(newIndexId)); if (!HeapTupleIsValid(newClassTuple)) elog(ERROR, "could not find tuple for relation %u", newIndexId); + oldClassTid = oldClassTuple->t_self; + newClassTid = newClassTuple->t_self; oldClassForm = (Form_pg_class) GETSTRUCT(oldClassTuple); newClassForm = (Form_pg_class) GETSTRUCT(newClassTuple); @@ -1597,8 +1605,10 @@ index_concurrently_swap(Oid newIndexId, Oid oldIndexId, const char *oldName) newClassForm->relispartition = oldClassForm->relispartition; oldClassForm->relispartition = isPartition; - CatalogTupleUpdate(pg_class, &oldClassTuple->t_self, oldClassTuple); - CatalogTupleUpdate(pg_class, &newClassTuple->t_self, newClassTuple); + CatalogTupleUpdate(pg_class, &oldClassTid, oldClassTuple); + UnlockTuple(pg_class, &oldClassTid, InplaceUpdateTupleLock); + CatalogTupleUpdate(pg_class, &newClassTid, newClassTuple); + UnlockTuple(pg_class, &newClassTid, InplaceUpdateTupleLock); heap_freetuple(oldClassTuple); heap_freetuple(newClassTuple); diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 78f9678..402dc49 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -1074,20 +1074,28 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, relfilenumber2; RelFileNumber swaptemp; char swptmpchr; + ItemPointerData otid1, + otid2; Oid relam1, relam2; - /* We need writable copies of both pg_class tuples. */ + /* + * We need writable copies of both pg_class tuples. Since r2 is new in + * this transaction, no other process should be getting the tuple lock for + * that one. Hence, order of tuple lock acquisition doesn't matter. + */ relRelation = table_open(RelationRelationId, RowExclusiveLock); - reltup1 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r1)); + reltup1 = SearchSysCacheLockedCopy1(RELOID, ObjectIdGetDatum(r1)); if (!HeapTupleIsValid(reltup1)) elog(ERROR, "cache lookup failed for relation %u", r1); + otid1 = reltup1->t_self; relform1 = (Form_pg_class) GETSTRUCT(reltup1); - reltup2 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r2)); + reltup2 = SearchSysCacheLockedCopy1(RELOID, ObjectIdGetDatum(r2)); if (!HeapTupleIsValid(reltup2)) elog(ERROR, "cache lookup failed for relation %u", r2); + otid2 = reltup2->t_self; relform2 = (Form_pg_class) GETSTRUCT(reltup2); relfilenumber1 = relform1->relfilenode; @@ -1252,10 +1260,8 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, CatalogIndexState indstate; indstate = CatalogOpenIndexes(relRelation); - CatalogTupleUpdateWithInfo(relRelation, &reltup1->t_self, reltup1, - indstate); - CatalogTupleUpdateWithInfo(relRelation, &reltup2->t_self, reltup2, - indstate); + CatalogTupleUpdateWithInfo(relRelation, &otid1, reltup1, indstate); + CatalogTupleUpdateWithInfo(relRelation, &otid2, reltup2, indstate); CatalogCloseIndexes(indstate); } else @@ -1264,6 +1270,8 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, CacheInvalidateRelcacheByTuple(reltup1); CacheInvalidateRelcacheByTuple(reltup2); } + UnlockTuple(relRelation, &otid1, InplaceUpdateTupleLock); + UnlockTuple(relRelation, &otid2, InplaceUpdateTupleLock); /* * Now that pg_class has been updated with its relevant information for diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 03278f6..86956aa 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -14353,7 +14353,7 @@ ATExecChangeOwner(Oid relationOid, Oid newOwnerId, bool recursing, LOCKMODE lock /* Get its pg_class tuple, too */ class_rel = table_open(RelationRelationId, RowExclusiveLock); - tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relationOid)); + tuple = SearchSysCacheLocked1(RELOID, ObjectIdGetDatum(relationOid)); if (!HeapTupleIsValid(tuple)) elog(ERROR, "cache lookup failed for relation %u", relationOid); tuple_class = (Form_pg_class) GETSTRUCT(tuple); @@ -14443,7 +14443,9 @@ ATExecChangeOwner(Oid relationOid, Oid newOwnerId, bool recursing, LOCKMODE lock * If the new owner is the same as the existing owner, consider the * command to have succeeded. This is for dump restoration purposes. */ - if (tuple_class->relowner != newOwnerId) + if (tuple_class->relowner == newOwnerId) + UnlockTuple(class_rel, &tuple->t_self, InplaceUpdateTupleLock); + else { Datum repl_val[Natts_pg_class]; bool repl_null[Natts_pg_class]; @@ -14503,6 +14505,7 @@ ATExecChangeOwner(Oid relationOid, Oid newOwnerId, bool recursing, LOCKMODE lock newtuple = heap_modify_tuple(tuple, RelationGetDescr(class_rel), repl_val, repl_null, repl_repl); CatalogTupleUpdate(class_rel, &newtuple->t_self, newtuple); + UnlockTuple(class_rel, &tuple->t_self, InplaceUpdateTupleLock); heap_freetuple(newtuple);