Author: Noah Misch Commit: Noah Misch diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 14de815..c0c33e9 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2801,6 +2801,10 @@ l1: HeapTupleHeaderSetXmax(tp.t_data, new_xmax); HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo); /* Make sure there is no forward chain link in t_ctid */ + /* + * FIXME this will overwrite any InplaceCanary ctid. Leave c_ctid + * unchanged? Accept that a rolled-back DROP could undo the protection? + */ tp.t_data->t_ctid = tp.t_self; /* Signal that this is actually a move into another partition */ @@ -3348,6 +3352,8 @@ l2: if (!HeapTupleSatisfiesVisibility(&oldtup, crosscheck, buffer)) { result = TM_Updated; + /* FiXME will InplaceCanary mechanism have race conditions w/ this + * or other t_ctid asserts in this file? */ Assert(!ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid)); } } @@ -3730,6 +3736,13 @@ l2: id_has_external, &old_key_copied); + /* FIXME missing anything important via the IsValid tests? */ + if (ItemPointerIsValid(&oldtup.t_data->t_ctid) && + ItemPointerIsValid(&newtup->t_data->t_ctid) && + ItemPointerGetOffsetNumber(&oldtup.t_data->t_ctid) == InplaceCanaryOffsetNumber && + !ItemPointerEquals(&oldtup.t_data->t_ctid, &newtup->t_data->t_ctid)) + elog(ERROR, "tuple concurrently updated"); + /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); @@ -4747,6 +4760,11 @@ failed: * updated, we need to follow the update chain to lock the new versions of * the tuple as well. */ + /* + * FIXME this will overwrite any InplaceCanary ctid. Leave c_ctid + * unchanged? Accept that SELECT FOR UPDATE on the catalog table could + * undo the protection? + */ if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask)) tuple->t_data->t_ctid = *tid; @@ -5892,6 +5910,7 @@ heap_inplace_update(Relation relation, HeapTuple tuple) HeapTupleHeader htup; uint32 oldlen; uint32 newlen; + HeapTupleData oldtup; /* * For now, we don't allow parallel updates. Unlike a regular update, @@ -5922,12 +5941,29 @@ heap_inplace_update(Relation relation, HeapTuple tuple) if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff) elog(ERROR, "wrong tuple length"); + /* evaluate whether updated */ + /* FIXME wait for a concurrent updater, in case it aborts */ + oldtup.t_tableOid = RelationGetRelid(relation); + oldtup.t_data = htup; + oldtup.t_len = ItemIdGetLength(lp); + oldtup.t_self = tuple->t_self; + if (TM_Ok != + HeapTupleSatisfiesUpdate(&oldtup, GetCurrentCommandId(false), buffer)) + elog(ERROR, "tuple concurrently updated"); + /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); memcpy((char *) htup + htup->t_hoff, (char *) tuple->t_data + tuple->t_data->t_hoff, newlen); + if (ItemPointerGetOffsetNumber(&htup->t_ctid) == + InplaceCanaryOffsetNumber) + BlockIdRetreat(&htup->t_ctid.ip_blkid); + else + ItemPointerSet(&htup->t_ctid, + MaxBlockNumber, + InplaceCanaryOffsetNumber); MarkBufferDirty(buffer); @@ -5945,6 +5981,7 @@ heap_inplace_update(Relation relation, HeapTuple tuple) XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); XLogRegisterBufData(0, (char *) htup + htup->t_hoff, newlen); + /* don't log t_ctid: concurrent changes not happening in recovery */ /* inplace updates aren't decoded atm, don't log the origin */ recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_INPLACE); diff --git a/src/include/storage/block.h b/src/include/storage/block.h index 31a036d..f8d9157 100644 --- a/src/include/storage/block.h +++ b/src/include/storage/block.h @@ -105,4 +105,14 @@ BlockIdGetBlockNumber(const BlockIdData *blockId) return (((BlockNumber) blockId->bi_hi) << 16) | ((BlockNumber) blockId->bi_lo); } +/* wraps on underflow, avoids InvalidBlockNumber */ +static inline void +BlockIdRetreat(BlockIdData *blockId) +{ + BlockNumber proposal = BlockIdGetBlockNumber(blockId) - 1; + if (proposal == InvalidBlockNumber) + proposal = MaxBlockNumber; + BlockIdSet(blockId, proposal); +} + #endif /* BLOCK_H */ diff --git a/src/include/storage/off.h b/src/include/storage/off.h index 3540308..8ff776a 100644 --- a/src/include/storage/off.h +++ b/src/include/storage/off.h @@ -27,6 +27,19 @@ typedef uint16 OffsetNumber; #define FirstOffsetNumber ((OffsetNumber) 1) #define MaxOffsetNumber ((OffsetNumber) (BLCKSZ / sizeof(ItemIdData))) +/* + * If a t_ctid contains InplaceCanaryOffsetNumber, it's a special ctid + * signifying that the tuple received a heap_inplace_update(). This is + * expected only in system catalogs, though extensions can use it elsewhere. + * (Offsets greater than MaxOffsetNumber are otherwise unattested.) The + * BlockNumber acts as a counter to distinguish multiple inplace updates. It + * starts at MaxBlockNumber, counts down to 0, and wraps back to + * MaxBlockNumber after 4B inplace updates. (Counting upward or other ways + * would be fine, but this choice maximizes these special TIDs looking + * different from regular TIDs.) + */ +#define InplaceCanaryOffsetNumber (InvalidOffsetNumber - 1) + /* ---------------- * support macros * ---------------- diff --git a/src/test/isolation/expected/intra-grant-inplace.out b/src/test/isolation/expected/intra-grant-inplace.out new file mode 100644 index 0000000..e7404e8 --- /dev/null +++ b/src/test/isolation/expected/intra-grant-inplace.out @@ -0,0 +1,29 @@ +Parsed test spec with 2 sessions + +starting permutation: b1 g1 s2 a2 s2 c1 s2 +step b1: BEGIN ISOLATION LEVEL READ COMMITTED; +step g1: GRANT SELECT ON intra_grant_inplace TO PUBLIC; +step s2: SELECT relhasindex FROM pg_class + WHERE oid = 'intra_grant_inplace'::regclass; +relhasindex +----------- +f +(1 row) + +step a2: ALTER TABLE intra_grant_inplace ADD PRIMARY KEY (c); +ERROR: tuple concurrently updated +step s2: SELECT relhasindex FROM pg_class + WHERE oid = 'intra_grant_inplace'::regclass; +relhasindex +----------- +f +(1 row) + +step c1: COMMIT; +step s2: SELECT relhasindex FROM pg_class + WHERE oid = 'intra_grant_inplace'::regclass; +relhasindex +----------- +f +(1 row) + diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule index b2be88e..fcc4ad4 100644 --- a/src/test/isolation/isolation_schedule +++ b/src/test/isolation/isolation_schedule @@ -86,6 +86,7 @@ test: alter-table-3 test: alter-table-4 test: create-trigger test: sequence-ddl +test: intra-grant-inplace test: async-notify test: vacuum-no-cleanup-lock test: timeouts diff --git a/src/test/isolation/specs/intra-grant-inplace.spec b/src/test/isolation/specs/intra-grant-inplace.spec new file mode 100644 index 0000000..b5720c5 --- /dev/null +++ b/src/test/isolation/specs/intra-grant-inplace.spec @@ -0,0 +1,29 @@ +# GRANT's lock is the catalog tuple xmax. GRANT doesn't acquire a heavyweight +# lock on the object undergoing an ACL change. In-place updates, such as +# relhasindex=true, need special code to cope. +# +# FIXME this is an isolation test on the assumption that I'll change +# heap_inplace_update() to wait for the GRANT to commit or abort. If I don't +# do that, this could be a non-isolation test using dblink(). + +setup +{ + CREATE TABLE intra_grant_inplace (c int); +} + +teardown +{ + DROP TABLE intra_grant_inplace; +} + +session s1 +step b1 { BEGIN ISOLATION LEVEL READ COMMITTED; } +step g1 { GRANT SELECT ON intra_grant_inplace TO PUBLIC; } +step c1 { COMMIT; } + +session s2 +step s2 { SELECT relhasindex FROM pg_class + WHERE oid = 'intra_grant_inplace'::regclass; } +step a2 { ALTER TABLE intra_grant_inplace ADD PRIMARY KEY (c); } + +permutation b1 g1 s2 a2 s2 c1 s2