Author: Noah Misch Commit: Noah Misch Move SendSharedInvalidMessages() into the COMMIT critical section. If a backend commits transactional DDL without making associated invals global, other backends may, for example, perform I/O on the wrong relfilenode. This change doesn't raise or lower protection for inplace updates or ON COMMIT DELETE ROWS. Back-patch to v12 (all supported versions). PGXN has no AtEOXact_Inval() references, and there's no known use case for calling it from an extension. Hence, drop it from all branches. Reviewed by FIXME. Discussion: https://postgr.es/m/20240523000548.58.nmisch@google.com diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index bf451d4..a142868 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -1630,7 +1630,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit) * * Relcache init file invalidation requires processing both before and * after we send the SI messages, only when committing. See - * AtEOXact_Inval(). + * PreCommit_Inval() and AtCommit_Inval(). */ if (isCommit) { diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index ee389fc..6f0f80c 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -1358,14 +1358,25 @@ RecordTransactionCommit(void) /* * Transactions without an assigned xid can contain invalidation - * messages (e.g. explicit relcache invalidations or catcache - * invalidations for inplace updates); standbys need to process those. - * We can't emit a commit record without an xid, and we don't want to - * force assigning an xid, because that'd be problematic for e.g. - * vacuum. Hence we emit a bespoke record for the invalidations. We - * don't want to use that in case a commit record is emitted, so they - * happen synchronously with commits (besides not wanting to emit more - * WAL records). + * messages. Inplace updates do so, and standbys need to process + * those invals. We can't emit a commit record without an xid, and we + * don't want to force assigning an xid, because that'd be problematic + * for e.g. vacuum. Hence we emit a bespoke record for the + * invalidations. We don't want to use that in case a commit record is + * emitted, so they happen synchronously with commits (besides not + * wanting to emit more WAL records). + * + * XXX Every known use of this capability is a defect. Since an XID + * isn't controlling visibility of the change that prompted invals, + * other sessions need the inval even if this transactions aborts. + * + * ON COMMIT DELETE ROWS does a nontransactional index_build(), which + * queues a relcache inval. Standbys don't need those invals, but + * we've not done the work to withhold them. ON COMMIT DELETE ROWS + * can't cope with an error in index_build(), which is more likely + * than an error here. ON COMMIT DELETE ROWS would need a deeper + * redesign to become safe against arbitrary errors. Meanwhile, the + * damage from this is limited to temp tables of one session. */ if (nmsgs != 0) { @@ -1373,6 +1384,9 @@ RecordTransactionCommit(void) RelcacheInitFileInval); wrote_xlog = true; /* not strictly necessary */ } + START_CRIT_SECTION(); + AtCommit_Inval(); + END_CRIT_SECTION(); /* * If we didn't create XLOG entries, we're done here; otherwise we @@ -1510,13 +1524,15 @@ RecordTransactionCommit(void) TransactionIdAsyncCommitTree(xid, nchildren, children, XactLastRecEnd); } - /* - * If we entered a commit critical section, leave it now, and let - * checkpoints proceed. - */ + /* If we entered a commit critical section, finish it. */ if (markXidCommitted) { + /* Let checkpoints proceed. */ MyProc->delayChkptFlags &= ~DELAY_CHKPT_START; + + /* Make catalog changes visible to all backends. */ + AtCommit_Inval(); + END_CRIT_SECTION(); } @@ -2271,6 +2287,15 @@ CommitTransaction(void) AtEOXact_LargeObject(true); /* + * Unlink relcache init files as needed. If unlinking, acquire + * RelCacheInitLock until after commit. Do this before + * PreCommit_Notify(), since notify concurrency is more important than + * DDL/connection establishment concurrency. Due to that LWLock, don't + * move this before I/O-heavy smgrDoPendingSyncs(). + */ + PreCommit_Inval(); + + /* * Insert notifications sent by NOTIFY commands into the queue. This * should be late in the pre-commit sequence to minimize time spent * holding the notify-insertion lock. However, this could result in @@ -2368,14 +2393,6 @@ CommitTransaction(void) /* Clean up the relation cache */ AtEOXact_RelationCache(true); - /* - * Make catalog changes visible to all backends. This has to happen - * before locks are released (if anyone is waiting for lock on a relation - * we've modified, we want them to know about the catalog change before - * they start using the relation). - */ - AtEOXact_Inval(true); - AtEOXact_MultiXact(); ResourceOwnerRelease(TopTransactionResourceOwner, @@ -2906,7 +2923,7 @@ AbortTransaction(void) false, true); AtEOXact_Buffers(false); AtEOXact_RelationCache(false); - AtEOXact_Inval(false); + AtAbort_Inval(); AtEOXact_MultiXact(); ResourceOwnerRelease(TopTransactionResourceOwner, RESOURCE_RELEASE_LOCKS, diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c index 603aa41..0772314 100644 --- a/src/backend/utils/cache/inval.c +++ b/src/backend/utils/cache/inval.c @@ -612,6 +612,8 @@ PrepareInvalidationState(void) { TransInvalidationInfo *myInfo; + Assert(IsTransactionState()); + if (transInvalInfo != NULL && transInvalInfo->my_level == GetCurrentTransactionNestLevel()) return; @@ -862,14 +864,14 @@ AcceptInvalidationMessages(void) void PostPrepare_Inval(void) { - AtEOXact_Inval(false); + AtAbort_Inval(); } /* * xactGetCommittedInvalidationMessages() is called by * RecordTransactionCommit() to collect invalidation messages to add to the * commit record. This applies only to commit message types, never to - * abort records. Must always run before AtEOXact_Inval(), since that + * abort records. Must always run before AtCommit_Inval(), since that * removes the data we need to see. * * Remember that this runs before we have officially committed, so we @@ -908,7 +910,7 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs, * Collect all the pending messages into a single contiguous array of * invalidation messages, to simplify what needs to happen while building * the commit WAL message. Maintain the order that they would be - * processed in by AtEOXact_Inval(), to ensure emulated behaviour in redo + * processed in by AtCommit_Inval(), to ensure emulated behaviour in redo * is as similar as possible to original. We want the same bugs, if any, * not new ones. */ @@ -955,7 +957,8 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs, * only at end-of-xact. * * Relcache init file invalidation requires processing both - * before and after we send the SI messages. See AtEOXact_Inval() + * before and after we send the SI messages. See PreCommit_Inval() + * and AtCommit_Inval(). */ void ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs, @@ -998,63 +1001,99 @@ ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs, } /* - * AtEOXact_Inval - * Process queued-up invalidation messages at end of main transaction. - * - * If isCommit, we must send out the messages in our PriorCmdInvalidMsgs list - * to the shared invalidation message queue. Note that these will be read - * not only by other backends, but also by our own backend at the next - * transaction start (via AcceptInvalidationMessages). This means that - * we can skip immediate local processing of anything that's still in - * CurrentCmdInvalidMsgs, and just send that list out too. - * - * If not isCommit, we are aborting, and must locally process the messages - * in PriorCmdInvalidMsgs. No messages need be sent to other backends, - * since they'll not have seen our changed tuples anyway. We can forget - * about CurrentCmdInvalidMsgs too, since those changes haven't touched - * the caches yet. + * PreCommit_Inval + * Process queued-up invalidation before commit of main transaction. + * + * Call this after the last pre-commit action that could queue transactional + * invalidations. (Direct SendSharedInvalidMessages() remains fine.) Call + * this after any expensive processing, because we may hold an LWLock from + * here till end of xact. + * + * Tasks belong here if they are safe even if the xact later aborts. + * Currently, this just unlinks a file, failure of which does abort the xact. + */ +void +PreCommit_Inval(void) +{ + /* This is a separate function just to run before the critical section. */ + Assert(CritSectionCount == 0); + + if (transInvalInfo == NULL) + return; + + /* Must be at top of stack */ + Assert(transInvalInfo->my_level == 1 && transInvalInfo->parent == NULL); + + /* + * Relcache init file invalidation requires processing both before and + * after we send the SI messages. + */ + if (transInvalInfo->RelcacheInitFileInval) + RelationCacheInitFilePreInvalidate(); +} + +/* + * AtCommit_Inval + * Process queued-up invalidations after commit of main transaction. + * + * Call this after TransactionIdCommitTree(), so consumers of the + * invalidations find any new rows. Call it before locks are released (if + * anyone is waiting for lock on a relation we've modified, we want them to + * know about the catalog change before they start using the relation). + * + * We must send out the messages in our PriorCmdInvalidMsgs list to the shared + * invalidation message queue. Note that these will be read not only by other + * backends, but also by our own backend at the next transaction start (via + * AcceptInvalidationMessages). This means that we can skip immediate local + * processing of anything that's still in CurrentCmdInvalidMsgs, and just send + * that list out too. * * In any case, reset our state to empty. We need not physically * free memory here, since TopTransactionContext is about to be emptied * anyway. + */ +void +AtCommit_Inval(void) +{ + /* PANIC rather than let other backends use stale cache entries. */ + Assert(CritSectionCount > 0); + + if (transInvalInfo == NULL) + return; + + AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs, + &transInvalInfo->CurrentCmdInvalidMsgs); + + ProcessInvalidationMessagesMulti(&transInvalInfo->PriorCmdInvalidMsgs, + SendSharedInvalidMessages); + + if (transInvalInfo->RelcacheInitFileInval) + RelationCacheInitFilePostInvalidate(); + + /* Need not free anything explicitly */ + transInvalInfo = NULL; +} + +/* + * AtAbort_Inval + * Process queued-up invalidation messages at abort of main transaction. * - * Note: - * This should be called as the last step in processing a transaction. + * We must locally process the messages in PriorCmdInvalidMsgs. No messages + * need be sent to other backends, since they'll not have seen our changed + * tuples anyway. We can forget about CurrentCmdInvalidMsgs too, since those + * changes haven't touched the caches yet. */ void -AtEOXact_Inval(bool isCommit) +AtAbort_Inval(void) { - /* Quick exit if no messages */ if (transInvalInfo == NULL) return; /* Must be at top of stack */ Assert(transInvalInfo->my_level == 1 && transInvalInfo->parent == NULL); - if (isCommit) - { - /* - * Relcache init file invalidation requires processing both before and - * after we send the SI messages. However, we need not do anything - * unless we committed. - */ - if (transInvalInfo->RelcacheInitFileInval) - RelationCacheInitFilePreInvalidate(); - - AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs, - &transInvalInfo->CurrentCmdInvalidMsgs); - - ProcessInvalidationMessagesMulti(&transInvalInfo->PriorCmdInvalidMsgs, - SendSharedInvalidMessages); - - if (transInvalInfo->RelcacheInitFileInval) - RelationCacheInitFilePostInvalidate(); - } - else - { - ProcessInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs, - LocalExecuteInvalidationMessage); - } + ProcessInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs, + LocalExecuteInvalidationMessage); /* Need not free anything explicitly */ transInvalInfo = NULL; diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index 50c9440..8147cf6 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -352,7 +352,7 @@ SearchSysCacheLocked1(int cacheId, /* * If an inplace update just finished, ensure we process the syscache * inval. XXX this is insufficient: the inplace updater may not yet - * have reached AtEOXact_Inval(). See test at inplace-inval.spec. + * have reached AtCommit_Inval(). See test at inplace-inval.spec. * * If a heap_update() call just released its LOCKTAG_TUPLE, we'll * probably find the old tuple and reach "tuple concurrently updated". diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h index 24695fa..94d8b44 100644 --- a/src/include/utils/inval.h +++ b/src/include/utils/inval.h @@ -26,7 +26,9 @@ typedef void (*RelcacheCallbackFunction) (Datum arg, Oid relid); extern void AcceptInvalidationMessages(void); -extern void AtEOXact_Inval(bool isCommit); +extern void PreCommit_Inval(void); +extern void AtCommit_Inval(void); +extern void AtAbort_Inval(void); extern void AtEOSubXact_Inval(bool isCommit);