diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c index 4bb22fe..11678fc 100644 --- a/src/backend/access/gin/ginfast.c +++ b/src/backend/access/gin/ginfast.c @@ -24,7 +24,9 @@ #include "miscadmin.h" #include "utils/memutils.h" #include "utils/rel.h" +#include "postmaster/autovacuum.h" #include "storage/indexfsm.h" +#include "storage/lmgr.h" /* GUC parameter */ int gin_pending_list_limit = 0; @@ -499,11 +501,8 @@ ginHeapTupleFastCollect(GinState *ginstate, * If newHead == InvalidBlockNumber then function drops the whole list. * * metapage is pinned and exclusive-locked throughout this function. - * - * Returns true if another cleanup process is running concurrently - * (if so, we can just abandon our own efforts) */ -static bool +static void shiftList(Relation index, Buffer metabuffer, BlockNumber newHead, bool fill_fsm, IndexBulkDeleteResult *stats) { @@ -534,14 +533,7 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead, data.ndeleted++; - if (GinPageIsDeleted(page)) - { - /* concurrent cleanup process is detected */ - for (i = 0; i < data.ndeleted; i++) - UnlockReleaseBuffer(buffers[i]); - - return true; - } + Assert(!GinPageIsDeleted(page)); nDeletedHeapTuples += GinPageGetOpaque(page)->maxoff; blknoToDelete = GinPageGetOpaque(page)->rightlink; @@ -617,8 +609,6 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead, RecordFreeIndexPage(index, freespace[i]); } while (blknoToDelete != newHead); - - return false; } /* Initialize empty KeyArray */ @@ -719,18 +709,10 @@ processPendingPage(BuildAccumulator *accum, KeyArray *ka, /* * Move tuples from pending pages into regular GIN structure. * - * This can be called concurrently by multiple backends, so it must cope. - * On first glance it looks completely not concurrent-safe and not crash-safe - * either. The reason it's okay is that multiple insertion of the same entry - * is detected and treated as a no-op by gininsert.c. If we crash after - * posting entries to the main index and before removing them from the + * On first glance it looks completely not crash-safe. But if we crash + * after posting entries to the main index and before removing them from the * pending list, it's okay because when we redo the posting later on, nothing - * bad will happen. Likewise, if two backends simultaneously try to post - * a pending entry into the main index, one will succeed and one will do - * nothing. We try to notice when someone else is a little bit ahead of - * us in the process, but that's just to avoid wasting cycles. Only the - * action of removing a page from the pending list really needs exclusive - * lock. + * bad will happen. * * vac_delay indicates that ginInsertCleanup should call * vacuum_delay_point() periodically. @@ -758,6 +740,38 @@ ginInsertCleanup(GinState *ginstate, KeyArray datums; BlockNumber blkno; bool fsm_vac = false; + Size workMemory; + + /* + * We would like to prevent concurrent cleanup process. For + * that we will lock metapage in exclusive mode using LockPage() + * call. Nobody other will use that lock for metapage, so + * we keep possibility of concurrent insertion into pending list + */ + + elog(WARNING,"ginInsertCleanup IsVacuum:%d IsAuto:%d", vac_delay, IsAutoVacuumWorkerProcess()); + if (vac_delay) + { + /* + * We are called from [auto]vacuum/analyze and we would + * like to wait concurrent cleanup to finish + */ + LockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock); + workMemory = + (IsAutoVacuumWorkerProcess() && autovacuum_work_mem != -1) ? + autovacuum_work_mem : maintenance_work_mem; + } + else + { + /* + * We are called from regular insert and if we see + * concurrent cleanup just exit in hope that concurrent + * process will clean up pending list. + */ + if (!ConditionalLockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock)) + return; + workMemory = work_mem; + } metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO); LockBuffer(metabuffer, GIN_SHARE); @@ -768,6 +782,7 @@ ginInsertCleanup(GinState *ginstate, { /* Nothing to do */ UnlockReleaseBuffer(metabuffer); + UnlockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock); return; } @@ -803,13 +818,7 @@ ginInsertCleanup(GinState *ginstate, */ for (;;) { - if (GinPageIsDeleted(page)) - { - /* another cleanup process is running concurrently */ - UnlockReleaseBuffer(buffer); - fsm_vac = false; - break; - } + Assert(!GinPageIsDeleted(page)); /* * read page's datums into accum @@ -828,7 +837,7 @@ ginInsertCleanup(GinState *ginstate, */ if (GinPageGetOpaque(page)->rightlink == InvalidBlockNumber || (GinPageHasFullRow(page) && - (accum.allocatedMemory >= (Size)maintenance_work_mem * 1024L))) + (accum.allocatedMemory >= workMemory * 1024L))) { ItemPointerData *list; uint32 nlist; @@ -865,14 +874,7 @@ ginInsertCleanup(GinState *ginstate, LockBuffer(metabuffer, GIN_EXCLUSIVE); LockBuffer(buffer, GIN_SHARE); - if (GinPageIsDeleted(page)) - { - /* another cleanup process is running concurrently */ - UnlockReleaseBuffer(buffer); - LockBuffer(metabuffer, GIN_UNLOCK); - fsm_vac = false; - break; - } + Assert(!GinPageIsDeleted(page)); /* * While we left the page unlocked, more stuff might have gotten @@ -905,13 +907,7 @@ ginInsertCleanup(GinState *ginstate, * remove read pages from pending list, at this point all * content of read pages is in regular structure */ - if (shiftList(index, metabuffer, blkno, fill_fsm, stats)) - { - /* another cleanup process is running concurrently */ - LockBuffer(metabuffer, GIN_UNLOCK); - fsm_vac = false; - break; - } + shiftList(index, metabuffer, blkno, fill_fsm, stats); /* At this point, some pending pages have been freed up */ fsm_vac = true; @@ -923,7 +919,17 @@ ginInsertCleanup(GinState *ginstate, * if we removed the whole pending list just exit */ if (blkno == InvalidBlockNumber) + { + UnlockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock); break; + } + + /* + * If we are called from regular insert, give a chance to + * [auto]vacuum/analyze continues our work + */ + if (vac_delay == false) + UnlockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock); /* * release memory used so far and reinit state @@ -931,6 +937,21 @@ ginInsertCleanup(GinState *ginstate, MemoryContextReset(opCtx); initKeyArray(&datums, datums.maxvalues); ginInitBA(&accum); + + if (vac_delay == false && + !ConditionalLockPage(index, GIN_METAPAGE_BLKNO, ExclusiveLock)) + { + /* + * We are called from regular insert, but concurrent + * clean catches a lock while we keep metapage unlocked. + * We believe, that it's a [auto]vacuum/analyze, not a + * cleanup called from regular insert, because + * cleanup called from vacuum waits a a lock while + * regular one just try once with conditional lock + */ + break; + } + } else { @@ -957,7 +978,6 @@ ginInsertCleanup(GinState *ginstate, if (fsm_vac && fill_fsm) IndexFreeSpaceMapVacuum(index); - /* Clean up temporary space */ MemoryContextSwitchTo(oldCtx); MemoryContextDelete(opCtx);