From 07739e5af83664b67ea02d3db7a461a106d74040 Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Mon, 2 Dec 2024 15:59:35 +0100 Subject: [PATCH v1] Disable BitmapScan's skip_fetch optimization The optimization doesn't take concurrent vacuum's removal of TIDs into account: vacuum's second phase can remove dead TIDs and mark their pages ALL_VISIBLE while our bitmap still contains pointers to those dead TIDs. The optimization itself isn't impossible, but it would require more work to detect that vacuum has started removing TIDs and to then stop using the optimization. However, we currently don't have such a system in place, making the optimization unsound to keep around. Reported-By: Konstantin Knizhnik Backpatch-through: 13 --- src/include/access/heapam.h | 9 +++-- src/include/access/tableam.h | 3 +- src/backend/access/heap/heapam.c | 13 ------- src/backend/access/heap/heapam_handler.c | 28 --------------- src/backend/executor/nodeBitmapHeapscan.c | 42 ++--------------------- 5 files changed, 8 insertions(+), 87 deletions(-) diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 65999dd64e..42f28109e2 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -92,11 +92,10 @@ typedef struct HeapScanDescData ParallelBlockTableScanWorkerData *rs_parallelworkerdata; /* - * These fields are only used for bitmap scans for the "skip fetch" - * optimization. Bitmap scans needing no fields from the heap may skip - * fetching an all visible block, instead using the number of tuples per - * block reported by the bitmap to determine how many NULL-filled tuples - * to return. + * These fields were kept around to guarantee ABI stability, but are + * otherwise unused. They were only used for bitmap scans for the + * "skip fetch" optimization, which turned out to be unsound when the + * second phase of vacuum ran concurrently. 
*/ Buffer rs_vmbuffer; int rs_empty_tuples_pending; diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index da661289c1..5d54b0a18b 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -955,8 +955,7 @@ table_beginscan_bm(Relation rel, Snapshot snapshot, { uint32 flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE; - if (need_tuple) - flags |= SO_NEED_TUPLES; + flags |= SO_NEED_TUPLES; return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags); } diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index bbe64b1e53..ca2dbb0b75 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -1188,19 +1188,6 @@ heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params, if (BufferIsValid(scan->rs_cbuf)) ReleaseBuffer(scan->rs_cbuf); - if (BufferIsValid(scan->rs_vmbuffer)) - { - ReleaseBuffer(scan->rs_vmbuffer); - scan->rs_vmbuffer = InvalidBuffer; - } - - /* - * Reset rs_empty_tuples_pending, a field only used by bitmap heap scan, - * to avoid incorrectly emitting NULL-filled tuples from a previous scan - * on rescan. - */ - scan->rs_empty_tuples_pending = 0; - /* * The read stream is reset on rescan. This must be done before * initscan(), as some state referred to by read_stream_reset() is reset diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 6f8b1b7929..93980be98a 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -2131,24 +2131,6 @@ heapam_scan_bitmap_next_block(TableScanDesc scan, hscan->rs_cindex = 0; hscan->rs_ntuples = 0; - /* - * We can skip fetching the heap page if we don't need any fields from the - * heap, the bitmap entries don't need rechecking, and all tuples on the - * page are visible to our transaction. 
- */ - if (!(scan->rs_flags & SO_NEED_TUPLES) && - !tbmres->recheck && - VM_ALL_VISIBLE(scan->rs_rd, tbmres->blockno, &hscan->rs_vmbuffer)) - { - /* can't be lossy in the skip_fetch case */ - Assert(tbmres->ntuples >= 0); - Assert(hscan->rs_empty_tuples_pending >= 0); - - hscan->rs_empty_tuples_pending += tbmres->ntuples; - - return true; - } - /* * Ignore any claimed entries past what we think is the end of the * relation. It may have been extended after the start of our scan (we @@ -2261,16 +2243,6 @@ heapam_scan_bitmap_next_tuple(TableScanDesc scan, Page page; ItemId lp; - if (hscan->rs_empty_tuples_pending > 0) - { - /* - * If we don't have to fetch the tuple, just return nulls. - */ - ExecStoreAllNullTuple(slot); - hscan->rs_empty_tuples_pending--; - return true; - } - /* * Out of range? If so, nothing more to look at on this page */ diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c index 6b48a6d835..82402c0ac0 100644 --- a/src/backend/executor/nodeBitmapHeapscan.c +++ b/src/backend/executor/nodeBitmapHeapscan.c @@ -185,24 +185,11 @@ BitmapHeapNext(BitmapHeapScanState *node) */ if (!scan) { - bool need_tuples = false; - - /* - * We can potentially skip fetching heap pages if we do not need - * any columns of the table, either for checking non-indexable - * quals or for returning data. This test is a bit simplistic, as - * it checks the stronger condition that there's no qual or return - * tlist at all. But in most cases it's probably not worth working - * harder than that. 
- */ - need_tuples = (node->ss.ps.plan->qual != NIL || - node->ss.ps.plan->targetlist != NIL); - scan = table_beginscan_bm(node->ss.ss_currentRelation, node->ss.ps.state->es_snapshot, 0, NULL, - need_tuples); + true); node->ss.ss_currentScanDesc = scan; } @@ -459,7 +446,6 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan) while (node->prefetch_pages < node->prefetch_target) { TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator); - bool skip_fetch; if (tbmpre == NULL) { @@ -470,20 +456,7 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan) } node->prefetch_pages++; - /* - * If we expect not to have to actually read this heap page, - * skip this prefetch call, but continue to run the prefetch - * logic normally. (Would it be better not to increment - * prefetch_pages?) - */ - skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) && - !tbmpre->recheck && - VM_ALL_VISIBLE(node->ss.ss_currentRelation, - tbmpre->blockno, - &node->pvmbuffer)); - - if (!skip_fetch) - PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno); + PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno); } } @@ -500,7 +473,6 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan) { TBMIterateResult *tbmpre; bool do_prefetch = false; - bool skip_fetch; /* * Recheck under the mutex. If some other process has already @@ -526,15 +498,7 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan) break; } - /* As above, skip prefetch if we expect not to need page */ - skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) && - !tbmpre->recheck && - VM_ALL_VISIBLE(node->ss.ss_currentRelation, - tbmpre->blockno, - &node->pvmbuffer)); - - if (!skip_fetch) - PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno); + PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno); } } } -- 2.45.2