From 21677861e101eed22c829e0b14290a56786a12c2 Mon Sep 17 00:00:00 2001
From: "Andrey V. Lepikhov"
Date: Wed, 22 Nov 2023 12:01:39 +0700
Subject: [PATCH] Use an index path with the best selectivity estimation

When both competing paths in add_path() are index paths, apply two extra
heuristics: if both paths are expected to return at most one tuple, prefer
a scan of a unique index; and when the cost comparison is otherwise
inconclusive, prefer the path with the smaller selectivity estimate rather
than relying on the order of the index list.  Regression test outputs are
updated for the plans that change as a result.
---
 src/backend/optimizer/util/pathnode.c         | 40 +++++++++++++++++
 .../expected/drop-index-concurrently-1.out    | 16 ++++---
 src/test/regress/expected/functional_deps.out | 43 +++++++++++++++++++
 src/test/regress/expected/join.out            | 40 +++++++++--------
 src/test/regress/sql/functional_deps.sql      | 36 ++++++++++++++++
 5 files changed, 149 insertions(+), 26 deletions(-)

diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 0b1d17b9d3..32aca83d67 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -454,6 +454,46 @@ add_path(RelOptInfo *parent_rel, Path *new_path)
 		costcmp = compare_path_costs_fuzzily(new_path, old_path,
 											 STD_FUZZ_FACTOR);
 
+		/*
+		 * Apply additional heuristics when comparing two index paths.
+		 */
+		if (IsA(new_path, IndexPath) && IsA(old_path, IndexPath))
+		{
+			IndexPath  *inp = (IndexPath *) new_path;
+			IndexPath  *iop = (IndexPath *) old_path;
+
+			if (new_path->rows <= 1.0 && old_path->rows <= 1.0)
+			{
+				if (inp->indexinfo->unique && !iop->indexinfo->unique)
+					/*
+					 * When both paths are predicted to produce only one
+					 * tuple, the optimizer should always prefer the scan
+					 * of a unique index.
+					 */
+					costcmp = COSTS_BETTER1;
+				else if (costcmp != COSTS_DIFFERENT)
+					/*
+					 * If uniqueness does not give the optimizer an obviously
+					 * stable choice, reduce the chance of a mistake by
+					 * choosing the index with the smaller selectivity.  This
+					 * makes the decision more conservative, although the
+					 * heuristic is debatable.
+					 */
+					costcmp = (inp->indexselectivity < iop->indexselectivity) ?
+								COSTS_BETTER1 : COSTS_BETTER2;
+			}
+			else if (costcmp == COSTS_EQUAL)
+				/*
+				 * The optimizer cannot tell these two index paths apart by
+				 * cost.  To avoid a decision that depends only on the order
+				 * of indexes in the list, fall back to a rational strategy
+				 * based on selectivity: prefer touching fewer tuples on disk
+				 * to filtering them out afterwards.
+				 */
+				costcmp = (inp->indexselectivity < iop->indexselectivity) ?
+							COSTS_BETTER1 : COSTS_BETTER2;
+		}
+
 	/*
 	 * If the two paths compare differently for startup and total cost,
 	 * then we want to keep both, and we can skip comparing pathkeys and
diff --git a/src/test/isolation/expected/drop-index-concurrently-1.out b/src/test/isolation/expected/drop-index-concurrently-1.out
index 1cb2250891..2392cdb033 100644
--- a/src/test/isolation/expected/drop-index-concurrently-1.out
+++ b/src/test/isolation/expected/drop-index-concurrently-1.out
@@ -12,13 +12,15 @@ step preps: PREPARE getrow_seqscan AS SELECT * FROM test_dc WHERE data = 34 ORDE
 step begin: BEGIN;
 step disableseq: SET enable_seqscan = false;
 step explaini: EXPLAIN (COSTS OFF) EXECUTE getrow_idxscan;
-QUERY PLAN
-----------------------------------------------
-Sort
-  Sort Key: id
-  ->  Index Scan using test_dc_data on test_dc
-        Index Cond: (data = 34)
-(4 rows)
+QUERY PLAN
+---------------------------------------------
+Sort
+  Sort Key: id
+  ->  Bitmap Heap Scan on test_dc
+        Recheck Cond: (data = 34)
+        ->  Bitmap Index Scan on test_dc_data
+              Index Cond: (data = 34)
+(6 rows)
 
 step enableseq: SET enable_seqscan = true;
 step explains: EXPLAIN (COSTS OFF) EXECUTE getrow_seqscan;
diff --git a/src/test/regress/expected/functional_deps.out b/src/test/regress/expected/functional_deps.out
index 32381b8ae7..61af99a041 100644
--- a/src/test/regress/expected/functional_deps.out
+++ b/src/test/regress/expected/functional_deps.out
@@ -230,3 +230,46 @@ EXECUTE foo;
 ALTER TABLE articles DROP CONSTRAINT articles_pkey RESTRICT;
 EXECUTE foo; -- fail
 ERROR:  column "articles.keywords" must appear in the GROUP BY clause or be used in an aggregate function
+/*
+ * Corner case of the PostgreSQL optimizer:
+ *
+ * The DBMS does not detect correlations between columns.  Multiplying the
+ * selectivities of many columns inflates the total selectivity error.  If the
+ * resulting selectivity is so small that the row estimate hits its absolute
+ * minimum (1), the optimizer cannot choose between different indexes and
+ * simply uses the first one in the index list (the one created last).
+ */
+\set scale 100000
+CREATE TABLE t AS (
+  SELECT c1 AS c1, -- selectivity(c1)*selectivity(c2)*nrows <= 1
+         c1 AS c2,
+         -- Create two columns with different selectivities.
+         (c1 % 2) AS c3, -- missing from the good index.
+         (c1 % 4) AS c4 -- missing from the bad index.
+  FROM generate_series(1,:scale) AS c1
+);
+UPDATE t SET c1=1,c2=1,c3=1,c4=2 WHERE c1<:scale/1000;
+CREATE INDEX bad ON t (c1,c2,c3);
+CREATE INDEX good ON t (c1,c2,c4);
+ANALYZE t; -- update statistics on the indexes.
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE c1=1 AND c2=1 AND c3=1 AND c4=1;
+                     QUERY PLAN                     
+----------------------------------------------------
+ Index Scan using good on t
+   Index Cond: ((c1 = 1) AND (c2 = 1) AND (c4 = 1))
+   Filter: (c3 = 1)
+(3 rows)
+
+-- Move the bad index to the first position in the index list.
+DROP INDEX bad;
+CREATE INDEX bad ON t (c1,c2,c3);
+ANALYZE t;
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE c1=1 AND c2=1 AND c3=1 AND c4=1;
+                     QUERY PLAN                     
+----------------------------------------------------
+ Index Scan using good on t
+   Index Cond: ((c1 = 1) AND (c2 = 1) AND (c4 = 1))
+   Filter: (c3 = 1)
+(3 rows)
+
+DROP TABLE t CASCADE;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 2c73270143..32b33fabd3 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -8629,14 +8629,15 @@ analyze j2;
 explain (costs off) select * from j1
 inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2
 where j1.id1 % 1000 = 1 and j2.id1 % 1000 = 1;
-                QUERY PLAN                 
--------------------------------------------
+                        QUERY PLAN                        
+----------------------------------------------------------
  Merge Join
-   Merge Cond: (j1.id1 = j2.id1)
-   Join Filter: (j2.id2 = j1.id2)
-   ->  Index Scan using j1_id1_idx on j1
-   ->  Index Scan using j2_id1_idx on j2
-(5 rows)
+   Merge Cond: ((j1.id1 = j2.id1) AND (j1.id2 = j2.id2))
+   ->  Index Only Scan using j1_pkey on j1
+         Filter: ((id1 % 1000) = 1)
+   ->  Index Only Scan using j2_pkey on j2
+         Filter: ((id1 % 1000) = 1)
+(6 rows)
 
 select * from j1
 inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2
@@ -8651,15 +8652,16 @@ where j1.id1 % 1000 = 1 and j2.id1 % 1000 = 1;
 explain (costs off) select * from j1
 inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2
 where j1.id1 % 1000 = 1 and j2.id1 % 1000 = 1 and j2.id1 = any (array[1]);
-                     QUERY PLAN                     
-----------------------------------------------------
+                        QUERY PLAN                        
+----------------------------------------------------------
  Merge Join
-   Merge Cond: (j1.id1 = j2.id1)
-   Join Filter: (j2.id2 = j1.id2)
-   ->  Index Scan using j1_id1_idx on j1
-   ->  Index Scan using j2_id1_idx on j2
+   Merge Cond: ((j1.id1 = j2.id1) AND (j1.id2 = j2.id2))
+   ->  Index Only Scan using j1_pkey on j1
+         Filter: ((id1 % 1000) = 1)
+   ->  Index Only Scan using j2_pkey on j2
          Index Cond: (id1 = ANY ('{1}'::integer[]))
-(6 rows)
+         Filter: ((id1 % 1000) = 1)
+(7 rows)
 
 select * from j1
 inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2
@@ -8674,12 +8676,12 @@ where j1.id1 % 1000 = 1 and j2.id1 % 1000 = 1 and j2.id1 = any (array[1]);
 explain (costs off) select * from j1
 inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2
 where j1.id1 % 1000 = 1 and j2.id1 % 1000 = 1 and j2.id1 >= any (array[1,5]);
-                      QUERY PLAN                       
--------------------------------------------------------
+                        QUERY PLAN                        
+----------------------------------------------------------
  Merge Join
-   Merge Cond: (j1.id1 = j2.id1)
-   Join Filter: (j2.id2 = j1.id2)
-   ->  Index Scan using j1_id1_idx on j1
+   Merge Cond: ((j1.id1 = j2.id1) AND (j1.id2 = j2.id2))
+   ->  Index Only Scan using j1_pkey on j1
+         Filter: ((id1 % 1000) = 1)
    ->  Index Only Scan using j2_pkey on j2
          Index Cond: (id1 >= ANY ('{1,5}'::integer[]))
          Filter: ((id1 % 1000) = 1)
diff --git a/src/test/regress/sql/functional_deps.sql b/src/test/regress/sql/functional_deps.sql
index 406490b995..f617ee9269 100644
--- a/src/test/regress/sql/functional_deps.sql
+++ b/src/test/regress/sql/functional_deps.sql
@@ -208,3 +208,39 @@ EXECUTE foo;
 ALTER TABLE articles DROP CONSTRAINT articles_pkey RESTRICT;
 
 EXECUTE foo; -- fail
+
+/*
+ * Corner case of the PostgreSQL optimizer:
+ *
+ * The DBMS does not detect correlations between columns.  Multiplying the
+ * selectivities of many columns inflates the total selectivity error.  If the
+ * resulting selectivity is so small that the row estimate hits its absolute
+ * minimum (1), the optimizer cannot choose between different indexes and
+ * simply uses the first one in the index list (the one created last).
+ */
+\set scale 100000
+
+CREATE TABLE t AS (
+  SELECT c1 AS c1, -- selectivity(c1)*selectivity(c2)*nrows <= 1
+         c1 AS c2,
+         -- Create two columns with different selectivities.
+         (c1 % 2) AS c3, -- missing from the good index.
+         (c1 % 4) AS c4 -- missing from the bad index.
+  FROM generate_series(1,:scale) AS c1
+);
+UPDATE t SET c1=1,c2=1,c3=1,c4=2 WHERE c1<:scale/1000;
+
+CREATE INDEX bad ON t (c1,c2,c3);
+CREATE INDEX good ON t (c1,c2,c4);
+ANALYZE t; -- update statistics on the indexes.
+
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE c1=1 AND c2=1 AND c3=1 AND c4=1;
+
+-- Move the bad index to the first position in the index list.
+DROP INDEX bad;
+CREATE INDEX bad ON t (c1,c2,c3);
+ANALYZE t;
+
+EXPLAIN (COSTS OFF) SELECT * FROM t WHERE c1=1 AND c2=1 AND c3=1 AND c4=1;
+
+DROP TABLE t CASCADE;
-- 
2.43.0
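P.S. For reviewers who want to poke at the unique-index branch of the heuristic outside the regression suite, one possible manual check is sketched below. It is not part of the patch; the object names (u, u_uniq, u_nonuniq) are made up for illustration. With both columns pinned by equality predicates, the row estimate drops to one tuple for either index path, so a patched server would be expected to settle on the unique index regardless of index creation order, while an unpatched one may follow the index list order.

-- Hypothetical manual check, not part of the patch or its regression tests.
CREATE TABLE u AS
  SELECT gs AS a, gs AS b FROM generate_series(1, 100000) AS gs;
CREATE INDEX u_nonuniq ON u (a, b);   -- non-unique competitor, created first
CREATE UNIQUE INDEX u_uniq ON u (a);  -- unique index, created last
ANALYZE u;

-- Both index paths estimate at most one returned row here; under the patch
-- the unique-index scan is intended to win the add_path() tie-break rather
-- than whichever index happens to come first in the index list.
EXPLAIN (COSTS OFF) SELECT * FROM u WHERE a = 42 AND b = 42;

DROP TABLE u;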