From d5eb3d326d83e2ca47c17552fcc6fd27b6b98d4e Mon Sep 17 00:00:00 2001
From: Alexander Pyhalov <a.pyhalov@postgrespro.ru>
Date: Tue, 18 Jun 2024 15:56:18 +0300
Subject: [PATCH] MergeAppend could consider using sorted best path.

This helps when index with suitable pathkeys is not
good for filtering data.
---
 .../postgres_fdw/expected/postgres_fdw.out    |  6 +-
 src/backend/optimizer/path/allpaths.c         | 21 +++++
 src/test/regress/expected/inherit.out         | 45 +++++++++-
 src/test/regress/expected/partition_join.out  | 87 +++++++++++--------
 src/test/regress/expected/union.out           |  6 +-
 src/test/regress/sql/inherit.sql              | 17 ++++
 6 files changed, 141 insertions(+), 41 deletions(-)

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index ea566d50341..687591e4a97 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -10074,13 +10074,15 @@ SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a
    ->  Nested Loop
          Join Filter: (t1.a = t2.b)
          ->  Append
-               ->  Foreign Scan on ftprt1_p1 t1_1
+               ->  Sort
+                     Sort Key: t1_1.a
+                     ->  Foreign Scan on ftprt1_p1 t1_1
                ->  Foreign Scan on ftprt1_p2 t1_2
          ->  Materialize
                ->  Append
                      ->  Foreign Scan on ftprt2_p1 t2_1
                      ->  Foreign Scan on ftprt2_p2 t2_2
-(10 rows)
+(12 rows)
 
 SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1;
   a  |  b  
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 4895cee9944..827bc469269 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -1845,6 +1845,27 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
 				/* Assert we do have an unparameterized path for this child */
 				Assert(cheapest_total->param_info == NULL);
 			}
+			else
+			{
+				/*
+				 * Even if we found necessary pathkeys, using unsorted path
+				 * can be more efficient.
+				 */
+				Path		sort_path;
+
+				cost_sort(&sort_path,
+						  root,
+						  pathkeys,
+						  childrel->cheapest_total_path->total_cost,
+						  childrel->cheapest_total_path->rows,
+						  childrel->cheapest_total_path->pathtarget->width,
+						  0.0,
+						  work_mem,
+						  -1.0 /* need all tuples to sort them */ );
+
+				if (compare_path_costs(&sort_path, cheapest_total, TOTAL_COST) < 0)
+					cheapest_total = childrel->cheapest_total_path;
+			}
 
 			/*
 			 * When building a fractional path, determine a cheapest
diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out
index ad732134148..16e78c8d2ff 100644
--- a/src/test/regress/expected/inherit.out
+++ b/src/test/regress/expected/inherit.out
@@ -1555,6 +1555,7 @@ insert into matest2 (name) values ('Test 4');
 insert into matest3 (name) values ('Test 5');
 insert into matest3 (name) values ('Test 6');
 set enable_indexscan = off;  -- force use of seqscan/sort, so no merge
+set enable_sort = off; -- avoid sorting below MergeAppend
 explain (verbose, costs off) select * from matest0 order by 1-id;
                          QUERY PLAN                         
 ------------------------------------------------------------
@@ -1608,6 +1609,7 @@ select min(1-id) from matest0;
 (1 row)
 
 reset enable_indexscan;
+reset enable_sort;
 set enable_seqscan = off;  -- plan with fewest seqscans should be merge
 set enable_parallel_append = off; -- Don't let parallel-append interfere
 explain (verbose, costs off) select * from matest0 order by 1-id;
@@ -1702,7 +1704,9 @@ order by t1.b limit 10;
          Merge Cond: (t1.b = t2.b)
          ->  Merge Append
                Sort Key: t1.b
-               ->  Index Scan using matest0i on matest0 t1_1
+               ->  Sort
+                     Sort Key: t1_1.b
+                     ->  Seq Scan on matest0 t1_1
                ->  Index Scan using matest1i on matest1 t1_2
          ->  Materialize
                ->  Merge Append
@@ -1711,7 +1715,7 @@ order by t1.b limit 10;
                            Filter: (c = d)
                      ->  Index Scan using matest1i on matest1 t2_2
                            Filter: (c = d)
-(14 rows)
+(16 rows)
 
 reset enable_nestloop;
 drop table matest0 cascade;
@@ -2663,6 +2667,43 @@ explain (costs off) select * from mcrparted where a = 10 order by a, abs(b), c;
 
 reset enable_bitmapscan;
 drop table mcrparted;
+-- Check that sort path can be used by MergeAppend even when there are suitable pathkeys
+create table hash_parted (i int, j int, k int) partition by hash(i);
+create table hash_parted_1 partition of hash_parted for values with (modulus 4, remainder 0);
+create table hash_parted_2 partition of hash_parted for values with (modulus 4, remainder 1);
+create table hash_parted_3 partition of hash_parted for values with (modulus 4, remainder 2);
+create table hash_parted_4 partition of hash_parted for values with (modulus 4, remainder 3);
+--create table hash_parted_5 partition of hash_parted for values with (modulus 6, remainder 4);
+--create table hash_parted_6 partition of hash_parted for values with (modulus 6, remainder 5);
+create index on hash_parted(j);
+create index on hash_parted(k);
+insert into hash_parted select i, i, i from generate_series(1,10000) i;
+analyze hash_parted;
+explain (costs off) select * from hash_parted where k<100 order by j limit 100;
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Limit
+   ->  Merge Append
+         Sort Key: hash_parted.j
+         ->  Sort
+               Sort Key: hash_parted_1.j
+               ->  Index Scan using hash_parted_1_k_idx on hash_parted_1
+                     Index Cond: (k < 100)
+         ->  Sort
+               Sort Key: hash_parted_2.j
+               ->  Index Scan using hash_parted_2_k_idx on hash_parted_2
+                     Index Cond: (k < 100)
+         ->  Sort
+               Sort Key: hash_parted_3.j
+               ->  Index Scan using hash_parted_3_k_idx on hash_parted_3
+                     Index Cond: (k < 100)
+         ->  Sort
+               Sort Key: hash_parted_4.j
+               ->  Index Scan using hash_parted_4_k_idx on hash_parted_4
+                     Index Cond: (k < 100)
+(19 rows)
+
+drop table hash_parted;
 -- Ensure LIST partitions allow an Append to be used instead of a MergeAppend
 create table bool_lp (b bool) partition by list(b);
 create table bool_lp_true partition of bool_lp for values in(true);
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index 6d07f86b9bc..80d480d33d5 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -1309,28 +1309,32 @@ SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2
 -- This should generate a partitionwise join, but currently fails to
 EXPLAIN (COSTS OFF)
 SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
-                        QUERY PLAN                         
------------------------------------------------------------
- Incremental Sort
+                           QUERY PLAN                            
+-----------------------------------------------------------------
+ Sort
    Sort Key: prt1.a, prt2.b
-   Presorted Key: prt1.a
-   ->  Merge Left Join
-         Merge Cond: (prt1.a = prt2.b)
-         ->  Sort
-               Sort Key: prt1.a
-               ->  Append
-                     ->  Seq Scan on prt1_p1 prt1_1
-                           Filter: ((a < 450) AND (b = 0))
-                     ->  Seq Scan on prt1_p2 prt1_2
-                           Filter: ((a < 450) AND (b = 0))
-         ->  Sort
-               Sort Key: prt2.b
-               ->  Append
+   ->  Merge Right Join
+         Merge Cond: (prt2.b = prt1.a)
+         ->  Append
+               ->  Sort
+                     Sort Key: prt2_1.b
                      ->  Seq Scan on prt2_p2 prt2_1
                            Filter: (b > 250)
+               ->  Sort
+                     Sort Key: prt2_2.b
                      ->  Seq Scan on prt2_p3 prt2_2
                            Filter: (b > 250)
-(19 rows)
+         ->  Materialize
+               ->  Append
+                     ->  Sort
+                           Sort Key: prt1_1.a
+                           ->  Seq Scan on prt1_p1 prt1_1
+                                 Filter: ((a < 450) AND (b = 0))
+                     ->  Sort
+                           Sort Key: prt1_2.a
+                           ->  Seq Scan on prt1_p2 prt1_2
+                                 Filter: ((a < 450) AND (b = 0))
+(23 rows)
 
 SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
   a  |  b  
@@ -1350,25 +1354,33 @@ SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT *
 -- partitionwise join does not apply
 EXPLAIN (COSTS OFF)
 SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a;
-                                       QUERY PLAN                                        
------------------------------------------------------------------------------------------
+                            QUERY PLAN                            
+------------------------------------------------------------------
  Merge Join
-   Merge Cond: ((t1.a = t2.b) AND (((((t1.*)::prt1))::text) = ((((t2.*)::prt2))::text)))
-   ->  Sort
-         Sort Key: t1.a, ((((t1.*)::prt1))::text)
-         ->  Result
-               ->  Append
-                     ->  Seq Scan on prt1_p1 t1_1
-                     ->  Seq Scan on prt1_p2 t1_2
-                     ->  Seq Scan on prt1_p3 t1_3
-   ->  Sort
-         Sort Key: t2.b, ((((t2.*)::prt2))::text)
-         ->  Result
-               ->  Append
+   Merge Cond: (t1.a = t2.b)
+   Join Filter: ((((t2.*)::prt2))::text = (((t1.*)::prt1))::text)
+   ->  Append
+         ->  Sort
+               Sort Key: t1_1.a
+               ->  Seq Scan on prt1_p1 t1_1
+         ->  Sort
+               Sort Key: t1_2.a
+               ->  Seq Scan on prt1_p2 t1_2
+         ->  Sort
+               Sort Key: t1_3.a
+               ->  Seq Scan on prt1_p3 t1_3
+   ->  Materialize
+         ->  Append
+               ->  Sort
+                     Sort Key: t2_1.b
                      ->  Seq Scan on prt2_p1 t2_1
+               ->  Sort
+                     Sort Key: t2_2.b
                      ->  Seq Scan on prt2_p2 t2_2
+               ->  Sort
+                     Sort Key: t2_3.b
                      ->  Seq Scan on prt2_p3 t2_3
-(16 rows)
+(24 rows)
 
 SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a;
  a  | b  
@@ -4906,21 +4918,26 @@ EXPLAIN (COSTS OFF)
 SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2.b) WHERE t1.b >= 125 AND t1.b < 225 ORDER BY t1.a, t1.b;
                              QUERY PLAN                             
 --------------------------------------------------------------------
- Sort
+ Merge Append
    Sort Key: t1.a, t1.b
-   ->  Append
+   ->  Sort
+         Sort Key: t1_1.a, t1_1.b
          ->  Hash Join
                Hash Cond: ((t1_1.a = t2_1.a) AND (t1_1.b = t2_1.b))
                ->  Seq Scan on alpha_neg_p1 t1_1
                      Filter: ((b >= 125) AND (b < 225))
                ->  Hash
                      ->  Seq Scan on beta_neg_p1 t2_1
+   ->  Sort
+         Sort Key: t1_2.a, t1_2.b
          ->  Hash Join
                Hash Cond: ((t2_2.a = t1_2.a) AND (t2_2.b = t1_2.b))
                ->  Seq Scan on beta_neg_p2 t2_2
                ->  Hash
                      ->  Seq Scan on alpha_neg_p2 t1_2
                            Filter: ((b >= 125) AND (b < 225))
+   ->  Sort
+         Sort Key: t1_4.a, t1_4.b
          ->  Hash Join
                Hash Cond: ((t2_4.a = t1_4.a) AND (t2_4.b = t1_4.b))
                ->  Append
@@ -4935,7 +4952,7 @@ SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2
                                  Filter: ((b >= 125) AND (b < 225))
                            ->  Seq Scan on alpha_pos_p3 t1_6
                                  Filter: ((b >= 125) AND (b < 225))
-(29 rows)
+(34 rows)
 
 SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2.b) WHERE t1.b >= 125 AND t1.b < 225 ORDER BY t1.a, t1.b;
  a  |  b  |  c   | a  |  b  |  c   
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index 0fd0e1c38b3..4c1c173d8e6 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -1195,12 +1195,14 @@ select event_id
 ----------------------------------------------------------
  Merge Append
    Sort Key: events.event_id
-   ->  Index Scan using events_pkey on events
+   ->  Sort
+         Sort Key: events.event_id
+         ->  Seq Scan on events
    ->  Sort
          Sort Key: events_1.event_id
          ->  Seq Scan on events_child events_1
    ->  Index Scan using other_events_pkey on other_events
-(7 rows)
+(9 rows)
 
 drop table events_child, events, other_events;
 reset enable_indexonlyscan;
diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql
index e3bcfdb181e..5331e49283f 100644
--- a/src/test/regress/sql/inherit.sql
+++ b/src/test/regress/sql/inherit.sql
@@ -586,11 +586,13 @@ insert into matest3 (name) values ('Test 5');
 insert into matest3 (name) values ('Test 6');
 
 set enable_indexscan = off;  -- force use of seqscan/sort, so no merge
+set enable_sort = off; -- avoid sorting below MergeAppend
 explain (verbose, costs off) select * from matest0 order by 1-id;
 select * from matest0 order by 1-id;
 explain (verbose, costs off) select min(1-id) from matest0;
 select min(1-id) from matest0;
 reset enable_indexscan;
+reset enable_sort;
 
 set enable_seqscan = off;  -- plan with fewest seqscans should be merge
 set enable_parallel_append = off; -- Don't let parallel-append interfere
@@ -976,6 +978,21 @@ reset enable_bitmapscan;
 
 drop table mcrparted;
 
+-- Check that sort path can be used by MergeAppend even when there are suitable pathkeys
+create table hash_parted (i int, j int, k int) partition by hash(i);
+create table hash_parted_1 partition of hash_parted for values with (modulus 4, remainder 0);
+create table hash_parted_2 partition of hash_parted for values with (modulus 4, remainder 1);
+create table hash_parted_3 partition of hash_parted for values with (modulus 4, remainder 2);
+create table hash_parted_4 partition of hash_parted for values with (modulus 4, remainder 3);
+--create table hash_parted_5 partition of hash_parted for values with (modulus 6, remainder 4);
+--create table hash_parted_6 partition of hash_parted for values with (modulus 6, remainder 5);
+create index on hash_parted(j);
+create index on hash_parted(k);
+insert into hash_parted select i, i, i from generate_series(1,10000) i;
+analyze hash_parted;
+explain (costs off) select * from hash_parted where k<100 order by j limit 100;
+drop table hash_parted;
+
 -- Ensure LIST partitions allow an Append to be used instead of a MergeAppend
 create table bool_lp (b bool) partition by list(b);
 create table bool_lp_true partition of bool_lp for values in(true);
-- 
2.34.1

