Re: MergeAppend could consider sorting cheapest child path

From: Bruce Momjian <bruce(at)momjian(dot)us>
To: Alexander Pyhalov <a(dot)pyhalov(at)postgrespro(dot)ru>
Cc: PostgreSQL Hackers <pgsql-hackers(at)lists(dot)postgresql(dot)org>
Subject: Re: MergeAppend could consider sorting cheapest child path
Date: 2024-10-17 00:03:09
Message-ID: ZxBUPQA3fPSmBPV7@momjian.us
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers


Is this still being considered?

---------------------------------------------------------------------------

On Tue, Jun 18, 2024 at 07:45:09PM +0300, Alexander Pyhalov wrote:
> Hi.
>
> Now when planner finds suitable pathkeys in generate_orderedappend_paths(),
> it uses them, even if explicit sort of the cheapest child path could be more
> efficient.
>
> We encountered this issue on partitioned table with two indexes, where one
> is suitable for sorting, and another is good for selecting data. MergeAppend
> was generated
> with subpaths doing index scan on suitably ordered index and filtering a lot
> of data.
> The suggested fix allows MergeAppend to consider sorting on cheapest
> childrel total path as an alternative.
>
> --
> Best regards,
> Alexander Pyhalov,
> Postgres Professional

> From d5eb3d326d83e2ca47c17552fcc6fd27b6b98d4e Mon Sep 17 00:00:00 2001
> From: Alexander Pyhalov <a(dot)pyhalov(at)postgrespro(dot)ru>
> Date: Tue, 18 Jun 2024 15:56:18 +0300
> Subject: [PATCH] MergeAppend could consider using sorted best path.
>
> This helps when index with suitable pathkeys is not
> good for filtering data.
> ---
> .../postgres_fdw/expected/postgres_fdw.out | 6 +-
> src/backend/optimizer/path/allpaths.c | 21 +++++
> src/test/regress/expected/inherit.out | 45 +++++++++-
> src/test/regress/expected/partition_join.out | 87 +++++++++++--------
> src/test/regress/expected/union.out | 6 +-
> src/test/regress/sql/inherit.sql | 17 ++++
> 6 files changed, 141 insertions(+), 41 deletions(-)
>
> diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
> index ea566d50341..687591e4a97 100644
> --- a/contrib/postgres_fdw/expected/postgres_fdw.out
> +++ b/contrib/postgres_fdw/expected/postgres_fdw.out
> @@ -10074,13 +10074,15 @@ SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a
> -> Nested Loop
> Join Filter: (t1.a = t2.b)
> -> Append
> - -> Foreign Scan on ftprt1_p1 t1_1
> + -> Sort
> + Sort Key: t1_1.a
> + -> Foreign Scan on ftprt1_p1 t1_1
> -> Foreign Scan on ftprt1_p2 t1_2
> -> Materialize
> -> Append
> -> Foreign Scan on ftprt2_p1 t2_1
> -> Foreign Scan on ftprt2_p2 t2_2
> -(10 rows)
> +(12 rows)
>
> SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1;
> a | b
> diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
> index 4895cee9944..827bc469269 100644
> --- a/src/backend/optimizer/path/allpaths.c
> +++ b/src/backend/optimizer/path/allpaths.c
> @@ -1845,6 +1845,27 @@ generate_orderedappend_paths(PlannerInfo *root, RelOptInfo *rel,
> /* Assert we do have an unparameterized path for this child */
> Assert(cheapest_total->param_info == NULL);
> }
> + else
> + {
> + /*
> + * Even if we found necessary pathkeys, using unsorted path
> + * can be more efficient.
> + */
> + Path sort_path;
> +
> + cost_sort(&sort_path,
> + root,
> + pathkeys,
> + childrel->cheapest_total_path->total_cost,
> + childrel->cheapest_total_path->rows,
> + childrel->cheapest_total_path->pathtarget->width,
> + 0.0,
> + work_mem,
> + -1.0 /* need all tuples to sort them */ );
> +
> + if (compare_path_costs(&sort_path, cheapest_total, TOTAL_COST) < 0)
> + cheapest_total = childrel->cheapest_total_path;
> + }
>
> /*
> * When building a fractional path, determine a cheapest
> diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out
> index ad732134148..16e78c8d2ff 100644
> --- a/src/test/regress/expected/inherit.out
> +++ b/src/test/regress/expected/inherit.out
> @@ -1555,6 +1555,7 @@ insert into matest2 (name) values ('Test 4');
> insert into matest3 (name) values ('Test 5');
> insert into matest3 (name) values ('Test 6');
> set enable_indexscan = off; -- force use of seqscan/sort, so no merge
> +set enable_sort = off; -- avoid sorting below MergeAppend
> explain (verbose, costs off) select * from matest0 order by 1-id;
> QUERY PLAN
> ------------------------------------------------------------
> @@ -1608,6 +1609,7 @@ select min(1-id) from matest0;
> (1 row)
>
> reset enable_indexscan;
> +reset enable_sort;
> set enable_seqscan = off; -- plan with fewest seqscans should be merge
> set enable_parallel_append = off; -- Don't let parallel-append interfere
> explain (verbose, costs off) select * from matest0 order by 1-id;
> @@ -1702,7 +1704,9 @@ order by t1.b limit 10;
> Merge Cond: (t1.b = t2.b)
> -> Merge Append
> Sort Key: t1.b
> - -> Index Scan using matest0i on matest0 t1_1
> + -> Sort
> + Sort Key: t1_1.b
> + -> Seq Scan on matest0 t1_1
> -> Index Scan using matest1i on matest1 t1_2
> -> Materialize
> -> Merge Append
> @@ -1711,7 +1715,7 @@ order by t1.b limit 10;
> Filter: (c = d)
> -> Index Scan using matest1i on matest1 t2_2
> Filter: (c = d)
> -(14 rows)
> +(16 rows)
>
> reset enable_nestloop;
> drop table matest0 cascade;
> @@ -2663,6 +2667,43 @@ explain (costs off) select * from mcrparted where a = 10 order by a, abs(b), c;
>
> reset enable_bitmapscan;
> drop table mcrparted;
> +-- Check that sort path can be used by MergeAppend even when there are suitable pathkeys
> +create table hash_parted (i int, j int, k int) partition by hash(i);
> +create table hash_parted_1 partition of hash_parted for values with (modulus 4, remainder 0);
> +create table hash_parted_2 partition of hash_parted for values with (modulus 4, remainder 1);
> +create table hash_parted_3 partition of hash_parted for values with (modulus 4, remainder 2);
> +create table hash_parted_4 partition of hash_parted for values with (modulus 4, remainder 3);
> +--create table hash_parted_5 partition of hash_parted for values with (modulus 6, remainder 4);
> +--create table hash_parted_6 partition of hash_parted for values with (modulus 6, remainder 5);
> +create index on hash_parted(j);
> +create index on hash_parted(k);
> +insert into hash_parted select i, i, i from generate_series(1,10000) i;
> +analyze hash_parted;
> +explain (costs off) select * from hash_parted where k<100 order by j limit 100;
> + QUERY PLAN
> +-------------------------------------------------------------------------
> + Limit
> + -> Merge Append
> + Sort Key: hash_parted.j
> + -> Sort
> + Sort Key: hash_parted_1.j
> + -> Index Scan using hash_parted_1_k_idx on hash_parted_1
> + Index Cond: (k < 100)
> + -> Sort
> + Sort Key: hash_parted_2.j
> + -> Index Scan using hash_parted_2_k_idx on hash_parted_2
> + Index Cond: (k < 100)
> + -> Sort
> + Sort Key: hash_parted_3.j
> + -> Index Scan using hash_parted_3_k_idx on hash_parted_3
> + Index Cond: (k < 100)
> + -> Sort
> + Sort Key: hash_parted_4.j
> + -> Index Scan using hash_parted_4_k_idx on hash_parted_4
> + Index Cond: (k < 100)
> +(19 rows)
> +
> +drop table hash_parted;
> -- Ensure LIST partitions allow an Append to be used instead of a MergeAppend
> create table bool_lp (b bool) partition by list(b);
> create table bool_lp_true partition of bool_lp for values in(true);
> diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
> index 6d07f86b9bc..80d480d33d5 100644
> --- a/src/test/regress/expected/partition_join.out
> +++ b/src/test/regress/expected/partition_join.out
> @@ -1309,28 +1309,32 @@ SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2
> -- This should generate a partitionwise join, but currently fails to
> EXPLAIN (COSTS OFF)
> SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
> - QUERY PLAN
> ------------------------------------------------------------
> - Incremental Sort
> + QUERY PLAN
> +-----------------------------------------------------------------
> + Sort
> Sort Key: prt1.a, prt2.b
> - Presorted Key: prt1.a
> - -> Merge Left Join
> - Merge Cond: (prt1.a = prt2.b)
> - -> Sort
> - Sort Key: prt1.a
> - -> Append
> - -> Seq Scan on prt1_p1 prt1_1
> - Filter: ((a < 450) AND (b = 0))
> - -> Seq Scan on prt1_p2 prt1_2
> - Filter: ((a < 450) AND (b = 0))
> - -> Sort
> - Sort Key: prt2.b
> - -> Append
> + -> Merge Right Join
> + Merge Cond: (prt2.b = prt1.a)
> + -> Append
> + -> Sort
> + Sort Key: prt2_1.b
> -> Seq Scan on prt2_p2 prt2_1
> Filter: (b > 250)
> + -> Sort
> + Sort Key: prt2_2.b
> -> Seq Scan on prt2_p3 prt2_2
> Filter: (b > 250)
> -(19 rows)
> + -> Materialize
> + -> Append
> + -> Sort
> + Sort Key: prt1_1.a
> + -> Seq Scan on prt1_p1 prt1_1
> + Filter: ((a < 450) AND (b = 0))
> + -> Sort
> + Sort Key: prt1_2.a
> + -> Seq Scan on prt1_p2 prt1_2
> + Filter: ((a < 450) AND (b = 0))
> +(23 rows)
>
> SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
> a | b
> @@ -1350,25 +1354,33 @@ SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT *
> -- partitionwise join does not apply
> EXPLAIN (COSTS OFF)
> SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a;
> - QUERY PLAN
> ------------------------------------------------------------------------------------------
> + QUERY PLAN
> +------------------------------------------------------------------
> Merge Join
> - Merge Cond: ((t1.a = t2.b) AND (((((t1.*)::prt1))::text) = ((((t2.*)::prt2))::text)))
> - -> Sort
> - Sort Key: t1.a, ((((t1.*)::prt1))::text)
> - -> Result
> - -> Append
> - -> Seq Scan on prt1_p1 t1_1
> - -> Seq Scan on prt1_p2 t1_2
> - -> Seq Scan on prt1_p3 t1_3
> - -> Sort
> - Sort Key: t2.b, ((((t2.*)::prt2))::text)
> - -> Result
> - -> Append
> + Merge Cond: (t1.a = t2.b)
> + Join Filter: ((((t2.*)::prt2))::text = (((t1.*)::prt1))::text)
> + -> Append
> + -> Sort
> + Sort Key: t1_1.a
> + -> Seq Scan on prt1_p1 t1_1
> + -> Sort
> + Sort Key: t1_2.a
> + -> Seq Scan on prt1_p2 t1_2
> + -> Sort
> + Sort Key: t1_3.a
> + -> Seq Scan on prt1_p3 t1_3
> + -> Materialize
> + -> Append
> + -> Sort
> + Sort Key: t2_1.b
> -> Seq Scan on prt2_p1 t2_1
> + -> Sort
> + Sort Key: t2_2.b
> -> Seq Scan on prt2_p2 t2_2
> + -> Sort
> + Sort Key: t2_3.b
> -> Seq Scan on prt2_p3 t2_3
> -(16 rows)
> +(24 rows)
>
> SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a;
> a | b
> @@ -4906,21 +4918,26 @@ EXPLAIN (COSTS OFF)
> SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2.b) WHERE t1.b >= 125 AND t1.b < 225 ORDER BY t1.a, t1.b;
> QUERY PLAN
> --------------------------------------------------------------------
> - Sort
> + Merge Append
> Sort Key: t1.a, t1.b
> - -> Append
> + -> Sort
> + Sort Key: t1_1.a, t1_1.b
> -> Hash Join
> Hash Cond: ((t1_1.a = t2_1.a) AND (t1_1.b = t2_1.b))
> -> Seq Scan on alpha_neg_p1 t1_1
> Filter: ((b >= 125) AND (b < 225))
> -> Hash
> -> Seq Scan on beta_neg_p1 t2_1
> + -> Sort
> + Sort Key: t1_2.a, t1_2.b
> -> Hash Join
> Hash Cond: ((t2_2.a = t1_2.a) AND (t2_2.b = t1_2.b))
> -> Seq Scan on beta_neg_p2 t2_2
> -> Hash
> -> Seq Scan on alpha_neg_p2 t1_2
> Filter: ((b >= 125) AND (b < 225))
> + -> Sort
> + Sort Key: t1_4.a, t1_4.b
> -> Hash Join
> Hash Cond: ((t2_4.a = t1_4.a) AND (t2_4.b = t1_4.b))
> -> Append
> @@ -4935,7 +4952,7 @@ SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2
> Filter: ((b >= 125) AND (b < 225))
> -> Seq Scan on alpha_pos_p3 t1_6
> Filter: ((b >= 125) AND (b < 225))
> -(29 rows)
> +(34 rows)
>
> SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2.b) WHERE t1.b >= 125 AND t1.b < 225 ORDER BY t1.a, t1.b;
> a | b | c | a | b | c
> diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
> index 0fd0e1c38b3..4c1c173d8e6 100644
> --- a/src/test/regress/expected/union.out
> +++ b/src/test/regress/expected/union.out
> @@ -1195,12 +1195,14 @@ select event_id
> ----------------------------------------------------------
> Merge Append
> Sort Key: events.event_id
> - -> Index Scan using events_pkey on events
> + -> Sort
> + Sort Key: events.event_id
> + -> Seq Scan on events
> -> Sort
> Sort Key: events_1.event_id
> -> Seq Scan on events_child events_1
> -> Index Scan using other_events_pkey on other_events
> -(7 rows)
> +(9 rows)
>
> drop table events_child, events, other_events;
> reset enable_indexonlyscan;
> diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql
> index e3bcfdb181e..5331e49283f 100644
> --- a/src/test/regress/sql/inherit.sql
> +++ b/src/test/regress/sql/inherit.sql
> @@ -586,11 +586,13 @@ insert into matest3 (name) values ('Test 5');
> insert into matest3 (name) values ('Test 6');
>
> set enable_indexscan = off; -- force use of seqscan/sort, so no merge
> +set enable_sort = off; -- avoid sorting below MergeAppend
> explain (verbose, costs off) select * from matest0 order by 1-id;
> select * from matest0 order by 1-id;
> explain (verbose, costs off) select min(1-id) from matest0;
> select min(1-id) from matest0;
> reset enable_indexscan;
> +reset enable_sort;
>
> set enable_seqscan = off; -- plan with fewest seqscans should be merge
> set enable_parallel_append = off; -- Don't let parallel-append interfere
> @@ -976,6 +978,21 @@ reset enable_bitmapscan;
>
> drop table mcrparted;
>
> +-- Check that sort path can be used by MergeAppend even when there are suitable pathkeys
> +create table hash_parted (i int, j int, k int) partition by hash(i);
> +create table hash_parted_1 partition of hash_parted for values with (modulus 4, remainder 0);
> +create table hash_parted_2 partition of hash_parted for values with (modulus 4, remainder 1);
> +create table hash_parted_3 partition of hash_parted for values with (modulus 4, remainder 2);
> +create table hash_parted_4 partition of hash_parted for values with (modulus 4, remainder 3);
> +--create table hash_parted_5 partition of hash_parted for values with (modulus 6, remainder 4);
> +--create table hash_parted_6 partition of hash_parted for values with (modulus 6, remainder 5);
> +create index on hash_parted(j);
> +create index on hash_parted(k);
> +insert into hash_parted select i, i, i from generate_series(1,10000) i;
> +analyze hash_parted;
> +explain (costs off) select * from hash_parted where k<100 order by j limit 100;
> +drop table hash_parted;
> +
> -- Ensure LIST partitions allow an Append to be used instead of a MergeAppend
> create table bool_lp (b bool) partition by list(b);
> create table bool_lp_true partition of bool_lp for values in(true);
> --
> 2.34.1
>

--
Bruce Momjian <bruce(at)momjian(dot)us> https://momjian.us
EDB https://enterprisedb.com

When a patient asks the doctor, "Am I going to die?", he means
"Am I going to die soon?"

In response to

Responses

Browse pgsql-hackers by date

  From Date Subject
Next Message Andy Fan 2024-10-17 00:05:41 Re: Considering fractional paths in Append node
Previous Message Bruce Momjian 2024-10-16 23:59:31 Re: minor doc issue in 9.16.2.1.1. Boolean Predicate Check Expressions