From 02df30cf9f1520688a0b474f7ecd106ba004b201 Mon Sep 17 00:00:00 2001 From: "Andrei V. Lepikhov" Date: Sat, 4 May 2024 13:05:45 +0700 Subject: [PATCH] Introduce the Asymmetric Join (AJ) feature. Asymmetric Join is an addition to the partitionwise join strategy: it allows the optimiser when it sees join of plain table with partitioned one to attempt joining the table with each of partitions separately. Here, as usual, we have some pro and cons: * Smaller JOIN can survive memory allocations or data skews * Different strategies for different pairs may be more effective. * It may allow to prune more partitions and with partition constrain select smaller volume of data. * But of course, here we have increasing complexity of planning. To implement this feature we add consider_asymmetric_join into the RelOptInfo structure. It must be true iff consider_partitionwise_join is false. Implementation follows the logic of partitionwise_join. We disallow any partitioned relations in the inner of AJ. The key point here is to resolve the problem with different ways of achieving RelOptInfo which can need different number of part_rels. One annoying feature of the implementation is some sort of asymmetry introduced: we have dependency on the order of incoming relations. If AJ was approved for a joinrel, possible PWJ for this joinrel with different combination of inner and outer will be declined. New GUC enable_asymmetric_join allows to disable this feature, if necessary. A lot of regression tests were changed or newly created. TODO: 1. Here we doesn't resolved Ashutosh Bapat's warning on scanning of one RangeEntryEntry in different parts of the plan. PostgreSQL used to implicitly assume that each leaf RangeTable (and RelOptInfo) corresponds only one scan node. For example, such partition pruning code is designed with such assumption. An qual evaluation code also references index of scan node. After analysis We didn't found any problems which can be caused by this inconsistency. Unfortunately, it doesn't guarantee any issues in the future. 2. We should discover on the partition pruning more. May a join order change impact pruning outcome and how it can influence partitioning schema? --- .../postgres_fdw/expected/postgres_fdw.out | 211 +++-- contrib/postgres_fdw/sql/postgres_fdw.sql | 9 + src/backend/optimizer/path/allpaths.c | 7 +- src/backend/optimizer/path/costsize.c | 1 + src/backend/optimizer/path/joinrels.c | 222 ++++- src/backend/optimizer/plan/createplan.c | 5 +- src/backend/optimizer/plan/setrefs.c | 17 +- src/backend/optimizer/util/appendinfo.c | 13 +- src/backend/optimizer/util/pathnode.c | 151 +++- src/backend/optimizer/util/relnode.c | 192 ++++- src/backend/utils/misc/guc_tables.c | 10 + src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/nodes/pathnodes.h | 2 + src/include/optimizer/cost.h | 1 + src/include/optimizer/pathnode.h | 6 +- src/test/regress/expected/partition_join.out | 810 ++++++++++++++++++ src/test/regress/expected/partition_prune.out | 440 +++++++--- src/test/regress/expected/sysviews.out | 3 +- src/test/regress/sql/partition_join.sql | 276 ++++++ 19 files changed, 2138 insertions(+), 239 deletions(-) diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 078b8a966f..5fc1f57f72 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -10070,6 +10070,60 @@ SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a 400 | 400 (4 rows) +-- Apply AJ to foreign tables +EXPLAIN (COSTS OFF) +SELECT t1.a,t2.b FROM fprt1 t1 , ftprt2_p1 t2 WHERE (t1.a = t2.b) AND t2.c like '%0004' ORDER BY 1,2; + QUERY PLAN +--------------------------------------------------------------- + Merge Append + Sort Key: t1.a + -> Foreign Scan + Relations: (ftprt1_p1 t1_1) INNER JOIN (ftprt2_p1 t2) + -> Foreign Scan + Relations: (ftprt1_p2 t1_2) INNER JOIN (ftprt2_p1 t2) +(6 rows) + +SELECT t1.a,t2.b FROM fprt1 t1 , ftprt2_p1 t2 WHERE (t1.a = t2.b) AND t2.c like '%0004' ORDER BY 1,2; + a | b +-----+----- + 204 | 204 + 210 | 210 + 216 | 216 + 222 | 222 + 228 | 228 + 234 | 234 + 240 | 240 + 246 | 246 +(8 rows) + +-- FOR UPDATE requires whole-row reference, and so asymmetric join doesn't apply +EXPLAIN (COSTS OFF) +SELECT t1.a,t2.b FROM fprt1 t1 , ftprt2_p1 t2 WHERE (t1.a = t2.b) AND t2.c like '%0004' ORDER BY 1,2 FOR UPDATE OF t2; + QUERY PLAN +-------------------------------------------------- + LockRows + -> Merge Join + Merge Cond: (t1.a = t2.b) + -> Append + -> Foreign Scan on ftprt1_p1 t1_1 + -> Foreign Scan on ftprt1_p2 t1_2 + -> Materialize + -> Foreign Scan on ftprt2_p1 t2 +(8 rows) + +SELECT t1.a,t2.b FROM fprt1 t1 , ftprt2_p1 t2 WHERE (t1.a = t2.b) AND t2.c like '%0004' ORDER BY 1,2 FOR UPDATE OF t2; + a | b +-----+----- + 204 | 204 + 210 | 210 + 216 | 216 + 222 | 222 + 228 | 228 + 234 | 234 + 240 | 240 + 246 | 246 +(8 rows) + RESET enable_partitionwise_join; -- =================================================================== -- test partitionwise aggregates @@ -11338,25 +11392,32 @@ RESET enable_partitionwise_join; SET enable_hashjoin TO false; EXPLAIN (VERBOSE, COSTS OFF) INSERT INTO join_tbl SELECT * FROM async_p1 t1, async_pt t2 WHERE t1.a = t2.a AND t1.b = t2.b AND t1.b % 100 = 0; - QUERY PLAN ----------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Insert on public.join_tbl - -> Nested Loop - Output: t1.a, t1.b, t1.c, t2.a, t2.b, t2.c - Join Filter: ((t1.a = t2.a) AND (t1.b = t2.b)) - -> Foreign Scan on public.async_p1 t1 - Output: t1.a, t1.b, t1.c - Remote SQL: SELECT a, b, c FROM public.base_tbl1 WHERE (((b % 100) = 0)) - -> Append - -> Async Foreign Scan on public.async_p1 t2_1 - Output: t2_1.a, t2_1.b, t2_1.c - Remote SQL: SELECT a, b, c FROM public.base_tbl1 - -> Async Foreign Scan on public.async_p2 t2_2 + -> Append + -> Async Foreign Scan + Output: t1.a, t1.b, t1.c, t2_1.a, t2_1.b, t2_1.c + Relations: (public.async_p1 t2_1) INNER JOIN (public.async_p1 t1) + Remote SQL: SELECT r3.a, r3.b, r3.c, r5.a, r5.b, r5.c FROM (public.base_tbl1 r5 INNER JOIN public.base_tbl1 r3 ON (((r3.a = r5.a)) AND ((r3.b = r5.b)) AND (((r3.b % 100) = 0)))) + -> Nested Loop + Output: t1.a, t1.b, t1.c, t2_2.a, t2_2.b, t2_2.c + Join Filter: ((t1.a = t2_2.a) AND (t1.b = t2_2.b)) + -> Foreign Scan on public.async_p1 t1 + Output: t1.a, t1.b, t1.c + Remote SQL: SELECT a, b, c FROM public.base_tbl1 WHERE (((b % 100) = 0)) + -> Foreign Scan on public.async_p2 t2_2 Output: t2_2.a, t2_2.b, t2_2.c Remote SQL: SELECT a, b, c FROM public.base_tbl2 + -> Nested Loop + Output: t1.a, t1.b, t1.c, t2_3.a, t2_3.b, t2_3.c + Join Filter: ((t1.a = t2_3.a) AND (t1.b = t2_3.b)) + -> Foreign Scan on public.async_p1 t1 + Output: t1.a, t1.b, t1.c + Remote SQL: SELECT a, b, c FROM public.base_tbl1 WHERE (((b % 100) = 0)) -> Seq Scan on public.async_p3 t2_3 Output: t2_3.a, t2_3.b, t2_3.c -(16 rows) +(23 rows) INSERT INTO join_tbl SELECT * FROM async_p1 t1, async_pt t2 WHERE t1.a = t2.a AND t1.b = t2.b AND t1.b % 100 = 0; SELECT * FROM join_tbl ORDER BY a1; @@ -11464,39 +11525,61 @@ ALTER FOREIGN TABLE async_p1 OPTIONS (use_remote_estimate 'true'); ALTER FOREIGN TABLE async_p2 OPTIONS (use_remote_estimate 'true'); EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM local_tbl, async_pt WHERE local_tbl.a = async_pt.a AND local_tbl.c = 'bar'; - QUERY PLAN ------------------------------------------------------------------------------------------- - Nested Loop - Output: local_tbl.a, local_tbl.b, local_tbl.c, async_pt.a, async_pt.b, async_pt.c - -> Seq Scan on public.local_tbl - Output: local_tbl.a, local_tbl.b, local_tbl.c - Filter: (local_tbl.c = 'bar'::text) - -> Append - -> Async Foreign Scan on public.async_p1 async_pt_1 + QUERY PLAN +------------------------------------------------------------------------------------------------- + Append + -> Nested Loop + Output: local_tbl.a, local_tbl.b, local_tbl.c, async_pt_1.a, async_pt_1.b, async_pt_1.c + -> Seq Scan on public.local_tbl + Output: local_tbl.a, local_tbl.b, local_tbl.c + Filter: (local_tbl.c = 'bar'::text) + -> Foreign Scan on public.async_p1 async_pt_1 Output: async_pt_1.a, async_pt_1.b, async_pt_1.c Remote SQL: SELECT a, b, c FROM public.base_tbl1 WHERE ((a = $1::integer)) - -> Async Foreign Scan on public.async_p2 async_pt_2 + -> Nested Loop + Output: local_tbl.a, local_tbl.b, local_tbl.c, async_pt_2.a, async_pt_2.b, async_pt_2.c + -> Seq Scan on public.local_tbl + Output: local_tbl.a, local_tbl.b, local_tbl.c + Filter: (local_tbl.c = 'bar'::text) + -> Foreign Scan on public.async_p2 async_pt_2 Output: async_pt_2.a, async_pt_2.b, async_pt_2.c Remote SQL: SELECT a, b, c FROM public.base_tbl2 WHERE ((a = $1::integer)) + -> Hash Join + Output: local_tbl.a, local_tbl.b, local_tbl.c, async_pt_3.a, async_pt_3.b, async_pt_3.c + Hash Cond: (async_pt_3.a = local_tbl.a) -> Seq Scan on public.async_p3 async_pt_3 Output: async_pt_3.a, async_pt_3.b, async_pt_3.c - Filter: (async_pt_3.a = local_tbl.a) -(15 rows) + -> Hash + Output: local_tbl.a, local_tbl.b, local_tbl.c + -> Seq Scan on public.local_tbl + Output: local_tbl.a, local_tbl.b, local_tbl.c + Filter: (local_tbl.c = 'bar'::text) +(27 rows) EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM local_tbl, async_pt WHERE local_tbl.a = async_pt.a AND local_tbl.c = 'bar'; - QUERY PLAN -------------------------------------------------------------------------------- - Nested Loop (actual rows=1 loops=1) - -> Seq Scan on local_tbl (actual rows=1 loops=1) - Filter: (c = 'bar'::text) - Rows Removed by Filter: 1 - -> Append (actual rows=1 loops=1) - -> Async Foreign Scan on async_p1 async_pt_1 (never executed) - -> Async Foreign Scan on async_p2 async_pt_2 (actual rows=1 loops=1) - -> Seq Scan on async_p3 async_pt_3 (never executed) - Filter: (a = local_tbl.a) -(9 rows) + QUERY PLAN +------------------------------------------------------------------------- + Append (actual rows=1 loops=1) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on local_tbl (actual rows=1 loops=1) + Filter: (c = 'bar'::text) + Rows Removed by Filter: 1 + -> Foreign Scan on async_p1 async_pt_1 (actual rows=0 loops=1) + -> Nested Loop (actual rows=1 loops=1) + -> Seq Scan on local_tbl (actual rows=1 loops=1) + Filter: (c = 'bar'::text) + Rows Removed by Filter: 1 + -> Foreign Scan on async_p2 async_pt_2 (actual rows=1 loops=1) + -> Hash Join (actual rows=0 loops=1) + Hash Cond: (async_pt_3.a = local_tbl.a) + -> Seq Scan on async_p3 async_pt_3 (actual rows=200 loops=1) + -> Hash (actual rows=1 loops=1) + Buckets: 1024 Batches: 1 Memory Usage: 9kB + -> Seq Scan on local_tbl (actual rows=1 loops=1) + Filter: (c = 'bar'::text) + Rows Removed by Filter: 1 +(19 rows) SELECT * FROM local_tbl, async_pt WHERE local_tbl.a = async_pt.a AND local_tbl.c = 'bar'; a | b | c | a | b | c @@ -11670,29 +11753,45 @@ SET enable_mergejoin TO false; SET enable_hashjoin TO false; EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM async_pt t1, async_p2 t2 WHERE t1.a = t2.a AND t1.b === 505; - QUERY PLAN ----------------------------------------------------------------- - Nested Loop - Output: t1.a, t1.b, t1.c, t2.a, t2.b, t2.c - Join Filter: (t1.a = t2.a) - -> Append - -> Async Foreign Scan on public.async_p1 t1_1 + QUERY PLAN +---------------------------------------------------------------------- + Append + -> Nested Loop + Output: t1_1.a, t1_1.b, t1_1.c, t2.a, t2.b, t2.c + Join Filter: (t1_1.a = t2.a) + -> Foreign Scan on public.async_p2 t2 + Output: t2.a, t2.b, t2.c + Remote SQL: SELECT a, b, c FROM public.base_tbl2 + -> Materialize Output: t1_1.a, t1_1.b, t1_1.c - Filter: (t1_1.b === 505) - Remote SQL: SELECT a, b, c FROM public.base_tbl1 - -> Async Foreign Scan on public.async_p2 t1_2 - Output: t1_2.a, t1_2.b, t1_2.c - Filter: (t1_2.b === 505) + -> Foreign Scan on public.async_p1 t1_1 + Output: t1_1.a, t1_1.b, t1_1.c + Filter: (t1_1.b === 505) + Remote SQL: SELECT a, b, c FROM public.base_tbl1 + -> Nested Loop + Output: t1_2.a, t1_2.b, t1_2.c, t2.a, t2.b, t2.c + Join Filter: (t1_2.a = t2.a) + -> Foreign Scan on public.async_p2 t2 + Output: t2.a, t2.b, t2.c Remote SQL: SELECT a, b, c FROM public.base_tbl2 - -> Seq Scan on public.async_p3 t1_3 - Output: t1_3.a, t1_3.b, t1_3.c - Filter: (t1_3.b === 505) - -> Materialize - Output: t2.a, t2.b, t2.c + -> Materialize + Output: t1_2.a, t1_2.b, t1_2.c + -> Foreign Scan on public.async_p2 t1_2 + Output: t1_2.a, t1_2.b, t1_2.c + Filter: (t1_2.b === 505) + Remote SQL: SELECT a, b, c FROM public.base_tbl2 + -> Nested Loop + Output: t1_3.a, t1_3.b, t1_3.c, t2.a, t2.b, t2.c + Join Filter: (t1_3.a = t2.a) -> Foreign Scan on public.async_p2 t2 Output: t2.a, t2.b, t2.c Remote SQL: SELECT a, b, c FROM public.base_tbl2 -(20 rows) + -> Materialize + Output: t1_3.a, t1_3.b, t1_3.c + -> Seq Scan on public.async_p3 t1_3 + Output: t1_3.a, t1_3.b, t1_3.c + Filter: (t1_3.b === 505) +(36 rows) SELECT * FROM async_pt t1, async_p2 t2 WHERE t1.a = t2.a AND t1.b === 505; a | b | c | a | b | c diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 09ba234e43..35e65bba2b 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -3139,6 +3139,15 @@ EXPLAIN (COSTS OFF) SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1; SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1; +-- Apply AJ to foreign tables +EXPLAIN (COSTS OFF) +SELECT t1.a,t2.b FROM fprt1 t1 , ftprt2_p1 t2 WHERE (t1.a = t2.b) AND t2.c like '%0004' ORDER BY 1,2; +SELECT t1.a,t2.b FROM fprt1 t1 , ftprt2_p1 t2 WHERE (t1.a = t2.b) AND t2.c like '%0004' ORDER BY 1,2; +-- FOR UPDATE requires whole-row reference, and so asymmetric join doesn't apply +EXPLAIN (COSTS OFF) +SELECT t1.a,t2.b FROM fprt1 t1 , ftprt2_p1 t2 WHERE (t1.a = t2.b) AND t2.c like '%0004' ORDER BY 1,2 FOR UPDATE OF t2; +SELECT t1.a,t2.b FROM fprt1 t1 , ftprt2_p1 t2 WHERE (t1.a = t2.b) AND t2.c like '%0004' ORDER BY 1,2 FOR UPDATE OF t2; + RESET enable_partitionwise_join; diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index cc51ae1757..e7c943bdf5 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -4315,8 +4315,11 @@ generate_partitionwise_join_paths(PlannerInfo *root, RelOptInfo *rel) if (!IS_PARTITIONED_REL(rel)) return; - /* The relation should have consider_partitionwise_join set. */ - Assert(rel->consider_partitionwise_join); + /* + * The relation should have consider_partitionwise_join or + * consider_asymmetric_join set. + */ + Assert(rel->consider_partitionwise_join ^ rel->consider_asymmetric_join); /* Guard against stack overflow due to overly deep partition hierarchy. */ check_stack_depth(); diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index ee23ed7835..cbe724a223 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -146,6 +146,7 @@ bool enable_mergejoin = true; bool enable_hashjoin = true; bool enable_gathermerge = true; bool enable_partitionwise_join = false; +bool enable_asymmetric_join = true; bool enable_partitionwise_aggregate = false; bool enable_parallel_append = true; bool enable_parallel_hash = true; diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index f3a9412d18..6af5e7c79f 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -42,6 +42,12 @@ static void try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, RelOptInfo *joinrel, SpecialJoinInfo *parent_sjinfo, List *parent_restrictlist); +static void try_asymmetric_partitionwise_join(PlannerInfo *root, + RelOptInfo *inner_rel, + RelOptInfo *prel, + RelOptInfo *joinrel, + SpecialJoinInfo *parent_sjinfo, + List *parent_restrictlist); static SpecialJoinInfo *build_child_join_sjinfo(PlannerInfo *root, SpecialJoinInfo *parent_sjinfo, Relids left_relids, Relids right_relids); @@ -1044,6 +1050,13 @@ populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1, /* Apply partitionwise join technique, if possible. */ try_partitionwise_join(root, rel1, rel2, joinrel, sjinfo, restrictlist); + + /* + * Consider joining inner relation to every leaf of a partitioned relation + */ + try_asymmetric_partitionwise_join(root, rel1, rel2, joinrel, + sjinfo, restrictlist); + } @@ -1491,7 +1504,8 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, check_stack_depth(); /* Nothing to do, if the join relation is not partitioned. */ - if (joinrel->part_scheme == NULL || joinrel->nparts == 0) + if (joinrel->part_scheme == NULL || joinrel->nparts == 0 || + !joinrel->consider_partitionwise_join) return; /* The join relation should have consider_partitionwise_join set. */ @@ -1507,8 +1521,8 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, Assert(REL_HAS_ALL_PART_PROPS(rel1) && REL_HAS_ALL_PART_PROPS(rel2)); /* The joining relations should have consider_partitionwise_join set. */ - Assert(rel1->consider_partitionwise_join && - rel2->consider_partitionwise_join); + Assert((rel1->consider_partitionwise_join || rel1->consider_asymmetric_join) && + (rel2->consider_partitionwise_join || rel2->consider_asymmetric_join)); /* * The partition scheme of the join relation should match that of the @@ -1681,6 +1695,208 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, } } +/* + * DEV NOTE: + * Remember to set joinrel->nparts to 0 in the case we reject this way. + */ +static void +try_asymmetric_partitionwise_join(PlannerInfo *root, + RelOptInfo *outer, + RelOptInfo *inner, + RelOptInfo *joinrel, + SpecialJoinInfo *parent_sjinfo, + List *parent_restrictlist) +{ + int cnt_parts; + RelOptInfo *inner_rel = inner; + RelOptInfo *prel = outer; + + /* Fast path if AJ is forbidden */ + if (!joinrel->consider_asymmetric_join) + return; + + Assert(joinrel->part_scheme != NULL); + + /* + * The caller will not juggle the inner and outer sides of the joinrel. + * Hence, we need to check reverse order here ifi direct order isn't + * acceptable one. + */ + if (!IS_PARTITIONED_REL(prel)) + { + if (parent_sjinfo->jointype != JOIN_INNER) + /* + * Can't change the order of inputs because left or anti join + * depend on the order. + */ + return; + + prel = inner; + inner_rel = outer; + } + + if (!IS_PARTITIONED_REL(prel)) + return; + + Assert(REL_HAS_ALL_PART_PROPS(prel)); + + /* + * Recheck AJ conditions. XXX: do we need just to store allowed inner/outer + * combinations in some sort of cache? + */ + if (!is_inner_rel_safe_for_asymmetric_join(root, inner_rel)) + return; + + /* + * Compute partition bounds. Remember, after that point we can't get out + * of this routine without paths population. + */ + if (joinrel->boundinfo == NULL) + { + Assert(joinrel->nparts == -1); + joinrel->boundinfo = prel->boundinfo; + joinrel->nparts = prel->nparts; + joinrel->part_rels = + (RelOptInfo **) palloc0(sizeof(RelOptInfo *) * prel->nparts); + } + else + Assert(joinrel->nparts == prel->nparts && joinrel->part_rels != NULL); + + /* + * Create child-join relations for this asymmetric join, if those don't + * exist. Add paths to child-joins for a pair of child relations + * corresponding to the given pair of parent relations. + */ + for (cnt_parts = 0; cnt_parts < joinrel->nparts; cnt_parts++) + { + RelOptInfo *outer_child; + bool child_empty; + SpecialJoinInfo *child_sjinfo; + List *child_restrictlist; + RelOptInfo *child_joinrel; + AppendRelInfo **appinfos; + int nappinfos; + + outer_child = prel->part_rels[cnt_parts]; + + Assert(!IS_DUMMY_REL(inner_rel)); + child_empty = (outer_child == NULL || IS_DUMMY_REL(outer_child)); + + /* Remember, inner already can't be dummy at this point */ + switch (parent_sjinfo->jointype) + { + case JOIN_INNER: + case JOIN_SEMI: + case JOIN_LEFT: + case JOIN_ANTI: + if (child_empty) + continue; /* ignore this join segment */ + break; + default: + /* other values not expected here */ + elog(ERROR, "unrecognized join type: %d", + (int) parent_sjinfo->jointype); + break; + } + + if (outer_child == NULL) + { + /* + * Mark the joinrel as unpartitioned so that later functions treat + * it correctly. + * It is not obvious that we need return here. But we follow the + * commit 7ad6498 and just reduce risk of fault for quite rare case. + * Anyway, it can be fixed in the future. + */ + joinrel->nparts = 0; + return; + } + + /* + * XXX: + * If we want to provide each child join with personal copy of inner + * relation - it is a good place to do. + * The main reason why we still don't do it - what if AJ had + * implemented over arbitrary inner subtree, not only a baserel? In that + * case we inevitable must copy whole subtree of RelOptInfos. But we + * don't have standard machinery to do this and even insight on how to + * do it with strong guarantees. + */ + + Assert(!bms_overlap(inner_rel->relids, outer_child->relids)); + + /* + * Construct SpecialJoinInfo from parent join relations's + * SpecialJoinInfo. + */ + child_sjinfo = build_child_join_sjinfo(root, parent_sjinfo, + inner_rel->relids, + outer_child->relids); + + /* Find the AppendRelInfo structures */ + appinfos = find_appinfos_by_relids(root, + outer_child->relids, + &nappinfos); + + child_restrictlist = + (List *) adjust_appendrel_attrs(root, + (Node *) parent_restrictlist, + nappinfos, appinfos); + + if (joinrel->part_rels[cnt_parts] == NULL) + { + child_joinrel = build_child_join_rel(root, outer_child, inner_rel, + joinrel, child_restrictlist, + child_sjinfo); + joinrel->part_rels[cnt_parts] = child_joinrel; + joinrel->live_parts = bms_add_member(joinrel->live_parts, cnt_parts); + joinrel->all_partrels = bms_add_members(joinrel->all_partrels, + child_joinrel->relids); + } + else + child_joinrel = joinrel->part_rels[cnt_parts]; + +#ifdef USE_ASSERT_CHECKING + /* + * Check correctness of the adjustment here. It is much faster and + * easier to fix issues, detected here, than in setrefs.c module. + */ + { + Bitmapset *parent = bms_del_members(bms_copy(joinrel->relids), + child_joinrel->relids); + ListCell *lc; + + Assert(!bms_is_empty(parent)); + + foreach(lc, child_restrictlist) + { + RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc); + + if(bms_intersect(parent, rinfo->clause_relids)) + elog(ERROR, "Incorrect relids in the child join clause"); + } + } +#endif + Assert(bms_equal(child_joinrel->relids, + adjust_child_relids(joinrel->relids, + nappinfos, appinfos))); + + /* And make paths for the child join */ + populate_joinrel_with_paths(root, outer_child, inner_rel, + child_joinrel, child_sjinfo, + child_restrictlist); + + pfree(appinfos); + + /* + * free_child_join_sjinfo(child_sjinfo); + * We can't free it here, because min_righthand value isn't changed + * in the AJ optimisation yet - remember that the same copy of inner + * relation participates in each child join. + */ + } +} + /* * Construct the SpecialJoinInfo for a child-join by translating * SpecialJoinInfo for the join between parents. left_relids and right_relids diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 3b77886567..7264d1ebd3 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -4357,6 +4357,8 @@ create_nestloop_plan(PlannerInfo *root, Relids outerrelids; List *nestParams; Relids saveOuterRels = root->curOuterRels; + bool needFlatCopy = + is_asymmetric_join((Path *) best_path) ? true : false; /* * If the inner path is parameterized by the topmost parent of the outer @@ -4366,7 +4368,8 @@ create_nestloop_plan(PlannerInfo *root, best_path->jpath.innerjoinpath = reparameterize_path_by_child(root, best_path->jpath.innerjoinpath, - best_path->jpath.outerjoinpath->parent); + best_path->jpath.outerjoinpath->parent, + needFlatCopy); /* * Failure here probably means that reparameterize_path_by_child() is not diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 37abcb4701..dce6124abe 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -319,24 +319,29 @@ set_plan_references(PlannerInfo *root, Plan *plan) /* * Adjust RT indexes of AppendRelInfos and add to final appendrels list. - * We assume the AppendRelInfos were built during planning and don't need - * to be copied. + * The AppendRelInfos are copied, because as a part of a subplan they could + * be visited many times in the case of asymmetric join. */ foreach(lc, root->append_rel_list) { AppendRelInfo *appinfo = lfirst_node(AppendRelInfo, lc); + AppendRelInfo *newappinfo; + + /* flat copy is enough since all valuable fields are scalars */ + newappinfo = (AppendRelInfo *) palloc(sizeof(AppendRelInfo)); + memcpy(newappinfo, appinfo, sizeof(AppendRelInfo)); /* adjust RT indexes */ - appinfo->parent_relid += rtoffset; - appinfo->child_relid += rtoffset; + newappinfo->parent_relid += rtoffset; + newappinfo->child_relid += rtoffset; /* * Rather than adjust the translated_vars entries, just drop 'em. * Neither the executor nor EXPLAIN currently need that data. */ - appinfo->translated_vars = NIL; + newappinfo->translated_vars = NIL; - glob->appendRelations = lappend(glob->appendRelations, appinfo); + glob->appendRelations = lappend(glob->appendRelations, newappinfo); } /* If needed, create workspace for processing AlternativeSubPlans */ diff --git a/src/backend/optimizer/util/appendinfo.c b/src/backend/optimizer/util/appendinfo.c index 6ba4eba224..3b08ea7131 100644 --- a/src/backend/optimizer/util/appendinfo.c +++ b/src/backend/optimizer/util/appendinfo.c @@ -202,8 +202,9 @@ adjust_appendrel_attrs(PlannerInfo *root, Node *node, int nappinfos, context.nappinfos = nappinfos; context.appinfos = appinfos; - /* If there's nothing to adjust, don't call this function. */ - Assert(nappinfos >= 1 && appinfos != NULL); + /* If there's nothing to adjust, just return a duplication */ + if (nappinfos == 0) + return copyObject(node); /* Should never be translating a Query tree. */ Assert(node == NULL || !IsA(node, Query)); @@ -746,13 +747,7 @@ find_appinfos_by_relids(PlannerInfo *root, Relids relids, int *nappinfos) AppendRelInfo *appinfo = root->append_rel_array[i]; if (!appinfo) - { - /* Probably i is an OJ index, but let's check */ - if (find_base_rel_ignore_join(root, i) == NULL) - continue; - /* It's a base rel, but we lack an append_rel_array entry */ - elog(ERROR, "child rel %d not found in append_rel_array", i); - } + continue; appinfos[cnt++] = appinfo; } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 3cf1dac087..471e34109e 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -51,7 +51,8 @@ static int append_total_cost_compare(const ListCell *a, const ListCell *b); static int append_startup_cost_compare(const ListCell *a, const ListCell *b); static List *reparameterize_pathlist_by_child(PlannerInfo *root, List *pathlist, - RelOptInfo *child_rel); + RelOptInfo *child_rel, + bool needFlatCopy); static bool pathlist_is_reparameterizable_by_child(List *pathlist, RelOptInfo *child_rel); @@ -4084,6 +4085,59 @@ reparameterize_path(PlannerInfo *root, Path *path, return NULL; } + +#define IS_PARTITION(rel) \ + (rel->reloptkind == RELOPT_OTHER_JOINREL || \ + rel->reloptkind == RELOPT_OTHER_MEMBER_REL) + +/* + * AJ path is a path which has OTHER_REL parent, non-other, non inherited relation on one side and partition on the opposite side + * Partitioned side of the AJ can be RELOPT_OTHER_MEMBER_REL (base table) or + * RELOPT_OTHER_JOINREL for a join. May it be OTHER_UPPER_REL? - not for now. + * We should differ AJ from trivial UNION ALL. + */ +bool +is_asymmetric_join(Path *path) +{ + RelOptInfo *rel = ((Path *) path)->parent; + RelOptInfo *plainrel; + RelOptInfo *prel; + JoinPath *jpath; + + Assert(IsA(path, NestPath) || IsA(path, MergePath) || IsA(path, HashPath)); + jpath = (JoinPath *) path; + + if (!IS_OTHER_REL(rel)) + return false; + + Assert(rel->reloptkind != RELOPT_OTHER_UPPER_REL); + + /* Discover left and right sides of the join */ + + plainrel = jpath->innerjoinpath->parent; + prel = jpath->outerjoinpath->parent; + + /* + * RelOptInfo can be implemented by a bushy path tree. In that case we + * should dive below, but it is impractical right now - we can't check out + * the code and test it because the case doesn't exists. + */ + Assert(prel != plainrel); + + /* Identify prospective inner and outer of the AJ */ + if (!IS_PARTITION(prel)) + { + /* Inner join allows to change inner and outer sides of AJ */ + plainrel = prel; + prel = jpath->innerjoinpath->parent; + } + + if (!IS_PARTITION(prel) || IS_OTHER_REL(plainrel)) + return false; + + return true; +} + /* * reparameterize_path_by_child * Given a path parameterized by the parent of the given child relation, @@ -4094,6 +4148,11 @@ reparameterize_path(PlannerInfo *root, Path *path, * recursively reparameterized. Other fields that refer to specific relids * also need adjustment. * + * In the case of asymmetric join we utilise the same RelOptInfo in different + * parts of the plan. Being kludge example of coding it needs to make a copy of + * the node before modifying it. Recheck needFlatCopy decision each time it + * finds a join node during reparameterization, if it still not needed. + * * The cost, number of rows, width and parallel path properties depend upon * path->parent, which does not change during the translation. So we need * not change those. @@ -4110,13 +4169,22 @@ reparameterize_path(PlannerInfo *root, Path *path, */ Path * reparameterize_path_by_child(PlannerInfo *root, Path *path, - RelOptInfo *child_rel) + RelOptInfo *child_rel, bool needFlatCopy) { Path *new_path; ParamPathInfo *new_ppi; ParamPathInfo *old_ppi; Relids required_outer; +#define FLAT_COPY_PATH(newnode, node, nodetype) \ + if (needFlatCopy) \ + { \ + (newnode) = makeNode(nodetype); \ + memcpy((newnode), (node), sizeof(nodetype)); \ + } \ + else \ + (newnode) = (nodetype *) node; + #define ADJUST_CHILD_ATTRS(node) \ ((node) = (void *) adjust_appendrel_attrs_multilevel(root, \ (Node *) (node), \ @@ -4125,7 +4193,7 @@ reparameterize_path_by_child(PlannerInfo *root, Path *path, #define REPARAMETERIZE_CHILD_PATH(path) \ do { \ - (path) = reparameterize_path_by_child(root, (path), child_rel); \ + (path) = reparameterize_path_by_child(root, (path), child_rel, needFlatCopy); \ if ((path) == NULL) \ return NULL; \ } while(0) @@ -4135,7 +4203,7 @@ do { \ if ((pathlist) != NIL) \ { \ (pathlist) = reparameterize_pathlist_by_child(root, (pathlist), \ - child_rel); \ + child_rel, needFlatCopy); \ if ((pathlist) == NIL) \ return NULL; \ } \ @@ -4164,7 +4232,7 @@ do { \ switch (nodeTag(path)) { case T_Path: - new_path = path; + FLAT_COPY_PATH(new_path, path, Path); ADJUST_CHILD_ATTRS(new_path->parent->baserestrictinfo); if (path->pathtype == T_SampleScan) { @@ -4183,8 +4251,9 @@ do { \ case T_IndexPath: { - IndexPath *ipath = (IndexPath *) path; + IndexPath *ipath; + FLAT_COPY_PATH(ipath, path, IndexPath); ADJUST_CHILD_ATTRS(ipath->indexinfo->indrestrictinfo); ADJUST_CHILD_ATTRS(ipath->indexclauses); new_path = (Path *) ipath; @@ -4193,8 +4262,9 @@ do { \ case T_BitmapHeapPath: { - BitmapHeapPath *bhpath = (BitmapHeapPath *) path; + BitmapHeapPath *bhpath; + FLAT_COPY_PATH(bhpath, path, BitmapHeapPath); ADJUST_CHILD_ATTRS(bhpath->path.parent->baserestrictinfo); REPARAMETERIZE_CHILD_PATH(bhpath->bitmapqual); new_path = (Path *) bhpath; @@ -4203,8 +4273,9 @@ do { \ case T_BitmapAndPath: { - BitmapAndPath *bapath = (BitmapAndPath *) path; + BitmapAndPath *bapath; + FLAT_COPY_PATH(bapath, path, BitmapAndPath); REPARAMETERIZE_CHILD_PATH_LIST(bapath->bitmapquals); new_path = (Path *) bapath; } @@ -4212,8 +4283,9 @@ do { \ case T_BitmapOrPath: { - BitmapOrPath *bopath = (BitmapOrPath *) path; + BitmapOrPath *bopath; + FLAT_COPY_PATH(bopath, path, BitmapOrPath); REPARAMETERIZE_CHILD_PATH_LIST(bopath->bitmapquals); new_path = (Path *) bopath; } @@ -4221,9 +4293,10 @@ do { \ case T_ForeignPath: { - ForeignPath *fpath = (ForeignPath *) path; + ForeignPath *fpath; ReparameterizeForeignPathByChild_function rfpc_func; + FLAT_COPY_PATH(fpath, path, ForeignPath); ADJUST_CHILD_ATTRS(fpath->path.parent->baserestrictinfo); if (fpath->fdw_outerpath) REPARAMETERIZE_CHILD_PATH(fpath->fdw_outerpath); @@ -4242,8 +4315,9 @@ do { \ case T_CustomPath: { - CustomPath *cpath = (CustomPath *) path; + CustomPath *cpath; + FLAT_COPY_PATH(cpath, path, CustomPath); ADJUST_CHILD_ATTRS(cpath->path.parent->baserestrictinfo); REPARAMETERIZE_CHILD_PATH_LIST(cpath->custom_paths); if (cpath->custom_restrictinfo) @@ -4260,9 +4334,14 @@ do { \ case T_NestPath: { - NestPath *npath = (NestPath *) path; - JoinPath *jpath = (JoinPath *) npath; + NestPath *npath; + JoinPath *jpath; + + if (!needFlatCopy) + needFlatCopy = is_asymmetric_join(path) ? true : false; + FLAT_COPY_PATH(npath, path, NestPath); + jpath = (JoinPath *) npath; REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath); REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath); ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo); @@ -4272,9 +4351,15 @@ do { \ case T_MergePath: { - MergePath *mpath = (MergePath *) path; - JoinPath *jpath = (JoinPath *) mpath; + MergePath *mpath; + JoinPath *jpath; + if (!needFlatCopy) + needFlatCopy = is_asymmetric_join(path) ? true : false; + + FLAT_COPY_PATH(mpath, path, MergePath); + + jpath = (JoinPath *) mpath; REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath); REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath); ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo); @@ -4285,9 +4370,15 @@ do { \ case T_HashPath: { - HashPath *hpath = (HashPath *) path; - JoinPath *jpath = (JoinPath *) hpath; + HashPath *hpath; + JoinPath *jpath; + + if (!needFlatCopy) + needFlatCopy = is_asymmetric_join(path) ? true : false; + FLAT_COPY_PATH(hpath, path, HashPath); + + jpath = (JoinPath *) hpath; REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath); REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath); ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo); @@ -4298,8 +4389,9 @@ do { \ case T_AppendPath: { - AppendPath *apath = (AppendPath *) path; + AppendPath *apath; + FLAT_COPY_PATH(apath, path, AppendPath); REPARAMETERIZE_CHILD_PATH_LIST(apath->subpaths); new_path = (Path *) apath; } @@ -4307,8 +4399,9 @@ do { \ case T_MaterialPath: { - MaterialPath *mpath = (MaterialPath *) path; + MaterialPath *mpath; + FLAT_COPY_PATH(mpath, path, MaterialPath); REPARAMETERIZE_CHILD_PATH(mpath->subpath); new_path = (Path *) mpath; } @@ -4316,8 +4409,9 @@ do { \ case T_MemoizePath: { - MemoizePath *mpath = (MemoizePath *) path; + MemoizePath *mpath; + FLAT_COPY_PATH(mpath, path, MemoizePath); REPARAMETERIZE_CHILD_PATH(mpath->subpath); ADJUST_CHILD_ATTRS(mpath->param_exprs); new_path = (Path *) mpath; @@ -4326,8 +4420,9 @@ do { \ case T_GatherPath: { - GatherPath *gpath = (GatherPath *) path; + GatherPath *gpath; + FLAT_COPY_PATH(gpath, path, GatherPath); REPARAMETERIZE_CHILD_PATH(gpath->subpath); new_path = (Path *) gpath; } @@ -4374,7 +4469,14 @@ do { \ MemoryContextSwitchTo(oldcontext); } - bms_free(required_outer); + + /* + * If adjust_child_relids_multilevel don't do replacements it returns + * the original set, not a copy. It is possible in the case of asymmetric + * JOIN and child_rel->relids contains relids only of plane relations. + */ + if (required_outer != old_ppi->ppi_req_outer) + bms_free(required_outer); new_path->param_info = new_ppi; @@ -4538,7 +4640,8 @@ do { \ static List * reparameterize_pathlist_by_child(PlannerInfo *root, List *pathlist, - RelOptInfo *child_rel) + RelOptInfo *child_rel, + bool needFlatCopy) { ListCell *lc; List *result = NIL; @@ -4546,7 +4649,7 @@ reparameterize_pathlist_by_child(PlannerInfo *root, foreach(lc, pathlist) { Path *path = reparameterize_path_by_child(root, lfirst(lc), - child_rel); + child_rel, needFlatCopy); if (path == NULL) { diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index e05b21c884..a3e774bcf6 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -276,6 +276,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) rel->joininfo = NIL; rel->has_eclass_joins = false; rel->consider_partitionwise_join = false; /* might get changed later */ + rel->consider_asymmetric_join = false; rel->part_scheme = NULL; rel->nparts = -1; rel->boundinfo = NULL; @@ -693,6 +694,27 @@ build_join_rel(PlannerInfo *root, outer_rel, inner_rel, sjinfo); + +#ifdef USE_ASSERT_CHECKING + if (enable_asymmetric_join && joinrel->part_scheme == NULL) + { + /* + * Potentially, Asymmetric JOIN can be rejected because of a reason + * in case of, for example JOIN(JOIN(P1,R1),P2). + * But for a case of JOIN(JOIN(P1,P2),R1) it may be possible. + * Right now we can't find any ways for that case, but check it. + */ + restrictlist = restrictlist_ptr ? *restrictlist_ptr : + build_joinrel_restrictlist(root, + joinrel, + outer_rel, + inner_rel, + sjinfo); + build_joinrel_partition_info(root, joinrel, outer_rel, + inner_rel, sjinfo, restrictlist); + Assert(joinrel->part_scheme == NULL); + } +#endif return joinrel; } @@ -755,6 +777,7 @@ build_join_rel(PlannerInfo *root, joinrel->joininfo = NIL; joinrel->has_eclass_joins = false; joinrel->consider_partitionwise_join = false; /* might get changed later */ + joinrel->consider_asymmetric_join = false; joinrel->parent = NULL; joinrel->top_parent = NULL; joinrel->top_parent_relids = NULL; @@ -886,11 +909,8 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, AppendRelInfo **appinfos; int nappinfos; - /* Only joins between "other" relations land here. */ - Assert(IS_OTHER_REL(outer_rel) && IS_OTHER_REL(inner_rel)); - - /* The parent joinrel should have consider_partitionwise_join set. */ - Assert(parent_joinrel->consider_partitionwise_join); + /* Either of relations must be "other" relation at least. */ + Assert(IS_OTHER_REL(outer_rel) || IS_OTHER_REL(inner_rel)); /* * Find the AppendRelInfo structures for the child baserels. We'll need @@ -949,6 +969,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, joinrel->joininfo = NIL; joinrel->has_eclass_joins = false; joinrel->consider_partitionwise_join = false; /* might get changed later */ + joinrel->consider_asymmetric_join = false; joinrel->parent = parent_joinrel; joinrel->top_parent = parent_joinrel->top_parent ? parent_joinrel->top_parent : parent_joinrel; joinrel->top_parent_relids = joinrel->top_parent->relids; @@ -2007,6 +2028,128 @@ get_param_path_clause_serials(Path *path) } } +/* + * Check inner rel properties, required to participate in asymmetric join + * XXX: Looks like it needs a lot of refactoring + */ +bool +is_inner_rel_safe_for_asymmetric_join(PlannerInfo *root, RelOptInfo *inner_rel) +{ + int i = -1; + + /* + * Don't allow AJ with partitioned inner. + * Also, don't allow lateral references to avoid re-parameterization issues + * with relations like TABLESAMPLE, where repartitioning code changes RTE. + */ + if (inner_rel->part_scheme != NULL || + !bms_is_empty(inner_rel->lateral_relids)) + return false; + + while ((i = bms_next_member(inner_rel->relids, i)) > 0) + { + RangeTblEntry *rte; + + /* + * No one relation in the underlying inner join tree is a partitioned + * one: we can't allow the situation when the part_rels would + * not correspond partitioning schema. + */ + if (root->simple_rel_array[i] && + root->simple_rel_array[i]->part_scheme != NULL) + return false; + + rte = root->simple_rte_array[i]; + Assert(rte); + + switch (rte->rtekind) + { + case RTE_RELATION: + if (rte->tablesample != NULL) + return false; + /* Allow asymmetric join */ + case RTE_JOIN: + break; + case RTE_FUNCTION: + { + ListCell *lc; + + foreach(lc, rte->functions) + { + RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc); + + /* Allow only immutable functions on constants */ + if (contain_var_clause(rtfunc->funcexpr) || + contain_mutable_functions(rtfunc->funcexpr) || + contain_subplans(rtfunc->funcexpr)) + return false; + } + break; + } + + /* + * Prohibit following RTEs in the inner of AJ. + * Right now it means we don't invest enough time into analysis of + * consecuences and corner cases of AJ, applied to inner, containing + * such RTE. This list can be revised in the future. + */ + case RTE_VALUES: + case RTE_TABLEFUNC: + case RTE_SUBQUERY: + case RTE_CTE: + case RTE_NAMEDTUPLESTORE: + case RTE_RESULT: + return false; + + default: + elog(ERROR, "unsupported RTE type %u", rte->rtekind); + break; + } + } + return true; +} + +static void +build_joinrel_partition_info_asymm(PlannerInfo *root, + RelOptInfo *joinrel, + RelOptInfo *prel, + RelOptInfo *inner_rel, + SpecialJoinInfo *sjinfo, + List *restrictlist) +{ + /* Check feasibility */ + + if (!IS_PARTITIONED_REL(prel)) + return; + + /* Enable asymmetric join only if target list doesn't contain WholeRowVar */ + if ((prel->attr_needed && + !bms_is_empty(prel->attr_needed[InvalidAttrNumber - prel->min_attr])) || + (inner_rel->attr_needed && + !bms_is_empty(inner_rel->attr_needed[InvalidAttrNumber - inner_rel->min_attr]))) + return; + + /* Check that inner rel is suitable */ + if (!is_inner_rel_safe_for_asymmetric_join(root, inner_rel)) + return; + + /* + * Dev NOTE: + * boundinfo, nparts and part_rels will be initialized later - + * See code of the compute_partition_bounds as an explanation. + */ + joinrel->part_scheme = prel->part_scheme; + + set_joinrel_partition_key_exprs(joinrel, prel, inner_rel, sjinfo->jointype); + joinrel->consider_asymmetric_join = true; + + /* + * It is impossible to be successful in both partitionwise strategies. + * At least for now + */ + Assert(!joinrel->consider_partitionwise_join); +} + /* * build_joinrel_partition_info * Checks if the two relations being joined can use partitionwise join @@ -2021,11 +2164,14 @@ build_joinrel_partition_info(PlannerInfo *root, { PartitionScheme part_scheme; + if (joinrel->consider_partitionwise_join || joinrel->consider_asymmetric_join) + return; + /* Nothing to do if partitionwise join technique is disabled. */ if (!enable_partitionwise_join) { Assert(!IS_PARTITIONED_REL(joinrel)); - return; + goto asymmetric; } /* @@ -2040,20 +2186,20 @@ build_joinrel_partition_info(PlannerInfo *root, * the joins. Please see optimizer/README for details. */ if (outer_rel->part_scheme == NULL || inner_rel->part_scheme == NULL || - !outer_rel->consider_partitionwise_join || - !inner_rel->consider_partitionwise_join || + (!outer_rel->consider_partitionwise_join && !outer_rel->consider_asymmetric_join) || + (!inner_rel->consider_partitionwise_join && !inner_rel->consider_asymmetric_join) || outer_rel->part_scheme != inner_rel->part_scheme || !have_partkey_equi_join(root, joinrel, outer_rel, inner_rel, sjinfo->jointype, restrictlist)) { Assert(!IS_PARTITIONED_REL(joinrel)); - return; + goto asymmetric; } part_scheme = outer_rel->part_scheme; /* - * This function will be called only once for each joinrel, hence it + * This place can be achieved only once for each joinrel, hence it * should not have partitioning fields filled yet. */ Assert(!joinrel->part_scheme && !joinrel->partexprs && @@ -2074,9 +2220,25 @@ build_joinrel_partition_info(PlannerInfo *root, /* * Set the consider_partitionwise_join flag. */ - Assert(outer_rel->consider_partitionwise_join); - Assert(inner_rel->consider_partitionwise_join); + Assert(outer_rel->consider_partitionwise_join ^ outer_rel->consider_asymmetric_join); + Assert(inner_rel->consider_partitionwise_join ^ inner_rel->consider_asymmetric_join); joinrel->consider_partitionwise_join = true; + + /* Don't consider asymmetric join after successful partitioned decision. */ + return; + +asymmetric: + if (!enable_asymmetric_join || sjinfo->jointype == JOIN_FULL) + return; + + /* + * Try to setup asymmetric join scheme. Because we are here only once for + * the combination of inner and outer, we must check both variants at once. + */ + build_joinrel_partition_info_asymm(root, joinrel, outer_rel, + inner_rel, sjinfo, restrictlist); + build_joinrel_partition_info_asymm(root, joinrel, inner_rel, + outer_rel, sjinfo, restrictlist); } /* @@ -2300,8 +2462,10 @@ set_joinrel_partition_key_exprs(RelOptInfo *joinrel, /* mark these const to enforce that we copy them properly */ const List *outer_expr = outer_rel->partexprs[cnt]; const List *outer_null_expr = outer_rel->nullable_partexprs[cnt]; - const List *inner_expr = inner_rel->partexprs[cnt]; - const List *inner_null_expr = inner_rel->nullable_partexprs[cnt]; + const List *inner_expr = (inner_rel->partexprs != NULL) ? + inner_rel->partexprs[cnt] : NIL; + const List *inner_null_expr = (inner_rel->nullable_partexprs != NULL) ? + inner_rel->nullable_partexprs[cnt] : NIL; List *partexpr = NIL; List *nullable_partexpr = NIL; ListCell *lc; diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 3fd0b14dd8..e9407edcd0 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -919,6 +919,16 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { + {"enable_asymmetric_join", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Enables asymmetric partitionwise join."), + NULL, + GUC_EXPLAIN + }, + &enable_asymmetric_join, + true, + NULL, NULL, NULL + }, { {"enable_partitionwise_aggregate", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables partitionwise aggregation and grouping."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 2166ea4a87..74f9a7c4b8 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -407,6 +407,7 @@ #enable_parallel_hash = on #enable_partition_pruning = on #enable_partitionwise_join = off +#enable_asymmetric_join = on #enable_partitionwise_aggregate = off #enable_presorted_aggregate = on #enable_seqscan = on diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 91a6ce90d8..ab0fe2fb3f 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -988,6 +988,8 @@ typedef struct RelOptInfo /* consider partitionwise join paths? (if partitioned rel) */ bool consider_partitionwise_join; + bool consider_asymmetric_join; + /* * inheritance links, if this is an otherrel (otherwise NULL): */ diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index b1c51a4e70..8e4f864e12 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -64,6 +64,7 @@ extern PGDLLIMPORT bool enable_mergejoin; extern PGDLLIMPORT bool enable_hashjoin; extern PGDLLIMPORT bool enable_gathermerge; extern PGDLLIMPORT bool enable_partitionwise_join; +extern PGDLLIMPORT bool enable_asymmetric_join; extern PGDLLIMPORT bool enable_partitionwise_aggregate; extern PGDLLIMPORT bool enable_parallel_append; extern PGDLLIMPORT bool enable_parallel_hash; diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index c5c4756b0f..fd9237a87a 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -297,14 +297,18 @@ extern void adjust_limit_rows_costs(double *rows, extern Path *reparameterize_path(PlannerInfo *root, Path *path, Relids required_outer, double loop_count); +extern bool is_asymmetric_join(Path *path); extern Path *reparameterize_path_by_child(PlannerInfo *root, Path *path, - RelOptInfo *child_rel); + RelOptInfo *child_rel, + bool needFlatCopy); extern bool path_is_reparameterizable_by_child(Path *path, RelOptInfo *child_rel); /* * prototypes for relnode.c */ +extern bool is_inner_rel_safe_for_asymmetric_join(PlannerInfo *root, + RelOptInfo *rel); extern void setup_simple_rel_arrays(PlannerInfo *root); extern void expand_planner_arrays(PlannerInfo *root, int add_size); extern RelOptInfo *build_simple_rel(PlannerInfo *root, int relid, diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out index 6d07f86b9b..7b364d4bea 100644 --- a/src/test/regress/expected/partition_join.out +++ b/src/test/regress/expected/partition_join.out @@ -2535,6 +2535,792 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_adv t1 INNER JOIN prt2_adv t2 ON (t1.a = 375 | 0375 | 375 | 0375 (8 rows) +-- +-- For asymmetric partition-wise join +-- +CREATE TABLE prt5 (hkey int, a int, b int) PARTITION BY HASH(hkey); +CREATE TABLE prt5_p0 PARTITION OF prt5 + FOR VALUES WITH (modulus 3, remainder 0); +CREATE TABLE prt5_p1 PARTITION OF prt5 + FOR VALUES WITH (modulus 3, remainder 1); +CREATE TABLE prt5_p2 PARTITION OF prt5 + FOR VALUES WITH (modulus 3, remainder 2); +CREATE TABLE prt6 (aid int, alabel text) PARTITION BY HASH(aid); +CREATE TABLE prt6_p0 PARTITION OF prt6 + FOR VALUES WITH (modulus 2, remainder 0); +CREATE TABLE prt6_p1 PARTITION OF prt6 + FOR VALUES WITH (modulus 2, remainder 1); +CREATE TABLE t5_1 (aid int, alabel text); +CREATE TABLE t5_2 (bid int, blabel text); +INSERT INTO prt5 (SELECT x, (x % 1000)::int, + ((x+1) % 1000)::int + FROM generate_series(1,1000000) x); +INSERT INTO t5_1 (SELECT x, md5(x::text) FROM generate_series(-200, 1500) x); +INSERT INTO t5_2 (SELECT x, md5(x::text) FROM generate_series(-200, 1500) x); +INSERT INTO prt6 (SELECT * FROM t5_1); +VACUUM ANALYZE prt5,prt6,t5_1,t5_2; +SET max_parallel_workers_per_gather = 0; +-- Trivial asymmetric JOIN of partitioned table with a relation +EXPLAIN (COSTS OFF) +SELECT * + FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%abc%'; + QUERY PLAN +------------------------------------------------------- + Append + -> Hash Join + Hash Cond: (prt5_1.a = t5_1.aid) + -> Seq Scan on prt5_p0 prt5_1 + -> Hash + -> Seq Scan on t5_1 + Filter: (alabel ~~ '%abc%'::text) + -> Hash Join + Hash Cond: (prt5_2.a = t5_1.aid) + -> Seq Scan on prt5_p1 prt5_2 + -> Hash + -> Seq Scan on t5_1 + Filter: (alabel ~~ '%abc%'::text) + -> Hash Join + Hash Cond: (prt5_3.a = t5_1.aid) + -> Seq Scan on prt5_p2 prt5_3 + -> Hash + -> Seq Scan on t5_1 + Filter: (alabel ~~ '%abc%'::text) +(19 rows) + +-- AJ: the case of WholeRowVar +EXPLAIN (COSTS OFF) +SELECT (t1.*)::prt1 FROM prt1 t1, prt2_p1 t2 WHERE (t1.a = t2.b); + QUERY PLAN +-------------------------------------- + Hash Join + Hash Cond: (t1.a = t2.b) + -> Append + -> Seq Scan on prt1_p1 t1_1 + -> Seq Scan on prt1_p2 t1_2 + -> Seq Scan on prt1_p3 t1_3 + -> Hash + -> Seq Scan on prt2_p1 t2 +(8 rows) + +-- AJ with partitioned table (one partition has pruned) +EXPLAIN (COSTS OFF) +SELECT (t1.*)::prt1 +FROM prt1 t1, prt2_p1 t2 +WHERE (t1.a = t2.b) AND (t1.a = 542 OR t1.a = 1); + QUERY PLAN +---------------------------------------------------- + Hash Join + Hash Cond: (t2.b = t1.a) + -> Seq Scan on prt2_p1 t2 + -> Hash + -> Append + -> Seq Scan on prt1_p1 t1_1 + Filter: ((a = 542) OR (a = 1)) + -> Seq Scan on prt1_p3 t1_2 + Filter: ((a = 542) OR (a = 1)) +(9 rows) + +-- Test AJ on anti join +-- First test mostly for the future SJE with support of partitioned tables. +EXPLAIN (COSTS OFF) +SELECT count(*) FROM prt1 t1, prt2_p1 t2 +WHERE NOT EXISTS (SELECT * FROM prt1 WHERE prt1.a = t1.a); + QUERY PLAN +---------------------------------------------------------- + Aggregate + -> Append + -> Nested Loop + -> Hash Anti Join + Hash Cond: (t1_1.a = prt1_1.a) + -> Seq Scan on prt1_p1 t1_1 + -> Hash + -> Seq Scan on prt1_p1 prt1_1 + -> Seq Scan on prt2_p1 t2 + -> Nested Loop + -> Hash Anti Join + Hash Cond: (t1_2.a = prt1_2.a) + -> Seq Scan on prt1_p2 t1_2 + -> Hash + -> Seq Scan on prt1_p2 prt1_2 + -> Seq Scan on prt2_p1 t2 + -> Nested Loop + -> Hash Anti Join + Hash Cond: (t1_3.a = prt1_3.a) + -> Seq Scan on prt1_p3 t1_3 + -> Hash + -> Seq Scan on prt1_p3 prt1_3 + -> Seq Scan on prt2_p1 t2 +(23 rows) + +SELECT count(*) FROM prt1 t1, prt2_p1 t2 +WHERE NOT EXISTS (SELECT * FROM prt1 WHERE prt1.a = t1.a); + count +------- + 0 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM prt2_p1 t1 +WHERE NOT EXISTS (SELECT * FROM prt1 WHERE prt1.a = t1.b); + QUERY PLAN +---------------------------------------------- + Aggregate + -> Hash Right Anti Join + Hash Cond: (prt1.a = t1.b) + -> Append + -> Seq Scan on prt1_p1 prt1_1 + -> Seq Scan on prt1_p2 prt1_2 + -> Seq Scan on prt1_p3 prt1_3 + -> Hash + -> Seq Scan on prt2_p1 t1 +(9 rows) + +SELECT count(*) FROM prt2_p1 t1 +WHERE NOT EXISTS (SELECT * FROM prt1 WHERE prt1.a = t1.b); + count +------- + 42 +(1 row) + +-- Can't use AJ because of complex inner as well as PWJ (one partition pruned) +EXPLAIN (COSTS OFF) +SELECT (t1.*)::prt1 +FROM prt1 t1, prt2_p1 t2, prt1 t3 +WHERE (t1.a = t2.b) AND (t1.a = 542 OR t1.a = 1) AND t2.a = t3.a; + QUERY PLAN +---------------------------------------------------------------- + Merge Join + Merge Cond: (t3.a = t2.a) + -> Merge Append + Sort Key: t3.a + -> Index Only Scan using iprt1_p1_a on prt1_p1 t3_1 + -> Index Only Scan using iprt1_p2_a on prt1_p2 t3_2 + -> Index Only Scan using iprt1_p3_a on prt1_p3 t3_3 + -> Sort + Sort Key: t2.a + -> Hash Join + Hash Cond: (t2.b = t1.a) + -> Seq Scan on prt2_p1 t2 + -> Hash + -> Append + -> Seq Scan on prt1_p1 t1_1 + Filter: ((a = 542) OR (a = 1)) + -> Seq Scan on prt1_p3 t1_2 + Filter: ((a = 542) OR (a = 1)) +(18 rows) + +-- TODO: We don't see any AJ or PWJ, but may be it's a game of costs? +EXPLAIN (COSTS OFF) +SELECT (t1.*)::prt1 +FROM prt1 t1, prt2_p1 t2, prt1 t3 +WHERE (t1.a = t2.b) AND (t1.a = 542 OR t1.a = 1) + AND (t2.a = t3.a) AND (t3.a = 543 OR t3.a = 2); + QUERY PLAN +-------------------------------------------------------------- + Nested Loop + -> Hash Join + Hash Cond: (t2.b = t1.a) + -> Seq Scan on prt2_p1 t2 + -> Hash + -> Append + -> Seq Scan on prt1_p1 t1_1 + Filter: ((a = 542) OR (a = 1)) + -> Seq Scan on prt1_p3 t1_2 + Filter: ((a = 542) OR (a = 1)) + -> Append + -> Index Only Scan using iprt1_p1_a on prt1_p1 t3_1 + Index Cond: (a = t2.a) + Filter: ((a = 543) OR (a = 2)) + -> Index Only Scan using iprt1_p3_a on prt1_p3 t3_2 + Index Cond: (a = t2.a) + Filter: ((a = 543) OR (a = 2)) +(17 rows) + +-- The same, but appended with UNION ALL +EXPLAIN (COSTS OFF) +SELECT * FROM ( + (SELECT * FROM prt5_p0) + UNION ALL + (SELECT * FROM prt5_p1) + UNION ALL + (SELECT * FROM prt5_p2) + ) AS sq1 +JOIN t5_1 ON a = aid AND alabel like '%abc%'; + QUERY PLAN +------------------------------------------------- + Hash Join + Hash Cond: (prt5_p0.a = t5_1.aid) + -> Append + -> Seq Scan on prt5_p0 + -> Seq Scan on prt5_p1 + -> Seq Scan on prt5_p2 + -> Hash + -> Seq Scan on t5_1 + Filter: (alabel ~~ '%abc%'::text) +(9 rows) + +-- Don't allow asymmetric JOIN of two partitioned tables. +EXPLAIN (COSTS OFF) +SELECT count(*) FROM prt5 JOIN prt6 ON a = aid AND alabel like '%abc%'; + QUERY PLAN +------------------------------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (prt5.a = prt6.aid) + -> Append + -> Seq Scan on prt5_p0 prt5_1 + -> Seq Scan on prt5_p1 prt5_2 + -> Seq Scan on prt5_p2 prt5_3 + -> Hash + -> Append + -> Seq Scan on prt6_p0 prt6_1 + Filter: (alabel ~~ '%abc%'::text) + -> Seq Scan on prt6_p1 prt6_2 + Filter: (alabel ~~ '%abc%'::text) +(13 rows) + +SELECT count(*) FROM prt5 JOIN prt6 ON a = aid AND alabel like '%abc%'; + count +------- + 4000 +(1 row) + +-- Check that asymmetric JOIN with Subquery is forbidden +EXPLAIN (COSTS OFF) +SELECT count(*) FROM prt5 JOIN ( + SELECT * FROM prt6 LIMIT 1000 +) AS sq1 ON a = aid AND alabel like '%abc%'; + QUERY PLAN +---------------------------------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (prt5.a = sq1.aid) + -> Append + -> Seq Scan on prt5_p0 prt5_1 + -> Seq Scan on prt5_p1 prt5_2 + -> Seq Scan on prt5_p2 prt5_3 + -> Hash + -> Subquery Scan on sq1 + Filter: (sq1.alabel ~~ '%abc%'::text) + -> Limit + -> Append + -> Seq Scan on prt6_p0 prt6_1 + -> Seq Scan on prt6_p1 prt6_2 +(14 rows) + +SELECT count(*) FROM prt5 JOIN (SELECT * FROM prt6 LIMIT 1000) AS sq1 + ON a = aid AND alabel like '%abc%'; + count +------- + 2000 +(1 row) + +-- Asymmetric JOIN of two plane tables and one partitioned +EXPLAIN (COSTS OFF) +SELECT count(*) + FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%ab%' + JOIN t5_2 ON b = bid AND blabel like '%cd%'; + QUERY PLAN +------------------------------------------------------------------ + Aggregate + -> Append + -> Hash Join + Hash Cond: (prt5_1.b = t5_2.bid) + -> Hash Join + Hash Cond: (prt5_1.a = t5_1.aid) + -> Seq Scan on prt5_p0 prt5_1 + -> Hash + -> Seq Scan on t5_1 + Filter: (alabel ~~ '%ab%'::text) + -> Hash + -> Seq Scan on t5_2 + Filter: (blabel ~~ '%cd%'::text) + -> Hash Join + Hash Cond: (prt5_2.b = t5_2.bid) + -> Hash Join + Hash Cond: (prt5_2.a = t5_1.aid) + -> Seq Scan on prt5_p1 prt5_2 + -> Hash + -> Seq Scan on t5_1 + Filter: (alabel ~~ '%ab%'::text) + -> Hash + -> Seq Scan on t5_2 + Filter: (blabel ~~ '%cd%'::text) + -> Hash Join + Hash Cond: (prt5_3.b = t5_2.bid) + -> Hash Join + Hash Cond: (prt5_3.a = t5_1.aid) + -> Seq Scan on prt5_p2 prt5_3 + -> Hash + -> Seq Scan on t5_1 + Filter: (alabel ~~ '%ab%'::text) + -> Hash + -> Seq Scan on t5_2 + Filter: (blabel ~~ '%cd%'::text) +(35 rows) + +SELECT count(*) + FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%ab%' + JOIN t5_2 ON b = bid AND blabel like '%cd%'; + count +------- + 11000 +(1 row) + +-- unable to extract non-partitioned right relation +EXPLAIN (COSTS OFF) +SELECT * FROM prt5 RIGHT JOIN t5_1 ON a = aid AND alabel like '%abc%'; + QUERY PLAN +----------------------------------------------- + Hash Right Join + Hash Cond: (prt5.a = t5_1.aid) + Join Filter: (t5_1.alabel ~~ '%abc%'::text) + -> Append + -> Seq Scan on prt5_p0 prt5_1 + -> Seq Scan on prt5_p1 prt5_2 + -> Seq Scan on prt5_p2 prt5_3 + -> Hash + -> Seq Scan on t5_1 +(9 rows) + +-- left side can be extracted, but no cost benefit +EXPLAIN (COSTS OFF) +SELECT * FROM prt5 LEFT JOIN t5_1 ON a = aid AND alabel like '%abc%'; + QUERY PLAN +------------------------------------------------------- + Append + -> Hash Left Join + Hash Cond: (prt5_1.a = t5_1.aid) + -> Seq Scan on prt5_p0 prt5_1 + -> Hash + -> Seq Scan on t5_1 + Filter: (alabel ~~ '%abc%'::text) + -> Hash Left Join + Hash Cond: (prt5_2.a = t5_1.aid) + -> Seq Scan on prt5_p1 prt5_2 + -> Hash + -> Seq Scan on t5_1 + Filter: (alabel ~~ '%abc%'::text) + -> Hash Left Join + Hash Cond: (prt5_3.a = t5_1.aid) + -> Seq Scan on prt5_p2 prt5_3 + -> Hash + -> Seq Scan on t5_1 + Filter: (alabel ~~ '%abc%'::text) +(19 rows) + +-- validation of the results with/without asymmetric partition-wise join +SELECT * INTO pg_temp.result01a + FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%abc%'; +SELECT * INTO pg_temp.result02a + FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%ab%' + JOIN t5_2 ON b = bid AND blabel like '%cd%'; +SET enable_partitionwise_join = off; +SELECT * INTO pg_temp.result01b + FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%abc%'; +SELECT * INTO pg_temp.result02b + FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%ab%' + JOIN t5_2 ON b = bid AND blabel like '%cd%'; +SELECT * FROM pg_temp.result01a EXCEPT SELECT * FROM pg_temp.result01b; + hkey | a | b | aid | alabel +------+---+---+-----+-------- +(0 rows) + +SELECT * FROM pg_temp.result02a EXCEPT SELECT * FROM pg_temp.result02b; + hkey | a | b | aid | alabel | bid | blabel +------+---+---+-----+--------+-----+-------- +(0 rows) + +SET enable_partitionwise_join = on; +-- Trying different JOIN combinations optimiser can provide partitioned relation +-- buried deeply in the inner subtree. Check that it doesn't provide +-- inconsistency in the plan. +EXPLAIN (COSTS OFF) +SELECT * from prt1 d1, prt2 d2, unnest(array[3,4]) n, unnest(array[3,4]) g +WHERE d1.a = n AND d2.b = d1.a and g = n; + QUERY PLAN +-------------------------------------------------------- + Append + -> Nested Loop + Join Filter: (d1_1.a = g.g) + -> Hash Join + Hash Cond: (d1_1.a = n.n) + -> Hash Join + Hash Cond: (d1_1.a = d2_1.b) + -> Seq Scan on prt1_p1 d1_1 + -> Hash + -> Seq Scan on prt2_p1 d2_1 + -> Hash + -> Function Scan on unnest n + -> Function Scan on unnest g + -> Nested Loop + Join Filter: (d1_2.a = g.g) + -> Hash Join + Hash Cond: (d1_2.a = n.n) + -> Hash Join + Hash Cond: (d1_2.a = d2_2.b) + -> Seq Scan on prt1_p2 d1_2 + -> Hash + -> Seq Scan on prt2_p2 d2_2 + -> Hash + -> Function Scan on unnest n + -> Function Scan on unnest g + -> Nested Loop + Join Filter: (d1_3.a = g.g) + -> Hash Join + Hash Cond: (d1_3.a = n.n) + -> Hash Join + Hash Cond: (d1_3.a = d2_3.b) + -> Seq Scan on prt1_p3 d1_3 + -> Hash + -> Seq Scan on prt2_p3 d2_3 + -> Hash + -> Function Scan on unnest n + -> Function Scan on unnest g +(37 rows) + +-- TODO: +-- According to current logic decision on AJ or PWJ for specific joinrel +-- can depend on the order of relations in the FROM list. See how it works. +-- Can we resolve this issue somehow? +EXPLAIN (COSTS OFF) -- PWJ on top +SELECT * from prt1 d1, unnest(array[3,4]) n, prt2 d2 +WHERE d1.a = n AND d2.b = d1.a; + QUERY PLAN +--------------------------------------------------------- + Append + -> Nested Loop + Join Filter: (d1_1.a = n.n) + -> Hash Join + Hash Cond: (d2_1.b = n.n) + -> Seq Scan on prt2_p1 d2_1 + -> Hash + -> Function Scan on unnest n + -> Index Scan using iprt1_p1_a on prt1_p1 d1_1 + Index Cond: (a = d2_1.b) + -> Nested Loop + Join Filter: (d1_2.a = n.n) + -> Hash Join + Hash Cond: (d2_2.b = n.n) + -> Seq Scan on prt2_p2 d2_2 + -> Hash + -> Function Scan on unnest n + -> Index Scan using iprt1_p2_a on prt1_p2 d1_2 + Index Cond: (a = d2_2.b) + -> Nested Loop + Join Filter: (d1_3.a = n.n) + -> Hash Join + Hash Cond: (d2_3.b = n.n) + -> Seq Scan on prt2_p3 d2_3 + -> Hash + -> Function Scan on unnest n + -> Index Scan using iprt1_p3_a on prt1_p3 d1_3 + Index Cond: (a = d2_3.b) +(28 rows) + +EXPLAIN (COSTS OFF) -- AJ on top +SELECT * from prt1 d1, prt2 d2, unnest(array[3,4]) n +WHERE d1.a = n AND d2.b = d1.a; + QUERY PLAN +-------------------------------------------------- + Append + -> Hash Join + Hash Cond: (d1_1.a = n.n) + -> Hash Join + Hash Cond: (d1_1.a = d2_1.b) + -> Seq Scan on prt1_p1 d1_1 + -> Hash + -> Seq Scan on prt2_p1 d2_1 + -> Hash + -> Function Scan on unnest n + -> Hash Join + Hash Cond: (d1_2.a = n.n) + -> Hash Join + Hash Cond: (d1_2.a = d2_2.b) + -> Seq Scan on prt1_p2 d1_2 + -> Hash + -> Seq Scan on prt2_p2 d2_2 + -> Hash + -> Function Scan on unnest n + -> Hash Join + Hash Cond: (d1_3.a = n.n) + -> Hash Join + Hash Cond: (d1_3.a = d2_3.b) + -> Seq Scan on prt1_p3 d1_3 + -> Hash + -> Seq Scan on prt2_p3 d2_3 + -> Hash + -> Function Scan on unnest n +(28 rows) + +-- Allow JOINs in the inner side of AJ +EXPLAIN (COSTS OFF) +SELECT * FROM prt1 d1 LEFT JOIN (unnest(array[3,4]) n + FULL OUTER JOIN unnest(array[3,4]) g ON (n=g)) +ON (d1.a = n); + QUERY PLAN +--------------------------------------------------------- + Append + -> Hash Left Join + Hash Cond: (d1_1.a = n.n) + -> Seq Scan on prt1_p1 d1_1 + -> Hash + -> Hash Left Join + Hash Cond: (n.n = g.g) + -> Function Scan on unnest n + -> Hash + -> Function Scan on unnest g + -> Hash Left Join + Hash Cond: (d1_2.a = n.n) + -> Seq Scan on prt1_p2 d1_2 + -> Hash + -> Hash Left Join + Hash Cond: (n.n = g.g) + -> Function Scan on unnest n + -> Hash + -> Function Scan on unnest g + -> Hash Left Join + Hash Cond: (d1_3.a = n.n) + -> Seq Scan on prt1_p3 d1_3 + -> Hash + -> Hash Left Join + Hash Cond: (n.n = g.g) + -> Function Scan on unnest n + -> Hash + -> Function Scan on unnest g +(28 rows) + +-- Check reparameterization code when an optimizer have to make two level relids +-- adjustment. +SET enable_hashjoin = 'off'; +SET enable_mergejoin = 'off'; +SET enable_material = 'off'; +CREATE TABLE big AS SELECT x AS x FROM generate_series(1,1E4) x; +CREATE INDEX ON big(x); +CREATE TABLE small AS SELECT x, -x AS y FROM generate_series(1,100) x; +CREATE TABLE part_l0 (x int, y int, z int) PARTITION BY HASH (y); +CREATE TABLE part0_l1 PARTITION OF part_l0 (y) + FOR VALUES WITH (modulus 2, remainder 0) PARTITION BY HASH (z); +CREATE TABLE part0_l2 PARTITION OF part0_l1 (z) + FOR VALUES WITH (modulus 2, remainder 0); +CREATE TABLE part1_l2 PARTITION OF part0_l1 (z) + FOR VALUES WITH (modulus 2, remainder 1); +CREATE TABLE part1_l1 PARTITION OF part_l0 (y) + FOR VALUES WITH (modulus 2, remainder 1); +INSERT INTO part_l0 (x, y, z) (SELECT x,x,x FROM generate_series(1,1E4) x); +ANALYZE big,small,part_l0; +-- Parameter have to be reparameterized by a plane relation. +EXPLAIN (COSTS OFF) +SELECT small.* FROM small + JOIN part_l0 ON small.y = part_l0.y AND small.y + part_l0.y < 10 + LEFT JOIN big ON big.x = small.x; + QUERY PLAN +----------------------------------------------------------------------------------------- + Append + -> Nested Loop Left Join + -> Nested Loop + Join Filter: ((small.y = part_l0_1.y) AND ((small.y + part_l0_1.y) < 10)) + -> Seq Scan on small + -> Seq Scan on part0_l2 part_l0_1 + -> Index Only Scan using big_x_idx on big + Index Cond: (x = (small.x)::numeric) + -> Nested Loop Left Join + -> Nested Loop + Join Filter: ((small.y = part_l0_2.y) AND ((small.y + part_l0_2.y) < 10)) + -> Seq Scan on part1_l2 part_l0_2 + -> Seq Scan on small + -> Index Only Scan using big_x_idx on big + Index Cond: (x = (small.x)::numeric) + -> Nested Loop Left Join + -> Nested Loop + Join Filter: ((small.y = part_l0_3.y) AND ((small.y + part_l0_3.y) < 10)) + -> Seq Scan on small + -> Seq Scan on part1_l1 part_l0_3 + -> Index Only Scan using big_x_idx on big + Index Cond: (x = (small.x)::numeric) +(22 rows) + +SELECT small.* FROM small + JOIN part_l0 ON small.y = part_l0.y AND small.y + part_l0.y < 10 + LEFT JOIN big ON big.x = small.x; + x | y +---+--- +(0 rows) + +-- Parameters have to be reparameterized by plane and partitioned relations. +EXPLAIN (COSTS OFF) +SELECT count(*) FROM small + JOIN part_l0 ON small.y = part_l0.y AND small.y + part_l0.y < 10 + LEFT JOIN big ON big.x = small.x OR big.x = part_l0.x; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Aggregate + -> Append + -> Nested Loop Left Join + -> Nested Loop + Join Filter: ((small.y = part_l0_1.y) AND ((small.y + part_l0_1.y) < 10)) + -> Seq Scan on small + -> Seq Scan on part0_l2 part_l0_1 + -> Bitmap Heap Scan on big + Recheck Cond: ((x = (small.x)::numeric) OR (x = (part_l0_1.x)::numeric)) + -> BitmapOr + -> Bitmap Index Scan on big_x_idx + Index Cond: (x = (small.x)::numeric) + -> Bitmap Index Scan on big_x_idx + Index Cond: (x = (part_l0_1.x)::numeric) + -> Nested Loop Left Join + -> Nested Loop + Join Filter: ((small.y = part_l0_2.y) AND ((small.y + part_l0_2.y) < 10)) + -> Seq Scan on part1_l2 part_l0_2 + -> Seq Scan on small + -> Bitmap Heap Scan on big + Recheck Cond: ((x = (small.x)::numeric) OR (x = (part_l0_2.x)::numeric)) + -> BitmapOr + -> Bitmap Index Scan on big_x_idx + Index Cond: (x = (small.x)::numeric) + -> Bitmap Index Scan on big_x_idx + Index Cond: (x = (part_l0_2.x)::numeric) + -> Nested Loop Left Join + -> Nested Loop + Join Filter: ((small.y = part_l0_3.y) AND ((small.y + part_l0_3.y) < 10)) + -> Seq Scan on small + -> Seq Scan on part1_l1 part_l0_3 + -> Bitmap Heap Scan on big + Recheck Cond: ((x = (small.x)::numeric) OR (x = (part_l0_3.x)::numeric)) + -> BitmapOr + -> Bitmap Index Scan on big_x_idx + Index Cond: (x = (small.x)::numeric) + -> Bitmap Index Scan on big_x_idx + Index Cond: (x = (part_l0_3.x)::numeric) +(38 rows) + +SELECT count(*) FROM small + JOIN part_l0 ON small.y = part_l0.y AND small.y + part_l0.y < 10 + LEFT JOIN big ON big.x = small.x OR big.x = part_l0.x; + count +------- + 0 +(1 row) + +DROP TABLE IF EXISTS big,small,part_l0 CASCADE; +RESET enable_hashjoin; +RESET enable_mergejoin; +RESET enable_material; +RESET max_parallel_workers_per_gather; +-- +-- The case of AJ under partitionwise PNL which needs reparameterization +-- +CREATE TABLE small (x int); +CREATE TABLE big (x int, y int); +CREATE TABLE part42 (x int, y int) PARTITION BY RANGE (x); +CREATE TABLE p1 PARTITION OF part42 (x) FOR VALUES FROM (0) TO (50); +CREATE TABLE p2 PARTITION OF part42 (x) FOR VALUES FROM (50) TO (101); +INSERT INTO big (SELECT gs%10,gs FROM generate_series(1,10000) AS gs); +INSERT INTO small (x) SELECT gs FROM generate_series (1,1) AS gs; +INSERT INTO part42 SELECT gs%100, gs%100 FROM generate_series(1,100000) AS gs; +CREATE INDEX big_idx ON big(y); +VACUUM ANALYZE small,big,part42; +SET enable_hashjoin = off; +SET enable_mergejoin = off; +SET max_parallel_workers_per_gather = 0; -- Reduce test unstability +EXPLAIN (COSTS OFF) +SELECT * FROM part42 ext LEFT JOIN big + JOIN part42 p2 ON (p2.x=big.y) +ON (ext.x=p2.x AND ext.y=big.y); + QUERY PLAN +----------------------------------------------------------------- + Append + -> Nested Loop Left Join + Join Filter: ((ext_1.x = p2_1.x) AND (ext_1.y = big.y)) + -> Seq Scan on p1 ext_1 + -> Materialize + -> Nested Loop + -> Seq Scan on p1 p2_1 + -> Memoize + Cache Key: p2_1.x + Cache Mode: logical + -> Index Scan using big_idx on big + Index Cond: (y = p2_1.x) + -> Nested Loop Left Join + Join Filter: ((ext_2.x = p2_2.x) AND (ext_2.y = big.y)) + -> Seq Scan on p2 ext_2 + -> Materialize + -> Nested Loop + -> Seq Scan on p2 p2_2 + -> Memoize + Cache Key: p2_2.x + Cache Mode: logical + -> Index Scan using big_idx on big + Index Cond: (y = p2_2.x) +(23 rows) + +RESET max_parallel_workers_per_gather; +RESET enable_mergejoin; +RESET enable_hashjoin; +-- Parameterized path examples. +CREATE TABLE prta (id integer, payload text) PARTITION BY HASH (id); +CREATE TABLE prta1 PARTITION OF prta FOR VALUES WITH (modulus 2, remainder 0); +CREATE TABLE prta2 PARTITION OF prta FOR VALUES WITH (modulus 2, remainder 1); +CREATE UNIQUE INDEX ON prta1 (id); +CREATE UNIQUE INDEX ON prta2 (id); +INSERT INTO prta (id, payload) + (SELECT *, ('abc' || id)::text AS payload + FROM generate_series(1,1) AS id); +CREATE TABLE prtb (id integer, payload text) PARTITION BY HASH (id); +CREATE TABLE prtb1 PARTITION OF prtb FOR VALUES WITH (modulus 2, remainder 0); +CREATE TABLE prtb2 PARTITION OF prtb FOR VALUES WITH (modulus 2, remainder 1); +CREATE UNIQUE INDEX ON prtb1 (id); +CREATE UNIQUE INDEX ON prtb2 (id); +INSERT INTO prtb (id, payload) + (SELECT *, ('def' || id)::text AS payload + FROM generate_series(1,1000) AS id); +CREATE TABLE e (id integer, payload text) PARTITION BY HASH (id); +CREATE TABLE e1 PARTITION OF e FOR VALUES WITH (modulus 2, remainder 0); +CREATE TABLE e2 PARTITION OF e FOR VALUES WITH (modulus 2, remainder 1); +INSERT INTO e (id, payload) + (SELECT *, ('ghi' || id)::text AS payload + FROM generate_series(1,1000) AS id); +CREATE UNIQUE INDEX ON e1 (id); +CREATE UNIQUE INDEX ON e2 (id); +ANALYZE prta,prtb,e; +EXPLAIN (COSTS OFF) +SELECT * FROM prta,prtb WHERE prta.id=prtb.id; + QUERY PLAN +----------------------------------------------------------- + Append + -> Nested Loop + -> Seq Scan on prta1 prta_1 + -> Index Scan using prtb1_id_idx on prtb1 prtb_1 + Index Cond: (id = prta_1.id) + -> Nested Loop + -> Seq Scan on prta2 prta_2 + -> Index Scan using prtb2_id_idx on prtb2 prtb_2 + Index Cond: (id = prta_2.id) +(9 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM prta,prtb,e WHERE prta.id=prtb.id AND prta.id=e.id; + QUERY PLAN +----------------------------------------------------------------- + Append + -> Nested Loop + Join Filter: (prta_1.id = e_1.id) + -> Nested Loop + -> Seq Scan on prta1 prta_1 + -> Index Scan using prtb1_id_idx on prtb1 prtb_1 + Index Cond: (id = prta_1.id) + -> Index Scan using e1_id_idx on e1 e_1 + Index Cond: (id = prtb_1.id) + -> Nested Loop + Join Filter: (prta_2.id = e_2.id) + -> Nested Loop + -> Seq Scan on prta2 prta_2 + -> Index Scan using prtb2_id_idx on prtb2 prtb_2 + Index Cond: (id = prta_2.id) + -> Index Scan using e2_id_idx on e2 e_2 + Index Cond: (id = prtb_2.id) +(17 rows) + -- semi join EXPLAIN (COSTS OFF) SELECT t1.* FROM prt1_adv t1 WHERE EXISTS (SELECT 1 FROM prt2_adv t2 WHERE t1.a = t2.b) AND t1.b = 0 ORDER BY t1.a; @@ -3380,6 +4166,30 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_adv t1 INNER JOIN prt2_adv t2 ON (t1.a = DROP TABLE prt1_adv; DROP TABLE prt2_adv; +DROP TABLE prt1; +-- Test function scan with lateral relids +CREATE TABLE prt1 (id int, data json) PARTITION BY HASH(id); +CREATE TABLE prt1_p1 PARTITION OF prt1 FOR VALUES WITH (MODULUS 3, REMAINDER 0); +CREATE TABLE prt1_p2 PARTITION OF prt1 FOR VALUES WITH (MODULUS 3, REMAINDER 1); +CREATE TABLE prt1_p3 PARTITION OF prt1 FOR VALUES WITH (MODULUS 3, REMAINDER 2); +INSERT INTO prt1 SELECT i, format('{ "name": "value%s", "Test": {"a":"b"}}', i)::json from generate_series(1,100) i; +ANALYZE prt1; +SELECT id, key, value from prt1, json_each(data->'Test') ORDER BY id LIMIT 10; + id | key | value +----+-----+------- + 1 | a | "b" + 2 | a | "b" + 3 | a | "b" + 4 | a | "b" + 5 | a | "b" + 6 | a | "b" + 7 | a | "b" + 8 | a | "b" + 9 | a | "b" + 10 | a | "b" +(10 rows) + +DROP TABLE prt1; -- Tests for list-partitioned tables CREATE TABLE plt1_adv (a int, b int, c text) PARTITION BY LIST (c); CREATE TABLE plt1_adv_p1 PARTITION OF plt1_adv FOR VALUES IN ('0001', '0003'); diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out index 46b78ba3c4..4bfd92736b 100644 --- a/src/test/regress/expected/partition_prune.out +++ b/src/test/regress/expected/partition_prune.out @@ -2485,32 +2485,56 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on -------------------------------------------------------------------------------------------------------- Finalize Aggregate (actual rows=N loops=N) -> Gather (actual rows=N loops=N) - Workers Planned: 1 + Workers Planned: 2 Workers Launched: N -> Partial Aggregate (actual rows=N loops=N) - -> Nested Loop (actual rows=N loops=N) - -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) - Filter: (a = ANY ('{0,0,1}'::integer[])) - -> Append (actual rows=N loops=N) + -> Parallel Append (actual rows=N loops=N) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{0,0,1}'::integer[])) -> Index Scan using ab_a1_b1_a_idx on ab_a1_b1 ab_1 (actual rows=N loops=N) Index Cond: (a = a.a) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{0,0,1}'::integer[])) -> Index Scan using ab_a1_b2_a_idx on ab_a1_b2 ab_2 (actual rows=N loops=N) Index Cond: (a = a.a) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{0,0,1}'::integer[])) -> Index Scan using ab_a1_b3_a_idx on ab_a1_b3 ab_3 (actual rows=N loops=N) Index Cond: (a = a.a) - -> Index Scan using ab_a2_b1_a_idx on ab_a2_b1 ab_4 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{0,0,1}'::integer[])) + -> Index Scan using ab_a2_b1_a_idx on ab_a2_b1 ab_4 (actual rows=N loops=N) Index Cond: (a = a.a) - -> Index Scan using ab_a2_b2_a_idx on ab_a2_b2 ab_5 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{0,0,1}'::integer[])) + -> Index Scan using ab_a2_b2_a_idx on ab_a2_b2 ab_5 (actual rows=N loops=N) Index Cond: (a = a.a) - -> Index Scan using ab_a2_b3_a_idx on ab_a2_b3 ab_6 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{0,0,1}'::integer[])) + -> Index Scan using ab_a2_b3_a_idx on ab_a2_b3 ab_6 (actual rows=N loops=N) Index Cond: (a = a.a) - -> Index Scan using ab_a3_b1_a_idx on ab_a3_b1 ab_7 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{0,0,1}'::integer[])) + -> Index Scan using ab_a3_b1_a_idx on ab_a3_b1 ab_7 (actual rows=N loops=N) Index Cond: (a = a.a) - -> Index Scan using ab_a3_b2_a_idx on ab_a3_b2 ab_8 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{0,0,1}'::integer[])) + -> Index Scan using ab_a3_b2_a_idx on ab_a3_b2 ab_8 (actual rows=N loops=N) Index Cond: (a = a.a) - -> Index Scan using ab_a3_b3_a_idx on ab_a3_b3 ab_9 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{0,0,1}'::integer[])) + -> Index Scan using ab_a3_b3_a_idx on ab_a3_b3 ab_9 (actual rows=N loops=N) Index Cond: (a = a.a) -(27 rows) +(51 rows) -- Ensure the same partitions are pruned when we make the nested loop -- parameter an Expr rather than a plain Param. @@ -2519,32 +2543,56 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on -------------------------------------------------------------------------------------------------------- Finalize Aggregate (actual rows=N loops=N) -> Gather (actual rows=N loops=N) - Workers Planned: 1 + Workers Planned: 2 Workers Launched: N -> Partial Aggregate (actual rows=N loops=N) - -> Nested Loop (actual rows=N loops=N) - -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) - Filter: (a = ANY ('{0,0,1}'::integer[])) - -> Append (actual rows=N loops=N) + -> Parallel Append (actual rows=N loops=N) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{0,0,1}'::integer[])) -> Index Scan using ab_a1_b1_a_idx on ab_a1_b1 ab_1 (actual rows=N loops=N) Index Cond: (a = (a.a + 0)) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{0,0,1}'::integer[])) -> Index Scan using ab_a1_b2_a_idx on ab_a1_b2 ab_2 (actual rows=N loops=N) Index Cond: (a = (a.a + 0)) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{0,0,1}'::integer[])) -> Index Scan using ab_a1_b3_a_idx on ab_a1_b3 ab_3 (actual rows=N loops=N) Index Cond: (a = (a.a + 0)) - -> Index Scan using ab_a2_b1_a_idx on ab_a2_b1 ab_4 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{0,0,1}'::integer[])) + -> Index Scan using ab_a2_b1_a_idx on ab_a2_b1 ab_4 (actual rows=N loops=N) Index Cond: (a = (a.a + 0)) - -> Index Scan using ab_a2_b2_a_idx on ab_a2_b2 ab_5 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{0,0,1}'::integer[])) + -> Index Scan using ab_a2_b2_a_idx on ab_a2_b2 ab_5 (actual rows=N loops=N) Index Cond: (a = (a.a + 0)) - -> Index Scan using ab_a2_b3_a_idx on ab_a2_b3 ab_6 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{0,0,1}'::integer[])) + -> Index Scan using ab_a2_b3_a_idx on ab_a2_b3 ab_6 (actual rows=N loops=N) Index Cond: (a = (a.a + 0)) - -> Index Scan using ab_a3_b1_a_idx on ab_a3_b1 ab_7 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{0,0,1}'::integer[])) + -> Index Scan using ab_a3_b1_a_idx on ab_a3_b1 ab_7 (actual rows=N loops=N) Index Cond: (a = (a.a + 0)) - -> Index Scan using ab_a3_b2_a_idx on ab_a3_b2 ab_8 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{0,0,1}'::integer[])) + -> Index Scan using ab_a3_b2_a_idx on ab_a3_b2 ab_8 (actual rows=N loops=N) Index Cond: (a = (a.a + 0)) - -> Index Scan using ab_a3_b3_a_idx on ab_a3_b3 ab_9 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{0,0,1}'::integer[])) + -> Index Scan using ab_a3_b3_a_idx on ab_a3_b3 ab_9 (actual rows=N loops=N) Index Cond: (a = (a.a + 0)) -(27 rows) +(51 rows) insert into lprt_a values(3),(3); select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on ab.a = a.a where a.a in(1, 0, 3)'); @@ -2552,99 +2600,187 @@ select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on -------------------------------------------------------------------------------------------------------- Finalize Aggregate (actual rows=N loops=N) -> Gather (actual rows=N loops=N) - Workers Planned: 1 + Workers Planned: 2 Workers Launched: N -> Partial Aggregate (actual rows=N loops=N) - -> Nested Loop (actual rows=N loops=N) - -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) - Filter: (a = ANY ('{1,0,3}'::integer[])) - -> Append (actual rows=N loops=N) + -> Parallel Append (actual rows=N loops=N) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,3}'::integer[])) -> Index Scan using ab_a1_b1_a_idx on ab_a1_b1 ab_1 (actual rows=N loops=N) Index Cond: (a = a.a) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,3}'::integer[])) -> Index Scan using ab_a1_b2_a_idx on ab_a1_b2 ab_2 (actual rows=N loops=N) Index Cond: (a = a.a) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,3}'::integer[])) -> Index Scan using ab_a1_b3_a_idx on ab_a1_b3 ab_3 (actual rows=N loops=N) Index Cond: (a = a.a) - -> Index Scan using ab_a2_b1_a_idx on ab_a2_b1 ab_4 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,3}'::integer[])) + -> Index Scan using ab_a2_b1_a_idx on ab_a2_b1 ab_4 (actual rows=N loops=N) Index Cond: (a = a.a) - -> Index Scan using ab_a2_b2_a_idx on ab_a2_b2 ab_5 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,3}'::integer[])) + -> Index Scan using ab_a2_b2_a_idx on ab_a2_b2 ab_5 (actual rows=N loops=N) Index Cond: (a = a.a) - -> Index Scan using ab_a2_b3_a_idx on ab_a2_b3 ab_6 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,3}'::integer[])) + -> Index Scan using ab_a2_b3_a_idx on ab_a2_b3 ab_6 (actual rows=N loops=N) Index Cond: (a = a.a) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,3}'::integer[])) -> Index Scan using ab_a3_b1_a_idx on ab_a3_b1 ab_7 (actual rows=N loops=N) Index Cond: (a = a.a) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,3}'::integer[])) -> Index Scan using ab_a3_b2_a_idx on ab_a3_b2 ab_8 (actual rows=N loops=N) Index Cond: (a = a.a) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,3}'::integer[])) -> Index Scan using ab_a3_b3_a_idx on ab_a3_b3 ab_9 (actual rows=N loops=N) Index Cond: (a = a.a) -(27 rows) +(51 rows) select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on ab.a = a.a where a.a in(1, 0, 0)'); explain_parallel_append -------------------------------------------------------------------------------------------------------- Finalize Aggregate (actual rows=N loops=N) -> Gather (actual rows=N loops=N) - Workers Planned: 1 + Workers Planned: 2 Workers Launched: N -> Partial Aggregate (actual rows=N loops=N) - -> Nested Loop (actual rows=N loops=N) - -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) - Filter: (a = ANY ('{1,0,0}'::integer[])) - Rows Removed by Filter: N - -> Append (actual rows=N loops=N) + -> Parallel Append (actual rows=N loops=N) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,0}'::integer[])) + Rows Removed by Filter: N -> Index Scan using ab_a1_b1_a_idx on ab_a1_b1 ab_1 (actual rows=N loops=N) Index Cond: (a = a.a) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,0}'::integer[])) + Rows Removed by Filter: N -> Index Scan using ab_a1_b2_a_idx on ab_a1_b2 ab_2 (actual rows=N loops=N) Index Cond: (a = a.a) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,0}'::integer[])) + Rows Removed by Filter: N -> Index Scan using ab_a1_b3_a_idx on ab_a1_b3 ab_3 (actual rows=N loops=N) Index Cond: (a = a.a) - -> Index Scan using ab_a2_b1_a_idx on ab_a2_b1 ab_4 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,0}'::integer[])) + Rows Removed by Filter: N + -> Index Scan using ab_a2_b1_a_idx on ab_a2_b1 ab_4 (actual rows=N loops=N) Index Cond: (a = a.a) - -> Index Scan using ab_a2_b2_a_idx on ab_a2_b2 ab_5 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,0}'::integer[])) + Rows Removed by Filter: N + -> Index Scan using ab_a2_b2_a_idx on ab_a2_b2 ab_5 (actual rows=N loops=N) Index Cond: (a = a.a) - -> Index Scan using ab_a2_b3_a_idx on ab_a2_b3 ab_6 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,0}'::integer[])) + Rows Removed by Filter: N + -> Index Scan using ab_a2_b3_a_idx on ab_a2_b3 ab_6 (actual rows=N loops=N) Index Cond: (a = a.a) - -> Index Scan using ab_a3_b1_a_idx on ab_a3_b1 ab_7 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,0}'::integer[])) + Rows Removed by Filter: N + -> Index Scan using ab_a3_b1_a_idx on ab_a3_b1 ab_7 (actual rows=N loops=N) Index Cond: (a = a.a) - -> Index Scan using ab_a3_b2_a_idx on ab_a3_b2 ab_8 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,0}'::integer[])) + Rows Removed by Filter: N + -> Index Scan using ab_a3_b2_a_idx on ab_a3_b2 ab_8 (actual rows=N loops=N) Index Cond: (a = a.a) - -> Index Scan using ab_a3_b3_a_idx on ab_a3_b3 ab_9 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,0}'::integer[])) + Rows Removed by Filter: N + -> Index Scan using ab_a3_b3_a_idx on ab_a3_b3 ab_9 (actual rows=N loops=N) Index Cond: (a = a.a) -(28 rows) +(60 rows) delete from lprt_a where a = 1; select explain_parallel_append('select avg(ab.a) from ab inner join lprt_a a on ab.a = a.a where a.a in(1, 0, 0)'); - explain_parallel_append -------------------------------------------------------------------------------------------------- + explain_parallel_append +-------------------------------------------------------------------------------------------------------- Finalize Aggregate (actual rows=N loops=N) -> Gather (actual rows=N loops=N) - Workers Planned: 1 + Workers Planned: 2 Workers Launched: N -> Partial Aggregate (actual rows=N loops=N) - -> Nested Loop (actual rows=N loops=N) - -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) - Filter: (a = ANY ('{1,0,0}'::integer[])) - Rows Removed by Filter: N - -> Append (actual rows=N loops=N) - -> Index Scan using ab_a1_b1_a_idx on ab_a1_b1 ab_1 (never executed) + -> Parallel Append (actual rows=N loops=N) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,0}'::integer[])) + Rows Removed by Filter: N + -> Index Scan using ab_a1_b1_a_idx on ab_a1_b1 ab_1 (actual rows=N loops=N) Index Cond: (a = a.a) - -> Index Scan using ab_a1_b2_a_idx on ab_a1_b2 ab_2 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,0}'::integer[])) + Rows Removed by Filter: N + -> Index Scan using ab_a1_b2_a_idx on ab_a1_b2 ab_2 (actual rows=N loops=N) Index Cond: (a = a.a) - -> Index Scan using ab_a1_b3_a_idx on ab_a1_b3 ab_3 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,0}'::integer[])) + Rows Removed by Filter: N + -> Index Scan using ab_a1_b3_a_idx on ab_a1_b3 ab_3 (actual rows=N loops=N) Index Cond: (a = a.a) - -> Index Scan using ab_a2_b1_a_idx on ab_a2_b1 ab_4 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,0}'::integer[])) + Rows Removed by Filter: N + -> Index Scan using ab_a2_b1_a_idx on ab_a2_b1 ab_4 (actual rows=N loops=N) Index Cond: (a = a.a) - -> Index Scan using ab_a2_b2_a_idx on ab_a2_b2 ab_5 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,0}'::integer[])) + Rows Removed by Filter: N + -> Index Scan using ab_a2_b2_a_idx on ab_a2_b2 ab_5 (actual rows=N loops=N) Index Cond: (a = a.a) - -> Index Scan using ab_a2_b3_a_idx on ab_a2_b3 ab_6 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,0}'::integer[])) + Rows Removed by Filter: N + -> Index Scan using ab_a2_b3_a_idx on ab_a2_b3 ab_6 (actual rows=N loops=N) Index Cond: (a = a.a) - -> Index Scan using ab_a3_b1_a_idx on ab_a3_b1 ab_7 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,0}'::integer[])) + Rows Removed by Filter: N + -> Index Scan using ab_a3_b1_a_idx on ab_a3_b1 ab_7 (actual rows=N loops=N) Index Cond: (a = a.a) - -> Index Scan using ab_a3_b2_a_idx on ab_a3_b2 ab_8 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,0}'::integer[])) + Rows Removed by Filter: N + -> Index Scan using ab_a3_b2_a_idx on ab_a3_b2 ab_8 (actual rows=N loops=N) Index Cond: (a = a.a) - -> Index Scan using ab_a3_b3_a_idx on ab_a3_b3 ab_9 (never executed) + -> Nested Loop (actual rows=N loops=N) + -> Parallel Seq Scan on lprt_a a (actual rows=N loops=N) + Filter: (a = ANY ('{1,0,0}'::integer[])) + Rows Removed by Filter: N + -> Index Scan using ab_a3_b3_a_idx on ab_a3_b3 ab_9 (actual rows=N loops=N) Index Cond: (a = a.a) -(28 rows) +(60 rows) reset enable_hashjoin; reset enable_mergejoin; @@ -2998,43 +3134,63 @@ explain (analyze, costs off, summary off, timing off) select * from tbl1 join tprt on tbl1.col1 > tprt.col1; QUERY PLAN -------------------------------------------------------------------------- - Nested Loop (actual rows=6 loops=1) - -> Seq Scan on tbl1 (actual rows=2 loops=1) - -> Append (actual rows=3 loops=2) + Append (actual rows=6 loops=1) + -> Nested Loop (actual rows=4 loops=1) + -> Seq Scan on tbl1 (actual rows=2 loops=1) -> Index Scan using tprt1_idx on tprt_1 (actual rows=2 loops=2) Index Cond: (col1 < tbl1.col1) - -> Index Scan using tprt2_idx on tprt_2 (actual rows=2 loops=1) + -> Nested Loop (actual rows=2 loops=1) + -> Seq Scan on tbl1 (actual rows=2 loops=1) + -> Index Scan using tprt2_idx on tprt_2 (actual rows=1 loops=2) Index Cond: (col1 < tbl1.col1) - -> Index Scan using tprt3_idx on tprt_3 (never executed) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=2 loops=1) + -> Index Scan using tprt3_idx on tprt_3 (actual rows=0 loops=2) Index Cond: (col1 < tbl1.col1) - -> Index Scan using tprt4_idx on tprt_4 (never executed) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=2 loops=1) + -> Index Scan using tprt4_idx on tprt_4 (actual rows=0 loops=2) Index Cond: (col1 < tbl1.col1) - -> Index Scan using tprt5_idx on tprt_5 (never executed) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=2 loops=1) + -> Index Scan using tprt5_idx on tprt_5 (actual rows=0 loops=2) Index Cond: (col1 < tbl1.col1) - -> Index Scan using tprt6_idx on tprt_6 (never executed) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=2 loops=1) + -> Index Scan using tprt6_idx on tprt_6 (actual rows=0 loops=2) Index Cond: (col1 < tbl1.col1) -(15 rows) +(25 rows) explain (analyze, costs off, summary off, timing off) select * from tbl1 join tprt on tbl1.col1 = tprt.col1; QUERY PLAN -------------------------------------------------------------------------- - Nested Loop (actual rows=2 loops=1) - -> Seq Scan on tbl1 (actual rows=2 loops=1) - -> Append (actual rows=1 loops=2) - -> Index Scan using tprt1_idx on tprt_1 (never executed) + Append (actual rows=2 loops=1) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=2 loops=1) + -> Index Scan using tprt1_idx on tprt_1 (actual rows=0 loops=2) Index Cond: (col1 = tbl1.col1) + -> Nested Loop (actual rows=2 loops=1) + -> Seq Scan on tbl1 (actual rows=2 loops=1) -> Index Scan using tprt2_idx on tprt_2 (actual rows=1 loops=2) Index Cond: (col1 = tbl1.col1) - -> Index Scan using tprt3_idx on tprt_3 (never executed) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=2 loops=1) + -> Index Scan using tprt3_idx on tprt_3 (actual rows=0 loops=2) Index Cond: (col1 = tbl1.col1) - -> Index Scan using tprt4_idx on tprt_4 (never executed) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=2 loops=1) + -> Index Scan using tprt4_idx on tprt_4 (actual rows=0 loops=2) Index Cond: (col1 = tbl1.col1) - -> Index Scan using tprt5_idx on tprt_5 (never executed) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=2 loops=1) + -> Index Scan using tprt5_idx on tprt_5 (actual rows=0 loops=2) Index Cond: (col1 = tbl1.col1) - -> Index Scan using tprt6_idx on tprt_6 (never executed) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=2 loops=1) + -> Index Scan using tprt6_idx on tprt_6 (actual rows=0 loops=2) Index Cond: (col1 = tbl1.col1) -(15 rows) +(25 rows) select tbl1.col1, tprt.col1 from tbl1 inner join tprt on tbl1.col1 > tprt.col1 @@ -3064,43 +3220,63 @@ explain (analyze, costs off, summary off, timing off) select * from tbl1 inner join tprt on tbl1.col1 > tprt.col1; QUERY PLAN -------------------------------------------------------------------------- - Nested Loop (actual rows=23 loops=1) - -> Seq Scan on tbl1 (actual rows=5 loops=1) - -> Append (actual rows=5 loops=5) + Append (actual rows=23 loops=1) + -> Nested Loop (actual rows=10 loops=1) + -> Seq Scan on tbl1 (actual rows=5 loops=1) -> Index Scan using tprt1_idx on tprt_1 (actual rows=2 loops=5) Index Cond: (col1 < tbl1.col1) - -> Index Scan using tprt2_idx on tprt_2 (actual rows=3 loops=4) + -> Nested Loop (actual rows=11 loops=1) + -> Seq Scan on tbl1 (actual rows=5 loops=1) + -> Index Scan using tprt2_idx on tprt_2 (actual rows=2 loops=5) Index Cond: (col1 < tbl1.col1) - -> Index Scan using tprt3_idx on tprt_3 (actual rows=1 loops=2) + -> Nested Loop (actual rows=2 loops=1) + -> Seq Scan on tbl1 (actual rows=5 loops=1) + -> Index Scan using tprt3_idx on tprt_3 (actual rows=0 loops=5) Index Cond: (col1 < tbl1.col1) - -> Index Scan using tprt4_idx on tprt_4 (never executed) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=5 loops=1) + -> Index Scan using tprt4_idx on tprt_4 (actual rows=0 loops=5) Index Cond: (col1 < tbl1.col1) - -> Index Scan using tprt5_idx on tprt_5 (never executed) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=5 loops=1) + -> Index Scan using tprt5_idx on tprt_5 (actual rows=0 loops=5) Index Cond: (col1 < tbl1.col1) - -> Index Scan using tprt6_idx on tprt_6 (never executed) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=5 loops=1) + -> Index Scan using tprt6_idx on tprt_6 (actual rows=0 loops=5) Index Cond: (col1 < tbl1.col1) -(15 rows) +(25 rows) explain (analyze, costs off, summary off, timing off) select * from tbl1 inner join tprt on tbl1.col1 = tprt.col1; QUERY PLAN -------------------------------------------------------------------------- - Nested Loop (actual rows=3 loops=1) - -> Seq Scan on tbl1 (actual rows=5 loops=1) - -> Append (actual rows=1 loops=5) - -> Index Scan using tprt1_idx on tprt_1 (never executed) + Append (actual rows=3 loops=1) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=5 loops=1) + -> Index Scan using tprt1_idx on tprt_1 (actual rows=0 loops=5) Index Cond: (col1 = tbl1.col1) - -> Index Scan using tprt2_idx on tprt_2 (actual rows=1 loops=2) + -> Nested Loop (actual rows=2 loops=1) + -> Seq Scan on tbl1 (actual rows=5 loops=1) + -> Index Scan using tprt2_idx on tprt_2 (actual rows=0 loops=5) Index Cond: (col1 = tbl1.col1) - -> Index Scan using tprt3_idx on tprt_3 (actual rows=0 loops=3) + -> Nested Loop (actual rows=1 loops=1) + -> Seq Scan on tbl1 (actual rows=5 loops=1) + -> Index Scan using tprt3_idx on tprt_3 (actual rows=0 loops=5) Index Cond: (col1 = tbl1.col1) - -> Index Scan using tprt4_idx on tprt_4 (never executed) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=5 loops=1) + -> Index Scan using tprt4_idx on tprt_4 (actual rows=0 loops=5) Index Cond: (col1 = tbl1.col1) - -> Index Scan using tprt5_idx on tprt_5 (never executed) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=5 loops=1) + -> Index Scan using tprt5_idx on tprt_5 (actual rows=0 loops=5) Index Cond: (col1 = tbl1.col1) - -> Index Scan using tprt6_idx on tprt_6 (never executed) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=5 loops=1) + -> Index Scan using tprt6_idx on tprt_6 (actual rows=0 loops=5) Index Cond: (col1 = tbl1.col1) -(15 rows) +(25 rows) select tbl1.col1, tprt.col1 from tbl1 inner join tprt on tbl1.col1 > tprt.col1 @@ -3149,22 +3325,32 @@ explain (analyze, costs off, summary off, timing off) select * from tbl1 join tprt on tbl1.col1 < tprt.col1; QUERY PLAN -------------------------------------------------------------------------- - Nested Loop (actual rows=1 loops=1) - -> Seq Scan on tbl1 (actual rows=1 loops=1) - -> Append (actual rows=1 loops=1) - -> Index Scan using tprt1_idx on tprt_1 (never executed) + Append (actual rows=1 loops=1) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=1 loops=1) + -> Index Scan using tprt1_idx on tprt_1 (actual rows=0 loops=1) Index Cond: (col1 > tbl1.col1) - -> Index Scan using tprt2_idx on tprt_2 (never executed) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=1 loops=1) + -> Index Scan using tprt2_idx on tprt_2 (actual rows=0 loops=1) Index Cond: (col1 > tbl1.col1) - -> Index Scan using tprt3_idx on tprt_3 (never executed) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=1 loops=1) + -> Index Scan using tprt3_idx on tprt_3 (actual rows=0 loops=1) Index Cond: (col1 > tbl1.col1) - -> Index Scan using tprt4_idx on tprt_4 (never executed) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=1 loops=1) + -> Index Scan using tprt4_idx on tprt_4 (actual rows=0 loops=1) Index Cond: (col1 > tbl1.col1) - -> Index Scan using tprt5_idx on tprt_5 (never executed) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=1 loops=1) + -> Index Scan using tprt5_idx on tprt_5 (actual rows=0 loops=1) Index Cond: (col1 > tbl1.col1) + -> Nested Loop (actual rows=1 loops=1) + -> Seq Scan on tbl1 (actual rows=1 loops=1) -> Index Scan using tprt6_idx on tprt_6 (actual rows=1 loops=1) Index Cond: (col1 > tbl1.col1) -(15 rows) +(25 rows) select tbl1.col1, tprt.col1 from tbl1 inner join tprt on tbl1.col1 < tprt.col1 @@ -3179,24 +3365,34 @@ delete from tbl1; insert into tbl1 values (10000); explain (analyze, costs off, summary off, timing off) select * from tbl1 join tprt on tbl1.col1 = tprt.col1; - QUERY PLAN -------------------------------------------------------------------- - Nested Loop (actual rows=0 loops=1) - -> Seq Scan on tbl1 (actual rows=1 loops=1) - -> Append (actual rows=0 loops=1) - -> Index Scan using tprt1_idx on tprt_1 (never executed) + QUERY PLAN +-------------------------------------------------------------------------- + Append (actual rows=0 loops=1) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=1 loops=1) + -> Index Scan using tprt1_idx on tprt_1 (actual rows=0 loops=1) Index Cond: (col1 = tbl1.col1) - -> Index Scan using tprt2_idx on tprt_2 (never executed) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=1 loops=1) + -> Index Scan using tprt2_idx on tprt_2 (actual rows=0 loops=1) Index Cond: (col1 = tbl1.col1) - -> Index Scan using tprt3_idx on tprt_3 (never executed) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=1 loops=1) + -> Index Scan using tprt3_idx on tprt_3 (actual rows=0 loops=1) Index Cond: (col1 = tbl1.col1) - -> Index Scan using tprt4_idx on tprt_4 (never executed) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=1 loops=1) + -> Index Scan using tprt4_idx on tprt_4 (actual rows=0 loops=1) Index Cond: (col1 = tbl1.col1) - -> Index Scan using tprt5_idx on tprt_5 (never executed) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=1 loops=1) + -> Index Scan using tprt5_idx on tprt_5 (actual rows=0 loops=1) Index Cond: (col1 = tbl1.col1) - -> Index Scan using tprt6_idx on tprt_6 (never executed) + -> Nested Loop (actual rows=0 loops=1) + -> Seq Scan on tbl1 (actual rows=1 loops=1) + -> Index Scan using tprt6_idx on tprt_6 (actual rows=0 loops=1) Index Cond: (col1 = tbl1.col1) -(15 rows) +(25 rows) select tbl1.col1, tprt.col1 from tbl1 inner join tprt on tbl1.col1 = tprt.col1 diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index 2f3eb4e7f1..7922f56861 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -134,6 +134,7 @@ select count(*) = 0 as ok from pg_stat_wal_receiver; select name, setting from pg_settings where name like 'enable%'; name | setting --------------------------------+--------- + enable_asymmetric_join | on enable_async_append | on enable_bitmapscan | on enable_gathermerge | on @@ -157,7 +158,7 @@ select name, setting from pg_settings where name like 'enable%'; enable_seqscan | on enable_sort | on enable_tidscan | on -(23 rows) +(24 rows) -- There are always wait event descriptions for various types. select type, count(*) > 0 as ok FROM pg_wait_events diff --git a/src/test/regress/sql/partition_join.sql b/src/test/regress/sql/partition_join.sql index 128ce8376e..4b5bb30f57 100644 --- a/src/test/regress/sql/partition_join.sql +++ b/src/test/regress/sql/partition_join.sql @@ -585,6 +585,271 @@ EXPLAIN (COSTS OFF) SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_adv t1 INNER JOIN prt2_adv t2 ON (t1.a = t2.b) WHERE t1.b = 0 ORDER BY t1.a, t2.b; SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_adv t1 INNER JOIN prt2_adv t2 ON (t1.a = t2.b) WHERE t1.b = 0 ORDER BY t1.a, t2.b; +-- +-- For asymmetric partition-wise join +-- +CREATE TABLE prt5 (hkey int, a int, b int) PARTITION BY HASH(hkey); +CREATE TABLE prt5_p0 PARTITION OF prt5 + FOR VALUES WITH (modulus 3, remainder 0); +CREATE TABLE prt5_p1 PARTITION OF prt5 + FOR VALUES WITH (modulus 3, remainder 1); +CREATE TABLE prt5_p2 PARTITION OF prt5 + FOR VALUES WITH (modulus 3, remainder 2); +CREATE TABLE prt6 (aid int, alabel text) PARTITION BY HASH(aid); +CREATE TABLE prt6_p0 PARTITION OF prt6 + FOR VALUES WITH (modulus 2, remainder 0); +CREATE TABLE prt6_p1 PARTITION OF prt6 + FOR VALUES WITH (modulus 2, remainder 1); +CREATE TABLE t5_1 (aid int, alabel text); +CREATE TABLE t5_2 (bid int, blabel text); + +INSERT INTO prt5 (SELECT x, (x % 1000)::int, + ((x+1) % 1000)::int + FROM generate_series(1,1000000) x); +INSERT INTO t5_1 (SELECT x, md5(x::text) FROM generate_series(-200, 1500) x); +INSERT INTO t5_2 (SELECT x, md5(x::text) FROM generate_series(-200, 1500) x); +INSERT INTO prt6 (SELECT * FROM t5_1); + +VACUUM ANALYZE prt5,prt6,t5_1,t5_2; + +SET max_parallel_workers_per_gather = 0; + +-- Trivial asymmetric JOIN of partitioned table with a relation +EXPLAIN (COSTS OFF) +SELECT * + FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%abc%'; + +-- AJ: the case of WholeRowVar +EXPLAIN (COSTS OFF) +SELECT (t1.*)::prt1 FROM prt1 t1, prt2_p1 t2 WHERE (t1.a = t2.b); + +-- AJ with partitioned table (one partition has pruned) +EXPLAIN (COSTS OFF) +SELECT (t1.*)::prt1 +FROM prt1 t1, prt2_p1 t2 +WHERE (t1.a = t2.b) AND (t1.a = 542 OR t1.a = 1); + +-- Test AJ on anti join +-- First test mostly for the future SJE with support of partitioned tables. +EXPLAIN (COSTS OFF) +SELECT count(*) FROM prt1 t1, prt2_p1 t2 +WHERE NOT EXISTS (SELECT * FROM prt1 WHERE prt1.a = t1.a); +SELECT count(*) FROM prt1 t1, prt2_p1 t2 +WHERE NOT EXISTS (SELECT * FROM prt1 WHERE prt1.a = t1.a); +EXPLAIN (COSTS OFF) +SELECT count(*) FROM prt2_p1 t1 +WHERE NOT EXISTS (SELECT * FROM prt1 WHERE prt1.a = t1.b); +SELECT count(*) FROM prt2_p1 t1 +WHERE NOT EXISTS (SELECT * FROM prt1 WHERE prt1.a = t1.b); + +-- Can't use AJ because of complex inner as well as PWJ (one partition pruned) +EXPLAIN (COSTS OFF) +SELECT (t1.*)::prt1 +FROM prt1 t1, prt2_p1 t2, prt1 t3 +WHERE (t1.a = t2.b) AND (t1.a = 542 OR t1.a = 1) AND t2.a = t3.a; + +-- TODO: We don't see any AJ or PWJ, but may be it's a game of costs? +EXPLAIN (COSTS OFF) +SELECT (t1.*)::prt1 +FROM prt1 t1, prt2_p1 t2, prt1 t3 +WHERE (t1.a = t2.b) AND (t1.a = 542 OR t1.a = 1) + AND (t2.a = t3.a) AND (t3.a = 543 OR t3.a = 2); + +-- The same, but appended with UNION ALL +EXPLAIN (COSTS OFF) +SELECT * FROM ( + (SELECT * FROM prt5_p0) + UNION ALL + (SELECT * FROM prt5_p1) + UNION ALL + (SELECT * FROM prt5_p2) + ) AS sq1 +JOIN t5_1 ON a = aid AND alabel like '%abc%'; + +-- Don't allow asymmetric JOIN of two partitioned tables. +EXPLAIN (COSTS OFF) +SELECT count(*) FROM prt5 JOIN prt6 ON a = aid AND alabel like '%abc%'; +SELECT count(*) FROM prt5 JOIN prt6 ON a = aid AND alabel like '%abc%'; + +-- Check that asymmetric JOIN with Subquery is forbidden +EXPLAIN (COSTS OFF) +SELECT count(*) FROM prt5 JOIN ( + SELECT * FROM prt6 LIMIT 1000 +) AS sq1 ON a = aid AND alabel like '%abc%'; +SELECT count(*) FROM prt5 JOIN (SELECT * FROM prt6 LIMIT 1000) AS sq1 + ON a = aid AND alabel like '%abc%'; + +-- Asymmetric JOIN of two plane tables and one partitioned +EXPLAIN (COSTS OFF) +SELECT count(*) + FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%ab%' + JOIN t5_2 ON b = bid AND blabel like '%cd%'; +SELECT count(*) + FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%ab%' + JOIN t5_2 ON b = bid AND blabel like '%cd%'; + +-- unable to extract non-partitioned right relation +EXPLAIN (COSTS OFF) +SELECT * FROM prt5 RIGHT JOIN t5_1 ON a = aid AND alabel like '%abc%'; +-- left side can be extracted, but no cost benefit +EXPLAIN (COSTS OFF) +SELECT * FROM prt5 LEFT JOIN t5_1 ON a = aid AND alabel like '%abc%'; + +-- validation of the results with/without asymmetric partition-wise join +SELECT * INTO pg_temp.result01a + FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%abc%'; + +SELECT * INTO pg_temp.result02a + FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%ab%' + JOIN t5_2 ON b = bid AND blabel like '%cd%'; + +SET enable_partitionwise_join = off; + +SELECT * INTO pg_temp.result01b + FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%abc%'; + +SELECT * INTO pg_temp.result02b + FROM prt5 JOIN t5_1 ON a = aid AND alabel like '%ab%' + JOIN t5_2 ON b = bid AND blabel like '%cd%'; + +SELECT * FROM pg_temp.result01a EXCEPT SELECT * FROM pg_temp.result01b; +SELECT * FROM pg_temp.result02a EXCEPT SELECT * FROM pg_temp.result02b; + +SET enable_partitionwise_join = on; + +-- Trying different JOIN combinations optimiser can provide partitioned relation +-- buried deeply in the inner subtree. Check that it doesn't provide +-- inconsistency in the plan. +EXPLAIN (COSTS OFF) +SELECT * from prt1 d1, prt2 d2, unnest(array[3,4]) n, unnest(array[3,4]) g +WHERE d1.a = n AND d2.b = d1.a and g = n; + +-- TODO: +-- According to current logic decision on AJ or PWJ for specific joinrel +-- can depend on the order of relations in the FROM list. See how it works. +-- Can we resolve this issue somehow? +EXPLAIN (COSTS OFF) -- PWJ on top +SELECT * from prt1 d1, unnest(array[3,4]) n, prt2 d2 +WHERE d1.a = n AND d2.b = d1.a; +EXPLAIN (COSTS OFF) -- AJ on top +SELECT * from prt1 d1, prt2 d2, unnest(array[3,4]) n +WHERE d1.a = n AND d2.b = d1.a; + +-- Allow JOINs in the inner side of AJ +EXPLAIN (COSTS OFF) +SELECT * FROM prt1 d1 LEFT JOIN (unnest(array[3,4]) n + FULL OUTER JOIN unnest(array[3,4]) g ON (n=g)) +ON (d1.a = n); + +-- Check reparameterization code when an optimizer have to make two level relids +-- adjustment. + +SET enable_hashjoin = 'off'; +SET enable_mergejoin = 'off'; +SET enable_material = 'off'; + +CREATE TABLE big AS SELECT x AS x FROM generate_series(1,1E4) x; +CREATE INDEX ON big(x); +CREATE TABLE small AS SELECT x, -x AS y FROM generate_series(1,100) x; + +CREATE TABLE part_l0 (x int, y int, z int) PARTITION BY HASH (y); +CREATE TABLE part0_l1 PARTITION OF part_l0 (y) + FOR VALUES WITH (modulus 2, remainder 0) PARTITION BY HASH (z); +CREATE TABLE part0_l2 PARTITION OF part0_l1 (z) + FOR VALUES WITH (modulus 2, remainder 0); +CREATE TABLE part1_l2 PARTITION OF part0_l1 (z) + FOR VALUES WITH (modulus 2, remainder 1); +CREATE TABLE part1_l1 PARTITION OF part_l0 (y) + FOR VALUES WITH (modulus 2, remainder 1); +INSERT INTO part_l0 (x, y, z) (SELECT x,x,x FROM generate_series(1,1E4) x); + +ANALYZE big,small,part_l0; + +-- Parameter have to be reparameterized by a plane relation. +EXPLAIN (COSTS OFF) +SELECT small.* FROM small + JOIN part_l0 ON small.y = part_l0.y AND small.y + part_l0.y < 10 + LEFT JOIN big ON big.x = small.x; +SELECT small.* FROM small + JOIN part_l0 ON small.y = part_l0.y AND small.y + part_l0.y < 10 + LEFT JOIN big ON big.x = small.x; + +-- Parameters have to be reparameterized by plane and partitioned relations. +EXPLAIN (COSTS OFF) +SELECT count(*) FROM small + JOIN part_l0 ON small.y = part_l0.y AND small.y + part_l0.y < 10 + LEFT JOIN big ON big.x = small.x OR big.x = part_l0.x; +SELECT count(*) FROM small + JOIN part_l0 ON small.y = part_l0.y AND small.y + part_l0.y < 10 + LEFT JOIN big ON big.x = small.x OR big.x = part_l0.x; + +DROP TABLE IF EXISTS big,small,part_l0 CASCADE; +RESET enable_hashjoin; +RESET enable_mergejoin; +RESET enable_material; +RESET max_parallel_workers_per_gather; + +-- +-- The case of AJ under partitionwise PNL which needs reparameterization +-- +CREATE TABLE small (x int); +CREATE TABLE big (x int, y int); +CREATE TABLE part42 (x int, y int) PARTITION BY RANGE (x); +CREATE TABLE p1 PARTITION OF part42 (x) FOR VALUES FROM (0) TO (50); +CREATE TABLE p2 PARTITION OF part42 (x) FOR VALUES FROM (50) TO (101); +INSERT INTO big (SELECT gs%10,gs FROM generate_series(1,10000) AS gs); +INSERT INTO small (x) SELECT gs FROM generate_series (1,1) AS gs; +INSERT INTO part42 SELECT gs%100, gs%100 FROM generate_series(1,100000) AS gs; +CREATE INDEX big_idx ON big(y); +VACUUM ANALYZE small,big,part42; + +SET enable_hashjoin = off; +SET enable_mergejoin = off; +SET max_parallel_workers_per_gather = 0; -- Reduce test unstability +EXPLAIN (COSTS OFF) +SELECT * FROM part42 ext LEFT JOIN big + JOIN part42 p2 ON (p2.x=big.y) +ON (ext.x=p2.x AND ext.y=big.y); +RESET max_parallel_workers_per_gather; +RESET enable_mergejoin; +RESET enable_hashjoin; + +-- Parameterized path examples. +CREATE TABLE prta (id integer, payload text) PARTITION BY HASH (id); +CREATE TABLE prta1 PARTITION OF prta FOR VALUES WITH (modulus 2, remainder 0); +CREATE TABLE prta2 PARTITION OF prta FOR VALUES WITH (modulus 2, remainder 1); +CREATE UNIQUE INDEX ON prta1 (id); +CREATE UNIQUE INDEX ON prta2 (id); +INSERT INTO prta (id, payload) + (SELECT *, ('abc' || id)::text AS payload + FROM generate_series(1,1) AS id); + +CREATE TABLE prtb (id integer, payload text) PARTITION BY HASH (id); +CREATE TABLE prtb1 PARTITION OF prtb FOR VALUES WITH (modulus 2, remainder 0); +CREATE TABLE prtb2 PARTITION OF prtb FOR VALUES WITH (modulus 2, remainder 1); +CREATE UNIQUE INDEX ON prtb1 (id); +CREATE UNIQUE INDEX ON prtb2 (id); +INSERT INTO prtb (id, payload) + (SELECT *, ('def' || id)::text AS payload + FROM generate_series(1,1000) AS id); + +CREATE TABLE e (id integer, payload text) PARTITION BY HASH (id); +CREATE TABLE e1 PARTITION OF e FOR VALUES WITH (modulus 2, remainder 0); +CREATE TABLE e2 PARTITION OF e FOR VALUES WITH (modulus 2, remainder 1); +INSERT INTO e (id, payload) + (SELECT *, ('ghi' || id)::text AS payload + FROM generate_series(1,1000) AS id); +CREATE UNIQUE INDEX ON e1 (id); +CREATE UNIQUE INDEX ON e2 (id); + +ANALYZE prta,prtb,e; + +EXPLAIN (COSTS OFF) +SELECT * FROM prta,prtb WHERE prta.id=prtb.id; + +EXPLAIN (COSTS OFF) +SELECT * FROM prta,prtb,e WHERE prta.id=prtb.id AND prta.id=e.id; + -- semi join EXPLAIN (COSTS OFF) SELECT t1.* FROM prt1_adv t1 WHERE EXISTS (SELECT 1 FROM prt2_adv t2 WHERE t1.a = t2.b) AND t1.b = 0 ORDER BY t1.a; @@ -777,6 +1042,17 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM prt1_adv t1 INNER JOIN prt2_adv t2 ON (t1.a = DROP TABLE prt1_adv; DROP TABLE prt2_adv; +DROP TABLE prt1; + +-- Test function scan with lateral relids +CREATE TABLE prt1 (id int, data json) PARTITION BY HASH(id); +CREATE TABLE prt1_p1 PARTITION OF prt1 FOR VALUES WITH (MODULUS 3, REMAINDER 0); +CREATE TABLE prt1_p2 PARTITION OF prt1 FOR VALUES WITH (MODULUS 3, REMAINDER 1); +CREATE TABLE prt1_p3 PARTITION OF prt1 FOR VALUES WITH (MODULUS 3, REMAINDER 2); +INSERT INTO prt1 SELECT i, format('{ "name": "value%s", "Test": {"a":"b"}}', i)::json from generate_series(1,100) i; +ANALYZE prt1; +SELECT id, key, value from prt1, json_each(data->'Test') ORDER BY id LIMIT 10; +DROP TABLE prt1; -- Tests for list-partitioned tables CREATE TABLE plt1_adv (a int, b int, c text) PARTITION BY LIST (c); -- 2.45.0