From e47abde1f6a2c8c8d7fa588df72e59c0cd049d4c Mon Sep 17 00:00:00 2001 From: "Andrei V. Lepikhov" Date: Wed, 25 Dec 2024 14:33:12 +0700 Subject: [PATCH v0] Memoise the inner of SEMI- and ANTI-join. To get the result of semi-join or anti-join, we only need one tuple from the parameterised inner. The Postgres core already includes Memoize's single_row mode for the case when it is proved that inner returns only a single value. Thus, implementing similar logic for semi-joins and anti-joins is quite feasible. It is clear that the semi-join operates correctly: it returns the outer tuple as soon as the first tuple is received from the inner query. In contrast, the behaviour of the anti-join is less straightforward. If anti-join had a join clause not pushed to the parameterised inner, it would be possible to read more than a single tuple from the inner, which causes inconsistency. However, it seems that this is an impossible case for the current planner. --- src/backend/commands/explain.c | 9 +++++++++ src/backend/optimizer/path/costsize.c | 2 +- src/backend/optimizer/path/joinpath.c | 5 +++-- src/backend/optimizer/util/pathnode.c | 2 +- src/test/regress/expected/join.out | 3 ++- src/test/regress/expected/subselect.out | 3 ++- 6 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index d8a7232cedb..c90067dadcb 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -3632,6 +3632,10 @@ show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es) { ExplainPropertyText("Cache Key", keystr.data, es); ExplainPropertyText("Cache Mode", mstate->binary_mode ? "binary" : "logical", es); + + /* Report only in the single mode case to not break current tests */ + if (mstate->singlerow) + ExplainPropertyText("Store Mode", "singlerow", es); } else { @@ -3639,6 +3643,11 @@ show_memoize_info(MemoizeState *mstate, List *ancestors, ExplainState *es) appendStringInfo(es->str, "Cache Key: %s\n", keystr.data); ExplainIndentText(es); appendStringInfo(es->str, "Cache Mode: %s\n", mstate->binary_mode ? "binary" : "logical"); + if (mstate->singlerow) + { + ExplainIndentText(es); + appendStringInfo(es->str, "Store Mode: %s\n", "singlerow"); + } } pfree(keystr.data); diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 73d78617009..4cca5a7395c 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -2545,7 +2545,7 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath, ListCell *lc; Cost input_startup_cost = mpath->subpath->startup_cost; Cost input_total_cost = mpath->subpath->total_cost; - double tuples = mpath->subpath->rows; + double tuples = mpath->singlerow ? 1 : mpath->subpath->rows; double calls = mpath->calls; int width = mpath->subpath->pathtarget->width; diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 18891ce9156..89738b483ef 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -682,6 +682,7 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel, ListCell *lc; bool binary_mode; List *ph_lateral_vars; + bool single_mode = false; /* Obviously not if it's disabled */ if (!enable_memoize) @@ -725,7 +726,7 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel, */ if (!extra->inner_unique && (jointype == JOIN_SEMI || jointype == JOIN_ANTI)) - return NULL; + single_mode = true; /* * Memoize normally marks cache entries as complete when it runs out of @@ -808,7 +809,7 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel, inner_path, param_exprs, hash_operators, - extra->inner_unique, + extra->inner_unique || single_mode, binary_mode, outer_path->rows); } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 93e73cb44db..9b4c0f96193 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1707,7 +1707,7 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, */ pathnode->path.startup_cost = subpath->startup_cost + cpu_tuple_cost; pathnode->path.total_cost = subpath->total_cost + cpu_tuple_cost; - pathnode->path.rows = subpath->rows; + pathnode->path.rows = (pathnode->singlerow) ? 1 : subpath->rows; return pathnode; } diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index a57bb18c24f..e8293c3c87d 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -6469,10 +6469,11 @@ select * from sj t1 -> Memoize Cache Key: t1.a, t1.b Cache Mode: binary + Store Mode: singlerow -> Sample Scan on sj Sampling: system (t1.b) Filter: (t1.a = a) -(8 rows) +(9 rows) -- Ensure that SJE does not form a self-referential lateral dependency explain (costs off) diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index d0db8a412ff..5ab56f8d71f 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -2642,6 +2642,7 @@ ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd); -> Memoize Cache Key: b.hundred, b.odd Cache Mode: binary + Store Mode: singlerow -> Subquery Scan on "ANY_subquery" Filter: (b.hundred = "ANY_subquery".min) -> Result @@ -2650,5 +2651,5 @@ ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd); -> Index Scan using tenk2_hundred on tenk2 c Index Cond: (hundred IS NOT NULL) Filter: (odd = b.odd) -(16 rows) +(17 rows) -- 2.48.1