From da570914e53bc34e6bf3291649725a041a8b929c Mon Sep 17 00:00:00 2001 From: "Andrey V. Lepikhov" Date: Wed, 28 Sep 2022 13:42:12 +0300 Subject: [PATCH] Transform correlated subquery of type N-J [1] into ordinary join query. With many restrictions, check each subquery and pull up expressions, references upper query block. Works for operators '=' and 'IN'. Machinery of converting ANY subquery to JOIN stays the same with minor changes in walker function. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass a lower outer join through the pullup_sublink recursive procedure to find out some situations when qual references outer side of an outer join. [1] Kim, Won. “On optimizing an SQL-like nested query.” ACM Trans. Database Syst. 7 (1982): 443-469. --- src/backend/optimizer/plan/subselect.c | 328 +++++++++++++++- src/backend/optimizer/prep/prepjointree.c | 71 ++-- src/backend/optimizer/util/tlist.c | 2 +- src/backend/optimizer/util/var.c | 8 + src/backend/utils/misc/guc_tables.c | 10 + src/include/optimizer/optimizer.h | 1 + src/include/optimizer/subselect.h | 3 +- src/include/optimizer/tlist.h | 1 + src/test/regress/expected/prepare.out | 18 + src/test/regress/expected/subselect.out | 438 ++++++++++++++++++++++ src/test/regress/sql/prepare.sql | 7 + src/test/regress/sql/subselect.sql | 162 ++++++++ src/tools/pgindent/typedefs.list | 1 + 13 files changed, 1013 insertions(+), 37 deletions(-) diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 92e3338584..be19d85524 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -32,6 +32,7 @@ #include "optimizer/planner.h" #include "optimizer/prep.h" #include "optimizer/subselect.h" +#include "optimizer/tlist.h" #include "parser/parse_relation.h" #include "rewrite/rewriteManip.h" #include "utils/builtins.h" @@ -65,6 +66,8 @@ typedef struct inline_cte_walker_context } inline_cte_walker_context; +bool optimize_correlated_subqueries = true; + static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, List *plan_params, SubLinkType subLinkType, int subLinkId, @@ -1229,6 +1232,296 @@ inline_cte_walker(Node *node, inline_cte_walker_context *context) return expression_tree_walker(node, inline_cte_walker, context); } +static bool +contain_placeholders(Node *node, inline_cte_walker_context *context) +{ + if (node == NULL) + return false; + + if (IsA(node, Query)) + return query_tree_walker((Query *) node, contain_placeholders, context, 0); + if (IsA(node, PlaceHolderVar)) + return true; + + return expression_tree_walker(node, contain_placeholders, context); +} + +/* + * To be pullable all clauses of flattening correlated subquery should be + * anded and mergejoinable (XXX: really necessary?) + */ +static bool +quals_is_pullable(Node *quals) +{ + if (!contain_vars_of_level(quals, 1)) + return true; + + if (quals && IsA(quals, OpExpr)) + { + OpExpr *expr = (OpExpr *) quals; + Node *leftarg; + + /* Contains only one expression */ + leftarg = linitial(expr->args); + if (!op_mergejoinable(expr->opno, exprType(leftarg))) /* Is it really necessary ? */ + return false; + + if (contain_placeholders(quals, NULL)) + return false; + + return true; + } + else if (is_andclause(quals)) + { + ListCell *l; + + foreach(l, ((BoolExpr *) quals)->args) + { + Node *andarg = (Node *) lfirst(l); + + if (!IsA(andarg, OpExpr)) + return false; + if (!quals_is_pullable(andarg)) + return false; + } + + return true; + } + + return false; +} + +/* + * Mutator context used in pull-up process of expressions from WHERE quals. + */ +typedef struct +{ + Query *subquery; /* Link to the subquery which we are trying to pull up */ + List *pulling_quals; /* List of expressions contained pulled expressions */ + int newvarno; /* Index or range table entry which is a source of pulling + varno */ + bool varlevel_up; /* true if a pulling expression is being processed, + false otherwise. */ +} correlated_t; + +/* + * Pull up expressions, containing a link to upper relation. In the qual it will + * be replaced by TRUE const. In this expression, all links to upper levels will + * be arranged by one level. + */ +static Node * +pull_subquery_clauses_mutator(Node *node, correlated_t *ctx) +{ + if (node == NULL) + return NULL; + + if (IsA(node, OpExpr) && !ctx->varlevel_up) + { + if (!contain_vars_of_level(node, 1)) + return node; + + /* + * The expression contains links to upper relation. It will be pulled to + * uplevel. All links into vars of upper levels must be changed. + */ + + ctx->varlevel_up = true; + ctx->pulling_quals = + lappend(ctx->pulling_quals, + expression_tree_mutator(node, + pull_subquery_clauses_mutator, + (void *) ctx)); + ctx->varlevel_up = false; + + /* Replace position of pulled expression by the 'true' value */ + return makeBoolConst(true, false); + } + if (IsA(node, PlaceHolderVar)) + { + PlaceHolderVar *phv = (PlaceHolderVar *) node; + + if (ctx->varlevel_up && phv->phlevelsup > 0) + phv->phlevelsup--; + /* fall through to recurse into argument */ + } + else if (IsA(node, RangeTblEntry)) + { + RangeTblEntry *rte = (RangeTblEntry *) node; + + if (rte->rtekind == RTE_CTE) + { + if (rte->ctelevelsup > 0 && ctx->varlevel_up) + rte->ctelevelsup--; + } + return node; + } + else if (IsA(node, Aggref)) + { + if (((Aggref *) node)->agglevelsup > 0 && ctx->varlevel_up) + ((Aggref *) node)->agglevelsup--; + return node; + } + else if (IsA(node, GroupingFunc)) + { + if (((GroupingFunc *) node)->agglevelsup > 0 && ctx->varlevel_up) + ((GroupingFunc *) node)->agglevelsup--; + return node; + } + else if (IsA(node, Var)) + { + Var *var = (Var *) node; + + Assert(ctx->varlevel_up); + + /* An upper relation variable */ + if (var->varlevelsup > 0) + { + /* + * Isn't needed to copy node or change varno because it correctly + * refers to Table Entry of a parent and already removed from + * the subquery clauses list. + */ + var->varlevelsup--; + + return (Node *) var; + } + else + { + Var *newvar; + TargetEntry *tle; + + /* + * The var refers to subquery table entry. Include a copy the var + * into the target list, if necessary. Arrange varattno of the + * new var of upper relation with a link to this entry. + */ + + /* Create a var for usage in upper relation */ + newvar = (Var *) copyObject(node); + + /* Does the var already exists in the target list? */ + tle = tlist_member_match_var(var, ctx->subquery->targetList); + + if (tle == NULL) + { + int resno = list_length(ctx->subquery->targetList) + 1; + + /* + * Target list of the subquery doesn't contain this var. Add it + * into the end of the target list and correct the link + * XXX: Maybe choose real colname here? + */ + tle = makeTargetEntry((Expr *) var, resno, "rescol", false); + ctx->subquery->targetList = lappend(ctx->subquery->targetList, + tle); + } + else + { + if (tle->resjunk) + { + /* + * Target entry exists but used as an utility entry + * (for grouping, as an example). So, revert its status to + * a fully valued entry. + */ + tle->resjunk = false; + tle->resname = pstrdup("resjunkcol"); + } + } + + /* + * Set the new var to refer newly created RangeTblEntry in the upper + * query and varattno to refer at specific position in the target + * list. + */ + newvar->varno = ctx->newvarno; + newvar->varattno = tle->resno; + + return (Node *) newvar; + } + } + if (IsA(node, Query)) + return (Node *) query_tree_mutator((Query *) node, + pull_subquery_clauses_mutator, + (void *) ctx, 0); + + return expression_tree_mutator(node, pull_subquery_clauses_mutator, + (void *) ctx); +} + +static List * +pull_correlated_clauses(PlannerInfo *root, SubLink *sublink, + JoinExpr *lowest_outer_join) +{ + Query *parse = root->parse; + Query *subselect = (Query *) sublink->subselect; + FromExpr *f; + correlated_t ctx = {.subquery = subselect, + .newvarno = list_length(parse->rtable) + 1, /* Looks like a hack */ + .pulling_quals = NIL, + .varlevel_up = false}; + Relids safe_upper_varnos = NULL; + + Assert(IsA(subselect, Query)); + + /* Use only for correlated candidates, just for optimal usage */ + Assert(contain_vars_of_level((Node *) subselect, 1)); + + if (!optimize_correlated_subqueries || + subselect->hasAggs || + subselect->hasWindowFuncs || + subselect->hasForUpdate || /* Pulling of clauses can change a number of tuples which subselect returns. */ + subselect->hasRowSecurity /* Just because of paranoid safety */ + ) + /* The feature is switched off. */ + return NULL; + + /* + * We pull up quals and arrange variable levels for expressions in WHERE + * section only. So, cut the optimization off if an upper relation links + * from another parts of the subquery are detected. + */ + if (contain_vars_of_level((Node *) subselect->cteList, 1) || + /* see comments in subselect.sql */ + contain_vars_of_level((Node *) subselect->rtable, 1) || + contain_vars_of_level((Node *) subselect->targetList, 1) || + contain_vars_of_level((Node *) subselect->returningList, 1) || + contain_vars_of_level((Node *) subselect->groupingSets, 1) || + contain_vars_of_level((Node *) subselect->distinctClause, 1) || + contain_vars_of_level((Node *) subselect->sortClause, 1) || + contain_vars_of_level((Node *) subselect->limitOffset, 1) || + contain_vars_of_level((Node *) subselect->limitCount, 1) || + contain_vars_of_level((Node *) subselect->rowMarks, 1) || + contain_vars_of_level((Node *) subselect->havingQual, 1) || + contain_vars_of_level((Node *) subselect->groupClause, 1)) + return NULL; + + f = subselect->jointree; + + if (!f || !f->quals || !quals_is_pullable(f->quals)) + return NULL; + + if (lowest_outer_join) + safe_upper_varnos = get_relids_in_jointree( + (lowest_outer_join->jointype == JOIN_RIGHT) ? + lowest_outer_join->larg : lowest_outer_join->rarg, true); + + if (safe_upper_varnos && + !bms_is_subset(pull_varnos_of_level(root, f->quals, 1), + safe_upper_varnos)) + return NULL; + + /* + * Now, is proved that it is possible to pull up expressions with variables + * from the upper query. + * Pull up quals, containing correlated expressions. Replace its + * positions with a true boolean expression. + * It would be removed on a next planning stage. + */ + f->quals = pull_subquery_clauses_mutator(f->quals, (void *) &ctx); + + return ctx.pulling_quals; +} /* * convert_ANY_sublink_to_join: try to convert an ANY SubLink to a join @@ -1266,7 +1559,7 @@ inline_cte_walker(Node *node, inline_cte_walker_context *context) */ JoinExpr * convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, - Relids available_rels) + Relids available_rels, JoinExpr *lowest_outer_join) { JoinExpr *result; Query *parse = root->parse; @@ -1279,16 +1572,10 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, List *subquery_vars; Node *quals; ParseState *pstate; + List *pclauses = NIL; Assert(sublink->subLinkType == ANY_SUBLINK); - /* - * The sub-select must not refer to any Vars of the parent query. (Vars of - * higher levels should be okay, though.) - */ - if (contain_vars_of_level((Node *) subselect, 1)) - return NULL; - /* * The test expression must contain some Vars of the parent query, else * it's not gonna be a join. (Note that it won't have Vars referring to @@ -1310,6 +1597,17 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, if (contain_volatile_functions(sublink->testexpr)) return NULL; + /* + * The sub-select must not refer to any Vars of the parent query. (Vars of + * higher levels should be okay, though.) + * In the case of correlated subquery, jointree quals structure will be + * modified: expressions with variables from upper query moves to the + * pulled_clauses list, their places in the quals replaces by "true" value. + */ + if (contain_vars_of_level((Node *) subselect, 1) && + (pclauses = pull_correlated_clauses(root, sublink, lowest_outer_join)) == NIL) + return NULL; + /* Create a dummy ParseState for addRangeTableEntryForSubquery */ pstate = make_parsestate(NULL); @@ -1348,6 +1646,20 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, */ quals = convert_testexpr(root, sublink->testexpr, subquery_vars); + /* Nested subquery with references to upper level relation. */ + if (pclauses != NIL) + { + /* Add clauses, pulled from subquery into WHERE section of the parent. */ + if (IsA(quals, BoolExpr)) + { + BoolExpr *b = (BoolExpr *) quals; + b->args = list_concat(b->args, pclauses); + } + else + quals = (Node *) make_andclause( + list_concat(list_make1(quals), pclauses)); + } + /* * And finally, build the JoinExpr node. */ diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 41c7066d90..f72b8b1320 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -62,10 +62,12 @@ typedef struct reduce_outer_joins_state } reduce_outer_joins_state; static Node *pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode, - Relids *relids); + Relids *relids, + JoinExpr *lowest_outer_join); static Node *pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, Node **jtlink1, Relids available_rels1, - Node **jtlink2, Relids available_rels2); + Node **jtlink2, Relids available_rels2, + JoinExpr *lowest_outer_join); static Node *pull_up_subqueries_recurse(PlannerInfo *root, Node *jtnode, JoinExpr *lowest_outer_join, JoinExpr *lowest_nulling_outer_join, @@ -293,7 +295,7 @@ pull_up_sublinks(PlannerInfo *root) /* Begin recursion through the jointree */ jtnode = pull_up_sublinks_jointree_recurse(root, (Node *) root->parse->jointree, - &relids); + &relids, NULL); /* * root->parse->jointree must always be a FromExpr, so insert a dummy one @@ -313,7 +315,7 @@ pull_up_sublinks(PlannerInfo *root) */ static Node * pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode, - Relids *relids) + Relids *relids, JoinExpr *lowest_outer_join) { if (jtnode == NULL) { @@ -343,7 +345,8 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode, newchild = pull_up_sublinks_jointree_recurse(root, lfirst(l), - &childrelids); + &childrelids, + lowest_outer_join); newfromlist = lappend(newfromlist, newchild); frelids = bms_join(frelids, childrelids); } @@ -354,7 +357,8 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode, /* Now process qual --- all children are available for use */ newf->quals = pull_up_sublinks_qual_recurse(root, f->quals, &jtlink, frelids, - NULL, NULL); + NULL, NULL, + lowest_outer_join); /* * Note that the result will be either newf, or a stack of JoinExprs @@ -385,9 +389,11 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode, /* Recurse to process children and collect their relids */ j->larg = pull_up_sublinks_jointree_recurse(root, j->larg, - &leftrelids); + &leftrelids, + lowest_outer_join); j->rarg = pull_up_sublinks_jointree_recurse(root, j->rarg, - &rightrelids); + &rightrelids, + lowest_outer_join); /* * Now process qual, showing appropriate child relids as available, @@ -408,13 +414,14 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode, &jtlink, bms_union(leftrelids, rightrelids), - NULL, NULL); + NULL, NULL, + lowest_outer_join); break; case JOIN_LEFT: j->quals = pull_up_sublinks_qual_recurse(root, j->quals, &j->rarg, rightrelids, - NULL, NULL); + NULL, NULL, j); break; case JOIN_FULL: /* can't do anything with full-join quals */ @@ -423,7 +430,7 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode, j->quals = pull_up_sublinks_qual_recurse(root, j->quals, &j->larg, leftrelids, - NULL, NULL); + NULL, NULL, j); break; default: elog(ERROR, "unrecognized join type: %d", @@ -468,7 +475,8 @@ pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode, static Node * pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, Node **jtlink1, Relids available_rels1, - Node **jtlink2, Relids available_rels2) + Node **jtlink2, Relids available_rels2, + JoinExpr *lowest_outer_join) { if (node == NULL) return NULL; @@ -482,7 +490,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, if (sublink->subLinkType == ANY_SUBLINK) { if ((j = convert_ANY_sublink_to_join(root, sublink, - available_rels1)) != NULL) + available_rels1, + lowest_outer_join)) != NULL) { /* Yes; insert the new join node into the join tree */ j->larg = *jtlink1; @@ -490,7 +499,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, /* Recursively process pulled-up jointree nodes */ j->rarg = pull_up_sublinks_jointree_recurse(root, j->rarg, - &child_rels); + &child_rels, j); /* * Now recursively process the pulled-up quals. Any inserted @@ -502,13 +511,15 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, &j->larg, available_rels1, &j->rarg, - child_rels); + child_rels, + j); /* Return NULL representing constant TRUE */ return NULL; } if (available_rels2 != NULL && (j = convert_ANY_sublink_to_join(root, sublink, - available_rels2)) != NULL) + available_rels2, + lowest_outer_join)) != NULL) { /* Yes; insert the new join node into the join tree */ j->larg = *jtlink2; @@ -516,7 +527,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, /* Recursively process pulled-up jointree nodes */ j->rarg = pull_up_sublinks_jointree_recurse(root, j->rarg, - &child_rels); + &child_rels, j); /* * Now recursively process the pulled-up quals. Any inserted @@ -528,7 +539,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, &j->larg, available_rels2, &j->rarg, - child_rels); + child_rels, + j); /* Return NULL representing constant TRUE */ return NULL; } @@ -544,7 +556,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, /* Recursively process pulled-up jointree nodes */ j->rarg = pull_up_sublinks_jointree_recurse(root, j->rarg, - &child_rels); + &child_rels, j); /* * Now recursively process the pulled-up quals. Any inserted @@ -556,7 +568,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, &j->larg, available_rels1, &j->rarg, - child_rels); + child_rels, + j); /* Return NULL representing constant TRUE */ return NULL; } @@ -570,7 +583,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, /* Recursively process pulled-up jointree nodes */ j->rarg = pull_up_sublinks_jointree_recurse(root, j->rarg, - &child_rels); + &child_rels, j); /* * Now recursively process the pulled-up quals. Any inserted @@ -582,7 +595,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, &j->larg, available_rels2, &j->rarg, - child_rels); + child_rels, + j); /* Return NULL representing constant TRUE */ return NULL; } @@ -610,7 +624,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, /* Recursively process pulled-up jointree nodes */ j->rarg = pull_up_sublinks_jointree_recurse(root, j->rarg, - &child_rels); + &child_rels, j); /* * Now recursively process the pulled-up quals. Because @@ -622,7 +636,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, j->quals, &j->rarg, child_rels, - NULL, NULL); + NULL, NULL, + j); /* Return NULL representing constant TRUE */ return NULL; } @@ -636,7 +651,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, /* Recursively process pulled-up jointree nodes */ j->rarg = pull_up_sublinks_jointree_recurse(root, j->rarg, - &child_rels); + &child_rels, j); /* * Now recursively process the pulled-up quals. Because @@ -648,7 +663,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, j->quals, &j->rarg, child_rels, - NULL, NULL); + NULL, NULL, + j); /* Return NULL representing constant TRUE */ return NULL; } @@ -673,7 +689,8 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, jtlink1, available_rels1, jtlink2, - available_rels2); + available_rels2, + lowest_outer_join); if (newclause) newclauses = lappend(newclauses, newclause); } diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index 784a1af82d..5b7aee121f 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -98,7 +98,7 @@ tlist_member(Expr *node, List *targetlist) * This is needed in some cases where we can't be sure of an exact typmod * match. For safety, though, we insist on vartype match. */ -static TargetEntry * +TargetEntry * tlist_member_match_var(Var *var, List *targetlist) { ListCell *temp; diff --git a/src/backend/optimizer/util/var.c b/src/backend/optimizer/util/var.c index 7db86c39ef..54441e692b 100644 --- a/src/backend/optimizer/util/var.c +++ b/src/backend/optimizer/util/var.c @@ -461,6 +461,14 @@ contain_vars_of_level_walker(Node *node, int *sublevels_up) return true; /* abort the tree traversal and return true */ /* else fall through to check the contained expr */ } + if (IsA(node, RangeTblEntry)) + { + RangeTblEntry *rte = (RangeTblEntry *) node; + + /* Someone can call the routine on a field of Query struct */ + return range_table_entry_walker(rte, contain_vars_of_level_walker, + (void *) sublevels_up, 0); + } if (IsA(node, Query)) { /* Recurse into subselects */ diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 05ab087934..55791b474b 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -765,6 +765,16 @@ StaticAssertDecl(lengthof(config_type_names) == (PGC_ENUM + 1), struct config_bool ConfigureNamesBool[] = { + { + {"optimize_correlated_subqueries", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("optimize_correlated_subqueries."), + NULL, + GUC_EXPLAIN | GUC_NOT_IN_SAMPLE + }, + &optimize_correlated_subqueries, + true, + NULL, NULL, NULL + }, { {"enable_seqscan", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's use of sequential-scan plans."), diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h index 409005bae9..cdf3fdce1a 100644 --- a/src/include/optimizer/optimizer.h +++ b/src/include/optimizer/optimizer.h @@ -88,6 +88,7 @@ extern PGDLLIMPORT double parallel_tuple_cost; extern PGDLLIMPORT double parallel_setup_cost; extern PGDLLIMPORT double recursive_worktable_factor; extern PGDLLIMPORT int effective_cache_size; +extern PGDLLIMPORT bool optimize_correlated_subqueries; extern double clamp_row_est(double nrows); extern long clamp_cardinality_to_long(Cardinality x); diff --git a/src/include/optimizer/subselect.h b/src/include/optimizer/subselect.h index 456d3076e0..4e126e45ee 100644 --- a/src/include/optimizer/subselect.h +++ b/src/include/optimizer/subselect.h @@ -19,7 +19,8 @@ extern void SS_process_ctes(PlannerInfo *root); extern JoinExpr *convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, - Relids available_rels); + Relids available_rels, + JoinExpr *lowest_outer_join); extern JoinExpr *convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, bool under_not, diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h index 04668ba1c0..7627e7f679 100644 --- a/src/include/optimizer/tlist.h +++ b/src/include/optimizer/tlist.h @@ -18,6 +18,7 @@ extern TargetEntry *tlist_member(Expr *node, List *targetlist); +extern TargetEntry *tlist_member_match_var(Var *var, List *targetlist); extern List *add_to_flat_tlist(List *tlist, List *exprs); diff --git a/src/test/regress/expected/prepare.out b/src/test/regress/expected/prepare.out index 5815e17b39..749b3faf64 100644 --- a/src/test/regress/expected/prepare.out +++ b/src/test/regress/expected/prepare.out @@ -184,6 +184,24 @@ SELECT name, statement, parameter_types, result_types FROM pg_prepared_statement | UPDATE tenk1 SET stringu1 = $2 WHERE unique1 = $1; | | (6 rows) +-- The optimization on unnesting of correlated subqueries should work +PREPARE q9(name,int) AS + SELECT * FROM tenk1 upper WHERE unique1 IN ( + SELECT sub.unique2 FROM tenk1 sub + WHERE sub.stringu1 = $1 AND sub.unique1 = upper.unique2 + $2); +EXPLAIN (COSTS OFF) EXECUTE q9('abc',2); + QUERY PLAN +----------------------------------------------------- + Nested Loop + -> HashAggregate + Group Key: sub.unique2, sub.unique1 + -> Seq Scan on tenk1 sub + Filter: (stringu1 = 'abc'::name) + -> Index Scan using tenk1_unique1 on tenk1 upper + Index Cond: (unique1 = sub.unique2) + Filter: (sub.unique1 = (unique2 + 2)) +(8 rows) + -- test DEALLOCATE ALL; DEALLOCATE ALL; SELECT name, statement, parameter_types FROM pg_prepared_statements diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 63d26d44fc..1523770984 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -164,6 +164,35 @@ SELECT f1 AS "Correlated Field", f2 AS "Second Field" 3 | 3 (6 rows) +EXPLAIN (COSTS OFF) SELECT f1 AS "Correlated Field", f3 AS "Second Field" + FROM SUBSELECT_TBL upper + WHERE f1 IN + (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3); + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Hash Join + Hash Cond: ((upper.f1 = subselect_tbl.f2) AND ((upper.f2)::double precision = subselect_tbl.f3)) + -> Seq Scan on subselect_tbl upper + -> Hash + -> HashAggregate + Group Key: subselect_tbl.f2, subselect_tbl.f3 + -> Seq Scan on subselect_tbl +(7 rows) + +-- Still doesn't work for NOT IN +EXPLAIN (COSTS OFF) SELECT f1 AS "Correlated Field", f3 AS "Second Field" + FROM SUBSELECT_TBL upper + WHERE f1 NOT IN + (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3); + QUERY PLAN +------------------------------------------------------- + Seq Scan on subselect_tbl upper + Filter: (NOT (SubPlan 1)) + SubPlan 1 + -> Seq Scan on subselect_tbl + Filter: ((upper.f2)::double precision = f3) +(5 rows) + SELECT f1 AS "Correlated Field", f3 AS "Second Field" FROM SUBSELECT_TBL upper WHERE f1 IN @@ -177,6 +206,415 @@ SELECT f1 AS "Correlated Field", f3 AS "Second Field" 3 | 3 (5 rows) +-- Constraints, imposed by LATERAL references, prohibit flattening of underlying +-- Sublink. +EXPLAIN (COSTS OFF) +SELECT count(*) FROM SUBSELECT_TBL a + WHERE EXISTS (SELECT f2 FROM SUBSELECT_TBL b WHERE f3 IN ( + SELECT f3 FROM SUBSELECT_TBL c WHERE c.f1 = a.f2)); + QUERY PLAN +----------------------------------------------- + Aggregate + -> Nested Loop Semi Join + Join Filter: (SubPlan 1) + -> Seq Scan on subselect_tbl a + -> Materialize + -> Seq Scan on subselect_tbl b + SubPlan 1 + -> Seq Scan on subselect_tbl c + Filter: (f1 = a.f2) +(9 rows) + +SELECT count(*) FROM SUBSELECT_TBL a + WHERE EXISTS (SELECT f2 FROM SUBSELECT_TBL b WHERE f3 IN ( + SELECT f3 FROM SUBSELECT_TBL c WHERE c.f1 = a.f2)); + count +------- + 5 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM SUBSELECT_TBL a + WHERE NOT EXISTS (SELECT f2 FROM SUBSELECT_TBL b WHERE f3 IN ( + SELECT f3 FROM SUBSELECT_TBL c WHERE c.f1 = a.f2)); + QUERY PLAN +----------------------------------------------- + Aggregate + -> Nested Loop Anti Join + Join Filter: (SubPlan 1) + -> Seq Scan on subselect_tbl a + -> Materialize + -> Seq Scan on subselect_tbl b + SubPlan 1 + -> Seq Scan on subselect_tbl c + Filter: (f1 = a.f2) +(9 rows) + +-- Prohibit to unnest subquery - quals contain lateral references to rels +-- outside a higher outer join. +EXPLAIN (COSTS OFF) +SELECT count(*) FROM SUBSELECT_TBL a LEFT JOIN SUBSELECT_TBL b ON b.f1 IN ( + SELECT c.f2 FROM SUBSELECT_TBL c WHERE c.f3 = a.f2); + QUERY PLAN +--------------------------------------------------------- + Aggregate + -> Nested Loop Left Join + Join Filter: (SubPlan 1) + -> Seq Scan on subselect_tbl a + -> Materialize + -> Seq Scan on subselect_tbl b + SubPlan 1 + -> Seq Scan on subselect_tbl c + Filter: (f3 = (a.f2)::double precision) +(9 rows) + +SELECT count(*) FROM SUBSELECT_TBL a LEFT JOIN SUBSELECT_TBL b ON b.f1 IN ( + SELECT c.f2 FROM SUBSELECT_TBL c WHERE c.f3 = a.f2); + count +------- + 18 +(1 row) + +EXPLAIN (COSTS OFF) + SELECT * FROM subselect_tbl a + WHERE a.f1 IN + (SELECT b.f2 FROM subselect_tbl b WHERE b.f2 = a.f1) +; -- Optimizer removes excess clause + QUERY PLAN +----------------------------------------------- + Hash Join + Hash Cond: (a.f1 = b.f2) + -> Seq Scan on subselect_tbl a + -> Hash + -> HashAggregate + Group Key: b.f2, b.f2 + -> Seq Scan on subselect_tbl b +(7 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM subselect_tbl a +WHERE a.f1 IN (SELECT b.f2*b.f1/b.f3+2 FROM subselect_tbl b WHERE b.f3 = a.f2) +; -- a bit more complex targetlist expression shouldn't cut off the optimization + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------- + Hash Join + Hash Cond: (((a.f1)::double precision = ((((b.f2 * b.f1))::double precision / b.f3) + '2'::double precision)) AND ((a.f2)::double precision = b.f3)) + -> Seq Scan on subselect_tbl a + -> Hash + -> HashAggregate + Group Key: ((((b.f2 * b.f1))::double precision / b.f3) + '2'::double precision), b.f3 + -> Seq Scan on subselect_tbl b +(7 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM subselect_tbl a +WHERE (a.f1,a.f3) IN (SELECT b.f2,b.f1 FROM subselect_tbl b WHERE b.f3 = a.f2) +; -- Two variables in a target list + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Hash Join + Hash Cond: ((a.f1 = b.f2) AND (a.f3 = (b.f1)::double precision) AND ((a.f2)::double precision = b.f3)) + -> Seq Scan on subselect_tbl a + -> Hash + -> HashAggregate + Group Key: b.f2, (b.f1)::double precision, b.f3 + -> Seq Scan on subselect_tbl b +(7 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM subselect_tbl a +WHERE (a.f1,a.f3) IN (SELECT b.f2,b.f1*2 FROM subselect_tbl b WHERE b.f3 = a.f2) +; -- Expression as an element of composite type shouldn't cut off the optimization + QUERY PLAN +---------------------------------------------------------------------------------------------------------------- + Hash Join + Hash Cond: ((a.f1 = b.f2) AND (a.f3 = ((b.f1 * 2))::double precision) AND ((a.f2)::double precision = b.f3)) + -> Seq Scan on subselect_tbl a + -> Hash + -> HashAggregate + Group Key: b.f2, ((b.f1 * 2))::double precision, b.f3 + -> Seq Scan on subselect_tbl b +(7 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM subselect_tbl a +WHERE a.f1 IN (SELECT b.f2 FROM subselect_tbl b WHERE b.f3 = a.f2 AND a.f1 = b.f3 AND b.f3 <> 12) +; -- Two expressions with correlated variables + QUERY PLAN +------------------------------------------------------------------------- + Hash Join + Hash Cond: ((b.f2 = a.f1) AND (b.f3 = (a.f2)::double precision)) + -> HashAggregate + Group Key: b.f2, b.f3, b.f3 + -> Seq Scan on subselect_tbl b + Filter: (f3 <> '12'::double precision) + -> Hash + -> Seq Scan on subselect_tbl a + Filter: ((f2)::double precision = (f1)::double precision) +(9 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM subselect_tbl a +WHERE a.f1 IN (SELECT b.f2 FROM subselect_tbl b WHERE b.f3 = a.f2 AND a.f2 = b.f3 AND b.f1 < 12) +; -- Two expressions with correlated variables relates on one upper variable. + QUERY PLAN +-------------------------------------------------------------------- + Hash Join + Hash Cond: ((a.f1 = b.f2) AND ((a.f2)::double precision = b.f3)) + -> Seq Scan on subselect_tbl a + -> Hash + -> HashAggregate + Group Key: b.f2, b.f3, b.f3 + -> Seq Scan on subselect_tbl b + Filter: (f1 < 12) +(8 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM subselect_tbl a + WHERE a.f1 IN ( + SELECT b.f2 FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12 + GROUP BY (b.f2) + ) +; -- Pull clauses without unnesting the query. XXX: It reduces performance in most use cases, doesn't it? + QUERY PLAN +----------------------------------------- + Hash Join + Hash Cond: (b.f2 = a.f1) + -> HashAggregate + Group Key: b.f2 + -> Seq Scan on subselect_tbl b + Filter: (f2 < 12) + -> Hash + -> Seq Scan on subselect_tbl a + Filter: (f1 = f2) +(9 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM subselect_tbl a + WHERE a.f1 IN ( + SELECT b.f2 FROM ( + SELECT x AS f1, x+1 AS f2, x+2 AS f3 FROM generate_series(1,10) x + ) b WHERE b.f2 = a.f2 AND b.f1 BETWEEN 12 AND 14 + UNION ALL + SELECT c.f1 FROM subselect_tbl c + WHERE c.f2 = a.f2 AND c.f1 BETWEEN 12 AND 14 + ) +; -- Disallow flattening of union all + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on subselect_tbl a + Filter: (SubPlan 1) + SubPlan 1 + -> Append + -> Function Scan on generate_series x + Filter: ((x >= 12) AND (x <= 14) AND ((x + 1) = a.f2)) + -> Seq Scan on subselect_tbl c + Filter: ((f1 >= 12) AND (f1 <= 14) AND (f2 = a.f2)) +(8 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM subselect_tbl a + WHERE a.f1 IN ( + SELECT b.f1 FROM subselect_tbl b JOIN subselect_tbl c ON (b.f1 = c.f2) + WHERE c.f2 = a.f2 AND c.f1 BETWEEN 12 AND 14 + ) +; -- XXX: Could we flatten such subquery? + QUERY PLAN +--------------------------------------------------------------------- + Seq Scan on subselect_tbl a + Filter: (SubPlan 1) + SubPlan 1 + -> Nested Loop + -> Seq Scan on subselect_tbl c + Filter: ((f1 >= 12) AND (f1 <= 14) AND (f2 = a.f2)) + -> Seq Scan on subselect_tbl b + Filter: (f1 = a.f2) +(8 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM subselect_tbl a + WHERE a.f1 IN ( + SELECT b.f1 FROM subselect_tbl b, subselect_tbl c + WHERE b.f1 = c.f2 AND c.f2 = a.f2 AND c.f1 IS NOT NULL + ) +; -- TODO: Could we flatten such subquery? + QUERY PLAN +------------------------------------------------------------------ + Seq Scan on subselect_tbl a + Filter: (SubPlan 1) + SubPlan 1 + -> Nested Loop + -> Seq Scan on subselect_tbl b + Filter: (f1 = a.f2) + -> Materialize + -> Seq Scan on subselect_tbl c + Filter: ((f1 IS NOT NULL) AND (f2 = a.f2)) +(9 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM subselect_tbl a + WHERE (a.f1,f2) IN ( + SELECT b.f2, avg(f3) FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12 + GROUP BY (b.f2) + ) +; -- Doesn't support unnesting with aggregate functions + QUERY PLAN +----------------------------------------------------- + Seq Scan on subselect_tbl a + Filter: (SubPlan 1) + SubPlan 1 + -> GroupAggregate + Group Key: b.f2 + -> Seq Scan on subselect_tbl b + Filter: ((f2 < 12) AND (f2 = a.f2)) +(7 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM subselect_tbl a + WHERE a.f1 IN ( + WITH cte AS ( + SELECT * FROM subselect_tbl c WHERE f1 < 42 AND f2 = a.f1 + ) + SELECT b.f2 FROM cte b WHERE b.f2 = a.f2 AND b.f2 < 12 + ) +; -- Give up optimization if CTE in subquery contains links to upper relation. + QUERY PLAN +------------------------------------------------------------------- + Seq Scan on subselect_tbl a + Filter: (SubPlan 1) + SubPlan 1 + -> Result + One-Time Filter: (a.f1 = a.f2) + -> Seq Scan on subselect_tbl c + Filter: ((f1 < 42) AND (f2 < 12) AND (f2 = a.f2)) +(7 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM subselect_tbl a + WHERE a.f1 IN ( + WITH cte AS ( + SELECT * FROM subselect_tbl c WHERE f1 < 42 + ) + SELECT b.f2 FROM cte b WHERE b.f2 = a.f2 AND b.f2 < 12 + ) +; -- Correlated subquery with trivial CTE can be pulled up + QUERY PLAN +------------------------------------------------- + Hash Semi Join + Hash Cond: (a.f1 = c.f2) + -> Seq Scan on subselect_tbl a + Filter: (f1 = f2) + -> Hash + -> Seq Scan on subselect_tbl c + Filter: ((f1 < 42) AND (f2 < 12)) +(7 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM subselect_tbl a + WHERE (a.f1,a.f3) IN ( + SELECT b.f2, avg(b.f3) OVER (PARTITION BY b.f2) + FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12 + ) +; -- Doesn't support unnesting with window functions in target list + QUERY PLAN +----------------------------------------------------- + Seq Scan on subselect_tbl a + Filter: (SubPlan 1) + SubPlan 1 + -> WindowAgg + -> Seq Scan on subselect_tbl b + Filter: ((f2 < 12) AND (f2 = a.f2)) +(6 rows) + +-- A having qual, group clause and so on, with links to upper relation variable +-- cut off the optimization because another case we must rewrite the subquery +-- as a lateral TargetEntry and arrange these links. +-- But now, machinery of convert_ANY_sublink_to_join() isn't prepared for such +-- complex work and it would induce additional complex code. +EXPLAIN (COSTS OFF) + SELECT * FROM subselect_tbl a + WHERE a.f1 IN ( + SELECT b.f2 + FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12 + GROUP BY (b.f2) HAVING b.f2 > a.f3 + ) +; + QUERY PLAN +----------------------------------------------------------------------------------------- + Seq Scan on subselect_tbl a + Filter: (SubPlan 1) + SubPlan 1 + -> Group + Group Key: b.f2 + -> Seq Scan on subselect_tbl b + Filter: ((f2 < 12) AND (f2 = a.f2) AND ((f2)::double precision > a.f3)) +(7 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM subselect_tbl a + WHERE a.f1 IN ( + SELECT b.f2 FROM ( + SELECT x AS f1, x+1 AS f2, x+2 AS f3 FROM generate_series(1,a.f1) x + ) b WHERE b.f2 = a.f2 AND b.f1 < 12 + ) +; -- Don't allow links to upper query in FROM section of subquery + QUERY PLAN +--------------------------------------------------- + Seq Scan on subselect_tbl a + Filter: (SubPlan 1) + SubPlan 1 + -> Function Scan on generate_series x + Filter: ((x < 12) AND ((x + 1) = a.f2)) +(5 rows) + +EXPLAIN (COSTS OFF) + SELECT * FROM subselect_tbl a + WHERE a.f1 IN ( + SELECT a.f1 FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12 + GROUP BY (a.f1) + ) +; -- GROUP BY contains link to upper relation + QUERY PLAN +----------------------------------------------------- + Seq Scan on subselect_tbl a + Filter: (SubPlan 1) + SubPlan 1 + -> Group + Group Key: a.f1 + -> Seq Scan on subselect_tbl b + Filter: ((f2 < 12) AND (f2 = a.f2)) +(7 rows) + +-- Flatten subquery with not-correlated clauses. The same result set returned +EXPLAIN (COSTS OFF) SELECT f1 AS "Correlated Field", f3 AS "Second Field" + FROM SUBSELECT_TBL upper + WHERE f1 IN + (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3 AND f2 <> 42); + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Hash Join + Hash Cond: ((upper.f1 = subselect_tbl.f2) AND ((upper.f2)::double precision = subselect_tbl.f3)) + -> Seq Scan on subselect_tbl upper + -> Hash + -> HashAggregate + Group Key: subselect_tbl.f2, subselect_tbl.f3 + -> Seq Scan on subselect_tbl + Filter: (f2 <> 42) +(8 rows) + +SELECT f1 AS "Correlated Field", f3 AS "Second Field" + FROM SUBSELECT_TBL upper + WHERE f1 IN + (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3 AND f2 <> 42); + Correlated Field | Second Field +------------------+-------------- + 2 | 4 + 3 | 5 + 1 | 1 + 2 | 2 + 3 | 3 +(5 rows) + SELECT f1 AS "Correlated Field", f3 AS "Second Field" FROM SUBSELECT_TBL upper WHERE f3 IN (SELECT upper.f1 + f2 FROM SUBSELECT_TBL diff --git a/src/test/regress/sql/prepare.sql b/src/test/regress/sql/prepare.sql index c6098dc95c..8a8164ee99 100644 --- a/src/test/regress/sql/prepare.sql +++ b/src/test/regress/sql/prepare.sql @@ -78,6 +78,13 @@ PREPARE q8 AS SELECT name, statement, parameter_types, result_types FROM pg_prepared_statements ORDER BY name; +-- The optimization on unnesting of correlated subqueries should work +PREPARE q9(name,int) AS + SELECT * FROM tenk1 upper WHERE unique1 IN ( + SELECT sub.unique2 FROM tenk1 sub + WHERE sub.stringu1 = $1 AND sub.unique1 = upper.unique2 + $2); +EXPLAIN (COSTS OFF) EXECUTE q9('abc',2); + -- test DEALLOCATE ALL; DEALLOCATE ALL; SELECT name, statement, parameter_types FROM pg_prepared_statements diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index 40276708c9..40ed61d508 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -67,11 +67,173 @@ SELECT f1 AS "Correlated Field", f2 AS "Second Field" FROM SUBSELECT_TBL upper WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL WHERE f1 = upper.f1); +EXPLAIN (COSTS OFF) SELECT f1 AS "Correlated Field", f3 AS "Second Field" + FROM SUBSELECT_TBL upper + WHERE f1 IN + (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3); +-- Still doesn't work for NOT IN +EXPLAIN (COSTS OFF) SELECT f1 AS "Correlated Field", f3 AS "Second Field" + FROM SUBSELECT_TBL upper + WHERE f1 NOT IN + (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3); + SELECT f1 AS "Correlated Field", f3 AS "Second Field" FROM SUBSELECT_TBL upper WHERE f1 IN (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3); +-- Constraints, imposed by LATERAL references, prohibit flattening of underlying +-- Sublink. +EXPLAIN (COSTS OFF) +SELECT count(*) FROM SUBSELECT_TBL a + WHERE EXISTS (SELECT f2 FROM SUBSELECT_TBL b WHERE f3 IN ( + SELECT f3 FROM SUBSELECT_TBL c WHERE c.f1 = a.f2)); +SELECT count(*) FROM SUBSELECT_TBL a + WHERE EXISTS (SELECT f2 FROM SUBSELECT_TBL b WHERE f3 IN ( + SELECT f3 FROM SUBSELECT_TBL c WHERE c.f1 = a.f2)); +EXPLAIN (COSTS OFF) +SELECT count(*) FROM SUBSELECT_TBL a + WHERE NOT EXISTS (SELECT f2 FROM SUBSELECT_TBL b WHERE f3 IN ( + SELECT f3 FROM SUBSELECT_TBL c WHERE c.f1 = a.f2)); + +-- Prohibit to unnest subquery - quals contain lateral references to rels +-- outside a higher outer join. +EXPLAIN (COSTS OFF) +SELECT count(*) FROM SUBSELECT_TBL a LEFT JOIN SUBSELECT_TBL b ON b.f1 IN ( + SELECT c.f2 FROM SUBSELECT_TBL c WHERE c.f3 = a.f2); +SELECT count(*) FROM SUBSELECT_TBL a LEFT JOIN SUBSELECT_TBL b ON b.f1 IN ( + SELECT c.f2 FROM SUBSELECT_TBL c WHERE c.f3 = a.f2); + +EXPLAIN (COSTS OFF) + SELECT * FROM subselect_tbl a + WHERE a.f1 IN + (SELECT b.f2 FROM subselect_tbl b WHERE b.f2 = a.f1) +; -- Optimizer removes excess clause +EXPLAIN (COSTS OFF) +SELECT * FROM subselect_tbl a +WHERE a.f1 IN (SELECT b.f2*b.f1/b.f3+2 FROM subselect_tbl b WHERE b.f3 = a.f2) +; -- a bit more complex targetlist expression shouldn't cut off the optimization +EXPLAIN (COSTS OFF) +SELECT * FROM subselect_tbl a +WHERE (a.f1,a.f3) IN (SELECT b.f2,b.f1 FROM subselect_tbl b WHERE b.f3 = a.f2) +; -- Two variables in a target list +EXPLAIN (COSTS OFF) +SELECT * FROM subselect_tbl a +WHERE (a.f1,a.f3) IN (SELECT b.f2,b.f1*2 FROM subselect_tbl b WHERE b.f3 = a.f2) +; -- Expression as an element of composite type shouldn't cut off the optimization +EXPLAIN (COSTS OFF) +SELECT * FROM subselect_tbl a +WHERE a.f1 IN (SELECT b.f2 FROM subselect_tbl b WHERE b.f3 = a.f2 AND a.f1 = b.f3 AND b.f3 <> 12) +; -- Two expressions with correlated variables +EXPLAIN (COSTS OFF) +SELECT * FROM subselect_tbl a +WHERE a.f1 IN (SELECT b.f2 FROM subselect_tbl b WHERE b.f3 = a.f2 AND a.f2 = b.f3 AND b.f1 < 12) +; -- Two expressions with correlated variables relates on one upper variable. +EXPLAIN (COSTS OFF) + SELECT * FROM subselect_tbl a + WHERE a.f1 IN ( + SELECT b.f2 FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12 + GROUP BY (b.f2) + ) +; -- Pull clauses without unnesting the query. XXX: It reduces performance in most use cases, doesn't it? + +EXPLAIN (COSTS OFF) + SELECT * FROM subselect_tbl a + WHERE a.f1 IN ( + SELECT b.f2 FROM ( + SELECT x AS f1, x+1 AS f2, x+2 AS f3 FROM generate_series(1,10) x + ) b WHERE b.f2 = a.f2 AND b.f1 BETWEEN 12 AND 14 + UNION ALL + SELECT c.f1 FROM subselect_tbl c + WHERE c.f2 = a.f2 AND c.f1 BETWEEN 12 AND 14 + ) +; -- Disallow flattening of union all +EXPLAIN (COSTS OFF) + SELECT * FROM subselect_tbl a + WHERE a.f1 IN ( + SELECT b.f1 FROM subselect_tbl b JOIN subselect_tbl c ON (b.f1 = c.f2) + WHERE c.f2 = a.f2 AND c.f1 BETWEEN 12 AND 14 + ) +; -- XXX: Could we flatten such subquery? +EXPLAIN (COSTS OFF) + SELECT * FROM subselect_tbl a + WHERE a.f1 IN ( + SELECT b.f1 FROM subselect_tbl b, subselect_tbl c + WHERE b.f1 = c.f2 AND c.f2 = a.f2 AND c.f1 IS NOT NULL + ) +; -- TODO: Could we flatten such subquery? +EXPLAIN (COSTS OFF) + SELECT * FROM subselect_tbl a + WHERE (a.f1,f2) IN ( + SELECT b.f2, avg(f3) FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12 + GROUP BY (b.f2) + ) +; -- Doesn't support unnesting with aggregate functions +EXPLAIN (COSTS OFF) + SELECT * FROM subselect_tbl a + WHERE a.f1 IN ( + WITH cte AS ( + SELECT * FROM subselect_tbl c WHERE f1 < 42 AND f2 = a.f1 + ) + SELECT b.f2 FROM cte b WHERE b.f2 = a.f2 AND b.f2 < 12 + ) +; -- Give up optimization if CTE in subquery contains links to upper relation. +EXPLAIN (COSTS OFF) + SELECT * FROM subselect_tbl a + WHERE a.f1 IN ( + WITH cte AS ( + SELECT * FROM subselect_tbl c WHERE f1 < 42 + ) + SELECT b.f2 FROM cte b WHERE b.f2 = a.f2 AND b.f2 < 12 + ) +; -- Correlated subquery with trivial CTE can be pulled up +EXPLAIN (COSTS OFF) + SELECT * FROM subselect_tbl a + WHERE (a.f1,a.f3) IN ( + SELECT b.f2, avg(b.f3) OVER (PARTITION BY b.f2) + FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12 + ) +; -- Doesn't support unnesting with window functions in target list + +-- A having qual, group clause and so on, with links to upper relation variable +-- cut off the optimization because another case we must rewrite the subquery +-- as a lateral TargetEntry and arrange these links. +-- But now, machinery of convert_ANY_sublink_to_join() isn't prepared for such +-- complex work and it would induce additional complex code. +EXPLAIN (COSTS OFF) + SELECT * FROM subselect_tbl a + WHERE a.f1 IN ( + SELECT b.f2 + FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12 + GROUP BY (b.f2) HAVING b.f2 > a.f3 + ) +; +EXPLAIN (COSTS OFF) + SELECT * FROM subselect_tbl a + WHERE a.f1 IN ( + SELECT b.f2 FROM ( + SELECT x AS f1, x+1 AS f2, x+2 AS f3 FROM generate_series(1,a.f1) x + ) b WHERE b.f2 = a.f2 AND b.f1 < 12 + ) +; -- Don't allow links to upper query in FROM section of subquery +EXPLAIN (COSTS OFF) + SELECT * FROM subselect_tbl a + WHERE a.f1 IN ( + SELECT a.f1 FROM subselect_tbl b WHERE b.f2 = a.f2 AND b.f2 < 12 + GROUP BY (a.f1) + ) +; -- GROUP BY contains link to upper relation + +-- Flatten subquery with not-correlated clauses. The same result set returned +EXPLAIN (COSTS OFF) SELECT f1 AS "Correlated Field", f3 AS "Second Field" + FROM SUBSELECT_TBL upper + WHERE f1 IN + (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3 AND f2 <> 42); +SELECT f1 AS "Correlated Field", f3 AS "Second Field" + FROM SUBSELECT_TBL upper + WHERE f1 IN + (SELECT f2 FROM SUBSELECT_TBL WHERE CAST(upper.f2 AS float) = f3 AND f2 <> 42); + SELECT f1 AS "Correlated Field", f3 AS "Second Field" FROM SUBSELECT_TBL upper WHERE f3 IN (SELECT upper.f1 + f2 FROM SUBSELECT_TBL diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 97c9bc1861..4803e7a978 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -470,6 +470,7 @@ CopySource CopyStmt CopyToState CopyToStateData +correlated_t Cost CostSelector Counters -- 2.37.3