From 3d31893eeb3ed96c7c9292c6961a0455c2a3af4c Mon Sep 17 00:00:00 2001 From: "Andrei V. Lepikhov" Date: Mon, 23 Sep 2024 14:30:28 +0200 Subject: [PATCH v2] Stabilize incremental sort estimation. Carefully identify an expression that can represent the path key on sort estimation. Columns may have different distinct estimations that can trigger wrong cost estimations and choice of sort strategy. Sorting has only pathkeys as input for the cost estimation. This patch, instead of a blind choice of the first equivalence class member, attempts to find columns that are used under the estimating sort operator and chooses the most negligible ndistinct value. --- src/backend/optimizer/path/costsize.c | 52 ++++++++++++++++++++++++--- src/backend/optimizer/util/pathnode.c | 1 + src/include/optimizer/cost.h | 1 + 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index e1523d15df..9c08f6b80c 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1984,6 +1984,50 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost, *run_cost = cpu_operator_cost * tuples; } +static Expr * +identify_sort_expression(PlannerInfo *root, PathKey *key, Relids relids) +{ + EquivalenceMember *candidate = linitial(key->pk_eclass->ec_members); + double ndistinct = -1.0; /* Not defined */ + + if (root == NULL || list_length(key->pk_eclass->ec_members) == 1) + /* Fast path */ + return candidate->em_expr; + + foreach_node(EquivalenceMember, em, key->pk_eclass->ec_members) + { + VariableStatData vardata; + bool isdefault = true; + double ndist; + + /* Sorting over single partition? */ + if (em->em_is_child && bms_num_members(relids) > 1) + continue; + + if (!bms_is_subset(em->em_relids, relids)) + /* + * Avoid expressions not based on a table column. At least, the + * candidate value already initialised as the first EC member. + */ + continue; + + /* Let's check candidate's ndistinct value */ + examine_variable(root, (Node *) em->em_expr, 0, &vardata); + if (HeapTupleIsValid(vardata.statsTuple)) + ndist = get_variable_numdistinct(&vardata, &isdefault); + ReleaseVariableStats(vardata); + + if (ndistinct < 0.0 || (!isdefault && ndist < ndistinct)) + { + candidate = em; + ndistinct = ndist; + } + } + + Assert(candidate != NULL); + return candidate->em_expr; +} + /* * cost_incremental_sort * Determines and returns the cost of sorting a relation incrementally, when @@ -1999,6 +2043,7 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost, void cost_incremental_sort(Path *path, PlannerInfo *root, List *pathkeys, int presorted_keys, + Relids relids, int input_disabled_nodes, Cost input_startup_cost, Cost input_total_cost, double input_tuples, int width, Cost comparison_cost, int sort_mem, @@ -2053,21 +2098,20 @@ cost_incremental_sort(Path *path, foreach(l, pathkeys) { PathKey *key = (PathKey *) lfirst(l); - EquivalenceMember *member = (EquivalenceMember *) - linitial(key->pk_eclass->ec_members); + Expr *expr = identify_sort_expression(root, key, relids); /* * Check if the expression contains Var with "varno 0" so that we * don't call estimate_num_groups in that case. */ - if (bms_is_member(0, pull_varnos(root, (Node *) member->em_expr))) + if (bms_is_member(0, pull_varnos(root, (Node *) expr))) { unknown_varno = true; break; } /* expression not containing any Vars with "varno 0" */ - presortedExprs = lappend(presortedExprs, member->em_expr); + presortedExprs = lappend(presortedExprs, expr); if (foreach_current_index(l) + 1 >= presorted_keys) break; diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index fc97bf6ee2..8ed566b1bc 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -3055,6 +3055,7 @@ create_incremental_sort_path(PlannerInfo *root, cost_incremental_sort(&pathnode->path, root, pathkeys, presorted_keys, + subpath->parent->relids, subpath->disabled_nodes, subpath->startup_cost, subpath->total_cost, diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index 854a782944..ebd5f2c5d9 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -114,6 +114,7 @@ extern void cost_sort(Path *path, PlannerInfo *root, double limit_tuples); extern void cost_incremental_sort(Path *path, PlannerInfo *root, List *pathkeys, int presorted_keys, + Relids relids, int input_disabled_nodes, Cost input_startup_cost, Cost input_total_cost, double input_tuples, int width, Cost comparison_cost, int sort_mem, -- 2.46.1