diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c index afb5445b77..9b1d6f4e43 100644 --- a/src/backend/optimizer/plan/planagg.c +++ b/src/backend/optimizer/plan/planagg.c @@ -253,6 +253,20 @@ can_minmax_aggs(PlannerInfo *root, List **context) if (list_length(aggref->args) != 1) return false; /* it couldn't be MIN/MAX */ + /* + * We might implement the optimization when a FILTER clause is present + * by adding the filter to the quals of the generated subquery. For + * now, just punt. + */ + if (aggref->aggfilter != NULL) + return false; + + curTarget = (TargetEntry *) linitial(aggref->args); + + aggsortop = fetch_agg_sort_op(aggref->aggfnoid); + if (!OidIsValid(aggsortop)) + return false; /* not a MIN/MAX aggregate */ + /* * ORDER BY is usually irrelevant for MIN/MAX, but it can change the * outcome if the aggsortop's operator class recognizes non-identical @@ -267,22 +281,55 @@ can_minmax_aggs(PlannerInfo *root, List **context) * quickly. */ if (aggref->aggorder != NIL) - return false; - /* note: we do not care if DISTINCT is mentioned ... */ - - /* - * We might implement the optimization when a FILTER clause is present - * by adding the filter to the quals of the generated subquery. For - * now, just punt. - */ - if (aggref->aggfilter != NULL) - return false; + { + SortGroupClause *orderClause; + + /* + * If the order clause is the same column as the one we're + * aggregating, we can still use the index: It is undefined which + * value is MIN() or MAX(), as well as which value is first or + * last when sorted. So, we can still use the index IFF the + * aggregated expression equals the expression used in the + * ordering operation. + */ + + /* + * We only accept a single argument to min/max aggregates, + * orderings that have more clauses won't provide correct results. + */ + Assert(list_length(aggref->aggorder) == 1); + + orderClause = castNode(SortGroupClause, linitial(aggref->aggorder)); + + Assert(orderClause->tleSortGroupRef == curTarget->ressortgroupref); + + if (orderClause->sortop != aggsortop) + { + List *btclasses; + ListCell *cell; + bool match = false; + + btclasses = get_op_btree_interpretation(orderClause->sortop); + + foreach(cell, btclasses) + { + OpBtreeInterpretation *interpretation; + interpretation = (OpBtreeInterpretation *) lfirst(cell); + + if (op_in_opfamily(aggsortop, interpretation->opfamily_id)) + { + match = true; + break; + } + } + + if (!match) + return false; + } + } - aggsortop = fetch_agg_sort_op(aggref->aggfnoid); - if (!OidIsValid(aggsortop)) - return false; /* not a MIN/MAX aggregate */ + /* note: we do not care if DISTINCT is mentioned ... */ - curTarget = (TargetEntry *) linitial(aggref->args); if (contain_mutable_functions((Node *) curTarget->expr)) return false; /* not potentially indexable */ diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index a5596ab210..94c8cf77f8 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -1003,6 +1003,124 @@ select max(unique1) from tenk1 where unique1 > 42; 9999 (1 row) +-- When sorting on the column that's being aggregated, indexes can also be +-- used, but only when the aggregate's operator has the same ordering behavior +-- as the ORDER BY-clause, i.e. if it is in the same btree opclass as the one +-- chosen for the ORDER BY clause. +explain (costs off) + select min(unique1 ORDER BY unique1 ASC NULLS LAST) from tenk1; + QUERY PLAN +------------------------------------------------------------ + Result + InitPlan 1 + -> Limit + -> Index Only Scan using tenk1_unique1 on tenk1 + Index Cond: (unique1 IS NOT NULL) +(5 rows) + +select min(unique1 ORDER BY unique1 ASC NULLS LAST) from tenk1; + min +----- + 0 +(1 row) + +explain (costs off) + select max(unique1 ORDER BY unique1 USING <) from tenk1; + QUERY PLAN +--------------------------------------------------------------------- + Result + InitPlan 1 + -> Limit + -> Index Only Scan Backward using tenk1_unique1 on tenk1 + Index Cond: (unique1 IS NOT NULL) +(5 rows) + +select max(unique1 ORDER BY unique1 USING <) from tenk1; + max +------ + 9999 +(1 row) + +explain (costs off) + select max(unique1 ORDER BY tenthous) from tenk1; + QUERY PLAN +------------------------------- + Aggregate + -> Sort + Sort Key: tenthous + -> Seq Scan on tenk1 +(4 rows) + +select max(unique1 ORDER BY tenthous) from tenk1; + max +------ + 9999 +(1 row) + +CREATE OPERATOR @>@ (function=int4gt, leftarg=int4, rightarg=int4); +CREATE OPERATOR @<@ (function=int4lt, leftarg=int4, rightarg=int4); +CREATE OPERATOR @=@ (function=int4eq, leftarg=int4, rightarg=int4); +CREATE OPERATOR FAMILY my_family USING btree; +CREATE OPERATOR CLASS my_int_ops + FOR TYPE int + USING btree FAMILY my_family AS + OPERATOR 1 @<@ FOR SEARCH, + OPERATOR 5 @>@ FOR SEARCH, + OPERATOR 3 @=@, + FUNCTION 1 btint4cmp; +CREATE AGGREGATE my_int_max ( + BASETYPE = int4, + SFUNC = int4larger, + STYPE = int4, + SORTOP = @>@ +); +CREATE INDEX idx_int4 ON tenk1 (unique1 my_int_ops); +EXPLAIN (VERBOSE, COSTS OFF) +SELECT my_int_max(unique1::int4 ORDER BY unique1::int4 USING @<@ ) from tenk1; + QUERY PLAN +----------------------------------------------------------------------- + Result + Output: (InitPlan 1).col1 + InitPlan 1 + -> Limit + Output: tenk1.unique1 + -> Index Only Scan Backward using idx_int4 on public.tenk1 + Output: tenk1.unique1 + Index Cond: (tenk1.unique1 IS NOT NULL) +(8 rows) + +EXPLAIN (VERBOSE, COSTS OFF) +SELECT my_int_max(unique1::int4 ORDER BY unique1::int4 USING @>@ ) from tenk1; + QUERY PLAN +----------------------------------------------------------------------- + Result + Output: (InitPlan 1).col1 + InitPlan 1 + -> Limit + Output: tenk1.unique1 + -> Index Only Scan Backward using idx_int4 on public.tenk1 + Output: tenk1.unique1 + Index Cond: (tenk1.unique1 IS NOT NULL) +(8 rows) + +DROP AGGREGATE my_int_max(int4); +DROP OPERATOR CLASS my_int_ops USING btree CASCADE; +NOTICE: drop cascades to index idx_int4 +DROP OPERATOR FAMILY my_family USING btree; +DROP OPERATOR @>@ (int4, int4); +DROP OPERATOR @<@ (int4, int4); +DROP OPERATOR @=@ (int4, int4); +-- But even then, the index can't be used if we order by multiple columns. +explain (costs off) + select max(unique1 ORDER BY unique1, tenthous) from tenk1; + QUERY PLAN +------------------------------------- + Aggregate + -> Sort + Sort Key: unique1, tenthous + -> Seq Scan on tenk1 +(4 rows) + -- the planner may choose a generic aggregate here if parallel query is -- enabled, since that plan will be parallel safe and the "optimized" -- plan, which has almost identical cost, will not be. we want to test diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index ca6d1bcfb7..19729ae5f2 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -368,6 +368,57 @@ explain (costs off) select max(unique1) from tenk1 where unique1 > 42; select max(unique1) from tenk1 where unique1 > 42; +-- When sorting on the column that's being aggregated, indexes can also be +-- used, but only when the aggregate's operator has the same ordering behavior +-- as the ORDER BY-clause, i.e. if it is in the same btree opclass as the one +-- chosen for the ORDER BY clause. +explain (costs off) + select min(unique1 ORDER BY unique1 ASC NULLS LAST) from tenk1; +select min(unique1 ORDER BY unique1 ASC NULLS LAST) from tenk1; +explain (costs off) + select max(unique1 ORDER BY unique1 USING <) from tenk1; +select max(unique1 ORDER BY unique1 USING <) from tenk1; +explain (costs off) + select max(unique1 ORDER BY tenthous) from tenk1; +select max(unique1 ORDER BY tenthous) from tenk1; + +CREATE OPERATOR @>@ (function=int4gt, leftarg=int4, rightarg=int4); +CREATE OPERATOR @<@ (function=int4lt, leftarg=int4, rightarg=int4); +CREATE OPERATOR @=@ (function=int4eq, leftarg=int4, rightarg=int4); + +CREATE OPERATOR FAMILY my_family USING btree; +CREATE OPERATOR CLASS my_int_ops + FOR TYPE int + USING btree FAMILY my_family AS + OPERATOR 1 @<@ FOR SEARCH, + OPERATOR 5 @>@ FOR SEARCH, + OPERATOR 3 @=@, + FUNCTION 1 btint4cmp; + +CREATE AGGREGATE my_int_max ( + BASETYPE = int4, + SFUNC = int4larger, + STYPE = int4, + SORTOP = @>@ +); + +CREATE INDEX idx_int4 ON tenk1 (unique1 my_int_ops); +EXPLAIN (VERBOSE, COSTS OFF) +SELECT my_int_max(unique1::int4 ORDER BY unique1::int4 USING @<@ ) from tenk1; +EXPLAIN (VERBOSE, COSTS OFF) +SELECT my_int_max(unique1::int4 ORDER BY unique1::int4 USING @>@ ) from tenk1; + +DROP AGGREGATE my_int_max(int4); +DROP OPERATOR CLASS my_int_ops USING btree CASCADE; +DROP OPERATOR FAMILY my_family USING btree; +DROP OPERATOR @>@ (int4, int4); +DROP OPERATOR @<@ (int4, int4); +DROP OPERATOR @=@ (int4, int4); + +-- But even then, the index can't be used if we order by multiple columns. +explain (costs off) + select max(unique1 ORDER BY unique1, tenthous) from tenk1; + -- the planner may choose a generic aggregate here if parallel query is -- enabled, since that plan will be parallel safe and the "optimized" -- plan, which has almost identical cost, will not be. we want to test