From 6f4096b3a036f2faa9049e910a9a14db12a48753 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Wed, 4 Mar 2020 15:57:25 +0100 Subject: [PATCH 05/11] Apply multi-column MCV lists to ScalarArrayOpExpr Commit ca77b08772 added handling of ScalarArrayOpExpr to dependencies, but there's no good reason not to support them in MCV lists too. In fact, MCV lists can handle all cases, including inequalities and ALL. --- src/backend/statistics/extended_stats.c | 66 ++++++++++- src/backend/statistics/mcv.c | 111 +++++++++++++++++- .../statistics/extended_stats_internal.h | 4 +- src/test/regress/expected/stats_ext.out | 60 ++++++++++ src/test/regress/sql/stats_ext.sql | 20 ++++ 5 files changed, 252 insertions(+), 9 deletions(-) diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index 03e69d057f..159ec7f178 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -993,7 +993,63 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause, return false; /* Check if the expression the right shape (one Var, one Const) */ - if (!examine_opclause_expression(expr, &var, NULL, NULL)) + if (!examine_clause_args(expr->args, &var, NULL, NULL)) + return false; + + /* + * If it's not one of the supported operators ("=", "<", ">", etc.), + * just ignore the clause, as it's not compatible with MCV lists. + * + * This uses the function for estimating selectivity, not the operator + * directly (a bit awkward, but well ...). + */ + switch (get_oprrest(expr->opno)) + { + case F_EQSEL: + case F_NEQSEL: + case F_SCALARLTSEL: + case F_SCALARLESEL: + case F_SCALARGTSEL: + case F_SCALARGESEL: + /* supported, will continue with inspection of the Var */ + break; + + default: + /* other estimators are considered unknown/unsupported */ + return false; + } + + /* + * If there are any securityQuals on the RTE from security barrier + * views or RLS policies, then the user may not have access to all the + * table's data, and we must check that the operator is leak-proof. + * + * If the operator is leaky, then we must ignore this clause for the + * purposes of estimating with MCV lists, otherwise the operator might + * reveal values from the MCV list that the user doesn't have + * permission to see. + */ + if (rte->securityQuals != NIL && + !get_func_leakproof(get_opcode(expr->opno))) + return false; + + return statext_is_compatible_clause_internal(root, (Node *) var, + relid, attnums); + } + + /* Var IN Array */ + if (IsA(clause, ScalarArrayOpExpr)) + { + RangeTblEntry *rte = root->simple_rte_array[relid]; + ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause; + Var *var; + + /* Only expressions with two arguments are considered compatible. */ + if (list_length(expr->args) != 2) + return false; + + /* Check if the expression the right shape (one Var, one Const) */ + if (!examine_clause_args(expr->args, &var, NULL, NULL)) return false; /* @@ -1395,7 +1451,7 @@ statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, * on which side of the operator we found the Var node. */ bool -examine_opclause_expression(OpExpr *expr, Var **varp, Const **cstp, bool *varonleftp) +examine_clause_args(List *args, Var **varp, Const **cstp, bool *varonleftp) { Var *var; Const *cst; @@ -1404,10 +1460,10 @@ examine_opclause_expression(OpExpr *expr, Var **varp, Const **cstp, bool *varonl *rightop; /* enforced by statext_is_compatible_clause_internal */ - Assert(list_length(expr->args) == 2); + Assert(list_length(args) == 2); - leftop = linitial(expr->args); - rightop = lsecond(expr->args); + leftop = linitial(args); + rightop = lsecond(args); /* strip RelabelType from either side of the expression */ if (IsA(leftop, RelabelType)) diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index 87e232fdd4..3147d8fedc 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -1579,7 +1579,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, OpExpr *expr = (OpExpr *) clause; FmgrInfo opproc; - /* valid only after examine_opclause_expression returns true */ + /* valid only after examine_clause_args returns true */ Var *var; Const *cst; bool varonleft; @@ -1587,7 +1587,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, fmgr_info(get_opcode(expr->opno), &opproc); /* extract the var and const from the expression */ - if (examine_opclause_expression(expr, &var, &cst, &varonleft)) + if (examine_clause_args(expr->args, &var, &cst, &varonleft)) { int idx; @@ -1652,6 +1652,113 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, } } } + else if (IsA(clause, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause; + FmgrInfo opproc; + + /* valid only after examine_clause_args returns true */ + Var *var; + Const *cst; + bool varonleft; + + fmgr_info(get_opcode(expr->opno), &opproc); + + /* extract the var and const from the expression */ + if (examine_clause_args(expr->args, &var, &cst, &varonleft)) + { + int idx; + + ArrayType *arrayval; + int16 elmlen; + bool elmbyval; + char elmalign; + int num_elems; + Datum *elem_values; + bool *elem_nulls; + + /* ScalarArrayOpExpr has the Var always on the left */ + Assert(varonleft); + + if (!cst->constisnull) + { + arrayval = DatumGetArrayTypeP(cst->constvalue); + get_typlenbyvalalign(ARR_ELEMTYPE(arrayval), + &elmlen, &elmbyval, &elmalign); + deconstruct_array(arrayval, + ARR_ELEMTYPE(arrayval), + elmlen, elmbyval, elmalign, + &elem_values, &elem_nulls, &num_elems); + } + + /* match the attribute to a dimension of the statistic */ + idx = bms_member_index(keys, var->varattno); + + /* + * Walk through the MCV items and evaluate the current clause. + * We can skip items that were already ruled out, and + * terminate if there are no remaining MCV items that might + * possibly match. + */ + for (i = 0; i < mcvlist->nitems; i++) + { + int j; + bool match = (expr->useOr ? false : true); + MCVItem *item = &mcvlist->items[i]; + + /* + * When the MCV item or the Const value is NULL we can treat + * this as a mismatch. We must not call the operator because + * of strictness. + */ + if (item->isnull[idx] || cst->constisnull) + { + matches[i] = RESULT_MERGE(matches[i], is_or, false); + continue; + } + + /* + * Skip MCV items that can't change result in the bitmap. + * Once the value gets false for AND-lists, or true for + * OR-lists, we don't need to look at more clauses. + */ + if (RESULT_IS_FINAL(matches[i], is_or)) + continue; + + for (j = 0; j < num_elems; j++) + { + Datum elem_value = elem_values[j]; + bool elem_isnull = elem_nulls[j]; + bool elem_match; + + /* NULL values always evaluate as not matching. */ + if (elem_isnull) + { + match = RESULT_MERGE(match, expr->useOr, false); + continue; + } + + /* + * Stop evaluating the array elements once we reach + * match value that can't change - ALL() is the same + * as AND-list, ANY() is the same as OR-list. + */ + if (RESULT_IS_FINAL(match, expr->useOr)) + break; + + elem_match = DatumGetBool(FunctionCall2Coll(&opproc, + var->varcollid, + item->values[idx], + elem_value)); + + match = RESULT_MERGE(match, expr->useOr, elem_match); + } + + /* update the match bitmap with the result */ + matches[i] = RESULT_MERGE(matches[i], is_or, match); + } + } + } else if (IsA(clause, NullTest)) { NullTest *expr = (NullTest *) clause; diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h index b512ee908a..2b14ab238c 100644 --- a/src/include/statistics/extended_stats_internal.h +++ b/src/include/statistics/extended_stats_internal.h @@ -96,8 +96,8 @@ extern SortItem *build_sorted_items(int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc, MultiSortSupport mss, int numattrs, AttrNumber *attnums); -extern bool examine_opclause_expression(OpExpr *expr, Var **varp, - Const **cstp, bool *varonleftp); +extern bool examine_clause_args(List *args, Var **varp, + Const **cstp, bool *varonleftp); extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat, diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index 6a628f5680..9fa659c71d 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -827,6 +827,36 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ' 343 | 200 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')'); + estimated | actual +-----------+-------- + 8 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 8 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')'); + estimated | actual +-----------+-------- + 26 | 150 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])'); + estimated | actual +-----------+-------- + 10 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])'); + estimated | actual +-----------+-------- + 1 | 100 +(1 row) + -- create statistics CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists; ANALYZE mcv_lists; @@ -872,6 +902,36 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ' 200 | 200 (1 row) +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])'); + estimated | actual +-----------+-------- + 200 | 200 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')'); + estimated | actual +-----------+-------- + 150 | 150 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])'); + estimated | actual +-----------+-------- + 100 | 100 +(1 row) + -- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute) SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL'); estimated | actual diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index 3de2be500a..0ece39a279 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -461,6 +461,16 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ' SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])'); + -- create statistics CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists; @@ -480,6 +490,16 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= 4 AND b < SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1'); +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= ANY (ARRAY[1, 2, 3]) AND b IN (''1'', ''2'', ''3'')'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND c > ANY (ARRAY[1, 2, 3])'); + +SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', ''3'') AND c > ANY (ARRAY[1, 2, 3])'); + -- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute) SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL'); -- 2.21.1