From 7e8944bc4df68d2d8e6870fd73d31cfde141b862 Mon Sep 17 00:00:00 2001 From: amit Date: Tue, 22 Aug 2017 13:48:13 +0900 Subject: [PATCH v31 4/5] Faster partition pruning This adds a new module partprune.c in the optimizer, which is meant as a replacement for using constraint exclusion to prune individual partitions. The new module performs partition pruning using the information contained in parent/partitioned table's boundinfo, after extracting clauses that involve partition keys. With the new module's functionality in place, set_append_rel_size() calls prune_append_rel_partitions() to get a Bitmapset of partitions that need to be scanned and processes only the partitions contained in the set. Authors: Amit Langote, David Rowley (david.rowley@2ndquadrant.com) Dilip Kumar (dilipbalaut@gmail.com), --- src/backend/catalog/partition.c | 664 +++++++++++ src/backend/optimizer/path/allpaths.c | 16 + src/backend/optimizer/util/Makefile | 2 +- src/backend/optimizer/util/clauses.c | 4 +- src/backend/optimizer/util/partprune.c | 1519 +++++++++++++++++++++++++ src/backend/optimizer/util/plancat.c | 42 +- src/backend/optimizer/util/relnode.c | 8 + src/include/catalog/partition.h | 92 ++ src/include/catalog/pg_opfamily.h | 3 + src/include/nodes/relation.h | 4 + src/include/optimizer/clauses.h | 2 + src/include/optimizer/partprune.h | 25 + src/test/regress/expected/inherit.out | 10 +- src/test/regress/expected/partition_prune.out | 486 +++++++- src/test/regress/sql/partition_prune.sql | 102 +- 15 files changed, 2904 insertions(+), 75 deletions(-) create mode 100644 src/backend/optimizer/util/partprune.c create mode 100644 src/include/optimizer/partprune.h diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c index 90e24ee8ec..59e3234938 100644 --- a/src/backend/catalog/partition.c +++ b/src/backend/catalog/partition.c @@ -193,6 +193,15 @@ static int get_greatest_modulus(PartitionBoundInfo b); static uint64 compute_hash_value(int partnatts, FmgrInfo 
*partsupfunc, Datum *values, bool *isnull); +static Bitmapset *get_partitions_for_keys_hash(PartitionPruneContext *context, + PartScanKeyInfo *keys); +static Bitmapset *get_partitions_for_keys_list(PartitionPruneContext *context, + PartScanKeyInfo *keys); +static Bitmapset *get_partitions_for_keys_range(PartitionPruneContext *context, + PartScanKeyInfo *keys); +static Bitmapset *get_partitions_excluded_by_ne_datums(PartitionPruneContext *context, + Datum *ne_datums, int n_ne_datums); + /* * RelationBuildPartitionDesc * Form rel's partition descriptor @@ -1560,9 +1569,664 @@ get_partition_qual_relid(Oid relid) return result; } +/* + * get_partitions_for_keys + * Returns the index of partitions that will need to be scanned for the + * given look up keys + * + * Input: + * See the comments above the definition of PartScanKeyInfo to see what + * kind of information is contained in 'keys'. + * + * Outputs: + * Bitmapset containing indexes of the selected partitions + */ +Bitmapset * +get_partitions_for_keys(PartitionPruneContext *context, + PartScanKeyInfo *keys) +{ + Bitmapset *result; + + switch (context->strategy) + { + case PARTITION_STRATEGY_HASH: + result = get_partitions_for_keys_hash(context, keys); + break; + + case PARTITION_STRATEGY_LIST: + result = get_partitions_for_keys_list(context, keys); + if (keys->n_ne_datums > 0) + { + Bitmapset *ne_parts; + + /* + * Remove the indexes of any partitions which cannot possibly + * contain rows matching the clauses due to key->ne_datums + * containing all datum values which are allowed in the given + * partition. This is only possible to do in LIST partitioning + * as it's the only partitioning strategy which allows the + * specification of exact values. 
+ */ + ne_parts = get_partitions_excluded_by_ne_datums(context, + keys->ne_datums, + keys->n_ne_datums); + result = bms_del_members(result, ne_parts); + bms_free(ne_parts); + } + break; + + case PARTITION_STRATEGY_RANGE: + result = get_partitions_for_keys_range(context, keys); + break; + + default: + result = NULL; + elog(ERROR, "unexpected partition strategy: %d", + context->strategy); + } + + return result; +} + /* Module-local functions */ /* + * get_partitions_for_keys_hash + * Return partitions of a hash partitioned table for requested + * keys + * + * This interprets the keys and looks up partitions in the partition bound + * descriptor using the hash partitioning semantics. + */ +static Bitmapset * +get_partitions_for_keys_hash(PartitionPruneContext *context, + PartScanKeyInfo *keys) +{ + int partnatts = context->partnatts, + nparts = context->nparts, + i; + bool keyisnull[PARTITION_MAX_KEYS]; + FmgrInfo *partsupfunc = context->partsupfunc; + PartitionBoundInfo boundinfo = context->boundinfo; + + /* + * Since tuples with NULL values in the partition key columns are stored + * in regular partitions, we'll treat any IS NULL clauses here as regular + * equality clauses. + */ + memset(keyisnull, false, sizeof(keyisnull)); + i = -1; + while ((i = bms_next_member(keys->keyisnull, i)) >= 0) + { + keys->n_eqkeys++; + Assert(i < partnatts); + keyisnull[i] = true; + } + + /* + * Can only do pruning if we know all the keys and they're all equality + * keys including the nulls that we just counted above. + */ + if (keys->n_eqkeys == partnatts) + { + uint64 rowHash; + int greatest_modulus = get_greatest_modulus(boundinfo), + result_index; + + rowHash = compute_hash_value(partnatts, partsupfunc, + keys->eqkeys, keyisnull); + result_index = boundinfo->indexes[rowHash % greatest_modulus]; + if (result_index >= 0) + return bms_make_singleton(result_index); + } + else + /* Can't do pruning otherwise, so return all partitions. 
*/ + return bms_add_range(NULL, 0, nparts - 1); + + return NULL; +} + +/* + * get_partitions_for_keys_list + * Return partitions of a list partitioned table for requested keys + * + * This interprets the keys and looks up partitions in the partition bound + * descriptor using the list partitioning semantics. + * + * Note: LIST partitioning only supports a single partition key, therefore + * this function requires no looping over the partition keys. + */ +static Bitmapset * +get_partitions_for_keys_list(PartitionPruneContext *context, + PartScanKeyInfo *keys) +{ + FmgrInfo *partsupfunc = context->partsupfunc; + Oid *partcollation = context->partcollation; + PartitionBoundInfo boundinfo = context->boundinfo; + Bitmapset *result = NULL; + int i, + eqoff, + minoff, + maxoff; + bool is_equal; + + /* Handle clauses requesting a NULL valued partition key */ + if (!bms_is_empty(keys->keyisnull)) + { + /* + * NULLs may only exist in the NULL partition, or in the + * default, if there's no NULL partition. + */ + if (partition_bound_accepts_nulls(boundinfo)) + return bms_make_singleton(boundinfo->null_index); + else if (partition_bound_has_default(boundinfo)) + return bms_make_singleton(boundinfo->default_index); + else + return NULL; + } + + /* + * If there are no datums to compare keys with, but there are partitions, + * just return the default partition if one exists. + */ + if (boundinfo->ndatums == 0) + { + if (partition_bound_has_default(boundinfo)) + return bms_make_singleton(boundinfo->default_index); + else + return NULL; /* shouldn't happen */ + } + + /* Equality key. */ + if (keys->n_eqkeys > 0) + { + eqoff = partition_list_bsearch(partsupfunc, partcollation, + boundinfo, keys->eqkeys[0], + &is_equal); + if (eqoff >= 0 && is_equal) + { + /* An exact matching datum exists. 
*/ + Assert(boundinfo->indexes[eqoff] >= 0); + return bms_make_singleton(boundinfo->indexes[eqoff]); + } + else if (partition_bound_has_default(boundinfo)) + return bms_make_singleton(boundinfo->default_index); + else + return NULL; + } + + /* + * Find the left-most bound that satisfies the query, i.e., the one that + * satisfies minkeys. + */ + minoff = 0; + if (keys->n_minkeys > 0) + { + minoff = partition_list_bsearch(partsupfunc, partcollation, + boundinfo, keys->minkeys[0], + &is_equal); + if (minoff >= 0) + { + /* + * partition_list_bsearch returning a positive number means that + * minkeys[0] must be greater than or equal to the smallest datum. + * If we didn't find an exact matching datum (!is_equal) or if the + * operator used was non-inclusive (>), then in both of these + * cases we're not interested in the datum pointed to by minoff, + * but we may start getting matches in the partition which the + * next datum belongs to, so point to that one instead. (This may + * be beyond the last datum in the array, but we'll detect that + * later.) + */ + if (!is_equal || !keys->min_incl) + minoff++; + } + else + { + /* + * minoff set to -1 means all datums are greater than minkeys[0], + * which means all partitions satisfy minkeys. In that case, set + * minoff to the index of the leftmost datum, viz. 0. + */ + minoff = 0; + } + + /* + * The value of minkeys[0] is greater than all of the datums we have + * partitions for. The only possible partition that could contain + * a match is the default partition. Return that, if it exists. + */ + if (minoff > boundinfo->ndatums - 1) + return partition_bound_has_default(boundinfo) + ? bms_make_singleton(boundinfo->default_index) + : NULL; + } + + /* + * Find the right-most bound that satisfies the query, i.e., the one that + * satisfies maxkeys. 
+ */ + maxoff = boundinfo->ndatums - 1; + if (keys->n_maxkeys > 0) + { + maxoff = partition_list_bsearch(partsupfunc, partcollation, + boundinfo, keys->maxkeys[0], + &is_equal); + if (maxoff >= 0) + { + /* + * partition_list_bsearch returning a positive number means that + * maxkeys[0] must be greater than or equal to the smallest datum. + * If the match found is an equal match, but the operator used is + * non-inclusive of that value (<), then the partition belonging + * to maxoff cannot match, so we'll decrement maxoff to point to + * the partition belonging to the previous datum. We might end up + * decrementing maxoff down to -1, but we'll handle that later. + */ + if (is_equal && !keys->max_incl) + maxoff--; + } + + /* + * maxkeys is smaller than the datums of all non-default partitions, + * meaning there isn't one to return. Return the default partition if + * one exists. + */ + if (maxoff < 0) + return partition_bound_has_default(boundinfo) + ? bms_make_singleton(boundinfo->default_index) + : NULL; + } + + Assert (minoff >= 0 && maxoff >= 0); + + /* + * All datums between those at minoff and maxoff satisfy query's keys, so + * add the corresponding partitions to the result set. + */ + for (i = minoff; i <= maxoff; i++) + result = bms_add_member(result, boundinfo->indexes[i]); + + /* + * For range queries, always include the default list partition, + * because list partitions divide the key space in a discontinuous manner, + * not all values in the given range will have a partition assigned. This + * may not technically be true for some data types (e.g. integer types), + * however, we currently lack any sort of infrastructure to provide us + * with proofs that would allow us to do anything smarter here. 
+ */ + if (partition_bound_has_default(boundinfo)) + return bms_add_member(result, boundinfo->default_index); + + return result; +} + +/* + * get_partitions_for_keys_range + * Return partitions of a range partitioned table for requested keys + * + * This interprets the keys and looks up partitions in the partition bound + * descriptor using the range partitioning semantics. + */ +static Bitmapset * +get_partitions_for_keys_range(PartitionPruneContext *context, + PartScanKeyInfo *keys) +{ + FmgrInfo *partsupfunc = context->partsupfunc; + Oid *partcollation = context->partcollation; + PartitionBoundInfo boundinfo = context->boundinfo; + Bitmapset *result = NULL; + int partnatts = context->partnatts, + i, + eqoff, + minoff, + maxoff; + bool is_equal; + + /* Only the default range partition accepts nulls. */ + if (!bms_is_empty(keys->keyisnull)) + return partition_bound_has_default(boundinfo) + ? bms_make_singleton(boundinfo->default_index) + : NULL; + + /* + * If there are no datums to compare keys with, but there are partitions, + * just return the default partition, if one exists. + */ + if (boundinfo->ndatums == 0) + { + if (partition_bound_has_default(boundinfo)) + return bms_make_singleton(boundinfo->default_index); + else + return NULL; + } + + /* Equality keys. */ + if (keys->n_eqkeys > 0) + { + /* Valid iff there are as many as partition key columns. */ + Assert(keys->n_eqkeys == partnatts); + eqoff = partition_range_datum_bsearch(partsupfunc, + partcollation, + boundinfo, + keys->n_eqkeys, keys->eqkeys, + &is_equal); + /* + * The bound at eqoff is known to be <= eqkeys, given the way + * partition_range_datum_bsearch works. Considering it as the lower + * bound of the partition that eqkeys falls into, the bound at + * eqoff + 1 would be its upper bound, so use eqoff + 1 to get the + * desired partition's index. 
+ */ + if (eqoff >= 0 && boundinfo->indexes[eqoff + 1] >= 0) + return bms_make_singleton(boundinfo->indexes[eqoff+1]); + /* + * eqkeys falls into a range of values for which no non-default + * partition exists. + */ + else if (partition_bound_has_default(boundinfo)) + return bms_make_singleton(boundinfo->default_index); + else + return NULL; + } + + /* + * Find the leftmost bound that satisfies the query, that is, make minoff + * point to the datum corresponding to the upper bound of the left-most + * partition to be selected. + */ + minoff = 0; + if (keys->n_minkeys > 0) + { + minoff = partition_range_datum_bsearch(partsupfunc, partcollation, + boundinfo, + keys->n_minkeys, keys->minkeys, + &is_equal); + + /* + * If minkeys does not contain values for all partition key columns, + * that is, only a prefix is specified, then there may be multiple + * bounds in boundinfo that share the same prefix. But + * partition_range_datum_bsearch would've returned the offset of just + * one of those. If minkey is inclusive, we must decrement minoff + * until it reaches the leftmost of those bound values, so that + * partitions corresponding to all those bound values are selected. + * If minkeys is exclusive, we must increment minoff until it reaches + * the first bound greater than this prefix, so that none of the + * partitions corresponding to those bound values are selected. + */ + if (is_equal && keys->n_minkeys < partnatts) + { + while (minoff >= 1 && minoff < boundinfo->ndatums - 1) + { + int32 cmpval; + int nextoff; + + nextoff = keys->min_incl ? minoff - 1 : minoff + 1; + cmpval = partition_rbound_datum_cmp(partsupfunc, + partcollation, + boundinfo->datums[nextoff], + boundinfo->kind[nextoff], + keys->minkeys, + keys->n_minkeys); + if (cmpval != 0) + { + /* Move to the non-equal bound only in this case. 
*/ + if (!keys->min_incl) + minoff++; + break; + } + + if (keys->min_incl) + minoff--; + else + minoff++; + } + } + /* + * Assuming minoff currently points to the lower bound of the left- + * most selected partition, increment it so that it points to the + * upper bound. + */ + else + minoff += 1; + } + + /* + * Find the rightmost bound that satisfies the query, that is, make maxoff + * point to the datum corresponding to the upper bound of the + * right-most partition to be selected. + */ + maxoff = boundinfo->ndatums; + if (keys->n_maxkeys > 0) + { + maxoff = partition_range_datum_bsearch(partsupfunc, + partcollation, + boundinfo, + keys->n_maxkeys, keys->maxkeys, + &is_equal); + + /* See the comment written above for minkeys. */ + if (is_equal && keys->n_maxkeys < partnatts) + { + while (maxoff >= 1 && maxoff < boundinfo->ndatums - 1) + { + int32 cmpval; + int nextoff; + + nextoff = keys->max_incl ? maxoff + 1 : maxoff - 1; + cmpval = partition_rbound_datum_cmp(partsupfunc, + partcollation, + boundinfo->datums[nextoff], + boundinfo->kind[nextoff], + keys->maxkeys, + keys->n_maxkeys); + if (cmpval != 0) + { + /* Move to the non-equal bound only in this case. */ + if (!keys->max_incl) + maxoff -= 1; + break; + } + + if (keys->max_incl) + maxoff += 1; + else + maxoff -= 1; + } + + /* + * Assuming maxoff currently points to the lower bound of the + * right-most partition, increment it so that it points to the + * upper bound. + */ + maxoff += 1; + } + /* + * Assuming maxoff currently points to the lower bound of the right- + * most selected partition, increment it so that it points to the + * upper bound. We do not need to include that partition though if + * maxkeys exactly matched the bound in question and it is exclusive. + * Not incrementing simply means we treat the matched bound itself as + * the upper bound of the right-most selected partition.
+ */ + else if (!is_equal || keys->max_incl) + maxoff += 1; + } + + Assert (minoff >= 0 && maxoff >= 0); + + /* + * At this point, minoff/maxoff supposedly point to the upper bound of + * some partition, but it may not be the case. It might actually be the + * upper bound of an unassigned range of values, which if so, move + * minoff/maxoff to the adjacent bound which must be the upper bound of + * a valid partition. + * + * By doing that, we skip over a portion of values that do indeed satisfy + * the query, but don't have a valid partition assigned. The default + * partition will have to be included to cover those values. Although, if + * the original bound in question contains an infinite value, there would + * not be any unassigned range to speak of, because the range is unbounded + * in that direction by definition, so no need to include the default. + */ + if (boundinfo->indexes[minoff] < 0) + { + int lastkey; + + if (keys->n_minkeys > 0) + lastkey = keys->n_minkeys - 1; + else + lastkey = partnatts - 1; + if (minoff >=0 && minoff < boundinfo->ndatums && + boundinfo->kind[minoff][lastkey] == PARTITION_RANGE_DATUM_VALUE && + partition_bound_has_default(boundinfo)) + result = bms_add_member(result, boundinfo->default_index); + minoff += 1; + } + + if (maxoff >= 1 && boundinfo->indexes[maxoff] < 0) + { + int lastkey; + + if (keys->n_maxkeys > 0) + lastkey = keys->n_maxkeys - 1; + else + lastkey = partnatts - 1; + if (maxoff >=0 && maxoff <= boundinfo->ndatums && + boundinfo->kind[maxoff - 1][lastkey] == PARTITION_RANGE_DATUM_VALUE && + partition_bound_has_default(boundinfo)) + result = bms_add_member(result, boundinfo->default_index); + maxoff -= 1; + } + + if (minoff <= maxoff) + result = bms_add_range(result, + boundinfo->indexes[minoff], + boundinfo->indexes[maxoff]); + + if (!partition_bound_has_default(boundinfo)) + return result; + + /* + * There may exist a range of values unassigned to any non-default + * partition between the datums at minoff and 
maxoff. Add the default + * partition in that case. + */ + for (i = minoff; i <= maxoff; i++) + { + if (boundinfo->indexes[i] < 0) + return bms_add_member(result, boundinfo->default_index); + } + + /* + * Since partition keys with nulls are mapped to the default range + * partition, we must include the default partition if some keys + * could be null. + */ + if (bms_num_members(keys->keyisnotnull) < partnatts) + result = bms_add_member(result, boundinfo->default_index); + + return result; +} + +/* + * get_partitions_excluded_by_ne_datums + * + * Returns a Bitmapset of partition indexes that can safely be removed due to + * the discovery of <> clauses for each datum value allowed in the partition. + */ +static Bitmapset * +get_partitions_excluded_by_ne_datums(PartitionPruneContext *context, + Datum *ne_datums, int n_ne_datums) +{ + FmgrInfo *partsupfunc = context->partsupfunc; + Oid *partcollation = context->partcollation; + int nparts = context->nparts, + i, + *datums_in_part, + *datums_found; + PartitionBoundInfo boundinfo = context->boundinfo; + Bitmapset *excluded_parts; + Bitmapset *foundoffsets = NULL; + + Assert(context->strategy == PARTITION_STRATEGY_LIST); + Assert(context->partnatts == 1); + + for (i = 0; i < n_ne_datums; i++) + { + int offset; + bool is_equal; + + offset = partition_list_bsearch(partsupfunc, partcollation, + boundinfo, + ne_datums[i], &is_equal); + if (offset >= 0 && is_equal) + { + Assert(boundinfo->indexes[offset] >= 0); + foundoffsets = bms_add_member(foundoffsets, offset); + } + } + + /* No partitions can be excluded if none of the datums were found. */ + if (bms_is_empty(foundoffsets)) + return NULL; + + /* + * Since each list partition can permit multiple values, we must ensure + * that we got clauses for all those values before we can eliminate the + * entire partition.
+ * + * We'll need two arrays for this, one to count the number of unique + * datums found in the query which belong to each partition, and another + * to record the number of datums permitted in each partition. Once we've + * counted all this, we can eliminate any partition where the number of + * datums found matches the number of datums allowed in the partition. + */ + datums_in_part = (int *) palloc0(sizeof(int) * nparts); + datums_found = (int *) palloc0(sizeof(int) * nparts); + + i = -1; + while ((i = bms_next_member(foundoffsets, i)) >= 0) + datums_found[boundinfo->indexes[i]]++; + + /* + * Now, in a single pass over all the datums, count the number of datums + * permitted in each partition. + */ + for (i = 0; i < boundinfo->ndatums; i++) + datums_in_part[boundinfo->indexes[i]]++; + + /* + * Now compare the counts and eliminate any partition for which we found + * clauses for all its permitted values. We must be careful here not to + * eliminate the default partition. We can recognize that by it having a + * zero value in both arrays. + */ + excluded_parts = NULL; + + for (i = 0; i < nparts; i++) + { + if (datums_found[i] >= datums_in_part[i] && datums_found[i] > 0) + excluded_parts = bms_add_member(excluded_parts, i); + } + + /* + * Because the above clauses are strict, we can also exclude the NULL + * partition, provided it does not also allow non-NULL values. 
+ */ + if (partition_bound_accepts_nulls(boundinfo) && + datums_in_part[boundinfo->null_index] == 0) + excluded_parts = bms_add_member(excluded_parts, + boundinfo->null_index); + + pfree(datums_in_part); + pfree(datums_found); + + return excluded_parts; +} + +/* * get_partition_operator * * Return oid of the operator of given strategy for a given partition key diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index f714247ebb..a9eba3a831 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -33,6 +33,7 @@ #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/geqo.h" +#include "optimizer/partprune.h" #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/plancat.h" @@ -862,6 +863,7 @@ static void set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { + Relids live_children = NULL; int parentRTindex = rti; bool has_live_children; double parent_rows; @@ -875,6 +877,9 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, Assert(IS_SIMPLE_REL(rel)); + if (rte->relkind == RELKIND_PARTITIONED_TABLE) + live_children = prune_append_rel_partitions(root, rel); + /* * Initialize to compute size estimates for whole append relation. * @@ -1123,6 +1128,17 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, continue; } + if (IS_PARTITIONED_REL(rel) && + !bms_is_member(appinfo->child_relid, live_children)) + { + /* + * This child need not be scanned, so we can omit it from the + * appendrel. + */ + set_dummy_rel_pathlist(childrel); + continue; + } + if (relation_excluded_by_constraints(root, childrel, childRTE)) { /* diff --git a/src/backend/optimizer/util/Makefile b/src/backend/optimizer/util/Makefile index c54d0a690d..aebd98875e 100644 --- a/src/backend/optimizer/util/Makefile +++ b/src/backend/optimizer/util/Makefile @@ -12,7 +12,7 @@ subdir = src/backend/optimizer/util top_builddir = ../../../.. 
include $(top_builddir)/src/Makefile.global -OBJS = clauses.o joininfo.o orclauses.o pathnode.o placeholder.o \ +OBJS = clauses.o joininfo.o orclauses.o partprune.o pathnode.o placeholder.o \ plancat.o predtest.o relnode.o restrictinfo.o tlist.o var.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 89f27ce0eb..0c1f23951a 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -152,8 +152,6 @@ static Node *substitute_actual_parameters(Node *expr, int nargs, List *args, static Node *substitute_actual_parameters_mutator(Node *node, substitute_actual_parameters_context *context); static void sql_inline_error_callback(void *arg); -static Expr *evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod, - Oid result_collation); static Query *substitute_actual_srf_parameters(Query *expr, int nargs, List *args); static Node *substitute_actual_srf_parameters_mutator(Node *node, @@ -4833,7 +4831,7 @@ sql_inline_error_callback(void *arg) * We use the executor's routine ExecEvalExpr() to avoid duplication of * code and ensure we get the same result as the executor would get. */ -static Expr * +Expr * evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod, Oid result_collation) { diff --git a/src/backend/optimizer/util/partprune.c b/src/backend/optimizer/util/partprune.c new file mode 100644 index 0000000000..f94540285f --- /dev/null +++ b/src/backend/optimizer/util/partprune.c @@ -0,0 +1,1519 @@ +/*------------------------------------------------------------------------- + * + * partprune.c + * Provides functions to prune partitions of a partitioned table by + * comparing provided set of clauses with the table's partitions' + * boundaries + * + * Following entry points exist to this module. 
+ * + * prune_append_rel_partitions() + * + * This is to be called for a partitioned table to prune away the partitions + * that provably won't be scanned by a given query based on the table's + * rel->baserestrictinfo. It should be called before starting to look at the + * individual partitions to set their access paths, so that we expend planning + * efforts only on the partitions that are relevant to the query. Pruning by + * this function only occurs if rel->baserestrictinfo contains at least one + * clause whose variable argument matches a proper prefix of the table's + * partition key and the other argument is a Const node. + * + * generate_partition_clauses() + * + * This is to be called to extract clauses that will be useful for partition + * pruning from a list of clauses containing clauses that reference a given + * partitioned table. For example, prune_append_rel_partitions() calls this + * function, because a partitioned table's rel->baserestrictinfo may contain + * clauses that might be useful for partitioning. Caller must have set up a + * valid partition pruning context in the form of struct PartitionPruneContext, + * that is, each of its fields other than clauseinfo must be valid before + * calling here. After extracting relevant clauses, clauseinfo is filled with + * information that will be used for actual pruning. + * + * get_partitions_from_clauses() + * + * This is to be called to prune partitions based on relevant partitioning + * clauses. Caller must have called generate_partition_clauses() at least + * once and hence a valid partition pruning context must have already been + * created. Especially, PartitionPruneContext.clauseinfo must contain valid + * information. Partition pruning proceeds by extracting constant values + * from the clauses and comparing them with the partition bounds while also + * taking into account strategies of the operators in the matched clauses.
+ * + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/optimizer/util/partprune.c + * + *------------------------------------------------------------------------- +*/ + +#include "postgres.h" + +#include "access/hash.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_opfamily.h" +#include "catalog/pg_type.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" +#include "optimizer/partprune.h" +#include "optimizer/planner.h" +#include "optimizer/predtest.h" +#include "optimizer/prep.h" +#include "parser/parse_coerce.h" +#include "parser/parsetree.h" +#include "rewrite/rewriteManip.h" +#include "utils/lsyscache.h" + +/* + * Stores clauses which were matched to a partition key. + * + * Each matching "operator" clause is stored in the 'keyclauses' list for the + * partition key that it was matched to, except if the operator is <>, in + * which case, the clause is added to the 'ne_clauses' list. + * + * Boolean OR clauses whose at least one argument clause matches a partition + * key are added to the 'or_clauses' list. + * + * Based on a IS NULL or IS NOT NULL clause that was matched to a partition + * key, the corresponding bit in 'keyisnull' or 'keyisnotnull' is set. A bit + * in 'keyisnotnull' may also be set when a strict OpExpr is encountered for + * the given partition key. + */ +typedef struct PartitionClauseInfo +{ + /* Lists of clauses indexed by the partition key */ + List *keyclauses[PARTITION_MAX_KEYS]; + + /* Each members is a List itself of a given OR clauses's arguments. */ + List *or_clauses; + + /* List of clauses containing <> operator. */ + List *ne_clauses; + + /* Nth (0 <= N < partnatts) bit set if the key is NULL or NOT NULL. */ + Bitmapset *keyisnull; + Bitmapset *keyisnotnull; + + /* True if at least one of above fields contains valid information. 
*/ + bool foundkeyclauses; + + /* True if mutually contradictory clauses were found. */ + bool constfalse; +} PartitionClauseInfo; + +/* + * Information about a clause matched with a partition key column kept to + * avoid recomputing it in remove_redundant_clauses(). + */ +typedef struct PartClause +{ + Oid opno; /* opno to compare partkey to 'value' */ + Oid inputcollid; /* collation to compare partkey to 'value' */ + Expr *value; /* The value the partition key is being compared to */ + + /* cached info. */ + bool valid_cache; /* Are the following fields populated? */ + int op_strategy; + Oid op_subtype; + FmgrInfo op_func; +} PartClause; + +/* + * Strategy of a partition clause operator per the partitioning operator class + * definition. + */ +typedef enum PartOpStrategy +{ + PART_OP_EQUAL, + PART_OP_LESS, + PART_OP_GREATER +} PartOpStrategy; + +static void extract_partition_clauses(PartitionPruneContext *context, + List *clauses); +static bool match_boolean_partition_clause(Expr *clause, Expr *partkey, + Expr **rightop); +static Bitmapset *get_partitions_from_or_args(PartitionPruneContext *context, + List *or_args); +static void remove_redundant_clauses(PartitionPruneContext *context, + List **minimalclauses); +static bool partition_cmp_args(Oid parttypid, Oid partopfamily, + PartClause *pc, PartClause *leftarg, PartClause *rightarg, + bool *result); +static bool extract_bounding_datums(PartitionPruneContext *context, + List **minimalclauses, PartScanKeyInfo *keys); +static PartOpStrategy partition_op_strategy(char part_strategy, + PartClause *pc, bool *incl); +static bool partkey_datum_from_expr(Oid parttypid, Expr *expr, Datum *value); + +/* + * prune_append_rel_partitions + * Returns RT indexes of relations belonging to the minimum set of + * partitions which must be scanned to satisfy rel's baserestrictinfo + * quals. 
+ */ +Relids +prune_append_rel_partitions(PlannerInfo *root, RelOptInfo *rel) +{ + Relids result = NULL; + List *clauses = rel->baserestrictinfo; + int i; + + if (clauses == NIL) + { + /* If there are no clauses then include every partition */ + for (i = 0; i < rel->nparts; i++) + result = bms_add_member(result, rel->part_rels[i]->relid); + } + else + { + PartitionPruneContext context; + int partnatts = rel->part_scheme->partnatts, + i; + + /* Initiate partition pruning using clauses. */ + memset(&context, 0, sizeof(context)); + context.relid = rel->relid; + context.strategy = rel->part_scheme->strategy; + context.partnatts = partnatts; + + context.partkeys = (Expr **) palloc(sizeof(Expr *) * partnatts); + for (i = 0; i < partnatts; i++) + context.partkeys[i] = linitial(rel->partexprs[i]); + + context.parttypid = rel->part_scheme->parttypid; + context.partopfamily = rel->part_scheme->partopfamily; + context.partcollation = rel->part_scheme->partcollation; + context.partsupfunc = rel->part_scheme->partsupfunc; + context.nparts = rel->nparts; + context.boundinfo = rel->boundinfo; + context.has_default_part = rel->has_default_part; + context.partition_qual = rel->partition_qual; + + /* process clauses; context.clauseinfo will be set */ + generate_partition_clauses(&context, clauses); + + if (!context.clauseinfo->constfalse) + { + /* Actual pruning happens here. */ + Bitmapset *partindexes = get_partitions_from_clauses(&context); + + /* Add selected partitions' RT indexes to result. */ + i = -1; + while ((i = bms_next_member(partindexes, i)) >= 0) + result = bms_add_member(result, rel->part_rels[i]->relid); + } + } + + return result; +} + +/* + * generate_partition_clauses + * Analyzes clauses to find those that match the partition key and sets + * context->clauseinfo + * + * Ideally, this should be called only once for a given query and a given + * partitioned table. 
+ */ +void +generate_partition_clauses(PartitionPruneContext *context, List *clauses) +{ + /* The clauses list may be modified below, so better make a copy. */ + clauses = list_copy(clauses); + + /* + * For sub-partitioned tables there's a corner case where if the + * sub-partitioned table shares any partition keys with its parent, + * then it's possible that the partitioning hierarchy allows the + * parent partition to only contain a narrower range of values than + * the sub-partitioned table does. In this case it is possible that + * we'd include partitions that could not possibly have any tuples + * matching 'clauses'. The possibility of such a partition + * arrangement is perhaps unlikely for non-default partitions, but + * it may be more likely in the case of default partitions, so we'll + * add the parent partition table's partition qual to the clause list + * in this case only. This may result in the default partition being + * eliminated. + */ + if (context->has_default_part && context->partition_qual != NIL) + { + List *partqual = context->partition_qual; + + partqual = (List *) expression_planner((Expr *) partqual); + + /* Fix Vars to have the desired varno */ + if (context->relid != 1) + ChangeVarNodes((Node *) partqual, 1, context->relid, 0); + + clauses = list_concat(clauses, partqual); + } + + /* And away we go to do the real work; context->clauseinfo will be set */ + extract_partition_clauses(context, clauses); +} + +/* + * get_partitions_from_clauses + * Determine partitions that could possibly contain a record that + * satisfies clauses as described in context->clauseinfo + * + * Returns a Bitmapset of the matching partition indexes, or NULL if none can + * match. 
+ */ +Bitmapset * +get_partitions_from_clauses(PartitionPruneContext *context) +{ + PartitionClauseInfo *partclauseinfo = context->clauseinfo; + PartScanKeyInfo keys; + Bitmapset *result; + ListCell *lc; + + Assert(partclauseinfo != NULL); + Assert(!partclauseinfo->constfalse); + + if (!partclauseinfo->foundkeyclauses) + { + /* No interesting clauses were found to eliminate partitions. */ + result = bms_add_range(NULL, 0, context->nparts - 1); + } + else + { + List *minimalclauses[PARTITION_MAX_KEYS]; + + /* + * For each partition key column, populate its slot in minimalclauses + * with the most restrictive of the clauses from the corresponding + * list in context->clauseinfo. + */ + remove_redundant_clauses(context, minimalclauses); + + /* Did remove_redundant_clauses find any contradicting clauses? */ + if (partclauseinfo->constfalse) + return NULL; + + if (extract_bounding_datums(context, minimalclauses, &keys)) + { + result = get_partitions_for_keys(context, &keys); + + /* + * No point in trying to look at other conjunctive clauses, if we + * got an empty set in the first place. + */ + if (bms_is_empty(result)) + return NULL; + } + else + { + /* + * Looks like we didn't have *all* the values we'd need to + * prune partitions using get_partitions_for_keys(). + */ + result = bms_add_range(NULL, 0, context->nparts - 1); + } + } + + /* Now apply the OR clauses. */ + foreach(lc, partclauseinfo->or_clauses) + { + List *or_args = (List *) lfirst(lc); + Bitmapset *or_parts; + + or_parts = get_partitions_from_or_args(context, or_args); + + /* + * Clauses in or_clauses are mutually conjunctive and also + * in conjunction with the rest of the clauses above, so combine the + * partitions thus selected with those in result using set + * intersection. 
+ */ + result = bms_int_members(result, or_parts); + bms_free(or_parts); + } + + return result; +} + +/* Module-local functions */ + +/* + * If the partition key has a collation, then the clause must have the same + * input collation. If the partition key is non-collatable, we assume the + * collation doesn't matter, because while collation wasn't considered when + * performing partitioning, the clause still may have a collation assigned + * due to the other input being of a collatable type. + */ +#define PartCollMatchesExprColl(partcoll, exprcoll) \ + ((partcoll) == InvalidOid || (partcoll) == (exprcoll)) + +/* + * extract_partition_clauses + * Processes 'clauses' to extract clause matching the partition key. + * This adds matched clauses to the list corresponding to particular key + * in context->clauseinfo. Also collects other useful clauses to assist + * in partition elimination, such as OR clauses, clauses containing <> + * operator, and IS [NOT] NULL clauses + * + * We may also discover some contradiction in the clauses which means that no + * partition can possibly match. In this case, the function sets + * context->clauseinfo's 'constfalse' to true and exits immediately without + * processing any further clauses. In this case, the caller must be careful + * not to assume the context->clauseinfo is fully populated with all clauses. 
+ */ +static void +extract_partition_clauses(PartitionPruneContext *context, List *clauses) +{ + PartitionClauseInfo *partclauseinfo; + ListCell *lc; + + context->clauseinfo = partclauseinfo = palloc(sizeof(PartitionClauseInfo)); + memset(partclauseinfo->keyclauses, 0, sizeof(partclauseinfo->keyclauses)); + partclauseinfo->or_clauses = NIL; + partclauseinfo->ne_clauses = NIL; + partclauseinfo->keyisnull = NULL; + partclauseinfo->keyisnotnull = NULL; + partclauseinfo->constfalse = false; + partclauseinfo->foundkeyclauses = false; + + foreach(lc, clauses) + { + Expr *clause = (Expr *) lfirst(lc); + int i; + + if (IsA(clause, RestrictInfo)) + { + RestrictInfo *rinfo = (RestrictInfo *) clause; + + clause = rinfo->clause; + if (rinfo->pseudoconstant && + !DatumGetBool(((Const *) clause)->constvalue)) + { + partclauseinfo->constfalse = true; + return; + } + } + + /* Get the BoolExpr's out of the way.*/ + if (IsA(clause, BoolExpr)) + { + if (or_clause((Node *) clause)) + { + partclauseinfo->or_clauses = + lappend(partclauseinfo->or_clauses, + ((BoolExpr *) clause)->args); + continue; + } + else if (and_clause((Node *) clause)) + { + /* + * Queue its args to be processed later within the same + * invocation. + */ + clauses = list_concat(clauses, + list_copy(((BoolExpr *) clause)->args)); + continue; + } + /* Fall-through for a NOT clause, which is handled below. */ + } + + for (i = 0; i < context->partnatts; i++) + { + Expr *partkey = context->partkeys[i]; + PartClause *pc; + Oid partopfamily = context->partopfamily[i]; + Oid partcoll = context->partcollation[i]; + Oid commutator = InvalidOid; + + /* + * Recognize specially shaped clauses that match with the Boolean + * partition key. 
+ */ + if (IsBooleanOpfamily(partopfamily)) + { + Expr *rightop; + + if (match_boolean_partition_clause(clause, partkey, &rightop)) + { + pc = (PartClause *) palloc0(sizeof(PartClause)); + pc->opno = BooleanEqualOperator; + pc->inputcollid = InvalidOid; + pc->value = rightop; + + partclauseinfo->keyclauses[i] = + lappend(partclauseinfo->keyclauses[i], + pc); + /* Clause was matched. */ + partclauseinfo->foundkeyclauses = true; + continue; + } + } + + if (IsA(clause, OpExpr) && + list_length(((OpExpr *) clause)->args) == 2) + { + OpExpr *opclause = (OpExpr *) clause; + Expr *leftop, + *rightop, + *valueexpr; + bool is_ne_listp = false; + + leftop = (Expr *) get_leftop(clause); + if (IsA(leftop, RelabelType)) + leftop = ((RelabelType *) leftop)->arg; + rightop = (Expr *) get_rightop(clause); + if (IsA(rightop, RelabelType)) + rightop = ((RelabelType *) rightop)->arg; + + /* check if the clause matches this partition key */ + if (equal(leftop, partkey)) + valueexpr = rightop; + else if (equal(rightop, partkey)) + { + valueexpr = leftop; + + commutator = get_commutator(opclause->opno); + + /* nothing we can do unless we can swap the operands */ + if (!OidIsValid(commutator)) + break; + } + else + /* clause does not match this partition key. */ + continue; + + /* + * Partition key also consists of a collation that's specified + * for it, so try to match it too. There may be multiple keys + * with the same expression but different collations. + */ + if (!PartCollMatchesExprColl(partcoll, opclause->inputcollid)) + continue; + + /* + * Matched with this key. Now check various properties of + * the clause to see if it's sane to use it for pruning. If + * any of the properties makes it unsuitable for pruning, then + * break instead of continuing to match the clause with the + * next key, because the clause is useless no matter which key + * it's matched to. + */ + + /* + * Only allow strict operators. This will guarantee nulls are + * filtered. 
+ */ + if (!op_strict(opclause->opno)) + break; + + /* We can't use any volatile value to prune partitions. */ + if (contain_volatile_functions((Node *) valueexpr)) + break; + + /* + * Normally we only bother with operators that are listed as + * being part of the partitioning operator family. But we + * make an exception in one case -- operators named '<>' are + * not listed in any operator family whatsoever, in which + * case, we try to perform partition pruning with it only if + * list partitioning is in use. + */ + if (!op_in_opfamily(opclause->opno, partopfamily)) + { + Oid negator; + + if (context->strategy != PARTITION_STRATEGY_LIST) + break; + + /* + * To confirm if the operator is really '<>', check if its + * negator is a btree equality operator. + */ + negator = get_negator(opclause->opno); + if (OidIsValid(negator) && + op_in_opfamily(negator, partopfamily)) + { + Oid lefttype; + Oid righttype; + int strategy; + + get_op_opfamily_properties(negator, partopfamily, + false, + &strategy, + &lefttype, &righttype); + + if (strategy == BTEqualStrategyNumber) + is_ne_listp = true; + } + + /* Operator isn't really what we were hoping it'd be. */ + if (!is_ne_listp) + break; + } + + pc = (PartClause *) palloc0(sizeof(PartClause)); + pc->opno = OidIsValid(commutator) ? commutator : opclause->opno; + pc->inputcollid = opclause->inputcollid; + pc->value = valueexpr; + + /* + * We don't turn a <> operator clause into a key right away. + * Instead, the caller will hand over such clauses to + * get_partitions_excluded_by_ne_clauses(). + */ + if (is_ne_listp) + partclauseinfo->ne_clauses = + lappend(partclauseinfo->ne_clauses, + pc); + else + partclauseinfo->keyclauses[i] = + lappend(partclauseinfo->keyclauses[i], + pc); + + /* + * Since we only allow strict operators, check for any + * contradicting IS NULLs. 
+ */ + if (bms_is_member(i, partclauseinfo->keyisnull)) + { + partclauseinfo->constfalse = true; + return; + } + /* Record that a strict clause has been seen for this key */ + partclauseinfo->keyisnotnull = + bms_add_member(partclauseinfo->keyisnotnull, + i); + } + else if (IsA(clause, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause; + Oid saop_op = saop->opno; + Oid saop_coll = saop->inputcollid; + Expr *leftop = (Expr *) linitial(saop->args), + *rightop = (Expr *) lsecond(saop->args); + List *elem_exprs, + *elem_clauses; + ListCell *lc1; + + if (IsA(leftop, RelabelType)) + leftop = ((RelabelType *) leftop)->arg; + + /* Check it matches this partition key */ + if (!equal(leftop, partkey) || + !PartCollMatchesExprColl(partcoll, saop->inputcollid)) + continue; + + /* + * Matched with this key. Check various properties of the + * clause to see if it can sanely be used for partition + * pruning. + */ + + /* + * Only allow strict operators. This will guarantee null are + * filtered. + */ + if (!op_strict(saop->opno)) + break; + + /* Useless if the array has any volatile functions. */ + if (contain_volatile_functions((Node *) rightop)) + break; + + /* + * In case of NOT IN (..), we get a '<>', which we handle if + * list partitioning is in use and we're able to confirm that + * it's negator is a btree equality operator belonging to the + * partitioning operator family. + */ + if (!op_in_opfamily(saop_op, partopfamily)) + { + Oid negator; + + if (context->strategy != PARTITION_STRATEGY_LIST) + break; + + negator = get_negator(saop_op); + if (OidIsValid(negator) && + op_in_opfamily(negator, partopfamily)) + { + int strategy; + Oid lefttype, + righttype; + + get_op_opfamily_properties(negator, partopfamily, + false, &strategy, + &lefttype, &righttype); + if (strategy != BTEqualStrategyNumber) + break; + } + } + + /* + * First generate a list of Const nodes, one for each array + * element. 
+ */ + elem_exprs = NIL; + if (IsA(rightop, Const)) + { + Const *arr = (Const *) lsecond(saop->args); + ArrayType *arrval = DatumGetArrayTypeP(arr->constvalue); + int16 elemlen; + bool elembyval; + char elemalign; + Datum *elem_values; + bool *elem_nulls; + int num_elems; + + get_typlenbyvalalign(ARR_ELEMTYPE(arrval), + &elemlen, &elembyval, &elemalign); + deconstruct_array(arrval, + ARR_ELEMTYPE(arrval), + elemlen, elembyval, elemalign, + &elem_values, &elem_nulls, + &num_elems); + for (i = 0; i < num_elems; i++) + { + /* Only consider non-null values. */ + if (!elem_nulls[i]) + { + Const *elem_expr = makeConst(ARR_ELEMTYPE(arrval), + -1, arr->constcollid, + elemlen, + elem_values[i], + false, elembyval); + + elem_exprs = lappend(elem_exprs, elem_expr); + } + } + } + else + { + ArrayExpr *arrexpr = castNode(ArrayExpr, rightop); + + /* + * For a nested ArrayExpr, we don't know how to get the + * actual scalar values out into a flat list, so we give + * up doing anything with this ScalarArrayOpExpr. + */ + if (arrexpr->multidims) + break; + + elem_exprs = arrexpr->elements; + } + + /* + * Now generate a list of clauses, one for each array element, + * of the form: saop_leftop saop_op elem_expr + */ + elem_clauses = NIL; + foreach(lc1, elem_exprs) + { + Expr *rightop = (Expr *) lfirst(lc1), + *elem_clause; + + elem_clause = (Expr *) make_opclause(saop_op, BOOLOID, + false, + leftop, rightop, + InvalidOid, + saop_coll); + elem_clauses = lappend(elem_clauses, elem_clause); + } + + /* + * Build the OR clause if needed or add the clauses to the end + * of the list that's being processed currently. 
+ */ + if (saop->useOr && list_length(elem_clauses) > 1) + partclauseinfo->or_clauses = + lappend(partclauseinfo->or_clauses, + elem_clauses); + else + clauses = list_concat(clauses, elem_clauses); + } + else if (IsA(clause, NullTest)) + { + NullTest *nulltest = (NullTest *) clause; + Expr *arg = nulltest->arg; + + if (IsA(arg, RelabelType)) + arg = ((RelabelType *) arg)->arg; + + /* Does leftop match with this partition key column? */ + if (equal(arg, partkey)) + { + if (nulltest->nulltesttype == IS_NULL) + { + /* check for conflicting IS NOT NULLs */ + if (bms_is_member(i, partclauseinfo->keyisnotnull)) + { + partclauseinfo->constfalse = true; + return; + } + partclauseinfo->keyisnull = + bms_add_member(partclauseinfo->keyisnull, + i); + } + else + { + /* check for conflicting IS NULLs */ + if (bms_is_member(i, partclauseinfo->keyisnull)) + { + partclauseinfo->constfalse = true; + return; + } + + partclauseinfo->keyisnotnull = + bms_add_member(partclauseinfo->keyisnotnull, + i); + } + } + } + + /* Clause was matched. */ + partclauseinfo->foundkeyclauses = true; + } + } +} + +/* + * match_boolean_partition_clause + * + * Sets *rightop to a Const containing true or false value and returns true if + * we're able to match the clause to the partition key as specially-shaped + * Boolean clause. Returns false otherwise with *rightop set to NULL. + */ +static bool +match_boolean_partition_clause(Expr *clause, Expr *partkey, + Expr **rightop) +{ + Expr *leftop; + + *rightop = NULL; + if (IsA(clause, BooleanTest)) + { + BooleanTest *btest = (BooleanTest *) clause; + + /* Only IS [NOT] TRUE/FALSE are any good to us */ + if (btest->booltesttype == IS_UNKNOWN || + btest->booltesttype == IS_NOT_UNKNOWN) + return false; + + leftop = btest->arg; + if (IsA(leftop, RelabelType)) + leftop = ((RelabelType *) leftop)->arg; + + if (equal(leftop, partkey)) + *rightop = (btest->booltesttype == IS_TRUE || + btest->booltesttype == IS_NOT_FALSE) + ? 
(Expr *) makeBoolConst(true, false) + : (Expr *) makeBoolConst(false, false); + + if (*rightop) + return true; + } + else + { + leftop = not_clause((Node *) clause) + ? get_notclausearg(clause) + : clause; + + if (IsA(leftop, RelabelType)) + leftop = ((RelabelType *) leftop)->arg; + + /* Clause does not match this partition key. */ + if (equal(leftop, partkey)) + *rightop = not_clause((Node *) clause) + ? (Expr *) makeBoolConst(false, false) + : (Expr *) makeBoolConst(true, false); + else if (equal(negate_clause((Node *) leftop), partkey)) + *rightop = (Expr *) makeBoolConst(false, false); + + if (*rightop) + return true; + } + + return false; +} + +/* + * get_partitions_from_or_args + * + * Returns the set of indexes of partitions, each of which satisfies some + * clause in or_args. + */ +static Bitmapset * +get_partitions_from_or_args(PartitionPruneContext *context, List *or_args) +{ + Bitmapset *result = NULL; + ListCell *lc; + + /* + * When matching an OR expression, it is only checked if at least one of + * its args matches the partition key, not all. For arguments that don't + * match, we cannot eliminate any of its partitions using + * get_partitions_from_clauses(). However, if the table is itself a + * partition, we may be able to prove using constraint exclusion that the + * clause refutes its partition constraint, that is, we can eliminate all + * of its partitions. + */ + foreach(lc, or_args) + { + List *clauses = list_make1(lfirst(lc)); + PartitionPruneContext subcontext; + Bitmapset *arg_partset; + + /* + * All fields except clauseinfo are same as in the parent context, + * which will be set by calling extract_partition_clauses(). 
+ */ + memcpy(&subcontext, context, sizeof(PartitionPruneContext)); + extract_partition_clauses(&subcontext, clauses); + + if (!subcontext.clauseinfo->foundkeyclauses) + { + List *partconstr = context->partition_qual; + + if (partconstr) + { + partconstr = (List *) expression_planner((Expr *) partconstr); + if (context->relid != 1) + ChangeVarNodes((Node *) partconstr, 1, context->relid, 0); + if (predicate_refuted_by(partconstr, clauses, false)) + continue; + } + + /* Couldn't eliminate any of the partitions. */ + return bms_add_range(NULL, 0, context->nparts - 1); + } + + if (!subcontext.clauseinfo->constfalse) + arg_partset = get_partitions_from_clauses(&subcontext); + else + arg_partset = NULL; + + result = bms_add_members(result, arg_partset); + bms_free(arg_partset); + } + + return result; +} + +/* + * remove_redundant_clauses + * Processes the clauses contained in context->clauseinfo to remove the + * ones that are superseded by other clauses which are more restrictive. + * + * Finished lists of clauses are returned in *minimalclauses which is an array + * with one slot for each of the partition keys. + * + * For example, x > 1 AND x > 2 and x >= 5, the latter is the most + * restrictive, so 5 is the best minimum bound for x. + * + * We also look for clauses which contradict one another in a way that proves + * that the clauses cannot possibly match any partition. Impossible clauses + * include things like: x = 1 AND x = 2, x = 5 AND x < 5. The function + * returns right after finding such a clause and before returning, sets + * constfalse in context->clauseinfo to inform the caller that we found such a + * clause. 
+ */ +static void +remove_redundant_clauses(PartitionPruneContext *context, + List **minimalclauses) +{ + PartClause *hash_clause, + *btree_clauses[BTMaxStrategyNumber]; + PartitionClauseInfo *partclauseinfo = context->clauseinfo; + ListCell *lc; + int s; + int i; + bool test_result; + + for (i = 0; i < context->partnatts; i++) + { + List *keyclauses = partclauseinfo->keyclauses[i]; + + minimalclauses[i] = NIL; + hash_clause = NULL; + + memset(btree_clauses, 0, sizeof(btree_clauses)); + + foreach(lc, keyclauses) + { + PartClause *pc = (PartClause *) lfirst(lc); + + if (!pc->valid_cache) + { + Oid lefttype; + + get_op_opfamily_properties(pc->opno, + context->partopfamily[i], + false, + &pc->op_strategy, + &lefttype, + &pc->op_subtype); + fmgr_info(get_opcode(pc->opno), &pc->op_func); + pc->valid_cache = true; + } + + /* + * Hash-partitioning knows only about equality. So, if we've + * matched a clause and found another clause whose constant + * operand doesn't match the constant operand of the former, then + * we have found mutually contradictory clauses. + */ + if (context->strategy == PARTITION_STRATEGY_HASH) + { + if (hash_clause == NULL) + hash_clause = pc; + /* check if another clause would contradict the one we have */ + else if (partition_cmp_args(context->parttypid[i], + context->partopfamily[i], + pc, pc, hash_clause, + &test_result)) + { + if (!test_result) + { + partclauseinfo->constfalse = true; + return; + } + } + /* + * Couldn't compare; keep hash_clause set to the previous value, + * and add this one directly to the result. Caller would + * arbitrarily choose one of the many and perform + * partition-pruning with it. + */ + else + minimalclauses[i] = lappend(minimalclauses[i], pc); + + /* + * The code below handles btree operators, so not relevant for + * hash partitioning. + */ + continue; + } + + /* + * The code that follows closely mimics similar processing done by + * nbtutils.c: _bt_preprocess_keys(). 
+ * + * btree_clauses[s] points currently best clause containing the + * operator strategy type s+1; it is NULL if we haven't yet found + * such a clause. + */ + s = pc->op_strategy - 1; + if (btree_clauses[s] == NULL) + { + btree_clauses[s] = pc; + } + else + { + /* + * Is this one more restrictive than what we already have? + * + * Consider some examples: 1. If btree_clauses[BTLT] now contains + * a < 5, and pc is a < 3, then because 3 < 5 is true, a < 5 + * currently at btree_clauses[BTLT] will be replaced by a < 3. + * + * 2. If btree_clauses[BTEQ] now contains a = 5 and pc is a = 7, + * then because 5 = 7 is false, we found a mutual contradiction, + * so we set *constfalse to true and return. + * + * 3. If btree_clauses[BTLT] now contains a < 5 and pc is a < 7, + * then because 7 < 5 is false, we leave a < 5 where it is and + * effectively discard a < 7 as being redundant. + */ + if (partition_cmp_args(context->parttypid[i], + context->partopfamily[i], + pc, pc, btree_clauses[s], + &test_result)) + { + /* pc is more restrictive, so replace the existing. */ + if (test_result) + btree_clauses[s] = pc; + else if (s == BTEqualStrategyNumber - 1) + { + partclauseinfo->constfalse = true; + return; + } + + /* Old one is more restrictive, so keep around. */ + } + else + { + /* + * We couldn't determine which one is more restrictive. Keep + * the previous one in btree_clauses[s] and push this one directly + * to the output list. + */ + minimalclauses[i] = lappend(minimalclauses[i], pc); + } + } + } + + if (context->strategy == PARTITION_STRATEGY_HASH) + { + /* Note we didn't add this one to the result yet. */ + if (hash_clause) + minimalclauses[i] = lappend(minimalclauses[i], hash_clause); + continue; + } + + /* Compare btree operator clauses across strategies. */ + + /* Compare the equality clause with clauses of other strategies. 
*/ + if (btree_clauses[BTEqualStrategyNumber - 1]) + { + PartClause *eq = btree_clauses[BTEqualStrategyNumber - 1]; + + for (s = 0; s < BTMaxStrategyNumber; s++) + { + PartClause *chk = btree_clauses[s]; + + if (!chk || s == (BTEqualStrategyNumber - 1)) + continue; + + /* + * Suppose btree_clauses[BTLT] contained a < 5 and the eq clause + * is a = 5, then because 5 < 5 is false, we found contradiction. + * That is, a < 5 and a = 5 are mutually contradictory. OTOH, if + * eq clause is a = 3, then because 3 < 5, we no longer need + * a < 5, because a = 3 is more restrictive. + */ + if (partition_cmp_args(context->parttypid[i], + context->partopfamily[i], + chk, eq, chk, + &test_result)) + { + if (!test_result) + { + partclauseinfo->constfalse = true; + return; + } + /* Discard the no longer needed clause. */ + btree_clauses[s] = NULL; + } + } + } + + /* + * Try to keep only one of <, <=. + * + * Suppose btree_clauses[BTLT] contains a < 3 and btree_clauses[BTLE] + * contains a <= 3 (or a <= 4), then because 3 <= 3 (or 3 <= 4) is true, + * we discard the a <= 3 (or a <= 4) as redundant. If the latter + * contains a <= 2, then because 3 <= 2 is false, we discard a < 3 as + * redundant. + */ + if (btree_clauses[BTLessStrategyNumber - 1] && + btree_clauses[BTLessEqualStrategyNumber - 1]) + { + PartClause *lt = btree_clauses[BTLessStrategyNumber - 1], + *le = btree_clauses[BTLessEqualStrategyNumber - 1]; + + if (partition_cmp_args(context->parttypid[i], + context->partopfamily[i], + le, lt, le, + &test_result)) + { + if (test_result) + btree_clauses[BTLessEqualStrategyNumber - 1] = NULL; + else + btree_clauses[BTLessStrategyNumber - 1] = NULL; + } + } + + /* Try to keep only one of >, >=. See the example above. 
*/ + if (btree_clauses[BTGreaterStrategyNumber - 1] && + btree_clauses[BTGreaterEqualStrategyNumber - 1]) + { + PartClause *gt = btree_clauses[BTGreaterStrategyNumber - 1], + *ge = btree_clauses[BTGreaterEqualStrategyNumber - 1]; + + if (partition_cmp_args(context->parttypid[i], + context->partopfamily[i], + ge, gt, ge, + &test_result)) + { + if (test_result) + btree_clauses[BTGreaterEqualStrategyNumber - 1] = NULL; + else + btree_clauses[BTGreaterStrategyNumber - 1] = NULL; + } + } + + /* + * btree_clauses now contains the "best" clause or NULL for each btree + * strategy number. Add to the newlist. + */ + for (s = 0; s < BTMaxStrategyNumber; s++) + { + if (btree_clauses[s]) + minimalclauses[i] = lappend(minimalclauses[i], + btree_clauses[s]); + } + } +} + +/* + * partition_cmp_args + * Try to compare the constant arguments of 'leftarg' and 'rightarg', in + * that order, using the operator of 'op' and set *result to the result + * of this comparison. + * + * Returns true if we could actually perform the comparison; otherwise false. + * + * Note: We may not be able to perform the comparison if operand values are + * unknown in this context or if the type of any of the operands are + * incompatible with the operator. + */ +static bool +partition_cmp_args(Oid parttypid, Oid partopfamily, + PartClause *pc, PartClause *leftarg, PartClause *rightarg, + bool *result) +{ + Datum left_value; + Datum right_value; + + Assert(pc->valid_cache && leftarg->valid_cache && rightarg->valid_cache); + + /* + * Try to extract an actual value from each arg. This may fail if the + * value is unknown in this context, in which case we cannot compare. + */ + if (!partkey_datum_from_expr(parttypid, leftarg->value, &left_value)) + return false; + + if (!partkey_datum_from_expr(parttypid, rightarg->value, &right_value)) + return false; + + /* + * We can compare left_value and right_value using op's operator + * only if both are of the expected type. 
+ */ + if (leftarg->op_subtype == pc->op_subtype && + rightarg->op_subtype == pc->op_subtype) + { + *result = DatumGetBool(FunctionCall2Coll(&pc->op_func, + pc->inputcollid, + left_value, + right_value)); + return true; + } + else + { + Oid cmp_op; + + /* Otherwise, look one up in the partitioning operator family. */ + cmp_op = get_opfamily_member(partopfamily, + leftarg->op_subtype, + rightarg->op_subtype, + pc->op_strategy); + if (OidIsValid(cmp_op)) + { + *result = DatumGetBool(OidFunctionCall2Coll(get_opcode(cmp_op), + pc->inputcollid, + left_value, + right_value)); + return true; + } + } + + /* Couldn't do the comparison. */ + *result = false; + return false; +} + +/* + * extract_bounding_datums + * Process clauses in context->clauseinfo and populate 'keys' with all + * min/max/equal/not-equal values that we're able to determine. + * + * *minimalclauses is an array with partnatts members, each of which is a list + * of the most restrictive clauses of each operator strategy for the given + * partition key. + * + * For RANGE partitioning we do not need to match and find values for all + * partition keys. We may be able to eliminate some partitions with just a + * prefix of the partition keys. HASH partitioning does require all keys are + * matched to with at least some combinations of equality clauses and IS NULL + * clauses. LIST partitions don't support multiple partition keys. + * + * Returns true if at least one key was found; false otherwise. + */ +static bool +extract_bounding_datums(PartitionPruneContext *context, + List **minimalclauses, PartScanKeyInfo *keys) +{ + PartitionClauseInfo *clauseinfo = context->clauseinfo; + bool need_next_eq, + need_next_min, + need_next_max; + int i; + ListCell *lc; + + /* + * Based on the strategies of the clauses' operators (=, </<=, >/>=), try + * to construct a tuple of those datums that serve as the exact lookup + * tuple or two tuples that serve as minimum and maximum bound. 
+ * + * If we find datums for all partition key columns that appear in = + * operator clauses, then we have the exact match lookup tuple, which will + * be used to match just one partition (although that's required only for + * range partitioning, finding datums for just some columns is fine for + * hash partitioning). + * + * If the last datum in a tuple comes from a clause containing </<= or + * >/>= operator, then that constitutes the minimum or maximum bound tuple, + * respectively. There is one exception -- if we have a tuple containing + * values for only a prefix of partition key columns, where none of its + * values come from a </<= or >/>= operator clause, we still consider such + * tuple as both minimum and maximum bound tuple. + */ + need_next_eq = true; + need_next_min = true; + need_next_max = true; + memset(keys, 0, sizeof(PartScanKeyInfo)); + for (i = 0; i < context->partnatts; i++) + { + List *clauselist = minimalclauses[i]; + + /* + * Min and max keys must constitute a prefix of the partition key and + * must appear in the same order as partition keys. Equal keys have + * to satisfy that requirement only for non-hash partitioning. 
+ */ + if (i > keys->n_eqkeys && + context->strategy != PARTITION_STRATEGY_HASH) + need_next_eq = false; + + if (i > keys->n_minkeys) + need_next_min = false; + + if (i > keys->n_maxkeys) + need_next_max = false; + + foreach(lc, clauselist) + { + PartClause *clause = (PartClause *) lfirst(lc); + Expr *value = clause->value; + bool incl; + PartOpStrategy op_strategy; + + op_strategy = partition_op_strategy(context->strategy, clause, + &incl); + switch (op_strategy) + { + case PART_OP_EQUAL: + Assert(incl); + if (need_next_eq && + partkey_datum_from_expr(context->parttypid[i], value, + &keys->eqkeys[i])) + keys->n_eqkeys++; + + if (need_next_max && + partkey_datum_from_expr(context->parttypid[i], value, + &keys->maxkeys[i])) + { + keys->n_maxkeys++; + keys->max_incl = true; + } + + if (need_next_min && + partkey_datum_from_expr(context->parttypid[i], value, + &keys->minkeys[i])) + { + keys->n_minkeys++; + keys->min_incl = true; + } + break; + + case PART_OP_LESS: + if (need_next_max && + partkey_datum_from_expr(context->parttypid[i], value, + &keys->maxkeys[i])) + { + keys->n_maxkeys++; + keys->max_incl = incl; + if (!incl) + need_next_eq = need_next_max = false; + } + break; + + case PART_OP_GREATER: + if (need_next_min && + partkey_datum_from_expr(context->parttypid[i], value, + &keys->minkeys[i])) + { + keys->n_minkeys++; + keys->min_incl = incl; + if (!incl) + need_next_eq = need_next_min = false; + } + break; + + default: + Assert(false); + } + } + } + + /* + * To set eqkeys, we must have found matching clauses containing = + * operator for all partition key columns and if present we don't need + * the values in minkeys and maxkeys anymore. In the case hash + * partitioning, we don't require all of eqkeys to be operator clauses. 
+ * In that case, any IS NULL clauses involving partition key columns are + * also considered as equality keys by the code for hash partition pruning, + * which checks that all partition columns are covered before actually + * performing the pruning. + */ + if (keys->n_eqkeys == context->partnatts || + context->strategy == PARTITION_STRATEGY_HASH) + keys->n_minkeys = keys->n_maxkeys = 0; + else + keys->n_eqkeys = 0; + + /* Collect datums from <> operator clauses in its dedicated array. */ + if (clauseinfo->ne_clauses) + { + Assert(context->strategy == PARTITION_STRATEGY_LIST); + keys->ne_datums = (Datum *) + palloc(list_length(clauseinfo->ne_clauses) * + sizeof(Datum)); + i = 0; + foreach(lc, clauseinfo->ne_clauses) + { + PartClause *pc = (PartClause *) lfirst(lc); + Datum datum; + + if (partkey_datum_from_expr(context->parttypid[0], pc->value, + &datum)) + keys->ne_datums[i++] = datum; + } + keys->n_ne_datums = i; + } + + /* Finally, also set the keyisnull and keyisnotnull values. */ + keys->keyisnull = clauseinfo->keyisnull; + keys->keyisnotnull = clauseinfo->keyisnotnull; + + return (keys->n_eqkeys > 0 || keys->n_minkeys > 0 || + keys->n_maxkeys > 0 || keys->n_ne_datums > 0 || + !bms_is_empty(keys->keyisnull) || + !bms_is_empty(keys->keyisnotnull)); +} + +/* + * partition_op_strategy + * Returns whether the clause in 'pc' contains an =, </<=, or >/>= + * operator and set *incl to true if the operator's strategy is + * inclusive. + */ +static PartOpStrategy +partition_op_strategy(char part_strategy, PartClause *pc, bool *incl) +{ + *incl = false; /* may be overwritten below */ + + switch (part_strategy) + { + /* Hash partitioning allows only hash equality. */ + case PARTITION_STRATEGY_HASH: + if (pc->op_strategy == HTEqualStrategyNumber) + { + *incl = true; + return PART_OP_EQUAL; + } + elog(ERROR, "unexpected operator strategy number: %d", + pc->op_strategy); + + /* List and range partitioning support all btree operators. 
*/ + case PARTITION_STRATEGY_LIST: + case PARTITION_STRATEGY_RANGE: + switch (pc->op_strategy) + { + case BTLessEqualStrategyNumber: + *incl = true; + /* fall through */ + + case BTLessStrategyNumber: + return PART_OP_LESS; + + case BTEqualStrategyNumber: + *incl = true; + return PART_OP_EQUAL; + + case BTGreaterEqualStrategyNumber: + *incl = true; + /* fall through */ + + case BTGreaterStrategyNumber: + return PART_OP_GREATER; + } + + default: + elog(ERROR, "unexpected partition strategy: %d", + (int) part_strategy); + } + + return PART_OP_EQUAL; /* keep compiler quiet */ +} + +/* + * partkey_datum_from_expr + * Set *value to the constant value obtained by evaluating 'expr' + * + * Note that we may not be able to evaluate the input expression, in which + * case, the function returns false to indicate that *value has not been + * set. True is returned otherwise. + */ +static bool +partkey_datum_from_expr(Oid parttypid, Expr *expr, Datum *value) +{ + Oid exprtype = exprType((Node *) expr); + + if (exprtype != parttypid) + { + ParseState *pstate = make_parsestate(NULL); + + expr = (Expr *) coerce_to_target_type(pstate, (Node *) expr, + exprtype, + parttypid, -1, + COERCION_EXPLICIT, + COERCE_IMPLICIT_CAST, -1); + free_parsestate(pstate); + + /* + * If we couldn't coerce to the partition key's type, that is, the + * type of the datums stored in PartitionBoundInfo for this partition + * key, there's no hope of using this expression for anything + * partitioning-related. + */ + if (expr == NULL) + return false; + + /* + * Transform into a form that the following code can do something + * useful with. + */ + expr = evaluate_expr(expr, + exprType((Node *) expr), + exprTypmod((Node *) expr), + exprCollation((Node *) expr)); + } + + /* + * Add more expression types here as needed to support the requirements + * of the higher-level code. 
+ */ + if (IsA(expr, Const)) + { + *value = ((Const *) expr)->constvalue; + return true; + } + + return false; +} diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index dcfc1665a8..f3063be6d9 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -1171,7 +1171,6 @@ get_relation_constraints(PlannerInfo *root, Index varno = rel->relid; Relation relation; TupleConstr *constr; - List *pcqual; /* * We assume the relation has already been safely locked. @@ -1257,22 +1256,32 @@ get_relation_constraints(PlannerInfo *root, } } - /* Append partition predicates, if any */ - pcqual = RelationGetPartitionQual(relation); - if (pcqual) + /* + * Append partition predicates, if any. + * + * For selects, partition pruning uses the parent table's partition bound + * descriptor, instead of constraint exclusion which is driven by the + * individual partition's partition constraint. + */ + if (root->parse->commandType != CMD_SELECT) { - /* - * Run each expression through const-simplification and - * canonicalization similar to check constraints. - */ - pcqual = (List *) eval_const_expressions(root, (Node *) pcqual); - pcqual = (List *) canonicalize_qual((Expr *) pcqual); + List *pcqual = RelationGetPartitionQual(relation); - /* Fix Vars to have the desired varno */ - if (varno != 1) - ChangeVarNodes((Node *) pcqual, 1, varno, 0); + if (pcqual) + { + /* + * Run each expression through const-simplification and + * canonicalization similar to check constraints. 
+ */ + pcqual = (List *) eval_const_expressions(root, (Node *) pcqual); + pcqual = (List *) canonicalize_qual((Expr *) pcqual); - result = list_concat(result, pcqual); + /* Fix Vars to have the desired varno */ + if (varno != 1) + ChangeVarNodes((Node *) pcqual, 1, varno, 0); + + result = list_concat(result, pcqual); + } } heap_close(relation, NoLock); @@ -1856,6 +1865,11 @@ set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel, rel->boundinfo = partition_bounds_copy(partdesc->boundinfo, partkey); rel->nparts = partdesc->nparts; set_baserel_partition_key_exprs(relation, rel); + if (OidIsValid(get_default_oid_from_partdesc(partdesc))) + rel->has_default_part = true; + else + rel->has_default_part = false; + rel->partition_qual = RelationGetPartitionQual(relation); } /* diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index da8f0f93fc..7f1428b8d8 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -154,6 +154,8 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) rel->part_scheme = NULL; rel->nparts = 0; rel->boundinfo = NULL; + rel->has_default_part = false; + rel->partition_qual = NIL; rel->part_rels = NULL; rel->partexprs = NULL; rel->nullable_partexprs = NULL; @@ -567,6 +569,8 @@ build_join_rel(PlannerInfo *root, joinrel->part_scheme = NULL; joinrel->nparts = 0; joinrel->boundinfo = NULL; + joinrel->has_default_part = false; + joinrel->partition_qual = NIL; joinrel->part_rels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; @@ -734,6 +738,10 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, joinrel->has_eclass_joins = false; joinrel->top_parent_relids = NULL; joinrel->part_scheme = NULL; + joinrel->nparts = 0; + joinrel->boundinfo = NULL; + joinrel->has_default_part = false; + joinrel->partition_qual = NIL; joinrel->part_rels = NULL; joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; diff --git 
a/src/include/catalog/partition.h b/src/include/catalog/partition.h index 2faf0ca26e..ed27ca921e 100644 --- a/src/include/catalog/partition.h +++ b/src/include/catalog/partition.h @@ -42,6 +42,94 @@ typedef struct PartitionDescData typedef struct PartitionDescData *PartitionDesc; +typedef struct PartitionPruneContext +{ + /* Table's range table index */ + int relid; + + /* Partition key information */ + char strategy; + int partnatts; + Expr **partkeys; + Oid *parttypid; + Oid *partopfamily; + Oid *partcollation; + FmgrInfo *partsupfunc; + + /* Number of partitions */ + int nparts; + + /* Is one of the partitions the default partition */ + bool has_default_part; + + /* Partition qual if this's not the root partitioned table */ + List *partition_qual; + + /* Partition boundary info */ + PartitionBoundInfo boundinfo; + + /* Information about matched clauses */ + struct PartitionClauseInfo *clauseinfo; +} PartitionPruneContext; + +/* + * PartScanKeyInfo + * Information about partition look up keys to be passed to + * get_partitions_for_keys() + * + * Stores Datums and nullness properties found in clauses which match the + * partition key. Datum arrays eqkeys, minkeys, and maxkeys are indexed by + * partition key number, whereas ne_datums is not. Bitmapsets keyisnull and + * keyisnotnull have a bit for each partition key. + */ +typedef struct PartScanKeyInfo +{ + /* + * Equality look up key. Used to store known Datums values from clauses + * compared by an equality operation to the partition key. + */ + Datum eqkeys[PARTITION_MAX_KEYS]; + + /* + * Lower and upper bounds on a sequence of selected partitions. These may + * contain values for only a prefix of the partition keys. 
+ */ + Datum minkeys[PARTITION_MAX_KEYS]; + Datum maxkeys[PARTITION_MAX_KEYS]; + + /* + * Number of values stored in the corresponding array above + */ + int n_eqkeys; + int n_minkeys; + int n_maxkeys; + + /* + * Properties to mark if the clauses corresponding to the datums stored in + * minkeys and maxkeys, respectively, are inclusive of the stored value or + * not. + */ + bool min_incl; + bool max_incl; + + /* + * Datum values from clauses containing <> operator. Note that, unlike + * the arrays above, the following array is not indexed by partition + * key. We only ever use this array for list partitioning and there + * can only be one partition key in that case anyway. + */ + Datum *ne_datums; + int n_ne_datums; + + /* + * Information about nullness of the partition keys, either specified + * explicitly in the query (in the form of a IS [NOT] NULL clause) or + * implied from strict clauses matching the partition key. + */ + Bitmapset *keyisnull; + Bitmapset *keyisnotnull; +} PartScanKeyInfo; + extern void RelationBuildPartitionDesc(Relation relation); extern bool partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval, PartitionBoundInfo b1, @@ -73,4 +161,8 @@ extern List *get_proposed_default_constraint(List *new_part_constaints); extern int get_partition_for_tuple(Relation relation, Datum *values, bool *isnull); +/* For partition-pruning */ +extern Bitmapset *get_partitions_for_keys(PartitionPruneContext *context, + PartScanKeyInfo *keys); + #endif /* PARTITION_H */ diff --git a/src/include/catalog/pg_opfamily.h b/src/include/catalog/pg_opfamily.h index b544474254..0847df97ff 100644 --- a/src/include/catalog/pg_opfamily.h +++ b/src/include/catalog/pg_opfamily.h @@ -188,4 +188,7 @@ DATA(insert OID = 4104 ( 3580 box_inclusion_ops PGNSP PGUID )); DATA(insert OID = 5000 ( 4000 box_ops PGNSP PGUID )); DATA(insert OID = 5008 ( 4000 poly_ops PGNSP PGUID )); +#define IsBooleanOpfamily(opfamily) \ + ((opfamily) == BOOL_BTREE_FAM_OID || (opfamily) 
== BOOL_HASH_FAM_OID) + #endif /* PG_OPFAMILY_H */ diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index ce9975c620..5ee23a5bb5 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -538,6 +538,8 @@ typedef struct PartitionSchemeData *PartitionScheme; * part_scheme - Partitioning scheme of the relation * boundinfo - Partition bounds * nparts - Number of partitions + * has_default_part - Whether the table has a default partition + * partition_qual - Partition constraint if not the root * part_rels - RelOptInfos for each partition * partexprs, nullable_partexprs - Partition key expressions * @@ -666,6 +668,8 @@ typedef struct RelOptInfo PartitionScheme part_scheme; /* Partitioning scheme. */ int nparts; /* number of partitions */ struct PartitionBoundInfoData *boundinfo; /* Partition bounds */ + bool has_default_part; /* does it have a default partition? */ + List *partition_qual; /* partition constraint */ struct RelOptInfo **part_rels; /* Array of RelOptInfos of partitions, * stored in the same order of bounds */ List **partexprs; /* Non-nullable partition key expressions. 
*/ diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index ba4fa4b68b..3c2f54964b 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -84,5 +84,7 @@ extern Node *estimate_expression_value(PlannerInfo *root, Node *node); extern Query *inline_set_returning_function(PlannerInfo *root, RangeTblEntry *rte); +extern Expr *evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod, + Oid result_collation); #endif /* CLAUSES_H */ diff --git a/src/include/optimizer/partprune.h b/src/include/optimizer/partprune.h new file mode 100644 index 0000000000..2b84ed90bf --- /dev/null +++ b/src/include/optimizer/partprune.h @@ -0,0 +1,25 @@ +/*------------------------------------------------------------------------- + * + * partprune.h + * prototypes for partprune.c + * + * + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/optimizer/partprune.h + * + *------------------------------------------------------------------------- + */ +#ifndef PARTPRUNE_H +#define PARTPRUNE_H + +#include "catalog/partition.h" + +extern Relids prune_append_rel_partitions(PlannerInfo *root, + RelOptInfo *rel); +extern void generate_partition_clauses(PartitionPruneContext *context, + List *clauses); +extern Bitmapset *get_partitions_from_clauses(PartitionPruneContext *context); + +#endif /* PARTPRUNE_H */ diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out index a79f891da7..11a259ca25 100644 --- a/src/test/regress/expected/inherit.out +++ b/src/test/regress/expected/inherit.out @@ -1715,11 +1715,7 @@ explain (costs off) select * from list_parted where a = 'ab' or a in (null, 'cd' Append -> Seq Scan on part_ab_cd Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[]))) - -> Seq Scan on part_ef_gh - Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY 
('{NULL,cd}'::text[]))) - -> Seq Scan on part_null_xy - Filter: (((a)::text = 'ab'::text) OR ((a)::text = ANY ('{NULL,cd}'::text[]))) -(7 rows) +(3 rows) explain (costs off) select * from list_parted where a = 'ab'; QUERY PLAN @@ -1906,11 +1902,13 @@ explain (costs off) select * from mcrparted where abs(b) = 5; -- scans all parti Filter: (abs(b) = 5) -> Seq Scan on mcrparted3 Filter: (abs(b) = 5) + -> Seq Scan on mcrparted4 + Filter: (abs(b) = 5) -> Seq Scan on mcrparted5 Filter: (abs(b) = 5) -> Seq Scan on mcrparted_def Filter: (abs(b) = 5) -(13 rows) +(15 rows) explain (costs off) select * from mcrparted where a > -1; -- scans all partitions QUERY PLAN diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out index 348719bd62..948cad4c3d 100644 --- a/src/test/regress/expected/partition_prune.out +++ b/src/test/regress/expected/partition_prune.out @@ -208,16 +208,14 @@ explain (costs off) select * from rlp where 1 > a; /* commuted */ (3 rows) explain (costs off) select * from rlp where a <= 1; - QUERY PLAN ---------------------------------------- + QUERY PLAN +-------------------------- Append -> Seq Scan on rlp1 Filter: (a <= 1) -> Seq Scan on rlp2 Filter: (a <= 1) - -> Seq Scan on rlp_default_default - Filter: (a <= 1) -(7 rows) +(5 rows) explain (costs off) select * from rlp where a = 1; QUERY PLAN @@ -519,15 +517,13 @@ explain (costs off) select * from rlp where a <= 31; Filter: (a <= 31) -> Seq Scan on rlp5_1 Filter: (a <= 31) - -> Seq Scan on rlp5_default - Filter: (a <= 31) -> Seq Scan on rlp_default_10 Filter: (a <= 31) -> Seq Scan on rlp_default_30 Filter: (a <= 31) -> Seq Scan on rlp_default_default Filter: (a <= 31) -(29 rows) +(27 rows) explain (costs off) select * from rlp where a = 1 or a = 7; QUERY PLAN @@ -575,9 +571,7 @@ explain (costs off) select * from rlp where a > 20 and a < 27; Filter: ((a > 20) AND (a < 27)) -> Seq Scan on rlp4_2 Filter: ((a > 20) AND (a < 27)) - -> Seq Scan on rlp4_default - 
Filter: ((a > 20) AND (a < 27)) -(7 rows) +(5 rows) explain (costs off) select * from rlp where a = 29; QUERY PLAN @@ -651,8 +645,6 @@ explain (costs off) select * from rlp where (a = 1 and a = 3) or (a > 1 and a = QUERY PLAN ------------------------------------------------------------------- Append - -> Seq Scan on rlp2 - Filter: (((a = 1) AND (a = 3)) OR ((a > 1) AND (a = 15))) -> Seq Scan on rlp3abcd Filter: (((a = 1) AND (a = 3)) OR ((a > 1) AND (a = 15))) -> Seq Scan on rlp3efgh @@ -661,7 +653,7 @@ explain (costs off) select * from rlp where (a = 1 and a = 3) or (a > 1 and a = Filter: (((a = 1) AND (a = 3)) OR ((a > 1) AND (a = 15))) -> Seq Scan on rlp3_default Filter: (((a = 1) AND (a = 3)) OR ((a > 1) AND (a = 15))) -(11 rows) +(9 rows) -- multi-column keys create table mc3p (a int, b int, c int) partition by range (a, abs(b), c); @@ -716,9 +708,7 @@ explain (costs off) select * from mc3p where a = 1 and abs(b) = 1 and c < 8; Filter: ((c < 8) AND (a = 1) AND (abs(b) = 1)) -> Seq Scan on mc3p1 Filter: ((c < 8) AND (a = 1) AND (abs(b) = 1)) - -> Seq Scan on mc3p_default - Filter: ((c < 8) AND (a = 1) AND (abs(b) = 1)) -(7 rows) +(5 rows) explain (costs off) select * from mc3p where a = 10 and abs(b) between 5 and 35; QUERY PLAN @@ -894,6 +884,8 @@ explain (costs off) select * from mc3p where a = 1 or abs(b) = 1 or c = 1; Filter: ((a = 1) OR (abs(b) = 1) OR (c = 1)) -> Seq Scan on mc3p2 Filter: ((a = 1) OR (abs(b) = 1) OR (c = 1)) + -> Seq Scan on mc3p3 + Filter: ((a = 1) OR (abs(b) = 1) OR (c = 1)) -> Seq Scan on mc3p4 Filter: ((a = 1) OR (abs(b) = 1) OR (c = 1)) -> Seq Scan on mc3p5 @@ -904,7 +896,7 @@ explain (costs off) select * from mc3p where a = 1 or abs(b) = 1 or c = 1; Filter: ((a = 1) OR (abs(b) = 1) OR (c = 1)) -> Seq Scan on mc3p_default Filter: ((a = 1) OR (abs(b) = 1) OR (c = 1)) -(17 rows) +(19 rows) explain (costs off) select * from mc3p where (a = 1 and abs(b) = 1) or (a = 10 and abs(b) = 10); QUERY PLAN @@ -965,9 +957,11 @@ explain (costs off) 
select * from mc2p where a = 2 and b < 1; QUERY PLAN --------------------------------------- Append + -> Seq Scan on mc2p2 + Filter: ((b < 1) AND (a = 2)) -> Seq Scan on mc2p3 Filter: ((b < 1) AND (a = 2)) -(3 rows) +(5 rows) explain (costs off) select * from mc2p where a > 1; QUERY PLAN @@ -1009,24 +1003,20 @@ explain (costs off) select * from boolpart where a in (true, false); (5 rows) explain (costs off) select * from boolpart where a = false; - QUERY PLAN ------------------------------------- + QUERY PLAN +------------------------------ Append -> Seq Scan on boolpart_f Filter: (NOT a) - -> Seq Scan on boolpart_default - Filter: (NOT a) -(5 rows) +(3 rows) explain (costs off) select * from boolpart where not a = false; - QUERY PLAN ------------------------------------- + QUERY PLAN +------------------------------ Append -> Seq Scan on boolpart_t Filter: a - -> Seq Scan on boolpart_default - Filter: a -(5 rows) +(3 rows) explain (costs off) select * from boolpart where a is true or a is not true; QUERY PLAN @@ -1036,33 +1026,22 @@ explain (costs off) select * from boolpart where a is true or a is not true; Filter: ((a IS TRUE) OR (a IS NOT TRUE)) -> Seq Scan on boolpart_t Filter: ((a IS TRUE) OR (a IS NOT TRUE)) - -> Seq Scan on boolpart_default - Filter: ((a IS TRUE) OR (a IS NOT TRUE)) -(7 rows) +(5 rows) explain (costs off) select * from boolpart where a is not true; - QUERY PLAN ------------------------------------- + QUERY PLAN +--------------------------------- Append -> Seq Scan on boolpart_f Filter: (a IS NOT TRUE) - -> Seq Scan on boolpart_t - Filter: (a IS NOT TRUE) - -> Seq Scan on boolpart_default - Filter: (a IS NOT TRUE) -(7 rows) +(3 rows) explain (costs off) select * from boolpart where a is not true and a is not false; - QUERY PLAN --------------------------------------------------------- - Append - -> Seq Scan on boolpart_f - Filter: ((a IS NOT TRUE) AND (a IS NOT FALSE)) - -> Seq Scan on boolpart_t - Filter: ((a IS NOT TRUE) AND (a IS NOT 
FALSE)) - -> Seq Scan on boolpart_default - Filter: ((a IS NOT TRUE) AND (a IS NOT FALSE)) -(7 rows) + QUERY PLAN +-------------------------- + Result + One-Time Filter: false +(2 rows) explain (costs off) select * from boolpart where a is unknown; QUERY PLAN @@ -1088,4 +1067,411 @@ explain (costs off) select * from boolpart where a is not unknown; Filter: (a IS NOT UNKNOWN) (7 rows) -drop table lp, coll_pruning, rlp, mc3p, mc2p, boolpart; +-- hash partitioning +create table hp (a int, b text) partition by hash (a, b); +create table hp0 partition of hp for values with (modulus 4, remainder 0); +create table hp3 partition of hp for values with (modulus 4, remainder 3); +create table hp1 partition of hp for values with (modulus 4, remainder 1); +create table hp2 partition of hp for values with (modulus 4, remainder 2); +insert into hp values (null, null); +insert into hp values (1, null); +insert into hp values (1, 'xxx'); +insert into hp values (null, 'xxx'); +insert into hp values (10, 'xxx'); +insert into hp values (10, 'yyy'); +select tableoid::regclass, * from hp order by 1; + tableoid | a | b +----------+----+----- + hp0 | | + hp0 | 1 | + hp0 | 1 | xxx + hp3 | 10 | yyy + hp1 | | xxx + hp2 | 10 | xxx +(6 rows) + +-- partial keys won't prune, nor would non-equality conditions +explain (costs off) select * from hp where a = 1; + QUERY PLAN +------------------------- + Append + -> Seq Scan on hp0 + Filter: (a = 1) + -> Seq Scan on hp1 + Filter: (a = 1) + -> Seq Scan on hp2 + Filter: (a = 1) + -> Seq Scan on hp3 + Filter: (a = 1) +(9 rows) + +explain (costs off) select * from hp where b = 'xxx'; + QUERY PLAN +----------------------------------- + Append + -> Seq Scan on hp0 + Filter: (b = 'xxx'::text) + -> Seq Scan on hp1 + Filter: (b = 'xxx'::text) + -> Seq Scan on hp2 + Filter: (b = 'xxx'::text) + -> Seq Scan on hp3 + Filter: (b = 'xxx'::text) +(9 rows) + +explain (costs off) select * from hp where a is null; + QUERY PLAN +----------------------------- + Append + 
-> Seq Scan on hp0 + Filter: (a IS NULL) + -> Seq Scan on hp1 + Filter: (a IS NULL) + -> Seq Scan on hp2 + Filter: (a IS NULL) + -> Seq Scan on hp3 + Filter: (a IS NULL) +(9 rows) + +explain (costs off) select * from hp where b is null; + QUERY PLAN +----------------------------- + Append + -> Seq Scan on hp0 + Filter: (b IS NULL) + -> Seq Scan on hp1 + Filter: (b IS NULL) + -> Seq Scan on hp2 + Filter: (b IS NULL) + -> Seq Scan on hp3 + Filter: (b IS NULL) +(9 rows) + +explain (costs off) select * from hp where a < 1 and b = 'xxx'; + QUERY PLAN +------------------------------------------------- + Append + -> Seq Scan on hp0 + Filter: ((a < 1) AND (b = 'xxx'::text)) + -> Seq Scan on hp1 + Filter: ((a < 1) AND (b = 'xxx'::text)) + -> Seq Scan on hp2 + Filter: ((a < 1) AND (b = 'xxx'::text)) + -> Seq Scan on hp3 + Filter: ((a < 1) AND (b = 'xxx'::text)) +(9 rows) + +explain (costs off) select * from hp where a <> 1 and b = 'yyy'; + QUERY PLAN +-------------------------------------------------- + Append + -> Seq Scan on hp0 + Filter: ((a <> 1) AND (b = 'yyy'::text)) + -> Seq Scan on hp1 + Filter: ((a <> 1) AND (b = 'yyy'::text)) + -> Seq Scan on hp2 + Filter: ((a <> 1) AND (b = 'yyy'::text)) + -> Seq Scan on hp3 + Filter: ((a <> 1) AND (b = 'yyy'::text)) +(9 rows) + +-- pruning should work in all cases below +explain (costs off) select * from hp where a is null and b is null; + QUERY PLAN +----------------------------------------------- + Append + -> Seq Scan on hp0 + Filter: ((a IS NULL) AND (b IS NULL)) +(3 rows) + +explain (costs off) select * from hp where a = 1 and b is null; + QUERY PLAN +------------------------------------------- + Append + -> Seq Scan on hp0 + Filter: ((b IS NULL) AND (a = 1)) +(3 rows) + +explain (costs off) select * from hp where a = 1 and b = 'xxx'; + QUERY PLAN +------------------------------------------------- + Append + -> Seq Scan on hp0 + Filter: ((a = 1) AND (b = 'xxx'::text)) +(3 rows) + +explain (costs off) select * from hp where a 
is null and b = 'xxx'; + QUERY PLAN +----------------------------------------------------- + Append + -> Seq Scan on hp1 + Filter: ((a IS NULL) AND (b = 'xxx'::text)) +(3 rows) + +explain (costs off) select * from hp where a = 10 and b = 'xxx'; + QUERY PLAN +-------------------------------------------------- + Append + -> Seq Scan on hp2 + Filter: ((a = 10) AND (b = 'xxx'::text)) +(3 rows) + +explain (costs off) select * from hp where a = 10 and b = 'yyy'; + QUERY PLAN +-------------------------------------------------- + Append + -> Seq Scan on hp3 + Filter: ((a = 10) AND (b = 'yyy'::text)) +(3 rows) + +explain (costs off) select * from hp where (a = 10 and b = 'yyy') or (a = 10 and b = 'xxx') or (a is null and b is null); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------- + Append + -> Seq Scan on hp0 + Filter: (((a = 10) AND (b = 'yyy'::text)) OR ((a = 10) AND (b = 'xxx'::text)) OR ((a IS NULL) AND (b IS NULL))) + -> Seq Scan on hp2 + Filter: (((a = 10) AND (b = 'yyy'::text)) OR ((a = 10) AND (b = 'xxx'::text)) OR ((a IS NULL) AND (b IS NULL))) + -> Seq Scan on hp3 + Filter: (((a = 10) AND (b = 'yyy'::text)) OR ((a = 10) AND (b = 'xxx'::text)) OR ((a IS NULL) AND (b IS NULL))) +(7 rows) + +-- +-- some more cases +-- +-- +-- pruning for partitioned table appearing inside a sub-query +-- +-- pruning won't work for mc3p, because some keys are Params +explain (costs off) select * from mc2p t1, lateral (select count(*) from mc3p t2 where t2.a = t1.b and abs(t2.b) = 1 and t2.c = 1) s where t1.a = 1; + QUERY PLAN +----------------------------------------------------------------------- + Nested Loop + -> Append + -> Seq Scan on mc2p0 t1 + Filter: (a = 1) + -> Seq Scan on mc2p1 t1_1 + Filter: (a = 1) + -> Seq Scan on mc2p2 t1_2 + Filter: (a = 1) + -> Seq Scan on mc2p_default t1_3 + Filter: (a = 1) + -> Aggregate + -> Append + -> Seq Scan on mc3p0 t2 + Filter: ((a = t1.b) AND (c = 1) AND 
(abs(b) = 1)) + -> Seq Scan on mc3p1 t2_1 + Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) + -> Seq Scan on mc3p2 t2_2 + Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) + -> Seq Scan on mc3p3 t2_3 + Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) + -> Seq Scan on mc3p4 t2_4 + Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) + -> Seq Scan on mc3p5 t2_5 + Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) + -> Seq Scan on mc3p6 t2_6 + Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) + -> Seq Scan on mc3p7 t2_7 + Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) + -> Seq Scan on mc3p_default t2_8 + Filter: ((a = t1.b) AND (c = 1) AND (abs(b) = 1)) +(30 rows) + +-- pruning should work fine, because prefix of keys is available +explain (costs off) select * from mc2p t1, lateral (select count(*) from mc3p t2 where t2.c = t1.b and abs(t2.b) = 1 and t2.a = 1) s where t1.a = 1; + QUERY PLAN +----------------------------------------------------------------------- + Nested Loop + -> Append + -> Seq Scan on mc2p0 t1 + Filter: (a = 1) + -> Seq Scan on mc2p1 t1_1 + Filter: (a = 1) + -> Seq Scan on mc2p2 t1_2 + Filter: (a = 1) + -> Seq Scan on mc2p_default t1_3 + Filter: (a = 1) + -> Aggregate + -> Append + -> Seq Scan on mc3p0 t2 + Filter: ((c = t1.b) AND (a = 1) AND (abs(b) = 1)) + -> Seq Scan on mc3p1 t2_1 + Filter: ((c = t1.b) AND (a = 1) AND (abs(b) = 1)) +(16 rows) + +-- pruning should work fine in this case, too. 
+explain (costs off) select * from mc2p t1, lateral (select count(*) from mc3p t2 where t2.a = 1 and abs(t2.b) = 1 and t2.c = 1) s where t1.a = 1; + QUERY PLAN +-------------------------------------------------------------------- + Nested Loop + -> Aggregate + -> Append + -> Seq Scan on mc3p1 t2 + Filter: ((a = 1) AND (c = 1) AND (abs(b) = 1)) + -> Append + -> Seq Scan on mc2p0 t1 + Filter: (a = 1) + -> Seq Scan on mc2p1 t1_1 + Filter: (a = 1) + -> Seq Scan on mc2p2 t1_2 + Filter: (a = 1) + -> Seq Scan on mc2p_default t1_3 + Filter: (a = 1) +(14 rows) + +-- +-- pruning with clauses containing <> operator +-- +-- doesn't prune range or hash partitions +explain (costs off) select * from hp where a <> 1 and b <> 'xxx'; + QUERY PLAN +--------------------------------------------------- + Append + -> Seq Scan on hp0 + Filter: ((a <> 1) AND (b <> 'xxx'::text)) + -> Seq Scan on hp1 + Filter: ((a <> 1) AND (b <> 'xxx'::text)) + -> Seq Scan on hp2 + Filter: ((a <> 1) AND (b <> 'xxx'::text)) + -> Seq Scan on hp3 + Filter: ((a <> 1) AND (b <> 'xxx'::text)) +(9 rows) + +create table rp (a int) partition by range (a); +create table rp0 partition of rp for values from (minvalue) to (1); +create table rp1 partition of rp for values from (1) to (2); +create table rp2 partition of rp for values from (2) to (maxvalue); +explain (costs off) select * from rp where a <> 1; + QUERY PLAN +-------------------------- + Append + -> Seq Scan on rp0 + Filter: (a <> 1) + -> Seq Scan on rp1 + Filter: (a <> 1) + -> Seq Scan on rp2 + Filter: (a <> 1) +(7 rows) + +explain (costs off) select * from rp where a <> 1 and a <> 2; + QUERY PLAN +----------------------------------------- + Append + -> Seq Scan on rp0 + Filter: ((a <> 1) AND (a <> 2)) + -> Seq Scan on rp1 + Filter: ((a <> 1) AND (a <> 2)) + -> Seq Scan on rp2 + Filter: ((a <> 1) AND (a <> 2)) +(7 rows) + +-- null partition should be eliminated due to strict <> clause. 
+explain (costs off) select * from lp where a <> 'a'; + QUERY PLAN +------------------------------------ + Append + -> Seq Scan on lp_ad + Filter: (a <> 'a'::bpchar) + -> Seq Scan on lp_bc + Filter: (a <> 'a'::bpchar) + -> Seq Scan on lp_ef + Filter: (a <> 'a'::bpchar) + -> Seq Scan on lp_g + Filter: (a <> 'a'::bpchar) + -> Seq Scan on lp_default + Filter: (a <> 'a'::bpchar) +(11 rows) + +-- ensure we detect contradictions in clauses; a can't be NULL and NOT NULL. +explain (costs off) select * from lp where a <> 'a' and a is null; + QUERY PLAN +-------------------------- + Result + One-Time Filter: false +(2 rows) + +explain (costs off) select * from lp where (a <> 'a' and a <> 'd') or a is null; + QUERY PLAN +------------------------------------------------------------------------------ + Append + -> Seq Scan on lp_bc + Filter: (((a <> 'a'::bpchar) AND (a <> 'd'::bpchar)) OR (a IS NULL)) + -> Seq Scan on lp_ef + Filter: (((a <> 'a'::bpchar) AND (a <> 'd'::bpchar)) OR (a IS NULL)) + -> Seq Scan on lp_g + Filter: (((a <> 'a'::bpchar) AND (a <> 'd'::bpchar)) OR (a IS NULL)) + -> Seq Scan on lp_null + Filter: (((a <> 'a'::bpchar) AND (a <> 'd'::bpchar)) OR (a IS NULL)) + -> Seq Scan on lp_default + Filter: (((a <> 'a'::bpchar) AND (a <> 'd'::bpchar)) OR (a IS NULL)) +(11 rows) + +-- case for list partitioned table that's not root +explain (costs off) select * from rlp where a = 15 and b <> 'ab' and b <> 'cd' and b <> 'xy' and b is not null; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------ + Append + -> Seq Scan on rlp3efgh + Filter: ((b IS NOT NULL) AND ((b)::text <> 'ab'::text) AND ((b)::text <> 'cd'::text) AND ((b)::text <> 'xy'::text) AND (a = 15)) + -> Seq Scan on rlp3_default + Filter: ((b IS NOT NULL) AND ((b)::text <> 'ab'::text) AND ((b)::text <> 'cd'::text) AND ((b)::text <> 'xy'::text) AND (a = 15)) +(5 rows) + +-- +-- different collations for different 
keys with same expression +-- +create table coll_pruning_multi (a text) partition by range (substr(a, 1) collate "POSIX", substr(a, 1) collate "C"); +create table coll_pruning_multi1 partition of coll_pruning_multi for values from ('a', 'a') to ('a', 'e'); +create table coll_pruning_multi2 partition of coll_pruning_multi for values from ('a', 'e') to ('a', 'z'); +create table coll_pruning_multi3 partition of coll_pruning_multi for values from ('b', 'a') to ('b', 'e'); +-- no pruning, because only the 2nd column is constrained +explain (costs off) select * from coll_pruning_multi where substr(a, 1) = 'e' collate "C"; + QUERY PLAN +-------------------------------------------------------- + Append + -> Seq Scan on coll_pruning_multi1 + Filter: (substr(a, 1) = 'e'::text COLLATE "C") + -> Seq Scan on coll_pruning_multi2 + Filter: (substr(a, 1) = 'e'::text COLLATE "C") + -> Seq Scan on coll_pruning_multi3 + Filter: (substr(a, 1) = 'e'::text COLLATE "C") +(7 rows) + +-- pruning with just 1st column constrained +explain (costs off) select * from coll_pruning_multi where substr(a, 1) = 'a' collate "POSIX"; + QUERY PLAN +------------------------------------------------------------ + Append + -> Seq Scan on coll_pruning_multi1 + Filter: (substr(a, 1) = 'a'::text COLLATE "POSIX") + -> Seq Scan on coll_pruning_multi2 + Filter: (substr(a, 1) = 'a'::text COLLATE "POSIX") +(5 rows) + +-- pruning with just both columns constrained +explain (costs off) select * from coll_pruning_multi where substr(a, 1) = 'e' collate "C" and substr(a, 1) = 'a' collate "POSIX"; + QUERY PLAN +--------------------------------------------------------------------------------------------------------- + Append + -> Seq Scan on coll_pruning_multi2 + Filter: ((substr(a, 1) = 'e'::text COLLATE "C") AND (substr(a, 1) = 'a'::text COLLATE "POSIX")) +(3 rows) + +-- +-- LIKE operators don't prune +-- +create table like_op_noprune (a text) partition by list (a); +create table like_op_noprune1 partition of 
like_op_noprune for values in ('ABC'); +create table like_op_noprune2 partition of like_op_noprune for values in ('BCD'); +explain (costs off) select * from like_op_noprune where a like '%BC'; + QUERY PLAN +------------------------------------ + Append + -> Seq Scan on like_op_noprune1 + Filter: (a ~~ '%BC'::text) + -> Seq Scan on like_op_noprune2 + Filter: (a ~~ '%BC'::text) +(5 rows) + +drop table lp, coll_pruning, rlp, mc3p, mc2p, boolpart, hp, rp, coll_pruning_multi, like_op_noprune; diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql index 514f8e5ce1..08fc2dbc21 100644 --- a/src/test/regress/sql/partition_prune.sql +++ b/src/test/regress/sql/partition_prune.sql @@ -152,4 +152,104 @@ explain (costs off) select * from boolpart where a is not true and a is not fals explain (costs off) select * from boolpart where a is unknown; explain (costs off) select * from boolpart where a is not unknown; -drop table lp, coll_pruning, rlp, mc3p, mc2p, boolpart; +-- hash partitioning +create table hp (a int, b text) partition by hash (a, b); +create table hp0 partition of hp for values with (modulus 4, remainder 0); +create table hp3 partition of hp for values with (modulus 4, remainder 3); +create table hp1 partition of hp for values with (modulus 4, remainder 1); +create table hp2 partition of hp for values with (modulus 4, remainder 2); + +insert into hp values (null, null); +insert into hp values (1, null); +insert into hp values (1, 'xxx'); +insert into hp values (null, 'xxx'); +insert into hp values (10, 'xxx'); +insert into hp values (10, 'yyy'); +select tableoid::regclass, * from hp order by 1; + +-- partial keys won't prune, nor would non-equality conditions +explain (costs off) select * from hp where a = 1; +explain (costs off) select * from hp where b = 'xxx'; +explain (costs off) select * from hp where a is null; +explain (costs off) select * from hp where b is null; +explain (costs off) select * from hp where a < 1 and 
b = 'xxx'; +explain (costs off) select * from hp where a <> 1 and b = 'yyy'; + +-- pruning should work in all cases below +explain (costs off) select * from hp where a is null and b is null; +explain (costs off) select * from hp where a = 1 and b is null; +explain (costs off) select * from hp where a = 1 and b = 'xxx'; +explain (costs off) select * from hp where a is null and b = 'xxx'; +explain (costs off) select * from hp where a = 10 and b = 'xxx'; +explain (costs off) select * from hp where a = 10 and b = 'yyy'; +explain (costs off) select * from hp where (a = 10 and b = 'yyy') or (a = 10 and b = 'xxx') or (a is null and b is null); + +-- +-- some more cases +-- + +-- +-- pruning for partitioned table appearing inside a sub-query +-- + +-- pruning won't work for mc3p, because some keys are Params +explain (costs off) select * from mc2p t1, lateral (select count(*) from mc3p t2 where t2.a = t1.b and abs(t2.b) = 1 and t2.c = 1) s where t1.a = 1; + +-- pruning should work fine, because prefix of keys is available +explain (costs off) select * from mc2p t1, lateral (select count(*) from mc3p t2 where t2.c = t1.b and abs(t2.b) = 1 and t2.a = 1) s where t1.a = 1; + +-- pruning should work fine in this case, too. +explain (costs off) select * from mc2p t1, lateral (select count(*) from mc3p t2 where t2.a = 1 and abs(t2.b) = 1 and t2.c = 1) s where t1.a = 1; + +-- +-- pruning with clauses containing <> operator +-- + +-- doesn't prune range or hash partitions +explain (costs off) select * from hp where a <> 1 and b <> 'xxx'; + +create table rp (a int) partition by range (a); +create table rp0 partition of rp for values from (minvalue) to (1); +create table rp1 partition of rp for values from (1) to (2); +create table rp2 partition of rp for values from (2) to (maxvalue); + +explain (costs off) select * from rp where a <> 1; +explain (costs off) select * from rp where a <> 1 and a <> 2; + +-- null partition should be eliminated due to strict <> clause. 
+explain (costs off) select * from lp where a <> 'a'; + +-- ensure we detect contradictions in clauses; a can't be NULL and NOT NULL. +explain (costs off) select * from lp where a <> 'a' and a is null; + +explain (costs off) select * from lp where (a <> 'a' and a <> 'd') or a is null; + +-- case for list partitioned table that's not root +explain (costs off) select * from rlp where a = 15 and b <> 'ab' and b <> 'cd' and b <> 'xy' and b is not null; + +-- +-- different collations for different keys with same expression +-- +create table coll_pruning_multi (a text) partition by range (substr(a, 1) collate "POSIX", substr(a, 1) collate "C"); +create table coll_pruning_multi1 partition of coll_pruning_multi for values from ('a', 'a') to ('a', 'e'); +create table coll_pruning_multi2 partition of coll_pruning_multi for values from ('a', 'e') to ('a', 'z'); +create table coll_pruning_multi3 partition of coll_pruning_multi for values from ('b', 'a') to ('b', 'e'); + +-- no pruning, because only the 2nd column is constrained +explain (costs off) select * from coll_pruning_multi where substr(a, 1) = 'e' collate "C"; + +-- pruning with just 1st column constrained +explain (costs off) select * from coll_pruning_multi where substr(a, 1) = 'a' collate "POSIX"; + +-- pruning with just both columns constrained +explain (costs off) select * from coll_pruning_multi where substr(a, 1) = 'e' collate "C" and substr(a, 1) = 'a' collate "POSIX"; + +-- +-- LIKE operators don't prune +-- +create table like_op_noprune (a text) partition by list (a); +create table like_op_noprune1 partition of like_op_noprune for values in ('ABC'); +create table like_op_noprune2 partition of like_op_noprune for values in ('BCD'); +explain (costs off) select * from like_op_noprune where a like '%BC'; + +drop table lp, coll_pruning, rlp, mc3p, mc2p, boolpart, hp, rp, coll_pruning_multi, like_op_noprune; -- 2.11.0