From 1586963befe8fe7de473a28515bd7676fa2d0acd Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Sun, 14 Jul 2019 14:19:01 +0200 Subject: [PATCH 1/5] Use proper collations in extended statistics The extended statistics code was a bit confused about which collation to use when building the statistics and when computing the estimates. For building it used the default collation of each data type, while for estimation it used DEFAULT_COLLATION_OID. That's clearly inconsistent. Commit 5e0928005 changed how this works for per-column statistics, in which case we now use the collation specified for each column - both for building the statistics and for selectivity estimation. This commit adopts the same approach for extended statistics. Note: One issue is that for per-column statistics we store the collation in the pg_statistic catalog, but we don't store it in pg_statistic_ext. So we'd have to either add another column to the catalog (which is probably the right thing to do) or rely on info from pg_statistic. But we probably need to add it to pg_statistic_ext anyway, to allow stats on expressions, or extended statistics with different collations. 
--- src/backend/statistics/mcv.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index 913a72ff67..2e375edcb4 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -348,7 +348,7 @@ build_mss(VacAttrStats **stats, int numattrs) elog(ERROR, "cache lookup failed for ordering operator for type %u", colstat->attrtypid); - multi_sort_add_dimension(mss, i, type->lt_opr, type->typcollation); + multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid); } return mss; @@ -668,7 +668,7 @@ statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats) /* sort and deduplicate the data */ ssup[dim].ssup_cxt = CurrentMemoryContext; - ssup[dim].ssup_collation = DEFAULT_COLLATION_OID; + ssup[dim].ssup_collation = stats[dim]->attrcollid; ssup[dim].ssup_nulls_first = false; PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]); @@ -1577,8 +1577,6 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, if (ok) { - TypeCacheEntry *typecache; - FmgrInfo gtproc; Var *var; Const *cst; bool isgt; @@ -1596,10 +1594,6 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, /* match the attribute to a dimension of the statistic */ idx = bms_member_index(keys, var->varattno); - /* get information about the >= procedure */ - typecache = lookup_type_cache(var->vartype, TYPECACHE_GT_OPR); - fmgr_info(get_opcode(typecache->gt_opr), &gtproc); - /* * Walk through the MCV items and evaluate the current clause. * We can skip items that were already ruled out, and @@ -1636,7 +1630,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, * or (const op var). 
*/ mismatch = !DatumGetBool(FunctionCall2Coll(&opproc, - DEFAULT_COLLATION_OID, + var->varcollid, cst->constvalue, item->values[idx])); @@ -1654,12 +1648,12 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses, */ if (isgt) mismatch = !DatumGetBool(FunctionCall2Coll(&opproc, - DEFAULT_COLLATION_OID, + var->varcollid, cst->constvalue, item->values[idx])); else mismatch = !DatumGetBool(FunctionCall2Coll(&opproc, - DEFAULT_COLLATION_OID, + var->varcollid, item->values[idx], cst->constvalue)); -- 2.20.1