From 430ee6e6739d5f8fb2e25657f0196a5c413c2021 Mon Sep 17 00:00:00 2001
From: Tomas Vondra
Date: Sat, 22 Jun 2019 13:09:42 +0200
Subject: [PATCH 1/2] fix mcv build perf issue

---
 src/backend/statistics/mcv.c | 140 ++++++++++++++++++++++++++++++++---
 1 file changed, 132 insertions(+), 8 deletions(-)

diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index 5fe61ea0a4..04a4f17b01 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -78,6 +78,11 @@ static MultiSortSupport build_mss(VacAttrStats **stats, int numattrs);
 static SortItem *build_distinct_groups(int numrows, SortItem *items,
 									   MultiSortSupport mss, int *ndistinct);
 
+static SortItem **build_column_frequencies(SortItem *groups, int ngroups,
+										   MultiSortSupport mss, int *ncounts);
+
+static int	sort_item_compare(const void *a, const void *b, void *arg);
+
 static int	count_distinct_groups(int numrows, SortItem *items,
 								  MultiSortSupport mss);
 
@@ -172,6 +177,8 @@ statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
 	SortItem   *groups;
 	MCVList    *mcvlist = NULL;
 	MultiSortSupport mss;
+	SortItem  **freqs;
+	int		   *nfreqs;
 
 	attnums = build_attnums_array(attrs, &numattrs);
 
@@ -188,6 +195,10 @@ statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
 	/* transform the sorted rows into groups (sorted by frequency) */
 	groups = build_distinct_groups(nitems, items, mss, &ngroups);
 
+	/* compute frequencies for values in each column */
+	nfreqs = (int *) palloc0(sizeof(int) * numattrs);
+	freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);
+
 	/*
 	 * Maximum number of MCV items to store, based on the attribute with the
 	 * largest stats target (and the number of groups we have available).
@@ -242,6 +253,7 @@ statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
 	if (nitems > 0)
 	{
 		int			j;
+		SortItem	key;
 
 		/*
 		 * Allocate the MCV list structure, set the global parameters.
@@ -281,22 +293,30 @@ statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
 			item->base_frequency = 1.0;
 			for (j = 0; j < numattrs; j++)
 			{
-				int			count = 0;
-				int			k;
+				SortItem   *freq;
 
-				for (k = 0; k < ngroups; k++)
-				{
-					if (multi_sort_compare_dim(j, &groups[i], &groups[k], mss) == 0)
-						count += groups[k].count;
-				}
+				/* search key points at this group's value in column j */
+				key.values = &groups[i].values[j];
+				key.isnull = &groups[i].isnull[j];
 
-				item->base_frequency *= (double) count / numrows;
+				/* search with the comparator the array was sorted with */
+				freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
+												sizeof(SortItem),
+												sort_item_compare,
+												&mss->ssup[j]);
+
+				/* every group value was counted when building freqs[j] */
+				Assert(freq != NULL);
+
+				item->base_frequency *= ((double) freq->count) / numrows;
 			}
 		}
 	}
 
 	pfree(items);
 	pfree(groups);
+	pfree(freqs);
+	pfree(nfreqs);
 
 	return mcvlist;
 }
@@ -419,6 +439,110 @@ build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss,
 
 	return groups;
 }
+
+/*
+ * sort_item_compare
+ *		Compare two SortItems by their first value only.
+ *
+ * Has the callback signature expected by qsort_arg and bsearch_arg; 'arg'
+ * is the SortSupport of the column being compared.
+ */
+static int
+sort_item_compare(const void *a, const void *b, void *arg)
+{
+	SortSupport ssup = (SortSupport) arg;
+	SortItem   *ia = (SortItem *) a;
+	SortItem   *ib = (SortItem *) b;
+
+	return ApplySortComparator(ia->values[0], ia->isnull[0],
+							   ib->values[0], ib->isnull[0],
+							   ssup);
+}
+
+/*
+ * build_column_frequencies
+ *		Compute frequencies of distinct values in each column.
+ *
+ * For every dimension in 'mss', builds an array of single-value SortItems
+ * sorted with sort_item_compare, one per distinct value, whose count is the
+ * sum of the counts of all groups containing that value.  The number of
+ * items in the array for dimension i is stored in ncounts[i].
+ *
+ * The result is a single palloc'd chunk, so one pfree releases it.  The items
+ * point into the values/isnull arrays of 'groups', which therefore have to
+ * stay valid (and unmodified) for as long as the result is used.
+ */
+static SortItem **
+build_column_frequencies(SortItem *groups, int ngroups, MultiSortSupport mss,
+						 int *ncounts)
+{
+	int			i,
+				j;
+	SortItem  **result;
+	char	   *ptr;
+
+	/* allocate arrays for all columns as a single chunk */
+	ptr = palloc(MAXALIGN(sizeof(SortItem *) * mss->ndims) +
+				 mss->ndims * MAXALIGN(sizeof(SortItem) * ngroups));
+
+	/* initial array of pointers */
+	result = (SortItem **) ptr;
+	ptr += MAXALIGN(sizeof(SortItem *) * mss->ndims);
+
+	for (i = 0; i < mss->ndims; i++)
+	{
+		SortSupport ssup = &mss->ssup[i];
+
+		/* array of values for a single column */
+		result[i] = (SortItem *) ptr;
+		ptr += MAXALIGN(sizeof(SortItem) * ngroups);
+
+		/* without groups there are no values to report */
+		if (ngroups == 0)
+		{
+			ncounts[i] = 0;
+			continue;
+		}
+
+		/*
+		 * Extract data for the dimension, pointing into the input groups
+		 * rather than copying so that no per-item allocation is needed.
+		 */
+		for (j = 0; j < ngroups; j++)
+		{
+			result[i][j].values = &groups[j].values[i];
+			result[i][j].isnull = &groups[j].isnull[i];
+			result[i][j].count = groups[j].count;
+		}
+
+		/* sort the values, so that equal values end up adjacent */
+		qsort_arg((void *) result[i], ngroups, sizeof(SortItem),
+				  sort_item_compare, ssup);
+
+		/*
+		 * Deduplicate in place: a value equal to the last item kept adds its
+		 * count to that item, anything else becomes the next item kept.
+		 */
+		ncounts[i] = 1;
+		for (j = 1; j < ngroups; j++)
+		{
+			if (sort_item_compare(&result[i][ncounts[i] - 1],
+								  &result[i][j], ssup) == 0)
+			{
+				result[i][ncounts[i] - 1].count += result[i][j].count;
+				continue;
+			}
+
+			/* compact the array, unless the item is already in place */
+			if (ncounts[i] != j)
+				result[i][ncounts[i]] = result[i][j];
+
+			ncounts[i]++;
+		}
+	}
+
+	return result;
+}
 
 /*
  * statext_mcv_load
-- 
2.20.1