diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index df0435c..c4a172a 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -296,6 +296,11 @@ + pg_statistic_ext + extended planner statistics + + + pg_subscription logical replication subscriptions @@ -4223,6 +4228,98 @@ + + <structname>pg_statistic_ext</structname> + + + pg_statistic_ext + + + + The catalog pg_statistic_ext + holds extended planner statistics. + + + + <structname>pg_statistic_ext</> Columns + + + + + Name + Type + References + Description + + + + + + + starelid + oid + pg_class.oid + The table that the described columns belongs to + + + + staname + name + + Name of the statistic. + + + + stanamespace + oid + pg_namespace.oid + + The OID of the namespace that contains this statistic + + + + + staowner + oid + pg_authid.oid + Owner of the statistic + + + + staenabled + char[] + + + An array with the modes of the enabled statistic types, encoded as + d for ndistinct coefficients. + + + + + stakeys + int2vector + pg_attribute.attnum + + This is an array of values that indicate which table columns this + statistic covers. For example a value of 1 3 would + mean that the first and the third table columns make up the statistic key. + + + + + standistinct + pg_ndistinct + + + Ndistict coefficients, serialized as pg_ndistinct type. + + + + + +
+
+ <structname>pg_namespace</structname> diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 9408a25..36315da 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -16721,6 +16721,10 @@ SELECT pg_type_is_visible('myschema.widget'::regtype); + pg_get_statisticsextdef + + + pg_get_triggerdef @@ -16890,6 +16894,11 @@ SELECT pg_type_is_visible('myschema.widget'::regtype); uses + pg_get_statisticsextdef(statext_oid) + text + get CREATE STATISTICS command for extended statistics objects + + pg_get_triggerdef(trigger_oid) text get CREATE [ CONSTRAINT ] TRIGGER command for trigger @@ -17033,20 +17042,22 @@ SELECT pg_type_is_visible('myschema.widget'::regtype); pg_get_constraintdef, - pg_get_indexdef, pg_get_ruledef, - and pg_get_triggerdef, respectively reconstruct the - creating command for a constraint, index, rule, or trigger. (Note that this - is a decompiled reconstruction, not the original text of the command.) - pg_get_expr decompiles the internal form of an - individual expression, such as the default value for a column. It can be - useful when examining the contents of system catalogs. If the expression - might contain Vars, specify the OID of the relation they refer to as the - second parameter; if no Vars are expected, zero is sufficient. - pg_get_viewdef reconstructs the SELECT - query that defines a view. Most of these functions come in two variants, - one of which can optionally pretty-print the result. The - pretty-printed format is more readable, but the default format is more - likely to be interpreted the same way by future versions of + pg_get_indexdef, + pg_get_statisticsextdef, + pg_get_ruledef, and + pg_get_triggerdef, respectively reconstruct the + creating command for a constraint, index, extended statistics object, rule, + or trigger. (Note that this is a decompiled reconstruction, not the + original text of the command.) 
pg_get_expr decompiles + the internal form of an individual expression, such as the default value + for a column. It can be useful when examining the contents of system + catalogs. If the expression might contain Vars, specify the OID of the + relation they refer to as the second parameter; if no Vars are expected, + zero is sufficient. pg_get_viewdef reconstructs the + SELECT query that defines a view. Most of these functions come + in two variants, one of which can optionally pretty-print the + result. The pretty-printed format is more readable, but the default format + is more likely to be interpreted the same way by future versions of PostgreSQL; avoid using pretty-printed output for dump purposes. Passing false for the pretty-print parameter yields the same result as the variant that does not have the parameter at all. diff --git a/doc/src/sgml/planstats.sgml b/doc/src/sgml/planstats.sgml index b73c66b..9260989 100644 --- a/doc/src/sgml/planstats.sgml +++ b/doc/src/sgml/planstats.sgml @@ -448,4 +448,149 @@ rows = (outer_cardinality * inner_cardinality) * selectivity + + Extended Statistics + + + statistics + of the planner + + + + The examples presented in used + statistics about individual columns to compute selectivity estimates. + When estimating the selectivity of conditions over multiple columns, + the planner normally assumes each condition is independent of other + conditions, + and simply multiplies the selectivity estimates of each condition together + to produce a final selectivity estimation for all conditions. + This method can often lead to inaccurate row estimations + when the conditions have dependencies on one another. + Such misestimations can result in poor plan choices being made. + + + + The examples presented below demonstrate such estimation errors on simple + data sets, and also how to resolve them by creating extended statistics + using the CREATE STATISTICS command. 
+ + + + Let's start with a very simple data set — a table with two columns, + containing exactly the same values: + + +CREATE TABLE t (a INT, b INT); +INSERT INTO t SELECT i % 100, i % 100 FROM generate_series(1, 10000) s(i); +ANALYZE t; + + + As explained in , + the planner can determine the cardinality of t + using the number of pages and rows + obtained from pg_class: + + +SELECT relpages, reltuples FROM pg_class WHERE relname = 't'; + + relpages | reltuples +----------+----------- + 45 | 10000 + + + The data distribution is very simple — there are only 100 distinct values + in each column, uniformly distributed. + + + + The following example shows the result of estimating a WHERE + condition on column a: + + +EXPLAIN ANALYZE SELECT * FROM t WHERE a = 1; + QUERY PLAN +------------------------------------------------------------------------------------------------- + Seq Scan on t (cost=0.00..170.00 rows=100 width=8) (actual time=0.031..2.870 rows=100 loops=1) + Filter: (a = 1) + Rows Removed by Filter: 9900 + Planning time: 0.092 ms + Execution time: 3.103 ms +(5 rows) + + + The planner examines the condition and computes the estimate using + eqsel (the selectivity function for =), and + statistics stored in the pg_stats table. In this case, + the planner estimates the condition matches 1% of rows, and by comparing + the estimated and actual number of rows, we see that the estimate is + very accurate (exact, in fact, as the table is very small). 
+ + + + Adding a condition on the second column results in the following plan: + + +EXPLAIN ANALYZE SELECT * FROM t WHERE a = 1 AND b = 1; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Seq Scan on t (cost=0.00..195.00 rows=1 width=8) (actual time=0.033..3.006 rows=100 loops=1) + Filter: ((a = 1) AND (b = 1)) + Rows Removed by Filter: 9900 + Planning time: 0.121 ms + Execution time: 3.220 ms +(5 rows) + + + The planner estimates the selectivity for each condition individually, + arriving to the 1% estimates as above, and then multiplies them, getting + the final 0.01% estimate. The plan however shows that this results in + a significant underestimate, as the actual number of rows matching the + conditions is two orders of magnitude higher than estimated. + + + + Overestimates, i.e., errors in the opposite direction, are also possible. + Consider for example the following combination of range conditions, each + matching + + +EXPLAIN ANALYZE SELECT * FROM t WHERE a <= 49 AND b > 49; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Seq Scan on t (cost=0.00..195.00 rows=2500 width=8) (actual time=1.607..1.607 rows=0 loops=1) + Filter: ((a <= 49) AND (b > 49)) + Rows Removed by Filter: 10000 + Planning time: 0.050 ms + Execution time: 1.623 ms +(5 rows) + + + The planner examines both WHERE clauses and estimates them + using the scalarltsel and scalargtsel functions, + specified as the selectivity functions matching the <= and + > operators. Both conditions match 50% of the + table, and assuming independence the planner multiplies them to compute + the total estimate of 25%. However as the explain output shows, the actual + number of rows is 0, because the columns are correlated and the conditions + contradict each other. 
+ + + + Both estimation errors are caused by violation of the independence + assumption, as the two columns contain exactly the same values, and are + therefore perfectly correlated. Providing additional information about + correlation between columns is the purpose of extended statistics, + and the rest of this section explains in more detail how the planner + leverages them to improve estimates. + + + + For additional details about extended statistics, see + src/backend/statistics/README. There are additional + READMEs for each type of statistics, mentioned in the following + sections. + + + + diff --git a/doc/src/sgml/ref/allfiles.sgml b/doc/src/sgml/ref/allfiles.sgml index 2bc4d9f..255e800 100644 --- a/doc/src/sgml/ref/allfiles.sgml +++ b/doc/src/sgml/ref/allfiles.sgml @@ -34,6 +34,7 @@ Complete list of usable sgml source files in this directory. + @@ -80,6 +81,7 @@ Complete list of usable sgml source files in this directory. + @@ -126,6 +128,7 @@ Complete list of usable sgml source files in this directory. + diff --git a/doc/src/sgml/ref/alter_statistics.sgml b/doc/src/sgml/ref/alter_statistics.sgml new file mode 100644 index 0000000..3e4d286 --- /dev/null +++ b/doc/src/sgml/ref/alter_statistics.sgml @@ -0,0 +1,115 @@ + + + + + ALTER STATISTICS + + + + ALTER STATISTICS + 7 + SQL - Language Statements + + + + ALTER STATISTICS + + change the definition of a extended statistics + + + + + +ALTER STATISTICS name OWNER TO { new_owner | CURRENT_USER | SESSION_USER } +ALTER STATISTICS name RENAME TO new_name +ALTER STATISTICS name SET SCHEMA new_schema + + + + + Description + + + ALTER STATISTICS changes the parameters of an existing + extended statistics. Any parameters not specifically set in the + ALTER STATISTICS command retain their prior settings. + + + + You must own the statistics to use ALTER STATISTICS. + To change a statistics' schema, you must also have CREATE + privilege on the new schema. 
+ To alter the owner, you must also be a direct or indirect member of the new + owning role, and that role must have CREATE privilege on + the statistics' schema. (These restrictions enforce that altering the owner + doesn't do anything you couldn't do by dropping and recreating the statistics. + However, a superuser can alter ownership of any statistics anyway.) + + + + + Parameters + + + + + name + + + The name (optionally schema-qualified) of the statistics to be altered. + + + + + + new_owner + + + The user name of the new owner of the statistics. + + + + + + new_name + + + The new name for the statistics. + + + + + + new_schema + + + The new schema for the statistics. + + + + + + + + + + Compatibility + + + There's no ALTER STATISTICS command in the SQL standard. + + + + + See Also + + + + + + + + diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml index 077c003..f3ad5ed 100644 --- a/doc/src/sgml/ref/alter_table.sgml +++ b/doc/src/sgml/ref/alter_table.sgml @@ -119,9 +119,12 @@ ALTER TABLE [ IF EXISTS ] name This form drops a column from a table. Indexes and table constraints involving the column will be automatically - dropped as well. You will need to say CASCADE if - anything outside the table depends on the column, for example, - foreign key references or views. + dropped as well. + Multivariate statistics referencing the dropped column will also be + removed if the removal of the column would cause the statistics to + contain data for only a single column. + You will need to say CASCADE if anything outside the table + depends on the column, for example, foreign key references or views. If IF EXISTS is specified and the column does not exist, no error is thrown. In this case a notice is issued instead. 
diff --git a/doc/src/sgml/ref/comment.sgml b/doc/src/sgml/ref/comment.sgml index 7483c8c..8fe17a5 100644 --- a/doc/src/sgml/ref/comment.sgml +++ b/doc/src/sgml/ref/comment.sgml @@ -51,6 +51,7 @@ COMMENT ON SCHEMA object_name | SEQUENCE object_name | SERVER object_name | + STATISTICS object_name | TABLE object_name | TABLESPACE object_name | TEXT SEARCH CONFIGURATION object_name | @@ -125,8 +126,8 @@ COMMENT ON The name of the object to be commented. Names of tables, aggregates, collations, conversions, domains, foreign tables, functions, indexes, operators, operator classes, operator families, sequences, - text search objects, types, and views can be schema-qualified. - When commenting on a column, + statistics, text search objects, types, and views can be + schema-qualified. When commenting on a column, relation_name must refer to a table, view, composite type, or foreign table. @@ -327,6 +328,7 @@ COMMENT ON RULE my_rule ON my_table IS 'Logs updates of employee records'; COMMENT ON SCHEMA my_schema IS 'Departmental data'; COMMENT ON SEQUENCE my_sequence IS 'Used to generate primary keys'; COMMENT ON SERVER myserver IS 'my foreign server'; +COMMENT ON STATISTICS my_statistics IS 'Improves planner row estimations'; COMMENT ON TABLE my_schema.my_table IS 'Employee Information'; COMMENT ON TABLESPACE my_tablespace IS 'Tablespace for indexes'; COMMENT ON TEXT SEARCH CONFIGURATION my_config IS 'Special word filtering'; diff --git a/doc/src/sgml/ref/create_statistics.sgml b/doc/src/sgml/ref/create_statistics.sgml new file mode 100644 index 0000000..e3d120e --- /dev/null +++ b/doc/src/sgml/ref/create_statistics.sgml @@ -0,0 +1,154 @@ + + + + + CREATE STATISTICS + + + + CREATE STATISTICS + 7 + SQL - Language Statements + + + + CREATE STATISTICS + define extended statistics + + + + +CREATE STATISTICS [ IF NOT EXISTS ] statistics_name ON ( + column_name, column_name [, ...]) + FROM table_name + + + + + + Description + + + CREATE STATISTICS will create a new extended 
statistics + on the table. The statistics will be created in the current database and + will be owned by the user issuing the command. + + + + If a schema name is given (for example, CREATE STATISTICS + myschema.mystat ...) then the statistics is created in the specified + schema. Otherwise it is created in the current schema. The name of + the table must be distinct from the name of any other statistics in the + same schema. + + + + + Parameters + + + + + IF NOT EXISTS + + + Do not throw an error if a statistics with the same name already exists. + A notice is issued in this case. Note that there is no guarantee that + the existing statistics is anything like the one that would have been + created. + + + + + + statistics_name + + + The name (optionally schema-qualified) of the statistics to be created. + + + + + + table_name + + + The name (optionally schema-qualified) of the table the statistics should + be created on. + + + + + + column_name + + + The name of a column to be included in the statistics. + + + + + + + + + + Notes + + + You must be the owner of a table to create or change statistics on it. + + + + + Examples + + + Create table t1 with two functionally dependent columns, i.e. + knowledge of a value in the first column is sufficient for determining the + value in the other column. Then functional dependencies are built on those + columns: + + +CREATE TABLE t1 ( + a int, + b int +); + +INSERT INTO t1 SELECT i/100, i/500 + FROM generate_series(1,1000000) s(i); + +CREATE STATISTICS s1 ON (a, b) FROM t1; + +ANALYZE t1; + +-- valid combination of values +EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 0); + +-- invalid combination of values +EXPLAIN ANALYZE SELECT * FROM t1 WHERE (a = 1) AND (b = 1); + + + + + + + Compatibility + + + There's no CREATE STATISTICS command in the SQL standard. 
+ + + + + See Also + + + + + + + diff --git a/doc/src/sgml/ref/drop_statistics.sgml b/doc/src/sgml/ref/drop_statistics.sgml new file mode 100644 index 0000000..d7c657f --- /dev/null +++ b/doc/src/sgml/ref/drop_statistics.sgml @@ -0,0 +1,91 @@ + + + + + DROP STATISTICS + + + + DROP STATISTICS + 7 + SQL - Language Statements + + + + DROP STATISTICS + remove extended statistics + + + + +DROP STATISTICS [ IF EXISTS ] name [, ...] + + + + + Description + + + DROP STATISTICS removes statistics from the database. + Only the statistics owner, the schema owner, and superuser can drop a + statistics. + + + + + + Parameters + + + + IF EXISTS + + + Do not throw an error if the statistics do not exist. A notice is + issued in this case. + + + + + + name + + + The name (optionally schema-qualified) of the statistics to drop. + + + + + + + + + Examples + + + ... + + + + + + Compatibility + + + There's no DROP STATISTICS command in the SQL standard. + + + + + See Also + + + + + + + + diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml index c8191de..aa8a157 100644 --- a/doc/src/sgml/reference.sgml +++ b/doc/src/sgml/reference.sgml @@ -60,6 +60,7 @@ &alterSchema; &alterSequence; &alterServer; + &alterStatistics; &alterSubscription; &alterSystem; &alterTable; @@ -108,6 +109,7 @@ &createSchema; &createSequence; &createServer; + &createStatistics; &createSubscription; &createTable; &createTableAs; @@ -154,6 +156,7 @@ &dropSchema; &dropSequence; &dropServer; + &dropStatistics; &dropSubscription; &dropTable; &dropTableSpace; diff --git a/src/backend/Makefile b/src/backend/Makefile index 7a0bbb2..426ef4f 100644 --- a/src/backend/Makefile +++ b/src/backend/Makefile @@ -19,7 +19,7 @@ include $(top_builddir)/src/Makefile.global SUBDIRS = access bootstrap catalog parser commands executor foreign lib libpq \ main nodes optimizer port postmaster regex replication rewrite \ - storage tcop tsearch utils $(top_builddir)/src/timezone + statistics storage tcop tsearch utils 
$(top_builddir)/src/timezone include $(srcdir)/common.mk diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile index 3136858..ff7cc79 100644 --- a/src/backend/catalog/Makefile +++ b/src/backend/catalog/Makefile @@ -33,6 +33,7 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\ pg_attrdef.h pg_constraint.h pg_inherits.h pg_index.h pg_operator.h \ pg_opfamily.h pg_opclass.h pg_am.h pg_amop.h pg_amproc.h \ pg_language.h pg_largeobject_metadata.h pg_largeobject.h pg_aggregate.h \ + pg_statistic_ext.h \ pg_statistic.h pg_rewrite.h pg_trigger.h pg_event_trigger.h pg_description.h \ pg_cast.h pg_enum.h pg_namespace.h pg_conversion.h pg_depend.h \ pg_database.h pg_db_role_setting.h pg_tablespace.h pg_pltemplate.h \ diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c index be86d76..d01930f 100644 --- a/src/backend/catalog/aclchk.c +++ b/src/backend/catalog/aclchk.c @@ -48,6 +48,7 @@ #include "catalog/pg_operator.h" #include "catalog/pg_opfamily.h" #include "catalog/pg_proc.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_type.h" @@ -3302,6 +3303,8 @@ static const char *const no_priv_msg[MAX_ACL_KIND] = gettext_noop("permission denied for collation %s"), /* ACL_KIND_CONVERSION */ gettext_noop("permission denied for conversion %s"), + /* ACL_KIND_STATISTICS */ + gettext_noop("permission denied for statistics %s"), /* ACL_KIND_TABLESPACE */ gettext_noop("permission denied for tablespace %s"), /* ACL_KIND_TSDICTIONARY */ @@ -3352,6 +3355,8 @@ static const char *const not_owner_msg[MAX_ACL_KIND] = gettext_noop("must be owner of collation %s"), /* ACL_KIND_CONVERSION */ gettext_noop("must be owner of conversion %s"), + /* ACL_KIND_STATISTICS */ + gettext_noop("must be owner of statistics %s"), /* ACL_KIND_TABLESPACE */ gettext_noop("must be owner of tablespace %s"), /* ACL_KIND_TSDICTIONARY */ @@ -3467,6 +3472,10 @@ 
pg_aclmask(AclObjectKind objkind, Oid table_oid, AttrNumber attnum, Oid roleid, mask, how, NULL); case ACL_KIND_NAMESPACE: return pg_namespace_aclmask(table_oid, roleid, mask, how); + case ACL_KIND_STATISTICS: + elog(ERROR, "grantable rights not supported for statistics"); + /* not reached, but keep compiler quiet */ + return ACL_NO_RIGHTS; case ACL_KIND_TABLESPACE: return pg_tablespace_aclmask(table_oid, roleid, mask, how); case ACL_KIND_FDW: @@ -5104,6 +5113,32 @@ pg_subscription_ownercheck(Oid sub_oid, Oid roleid) } /* + * Ownership check for a extended statistics (specified by OID). + */ +bool +pg_statistics_ownercheck(Oid stat_oid, Oid roleid) +{ + HeapTuple tuple; + Oid ownerId; + + /* Superusers bypass all permission checking. */ + if (superuser_arg(roleid)) + return true; + + tuple = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(stat_oid)); + if (!HeapTupleIsValid(tuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("statistics with OID %u do not exist", stat_oid))); + + ownerId = ((Form_pg_statistic_ext) GETSTRUCT(tuple))->staowner; + + ReleaseSysCache(tuple); + + return has_privs_of_role(roleid, ownerId); +} + +/* * Check whether specified role has CREATEROLE privilege (or is a superuser) * * Note: roles do not have owners per se; instead we use this test in diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index fc088b2..ee27cae 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -51,6 +51,7 @@ #include "catalog/pg_publication.h" #include "catalog/pg_publication_rel.h" #include "catalog/pg_rewrite.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_transform.h" @@ -154,6 +155,7 @@ static const Oid object_classes[] = { RewriteRelationId, /* OCLASS_REWRITE */ TriggerRelationId, /* OCLASS_TRIGGER */ NamespaceRelationId, /* OCLASS_SCHEMA */ + StatisticExtRelationId, /* OCLASS_STATISTIC_EXT */ 
TSParserRelationId, /* OCLASS_TSPARSER */ TSDictionaryRelationId, /* OCLASS_TSDICT */ TSTemplateRelationId, /* OCLASS_TSTEMPLATE */ @@ -1263,6 +1265,10 @@ doDeletion(const ObjectAddress *object, int flags) DropTransformById(object->objectId); break; + case OCLASS_STATISTIC_EXT: + RemoveStatisticsById(object->objectId); + break; + default: elog(ERROR, "unrecognized object class: %u", object->classId); @@ -2377,6 +2383,9 @@ getObjectClass(const ObjectAddress *object) case NamespaceRelationId: return OCLASS_SCHEMA; + case StatisticExtRelationId: + return OCLASS_STATISTIC_EXT; + case TSParserRelationId: return OCLASS_TSPARSER; diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 41c0056..131a432 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -52,6 +52,7 @@ #include "catalog/pg_opclass.h" #include "catalog/pg_partitioned_table.h" #include "catalog/pg_statistic.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_type.h" #include "catalog/pg_type_fn.h" @@ -1608,7 +1609,10 @@ RemoveAttributeById(Oid relid, AttrNumber attnum) heap_close(attr_rel, RowExclusiveLock); if (attnum > 0) + { RemoveStatistics(relid, attnum); + RemoveStatisticsExt(relid, attnum); + } relation_close(rel, NoLock); } @@ -1854,6 +1858,7 @@ heap_drop_with_catalog(Oid relid) * delete statistics */ RemoveStatistics(relid, 0); + RemoveStatisticsExt(relid, 0); /* * delete attribute tuples @@ -2766,6 +2771,103 @@ RemoveStatistics(Oid relid, AttrNumber attnum) /* + * RemoveStatisticsExt --- remove entries in pg_statistic_ext for a rel + * + * If attnum is zero, remove all entries for rel; else remove only the one(s) + * for that column. + */ +void +RemoveStatisticsExt(Oid relid, AttrNumber attnum) +{ + Relation pgstatisticext; + TupleDesc tupdesc = NULL; + SysScanDesc scan; + ScanKeyData key; + HeapTuple tuple; + + /* + * When dropping a column, we'll drop statistics with a single remaining + * (undropped column). 
To do that, we need the tuple descriptor. + * + * We already have the relation locked (as we're running ALTER TABLE ... + * DROP COLUMN), so we'll just get the descriptor here. + */ + if (attnum != 0) + { + Relation rel = relation_open(relid, NoLock); + + /* extended stats are supported on tables and matviews */ + if (rel->rd_rel->relkind == RELKIND_RELATION || + rel->rd_rel->relkind == RELKIND_MATVIEW) + tupdesc = RelationGetDescr(rel); + + relation_close(rel, NoLock); + } + + if (tupdesc == NULL) + return; + + pgstatisticext = heap_open(StatisticExtRelationId, RowExclusiveLock); + + ScanKeyInit(&key, + Anum_pg_statistic_ext_starelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relid)); + + scan = systable_beginscan(pgstatisticext, + StatisticExtRelidIndexId, + true, NULL, 1, &key); + + /* we must loop even when attnum != 0, in case of inherited stats */ + while (HeapTupleIsValid(tuple = systable_getnext(scan))) + { + bool delete = true; + + if (attnum != 0) + { + Datum adatum; + bool isnull; + int i; + int ncolumns = 0; + ArrayType *arr; + int16 *attnums; + + /* get the columns */ + adatum = SysCacheGetAttr(STATEXTOID, tuple, + Anum_pg_statistic_ext_stakeys, &isnull); + Assert(!isnull); + + arr = DatumGetArrayTypeP(adatum); + attnums = (int16 *) ARR_DATA_PTR(arr); + + for (i = 0; i < ARR_DIMS(arr)[0]; i++) + { + /* count the column unless it's has been / is being dropped */ + if ((!tupdesc->attrs[attnums[i] - 1]->attisdropped) && + (attnums[i] != attnum)) + ncolumns += 1; + } + + /* delete if there are less than two attributes */ + delete = (ncolumns < 2); + } + + if (delete) + { + simple_heap_delete(pgstatisticext, &tuple->t_self); + deleteDependencyRecordsFor(StatisticExtRelationId, + HeapTupleGetOid(tuple), + false); + } + } + + systable_endscan(scan); + + heap_close(pgstatisticext, RowExclusiveLock); +} + + +/* * RelationTruncateIndexes - truncate all indexes associated * with the heap relation to zero tuples. 
* diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c index a38da30..e521bd9 100644 --- a/src/backend/catalog/namespace.c +++ b/src/backend/catalog/namespace.c @@ -2086,6 +2086,62 @@ ConversionIsVisible(Oid conid) } /* + * get_statistics_oid - find a statistics by possibly qualified name + * + * If not found, returns InvalidOid if missing_ok, else throws error + */ +Oid +get_statistics_oid(List *names, bool missing_ok) +{ + char *schemaname; + char *stats_name; + Oid namespaceId; + Oid stats_oid = InvalidOid; + ListCell *l; + + /* deconstruct the name list */ + DeconstructQualifiedName(names, &schemaname, &stats_name); + + if (schemaname) + { + /* use exact schema given */ + namespaceId = LookupExplicitNamespace(schemaname, missing_ok); + if (missing_ok && !OidIsValid(namespaceId)) + stats_oid = InvalidOid; + else + stats_oid = GetSysCacheOid2(STATEXTNAMENSP, + PointerGetDatum(stats_name), + ObjectIdGetDatum(namespaceId)); + } + else + { + /* search for it in search path */ + recomputeNamespacePath(); + + foreach(l, activeSearchPath) + { + namespaceId = lfirst_oid(l); + + if (namespaceId == myTempNamespace) + continue; /* do not look in temp namespace */ + stats_oid = GetSysCacheOid2(STATEXTNAMENSP, + PointerGetDatum(stats_name), + ObjectIdGetDatum(namespaceId)); + if (OidIsValid(stats_oid)) + break; + } + } + + if (!OidIsValid(stats_oid) && !missing_ok) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("statistics \"%s\" do not exist", + NameListToString(names)))); + + return stats_oid; +} + +/* * get_ts_parser_oid - find a TS parser by possibly qualified name * * If not found, returns InvalidOid if missing_ok, else throws error diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c index 61a831b..2948d64 100644 --- a/src/backend/catalog/objectaddress.c +++ b/src/backend/catalog/objectaddress.c @@ -48,6 +48,7 @@ #include "catalog/pg_publication.h" #include "catalog/pg_publication_rel.h" 
#include "catalog/pg_rewrite.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_transform.h" @@ -478,6 +479,18 @@ static const ObjectPropertyType ObjectProperty[] = InvalidAttrNumber, ACL_KIND_SUBSCRIPTION, true + }, + { + StatisticExtRelationId, + StatisticExtOidIndexId, + STATEXTOID, + STATEXTNAMENSP, + Anum_pg_statistic_ext_staname, + Anum_pg_statistic_ext_stanamespace, + Anum_pg_statistic_ext_staowner, + InvalidAttrNumber, /* no ACL (same as relation) */ + ACL_KIND_STATISTICS, + true } }; @@ -696,6 +709,10 @@ static const struct object_type_map /* OCLASS_TRANSFORM */ { "transform", OBJECT_TRANSFORM + }, + /* OBJECT_STATISTIC_EXT */ + { + "statistics", OBJECT_STATISTIC_EXT } }; @@ -974,6 +991,12 @@ get_object_address(ObjectType objtype, Node *object, address = get_object_address_defacl(castNode(List, object), missing_ok); break; + case OBJECT_STATISTIC_EXT: + address.classId = StatisticExtRelationId; + address.objectId = get_statistics_oid(castNode(List, object), + missing_ok); + address.objectSubId = 0; + break; default: elog(ERROR, "unrecognized objtype: %d", (int) objtype); /* placate compiler, in case it thinks elog might return */ @@ -2083,6 +2106,7 @@ pg_get_object_address(PG_FUNCTION_ARGS) case OBJECT_ATTRIBUTE: case OBJECT_COLLATION: case OBJECT_CONVERSION: + case OBJECT_STATISTIC_EXT: case OBJECT_TSPARSER: case OBJECT_TSDICTIONARY: case OBJECT_TSTEMPLATE: @@ -2370,6 +2394,10 @@ check_object_ownership(Oid roleid, ObjectType objtype, ObjectAddress address, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser"))); break; + case OBJECT_STATISTIC_EXT: + if (!pg_statistics_ownercheck(address.objectId, roleid)) + aclcheck_error_type(ACLCHECK_NOT_OWNER, address.objectId); + break; default: elog(ERROR, "unrecognized object type: %d", (int) objtype); @@ -3857,6 +3885,10 @@ getObjectTypeDescription(const ObjectAddress *object) appendStringInfoString(&buffer, 
"subscription"); break; + case OCLASS_STATISTIC_EXT: + appendStringInfoString(&buffer, "statistics"); + break; + default: appendStringInfo(&buffer, "unrecognized %u", object->classId); break; @@ -4880,6 +4912,29 @@ getObjectIdentityParts(const ObjectAddress *object, break; } + case OCLASS_STATISTIC_EXT: + { + HeapTuple tup; + Form_pg_statistic_ext formStatistic; + char *schema; + + tup = SearchSysCache1(STATEXTOID, + ObjectIdGetDatum(object->objectId)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for statistics %u", + object->objectId); + formStatistic = (Form_pg_statistic_ext) GETSTRUCT(tup); + schema = get_namespace_name_or_temp(formStatistic->stanamespace); + appendStringInfoString(&buffer, + quote_qualified_identifier(schema, + NameStr(formStatistic->staname))); + if (objname) + *objname = list_make2(schema, + pstrdup(NameStr(formStatistic->staname))); + ReleaseSysCache(tup); + } + break; + default: appendStringInfo(&buffer, "unrecognized object %u %u %d", object->classId, diff --git a/src/backend/catalog/pg_shdepend.c b/src/backend/catalog/pg_shdepend.c index 8d946ff..d28a8af 100644 --- a/src/backend/catalog/pg_shdepend.c +++ b/src/backend/catalog/pg_shdepend.c @@ -39,6 +39,7 @@ #include "catalog/pg_opfamily.h" #include "catalog/pg_proc.h" #include "catalog/pg_shdepend.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_ts_config.h" @@ -1415,6 +1416,7 @@ shdepReassignOwned(List *roleids, Oid newrole) case OperatorFamilyRelationId: case OperatorClassRelationId: case ExtensionRelationId: + case StatisticExtRelationId: case TableSpaceRelationId: case DatabaseRelationId: case TSConfigRelationId: diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index b6552da..5839187 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -186,6 +186,16 @@ CREATE OR REPLACE VIEW pg_sequences AS WHERE 
NOT pg_is_other_temp_schema(N.oid) AND relkind = 'S'; +CREATE VIEW pg_stats_ext AS + SELECT + N.nspname AS schemaname, + C.relname AS tablename, + S.staname AS staname, + S.stakeys AS attnums, + length(s.standistinct) AS ndistbytes + FROM (pg_statistic_ext S JOIN pg_class C ON (C.oid = S.starelid)) + LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace); + CREATE VIEW pg_stats WITH (security_barrier) AS SELECT nspname AS schemaname, diff --git a/src/backend/commands/Makefile b/src/backend/commands/Makefile index e0fab38..4a6c99e 100644 --- a/src/backend/commands/Makefile +++ b/src/backend/commands/Makefile @@ -18,8 +18,8 @@ OBJS = amcmds.o aggregatecmds.o alter.o analyze.o async.o cluster.o comment.o \ event_trigger.o explain.o extension.o foreigncmds.o functioncmds.o \ indexcmds.o lockcmds.o matview.o operatorcmds.o opclasscmds.o \ policy.o portalcmds.o prepare.o proclang.o publicationcmds.o \ - schemacmds.o seclabel.o sequence.o subscriptioncmds.o tablecmds.o \ - tablespace.o trigger.o tsearchcmds.o typecmds.o user.o vacuum.o \ - vacuumlazy.o variable.o view.o + schemacmds.o seclabel.o sequence.o statscmds.o subscriptioncmds.o \ + tablecmds.o tablespace.o trigger.o tsearchcmds.o typecmds.o user.o \ + vacuum.o vacuumlazy.o variable.o view.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/commands/alter.c b/src/backend/commands/alter.c index cf1391c..2c6435b 100644 --- a/src/backend/commands/alter.c +++ b/src/backend/commands/alter.c @@ -33,6 +33,7 @@ #include "catalog/pg_opfamily.h" #include "catalog/pg_proc.h" #include "catalog/pg_subscription.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_ts_config.h" #include "catalog/pg_ts_dict.h" #include "catalog/pg_ts_parser.h" @@ -120,6 +121,10 @@ report_namespace_conflict(Oid classId, const char *name, Oid nspOid) Assert(OidIsValid(nspOid)); msgfmt = gettext_noop("conversion \"%s\" already exists in schema \"%s\""); break; + case StatisticExtRelationId: + Assert(OidIsValid(nspOid)); + 
msgfmt = gettext_noop("statistics \"%s\" already exists in schema \"%s\""); + break; case TSParserRelationId: Assert(OidIsValid(nspOid)); msgfmt = gettext_noop("text search parser \"%s\" already exists in schema \"%s\""); @@ -373,6 +378,7 @@ ExecRenameStmt(RenameStmt *stmt) case OBJECT_OPCLASS: case OBJECT_OPFAMILY: case OBJECT_LANGUAGE: + case OBJECT_STATISTIC_EXT: case OBJECT_TSCONFIGURATION: case OBJECT_TSDICTIONARY: case OBJECT_TSPARSER: @@ -489,6 +495,7 @@ ExecAlterObjectSchemaStmt(AlterObjectSchemaStmt *stmt, case OBJECT_OPERATOR: case OBJECT_OPCLASS: case OBJECT_OPFAMILY: + case OBJECT_STATISTIC_EXT: case OBJECT_TSCONFIGURATION: case OBJECT_TSDICTIONARY: case OBJECT_TSPARSER: @@ -803,6 +810,7 @@ ExecAlterOwnerStmt(AlterOwnerStmt *stmt) case OBJECT_OPERATOR: case OBJECT_OPCLASS: case OBJECT_OPFAMILY: + case OBJECT_STATISTIC_EXT: case OBJECT_TABLESPACE: case OBJECT_TSDICTIONARY: case OBJECT_TSCONFIGURATION: diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index b91df98..f4703dd 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -17,6 +17,7 @@ #include #include "access/multixact.h" +#include "access/sysattr.h" #include "access/transam.h" #include "access/tupconvert.h" #include "access/tuptoaster.h" @@ -28,6 +29,7 @@ #include "catalog/pg_collation.h" #include "catalog/pg_inherits_fn.h" #include "catalog/pg_namespace.h" +#include "catalog/pg_statistic_ext.h" #include "commands/dbcommands.h" #include "commands/tablecmds.h" #include "commands/vacuum.h" @@ -39,13 +41,17 @@ #include "parser/parse_relation.h" #include "pgstat.h" #include "postmaster/autovacuum.h" +#include "statistics/stat_ext_internal.h" +#include "statistics/stats.h" #include "storage/bufmgr.h" #include "storage/lmgr.h" #include "storage/proc.h" #include "storage/procarray.h" #include "utils/acl.h" #include "utils/attoptcache.h" +#include "utils/builtins.h" #include "utils/datum.h" +#include "utils/fmgroids.h" #include "utils/guc.h" 
#include "utils/lsyscache.h" #include "utils/memutils.h" @@ -566,6 +572,10 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params, update_attstats(RelationGetRelid(Irel[ind]), false, thisdata->attr_cnt, thisdata->vacattrstats); } + + /* Build extended statistics (if there are any). */ + BuildRelationExtStatistics(onerel, totalrows, numrows, rows, attr_cnt, + vacattrstats); } /* @@ -1683,19 +1693,6 @@ ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull) */ typedef struct { - Oid eqopr; /* '=' operator for datatype, if any */ - Oid eqfunc; /* and associated function */ - Oid ltopr; /* '<' operator for datatype, if any */ -} StdAnalyzeData; - -typedef struct -{ - Datum value; /* a data value */ - int tupno; /* position index for tuple it came from */ -} ScalarItem; - -typedef struct -{ int count; /* # of duplicates */ int first; /* values[] index of first occurrence */ } ScalarMCVItem; diff --git a/src/backend/commands/dropcmds.c b/src/backend/commands/dropcmds.c index ab73fbf..cb948f0 100644 --- a/src/backend/commands/dropcmds.c +++ b/src/backend/commands/dropcmds.c @@ -286,6 +286,13 @@ does_not_exist_skipping(ObjectType objtype, Node *object) msg = gettext_noop("schema \"%s\" does not exist, skipping"); name = strVal((Value *) object); break; + case OBJECT_STATISTIC_EXT: + if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name)) + { + msg = gettext_noop("extended statistics \"%s\" do not exist, skipping"); + name = NameListToString(castNode(List, object)); + } + break; case OBJECT_TSPARSER: if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name)) { diff --git a/src/backend/commands/event_trigger.c b/src/backend/commands/event_trigger.c index 346b347..7366fc7 100644 --- a/src/backend/commands/event_trigger.c +++ b/src/backend/commands/event_trigger.c @@ -112,6 +112,7 @@ static event_trigger_support_data event_trigger_support[] = { {"SCHEMA", true}, {"SEQUENCE", true}, {"SERVER", true}, + {"STATISTICS", true}, 
{"SUBSCRIPTION", true}, {"TABLE", true}, {"TABLESPACE", false}, @@ -1108,6 +1109,7 @@ EventTriggerSupportsObjectType(ObjectType obtype) case OBJECT_SCHEMA: case OBJECT_SEQUENCE: case OBJECT_SUBSCRIPTION: + case OBJECT_STATISTIC_EXT: case OBJECT_TABCONSTRAINT: case OBJECT_TABLE: case OBJECT_TRANSFORM: @@ -1173,6 +1175,7 @@ EventTriggerSupportsObjectClass(ObjectClass objclass) case OCLASS_PUBLICATION: case OCLASS_PUBLICATION_REL: case OCLASS_SUBSCRIPTION: + case OCLASS_STATISTIC_EXT: return true; } diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c new file mode 100644 index 0000000..b8b584a --- /dev/null +++ b/src/backend/commands/statscmds.c @@ -0,0 +1,279 @@ +/*------------------------------------------------------------------------- + * + * statscmds.c + * Commands for creating and altering extended statistics + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/statscmds.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/relscan.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_statistic_ext.h" +#include "commands/defrem.h" +#include "miscadmin.h" +#include "statistics/stats.h" +#include "utils/builtins.h" +#include "utils/inval.h" +#include "utils/memutils.h" +#include "utils/rel.h" +#include "utils/syscache.h" + + +/* used for sorting the attnums in CreateStatistics */ +static int +compare_int16(const void *a, const void *b) +{ + return memcmp(a, b, sizeof(int16)); +} + +/* + * CREATE STATISTICS + */ +ObjectAddress +CreateStatistics(CreateStatsStmt *stmt) +{ + int i; + ListCell *l; + int16 attnums[STATS_MAX_DIMENSIONS]; + int numcols = 0; + ObjectAddress address = InvalidObjectAddress; + char 
*namestr; + NameData staname; + Oid statoid; + Oid namespaceId; + HeapTuple htup; + Datum values[Natts_pg_statistic_ext]; + bool nulls[Natts_pg_statistic_ext]; + int2vector *stakeys; + Relation statrel; + Relation rel; + Oid relid; + ObjectAddress parentobject, + childobject; + Datum types[1]; /* only ndistinct defined now */ + int ntypes; + ArrayType *staenabled; + bool build_ndistinct; + bool requested_type = false; + + Assert(IsA(stmt, CreateStatsStmt)); + + /* resolve the pieces of the name (namespace etc.) */ + namespaceId = QualifiedNameGetCreationNamespace(stmt->defnames, &namestr); + namestrcpy(&staname, namestr); + + /* + * If if_not_exists was given and the statistics already exists, bail out. + */ + if (SearchSysCacheExists2(STATEXTNAMENSP, + PointerGetDatum(&staname), + ObjectIdGetDatum(namespaceId))) + { + if (stmt->if_not_exists) + { + ereport(NOTICE, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("statistics \"%s\" already exist, skipping", + namestr))); + return InvalidObjectAddress; + } + + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("statistics \"%s\" already exist", namestr))); + } + + rel = heap_openrv(stmt->relation, AccessExclusiveLock); + relid = RelationGetRelid(rel); + + if (rel->rd_rel->relkind != RELKIND_RELATION && + rel->rd_rel->relkind != RELKIND_MATVIEW) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("relation \"%s\" is not a table or materialized view", + RelationGetRelationName(rel)))); + + /* + * Transform column names to array of attnums. While doing that, we also + * enforce the maximum number of keys. 
+ */ + foreach(l, stmt->keys) + { + char *attname = strVal(lfirst(l)); + HeapTuple atttuple; + + atttuple = SearchSysCacheAttName(relid, attname); + + if (!HeapTupleIsValid(atttuple)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("column \"%s\" referenced in statistics does not exist", + attname))); + + /* more than STATS_MAX_DIMENSIONS columns not allowed */ + if (numcols >= STATS_MAX_DIMENSIONS) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_COLUMNS), + errmsg("cannot have more than %d keys in statistics", + STATS_MAX_DIMENSIONS))); + + attnums[numcols] = ((Form_pg_attribute) GETSTRUCT(atttuple))->attnum; + ReleaseSysCache(atttuple); + numcols++; + } + + /* + * Check that at least two columns were specified in the statement. The + * upper bound was already checked in the loop above. + */ + if (numcols < 2) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_COLUMNS), + errmsg("statistics require at least 2 columns"))); + + /* + * Sort the attnums, which makes detecting duplicies somewhat easier, and + * it does not hurt (it does not affect the efficiency, unlike for + * indexes, for example). + */ + qsort(attnums, numcols, sizeof(int16), compare_int16); + + /* + * Look for duplicities in the list of columns. The attnums are sorted so + * just check consecutive elements. + */ + for (i = 1; i < numcols; i++) + if (attnums[i] == attnums[i - 1]) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_COLUMN), + errmsg("duplicate column name in statistics definition"))); + + stakeys = buildint2vector(attnums, numcols); + + /* + * Parse the statistics options. Currently only statistics types are + * recognized. 
+ */ + build_ndistinct = false; + foreach(l, stmt->options) + { + DefElem *opt = (DefElem *) lfirst(l); + + if (strcmp(opt->defname, "ndistinct") == 0) + { + build_ndistinct = defGetBoolean(opt); + requested_type = true; + } + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized STATISTICS option \"%s\"", + opt->defname))); + } + /* If no statistic type was specified, build them all. */ + if (!requested_type) + build_ndistinct = true; + + /* construct the char array of enabled statistic types */ + ntypes = 0; + if (build_ndistinct) + types[ntypes++] = CharGetDatum(STATS_EXT_NDISTINCT); + Assert(ntypes > 0); + staenabled = construct_array(types, ntypes, CHAROID, 1, true, 'c'); + + /* + * Everything seems fine, so let's build the pg_statistic_ext tuple. + */ + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + values[Anum_pg_statistic_ext_starelid - 1] = ObjectIdGetDatum(relid); + values[Anum_pg_statistic_ext_staname - 1] = NameGetDatum(&staname); + values[Anum_pg_statistic_ext_stanamespace - 1] = ObjectIdGetDatum(namespaceId); + values[Anum_pg_statistic_ext_staowner - 1] = ObjectIdGetDatum(GetUserId()); + values[Anum_pg_statistic_ext_stakeys - 1] = PointerGetDatum(stakeys); + values[Anum_pg_statistic_ext_staenabled - 1] = PointerGetDatum(staenabled); + + /* no statistics build yet */ + nulls[Anum_pg_statistic_ext_standistinct - 1] = true; + + /* insert it into pg_statistic_ext */ + statrel = heap_open(StatisticExtRelationId, RowExclusiveLock); + htup = heap_form_tuple(statrel->rd_att, values, nulls); + CatalogTupleInsert(statrel, htup); + statoid = HeapTupleGetOid(htup); + heap_freetuple(htup); + heap_close(statrel, RowExclusiveLock); + relation_close(rel, NoLock); + + /* + * Add a dependency on a table, so that stats get dropped on DROP TABLE. 
+ */ + ObjectAddressSet(parentobject, RelationRelationId, relid); + ObjectAddressSet(childobject, StatisticExtRelationId, statoid); + recordDependencyOn(&childobject, &parentobject, DEPENDENCY_AUTO); + + /* + * Also add dependency on the schema (to drop statistics on DROP SCHEMA). + * This is not handled automatically by DROP TABLE because statistics are + * on their own schema. + */ + ObjectAddressSet(parentobject, NamespaceRelationId, namespaceId); + recordDependencyOn(&childobject, &parentobject, DEPENDENCY_AUTO); + + ObjectAddressSet(address, StatisticExtRelationId, statoid); + + /* + * Invalidate relcache so that others see the new statistics. + */ + CacheInvalidateRelcache(rel); + + return address; +} + +/* + * Guts of statistics deletion. + */ +void +RemoveStatisticsById(Oid statsOid) +{ + Relation relation; + Oid relid; + Relation rel; + HeapTuple tup; + Form_pg_statistic_ext statext; + + /* + * Delete the pg_statistic_ext tuple. + */ + relation = heap_open(StatisticExtRelationId, RowExclusiveLock); + + tup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statsOid)); + + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for statistics %u", statsOid); + + statext = (Form_pg_statistic_ext) GETSTRUCT(tup); + relid = statext->starelid; + + rel = heap_open(relid, AccessExclusiveLock); + + simple_heap_delete(relation, &tup->t_self); + + CacheInvalidateRelcache(rel); + + ReleaseSysCache(tup); + + heap_close(relation, RowExclusiveLock); + heap_close(rel, NoLock); +} diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 25fd051..e47d5e8 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -4448,6 +4448,20 @@ _copyDropSubscriptionStmt(const DropSubscriptionStmt *from) return newnode; } +static CreateStatsStmt * +_copyCreateStatsStmt(const CreateStatsStmt *from) +{ + CreateStatsStmt *newnode = makeNode(CreateStatsStmt); + + COPY_NODE_FIELD(defnames); + COPY_NODE_FIELD(relation); 
+ COPY_NODE_FIELD(keys); + COPY_NODE_FIELD(options); + COPY_SCALAR_FIELD(if_not_exists); + + return newnode; +} + /* **************************************************************** * pg_list.h copy functions * **************************************************************** @@ -5386,6 +5400,9 @@ copyObject(const void *from) case T_CommonTableExpr: retval = _copyCommonTableExpr(from); break; + case T_CreateStatsStmt: + retval = _copyCreateStatsStmt(from); + break; case T_ObjectWithArgs: retval = _copyObjectWithArgs(from); break; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 7418fbe..cf2415f 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2193,6 +2193,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node) WRITE_NODE_FIELD(lateral_vars); WRITE_BITMAPSET_FIELD(lateral_referencers); WRITE_NODE_FIELD(indexlist); + WRITE_NODE_FIELD(statlist); WRITE_UINT_FIELD(pages); WRITE_FLOAT_FIELD(tuples, "%.0f"); WRITE_FLOAT_FIELD(allvisfrac, "%.6f"); @@ -2266,6 +2267,18 @@ _outForeignKeyOptInfo(StringInfo str, const ForeignKeyOptInfo *node) } static void +_outStatisticExtInfo(StringInfo str, const StatisticExtInfo *node) +{ + WRITE_NODE_TYPE("STATISTICEXTINFO"); + + /* NB: this isn't a complete set of fields */ + WRITE_OID_FIELD(statOid); + /* don't write rel, leads to infinite recursion in plan tree dump */ + WRITE_CHAR_FIELD(kind); + /* XXX missing a way to print keys */ +} + +static void _outEquivalenceClass(StringInfo str, const EquivalenceClass *node) { /* @@ -3915,6 +3928,9 @@ outNode(StringInfo str, const void *obj) case T_PlannerParamItem: _outPlannerParamItem(str, obj); break; + case T_StatisticExtInfo: + _outStatisticExtInfo(str, obj); + break; case T_ExtensibleNode: _outExtensibleNode(str, obj); diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 463f806..4ef6924 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ 
-29,6 +29,7 @@ #include "catalog/heap.h" #include "catalog/partition.h" #include "catalog/pg_am.h" +#include "catalog/pg_statistic_ext.h" #include "foreign/fdwapi.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -40,8 +41,11 @@ #include "parser/parse_relation.h" #include "parser/parsetree.h" #include "rewrite/rewriteManip.h" +#include "statistics/stats.h" #include "storage/bufmgr.h" +#include "utils/builtins.h" #include "utils/lsyscache.h" +#include "utils/syscache.h" #include "utils/rel.h" #include "utils/snapmgr.h" @@ -63,7 +67,7 @@ static List *get_relation_constraints(PlannerInfo *root, bool include_notnull); static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index, Relation heapRelation); - +static List *get_relation_statistics(RelOptInfo *rel, Relation relation); /* * get_relation_info - @@ -398,6 +402,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, rel->indexlist = indexinfos; + rel->statlist = get_relation_statistics(rel, relation); + /* Grab foreign-table info using the relcache, while we have it */ if (relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE) { @@ -1251,6 +1257,67 @@ get_relation_constraints(PlannerInfo *root, return result; } +/* + * get_relation_statistics + * Retrieve extended statistics defined on the table. + * + * Returns a List (possibly empty) of StatisticExtInfo objects describing + * the statistics. Note that this doesn't load the actual statistics data, + * just the identifying metadata. Only stats actually built are considered. 
+ */ +static List * +get_relation_statistics(RelOptInfo *rel, Relation relation) +{ + List *statoidlist; + List *stainfos = NIL; + ListCell *l; + + statoidlist = RelationGetStatExtList(relation); + + foreach(l, statoidlist) + { + Oid statOid = lfirst_oid(l); + ArrayType *arr; + int2vector *keys; + Datum adatum; + bool isnull; + HeapTuple htup; + + htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid)); + if (!htup) + elog(ERROR, "cache lookup failed for statistics %u", statOid); + + /* + * First, build the array of columns covered. This is ultimately + * wasted if no stats are actually built, but it doesn't seem worth + * troubling over that case. + */ + adatum = SysCacheGetAttr(STATEXTOID, htup, + Anum_pg_statistic_ext_stakeys, &isnull); + Assert(!isnull); + arr = DatumGetArrayTypeP(adatum); + keys = buildint2vector((int16 *) ARR_DATA_PTR(arr), ARR_DIMS(arr)[0]); + + /* add one StatisticExtInfo for each kind built */ + if (statext_is_kind_built(htup, STATS_EXT_NDISTINCT)) + { + StatisticExtInfo *info = makeNode(StatisticExtInfo); + + info->statOid = statOid; + info->rel = rel; + info->kind = STATS_EXT_NDISTINCT; + info->keys = keys; + + stainfos = lcons(info, stainfos); + } + + ReleaseSysCache(htup); + } + + list_free(statoidlist); + + return stainfos; +} /* * relation_excluded_by_constraints diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 6316688..fe7cdfc 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -257,7 +257,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); ConstraintsSetStmt CopyStmt CreateAsStmt CreateCastStmt CreateDomainStmt CreateExtensionStmt CreateGroupStmt CreateOpClassStmt CreateOpFamilyStmt AlterOpFamilyStmt CreatePLangStmt - CreateSchemaStmt CreateSeqStmt CreateStmt CreateTableSpaceStmt + CreateSchemaStmt CreateSeqStmt CreateStmt CreateStatsStmt CreateTableSpaceStmt CreateFdwStmt CreateForeignServerStmt CreateForeignTableStmt CreateAssertStmt 
CreateTransformStmt CreateTrigStmt CreateEventTrigStmt CreateUserStmt CreateUserMappingStmt CreateRoleStmt CreatePolicyStmt @@ -873,6 +873,7 @@ stmt : | CreateSeqStmt | CreateStmt | CreateSubscriptionStmt + | CreateStatsStmt | CreateTableSpaceStmt | CreateTransformStmt | CreateTrigStmt @@ -3746,6 +3747,35 @@ OptConsTableSpace: USING INDEX TABLESPACE name { $$ = $4; } ExistingIndex: USING INDEX index_name { $$ = $3; } ; +/***************************************************************************** + * + * QUERY : + * CREATE STATISTICS stats_name WITH (options) ON (columns) FROM relname + * + *****************************************************************************/ + + +CreateStatsStmt: CREATE STATISTICS any_name opt_reloptions ON '(' columnList ')' FROM qualified_name + { + CreateStatsStmt *n = makeNode(CreateStatsStmt); + n->defnames = $3; + n->relation = $10; + n->keys = $7; + n->options = $4; + n->if_not_exists = false; + $$ = (Node *)n; + } + | CREATE STATISTICS IF_P NOT EXISTS any_name opt_reloptions ON '(' columnList ')' FROM qualified_name + { + CreateStatsStmt *n = makeNode(CreateStatsStmt); + n->defnames = $6; + n->relation = $13; + n->keys = $10; + n->options = $7; + n->if_not_exists = true; + $$ = (Node *)n; + } + ; /***************************************************************************** * @@ -6018,6 +6048,7 @@ drop_type_any_name: | FOREIGN TABLE { $$ = OBJECT_FOREIGN_TABLE; } | COLLATION { $$ = OBJECT_COLLATION; } | CONVERSION_P { $$ = OBJECT_CONVERSION; } + | STATISTICS { $$ = OBJECT_STATISTIC_EXT; } | TEXT_P SEARCH PARSER { $$ = OBJECT_TSPARSER; } | TEXT_P SEARCH DICTIONARY { $$ = OBJECT_TSDICTIONARY; } | TEXT_P SEARCH TEMPLATE { $$ = OBJECT_TSTEMPLATE; } @@ -6095,7 +6126,7 @@ opt_restart_seqs: * EXTENSION | EVENT TRIGGER | FOREIGN DATA WRAPPER | * FOREIGN TABLE | INDEX | [PROCEDURAL] LANGUAGE | * MATERIALIZED VIEW | POLICY | ROLE | SCHEMA | SEQUENCE | - * SERVER | TABLE | TABLESPACE | + * SERVER | STATISTICS | TABLE | TABLESPACE | * TEXT 
SEARCH CONFIGURATION | TEXT SEARCH DICTIONARY | * TEXT SEARCH PARSER | TEXT SEARCH TEMPLATE | TYPE | * VIEW] | @@ -6264,6 +6295,7 @@ comment_type_any_name: COLUMN { $$ = OBJECT_COLUMN; } | INDEX { $$ = OBJECT_INDEX; } | SEQUENCE { $$ = OBJECT_SEQUENCE; } + | STATISTICS { $$ = OBJECT_STATISTIC_EXT; } | TABLE { $$ = OBJECT_TABLE; } | VIEW { $$ = OBJECT_VIEW; } | MATERIALIZED VIEW { $$ = OBJECT_MATVIEW; } @@ -8404,6 +8436,15 @@ RenameStmt: ALTER AGGREGATE aggregate_with_argtypes RENAME TO name n->missing_ok = false; $$ = (Node *)n; } + | ALTER STATISTICS any_name RENAME TO name + { + RenameStmt *n = makeNode(RenameStmt); + n->renameType = OBJECT_STATISTIC_EXT; + n->object = (Node *) $3; + n->newname = $6; + n->missing_ok = false; + $$ = (Node *)n; + } | ALTER TEXT_P SEARCH PARSER any_name RENAME TO name { RenameStmt *n = makeNode(RenameStmt); @@ -8619,6 +8660,15 @@ AlterObjectSchemaStmt: n->missing_ok = true; $$ = (Node *)n; } + | ALTER STATISTICS any_name SET SCHEMA name + { + AlterObjectSchemaStmt *n = makeNode(AlterObjectSchemaStmt); + n->objectType = OBJECT_STATISTIC_EXT; + n->object = (Node *) $3; + n->newschema = $6; + n->missing_ok = false; + $$ = (Node *)n; + } | ALTER TEXT_P SEARCH PARSER any_name SET SCHEMA name { AlterObjectSchemaStmt *n = makeNode(AlterObjectSchemaStmt); @@ -8882,6 +8932,14 @@ AlterOwnerStmt: ALTER AGGREGATE aggregate_with_argtypes OWNER TO RoleSpec n->newowner = $6; $$ = (Node *)n; } + | ALTER STATISTICS any_name OWNER TO RoleSpec + { + AlterOwnerStmt *n = makeNode(AlterOwnerStmt); + n->objectType = OBJECT_STATISTIC_EXT; + n->object = (Node *) $3; + n->newowner = $6; + $$ = (Node *)n; + } | ALTER TEXT_P SEARCH DICTIONARY any_name OWNER TO RoleSpec { AlterOwnerStmt *n = makeNode(AlterOwnerStmt); diff --git a/src/backend/statistics/Makefile b/src/backend/statistics/Makefile new file mode 100644 index 0000000..c437eca --- /dev/null +++ b/src/backend/statistics/Makefile @@ -0,0 +1,17 @@ 
+#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for statistics +# +# IDENTIFICATION +# src/backend/statistics/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/backend/statistics +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +OBJS = stat_ext.o mvdist.o + +include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/statistics/README b/src/backend/statistics/README new file mode 100644 index 0000000..beb7c24 --- /dev/null +++ b/src/backend/statistics/README @@ -0,0 +1,34 @@ +Extended statistics +=================== + +When estimating various quantities (e.g. condition selectivities) the default +approach relies on the assumption of independence. In practice that's often +not true, resulting in estimation errors. + +Extended statistics track different types of dependencies between the columns, +hopefully improving the estimates and producing better plans. + +Currently we only have one type of extended statistics - ndistinct +coefficients, and we use it to improve estimates of grouping queries. See +README.ndistinct for details. + + +Size of sample in ANALYZE +------------------------- +When performing ANALYZE, the number of rows to sample is determined as + + (300 * statistics_target) + +That works reasonably well for statistics on individual columns, but perhaps +it's not enough for extended statistics. Papers analyzing estimation errors +all use samples proportional to the table (usually finding that 1-3% of the +table is enough to build accurate stats). + +The requested accuracy (number of MCV items or histogram bins) should also +be considered when determining the sample size, and in extended statistics +those are not necessarily limited by statistics_target. + +This however merits further discussion, because collecting the sample is quite +expensive and increasing it further would make ANALYZE even more painful. 
+Judging by the experiments with the current implementation, the fixed size +seems to work reasonably well for now, so we leave this as a future work. diff --git a/src/backend/statistics/README.ndistinct b/src/backend/statistics/README.ndistinct new file mode 100644 index 0000000..9365b17 --- /dev/null +++ b/src/backend/statistics/README.ndistinct @@ -0,0 +1,22 @@ +ndistinct coefficients +====================== + +Estimating number of groups in a combination of columns (e.g. for GROUP BY) +is tricky, and the estimation error is often significant. + +The ndistinct coefficients address this by storing ndistinct estimates not +only for individual columns, but also for (all) combinations of columns. +So for example given three columns (a,b,c) the statistics will estimate +ndistinct for (a,b), (a,c), (b,c) and (a,b,c). The per-column estimates +are already available in pg_statistic. + + +GROUP BY estimation (estimate_num_groups) +----------------------------------------- + +Although ndistinct coefficient might be used for selectivity estimation +(of equality conditions in WHERE clause), that is not implemented at this +point. + +Instead, ndistinct coefficients are only used in estimate_num_groups() to +estimate grouped queries. 
diff --git a/src/backend/statistics/mvdist.c b/src/backend/statistics/mvdist.c new file mode 100644 index 0000000..ab520e5 --- /dev/null +++ b/src/backend/statistics/mvdist.c @@ -0,0 +1,613 @@ +/*------------------------------------------------------------------------- + * + * mvdist.c + * POSTGRES multivariate ndistinct coefficients + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/statistics/mvdist.c + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include + +#include "access/htup_details.h" +#include "catalog/pg_statistic_ext.h" +#include "utils/fmgrprotos.h" +#include "utils/lsyscache.h" +#include "lib/stringinfo.h" +#include "utils/syscache.h" +#include "statistics/stat_ext_internal.h" +#include "statistics/stats.h" + + +static double ndistinct_for_combination(double totalrows, int numrows, + HeapTuple *rows, int2vector *attrs, VacAttrStats **stats, + int k, AttrNumber *combination); +static double estimate_ndistinct(double totalrows, int numrows, int d, int f1); +static int n_choose_k(int n, int k); +static int num_combinations(int n); + +/* Combination generator API */ + +/* internal state for generator of k-combinations of n elements */ +typedef struct CombinationGenerator +{ + int k; /* size of the combination */ + int current; /* index of the next combination to return */ + int ncombinations; /* number of combinations (size of array) */ + AttrNumber *combinations; /* array of pre-built combinations */ +} CombinationGenerator; + +static CombinationGenerator *generator_init(int2vector *attrs, int k); +static void generator_free(CombinationGenerator *state); +static AttrNumber *generator_next(CombinationGenerator *state, int2vector *attrs); +static void generate_combinations(CombinationGenerator *state, int n); + + +/* + * Compute ndistinct coefficient for the 
combination of attributes. This + * computes the ndistinct estimate using the same estimator used in analyze.c + * and then computes the coefficient. + */ +MVNDistinct +statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows, + int2vector *attrs, VacAttrStats **stats) +{ + int i, + k; + int numattrs = attrs->dim1; + int numcombs = num_combinations(numattrs); + + MVNDistinct result; + + result = palloc0(offsetof(MVNDistinctData, items) + + numcombs * sizeof(MVNDistinctItem)); + + result->nitems = numcombs; + + i = 0; + for (k = 2; k <= numattrs; k++) + { + AttrNumber *combination; + CombinationGenerator *generator; + + generator = generator_init(attrs, k); + + while ((combination = generator_next(generator, attrs))) + { + MVNDistinctItem *item = &result->items[i++]; + + item->nattrs = k; + item->ndistinct = ndistinct_for_combination(totalrows, numrows, rows, + attrs, stats, k, combination); + + item->attrs = palloc(k * sizeof(AttrNumber)); + memcpy(item->attrs, combination, k * sizeof(AttrNumber)); + + /* must not overflow the output array */ + Assert(i <= result->nitems); + } + + generator_free(generator); + } + + /* must consume exactly the whole output array */ + Assert(i == result->nitems); + + return result; +} + +/* + * statext_ndistinct_load + * Load the ndistinct value for the indicated pg_statistic_ext tuple + */ +MVNDistinct +statext_ndistinct_load(Oid mvoid) +{ + bool isnull = false; + Datum ndist; + HeapTuple htup; + + htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(mvoid)); + if (!htup) + elog(ERROR, "cache lookup failed for statistics %u", mvoid); + + ndist = SysCacheGetAttr(STATEXTOID, htup, + Anum_pg_statistic_ext_standistinct, &isnull); + if (isnull) + elog(ERROR, + "requested statistic kind %c not yet built for statistics %u", + STATS_EXT_NDISTINCT, mvoid); + + ReleaseSysCache(htup); + + return statext_ndistinct_deserialize(DatumGetByteaP(ndist)); +} + +/* + * statext_ndistinct_serialize + * serialize ndistinct to the on-disk 
bytea format + */ +bytea * +statext_ndistinct_serialize(MVNDistinct ndistinct) +{ + int i; + bytea *output; + char *tmp; + Size len; + + /* we need to store nitems */ + len = VARHDRSZ + offsetof(MVNDistinctData, items) + + ndistinct->nitems * offsetof(MVNDistinctItem, attrs); + + /* and also include space for the actual attribute numbers */ + for (i = 0; i < ndistinct->nitems; i++) + len += (sizeof(AttrNumber) * ndistinct->items[i].nattrs); + + output = (bytea *) palloc0(len); + SET_VARSIZE(output, len); + + tmp = VARDATA(output); + + ndistinct->magic = STATS_NDISTINCT_MAGIC; + ndistinct->type = STATS_NDISTINCT_TYPE_BASIC; + + /* first, store the number of items */ + memcpy(tmp, ndistinct, offsetof(MVNDistinctData, items)); + tmp += offsetof(MVNDistinctData, items); + + /* + * store number of attributes and attribute numbers for each ndistinct + * entry + */ + for (i = 0; i < ndistinct->nitems; i++) + { + MVNDistinctItem item = ndistinct->items[i]; + + memcpy(tmp, &item, offsetof(MVNDistinctItem, attrs)); + tmp += offsetof(MVNDistinctItem, attrs); + + memcpy(tmp, item.attrs, sizeof(AttrNumber) * item.nattrs); + tmp += sizeof(AttrNumber) * item.nattrs; + + Assert(tmp <= ((char *) output + len)); + } + + return output; +} + +/* + * statext_ndistinct_deserialize + * Read an on-disk bytea format MVNDistinct to in-memory format + */ +MVNDistinct +statext_ndistinct_deserialize(bytea *data) +{ + int i; + Size expected_size; + MVNDistinct ndistinct; + char *tmp; + + if (data == NULL) + return NULL; + + if (VARSIZE_ANY_EXHDR(data) < offsetof(MVNDistinctData, items)) + elog(ERROR, "invalid MVNDistinct size %ld (expected at least %ld)", + VARSIZE_ANY_EXHDR(data), offsetof(MVNDistinctData, items)); + + /* read the MVNDistinct header */ + ndistinct = (MVNDistinct) palloc0(sizeof(MVNDistinctData)); + + /* initialize pointer to the data part (skip the varlena header) */ + tmp = VARDATA_ANY(data); + + /* get the header and perform basic sanity checks */ + memcpy(ndistinct, tmp, 
offsetof(MVNDistinctData, items)); + tmp += offsetof(MVNDistinctData, items); + + if (ndistinct->magic != STATS_NDISTINCT_MAGIC) + elog(ERROR, "invalid ndistinct magic %d (expected %d)", + ndistinct->magic, STATS_NDISTINCT_MAGIC); + + if (ndistinct->type != STATS_NDISTINCT_TYPE_BASIC) + elog(ERROR, "invalid ndistinct type %d (expected %d)", + ndistinct->type, STATS_NDISTINCT_TYPE_BASIC); + + Assert(ndistinct->nitems > 0); + + /* what minimum bytea size do we expect for those parameters */ + expected_size = offsetof(MVNDistinctData, items) + + ndistinct->nitems * (offsetof(MVNDistinctItem, attrs) + + sizeof(AttrNumber) * 2); + + if (VARSIZE_ANY_EXHDR(data) < expected_size) + elog(ERROR, "invalid ndistinct size %ld (expected at least %ld)", + VARSIZE_ANY_EXHDR(data), expected_size); + + /* allocate space for the ndistinct items */ + ndistinct = repalloc(ndistinct, offsetof(MVNDistinctData, items) + + (ndistinct->nitems * sizeof(MVNDistinctItem))); + + for (i = 0; i < ndistinct->nitems; i++) + { + MVNDistinctItem *item = &ndistinct->items[i]; + + /* number of attributes */ + memcpy(item, tmp, offsetof(MVNDistinctItem, attrs)); + tmp += offsetof(MVNDistinctItem, attrs); + + /* is the number of attributes valid? */ + Assert((item->nattrs >= 2) && (item->nattrs <= STATS_MAX_DIMENSIONS)); + + /* now that we know the number of attributes, allocate the attribute */ + item->attrs = (AttrNumber *) palloc0(item->nattrs * sizeof(AttrNumber)); + + /* copy attribute numbers */ + memcpy(item->attrs, tmp, sizeof(AttrNumber) * item->nattrs); + tmp += sizeof(AttrNumber) * item->nattrs; + + /* still within the bytea */ + Assert(tmp <= ((char *) data + VARSIZE_ANY(data))); + } + + /* we should have consumed the whole bytea exactly */ + Assert(tmp == ((char *) data + VARSIZE_ANY(data))); + + return ndistinct; +} + +/* + * pg_ndistinct_in - input routine for type pg_ndistinct.
+ * + * pg_ndistinct is real enough to be a table column, but it has no operations + * of its own, and disallows input too. + */ +Datum +pg_ndistinct_in(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of type %s", "pg_ndistinct"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + +/* + * pg_ndistinct - output routine for type pg_ndistinct. + * + * Produces a human-readable representation of the value. + */ +Datum +pg_ndistinct_out(PG_FUNCTION_ARGS) +{ + bytea *data = PG_GETARG_BYTEA_PP(0); + MVNDistinct ndist = statext_ndistinct_deserialize(data); + int i, + j; + StringInfoData str; + + initStringInfo(&str); + appendStringInfoChar(&str, '['); + + for (i = 0; i < ndist->nitems; i++) + { + MVNDistinctItem item = ndist->items[i]; + + if (i > 0) + appendStringInfoString(&str, ", "); + + appendStringInfoChar(&str, '{'); + + for (j = 0; j < item.nattrs; j++) + { + if (j > 0) + appendStringInfoString(&str, ", "); + + appendStringInfo(&str, "%d", item.attrs[j]); + } + + appendStringInfo(&str, ", %f", item.ndistinct); + + appendStringInfoChar(&str, '}'); + } + + appendStringInfoChar(&str, ']'); + + PG_RETURN_CSTRING(str.data); +} + +/* + * pg_ndistinct_recv - binary input routine for type pg_ndistinct. + */ +Datum +pg_ndistinct_recv(PG_FUNCTION_ARGS) +{ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot accept a value of type %s", "pg_ndistinct"))); + + PG_RETURN_VOID(); /* keep compiler quiet */ +} + +/* + * pg_ndistinct_send - binary output routine for type pg_ndistinct. + * + * n-distinct is serialized into a bytea value, so let's send that. + */ +Datum +pg_ndistinct_send(PG_FUNCTION_ARGS) +{ + return byteasend(fcinfo); +} + +/* + * ndistinct_for_combination + * Estimates number of distinct values in a combination of columns. 
+ * + * This uses the same ndistinct estimator as compute_scalar_stats() in + * ANALYZE, i.e., + * n*d / (n - f1 + f1*n/N) + * + * except that instead of values in a single column we are dealing with + * combination of multiple columns. + */ +static double +ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows, + int2vector *attrs, VacAttrStats **stats, + int k, AttrNumber *combination) +{ + int i, + j; + int f1, + cnt, + d; + int nmultiple, + summultiple; + bool *isnull; + Datum *values; + SortItem *items; + MultiSortSupport mss; + + /* + * It's possible to sort the sample rows directly, but this seemed somehow + * simpler / less error prone. Another option would be to allocate the + * arrays for each SortItem separately, but that'd be significant overhead + * (not just CPU, but especially memory bloat). + */ + mss = multi_sort_init(k); + items = (SortItem *) palloc0(numrows * sizeof(SortItem)); + values = (Datum *) palloc0(sizeof(Datum) * numrows * k); + isnull = (bool *) palloc0(sizeof(bool) * numrows * k); + + Assert((k >= 2) && (k <= attrs->dim1)); + + for (i = 0; i < numrows; i++) + { + items[i].values = &values[i * k]; + items[i].isnull = &isnull[i * k]; + } + + for (i = 0; i < k; i++) + { + /* prepare the sort function for the first dimension */ + multi_sort_add_dimension(mss, i, combination[i], stats); + + /* accumulate all the data into the array and sort it */ + for (j = 0; j < numrows; j++) + { + items[j].values[i] = + heap_getattr(rows[j], attrs->values[combination[i]], + stats[combination[i]]->tupDesc, + &items[j].isnull[i]); + } + } + + qsort_arg((void *) items, numrows, sizeof(SortItem), + multi_sort_compare, mss); + + /* count number of distinct combinations */ + + f1 = 0; + cnt = 1; + d = 1; + for (i = 1; i < numrows; i++) + { + if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0) + { + if (cnt == 1) + f1 += 1; + else + { + nmultiple += 1; + summultiple += cnt; + } + + d++; + cnt = 0; + } + + cnt += 1; + } + + if (cnt == 
1) + f1 += 1; + else + { + nmultiple += 1; + summultiple += cnt; + } + + return estimate_ndistinct(totalrows, numrows, d, f1); +} + +/* The Duj1 estimator (already used in analyze.c). */ +static double +estimate_ndistinct(double totalrows, int numrows, int d, int f1) +{ + double numer, + denom, + ndistinct; + + numer = (double) numrows *(double) d; + + denom = (double) (numrows - f1) + + (double) f1 *(double) numrows / totalrows; + + ndistinct = numer / denom; + + /* Clamp to sane range in case of roundoff error */ + if (ndistinct < (double) d) + ndistinct = (double) d; + + if (ndistinct > totalrows) + ndistinct = totalrows; + + return floor(ndistinct + 0.5); +} + +/* + * n_choose_k + * computes binomial coefficients using an algorithm that is both + * efficient and prevents overflows + */ +static int +n_choose_k(int n, int k) +{ + int d, + r; + + Assert((k > 0) && (n >= k)); + + /* use symmetry of the binomial coefficients */ + k = Min(k, n - k); + + r = 1; + for (d = 1; d <= k; ++d) + { + r *= n--; + r /= d; + } + + return r; +} + +/* + * num_combinations + * computes number of combinations, excluding single-value combinations + */ +static int +num_combinations(int n) +{ + int k; + int ncombs = 1; + + for (k = 1; k <= n; k++) + ncombs *= 2; + + ncombs -= (n + 1); + + return ncombs; +} + +/* + * initialize the generator of combinations, and prebuild them. + * + * This pre-builds all the combinations. We could also generate them in + * generator_next(), but this seems simpler. 
+ */ +static CombinationGenerator * +generator_init(int2vector *attrs, int k) +{ + int n = attrs->dim1; + CombinationGenerator *state; + + Assert((n >= k) && (k > 0)); + + /* allocate the generator state as a single chunk of memory */ + state = (CombinationGenerator *) palloc0(sizeof(CombinationGenerator)); + state->combinations = (AttrNumber *) palloc(k * sizeof(AttrNumber)); + + state->ncombinations = n_choose_k(n, k); + state->current = 0; + state->k = k; + + /* now actually pre-generate all the combinations */ + generate_combinations(state, n); + + /* make sure we got the expected number of combinations */ + Assert(state->current == state->ncombinations); + + /* reset the number, so we start with the first one */ + state->current = 0; + + return state; +} + +/* generate next combination */ +static AttrNumber * +generator_next(CombinationGenerator *state, int2vector *attrs) +{ + if (state->current == state->ncombinations) + return NULL; + + return &state->combinations[state->k * state->current++]; +} + +/* free the generator state */ +static void +generator_free(CombinationGenerator *state) +{ + /* we've allocated a single chunk, so just free it */ + pfree(state); +} + +/* + * generate all combinations (k elements from n) + */ +static void +generate_combinations_recurse(CombinationGenerator *state, AttrNumber n, + int index, AttrNumber start, AttrNumber *current) +{ + /* If we haven't filled all the elements, simply recurse. */ + if (index < state->k) + { + AttrNumber i; + + /* + * The values have to be in ascending order, so make sure we start + * with the value passed by parameter. 
+ */ + + for (i = start; i < n; i++) + { + current[index] = i; + generate_combinations_recurse(state, n, (index + 1), (i + 1), current); + } + + return; + } + else + { + /* we got a correct combination */ + state->combinations = (AttrNumber *) repalloc(state->combinations, + state->k * (state->current + 1) * sizeof(AttrNumber)); + memcpy(&state->combinations[(state->k * state->current)], + current, state->k * sizeof(AttrNumber)); + state->current++; + } +} + +/* generate all k-combinations of n elements */ +static void +generate_combinations(CombinationGenerator *state, int n) +{ + AttrNumber *current = (AttrNumber *) palloc0(sizeof(AttrNumber) * state->k); + + generate_combinations_recurse(state, n, 0, 0, current); + + pfree(current); +} diff --git a/src/backend/statistics/stat_ext.c b/src/backend/statistics/stat_ext.c new file mode 100644 index 0000000..f3111ee --- /dev/null +++ b/src/backend/statistics/stat_ext.c @@ -0,0 +1,487 @@ +/*------------------------------------------------------------------------- + * + * stat_ext.c + * POSTGRES extended statistics + * + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/statistics/stat_ext.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/heapam.h" +#include "access/htup_details.h" +#include "catalog/indexing.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_statistic_ext.h" +#include "nodes/relation.h" +#include "statistics/stat_ext_internal.h" +#include "statistics/stats.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/rel.h" +#include "utils/syscache.h" + + +typedef struct StatExtEntry +{ + Oid relid; /* owning relation XXX useless? 
*/ + Oid statOid; /* OID of pg_statistic_ext entry */ + int2vector *columns; /* columns */ + List *types; /* enabled types */ +} StatExtEntry; + + +static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid); +static VacAttrStats **lookup_var_attr_stats(int2vector *attrs, + int natts, VacAttrStats **vacattrstats); +static void statext_store(Oid relid, MVNDistinct ndistinct, + int2vector *attrs, VacAttrStats **stats); + + +/* + * Compute requested extended stats, using the rows sampled for the plain + * (single-column) stats. + * + * This fetches a list of stats from pg_statistic_ext, computes the stats + * and serializes them back into the catalog (as bytea values). + */ +void +BuildRelationExtStatistics(Relation onerel, double totalrows, + int numrows, HeapTuple *rows, + int natts, VacAttrStats **vacattrstats) +{ + Relation pg_stext; + ListCell *lc; + List *stats; + TupleDesc tupdesc = RelationGetDescr(onerel); + + pg_stext = heap_open(StatisticExtRelationId, RowExclusiveLock); + stats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel)); + + foreach(lc, stats) + { + StatExtEntry *stat = (StatExtEntry *) lfirst(lc); + MVNDistinct ndistinct = NULL; + VacAttrStats **stats; + int2vector *attrs; + int j; + int numatts = 0; + ListCell *lc2; + + /* int2 vector of attnums the stats should be computed on */ + attrs = stat->columns; + + /* filter only the interesting vacattrstats records */ + stats = lookup_var_attr_stats(attrs, natts, vacattrstats); + + /* see how many of the columns are not dropped */ + for (j = 0; j < attrs->dim1; j++) + if (!tupdesc->attrs[attrs->values[j] - 1]->attisdropped) + numatts += 1; + + /* if there are dropped attributes, build a filtered int2vector */ + if (numatts != attrs->dim1) + { + int16 *tmp = palloc0(numatts * sizeof(int16)); + int attnum = 0; + + for (j = 0; j < attrs->dim1; j++) + if (!tupdesc->attrs[attrs->values[j] - 1]->attisdropped) + tmp[attnum++] = attrs->values[j]; + + pfree(attrs); + attrs = 
buildint2vector(tmp, numatts); + } + + /* check allowed number of dimensions */ + Assert((attrs->dim1 >= 2) && (attrs->dim1 <= STATS_MAX_DIMENSIONS)); + + /* compute statistic of each type */ + foreach(lc2, stat->types) + { + char t = (char) lfirst_int(lc2); + + if (t == STATS_EXT_NDISTINCT) + ndistinct = statext_ndistinct_build(totalrows, numrows, rows, + stat->columns, stats); + } + + /* store the statistics in the catalog */ + statext_store(stat->statOid, ndistinct, attrs, stats); + } + + heap_close(pg_stext, RowExclusiveLock); +} + +/* + * statext_is_kind_built + * Is this stat kind built in the given pg_statistic_ext tuple? + */ +bool +statext_is_kind_built(HeapTuple htup, char type) +{ + AttrNumber attnum; + + switch (type) + { + case STATS_EXT_NDISTINCT: + attnum = Anum_pg_statistic_ext_standistinct; + break; + + default: + elog(ERROR, "unexpected statistics type requested: %d", type); + } + + return !heap_attisnull(htup, attnum); +} + +/* + * Return a list (of StatExtEntry) of statistics enabled for the given relation. + */ +static List * +fetch_statentries_for_relation(Relation pg_statext, Oid relid) +{ + SysScanDesc scan; + ScanKeyData skey; + HeapTuple htup; + List *result = NIL; + + /* + * Prepare to scan pg_statistic_ext for entries having indrelid = this + * rel. 
+ */ + ScanKeyInit(&skey, + Anum_pg_statistic_ext_starelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relid)); + + scan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true, + NULL, 1, &skey); + + while (HeapTupleIsValid(htup = systable_getnext(scan))) + { + StatExtEntry *entry; + Datum datum; + bool isnull; + int i; + ArrayType *arr; + char *enabled; + + entry = palloc0(sizeof(StatExtEntry)); + entry->relid = relid; + entry->statOid = HeapTupleGetOid(htup); + datum = SysCacheGetAttr(STATEXTOID, htup, + Anum_pg_statistic_ext_stakeys, &isnull); + Assert(!isnull); + arr = DatumGetArrayTypeP(datum); + entry->columns = buildint2vector((int16 *) ARR_DATA_PTR(arr), + ARR_DIMS(arr)[0]); + + /* decode the staenabled char array into a list of chars */ + datum = SysCacheGetAttr(STATEXTOID, htup, + Anum_pg_statistic_ext_staenabled, &isnull); + Assert(!isnull); + arr = DatumGetArrayTypeP(datum); + if (ARR_NDIM(arr) != 1 || + ARR_HASNULL(arr) || + ARR_ELEMTYPE(arr) != CHAROID) + elog(ERROR, "staenabled is not a 1-D char array"); + enabled = (char *) ARR_DATA_PTR(arr); + for (i = 0; i < ARR_DIMS(arr)[0]; i++) + { + Assert(enabled[i] == STATS_EXT_NDISTINCT); + entry->types = lappend_int(entry->types, (int) enabled[i]); + } + + result = lappend(result, entry); + } + + systable_endscan(scan); + + return result; +} + +/* + * Lookup the VacAttrStats info for the selected columns, with indexes + * matching the attrs vector (to make it easy to work with when + * computing extended stats). 
+ */ +static VacAttrStats ** +lookup_var_attr_stats(int2vector *attrs, int natts, VacAttrStats **vacattrstats) +{ + int i, + j; + int numattrs = attrs->dim1; + VacAttrStats **stats = (VacAttrStats **) palloc0(numattrs * sizeof(VacAttrStats *)); + + /* lookup VacAttrStats info for the requested columns (same attnum) */ + for (i = 0; i < numattrs; i++) + { + stats[i] = NULL; + for (j = 0; j < natts; j++) + { + if (attrs->values[i] == vacattrstats[j]->tupattnum) + { + stats[i] = vacattrstats[j]; + break; + } + } + + /* + * Check that we found the info, that the attnum matches and that + * there's the requested 'lt' operator and that the type is + * 'passed-by-value'. + */ + Assert(stats[i] != NULL); + Assert(stats[i]->tupattnum == attrs->values[i]); + + /* + * FIXME This is rather ugly way to check for 'ltopr' (which is + * defined for 'scalar' attributes). + */ + Assert(((StdAnalyzeData *) stats[i]->extra_data)->ltopr != InvalidOid); + } + + return stats; +} + +/* + * statext_store + * Serializes the statistics and stores them into the pg_statistic_ext tuple. + */ +static void +statext_store(Oid statOid, MVNDistinct ndistinct, + int2vector *attrs, VacAttrStats **stats) +{ + HeapTuple stup, + oldtup; + Datum values[Natts_pg_statistic_ext]; + bool nulls[Natts_pg_statistic_ext]; + bool replaces[Natts_pg_statistic_ext]; + + Relation sd = heap_open(StatisticExtRelationId, RowExclusiveLock); + + memset(nulls, 1, Natts_pg_statistic_ext * sizeof(bool)); + memset(replaces, 0, Natts_pg_statistic_ext * sizeof(bool)); + memset(values, 0, Natts_pg_statistic_ext * sizeof(Datum)); + + /* + * Construct a new pg_statistic_ext tuple - replace only the histogram and + * MCV list, depending whether it actually was computed. 
+ */ + if (ndistinct != NULL) + { + bytea *data = statext_ndistinct_serialize(ndistinct); + + nulls[Anum_pg_statistic_ext_standistinct - 1] = (data == NULL); + values[Anum_pg_statistic_ext_standistinct - 1] = PointerGetDatum(data); + } + + /* always replace the value (either by bytea or NULL) */ + replaces[Anum_pg_statistic_ext_standistinct - 1] = true; + + /* always change the availability flags */ + nulls[Anum_pg_statistic_ext_stakeys - 1] = false; + + /* use the new attnums, in case we removed some dropped ones */ + replaces[Anum_pg_statistic_ext_stakeys - 1] = true; + + values[Anum_pg_statistic_ext_stakeys - 1] = PointerGetDatum(attrs); + + /* Is there already a pg_statistic_ext tuple for this attribute? */ + oldtup = SearchSysCache1(STATEXTOID, + ObjectIdGetDatum(statOid)); + + if (!HeapTupleIsValid(oldtup)) + elog(ERROR, "cache lookup failed for extended statistics %u", statOid); + + /* replace it */ + stup = heap_modify_tuple(oldtup, + RelationGetDescr(sd), + values, + nulls, + replaces); + ReleaseSysCache(oldtup); + CatalogTupleUpdate(sd, &stup->t_self, stup); + + heap_freetuple(stup); + heap_close(sd, RowExclusiveLock); +} + +/* multi-variate stats comparator */ + +/* + * qsort_arg comparator for sorting Datums (MV stats) + * + * This does not maintain the tupnoLink array. 
+ */ +int +compare_scalars_simple(const void *a, const void *b, void *arg) +{ + Datum da = *(Datum *) a; + Datum db = *(Datum *) b; + SortSupport ssup = (SortSupport) arg; + + return ApplySortComparator(da, false, db, false, ssup); +} + +/* + * qsort_arg comparator for sorting data when partitioning a MV bucket + */ +int +compare_scalars_partition(const void *a, const void *b, void *arg) +{ + Datum da = ((ScalarItem *) a)->value; + Datum db = ((ScalarItem *) b)->value; + SortSupport ssup = (SortSupport) arg; + + return ApplySortComparator(da, false, db, false, ssup); +} + +/* initialize multi-dimensional sort */ +MultiSortSupport +multi_sort_init(int ndims) +{ + MultiSortSupport mss; + + Assert(ndims >= 2); + + mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup) + +sizeof(SortSupportData) * ndims); + + mss->ndims = ndims; + + return mss; +} + +/* + * Prepare sort support info for dimension 'dim' (index into vacattrstats) to + * 'mss', at the position 'sortdim' + */ +void +multi_sort_add_dimension(MultiSortSupport mss, int sortdim, + int dim, VacAttrStats **vacattrstats) +{ + /* first, lookup StdAnalyzeData for the dimension (attribute) */ + SortSupportData ssup; + StdAnalyzeData *tmp = (StdAnalyzeData *) vacattrstats[dim]->extra_data; + + Assert(mss != NULL); + Assert(sortdim < mss->ndims); + + /* initialize sort support, etc. 
*/ + memset(&ssup, 0, sizeof(ssup)); + ssup.ssup_cxt = CurrentMemoryContext; + + /* We always use the default collation for statistics */ + ssup.ssup_collation = DEFAULT_COLLATION_OID; + ssup.ssup_nulls_first = false; + + PrepareSortSupportFromOrderingOp(tmp->ltopr, &ssup); + + mss->ssup[sortdim] = ssup; +} + +/* compare all the dimensions in the selected order */ +int +multi_sort_compare(const void *a, const void *b, void *arg) +{ + int i; + SortItem *ia = (SortItem *) a; + SortItem *ib = (SortItem *) b; + + MultiSortSupport mss = (MultiSortSupport) arg; + + for (i = 0; i < mss->ndims; i++) + { + int compare; + + compare = ApplySortComparator(ia->values[i], ia->isnull[i], + ib->values[i], ib->isnull[i], + &mss->ssup[i]); + + if (compare != 0) + return compare; + } + + /* equal by default */ + return 0; +} + +/* compare selected dimension */ +int +multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b, + MultiSortSupport mss) +{ + return ApplySortComparator(a->values[dim], a->isnull[dim], + b->values[dim], b->isnull[dim], + &mss->ssup[dim]); +} + +int +multi_sort_compare_dims(int start, int end, + const SortItem *a, const SortItem *b, + MultiSortSupport mss) +{ + int dim; + + for (dim = start; dim <= end; dim++) + { + int r = ApplySortComparator(a->values[dim], a->isnull[dim], + b->values[dim], b->isnull[dim], + &mss->ssup[dim]); + + if (r != 0) + return r; + } + + return 0; +} + +bool +stats_are_enabled(HeapTuple htup, char type) +{ + Datum datum; + bool isnull; + int i, + nenabled; + char *enabled; + ArrayType *enabledArray; + + /* see which statistics are enabled */ + datum = SysCacheGetAttr(STATEXTOID, htup, + Anum_pg_statistic_ext_staenabled, &isnull); + + /* if there are no values in staenabled field, no statistics are enabled */ + if (isnull || (datum == PointerGetDatum(NULL))) + return false; + + /* + * We expect the array to be a 1-D CHAR array; verify that.
We don't need + * to use deconstruct_array() since the array data is just going to look + * like a C array of char values. + */ + enabledArray = DatumGetArrayTypeP(datum); + + if (ARR_NDIM(enabledArray) != 1 || + ARR_HASNULL(enabledArray) || + ARR_ELEMTYPE(enabledArray) != CHAROID) + elog(ERROR, "enabled statistics (staenabled) is not a 1-D char array"); + + nenabled = ARR_DIMS(enabledArray)[0]; + enabled = (char *) ARR_DATA_PTR(enabledArray); + + for (i = 0; i < nenabled; i++) + if (enabled[i] == type) + return true; + + return false; +} diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 20b5273..1a559a5 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -1623,6 +1623,10 @@ ProcessUtilitySlow(ParseState *pstate, commandCollected = true; break; + case T_CreateStatsStmt: /* CREATE STATISTICS */ + address = CreateStatistics((CreateStatsStmt *) parsetree); + break; + default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(parsetree)); @@ -1988,6 +1992,9 @@ AlterObjectTypeCommandTag(ObjectType objtype) break; case OBJECT_SUBSCRIPTION: tag = "ALTER SUBSCRIPTION"; + break; + case OBJECT_STATISTIC_EXT: + tag = "ALTER STATISTICS"; break; default: tag = "???"; @@ -2282,6 +2289,9 @@ CreateCommandTag(Node *parsetree) break; case OBJECT_PUBLICATION: tag = "DROP PUBLICATION"; + break; + case OBJECT_STATISTIC_EXT: + tag = "DROP STATISTICS"; break; default: tag = "???"; @@ -2681,6 +2691,10 @@ CreateCommandTag(Node *parsetree) tag = "EXECUTE"; break; + case T_CreateStatsStmt: + tag = "CREATE STATISTICS"; + break; + case T_DeallocateStmt: { DeallocateStmt *stmt = (DeallocateStmt *) parsetree; diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 5c82325..72c8c54 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -35,6 +35,7 @@ #include "catalog/pg_operator.h" #include "catalog/pg_partitioned_table.h" #include "catalog/pg_proc.h" +#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_trigger.h"
 #include "catalog/pg_type.h"
 #include "commands/defrem.h"
@@ -317,6 +318,7 @@ static char *pg_get_indexdef_worker(Oid indexrelid, int colno,
 					   const Oid *excludeOps,
 					   bool attrsOnly, bool showTblSpc,
 					   int prettyFlags, bool missing_ok);
+static char *pg_get_statisticsext_worker(Oid statextid, bool missing_ok);
 static char *pg_get_partkeydef_worker(Oid relid, int prettyFlags,
 						 bool attrsOnly);
 static char *pg_get_constraintdef_worker(Oid constraintId, bool fullCommand,
@@ -1422,6 +1424,85 @@ pg_get_indexdef_worker(Oid indexrelid, int colno,
 }
 
 /*
+ * pg_get_statisticsextdef
+ *		Get the definition of an extended statistics object
+ */
+Datum
+pg_get_statisticsextdef(PG_FUNCTION_ARGS)
+{
+	Oid			statextid = PG_GETARG_OID(0);
+	char	   *res;
+
+	res = pg_get_statisticsext_worker(statextid, true);
+
+	if (res == NULL)
+		PG_RETURN_NULL();
+
+	PG_RETURN_TEXT_P(string_to_text(res));
+}
+
+/*
+ * Internal workhorse to decompile an extended statistics object.
+ */
+static char *
+pg_get_statisticsext_worker(Oid statextid, bool missing_ok)
+{
+	Form_pg_statistic_ext statextrec;
+	Form_pg_class pgclassrec;
+	HeapTuple	statexttup;
+	HeapTuple	pgclasstup;
+	StringInfoData buf;
+	int			colno;
+
+	statexttup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statextid));
+
+	if (!HeapTupleIsValid(statexttup))
+	{
+		if (missing_ok)
+			return NULL;
+		elog(ERROR, "cache lookup failed for extended statistics %u", statextid);
+	}
+
+	statextrec = (Form_pg_statistic_ext) GETSTRUCT(statexttup);
+
+	pgclasstup = SearchSysCache1(RELOID, ObjectIdGetDatum(statextrec->starelid));
+
+	/* check the pg_class tuple, not the statistics tuple we already checked */
+	if (!HeapTupleIsValid(pgclasstup))
+	{
+		ReleaseSysCache(statexttup);
+		elog(ERROR, "cache lookup failed for relation %u", statextrec->starelid);
+	}
+
+	pgclassrec = (Form_pg_class) GETSTRUCT(pgclasstup);
+
+	initStringInfo(&buf);
+
+	appendStringInfo(&buf, "CREATE STATISTICS %s ON (",
+					 quote_identifier(NameStr(statextrec->staname)));
+
+	for (colno = 0; colno < statextrec->stakeys.dim1; colno++)
{ + AttrNumber attnum = statextrec->stakeys.values[colno]; + char *attname; + + if (colno > 0) + appendStringInfoString(&buf, ", "); + + attname = get_relid_attribute_name(statextrec->starelid, attnum); + + appendStringInfoString(&buf, attname); + } + + appendStringInfo(&buf, ") FROM %s", + quote_identifier(NameStr(pgclassrec->relname))); + + ReleaseSysCache(statexttup); + ReleaseSysCache(pgclasstup); + + return buf.data; +} + +/* * pg_get_partkeydef * * Returns the partition key specification, ie, the following: diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index bb9a544..21b8dfa 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -110,6 +110,7 @@ #include "catalog/pg_operator.h" #include "catalog/pg_opfamily.h" #include "catalog/pg_statistic.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_type.h" #include "executor/executor.h" #include "mb/pg_wchar.h" @@ -126,6 +127,7 @@ #include "parser/parse_clause.h" #include "parser/parse_coerce.h" #include "parser/parsetree.h" +#include "statistics/stats.h" #include "utils/builtins.h" #include "utils/bytea.h" #include "utils/date.h" @@ -164,6 +166,8 @@ static double eqjoinsel_inner(Oid operator, static double eqjoinsel_semi(Oid operator, VariableStatData *vardata1, VariableStatData *vardata2, RelOptInfo *inner_rel); +static double find_ndistinct(PlannerInfo *root, RelOptInfo *rel, List *varinfos, + bool *found); static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, Datum lobound, Datum hibound, Oid boundstypid, double *scaledlobound, double *scaledhibound); @@ -208,7 +212,6 @@ static Const *string_to_const(const char *str, Oid datatype); static Const *string_to_bytea_const(const char *str, size_t str_len); static List *add_predicate_to_quals(IndexOptInfo *index, List *indexQuals); - /* * eqsel - Selectivity of "=" for any data types. 
* @@ -3437,12 +3440,26 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, * don't know by how much. We should never clamp to less than the * largest ndistinct value for any of the Vars, though, since * there will surely be at least that many groups. + * + * However we don't need to do this if we have ndistinct stats on + * the columns - in that case we can simply use the coefficient to + * get the (probably way more accurate) estimate. + * + * XXX Might benefit from some refactoring, mixing the ndistinct + * coefficients and clamp seems a bit unfortunate. */ double clamp = rel->tuples; if (relvarcount > 1) { - clamp *= 0.1; + bool found; + double ndist = find_ndistinct(root, rel, varinfos, &found); + + if (found) + reldistinct = ndist; + else + clamp *= 0.1; + if (clamp < relmaxndistinct) { clamp = relmaxndistinct; @@ -3506,7 +3523,6 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, */ numdistinct *= reldistinct; } - varinfos = newvarinfos; } while (varinfos != NIL); @@ -3668,6 +3684,159 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets) */ /* + * Find applicable ndistinct statistics and compute the coefficient to correct + * the estimate (simply a product of per-column ndistincts). If an exact match + * is found, *found is set to TRUE and the value is returned. + * + * XXX Currently, only perfect matches (exactly same set of columns) are + * considered. If additional columns are used, we fail to find a match. + * It would be better to use the saved stats to a subset of the columns, then + * fixup according to any additional columns not covered by the ndistinct + * stats. 
+ */
+static double
+find_ndistinct(PlannerInfo *root, RelOptInfo *rel, List *varinfos, bool *found)
+{
+	ListCell   *lc;
+	Bitmapset  *attnums = NULL;
+	int			nattnums;
+	VariableStatData vardata;
+
+	/* assume we haven't found any suitable ndistinct statistics */
+	*found = false;
+
+	/* bail out immediately if the table has no extended statistics */
+	if (!rel->statlist)
+		return 0.0;
+
+	foreach(lc, varinfos)
+	{
+		GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
+
+		if (varinfo->rel != rel)
+			continue;
+
+		/* FIXME handle expressions in general only */
+
+		/*
+		 * examine the variable (or expression) so that we know which
+		 * attribute we're dealing with - we need this for matching the
+		 * ndistinct coefficient
+		 *
+		 * FIXME probably should remember this from estimate_num_groups
+		 */
+		examine_variable(root, varinfo->var, 0, &vardata);
+
+		if (HeapTupleIsValid(vardata.statsTuple))
+		{
+			Form_pg_statistic stats;
+
+			stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
+			attnums = bms_add_member(attnums, stats->staattnum);
+
+			ReleaseVariableStats(vardata);
+		}
+	}
+	nattnums = bms_num_members(attnums);
+
+	/* look for a matching ndistinct statistics */
+	foreach(lc, rel->statlist)
+	{
+		StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
+		int			i,
+					k;
+		bool		matches;
+		int			j;
+		MVNDistinct *stat;	/* must be a pointer: statext_ndistinct_load()
+							 * returns one and it is dereferenced below */
+
+		/* skip statistics of other kinds */
+		if (info->kind != STATS_EXT_NDISTINCT)
+			continue;
+
+		/*
+		 * Only ndistinct stats covering all Vars are acceptable, which can't
+		 * happen if the statistics has fewer attributes than we have Vars.
+		 */
+		if (nattnums > info->keys->dim1)
+			continue;
+
+		/* check that all Vars are covered by the statistic */
+		matches = true;			/* assume match until we find unmatched
+								 * attribute */
+		k = -1;
+		while ((k = bms_next_member(attnums, k)) >= 0)
+		{
+			bool		attr_found = false;
+
+			for (i = 0; i < info->keys->dim1; i++)
+			{
+				if (info->keys->values[i] == k)
+				{
+					attr_found = true;
+					break;
+				}
+			}
+
+			/* found attribute not covered by this ndistinct stats, skip */
+			if (!attr_found)
+			{
+				matches = false;
+				break;
+			}
+		}
+
+		if (!matches)
+			continue;
+
+		/* hey, this statistics matches! great, let's extract the value */
+		stat = statext_ndistinct_load(info->statOid);
+
+		for (j = 0; j < stat->nitems; j++)
+		{
+			bool		item_matches = true;
+			MVNDistinctItem *item = &stat->items[j];
+
+			/* not the right item (different number of attributes) */
+			if (item->nattrs != nattnums)
+				continue;
+
+			/* check the attribute numbers */
+			k = -1;
+			while ((k = bms_next_member(attnums, k)) >= 0)
+			{
+				bool		attr_found = false;
+
+				for (i = 0; i < item->nattrs; i++)
+				{
+					if (info->keys->values[item->attrs[i]] == k)
+					{
+						attr_found = true;
+						break;
+					}
+				}
+
+				if (!attr_found)
+				{
+					item_matches = false;
+					break;
+				}
+			}
+
+			if (!item_matches)
+				continue;
+
+			/*
+			 * Only claim success once a matching item is actually found;
+			 * setting *found before this point would trip the final Assert
+			 * when a statistics object matches but none of its items do.
+			 */
+			*found = true;
+			return item->ndistinct;
+		}
+	}
+
+	/* Nothing usable :-( */
+	Assert(!(*found));
+	return 0.0;
+}
+
+/*
 * convert_to_scalar
 *	  Convert non-NULL values of the indicated types to the comparison
 *	  scale needed by scalarineqsel().
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index ce55fc5..a6b60c6 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -56,6 +56,7 @@ #include "catalog/pg_publication.h" #include "catalog/pg_rewrite.h" #include "catalog/pg_shseclabel.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_trigger.h" @@ -4452,6 +4453,82 @@ RelationGetIndexList(Relation relation) } /* + * RelationGetStatExtList + * get a list of OIDs of extended statistics on this relation + * + * The statistics list is created only if someone requests it, in a way + * similar to RelationGetIndexList(). We scan pg_statistic_ext to find + * relevant statistics, and add the list to the relcache entry so that we + * won't have to compute it again. Note that shared cache inval of a + * relcache entry will delete the old list and set rd_statvalid to 0, + * so that we must recompute the statistics list on next request. This + * handles creation or deletion of a statistic. + * + * The returned list is guaranteed to be sorted in order by OID, although + * this is not currently needed. + * + * Since shared cache inval causes the relcache's copy of the list to go away, + * we return a copy of the list palloc'd in the caller's context. The caller + * may list_free() the returned list after scanning it. This is necessary + * since the caller will typically be doing syscache lookups on the relevant + * statistics, and syscache lookup could cause SI messages to be processed! + */ +List * +RelationGetStatExtList(Relation relation) +{ + Relation indrel; + SysScanDesc indscan; + ScanKeyData skey; + HeapTuple htup; + List *result; + List *oldlist; + MemoryContext oldcxt; + + /* Quick exit if we already computed the list. 
*/ + if (relation->rd_statvalid != 0) + return list_copy(relation->rd_statlist); + + /* + * We build the list we intend to return (in the caller's context) while + * doing the scan. After successfully completing the scan, we copy that + * list into the relcache entry. This avoids cache-context memory leakage + * if we get some sort of error partway through. + */ + result = NIL; + + /* Prepare to scan pg_statistic_ext for entries having starelid = this rel. */ + ScanKeyInit(&skey, + Anum_pg_statistic_ext_starelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(relation))); + + indrel = heap_open(StatisticExtRelationId, AccessShareLock); + indscan = systable_beginscan(indrel, StatisticExtRelidIndexId, true, + NULL, 1, &skey); + + while (HeapTupleIsValid(htup = systable_getnext(indscan))) + /* TODO maybe include only already built statistics? */ + result = insert_ordered_oid(result, HeapTupleGetOid(htup)); + + systable_endscan(indscan); + + heap_close(indrel, AccessShareLock); + + /* Now save a copy of the completed list in the relcache entry. 
*/ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + oldlist = relation->rd_statlist; + relation->rd_statlist = list_copy(result); + + relation->rd_statvalid = true; + MemoryContextSwitchTo(oldcxt); + + /* Don't leak the old list, if there is one */ + list_free(oldlist); + + return result; +} + +/* * insert_ordered_oid * Insert a new Oid into a sorted list of Oids, preserving ordering * @@ -5560,6 +5637,8 @@ load_relcache_init_file(bool shared) rel->rd_pkattr = NULL; rel->rd_idattr = NULL; rel->rd_pubactions = NULL; + rel->rd_statvalid = false; + rel->rd_statlist = NIL; rel->rd_createSubid = InvalidSubTransactionId; rel->rd_newRelfilenodeSubid = InvalidSubTransactionId; rel->rd_amcache = NULL; diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index b1c0b4b..4a9cb76 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -61,6 +61,7 @@ #include "catalog/pg_shseclabel.h" #include "catalog/pg_replication_origin.h" #include "catalog/pg_statistic.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_subscription.h" #include "catalog/pg_tablespace.h" #include "catalog/pg_transform.h" @@ -725,6 +726,28 @@ static const struct cachedesc cacheinfo[] = { }, 32 }, + {StatisticExtRelationId, /* STATEXTNAMENSP */ + StatisticExtNameIndexId, + 2, + { + Anum_pg_statistic_ext_staname, + Anum_pg_statistic_ext_stanamespace, + 0, + 0 + }, + 4 + }, + {StatisticExtRelationId, /* STATEXTOID */ + StatisticExtOidIndexId, + 1, + { + ObjectIdAttributeNumber, + 0, + 0, + 0 + }, + 4 + }, {StatisticRelationId, /* STATRELATTINH */ StatisticRelidAttnumInhIndexId, 3, diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c index 89530a9..e2bc357 100644 --- a/src/bin/pg_dump/common.c +++ b/src/bin/pg_dump/common.c @@ -273,6 +273,10 @@ getSchemaData(Archive *fout, int *numTablesPtr) getIndexes(fout, tblinfo, numTables); if (g_verbose) + write_msg(NULL, "reading extended statistics\n"); + 
getExtendedStatistics(fout, tblinfo, numTables); + + if (g_verbose) write_msg(NULL, "reading constraints\n"); getConstraints(fout, tblinfo, numTables); diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index 734373b..1e233c0 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -3535,7 +3535,8 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData, bool acl_pass) strcmp(te->desc, "TRIGGER") == 0 || strcmp(te->desc, "ROW SECURITY") == 0 || strcmp(te->desc, "POLICY") == 0 || - strcmp(te->desc, "USER MAPPING") == 0) + strcmp(te->desc, "USER MAPPING") == 0 || + strcmp(te->desc, "STATISTICS") == 0) { /* these object types don't have separate owners */ } diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index e67171d..c16c2d0 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -190,6 +190,7 @@ static void dumpAttrDef(Archive *fout, AttrDefInfo *adinfo); static void dumpSequence(Archive *fout, TableInfo *tbinfo); static void dumpSequenceData(Archive *fout, TableDataInfo *tdinfo); static void dumpIndex(Archive *fout, IndxInfo *indxinfo); +static void dumpStatisticsExt(Archive *fout, StatsExtInfo *statsextinfo); static void dumpConstraint(Archive *fout, ConstraintInfo *coninfo); static void dumpTableConstraintComment(Archive *fout, ConstraintInfo *coninfo); static void dumpTSParser(Archive *fout, TSParserInfo *prsinfo); @@ -6555,6 +6556,100 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables) } /* + * getExtendedStatistics + * get information about extended statistics on a dumpable table + * or materialized view. + * + * Note: extended statistics data is not returned directly to the caller, but + * it does get entered into the DumpableObject tables. 
+ */ +void +getExtendedStatistics(Archive *fout, TableInfo tblinfo[], int numTables) +{ + int i, + j; + PQExpBuffer query; + PGresult *res; + StatsExtInfo *statsextinfo; + int ntups; + int i_tableoid; + int i_oid; + int i_staname; + int i_stadef; + + /* Extended statistics were new in v10 */ + if (fout->remoteVersion < 100000) + return; + + query = createPQExpBuffer(); + + for (i = 0; i < numTables; i++) + { + TableInfo *tbinfo = &tblinfo[i]; + + /* Only plain tables and materialized views can have extended statistics. */ + /* XXX ensure this is true. It was broken in v25 0002 */ + if (tbinfo->relkind != RELKIND_RELATION && + tbinfo->relkind != RELKIND_MATVIEW) + continue; + + /* + * Ignore extended statistics of tables whose definitions are not to + * be dumped. + */ + if (!(tbinfo->dobj.dump & DUMP_COMPONENT_DEFINITION)) + continue; + + if (g_verbose) + write_msg(NULL, "reading extended statistics for table \"%s.%s\"\n", + tbinfo->dobj.namespace->dobj.name, + tbinfo->dobj.name); + + /* Make sure we are in proper schema so stadef is right */ + selectSourceSchema(fout, tbinfo->dobj.namespace->dobj.name); + + resetPQExpBuffer(query); + + appendPQExpBuffer(query, + "SELECT " + "tableoid, " + "oid, " + "staname, " + "pg_catalog.pg_get_statisticsextdef(oid) AS stadef " + "FROM pg_statistic_ext " + "WHERE starelid = '%u' " + "ORDER BY staname", tbinfo->dobj.catId.oid); + + res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK); + + ntups = PQntuples(res); + + i_tableoid = PQfnumber(res, "tableoid"); + i_oid = PQfnumber(res, "oid"); + i_staname = PQfnumber(res, "staname"); + i_stadef = PQfnumber(res, "stadef"); + + statsextinfo = (StatsExtInfo *) pg_malloc(ntups * sizeof(StatsExtInfo)); + + for (j = 0; j < ntups; j++) + { + statsextinfo[j].dobj.objType = DO_STATSEXT; + statsextinfo[j].dobj.catId.tableoid = atooid(PQgetvalue(res, j, i_tableoid)); + statsextinfo[j].dobj.catId.oid = atooid(PQgetvalue(res, j, i_oid)); + AssignDumpId(&statsextinfo[j].dobj); + 
statsextinfo[j].dobj.name = pg_strdup(PQgetvalue(res, j, i_staname)); + statsextinfo[j].dobj.namespace = tbinfo->dobj.namespace; + statsextinfo[j].statsexttable = tbinfo; + statsextinfo[j].statsextdef = pg_strdup(PQgetvalue(res, j, i_stadef)); + } + + PQclear(res); + } + + destroyPQExpBuffer(query); +} + +/* * getConstraints * * Get info about constraints on dumpable tables. @@ -9206,6 +9301,9 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj) case DO_INDEX: dumpIndex(fout, (IndxInfo *) dobj); break; + case DO_STATSEXT: + dumpStatisticsExt(fout, (StatsExtInfo *) dobj); + break; case DO_REFRESH_MATVIEW: refreshMatViewData(fout, (TableDataInfo *) dobj); break; @@ -15646,6 +15744,61 @@ dumpIndex(Archive *fout, IndxInfo *indxinfo) } /* + * dumpStatisticsExt + * write out to fout an extended statistics object + */ +static void +dumpStatisticsExt(Archive *fout, StatsExtInfo *statsextinfo) +{ + DumpOptions *dopt = fout->dopt; + TableInfo *tbinfo = statsextinfo->statsexttable; + PQExpBuffer q; + PQExpBuffer delq; + PQExpBuffer labelq; + + if (dopt->dataOnly) + return; + + q = createPQExpBuffer(); + delq = createPQExpBuffer(); + labelq = createPQExpBuffer(); + + appendPQExpBuffer(labelq, "STATISTICS %s", + fmtId(statsextinfo->dobj.name)); + + appendPQExpBuffer(q, "%s;\n", statsextinfo->statsextdef); + + appendPQExpBuffer(delq, "DROP STATISTICS %s.", + fmtId(tbinfo->dobj.namespace->dobj.name)); + appendPQExpBuffer(delq, "%s;\n", + fmtId(statsextinfo->dobj.name)); + + if (statsextinfo->dobj.dump & DUMP_COMPONENT_DEFINITION) + ArchiveEntry(fout, statsextinfo->dobj.catId, + statsextinfo->dobj.dumpId, + statsextinfo->dobj.name, + tbinfo->dobj.namespace->dobj.name, + NULL, + tbinfo->rolname, false, + "STATISTICS", SECTION_POST_DATA, + q->data, delq->data, NULL, + NULL, 0, + NULL, NULL); + + /* Dump Statistics Comments */ + if (statsextinfo->dobj.dump & DUMP_COMPONENT_COMMENT) + dumpComment(fout, labelq->data, + tbinfo->dobj.namespace->dobj.name, + tbinfo->rolname, + 
statsextinfo->dobj.catId, 0, + statsextinfo->dobj.dumpId); + + destroyPQExpBuffer(q); + destroyPQExpBuffer(delq); + destroyPQExpBuffer(labelq); +} + +/* * dumpConstraint * write out to fout a user-defined constraint */ @@ -17183,6 +17336,7 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs, addObjectDependency(postDataBound, dobj->dumpId); break; case DO_INDEX: + case DO_STATSEXT: case DO_REFRESH_MATVIEW: case DO_TRIGGER: case DO_EVENT_TRIGGER: diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index a466527..cb22f63 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -56,6 +56,7 @@ typedef enum DO_TABLE, DO_ATTRDEF, DO_INDEX, + DO_STATSEXT, DO_RULE, DO_TRIGGER, DO_CONSTRAINT, @@ -362,6 +363,13 @@ typedef struct _indxInfo int relpages; /* relpages of the underlying table */ } IndxInfo; +typedef struct _statsExtInfo +{ + DumpableObject dobj; + TableInfo *statsexttable; /* link to table the stats ext is for */ + char *statsextdef; +} StatsExtInfo; + typedef struct _ruleInfo { DumpableObject dobj; @@ -682,6 +690,7 @@ extern void getOwnedSeqs(Archive *fout, TableInfo tblinfo[], int numTables); extern InhInfo *getInherits(Archive *fout, int *numInherits); extern PartInfo *getPartitions(Archive *fout, int *numPartitions); extern void getIndexes(Archive *fout, TableInfo tblinfo[], int numTables); +extern void getExtendedStatistics(Archive *fout, TableInfo tblinfo[], int numTables); extern void getConstraints(Archive *fout, TableInfo tblinfo[], int numTables); extern RuleInfo *getRules(Archive *fout, int *numRules); extern void getTriggers(Archive *fout, TableInfo tblinfo[], int numTables); diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c index e555de8..5c19b05 100644 --- a/src/bin/pg_dump/pg_dump_sort.c +++ b/src/bin/pg_dump/pg_dump_sort.c @@ -53,10 +53,11 @@ static const int dbObjectTypePriority[] = 18, /* DO_TABLE */ 20, /* DO_ATTRDEF */ 28, /* DO_INDEX */ - 29, /* DO_RULE */ - 30, /* 
DO_TRIGGER */ + 29, /* DO_STATSEXT */ + 30, /* DO_RULE */ + 31, /* DO_TRIGGER */ 27, /* DO_CONSTRAINT */ - 31, /* DO_FK_CONSTRAINT */ + 32, /* DO_FK_CONSTRAINT */ 2, /* DO_PROCLANG */ 10, /* DO_CAST */ 23, /* DO_TABLE_DATA */ @@ -1291,6 +1292,11 @@ describeDumpableObject(DumpableObject *obj, char *buf, int bufsize) "INDEX %s (ID %d OID %u)", obj->name, obj->dumpId, obj->catId.oid); return; + case DO_STATSEXT: + snprintf(buf, bufsize, + "STATISTICS %s (ID %d OID %u)", + obj->name, obj->dumpId, obj->catId.oid); + return; case DO_REFRESH_MATVIEW: snprintf(buf, bufsize, "REFRESH MATERIALIZED VIEW %s (ID %d OID %u)", diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 61a3e2a..aaae9bc 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -2320,6 +2320,56 @@ describeOneTableDetails(const char *schemaname, PQclear(result); } + /* print any extended statistics */ + if (pset.sversion >= 100000) + { + printfPQExpBuffer(&buf, + "SELECT oid, stanamespace::regnamespace AS nsp, staname, stakeys,\n" + " (SELECT string_agg(attname::text,', ') \n" + " FROM ((SELECT unnest(stakeys) AS attnum) s\n" + " JOIN pg_attribute a ON (starelid = a.attrelid AND\n" + "a.attnum = s.attnum AND not attisdropped))) AS columns,\n" + " (staenabled::char[] @> '{d}'::char[]) AS ndist_enabled\n" + "FROM pg_statistic_ext stat WHERE starelid = '%s' ORDER BY 1;", + oid); + + result = PSQLexec(buf.data); + if (!result) + goto error_return; + else + tuples = PQntuples(result); + + if (tuples > 0) + { + printTableAddFooter(&cont, _("Statistics:")); + + for (i = 0; i < tuples; i++) + { + int cnt = 0; + + printfPQExpBuffer(&buf, " "); + + /* statistics name (qualified with namespace) */ + appendPQExpBuffer(&buf, "\"%s.%s\" WITH (", + PQgetvalue(result, i, 1), + PQgetvalue(result, i, 2)); + + /* options */ + if (strcmp(PQgetvalue(result, i, 5), "t") == 0) + { + appendPQExpBufferStr(&buf, "ndistinct"); + cnt++; + } + + appendPQExpBuffer(&buf, ") ON (%s)", + PQgetvalue(result, i, 
4)); + + printTableAddFooter(&cont, buf.data); + } + } + PQclear(result); + } + /* print rules */ if (tableinfo.hasrules && tableinfo.relkind != RELKIND_MATVIEW) { diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h index 10759c7..9effbce 100644 --- a/src/include/catalog/dependency.h +++ b/src/include/catalog/dependency.h @@ -147,6 +147,7 @@ typedef enum ObjectClass OCLASS_REWRITE, /* pg_rewrite */ OCLASS_TRIGGER, /* pg_trigger */ OCLASS_SCHEMA, /* pg_namespace */ + OCLASS_STATISTIC_EXT, /* pg_statistic_ext */ OCLASS_TSPARSER, /* pg_ts_parser */ OCLASS_TSDICT, /* pg_ts_dict */ OCLASS_TSTEMPLATE, /* pg_ts_template */ diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h index 1187797..473fe17 100644 --- a/src/include/catalog/heap.h +++ b/src/include/catalog/heap.h @@ -119,6 +119,7 @@ extern void RemoveAttrDefault(Oid relid, AttrNumber attnum, DropBehavior behavior, bool complain, bool internal); extern void RemoveAttrDefaultById(Oid attrdefId); extern void RemoveStatistics(Oid relid, AttrNumber attnum); +extern void RemoveStatisticsExt(Oid relid, AttrNumber attnum); extern Form_pg_attribute SystemAttributeDefinition(AttrNumber attno, bool relhasoids); diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h index 6bce732..8130581 100644 --- a/src/include/catalog/indexing.h +++ b/src/include/catalog/indexing.h @@ -182,6 +182,13 @@ DECLARE_UNIQUE_INDEX(pg_largeobject_loid_pn_index, 2683, on pg_largeobject using DECLARE_UNIQUE_INDEX(pg_largeobject_metadata_oid_index, 2996, on pg_largeobject_metadata using btree(oid oid_ops)); #define LargeObjectMetadataOidIndexId 2996 +DECLARE_UNIQUE_INDEX(pg_statistic_ext_oid_index, 3380, on pg_statistic_ext using btree(oid oid_ops)); +#define StatisticExtOidIndexId 3380 +DECLARE_UNIQUE_INDEX(pg_statistic_ext_name_index, 3997, on pg_statistic_ext using btree(staname name_ops, stanamespace oid_ops)); +#define StatisticExtNameIndexId 3997 
+DECLARE_INDEX(pg_statistic_ext_relid_index, 3379, on pg_statistic_ext using btree(starelid oid_ops)); +#define StatisticExtRelidIndexId 3379 + DECLARE_UNIQUE_INDEX(pg_namespace_nspname_index, 2684, on pg_namespace using btree(nspname name_ops)); #define NamespaceNameIndexId 2684 DECLARE_UNIQUE_INDEX(pg_namespace_oid_index, 2685, on pg_namespace using btree(oid oid_ops)); diff --git a/src/include/catalog/namespace.h b/src/include/catalog/namespace.h index dbeb25b..35e0e2b 100644 --- a/src/include/catalog/namespace.h +++ b/src/include/catalog/namespace.h @@ -141,6 +141,8 @@ extern Oid get_collation_oid(List *collname, bool missing_ok); extern Oid get_conversion_oid(List *conname, bool missing_ok); extern Oid FindDefaultConversionProc(int32 for_encoding, int32 to_encoding); +extern Oid get_statistics_oid(List *names, bool missing_ok); + /* initialization & transaction cleanup code */ extern void InitializeSearchPath(void); extern void AtEOXact_Namespace(bool isCommit, bool parallel); diff --git a/src/include/catalog/pg_cast.h b/src/include/catalog/pg_cast.h index ce8dc59..bc5d28a 100644 --- a/src/include/catalog/pg_cast.h +++ b/src/include/catalog/pg_cast.h @@ -254,6 +254,10 @@ DATA(insert ( 23 18 78 e f )); /* pg_node_tree can be coerced to, but not from, text */ DATA(insert ( 194 25 0 i b )); +/* pg_ndistinct can be coerced to, but not from, bytea and text */ +DATA(insert ( 3361 17 0 i b )); +DATA(insert ( 3361 25 0 i i )); + /* * Datetime category */ diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 836d6ff..84d8475 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -1983,6 +1983,8 @@ DESCR("select statement of a view"); DATA(insert OID = 1642 ( pg_get_userbyid PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 19 "26" _null_ _null_ _null_ _null_ _null_ pg_get_userbyid _null_ _null_ _null_ )); DESCR("role name by OID (with fallback)"); DATA(insert OID = 1643 ( pg_get_indexdef PGNSP PGUID 12 1 0 0 0 f f f f t f 
s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_indexdef _null_ _null_ _null_ ));
 DESCR("index description");
+DATA(insert OID = 3415 (  pg_get_statisticsextdef PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_statisticsextdef _null_ _null_ _null_ ));
+DESCR("extended statistics description");
 DATA(insert OID = 3352 (  pg_get_partkeydef PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 25 "26" _null_ _null_ _null_ _null_ _null_ pg_get_partkeydef _null_ _null_ _null_ ));
 DESCR("partition key description");
@@ -2755,6 +2757,15 @@ DESCR("current user privilege on any column by rel name");
 DATA(insert OID = 3029 (  has_any_column_privilege PGNSP PGUID 12 10 0 0 0 f f f f t f s s 2 0 16 "26 25" _null_ _null_ _null_ _null_ _null_ has_any_column_privilege_id _null_ _null_ _null_ ));
 DESCR("current user privilege on any column by rel oid");
 
+DATA(insert OID = 3355 (  pg_ndistinct_in PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3361 "2275" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_in _null_ _null_ _null_ ));
+DESCR("I/O");
+DATA(insert OID = 3356 (  pg_ndistinct_out PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 2275 "3361" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_out _null_ _null_ _null_ ));
+DESCR("I/O");
+DATA(insert OID = 3357 (  pg_ndistinct_recv PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 3361 "2281" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_recv _null_ _null_ _null_ ));
+DESCR("I/O");
+DATA(insert OID = 3358 (  pg_ndistinct_send PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 17 "3361" _null_ _null_ _null_ _null_ _null_ pg_ndistinct_send _null_ _null_ _null_ ));
+DESCR("I/O");
+
 DATA(insert OID = 1928 (  pg_stat_get_numscans PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_numscans _null_ _null_ _null_ ));
 DESCR("statistics: number of scans done for table/index");
 DATA(insert OID = 1929 (  pg_stat_get_tuples_returned PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_
_null_ _null_ _null_ _null_ pg_stat_get_tuples_returned _null_ _null_ _null_ )); diff --git a/src/include/catalog/pg_statistic_ext.h b/src/include/catalog/pg_statistic_ext.h new file mode 100644 index 0000000..49b7337 --- /dev/null +++ b/src/include/catalog/pg_statistic_ext.h @@ -0,0 +1,74 @@ +/*------------------------------------------------------------------------- + * + * pg_statistic_ext.h + * definition of the system "extended statistic" relation (pg_statistic_ext) + * along with the relation's initial contents. + * + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/catalog/pg_statistic_ext.h + * + * NOTES + * the genbki.pl script reads this file and generates .bki + * information from the DATA() statements. + * + *------------------------------------------------------------------------- + */ +#ifndef PG_STATISTIC_EXT_H +#define PG_STATISTIC_EXT_H + +#include "catalog/genbki.h" + +/* ---------------- + * pg_statistic_ext definition. cpp turns this into + * typedef struct FormData_pg_statistic_ext + * ---------------- + */ +#define StatisticExtRelationId 3381 + +CATALOG(pg_statistic_ext,3381) +{ + /* These fields form the unique key for the entry: */ + Oid starelid; /* relation containing attributes */ + NameData staname; /* statistics name */ + Oid stanamespace; /* OID of namespace containing this statistics */ + Oid staowner; /* statistics owner */ + + /* + * variable-length fields start here, but we allow direct access to + * stakeys + */ + int2vector stakeys; /* array of column keys */ + +#ifdef CATALOG_VARLEN + char staenabled[1] BKI_FORCE_NOT_NULL; /* statistics requested to build */ + pg_ndistinct standistinct; /* ndistinct coefficients (serialized) */ +#endif + +} FormData_pg_statistic_ext; + +/* ---------------- + * Form_pg_statistic_ext corresponds to a pointer to a tuple with + * the format of pg_statistic_ext relation. 
+ * ---------------- + */ +typedef FormData_pg_statistic_ext *Form_pg_statistic_ext; + +/* ---------------- + * compiler constants for pg_statistic_ext + * ---------------- + */ +#define Natts_pg_statistic_ext 7 +#define Anum_pg_statistic_ext_starelid 1 +#define Anum_pg_statistic_ext_staname 2 +#define Anum_pg_statistic_ext_stanamespace 3 +#define Anum_pg_statistic_ext_staowner 4 +#define Anum_pg_statistic_ext_stakeys 5 +#define Anum_pg_statistic_ext_staenabled 6 +#define Anum_pg_statistic_ext_standistinct 7 + +#define STATS_EXT_NDISTINCT 'd' + +#endif /* PG_STATISTIC_EXT_H */ diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h index 9f61238..9ad6725 100644 --- a/src/include/catalog/pg_type.h +++ b/src/include/catalog/pg_type.h @@ -364,6 +364,10 @@ DATA(insert OID = 194 ( pg_node_tree PGNSP PGUID -1 f b S f t \054 0 0 0 pg_node DESCR("string representing an internal node tree"); #define PGNODETREEOID 194 +DATA(insert OID = 3361 ( pg_ndistinct PGNSP PGUID -1 f b S f t \054 0 0 0 pg_ndistinct_in pg_ndistinct_out pg_ndistinct_recv pg_ndistinct_send - - - i x f 0 -1 0 100 _null_ _null_ _null_ )); +DESCR("multivariate ndistinct coefficients"); +#define PGNDISTINCTOID 3361 + DATA(insert OID = 32 ( pg_ddl_command PGNSP PGUID SIZEOF_POINTER t p P f t \054 0 0 0 pg_ddl_command_in pg_ddl_command_out pg_ddl_command_recv pg_ddl_command_send - - - ALIGNOF_POINTER p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("internal type for passing CollectedCommand"); #define PGDDLCOMMANDOID 32 diff --git a/src/include/catalog/toasting.h b/src/include/catalog/toasting.h index db7f145..00d0a83 100644 --- a/src/include/catalog/toasting.h +++ b/src/include/catalog/toasting.h @@ -53,6 +53,7 @@ DECLARE_TOAST(pg_proc, 2836, 2837); DECLARE_TOAST(pg_rewrite, 2838, 2839); DECLARE_TOAST(pg_seclabel, 3598, 3599); DECLARE_TOAST(pg_statistic, 2840, 2841); +DECLARE_TOAST(pg_statistic_ext, 3439, 3440); DECLARE_TOAST(pg_trigger, 2336, 2337); /* shared catalogs */ diff --git 
a/src/include/commands/defrem.h b/src/include/commands/defrem.h index 8740cee..c323e81 100644 --- a/src/include/commands/defrem.h +++ b/src/include/commands/defrem.h @@ -77,6 +77,10 @@ extern ObjectAddress DefineOperator(List *names, List *parameters); extern void RemoveOperatorById(Oid operOid); extern ObjectAddress AlterOperator(AlterOperatorStmt *stmt); +/* commands/statscmds.c */ +extern ObjectAddress CreateStatistics(CreateStatsStmt *stmt); +extern void RemoveStatisticsById(Oid statsOid); + /* commands/aggregatecmds.c */ extern ObjectAddress DefineAggregate(ParseState *pstate, List *name, List *args, bool oldstyle, List *parameters); diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 2bc7a5d..d269e77 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -278,6 +278,7 @@ typedef enum NodeTag T_PlaceHolderInfo, T_MinMaxAggInfo, T_PlannerParamItem, + T_StatisticExtInfo, /* * TAGS FOR MEMORY NODES (memnodes.h) @@ -423,6 +424,7 @@ typedef enum NodeTag T_CreateSubscriptionStmt, T_AlterSubscriptionStmt, T_DropSubscriptionStmt, + T_CreateStatsStmt, /* * TAGS FOR PARSE TREE NODES (parsenodes.h) diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index d576523..62679f4 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -1593,6 +1593,7 @@ typedef enum ObjectType OBJECT_SCHEMA, OBJECT_SEQUENCE, OBJECT_SUBSCRIPTION, + OBJECT_STATISTIC_EXT, OBJECT_TABCONSTRAINT, OBJECT_TABLE, OBJECT_TABLESPACE, @@ -2644,6 +2645,20 @@ typedef struct IndexStmt } IndexStmt; /* ---------------------- + * Create Statistics Statement + * ---------------------- + */ +typedef struct CreateStatsStmt +{ + NodeTag type; + List *defnames; /* qualified name (list of Value strings) */ + RangeVar *relation; /* relation to build statistics on */ + List *keys; /* String nodes naming referenced columns */ + List *options; /* list of DefElem */ + bool if_not_exists; /* do nothing if statistics already exists */ 
+} CreateStatsStmt; + +/* ---------------------- * Create Function Statement * ---------------------- */ diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 05d6f07..926ccc8 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -525,6 +525,7 @@ typedef struct RelOptInfo List *lateral_vars; /* LATERAL Vars and PHVs referenced by rel */ Relids lateral_referencers; /* rels that reference me laterally */ List *indexlist; /* list of IndexOptInfo */ + List *statlist; /* list of StatisticExtInfo */ BlockNumber pages; /* size estimates derived from pg_class */ double tuples; double allvisfrac; @@ -664,6 +665,24 @@ typedef struct ForeignKeyOptInfo List *rinfos[INDEX_MAX_KEYS]; } ForeignKeyOptInfo; +/* + * StatisticExtInfo + * Information about extended statistics for planning/optimization + * + * This contains information about which columns are covered by the + * statistics (stakeys), which options were requested while adding the + * statistics (*_enabled), and which kinds of statistics were actually + * built and are available for the optimizer (*_built). 
+ */ +typedef struct StatisticExtInfo +{ + NodeTag type; + + Oid statOid; /* OID of the statistics row */ + RelOptInfo *rel; /* back-link to the table the statistics are for */ + char kind; /* statistic kind of this entry */ + int2vector *keys; /* attnums of the columns covered */ +} StatisticExtInfo; /* * EquivalenceClasses diff --git a/src/include/statistics/stat_ext_internal.h b/src/include/statistics/stat_ext_internal.h new file mode 100644 index 0000000..7058b36 --- /dev/null +++ b/src/include/statistics/stat_ext_internal.h @@ -0,0 +1,68 @@ +/*------------------------------------------------------------------------- + * + * stat_ext_internal.h + * POSTGRES extended statistics internal declarations + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/include/statistics/stat_ext_internal.h + * + *------------------------------------------------------------------------- + */ +#ifndef STAT_EXT_INTERNAL_H +#define STAT_EXT_INTERNAL_H + +#include "utils/sortsupport.h" +#include "statistics/stats.h" + + +typedef struct +{ + Oid eqopr; /* '=' operator for datatype, if any */ + Oid eqfunc; /* and associated function */ + Oid ltopr; /* '<' operator for datatype, if any */ +} StdAnalyzeData; + +typedef struct +{ + Datum value; /* a data value */ + int tupno; /* position index for tuple it came from */ +} ScalarItem; + +/* multi-sort */ +typedef struct MultiSortSupportData +{ + int ndims; /* number of dimensions supported by the sort */ + SortSupportData ssup[1]; /* sort support data for each dimension */ +} MultiSortSupportData; + +typedef MultiSortSupportData *MultiSortSupport; + +typedef struct SortItem +{ + Datum *values; + bool *isnull; +} SortItem; + +extern MVNDistinct statext_ndistinct_build(double totalrows, + int numrows, HeapTuple *rows, + int2vector *attrs, VacAttrStats **stats); +extern bytea *statext_ndistinct_serialize(MVNDistinct ndistinct); +extern 
MVNDistinct statext_ndistinct_deserialize(bytea *data); + +extern MultiSortSupport multi_sort_init(int ndims); +extern void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, + int dim, VacAttrStats **vacattrstats); +extern int multi_sort_compare(const void *a, const void *b, void *arg); +extern int multi_sort_compare_dim(int dim, const SortItem * a, + const SortItem * b, MultiSortSupport mss); +extern int multi_sort_compare_dims(int start, int end, const SortItem * a, + const SortItem * b, MultiSortSupport mss); + +/* comparators, used when constructing extended stats */ +extern int compare_scalars_simple(const void *a, const void *b, void *arg); +extern int compare_scalars_partition(const void *a, const void *b, void *arg); + +#endif /* STAT_EXT_INTERNAL_H */ diff --git a/src/include/statistics/stats.h b/src/include/statistics/stats.h new file mode 100644 index 0000000..1ec9569 --- /dev/null +++ b/src/include/statistics/stats.h @@ -0,0 +1,49 @@ +/*------------------------------------------------------------------------- + * + * stats.h + * Extended statistics and selectivity estimation functions. + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/statistics/stats.h + * + *------------------------------------------------------------------------- + */ +#ifndef STATS_H +#define STATS_H + +#include "commands/vacuum.h" + +#define STATS_MAX_DIMENSIONS 8 /* max number of attributes */ + +#define STATS_NDISTINCT_MAGIC 0xA352BFA4 /* marks serialized bytea */ +#define STATS_NDISTINCT_TYPE_BASIC 1 /* basic ndistinct type */ + +/* Multivariate distinct coefficients. 
*/ +typedef struct MVNDistinctItem +{ + double ndistinct; + AttrNumber nattrs; + AttrNumber *attrs; +} MVNDistinctItem; + +typedef struct MVNDistinctData +{ + uint32 magic; /* magic constant marker */ + uint32 type; /* type of ndistinct (BASIC) */ + uint32 nitems; /* number of items in the statistic */ + MVNDistinctItem items[FLEXIBLE_ARRAY_MEMBER]; +} MVNDistinctData; + +typedef MVNDistinctData *MVNDistinct; + +extern MVNDistinct statext_ndistinct_load(Oid mvoid); + +extern void BuildRelationExtStatistics(Relation onerel, double totalrows, + int numrows, HeapTuple *rows, + int natts, VacAttrStats **vacattrstats); +extern bool stats_are_enabled(HeapTuple htup, char type); +extern bool statext_is_kind_built(HeapTuple htup, char kind); + +#endif /* STATS_H */ diff --git a/src/include/utils/acl.h b/src/include/utils/acl.h index 0d11852..c957d8e 100644 --- a/src/include/utils/acl.h +++ b/src/include/utils/acl.h @@ -192,6 +192,7 @@ typedef enum AclObjectKind ACL_KIND_OPFAMILY, /* pg_opfamily */ ACL_KIND_COLLATION, /* pg_collation */ ACL_KIND_CONVERSION, /* pg_conversion */ + ACL_KIND_STATISTICS, /* pg_statistic_ext */ ACL_KIND_TABLESPACE, /* pg_tablespace */ ACL_KIND_TSDICTIONARY, /* pg_ts_dict */ ACL_KIND_TSCONFIGURATION, /* pg_ts_config */ @@ -326,6 +327,7 @@ extern bool pg_event_trigger_ownercheck(Oid et_oid, Oid roleid); extern bool pg_extension_ownercheck(Oid ext_oid, Oid roleid); extern bool pg_publication_ownercheck(Oid pub_oid, Oid roleid); extern bool pg_subscription_ownercheck(Oid sub_oid, Oid roleid); +extern bool pg_statistics_ownercheck(Oid stat_oid, Oid roleid); extern bool has_createrole_privilege(Oid roleid); extern bool has_bypassrls_privilege(Oid roleid); diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index a617a7c..ab875bb 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -92,6 +92,7 @@ typedef struct RelationData bool rd_isvalid; /* relcache entry is valid */ char rd_indexvalid; /* state of rd_indexlist: 0 = not 
valid, 1 = * valid, 2 = temporarily forced */ + bool rd_statvalid; /* is rd_statlist valid? */ /* * rd_createSubid is the ID of the highest subtransaction the rel has @@ -136,6 +137,9 @@ typedef struct RelationData Oid rd_pkindex; /* OID of primary key, if any */ Oid rd_replidindex; /* OID of replica identity index, if any */ + /* data managed by RelationGetStatExtList: */ + List *rd_statlist; /* list of OIDs of extended stats */ + /* data managed by RelationGetIndexAttrBitmap: */ Bitmapset *rd_indexattr; /* identifies columns used in indexes */ Bitmapset *rd_keyattr; /* cols that can be ref'd by foreign keys */ diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index da36b67..81af3ae 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -39,6 +39,7 @@ extern void RelationClose(Relation relation); */ extern List *RelationGetFKeyList(Relation relation); extern List *RelationGetIndexList(Relation relation); +extern List *RelationGetStatExtList(Relation relation); extern Oid RelationGetOidIndex(Relation relation); extern Oid RelationGetPrimaryKeyIndex(Relation relation); extern Oid RelationGetReplicaIndex(Relation relation); diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h index 66f60d2..048541e 100644 --- a/src/include/utils/syscache.h +++ b/src/include/utils/syscache.h @@ -86,6 +86,8 @@ enum SysCacheIdentifier PUBLICATIONRELMAP, RULERELNAME, SEQRELID, + STATEXTNAMENSP, + STATEXTOID, STATRELATTINH, SUBSCRIPTIONOID, SUBSCRIPTIONNAME, diff --git a/src/test/regress/expected/alter_generic.out b/src/test/regress/expected/alter_generic.out index b01be59..ce581bb 100644 --- a/src/test/regress/expected/alter_generic.out +++ b/src/test/regress/expected/alter_generic.out @@ -497,6 +497,48 @@ ALTER OPERATOR FAMILY alt_opf18 USING btree DROP FUNCTION 2 (int4, int4); ERROR: function 2(integer,integer) does not exist in operator family "alt_opf18" DROP OPERATOR FAMILY alt_opf18 USING btree; -- +-- Statistics 
+-- +SET SESSION AUTHORIZATION regress_alter_user1; +CREATE TABLE alt_regress_1 (a INTEGER, b INTEGER); +CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1; +CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1; +ALTER STATISTICS alt_stat1 RENAME TO alt_stat2; -- failed (name conflict) +ERROR: statistics "alt_stat2" already exists in schema "alt_nsp1" +ALTER STATISTICS alt_stat1 RENAME TO alt_stat3; -- OK +ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user2; -- failed (no role membership) +ERROR: must be member of role "regress_alter_user2" +ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- OK +ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2; -- OK +SET SESSION AUTHORIZATION regress_alter_user2; +CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1; +CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1; +ALTER STATISTICS alt_stat3 RENAME TO alt_stat4; -- failed (not owner) +ERROR: must be owner of statistics alt_stat3 +ALTER STATISTICS alt_stat1 RENAME TO alt_stat4; -- OK +ALTER STATISTICS alt_stat3 OWNER TO regress_alter_user2; -- failed (not owner) +ERROR: must be owner of statistics alt_stat3 +ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- failed (no role membership) +ERROR: must be member of role "regress_alter_user3" +ALTER STATISTICS alt_stat3 SET SCHEMA alt_nsp2; -- failed (not owner) +ERROR: must be owner of statistics alt_stat3 +ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2; -- failed (name conflict) +ERROR: statistics "alt_stat2" already exists in schema "alt_nsp2" +RESET SESSION AUTHORIZATION; +SELECT nspname, staname, rolname + FROM pg_statistic_ext s, pg_namespace n, pg_authid a + WHERE s.stanamespace = n.oid AND s.staowner = a.oid + AND n.nspname in ('alt_nsp1', 'alt_nsp2') + ORDER BY nspname, staname; + nspname | staname | rolname +----------+-----------+--------------------- + alt_nsp1 | alt_stat2 | regress_alter_user2 + alt_nsp1 | alt_stat3 | regress_alter_user1 + alt_nsp1 | alt_stat4 
| regress_alter_user2 + alt_nsp2 | alt_stat2 | regress_alter_user3 +(4 rows) + +-- -- Text Search Dictionary -- SET SESSION AUTHORIZATION regress_alter_user1; @@ -639,7 +681,7 @@ DROP LANGUAGE alt_lang3 CASCADE; DROP LANGUAGE alt_lang4 CASCADE; ERROR: language "alt_lang4" does not exist DROP SCHEMA alt_nsp1 CASCADE; -NOTICE: drop cascades to 26 other objects +NOTICE: drop cascades to 27 other objects DETAIL: drop cascades to function alt_func3(integer) drop cascades to function alt_agg3(integer) drop cascades to function alt_func4(integer) @@ -656,6 +698,7 @@ drop cascades to operator family alt_opc1 for access method hash drop cascades to operator family alt_opc2 for access method hash drop cascades to operator family alt_opf4 for access method hash drop cascades to operator family alt_opf2 for access method hash +drop cascades to table alt_regress_1 drop cascades to text search dictionary alt_ts_dict3 drop cascades to text search dictionary alt_ts_dict4 drop cascades to text search dictionary alt_ts_dict2 diff --git a/src/test/regress/expected/object_address.out b/src/test/regress/expected/object_address.out index 90c4ba4..145a530 100644 --- a/src/test/regress/expected/object_address.out +++ b/src/test/regress/expected/object_address.out @@ -38,6 +38,7 @@ CREATE TRANSFORM FOR int LANGUAGE SQL ( TO SQL WITH FUNCTION int4recv(internal)); CREATE PUBLICATION addr_pub FOR TABLE addr_nsp.gentable; CREATE SUBSCRIPTION addr_sub CONNECTION '' PUBLICATION bar WITH (DISABLED, NOCREATE SLOT); +CREATE STATISTICS addr_nsp.gentable_stat ON (a,b) FROM addr_nsp.gentable; -- test some error cases SELECT pg_get_object_address('stone', '{}', '{}'); ERROR: unrecognized object type "stone" @@ -408,7 +409,8 @@ WITH objects (type, name, args) AS (VALUES ('access method', '{btree}', '{}'), ('publication', '{addr_pub}', '{}'), ('publication relation', '{addr_nsp, gentable}', '{addr_pub}'), - ('subscription', '{addr_sub}', '{}') + ('subscription', '{addr_sub}', '{}'), + ('statistics', 
'{addr_nsp, gentable_stat}', '{}') ) SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*, -- test roundtrip through pg_identify_object_as_address @@ -456,6 +458,7 @@ SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*, trigger | | | t on addr_nsp.gentable | t operator family | pg_catalog | integer_ops | pg_catalog.integer_ops USING btree | t policy | | | genpol on addr_nsp.gentable | t + statistics | addr_nsp | gentable_stat | addr_nsp.gentable_stat | t collation | pg_catalog | "default" | pg_catalog."default" | t transform | | | for integer on language sql | t text search dictionary | addr_nsp | addr_ts_dict | addr_nsp.addr_ts_dict | t @@ -465,7 +468,7 @@ SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*, subscription | | addr_sub | addr_sub | t publication | | addr_pub | addr_pub | t publication relation | | | gentable in publication addr_pub | t -(45 rows) +(46 rows) --- --- Cleanup resources diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 64d9dd6..262036a 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -823,11 +823,12 @@ WHERE c.castmethod = 'b' AND text | character | 0 | i character varying | character | 0 | i pg_node_tree | text | 0 | i + pg_ndistinct | bytea | 0 | i cidr | inet | 0 | i xml | text | 0 | a xml | character varying | 0 | a xml | character | 0 | a -(7 rows) +(8 rows) -- **************** pg_conversion **************** -- Look for illegal values in pg_conversion fields. 
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index bd13ae6..d4b2158 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -2160,6 +2160,14 @@ pg_stats| SELECT n.nspname AS schemaname, JOIN pg_attribute a ON (((c.oid = a.attrelid) AND (a.attnum = s.staattnum)))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE ((NOT a.attisdropped) AND has_column_privilege(c.oid, a.attnum, 'select'::text) AND ((c.relrowsecurity = false) OR (NOT row_security_active(c.oid)))); +pg_stats_ext| SELECT n.nspname AS schemaname, + c.relname AS tablename, + s.staname, + s.stakeys AS attnums, + length((s.standistinct)::text) AS ndistbytes + FROM ((pg_statistic_ext s + JOIN pg_class c ON ((c.oid = s.starelid))) + LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))); pg_tables| SELECT n.nspname AS schemaname, c.relname AS tablename, pg_get_userbyid(c.relowner) AS tableowner, diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out index b5eff55..9edba4f 100644 --- a/src/test/regress/expected/sanity_check.out +++ b/src/test/regress/expected/sanity_check.out @@ -142,6 +142,7 @@ pg_shdepend|t pg_shdescription|t pg_shseclabel|t pg_statistic|t +pg_statistic_ext|t pg_subscription|t pg_tablespace|t pg_transform|t diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out new file mode 100644 index 0000000..5593657 --- /dev/null +++ b/src/test/regress/expected/stats_ext.out @@ -0,0 +1,137 @@ +-- Generic extended statistics support +CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER); +CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; +DROP STATISTICS ab1_a_b_stats; +CREATE SCHEMA regress_schema_2; +CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON (a, b) FROM ab1; +DROP STATISTICS regress_schema_2.ab1_a_b_stats; +CREATE STATISTICS ab1_a_b_c_stats ON (a, b, c) FROM ab1; +CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; 
+ALTER TABLE ab1 DROP COLUMN a; +\d ab1 + Table "public.ab1" + Column | Type | Collation | Nullable | Default +--------+---------+-----------+----------+--------- + b | integer | | | + c | integer | | | +Statistics: + "public.ab1_a_b_c_stats" WITH (ndistinct) ON (b, c) + +DROP TABLE ab1; +-- data type passed by value +CREATE TABLE ndistinct ( + a INT, + b INT, + c INT, + d INT +); +-- unknown column +CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct; +ERROR: column "unknown_column" referenced in statistics does not exist +-- single column +CREATE STATISTICS s10 ON (a) FROM ndistinct; +ERROR: statistics require at least 2 columns +-- single column, duplicated +CREATE STATISTICS s10 ON (a,a) FROM ndistinct; +ERROR: duplicate column name in statistics definition +-- two columns, one duplicated +CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct; +ERROR: duplicate column name in statistics definition +-- correct command +CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct; +-- perfectly correlated groups +INSERT INTO ndistinct + SELECT i/100, i/100, i/100 FROM generate_series(1,10000) s(i); +ANALYZE ndistinct; +SELECT staenabled, standistinct + FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass; + staenabled | standistinct +------------+------------------------------------------------------------------------------------- + {d} | [{0, 1, 101.000000}, {0, 2, 101.000000}, {1, 2, 101.000000}, {0, 1, 2, 101.000000}] +(1 row) + +EXPLAIN (COSTS off) + SELECT COUNT(*) FROM ndistinct GROUP BY a, b; + QUERY PLAN +----------------------------- + HashAggregate + Group Key: a, b + -> Seq Scan on ndistinct +(3 rows) + +EXPLAIN (COSTS off) + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; + QUERY PLAN +----------------------------- + HashAggregate + Group Key: a, b, c + -> Seq Scan on ndistinct +(3 rows) + +EXPLAIN (COSTS off) + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; + QUERY PLAN +----------------------------- + HashAggregate + Group Key: a, b, c, d + -> 
Seq Scan on ndistinct +(3 rows) + +TRUNCATE TABLE ndistinct; +-- partially correlated groups +INSERT INTO ndistinct + SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i); +ANALYZE ndistinct; +SELECT staenabled, standistinct + FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass; + staenabled | standistinct +------------+------------------------------------------------------------------------------------- + {d} | [{0, 1, 201.000000}, {0, 2, 201.000000}, {1, 2, 101.000000}, {0, 1, 2, 201.000000}] +(1 row) + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, b; + QUERY PLAN +--------------------------------------------------------------------- + HashAggregate (cost=230.00..232.01 rows=201 width=16) + Group Key: a, b + -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=8) +(3 rows) + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; + QUERY PLAN +---------------------------------------------------------------------- + HashAggregate (cost=255.00..257.01 rows=201 width=20) + Group Key: a, b, c + -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=12) +(3 rows) + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; + QUERY PLAN +---------------------------------------------------------------------- + HashAggregate (cost=280.00..290.00 rows=1000 width=24) + Group Key: a, b, c, d + -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=16) +(3 rows) + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d; + QUERY PLAN +---------------------------------------------------------------------- + HashAggregate (cost=255.00..265.00 rows=1000 width=20) + Group Key: b, c, d + -> Seq Scan on ndistinct (cost=0.00..155.00 rows=10000 width=12) +(3 rows) + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, d; + QUERY PLAN +--------------------------------------------------------------------- + HashAggregate (cost=230.00..240.00 rows=1000 width=16) + Group Key: a, d + -> Seq Scan on ndistinct (cost=0.00..155.00 
rows=10000 width=8) +(3 rows) + +DROP TABLE ndistinct; diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out index 8d75bbf..84022f6 100644 --- a/src/test/regress/expected/type_sanity.out +++ b/src/test/regress/expected/type_sanity.out @@ -59,7 +59,7 @@ WHERE (p1.typtype = 'c' AND p1.typrelid = 0) OR -- Look for types that should have an array type according to their typtype, -- but don't. We exclude composites here because we have not bothered to -- make array types corresponding to the system catalogs' rowtypes. --- NOTE: as of v10, this check finds pg_node_tree and smgr. +-- NOTE: as of v10, this check finds pg_node_tree, pg_ndistinct, smgr. SELECT p1.oid, p1.typname FROM pg_type as p1 WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%' @@ -67,11 +67,12 @@ WHERE p1.typtype not in ('c','d','p') AND p1.typname NOT LIKE E'\\_%' (SELECT 1 FROM pg_type as p2 WHERE p2.typname = ('_' || p1.typname)::name AND p2.typelem = p1.oid and p1.typarray = p2.oid); - oid | typname ------+-------------- - 194 | pg_node_tree - 210 | smgr -(2 rows) + oid | typname +------+-------------- + 194 | pg_node_tree + 3361 | pg_ndistinct + 210 | smgr +(3 rows) -- Make sure typarray points to a varlena array type of our own base SELECT p1.oid, p1.typname as basetype, p2.typname as arraytype, diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 38743d9..c283bdc 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -89,7 +89,7 @@ test: brin gin gist spgist privileges init_privs security_label collate matview # ---------- # Another group of parallel tests # ---------- -test: alter_generic alter_operator misc psql async dbsize misc_functions sysviews tsrf tidscan +test: alter_generic alter_operator misc psql async dbsize misc_functions sysviews tsrf tidscan stats_ext # rules cannot run concurrently with any test that creates a view test: rules psql_crosstab 
amutils diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule index d9f64c2..3a0d536 100644 --- a/src/test/regress/serial_schedule +++ b/src/test/regress/serial_schedule @@ -130,6 +130,7 @@ test: misc_functions test: sysviews test: tsrf test: tidscan +test: stats_ext test: rules test: psql_crosstab test: select_parallel diff --git a/src/test/regress/sql/alter_generic.sql b/src/test/regress/sql/alter_generic.sql index c9ea479..f6fa8d8 100644 --- a/src/test/regress/sql/alter_generic.sql +++ b/src/test/regress/sql/alter_generic.sql @@ -433,6 +433,37 @@ ALTER OPERATOR FAMILY alt_opf18 USING btree ADD ALTER OPERATOR FAMILY alt_opf18 USING btree DROP FUNCTION 2 (int4, int4); DROP OPERATOR FAMILY alt_opf18 USING btree; +-- +-- Statistics +-- +SET SESSION AUTHORIZATION regress_alter_user1; +CREATE TABLE alt_regress_1 (a INTEGER, b INTEGER); +CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1; +CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1; + +ALTER STATISTICS alt_stat1 RENAME TO alt_stat2; -- failed (name conflict) +ALTER STATISTICS alt_stat1 RENAME TO alt_stat3; -- OK +ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user2; -- failed (no role membership) +ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- OK +ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2; -- OK + +SET SESSION AUTHORIZATION regress_alter_user2; +CREATE STATISTICS alt_stat1 ON (a, b) FROM alt_regress_1; +CREATE STATISTICS alt_stat2 ON (a, b) FROM alt_regress_1; + +ALTER STATISTICS alt_stat3 RENAME TO alt_stat4; -- failed (not owner) +ALTER STATISTICS alt_stat1 RENAME TO alt_stat4; -- OK +ALTER STATISTICS alt_stat3 OWNER TO regress_alter_user2; -- failed (not owner) +ALTER STATISTICS alt_stat2 OWNER TO regress_alter_user3; -- failed (no role membership) +ALTER STATISTICS alt_stat3 SET SCHEMA alt_nsp2; -- failed (not owner) +ALTER STATISTICS alt_stat2 SET SCHEMA alt_nsp2; -- failed (name conflict) + +RESET SESSION AUTHORIZATION; 
+SELECT nspname, staname, rolname + FROM pg_statistic_ext s, pg_namespace n, pg_authid a + WHERE s.stanamespace = n.oid AND s.staowner = a.oid + AND n.nspname in ('alt_nsp1', 'alt_nsp2') + ORDER BY nspname, staname; -- -- Text Search Dictionary diff --git a/src/test/regress/sql/object_address.sql b/src/test/regress/sql/object_address.sql index 6b85fe2..9d3230b 100644 --- a/src/test/regress/sql/object_address.sql +++ b/src/test/regress/sql/object_address.sql @@ -41,6 +41,7 @@ CREATE TRANSFORM FOR int LANGUAGE SQL ( TO SQL WITH FUNCTION int4recv(internal)); CREATE PUBLICATION addr_pub FOR TABLE addr_nsp.gentable; CREATE SUBSCRIPTION addr_sub CONNECTION '' PUBLICATION bar WITH (DISABLED, NOCREATE SLOT); +CREATE STATISTICS addr_nsp.gentable_stat ON (a,b) FROM addr_nsp.gentable; -- test some error cases SELECT pg_get_object_address('stone', '{}', '{}'); @@ -185,7 +186,8 @@ WITH objects (type, name, args) AS (VALUES ('access method', '{btree}', '{}'), ('publication', '{addr_pub}', '{}'), ('publication relation', '{addr_nsp, gentable}', '{addr_pub}'), - ('subscription', '{addr_sub}', '{}') + ('subscription', '{addr_sub}', '{}'), + ('statistics', '{addr_nsp, gentable_stat}', '{}') ) SELECT (pg_identify_object(addr1.classid, addr1.objid, addr1.objsubid)).*, -- test roundtrip through pg_identify_object_as_address diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql new file mode 100644 index 0000000..472d465 --- /dev/null +++ b/src/test/regress/sql/stats_ext.sql @@ -0,0 +1,86 @@ +-- Generic extended statistics support +CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER); + +CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; +DROP STATISTICS ab1_a_b_stats; + +CREATE SCHEMA regress_schema_2; +CREATE STATISTICS regress_schema_2.ab1_a_b_stats ON (a, b) FROM ab1; +DROP STATISTICS regress_schema_2.ab1_a_b_stats; + +CREATE STATISTICS ab1_a_b_c_stats ON (a, b, c) FROM ab1; +CREATE STATISTICS ab1_a_b_stats ON (a, b) FROM ab1; +ALTER TABLE ab1 DROP 
COLUMN a; +\d ab1 + +DROP TABLE ab1; + + +-- data type passed by value +CREATE TABLE ndistinct ( + a INT, + b INT, + c INT, + d INT +); + +-- unknown column +CREATE STATISTICS s10 ON (unknown_column) FROM ndistinct; + +-- single column +CREATE STATISTICS s10 ON (a) FROM ndistinct; + +-- single column, duplicated +CREATE STATISTICS s10 ON (a,a) FROM ndistinct; + +-- two columns, one duplicated +CREATE STATISTICS s10 ON (a, a, b) FROM ndistinct; + +-- correct command +CREATE STATISTICS s10 ON (a, b, c) FROM ndistinct; + +-- perfectly correlated groups +INSERT INTO ndistinct + SELECT i/100, i/100, i/100 FROM generate_series(1,10000) s(i); + +ANALYZE ndistinct; + +SELECT staenabled, standistinct + FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass; + +EXPLAIN (COSTS off) + SELECT COUNT(*) FROM ndistinct GROUP BY a, b; + +EXPLAIN (COSTS off) + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; + +EXPLAIN (COSTS off) + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; + +TRUNCATE TABLE ndistinct; + +-- partially correlated groups +INSERT INTO ndistinct + SELECT i/50, i/100, i/200 FROM generate_series(1,10000) s(i); + +ANALYZE ndistinct; + +SELECT staenabled, standistinct + FROM pg_statistic_ext WHERE starelid = 'ndistinct'::regclass; + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, b; + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c; + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, b, c, d; + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY b, c, d; + +EXPLAIN + SELECT COUNT(*) FROM ndistinct GROUP BY a, d; + +DROP TABLE ndistinct; diff --git a/src/test/regress/sql/type_sanity.sql b/src/test/regress/sql/type_sanity.sql index 0a31249..4c65814 100644 --- a/src/test/regress/sql/type_sanity.sql +++ b/src/test/regress/sql/type_sanity.sql @@ -53,7 +53,7 @@ WHERE (p1.typtype = 'c' AND p1.typrelid = 0) OR -- Look for types that should have an array type according to their typtype, -- but don't. 
We exclude composites here because we have not bothered to -- make array types corresponding to the system catalogs' rowtypes. --- NOTE: as of v10, this check finds pg_node_tree and smgr. +-- NOTE: as of v10, this check finds pg_node_tree, pg_ndistinct, smgr. SELECT p1.oid, p1.typname FROM pg_type as p1