From 320c0491dea628a0b56926563c94fa7044eb191c Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Fri, 24 Jan 2025 09:30:14 -0600 Subject: [PATCH v1 2/2] vacuumdb: Allow analyzing-in-stages only relations missing stats. This commit adds a required argument to --analyze-in-stages that accepts either "missing" or "all". "all" provides the same behavior that --analyze-in-stages has today. "missing" can be used to instead analyze only relations that are missing statistics. This new argument is intentionally marked as required to break any existing post-pg_upgrade vacuumdb scripts. Since stats are going to be carried over by pg_upgrade to v18 or later, running --analyze-in-stages=all is unnecessary for the vast majority of users and is likely to even hurt performance post-upgrade. XXX: I think the above paragraph changes depending on whether extended statistics are carried over by pg_upgrade. If they aren't, we should probably recommend --analyze-in-stages=missing. If they are, then we probably shouldn't recommend anything at all. Does this also change how we structure the user-facing part of this change? Note that the new "missing" mode will generate ANALYZE commands for a relation if it is missing any statistics it should ordinarily have. For example, if a table has statistics for one column but not another, we will analyze the whole table. A similar principle applies to extended statistics, expression indexes, table inheritance, and partitioned tables. XXX: Do we need to make a similar change to --analyze-only and/or --analyze (e.g., a new --missing-stats-only option)? Author: Corey Huinker --- doc/src/sgml/ref/vacuumdb.sgml | 12 +++- src/bin/scripts/t/102_vacuumdb_stages.pl | 64 ++++++++++++++++++++- src/bin/scripts/vacuumdb.c | 73 +++++++++++++++++++++++- src/test/perl/PostgreSQL/Test/Cluster.pm | 27 +++++++++ 4 files changed, 171 insertions(+), 5 deletions(-) diff --git a/doc/src/sgml/ref/vacuumdb.sgml b/doc/src/sgml/ref/vacuumdb.sgml index 66fccb30a2d..3ef8a52a163 100644 --- a/doc/src/sgml/ref/vacuumdb.sgml +++ b/doc/src/sgml/ref/vacuumdb.sgml @@ -429,7 +429,7 @@ PostgreSQL documentation - + Only calculate statistics for use by the optimizer (no vacuum), @@ -441,6 +441,8 @@ PostgreSQL documentation + When set to all, vacuumdb will calculate statistics + for all relations, even relations with existing statistics. This option is only useful to analyze a database that currently has no statistics or has wholly incorrect ones, such as if it is newly populated from a restored dump or by pg_upgrade. @@ -449,6 +451,14 @@ PostgreSQL documentation transiently worse due to the low statistics targets of the early stages. + + + When set to missing, vacuumdb will calculate + statistics only for relations that are missing statistics for a column, + index expression, or extended statistics object. This option prevents + vacuumdb from deleting existing statistics so that the query + optimizer's choices do not become transiently worse. + diff --git a/src/bin/scripts/t/102_vacuumdb_stages.pl b/src/bin/scripts/t/102_vacuumdb_stages.pl index 984c8d06de6..0f560accb46 100644 --- a/src/bin/scripts/t/102_vacuumdb_stages.pl +++ b/src/bin/scripts/t/102_vacuumdb_stages.pl @@ -12,7 +12,7 @@ $node->init; $node->start; $node->issues_sql_like( - [ 'vacuumdb', '--analyze-in-stages', 'postgres' ], + [ 'vacuumdb', '--analyze-in-stages', 'all', 'postgres' ], qr/statement:\ SET\ default_statistics_target=1;\ SET\ vacuum_cost_delay=0; .*statement:\ ANALYZE .*statement:\ SET\ default_statistics_target=10;\ RESET\ vacuum_cost_delay; @@ -21,8 +21,68 @@ $node->issues_sql_like( .*statement:\ ANALYZE/sx, 'analyze three times'); +$node->safe_psql('postgres', + 'CREATE TABLE regression_vacuumdb_test AS select generate_series(1, 10) a, generate_series(2, 11) b;'); $node->issues_sql_like( - [ 'vacuumdb', '--analyze-in-stages', '--all' ], + [ 'vacuumdb', '--analyze-in-stages', 'missing', '-t', 'regression_vacuumdb_test', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--analyze-in-stages=missing with missing stats'); +$node->issues_sql_unlike( + [ 'vacuumdb', '--analyze-in-stages', 'missing', '-t', 'regression_vacuumdb_test', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--analyze-in-stages=missing with no missing stats'); + +$node->safe_psql('postgres', + 'CREATE INDEX regression_vacuumdb_test_idx ON regression_vacuumdb_test (mod(a, 2));'); +$node->issues_sql_like( + [ 'vacuumdb', '--analyze-in-stages', 'missing', '-t', 'regression_vacuumdb_test', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--analyze-in-stages=missing with missing index expression stats'); +$node->issues_sql_unlike( + [ 'vacuumdb', '--analyze-in-stages', 'missing', '-t', 'regression_vacuumdb_test', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--analyze-in-stages=missing with no missing index expression stats'); + +$node->safe_psql('postgres', + 'CREATE STATISTICS regression_vacuumdb_test_stat ON a, b FROM regression_vacuumdb_test;'); +$node->issues_sql_like( + [ 'vacuumdb', '--analyze-in-stages', 'missing', '-t', 'regression_vacuumdb_test', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--analyze-in-stages=missing with missing extended stats'); +$node->issues_sql_unlike( + [ 'vacuumdb', '--analyze-in-stages', 'missing', '-t', 'regression_vacuumdb_test', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--analyze-in-stages=missing with no missing extended stats'); + +$node->safe_psql('postgres', + "CREATE TABLE regression_vacuumdb_child (a INT) INHERITS (regression_vacuumdb_test);\n" + . "INSERT INTO regression_vacuumdb_child VALUES (1, 2);\n" + . "ANALYZE regression_vacuumdb_child;\n"); +$node->issues_sql_like( + [ 'vacuumdb', '--analyze-in-stages', 'missing', '-t', 'regression_vacuumdb_test', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--analyze-in-stages=missing with missing inherited stats'); +$node->issues_sql_unlike( + [ 'vacuumdb', '--analyze-in-stages', 'missing', '-t', 'regression_vacuumdb_test', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--analyze-in-stages=missing with no missing inherited stats'); + +$node->safe_psql('postgres', + "CREATE TABLE regression_vacuumdb_parted (a INT) PARTITION BY LIST (a);\n" + . "CREATE TABLE regression_vacuumdb_part1 PARTITION OF regression_vacuumdb_parted FOR VALUES IN (1);\n" + . "INSERT INTO regression_vacuumdb_parted VALUES (1);\n" + . "ANALYZE regression_vacuumdb_part1;\n"); +$node->issues_sql_like( + [ 'vacuumdb', '--analyze-in-stages', 'missing', '-t', 'regression_vacuumdb_parted', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--analyze-in-stages=missing with missing partition stats'); +$node->issues_sql_unlike( + [ 'vacuumdb', '--analyze-in-stages', 'missing', '-t', 'regression_vacuumdb_parted', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--analyze-in-stages=missing with no missing partition stats'); + +$node->issues_sql_like( + [ 'vacuumdb', '--analyze-in-stages', 'all', '--all' ], qr/statement:\ SET\ default_statistics_target=1;\ SET\ vacuum_cost_delay=0; .*statement:\ ANALYZE .*statement:\ SET\ default_statistics_target=1;\ SET\ vacuum_cost_delay=0; diff --git a/src/bin/scripts/vacuumdb.c b/src/bin/scripts/vacuumdb.c index 88ceb42746d..ddc01e9d723 100644 --- a/src/bin/scripts/vacuumdb.c +++ b/src/bin/scripts/vacuumdb.c @@ -61,6 +61,7 @@ typedef enum } VacObjFilter; static VacObjFilter objfilter = OBJFILTER_NONE; +static bool missing_only; static SimpleStringList *retrieve_objects(PGconn *conn, vacuumingOptions *vacopts, @@ -123,7 +124,7 @@ main(int argc, char *argv[]) {"schema", required_argument, NULL, 'n'}, {"exclude-schema", required_argument, NULL, 'N'}, {"maintenance-db", required_argument, NULL, 2}, - {"analyze-in-stages", no_argument, NULL, 3}, + {"analyze-in-stages", required_argument, NULL, 3}, {"disable-page-skipping", no_argument, NULL, 4}, {"skip-locked", no_argument, NULL, 5}, {"min-xid-age", required_argument, NULL, 6}, @@ -246,6 +247,16 @@ main(int argc, char *argv[]) break; case 3: analyze_in_stages = vacopts.analyze_only = true; + if (strcmp(optarg, "all") == 0) + missing_only = false; + else if (strcmp(optarg, "missing") == 0) + missing_only = true; + else + { + pg_log_error("unrecognized --analyze-in-stages option: %s", + optarg); + exit(1); + } break; case 4: vacopts.disable_page_skipping = true; @@ -808,6 +819,7 @@ retrieve_objects(PGconn *conn, vacuumingOptions *vacopts, " FROM pg_catalog.pg_class c\n" " JOIN pg_catalog.pg_namespace ns" " ON c.relnamespace OPERATOR(pg_catalog.=) ns.oid\n" + " CROSS JOIN LATERAL (SELECT c.relkind IN ('p', 'I')) as p (inherited)\n" " LEFT JOIN pg_catalog.pg_class t" " ON c.reltoastrelid OPERATOR(pg_catalog.=) t.oid\n"); @@ -891,6 +903,63 @@ retrieve_objects(PGconn *conn, vacuumingOptions *vacopts, vacopts->min_mxid_age); } + if (missing_only) + { + appendPQExpBufferStr(&catalog_query, " AND (\n"); + + /* regular stats */ + appendPQExpBufferStr(&catalog_query, + " EXISTS (SELECT NULL FROM pg_catalog.pg_attribute a\n" + " WHERE a.attrelid OPERATOR(pg_catalog.=) c.oid\n" + " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n" + " AND a.attnum OPERATOR(pg_catalog.>) 0::pg_catalog.int2\n" + " AND NOT a.attisdropped\n" + " AND a.attstattarget IS DISTINCT FROM 0::pg_catalog.int2\n" + " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic s\n" + " WHERE s.starelid OPERATOR(pg_catalog.=) a.attrelid\n" + " AND s.staattnum OPERATOR(pg_catalog.=) a.attnum\n" + " AND s.stainherit OPERATOR(pg_catalog.=) p.inherited))\n"); + + /* extended stats */ + appendPQExpBufferStr(&catalog_query, + " OR EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext e\n" + " WHERE e.stxrelid OPERATOR(pg_catalog.=) c.oid\n" + " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n" + " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext_data d\n" + " WHERE d.stxoid OPERATOR(pg_catalog.=) e.oid))\n"); + + /* expression indexes */ + appendPQExpBufferStr(&catalog_query, + " OR EXISTS (SELECT NULL FROM pg_catalog.pg_index i\n" + " CROSS JOIN LATERAL pg_catalog.unnest(i.indkey) WITH ORDINALITY u (attnum, ord)\n" + " WHERE i.indrelid OPERATOR(pg_catalog.=) c.oid\n" + " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n" + " AND i.indexprs IS NOT NULL\n" + " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic s\n" + " WHERE s.starelid OPERATOR(pg_catalog.=) i.indexrelid\n" + " AND s.staattnum OPERATOR(pg_catalog.=) u.ord\n" + " AND s.stainherit OPERATOR(pg_catalog.=) p.inherited))\n"); + + /* table inheritance */ + appendPQExpBufferStr(&catalog_query, + " OR EXISTS (SELECT NULL FROM pg_catalog.pg_attribute a\n" + " WHERE a.attrelid OPERATOR(pg_catalog.=) c.oid\n" + " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n" + " AND a.attnum OPERATOR(pg_catalog.>) 0::pg_catalog.int2\n" + " AND NOT a.attisdropped\n" + " AND a.attstattarget IS DISTINCT FROM 0::pg_catalog.int2\n" + " AND c.relhassubclass\n" + " AND NOT p.inherited\n" + " AND EXISTS (SELECT NULL FROM pg_catalog.pg_inherits h\n" + " WHERE h.inhparent OPERATOR(pg_catalog.=) c.oid)\n" + " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic s\n" + " WHERE s.starelid OPERATOR(pg_catalog.=) a.attrelid\n" + " AND s.staattnum OPERATOR(pg_catalog.=) a.attnum\n" + " AND s.stainherit))\n"); + + appendPQExpBufferStr(&catalog_query, " )\n"); + } + /* * Execute the catalog query. We use the default search_path for this * query for consistency with table lookups done elsewhere by the user. @@ -1226,7 +1295,7 @@ help(const char *progname) printf(_(" -V, --version output version information, then exit\n")); printf(_(" -z, --analyze update optimizer statistics\n")); printf(_(" -Z, --analyze-only only update optimizer statistics; no vacuum\n")); - printf(_(" --analyze-in-stages only update optimizer statistics, in multiple\n" + printf(_(" --analyze-in-stages=OPTION only update optimizer statistics, in multiple\n" " stages for faster results; no vacuum\n")); printf(_(" -?, --help show this help, then exit\n")); printf(_("\nConnection options:\n")); diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm index f521ad0b12f..d30d97aa96c 100644 --- a/src/test/perl/PostgreSQL/Test/Cluster.pm +++ b/src/test/perl/PostgreSQL/Test/Cluster.pm @@ -2802,6 +2802,33 @@ sub issues_sql_like =pod +=item $node->issues_sql_unlike(cmd, unexpected_sql, test_name) + +Run a command on the node, then verify that $unexpected_sql does not appear in +the server log file. + +=cut + +sub issues_sql_unlike +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my ($self, $cmd, $unexpected_sql, $test_name) = @_; + + local %ENV = $self->_get_env(); + + my $log_location = -s $self->logfile; + + my $result = PostgreSQL::Test::Utils::run_log($cmd); + ok($result, "@$cmd exit code 0"); + my $log = + PostgreSQL::Test::Utils::slurp_file($self->logfile, $log_location); + unlike($log, $unexpected_sql, "$test_name: SQL not found in server log"); + return; +} + +=pod + =item $node->log_content() Returns the contents of log of the node -- 2.39.5 (Apple Git-154)