From 26c7d17c807e5c7b77621c0412d2d56ddbcc38aa Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Tue, 4 Feb 2025 15:07:54 -0600 Subject: [PATCH v3 2/2] vacuumdb: Add option for analyzing only relations missing stats. This commit adds a new --missing-only option that can be used in conjunction with --analyze-only and --analyze-in-stages. When this option is specified, vacuumdb will generate ANALYZE commands for a relation if it is missing any statistics it should ordinarily have. For example, if a table has statistics for one column but not another, we will analyze the whole table. A similar principle applies to extended statistics, expression indexes, and table inheritance. Co-authored-by: Corey Huinker Reviewed-by: TODO Discussion: https://postgr.es/m/Z5O1bpcwDrMgyrYy%40nathan --- doc/src/sgml/ref/vacuumdb.sgml | 16 +++++ src/bin/scripts/t/102_vacuumdb_stages.pl | 60 ++++++++++++++++ src/bin/scripts/vacuumdb.c | 92 ++++++++++++++++++++++++ src/test/perl/PostgreSQL/Test/Cluster.pm | 27 +++++++ 4 files changed, 195 insertions(+) diff --git a/doc/src/sgml/ref/vacuumdb.sgml b/doc/src/sgml/ref/vacuumdb.sgml index 66fccb30a2d..5295a61f083 100644 --- a/doc/src/sgml/ref/vacuumdb.sgml +++ b/doc/src/sgml/ref/vacuumdb.sgml @@ -277,6 +277,22 @@ PostgreSQL documentation + + + + + Only analyze relations that are missing statistics for a column, index + expression, or extended statistics object. This option prevents + vacuumdb from deleting existing statistics + so that the query optimizer's choices do not become transiently worse. + + + This option can only be used in conjunction with + and . + + + + diff --git a/src/bin/scripts/t/102_vacuumdb_stages.pl b/src/bin/scripts/t/102_vacuumdb_stages.pl index 984c8d06de6..b216fb0c2c6 100644 --- a/src/bin/scripts/t/102_vacuumdb_stages.pl +++ b/src/bin/scripts/t/102_vacuumdb_stages.pl @@ -21,6 +21,66 @@ $node->issues_sql_like( .*statement:\ ANALYZE/sx, 'analyze three times'); +$node->safe_psql('postgres', + 'CREATE TABLE regression_vacuumdb_test AS select generate_series(1, 10) a, generate_series(2, 11) b;'); +$node->issues_sql_like( + [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--missing-only with missing stats'); +$node->issues_sql_unlike( + [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--missing-only with no missing stats'); + +$node->safe_psql('postgres', + 'CREATE INDEX regression_vacuumdb_test_idx ON regression_vacuumdb_test (mod(a, 2));'); +$node->issues_sql_like( + [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--missing-only with missing index expression stats'); +$node->issues_sql_unlike( + [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--missing-only with no missing index expression stats'); + +$node->safe_psql('postgres', + 'CREATE STATISTICS regression_vacuumdb_test_stat ON a, b FROM regression_vacuumdb_test;'); +$node->issues_sql_like( + [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--missing-only with missing extended stats'); +$node->issues_sql_unlike( + [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--missing-only with no missing extended stats'); + +$node->safe_psql('postgres', + "CREATE TABLE regression_vacuumdb_child (a INT) INHERITS (regression_vacuumdb_test);\n" + . "INSERT INTO regression_vacuumdb_child VALUES (1, 2);\n" + . "ANALYZE regression_vacuumdb_child;\n"); +$node->issues_sql_like( + [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--missing-only with missing inherited stats'); +$node->issues_sql_unlike( + [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--missing-only with no missing inherited stats'); + +$node->safe_psql('postgres', + "CREATE TABLE regression_vacuumdb_parted (a INT) PARTITION BY LIST (a);\n" + . "CREATE TABLE regression_vacuumdb_part1 PARTITION OF regression_vacuumdb_parted FOR VALUES IN (1);\n" + . "INSERT INTO regression_vacuumdb_parted VALUES (1);\n" + . "ANALYZE regression_vacuumdb_part1;\n"); +$node->issues_sql_like( + [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_parted', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--missing-only with missing partition stats'); +$node->issues_sql_unlike( + [ 'vacuumdb', '--analyze-in-stages', '--missing-only', '-t', 'regression_vacuumdb_parted', 'postgres' ], + qr/statement:\ ANALYZE/sx, + '--missing-only with no missing partition stats'); + $node->issues_sql_like( [ 'vacuumdb', '--analyze-in-stages', '--all' ], qr/statement:\ SET\ default_statistics_target=1;\ SET\ vacuum_cost_delay=0; diff --git a/src/bin/scripts/vacuumdb.c b/src/bin/scripts/vacuumdb.c index 52b91837492..f84a521fbfd 100644 --- a/src/bin/scripts/vacuumdb.c +++ b/src/bin/scripts/vacuumdb.c @@ -47,6 +47,7 @@ typedef struct vacuumingOptions bool process_toast; bool skip_database_stats; char *buffer_usage_limit; + bool missing_only; } vacuumingOptions; /* object filter options */ @@ -134,6 +135,7 @@ main(int argc, char *argv[]) {"no-process-toast", no_argument, NULL, 11}, {"no-process-main", no_argument, NULL, 12}, {"buffer-usage-limit", required_argument, NULL, 13}, + {"missing-only", no_argument, NULL, 14}, {NULL, 0, NULL, 0} }; @@ -281,6 +283,9 @@ main(int argc, char *argv[]) case 13: vacopts.buffer_usage_limit = escape_quotes(optarg); break; + case 14: + vacopts.missing_only = true; + break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -366,6 +371,11 @@ main(int argc, char *argv[]) pg_fatal("cannot use the \"%s\" option with the \"%s\" option", "buffer-usage-limit", "full"); + /* Prohibit --missing-only without --analyze-only or --analyze-in-stages */ + if (vacopts.missing_only && !vacopts.analyze_only) + pg_fatal("cannot use the \"%s\" option without \"%s\" or \"%s\"", + "missing-only", "analyze-only", "analyze-in-stages"); + /* fill cparams except for dbname, which is set below */ cparams.pghost = host; cparams.pgport = port; @@ -615,6 +625,13 @@ vacuum_one_database(ConnParams *cparams, "--buffer-usage-limit", "16"); } + if (vacopts->missing_only && PQserverVersion(conn) < 150000) + { + PQfinish(conn); + pg_fatal("cannot use the \"%s\" option on server versions older than PostgreSQL %s", + "--missing-only", "15"); + } + /* skip_database_stats is used automatically if server supports it */ vacopts->skip_database_stats = (PQserverVersion(conn) >= 160000); @@ -839,6 +856,7 @@ retrieve_objects(PGconn *conn, vacuumingOptions *vacopts, " FROM pg_catalog.pg_class c\n" " JOIN pg_catalog.pg_namespace ns" " ON c.relnamespace OPERATOR(pg_catalog.=) ns.oid\n" + " CROSS JOIN LATERAL (SELECT c.relkind IN ('p', 'I')) as p (inherited)\n" " LEFT JOIN pg_catalog.pg_class t" " ON c.reltoastrelid OPERATOR(pg_catalog.=) t.oid\n"); @@ -922,6 +940,79 @@ retrieve_objects(PGconn *conn, vacuumingOptions *vacopts, vacopts->min_mxid_age); } + if (vacopts->missing_only) + { + appendPQExpBufferStr(&catalog_query, " AND (\n"); + + /* regular stats */ + appendPQExpBufferStr(&catalog_query, + " EXISTS (SELECT NULL FROM pg_catalog.pg_attribute a\n" + " WHERE a.attrelid OPERATOR(pg_catalog.=) c.oid\n" + " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n" + " AND a.attnum OPERATOR(pg_catalog.>) 0::pg_catalog.int2\n" + " AND NOT a.attisdropped\n" + " AND a.attstattarget IS DISTINCT FROM 0::pg_catalog.int2\n" + " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic s\n" + " WHERE s.starelid OPERATOR(pg_catalog.=) a.attrelid\n" + " AND s.staattnum OPERATOR(pg_catalog.=) a.attnum\n" + " AND s.stainherit OPERATOR(pg_catalog.=) p.inherited))\n"); + + /* extended stats */ + appendPQExpBufferStr(&catalog_query, + " OR EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext e\n" + " WHERE e.stxrelid OPERATOR(pg_catalog.=) c.oid\n" + " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n" + " AND e.stxstattarget IS DISTINCT FROM 0::pg_catalog.int2\n" + " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext_data d\n" + " WHERE d.stxoid OPERATOR(pg_catalog.=) e.oid\n" + " AND d.stxdinherit OPERATOR(pg_catalog.=) p.inherited))\n"); + + /* expression indexes */ + appendPQExpBufferStr(&catalog_query, + " OR EXISTS (SELECT NULL FROM pg_catalog.pg_index i\n" + " CROSS JOIN LATERAL pg_catalog.unnest(i.indkey) WITH ORDINALITY u (attnum, ord)\n" + " WHERE i.indrelid OPERATOR(pg_catalog.=) c.oid\n" + " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n" + " AND i.indexprs IS NOT NULL\n" + " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic s\n" + " WHERE s.starelid OPERATOR(pg_catalog.=) i.indexrelid\n" + " AND s.staattnum OPERATOR(pg_catalog.=) u.ord\n" + " AND s.stainherit OPERATOR(pg_catalog.=) p.inherited))\n"); + + /* table inheritance and regular stats */ + appendPQExpBufferStr(&catalog_query, + " OR EXISTS (SELECT NULL FROM pg_catalog.pg_attribute a\n" + " WHERE a.attrelid OPERATOR(pg_catalog.=) c.oid\n" + " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n" + " AND a.attnum OPERATOR(pg_catalog.>) 0::pg_catalog.int2\n" + " AND NOT a.attisdropped\n" + " AND a.attstattarget IS DISTINCT FROM 0::pg_catalog.int2\n" + " AND c.relhassubclass\n" + " AND NOT p.inherited\n" + " AND EXISTS (SELECT NULL FROM pg_catalog.pg_inherits h\n" + " WHERE h.inhparent OPERATOR(pg_catalog.=) c.oid)\n" + " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic s\n" + " WHERE s.starelid OPERATOR(pg_catalog.=) a.attrelid\n" + " AND s.staattnum OPERATOR(pg_catalog.=) a.attnum\n" + " AND s.stainherit))\n"); + + /* table inheritance and extended stats */ + appendPQExpBufferStr(&catalog_query, + " OR EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext e\n" + " WHERE e.stxrelid OPERATOR(pg_catalog.=) c.oid\n" + " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n" + " AND e.stxstattarget IS DISTINCT FROM 0::pg_catalog.int2\n" + " AND c.relhassubclass\n" + " AND NOT p.inherited\n" + " AND EXISTS (SELECT NULL FROM pg_catalog.pg_inherits h\n" + " WHERE h.inhparent OPERATOR(pg_catalog.=) c.oid)\n" + " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext_data d\n" + " WHERE d.stxoid OPERATOR(pg_catalog.=) e.oid\n" + " AND d.stxdinherit))\n"); + + appendPQExpBufferStr(&catalog_query, " )\n"); + } + /* * Execute the catalog query. We use the default search_path for this * query for consistency with table lookups done elsewhere by the user. @@ -1244,6 +1335,7 @@ help(const char *progname) printf(_(" -j, --jobs=NUM use this many concurrent connections to vacuum\n")); printf(_(" --min-mxid-age=MXID_AGE minimum multixact ID age of tables to vacuum\n")); printf(_(" --min-xid-age=XID_AGE minimum transaction ID age of tables to vacuum\n")); + printf(_(" --missing-only only analyze relations with missing statistics\n")); printf(_(" --no-index-cleanup don't remove index entries that point to dead tuples\n")); printf(_(" --no-process-main skip the main relation\n")); printf(_(" --no-process-toast skip the TOAST table associated with the table to vacuum\n")); diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm index b105cba05a6..ff8e04d3a03 100644 --- a/src/test/perl/PostgreSQL/Test/Cluster.pm +++ b/src/test/perl/PostgreSQL/Test/Cluster.pm @@ -2820,6 +2820,33 @@ sub issues_sql_like =pod +=item $node->issues_sql_unlike(cmd, unexpected_sql, test_name) + +Run a command on the node, then verify that $unexpected_sql does not appear in +the server log file. + +=cut + +sub issues_sql_unlike +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my ($self, $cmd, $unexpected_sql, $test_name) = @_; + + local %ENV = $self->_get_env(); + + my $log_location = -s $self->logfile; + + my $result = PostgreSQL::Test::Utils::run_log($cmd); + ok($result, "@$cmd exit code 0"); + my $log = + PostgreSQL::Test::Utils::slurp_file($self->logfile, $log_location); + unlike($log, $unexpected_sql, "$test_name: SQL not found in server log"); + return; +} + +=pod + =item $node->log_content() Returns the contents of log of the node -- 2.39.5 (Apple Git-154)