From b0789841d0353e57deb7054f42f0d401a3233bd3 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Mon, 10 Feb 2025 14:25:02 -0600 Subject: [PATCH v17 2/2] Add cost-based delay time to progress views. This commit adds the amount of time spent sleeping due to cost-based delay to the pg_stat_progress_vacuum and pg_stat_progress_analyze system views. A new configuration parameter named track_cost_delay_timing, which is off by default, controls whether this information is gathered. For vacuum, the reported value includes the time that any associated parallel workers have slept. However, parallel workers only report their sleep time no more frequently than once per second to avoid overloading the leader process. XXX: NEEDS CATVERSION BUMP Author: Bertrand Drouvot Co-authored-by: Nathan Bossart Reviewed-by: Sami Imseih Reviewed-by: Robert Haas Reviewed-by: Masahiko Sawada Reviewed-by: Masahiro Ikeda Reviewed-by: Dilip Kumar Reviewed-by: Sergei Kornilov Discussion: https://postgr.es/m/ZmaXmWDL829fzAVX%40ip-10-97-1-34.eu-west-3.compute.internal --- doc/src/sgml/config.sgml | 24 +++++++ doc/src/sgml/monitoring.sgml | 27 ++++++++ src/backend/catalog/system_views.sql | 6 +- src/backend/commands/vacuum.c | 64 +++++++++++++++++++ src/backend/commands/vacuumparallel.c | 5 ++ src/backend/utils/misc/guc_tables.c | 9 +++ src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/commands/progress.h | 2 + src/include/commands/vacuum.h | 3 + src/test/regress/expected/rules.out | 6 +- 10 files changed, 143 insertions(+), 4 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 38244409e3c..79a66ba7181 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -8246,6 +8246,30 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; + + track_cost_delay_timing (boolean) + + track_cost_delay_timing configuration parameter + + + + + Enables timing of cost-based vacuum delay (see + ). This parameter + is off by default, as it will repeatedly query the operating system for + the current time, which may cause significant overhead on some + platforms. You can use the tool to + measure the overhead of timing on your system. Cost-based vacuum delay + timing information is displayed in + pg_stat_progress_vacuum + and + pg_stat_progress_analyze. + Only superusers and users with the appropriate SET + privilege can change this setting. + + + + track_io_timing (boolean) diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index edc2470bcf9..928a6eb64b0 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -5606,6 +5606,18 @@ FROM pg_stat_get_backend_idset() AS backendid; acquiring inherited sample rows. + + + + delay_time double precision + + + Total time spent sleeping due to cost-based delay (see + , in milliseconds + (if is enabled, otherwise + zero). + + @@ -6531,6 +6543,21 @@ FROM pg_stat_get_backend_idset() AS backendid; cleaning up indexes. + + + + delay_time double precision + + + Total time spent sleeping due to cost-based delay (see + ), in milliseconds + (if is enabled, otherwise + zero). This includes the time that any associated parallel workers have + slept. However, parallel workers report their sleep time no more + frequently than once per second, so the reported value may be slightly + stale. + + diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index cddc3ea9b53..eff0990957e 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1213,7 +1213,8 @@ CREATE VIEW pg_stat_progress_analyze AS S.param5 AS ext_stats_computed, S.param6 AS child_tables_total, S.param7 AS child_tables_done, - CAST(S.param8 AS oid) AS current_child_table_relid + CAST(S.param8 AS oid) AS current_child_table_relid, + S.param9 / 1000000::double precision AS delay_time FROM pg_stat_get_progress_info('ANALYZE') AS S LEFT JOIN pg_database D ON S.datid = D.oid; @@ -1233,7 +1234,8 @@ CREATE VIEW pg_stat_progress_vacuum AS S.param4 AS heap_blks_vacuumed, S.param5 AS index_vacuum_count, S.param6 AS max_dead_tuple_bytes, S.param7 AS dead_tuple_bytes, S.param8 AS num_dead_item_ids, S.param9 AS indexes_total, - S.param10 AS indexes_processed + S.param10 AS indexes_processed, + S.param11 / 1000000::double precision AS delay_time FROM pg_stat_get_progress_info('VACUUM') AS S LEFT JOIN pg_database D ON S.datid = D.oid; diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 5e394c151c9..4cba2e0a260 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -39,6 +39,7 @@ #include "catalog/pg_inherits.h" #include "commands/cluster.h" #include "commands/defrem.h" +#include "commands/progress.h" #include "commands/vacuum.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -59,6 +60,12 @@ #include "utils/snapmgr.h" #include "utils/syscache.h" +/* + * Minimum interval for cost-based vacuum delay reports from a parallel worker. + * This aims to avoid sending too many messages and waking up the leader too + * frequently + */ +#define PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS (NS_PER_S) /* * GUC parameters @@ -69,6 +76,7 @@ int vacuum_multixact_freeze_min_age; int vacuum_multixact_freeze_table_age; int vacuum_failsafe_age; int vacuum_multixact_failsafe_age; +bool track_cost_delay_timing; /* * Variables for cost-based vacuum delay. The defaults differ between @@ -79,6 +87,9 @@ int vacuum_multixact_failsafe_age; double vacuum_cost_delay = 0; int vacuum_cost_limit = 200; +/* Variable for reporting cost-based vacuum delay from parallel workers. */ +int64 parallel_vacuum_worker_delay_ns = 0; + /* * VacuumFailsafeActive is a defined as a global so that we can determine * whether or not to re-enable cost-based vacuum delay when vacuuming a table. @@ -2401,13 +2412,66 @@ vacuum_delay_point(bool is_analyze) /* Nap if appropriate */ if (msec > 0) { + instr_time delay_start; + if (msec > vacuum_cost_delay * 4) msec = vacuum_cost_delay * 4; + if (track_cost_delay_timing) + INSTR_TIME_SET_CURRENT(delay_start); + pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY); pg_usleep(msec * 1000); pgstat_report_wait_end(); + if (track_cost_delay_timing) + { + instr_time delay_end; + instr_time delay; + + INSTR_TIME_SET_CURRENT(delay_end); + INSTR_TIME_SET_ZERO(delay); + INSTR_TIME_ACCUM_DIFF(delay, delay_end, delay_start); + + /* + * For parallel workers, we only report the delay time every once + * in a while to avoid overloading the leader with messages and + * interrupts. + */ + if (IsParallelWorker()) + { + static instr_time last_report_time; + instr_time time_since_last_report; + + Assert(!is_analyze); + + /* Accumulate the delay time. */ + parallel_vacuum_worker_delay_ns += INSTR_TIME_GET_NANOSEC(delay); + + /* Calculate interval since last report. */ + INSTR_TIME_SET_ZERO(time_since_last_report); + INSTR_TIME_ACCUM_DIFF(time_since_last_report, delay_end, last_report_time); + + /* If we haven't reported in a while, do so now. */ + if (INSTR_TIME_GET_NANOSEC(time_since_last_report) >= + PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS) + { + pgstat_progress_parallel_incr_param(PROGRESS_VACUUM_DELAY_TIME, + parallel_vacuum_worker_delay_ns); + + /* Reset variables. */ + last_report_time = delay_end; + parallel_vacuum_worker_delay_ns = 0; + } + } + else if (is_analyze) + pgstat_progress_incr_param(PROGRESS_ANALYZE_DELAY_TIME, + INSTR_TIME_GET_NANOSEC(delay)); + else + pgstat_progress_incr_param(PROGRESS_VACUUM_DELAY_TIME, + INSTR_TIME_GET_NANOSEC(delay)); + } + /* * We don't want to ignore postmaster death during very long vacuums * with vacuum_cost_delay configured. We can't use the usual diff --git a/src/backend/commands/vacuumparallel.c b/src/backend/commands/vacuumparallel.c index dc3322c256b..2b9d548cdeb 100644 --- a/src/backend/commands/vacuumparallel.c +++ b/src/backend/commands/vacuumparallel.c @@ -1094,6 +1094,11 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc) InstrEndParallelQuery(&buffer_usage[ParallelWorkerNumber], &wal_usage[ParallelWorkerNumber]); + /* Report any remaining cost-based vacuum delay time */ + if (track_cost_delay_timing) + pgstat_progress_parallel_incr_param(PROGRESS_VACUUM_DELAY_TIME, + parallel_vacuum_worker_delay_ns); + TidStoreDetach(dead_items); /* Pop the error context stack */ diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index ce7534d4d23..1efee7af176 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -1470,6 +1470,15 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { + {"track_cost_delay_timing", PGC_SUSET, STATS_CUMULATIVE, + gettext_noop("Collects timing statistics for cost-based vacuum delay."), + NULL + }, + &track_cost_delay_timing, + false, + NULL, NULL, NULL + }, { {"track_io_timing", PGC_SUSET, STATS_CUMULATIVE, gettext_noop("Collects timing statistics for database I/O activity."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index c40b7a3121e..6f77e5f8b26 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -632,6 +632,7 @@ #track_activities = on #track_activity_query_size = 1024 # (change requires restart) #track_counts = on +#track_cost_delay_timing = off #track_io_timing = off #track_wal_io_timing = off #track_functions = none # none, pl, all diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h index 18e3179ef63..7c736e7b03b 100644 --- a/src/include/commands/progress.h +++ b/src/include/commands/progress.h @@ -28,6 +28,7 @@ #define PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS 7 #define PROGRESS_VACUUM_INDEXES_TOTAL 8 #define PROGRESS_VACUUM_INDEXES_PROCESSED 9 +#define PROGRESS_VACUUM_DELAY_TIME 10 /* Phases of vacuum (as advertised via PROGRESS_VACUUM_PHASE) */ #define PROGRESS_VACUUM_PHASE_SCAN_HEAP 1 @@ -46,6 +47,7 @@ #define PROGRESS_ANALYZE_CHILD_TABLES_TOTAL 5 #define PROGRESS_ANALYZE_CHILD_TABLES_DONE 6 #define PROGRESS_ANALYZE_CURRENT_CHILD_TABLE_RELID 7 +#define PROGRESS_ANALYZE_DELAY_TIME 8 /* Phases of analyze (as advertised via PROGRESS_ANALYZE_PHASE) */ #define PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS 1 diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index b884304dfe7..b3eedf699af 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -296,6 +296,7 @@ extern PGDLLIMPORT int vacuum_multixact_freeze_min_age; extern PGDLLIMPORT int vacuum_multixact_freeze_table_age; extern PGDLLIMPORT int vacuum_failsafe_age; extern PGDLLIMPORT int vacuum_multixact_failsafe_age; +extern PGDLLIMPORT bool track_cost_delay_timing; /* * Maximum value for default_statistics_target and per-column statistics @@ -313,6 +314,8 @@ extern PGDLLIMPORT bool VacuumFailsafeActive; extern PGDLLIMPORT double vacuum_cost_delay; extern PGDLLIMPORT int vacuum_cost_limit; +extern PGDLLIMPORT int64 parallel_vacuum_worker_delay_ns; + /* in commands/vacuum.c */ extern void ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel); extern void vacuum(List *relations, VacuumParams *params, diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 3361f6a69c9..5baba8d39ff 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1932,7 +1932,8 @@ pg_stat_progress_analyze| SELECT s.pid, s.param5 AS ext_stats_computed, s.param6 AS child_tables_total, s.param7 AS child_tables_done, - (s.param8)::oid AS current_child_table_relid + (s.param8)::oid AS current_child_table_relid, + ((s.param9)::double precision / (1000000)::double precision) AS delay_time FROM (pg_stat_get_progress_info('ANALYZE'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20) LEFT JOIN pg_database d ON ((s.datid = d.oid))); pg_stat_progress_basebackup| SELECT pid, @@ -2062,7 +2063,8 @@ pg_stat_progress_vacuum| SELECT s.pid, s.param7 AS dead_tuple_bytes, s.param8 AS num_dead_item_ids, s.param9 AS indexes_total, - s.param10 AS indexes_processed + s.param10 AS indexes_processed, + ((s.param11)::double precision / (1000000)::double precision) AS delay_time FROM (pg_stat_get_progress_info('VACUUM'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20) LEFT JOIN pg_database d ON ((s.datid = d.oid))); pg_stat_recovery_prefetch| SELECT stats_reset, -- 2.39.5 (Apple Git-154)