From e1b91d3a1ac8700f2a29acd206f7c35e429dd2e4 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Fri, 13 Dec 2024 21:37:39 -0600 Subject: [PATCH v13 1/1] Add cost-based delay time to progress views. XXX: NEEDS CATVERSION BUMP Author: Bertrand Drouvot Reviewed-by: Sami Imseih, Robert Haas, Masahiko Sawada, Masahiro Ikeda, Dilip Kumar, Sergei Kornilov Discussion: https://postgr.es/m/ZmaXmWDL829fzAVX%40ip-10-97-1-34.eu-west-3.compute.internal --- contrib/file_fdw/file_fdw.c | 2 +- doc/src/sgml/config.sgml | 24 ++++++ doc/src/sgml/monitoring.sgml | 27 ++++++ src/backend/catalog/system_views.sql | 6 +- src/backend/commands/analyze.c | 10 +-- src/backend/commands/vacuum.c | 82 ++++++++++++++++++- src/backend/commands/vacuumparallel.c | 5 ++ src/backend/tsearch/ts_typanalyze.c | 2 +- src/backend/utils/adt/array_typanalyze.c | 2 +- src/backend/utils/adt/rangetypes_typanalyze.c | 2 +- src/backend/utils/misc/guc_tables.c | 9 ++ src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/commands/progress.h | 2 + src/include/commands/vacuum.h | 4 + src/test/regress/expected/rules.out | 6 +- 15 files changed, 169 insertions(+), 15 deletions(-) diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index 1c81a7c073..088961b61b 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -1237,7 +1237,7 @@ file_acquire_sample_rows(Relation onerel, int elevel, for (;;) { /* Check for user-requested abort or sleep */ - vacuum_delay_point(); + analyze_delay_point(); /* Fetch next row */ MemoryContextReset(tupcontext); diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index e0c8325a39..9c5d44b24d 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -8386,6 +8386,30 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; + + track_cost_delay_timing (boolean) + + track_cost_delay_timing configuration parameter + + + + + Enables timing of cost-based vacuum delay (see + ). This parameter + is off by default, as it will repeatedly query the operating system for + the current time, which may cause significant overhead on some + platforms. You can use the tool to + measure the overhead of timing on your system. Cost-based vacuum delay + timing information is displayed in + pg_stat_progress_vacuum + and + pg_stat_progress_analyze. + Only superusers and users with the appropriate SET + privilege and change this setting. + + + + track_io_timing (boolean) diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 840d7f8161..4cf8486225 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -5503,6 +5503,18 @@ FROM pg_stat_get_backend_idset() AS backendid; acquiring inherited sample rows. + + + + delay_time double precision + + + Total time spent sleeping due to cost-based delay (see + ), in milliseconds + (if is enabled, otherwise + zero). + + @@ -6428,6 +6440,21 @@ FROM pg_stat_get_backend_idset() AS backendid; cleaning up indexes. + + + + delay_time double precision + + + Total time spent sleeping due to cost-based delay (see + ), in milliseconds + (if is enabled, otherwise + zero). This includes the time that any associated parallel workers have + slept. However, parallel workers report their sleep time no more + frequently than once per second, so the reported value may be slightly + stale. + + diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index da9a8fe99f..d969e2ac40 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1202,7 +1202,8 @@ CREATE VIEW pg_stat_progress_analyze AS S.param5 AS ext_stats_computed, S.param6 AS child_tables_total, S.param7 AS child_tables_done, - CAST(S.param8 AS oid) AS current_child_table_relid + CAST(S.param8 AS oid) AS current_child_table_relid, + S.param9 / 1000000::double precision AS delay_time FROM pg_stat_get_progress_info('ANALYZE') AS S LEFT JOIN pg_database D ON S.datid = D.oid; @@ -1222,7 +1223,8 @@ CREATE VIEW pg_stat_progress_vacuum AS S.param4 AS heap_blks_vacuumed, S.param5 AS index_vacuum_count, S.param6 AS max_dead_tuple_bytes, S.param7 AS dead_tuple_bytes, S.param8 AS num_dead_item_ids, S.param9 AS indexes_total, - S.param10 AS indexes_processed + S.param10 AS indexes_processed, + S.param11 / 1000000::double precision AS delay_time FROM pg_stat_get_progress_info('VACUUM') AS S LEFT JOIN pg_database D ON S.datid = D.oid; diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 9a56de2282..7656f92bee 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -913,7 +913,7 @@ compute_index_stats(Relation onerel, double totalrows, { HeapTuple heapTuple = rows[rowno]; - vacuum_delay_point(); + analyze_delay_point(); /* * Reset the per-tuple context each time, to reclaim any cruft @@ -1232,7 +1232,7 @@ acquire_sample_rows(Relation onerel, int elevel, /* Outer loop over blocks to sample */ while (table_scan_analyze_next_block(scan, stream)) { - vacuum_delay_point(); + analyze_delay_point(); while (table_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot)) { @@ -1964,7 +1964,7 @@ compute_trivial_stats(VacAttrStatsP stats, Datum value; bool isnull; - vacuum_delay_point(); + analyze_delay_point(); value = fetchfunc(stats, i, &isnull); @@ -2080,7 +2080,7 @@ compute_distinct_stats(VacAttrStatsP stats, int firstcount1, j; - vacuum_delay_point(); + analyze_delay_point(); value = fetchfunc(stats, i, &isnull); @@ -2427,7 +2427,7 @@ compute_scalar_stats(VacAttrStatsP stats, Datum value; bool isnull; - vacuum_delay_point(); + analyze_delay_point(); value = fetchfunc(stats, i, &isnull); diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index bb639ef51f..1c2b0c5932 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -39,6 +39,7 @@ #include "catalog/pg_inherits.h" #include "commands/cluster.h" #include "commands/defrem.h" +#include "commands/progress.h" #include "commands/vacuum.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -59,6 +60,12 @@ #include "utils/snapmgr.h" #include "utils/syscache.h" +/* + * Minimum interval for cost-based vacuum delay reports from a parallel worker. + * This aims to avoid sending too many messages and waking up the leader too + * frequently. + */ +#define PARALLEL_VACUUM_WORKER_DELAY_REPORT_INTERVAL_NS (NS_PER_S) /* * GUC parameters @@ -69,6 +76,7 @@ int vacuum_multixact_freeze_min_age; int vacuum_multixact_freeze_table_age; int vacuum_failsafe_age; int vacuum_multixact_failsafe_age; +bool track_cost_delay_timing; /* * Variables for cost-based vacuum delay. The defaults differ between @@ -79,6 +87,11 @@ int vacuum_multixact_failsafe_age; double vacuum_cost_delay = 0; int vacuum_cost_limit = 200; +/* + * Variable for reporting cost-based vacuum delay from parallel workers. + */ +int64 parallel_vacuum_worker_delay_ns = 0; + /* * VacuumFailsafeActive is a defined as a global so that we can determine * whether or not to re-enable cost-based vacuum delay when vacuuming a table. @@ -2358,8 +2371,8 @@ vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode) * This should be called in each major loop of VACUUM processing, * typically once per page processed. */ -void -vacuum_delay_point(void) +static void +vacuum_delay_point_internal(bool is_analyze) { double msec = 0; @@ -2402,13 +2415,66 @@ vacuum_delay_point(void) /* Nap if appropriate */ if (msec > 0) { + instr_time delay_start; + if (msec > vacuum_cost_delay * 4) msec = vacuum_cost_delay * 4; + if (track_cost_delay_timing) + INSTR_TIME_SET_CURRENT(delay_start); + pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY); pg_usleep(msec * 1000); pgstat_report_wait_end(); + if (track_cost_delay_timing) + { + instr_time delay_end; + instr_time delay; + + INSTR_TIME_SET_CURRENT(delay_end); + INSTR_TIME_SET_ZERO(delay); + INSTR_TIME_ACCUM_DIFF(delay, delay_end, delay_start); + + /* + * For parallel workers, we only report the delay time every once + * in a while to avoid overloading the leader with messages and + * interrupts. + */ + if (IsParallelWorker()) + { + static instr_time last_report_time; + instr_time time_since_last_report; + + Assert(!is_analyze); + + /* accumulate the delay time */ + parallel_vacuum_worker_delay_ns += INSTR_TIME_GET_NANOSEC(delay); + + /* calculate interval since last report */ + INSTR_TIME_SET_ZERO(time_since_last_report); + INSTR_TIME_ACCUM_DIFF(time_since_last_report, delay_end, last_report_time); + + /* if we haven't reported in a while, do so now */ + if (INSTR_TIME_GET_NANOSEC(time_since_last_report) >= + PARALLEL_VACUUM_WORKER_DELAY_REPORT_INTERVAL_NS) + { + pgstat_progress_parallel_incr_param(PROGRESS_VACUUM_DELAY_TIME, + parallel_vacuum_worker_delay_ns); + + /* reset variables */ + last_report_time = delay_end; + parallel_vacuum_worker_delay_ns = 0; + } + } + else if (is_analyze) + pgstat_progress_incr_param(PROGRESS_ANALYZE_DELAY_TIME, + INSTR_TIME_GET_NANOSEC(delay)); + else + pgstat_progress_incr_param(PROGRESS_VACUUM_DELAY_TIME, + INSTR_TIME_GET_NANOSEC(delay)); + } + /* * We don't want to ignore postmaster death during very long vacuums * with vacuum_cost_delay configured. We can't use the usual @@ -2435,6 +2501,18 @@ vacuum_delay_point(void) } } +void +vacuum_delay_point(void) +{ + vacuum_delay_point_internal(false); +} + +void +analyze_delay_point(void) +{ + vacuum_delay_point_internal(true); +} + /* * Computes the vacuum delay for parallel workers. * diff --git a/src/backend/commands/vacuumparallel.c b/src/backend/commands/vacuumparallel.c index 67cba17a56..ea5940e299 100644 --- a/src/backend/commands/vacuumparallel.c +++ b/src/backend/commands/vacuumparallel.c @@ -1087,6 +1087,11 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc) InstrEndParallelQuery(&buffer_usage[ParallelWorkerNumber], &wal_usage[ParallelWorkerNumber]); + /* Report any remaining cost-based vacuum delay time */ + if (track_cost_delay_timing) + pgstat_progress_parallel_incr_param(PROGRESS_VACUUM_DELAY_TIME, + parallel_vacuum_worker_delay_ns); + TidStoreDetach(dead_items); /* Pop the error context stack */ diff --git a/src/backend/tsearch/ts_typanalyze.c b/src/backend/tsearch/ts_typanalyze.c index ccafe42729..eff80980ab 100644 --- a/src/backend/tsearch/ts_typanalyze.c +++ b/src/backend/tsearch/ts_typanalyze.c @@ -204,7 +204,7 @@ compute_tsvector_stats(VacAttrStats *stats, char *lexemesptr; int j; - vacuum_delay_point(); + analyze_delay_point(); value = fetchfunc(stats, vector_no, &isnull); diff --git a/src/backend/utils/adt/array_typanalyze.c b/src/backend/utils/adt/array_typanalyze.c index 2c633bee6b..6491be8b3b 100644 --- a/src/backend/utils/adt/array_typanalyze.c +++ b/src/backend/utils/adt/array_typanalyze.c @@ -314,7 +314,7 @@ compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int distinct_count; bool count_item_found; - vacuum_delay_point(); + analyze_delay_point(); value = fetchfunc(stats, array_no, &isnull); if (isnull) diff --git a/src/backend/utils/adt/rangetypes_typanalyze.c b/src/backend/utils/adt/rangetypes_typanalyze.c index 3773f98115..1567ceba23 100644 --- a/src/backend/utils/adt/rangetypes_typanalyze.c +++ b/src/backend/utils/adt/rangetypes_typanalyze.c @@ -167,7 +167,7 @@ compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, upper; float8 length; - vacuum_delay_point(); + analyze_delay_point(); value = fetchfunc(stats, range_no, &isnull); if (isnull) diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 8cf1afbad2..031fcc43e3 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -1469,6 +1469,15 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { + {"track_cost_delay_timing", PGC_SUSET, STATS_CUMULATIVE, + gettext_noop("Collects timing statistics for cost-based vacuum delay."), + NULL + }, + &track_cost_delay_timing, + false, + NULL, NULL, NULL + }, { {"track_io_timing", PGC_SUSET, STATS_CUMULATIVE, gettext_noop("Collects timing statistics for database I/O activity."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index a2ac7575ca..c7fd4f26f0 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -640,6 +640,7 @@ #track_activities = on #track_activity_query_size = 1024 # (change requires restart) #track_counts = on +#track_cost_delay_timing = off #track_io_timing = off #track_wal_io_timing = off #track_functions = none # none, pl, all diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h index 5616d64523..df862192a6 100644 --- a/src/include/commands/progress.h +++ b/src/include/commands/progress.h @@ -28,6 +28,7 @@ #define PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS 7 #define PROGRESS_VACUUM_INDEXES_TOTAL 8 #define PROGRESS_VACUUM_INDEXES_PROCESSED 9 +#define PROGRESS_VACUUM_DELAY_TIME 10 /* Phases of vacuum (as advertised via PROGRESS_VACUUM_PHASE) */ #define PROGRESS_VACUUM_PHASE_SCAN_HEAP 1 @@ -46,6 +47,7 @@ #define PROGRESS_ANALYZE_CHILD_TABLES_TOTAL 5 #define PROGRESS_ANALYZE_CHILD_TABLES_DONE 6 #define PROGRESS_ANALYZE_CURRENT_CHILD_TABLE_RELID 7 +#define PROGRESS_ANALYZE_DELAY_TIME 8 /* Phases of analyze (as advertised via PROGRESS_ANALYZE_PHASE) */ #define PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS 1 diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 759f9a87d3..f3f0abc87f 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -296,6 +296,7 @@ extern PGDLLIMPORT int vacuum_multixact_freeze_min_age; extern PGDLLIMPORT int vacuum_multixact_freeze_table_age; extern PGDLLIMPORT int vacuum_failsafe_age; extern PGDLLIMPORT int vacuum_multixact_failsafe_age; +extern PGDLLIMPORT bool track_cost_delay_timing; /* * Maximum value for default_statistics_target and per-column statistics @@ -313,6 +314,8 @@ extern PGDLLIMPORT bool VacuumFailsafeActive; extern PGDLLIMPORT double vacuum_cost_delay; extern PGDLLIMPORT int vacuum_cost_limit; +extern PGDLLIMPORT int64 parallel_vacuum_worker_delay_ns; + /* in commands/vacuum.c */ extern void ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel); extern void vacuum(List *relations, VacuumParams *params, @@ -340,6 +343,7 @@ extern bool vacuum_get_cutoffs(Relation rel, const VacuumParams *params, extern bool vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs); extern void vac_update_datfrozenxid(void); extern void vacuum_delay_point(void); +extern void analyze_delay_point(void); extern bool vacuum_is_permitted_for_relation(Oid relid, Form_pg_class reltuple, bits32 options); extern Relation vacuum_open_relation(Oid relid, RangeVar *relation, diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 3014d047fe..4796bbd01c 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1926,7 +1926,8 @@ pg_stat_progress_analyze| SELECT s.pid, s.param5 AS ext_stats_computed, s.param6 AS child_tables_total, s.param7 AS child_tables_done, - (s.param8)::oid AS current_child_table_relid + (s.param8)::oid AS current_child_table_relid, + ((s.param9)::double precision / (1000000)::double precision) AS delay_time FROM (pg_stat_get_progress_info('ANALYZE'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20) LEFT JOIN pg_database d ON ((s.datid = d.oid))); pg_stat_progress_basebackup| SELECT pid, @@ -2056,7 +2057,8 @@ pg_stat_progress_vacuum| SELECT s.pid, s.param7 AS dead_tuple_bytes, s.param8 AS num_dead_item_ids, s.param9 AS indexes_total, - s.param10 AS indexes_processed + s.param10 AS indexes_processed, + ((s.param11)::double precision / (1000000)::double precision) AS delay_time FROM (pg_stat_get_progress_info('VACUUM'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20) LEFT JOIN pg_database d ON ((s.datid = d.oid))); pg_stat_recovery_prefetch| SELECT stats_reset, -- 2.39.5 (Apple Git-154)