From 859f3fecb4fb4900b6ce12f6346c5d9565fbc072 Mon Sep 17 00:00:00 2001 From: Fujii Masao Date: Fri, 20 Sep 2024 11:33:07 +0900 Subject: [PATCH v4 1/2] Add num_done counter to the pg_stat_checkpointer view. Checkpoints can be skipped when the server is idle. The existing num_timed and num_requested counters in pg_stat_checkpointer track both completed and skipped checkpoints, but there was no way to count only the completed ones. This commit introduces the num_done counter, which tracks only completed checkpoints, making it easier to see how many were actually performed. Bump catalog version. Author: Anton A. Melnikov Reviewed-by: Fujii Masao Discussion: https://postgr.es/m/9ea77f40-818d-4841-9dee-158ac8f6e690@oss.nttdata.com --- doc/src/sgml/monitoring.sgml | 11 +++++- src/backend/access/transam/xlog.c | 9 ++++- src/backend/catalog/system_views.sql | 1 + src/backend/postmaster/checkpointer.c | 39 ++++++++++++------- .../utils/activity/pgstat_checkpointer.c | 2 + src/backend/utils/adt/pgstatfuncs.c | 6 +++ src/include/access/xlog.h | 2 +- src/include/catalog/pg_proc.dat | 6 +++ src/include/pgstat.h | 1 + src/test/regress/expected/rules.out | 1 + 10 files changed, 60 insertions(+), 18 deletions(-) diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index a2fda4677d..19bf0164f1 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -3063,7 +3063,16 @@ description | Waiting for a newly initialized WAL file to reach durable storage num_requested bigint - Number of requested checkpoints that have been performed + Number of backend requested checkpoints + + + + + + num_done bigint + + + Number of checkpoints that have been performed diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 853ab06812..64304d77d3 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -6878,8 +6878,11 @@ update_checkpoint_display(int flags, bool restartpoint, bool reset) * In this case, we only insert an XLOG_CHECKPOINT_SHUTDOWN record, and it's * both the record marking the completion of the checkpoint and the location * from which WAL replay would begin if needed. + * + * Returns true if a new checkpoint was performed, or false if it was skipped + * because the system was idle. */ -void +bool CreateCheckPoint(int flags) { bool shutdown; @@ -6971,7 +6974,7 @@ CreateCheckPoint(int flags) END_CRIT_SECTION(); ereport(DEBUG1, (errmsg_internal("checkpoint skipped because system is idle"))); - return; + return false; } } @@ -7353,6 +7356,8 @@ CreateCheckPoint(int flags) CheckpointStats.ckpt_segs_added, CheckpointStats.ckpt_segs_removed, CheckpointStats.ckpt_segs_recycled); + + return true; } /* diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 7fd5d256a1..49109dbdc8 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1138,6 +1138,7 @@ CREATE VIEW pg_stat_checkpointer AS SELECT pg_stat_get_checkpointer_num_timed() AS num_timed, pg_stat_get_checkpointer_num_requested() AS num_requested, + pg_stat_get_checkpointer_num_performed() AS num_done, pg_stat_get_checkpointer_restartpoints_timed() AS restartpoints_timed, pg_stat_get_checkpointer_restartpoints_requested() AS restartpoints_req, pg_stat_get_checkpointer_restartpoints_performed() AS restartpoints_done, diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index eeb73c8572..9087e3f8db 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -460,10 +460,7 @@ CheckpointerMain(char *startup_data, size_t startup_data_len) * Do the checkpoint. */ if (!do_restartpoint) - { - CreateCheckPoint(flags); - ckpt_performed = true; - } + ckpt_performed = CreateCheckPoint(flags); else ckpt_performed = CreateRestartPoint(flags); @@ -484,7 +481,7 @@ CheckpointerMain(char *startup_data, size_t startup_data_len) ConditionVariableBroadcast(&CheckpointerShmem->done_cv); - if (ckpt_performed) + if (!do_restartpoint) { /* * Note we record the checkpoint start time not end time as @@ -493,18 +490,32 @@ CheckpointerMain(char *startup_data, size_t startup_data_len) */ last_checkpoint_time = now; - if (do_restartpoint) - PendingCheckpointerStats.restartpoints_performed++; + if (ckpt_performed) + PendingCheckpointerStats.num_performed++; } else { - /* - * We were not able to perform the restartpoint (checkpoints - * throw an ERROR in case of error). Most likely because we - * have not received any new checkpoint WAL records since the - * last restartpoint. Try again in 15 s. - */ - last_checkpoint_time = now - CheckPointTimeout + 15; + if (ckpt_performed) + { + /* + * The same as for checkpoint. Please see the + * corresponding comment. + */ + last_checkpoint_time = now; + + PendingCheckpointerStats.restartpoints_performed++; + } + else + { + /* + * We were not able to perform the restartpoint + * (checkpoints throw an ERROR in case of error). Most + * likely because we have not received any new checkpoint + * WAL records since the last restartpoint. Try again in + * 15 s. + */ + last_checkpoint_time = now - CheckPointTimeout + 15; + } } ckpt_active = false; diff --git a/src/backend/utils/activity/pgstat_checkpointer.c b/src/backend/utils/activity/pgstat_checkpointer.c index bbfc9c7e18..4a0a2d1493 100644 --- a/src/backend/utils/activity/pgstat_checkpointer.c +++ b/src/backend/utils/activity/pgstat_checkpointer.c @@ -49,6 +49,7 @@ pgstat_report_checkpointer(void) #define CHECKPOINTER_ACC(fld) stats_shmem->stats.fld += PendingCheckpointerStats.fld CHECKPOINTER_ACC(num_timed); CHECKPOINTER_ACC(num_requested); + CHECKPOINTER_ACC(num_performed); CHECKPOINTER_ACC(restartpoints_timed); CHECKPOINTER_ACC(restartpoints_requested); CHECKPOINTER_ACC(restartpoints_performed); @@ -127,6 +128,7 @@ pgstat_checkpointer_snapshot_cb(void) #define CHECKPOINTER_COMP(fld) pgStatLocal.snapshot.checkpointer.fld -= reset.fld; CHECKPOINTER_COMP(num_timed); CHECKPOINTER_COMP(num_requested); + CHECKPOINTER_COMP(num_performed); CHECKPOINTER_COMP(restartpoints_timed); CHECKPOINTER_COMP(restartpoints_requested); CHECKPOINTER_COMP(restartpoints_performed); diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 9c23ac7c8c..17b0fc02ef 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1191,6 +1191,12 @@ pg_stat_get_checkpointer_num_requested(PG_FUNCTION_ARGS) PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->num_requested); } +Datum +pg_stat_get_checkpointer_num_performed(PG_FUNCTION_ARGS) +{ + PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->num_performed); +} + Datum pg_stat_get_checkpointer_restartpoints_timed(PG_FUNCTION_ARGS) { diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 083810f5b4..36f6e4e4b4 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -239,7 +239,7 @@ extern void LocalProcessControlFile(bool reset); extern WalLevel GetActiveWalLevelOnStandby(void); extern void StartupXLOG(void); extern void ShutdownXLOG(int code, Datum arg); -extern void CreateCheckPoint(int flags); +extern bool CreateCheckPoint(int flags); extern bool CreateRestartPoint(int flags); extern WALAvailability GetWALAvailability(XLogRecPtr targetLSN); extern void XLogPutNextOid(Oid nextOid); diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 43f608d7a0..52fca54f3a 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5817,6 +5817,12 @@ proname => 'pg_stat_get_checkpointer_num_requested', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => '', prosrc => 'pg_stat_get_checkpointer_num_requested' }, +{ oid => '2775', + descr => 'statistics: number of checkpoints performed by the checkpointer', + proname => 'pg_stat_get_checkpointer_num_performed', + provolatile => 's', proparallel => 'r', prorettype => 'int8', + proargtypes => '', + prosrc => 'pg_stat_get_checkpointer_num_performed' }, { oid => '6327', descr => 'statistics: number of timed restartpoints started by the checkpointer', proname => 'pg_stat_get_checkpointer_restartpoints_timed', provolatile => 's', diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 4752dfe719..476acd680c 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -294,6 +294,7 @@ typedef struct PgStat_CheckpointerStats { PgStat_Counter num_timed; PgStat_Counter num_requested; + PgStat_Counter num_performed; PgStat_Counter restartpoints_timed; PgStat_Counter restartpoints_requested; PgStat_Counter restartpoints_performed; diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index a1626f3fae..f5434d8365 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1824,6 +1824,7 @@ pg_stat_bgwriter| SELECT pg_stat_get_bgwriter_buf_written_clean() AS buffers_cle pg_stat_get_bgwriter_stat_reset_time() AS stats_reset; pg_stat_checkpointer| SELECT pg_stat_get_checkpointer_num_timed() AS num_timed, pg_stat_get_checkpointer_num_requested() AS num_requested, + pg_stat_get_checkpointer_num_performed() AS num_done, pg_stat_get_checkpointer_restartpoints_timed() AS restartpoints_timed, pg_stat_get_checkpointer_restartpoints_requested() AS restartpoints_req, pg_stat_get_checkpointer_restartpoints_performed() AS restartpoints_done, -- 2.45.2