From 8c28ec288a99233580b15ae689e81610b2a8348c Mon Sep 17 00:00:00 2001
From: Atsushi Torikoshi <torikoshia@oss.nttdata.com>
Date: Tue, 24 Dec 2024 17:26:18 +0900
Subject: [PATCH v1] PoC: Allow EXPLAIN to output page fault information

This patch adds PAGEFAULTS option to EXPLAIN and it tracks both
minor and major faults during planning and execution phases.

This is a PoC patch and only shows page faults when the FORMAT is TEXT.

---
 src/backend/commands/explain.c | 103 +++++++++++++++++++++++++++++++--
 src/backend/commands/prepare.c |  23 ++++++++
 src/include/commands/explain.h |  11 ++++
 3 files changed, 133 insertions(+), 4 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index a201ed3082..b8943c7ef9 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -13,6 +13,8 @@
  */
 #include "postgres.h"
 
+#include <sys/resource.h>
+
 #include "access/xact.h"
 #include "catalog/pg_type.h"
 #include "commands/createas.h"
@@ -145,6 +147,8 @@ static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es);
 static const char *explain_get_index_name(Oid indexId);
 static bool peek_buffer_usage(ExplainState *es, const BufferUsage *usage);
 static void show_buffer_usage(ExplainState *es, const BufferUsage *usage);
+static bool peek_pagefault(ExplainState *es, const PageFaults * usage);
+static void show_pagefault(ExplainState *es, const PageFaults * usage);
 static void show_wal_usage(ExplainState *es, const WalUsage *usage);
 static void show_memory_counters(ExplainState *es,
 								 const MemoryContextCounters *mem_counters);
@@ -217,6 +221,8 @@ ExplainQuery(ParseState *pstate, ExplainStmt *stmt,
 			buffers_set = true;
 			es->buffers = defGetBoolean(opt);
 		}
+		else if (strcmp(opt->defname, "pagefaults") == 0)
+			es->pagefaults = defGetBoolean(opt);
 		else if (strcmp(opt->defname, "wal") == 0)
 			es->wal = defGetBoolean(opt);
 		else if (strcmp(opt->defname, "settings") == 0)
@@ -475,6 +481,8 @@ standard_ExplainOneQuery(Query *query, int cursorOptions,
 				planduration;
 	BufferUsage bufusage_start,
 				bufusage;
+	PageFaults	pagefaults = {0};
+	PageFaults	pagefaults_start = {0};
 	MemoryContextCounters mem_counters;
 	MemoryContext planner_ctx = NULL;
 	MemoryContext saved_ctx = NULL;
@@ -499,6 +507,15 @@ standard_ExplainOneQuery(Query *query, int cursorOptions,
 		bufusage_start = pgBufferUsage;
 	INSTR_TIME_SET_CURRENT(planstart);
 
+	if (es->pagefaults)
+	{
+		struct rusage rusage;
+
+		getrusage(RUSAGE_SELF, &rusage);
+		pagefaults_start.minflt = rusage.ru_minflt;
+		pagefaults_start.majflt = rusage.ru_majflt;
+	}
+
 	/* plan the query */
 	plan = pg_plan_query(query, queryString, cursorOptions, params);
 
@@ -518,9 +535,19 @@ standard_ExplainOneQuery(Query *query, int cursorOptions,
 		BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
 	}
 
+	if (es->pagefaults)
+	{
+		struct rusage rusage;
+
+		getrusage(RUSAGE_SELF, &rusage);
+		pagefaults.minflt = rusage.ru_minflt - pagefaults_start.minflt;
+		pagefaults.majflt = rusage.ru_majflt - pagefaults_start.majflt;
+	}
+
 	/* run it (if needed) and produce output */
 	ExplainOnePlan(plan, into, es, queryString, params, queryEnv,
 				   &planduration, (es->buffers ? &bufusage : NULL),
+				   (es->pagefaults ? &pagefaults : NULL),
 				   es->memory ? &mem_counters : NULL);
 }
 
@@ -644,7 +671,7 @@ void
 ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 			   const char *queryString, ParamListInfo params,
 			   QueryEnvironment *queryEnv, const instr_time *planduration,
-			   const BufferUsage *bufusage,
+			   const BufferUsage *bufusage, const PageFaults * planpagefaults,
 			   const MemoryContextCounters *mem_counters)
 {
 	DestReceiver *dest;
@@ -654,6 +681,9 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 	int			eflags;
 	int			instrument_option = 0;
 	SerializeMetrics serializeMetrics = {0};
+	PageFaults	pagefaults = {0};
+	PageFaults	pagefaults_start = {0};
+	struct rusage rusage;
 
 	Assert(plannedstmt->commandType != CMD_UTILITY);
 
@@ -674,6 +704,14 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 	 */
 	INSTR_TIME_SET_CURRENT(starttime);
 
+	if (es->pagefaults)
+	{
+		getrusage(RUSAGE_SELF, &rusage);
+
+		pagefaults_start.minflt = rusage.ru_minflt;
+		pagefaults_start.majflt = rusage.ru_majflt;
+	}
+
 	/*
 	 * Use a snapshot with an updated command ID to ensure this query sees
 	 * results of any previously executed queries.
@@ -748,7 +786,8 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 	ExplainPrintPlan(es, queryDesc);
 
 	/* Show buffer and/or memory usage in planning */
-	if (peek_buffer_usage(es, bufusage) || mem_counters)
+	if (peek_buffer_usage(es, bufusage) || peek_pagefault(es, planpagefaults) ||
+		mem_counters)
 	{
 		ExplainOpenGroup("Planning", "Planning", true, es);
 
@@ -758,10 +797,12 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 			appendStringInfoString(es->str, "Planning:\n");
 			es->indent++;
 		}
-
 		if (bufusage)
 			show_buffer_usage(es, bufusage);
 
+		if (es->pagefaults)
+			show_pagefault(es, planpagefaults);
+
 		if (mem_counters)
 			show_memory_counters(es, mem_counters);
 
@@ -813,6 +854,26 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 
 	totaltime += elapsed_time(&starttime);
 
+	if (es->pagefaults)
+	{
+		getrusage(RUSAGE_SELF, &rusage);
+
+		pagefaults.minflt = rusage.ru_minflt - pagefaults_start.minflt;
+		pagefaults.majflt = rusage.ru_majflt - pagefaults_start.majflt;
+
+		if (es->format == EXPLAIN_FORMAT_TEXT)
+		{
+			ExplainIndentText(es);
+			appendStringInfoString(es->str, "Execution:\n");
+			es->indent++;
+		}
+		show_pagefault(es, &pagefaults);
+
+		if (es->format == EXPLAIN_FORMAT_TEXT)
+			es->indent--;
+		ExplainCloseGroup("Execution", "Execution", true, es);
+	}
+
 	/*
 	 * We only report execution time if we actually ran the query (that is,
 	 * the user specified ANALYZE), and if summary reporting is enabled (the
@@ -822,7 +883,6 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 	if (es->summary && es->analyze)
 		ExplainPropertyFloat("Execution Time", "ms", 1000.0 * totaltime, 3,
 							 es);
-
 	ExplainCloseGroup("Query", NULL, true, es);
 }
 
@@ -4232,6 +4292,41 @@ show_buffer_usage(ExplainState *es, const BufferUsage *usage)
 	}
 }
 
+static bool
+peek_pagefault(ExplainState *es, const PageFaults * usage)
+{
+	if (usage == NULL)
+		return false;
+
+	if (usage->minflt <= 0 && usage->majflt <= 0)
+		return false;
+
+	else
+		return true;
+}
+
+/*
+ * Show majar/minor page faults.
+ */
+static void
+show_pagefault(ExplainState *es, const PageFaults * usage)
+{
+	/* Show only positive counter values. */
+	if (usage->minflt <= 0 && usage->majflt <= 0)
+		return;
+
+	if (es->format == EXPLAIN_FORMAT_TEXT)
+	{
+
+		ExplainIndentText(es);
+		appendStringInfoString(es->str, "Page Faults:");
+		appendStringInfo(es->str, " minor=%ld", (long) usage->minflt);
+		appendStringInfo(es->str, " major=%ld", (long) usage->majflt);
+
+		appendStringInfoChar(es->str, '\n');
+	}
+}
+
 /*
  * Show WAL usage details.
  */
diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c
index 39a71c1de2..4d944aaeb4 100644
--- a/src/backend/commands/prepare.c
+++ b/src/backend/commands/prepare.c
@@ -17,6 +17,7 @@
 #include "postgres.h"
 
 #include <limits.h>
+#include <sys/resource.h>
 
 #include "access/xact.h"
 #include "catalog/pg_type.h"
@@ -579,6 +580,8 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es,
 	instr_time	planduration;
 	BufferUsage bufusage_start,
 				bufusage;
+	PageFaults	pagefaults_start,
+				pagefaults;
 	MemoryContextCounters mem_counters;
 	MemoryContext planner_ctx = NULL;
 	MemoryContext saved_ctx = NULL;
@@ -595,6 +598,16 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es,
 
 	if (es->buffers)
 		bufusage_start = pgBufferUsage;
+
+	if (es->pagefaults)
+	{
+		struct rusage rusage;
+
+		getrusage(RUSAGE_SELF, &rusage);
+		pagefaults_start.minflt = rusage.ru_minflt;
+		pagefaults_start.majflt = rusage.ru_majflt;
+	}
+
 	INSTR_TIME_SET_CURRENT(planstart);
 
 	/* Look it up in the hash table */
@@ -646,6 +659,15 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es,
 		BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
 	}
 
+	if (es->pagefaults)
+	{
+		struct rusage rusage;
+
+		getrusage(RUSAGE_SELF, &rusage);
+		pagefaults.minflt = rusage.ru_minflt - pagefaults_start.minflt;
+		pagefaults.majflt = rusage.ru_majflt - pagefaults_start.majflt;
+	}
+
 	plan_list = cplan->stmt_list;
 
 	/* Explain each query */
@@ -656,6 +678,7 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es,
 		if (pstmt->commandType != CMD_UTILITY)
 			ExplainOnePlan(pstmt, into, es, query_string, paramLI, pstate->p_queryEnv,
 						   &planduration, (es->buffers ? &bufusage : NULL),
+						   (es->pagefaults ? &pagefaults : NULL),
 						   es->memory ? &mem_counters : NULL);
 		else
 			ExplainOneUtility(pstmt->utilityStmt, into, es, pstate, paramLI);
diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h
index aa5872bc15..dd95e0447c 100644
--- a/src/include/commands/explain.h
+++ b/src/include/commands/explain.h
@@ -49,6 +49,7 @@ typedef struct ExplainState
 	bool		analyze;		/* print actual times */
 	bool		costs;			/* print estimated costs */
 	bool		buffers;		/* print buffer usage */
+	bool		pagefaults;		/* print pagefaults */
 	bool		wal;			/* print WAL usage */
 	bool		timing;			/* print detailed node timing */
 	bool		summary;		/* print total planning and execution timing */
@@ -73,6 +74,15 @@ typedef struct ExplainState
 	ExplainWorkersState *workers_state; /* needed if parallel plan */
 } ExplainState;
 
+typedef struct PageFaults
+{
+	long		minflt;			/* # of page faults which were serviced
+								 * without requiring any I/O */
+	long		majflt;			/* # of page faults which were serviced by
+								 * doing I/O */
+}			PageFaults;
+
+
 /* Hook for plugins to get control in ExplainOneQuery() */
 typedef void (*ExplainOneQuery_hook_type) (Query *query,
 										   int cursorOptions,
@@ -108,6 +118,7 @@ extern void ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into,
 						   ParamListInfo params, QueryEnvironment *queryEnv,
 						   const instr_time *planduration,
 						   const BufferUsage *bufusage,
+						   const PageFaults * planpagefaults,
 						   const MemoryContextCounters *mem_counters);
 
 extern void ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc);

base-commit: 6f3820f37aba94232468365bae7ba5de697fe993
-- 
2.39.2

