From 7c76d63811381ee4252e02285f58e0816dcf273d Mon Sep 17 00:00:00 2001
From: Atsushi Torikoshi <torikoshia@oss.nttdata.com>
Date: Mon, 6 Jan 2025 18:32:40 +0900
Subject: [PATCH v1] PoC: Allow EXPLAIN to output storage I/O information

This patch adds STORAGEIO option to EXPLAIN and it tracks the
amount of read/write from storage during planning and execution
phases using ru_inblock/ru_outblock from getrusage(2).

This is a PoC patch and only shows them when the FORMAT is TEXT.
Accumulating Parallel workers' I/O has not implemented yet.
Building on Windows causes error.
---
 src/backend/commands/explain.c | 103 ++++++++++++++++++++++++++++++++-
 src/backend/commands/prepare.c |  23 ++++++++
 src/include/commands/explain.h |  10 ++++
 3 files changed, 134 insertions(+), 2 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index c24e66f82..3c3b68b31 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -13,6 +13,8 @@
  */
 #include "postgres.h"
 
+#include <sys/resource.h>
+
 #include "access/xact.h"
 #include "catalog/pg_type.h"
 #include "commands/createas.h"
@@ -145,6 +147,8 @@ static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es);
 static const char *explain_get_index_name(Oid indexId);
 static bool peek_buffer_usage(ExplainState *es, const BufferUsage *usage);
 static void show_buffer_usage(ExplainState *es, const BufferUsage *usage);
+static bool peek_storageio(ExplainState *es, const StorageIO *usage);
+static void show_storageio(ExplainState *es, const StorageIO *usage);
 static void show_wal_usage(ExplainState *es, const WalUsage *usage);
 static void show_memory_counters(ExplainState *es,
 								 const MemoryContextCounters *mem_counters);
@@ -217,6 +221,8 @@ ExplainQuery(ParseState *pstate, ExplainStmt *stmt,
 			buffers_set = true;
 			es->buffers = defGetBoolean(opt);
 		}
+		else if (strcmp(opt->defname, "storageio") == 0)
+			es->storageio = defGetBoolean(opt);
 		else if (strcmp(opt->defname, "wal") == 0)
 			es->wal = defGetBoolean(opt);
 		else if (strcmp(opt->defname, "settings") == 0)
@@ -475,6 +481,8 @@ standard_ExplainOneQuery(Query *query, int cursorOptions,
 				planduration;
 	BufferUsage bufusage_start,
 				bufusage;
+	StorageIO	storageio = {0};
+	StorageIO	storageio_start = {0};
 	MemoryContextCounters mem_counters;
 	MemoryContext planner_ctx = NULL;
 	MemoryContext saved_ctx = NULL;
@@ -499,6 +507,15 @@ standard_ExplainOneQuery(Query *query, int cursorOptions,
 		bufusage_start = pgBufferUsage;
 	INSTR_TIME_SET_CURRENT(planstart);
 
+	if (es->storageio)
+	{
+		struct rusage rusage;
+
+		getrusage(RUSAGE_SELF, &rusage);
+		storageio_start.inblock = rusage.ru_inblock;
+		storageio_start.outblock = rusage.ru_oublock;
+	}
+
 	/* plan the query */
 	plan = pg_plan_query(query, queryString, cursorOptions, params);
 
@@ -518,9 +535,19 @@ standard_ExplainOneQuery(Query *query, int cursorOptions,
 		BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
 	}
 
+	if (es->storageio)
+	{
+		struct rusage rusage;
+
+		getrusage(RUSAGE_SELF, &rusage);
+		storageio.inblock = rusage.ru_inblock - storageio_start.inblock;
+		storageio.outblock = rusage.ru_oublock - storageio_start.outblock;
+	}
+
 	/* run it (if needed) and produce output */
 	ExplainOnePlan(plan, into, es, queryString, params, queryEnv,
 				   &planduration, (es->buffers ? &bufusage : NULL),
+				   (es->storageio ? &storageio : NULL),
 				   es->memory ? &mem_counters : NULL);
 }
 
@@ -644,7 +671,7 @@ void
 ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 			   const char *queryString, ParamListInfo params,
 			   QueryEnvironment *queryEnv, const instr_time *planduration,
-			   const BufferUsage *bufusage,
+			   const BufferUsage *bufusage, const StorageIO *planstorageio,
 			   const MemoryContextCounters *mem_counters)
 {
 	DestReceiver *dest;
@@ -654,6 +681,9 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 	int			eflags;
 	int			instrument_option = 0;
 	SerializeMetrics serializeMetrics = {0};
+	StorageIO	storageio = {0};
+	StorageIO	storageio_start = {0};
+	struct rusage rusage;
 
 	Assert(plannedstmt->commandType != CMD_UTILITY);
 
@@ -674,6 +704,14 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 	 */
 	INSTR_TIME_SET_CURRENT(starttime);
 
+	if (es->storageio)
+	{
+		getrusage(RUSAGE_SELF, &rusage);
+
+		storageio_start.inblock = rusage.ru_inblock;
+		storageio_start.outblock = rusage.ru_oublock;
+	}
+
 	/*
 	 * Use a snapshot with an updated command ID to ensure this query sees
 	 * results of any previously executed queries.
@@ -748,7 +786,8 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 	ExplainPrintPlan(es, queryDesc);
 
 	/* Show buffer and/or memory usage in planning */
-	if (peek_buffer_usage(es, bufusage) || mem_counters)
+	if (peek_buffer_usage(es, bufusage) || peek_storageio(es, planstorageio) ||
+		mem_counters)
 	{
 		ExplainOpenGroup("Planning", "Planning", true, es);
 
@@ -762,6 +801,9 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 		if (bufusage)
 			show_buffer_usage(es, bufusage);
 
+		if (es->storageio)
+			show_storageio(es, planstorageio);
+
 		if (mem_counters)
 			show_memory_counters(es, mem_counters);
 
@@ -813,6 +855,26 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 
 	totaltime += elapsed_time(&starttime);
 
+	if (es->storageio)
+	{
+		getrusage(RUSAGE_SELF, &rusage);
+
+		storageio.inblock = rusage.ru_inblock - storageio_start.inblock;
+		storageio.outblock = rusage.ru_oublock - storageio_start.outblock;
+
+		if (es->format == EXPLAIN_FORMAT_TEXT)
+		{
+			ExplainIndentText(es);
+			appendStringInfoString(es->str, "Execution:\n");
+			es->indent++;
+		}
+		show_storageio(es, &storageio);
+
+		if (es->format == EXPLAIN_FORMAT_TEXT)
+			es->indent--;
+		ExplainCloseGroup("Execution", "Execution", true, es);
+	}
+
 	/*
 	 * We only report execution time if we actually ran the query (that is,
 	 * the user specified ANALYZE), and if summary reporting is enabled (the
@@ -4232,6 +4294,43 @@ show_buffer_usage(ExplainState *es, const BufferUsage *usage)
 	}
 }
 
+static bool
+peek_storageio(ExplainState *es, const StorageIO *usage)
+{
+	if (usage == NULL)
+		return false;
+
+	if (usage->inblock <= 0 && usage->outblock <= 0)
+		return false;
+
+	else
+		return true;
+}
+
+/*
+ * Show storage I/O.
+ *
+ * Since the unit of inblock/outblock is 512 bytes, change them to KB by dividing by two.
+ */
+static void
+show_storageio(ExplainState *es, const StorageIO *usage)
+{
+	/* Show only positive counter values. */
+	if (usage->inblock <= 0 && usage->outblock <= 0)
+		return;
+
+	if (es->format == EXPLAIN_FORMAT_TEXT)
+	{
+
+		ExplainIndentText(es);
+		appendStringInfoString(es->str, "Storage I/O:");
+		appendStringInfo(es->str, " read=%ld KB", (long) usage->inblock / 2);
+		appendStringInfo(es->str, " write=%ld KB", (long) usage->outblock / 2);
+
+		appendStringInfoChar(es->str, '\n');
+	}
+}
+
 /*
  * Show WAL usage details.
  */
diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c
index 8989c0c88..4743bacf4 100644
--- a/src/backend/commands/prepare.c
+++ b/src/backend/commands/prepare.c
@@ -17,6 +17,7 @@
 #include "postgres.h"
 
 #include <limits.h>
+#include <sys/resource.h>
 
 #include "access/xact.h"
 #include "catalog/pg_type.h"
@@ -579,6 +580,8 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es,
 	instr_time	planduration;
 	BufferUsage bufusage_start,
 				bufusage;
+	StorageIO	storageio_start = {0};
+	StorageIO	storageio = {0};
 	MemoryContextCounters mem_counters;
 	MemoryContext planner_ctx = NULL;
 	MemoryContext saved_ctx = NULL;
@@ -595,6 +598,16 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es,
 
 	if (es->buffers)
 		bufusage_start = pgBufferUsage;
+
+	if (es->storageio)
+	{
+		struct rusage rusage;
+
+		getrusage(RUSAGE_SELF, &rusage);
+		storageio_start.inblock = rusage.ru_inblock;
+		storageio_start.outblock = rusage.ru_oublock;
+	}
+
 	INSTR_TIME_SET_CURRENT(planstart);
 
 	/* Look it up in the hash table */
@@ -646,6 +659,15 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es,
 		BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
 	}
 
+	if (es->storageio)
+	{
+		struct rusage rusage;
+
+		getrusage(RUSAGE_SELF, &rusage);
+		storageio.inblock = rusage.ru_inblock - storageio_start.inblock;
+		storageio.outblock = rusage.ru_oublock - storageio_start.outblock;
+	}
+
 	plan_list = cplan->stmt_list;
 
 	/* Explain each query */
@@ -656,6 +678,7 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es,
 		if (pstmt->commandType != CMD_UTILITY)
 			ExplainOnePlan(pstmt, into, es, query_string, paramLI, pstate->p_queryEnv,
 						   &planduration, (es->buffers ? &bufusage : NULL),
+						   (es->storageio ? &storageio : NULL),
 						   es->memory ? &mem_counters : NULL);
 		else
 			ExplainOneUtility(pstmt->utilityStmt, into, es, pstate, paramLI);
diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h
index ea7419951..7b452b306 100644
--- a/src/include/commands/explain.h
+++ b/src/include/commands/explain.h
@@ -49,6 +49,7 @@ typedef struct ExplainState
 	bool		analyze;		/* print actual times */
 	bool		costs;			/* print estimated costs */
 	bool		buffers;		/* print buffer usage */
+	bool		storageio;		/* print storageio */
 	bool		wal;			/* print WAL usage */
 	bool		timing;			/* print detailed node timing */
 	bool		summary;		/* print total planning and execution timing */
@@ -73,6 +74,14 @@ typedef struct ExplainState
 	ExplainWorkersState *workers_state; /* needed if parallel plan */
 } ExplainState;
 
+typedef struct StorageIO
+{
+	/* Note that page size here is 512 byte */
+	long		inblock;			/* # of pages that has been read from storage */
+	long		outblock;			/* # of pages that has been written or shall be written to storage */
+}			StorageIO;
+
+
 /* Hook for plugins to get control in ExplainOneQuery() */
 typedef void (*ExplainOneQuery_hook_type) (Query *query,
 										   int cursorOptions,
@@ -108,6 +117,7 @@ extern void ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into,
 						   ParamListInfo params, QueryEnvironment *queryEnv,
 						   const instr_time *planduration,
 						   const BufferUsage *bufusage,
+						   const StorageIO *planstorageio,
 						   const MemoryContextCounters *mem_counters);
 
 extern void ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc);

base-commit: b1ef48980ddd082a90ed39aa5914af45e53059e6
-- 
2.34.1

