From bd9ee5e164f7a7f9f4489d617383f83cbb669fb1 Mon Sep 17 00:00:00 2001
From: David Rowley <dgrowley@gmail.com>
Date: Wed, 24 Apr 2024 14:14:44 +1200
Subject: [PATCH v4] Ensure we allocate NAMEDATALEN bytes for names in Index
 Only Scans

---
 src/backend/executor/nodeIndexonlyscan.c      | 94 +++++++++++++++++--
 src/include/catalog/pg_opclass.dat            |  7 +-
 src/include/nodes/execnodes.h                 |  4 +
 src/test/regress/expected/index_including.out | 25 +++++
 src/test/regress/sql/index_including.sql      | 19 ++++
 5 files changed, 140 insertions(+), 9 deletions(-)

diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index fcf6d1d932..d82da50a69 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -35,18 +35,20 @@
 #include "access/tableam.h"
 #include "access/tupdesc.h"
 #include "access/visibilitymap.h"
+#include "catalog/pg_opfamily_d.h"
 #include "executor/executor.h"
 #include "executor/nodeIndexonlyscan.h"
 #include "executor/nodeIndexscan.h"
 #include "miscadmin.h"
 #include "storage/bufmgr.h"
 #include "storage/predicate.h"
+#include "utils/builtins.h"
 #include "utils/rel.h"
 
 
 static TupleTableSlot *IndexOnlyNext(IndexOnlyScanState *node);
-static void StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup,
-							TupleDesc itupdesc);
+static void StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot,
+							IndexTuple itup, TupleDesc itupdesc);
 
 
 /* ----------------------------------------------------------------
@@ -205,7 +207,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
 			ExecForceStoreHeapTuple(scandesc->xs_hitup, slot, false);
 		}
 		else if (scandesc->xs_itup)
-			StoreIndexTuple(slot, scandesc->xs_itup, scandesc->xs_itupdesc);
+			StoreIndexTuple(node, slot, scandesc->xs_itup, scandesc->xs_itupdesc);
 		else
 			elog(ERROR, "no data returned for index-only scan");
 
@@ -263,7 +265,8 @@ IndexOnlyNext(IndexOnlyScanState *node)
  * right now we don't need it elsewhere.
  */
 static void
-StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc)
+StoreIndexTuple(IndexOnlyScanState *node, TupleTableSlot *slot,
+				IndexTuple itup, TupleDesc itupdesc)
 {
 	/*
 	 * Note: we must use the tupdesc supplied by the AM in index_deform_tuple,
@@ -276,6 +279,37 @@ StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc)
 
 	ExecClearTuple(slot);
 	index_deform_tuple(itup, itupdesc, slot->tts_values, slot->tts_isnull);
+
+	/*
+	 * Copy all name columns stored as cstrings back into a NAMEDATALEN-byte
+	 * allocation.  We mark this branch as unlikely because "name" is
+	 * generally used only in the system catalogs, so reaching it requires a
+	 * user query on those catalogs or on a user table that has an index on a
+	 * name column.
+	 */
+	if (unlikely(node->ioss_NameCStringAttNums != NULL))
+	{
+		int			attcount = node->ioss_NameCStringCount;
+
+		for (int idx = 0; idx < attcount; idx++)
+		{
+			int			attnum = node->ioss_NameCStringAttNums[idx];
+			Name		name;
+
+			/* skip null Datums */
+			if (slot->tts_isnull[attnum])
+				continue;
+
+			/* allocate NAMEDATALEN bytes and copy the datum into them */
+			name = (Name) MemoryContextAlloc(node->ss.ps.ps_ExprContext->ecxt_per_tuple_memory,
+											 NAMEDATALEN);
+
+			/* use namestrcpy to zero-pad all trailing bytes */
+			namestrcpy(name, DatumGetCString(slot->tts_values[attnum]));
+			slot->tts_values[attnum] = NameGetDatum(name);
+		}
+	}
+
 	ExecStoreVirtualTuple(slot);
 }
 
@@ -473,8 +507,11 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
 {
 	IndexOnlyScanState *indexstate;
 	Relation	currentRelation;
+	Relation	indexRelation;
 	LOCKMODE	lockmode;
 	TupleDesc	tupDesc;
+	int			indnkeyatts;
+	int			namecount;
 
 	/*
 	 * create state structure
@@ -547,7 +584,8 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
 
 	/* Open the index relation. */
 	lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->rellockmode;
-	indexstate->ioss_RelationDesc = index_open(node->indexid, lockmode);
+	indexRelation = index_open(node->indexid, lockmode);
+	indexstate->ioss_RelationDesc = indexRelation;
 
 	/*
 	 * Initialize index-specific scan state
@@ -560,7 +598,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
 	 * build the index scan keys from the index qualification
 	 */
 	ExecIndexBuildScanKeys((PlanState *) indexstate,
-						   indexstate->ioss_RelationDesc,
+						   indexRelation,
 						   node->indexqual,
 						   false,
 						   &indexstate->ioss_ScanKeys,
@@ -574,7 +612,7 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
 	 * any ORDER BY exprs have to be turned into scankeys in the same way
 	 */
 	ExecIndexBuildScanKeys((PlanState *) indexstate,
-						   indexstate->ioss_RelationDesc,
+						   indexRelation,
 						   node->indexorderby,
 						   true,
 						   &indexstate->ioss_OrderByKeys,
@@ -603,6 +641,48 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
 		indexstate->ioss_RuntimeContext = NULL;
 	}
 
+	indexstate->ioss_NameCStringAttNums = NULL;
+	indnkeyatts = indexRelation->rd_index->indnkeyatts;
+	namecount = 0;
+
+	/*
+	 * The "name" type for btree uses text_ops which results in storing
+	 * cstrings in the indexed keys rather than names.  Here we detect that in
+	 * a generic way in case other index AMs want to do the same optimization.
+	 * Check for opclasses with an opcintype of NAMEOID and an index tuple
+	 * descriptor with CSTRINGOID.  If any of these are found, create an array
+	 * marking the index attribute number of each of them.  StoreIndexTuple()
+	 * handles copying the name Datums into a NAMEDATALEN-byte allocation.
+	 */
+
+	/* First count the number of such index keys */
+	for (int attnum = 0; attnum < indnkeyatts; attnum++)
+	{
+		if (indexRelation->rd_att->attrs[attnum].atttypid == CSTRINGOID &&
+			indexRelation->rd_opcintype[attnum] == NAMEOID)
+			namecount++;
+	}
+
+	if (namecount > 0)
+	{
+		int			idx = 0;
+
+		/*
+		 * Now create an array to mark the attribute numbers of the keys that
+		 * need to be converted from cstring to name.
+		 */
+		indexstate->ioss_NameCStringAttNums = (int *) palloc(sizeof(int) * namecount);
+
+		for (int attnum = 0; attnum < indnkeyatts; attnum++)
+		{
+			if (indexRelation->rd_att->attrs[attnum].atttypid == CSTRINGOID &&
+				indexRelation->rd_opcintype[attnum] == NAMEOID)
+				indexstate->ioss_NameCStringAttNums[idx++] = attnum;
+		}
+	}
+
+	indexstate->ioss_NameCStringCount = namecount;
+
 	/*
 	 * all done.
 	 */
diff --git a/src/include/catalog/pg_opclass.dat b/src/include/catalog/pg_opclass.dat
index 6c30770fe7..f503c652eb 100644
--- a/src/include/catalog/pg_opclass.dat
+++ b/src/include/catalog/pg_opclass.dat
@@ -91,8 +91,11 @@
 # Here's an ugly little hack to save space in the system catalog indexes.
 # btree doesn't ordinarily allow a storage type different from input type;
 # but cstring and name are the same thing except for trailing padding,
-# and we can safely omit that within an index entry.  So we declare the
-# btree opclass for name as using cstring storage type.
+# so we choose to omit that within an index entry.  Here we declare the
+# btree opclass for name as using cstring storage type.  This does require
+# that we pad the cstring out with the full NAMEDATALEN bytes when performing
+# index-only scans.  See corresponding hacks in ExecInitIndexOnlyScan() and
+# StoreIndexTuple().
 { opcmethod => 'btree', opcname => 'name_ops', opcfamily => 'btree/text_ops',
   opcintype => 'name', opckeytype => 'cstring' },
 
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index d927ac44a8..c7b1c008cb 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1690,6 +1690,8 @@ typedef struct IndexScanState
  *		TableSlot		   slot for holding tuples fetched from the table
  *		VMBuffer		   buffer in use for visibility map testing, if any
  *		PscanLen		   size of parallel index-only scan descriptor
+ *		NameCStringAttNums attnums of name-typed columns to pad to NAMEDATALEN
+ *		NameCStringCount   number of elements in the NameCStringAttNums array
  * ----------------
  */
 typedef struct IndexOnlyScanState
@@ -1709,6 +1711,8 @@ typedef struct IndexOnlyScanState
 	TupleTableSlot *ioss_TableSlot;
 	Buffer		ioss_VMBuffer;
 	Size		ioss_PscanLen;
+	int		   *ioss_NameCStringAttNums;
+	int			ioss_NameCStringCount;
 } IndexOnlyScanState;
 
 /* ----------------
diff --git a/src/test/regress/expected/index_including.out b/src/test/regress/expected/index_including.out
index 86510687c7..ea8b2454bf 100644
--- a/src/test/regress/expected/index_including.out
+++ b/src/test/regress/expected/index_including.out
@@ -398,3 +398,28 @@ Indexes:
     "tbl_c1_c2_c3_c4_key" UNIQUE CONSTRAINT, btree (c1, c2) INCLUDE (c3, c4)
 
 DROP TABLE tbl;
+/*
+ * 10. Test coverage for names stored as cstrings in indexes
+ */
+CREATE TABLE nametbl (c1 int, c2 name, c3 float);
+CREATE INDEX nametbl_c1_c2_idx ON nametbl (c2, c1) INCLUDE (c3);
+INSERT INTO nametbl VALUES(1, 'two', 3.0);
+VACUUM nametbl;
+SET enable_seqscan = 0;
+-- Ensure we get an index only scan plan
+EXPLAIN (COSTS OFF) SELECT c2, c1, c3 FROM nametbl WHERE c2 = 'two' AND c1 = 1;
+                     QUERY PLAN                     
+----------------------------------------------------
+ Index Only Scan using nametbl_c1_c2_idx on nametbl
+   Index Cond: ((c2 = 'two'::name) AND (c1 = 1))
+(2 rows)
+
+-- Validate the results look sane
+SELECT c2, c1, c3 FROM nametbl WHERE c2 = 'two' AND c1 = 1;
+ c2  | c1 | c3 
+-----+----+----
+ two |  1 |  3
+(1 row)
+
+RESET enable_seqscan;
+DROP TABLE nametbl;
diff --git a/src/test/regress/sql/index_including.sql b/src/test/regress/sql/index_including.sql
index 44b340053b..ad9cbdd028 100644
--- a/src/test/regress/sql/index_including.sql
+++ b/src/test/regress/sql/index_including.sql
@@ -217,3 +217,22 @@ ALTER TABLE tbl ALTER c1 TYPE bigint;
 ALTER TABLE tbl ALTER c3 TYPE bigint;
 \d tbl
 DROP TABLE tbl;
+
+/*
+ * 10. Test coverage for names stored as cstrings in indexes
+ */
+CREATE TABLE nametbl (c1 int, c2 name, c3 float);
+CREATE INDEX nametbl_c1_c2_idx ON nametbl (c2, c1) INCLUDE (c3);
+INSERT INTO nametbl VALUES(1, 'two', 3.0);
+VACUUM nametbl;
+SET enable_seqscan = 0;
+
+-- Ensure we get an index only scan plan
+EXPLAIN (COSTS OFF) SELECT c2, c1, c3 FROM nametbl WHERE c2 = 'two' AND c1 = 1;
+
+-- Validate the results look sane
+SELECT c2, c1, c3 FROM nametbl WHERE c2 = 'two' AND c1 = 1;
+
+RESET enable_seqscan;
+
+DROP TABLE nametbl;
\ No newline at end of file
-- 
2.40.1