From 50d1f41b7794d947293f5cec0d9d8406cbf05bbc Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryzbyj@telsasoft.com>
Date: Sat, 1 Feb 2020 15:49:47 -0600
Subject: [PATCH v1 2/3] Allow ALTER TABLE to do an index scan, like CLUSTER

The idea is to make the table rewrite in ALTER TABLE work more like CLUSTER,
so that a table-rewriting ALTER can cluster the table at the same time.

Note that this does not do the AM-specific visibility checks that CLUSTER
does, so it will not clean up dead tuples.

The indices are normally dropped before a table rewrite, so an index scan
would be impossible.  As a POC, this early implementation avoids dropping the
indices.
---
 src/backend/commands/tablecmds.c | 67 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 62 insertions(+), 5 deletions(-)

diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 642a85c..44f94d2 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -364,6 +364,7 @@ static void ATRewriteTables(AlterTableStmt *parsetree,
 							List **wqueue, LOCKMODE lockmode,
 							AlterTableUtilityContext *context);
 static void ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode);
+static Oid cluster_index(Relation rel);
 static AlteredTableInfo *ATGetQueueEntry(List **wqueue, Relation rel);
 static void ATSimplePermissions(Relation rel, int allowed_targets);
 static void ATWrongRelkindError(Relation rel, int allowed_targets);
@@ -4294,7 +4295,7 @@ ATRewriteCatalogs(List **wqueue, LOCKMODE lockmode,
 			 * multiple columns of a table are altered).
 			 */
 			if (pass == AT_PASS_ALTER_TYPE)
-				ATPostAlterTypeCleanup(wqueue, tab, lockmode);
+				; // ATPostAlterTypeCleanup(wqueue, tab, lockmode);
 
 			relation_close(rel, NoLock);
 		}
@@ -5005,6 +5006,36 @@ ATRewriteTables(AlterTableStmt *parsetree, List **wqueue, LOCKMODE lockmode,
 	}
 }
 
+/*
+ * cluster_index: return the OID of the index "rel" is marked as clustered on
+ * (pg_index.indisclustered), or InvalidOid if no index is so marked.
+ * NOTE(review): loop borrowed from cluster.c's cluster(), not alter.c -- confirm.
+ */
+static Oid
+cluster_index(Relation rel)
+{
+	List	   *indexoidlist = RelationGetIndexList(rel);
+	Oid			result = InvalidOid;
+	ListCell   *lc;
+
+	foreach(lc, indexoidlist)
+	{
+		Oid			indexOid = lfirst_oid(lc);
+		HeapTuple	idxtuple = SearchSysCache1(INDEXRELID,
+											   ObjectIdGetDatum(indexOid));
+
+		if (!HeapTupleIsValid(idxtuple))
+			elog(ERROR, "cache lookup failed for index %u", indexOid);
+		if (((Form_pg_index) GETSTRUCT(idxtuple))->indisclustered)
+			result = indexOid;
+		ReleaseSysCache(idxtuple);	/* drop the syscache pin before moving on */
+		if (OidIsValid(result))
+			break;
+	}
+	list_free(indexoidlist);	/* the index list is our own copy; don't leak it */
+	return result;
+}
+
 /*
  * ATRewriteTable: scan or rewrite one table
  *
@@ -5015,6 +5046,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
 {
 	Relation	oldrel;
 	Relation	newrel;
+	Relation	index;
 	TupleDesc	oldTupDesc;
 	TupleDesc	newTupDesc;
 	bool		needscan = false;
@@ -5026,6 +5058,7 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
 	BulkInsertState bistate;
 	int			ti_options;
 	ExprState  *partqualstate = NULL;
+	Oid			OIDindex;
 
 	/*
 	 * Open the relation(s).  We have surely already locked the existing
@@ -5040,6 +5073,9 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
 	else
 		newrel = NULL;
 
+	OIDindex = cluster_index(oldrel);
+	index = OidIsValid(OIDindex) ? index_open(OIDindex, lockmode) : NULL;
+
 	/*
 	 * Prepare a BulkInsertState and options for table_tuple_insert. Because
 	 * we're building a new heap, we can skip WAL-logging and fsync it to disk
@@ -5129,7 +5165,8 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
 		ExprContext *econtext;
 		TupleTableSlot *oldslot;
 		TupleTableSlot *newslot;
-		TableScanDesc scan;
+		TableScanDesc tblscan;
+		IndexScanDesc indscan;
 		MemoryContext oldCxt;
 		List	   *dropped_attrs = NIL;
 		ListCell   *lc;
@@ -5205,7 +5242,13 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
 		 * checking all the constraints.
 		 */
 		snapshot = RegisterSnapshot(GetLatestSnapshot());
-		scan = table_beginscan(oldrel, snapshot, 0, NULL);
+		if (index) {
+			indscan = index_beginscan(oldrel, index, snapshot, 0, 0); // SnapshotAny?
+			tblscan = NULL;
+		} else {
+			tblscan = table_beginscan(oldrel, snapshot, 0, NULL);
+			indscan = NULL;
+		}
 
 		/*
 		 * Switch to per-tuple memory context and reset it for each tuple
@@ -5213,10 +5256,19 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
 		 */
 		oldCxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
 
-		while (table_scan_getnextslot(scan, ForwardScanDirection, oldslot))
+		for (;;)
 		{
 			TupleTableSlot *insertslot;
 
+			if (tblscan) {
+				if (!table_scan_getnextslot(tblscan, ForwardScanDirection, oldslot))
+					break;
+			} else {
+				/* indscan */
+				if (!index_getnext_slot(indscan, ForwardScanDirection, oldslot))
+					break;
+			}
+
 			if (tab->rewrite > 0)
 			{
 				/* Extract data from old tuple */
@@ -5362,7 +5414,10 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
 		}
 
 		MemoryContextSwitchTo(oldCxt);
-		table_endscan(scan);
+		if (tblscan)
+			table_endscan(tblscan);
+		if (indscan)
+			index_endscan(indscan);
 		UnregisterSnapshot(snapshot);
 
 		ExecDropSingleTupleTableSlot(oldslot);
@@ -5373,6 +5428,8 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
 	FreeExecutorState(estate);
 
 	table_close(oldrel, NoLock);
+	if (index)
+		table_close(index, NoLock);
 	if (newrel)
 	{
 		FreeBulkInsertState(bistate);
-- 
2.7.4

