From fb2d5bc74e244fabf8c951c14730672f8cc2ee92 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@pgaddict.com>
Date: Tue, 23 Jun 2015 19:53:05 +0200
Subject: [PATCH 10/24] Add ColumnStoreOptInfo to RelationData

This commit adds a new member, rd_cstore, which caches data from the
pg_cstore row for each RELKIND_COLUMN_STORE relation for easy access --
very similar to what rd_index does for indexes.

Implementation notes

- Adds a new node ColumnStoreOptInfo, parallel to IndexOptInfo
---
 src/backend/nodes/outfuncs.c         |  20 ++++++
 src/backend/optimizer/util/plancat.c |  88 ++++++++++++++++++++++++
 src/backend/utils/cache/relcache.c   | 125 +++++++++++++++++++++++++++++++++++
 src/include/nodes/nodes.h            |   1 +
 src/include/nodes/relation.h         |  30 +++++++++
 src/include/utils/rel.h              |  10 +++
 src/include/utils/relcache.h         |   1 +
 7 files changed, 275 insertions(+)

diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 6399d30..235b4e3 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -1904,6 +1904,23 @@ _outIndexOptInfo(StringInfo str, const IndexOptInfo *node)
 }
 
 static void
+_outColumnStoreOptInfo(StringInfo str, const ColumnStoreOptInfo *node)
+{
+	WRITE_NODE_TYPE("COLUMNSTOREOPTINFO");
+
+	/* NB: this isn't a complete set of fields */
+	WRITE_OID_FIELD(colstoreoid);
+
+	/* Do NOT print rel field, else infinite recursion */
+	WRITE_UINT_FIELD(pages);
+	WRITE_FLOAT_FIELD(tuples, "%.0f");
+	WRITE_INT_FIELD(ncolumns);
+	/* array fields aren't really worth the trouble to print */
+	WRITE_OID_FIELD(cstam);
+	/* we don't bother with fields copied from the pg_cstore_am entry */
+}
+
+static void
 _outEquivalenceClass(StringInfo str, const EquivalenceClass *node)
 {
 	/*
@@ -3343,6 +3360,9 @@ _outNode(StringInfo str, const void *obj)
 			case T_PlannerParamItem:
 				_outPlannerParamItem(str, obj);
 				break;
+			case T_ColumnStoreOptInfo:
+				_outColumnStoreOptInfo(str, obj);
+				break;
 
 			case T_CreateStmt:
 				_outCreateStmt(str, obj);
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 9442e5f..276c052 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -27,6 +27,7 @@
 #include "catalog/catalog.h"
 #include "catalog/dependency.h"
 #include "catalog/heap.h"
+#include "catalog/pg_cstore.h"
 #include "foreign/fdwapi.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -71,6 +72,7 @@ static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index,
  *	min_attr	lowest valid AttrNumber
  *	max_attr	highest valid AttrNumber
  *	indexlist	list of IndexOptInfos for relation's indexes
+ *	cstlist		list of ColumnStoreOptInfos for relation's colstores
  *	serverid	if it's a foreign table, the server OID
  *	fdwroutine	if it's a foreign table, the FDW function pointers
  *	pages		number of pages
@@ -93,6 +95,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 	Relation	relation;
 	bool		hasindex;
 	List	   *indexinfos = NIL;
+	List	   *colstoreinfos = NIL;
 
 	/*
 	 * We need not lock the relation since it was already locked, either by
@@ -381,6 +384,91 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 
 	rel->indexlist = indexinfos;
 
+	/* Grab column store info using the relcache, while we have it */
+	if (relation->rd_rel->relhascstore)
+	{
+		List	   *colstoreoidlist;
+		ListCell   *l;
+		LOCKMODE	lmode;
+
+		colstoreoidlist = RelationGetColStoreList(relation);
+
+		/*
+		 * For each column store, we get the same type of lock that the
+		 * executor will need, and do not release it.  This saves a couple
+		 * of trips to the shared lock manager while not creating any real
+		 * loss of concurrency, because no schema changes could be
+		 * happening on the colstore while we hold lock on the parent rel,
+		 * and neither lock type blocks any other kind of colstore operation.
+		 */
+		if (rel->relid == root->parse->resultRelation)
+			lmode = RowExclusiveLock;
+		else
+			lmode = AccessShareLock;
+
+		foreach(l, colstoreoidlist)
+		{
+			Oid			colstoreoid = lfirst_oid(l);
+			Relation	colstoreRelation;
+			Form_pg_cstore colstore;
+			ColumnStoreOptInfo *info;
+			int			ncolumns;
+			int			i;
+
+			/*
+			 * Extract info from the relation descriptor for the colstore.
+			 *
+			 * rd_cstore is the cached pg_cstore row for the colstore; the
+			 * relcache fills it in for RELKIND_COLUMN_STORE relations (see
+			 * RelationInitColumnStoreInfo()), much like rd_index is set up
+			 * for indexes.
+			 *
+			 * TODO  Define colstore_open(), similar to index_open().
+			 */
+			colstoreRelation = relation_open(colstoreoid, lmode);
+			colstore = colstoreRelation->rd_cstore;
+
+			/* XXX Invalid and not-yet-usable colstores would be handled here. */
+
+			info = makeNode(ColumnStoreOptInfo);
+
+			info->colstoreoid = colstore->cststoreid;
+			info->reltablespace =
+				RelationGetForm(colstoreRelation)->reltablespace;
+			info->rel = rel;
+			info->ncolumns = ncolumns = colstore->cstnatts;
+			info->cstkeys = (int *) palloc(sizeof(int) * ncolumns);
+
+			for (i = 0; i < ncolumns; i++)
+				info->cstkeys[i] = colstore->cstatts.values[i];
+
+			/*
+			 * TODO This is where to fetch the AM for the colstore (see how
+			 *      indexes are handled above).
+			 */
+
+			/*
+			 * XXX this is where indexes build targetlists. Colstores don't
+			 * have these (at least not ATM).
+			 */
+
+			/*
+			 * Estimate the colstore size. We don't support partial
+			 * colstores, we just use the same reltuples as the parent.
+			 */
+			info->pages = RelationGetNumberOfBlocks(colstoreRelation);
+			info->tuples = rel->tuples;
+
+			relation_close(colstoreRelation, NoLock);
+
+			colstoreinfos = lcons(info, colstoreinfos);
+		}
+
+		list_free(colstoreoidlist);
+	}
+
+	rel->cstlist = colstoreinfos;
+
 	/* Grab foreign-table info using the relcache, while we have it */
 	if (relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
 	{
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index ddb1fc0..8dcf42a 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -273,6 +273,7 @@ static void IndexSupportInitialize(oidvector *indclass,
 					   AttrNumber maxAttributeNumber);
 static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
 				  StrategyNumber numSupport);
+static void RelationInitColumnStoreInfo(Relation rel);
 static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
 static void unlink_initfile(const char *initfilename);
 
@@ -432,6 +433,7 @@ RelationParseRelOptions(Relation relation, HeapTuple tuple)
 		case RELKIND_INDEX:
 		case RELKIND_VIEW:
 		case RELKIND_MATVIEW:
+		case RELKIND_COLUMN_STORE:
 			break;
 		default:
 			return;
@@ -1055,6 +1057,12 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
 		OidIsValid(relation->rd_rel->relam))
 		RelationInitIndexAccessInfo(relation);
 
+	/*
+	 * if it's a column store, initialize pg_cstore data
+	 */
+	if (relation->rd_rel->relkind == RELKIND_COLUMN_STORE)
+		RelationInitColumnStoreInfo(relation);
+
 	/* extract reloptions if any */
 	RelationParseRelOptions(relation, pg_class_tuple);
 
@@ -1528,6 +1536,27 @@ LookupOpclassInfo(Oid operatorClassOid,
 	return opcentry;
 }
 
+/*
+ * For a column store relation, cache a copy of its pg_cstore row in rd_cstore
+ */
+static void
+RelationInitColumnStoreInfo(Relation rel)
+{
+	HeapTuple	tuple;
+	MemoryContext oldcontext;
+
+	tuple = SearchSysCache1(CSTOREOID,
+							ObjectIdGetDatum(RelationGetRelid(rel)));
+	if (!HeapTupleIsValid(tuple))
+		elog(ERROR, "cache lookup failed for column store %u",
+			 RelationGetRelid(rel));
+	oldcontext = MemoryContextSwitchTo(CacheMemoryContext);
+	rel->rd_cstoretuple = heap_copytuple(tuple);
+	rel->rd_cstore = (Form_pg_cstore) GETSTRUCT(rel->rd_cstoretuple);
+	MemoryContextSwitchTo(oldcontext);
+
+	ReleaseSysCache(tuple);
+}
 
 /*
  *		formrdesc
@@ -3900,6 +3929,100 @@ RelationGetIndexList(Relation relation)
 }
 
 /*
+ * RelationGetColStoreList -- get a list of OIDs of colstores on this relation
+ *
+ * XXX Copy'n'paste of RelationGetIndexList(), simplified for colstores.
+ *
+ * The colstore list is created only if someone requests it.  We scan pg_cstore
+ * to find relevant colstores, and add the list to the relcache entry so that
+ * we won't have to compute it again.  Note that shared cache inval of a
+ * relcache entry will delete the old list and set rd_cstvalid to false,
+ * so that we must recompute the colstore list on next request.  This handles
+ * creation or deletion of a colstore.
+ *
+ * XXX At the moment we don't need the invalidation, because all the colstores
+ *     are defined at table creation time, but if we ever decide to implement
+ *     ALTER TABLE ... [ADD|DROP] COLUMN STORE, this will be handy. Also, this
+ *     is just copy'n'paste programming using RelationGetIndexList().
+ *
+ * XXX Currently there are no 'islive' or 'isvalid' flags for colstores.
+ *
+ * The returned list is guaranteed to be sorted in order by OID.  This is
+ * needed by the executor, since for colstore types that we obtain exclusive
+ * locks on when updating the colstore, all backends must lock the colstores
+ * in the same order or we will get deadlocks (see ExecOpenColstores()).  Any
+ * consistent ordering would do, but ordering by OID is easy.
+ *
+ * Since shared cache inval causes the relcache's copy of the list to go away,
+ * we return a copy of the list palloc'd in the caller's context.  The caller
+ * may list_free() the returned list after scanning it. This is necessary
+ * since the caller will typically be doing syscache lookups on the relevant
+ * colstores, and syscache lookup could cause SI messages to be processed!
+ */
+List *
+RelationGetColStoreList(Relation relation)
+{
+	Relation	cstrel;
+	SysScanDesc cstscan;
+	ScanKeyData skey;
+	HeapTuple	htup;
+	List	   *result;
+	List	   *oldlist;
+	MemoryContext oldcxt;
+
+	/* Quick exit if we already computed the list. */
+	if (relation->rd_cstvalid)
+		return list_copy(relation->rd_cstlist);
+
+	/*
+	 * We build the list we intend to return (in the caller's context) while
+	 * doing the scan.  After successfully completing the scan, we copy that
+	 * list into the relcache entry.  This avoids cache-context memory leakage
+	 * if we get some sort of error partway through.
+	 */
+	result = NIL;
+
+	/* Prepare to scan pg_cstore for entries having cstrelid = this rel. */
+	ScanKeyInit(&skey,
+				Anum_pg_cstore_cstrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationGetRelid(relation)));
+
+	cstrel = heap_open(CStoreRelationId, AccessShareLock);
+	cstscan = systable_beginscan(cstrel, CStoreCstRelidCstnameIndexId, true,
+								 NULL, 1, &skey);
+
+	while (HeapTupleIsValid(htup = systable_getnext(cstscan)))
+	{
+		Form_pg_cstore cstore = (Form_pg_cstore) GETSTRUCT(htup);
+
+		/*
+		 * TODO Ignore column stores that are being dropped (ALTER TABLE
+		 *      DROP COLUMN STORE) here.
+		 */
+
+		/* Add colstore's OID to result list in the proper order */
+		result = insert_ordered_oid(result, cstore->cststoreid);
+	}
+
+	systable_endscan(cstscan);
+
+	heap_close(cstrel, AccessShareLock);
+
+	/* Now save a copy of the completed list in the relcache entry. */
+	oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
+	oldlist = relation->rd_cstlist;
+	relation->rd_cstlist = list_copy(result);
+	relation->rd_cstvalid = true;
+	MemoryContextSwitchTo(oldcxt);
+
+	/* Don't leak the old list, if there is one */
+	list_free(oldlist);
+
+	return result;
+}
+
+/*
  * insert_ordered_oid
  *		Insert a new Oid into a sorted list of Oids, preserving ordering
  *
@@ -4871,6 +4994,8 @@ load_relcache_init_file(bool shared)
 		rel->rd_createSubid = InvalidSubTransactionId;
 		rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
 		rel->rd_amcache = NULL;
+		rel->rd_cstvalid = 0;
+		rel->rd_cstlist = NIL;
 		MemSet(&rel->pgstat_info, 0, sizeof(rel->pgstat_info));
 
 		/*
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 27b38b1..53e0a0b 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -249,6 +249,7 @@ typedef enum NodeTag
 	T_PlaceHolderInfo,
 	T_MinMaxAggInfo,
 	T_PlannerParamItem,
+	T_ColumnStoreOptInfo,
 
 	/*
 	 * TAGS FOR MEMORY NODES (memnodes.h)
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 5dc23d9..18a8891 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -468,6 +468,7 @@ typedef struct RelOptInfo
 	Relids		lateral_relids; /* minimum parameterization of rel */
 	Relids		lateral_referencers;	/* rels that reference me laterally */
 	List	   *indexlist;		/* list of IndexOptInfo */
+	List	   *cstlist;		/* list of ColumnStoreOptInfo */
 	BlockNumber pages;			/* size estimates derived from pg_class */
 	double		tuples;
 	double		allvisfrac;
@@ -562,6 +563,35 @@ typedef struct IndexOptInfo
 	bool		amhasgetbitmap; /* does AM have amgetbitmap interface? */
 } IndexOptInfo;
 
+/*
+ * ColumnStoreOptInfo
+ *		Per-colstore information for planning/optimization
+ *
+ *		cstkeys[] has ncolumns entries, we don't allow expressions in colstores
+ *
+ * TODO maybe should have a bunch of capability flags like IndexOptInfo
+ */
+typedef struct ColumnStoreOptInfo
+{
+	NodeTag		type;
+
+	Oid			colstoreoid;	/* OID of the column store relation */
+	Oid			reltablespace;	/* tablespace of colstore (not table) */
+	RelOptInfo *rel;			/* back-link to colstore's table */
+
+	/* colstore-size statistics (from pg_class and elsewhere) */
+	BlockNumber pages;			/* number of disk pages in colstore */
+	double		tuples;			/* number of tuples in colstore */
+
+	/* colstore descriptor information (from pg_cstore) */
+	int			ncolumns;		/* number of columns in colstore */
+	int		   *cstkeys;		/* column numbers of colstore's keys */
+	Oid			cstam;			/* OID of the access method (in pg_cstore_am) */
+
+	RegProcedure cstcostestimate;	/* OID of the colstore method's cost fcn */
+
+	List	   *csttlist;		/* targetlist representing colstore's columns */
+} ColumnStoreOptInfo;
 
 /*
  * EquivalenceClasses
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 8a55a09..99c7ef6 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -17,6 +17,7 @@
 #include "access/tupdesc.h"
 #include "catalog/pg_am.h"
 #include "catalog/pg_class.h"
+#include "catalog/pg_cstore.h"
 #include "catalog/pg_index.h"
 #include "fmgr.h"
 #include "nodes/bitmapset.h"
@@ -79,6 +80,7 @@ typedef struct RelationData
 	bool		rd_isvalid;		/* relcache entry is valid */
 	char		rd_indexvalid;	/* state of rd_indexlist: 0 = not valid, 1 =
 								 * valid, 2 = temporarily forced */
+	bool		rd_cstvalid;	/* rd_cstlist is valid */
 
 	/*
 	 * rd_createSubid is the ID of the highest subtransaction the rel has
@@ -117,6 +119,9 @@ typedef struct RelationData
 	Bitmapset  *rd_keyattr;		/* cols that can be ref'd by foreign keys */
 	Bitmapset  *rd_idattr;		/* included in replica identity index */
 
+	/* data managed by RelationGetColStoreList: */
+	List	   *rd_cstlist;	/* list of OIDs of colstores on relation */
+
 	/*
 	 * rd_options is set whenever rd_rel is loaded into the relcache entry.
 	 * Note that you can NOT look into rd_rel for this data.  NULL means "use
@@ -171,6 +176,11 @@ typedef struct RelationData
 	/* use "struct" here to avoid needing to include fdwapi.h: */
 	struct FdwRoutine *rd_fdwroutine;	/* cached function pointers, or NULL */
 
+	/* These are non-NULL only for a column store relation: */
+	Form_pg_cstore rd_cstore;		/* pg_cstore tuple describing this colstore */
+	/* use "struct" here to avoid needing to include htup.h: */
+	struct HeapTupleData *rd_cstoretuple;	/* all of pg_cstore tuple */
+
 	/*
 	 * Hack for CLUSTER, rewriting ALTER TABLE, etc: when writing a new
 	 * version of a table, we need to make any toast pointers inserted into it
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h
index 6953281..0a8d52c 100644
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -38,6 +38,7 @@ extern void RelationClose(Relation relation);
  * Routines to compute/retrieve additional cached information
  */
 extern List *RelationGetIndexList(Relation relation);
+extern List *RelationGetColStoreList(Relation relation);
 extern Oid	RelationGetOidIndex(Relation relation);
 extern Oid	RelationGetReplicaIndex(Relation relation);
 extern List *RelationGetIndexExpressions(Relation relation);
-- 
2.1.4

