From 35a1f3d8f5cc214b9b3a8de66ed0ef390b30efc9 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas@pgaddict.com>
Date: Tue, 14 Jul 2015 21:35:56 +0200
Subject: [PATCH 18/24] initial planning of ColumnStoreMaterialize nodes

Note: This isn't really intended to be the definitive form of this code.
We envision modifying earlier stages of planning so that column stores
are added as range table entries, and have Path structures created for
them.  Therefore, don't look at this code too much.

- modifies create_seqscan_path to stack one column store materialize path
  on top of the seqscan path for each column store (trivial and naive,
  needs a number of fixes)

- defines trivial costing (reuse costing from subpath)

- creates plan nodes from the path

- allows EXPLAIN on the plan

- does not implement the executor node yet (ExecInitNode temporarily
  dispatches the new plan node to ExecInitMaterial)

For a table with 3 column stores, the EXPLAIN looks like this:

test=# explain select * from parent_table ;
                                    QUERY PLAN
-----------------------------------------------------------------------------------
 Column Store Materialize  (cost=0.00..28.50 rows=1850 width=16)
   ->  Column Store Materialize  (cost=0.00..28.50 rows=1850 width=16)
         ->  Column Store Materialize  (cost=0.00..28.50 rows=1850 width=16)
               ->  Seq Scan on parent_table  (cost=0.00..28.50 rows=1850 width=16)
(4 rows)

Implementation notes:

- This doesn't handle anything other than SeqScan (trying to access a
  table via indexscans or other methods is going to fail).
---
 src/backend/commands/explain.c          |  3 +++
 src/backend/executor/execProcnode.c     |  5 ++++
 src/backend/optimizer/path/costsize.c   | 27 +++++++++++++++++++
 src/backend/optimizer/plan/createplan.c | 48 +++++++++++++++++++++++++++++++++
 src/backend/optimizer/plan/setrefs.c    |  1 +
 src/backend/optimizer/util/pathnode.c   | 30 +++++++++++++++++++++
 src/include/nodes/nodes.h               |  1 +
 src/include/nodes/relation.h            | 11 ++++++++
 src/include/optimizer/cost.h            |  2 ++
 9 files changed, 128 insertions(+)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 5d06fa4..6192091 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1045,6 +1045,9 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		case T_Hash:
 			pname = sname = "Hash";
 			break;
+		case T_ColumnStoreMaterial:
+			pname = sname = "Column Store Materialize";
+			break;
 		default:
 			pname = sname = "???";
 			break;
diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c
index 03c2feb..5d68b3e 100644
--- a/src/backend/executor/execProcnode.c
+++ b/src/backend/executor/execProcnode.c
@@ -282,6 +282,11 @@ ExecInitNode(Plan *node, EState *estate, int eflags)
 													estate, eflags);
 			break;
 
+		case T_ColumnStoreMaterial:	/* FIXME this is wrong (no exec implementation yet!!!) */
+			result = (PlanState *) ExecInitMaterial((Material *) node,
+													estate, eflags);
+			break;
+
 		case T_Sort:
 			result = (PlanState *) ExecInitSort((Sort *) node,
 												estate, eflags);
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 7069f60..6624811 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -290,6 +290,33 @@ cost_samplescan(Path *path, PlannerInfo *root,
 }
 
 /*
+ * cost_colstore_material
+ *	  Fills in the cost and row estimate of a column store materialize path.
+ *
+ * 'path' is the path being costed; 'baserel' is the relation to be scanned
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
+ * 'info' is the column store being materialized
+ */
+void
+cost_colstore_material(ColumnStoreMaterialPath *path, PlannerInfo *root,
+			 RelOptInfo *baserel, ParamPathInfo *param_info,
+			 ColumnStoreOptInfo *info)
+{
+	/* Should only be applied to base relations */
+	Assert(baserel->relid > 0);
+	Assert(baserel->rtekind == RTE_RELATION);
+
+	/*
+	 * FIXME Very naive costing - just copy the subpath's costs and row count
+	 *       ('root', 'param_info' and 'info' are ignored). Ultimately this
+	 *       should at least consider column store sizes, ideally the 'join'.
+	 */
+	path->path.startup_cost = path->subpath->startup_cost;
+	path->path.total_cost = path->subpath->total_cost;
+	path->path.rows = path->subpath->rows;
+}
+
+/*
  * cost_index
  *	  Determines and returns the cost of scanning a relation using an index.
  *
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 404c6f5..28b96ca 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -55,6 +55,8 @@ static Plan *create_append_plan(PlannerInfo *root, AppendPath *best_path);
 static Plan *create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path);
 static Result *create_result_plan(PlannerInfo *root, ResultPath *best_path);
 static Material *create_material_plan(PlannerInfo *root, MaterialPath *best_path);
+static ColumnStoreMaterial *create_colstore_material_plan(PlannerInfo *root,
+											ColumnStoreMaterialPath *best_path);
 static Plan *create_unique_plan(PlannerInfo *root, UniquePath *best_path);
 static SeqScan *create_seqscan_plan(PlannerInfo *root, Path *best_path,
 					List *tlist, List *scan_clauses);
@@ -176,6 +178,7 @@ static EquivalenceMember *find_ec_member_for_tle(EquivalenceClass *ec,
 					   TargetEntry *tle,
 					   Relids relids);
 static Material *make_material(Plan *lefttree);
+static ColumnStoreMaterial *make_colstore_material(Plan *lefttree);
 
 
 /*
@@ -269,6 +272,10 @@ create_plan_recurse(PlannerInfo *root, Path *best_path)
 			plan = (Plan *) create_material_plan(root,
 												 (MaterialPath *) best_path);
 			break;
+		case T_ColumnStoreMaterial:
+			plan = (Plan *) create_colstore_material_plan(root,
+										(ColumnStoreMaterialPath *) best_path);
+			break;
 		case T_Unique:
 			plan = create_unique_plan(root,
 									  (UniquePath *) best_path);
@@ -899,6 +906,32 @@ create_material_plan(PlannerInfo *root, MaterialPath *best_path)
 }
 
 /*
+ * create_colstore_material_plan
+ *	  Create a ColumnStoreMaterial plan for 'best_path' and (recursively) plans
+ *	  for its subpaths.
+ *
+ *	  Returns the new ColumnStoreMaterial node, costed from 'best_path'.
+ */
+static ColumnStoreMaterial *
+create_colstore_material_plan(PlannerInfo *root, ColumnStoreMaterialPath *best_path)
+{
+	ColumnStoreMaterial   *plan;
+	Plan				  *subplan;
+
+	subplan = create_plan_recurse(root, best_path->subpath);
+
+	/* We don't want any excess columns in the materialized tuples */
+	disuse_physical_tlist(root, subplan, best_path->subpath);
+
+	plan = make_colstore_material(subplan);
+	/* cost fields are left for the caller to fill; copy them from the path */
+	copy_path_costsize(&plan->plan, (Path *) best_path);
+
+	return plan;
+}
+
+
+/*
  * create_unique_plan
  *	  Create a Unique plan for 'best_path' and (recursively) plans
  *	  for its subpaths.
@@ -4469,6 +4502,21 @@ make_material(Plan *lefttree)
 	return node;
 }
 
+static ColumnStoreMaterial *
+make_colstore_material(Plan *lefttree)
+{
+	ColumnStoreMaterial   *node = makeNode(ColumnStoreMaterial);
+	Plan	   *plan = &node->plan;
+
+	/* node reuses lefttree's target list; cost should be inserted by caller */
+	plan->targetlist = lefttree->targetlist;
+	plan->qual = NIL;			/* no quals at this node */
+	plan->lefttree = lefttree;	/* the only child */
+	plan->righttree = NULL;
+
+	return node;
+}
+
 /*
  * materialize_finished_plan: stick a Material node atop a completed plan
  *
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index ee8710d..e9e679e 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -599,6 +599,7 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
 
 		case T_Hash:
 		case T_Material:
+		case T_ColumnStoreMaterial:
 		case T_Sort:
 		case T_Unique:
 		case T_SetOp:
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 935bc2b..14602e8 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -698,6 +698,7 @@ add_path_precheck(RelOptInfo *parent_rel,
 Path *
 create_seqscan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
 {
+	ListCell   *cell;
 	Path	   *pathnode = makeNode(Path);
 
 	pathnode->pathtype = T_SeqScan;
@@ -708,6 +709,35 @@ create_seqscan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
 
 	cost_seqscan(pathnode, root, rel, pathnode->param_info);
 
+	/*
+	 * Add materialization node for each column store.
+	 *
+	 * FIXME At the moment we add materialization for each column store - this
+	 *       needs to only choose the required column stores.
+	 *
+	 * FIXME This probably needs to fix parameterization somehow, because the
+	 *       lower nodes may not contain the required columns.
+	 *
+	 * FIXME This also has to fix the tuple width, because right now we assume
+	 *       "full" width in all steps (which is nonsense). That's not a problem
+	 *       now, but once we start moving the materialization steps around
+	 *       (to get late materialization), this might be an issue.
+	 */
+	foreach (cell, rel->cstlist)
+	{
+		ColumnStoreOptInfo *info = (ColumnStoreOptInfo *)lfirst(cell);
+		ColumnStoreMaterialPath  *cstnode = makeNode(ColumnStoreMaterialPath);
+
+		cstnode->path.pathtype = T_ColumnStoreMaterial;
+		cstnode->path.parent = rel;
+		cstnode->colstore = info;
+		cstnode->subpath = pathnode;
+
+		cost_colstore_material(cstnode, root, rel, pathnode->param_info, info);
+
+		pathnode = (Path*)cstnode;
+	}
+
 	return pathnode;
 }
 
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index f04dbcf..bb664d4 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -253,6 +253,7 @@ typedef enum NodeTag
 	T_MinMaxAggInfo,
 	T_PlannerParamItem,
 	T_ColumnStoreOptInfo,
+	T_ColumnStoreMaterialPath,
 
 	/*
 	 * TAGS FOR MEMORY NODES (memnodes.h)
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 18a8891..a8b2a28 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -1042,6 +1042,17 @@ typedef struct MaterialPath
 } MaterialPath;
 
 /*
+ * ColumnStoreMaterialPath represents materialization of one column store
+ * on top of its subpath's output.  FIXME expand these comments.
+ */
+typedef struct ColumnStoreMaterialPath
+{
+	Path		path;		/* this path node */
+	Path	   *subpath;	/* child path (seqscan or another ColumnStoreMaterialPath) */
+	ColumnStoreOptInfo *colstore;	/* column store to materialize */
+} ColumnStoreMaterialPath;
+
+/*
  * UniquePath represents elimination of distinct rows from the output of
  * its subpath.
  *
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index dd43e45..01e3cef 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -70,6 +70,8 @@ extern void cost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
 			 ParamPathInfo *param_info);
 extern void cost_samplescan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
 				ParamPathInfo *param_info);
+extern void cost_colstore_material(ColumnStoreMaterialPath *path, PlannerInfo *root,
+			 RelOptInfo *baserel, ParamPathInfo *param_info, ColumnStoreOptInfo *info);
 extern void cost_index(IndexPath *path, PlannerInfo *root,
 		   double loop_count);
 extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
-- 
2.1.4

