From 79e51be780dd733c6789e519176b26ea79282ea8 Mon Sep 17 00:00:00 2001
From: Arseny Sher <sher-ars@ispras.ru>
Date: Fri, 10 Mar 2017 17:26:26 +0300
Subject: [PATCH 3/8] Base for reversed executor.

Framework for implementing the reversed executor. Substitutes the ExecutePlan
call with RunNode, which invokes pushTuple on the leaf nodes in the proper
order.

See README for more details.
---
 src/backend/executor/README         |  45 +++++++
 src/backend/executor/execMain.c     | 255 +++++++++++++++++-------------------
 src/backend/executor/execProcnode.c |  53 +++++++-
 src/include/executor/executor.h     |   3 +
 src/include/nodes/execnodes.h       |  11 ++
 5 files changed, 230 insertions(+), 137 deletions(-)

diff --git a/src/backend/executor/README b/src/backend/executor/README
index f1d1e4c76c..86f6e99e86 100644
--- a/src/backend/executor/README
+++ b/src/backend/executor/README
@@ -3,6 +3,51 @@ src/backend/executor/README
 The Postgres Executor
 =====================
 
+This is an attempt to implement a proof of concept of an executor with a
+push-based architecture like in [1]. We will call it the 'reversed' executor.
+For now we do not support both the reversed and the original executor at the
+same time, because that would involve either a lot of code copy-pasting or the
+time to avoid it, while our current goal is just a working proof of concept to
+estimate the benefits.
+
+Since this is a huge change, we need to outline the general strategy: the
+things we will start with, and how we will deal with the old code, remembering
+that we will reuse a great deal of it.
+
+Key points:
+* ExecProcNode is now a stub. All per-node code (ExecSomeNode, etc.) is
+  unreachable. However, we leave it in place to avoid a 19k-line removal
+  commit and to produce more useful diffs later; a lot of that code will be
+  reused.
+* The base for implementing the push model, common to all nodes, is in
+  execMain.c and execProcnode.c. We substitute ExecProcNode with pushTuple,
+  whose interface is described in the comment at its definition; this is the
+  only change to the node interface (see the sketch just below this list). We
+  make the necessary changes to execMain.c, namely to ExecutorRun, to run the
+  nodes in the proper order from the bottom up.
+* Then we are ready to implement the nodes one by one.
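+
+As a forward reference, the new entry point (declared in executor.h by this
+patch) looks like this:
+
+    bool pushTuple(TupleTableSlot *slot, PlanState *node, PlanState *pusher);
+
+A leaf is started with pushTuple(NULL, leaf, NULL); it then produces tuples
+and pushes each one into its parent with pushTuple(slot, parent, leaf), and so
+on upward, until the top-level node hands its tuples to the dest with
+pushTuple(slot, NULL, top_node), which calls SendReadyTuple.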
+
+At first,
+* parallel execution will not be supported.
+* subplans will not be supported.
+* ExecReScan will not be supported for now.
+* only the CMD_SELECT operation will be supported.
+* only the forward direction will be supported.
+* set-returning functions will not be supported either.
+
+In general, we try to treat the old code as follows:
+* As said above, leave it in place even if it is dead for now.
+* If it is not dead, but not yet updated for the reversed executor, remove it.
+  An example is the contents of ExecInitNode.
+* Sometimes we need to make minimal changes to an existing function, but these
+  changes would make it incompatible with existing code that is not yet
+  reworked.  In that case, to avoid deleting a lot of code, we just copy-paste
+  the function until a more generic solution is provided. An example is
+  heapgettup_pagemode and its 'reversed' analogue added for seqscan.
+
+
+[1] Efficiently Compiling Efficient Query Plans for Modern Hardware,
+    http://www.vldb.org/pvldb/vol4/p539-neumann.pdf
+
+The original README text follows below.
+
 The executor processes a tree of "plan nodes".  The plan tree is essentially
 a demand-pull pipeline of tuple processing operations.  Each node, when
 called, will produce the next tuple in its output sequence, or NULL if no
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index f629f0098f..bb25a4137c 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -63,6 +63,7 @@
 #include "utils/ruleutils.h"
 #include "utils/snapmgr.h"
 #include "utils/tqual.h"
+#include "executor/executor.h"
 
 
 /* Hooks for plugins to get control in ExecutorStart/Run/Finish/End */
@@ -79,13 +80,7 @@ static void InitPlan(QueryDesc *queryDesc, int eflags);
 static void CheckValidRowMarkRel(Relation rel, RowMarkType markType);
 static void ExecPostprocessPlan(EState *estate);
 static void ExecEndPlan(PlanState *planstate, EState *estate);
-static void ExecutePlan(EState *estate, PlanState *planstate,
-			bool use_parallel_mode,
-			CmdType operation,
-			bool sendTuples,
-			uint64 numberTuples,
-			ScanDirection direction,
-			DestReceiver *dest);
+static void RunNode(PlanState *planstate);
 static bool ExecCheckRTEPerms(RangeTblEntry *rte);
 static bool ExecCheckRTEPermsModified(Oid relOid, Oid userid,
 						  Bitmapset *modifiedCols,
@@ -341,18 +336,24 @@ standard_ExecutorRun(QueryDesc *queryDesc,
 	if (sendTuples)
 		(*dest->rStartup) (dest, operation, queryDesc->tupDesc);
 
+	/* set up state needed for sending tuples to the dest */
+	estate->es_current_tuple_count = 0;
+	estate->es_sendTuples = sendTuples;
+	estate->es_numberTuplesRequested = count;
+	estate->es_operation = operation;
+	estate->es_dest = dest;
+
+	/*
+	 * Set the direction.
+	 */
+	estate->es_direction = direction;
+
 	/*
 	 * run plan
 	 */
 	if (!ScanDirectionIsNoMovement(direction))
-		ExecutePlan(estate,
-					queryDesc->planstate,
-					queryDesc->plannedstmt->parallelModeNeeded,
-					operation,
-					sendTuples,
-					count,
-					direction,
-					dest);
+		/* Run each leaf in the right order */
+		RunNode(queryDesc->planstate);
 
 	/*
 	 * shutdown tuple receiver, if we started it
@@ -1533,126 +1534,6 @@ ExecEndPlan(PlanState *planstate, EState *estate)
 	}
 }
 
-/* ----------------------------------------------------------------
- *		ExecutePlan
- *
- *		Processes the query plan until we have retrieved 'numberTuples' tuples,
- *		moving in the specified direction.
- *
- *		Runs to completion if numberTuples is 0
- *
- * Note: the ctid attribute is a 'junk' attribute that is removed before the
- * user can see it
- * ----------------------------------------------------------------
- */
-static void
-ExecutePlan(EState *estate,
-			PlanState *planstate,
-			bool use_parallel_mode,
-			CmdType operation,
-			bool sendTuples,
-			uint64 numberTuples,
-			ScanDirection direction,
-			DestReceiver *dest)
-{
-	TupleTableSlot *slot;
-	uint64		current_tuple_count;
-
-	/*
-	 * initialize local variables
-	 */
-	current_tuple_count = 0;
-
-	/*
-	 * Set the direction.
-	 */
-	estate->es_direction = direction;
-
-	/*
-	 * If a tuple count was supplied, we must force the plan to run without
-	 * parallelism, because we might exit early.  Also disable parallelism
-	 * when writing into a relation, because no database changes are allowed
-	 * in parallel mode.
-	 */
-	if (numberTuples || dest->mydest == DestIntoRel)
-		use_parallel_mode = false;
-
-	if (use_parallel_mode)
-		EnterParallelMode();
-
-	/*
-	 * Loop until we've processed the proper number of tuples from the plan.
-	 */
-	for (;;)
-	{
-		/* Reset the per-output-tuple exprcontext */
-		ResetPerTupleExprContext(estate);
-
-		/*
-		 * Execute the plan and obtain a tuple
-		 */
-		slot = ExecProcNode(planstate);
-
-		/*
-		 * if the tuple is null, then we assume there is nothing more to
-		 * process so we just end the loop...
-		 */
-		if (TupIsNull(slot))
-		{
-			/* Allow nodes to release or shut down resources. */
-			(void) ExecShutdownNode(planstate);
-			break;
-		}
-
-		/*
-		 * If we have a junk filter, then project a new tuple with the junk
-		 * removed.
-		 *
-		 * Store this new "clean" tuple in the junkfilter's resultSlot.
-		 * (Formerly, we stored it back over the "dirty" tuple, which is WRONG
-		 * because that tuple slot has the wrong descriptor.)
-		 */
-		if (estate->es_junkFilter != NULL)
-			slot = ExecFilterJunk(estate->es_junkFilter, slot);
-
-		/*
-		 * If we are supposed to send the tuple somewhere, do so. (In
-		 * practice, this is probably always the case at this point.)
-		 */
-		if (sendTuples)
-		{
-			/*
-			 * If we are not able to send the tuple, we assume the destination
-			 * has closed and no more tuples can be sent. If that's the case,
-			 * end the loop.
-			 */
-			if (!((*dest->receiveSlot) (slot, dest)))
-				break;
-		}
-
-		/*
-		 * Count tuples processed, if this is a SELECT.  (For other operation
-		 * types, the ModifyTable plan node must count the appropriate
-		 * events.)
-		 */
-		if (operation == CMD_SELECT)
-			(estate->es_processed)++;
-
-		/*
-		 * check our tuple count.. if we've processed the proper number then
-		 * quit, else loop again and process more tuples.  Zero numberTuples
-		 * means no limit.
-		 */
-		current_tuple_count++;
-		if (numberTuples && numberTuples == current_tuple_count)
-			break;
-	}
-
-	if (use_parallel_mode)
-		ExitParallelMode();
-}
-
-
 /*
  * ExecRelCheck --- check that tuple meets constraints for result relation
  *
@@ -3291,3 +3172,107 @@ ExecBuildSlotPartitionKeyDescription(Relation rel,
 
 	return buf.data;
 }
+
+/*
+ * This function pushes a ready tuple to its destination. It should be
+ * called by the top-level PlanState.
+ * For now, the state needed for this is kept in estate, specifically
+ * es_current_tuple_count, es_sendTuples, es_numberTuplesRequested (the old
+ * numberTuples), es_operation and es_dest.
+ *
+ * slot is the tuple to push
+ * planstate is the top-level node
+ * Returns true if we are ready to accept more tuples, false otherwise.
+ */
+bool
+SendReadyTuple(TupleTableSlot *slot, PlanState *planstate)
+{
+	EState *estate;
+	bool sendTuples;
+	CmdType operation;
+	DestReceiver *dest;
+
+	estate = planstate->state;
+	sendTuples = estate->es_sendTuples;
+	operation = estate->es_operation;
+	dest = estate->es_dest;
+
+	if (TupIsNull(slot))
+	{
+		/* Allow nodes to release or shut down resources. */
+		(void) ExecShutdownNode(planstate);
+		return false;
+	}
+
+	/*
+	 * If we have a junk filter, then project a new tuple with the junk
+	 * removed.
+	 *
+	 * Store this new "clean" tuple in the junkfilter's resultSlot.
+	 * (Formerly, we stored it back over the "dirty" tuple, which is WRONG
+	 * because that tuple slot has the wrong descriptor.)
+	 */
+	if (estate->es_junkFilter != NULL)
+		slot = ExecFilterJunk(estate->es_junkFilter, slot);
+
+	/*
+	 * If we are supposed to send the tuple somewhere, do so. (In
+	 * practice, this is probably always the case at this point.)
+	 */
+	if (sendTuples)
+	{
+		/*
+		 * If we are not able to send the tuple, we assume the destination
+		 * has closed and no more tuples can be sent.
+		 */
+		if (!((*dest->receiveSlot) (slot, dest)))
+			return false;
+	}
+
+	/*
+	 * Count tuples processed, if this is a SELECT.  (For other operation
+	 * types, the ModifyTable plan node must count the appropriate
+	 * events.)
+	 */
+	if (operation == CMD_SELECT)
+		(estate->es_processed)++;
+
+	/*
+	 * check our tuple count.. if we've processed the proper number then
+	 * quit, else process more tuples.  Zero numberTuplesRequested
+	 * means no limit.
+	 */
+	estate->es_current_tuple_count++;
+	if (estate->es_numberTuplesRequested &&
+		estate->es_numberTuplesRequested == estate->es_current_tuple_count)
+		return false;
+
+	ResetPerTupleExprContext(estate);
+	return true;
+}
+
+/*
+ * When pushing, we have to call pushTuple on each leaf of the tree in the
+ * correct order: first the inner sides, then the outer ones. This function
+ * does exactly that, as the example below illustrates.
+ */
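+/*
+ * For example, given a plan tree like
+ *
+ *           MergeJoin
+ *          /         \
+ *    Sort (outer)   Sort (inner)
+ *       |               |
+ *    SeqScan         SeqScan
+ *
+ * RunNode descends into the inner Sort first and starts its SeqScan leaf
+ * with pushTuple(NULL, leaf, NULL); that leaf pushes all of its tuples
+ * upward before RunNode starts the SeqScan under the outer Sort. (The join
+ * node here is hypothetical; no such nodes are implemented yet.)
+ */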
+void
+RunNode(PlanState *planstate)
+{
+	Assert(planstate != NULL);
+
+	if (innerPlanState(planstate) != NULL)
+	{
+		RunNode(innerPlanState(planstate));
+		/* We assume that if the inner node exists, the outer one exists too */
+		RunNode(outerPlanState(planstate));
+		return;
+	}
+	if (outerPlanState(planstate) != NULL)
+	{
+		RunNode(outerPlanState(planstate));
+		return;
+	}
+
+	/* node has no children, so it is a leaf */
+	pushTuple(NULL, planstate, NULL);
+}
diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c
index 649d1e58f6..a95cfe5430 100644
--- a/src/backend/executor/execProcnode.c
+++ b/src/backend/executor/execProcnode.c
@@ -155,7 +155,6 @@ ExecInitNode(Plan *node, EState *estate, int eflags, PlanState *parent)
 			result = NULL;		/* keep compiler quiet */
 			break;
 	}
-	return NULL;
 
 	/* Set up instrumentation for this node if requested */
 	if (estate->es_instrument)
@@ -164,7 +163,6 @@ ExecInitNode(Plan *node, EState *estate, int eflags, PlanState *parent)
 	return result;
 }
 
-
 /*
  * Unsupported, left to avoid deleting 19k lines of existing code
  */
@@ -175,6 +173,57 @@ ExecProcNode(PlanState *node)
 	return NULL;
 }
 
+/*
+ * Instead of the old ExecProcNode, here we have the function pushTuple,
+ * which pushes one tuple.
+ * 'slot' is the tuple to push
+ * 'node' is the receiver of the tuple
+ * 'pusher' is the sender of the tuple; its parent is 'node'. We need it to
+ * distinguish inner and outer pushes.
+ * Returns true if the node is still accepting tuples, false if not.
+ * ReScans are not supported yet.
+ * In general, pushing a tuple (even NULL) into a node which has previously
+ * returned 'false' is not allowed, i.e. the behaviour is undefined; however,
+ * we will try to catch such situations with asserts.
+ * When a lower node sends a NULL tuple to an upper node, we do not bother
+ * for now to return a meaningful bool result and just return false by
+ * convention.
+ */
+bool
+pushTuple(TupleTableSlot *slot, PlanState *node, PlanState *pusher)
+{
+	bool push_from_outer;
+
+	CHECK_FOR_INTERRUPTS();
+
+	/*
+	 * If the receiver is NULL, then the pusher is the top-level node, so we
+	 * need to send the tuple to the dest.
+	 */
+	if (!node)
+	{
+		return SendReadyTuple(slot, pusher);
+	}
+
+	/*
+	 * If the pusher is NULL, then the node is a bottom node, another special
+	 * case: bottom nodes obviously need neither a tuple nor a pusher.
+	 */
+	if (!pusher)
+	{
+		switch (nodeTag(node))
+		{
+			default:
+				elog(ERROR, "bottom node type not supported: %d",
+					 (int) nodeTag(node));
+				return false;
+		}
+	}
+
+	/* does push come from the outer side? */
+	push_from_outer = outerPlanState(node) == pusher;
+
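+	/*
+	 * Dispatch on the node type is expected to go here as the nodes are
+	 * implemented; a hypothetical sketch (pushTupleToSomeNode is an invented
+	 * name, used purely for illustration):
+	 *
+	 *	switch (nodeTag(node))
+	 *	{
+	 *		case T_SomeNodeState:
+	 *			return pushTupleToSomeNode(slot, node, push_from_outer);
+	 *		...
+	 *	}
+	 *
+	 * Until then, any push into a non-leaf node fails below.
+	 */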
+	elog(ERROR, "node type not supported: %d", (int) nodeTag(node));
+}
+
 /* ----------------------------------------------------------------
  * Unsupported too; we don't need it in push model
  * ----------------------------------------------------------------
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 716362970f..eb4e27ce21 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -179,6 +179,7 @@ extern void ExecutorRun(QueryDesc *queryDesc,
 			ScanDirection direction, uint64 count);
 extern void standard_ExecutorRun(QueryDesc *queryDesc,
 					 ScanDirection direction, uint64 count);
+extern bool SendReadyTuple(TupleTableSlot *slot, PlanState *planstate);
 extern void ExecutorFinish(QueryDesc *queryDesc);
 extern void standard_ExecutorFinish(QueryDesc *queryDesc);
 extern void ExecutorEnd(QueryDesc *queryDesc);
@@ -240,6 +241,8 @@ extern TupleTableSlot *ExecProcNode(PlanState *node);
 extern Node *MultiExecProcNode(PlanState *node);
 extern void ExecEndNode(PlanState *node);
 extern bool ExecShutdownNode(PlanState *node);
+extern bool pushTuple(TupleTableSlot *slot, PlanState *node,
+					  PlanState *pusher);
 
 /*
  * prototypes from functions in execQual.c
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 738f098b00..da7fd9c7ac 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -28,6 +28,7 @@
 #include "utils/tuplesort.h"
 #include "nodes/tidbitmap.h"
 #include "storage/condition_variable.h"
+#include "tcop/dest.h" /* for DestReceiver type in EState */
 
 
 /* ----------------
@@ -416,6 +417,16 @@ typedef struct EState
 	List	   *es_auxmodifytables;		/* List of secondary ModifyTableStates */
 
 	/*
+	 * State needed to push tuples to the dest in the push model; technically,
+	 * these are the local variables of the old ExecutePlan.
+	 */
+	uint64		es_current_tuple_count;
+	bool		es_sendTuples;
+	uint64		es_numberTuplesRequested;
+	CmdType		es_operation;
+	DestReceiver *es_dest;
+
+	/*
 	 * this ExprContext is for per-output-tuple operations, such as constraint
 	 * checks and index-value computations.  It will be reset for each output
 	 * tuple.  Note that it will be created only if needed.
-- 
2.11.0

