From a30fa52fb58b518faa5d7c87c5ab6a37236d5c73 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Tue, 3 Oct 2017 23:45:44 -0700
Subject: [PATCH 2/4] More efficient AggState->pertrans iteration.

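Turns out the struct that AggStatePerTrans points to is so large that
indexing AggState->pertrans by transno requires a multiplication on
x86 for every element access. Avoid that by walking the array with a
pointer that is incremented alongside transno in each affected loop,
instead of recomputing &pertrans[transno] inside the loop body.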

Author: Andres Freund
---
 src/backend/executor/nodeAgg.c | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index a939ae99fa8..0408142764c 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -971,13 +971,14 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup *sort_pergroups, AggStat
 	int			numHashes = aggstate->num_hashes;
 	int			numTrans = aggstate->numtrans;
 	TupleTableSlot *combinedslot;
+	AggStatePerTrans pertrans;
 
 	/* compute required inputs for all aggregates */
 	combinedslot = ExecProject(aggstate->combinedproj);
 
-	for (transno = 0; transno < numTrans; transno++)
+	for (transno = 0, pertrans = &aggstate->pertrans[0];
+		 transno < numTrans; transno++, pertrans++)
 	{
-		AggStatePerTrans pertrans = &aggstate->pertrans[transno];
 		int			numTransInputs = pertrans->numTransInputs;
 		int			inputoff = pertrans->inputoff;
 		TupleTableSlot *slot;
@@ -1125,6 +1126,7 @@ combine_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 	int			transno;
 	int			numTrans = aggstate->numtrans;
 	TupleTableSlot *slot;
+	AggStatePerTrans pertrans;
 
 	/* combine not supported with grouping sets */
 	Assert(aggstate->phase->numsets <= 1);
@@ -1132,9 +1134,9 @@ combine_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
 	/* compute input for all aggregates */
 	slot = ExecProject(aggstate->combinedproj);
 
-	for (transno = 0; transno < numTrans; transno++)
+	for (transno = 0, pertrans = &aggstate->pertrans[0];
+		 transno < numTrans; transno++, pertrans++)
 	{
-		AggStatePerTrans pertrans = &aggstate->pertrans[transno];
 		AggStatePerGroup pergroupstate = &pergroup[transno];
 		FunctionCallInfo fcinfo = &pertrans->transfn_fcinfo;
 		int			inputoff = pertrans->inputoff;
@@ -2697,6 +2699,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	AggState   *aggstate;
 	AggStatePerAgg peraggs;
 	AggStatePerTrans pertransstates;
+	AggStatePerTrans pertrans;
 	Plan	   *outerPlan;
 	ExprContext *econtext;
 	int			numaggs,
@@ -3399,10 +3402,9 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	 */
 	combined_inputeval = NIL;
 	column_offset = 0;
-	for (transno = 0; transno < aggstate->numtrans; transno++)
+	for (transno = 0, pertrans = &pertransstates[0];
+		 transno < aggstate->numtrans; transno++, pertrans++)
 	{
-		AggStatePerTrans pertrans = &pertransstates[transno];
-
 		/*
 		 * Mark this per-trans state with its starting column in the combined
 		 * slot.
@@ -3940,6 +3942,7 @@ ExecEndAgg(AggState *node)
 	int			transno;
 	int			numGroupingSets = Max(node->maxsets, 1);
 	int			setno;
+	AggStatePerTrans pertrans;
 
 	/* Make sure we have closed any open tuplesorts */
 
@@ -3948,10 +3951,9 @@ ExecEndAgg(AggState *node)
 	if (node->sort_out)
 		tuplesort_end(node->sort_out);
 
-	for (transno = 0; transno < node->numtrans; transno++)
+	for (transno = 0, pertrans = &node->pertrans[0];
+		 transno < node->numtrans; transno++, pertrans++)
 	{
-		AggStatePerTrans pertrans = &node->pertrans[transno];
-
 		for (setno = 0; setno < numGroupingSets; setno++)
 		{
 			if (pertrans->sortstates[setno])
@@ -3988,6 +3990,7 @@ ExecReScanAgg(AggState *node)
 	int			transno;
 	int			numGroupingSets = Max(node->maxsets, 1);
 	int			setno;
+	AggStatePerTrans pertrans;
 
 	node->agg_done = false;
 
@@ -4019,12 +4022,11 @@ ExecReScanAgg(AggState *node)
 	}
 
 	/* Make sure we have closed any open tuplesorts */
-	for (transno = 0; transno < node->numtrans; transno++)
+	for (transno = 0, pertrans = &node->pertrans[0];
+		 transno < node->numtrans; transno++, pertrans++)
 	{
 		for (setno = 0; setno < numGroupingSets; setno++)
 		{
-			AggStatePerTrans pertrans = &node->pertrans[transno];
-
 			if (pertrans->sortstates[setno])
 			{
 				tuplesort_end(pertrans->sortstates[setno]);
-- 
2.14.1.536.g6867272d5b.dirty

