diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index f509c8e8f5..fe4a520305 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -1291,6 +1291,68 @@ project_aggregates(AggState *aggstate)
 	return NULL;
 }
 
+static bool
+find_aggregated_cols_walker(Node *node, Bitmapset **colnos)
+{
+	if (node == NULL)
+		return false;
+
+	if (IsA(node, Var))
+	{
+		Var		   *var = (Var *) node;
+
+		*colnos = bms_add_member(*colnos, var->varattno);
+
+		return false;
+	}
+	return expression_tree_walker(node, find_aggregated_cols_walker,
+								  (void *) colnos);
+}
+
+/*
+ * find_aggregated_cols
+ *	  Construct a bitmapset of the column numbers of aggregated Vars
+ *	  appearing in our targetlist and qual (HAVING clause)
+ */
+static Bitmapset *
+find_aggregated_cols(AggState *aggstate)
+{
+	Agg		   *node = (Agg *) aggstate->ss.ps.plan;
+	Bitmapset  *colnos = NULL;
+	ListCell   *temp;
+
+	/*
+	 * We only want the columns used by aggregations in the targetlist or qual
+	 */
+	if (node->plan.targetlist != NULL)
+	{
+		foreach(temp, (List *) node->plan.targetlist)
+		{
+			if (IsA(lfirst(temp), TargetEntry))
+			{
+				Node *node = (Node *)((TargetEntry *)lfirst(temp))->expr;
+				if (IsA(node, Aggref) || IsA(node, GroupingFunc))
+					find_aggregated_cols_walker(node, &colnos);
+			}
+		}
+	}
+
+	if (node->plan.qual != NULL)
+	{
+		foreach(temp, (List *) node->plan.qual)
+		{
+			if (IsA(lfirst(temp), TargetEntry))
+			{
+				Node *node = (Node *)((TargetEntry *)lfirst(temp))->expr;
+				if (IsA(node, Aggref) || IsA(node, GroupingFunc))
+					find_aggregated_cols_walker(node, &colnos);
+			}
+		}
+	}
+
+	return colnos;
+}
+
 /*
  * find_unaggregated_cols
  *	  Construct a bitmapset of the column numbers of un-aggregated Vars
@@ -1520,6 +1582,23 @@ find_hash_columns(AggState *aggstate)
 		for (i = 0; i < perhash->numCols; i++)
 			colnos = bms_add_member(colnos, grpColIdx[i]);
 
+		/*
+		 * Find the columns used by aggregations
+		 *
+		 * This is shared by the entire aggregation.
+		 */
+		if (aggstate->aggregated_columns == NULL)
+			aggstate->aggregated_columns = find_aggregated_cols(aggstate);
+
+		/*
+		 * The necessary columns to spill are either group keys or used by
+		 * aggregations
+		 *
+		 * This is the convenient place to calculate the necessary columns to
+		 * spill, because the group keys are different per hash.
+		 */
+		perhash->necessarySpillCols = bms_union(colnos, aggstate->aggregated_columns);
+
 		/*
 		 * First build mapping for columns directly hashed. These are the
 		 * first, because they'll be accessed when computing hash values and
@@ -1861,6 +1940,23 @@ lookup_hash_entries(AggState *aggstate)
 				hash_spill_init(spill, 0, perhash->aggnode->numGroups,
 								aggstate->hashentrysize);
 
+			AggStatePerHash perhash = &aggstate->perhash[aggstate->current_set];
+			for (int ttsno = 0; ttsno < slot->tts_nvalid; ttsno++)
+			{
+				/*
+				 * null the column out if it's unnecessary, the following
+				 * forming functions will shrink it.
+				 *
+				 * it must be a virtual tuple here, this function is only used
+				 * by the first round, tuples are from other node but not the
+				 * spilled files.
+				 *
+				 * note: ttsno is zero indexed, cols are one indexed.
+				 */
+				if (!bms_is_member(ttsno+1, perhash->necessarySpillCols))
+					slot->tts_isnull[ttsno] = true;
+			}
+
 			aggstate->hash_disk_used += hash_spill_tuple(spill, 0, slot, hash);
 		}
 	}
@@ -2623,7 +2719,15 @@ hash_spill_tuple(HashAggSpill *spill, int input_bits, TupleTableSlot *slot,
 
 	Assert(spill->partitions != NULL);
 
-	/*TODO: project needed attributes only */
+	/*
+	 * heap_form_minimal_tuple() if it's a virtual tuple,
+	 * tts_minimal_get_minimal_tuple() if it's a minimal tuple, which is
+	 * exactly what we want.
+	 *
+	 * when we spill the tuples from input, they are virtual tuples with some
+	 * columns nulled out, when we re-spill the tuples from spilling files,
+	 * they are minimal tuples which was already nulled out before.
+	 */
 	tuple = ExecFetchSlotMinimalTuple(slot, &shouldFree);
 
 	if (spill->partition_bits == 0)
@@ -3072,6 +3176,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	 */
 	aggstate->phases = palloc0(numPhases * sizeof(AggStatePerPhaseData));
 
+	aggstate->aggregated_columns = NULL;
 	aggstate->num_hashes = numHashes;
 	if (numHashes)
 	{
diff --git a/src/include/executor/nodeAgg.h b/src/include/executor/nodeAgg.h
index 68c9e5f540..3b61109b52 100644
--- a/src/include/executor/nodeAgg.h
+++ b/src/include/executor/nodeAgg.h
@@ -302,6 +302,7 @@ typedef struct AggStatePerHashData
 	AttrNumber *hashGrpColIdxInput; /* hash col indices in input slot */
 	AttrNumber *hashGrpColIdxHash;	/* indices in hash table tuples */
 	Agg		   *aggnode;		/* original Agg node, for numGroups etc. */
+	Bitmapset  *necessarySpillCols;  /* the necessary columns if spills */
 }			AggStatePerHashData;
 
 
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index b9803a28bd..0c034b5f67 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -2084,6 +2084,7 @@ typedef struct AggState
 	uint64		hash_disk_used; /* bytes of disk space used */
 	int			hash_batches_used;	/* batches used during entire execution */
 	List	   *hash_batches;	/* hash batches remaining to be processed */
+	Bitmapset  *aggregated_columns; /* the columns used by aggregations */
 
 	AggStatePerHash perhash;	/* array of per-hashtable data */
 	AggStatePerGroup *hash_pergroup;	/* grouping set indexed array of