diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 31554fd867..adc82028ba 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -250,6 +250,8 @@ struct Tuplesortstate
 	bool		bounded;		/* did caller specify a maximum number of
 								 * tuples to return? */
 	bool		boundUsed;		/* true if we made use of a bounded heap */
+	bool		genTupleCxt;	/* true if we should use a generation.c memory
+								 * context for tuple storage */
 	int			bound;			/* if bounded, the maximum number of tuples */
 	bool		tuples;			/* Can SortTuple.tuple ever be set? */
 	int64		availMem;		/* remaining memory available, in bytes */
@@ -618,7 +620,7 @@ struct Sharedsort
 
 static Tuplesortstate *tuplesort_begin_common(int workMem,
 											  SortCoordinate coordinate,
-											  int sortopt);
+											  int sortopt, bool genTupleCxt);
 static void tuplesort_begin_batch(Tuplesortstate *state);
 static void puttuple_common(Tuplesortstate *state, SortTuple *tuple);
 static bool consider_abort_common(Tuplesortstate *state);
@@ -842,7 +844,8 @@ qsort_tuple_int32_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state)
  */
 static Tuplesortstate *
-tuplesort_begin_common(int workMem, SortCoordinate coordinate, int sortopt)
+tuplesort_begin_common(int workMem, SortCoordinate coordinate, int sortopt,
+					   bool genTupleCxt)
 {
 	Tuplesortstate *state;
 	MemoryContext maincontext;
@@ -907,6 +910,8 @@ tuplesort_begin_common(int workMem, SortCoordinate coordinate, int sortopt)
 	state->memtupsize = INITIAL_MEMTUPSIZE;
 	state->memtuples = NULL;
 
+	state->genTupleCxt = genTupleCxt;
+
 	/*
 	 * After all of the other non-parallel-related state, we setup all of the
 	 * state needed for each batch.
@@ -966,21 +971,16 @@ tuplesort_begin_batch(Tuplesortstate *state)
 	 * A dedicated child context used exclusively for caller passed tuples
 	 * eases memory management.  Resetting at key points reduces
 	 * fragmentation. Note that the memtuples array of SortTuples is allocated
 	 * in the parent context, not this context, because there is no need to
-	 * free memtuples early.  For bounded sorts, tuples may be pfreed in any
-	 * order, so we use a regular aset.c context so that it can make use of
-	 * free'd memory.  When the sort is not bounded, we make use of a
-	 * generation.c context as this keeps allocations more compact with less
-	 * wastage.  Allocations are also slightly more CPU efficient.
+	 * free memtuples early.
 	 */
-	if (state->sortopt & TUPLESORT_ALLOWBOUNDED)
+	if (state->genTupleCxt)
+		state->tuplecontext = GenerationContextCreate(state->sortcontext,
+													  "Caller tuples",
+													  ALLOCSET_DEFAULT_SIZES);
+	else
 		state->tuplecontext = AllocSetContextCreate(state->sortcontext,
 													"Caller tuples",
 													ALLOCSET_DEFAULT_SIZES);
-	else
-		state->tuplecontext = GenerationContextCreate(state->sortcontext,
-													  "Caller tuples",
-													  ALLOCSET_DEFAULT_SIZES);
-
 	state->status = TSS_INITIAL;
 	state->bounded = false;
@@ -1033,11 +1033,21 @@ tuplesort_begin_heap(TupleDesc tupDesc,
 					 bool *nullsFirstFlags,
 					 int workMem, SortCoordinate coordinate, int sortopt)
 {
-	Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
-												   sortopt);
+	Tuplesortstate *state;
 	MemoryContext oldcontext;
+	bool		genTupleCxt;
 	int			i;
 
+	/*
+	 * For bounded sorts, tuples may be pfreed in any order, so we use a
+	 * regular aset.c context so that it can make use of free'd memory.  When
+	 * the sort is not bounded, we make use of a generation.c context as this
+	 * keeps allocations more compact with less wastage.  Allocations are also
+	 * slightly more CPU efficient.
+	 */
+	genTupleCxt = (sortopt & TUPLESORT_ALLOWBOUNDED) == 0;
+	state = tuplesort_begin_common(workMem, coordinate, sortopt, genTupleCxt);
+
 	oldcontext = MemoryContextSwitchTo(state->maincontext);
 
 	AssertArg(nkeys > 0);
@@ -1107,14 +1117,24 @@ tuplesort_begin_cluster(TupleDesc tupDesc,
 						int workMem,
 						SortCoordinate coordinate, int sortopt)
 {
-	Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
-												   sortopt);
+	Tuplesortstate *state;
 	BTScanInsert indexScanKey;
 	MemoryContext oldcontext;
+	bool		genTupleCxt;
 	int			i;
 
 	Assert(indexRel->rd_rel->relam == BTREE_AM_OID);
 
+	/*
+	 * For bounded sorts, tuples may be pfreed in any order, so we use a
+	 * regular aset.c context so that it can make use of free'd memory.  When
+	 * the sort is not bounded, we make use of a generation.c context as this
+	 * keeps allocations more compact with less wastage.  Allocations are also
+	 * slightly more CPU efficient.
+	 */
+	genTupleCxt = (sortopt & TUPLESORT_ALLOWBOUNDED) == 0;
+	state = tuplesort_begin_common(workMem, coordinate, sortopt, genTupleCxt);
+
 	oldcontext = MemoryContextSwitchTo(state->maincontext);
 
 #ifdef TRACE_SORT
@@ -1214,12 +1234,18 @@ tuplesort_begin_index_btree(Relation heapRel,
 							SortCoordinate coordinate,
 							int sortopt)
 {
-	Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
-												   sortopt);
+	Tuplesortstate *state;
 	BTScanInsert indexScanKey;
 	MemoryContext oldcontext;
 	int			i;
 
+	/*
+	 * We pass false for genTupleCxt due to index_form_tuple being called
+	 * in the tuplecontext.  We don't want index_form_tuple() allocating
+	 * memory in the generation context as generation.c does not reuse
+	 * pfree'd memory very well.
+	 */
+	state = tuplesort_begin_common(workMem, coordinate, sortopt, false);
 	oldcontext = MemoryContextSwitchTo(state->maincontext);
 
 #ifdef TRACE_SORT
@@ -1296,10 +1322,17 @@ tuplesort_begin_index_hash(Relation heapRel,
 						   SortCoordinate coordinate,
 						   int sortopt)
 {
-	Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
-												   sortopt);
+	Tuplesortstate *state;
 	MemoryContext oldcontext;
 
+	/*
+	 * We pass false for genTupleCxt due to index_form_tuple being called
+	 * in the tuplecontext.  We don't want index_form_tuple() allocating
+	 * memory in the generation context as generation.c does not reuse
+	 * pfree'd memory very well.
+	 */
+	state = tuplesort_begin_common(workMem, coordinate, sortopt, false);
+
 	oldcontext = MemoryContextSwitchTo(state->maincontext);
 
 #ifdef TRACE_SORT
@@ -1341,11 +1374,18 @@ tuplesort_begin_index_gist(Relation heapRel,
 						   SortCoordinate coordinate,
 						   int sortopt)
 {
-	Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
-												   sortopt);
+	Tuplesortstate *state;
 	MemoryContext oldcontext;
 	int			i;
 
+	/*
+	 * We pass false for genTupleCxt due to index_form_tuple being called
+	 * in the tuplecontext.  We don't want index_form_tuple() allocating
+	 * memory in the generation context as generation.c does not reuse
+	 * pfree'd memory very well.
+	 */
+	state = tuplesort_begin_common(workMem, coordinate, sortopt, false);
+
 	oldcontext = MemoryContextSwitchTo(state->sortcontext);
 
 #ifdef TRACE_SORT
@@ -1397,11 +1437,21 @@ tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
 					  bool nullsFirstFlag, int workMem,
 					  SortCoordinate coordinate, int sortopt)
 {
-	Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
-												   sortopt);
+	Tuplesortstate *state;
 	MemoryContext oldcontext;
 	int16		typlen;
 	bool		typbyval;
+	bool		genTupleCxt;
+
+	/*
+	 * For bounded sorts, tuples may be pfreed in any order, so we use a
+	 * regular aset.c context so that it can make use of free'd memory.  When
+	 * the sort is not bounded, we make use of a generation.c context as this
+	 * keeps allocations more compact with less wastage.  Allocations are also
+	 * slightly more CPU efficient.
+	 */
+	genTupleCxt = (sortopt & TUPLESORT_ALLOWBOUNDED) == 0;
+	state = tuplesort_begin_common(workMem, coordinate, sortopt, genTupleCxt);
 
 	oldcontext = MemoryContextSwitchTo(state->maincontext);
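
A note on the allocator tradeoff the patch's new comments describe: generation.c hands out chunks sequentially from large blocks and does not put individually pfree'd chunks on a freelist, so space only comes back once an entire block empties; aset.c keeps per-size freelists, so a pfree'd chunk can be handed straight back to the next palloc of that size class. Below is a minimal backend-side sketch of the difference, assuming a backend of the same vintage as the patch (GenerationContextCreate() already accepting the ALLOCSET_DEFAULT_SIZES triple, as the patch itself relies on); the context names and the function are illustrative, not part of the patch:

#include "postgres.h"

#include "utils/memutils.h"

/* Sketch only: contrast chunk reuse in generation.c vs. aset.c. */
static void
context_reuse_sketch(void)
{
	MemoryContext gencxt = GenerationContextCreate(CurrentMemoryContext,
												   "generation demo",
												   ALLOCSET_DEFAULT_SIZES);
	MemoryContext asetcxt = AllocSetContextCreate(CurrentMemoryContext,
												  "aset demo",
												  ALLOCSET_DEFAULT_SIZES);
	void	   *chunk;

	/*
	 * generation.c: the pfree'd chunk is not recycled, so repeating this
	 * pattern keeps consuming fresh block space until a block empties.
	 */
	chunk = MemoryContextAlloc(gencxt, 64);
	pfree(chunk);
	chunk = MemoryContextAlloc(gencxt, 64);		/* generally fresh space */

	/*
	 * aset.c: the pfree'd chunk lands on a freelist for its size class,
	 * so the next palloc of that size can reuse it immediately.
	 */
	chunk = MemoryContextAlloc(asetcxt, 64);
	pfree(chunk);
	chunk = MemoryContextAlloc(asetcxt, 64);	/* likely the freed chunk */

	MemoryContextDelete(gencxt);
	MemoryContextDelete(asetcxt);
}

This is also why the unbounded case suits generation.c: tuples are palloc'd once, never pfree'd individually, and released en masse when tuplecontext is reset, so the denser sequential blocks are a pure win.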
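Seen from the caller side the patch changes nothing: the sortopt bitmask still drives the decision, now routed through the new genTupleCxt argument by each tuplesort_begin_* wrapper. A hypothetical helper showing the bounded (LIMIT-style) path; begin_limit_sort() and its parameter list are illustrative, while the tuplesort_* calls are the real API:

#include "postgres.h"

#include "miscadmin.h"
#include "utils/tuplesort.h"

/*
 * Hypothetical helper: start a top-N heap sort.  Passing
 * TUPLESORT_ALLOWBOUNDED makes tuplesort_begin_heap() pick the aset.c
 * tuple context, since bounded-heap eviction pfrees tuples in
 * data-dependent order.
 */
static Tuplesortstate *
begin_limit_sort(TupleDesc tupDesc, int nkeys, AttrNumber *attNums,
				 Oid *sortOperators, Oid *sortCollations,
				 bool *nullsFirstFlags, int64 limit)
{
	Tuplesortstate *state;

	state = tuplesort_begin_heap(tupDesc, nkeys, attNums,
								 sortOperators, sortCollations,
								 nullsFirstFlags, work_mem,
								 NULL, TUPLESORT_ALLOWBOUNDED);

	/* Announce the bound so the sort may switch to a bounded heap. */
	tuplesort_set_bound(state, limit);

	return state;
}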
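The out-of-order frees themselves come from the bounded heap: once the bound's worth of tuples is held, each new tuple either evicts the current worst one (which is pfree'd) or is discarded immediately, in an order dictated by the input. A standalone plain-C illustration of that allocation pattern; this is not PostgreSQL code, and the linear worst-element scan stands in for tuplesort's heap:

#include <stdio.h>
#include <stdlib.h>

#define BOUND 4					/* keep the 4 smallest values */

int
main(void)
{
	int			input[] = {42, 7, 99, 3, 18, 1, 56, 4};
	int			n = sizeof(input) / sizeof(input[0]);
	int		   *kept[BOUND];

	for (int i = 0; i < n; i++)
	{
		int		   *tup = malloc(sizeof(int));

		*tup = input[i];
		if (i < BOUND)
			kept[i] = tup;		/* bound not reached: keep unconditionally */
		else
		{
			int			worst = 0;

			/* find the largest (worst) kept value */
			for (int j = 1; j < BOUND; j++)
				if (*kept[j] > *kept[worst])
					worst = j;

			if (*tup < *kept[worst])
			{
				free(kept[worst]);	/* eviction: frees follow the data,
									 * not the allocation order */
				kept[worst] = tup;
			}
			else
				free(tup);		/* newcomer loses: freed at once */
		}
	}

	for (int i = 0; i < BOUND; i++)
	{
		printf("%d\n", *kept[i]);
		free(kept[i]);
	}
	return 0;
}

Under this pattern a freelist allocator holds steady at roughly BOUND live chunks, whereas a sequential allocator like generation.c keeps growing until whole blocks happen to empty, which is exactly why the patch keeps aset.c for the TUPLESORT_ALLOWBOUNDED case.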