diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c
index 1f36d675a1..493e1a28b3 100644
--- a/src/backend/executor/execTuples.c
+++ b/src/backend/executor/execTuples.c
@@ -989,6 +989,118 @@ tts_buffer_heap_store_tuple(TupleTableSlot *slot, HeapTuple tuple,
 	}
 }
 
+/*
+ * slot_deform_heap_tuple_internal
+ *		An always inline helper function for use in slot_deform_heap_tuple to
+ *		allow the compiler to emit specialized versions of this function for
+ *		various combinations of "slow" and "hasnulls".  For example, if a
+ *		given tuple has no nulls, then we needn't check "hasnulls" for every
+ *		attribute that we're deforming.  The caller can just call this
+ *		function with hasnulls set to constant-false and have the compiler
+ *		remove the constant-false branches and emit more optimal code.
+ *
+ * Returns the next attnum to deform, which equals natts once all requested
+ * attributes have been deformed.  When "slow" is false, we return early,
+ * just after handling the first NULL or non-fixed-width attribute.  *offp
+ * is an input and output parameter: on entry it's the byte offset within the
+ * tuple data to start deforming from and, on return, it's the offset where
+ * the next attribute should be deformed from.  *slowp is only ever set to
+ * true, meaning further deforming of this tuple must pass "slow" as true.
+ *
+ * Callers cannot assume when we return natts (i.e. all requested
+ * attributes have been deformed) that slow mode isn't required for any
+ * additional deforming as the final attribute may have caused a switch to
+ * slow mode.
+ */
+static pg_attribute_always_inline int
+slot_deform_heap_tuple_internal(TupleTableSlot *slot, HeapTuple tuple,
+								int attnum, int natts, bool slow,
+								bool hasnulls, uint32 *offp, bool *slowp)
+{
+	TupleDesc tupleDesc = slot->tts_tupleDescriptor;
+	Datum *values = slot->tts_values;
+	bool *isnull = slot->tts_isnull;
+	HeapTupleHeader tup = tuple->t_data;
+	char *tp;				 /* ptr to tuple data */
+	bits8 *bp = tup->t_bits; /* ptr to null bitmap in tuple */
+	bool slownext = false; /* leave fast mode after this attribute? */
+
+	tp = (char *) tup + tup->t_hoff;
+
+	for (; attnum < natts; attnum++)
+	{
+		CompactAttribute *thisatt = TupleDescCompactAttr(tupleDesc, attnum);
+
+		if (hasnulls && att_isnull(attnum, bp))
+		{
+			values[attnum] = (Datum) 0;
+			isnull[attnum] = true;
+			if (!slow)
+			{
+				*slowp = true; /* can't use attcacheoff anymore */
+				return attnum + 1; /* NULL handled; resume at next attr */
+			}
+			else
+				continue;
+		}
+
+		isnull[attnum] = false;
+
+		/* calculate the offset of this attribute */
+		if (!slow && thisatt->attcacheoff >= 0)
+			*offp = thisatt->attcacheoff;
+		else if (thisatt->attlen == -1)
+		{
+			/*
+			 * We can only cache the offset for a varlena attribute if the
+			 * offset is already suitably aligned, so that there would be no
+			 * pad bytes in any case: then the offset will be valid for either
+			 * an aligned or unaligned value.
+			 */
+			if (!slow && *offp == att_nominal_alignby(*offp, thisatt->attalignby))
+				thisatt->attcacheoff = *offp;
+			else
+			{
+				*offp = att_pointer_alignby(*offp,
+											thisatt->attalignby,
+											-1,
+											tp + *offp);
+
+				if (!slow)
+					slownext = true; /* finish this attr, then go slow */
+			}
+		}
+		else
+		{
+			/* not varlena, so safe to use att_nominal_alignby */
+			*offp = att_nominal_alignby(*offp, thisatt->attalignby);
+
+			if (!slow)
+				thisatt->attcacheoff = *offp;
+		}
+
+		values[attnum] = fetchatt(thisatt, tp + *offp);
+
+		*offp = att_addlength_pointer(*offp, thisatt->attlen, tp + *offp);
+
+		/* check if we need to switch to slow mode */
+		if (!slow)
+		{
+			/*
+			 * We're unable to deform any further if the above code set
+			 * 'slownext', or if this isn't a fixed-width attribute.
+			 */
+			if (slownext || thisatt->attlen <= 0)
+			{
+				*slowp = true; /* can't use attcacheoff anymore */
+				return attnum + 1;
+			}
+		}
+	}
+
+	return natts;
+}
+
 /*
  * slot_deform_heap_tuple
  *		Given a TupleTableSlot, extract data from the slot's physical tuple
@@ -1007,16 +1119,10 @@ static pg_attribute_always_inline void
 slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
 					   int natts)
 {
-	TupleDesc	tupleDesc = slot->tts_tupleDescriptor;
-	Datum	   *values = slot->tts_values;
-	bool	   *isnull = slot->tts_isnull;
-	HeapTupleHeader tup = tuple->t_data;
-	bool		hasnulls = HeapTupleHasNulls(tuple);
-	int			attnum;
-	char	   *tp;				/* ptr to tuple data */
-	uint32		off;			/* offset in tuple data */
-	bits8	   *bp = tup->t_bits;	/* ptr to null bitmap in tuple */
-	bool		slow;			/* can we use/set attcacheoff? */
+	bool hasnulls = HeapTupleHasNulls(tuple);
+	int attnum;
+	uint32 off; /* offset in tuple data */
+	bool slow;	/* can we use/set attcacheoff? */
 
 	/* We can only fetch as many attributes as the tuple has. */
 	natts = Min(HeapTupleHeaderGetNatts(tuple->t_data), natts);
@@ -1039,57 +1145,52 @@ slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
 		slow = TTS_SLOW(slot);
 	}
 
-	tp = (char *) tup + tup->t_hoff;
-
-	for (; attnum < natts; attnum++)
+	/*
+	 * If 'slow' isn't set, try deforming using deforming code that does not
+	 * contain any of the extra checks required for non-fixed offset
+	 * deforming.  During deforming, if or when we find a NULL or a variable
+	 * length attribute, we'll switch to a deforming method which includes the
+	 * extra code required for non-fixed offset deforming, a.k.a slow mode.
+	 * Because this is performance critical, we inline
+	 * slot_deform_heap_tuple_internal passing the 'slow' and 'hasnulls'
+	 * parameters as constants to allow the compiler to emit specialized code
+	 * with the known-const false comparisons and subsequent branches removed.
+	 */
+	if (!slow)
 	{
-		CompactAttribute *thisatt = TupleDescCompactAttr(tupleDesc, attnum);
-
-		if (hasnulls && att_isnull(attnum, bp))
-		{
-			values[attnum] = (Datum) 0;
-			isnull[attnum] = true;
-			slow = true;		/* can't use attcacheoff anymore */
-			continue;
-		}
-
-		isnull[attnum] = false;
-
-		if (!slow && thisatt->attcacheoff >= 0)
-			off = thisatt->attcacheoff;
-		else if (thisatt->attlen == -1)
-		{
-			/*
-			 * We can only cache the offset for a varlena attribute if the
-			 * offset is already suitably aligned, so that there would be no
-			 * pad bytes in any case: then the offset will be valid for either
-			 * an aligned or unaligned value.
-			 */
-			if (!slow &&
-				off == att_nominal_alignby(off, thisatt->attalignby))
-				thisatt->attcacheoff = off;
-			else
-			{
-				off = att_pointer_alignby(off, thisatt->attalignby, -1,
-										  tp + off);
-				slow = true;
-			}
-		}
+		/* Tuple without any NULLs? We can skip doing any NULL checking */
+		if (!hasnulls)
+			attnum = slot_deform_heap_tuple_internal(slot,
+													 tuple,
+													 attnum,
+													 natts,
+													 false, /* slow */
+													 false, /* hasnulls */
+													 &off,
+													 &slow);
 		else
-		{
-			/* not varlena, so safe to use att_nominal_alignby */
-			off = att_nominal_alignby(off, thisatt->attalignby);
-
-			if (!slow)
-				thisatt->attcacheoff = off;
-		}
-
-		values[attnum] = fetchatt(thisatt, tp + off);
-
-		off = att_addlength_pointer(off, thisatt->attlen, tp + off);
+			attnum = slot_deform_heap_tuple_internal(slot,
+													 tuple,
+													 attnum,
+													 natts,
+													 false, /* slow */
+													 true,	/* hasnulls */
+													 &off,
+													 &slow);
+	}
 
-		if (thisatt->attlen <= 0)
-			slow = true;		/* can't use attcacheoff anymore */
+	/* If there's still work to do then we must be in slow mode */
+	if (attnum < natts)
+	{
+		/* XXX is it worth adding a separate call when hasnulls is false? */
+		attnum = slot_deform_heap_tuple_internal(slot,
+												 tuple,
+												 attnum,
+												 natts,
+												 true, /* slow */
+												 hasnulls,
+												 &off,
+												 &slow);
 	}
 
 	/*
@@ -1103,7 +1204,6 @@ slot_deform_heap_tuple(TupleTableSlot *slot, HeapTuple tuple, uint32 *offp,
 		slot->tts_flags &= ~TTS_FLAG_SLOW;
 }
 
-
 const TupleTableSlotOps TTSOpsVirtual = {
 	.base_slot_size = sizeof(VirtualTupleTableSlot),
 	.init = tts_virtual_init,