diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 64eec91f8b..667a23eed0 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -1653,10 +1653,54 @@ ExecHashTableInsert(HashJoinTable hashtable, hashtable->spaceUsed += hashTupleSize; if (hashtable->spaceUsed > hashtable->spacePeak) hashtable->spacePeak = hashtable->spaceUsed; + + /* + * Consider increasing number of batches. + * + * Each batch requires a non-trivial amount of memory, because BufFile + * includes a PGAlignedBlock (typically 8kB buffer). So when doubling + * the number of batches, we need to be careful and only allow that if + * it actually has a chance of reducing memory usage. + * + * In particular, doubling the number of batches is pointless when + * + * (spaceUsed / 2) < (nbatches * sizeof(BufFile)) + * + * because we expect to save roughly 1/2 of memory currently used for + * data (rows) at the price of doubling the memory used for BufFile. + * + * We can't stop adding batches entirely, because that would just mean + * the batches would need more and more memory. So we need to increase + * the number of batches, even if we can't enforce work_mem properly. + * The goal is to minimize the overall memory usage of the hash join. + * + * Note: This applies mostly to cases of significant underestimates, + * resulting in an explosion of the number of batches. The properly + * estimated cases should generally end up using merge join based on + * high cost of the batched hash join. + */ if (hashtable->spaceUsed + - hashtable->nbuckets_optimal * sizeof(HashJoinTuple) + hashtable->nbuckets_optimal * sizeof(HashJoinTuple) + + hashtable->nbatch * sizeof(PGAlignedBlock) > hashtable->spaceAllowed) + { ExecHashIncreaseNumBatches(hashtable); + + /* + * Consider increasing the resize threshold. For well estimated cases + * this does nothing, because batches are expected to account only for + * small fraction of work_mem. 
But if we significantly underestimate + * the number of batches, we may end up in a situation where BufFiles + * alone exceed work_mem. So move the threshold a bit, until the next + * point where it'll make sense to consider adding batches again. */ + hashtable->spaceAllowed + = Max(hashtable->spaceAllowed, + hashtable->nbatch * sizeof(PGAlignedBlock) * 3); + + elog(WARNING, "ExecHashIncreaseNumBatches: nbatch=%d spaceAllowed=%ld", + hashtable->nbatch, hashtable->spaceAllowed); + } } else { @@ -2661,6 +2705,8 @@ ExecHashGetInstrumentation(HashInstrumentation *instrument, instrument->nbatch = hashtable->nbatch; instrument->nbatch_original = hashtable->nbatch_original; instrument->space_peak = hashtable->spacePeak; + /* account for memory used for BufFile */ + instrument->space_peak += hashtable->nbatch * sizeof(PGAlignedBlock); } /*