From 007648104e7d0c86088aa29f45445d2da6954e1f Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Sun, 28 Jan 2018 16:52:01 -0800
Subject: [PATCH 3/3] Add randomness to execGrouping.c hashtable IV.

This makes problems caused by skewed input data less severe.
---
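For illustration, here is a minimal standalone sketch of how the new IV is
derived.  The murmurhash32() and hash_combine() helpers below are local
copies modeled on the ones in src/include/utils/hashutils.h, and the
hard-coded worker number stands in for ParallelWorkerNumber, so treat this
as a sketch of the idea rather than the code the executor actually runs.
(Since every hashtable now gets a variable IV, the use_variable_hash_iv
parameter is dropped from BuildTupleHashTable() and all of its callers.)

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <time.h>

/* 32-bit murmur3 finalizer: spreads the input's entropy over all bits */
static uint32_t
murmurhash32(uint32_t data)
{
	uint32_t	h = data;

	h ^= h >> 16;
	h *= 0x85ebca6b;
	h ^= h >> 13;
	h *= 0xc2b2ae35;
	h ^= h >> 16;
	return h;
}

/* boost-style hash combiner, as in hashutils.h */
static uint32_t
hash_combine(uint32_t a, uint32_t b)
{
	a ^= b + 0x9e3779b9 + (a << 6) + (a >> 2);
	return a;
}

int
main(void)
{
	int			worker = 0;		/* stand-in for ParallelWorkerNumber */
	uint32_t	iv;

	srandom((unsigned int) time(NULL));

	/* same combination as the new code in BuildTupleHashTable() */
	iv = hash_combine(murmurhash32((uint32_t) random()),
					  murmurhash32((uint32_t) worker));
	printf("hash_iv = 0x%08x\n", (unsigned) iv);
	return 0;
}

Because both inputs are run through the murmur3 finalizer before being
combined, small seeds and small worker numbers still yield IVs that differ
in many bits, so two backends are unlikely to lay out their hashtables
identically.
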
 src/backend/executor/execGrouping.c        | 17 ++++++++++-------
 src/backend/executor/nodeAgg.c             |  3 +--
 src/backend/executor/nodeRecursiveunion.c  |  3 +--
 src/backend/executor/nodeSetOp.c           |  3 +--
 src/backend/executor/nodeSubplan.c         |  6 ++----
 src/include/executor/executor.h            |  2 +-
 src/test/regress/expected/groupingsets.out | 22 +++++++++++++---------
 src/test/regress/sql/groupingsets.sql      |  6 ++++--
 8 files changed, 33 insertions(+), 29 deletions(-)

diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 8e8dbb1f205..3aa4d375e8d 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -292,8 +292,7 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
 					FmgrInfo *eqfunctions,
 					FmgrInfo *hashfunctions,
 					long nbuckets, Size additionalsize,
-					MemoryContext tablecxt, MemoryContext tempcxt,
-					bool use_variable_hash_iv)
+					MemoryContext tablecxt, MemoryContext tempcxt)
 {
 	TupleHashTable hashtable;
 	Size		entrysize = sizeof(TupleHashEntryData) + additionalsize;
@@ -319,17 +318,21 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
 	hashtable->cur_eq_funcs = NULL;
 
 	/*
+	 * Initialize IV with a random number, to make it less likely that input
+	 * resulting in badly distributed hash values causes persistent
+	 * problems.
+	 *
 	 * If parallelism is in use, even if the master backend is performing the
 	 * scan itself, we don't want to create the hashtable exactly the same way
 	 * in all workers. As hashtables are iterated over in keyspace-order,
 	 * doing so in all processes in the same way is likely to lead to
 	 * "unbalanced" hashtables when the table size initially is
-	 * underestimated.
+	 * underestimated. So also mix in the worker's number, to avoid problems
+	 * if the random seed happens to be initialized identically across
+	 * backends due to timing or similar effects.
 	 */
-	if (use_variable_hash_iv)
-		hashtable->hash_iv = murmurhash32(ParallelWorkerNumber);
-	else
-		hashtable->hash_iv = 0;
+	hashtable->hash_iv = hash_combine(murmurhash32(random()),
+									  murmurhash32(ParallelWorkerNumber));
 
 	hashtable->hashtab = tuplehash_create(tablecxt, nbuckets, hashtable);
 
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index ec62e7fb389..cb64e216989 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -1283,8 +1283,7 @@ build_hash_table(AggState *aggstate)
 												 perhash->aggnode->numGroups,
 												 additionalsize,
 												 aggstate->hashcontext->ecxt_per_tuple_memory,
-												 tmpmem,
-												 DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit));
+												 tmpmem);
 	}
 }
 
diff --git a/src/backend/executor/nodeRecursiveunion.c b/src/backend/executor/nodeRecursiveunion.c
index 817749855fc..8d1ba68c50d 100644
--- a/src/backend/executor/nodeRecursiveunion.c
+++ b/src/backend/executor/nodeRecursiveunion.c
@@ -43,8 +43,7 @@ build_hash_table(RecursiveUnionState *rustate)
 											 node->numGroups,
 											 0,
 											 rustate->tableContext,
-											 rustate->tempContext,
-											 false);
+											 rustate->tempContext);
 }
 
 
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c
index c91c3402d25..1c93df2d5ed 100644
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -131,8 +131,7 @@ build_hash_table(SetOpState *setopstate)
 												node->numGroups,
 												0,
 												setopstate->tableContext,
-												setopstate->tempContext,
-												false);
+												setopstate->tempContext);
 }
 
 /*
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index edf7d034bd3..0c6c857f905 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -501,8 +501,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
 										  nbuckets,
 										  0,
 										  node->hashtablecxt,
-										  node->hashtempcxt,
-										  false);
+										  node->hashtempcxt);
 
 	if (!subplan->unknownEqFalse)
 	{
@@ -521,8 +520,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
 											  nbuckets,
 											  0,
 											  node->hashtablecxt,
-											  node->hashtempcxt,
-											  false);
+											  node->hashtempcxt);
 	}
 
 	/*
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 6545a802223..e1dc3d04fa3 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -135,7 +135,7 @@ extern TupleHashTable BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
 					FmgrInfo *hashfunctions,
 					long nbuckets, Size additionalsize,
 					MemoryContext tablecxt,
-					MemoryContext tempcxt, bool use_variable_hash_iv);
+					MemoryContext tempcxt);
 extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
 					 TupleTableSlot *slot,
 					 bool *isnew);
diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out
index d21a494a9dd..3549d26d4c9 100644
--- a/src/test/regress/expected/groupingsets.out
+++ b/src/test/regress/expected/groupingsets.out
@@ -1159,7 +1159,8 @@ explain (costs off)
 -- check that functionally dependent cols are not nulled
 select a, d, grouping(a,b,c)
   from gstest3
- group by grouping sets ((a,b), (a,c));
+ group by grouping sets ((a,b), (a,c))
+ order by 3, 1, 2;
  a | d | grouping 
 ---+---+----------
  1 | 1 |        1
@@ -1171,14 +1172,17 @@ select a, d, grouping(a,b,c)
 explain (costs off)
   select a, d, grouping(a,b,c)
     from gstest3
-   group by grouping sets ((a,b), (a,c));
-        QUERY PLAN         
----------------------------
- HashAggregate
-   Hash Key: a, b
-   Hash Key: a, c
-   ->  Seq Scan on gstest3
-(4 rows)
+   group by grouping sets ((a,b), (a,c))
+   order by 3, 1, 2;
+              QUERY PLAN               
+---------------------------------------
+ Sort
+   Sort Key: (GROUPING(a, b, c)), a, d
+   ->  HashAggregate
+         Hash Key: a, b
+         Hash Key: a, c
+         ->  Seq Scan on gstest3
+(6 rows)
 
 -- simple rescan tests
 select a, b, sum(v.x)
diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql
index eb680286030..4d3a5919e09 100644
--- a/src/test/regress/sql/groupingsets.sql
+++ b/src/test/regress/sql/groupingsets.sql
@@ -332,11 +332,13 @@ explain (costs off)
 -- check that functionally dependent cols are not nulled
 select a, d, grouping(a,b,c)
   from gstest3
- group by grouping sets ((a,b), (a,c));
+ group by grouping sets ((a,b), (a,c))
+ order by 3, 1, 2;
 explain (costs off)
   select a, d, grouping(a,b,c)
     from gstest3
-   group by grouping sets ((a,b), (a,c));
+   group by grouping sets ((a,b), (a,c))
+   order by 3, 1, 2;
 
 -- simple rescan tests
 
-- 
2.15.1.354.g95ec6b1b33.dirty
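
A note on the groupingsets regression test changes above: with the IV now
varying from run to run, entries come back from the tuple hashtable in an
order that is no longer stable, so the HashAggregate output order can
change between runs; the added ORDER BY clauses keep the expected output
deterministic (at the cost of a Sort node in the EXPLAIN output).  The toy
program below illustrates the effect; its bucket selection is a
deliberately simplified stand-in, not the real simplehash algorithm.

#include <stdio.h>
#include <stdint.h>

#define NBUCKETS 8

/* 32-bit murmur3 finalizer, as in hashutils.h */
static uint32_t
murmurhash32(uint32_t data)
{
	uint32_t	h = data;

	h ^= h >> 16;
	h *= 0x85ebca6b;
	h ^= h >> 13;
	h *= 0xc2b2ae35;
	h ^= h >> 16;
	return h;
}

/* print the keys in bucket order, mimicking keyspace-order iteration */
static void
show_bucket_order(uint32_t iv)
{
	uint32_t	keys[] = {1, 2, 3, 4};

	printf("iv=0x%08x:", (unsigned) iv);
	for (uint32_t bucket = 0; bucket < NBUCKETS; bucket++)
	{
		for (int i = 0; i < 4; i++)
		{
			/* toy per-group hash: the IV is mixed into the key's hash */
			uint32_t	hash = iv ^ murmurhash32(keys[i]);

			if (hash % NBUCKETS == bucket)
				printf(" %u", (unsigned) keys[i]);
		}
	}
	printf("\n");
}

int
main(void)
{
	show_bucket_order(0);			/* old behaviour: IV fixed at zero */
	show_bucket_order(0xdeadbeef);	/* a randomized IV usually reorders */
	return 0;
}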

