From ed798392e2fc0e0df167b80d468faaf74fd4bc33 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Mon, 4 Nov 2024 23:45:07 +1300 Subject: [PATCH v5 2/2] Support murmur32 hashing of the final ExprState hash value *** No JIT support yet *** --- src/backend/executor/execExpr.c | 18 +++++++++++++++--- src/backend/executor/execExprInterp.c | 11 +++++++++++ src/backend/executor/execGrouping.c | 23 ++++++++--------------- src/backend/executor/nodeSubplan.c | 3 ++- src/include/executor/execExpr.h | 1 + src/include/executor/executor.h | 3 ++- 6 files changed, 39 insertions(+), 20 deletions(-) diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index c7bb13e270..b55431229d 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -3986,12 +3986,15 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate, * init_value: Normally 0, but can be set to other values to seed the hash * with. Non-zero is marginally slower, so best to only use if it's provably * worthwhile. + * murmurfinal: Can be set to true to have the final result hashed through + * murmur32. This can improve the hash perturbation of the result. */ ExprState * ExecBuildHash32FromAttrs(TupleDesc desc, const TupleTableSlotOps *ops, FmgrInfo *hashfunctions, Oid *collations, int numCols, AttrNumber *keyColIdx, - PlanState *parent, uint32 init_value) + PlanState *parent, uint32 init_value, + bool murmurfinal) { ExprState *state = makeNode(ExprState); ExprEvalStep scratch = {0}; @@ -4008,7 +4011,7 @@ ExecBuildHash32FromAttrs(TupleDesc desc, const TupleTableSlotOps *ops, * hashing of individual columns. We only need this if there is more than * one column to hash or an initial value plus one column. */ - if ((int64) numCols + (init_value != 0) > 1) + if ((int64) numCols + (init_value != 0) > 1 || murmurfinal) iresult = palloc(sizeof(NullableDatum)); /* find the highest attnum so we deform the tuple to that point */ @@ -4081,7 +4084,7 @@ ExecBuildHash32FromAttrs(TupleDesc desc, const TupleTableSlotOps *ops, /* Call the hash function */ scratch.opcode = opcode; - if (i == numCols - 1) + if (i == numCols - 1 && !murmurfinal) { /* * The result for hashing the final column is stored in the @@ -4116,6 +4119,15 @@ ExecBuildHash32FromAttrs(TupleDesc desc, const TupleTableSlotOps *ops, opcode = EEOP_HASHDATUM_NEXT32; } + if (murmurfinal) + { + scratch.opcode = EEOP_HASHDATUM_MURMUR32_FINAL; + scratch.resvalue = &state->resvalue; + scratch.resnull = &state->resnull; + scratch.d.hashdatum.iresult = iresult; + ExprEvalPushStep(state, &scratch); + } + scratch.resvalue = NULL; scratch.resnull = NULL; scratch.opcode = EEOP_DONE; diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index 30c5a19aad..391835a3e5 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -59,6 +59,7 @@ #include "access/heaptoast.h" #include "catalog/pg_type.h" #include "commands/sequence.h" +#include "common/hashfn.h" #include "executor/execExpr.h" #include "executor/nodeSubplan.h" #include "funcapi.h" @@ -482,6 +483,7 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull) &&CASE_EEOP_HASHDATUM_FIRST_STRICT, &&CASE_EEOP_HASHDATUM_NEXT32, &&CASE_EEOP_HASHDATUM_NEXT32_STRICT, + &&CASE_EEOP_HASHDATUM_MURMUR32_FINAL, &&CASE_EEOP_CONVERT_ROWTYPE, &&CASE_EEOP_SCALARARRAYOP, &&CASE_EEOP_HASHED_SCALARARRAYOP, @@ -1655,6 +1657,15 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull) EEO_NEXT(); } + EEO_CASE(EEOP_HASHDATUM_MURMUR32_FINAL) + { + uint32 hashkey = DatumGetUInt32(op->d.hashdatum.iresult->value); + + *op->resvalue = murmurhash32(hashkey); + *op->resnull = false; + EEO_NEXT(); + } + EEO_CASE(EEOP_XMLEXPR) { /* too complex for an inline implementation */ diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c index 9a88fc6524..5d88f517d5 100644 --- a/src/backend/executor/execGrouping.c +++ b/src/backend/executor/execGrouping.c @@ -231,7 +231,8 @@ BuildTupleHashTableExt(PlanState *parent, numCols, keyColIdx, allow_jit ? parent : NULL, - hash_iv); + hash_iv, + true); /* build comparator for all columns */ /* XXX: should we support non-minimal tuples for the inputslot? */ @@ -436,7 +437,6 @@ TupleHashTableHash_internal(struct tuplehash_hash *tb, const MinimalTuple tuple) { TupleHashTable hashtable = (TupleHashTable) tb->private_data; - uint32 hashkey; TupleTableSlot *slot; bool isnull; @@ -444,9 +444,9 @@ TupleHashTableHash_internal(struct tuplehash_hash *tb, { /* Process the current input tuple for the table */ hashtable->exprcontext->ecxt_innertuple = hashtable->inputslot; - hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->in_hash_expr, - hashtable->exprcontext, - &isnull)); + return DatumGetUInt32(ExecEvalExpr(hashtable->in_hash_expr, + hashtable->exprcontext, + &isnull)); } else { @@ -458,17 +458,10 @@ TupleHashTableHash_internal(struct tuplehash_hash *tb, */ slot = hashtable->exprcontext->ecxt_innertuple = hashtable->tableslot; ExecStoreMinimalTuple(tuple, slot, false); - hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->tab_hash_expr, - hashtable->exprcontext, - &isnull)); + return DatumGetUInt32(ExecEvalExpr(hashtable->tab_hash_expr, + hashtable->exprcontext, + &isnull)); } - - /* - * The hashing done above, even with an initial value, doesn't tend to - * result in good hash perturbation. Running the value produced above - * through murmurhash32 leads to near perfect hash perturbation. - */ - return murmurhash32(hashkey); } /* diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index 40bae49267..0e3d97f4f0 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -1046,7 +1046,8 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) sstate->numCols, sstate->keyColIdx, parent, - 0); + 0, + true); /* * Create comparator for lookups of rows in the table (potentially diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h index cd97dfa062..081d91cf6a 100644 --- a/src/include/executor/execExpr.h +++ b/src/include/executor/execExpr.h @@ -241,6 +241,7 @@ typedef enum ExprEvalOp EEOP_HASHDATUM_FIRST_STRICT, EEOP_HASHDATUM_NEXT32, EEOP_HASHDATUM_NEXT32_STRICT, + EEOP_HASHDATUM_MURMUR32_FINAL, /* evaluate assorted special-purpose expression types */ EEOP_CONVERT_ROWTYPE, diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index e77377ff9b..210217f406 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -296,7 +296,8 @@ extern ExprState *ExecBuildHash32FromAttrs(TupleDesc desc, int numCols, AttrNumber *keyColIdx, PlanState *parent, - uint32 init_value); + uint32 init_value, + bool murmurfinal); extern ExprState *ExecBuildHash32Expr(TupleDesc desc, const TupleTableSlotOps *ops, const Oid *hashfunc_oids, -- 2.34.1