diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index f509c8e8f5..fe4a520305 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -1291,6 +1291,68 @@ project_aggregates(AggState *aggstate) return NULL; } +static bool +find_aggregated_cols_walker(Node *node, Bitmapset **colnos) +{ + if (node == NULL) + return false; + + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + *colnos = bms_add_member(*colnos, var->varattno); + + return false; + } + return expression_tree_walker(node, find_aggregated_cols_walker, + (void *) colnos); +} + +/* + * find_aggregated_cols + * Construct a bitmapset of the column numbers of aggregated Vars + * appearing in our targetlist and qual (HAVING clause) + */ +static Bitmapset * +find_aggregated_cols(AggState *aggstate) +{ + Agg *node = (Agg *) aggstate->ss.ps.plan; + Bitmapset *colnos = NULL; + ListCell *temp; + + /* + * We only want the columns used by aggregations in the targetlist or qual + */ + if (node->plan.targetlist != NULL) + { + foreach(temp, (List *) node->plan.targetlist) + { + if (IsA(lfirst(temp), TargetEntry)) + { + Node *node = (Node *)((TargetEntry *)lfirst(temp))->expr; + if (IsA(node, Aggref) || IsA(node, GroupingFunc)) + find_aggregated_cols_walker(node, &colnos); + } + } + } + + if (node->plan.qual != NULL) + { + foreach(temp, (List *) node->plan.qual) + { + if (IsA(lfirst(temp), TargetEntry)) + { + Node *node = (Node *)((TargetEntry *)lfirst(temp))->expr; + if (IsA(node, Aggref) || IsA(node, GroupingFunc)) + find_aggregated_cols_walker(node, &colnos); + } + } + } + + return colnos; +} + /* * find_unaggregated_cols * Construct a bitmapset of the column numbers of un-aggregated Vars @@ -1520,6 +1582,23 @@ find_hash_columns(AggState *aggstate) for (i = 0; i < perhash->numCols; i++) colnos = bms_add_member(colnos, grpColIdx[i]); + /* + * Find the columns used by aggregations + * + * This is shared by the entire aggregation. + */ + if (aggstate->aggregated_columns == NULL) + aggstate->aggregated_columns = find_aggregated_cols(aggstate); + + /* + * The necessary columns to spill are either group keys or used by + * aggregations + * + * This is the convenient place to calculate the necessary columns to + * spill, because the group keys are different per hash. + */ + perhash->necessarySpillCols = bms_union(colnos, aggstate->aggregated_columns); + /* * First build mapping for columns directly hashed. These are the * first, because they'll be accessed when computing hash values and @@ -1861,6 +1940,23 @@ lookup_hash_entries(AggState *aggstate) hash_spill_init(spill, 0, perhash->aggnode->numGroups, aggstate->hashentrysize); + AggStatePerHash perhash = &aggstate->perhash[aggstate->current_set]; + for (int ttsno = 0; ttsno < slot->tts_nvalid; ttsno++) + { + /* + * null the column out if it's unnecessary, the following + * forming functions will shrink it. + * + * it must be a virtual tuple here, this function is only used + * by the first round, tuples are from other node but not the + * spilled files. + * + * note: ttsno is zero indexed, cols are one indexed. + */ + if (!bms_is_member(ttsno+1, perhash->necessarySpillCols)) + slot->tts_isnull[ttsno] = true; + } + aggstate->hash_disk_used += hash_spill_tuple(spill, 0, slot, hash); } } @@ -2623,7 +2719,15 @@ hash_spill_tuple(HashAggSpill *spill, int input_bits, TupleTableSlot *slot, Assert(spill->partitions != NULL); - /*TODO: project needed attributes only */ + /* + * heap_form_minimal_tuple() if it's a virtual tuple, + * tts_minimal_get_minimal_tuple() if it's a minimal tuple, which is + * exactly what we want. + * + * when we spill the tuples from input, they are virtual tuples with some + * columns nulled out, when we re-spill the tuples from spilling files, + * they are minimal tuples which was already nulled out before. + */ tuple = ExecFetchSlotMinimalTuple(slot, &shouldFree); if (spill->partition_bits == 0) @@ -3072,6 +3176,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) */ aggstate->phases = palloc0(numPhases * sizeof(AggStatePerPhaseData)); + aggstate->aggregated_columns = NULL; aggstate->num_hashes = numHashes; if (numHashes) { diff --git a/src/include/executor/nodeAgg.h b/src/include/executor/nodeAgg.h index 68c9e5f540..3b61109b52 100644 --- a/src/include/executor/nodeAgg.h +++ b/src/include/executor/nodeAgg.h @@ -302,6 +302,7 @@ typedef struct AggStatePerHashData AttrNumber *hashGrpColIdxInput; /* hash col indices in input slot */ AttrNumber *hashGrpColIdxHash; /* indices in hash table tuples */ Agg *aggnode; /* original Agg node, for numGroups etc. */ + Bitmapset *necessarySpillCols; /* the necessary columns if spills */ } AggStatePerHashData; diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index b9803a28bd..0c034b5f67 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -2084,6 +2084,7 @@ typedef struct AggState uint64 hash_disk_used; /* bytes of disk space used */ int hash_batches_used; /* batches used during entire execution */ List *hash_batches; /* hash batches remaining to be processed */ + Bitmapset *aggregated_columns; /* the columns used by aggregations */ AggStatePerHash perhash; /* array of per-hashtable data */ AggStatePerGroup *hash_pergroup; /* grouping set indexed array of