From f95b1cb33159620175e012a471513f9a32d64c43 Mon Sep 17 00:00:00 2001 From: amit Date: Fri, 8 Sep 2017 17:35:10 +0900 Subject: [PATCH 2/3] Make RelationGetPartitionDispatch expansion order depth-first This is so as it matches what the planner is doing with partitioning inheritance expansion. Matching with planner order helps because it helps ease matching the executor's per-partition objects with planner-created per-partition nodes. --- src/backend/catalog/partition.c | 191 ++++++++++++++++------------------------ 1 file changed, 74 insertions(+), 117 deletions(-) diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c index 4f594243d3..a4abe08088 100644 --- a/src/backend/catalog/partition.c +++ b/src/backend/catalog/partition.c @@ -139,6 +139,8 @@ static int32 partition_bound_cmp(PartitionKey key, static int partition_bound_bsearch(PartitionKey key, PartitionBoundInfo boundinfo, void *probe, bool probe_is_bound, bool *is_equal); +static void get_partition_dispatch_recurse(Relation rel, Relation parent, + List **pds, List **leaf_part_oids); /* * RelationBuildPartitionDesc @@ -961,21 +963,6 @@ get_partition_qual_relid(Oid relid) } /* - * Append OIDs of rel's partitions to the list 'partoids' and for each OID, - * append pointer rel to the list 'parents'. - */ -#define APPEND_REL_PARTITION_OIDS(rel, partoids, parents) \ - do\ - {\ - int i;\ - for (i = 0; i < (rel)->rd_partdesc->nparts; i++)\ - {\ - (partoids) = lappend_oid((partoids), (rel)->rd_partdesc->oids[i]);\ - (parents) = lappend((parents), (rel));\ - }\ - } while(0) - -/* * RelationGetPartitionDispatchInfo * Returns information necessary to route tuples down a partition tree * @@ -991,16 +978,47 @@ PartitionDispatch * RelationGetPartitionDispatchInfo(Relation rel, int *num_parted, List **leaf_part_oids) { + List *pdlist; PartitionDispatchData **pd; - List *all_parts = NIL, - *all_parents = NIL, - *parted_rels, - *parted_rel_parents; - ListCell *lc1, - *lc2; - int i, - k, - offset; + ListCell *lc; + int i; + + Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE); + + *num_parted = 0; + *leaf_part_oids = NIL; + + get_partition_dispatch_recurse(rel, NULL, &pdlist, leaf_part_oids); + *num_parted = list_length(pdlist); + pd = (PartitionDispatchData **) palloc(*num_parted * + sizeof(PartitionDispatchData *)); + i = 0; + foreach (lc, pdlist) + { + pd[i++] = lfirst(lc); + } + + return pd; +} + +/* + * get_partition_dispatch_recurse + * Recursively expand partition tree rooted at rel + * + * As the partition tree is expanded in a depth-first manner, we mantain two + * global lists: of PartitionDispatch objects corresponding to partitioned + * tables in *pds and of the leaf partition OIDs in *leaf_part_oids. + */ +static void +get_partition_dispatch_recurse(Relation rel, Relation parent, + List **pds, List **leaf_part_oids) +{ + PartitionDesc partdesc = RelationGetPartitionDesc(rel); + PartitionKey partkey = RelationGetPartitionKey(rel); + PartitionDispatch pd; + int i; + int next_leaf_idx = list_length(*leaf_part_oids), + next_parted_idx = list_length(*pds); /* * We rely on the relcache to traverse the partition tree to build both @@ -1016,108 +1034,47 @@ RelationGetPartitionDispatchInfo(Relation rel, * a bit tricky but works because the foreach() macro doesn't fetch the * next list element until the bottom of the loop. */ - *num_parted = 1; - parted_rels = list_make1(rel); - /* Root partitioned table has no parent, so NULL for parent */ - parted_rel_parents = list_make1(NULL); - APPEND_REL_PARTITION_OIDS(rel, all_parts, all_parents); - forboth(lc1, all_parts, lc2, all_parents) + pd = (PartitionDispatch) palloc(sizeof(PartitionDispatchData)); + *pds = lappend(*pds, pd); + pd->reldesc = rel; + pd->key = partkey; + pd->partdesc = partdesc; + if (parent != NULL) + pd->parentoid = RelationGetRelid(parent); + else + pd->parentoid = InvalidOid; + pd->indexes = (int *) palloc(partdesc->nparts * sizeof(int)); + for (i = 0; i < partdesc->nparts; i++) { - Oid partrelid = lfirst_oid(lc1); - Relation parent = lfirst(lc2); + Oid partrelid = partdesc->oids[i]; - if (get_rel_relkind(partrelid) == RELKIND_PARTITIONED_TABLE) + if (get_rel_relkind(partrelid) != RELKIND_PARTITIONED_TABLE) { - /* - * Already locked by the caller. Note that it is the - * responsibility of the caller to close the below relcache entry, - * once done using the information being collected here (for - * example, in ExecEndModifyTable). - */ - Relation partrel = heap_open(partrelid, NoLock); - - (*num_parted)++; - parted_rels = lappend(parted_rels, partrel); - parted_rel_parents = lappend(parted_rel_parents, parent); - APPEND_REL_PARTITION_OIDS(partrel, all_parts, all_parents); + *leaf_part_oids = lappend_oid(*leaf_part_oids, partrelid); + pd->indexes[i] = next_leaf_idx++; } - } - - /* - * We want to create two arrays - one for leaf partitions and another for - * partitioned tables (including the root table and internal partitions). - * While we only create the latter here, leaf partition array of suitable - * objects (such as, ResultRelInfo) is created by the caller using the - * list of OIDs we return. Indexes into these arrays get assigned in a - * breadth-first manner, whereby partitions of any given level are placed - * consecutively in the respective arrays. - */ - pd = (PartitionDispatchData **) palloc(*num_parted * - sizeof(PartitionDispatchData *)); - *leaf_part_oids = NIL; - i = k = offset = 0; - forboth(lc1, parted_rels, lc2, parted_rel_parents) - { - Relation partrel = lfirst(lc1); - Relation parent = lfirst(lc2); - PartitionKey partkey = RelationGetPartitionKey(partrel); - PartitionDesc partdesc = RelationGetPartitionDesc(partrel); - int j, - m; - - pd[i] = (PartitionDispatch) palloc(sizeof(PartitionDispatchData)); - pd[i]->reldesc = partrel; - pd[i]->key = partkey; - pd[i]->partdesc = partdesc; - if (parent != NULL) - pd[i]->parentoid = RelationGetRelid(parent); else - pd[i]->parentoid = InvalidOid; - - pd[i]->indexes = (int *) palloc(partdesc->nparts * sizeof(int)); - - /* - * Indexes corresponding to the internal partitions are multiplied by - * -1 to distinguish them from those of leaf partitions. Encountering - * an index >= 0 means we found a leaf partition, which is immediately - * returned as the partition we are looking for. A negative index - * means we found a partitioned table, whose PartitionDispatch object - * is located at the above index multiplied back by -1. Using the - * PartitionDispatch object, search is continued further down the - * partition tree. - */ - m = 0; - for (j = 0; j < partdesc->nparts; j++) { - Oid partrelid = partdesc->oids[j]; + Relation partrel = heap_open(partrelid, NoLock); - if (get_rel_relkind(partrelid) != RELKIND_PARTITIONED_TABLE) - { - *leaf_part_oids = lappend_oid(*leaf_part_oids, partrelid); - pd[i]->indexes[j] = k++; - } - else - { - /* - * offset denotes the number of partitioned tables of upper - * levels including those of the current level. Any partition - * of this table must belong to the next level and hence will - * be placed after the last partitioned table of this level. - */ - pd[i]->indexes[j] = -(1 + offset + m); - m++; - } - } - i++; + /* + * offset denotes the number of partitioned tables of upper + * levels including those of the current level. Any partition + * of this table must belong to the next level and hence will + * be placed after the last partitioned table of this level. + */ + pd->indexes[i] = -(1 + next_parted_idx); + get_partition_dispatch_recurse(partrel, rel, pds, leaf_part_oids); - /* - * This counts the number of partitioned tables at upper levels - * including those of the current level. - */ - offset += m; + /* + * Fast-forward both leaf partition and parted indexes to account + * for the leaf partitions and PartitionDispatch objects just + * added. + */ + next_parted_idx += (list_length(*pds) - next_parted_idx - 1); + next_leaf_idx += (list_length(*leaf_part_oids) - next_leaf_idx); + } } - - return pd; } /* Module-local functions */ -- 2.11.0