diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 23c766b5fc..342cf9b4f1 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -32,18 +32,28 @@ #include "utils/ruleutils.h" #define PARTITION_ROUTING_INITSIZE 8 -#define PARTITION_ROUTING_MAXSIZE UINT_MAX + +/* + * Entry of PartitionTupleRouting->subplan_partition_table. dynahash keeps + * the lookup key in the leading bytes of each entry, so 'relid' must be first. + */ +typedef struct SubplanResultRelHashElem +{ + Oid relid; /* hash key -- must be first */ + ResultRelInfo *rri; /* subplan's ResultRelInfo for 'relid' */ +} SubplanResultRelHashElem; -static int ExecUseUpdateResultRelForRouting(ModifyTableState *mtstate, - PartitionTupleRouting *proute, - PartitionDispatch pd, int partidx); +static void +ExecHashSubPlanResultRelsByOid(ModifyTableState *mtstate, + PartitionTupleRouting *proute); +static void ExecExpandRoutingArrays(PartitionTupleRouting *proute); static int ExecInitPartitionInfo(ModifyTableState *mtstate, - ResultRelInfo *resultRelInfo, + ResultRelInfo *rootResultRelInfo, PartitionTupleRouting *proute, EState *estate, PartitionDispatch parent, int partidx); static PartitionDispatch ExecInitPartitionDispatchInfo(PartitionTupleRouting *proute, - Oid partoid, PartitionDispatch parent_pd, int part_index); + Oid partoid, PartitionDispatch parent_pd, int partidx); static void FormPartitionKeyDatum(PartitionDispatch pd, TupleTableSlot *slot, EState *estate, @@ -70,22 +70,9 @@ static void find_matching_subplans_recurse(PartitionPruneState *prunestate, * Note that all the relations in the partition tree are locked using the * RowExclusiveLock mode upon return from this function. * - * This is called during the initialization of a COPY FROM command or of a - * INSERT/UPDATE query. We provisionally allocate space to hold - * PARTITION_ROUTING_INITSIZE number of PartitionDispatch and ResultRelInfo - * pointers in their respective arrays. The arrays will be doubled in - * size via repalloc (subject to the limit of PARTITION_ROUTING_MAXSIZE - * entries at most) if and when we run out of space, as more partitions need - * to be added. Since we already have the root parent open, its - * PartitionDispatch is created here. 
- * - * PartitionDispatch object of a non-root partitioned table or ResultRelInfo - * of a leaf partition is allocated and added to the respective array when - * it is encountered for the first time in ExecFindPartition. As mentioned - * above, we might need to expand the respective array before storing it. - * - * Tuple conversion maps (either child to parent and/or vice versa) and the - * array(s) to hold them are allocated only if needed. + * Callers must use the returned PartitionTupleRouting during calls to + * ExecFindPartition. The actual ResultRelInfos are allocated lazily by that + * function. */ PartitionTupleRouting * ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel) @@ -96,24 +83,47 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel) /* Lock all the partitions. */ (void) find_all_inheritors(RelationGetRelid(rel), RowExclusiveLock, NULL); - proute = (PartitionTupleRouting *) palloc0(sizeof(PartitionTupleRouting)); + /* + * Here we attempt to expend as little effort as possible in setting up + * the PartitionTupleRouting. Each partition's ResultRelInfo is built + * lazily, only when we actually need to route a tuple to that partition. + * The reason for this is that a common case is for INSERT to insert a + * single tuple into a single partition. + * + * We initially allocate enough memory to hold PARTITION_ROUTING_INITSIZE + * PartitionDispatch and ResultRelInfo pointers in their respective arrays. + * More space can be allocated later, if required via + * ExecExpandRoutingArrays. + * + * We're certain to only need just 1 PartitionDispatch; the one for the + * partitioned table which is the target of the command. We'll only setup + * PartitionDispatchs for any subpartitions if tuples actually get routed + * to (through) them. 
+ */ + proute = (PartitionTupleRouting *) palloc(sizeof(PartitionTupleRouting)); proute->partition_root = rel; - proute->dispatch_allocsize = PARTITION_ROUTING_INITSIZE; proute->partition_dispatch_info = (PartitionDispatchData **) palloc(sizeof(PartitionDispatchData) * PARTITION_ROUTING_INITSIZE); + proute->num_dispatch = 0; + proute->dispatch_allocsize = PARTITION_ROUTING_INITSIZE; + + proute->partitions = (ResultRelInfo **) + palloc(sizeof(ResultRelInfo *) * PARTITION_ROUTING_INITSIZE); + proute->num_partitions = 0; + proute->partitions_allocsize = PARTITION_ROUTING_INITSIZE; + + /* We only allocate these arrays when we need to store the first map */ + proute->parent_child_tupconv_maps = NULL; + proute->child_parent_tupconv_maps = NULL; + proute->child_parent_map_not_required = NULL; /* - * Initialize this table's PartitionDispatch object. Since the root - * parent doesn't itself have any parent, last two parameters are - * not used. + * Initialize this table's PartitionDispatch object. Here we pass in + * the parent as NULL as we don't need to care about any parent of the + * target partitioned table. */ (void) ExecInitPartitionDispatchInfo(proute, RelationGetRelid(rel), NULL, 0); - proute->num_dispatch = 1; - proute->partitions_allocsize = PARTITION_ROUTING_INITSIZE; - proute->partitions = (ResultRelInfo **) - palloc(sizeof(ResultRelInfo *) * PARTITION_ROUTING_INITSIZE); - proute->num_partitions = 0; /* * If UPDATE needs to do tuple routing, we'll need a slot that will @@ -124,18 +134,16 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel) * ExecUseUpdateResultRelForRouting. 
*/ if (node && node->operation == CMD_UPDATE) + { + ExecHashSubPlanResultRelsByOid(mtstate, proute); proute->root_tuple_slot = MakeTupleTableSlot(NULL); + } else { + proute->subplan_partition_table = NULL; proute->root_tuple_slot = NULL; - proute->subplan_partition_offsets = NULL; - proute->num_subplan_partition_offsets = 0; } - /* We only allocate this when we need to store the first non-NULL map */ - proute->parent_child_tupconv_maps = NULL; - proute->child_parent_tupconv_maps = NULL; - /* * Initialize an empty slot that will be used to manipulate tuples of any * given partition's rowtype. @@ -185,7 +193,7 @@ ExecFindPartition(ModifyTableState *mtstate, PartitionDesc partdesc; TupleTableSlot *myslot = parent->tupslot; TupleConversionMap *map = parent->tupmap; - int cur_index = -1; + int partidx = -1; rel = parent->reldesc; partdesc = RelationGetPartitionDesc(rel); @@ -216,146 +224,143 @@ ExecFindPartition(ModifyTableState *mtstate, FormPartitionKeyDatum(parent, slot, estate, values, isnull); /* - * Nothing for get_partition_for_tuple() to do if there are no - * partitions to begin with. + * If this partitioned table has no partitions or no partition for + * these values, then error out. */ - if (partdesc->nparts == 0) - break; - - cur_index = get_partition_for_tuple(rel, values, isnull); - - /* - * cur_index < 0 means we failed to find a partition of this parent. - * cur_index >= 0 means we either found the leaf partition, or the - * next parent to find a partition of. - */ - if (cur_index < 0) - break; + if (partdesc->nparts == 0 || + (partidx = get_partition_for_tuple(rel, values, isnull)) < 0) + { + char *val_desc; + + val_desc = ExecBuildSlotPartitionKeyDescription(rel, + values, isnull, 64); + Assert(OidIsValid(RelationGetRelid(rel))); + ereport(ERROR, + (errcode(ERRCODE_CHECK_VIOLATION), + errmsg("no partition of relation \"%s\" found for row", + RelationGetRelationName(rel)), + val_desc ? 
errdetail("Partition key of the failing row contains %s.", val_desc) : 0)); + } - if (partdesc->is_leaf[cur_index]) + if (partdesc->is_leaf[partidx]) { - /* Get the ResultRelInfo of this leaf partition. */ - if (parent->indexes[cur_index] >= 0) + /* + * Get the index for PartitionTupleRouting->partitions array index + * for this leaf partition. This may require building a new + * ResultRelInfo. + */ + if (likely(parent->indexes[partidx] >= 0)) { - /* - * Already assigned (either created fresh or reused from the - * set of UPDATE result rels.) - */ - Assert(parent->indexes[cur_index] < proute->num_partitions); - result = parent->indexes[cur_index]; + /* ResultRelInfo already built */ + Assert(parent->indexes[partidx] < proute->num_partitions); + result = parent->indexes[partidx]; } - else if (node && node->operation == CMD_UPDATE) + else { - /* Try to assign an existing result rel for tuple routing. */ - result = ExecUseUpdateResultRelForRouting(mtstate, proute, - parent, cur_index); + if (proute->subplan_partition_table) + { + SubplanResultRelHashElem *elem; + Oid partoid = partdesc->oids[partidx]; - /* We may not really have found one. */ - Assert(result < 0 || - parent->indexes[cur_index] < proute->num_partitions); - } + elem = hash_search(proute->subplan_partition_table, + &partoid, HASH_FIND, NULL); - /* We need to create one afresh. */ - if (result < 0) - { - result = ExecInitPartitionInfo(mtstate, resultRelInfo, - proute, estate, - parent, cur_index); - Assert(result >= 0 && result < proute->num_partitions); + if (elem) + { + result = proute->num_partitions++; + parent->indexes[partidx] = result; + + /* Allocate more space in the arrays, if required */ + if (result >= proute->partitions_allocsize) + ExecExpandRoutingArrays(proute); + + /* Save the subplan's ResultRelInfo for later use. */ + proute->partitions[result] = elem->rri; + } + } + + /* We need to create one afresh. 
*/ + if (result < 0) + { + result = ExecInitPartitionInfo(mtstate, resultRelInfo, + proute, estate, + parent, partidx); + Assert(result >= 0 && result < proute->num_partitions); + } } - break; + + ecxt->ecxt_scantuple = ecxt_scantuple_old; + return result; } else { - /* Get the PartitionDispatch of this parent. */ - if (parent->indexes[cur_index] >= 0) + /* + * Partition is a sub-partitioned table; get the PartitionDispatch + */ + if (likely(parent->indexes[partidx] >= 0)) { - /* Already allocated. */ - Assert(parent->indexes[cur_index] < proute->num_dispatch); - parent = pd[parent->indexes[cur_index]]; + /* Already built. */ + Assert(parent->indexes[partidx] < proute->num_dispatch); + parent = pd[parent->indexes[partidx]]; } else { - /* Not yet, allocate one. */ - PartitionDispatch new_parent; - - new_parent = - ExecInitPartitionDispatchInfo(proute, - partdesc->oids[cur_index], - parent, cur_index); - Assert(parent->indexes[cur_index] >= 0 && - parent->indexes[cur_index] < proute->num_dispatch); - parent = new_parent; + /* Not yet built. Do that now. */ + PartitionDispatch subparent; + + subparent = ExecInitPartitionDispatchInfo(proute, + partdesc->oids[partidx], + parent, partidx); + Assert(parent->indexes[partidx] >= 0 && + parent->indexes[partidx] < proute->num_dispatch); + parent = subparent; } } } - - /* A partition was not found. */ - if (result < 0) - { - char *val_desc; - - val_desc = ExecBuildSlotPartitionKeyDescription(rel, - values, isnull, 64); - Assert(OidIsValid(RelationGetRelid(rel))); - ereport(ERROR, - (errcode(ERRCODE_CHECK_VIOLATION), - errmsg("no partition of relation \"%s\" found for row", - RelationGetRelationName(rel)), - val_desc ? 
errdetail("Partition key of the failing row contains %s.", val_desc) : 0)); - } - - ecxt->ecxt_scantuple = ecxt_scantuple_old; - return result; } /* - * ExecUseUpdateResultRelForRouting - * Checks if any of the ResultRelInfo's created by ExecInitModifyTable - * belongs to the passed in partition, and if so, stores its pointer in - * in proute so that it can be used as the target of tuple routing - * - * Return value is the index at which the found result rel is stored in proute - * or -1 if none found. + * ExecHashSubPlanResultRelsByOid + * Build a hash table to allow fast lookups of subplan ResultRelInfos by + * partition Oid. We also populate the subplan ResultRelInfo with an + * ri_PartitionRoot. */ -static int -ExecUseUpdateResultRelForRouting(ModifyTableState *mtstate, - PartitionTupleRouting *proute, - PartitionDispatch pd, - int partidx) +static void +ExecHashSubPlanResultRelsByOid(ModifyTableState *mtstate, + PartitionTupleRouting *proute) { - Oid partoid = pd->partdesc->oids[partidx]; ModifyTable *node = (ModifyTable *) mtstate->ps.plan; - ResultRelInfo *update_result_rels = NULL; - int num_update_result_rels = 0; + ResultRelInfo *subplan_result_rels; + HASHCTL ctl; + HTAB *htab; + int nsubplans; int i; - int part_result_rel_index = -1; - update_result_rels = mtstate->resultRelInfo; - num_update_result_rels = list_length(node->plans); + subplan_result_rels = mtstate->resultRelInfo; + nsubplans = list_length(node->plans); - /* If here for the first time, initialize necessary info in proute. 
*/ - if (proute->subplan_partition_offsets == NULL) - { - proute->subplan_partition_offsets = - palloc(num_update_result_rels * sizeof(int)); - memset(proute->subplan_partition_offsets, -1, - num_update_result_rels * sizeof(int)); - proute->num_subplan_partition_offsets = num_update_result_rels; - } + memset(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(SubplanResultRelHashElem); + ctl.hcxt = CurrentMemoryContext; - /* - * Go through UPDATE result rels and save the pointers of those that - * belong to this table's partitions in proute. - */ - for (i = 0; i < num_update_result_rels; i++) + htab = hash_create("PartitionTupleRouting table", nsubplans, &ctl, + HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); + proute->subplan_partition_table = htab; + + /* Hash all subplans by Oid */ + for (i = 0; i < nsubplans; i++) { - ResultRelInfo *update_result_rel = &update_result_rels[i]; + ResultRelInfo *rri = &subplan_result_rels[i]; + bool found; + Oid partoid = RelationGetRelid(rri->ri_RelationDesc); + SubplanResultRelHashElem *elem; - if (partoid != RelationGetRelid(update_result_rel->ri_RelationDesc)) - continue; + elem = (SubplanResultRelHashElem *) hash_search(htab, &partoid, HASH_ENTER, + &found); - /* Found it. */ + if (!found) + elem->rri = rri; /* * This is required in order to convert the partition's tuple @@ -363,59 +368,69 @@ ExecUseUpdateResultRelForRouting(ModifyTableState *mtstate, * descriptor. When generating the per-subplan result rels, * this was not set. */ - update_result_rel->ri_PartitionRoot = proute->partition_root; + rri->ri_PartitionRoot = proute->partition_root; + } +} - /* - * Remember the index of this UPDATE result rel in the tuple - * routing partition array. 
- */ - proute->subplan_partition_offsets[i] = proute->num_partitions; +/* + * ExecExpandRoutingArrays + * Double the size of the allocated arrays in 'proute' + */ +static void +ExecExpandRoutingArrays(PartitionTupleRouting *proute) +{ + int new_size = proute->partitions_allocsize * 2; + int old_size = proute->partitions_allocsize; - /* - * Also, record in PartitionDispatch that we have a valid - * ResultRelInfo for this partition. - */ - Assert(pd->indexes[partidx] == -1); - part_result_rel_index = proute->num_partitions++; - if (part_result_rel_index >= PARTITION_ROUTING_MAXSIZE) - elog(ERROR, "invalid partition index: %u", part_result_rel_index); - pd->indexes[partidx] = part_result_rel_index; - if (part_result_rel_index >= proute->partitions_allocsize) - { - /* Expand allocated place. */ - proute->partitions_allocsize = - Min(proute->partitions_allocsize * 2, - PARTITION_ROUTING_MAXSIZE); - proute->partitions = (ResultRelInfo **) - repalloc(proute->partitions, - sizeof(ResultRelInfo *) * - proute->partitions_allocsize); - } - proute->partitions[part_result_rel_index] = update_result_rel; - break; + proute->partitions_allocsize = new_size; + + proute->partitions = (ResultRelInfo **) + repalloc(proute->partitions, sizeof(ResultRelInfo *) * new_size); + + if (proute->parent_child_tupconv_maps != NULL) + { + proute->parent_child_tupconv_maps = (TupleConversionMap **) + repalloc( proute->parent_child_tupconv_maps, + sizeof(TupleConversionMap *) * new_size); + memset(&proute->parent_child_tupconv_maps[old_size], 0, + sizeof(TupleConversionMap *) * (new_size - old_size)); } - return part_result_rel_index; + if (proute->child_parent_tupconv_maps != NULL) + { + proute->child_parent_tupconv_maps = (TupleConversionMap **) + repalloc(proute->child_parent_tupconv_maps, + sizeof(TupleConversionMap *) * new_size); + memset(&proute->child_parent_tupconv_maps[old_size], 0, + sizeof(TupleConversionMap *) * (new_size - old_size)); + } + + if 
(proute->child_parent_map_not_required != NULL) + { + proute->child_parent_map_not_required = (bool *) + repalloc(proute->child_parent_map_not_required, + sizeof(bool) * new_size); + memset(&proute->child_parent_map_not_required[old_size], 0, + sizeof(bool) * (new_size - old_size)); + } } /* * ExecInitPartitionInfo * Initialize ResultRelInfo and other information for a partition - * - * This also stores it in the proute->partitions array at the next - * available index, possibly expanding the array if there isn't any space - * left in it, and returns the index where it's stored. + * and store it in the next empty slot in 'proute's partitions array and + * return the index of that element. */ static int ExecInitPartitionInfo(ModifyTableState *mtstate, - ResultRelInfo *resultRelInfo, + ResultRelInfo *rootResultRelInfo, PartitionTupleRouting *proute, EState *estate, PartitionDispatch parent, int partidx) { Oid partoid = parent->partdesc->oids[partidx]; ModifyTable *node = (ModifyTable *) mtstate->ps.plan; - Relation rootrel = resultRelInfo->ri_RelationDesc, + Relation rootrel = rootResultRelInfo->ri_RelationDesc, partrel; Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc; ResultRelInfo *leaf_part_rri; @@ -605,18 +620,14 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, } part_result_rel_index = proute->num_partitions++; - if (part_result_rel_index >= PARTITION_ROUTING_MAXSIZE) - elog(ERROR, "invalid partition index: %u", part_result_rel_index); parent->indexes[partidx] = part_result_rel_index; + + /* Allocate more space in the arrays, if required */ if (part_result_rel_index >= proute->partitions_allocsize) - { - /* Expand allocated place. */ - proute->partitions_allocsize = - Min(proute->partitions_allocsize * 2, PARTITION_ROUTING_MAXSIZE); - proute->partitions = (ResultRelInfo **) - repalloc(proute->partitions, - sizeof(ResultRelInfo *) * proute->partitions_allocsize); - } + ExecExpandRoutingArrays(proute); + + /* Save here for later use. 
*/ + proute->partitions[part_result_rel_index] = leaf_part_rri; /* Set up information needed for routing tuples to the partition. */ ExecInitRoutingInfo(mtstate, estate, proute, leaf_part_rri, @@ -639,7 +650,7 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, * list and searching for ancestry relationships to each index in the * ancestor table. */ - if (list_length(resultRelInfo->ri_onConflictArbiterIndexes) > 0) + if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) > 0) { List *childIdxs; @@ -652,7 +663,7 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, ListCell *lc2; ancestors = get_partition_ancestors(childIdx); - foreach(lc2, resultRelInfo->ri_onConflictArbiterIndexes) + foreach(lc2, rootResultRelInfo->ri_onConflictArbiterIndexes) { if (list_member_oid(ancestors, lfirst_oid(lc2))) arbiterIndexes = lappend_oid(arbiterIndexes, childIdx); @@ -666,7 +677,7 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, * (This shouldn't happen, since arbiter index selection should not * pick up an invalid index.) */ - if (list_length(resultRelInfo->ri_onConflictArbiterIndexes) != + if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) != list_length(arbiterIndexes)) elog(ERROR, "invalid arbiter index list"); leaf_part_rri->ri_onConflictArbiterIndexes = arbiterIndexes; @@ -683,7 +694,7 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, NULL; Assert(node->onConflictSet != NIL); - Assert(resultRelInfo->ri_onConflict != NULL); + Assert(rootResultRelInfo->ri_onConflict != NULL); /* * If the partition's tuple descriptor matches exactly the root @@ -692,7 +703,7 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, * need to create state specific to this partition. */ if (map == NULL) - leaf_part_rri->ri_onConflict = resultRelInfo->ri_onConflict; + leaf_part_rri->ri_onConflict = rootResultRelInfo->ri_onConflict; else { List *onconflset; @@ -783,9 +794,6 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, } } - /* Save here for later use. 
*/ - proute->partitions[part_result_rel_index] = leaf_part_rri; - MemoryContextSwitchTo(oldContext); return part_result_rel_index; @@ -825,21 +833,10 @@ ExecInitRoutingInfo(ModifyTableState *mtstate, /* Allocate parent child map array only if we need to store a map */ if (proute->parent_child_tupconv_maps == NULL) { - proute->parent_child_tupconv_maps_allocsize = new_size = - PARTITION_ROUTING_INITSIZE; + new_size = proute->partitions_allocsize; proute->parent_child_tupconv_maps = (TupleConversionMap **) palloc0(sizeof(TupleConversionMap *) * new_size); } - /* We may have ran out of the initially allocated space. */ - else if (partidx >= proute->parent_child_tupconv_maps_allocsize) - { - proute->parent_child_tupconv_maps_allocsize = new_size = - Min(proute->parent_child_tupconv_maps_allocsize * 2, - PARTITION_ROUTING_MAXSIZE); - proute->parent_child_tupconv_maps = (TupleConversionMap **) - repalloc( proute->parent_child_tupconv_maps, - sizeof(TupleConversionMap *) * new_size); - } proute->parent_child_tupconv_maps[partidx] = map; } @@ -867,7 +864,7 @@ ExecInitRoutingInfo(ModifyTableState *mtstate, */ static PartitionDispatch ExecInitPartitionDispatchInfo(PartitionTupleRouting *proute, Oid partoid, - PartitionDispatch parent_pd, int part_index) + PartitionDispatch parent_pd, int partidx) { Relation rel; TupleDesc tupdesc; @@ -921,15 +918,12 @@ ExecInitPartitionDispatchInfo(PartitionTupleRouting *proute, Oid partoid, memset(pd->indexes, -1, sizeof(int) * partdesc->nparts); dispatchidx = proute->num_dispatch++; - if (dispatchidx >= PARTITION_ROUTING_MAXSIZE) - elog(ERROR, "invalid partition index: %u", dispatchidx); if (parent_pd) - parent_pd->indexes[part_index] = dispatchidx; + parent_pd->indexes[partidx] = dispatchidx; if (dispatchidx >= proute->dispatch_allocsize) { /* Expand allocated space. 
*/ - proute->dispatch_allocsize = - Min(proute->dispatch_allocsize * 2, PARTITION_ROUTING_MAXSIZE); + proute->dispatch_allocsize *= 2; proute->partition_dispatch_info = (PartitionDispatchData **) repalloc(proute->partition_dispatch_info, sizeof(PartitionDispatchData *) * @@ -954,20 +948,22 @@ ExecInitPartitionDispatchInfo(PartitionTupleRouting *proute, Oid partoid, void ExecSetupChildParentMapForLeaf(PartitionTupleRouting *proute) { + int size; + Assert(proute != NULL); + size = proute->partitions_allocsize; + /* * These array elements get filled up with maps on an on-demand basis. * Initially just set all of them to NULL. */ - proute->child_parent_tupconv_maps_allocsize = PARTITION_ROUTING_INITSIZE; proute->child_parent_tupconv_maps = - (TupleConversionMap **) palloc0(sizeof(TupleConversionMap *) * - PARTITION_ROUTING_INITSIZE); + (TupleConversionMap **) palloc0(sizeof(TupleConversionMap *) * size); /* Same is the case for this array. All the values are set to false */ - proute->child_parent_map_not_required = - (bool *) palloc0(sizeof(bool) * PARTITION_ROUTING_INITSIZE); + proute->child_parent_map_not_required = (bool *) palloc0(sizeof(bool) * + size); } /* @@ -978,7 +974,6 @@ TupleConversionMap * TupConvMapForLeaf(PartitionTupleRouting *proute, ResultRelInfo *rootRelInfo, int leaf_index) { - ResultRelInfo **resultRelInfos = proute->partitions; TupleConversionMap **map; TupleDesc tupdesc; @@ -987,7 +982,7 @@ TupConvMapForLeaf(PartitionTupleRouting *proute, ExecSetupChildParentMapForLeaf(proute); /* If it's already known that we don't need a map, return NULL. */ - if (proute->child_parent_map_not_required[leaf_index]) + else if (proute->child_parent_map_not_required[leaf_index]) return NULL; /* If we've already got a map, return it. */ @@ -996,37 +991,16 @@ TupConvMapForLeaf(PartitionTupleRouting *proute, return *map; /* No map yet; try to create one. 
*/ - tupdesc = RelationGetDescr(resultRelInfos[leaf_index]->ri_RelationDesc); + tupdesc = RelationGetDescr(proute->partitions[leaf_index]->ri_RelationDesc); *map = convert_tuples_by_name(tupdesc, RelationGetDescr(rootRelInfo->ri_RelationDesc), gettext_noop("could not convert row type")); - /* If it turns out no map is needed, remember for next time. */ - - /* We may have run out of the initially allocated space. */ - if (leaf_index >= proute->child_parent_tupconv_maps_allocsize) - { - int new_size, - old_size; - - old_size = proute->child_parent_tupconv_maps_allocsize; - proute->child_parent_tupconv_maps_allocsize = new_size = - Min(proute->parent_child_tupconv_maps_allocsize * 2, - PARTITION_ROUTING_MAXSIZE); - proute->child_parent_tupconv_maps = (TupleConversionMap **) - repalloc(proute->child_parent_tupconv_maps, - sizeof(TupleConversionMap *) * new_size); - memset(proute->child_parent_tupconv_maps + old_size, 0, - sizeof(TupleConversionMap *) * (new_size - old_size)); - - proute->child_parent_map_not_required = (bool *) - repalloc(proute->child_parent_map_not_required, - sizeof(bool) * new_size); - memset(proute->child_parent_map_not_required + old_size, false, - sizeof(bool) * (new_size - old_size)); - } - + /* + * If it turns out no map is needed, remember that so we don't try making + * one again next time. + */ proute->child_parent_map_not_required[leaf_index] = (*map == NULL); return *map; @@ -1102,23 +1076,18 @@ ExecCleanupTupleRouting(ModifyTableState *mtstate, resultRelInfo); /* - * Check if this result rel is one of UPDATE subplan result rels, - * which if so, let ExecEndPlan() close it. + * Check if this result rel is one belonging to the node's subplans, + * if so, let ExecEndPlan() clean it up. 
*/ - if (proute->subplan_partition_offsets) + if (proute->subplan_partition_table) { - int j; - int found = false; + Oid partoid; + bool found; - for (j = 0; j < proute->num_subplan_partition_offsets; j++) - { - if (proute->subplan_partition_offsets[j] == i) - { - found = true; - break; - } - } + partoid = RelationGetRelid(resultRelInfo->ri_RelationDesc); + (void) hash_search(proute->subplan_partition_table, &partoid, + HASH_FIND, &found); if (found) continue; } diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h index 8d20469c98..4cc7508067 100644 --- a/src/include/catalog/partition.h +++ b/src/include/catalog/partition.h @@ -29,8 +29,8 @@ typedef struct PartitionDescData Oid *oids; /* Array of length 'nparts' containing * partition OIDs in order of the their * bounds */ - bool *is_leaf; /* Array of length 'nparts' containing whether - * a partition is a leaf partition */ + bool *is_leaf; /* Array of 'nparts' elements storing whether + * a partition is a leaf partition or not */ PartitionBoundInfo boundinfo; /* collection of partition bounds */ } PartitionDescData; diff --git a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h index 91b840e12f..1b421f2ec5 100644 --- a/src/include/executor/execPartition.h +++ b/src/include/executor/execPartition.h @@ -31,9 +31,13 @@ * tupmap TupleConversionMap to convert from the parent's rowtype to * this table's rowtype (when extracting the partition key of a * tuple just before routing it through this table) - * indexes Array with partdesc->nparts members (for details on what - * individual members represent, see how they are set in - * get_partition_dispatch_recurse()) + * indexes Array with partdesc->nparts elements. For leaf partitions the + * index into the PartitionTupleRouting->partitions array is + * stored. When the partition is itself a partitioned table then + * we store the index into + * PartitionTupleRouting->partition_dispatch_info. 
-1 means + * we've not yet allocated anything in PartitionTupleRouting for + * the partition. *----------------------- */ typedef struct PartitionDispatchData @@ -55,7 +59,7 @@ typedef struct PartitionDispatchData *PartitionDispatch; * partitions * * partition_root Root table, that is, the table mentioned in the - * INSERT or UPDATE query or COPY FROM command. + * command. * * partition_dispatch_info Contains PartitionDispatch objects for every * partitioned table touched by tuple routing. The @@ -84,8 +88,11 @@ typedef struct PartitionDispatchData *PartitionDispatch; * placed in 'partitions' * * partitions_allocsize (>= 'num_partitions') is the number of entries - * that can be stored in 'partitions' before needing - * to reallocate more space + * that can be stored in 'partitions', + * 'parent_child_tupconv_maps', + * 'child_parent_tupconv_maps' and + * 'child_parent_map_not_required' arrays before + * needing to reallocate more space * * parent_child_tupconv_maps Contains information to convert tuples of the * root parent's rowtype to those of the leaf @@ -98,12 +105,8 @@ typedef struct PartitionDispatchData *PartitionDispatch; * need not be more of these maps than there are * partitions that were touched. * - * parent_child_tupconv_maps_allocsize The number of entries that can be - * stored in 'parent_child_tupconv_maps' before - * needing to reallocate more space - * * partition_tuple_slot This is a tuple slot used to store a tuple using - * rowtype of the the partition chosen by tuple + * rowtype of the partition chosen by tuple * routing. Maintained separately because partitions * may have different rowtype. * @@ -111,31 +114,22 @@ typedef struct PartitionDispatchData *PartitionDispatch; * do tuple routing. * * child_parent_tupconv_maps Information to convert tuples of the leaf - * partitions' rowtype to the the root parent's - * rowtype. 
These are needed by transition table - * machinery when storing tuples of partition's - * rowtype into the transition table that can only - * store tuples of the root parent's rowtype. - * Like 'parent_child_tupconv_maps' it remains NULL - * if none of the partitions selected by tuple - * routing needed a conversion map. Also, if non- - * NULL, is of the same size as 'partitions'. + * partitions' rowtype to the root parent's rowtype. + * These are needed by transition table machinery + * when storing tuples of partition's rowtype into + * the transition table that can only store tuples of + * the root parent's rowtype. Like + * 'parent_child_tupconv_maps' it remains NULL if + * none of the partitions selected by tuple routing + * needed a conversion map. Also, if non-NULL, is of + * the same size as 'partitions'. * * child_parent_map_not_required Stores if we don't need a conversion * map for a partition so that TupConvMapForLeaf - * can return quickly if set + * can return without having to re-check if it needs + * to build a map. * - * child_parent_tupconv_maps_allocsize The number of entries that can be - * stored in 'child_parent_tupconv_maps' before - * needing to reallocate more space - * - * subplan_partition_offsets The following maps indexes of UPDATE result - * rels in the per-subplan array to indexes of their - * pointers in the 'partitions' - * - * num_subplan_partition_offsets The number of entries in - * 'subplan_partition_offsets', which is same as the - * number of UPDATE result rels + * subplan_partition_table Hash table to find subplan ResultRelInfos by Oid. 
* * root_tuple_slot During UPDATE tuple routing, this tuple slot is * used to transiently store a tuple using the root @@ -151,24 +145,15 @@ typedef struct PartitionTupleRouting PartitionDispatch *partition_dispatch_info; int num_dispatch; int dispatch_allocsize; - ResultRelInfo **partitions; int num_partitions; int partitions_allocsize; - TupleConversionMap **parent_child_tupconv_maps; - int parent_child_tupconv_maps_allocsize; - - TupleTableSlot *partition_tuple_slot; - TupleConversionMap **child_parent_tupconv_maps; bool *child_parent_map_not_required; - int child_parent_tupconv_maps_allocsize; - - int *subplan_partition_offsets; - int num_subplan_partition_offsets; - + HTAB *subplan_partition_table; TupleTableSlot *root_tuple_slot; + TupleTableSlot *partition_tuple_slot; } PartitionTupleRouting; /*-----------------------