From cf659e0b221ddc04f5851b91518cc123be547f21 Mon Sep 17 00:00:00 2001 From: amitlan Date: Tue, 25 May 2021 22:48:47 +0900 Subject: [PATCH 1/2] ExecFindPartition: cache last used partition v3 --- src/backend/executor/execPartition.c | 198 ++++++++++++++++++++++----- 1 file changed, 162 insertions(+), 36 deletions(-) diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 606c920b06..1d0d8e63f6 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -133,6 +133,16 @@ struct PartitionTupleRouting * routing it through this table). A NULL value is stored if no tuple * conversion is required. * + * savedPartInfo + * If non-NULL, ResultRelInfo for the partition that was most recently + * chosen as the routing target; ExecFindPartition() checks if the + * same one can be used for the current row before applying the tuple- + * routing algorithm to it. + * + * savedDispatchInfo + * If non-NULL, PartitionDispatch for the sub-partitioned partition + * that was most recently chosen as the routing target. + * * indexes * Array of partdesc->nparts elements. For leaf partitions the index * corresponds to the partition's ResultRelInfo in the encapsulating @@ -150,6 +160,8 @@ typedef struct PartitionDispatchData PartitionDesc partdesc; TupleTableSlot *tupslot; AttrMap *tupmap; + ResultRelInfo *savedPartInfo; + PartitionDispatch savedDispatchInfo; int indexes[FLEXIBLE_ARRAY_MEMBER]; } PartitionDispatchData; @@ -234,6 +246,82 @@ ExecSetupPartitionTupleRouting(EState *estate, Relation rel) return proute; } +/* + * Remember this partition for the next tuple inserted into this parent; see + * CanUseSavedPartitionForTuple() for how it's decided whether a tuple can + * indeed reuse this partition. + * + * Do this only if we have range/list partitions, because only + * in that case it's conceivable that consecutively inserted rows + * tend to go into the same partition. 
+ */ +static inline void +SavePartitionForNextTuple(PartitionDispatch dispatch, + ResultRelInfo *partInfo, + PartitionDispatch dispatchInfo) +{ + if ((dispatch->key->strategy == PARTITION_STRATEGY_RANGE || + dispatch->key->strategy == PARTITION_STRATEGY_LIST)) + { + dispatch->savedPartInfo = partInfo; + dispatch->savedDispatchInfo = dispatchInfo; + } +} + +/* + * Return true if the saved partition accepts this tuple, as determined by + * evaluating its partition constraint against the tuple (converted first + * if the partition has a ri_RootToPartitionMap); false if that fails or + * nothing is saved. On success we skip get_partition_for_tuple(), which + * can be slightly more expensive if there are many partitions to search. + */ +static inline bool +CanUseSavedPartitionForTuple(PartitionDispatch dispatch, + TupleTableSlot *rootslot, + EState *estate) +{ + if (dispatch->savedPartInfo) + { + ResultRelInfo *rri; + TupleTableSlot *tmpslot; + TupleConversionMap *map; + + rri = dispatch->savedPartInfo; + map = rri->ri_RootToPartitionMap; + if (map) + tmpslot = execute_attr_map_slot(map->attrMap, rootslot, + rri->ri_PartitionTupleSlot); + else + tmpslot = rootslot; + return ExecPartitionCheck(rri, tmpslot, estate, false); + } + + return false; +} + +/* + * Convert the tuple to a sub-partitioned partition's layout, if needed. + */ +static inline TupleTableSlot * +ConvertTupleToPartition(PartitionDispatch dispatch, + TupleTableSlot *slot, + TupleTableSlot **parent_slot) +{ + if (dispatch->tupslot) + { + AttrMap *map = dispatch->tupmap; + TupleTableSlot *tempslot = *parent_slot; + + *parent_slot = dispatch->tupslot; + slot = execute_attr_map_slot(map, slot, *parent_slot); + + if (tempslot != NULL) + ExecClearTuple(tempslot); + } + + return slot; +} + /* * ExecFindPartition -- Return the ResultRelInfo for the leaf partition that * the tuple contained in *slot should belong to. 
@@ -292,6 +380,34 @@ ExecFindPartition(ModifyTableState *mtstate, CHECK_FOR_INTERRUPTS(); rel = dispatch->reldesc; + + if (CanUseSavedPartitionForTuple(dispatch, rootslot, estate)) + { + /* If the saved partition is a leaf partition, just return it. */ + if (dispatch->savedDispatchInfo == NULL) + { + /* Restore ecxt's scantuple before returning. */ + ecxt->ecxt_scantuple = ecxt_scantuple_saved; + MemoryContextSwitchTo(oldcxt); + return dispatch->savedPartInfo; + } + else + { + /* + * Saved partition is sub-partitioned, so continue the loop to + * find the next level partition. + */ + dispatch = dispatch->savedDispatchInfo; + slot = ConvertTupleToPartition(dispatch, slot, &myslot); + continue; + } + } + else + { + dispatch->savedPartInfo = rri = NULL; + dispatch->savedDispatchInfo = NULL; + } + partdesc = dispatch->partdesc; /* @@ -372,6 +488,8 @@ ExecFindPartition(ModifyTableState *mtstate, } Assert(rri != NULL); + SavePartitionForNextTuple(dispatch, rri, NULL); + /* Signal to terminate the loop */ dispatch = NULL; } @@ -382,6 +500,8 @@ ExecFindPartition(ModifyTableState *mtstate, */ if (likely(dispatch->indexes[partidx] >= 0)) { + PartitionDispatch subdispatch; + /* Already built. */ Assert(dispatch->indexes[partidx] < proute->num_dispatch); @@ -391,7 +511,11 @@ ExecFindPartition(ModifyTableState *mtstate, * Move down to the next partition level and search again * until we find a leaf partition that matches this tuple */ - dispatch = pd[dispatch->indexes[partidx]]; + subdispatch = pd[dispatch->indexes[partidx]]; + + SavePartitionForNextTuple(dispatch, rri, subdispatch); + + dispatch = subdispatch; } else { @@ -411,24 +535,13 @@ ExecFindPartition(ModifyTableState *mtstate, dispatch->indexes[partidx] < proute->num_dispatch); rri = proute->nonleaf_partitions[dispatch->indexes[partidx]]; - dispatch = subdispatch; - } - /* - * Convert the tuple to the new parent's layout, if different from - * the previous parent. 
- */ - if (dispatch->tupslot) - { - AttrMap *map = dispatch->tupmap; - TupleTableSlot *tempslot = myslot; - - myslot = dispatch->tupslot; - slot = execute_attr_map_slot(map, slot, myslot); + SavePartitionForNextTuple(dispatch, rri, subdispatch); - if (tempslot != NULL) - ExecClearTuple(tempslot); + dispatch = subdispatch; } + + slot = ConvertTupleToPartition(dispatch, slot, &myslot); } /* @@ -858,27 +971,11 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate, return leaf_part_rri; } -/* - * ExecInitRoutingInfo - * Set up information needed for translating tuples between root - * partitioned table format and partition format, and keep track of it - * in PartitionTupleRouting. - */ -static void -ExecInitRoutingInfo(ModifyTableState *mtstate, - EState *estate, - PartitionTupleRouting *proute, - PartitionDispatch dispatch, - ResultRelInfo *partRelInfo, - int partidx, - bool is_borrowed_rel) +static inline void +InitRootToPartitionMap(ResultRelInfo *partRelInfo, + ResultRelInfo *rootRelInfo, + EState *estate) { - ResultRelInfo *rootRelInfo = partRelInfo->ri_RootResultRelInfo; - MemoryContext oldcxt; - int rri_index; - - oldcxt = MemoryContextSwitchTo(proute->memcxt); - /* * Set up a tuple conversion map to convert a tuple routed to the * partition from the parent's type to the partition's. @@ -907,6 +1004,30 @@ ExecInitRoutingInfo(ModifyTableState *mtstate, } else partRelInfo->ri_PartitionTupleSlot = NULL; +} + +/* + * ExecInitRoutingInfo + * Set up information needed for translating tuples between root + * partitioned table format and partition format, and keep track of it + * in PartitionTupleRouting. 
+ */ +static void +ExecInitRoutingInfo(ModifyTableState *mtstate, + EState *estate, + PartitionTupleRouting *proute, + PartitionDispatch dispatch, + ResultRelInfo *partRelInfo, + int partidx, + bool is_borrowed_rel) +{ + ResultRelInfo *rootRelInfo = partRelInfo->ri_RootResultRelInfo; + MemoryContext oldcxt; + int rri_index; + + oldcxt = MemoryContextSwitchTo(proute->memcxt); + + InitRootToPartitionMap(partRelInfo, rootRelInfo, estate); /* * If the partition is a foreign table, let the FDW init itself for @@ -1051,6 +1172,9 @@ ExecInitPartitionDispatchInfo(EState *estate, pd->tupslot = NULL; } + pd->savedPartInfo = NULL; + pd->savedDispatchInfo = NULL; + /* * Initialize with -1 to signify that the corresponding partition's * ResultRelInfo or PartitionDispatch has not been created yet. @@ -1094,6 +1218,8 @@ ExecInitPartitionDispatchInfo(EState *estate, ResultRelInfo *rri = makeNode(ResultRelInfo); InitResultRelInfo(rri, rel, 0, rootResultRelInfo, 0); + /* The map is needed in CanUseSavedPartitionForTuple(). */ + InitRootToPartitionMap(rri, rootResultRelInfo, estate); proute->nonleaf_partitions[dispatchidx] = rri; } else -- 2.24.1