From af0e27b033082bed836cb49cdf0b2614ebf9bba1 Mon Sep 17 00:00:00 2001 From: Ashutosh Bapat Date: Wed, 1 Apr 2020 22:17:26 +0530 Subject: [PATCH 5/5] Address Tomas's comments. --- doc/src/sgml/config.sgml | 6 +- src/backend/nodes/outfuncs.c | 2 +- src/backend/optimizer/README | 37 +++++-- src/backend/optimizer/path/joinrels.c | 31 ++++-- src/backend/optimizer/util/relnode.c | 30 ++++-- src/backend/partitioning/partbounds.c | 143 +++++++++++++++++++------- src/include/nodes/pathnodes.h | 19 +++- 7 files changed, 200 insertions(+), 68 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 2de21903a1..d61a01c156 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -4693,9 +4693,9 @@ ANY num_sync ( merged && - !rel2->merged && + if (!rel1->partbounds_merged && + !rel2->partbounds_merged && rel1->nparts == rel2->nparts && partition_bounds_equal(part_scheme->partnatts, part_scheme->parttyplen, @@ -1388,7 +1397,7 @@ compute_partition_bounds(PlannerInfo *root, RelOptInfo *rel1, return; } nparts = list_length(*parts1); - joinrel->merged = true; + joinrel->partbounds_merged = true; } Assert(nparts > 0); @@ -1404,13 +1413,15 @@ compute_partition_bounds(PlannerInfo *root, RelOptInfo *rel1, Assert(joinrel->part_rels); /* - * If the join rel's merged flag is true, it means inputs are not + * If the join relation's bounds were computed by merging the bounds of + * one of the previous joining pairs, it means inputs may not be * guaranteed to have the same partition bounds, therefore we can't * assume that the partitions at the same cardinal positions form the * pairs; let get_matching_part_pairs() generate the pairs. Otherwise, - * nothing to do since we can assume that. + * nothing to do since matching partitions are at the same positions in + * both the part_rels array. 
*/ - if (joinrel->merged) + if (joinrel->partbounds_merged) { get_matching_part_pairs(root, joinrel, rel1, rel2, parts1, parts2); @@ -1457,7 +1468,7 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, check_stack_depth(); /* Nothing to do, if the join relation is not partitioned. */ - if (joinrel->part_scheme == NULL || joinrel->nparts == 0) + if (IS_JOINREL_NOT_PARTITITIONED(joinrel)) return; /* The join relation should have consider_partitionwise_join set. */ @@ -1483,12 +1494,12 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, Assert(joinrel->part_scheme == rel1->part_scheme && joinrel->part_scheme == rel2->part_scheme); - Assert(!(joinrel->merged && (joinrel->nparts <= 0))); + Assert(!(joinrel->partbounds_merged && (joinrel->nparts <= 0))); compute_partition_bounds(root, rel1, rel2, joinrel, parent_sjinfo, &parts1, &parts2); - if (joinrel->merged) + if (joinrel->partbounds_merged) { lcr1 = list_head(parts1); lcr2 = list_head(parts2); @@ -1512,7 +1523,7 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, AppendRelInfo **appinfos; int nappinfos; - if (joinrel->merged) + if (joinrel->partbounds_merged) { child_rel1 = lfirst_node(RelOptInfo, lcr1); child_rel2 = lfirst_node(RelOptInfo, lcr2); diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 0e4944ac8e..7df76a22ed 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -242,7 +242,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) rel->part_scheme = NULL; rel->nparts = -1; rel->boundinfo = NULL; - rel->merged = false; + rel->partbounds_merged = false; rel->partition_qual = NIL; rel->part_rels = NULL; rel->all_partrels = NULL; @@ -657,7 +657,7 @@ build_join_rel(PlannerInfo *root, joinrel->part_scheme = NULL; joinrel->nparts = -1; joinrel->boundinfo = NULL; - joinrel->merged = false; + joinrel->partbounds_merged = false; 
joinrel->partition_qual = NIL; joinrel->part_rels = NULL; joinrel->all_partrels = NULL; @@ -835,7 +835,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, joinrel->part_scheme = NULL; joinrel->nparts = -1; joinrel->boundinfo = NULL; - joinrel->merged = false; + joinrel->partbounds_merged = false; joinrel->partition_qual = NIL; joinrel->part_rels = NULL; joinrel->all_partrels = NULL; @@ -1638,12 +1638,22 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel, * We can only consider this join as an input to further partitionwise * joins if (a) the input relations are partitioned and have * consider_partitionwise_join=true, (b) the partition schemes match, and - * (c) we can identify an equi-join between the partition keys. Note that - * if it were possible for have_partkey_equi_join to return different - * answers for the same joinrel depending on which join ordering we try - * first, this logic would break. That shouldn't happen, though, because - * of the way the query planner deduces implied equalities and reorders - * the joins. Please see optimizer/README for details. + * (c) we can identify an equi-join between the partition keys. + * + * Note that if it were possible for have_partkey_equi_join to return + * different answers for the same joinrel depending on which join ordering + * we try first, this logic would break. That shouldn't happen, though, + * because of the way the query planner deduces implied equalities and + * reorders the joins. Please see optimizer/README for details. + * + * It might be possible though that one or both relations in the given pair + * of joining relations do not have partition bounds set but a later pair + * does. This is possible if partitionwise join was not possible for one of + * the joining relations (which is itself a join relation) because we + * could not merge bounds for any of its joining pairs. 
Hence we just + * check for the existence of a partition scheme for the joining relations and let + * try_partitionwise_join() handle the rest for each of the joining pairs + * of this join relation. */ if (outer_rel->part_scheme == NULL || inner_rel->part_scheme == NULL || !outer_rel->consider_partitionwise_join || @@ -1653,6 +1663,8 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel, jointype, restrictlist)) { Assert(!IS_PARTITIONED_REL(joinrel)); + /* Join is not partitioned. */ + joinrel->nparts = 0; return; } diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c index ede16cba15..d5f6eac4be 100644 --- a/src/backend/partitioning/partbounds.c +++ b/src/backend/partitioning/partbounds.c @@ -69,13 +69,23 @@ typedef struct PartitionRangeBound bool lower; /* this is the lower (vs upper) bound */ } PartitionRangeBound; -/* Per-partitioned-relation data for merge_list_bounds()/merge_range_bounds() */ +/* + * A mapping between the partitions of a joining relation and the partitions of the + * join (a.k.a. merged partitions). + */ typedef struct PartitionMap { int nparts; /* number of partitions */ - int *merged_indexes; /* indexes of merged partitions */ - bool *merged; /* flags to indicate whether partitions are - * merged with non-dummy partitions */ + int *merged_indexes; /* ith entry in this array gives the index of + * the merged partition to which the ith partition + * (of the relation for which this map is + * maintained) is mapped. */ + bool *matches_non_dummy; /* ith entry in the array indicates + * whether the ith partition (of the + * relation for which this map is maintained) matches a non-dummy + * partition of the other joining + * relation. + */ bool did_remapping; /* did we re-map partitions? 
*/ int *old_indexes; /* old indexes of merged partitions if * did_remapping */ @@ -1086,6 +1096,38 @@ partition_bounds_merge(int partnatts, * merge_list_bounds * Create the partition bounds for a join relation between list * partitioned tables, if possible + * + * For each list item from either joining relation, the partitions from both + * sides containing that list item form a pair of matching partitions. The + * partition resulting from joining these two partitions will contain that list + * item in its bounds in the join. If a list item from the inner side of the + * join is missing from the outer side, the rows with that list item as the + * partition key do not appear in the join and hence that list item does not + * appear in the bounds of any of the partitions of the join. The corresponding + * partition from the inner side may not appear in the join if none of its list + * items appear in the other relation. Even if a list item from the outer side + * of the join is missing from the inner side, the corresponding rows and hence + * the partition appear in the join. So these list items are included in the + * partition bounds of the join. + * + * If a list item from one relation doesn't appear in the partition bounds of + * the other relation explicitly, it may be covered by the default partition of + * the other relation. In such a case, the corresponding partition from the + * first relation forms a matching pair with the default partition from the + * other relation. + * + * Since the list items appear in the ascending order, an algorithm similar to + * merge join is used to find the matching partitions and compute the list + * items that will be part of the join. + * + * If there are multiple partitions from one side matching a given partition on + * the other side, the algorithm bails out since we do not have machinery for + * joining one partition with multiple partitions. 
It might happen that none of + * the list items of a partition from the outer relation appear in the + * inner relation and there is no default partition in the inner relation. Such + * a partition from the outer side will have no matching partition on the inner + * side. The algorithm will bail out in such a case since we do not have a + * mechanism to perform a join with a non-existing relation. */ static PartitionBoundInfo merge_list_bounds(FmgrInfo *partsupfunc, Oid *partcollation, @@ -1381,6 +1423,35 @@ cleanup: * merge_range_bounds * Create the partition bounds for a join relation between range * partitioned tables, if possible + * + * For each range from either joining relation, the partitions from both + * sides containing that range form a pair of matching partitions. The + * partition resulting from joining these two partitions will cover the range + * produced by merging the ranges (see get_merged_range_bounds()) of the + * corresponding partitions. If a range from the inner side of the join is + * missing from the outer side, the rows in that range do not appear in the join + * and hence that range does not appear in the join. Even if a range from the + * outer side of the join is missing from the inner side, the corresponding + * rows and hence the range appear in the join. + * + * If a range from one relation doesn't appear in the other relation + * explicitly, it may be covered by the default partition of the other + * relation. In such a case, the corresponding partition from the first + * relation forms a matching pair with the default partition from the other + * relation. + * + * Since the ranges appear in the ascending order, an algorithm similar to + * merge join is used to find the matching partitions and compute the ranges of + * partitions of the join. 
+ * + * If there are multiple partitions from one side matching a given partition on + * the other side, the algorithm bails out since we do not have machinery for + * joining one partition with multiple partitions. It might happen that none of + * the ranges of a partition from the outer relation appear in the inner + * relation and there is no default partition in the inner relation. Such a + * partition from the outer side will have no matching partition on the inner + * side. The algorithm will bail out in such a case since we do not have a + * mechanism to perform a join with a non-existing relation. */ static PartitionBoundInfo merge_range_bounds(int partnatts, FmgrInfo *partsupfuncs, @@ -1490,10 +1561,10 @@ merge_range_bounds(int partnatts, FmgrInfo *partsupfuncs, /* Both partitions should not have been merged yet. */ Assert(outer_index >= 0); Assert(outer_map.merged_indexes[outer_index] == -1 && - outer_map.merged[outer_index] == false); + outer_map.matches_non_dummy[outer_index] == false); Assert(inner_index >= 0); Assert(inner_map.merged_indexes[inner_index] == -1 && - inner_map.merged[inner_index] == false); + inner_map.matches_non_dummy[inner_index] == false); /* * Get the index of the merged partition. Both partitions aren't @@ -1555,7 +1626,7 @@ merge_range_bounds(int partnatts, FmgrInfo *partsupfuncs, /* The outer partition should not have been merged yet. */ Assert(outer_index >= 0); Assert(outer_map.merged_indexes[outer_index] == -1 && - outer_map.merged[outer_index] == false); + outer_map.matches_non_dummy[outer_index] == false); /* * If the inner side has the default partition, or this is an outer @@ -1592,7 +1663,7 @@ merge_range_bounds(int partnatts, FmgrInfo *partsupfuncs, /* The inner partition should not have been merged yet. 
*/ Assert(inner_index >= 0); Assert(inner_map.merged_indexes[inner_index] == -1 && - inner_map.merged[inner_index] == false); + inner_map.matches_non_dummy[inner_index] == false); /* * If the outer side has the default partition, or this is a FULL @@ -1695,13 +1766,13 @@ init_partition_map(RelOptInfo *rel, PartitionMap *map) map->nparts = nparts; map->merged_indexes = (int *) palloc(sizeof(int) * nparts); - map->merged = (bool *) palloc(sizeof(bool) * nparts); + map->matches_non_dummy = (bool *) palloc(sizeof(bool) * nparts); map->did_remapping = false; map->old_indexes = (int *) palloc(sizeof(int) * nparts); for (i = 0; i < nparts; i++) { map->merged_indexes[i] = map->old_indexes[i] = -1; - map->merged[i] = false; + map->matches_non_dummy[i] = false; } } @@ -1712,7 +1783,7 @@ static void free_partition_map(PartitionMap *map) { pfree(map->merged_indexes); - pfree(map->merged); + pfree(map->matches_non_dummy); pfree(map->old_indexes); } @@ -1744,15 +1815,15 @@ merge_matching_partitions(PartitionMap *outer_map, PartitionMap *inner_map, { int outer_merged_index; int inner_merged_index; - bool outer_merged; - bool inner_merged; + bool outer_matches_non_dummy; + bool inner_matches_non_dummy; Assert(outer_index >= 0 && outer_index < outer_map->nparts); outer_merged_index = outer_map->merged_indexes[outer_index]; - outer_merged = outer_map->merged[outer_index]; + outer_matches_non_dummy = outer_map->matches_non_dummy[outer_index]; Assert(inner_index >= 0 && inner_index < inner_map->nparts); inner_merged_index = inner_map->merged_indexes[inner_index]; - inner_merged = inner_map->merged[inner_index]; + inner_matches_non_dummy = inner_map->matches_non_dummy[inner_index]; /* * Handle cases where we have already assigned a merged partition to each @@ -1769,11 +1840,11 @@ merge_matching_partitions(PartitionMap *outer_map, PartitionMap *inner_map, */ if (outer_merged_index == inner_merged_index) { - Assert(outer_merged); - Assert(inner_merged); + Assert(outer_matches_non_dummy); 
+ Assert(inner_matches_non_dummy); return outer_merged_index; } - if (!outer_merged && !inner_merged) + if (!outer_matches_non_dummy && !inner_matches_non_dummy) { /* * This can only happen for a list-partitioning case. We re-map @@ -1784,18 +1855,18 @@ merge_matching_partitions(PartitionMap *outer_map, PartitionMap *inner_map, */ if (outer_merged_index < inner_merged_index) { - outer_map->merged[outer_index] = true; + outer_map->matches_non_dummy[outer_index] = true; inner_map->merged_indexes[inner_index] = outer_merged_index; - inner_map->merged[inner_index] = true; + inner_map->matches_non_dummy[inner_index] = true; inner_map->did_remapping = true; inner_map->old_indexes[inner_index] = inner_merged_index; return outer_merged_index; } else { - inner_map->merged[inner_index] = true; + inner_map->matches_non_dummy[inner_index] = true; outer_map->merged_indexes[outer_index] = inner_merged_index; - outer_map->merged[outer_index] = true; + outer_map->matches_non_dummy[outer_index] = true; outer_map->did_remapping = true; outer_map->old_indexes[outer_index] = outer_merged_index; return inner_merged_index; @@ -1817,31 +1888,31 @@ merge_matching_partitions(PartitionMap *outer_map, PartitionMap *inner_map, { int merged_index = *next_index; - Assert(!outer_merged); - Assert(!inner_merged); + Assert(!outer_matches_non_dummy); + Assert(!inner_matches_non_dummy); outer_map->merged_indexes[outer_index] = merged_index; - outer_map->merged[outer_index] = true; + outer_map->matches_non_dummy[outer_index] = true; inner_map->merged_indexes[inner_index] = merged_index; - inner_map->merged[inner_index] = true; + inner_map->matches_non_dummy[inner_index] = true; *next_index = *next_index + 1; return merged_index; } - if (outer_merged_index >= 0 && !outer_map->merged[outer_index]) + if (outer_merged_index >= 0 && !outer_map->matches_non_dummy[outer_index]) { Assert(inner_merged_index == -1); - Assert(!inner_merged); + Assert(!inner_matches_non_dummy); 
inner_map->merged_indexes[inner_index] = outer_merged_index; - inner_map->merged[inner_index] = true; - outer_map->merged[outer_index] = true; + inner_map->matches_non_dummy[inner_index] = true; + outer_map->matches_non_dummy[outer_index] = true; return outer_merged_index; } - if (inner_merged_index >= 0 && !inner_map->merged[inner_index]) + if (inner_merged_index >= 0 && !inner_map->matches_non_dummy[inner_index]) { Assert(outer_merged_index == -1); - Assert(!outer_merged); + Assert(!outer_matches_non_dummy); outer_map->merged_indexes[outer_index] = inner_merged_index; - outer_map->merged[outer_index] = true; - inner_map->merged[inner_index] = true; + outer_map->matches_non_dummy[outer_index] = true; + inner_map->matches_non_dummy[inner_index] = true; return inner_merged_index; } return -1; @@ -2237,7 +2308,7 @@ merge_default_partitions(PartitionMap *outer_map, * * Note: The caller assumes that the given partition doesn't have a non-dummy * matching partition on the other side, but if the given partition finds the - * matchig partition later, we will adjust the assignment. + * matching partition later, we will adjust the assignment. */ static int merge_partition_with_dummy(PartitionMap *map, int index, int *next_index) @@ -2246,7 +2317,7 @@ merge_partition_with_dummy(PartitionMap *map, int index, int *next_index) Assert(index >= 0 && index < map->nparts); Assert(map->merged_indexes[index] == -1); - Assert(!map->merged[index]); + Assert(!map->matches_non_dummy[index]); map->merged_indexes[index] = merged_index; /* Leave the merged flag alone! 
*/ *next_index = *next_index + 1; diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 622ea2bf63..2c6935d81d 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -723,8 +723,8 @@ typedef struct RelOptInfo int nparts; /* number of partitions; 0 = not partitioned; * -1 = not yet set */ struct PartitionBoundInfoData *boundinfo; /* Partition bounds */ - bool merged; /* true if partition bounds were created by - * partition_bounds_merge() */ + bool partbounds_merged; /* true if partition bounds were created + * by partition_bounds_merge() */ List *partition_qual; /* partition constraint */ struct RelOptInfo **part_rels; /* Array of RelOptInfos of partitions, * stored in the same order of bounds */ @@ -754,6 +754,21 @@ typedef struct RelOptInfo ((rel)->part_scheme && (rel)->boundinfo && (rel)->nparts > 0 && \ (rel)->part_rels && (rel)->partexprs && (rel)->nullable_partexprs) +/* + * Is the given join relation known for certain to be unpartitioned (as determined by + * build_joinrel_partition_info() and try_partitionwise_join())? A join is + * considered to be partitioned if it can be computed using the partitionwise join + * technique. If that's possible, we will have both the partition scheme and the number + * of partitions set in the join relation. If the partition schemes of the + * joining relations match but the partition bounds cannot be merged, the number + * of partitions will be set to 0. + * + * The macro is expected to be called only on a join relation, but we don't + * check that explicitly here. + */ +#define IS_JOINREL_NOT_PARTITITIONED(joinrel) \ + ((joinrel)->part_scheme == NULL || (joinrel)->nparts == 0) + /* * IndexOptInfo * Per-index information for planning/optimization -- 2.17.1