Re: Problem with default partition pruning - Mailing list pgsql-hackers
From | Kyotaro HORIGUCHI |
---|---|
Subject | Re: Problem with default partition pruning |
Date | |
Msg-id | 20190319.152756.202071463.horiguchi.kyotaro@lab.ntt.co.jp Whole thread Raw |
In response to | Re: Problem with default partition pruning (Amit Langote <Langote_Amit_f8@lab.ntt.co.jp>) |
List | pgsql-hackers |
Hi. At Mon, 18 Mar 2019 18:44:07 +0900, Amit Langote <Langote_Amit_f8@lab.ntt.co.jp> wrote in <9bed6b79-f264-6976-b880-e2a5d23e9d85@lab.ntt.co.jp> > > v2 patch attached. > > Could you please check it again? > > I think the updated patch breaks the promise that > get_matching_range_bounds won't set scan_default based on individual > pruning value comparisons. How about the attached delta patch that > applies on top of your earlier v1 patch, which fixes the issue reported by > Thibaut? I read through the patch and understood how it works. And Amit's proposal looks fine. But that makes me think of scan_default as a wart. The attached patch is a refactoring that removes scan_default from PruneStepResult and the defaut partition is represented as the same way as non-default partitions, without changing in behavior. This improves the modularity of partprune code a bit. The fix would be put on top of this easily. Thoughts? regards. -- Kyotaro Horiguchi NTT Open Source Software Center diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c index 5b897d50ee..828240119d 100644 --- a/src/backend/partitioning/partbounds.c +++ b/src/backend/partitioning/partbounds.c @@ -92,7 +92,6 @@ static int partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation, PartitionBoundInfo boundinfo, PartitionRangeBound *probe, bool *is_equal); -static int get_partition_bound_num_indexes(PartitionBoundInfo b); static Expr *make_partition_op_expr(PartitionKey key, int keynum, uint16 strategy, Expr *arg1, Expr *arg2); static Oid get_partition_operator(PartitionKey key, int col, @@ -266,6 +265,7 @@ create_hash_bounds(PartitionBoundSpec **boundspecs, int nparts, boundinfo->ndatums = ndatums; boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *)); + boundinfo->nindexes = greatest_modulus; boundinfo->indexes = (int *) palloc(greatest_modulus * sizeof(int)); for (i = 0; i < greatest_modulus; i++) boundinfo->indexes[i] = -1; @@ -399,7 +399,10 @@ create_list_bounds(PartitionBoundSpec **boundspecs, int nparts, boundinfo->ndatums = ndatums; boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *)); - boundinfo->indexes = (int *) palloc(ndatums * sizeof(int)); + + /* the last element is reserved for the default partition */ + boundinfo->nindexes = ndatums + 1; + boundinfo->indexes = (int *) palloc(boundinfo->nindexes * sizeof(int)); /* * Copy values. Canonical indexes are values ranging from 0 to (nparts - @@ -423,6 +426,9 @@ create_list_bounds(PartitionBoundSpec **boundspecs, int nparts, boundinfo->indexes[i] = (*mapping)[orig_index]; } + /* set default partition index (-1) */ + boundinfo->indexes[ndatums] = -1; + /* * Set the canonical value for null_index, if any. * @@ -596,7 +602,8 @@ create_range_bounds(PartitionBoundSpec **boundspecs, int nparts, * For range partitioning, an additional value of -1 is stored as the last * element. */ - boundinfo->indexes = (int *) palloc((ndatums + 1) * sizeof(int)); + boundinfo->nindexes = ndatums + 1; + boundinfo->indexes = (int *) palloc(boundinfo->nindexes * sizeof(int)); for (i = 0; i < ndatums; i++) { @@ -676,6 +683,9 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval, if (b1->ndatums != b2->ndatums) return false; + if (b1->nindexes != b2->nindexes) + return false; + if (b1->null_index != b2->null_index) return false; @@ -704,7 +714,7 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval, * their indexes array will be same. So, it suffices to compare * indexes array. */ - for (i = 0; i < greatest_modulus; i++) + for (i = 0; i < b1->nindexes; i++) if (b1->indexes[i] != b2->indexes[i]) return false; @@ -765,9 +775,9 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval, return false; } - /* There are ndatums+1 indexes in case of range partitions */ - if (b1->strategy == PARTITION_STRATEGY_RANGE && - b1->indexes[i] != b2->indexes[i]) + /* There may be ndatums+1 indexes in some cases */ + Assert (i == b1->nindexes || i == b1->nindexes - 1); + if (i < b1->nindexes && b1->indexes[i] != b2->indexes[i]) return false; } return true; @@ -793,7 +803,7 @@ partition_bounds_copy(PartitionBoundInfo src, ndatums = dest->ndatums = src->ndatums; partnatts = key->partnatts; - num_indexes = get_partition_bound_num_indexes(src); + num_indexes = dest->nindexes = src->nindexes; /* List partitioned tables have only a single partition key. */ Assert(key->strategy != PARTITION_STRATEGY_LIST || partnatts == 1); @@ -1710,46 +1720,6 @@ qsort_partition_rbound_cmp(const void *a, const void *b, void *arg) b1->lower, b2); } -/* - * get_partition_bound_num_indexes - * - * Returns the number of the entries in the partition bound indexes array. - */ -static int -get_partition_bound_num_indexes(PartitionBoundInfo bound) -{ - int num_indexes; - - Assert(bound); - - switch (bound->strategy) - { - case PARTITION_STRATEGY_HASH: - - /* - * The number of the entries in the indexes array is same as the - * greatest modulus. - */ - num_indexes = get_hash_partition_greatest_modulus(bound); - break; - - case PARTITION_STRATEGY_LIST: - num_indexes = bound->ndatums; - break; - - case PARTITION_STRATEGY_RANGE: - /* Range partitioned table has an extra index. */ - num_indexes = bound->ndatums + 1; - break; - - default: - elog(ERROR, "unexpected partition strategy: %d", - (int) bound->strategy); - } - - return num_indexes; -} - /* * get_partition_operator * diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c index 95a60183a1..3cc9c9f5b8 100644 --- a/src/backend/partitioning/partprune.c +++ b/src/backend/partitioning/partprune.c @@ -105,7 +105,6 @@ typedef struct PruneStepResult */ Bitmapset *bound_offsets; - bool scan_default; /* Scan the default partition? */ bool scan_null; /* Scan the partition for NULL values? */ } PruneStepResult; @@ -671,23 +670,20 @@ get_matching_partitions(PartitionPruneContext *context, List *pruning_steps) Assert(final_result != NULL); i = -1; result = NULL; - scan_default = final_result->scan_default; + scan_default = false; while ((i = bms_next_member(final_result->bound_offsets, i)) >= 0) { int partindex = context->boundinfo->indexes[i]; /* - * In range and hash partitioning cases, some index values may be -1, - * indicating that no partition has been defined to accept a given - * range of values (that is, the bound at given offset is the upper - * bound of this unassigned range of values) or for a given remainder, - * respectively. As it's still part of the queried range of values, - * add the default partition, if any. + * Some index slot may contain -1, indicating that no partition has + * been defined to accept a given range of values. As it's still part + * of the queried range of values, add the default partition, if any. */ if (partindex >= 0) result = bms_add_member(result, partindex); - else - scan_default |= partition_bound_has_default(context->boundinfo); + else if (partition_bound_has_default(context->boundinfo)) + scan_default = true; } /* Add the null and/or default partition if needed and if present. */ @@ -2202,7 +2198,7 @@ get_matching_hash_bounds(PartitionPruneContext *context, * There is neither a special hash null partition or the default hash * partition. */ - result->scan_null = result->scan_default = false; + result->scan_null = false; return result; } @@ -2212,10 +2208,6 @@ get_matching_hash_bounds(PartitionPruneContext *context, * Determine the offsets of list bounds matching the specified value, * according to the semantics of the given operator strategy * - * scan_default will be set in the returned struct, if the default partition - * needs to be scanned, provided one exists at all. scan_null will be set if - * the special null-accepting partition needs to be scanned. - * * 'opstrategy' if non-zero must be a btree strategy number. * * 'value' contains the value to use for pruning. @@ -2244,7 +2236,7 @@ get_matching_list_bounds(PartitionPruneContext *context, Assert(context->strategy == PARTITION_STRATEGY_LIST); Assert(context->partnatts == 1); - result->scan_null = result->scan_default = false; + result->scan_null = false; if (!bms_is_empty(nullkeys)) { @@ -2256,7 +2248,8 @@ get_matching_list_bounds(PartitionPruneContext *context, if (partition_bound_accepts_nulls(boundinfo)) result->scan_null = true; else - result->scan_default = partition_bound_has_default(boundinfo); + /* scan the default partition, if any */ + result->bound_offsets = bms_make_singleton(boundinfo->ndatums); return result; } @@ -2266,7 +2259,7 @@ get_matching_list_bounds(PartitionPruneContext *context, */ if (boundinfo->ndatums == 0) { - result->scan_default = partition_bound_has_default(boundinfo); + result->bound_offsets = bms_make_singleton(boundinfo->ndatums); return result; } @@ -2280,10 +2273,9 @@ get_matching_list_bounds(PartitionPruneContext *context, */ if (nvalues == 0) { - Assert(boundinfo->ndatums > 0); - result->bound_offsets = bms_add_range(NULL, 0, - boundinfo->ndatums - 1); - result->scan_default = partition_bound_has_default(boundinfo); + Assert(boundinfo->nindexes > 0); + result->bound_offsets = bms_add_range(result->bound_offsets, + 0, boundinfo->nindexes - 1); return result; } @@ -2294,8 +2286,8 @@ get_matching_list_bounds(PartitionPruneContext *context, * First match to all bounds. We'll remove any matching datums below. */ Assert(boundinfo->ndatums > 0); - result->bound_offsets = bms_add_range(NULL, 0, - boundinfo->ndatums - 1); + result->bound_offsets = bms_add_range(result->bound_offsets, + 0, boundinfo->ndatums); off = partition_list_bsearch(partsupfunc, partcollation, boundinfo, value, &is_equal); @@ -2308,23 +2300,9 @@ get_matching_list_bounds(PartitionPruneContext *context, off); } - /* Always include the default partition if any. */ - result->scan_default = partition_bound_has_default(boundinfo); - return result; } - /* - * With range queries, always include the default list partition, because - * list partitions divide the key space in a discontinuous manner, not all - * values in the given range will have a partition assigned. This may not - * technically be true for some data types (e.g. integer types), however, - * we currently lack any sort of infrastructure to provide us with proofs - * that would allow us to do anything smarter here. - */ - if (opstrategy != BTEqualStrategyNumber) - result->scan_default = partition_bound_has_default(boundinfo); - switch (opstrategy) { case BTEqualStrategyNumber: @@ -2338,7 +2316,9 @@ get_matching_list_bounds(PartitionPruneContext *context, result->bound_offsets = bms_make_singleton(off); } else - result->scan_default = partition_bound_has_default(boundinfo); + /* scan the default partition, if any */ + result->bound_offsets = bms_make_singleton(boundinfo->ndatums); + return result; case BTGreaterEqualStrategyNumber: @@ -2367,11 +2347,14 @@ get_matching_list_bounds(PartitionPruneContext *context, /* * off is greater than the numbers of datums we have partitions * for. The only possible partition that could contain a match is - * the default partition, but we must've set context->scan_default - * above anyway if one exists. + * the default partition. Scan the default partition if one + * exists. */ if (off > boundinfo->ndatums - 1) + { + result->bound_offsets = bms_make_singleton(boundinfo->ndatums); return result; + } minoff = off; break; @@ -2390,11 +2373,14 @@ get_matching_list_bounds(PartitionPruneContext *context, /* * off is smaller than the datums of all non-default partitions. * The only possible partition that could contain a match is the - * default partition, but we must've set context->scan_default - * above anyway if one exists. + * default partition, but we scan the default partition if one + * exists. */ if (off < 0) + { + result->bound_offsets = bms_make_singleton(boundinfo->ndatums); return result; + } maxoff = off; break; @@ -2404,8 +2390,21 @@ get_matching_list_bounds(PartitionPruneContext *context, break; } + /* + * With range queries, always include the default list partition, because + * list partitions divide the key space in a discontinuous manner, not all + * values in the given range will have a partition assigned. This may not + * technically be true for some data types (e.g. integer types), however, + * we currently lack any sort of infrastructure to provide us with proofs + * that would allow us to do anything smarter here. + */ + Assert (opstrategy != BTEqualStrategyNumber); + result->bound_offsets = bms_add_member(result->bound_offsets, + boundinfo->ndatums); + Assert(minoff >= 0 && maxoff >= 0); - result->bound_offsets = bms_add_range(NULL, minoff, maxoff); + result->bound_offsets = bms_add_range(result->bound_offsets, + minoff, maxoff); return result; } @@ -2418,9 +2417,8 @@ get_matching_list_bounds(PartitionPruneContext *context, * Each datum whose offset is in result is to be treated as the upper bound of * the partition that will contain the desired values. * - * scan_default will be set in the returned struct, if the default partition - * needs to be scanned, provided one exists at all. Although note that we - * intentionally don't set scan_default in this function if only because the + * bound_offsets may contain a bit for the indexes element that contains -1, + * which means the default partition if any. That happens only if the * matching set of values, found by comparing the input values using the * specified opstrategy, contains unassigned portions of key space, which * we normally assume to belong to the default partition. We don't infer @@ -2461,7 +2459,7 @@ get_matching_range_bounds(PartitionPruneContext *context, Assert(context->strategy == PARTITION_STRATEGY_RANGE); Assert(nvalues <= partnatts); - result->scan_null = result->scan_default = false; + result->scan_null = false; /* * If there are no datums to compare keys with, or if we got an IS NULL @@ -2469,7 +2467,7 @@ get_matching_range_bounds(PartitionPruneContext *context, */ if (boundinfo->ndatums == 0 || !bms_is_empty(nullkeys)) { - result->scan_default = partition_bound_has_default(boundinfo); + result->bound_offsets = bms_make_singleton(boundinfo->ndatums); return result; } @@ -2489,9 +2487,12 @@ get_matching_range_bounds(PartitionPruneContext *context, if (partindices[maxoff] < 0) maxoff--; - result->scan_default = partition_bound_has_default(boundinfo); + /* offset 0 is always corresnponds to invalid partition */ + result->bound_offsets = bms_make_singleton(0); + Assert(minoff >= 0 && maxoff >= 0); - result->bound_offsets = bms_add_range(NULL, minoff, maxoff); + result->bound_offsets = bms_add_range(result->bound_offsets, + minoff, maxoff); return result; } @@ -2501,7 +2502,7 @@ get_matching_range_bounds(PartitionPruneContext *context, * default partition, if any. */ if (nvalues < partnatts) - result->scan_default = partition_bound_has_default(boundinfo); + result->bound_offsets = bms_make_singleton(0); switch (opstrategy) { @@ -2518,7 +2519,8 @@ get_matching_range_bounds(PartitionPruneContext *context, if (nvalues == partnatts) { /* There can only be zero or one matching partition. */ - result->bound_offsets = bms_make_singleton(off + 1); + result->bound_offsets = + bms_add_member(result->bound_offsets, off + 1); return result; } else @@ -2607,7 +2609,8 @@ get_matching_range_bounds(PartitionPruneContext *context, } Assert(minoff >= 0 && maxoff >= 0); - result->bound_offsets = bms_add_range(NULL, minoff, maxoff); + result->bound_offsets = bms_add_range(result->bound_offsets, + minoff, maxoff); } else { @@ -2620,7 +2623,8 @@ get_matching_range_bounds(PartitionPruneContext *context, * -1 indicating that all bounds are greater, then we simply * end up adding the first bound's offset, that is, 0. */ - result->bound_offsets = bms_make_singleton(off + 1); + result->bound_offsets = + bms_add_member(result->bound_offsets, off + 1); } return result; @@ -2821,8 +2825,8 @@ get_matching_range_bounds(PartitionPruneContext *context, Assert(minoff >= 0 && maxoff >= 0); if (minoff <= maxoff) - result->bound_offsets = bms_add_range(NULL, minoff, maxoff); - + result->bound_offsets = bms_add_range(result->bound_offsets, + minoff, maxoff); return result; } @@ -3001,7 +3005,6 @@ perform_pruning_base_step(PartitionPruneContext *context, result = (PruneStepResult *) palloc(sizeof(PruneStepResult)); result->bound_offsets = NULL; - result->scan_default = false; result->scan_null = false; return result; @@ -3102,17 +3105,9 @@ perform_pruning_combine_step(PartitionPruneContext *context, { PartitionBoundInfo boundinfo = context->boundinfo; - /* - * Add all valid offsets into the boundinfo->indexes array. For range - * partitioning, boundinfo->indexes contains (boundinfo->ndatums + 1) - * valid entries. - */ - if (context->strategy == PARTITION_STRATEGY_RANGE) - result->bound_offsets = bms_add_range(NULL, 0, boundinfo->ndatums); - else - result->bound_offsets = bms_add_range(NULL, 0, - boundinfo->ndatums - 1); - result->scan_default = partition_bound_has_default(boundinfo); + /* Add all valid offsets into the boundinfo->indexes array. */ + result->bound_offsets = bms_add_range(NULL, 0, boundinfo->nindexes - 1); + result->scan_null = partition_bound_accepts_nulls(boundinfo); return result; } @@ -3143,8 +3138,6 @@ perform_pruning_combine_step(PartitionPruneContext *context, /* Update whether to scan null and default partitions. */ if (!result->scan_null) result->scan_null = step_result->scan_null; - if (!result->scan_default) - result->scan_default = step_result->scan_default; } break; @@ -3166,7 +3159,6 @@ perform_pruning_combine_step(PartitionPruneContext *context, result->bound_offsets = bms_copy(step_result->bound_offsets); result->scan_null = step_result->scan_null; - result->scan_default = step_result->scan_default; firststep = false; } else @@ -3179,8 +3171,6 @@ perform_pruning_combine_step(PartitionPruneContext *context, /* Update whether to scan null and default partitions. */ if (result->scan_null) result->scan_null = step_result->scan_null; - if (result->scan_default) - result->scan_default = step_result->scan_default; } } break; diff --git a/src/include/partitioning/partbounds.h b/src/include/partitioning/partbounds.h index b1ae39ad63..18ac8cf0bb 100644 --- a/src/include/partitioning/partbounds.h +++ b/src/include/partitioning/partbounds.h @@ -65,6 +65,7 @@ typedef struct PartitionBoundInfoData PartitionRangeDatumKind **kind; /* The kind of each range bound datum; * NULL for hash and list partitioned * tables */ + int nindexes; /* Length of the indexes following array */ int *indexes; /* Partition indexes */ int null_index; /* Index of the null-accepting partition; -1 * if there isn't one */
pgsql-hackers by date: