Re: Problem with default partition pruning - Mailing list pgsql-hackers

From Kyotaro HORIGUCHI
Subject Re: Problem with default partition pruning
Date
Msg-id 20190319.152756.202071463.horiguchi.kyotaro@lab.ntt.co.jp
Whole thread Raw
In response to Re: Problem with default partition pruning  (Amit Langote <Langote_Amit_f8@lab.ntt.co.jp>)
List pgsql-hackers
Hi.

At Mon, 18 Mar 2019 18:44:07 +0900, Amit Langote <Langote_Amit_f8@lab.ntt.co.jp> wrote in
<9bed6b79-f264-6976-b880-e2a5d23e9d85@lab.ntt.co.jp>
> > v2 patch attached.
> > Could you please check it again?
> 
> I think the updated patch breaks the promise that
> get_matching_range_bounds won't set scan_default based on individual
> pruning value comparisons.  How about the attached delta patch that
> applies on top of your earlier v1 patch, which fixes the issue reported by
> Thibaut?

I read through the patch and understood how it works. And Amit's
proposal looks fine.

But that makes me think of scan_default as a wart. 

The attached patch is a refactoring that removes scan_default
from PruneStepResult and the defaut partition is represented as
the same way as non-default partitions, without changing in
behavior. This improves the modularity of partprune code a bit.

The fix would be put on top of this easily.

Thoughts?

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center
diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c
index 5b897d50ee..828240119d 100644
--- a/src/backend/partitioning/partbounds.c
+++ b/src/backend/partitioning/partbounds.c
@@ -92,7 +92,6 @@ static int partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
                         Oid *partcollation,
                         PartitionBoundInfo boundinfo,
                         PartitionRangeBound *probe, bool *is_equal);
-static int    get_partition_bound_num_indexes(PartitionBoundInfo b);
 static Expr *make_partition_op_expr(PartitionKey key, int keynum,
                        uint16 strategy, Expr *arg1, Expr *arg2);
 static Oid get_partition_operator(PartitionKey key, int col,
@@ -266,6 +265,7 @@ create_hash_bounds(PartitionBoundSpec **boundspecs, int nparts,
 
     boundinfo->ndatums = ndatums;
     boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
+    boundinfo->nindexes = greatest_modulus;
     boundinfo->indexes = (int *) palloc(greatest_modulus * sizeof(int));
     for (i = 0; i < greatest_modulus; i++)
         boundinfo->indexes[i] = -1;
@@ -399,7 +399,10 @@ create_list_bounds(PartitionBoundSpec **boundspecs, int nparts,
 
     boundinfo->ndatums = ndatums;
     boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
-    boundinfo->indexes = (int *) palloc(ndatums * sizeof(int));
+
+    /* the last element is reserved for the default partition */
+    boundinfo->nindexes = ndatums + 1;
+    boundinfo->indexes = (int *) palloc(boundinfo->nindexes *  sizeof(int));
 
     /*
      * Copy values.  Canonical indexes are values ranging from 0 to (nparts -
@@ -423,6 +426,9 @@ create_list_bounds(PartitionBoundSpec **boundspecs, int nparts,
         boundinfo->indexes[i] = (*mapping)[orig_index];
     }
 
+    /* set default partition index (-1) */
+    boundinfo->indexes[ndatums] = -1;
+
     /*
      * Set the canonical value for null_index, if any.
      *
@@ -596,7 +602,8 @@ create_range_bounds(PartitionBoundSpec **boundspecs, int nparts,
      * For range partitioning, an additional value of -1 is stored as the last
      * element.
      */
-    boundinfo->indexes = (int *) palloc((ndatums + 1) * sizeof(int));
+    boundinfo->nindexes = ndatums + 1;
+    boundinfo->indexes = (int *) palloc(boundinfo->nindexes * sizeof(int));
 
     for (i = 0; i < ndatums; i++)
     {
@@ -676,6 +683,9 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
     if (b1->ndatums != b2->ndatums)
         return false;
 
+    if (b1->nindexes != b2->nindexes)
+        return false;
+
     if (b1->null_index != b2->null_index)
         return false;
 
@@ -704,7 +714,7 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
          * their indexes array will be same.  So, it suffices to compare
          * indexes array.
          */
-        for (i = 0; i < greatest_modulus; i++)
+        for (i = 0; i < b1->nindexes; i++)
             if (b1->indexes[i] != b2->indexes[i])
                 return false;
 
@@ -765,9 +775,9 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
                 return false;
         }
 
-        /* There are ndatums+1 indexes in case of range partitions */
-        if (b1->strategy == PARTITION_STRATEGY_RANGE &&
-            b1->indexes[i] != b2->indexes[i])
+        /* There may be ndatums+1 indexes in some cases */
+        Assert (i == b1->nindexes || i == b1->nindexes - 1);
+        if (i < b1->nindexes && b1->indexes[i] != b2->indexes[i])
             return false;
     }
     return true;
@@ -793,7 +803,7 @@ partition_bounds_copy(PartitionBoundInfo src,
     ndatums = dest->ndatums = src->ndatums;
     partnatts = key->partnatts;
 
-    num_indexes = get_partition_bound_num_indexes(src);
+    num_indexes = dest->nindexes = src->nindexes;
 
     /* List partitioned tables have only a single partition key. */
     Assert(key->strategy != PARTITION_STRATEGY_LIST || partnatts == 1);
@@ -1710,46 +1720,6 @@ qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
                                 b1->lower, b2);
 }
 
-/*
- * get_partition_bound_num_indexes
- *
- * Returns the number of the entries in the partition bound indexes array.
- */
-static int
-get_partition_bound_num_indexes(PartitionBoundInfo bound)
-{
-    int            num_indexes;
-
-    Assert(bound);
-
-    switch (bound->strategy)
-    {
-        case PARTITION_STRATEGY_HASH:
-
-            /*
-             * The number of the entries in the indexes array is same as the
-             * greatest modulus.
-             */
-            num_indexes = get_hash_partition_greatest_modulus(bound);
-            break;
-
-        case PARTITION_STRATEGY_LIST:
-            num_indexes = bound->ndatums;
-            break;
-
-        case PARTITION_STRATEGY_RANGE:
-            /* Range partitioned table has an extra index. */
-            num_indexes = bound->ndatums + 1;
-            break;
-
-        default:
-            elog(ERROR, "unexpected partition strategy: %d",
-                 (int) bound->strategy);
-    }
-
-    return num_indexes;
-}
-
 /*
  * get_partition_operator
  *
diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c
index 95a60183a1..3cc9c9f5b8 100644
--- a/src/backend/partitioning/partprune.c
+++ b/src/backend/partitioning/partprune.c
@@ -105,7 +105,6 @@ typedef struct PruneStepResult
      */
     Bitmapset  *bound_offsets;
 
-    bool        scan_default;    /* Scan the default partition? */
     bool        scan_null;        /* Scan the partition for NULL values? */
 } PruneStepResult;
 
@@ -671,23 +670,20 @@ get_matching_partitions(PartitionPruneContext *context, List *pruning_steps)
     Assert(final_result != NULL);
     i = -1;
     result = NULL;
-    scan_default = final_result->scan_default;
+    scan_default = false;
     while ((i = bms_next_member(final_result->bound_offsets, i)) >= 0)
     {
         int            partindex = context->boundinfo->indexes[i];
 
         /*
-         * In range and hash partitioning cases, some index values may be -1,
-         * indicating that no partition has been defined to accept a given
-         * range of values (that is, the bound at given offset is the upper
-         * bound of this unassigned range of values) or for a given remainder,
-         * respectively.  As it's still part of the queried range of values,
-         * add the default partition, if any.
+         * Some index slot may contain -1, indicating that no partition has
+         * been defined to accept a given range of values. As it's still part
+         * of the queried range of values, add the default partition, if any.
          */
         if (partindex >= 0)
             result = bms_add_member(result, partindex);
-        else
-            scan_default |= partition_bound_has_default(context->boundinfo);
+        else if (partition_bound_has_default(context->boundinfo))
+            scan_default = true;
     }
 
     /* Add the null and/or default partition if needed and if present. */
@@ -2202,7 +2198,7 @@ get_matching_hash_bounds(PartitionPruneContext *context,
      * There is neither a special hash null partition or the default hash
      * partition.
      */
-    result->scan_null = result->scan_default = false;
+    result->scan_null = false;
 
     return result;
 }
@@ -2212,10 +2208,6 @@ get_matching_hash_bounds(PartitionPruneContext *context,
  *        Determine the offsets of list bounds matching the specified value,
  *        according to the semantics of the given operator strategy
  *
- * scan_default will be set in the returned struct, if the default partition
- * needs to be scanned, provided one exists at all.  scan_null will be set if
- * the special null-accepting partition needs to be scanned.
- *
  * 'opstrategy' if non-zero must be a btree strategy number.
  *
  * 'value' contains the value to use for pruning.
@@ -2244,7 +2236,7 @@ get_matching_list_bounds(PartitionPruneContext *context,
     Assert(context->strategy == PARTITION_STRATEGY_LIST);
     Assert(context->partnatts == 1);
 
-    result->scan_null = result->scan_default = false;
+    result->scan_null = false;
 
     if (!bms_is_empty(nullkeys))
     {
@@ -2256,7 +2248,8 @@ get_matching_list_bounds(PartitionPruneContext *context,
         if (partition_bound_accepts_nulls(boundinfo))
             result->scan_null = true;
         else
-            result->scan_default = partition_bound_has_default(boundinfo);
+            /* scan the default partition, if any */
+            result->bound_offsets = bms_make_singleton(boundinfo->ndatums);
         return result;
     }
 
@@ -2266,7 +2259,7 @@ get_matching_list_bounds(PartitionPruneContext *context,
      */
     if (boundinfo->ndatums == 0)
     {
-        result->scan_default = partition_bound_has_default(boundinfo);
+        result->bound_offsets = bms_make_singleton(boundinfo->ndatums);
         return result;
     }
 
@@ -2280,10 +2273,9 @@ get_matching_list_bounds(PartitionPruneContext *context,
      */
     if (nvalues == 0)
     {
-        Assert(boundinfo->ndatums > 0);
-        result->bound_offsets = bms_add_range(NULL, 0,
-                                              boundinfo->ndatums - 1);
-        result->scan_default = partition_bound_has_default(boundinfo);
+        Assert(boundinfo->nindexes > 0);
+        result->bound_offsets = bms_add_range(result->bound_offsets,
+                                              0, boundinfo->nindexes - 1);
         return result;
     }
 
@@ -2294,8 +2286,8 @@ get_matching_list_bounds(PartitionPruneContext *context,
          * First match to all bounds.  We'll remove any matching datums below.
          */
         Assert(boundinfo->ndatums > 0);
-        result->bound_offsets = bms_add_range(NULL, 0,
-                                              boundinfo->ndatums - 1);
+        result->bound_offsets = bms_add_range(result->bound_offsets,
+                                              0, boundinfo->ndatums);
 
         off = partition_list_bsearch(partsupfunc, partcollation, boundinfo,
                                      value, &is_equal);
@@ -2308,23 +2300,9 @@ get_matching_list_bounds(PartitionPruneContext *context,
                                                    off);
         }
 
-        /* Always include the default partition if any. */
-        result->scan_default = partition_bound_has_default(boundinfo);
-
         return result;
     }
 
-    /*
-     * With range queries, always include the default list partition, because
-     * list partitions divide the key space in a discontinuous manner, not all
-     * values in the given range will have a partition assigned.  This may not
-     * technically be true for some data types (e.g. integer types), however,
-     * we currently lack any sort of infrastructure to provide us with proofs
-     * that would allow us to do anything smarter here.
-     */
-    if (opstrategy != BTEqualStrategyNumber)
-        result->scan_default = partition_bound_has_default(boundinfo);
-
     switch (opstrategy)
     {
         case BTEqualStrategyNumber:
@@ -2338,7 +2316,9 @@ get_matching_list_bounds(PartitionPruneContext *context,
                 result->bound_offsets = bms_make_singleton(off);
             }
             else
-                result->scan_default = partition_bound_has_default(boundinfo);
+                /* scan the default partition, if any */
+                result->bound_offsets = bms_make_singleton(boundinfo->ndatums);
+
             return result;
 
         case BTGreaterEqualStrategyNumber:
@@ -2367,11 +2347,14 @@ get_matching_list_bounds(PartitionPruneContext *context,
             /*
              * off is greater than the numbers of datums we have partitions
              * for.  The only possible partition that could contain a match is
-             * the default partition, but we must've set context->scan_default
-             * above anyway if one exists.
+             * the default partition. Scan the default partition if one
+             * exists.
              */
             if (off > boundinfo->ndatums - 1)
+            {
+                result->bound_offsets = bms_make_singleton(boundinfo->ndatums);
                 return result;
+            }
 
             minoff = off;
             break;
@@ -2390,11 +2373,14 @@ get_matching_list_bounds(PartitionPruneContext *context,
             /*
              * off is smaller than the datums of all non-default partitions.
              * The only possible partition that could contain a match is the
-             * default partition, but we must've set context->scan_default
-             * above anyway if one exists.
+             * default partition, but we scan the default partition if one
+             * exists.
              */
             if (off < 0)
+            {
+                result->bound_offsets = bms_make_singleton(boundinfo->ndatums);
                 return result;
+            }
 
             maxoff = off;
             break;
@@ -2404,8 +2390,21 @@ get_matching_list_bounds(PartitionPruneContext *context,
             break;
     }
 
+    /*
+     * With range queries, always include the default list partition, because
+     * list partitions divide the key space in a discontinuous manner, not all
+     * values in the given range will have a partition assigned.  This may not
+     * technically be true for some data types (e.g. integer types), however,
+     * we currently lack any sort of infrastructure to provide us with proofs
+     * that would allow us to do anything smarter here.
+     */
+    Assert (opstrategy != BTEqualStrategyNumber);
+    result->bound_offsets = bms_add_member(result->bound_offsets,
+                                           boundinfo->ndatums);
+
     Assert(minoff >= 0 && maxoff >= 0);
-    result->bound_offsets = bms_add_range(NULL, minoff, maxoff);
+    result->bound_offsets = bms_add_range(result->bound_offsets,
+                                          minoff, maxoff);
     return result;
 }
 
@@ -2418,9 +2417,8 @@ get_matching_list_bounds(PartitionPruneContext *context,
  * Each datum whose offset is in result is to be treated as the upper bound of
  * the partition that will contain the desired values.
  *
- * scan_default will be set in the returned struct, if the default partition
- * needs to be scanned, provided one exists at all.  Although note that we
- * intentionally don't set scan_default in this function if only because the
+ * bound_offsets may contain a bit for the indexes element that contains -1,
+ * which means the default partition if any.  That happens only if the
  * matching set of values, found by comparing the input values using the
  * specified opstrategy, contains unassigned portions of key space, which
  * we normally assume to belong to the default partition.  We don't infer
@@ -2461,7 +2459,7 @@ get_matching_range_bounds(PartitionPruneContext *context,
     Assert(context->strategy == PARTITION_STRATEGY_RANGE);
     Assert(nvalues <= partnatts);
 
-    result->scan_null = result->scan_default = false;
+    result->scan_null = false;
 
     /*
      * If there are no datums to compare keys with, or if we got an IS NULL
@@ -2469,7 +2467,7 @@ get_matching_range_bounds(PartitionPruneContext *context,
      */
     if (boundinfo->ndatums == 0 || !bms_is_empty(nullkeys))
     {
-        result->scan_default = partition_bound_has_default(boundinfo);
+        result->bound_offsets = bms_make_singleton(boundinfo->ndatums);
         return result;
     }
 
@@ -2489,9 +2487,12 @@ get_matching_range_bounds(PartitionPruneContext *context,
         if (partindices[maxoff] < 0)
             maxoff--;
 
-        result->scan_default = partition_bound_has_default(boundinfo);
+        /* offset 0 is always corresnponds to invalid partition */
+        result->bound_offsets = bms_make_singleton(0);
+
         Assert(minoff >= 0 && maxoff >= 0);
-        result->bound_offsets = bms_add_range(NULL, minoff, maxoff);
+        result->bound_offsets = bms_add_range(result->bound_offsets,
+                                              minoff, maxoff);
 
         return result;
     }
@@ -2501,7 +2502,7 @@ get_matching_range_bounds(PartitionPruneContext *context,
      * default partition, if any.
      */
     if (nvalues < partnatts)
-        result->scan_default = partition_bound_has_default(boundinfo);
+        result->bound_offsets = bms_make_singleton(0);
 
     switch (opstrategy)
     {
@@ -2518,7 +2519,8 @@ get_matching_range_bounds(PartitionPruneContext *context,
                 if (nvalues == partnatts)
                 {
                     /* There can only be zero or one matching partition. */
-                    result->bound_offsets = bms_make_singleton(off + 1);
+                    result->bound_offsets =
+                        bms_add_member(result->bound_offsets, off + 1);
                     return result;
                 }
                 else
@@ -2607,7 +2609,8 @@ get_matching_range_bounds(PartitionPruneContext *context,
                 }
 
                 Assert(minoff >= 0 && maxoff >= 0);
-                result->bound_offsets = bms_add_range(NULL, minoff, maxoff);
+                result->bound_offsets = bms_add_range(result->bound_offsets,
+                                                      minoff, maxoff);
             }
             else
             {
@@ -2620,7 +2623,8 @@ get_matching_range_bounds(PartitionPruneContext *context,
                  * -1 indicating that all bounds are greater, then we simply
                  * end up adding the first bound's offset, that is, 0.
                  */
-                result->bound_offsets = bms_make_singleton(off + 1);
+                result->bound_offsets =
+                    bms_add_member(result->bound_offsets, off + 1);
             }
 
             return result;
@@ -2821,8 +2825,8 @@ get_matching_range_bounds(PartitionPruneContext *context,
 
     Assert(minoff >= 0 && maxoff >= 0);
     if (minoff <= maxoff)
-        result->bound_offsets = bms_add_range(NULL, minoff, maxoff);
-
+        result->bound_offsets = bms_add_range(result->bound_offsets,
+                                              minoff, maxoff);
     return result;
 }
 
@@ -3001,7 +3005,6 @@ perform_pruning_base_step(PartitionPruneContext *context,
 
                     result = (PruneStepResult *) palloc(sizeof(PruneStepResult));
                     result->bound_offsets = NULL;
-                    result->scan_default = false;
                     result->scan_null = false;
 
                     return result;
@@ -3102,17 +3105,9 @@ perform_pruning_combine_step(PartitionPruneContext *context,
     {
         PartitionBoundInfo boundinfo = context->boundinfo;
 
-        /*
-         * Add all valid offsets into the boundinfo->indexes array.  For range
-         * partitioning, boundinfo->indexes contains (boundinfo->ndatums + 1)
-         * valid entries.
-         */
-        if (context->strategy == PARTITION_STRATEGY_RANGE)
-            result->bound_offsets = bms_add_range(NULL, 0, boundinfo->ndatums);
-        else
-            result->bound_offsets = bms_add_range(NULL, 0,
-                                                  boundinfo->ndatums - 1);
-        result->scan_default = partition_bound_has_default(boundinfo);
+        /* Add all valid offsets into the boundinfo->indexes array. */
+        result->bound_offsets = bms_add_range(NULL, 0, boundinfo->nindexes - 1);
+
         result->scan_null = partition_bound_accepts_nulls(boundinfo);
         return result;
     }
@@ -3143,8 +3138,6 @@ perform_pruning_combine_step(PartitionPruneContext *context,
                 /* Update whether to scan null and default partitions. */
                 if (!result->scan_null)
                     result->scan_null = step_result->scan_null;
-                if (!result->scan_default)
-                    result->scan_default = step_result->scan_default;
             }
             break;
 
@@ -3166,7 +3159,6 @@ perform_pruning_combine_step(PartitionPruneContext *context,
                     result->bound_offsets =
                         bms_copy(step_result->bound_offsets);
                     result->scan_null = step_result->scan_null;
-                    result->scan_default = step_result->scan_default;
                     firststep = false;
                 }
                 else
@@ -3179,8 +3171,6 @@ perform_pruning_combine_step(PartitionPruneContext *context,
                     /* Update whether to scan null and default partitions. */
                     if (result->scan_null)
                         result->scan_null = step_result->scan_null;
-                    if (result->scan_default)
-                        result->scan_default = step_result->scan_default;
                 }
             }
             break;
diff --git a/src/include/partitioning/partbounds.h b/src/include/partitioning/partbounds.h
index b1ae39ad63..18ac8cf0bb 100644
--- a/src/include/partitioning/partbounds.h
+++ b/src/include/partitioning/partbounds.h
@@ -65,6 +65,7 @@ typedef struct PartitionBoundInfoData
     PartitionRangeDatumKind **kind; /* The kind of each range bound datum;
                                      * NULL for hash and list partitioned
                                      * tables */
+    int            nindexes;        /* Length of the indexes following array */
     int           *indexes;        /* Partition indexes */
     int            null_index;        /* Index of the null-accepting partition; -1
                                  * if there isn't one */

pgsql-hackers by date:

Previous
From: Michael Paquier
Date:
Subject: Re: Unaccent extension python script Issue in Windows
Next
From: Michael Paquier
Date:
Subject: Re: pg_basebackup ignores the existing data directory permissions