Re: Multi-Column List Partitioning - Mailing list pgsql-hackers

From Amul Sul
Subject Re: Multi-Column List Partitioning
Date
Msg-id CAAJ_b94Vp5apiRb3JXa52dBkzcxG=c1swfBvWXXY1YCM5wcZCQ@mail.gmail.com
Whole thread Raw
In response to Re: Multi-Column List Partitioning  (Nitin Jadhav <nitinjadhavpostgres@gmail.com>)
Responses Re: Multi-Column List Partitioning
List pgsql-hackers
Hi,

Few comments for v7 patch, note that I haven't been through the
previous discussion, if any of the review comments that has been
already discussed & overridden, then please ignore here too:


partbounds.c: In function ‘get_qual_for_list.isra.18’:
partbounds.c:4284:29: warning: ‘boundinfo’ may be used uninitialized
in this function [-Wmaybe-uninitialized]
         datumCopy(bound_info->datums[i][j],
                   ~~~~~~~~~~^~~~~~~~
partbounds.c:4335:21: note: ‘boundinfo’ was declared here
  PartitionBoundInfo boundinfo;
                     ^~~~~~~~~
partbounds.c: In function ‘partition_bounds_merge’:
partbounds.c:1305:12: warning: ‘inner_isnull’ may be used
uninitialized in this function [-Wmaybe-uninitialized]
   bool    *inner_isnull;
            ^~~~~~~~~~~~
partbounds.c:1304:12: warning: ‘outer_isnull’ may be used
uninitialized in this function [-Wmaybe-uninitialized]
   bool    *outer_isnull;
           ^~~~~~~~~~~~

Got these warnings with gcc -O2 compilation.
----

 /*
+ * isListBoundDuplicated
+ *
+ * Returns TRUE if the list bound element 'new_bound' is already present
+ * in the target list 'list_bounds', FALSE otherwise.
+ */
+static bool
+isListBoundDuplicated(List *list_bounds, List *new_bound)
+{
+ ListCell   *cell = NULL;
+
+ foreach(cell, list_bounds)
+ {
+ int i;
+ List   *elem = lfirst(cell);
+ bool isDuplicate = true;
+
+ Assert(list_length(elem) == list_length(new_bound));
+
+ for (i = 0; i < list_length(elem); i++)
+ {
+ Const   *value1 = castNode(Const, list_nth(elem, i));
+ Const   *value2 = castNode(Const, list_nth(new_bound, i));
+
+ if (!equal(value1, value2))
+ {
+ isDuplicate = false;
+ break;
+ }
+ }
+
+ if (isDuplicate)
+ return true;
+ }
+
+ return false;
+}

This function is unnecessarily complicated, I think you can avoid
inner for loops; simply replace for-loop-block with  "if
(equal(lfirst(cell), new_bound)) return true".
----

+ char   **colname = (char **) palloc0(partnatts * sizeof(char *));
+ Oid    *coltype = palloc0(partnatts * sizeof(Oid));
+ int32    *coltypmod = palloc0(partnatts * sizeof(int));
+ Oid    *partcollation = palloc0(partnatts * sizeof(Oid));
+
This allocation seems to be worthless, read ahead.
----

+ for (i = 0; i < partnatts; i++)
+ {
+ if (key->partattrs[i] != 0)
+ colname[i] = get_attname(RelationGetRelid(parent),
+ key->partattrs[i], false);
+ else
+ {
+ colname[i] =
+ deparse_expression((Node *) list_nth(partexprs, j),
+    deparse_context_for(RelationGetRelationName(parent),
+    RelationGetRelid(parent)),
+    false, false);
+ ++j;
+ }
+
+ coltype[i] = get_partition_col_typid(key, i);
+ coltypmod[i] = get_partition_col_typmod(key, i);
+ partcollation[i] = get_partition_col_collation(key, i);
+ }

I think there is no need for this separate loop inside
transformPartitionListBounds, you can do that same in the next loop as
well. And instead of  get_partition_col_* calling and storing, simply
use that directly as an argument to transformPartitionBoundValue().
----

+
+ if (IsA(expr, RowExpr) &&
+ partnatts != list_length(((RowExpr *) expr)->args))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
+ errmsg("Must specify exactly one value per partitioning column"),
+ parser_errposition(pstate, exprLocation((Node *) spec))));
+

I think this should be inside the "else" block after "!IsA(rowexpr,
RowExpr)" error and you can avoid IsA() check too.
----

-               if (!datumIsEqual(b1->datums[i][j], b2->datums[i][j],
+               if (b1->isnulls)
+                   b1_isnull = b1->isnulls[i][j];
+               if (b2->isnulls)
+                   b2_isnull = b2->isnulls[i][j];
+
+               /*
+                * If any of the partition bound has NULL value, then check
+                * equality for the NULL value instead of comparing the datums
+                * as it does not contain valid value in case of NULL.
+                */
+               if (b1_isnull || b2_isnull)
+               {
+                   if (b1_isnull != b2_isnull)
+                       return false;
+               }
+               else if (!datumIsEqual(b1->datums[i][j], b2->datums[i][j],

Looks difficult to understand at first glance, how about the following:

if (b1->isnulls != b2->isnulls)
    return false;

if (b1->isnulls)
{
    if (b1->isnulls[i][j] != b2->isnulls[i][j])
        return false;
    if (b1->isnulls[i][j])
        continue;
}

See how range partitioning infinite values are handled. Also, place
this before the comment block that was added for the "!datumIsEqual()"
case.
----

+       if (src->isnulls)
+           dest->isnulls[i] = (bool *) palloc(sizeof(bool) * natts);
...
+           if (src->isnulls)
+               dest->isnulls[i][j] = src->isnulls[i][j];
+
Nothing wrong with this but if we could have checked "dest->isnulls"
instead of "src->isnulls" would be much better.
----

-           if (dest->kind == NULL ||
-               dest->kind[i][j] == PARTITION_RANGE_DATUM_VALUE)
+           if ((dest->kind == NULL ||
+                dest->kind[i][j] == PARTITION_RANGE_DATUM_VALUE) &&
+               (key->strategy != PARTITION_STRATEGY_LIST ||
+                (src->isnulls == NULL || !src->isnulls[i][j])))
                dest->datums[i][j] = datumCopy(src->datums[i][j],
                                               byval, typlen);
Condition "key->strategy != PARTITION_STRATEGY_LIST" seems to be unnecessary.
----

+       for (i = 0; i < partnatts; i++)
+       {
+           if (outer_isnull[i])
+           {
+               outer_has_null = true;
+               if (outer_map.merged_indexes[outer_index] == -1)
+                   consider_outer_null = true;
+           }
+       }
+
+       for (i = 0; i < partnatts; i++)
+       {
+           if (inner_isnull[i])
+           {
+               inner_has_null = true;
+               if (inner_map.merged_indexes[inner_index] == -1)
+                   consider_inner_null = true;
+           }
+       }

Can't be a single loop?
----

It would be helpful if you could run pgindent on your patch if not done already.
----

That's all for now, I am yet to finish the complete patch reading and
understand the code flow, but I am out of time now.

Regards,
Amul



pgsql-hackers by date:

Previous
From: Daniel Gustafsson
Date:
Subject: Re: Is ssl_crl_file "SSL server cert revocation list"?
Next
From: Bharath Rupireddy
Date:
Subject: Re: O(n) tasks cause lengthy startups and checkpoints