Re: Typmod associated with multi-row VALUES constructs - Mailing list pgsql-hackers

From Tom Lane
Subject Re: Typmod associated with multi-row VALUES constructs
Date
Msg-id 7083.1481230690@sss.pgh.pa.us
Whole thread Raw
In response to Re: Typmod associated with multi-row VALUES constructs  (Kyotaro HORIGUCHI <horiguchi.kyotaro@lab.ntt.co.jp>)
Responses Re: [HACKERS] Typmod associated with multi-row VALUES constructs
List pgsql-hackers
I've pushed the previous patch to HEAD.  Attached is a proposed patch
(against 9.6) that we could use for the back branches; it takes the
brute force approach of just computing the correct value on-demand
in the two functions at issue.  The key question of course is whether
this is acceptable from a performance standpoint.  I did a simple test
using a 1000-entry VALUES list:

select count(a) from (
values
  ('0'::varchar(3), '0'::varchar(4)),
  ('1'::varchar(3), '1'::varchar(4)),
  ('2'::varchar(3), '2'::varchar(4)),
  ('3'::varchar(3), '3'::varchar(4)),
  ('4'::varchar(3), '4'::varchar(4)),
  ...
  ('996'::varchar(3), '996'::varchar(4)),
  ('997'::varchar(3), '997'::varchar(4)),
  ('998'::varchar(3), '998'::varchar(4)),
  ('999'::varchar(3), '999'::varchar(4))
) v(a,b);

Since all the rows do have the same typmod, this represents the worst
case where we have to scan all the way to the end to confirm the typmod,
and it has about as little overhead otherwise as I could think of doing.
I ran it like this:

pgbench -U postgres -n -c 1 -T 1000 -f bigvalues.sql regression

and could not see any above-the-noise-level difference --- in fact,
it seemed like it was faster *with* the patch, which is obviously
impossible; I blame that on chance realignments of loops vs. cache
line boundaries.

So I think this is an okay candidate for back-patching.  If anyone
wants to do their own performance tests, please do.

            regards, tom lane

diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index 1e3ecbc..c51fd81 100644
*** a/src/backend/parser/parse_relation.c
--- b/src/backend/parser/parse_relation.c
*************** static void expandTupleDesc(TupleDesc tu
*** 52,57 ****
--- 52,58 ----
                  int rtindex, int sublevels_up,
                  int location, bool include_dropped,
                  List **colnames, List **colvars);
+ static int32 *getValuesTypmods(RangeTblEntry *rte);
  static int    specialAttNum(const char *attname);
  static bool isQueryUsingTempRelation_walker(Node *node, void *context);

*************** expandRTE(RangeTblEntry *rte, int rtinde
*** 2157,2165 ****
--- 2158,2179 ----
              {
                  /* Values RTE */
                  ListCell   *aliasp_item = list_head(rte->eref->colnames);
+                 int32       *coltypmods;
                  ListCell   *lcv;
                  ListCell   *lcc;

+                 /*
+                  * It's okay to extract column types from the expressions in
+                  * the first row, since all rows will have been coerced to the
+                  * same types.  Their typmods might not be the same though, so
+                  * we potentially need to examine all rows to compute those.
+                  * Column collations are pre-computed in values_collations.
+                  */
+                 if (colvars)
+                     coltypmods = getValuesTypmods(rte);
+                 else
+                     coltypmods = NULL;
+
                  varattno = 0;
                  forboth(lcv, (List *) linitial(rte->values_lists),
                          lcc, rte->values_collations)
*************** expandRTE(RangeTblEntry *rte, int rtinde
*** 2184,2196 ****

                          varnode = makeVar(rtindex, varattno,
                                            exprType(col),
!                                           exprTypmod(col),
                                            colcollation,
                                            sublevels_up);
                          varnode->location = location;
                          *colvars = lappend(*colvars, varnode);
                      }
                  }
              }
              break;
          case RTE_JOIN:
--- 2198,2212 ----

                          varnode = makeVar(rtindex, varattno,
                                            exprType(col),
!                                           coltypmods[varattno - 1],
                                            colcollation,
                                            sublevels_up);
                          varnode->location = location;
                          *colvars = lappend(*colvars, varnode);
                      }
                  }
+                 if (coltypmods)
+                     pfree(coltypmods);
              }
              break;
          case RTE_JOIN:
*************** expandRTE(RangeTblEntry *rte, int rtinde
*** 2296,2301 ****
--- 2312,2319 ----
                          varnode = makeVar(rtindex, varattno,
                                            coltype, coltypmod, colcoll,
                                            sublevels_up);
+                         varnode->location = location;
+
                          *colvars = lappend(*colvars, varnode);
                      }
                  }
*************** expandTupleDesc(TupleDesc tupdesc, Alias
*** 2413,2418 ****
--- 2431,2504 ----
  }

  /*
+  * getValuesTypmods -- expandRTE subroutine
+  *
+  * Identify per-column typmods for the given VALUES RTE.  Returns a
+  * palloc'd array.
+  */
+ static int32 *
+ getValuesTypmods(RangeTblEntry *rte)
+ {
+     int32       *coltypmods;
+     List       *firstrow;
+     int            ncolumns,
+                 nvalid,
+                 i;
+     ListCell   *lc;
+
+     Assert(rte->values_lists != NIL);
+     firstrow = (List *) linitial(rte->values_lists);
+     ncolumns = list_length(firstrow);
+     coltypmods = (int32 *) palloc(ncolumns * sizeof(int32));
+     nvalid = 0;
+
+     /* Collect the typmods from the first VALUES row */
+     i = 0;
+     foreach(lc, firstrow)
+     {
+         Node       *col = (Node *) lfirst(lc);
+
+         coltypmods[i] = exprTypmod(col);
+         if (coltypmods[i] >= 0)
+             nvalid++;
+         i++;
+     }
+
+     /*
+      * Scan remaining rows; as soon as we have a non-matching typmod for a
+      * column, reset that typmod to -1.  We can bail out early if all typmods
+      * become -1.
+      */
+     if (nvalid > 0)
+     {
+         for_each_cell(lc, lnext(list_head(rte->values_lists)))
+         {
+             List       *thisrow = (List *) lfirst(lc);
+             ListCell   *lc2;
+
+             Assert(list_length(thisrow) == ncolumns);
+             i = 0;
+             foreach(lc2, thisrow)
+             {
+                 Node       *col = (Node *) lfirst(lc2);
+
+                 if (coltypmods[i] >= 0 && coltypmods[i] != exprTypmod(col))
+                 {
+                     coltypmods[i] = -1;
+                     nvalid--;
+                 }
+                 i++;
+             }
+
+             if (nvalid <= 0)
+                 break;
+         }
+     }
+
+     return coltypmods;
+ }
+
+ /*
   * expandRelAttrs -
   *      Workhorse for "*" expansion: produce a list of targetentries
   *      for the attributes of the RTE
*************** get_rte_attribute_type(RangeTblEntry *rt
*** 2656,2673 ****
              break;
          case RTE_VALUES:
              {
!                 /* Values RTE --- get type info from first sublist */
!                 /* collation is stored separately, though */
                  List       *collist = (List *) linitial(rte->values_lists);
                  Node       *col;

                  if (attnum < 1 || attnum > list_length(collist))
                      elog(ERROR, "values list %s does not have attribute %d",
                           rte->eref->aliasname, attnum);
                  col = (Node *) list_nth(collist, attnum - 1);
                  *vartype = exprType(col);
!                 *vartypmod = exprTypmod(col);
                  *varcollid = list_nth_oid(rte->values_collations, attnum - 1);
              }
              break;
          case RTE_JOIN:
--- 2742,2766 ----
              break;
          case RTE_VALUES:
              {
!                 /*
!                  * Values RTE --- we can get type info from first sublist, but
!                  * typmod may require scanning all sublists, and collation is
!                  * stored separately.  Using getValuesTypmods() is overkill,
!                  * but this path is taken so seldom for VALUES that it's not
!                  * worth writing extra code.
!                  */
                  List       *collist = (List *) linitial(rte->values_lists);
                  Node       *col;
+                 int32       *coltypmods = getValuesTypmods(rte);

                  if (attnum < 1 || attnum > list_length(collist))
                      elog(ERROR, "values list %s does not have attribute %d",
                           rte->eref->aliasname, attnum);
                  col = (Node *) list_nth(collist, attnum - 1);
                  *vartype = exprType(col);
!                 *vartypmod = coltypmods[attnum - 1];
                  *varcollid = list_nth_oid(rte->values_collations, attnum - 1);
+                 pfree(coltypmods);
              }
              break;
          case RTE_JOIN:
diff --git a/src/test/regress/expected/create_view.out b/src/test/regress/expected/create_view.out
index 81b4e8d..b1c3cff 100644
*** a/src/test/regress/expected/create_view.out
--- b/src/test/regress/expected/create_view.out
*************** SELECT relname, relkind, reloptions FROM
*** 288,293 ****
--- 288,330 ----
   mysecview4 | v       | {security_barrier=false}
  (4 rows)

+ -- This test checks that proper typmods are assigned in a multi-row VALUES
+ CREATE VIEW tt1 AS
+   SELECT * FROM (
+     VALUES
+        ('abc'::varchar(3), '0123456789', 42, 'abcd'::varchar(4)),
+        ('0123456789', 'abc'::varchar(3), 42.12, 'abc'::varchar(4))
+   ) vv(a,b,c,d);
+ \d+ tt1
+                       View "testviewschm2.tt1"
+  Column |         Type         | Modifiers | Storage  | Description
+ --------+----------------------+-----------+----------+-------------
+  a      | character varying    |           | extended |
+  b      | character varying    |           | extended |
+  c      | numeric              |           | main     |
+  d      | character varying(4) |           | extended |
+ View definition:
+  SELECT vv.a,
+     vv.b,
+     vv.c,
+     vv.d
+    FROM ( VALUES ('abc'::character varying(3),'0123456789'::character varying,42,'abcd'::character varying(4)),
('0123456789'::charactervarying,'abc'::character varying(3),42.12,'abc'::character varying(4))) vv(a, b, c, d); 
+
+ SELECT * FROM tt1;
+      a      |     b      |   c   |  d
+ ------------+------------+-------+------
+  abc        | 0123456789 |    42 | abcd
+  0123456789 | abc        | 42.12 | abc
+ (2 rows)
+
+ SELECT a::varchar(3) FROM tt1;
+   a
+ -----
+  abc
+  012
+ (2 rows)
+
+ DROP VIEW tt1;
  -- Test view decompilation in the face of relation renaming conflicts
  CREATE TABLE tt1 (f1 int, f2 int, f3 text);
  CREATE TABLE tx1 (x1 int, x2 int, x3 text);
diff --git a/src/test/regress/sql/create_view.sql b/src/test/regress/sql/create_view.sql
index 8bed5a5..5fe8b94 100644
*** a/src/test/regress/sql/create_view.sql
--- b/src/test/regress/sql/create_view.sql
*************** SELECT relname, relkind, reloptions FROM
*** 224,229 ****
--- 224,242 ----
                       'mysecview3'::regclass, 'mysecview4'::regclass)
         ORDER BY relname;

+ -- This test checks that proper typmods are assigned in a multi-row VALUES
+
+ CREATE VIEW tt1 AS
+   SELECT * FROM (
+     VALUES
+        ('abc'::varchar(3), '0123456789', 42, 'abcd'::varchar(4)),
+        ('0123456789', 'abc'::varchar(3), 42.12, 'abc'::varchar(4))
+   ) vv(a,b,c,d);
+ \d+ tt1
+ SELECT * FROM tt1;
+ SELECT a::varchar(3) FROM tt1;
+ DROP VIEW tt1;
+
  -- Test view decompilation in the face of relation renaming conflicts

  CREATE TABLE tt1 (f1 int, f2 int, f3 text);

pgsql-hackers by date:

Previous
From: Stephen Frost
Date:
Subject: Re: pg_dump vs. TRANSFORMs
Next
From: Tom Lane
Date:
Subject: Re: pg_dump vs. TRANSFORMs