Re: Virtual generated columns - Mailing list pgsql-hackers

From jian he
Subject Re: Virtual generated columns
Date
Msg-id CACJufxEGPYtFe79hbsMeOBOivfNnPRsw7Gjvk67m1x2MQggyiQ@mail.gmail.com
Whole thread Raw
In response to Re: Virtual generated columns  (Peter Eisentraut <peter@eisentraut.org>)
List pgsql-hackers
On Thu, Aug 8, 2024 at 2:23 PM Peter Eisentraut <peter@eisentraut.org> wrote:
>
> Thank you for your extensive testing.  Here is a new patch set that has
> fixed all the issues you have reported (MERGE, sublinks, statistics,
> ANALYZE).

                    if (coldef->generated && restdef->generated &&
coldef->generated != restdef->generated)
                        ereport(ERROR,
                                (errcode(ERRCODE_INVALID_COLUMN_DEFINITION),
                                 errmsg("column \"%s\" inherits from
generated column of different kind",
                                        restdef->colname)));
the error message is not informal. maybe add errhint that
"column \"%s\" should be same as parent table's generated column kind:
%s", "virtual"|"stored"


 .../regress/expected/create_table_like.out    |  23 +-
 .../regress/expected/generated_stored.out     |  27 +-
 ...rated_stored.out => generated_virtual.out} | 835 +++++++++---------
 src/test/regress/parallel_schedule            |   3 +
 src/test/regress/sql/create_table_like.sql    |   2 +-
 src/test/regress/sql/generated_stored.sql     |  10 +-
 ...rated_stored.sql => generated_virtual.sql} | 301 ++++---
 src/test/subscription/t/011_generated.pl      |  38 +-
 55 files changed, 1280 insertions(+), 711 deletions(-)
 copy src/test/regress/expected/{generated_stored.out
generated_virtual.out} (69%)
 copy src/test/regress/sql/{generated_stored.sql => generated_virtual.sql} (72%)

I don't understand the "copy =>" part, I guess related to copy content
from stored to virtual.
anyway. some minor issue:

-- alter generation expression of parent and all its children altogether
ALTER TABLE gtest_parent ALTER COLUMN f3 SET EXPRESSION AS (f2 * 2);
\d gtest_parent
\d gtest_child
\d gtest_child2
\d gtest_child3
SELECT tableoid::regclass, * FROM gtest_parent ORDER BY 1, 2, 3;

The first line ALTER TABLE will fail for
src/test/regress/sql/generated_virtual.sql.
so no need
"""
\d gtest_parent
\d gtest_child
\d gtest_child2
\d gtest_child3
SELECT tableoid::regclass, * FROM gtest_parent ORDER BY 1, 2, 3;
"""

Similarly the following tests for gtest29 may aslo need change
-- ALTER TABLE ... ALTER COLUMN ... DROP EXPRESSION

since we cannot do ALTER TABLE SET EXPRESSION for virtual generated columns.


-- ALTER TABLE ... ALTER COLUMN
CREATE TABLE gtest27 (
    a int,
    b int,
    x int GENERATED ALWAYS AS ((a + b) * 2) VIRTUAL
);
INSERT INTO gtest27 (a, b) VALUES (3, 7), (4, 11);
ALTER TABLE gtest27 ALTER COLUMN a TYPE text;  -- error
ALTER TABLE gtest27 ALTER COLUMN x TYPE numeric;

will
ALTER TABLE gtest27 ALTER COLUMN a TYPE int4;
be a no-op?


do we need a document that virtual generated columns will use the
expression's collation.
see:
drop table if exists t5;
CREATE TABLE t5 (
    a text collate "C",
    b text collate "C" GENERATED ALWAYS AS (a collate case_insensitive) ,
    d int DEFAULT 22
);
INSERT INTO t5(a,d) values ('d1',28), ('D2',27), ('D1',26);
select * from t5 order by b asc, d asc;



+ /*
+ * TODO: Prevent virtual generated columns from having a
+ * domain type.  We would have to enforce domain constraints
+ * when columns underlying the generated column change.  This
+ * could possibly be implemented, but it's not.
+ *
+ * XXX If column->typeName is not set, then this column
+ * definition is probably a partition definition and will
+ * presumably get its pre-vetted type from elsewhere.  If that
+ * doesn't hold, maybe this check needs to be moved elsewhere.
+ */
+ if (column->generated == ATTRIBUTE_GENERATED_VIRTUAL && column->typeName)
+ {
+ Type ctype;
+
+ ctype = typenameType(cxt->pstate, column->typeName, NULL);
+ if (((Form_pg_type) GETSTRUCT(ctype))->typtype == TYPTYPE_DOMAIN)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("virtual generated column \"%s\" cannot have a domain type",
+ column->colname),
+ parser_errposition(cxt->pstate,
+ column->location)));
+ ReleaseSysCache(ctype);
+ }

create domain mydomain as int4;
create type mydomainrange as range(subtype=mydomain);
CREATE TABLE t3( b bigint, c mydomain GENERATED ALWAYS AS ('11') VIRTUAL);
CREATE TABLE t3( b bigint, c mydomainrange GENERATED ALWAYS AS
('[4,50)') VIRTUAL);
domain will error out, domain over range is ok, is this fine?



+      When <literal>VIRTUAL</literal> is specified, the column will be
+      computed when it is read, and it will not occupy any storage.  When
+      <literal>STORED</literal> is specified, the column will be computed on
+      write and will be stored on disk.  <literal>VIRTUAL</literal> is the
+      default.
drop table if exists t5;
CREATE TABLE t5 (
    a int,
    b text storage extended collate "C"  GENERATED ALWAYS AS (a::text
collate case_insensitive) ,
    d int DEFAULT 22
);
select reltoastrelid <> 0 as has_toast_table from pg_class where oid =
't5'::regclass;

if really no storage, should table t5 have an associated toast table or not?
also check ALTER TABLE variant:
alter table t5 alter b set storage extended;



Do we need to do something in ATExecSetStatistics for cases like:
ALTER TABLE t5 ALTER b SET STATISTICS 2000;
(b is a generated virtual column).
because of
examine_attribute
    if (attr->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
        return NULL;
i guess, this won't have a big impact.



There are some issues with changing virtual generated column type.
like:
drop table if exists another;
create table another (f4 int, f2 text, f3 text, f1 int GENERATED
ALWAYS AS (f4));
insert into another values(1, 'one', 'uno'), (2, 'two', 'due'),(3,
'three', 'tre');
alter table another
  alter f1 type text using f2 || ' and ' || f3 || ' more';
table another;

or
alter table another
  alter f1 type text using f2 || ' and ' || f3 || ' more',
  drop column f1;
ERROR:  column "f1" of relation "another" does not exist

These two command outputs seem not right.
the stored generated column which works as expected.


in src/test/regress/sql/alter_table.sql
-- We disallow changing table's row type if it's used for storage
create table at_tab1 (a int, b text);
create table at_tab2 (x int, y at_tab1);
alter table at_tab1 alter column b type varchar; -- fails
drop table at_tab2;

I think the above restriction should  apply to virtual generated columns too.
given in ATPrepAlterColumnType, not storage we still call
find_composite_type_dependencies

    if (!RELKIND_HAS_STORAGE(tab->relkind))
    {
        /*
         * For relations without storage, do this check now.  Regular tables
         * will check it later when the table is being rewritten.
         */
        find_composite_type_dependencies(rel->rd_rel->reltype, rel, NULL);
    }

so i think in ATPrepAlterColumnType, we should do:

    if (attTup->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
    {
        find_composite_type_dependencies(rel->rd_rel->reltype, rel, NULL);
    }
    else if (tab->relkind == RELKIND_RELATION ||
             tab->relkind == RELKIND_PARTITIONED_TABLE)
   {
   }
    else if (transform)
        ereport(ERROR,
                (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                 errmsg("\"%s\" is not a table",
                        RelationGetRelationName(rel))));

you may add following tests:
------------------------------------------------------------------------
create table at_tab1 (a int, b text GENERATED ALWAYS AS ('hello'), c text);
create table at_tab2 (x int, y at_tab1);
alter table at_tab1 alter column b type varchar; -- fails
drop table at_tab1, at_tab2;

-- Check it for a partitioned table, too
create table at_tab1 (a int, b text GENERATED ALWAYS AS ('hello'), c
text) partition by list(a);;
create table at_tab2 (x int, y at_tab1);
alter table at_tab1 alter column b type varchar; -- fails
drop table at_tab1, at_tab2;
---------------------------------------------------------------------------------



pgsql-hackers by date:

Previous
From: Andreas Karlsson
Date:
Subject: Re: tiny step toward threading: reduce dependence on setlocale()
Next
From: Peter Smith
Date:
Subject: Re: Logical Replication of sequences