Re: Generated column is not updated (Postgres 13) - Mailing list pgsql-bugs

From Tom Lane
Subject Re: Generated column is not updated (Postgres 13)
Date
Msg-id 3569485.1621475748@sss.pgh.pa.us
Whole thread Raw
In response to Re: Generated column is not updated (Postgres 13)  (Tom Lane <tgl@sss.pgh.pa.us>)
Responses Re: Generated column is not updated (Postgres 13)  (Vitaly Ustinov <vitaly@ustinov.ca>)
List pgsql-bugs
I wrote:
> ... I think we ought
> to disallow the case instead.  I observe that we already disallow
> generated columns depending on each other: ...
> But a whole-row var violates this concept completely: it makes the
> generated column depend, not only on every other column, but on itself
> too.  Also, even if you don't mind null-for-not-yet-computed-value,
> that would expose the computation order of the generated columns.

After actually looking at the code involved, I'm even more on the
warpath.  Not only is it failing to reject whole-row vars, but it's
failing to reject system columns.  That is (a) infeasible to support,
given that we don't know the values of the system columns at the time
we compute generated expressions, and (b) just plain ludicrous in
expressions that are required to be immutable.

I see that there is actually a regression test case that believes
that "tableoid" should be allowed, but I think that is nonsense.

In the first place, it's impossible to claim that tableoid is an
immutable expression.  Consider, say, "tableoid > 30000".  Do you
think such a column is likely to survive dump-and-reload unchanged?
Also, while that example is artificial, I'm having a hard time
coming up with realistic immutable use-cases for generation
expressions involving tableoid.

In the second place, there are a bunch of implementation dependencies
that we'd have to fix if we want to consider that supported.  I think
it's mostly accidental that the case seems to work in the mainline
INSERT code path.  It's not hard to find cases where it does not work,
for example

regression=# create table foo (f1 int);
CREATE TABLE
regression=# insert into foo values (1);
INSERT 0 1
regression=# alter table foo add column f2 oid GENERATED ALWAYS AS (tableoid) STORED;
ALTER TABLE
regression=# table foo;
 f1 | f2
----+----
  1 |  0
(1 row)

So I think we should just forbid tableoid along with other system
columns, as attached.

            regards, tom lane

diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 431e62e389..53ff26774a 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -3020,15 +3020,29 @@ check_nested_generated_walker(Node *node, void *context)
         AttrNumber    attnum;

         relid = rt_fetch(var->varno, pstate->p_rtable)->relid;
+        if (!OidIsValid(relid))
+            return false;        /* XXX shouldn't we raise an error? */
+
         attnum = var->varattno;

-        if (OidIsValid(relid) && AttributeNumberIsValid(attnum) && get_attgenerated(relid, attnum))
+        if (attnum > 0 && get_attgenerated(relid, attnum))
             ereport(ERROR,
-                    (errcode(ERRCODE_SYNTAX_ERROR),
+                    (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
                      errmsg("cannot use generated column \"%s\" in column generation expression",
                             get_attname(relid, attnum, false)),
                      errdetail("A generated column cannot reference another generated column."),
                      parser_errposition(pstate, var->location)));
+        if (attnum == 0)
+            ereport(ERROR,
+                    (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                     errmsg("cannot use whole-row variable in column generation expression"),
+                     parser_errposition(pstate, var->location)));
+        if (attnum < 0)
+            ereport(ERROR,
+                    (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                     errmsg("cannot use system column \"%s\" in column generation expression",
+                            get_attname(relid, attnum, false)),
+                     parser_errposition(pstate, var->location)));

         return false;
     }
diff --git a/src/test/regress/expected/generated.out b/src/test/regress/expected/generated.out
index 675773f0c1..896b568165 100644
--- a/src/test/regress/expected/generated.out
+++ b/src/test/regress/expected/generated.out
@@ -46,6 +46,16 @@ ERROR:  cannot use generated column "b" in column generation expression
 LINE 1: ...AYS AS (a * 2) STORED, c int GENERATED ALWAYS AS (b * 3) STO...
                                                              ^
 DETAIL:  A generated column cannot reference another generated column.
+-- a whole-row var is a self-reference on steroids
+CREATE TABLE gtest_err_2c (a int PRIMARY KEY, b int GENERATED ALWAYS AS (length(gtest_err_2c::text)) STORED);
+ERROR:  cannot use whole-row variable in column generation expression
+LINE 1: ...nt PRIMARY KEY, b int GENERATED ALWAYS AS (length(gtest_err_...
+                                                             ^
+-- no system columns, either, as those aren't known when generation occurs
+CREATE TABLE gtest_err_2d (a int PRIMARY KEY, b oid GENERATED ALWAYS AS (tableoid) STORED);
+ERROR:  cannot use system column "tableoid" in column generation expression
+LINE 1: ...2d (a int PRIMARY KEY, b oid GENERATED ALWAYS AS (tableoid) ...
+                                                             ^
 -- invalid reference
 CREATE TABLE gtest_err_3 (a int PRIMARY KEY, b int GENERATED ALWAYS AS (c * 2) STORED);
 ERROR:  column "c" does not exist
@@ -452,19 +462,6 @@ SELECT * FROM gtest4;

 DROP TABLE gtest4;
 DROP TYPE double_int;
--- using tableoid is allowed
-CREATE TABLE gtest_tableoid (
-  a int PRIMARY KEY,
-  b bool GENERATED ALWAYS AS (tableoid <> 0) STORED
-);
-INSERT INTO gtest_tableoid VALUES (1), (2);
-SELECT * FROM gtest_tableoid;
- a | b
----+---
- 1 | t
- 2 | t
-(2 rows)
-
 -- drop column behavior
 CREATE TABLE gtest10 (a int PRIMARY KEY, b int, c int GENERATED ALWAYS AS (b * 2) STORED);
 ALTER TABLE gtest10 DROP COLUMN b;
diff --git a/src/test/regress/sql/generated.sql b/src/test/regress/sql/generated.sql
index 63251c443a..69c334742f 100644
--- a/src/test/regress/sql/generated.sql
+++ b/src/test/regress/sql/generated.sql
@@ -17,6 +17,11 @@ CREATE TABLE gtest_err_1 (a int PRIMARY KEY, b int GENERATED ALWAYS AS (a * 2) S
 -- references to other generated columns, including self-references
 CREATE TABLE gtest_err_2a (a int PRIMARY KEY, b int GENERATED ALWAYS AS (b * 2) STORED);
 CREATE TABLE gtest_err_2b (a int PRIMARY KEY, b int GENERATED ALWAYS AS (a * 2) STORED, c int GENERATED ALWAYS AS (b *
3)STORED); 
+-- a whole-row var is a self-reference on steroids
+CREATE TABLE gtest_err_2c (a int PRIMARY KEY, b int GENERATED ALWAYS AS (length(gtest_err_2c::text)) STORED);
+
+-- no system columns, either, as those aren't known when generation occurs
+CREATE TABLE gtest_err_2d (a int PRIMARY KEY, b oid GENERATED ALWAYS AS (tableoid) STORED);

 -- invalid reference
 CREATE TABLE gtest_err_3 (a int PRIMARY KEY, b int GENERATED ALWAYS AS (c * 2) STORED);
@@ -215,14 +220,6 @@ SELECT * FROM gtest4;
 DROP TABLE gtest4;
 DROP TYPE double_int;

--- using tableoid is allowed
-CREATE TABLE gtest_tableoid (
-  a int PRIMARY KEY,
-  b bool GENERATED ALWAYS AS (tableoid <> 0) STORED
-);
-INSERT INTO gtest_tableoid VALUES (1), (2);
-SELECT * FROM gtest_tableoid;
-
 -- drop column behavior
 CREATE TABLE gtest10 (a int PRIMARY KEY, b int, c int GENERATED ALWAYS AS (b * 2) STORED);
 ALTER TABLE gtest10 DROP COLUMN b;

pgsql-bugs by date:

Previous
From: Peter Geoghegan
Date:
Subject: Re: Fwd: BUG #17017: Two versions of the same row of records are returned in one query
Next
From: Michael Paquier
Date:
Subject: Re: BUG #17023: wal_log_hints not configured even if it on