Re: [PATCH] Allow anonymous rowtypes in function return column definition - Mailing list pgsql-hackers

From Tom Lane
Subject Re: [PATCH] Allow anonymous rowtypes in function return column definition
Date
Msg-id 19439.1548889181@sss.pgh.pa.us
Whole thread Raw
In response to Re: [PATCH] Allow anonymous rowtypes in function return column definition  (Elvis Pranskevichus <el@prans.net>)
Responses Re: [PATCH] Allow anonymous rowtypes in function return column definition  (Elvis Pranskevichus <el@prans.net>)
List pgsql-hackers
Elvis Pranskevichus <el@prans.net> writes:
> On Sunday, January 13, 2019 4:57:48 PM EST Tom Lane wrote:
>> I feel a bit uncomfortable about defining the new flag to
>> CheckAttributeNamesTypes as "allow_anonymous_records"; that
>> seems pretty short-sighted and single-purpose, especially in
>> view of there being no obvious reason why it shouldn't accept
>> RECORDARRAY too.  Maybe with a clearer explanation of the
>> issues above, we could define that flag in a more on-point way.

> I renamed it to "in_coldeflist", which seems like a clearer indication 
> that we are validating that, and not a regular table definition.

I still found this pretty disjointed.  After a bit of thought I propose
the attached reformulation, which has the callers just tell the routines
which types to allow explicitly, with the justification comments living
at the call sites instead of within the routines.

One point that we hadn't really talked about is what happens when
CheckArrayTypes recurses.  The existing logic is that it just passes
down the same restrictions it was told at the top level, and I kept
that here.  Right now it hardly matters what we pass down, because
the base type of a domain or array can't be a pseudotype, and we
don't really expect to find one in a named composite type either.
The one case where it could matter is if someone tries to use
"pg_statistic" as a field's composite type: that would be allowed if
allow_system_table_mods and otherwise not.  But it's not really
hard to imagine future additions where it would matter and it'd
be important to restrict things as we recurse down.  I think this
formulation makes it easier to see what to do in such cases.

            regards, tom lane

diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 910f651..06d18a1 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -445,11 +445,14 @@ heap_create(const char *relname,
  *        this is used to make certain the tuple descriptor contains a
  *        valid set of attribute names and datatypes.  a problem simply
  *        generates ereport(ERROR) which aborts the current transaction.
+ *
+ *        relkind is the relkind of the relation to be created.
+ *        flags controls which datatypes are allowed, cf CheckAttributeType.
  * --------------------------------
  */
 void
 CheckAttributeNamesTypes(TupleDesc tupdesc, char relkind,
-                         bool allow_system_table_mods)
+                         int flags)
 {
     int            i;
     int            j;
@@ -507,7 +510,7 @@ CheckAttributeNamesTypes(TupleDesc tupdesc, char relkind,
                            TupleDescAttr(tupdesc, i)->atttypid,
                            TupleDescAttr(tupdesc, i)->attcollation,
                            NIL, /* assume we're creating a new rowtype */
-                           allow_system_table_mods);
+                           flags);
     }
 }

@@ -524,13 +527,22 @@ CheckAttributeNamesTypes(TupleDesc tupdesc, char relkind,
  * containing_rowtypes.  When checking a to-be-created rowtype, it's
  * sufficient to pass NIL, because there could not be any recursive reference
  * to a not-yet-existing rowtype.
+ *
+ * flags is a bitmask controlling which datatypes we allow.  For the most
+ * part, pseudo-types are disallowed as attribute types, but there are some
+ * exceptions: ANYARRAYOID, RECORDOID, and RECORDARRAYOID can be allowed
+ * in some cases.  (This works because values of those type classes are
+ * self-identifying to some extent.  However, RECORDOID and RECORDARRAYOID
+ * are reliably identifiable only within a session, since the identity info
+ * may use a typmod that is only locally assigned.  The caller is expected
+ * to know whether these cases are safe.)
  * --------------------------------
  */
 void
 CheckAttributeType(const char *attname,
                    Oid atttypid, Oid attcollation,
                    List *containing_rowtypes,
-                   bool allow_system_table_mods)
+                   int flags)
 {
     char        att_typtype = get_typtype(atttypid);
     Oid            att_typelem;
@@ -538,12 +550,18 @@ CheckAttributeType(const char *attname,
     if (att_typtype == TYPTYPE_PSEUDO)
     {
         /*
-         * Refuse any attempt to create a pseudo-type column, except for a
-         * special hack for pg_statistic: allow ANYARRAY when modifying system
-         * catalogs (this allows creating pg_statistic and cloning it during
-         * VACUUM FULL)
+         * We disallow pseudo-type columns, with the exception of ANYARRAY,
+         * RECORD, and RECORD[] when the caller says that those are OK.
+         *
+         * We don't need to worry about recursive containment for RECORD and
+         * RECORD[] because (a) no named composite type should be allowed to
+         * contain those, and (b) two "anonymous" record types couldn't be
+         * considered to be the same type, so infinite recursion isn't
+         * possible.
          */
-        if (atttypid != ANYARRAYOID || !allow_system_table_mods)
+        if (!((atttypid == ANYARRAYOID && (flags & CHKATYPE_ANYARRAY)) ||
+              (atttypid == RECORDOID && (flags & CHKATYPE_ANYRECORD)) ||
+              (atttypid == RECORDARRAYOID && (flags & CHKATYPE_ANYRECORD))))
             ereport(ERROR,
                     (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                      errmsg("column \"%s\" has pseudo-type %s",
@@ -556,7 +574,7 @@ CheckAttributeType(const char *attname,
          */
         CheckAttributeType(attname, getBaseType(atttypid), attcollation,
                            containing_rowtypes,
-                           allow_system_table_mods);
+                           flags);
     }
     else if (att_typtype == TYPTYPE_COMPOSITE)
     {
@@ -594,7 +612,7 @@ CheckAttributeType(const char *attname,
             CheckAttributeType(NameStr(attr->attname),
                                attr->atttypid, attr->attcollation,
                                containing_rowtypes,
-                               allow_system_table_mods);
+                               flags);
         }

         relation_close(relation, AccessShareLock);
@@ -608,7 +626,7 @@ CheckAttributeType(const char *attname,
          */
         CheckAttributeType(attname, att_typelem, attcollation,
                            containing_rowtypes,
-                           allow_system_table_mods);
+                           flags);
     }

     /*
@@ -1074,7 +1092,13 @@ heap_create_with_catalog(const char *relname,
      */
     Assert(IsNormalProcessingMode() || IsBootstrapProcessingMode());

-    CheckAttributeNamesTypes(tupdesc, relkind, allow_system_table_mods);
+    /*
+     * Validate proposed tupdesc for the desired relkind.  If
+     * allow_system_table_mods is on, allow ANYARRAY to be used; this is a
+     * hack to allow creating pg_statistic and cloning it during VACUUM FULL.
+     */
+    CheckAttributeNamesTypes(tupdesc, relkind,
+                             allow_system_table_mods ? CHKATYPE_ANYARRAY : 0);

     /*
      * This would fail later on anyway, if the relation already exists.  But
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 169b2de..faf6956 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -410,7 +410,7 @@ ConstructTupleDescriptor(Relation heapRelation,
              */
             CheckAttributeType(NameStr(to->attname),
                                to->atttypid, to->attcollation,
-                               NIL, false);
+                               NIL, 0);
         }

         /*
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 434be40..35a9ade 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -5505,7 +5505,7 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
     /* make sure datatype is legal for a column */
     CheckAttributeType(colDef->colname, typeOid, collOid,
                        list_make1_oid(rel->rd_rel->reltype),
-                       false);
+                       0);

     /* construct new attribute's pg_attribute entry */
     attribute.attrelid = myrelid;
@@ -9445,7 +9445,7 @@ ATPrepAlterColumnType(List **wqueue,
     /* make sure datatype is legal for a column */
     CheckAttributeType(colName, targettype, targetcollid,
                        list_make1_oid(rel->rd_rel->reltype),
-                       false);
+                       0);

     if (tab->relkind == RELKIND_RELATION ||
         tab->relkind == RELKIND_PARTITIONED_TABLE)
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index 3ff799f..f3b6d19 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -1590,9 +1590,15 @@ addRangeTableEntryForFunction(ParseState *pstate,

             /*
              * Ensure that the coldeflist defines a legal set of names (no
-             * duplicates) and datatypes (no pseudo-types, for instance).
+             * duplicates, but we needn't worry about system column names) and
+             * datatypes.  Although we mostly can't allow pseudo-types, it
+             * seems safe to allow RECORD and RECORD[], since values within
+             * those type classes are self-identifying at runtime, and the
+             * coldeflist doesn't represent anything that will be visible to
+             * other sessions.
              */
-            CheckAttributeNamesTypes(tupdesc, RELKIND_COMPOSITE_TYPE, false);
+            CheckAttributeNamesTypes(tupdesc, RELKIND_COMPOSITE_TYPE,
+                                     CHKATYPE_ANYRECORD);
         }
         else
             ereport(ERROR,
diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h
index 625b7e5..50fb62b 100644
--- a/src/include/catalog/heap.h
+++ b/src/include/catalog/heap.h
@@ -19,6 +19,10 @@
 #include "parser/parse_node.h"


+/* flag bits for CheckAttributeType/CheckAttributeNamesTypes */
+#define CHKATYPE_ANYARRAY        0x01    /* allow ANYARRAY */
+#define CHKATYPE_ANYRECORD        0x02    /* allow RECORD and RECORD[] */
+
 typedef struct RawColumnDefault
 {
     AttrNumber    attnum;            /* attribute to attach default to */
@@ -130,12 +134,12 @@ extern const FormData_pg_attribute *SystemAttributeDefinition(AttrNumber attno);
 extern const FormData_pg_attribute *SystemAttributeByName(const char *attname);

 extern void CheckAttributeNamesTypes(TupleDesc tupdesc, char relkind,
-                         bool allow_system_table_mods);
+                         int flags);

 extern void CheckAttributeType(const char *attname,
                    Oid atttypid, Oid attcollation,
                    List *containing_rowtypes,
-                   bool allow_system_table_mods);
+                   int flags);

 /* pg_partitioned_table catalog manipulation functions */
 extern void StorePartitionKey(Relation rel,
diff --git a/src/test/regress/expected/rowtypes.out b/src/test/regress/expected/rowtypes.out
index d6a1a33..054faabb 100644
--- a/src/test/regress/expected/rowtypes.out
+++ b/src/test/regress/expected/rowtypes.out
@@ -667,6 +667,17 @@ select row(1, '(1,2)')::testtype6 *<> row(1, '(1,3)')::testtype6;
  t
 (1 row)

+-- anonymous rowtypes in coldeflists
+select q.a, q.b = row(2), q.c = array[row(3)], q.d = row(row(4)) from
+    unnest(array[row(1, row(2), array[row(3)], row(row(4))),
+                 row(2, row(3), array[row(4)], row(row(5)))])
+      as q(a int, b record, c record[], d record);
+ a | ?column? | ?column? | ?column?
+---+----------+----------+----------
+ 1 | t        | t        | t
+ 2 | f        | f        | f
+(2 rows)
+
 drop type testtype1, testtype2, testtype3, testtype4, testtype5, testtype6;
 --
 -- Test case derived from bug #5716: check multiple uses of a rowtype result
diff --git a/src/test/regress/sql/rowtypes.sql b/src/test/regress/sql/rowtypes.sql
index e6d3898..454d462 100644
--- a/src/test/regress/sql/rowtypes.sql
+++ b/src/test/regress/sql/rowtypes.sql
@@ -259,6 +259,12 @@ select row(1, '(1,2)')::testtype6 *< row(1, '(1,3)')::testtype6;
 select row(1, '(1,2)')::testtype6 *>= row(1, '(1,3)')::testtype6;
 select row(1, '(1,2)')::testtype6 *<> row(1, '(1,3)')::testtype6;

+-- anonymous rowtypes in coldeflists
+select q.a, q.b = row(2), q.c = array[row(3)], q.d = row(row(4)) from
+    unnest(array[row(1, row(2), array[row(3)], row(row(4))),
+                 row(2, row(3), array[row(4)], row(row(5)))])
+      as q(a int, b record, c record[], d record);
+
 drop type testtype1, testtype2, testtype3, testtype4, testtype5, testtype6;



pgsql-hackers by date:

Previous
From: David Rowley
Date:
Subject: Re: pg_dump multi VALUES INSERT
Next
From: David Rowley
Date:
Subject: Re: Delay locking partitions during query execution