Thread: Improving information_schema._pg_expandarray()

Improving information_schema._pg_expandarray()

From

Tom Lane

Date:

23 December 2023, 21:18:05

I happened to notice that information_schema._pg_expandarray(),
which has the nigh-unreadable definition

    AS 'select $1[s],
        s operator(pg_catalog.-) pg_catalog.array_lower($1,1) operator(pg_catalog.+) 1
        from pg_catalog.generate_series(pg_catalog.array_lower($1,1),
                                        pg_catalog.array_upper($1,1),
                                        1) as g(s)';

can now be implemented using unnest():

    AS 'SELECT * FROM pg_catalog.unnest($1) WITH ORDINALITY';

It seems to be slightly more efficient this way, but the main point
is to make it more readable.

I then realized that we could also borrow unnest's infrastructure
for rowcount estimation:

    ROWS 100 SUPPORT pg_catalog.array_unnest_support

because array_unnest_support just looks at the array argument and
doesn't have any hard dependency on the function being specifically
unnest().  I'm not sure that any of its uses in information_schema
can benefit from that right now, but surely it can't hurt.

One minor annoyance is that psql.sql is using _pg_expandarray
as a test case for \sf[+].  While we could keep doing so, I think
the main point of that test case is to exercise \sf+'s line
numbering ability, so the new one-line body is not a great test.
I changed that test to use _pg_index_position instead.

            regards, tom lane

diff --git a/src/backend/catalog/information_schema.sql b/src/backend/catalog/information_schema.sql
index 10b34c3c5b..893f73ecb5 100644
--- a/src/backend/catalog/information_schema.sql
+++ b/src/backend/catalog/information_schema.sql
@@ -43,11 +43,8 @@ SET search_path TO information_schema;
 CREATE FUNCTION _pg_expandarray(IN anyarray, OUT x anyelement, OUT n int)
     RETURNS SETOF RECORD
     LANGUAGE sql STRICT IMMUTABLE PARALLEL SAFE
-    AS 'select $1[s],
-        s operator(pg_catalog.-) pg_catalog.array_lower($1,1) operator(pg_catalog.+) 1
-        from pg_catalog.generate_series(pg_catalog.array_lower($1,1),
-                                        pg_catalog.array_upper($1,1),
-                                        1) as g(s)';
+    ROWS 100 SUPPORT pg_catalog.array_unnest_support
+    AS 'SELECT * FROM pg_catalog.unnest($1) WITH ORDINALITY';

 /* Given an index's OID and an underlying-table column number, return the
  * column's position in the index (NULL if not there) */
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index 631012a0f2..e783a24519 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -6317,6 +6317,9 @@ array_unnest(PG_FUNCTION_ARGS)

 /*
  * Planner support function for array_unnest(anyarray)
+ *
+ * Note: this is now also used for information_schema._pg_expandarray(),
+ * which is simply a wrapper around array_unnest().
  */
 Datum
 array_unnest_support(PG_FUNCTION_ARGS)
diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out
index 13e4f6db7b..5d61e4c7bb 100644
--- a/src/test/regress/expected/psql.out
+++ b/src/test/regress/expected/psql.out
@@ -5293,26 +5293,30 @@ comment on function psql_df_plpgsql () is 'some comment';
 rollback;
 drop role regress_psql_user;
 -- check \sf
-\sf information_schema._pg_expandarray
-CREATE OR REPLACE FUNCTION information_schema._pg_expandarray(anyarray, OUT x anyelement, OUT n integer)
- RETURNS SETOF record
+\sf information_schema._pg_index_position
+CREATE OR REPLACE FUNCTION information_schema._pg_index_position(oid, smallint)
+ RETURNS integer
  LANGUAGE sql
- IMMUTABLE PARALLEL SAFE STRICT
-AS $function$select $1[s],
-        s operator(pg_catalog.-) pg_catalog.array_lower($1,1) operator(pg_catalog.+) 1
-        from pg_catalog.generate_series(pg_catalog.array_lower($1,1),
-                                        pg_catalog.array_upper($1,1),
-                                        1) as g(s)$function$
-\sf+ information_schema._pg_expandarray
-        CREATE OR REPLACE FUNCTION information_schema._pg_expandarray(anyarray, OUT x anyelement, OUT n integer)
-         RETURNS SETOF record
+ STABLE STRICT
+BEGIN ATOMIC
+ SELECT (ss.a).n AS n
+    FROM ( SELECT information_schema._pg_expandarray(pg_index.indkey) AS a
+            FROM pg_index
+           WHERE (pg_index.indexrelid = $1)) ss
+   WHERE ((ss.a).x = $2);
+END
+\sf+ information_schema._pg_index_position
+        CREATE OR REPLACE FUNCTION information_schema._pg_index_position(oid, smallint)
+         RETURNS integer
          LANGUAGE sql
-         IMMUTABLE PARALLEL SAFE STRICT
-1       AS $function$select $1[s],
-2               s operator(pg_catalog.-) pg_catalog.array_lower($1,1) operator(pg_catalog.+) 1
-3               from pg_catalog.generate_series(pg_catalog.array_lower($1,1),
-4                                               pg_catalog.array_upper($1,1),
-5                                               1) as g(s)$function$
+         STABLE STRICT
+1       BEGIN ATOMIC
+2        SELECT (ss.a).n AS n
+3           FROM ( SELECT information_schema._pg_expandarray(pg_index.indkey) AS a
+4                   FROM pg_index
+5                  WHERE (pg_index.indexrelid = $1)) ss
+6          WHERE ((ss.a).x = $2);
+7       END
 \sf+ interval_pl_time
         CREATE OR REPLACE FUNCTION pg_catalog.interval_pl_time(interval, time without time zone)
          RETURNS time without time zone
diff --git a/src/test/regress/sql/psql.sql b/src/test/regress/sql/psql.sql
index 695c72d866..f199d624d3 100644
--- a/src/test/regress/sql/psql.sql
+++ b/src/test/regress/sql/psql.sql
@@ -1312,8 +1312,8 @@ rollback;
 drop role regress_psql_user;

 -- check \sf
-\sf information_schema._pg_expandarray
-\sf+ information_schema._pg_expandarray
+\sf information_schema._pg_index_position
+\sf+ information_schema._pg_index_position
 \sf+ interval_pl_time
 \sf ts_debug(text)
 \sf+ ts_debug(text)

Re: Improving information_schema._pg_expandarray()

From

Pavel Stehule

Date:

23 December 2023, 21:27:18

so 23. 12. 2023 v 19:18 odesílatel Tom Lane <tgl@sss.pgh.pa.us> napsal:

I happened to notice that information_schema._pg_expandarray(),
which has the nigh-unreadable definition

AS 'select $1[s],
s operator(pg_catalog.-) pg_catalog.array_lower($1,1) operator(pg_catalog.+) 1
from pg_catalog.generate_series(pg_catalog.array_lower($1,1),
pg_catalog.array_upper($1,1),
1) as g(s)';

can now be implemented using unnest():

AS 'SELECT * FROM pg_catalog.unnest($1) WITH ORDINALITY';

It seems to be slightly more efficient this way, but the main point
is to make it more readable.

I then realized that we could also borrow unnest's infrastructure
for rowcount estimation:

ROWS 100 SUPPORT pg_catalog.array_unnest_support

because array_unnest_support just looks at the array argument and
doesn't have any hard dependency on the function being specifically
unnest(). I'm not sure that any of its uses in information_schema
can benefit from that right now, but surely it can't hurt.

One minor annoyance is that psql.sql is using _pg_expandarray
as a test case for \sf[+]. While we could keep doing so, I think
the main point of that test case is to exercise \sf+'s line
numbering ability, so the new one-line body is not a great test.
I changed that test to use _pg_index_position instead.

regards

Pavel

regards, tom lane

Re: Improving information_schema._pg_expandarray()

From

Dagfinn Ilmari Mannsåker

Date:

13 May, 19:56:07

[ I got distracted while writing this follow-up and only just found it
  in my list of unsent Gnus buffers, and now it's probably too late to
  make it for 17, but here it is anyway while I remember. ]

Tom Lane <tgl@sss.pgh.pa.us> writes:

> I happened to notice that information_schema._pg_expandarray(),
> which has the nigh-unreadable definition
>
>     AS 'select $1[s],
>         s operator(pg_catalog.-) pg_catalog.array_lower($1,1) operator(pg_catalog.+) 1
>         from pg_catalog.generate_series(pg_catalog.array_lower($1,1),
>                                         pg_catalog.array_upper($1,1),
>                                         1) as g(s)';
>
> can now be implemented using unnest():
>
>     AS 'SELECT * FROM pg_catalog.unnest($1) WITH ORDINALITY';
>
> It seems to be slightly more efficient this way, but the main point
> is to make it more readable.

I didn't spot this until it got committed, but it got me wondering what
eliminating the wrapper function completely would look like, so I
whipped up the attached.  It instead calls UNNEST() laterally in the
queries, which has the side benefit of getting rid of several
subselects, one of which was particularly confusing.  In one place the
lateral form eliminated the need for WITH ORDINALITY as well.


- ilmari

From 9d1b2f2d16f10903d975a3bb7551a38c5ce62e15 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dagfinn=20Ilmari=20Manns=C3=A5ker?= <ilmari@ilmari.org>
Date: Thu, 28 Dec 2023 23:47:33 +0000
Subject: [PATCH] Eliminate information_schema._pg_expandarray completely

Commit 58054de2d0847c09ef091956f72ae5e9fb9a176e made it into a simple
wrapper around unnest, but we can simplfy things further by calling
unnest directly in the queries.
---
 src/backend/catalog/information_schema.sql | 104 +++++++++------------
 src/backend/utils/adt/arrayfuncs.c         |   3 -
 src/test/regress/expected/psql.out         |  30 +++---
 src/test/regress/sql/psql.sql              |   2 +-
 4 files changed, 58 insertions(+), 81 deletions(-)

diff --git a/src/backend/catalog/information_schema.sql b/src/backend/catalog/information_schema.sql
index c4145131ce..cf25f5d1bc 100644
--- a/src/backend/catalog/information_schema.sql
+++ b/src/backend/catalog/information_schema.sql
@@ -39,22 +39,15 @@ SET search_path TO information_schema;
  * A few supporting functions first ...
  */
 
-/* Expand any 1-D array into a set with integers 1..N */
-CREATE FUNCTION _pg_expandarray(IN anyarray, OUT x anyelement, OUT n int)
-    RETURNS SETOF RECORD
-    LANGUAGE sql STRICT IMMUTABLE PARALLEL SAFE
-    ROWS 100 SUPPORT pg_catalog.array_unnest_support
-    AS 'SELECT * FROM pg_catalog.unnest($1) WITH ORDINALITY';
-
 /* Given an index's OID and an underlying-table column number, return the
  * column's position in the index (NULL if not there) */
 CREATE FUNCTION _pg_index_position(oid, smallint) RETURNS int
     LANGUAGE sql STRICT STABLE
 BEGIN ATOMIC
-SELECT (ss.a).n FROM
-  (SELECT information_schema._pg_expandarray(indkey) AS a
-   FROM pg_catalog.pg_index WHERE indexrelid = $1) ss
-  WHERE (ss.a).x = $2;
+SELECT ik.icol
+FROM pg_catalog.pg_index,
+     pg_catalog.unnest(indkey) WITH ORDINALITY ik(tcol, icol)
+WHERE indexrelid = $1 AND ik.tcol = $2;
 END;
 
 CREATE FUNCTION _pg_truetypid(pg_attribute, pg_type) RETURNS oid
@@ -1079,37 +1072,32 @@ GRANT SELECT ON enabled_roles TO PUBLIC;
 
 CREATE VIEW key_column_usage AS
     SELECT CAST(current_database() AS sql_identifier) AS constraint_catalog,
-           CAST(nc_nspname AS sql_identifier) AS constraint_schema,
+           CAST(nc.nspname AS sql_identifier) AS constraint_schema,
            CAST(conname AS sql_identifier) AS constraint_name,
            CAST(current_database() AS sql_identifier) AS table_catalog,
-           CAST(nr_nspname AS sql_identifier) AS table_schema,
+           CAST(nr.nspname AS sql_identifier) AS table_schema,
            CAST(relname AS sql_identifier) AS table_name,
            CAST(a.attname AS sql_identifier) AS column_name,
-           CAST((ss.x).n AS cardinal_number) AS ordinal_position,
+           CAST(ck.icol AS cardinal_number) AS ordinal_position,
            CAST(CASE WHEN contype = 'f' THEN
-                       _pg_index_position(ss.conindid, ss.confkey[(ss.x).n])
+                       _pg_index_position(c.conindid, c.confkey[ck.icol])
                      ELSE NULL
                 END AS cardinal_number)
              AS position_in_unique_constraint
     FROM pg_attribute a,
-         (SELECT r.oid AS roid, r.relname, r.relowner,
-                 nc.nspname AS nc_nspname, nr.nspname AS nr_nspname,
-                 c.oid AS coid, c.conname, c.contype, c.conindid,
-                 c.confkey, c.confrelid,
-                 _pg_expandarray(c.conkey) AS x
-          FROM pg_namespace nr, pg_class r, pg_namespace nc,
-               pg_constraint c
-          WHERE nr.oid = r.relnamespace
-                AND r.oid = c.conrelid
-                AND nc.oid = c.connamespace
-                AND c.contype IN ('p', 'u', 'f')
-                AND r.relkind IN ('r', 'p')
-                AND (NOT pg_is_other_temp_schema(nr.oid)) ) AS ss
-    WHERE ss.roid = a.attrelid
-          AND a.attnum = (ss.x).x
+         pg_namespace nr, pg_class r, pg_namespace nc,
+         pg_constraint c, UNNEST(c.conkey) WITH ORDINALITY ck(tcol, icol)
+    WHERE nr.oid = r.relnamespace
+          AND r.oid = c.conrelid
+          AND nc.oid = c.connamespace
+          AND c.contype IN ('p', 'u', 'f')
+          AND r.relkind IN ('r', 'p')
+          AND (NOT pg_is_other_temp_schema(nr.oid))
+          AND r.oid = a.attrelid
+          AND a.attnum = ck.tcol
           AND NOT a.attisdropped
           AND (pg_has_role(relowner, 'USAGE')
-               OR has_column_privilege(roid, a.attnum,
+               OR has_column_privilege(r.oid, a.attnum,
                                        'SELECT, INSERT, UPDATE, REFERENCES'));
 
 GRANT SELECT ON key_column_usage TO PUBLIC;
@@ -1146,20 +1134,20 @@ GRANT SELECT ON key_column_usage TO PUBLIC;
 
 CREATE VIEW parameters AS
     SELECT CAST(current_database() AS sql_identifier) AS specific_catalog,
-           CAST(n_nspname AS sql_identifier) AS specific_schema,
-           CAST(nameconcatoid(proname, p_oid) AS sql_identifier) AS specific_name,
-           CAST((ss.x).n AS cardinal_number) AS ordinal_position,
+           CAST(n.nspname AS sql_identifier) AS specific_schema,
+           CAST(nameconcatoid(proname, p.oid) AS sql_identifier) AS specific_name,
+           CAST(a.argnum AS cardinal_number) AS ordinal_position,
            CAST(
              CASE WHEN proargmodes IS NULL THEN 'IN'
-                WHEN proargmodes[(ss.x).n] = 'i' THEN 'IN'
-                WHEN proargmodes[(ss.x).n] = 'o' THEN 'OUT'
-                WHEN proargmodes[(ss.x).n] = 'b' THEN 'INOUT'
-                WHEN proargmodes[(ss.x).n] = 'v' THEN 'IN'
-                WHEN proargmodes[(ss.x).n] = 't' THEN 'OUT'
+                WHEN proargmodes[a.argnum] = 'i' THEN 'IN'
+                WHEN proargmodes[a.argnum] = 'o' THEN 'OUT'
+                WHEN proargmodes[a.argnum] = 'b' THEN 'INOUT'
+                WHEN proargmodes[a.argnum] = 'v' THEN 'IN'
+                WHEN proargmodes[a.argnum] = 't' THEN 'OUT'
              END AS character_data) AS parameter_mode,
            CAST('NO' AS yes_or_no) AS is_result,
            CAST('NO' AS yes_or_no) AS as_locator,
-           CAST(NULLIF(proargnames[(ss.x).n], '') AS sql_identifier) AS parameter_name,
+           CAST(NULLIF(proargnames[a.argnum], '') AS sql_identifier) AS parameter_name,
            CAST(
              CASE WHEN t.typelem <> 0 AND t.typlen = -1 THEN 'ARRAY'
                   WHEN nt.nspname = 'pg_catalog' THEN format_type(t.oid, null)
@@ -1186,22 +1174,19 @@ CREATE VIEW parameters AS
            CAST(null AS sql_identifier) AS scope_schema,
            CAST(null AS sql_identifier) AS scope_name,
            CAST(null AS cardinal_number) AS maximum_cardinality,
-           CAST((ss.x).n AS sql_identifier) AS dtd_identifier,
+           CAST(a.argtypid AS sql_identifier) AS dtd_identifier,
            CAST(
              CASE WHEN pg_has_role(proowner, 'USAGE')
-                  THEN pg_get_function_arg_default(p_oid, (ss.x).n)
+                  THEN pg_get_function_arg_default(p.oid, a.argnum::int)
                   ELSE NULL END
              AS character_data) AS parameter_default
-
-    FROM pg_type t, pg_namespace nt,
-         (SELECT n.nspname AS n_nspname, p.proname, p.oid AS p_oid, p.proowner,
-                 p.proargnames, p.proargmodes,
-                 _pg_expandarray(coalesce(p.proallargtypes, p.proargtypes::oid[])) AS x
-          FROM pg_namespace n, pg_proc p
-          WHERE n.oid = p.pronamespace
-                AND (pg_has_role(p.proowner, 'USAGE') OR
-                     has_function_privilege(p.oid, 'EXECUTE'))) AS ss
-    WHERE t.oid = (ss.x).x AND t.typnamespace = nt.oid;
+    FROM pg_type t, pg_namespace nt, pg_namespace n, pg_proc p,
+         UNNEST(coalesce(p.proallargtypes, p.proargtypes::oid[]))
+           WITH ORDINALITY a(argtypid, argnum)
+    WHERE n.oid = p.pronamespace
+          AND (pg_has_role(p.proowner, 'USAGE') OR
+               has_function_privilege(p.oid, 'EXECUTE'))
+          AND t.oid = a.argtypid AND t.typnamespace = nt.oid;
 
 GRANT SELECT ON parameters TO PUBLIC;
 
@@ -2083,14 +2068,11 @@ CREATE VIEW triggered_update_columns AS
            CAST(c.relname AS sql_identifier) AS event_object_table,
            CAST(a.attname AS sql_identifier) AS event_object_column
 
-    FROM pg_namespace n, pg_class c, pg_trigger t,
-         (SELECT tgoid, (ta0.tgat).x AS tgattnum, (ta0.tgat).n AS tgattpos
-          FROM (SELECT oid AS tgoid, information_schema._pg_expandarray(tgattr) AS tgat FROM pg_trigger) AS ta0) AS
ta,
-         pg_attribute a
+    FROM pg_namespace n, pg_class c, pg_trigger t, pg_attribute a,
+         UNNEST(tgattr) ta(tgattnum)
 
     WHERE n.oid = c.relnamespace
           AND c.oid = t.tgrelid
-          AND t.oid = ta.tgoid
           AND (a.attrelid, a.attnum) = (t.tgrelid, ta.tgattnum)
           AND NOT t.tgisinternal
           AND (NOT pg_is_other_temp_schema(n.oid))
@@ -2787,10 +2769,10 @@ CREATE VIEW element_types AS
            /* parameters */
            SELECT pronamespace,
                   CAST(nameconcatoid(proname, oid) AS sql_identifier),
-                  'ROUTINE'::text, (ss.x).n, (ss.x).x, 0
-           FROM (SELECT p.pronamespace, p.proname, p.oid,
-                        _pg_expandarray(coalesce(p.proallargtypes, p.proargtypes::oid[])) AS x
-                 FROM pg_proc p) AS ss
+                  'ROUTINE'::text, a.argnum, a.argtypid, 0
+            FROM pg_proc p,
+                 UNNEST(coalesce(p.proallargtypes, p.proargtypes::oid[]))
+                   WITH ORDINALITY a(argtypid, argnum)
 
            UNION ALL
 
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index d6641b570d..673dd02455 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -6316,9 +6316,6 @@ array_unnest(PG_FUNCTION_ARGS)
 
 /*
  * Planner support function for array_unnest(anyarray)
- *
- * Note: this is now also used for information_schema._pg_expandarray(),
- * which is simply a wrapper around array_unnest().
  */
 Datum
 array_unnest_support(PG_FUNCTION_ARGS)
diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out
index 3bbe4c5f97..0ef2f44e56 100644
--- a/src/test/regress/expected/psql.out
+++ b/src/test/regress/expected/psql.out
@@ -5246,11 +5246,11 @@ reset work_mem;
  pg_catalog | bit_xor | smallint         | smallint            | agg
 (3 rows)
 
-\df *._pg_expandarray
-                                             List of functions
-       Schema       |      Name       | Result data type |            Argument data types            | Type 
---------------------+-----------------+------------------+-------------------------------------------+------
- information_schema | _pg_expandarray | SETOF record     | anyarray, OUT x anyelement, OUT n integer | func
+\df *._pg_index_position
+                                    List of functions
+       Schema       |        Name        | Result data type | Argument data types | Type 
+--------------------+--------------------+------------------+---------------------+------
+ information_schema | _pg_index_position | integer          | oid, smallint       | func
 (1 row)
 
 \do - pg_catalog.int4
@@ -5303,11 +5303,10 @@ CREATE OR REPLACE FUNCTION information_schema._pg_index_position(oid, smallint)
  LANGUAGE sql
  STABLE STRICT
 BEGIN ATOMIC
- SELECT (ss.a).n AS n
-    FROM ( SELECT information_schema._pg_expandarray(pg_index.indkey) AS a
-            FROM pg_index
-           WHERE (pg_index.indexrelid = $1)) ss
-   WHERE ((ss.a).x = $2);
+ SELECT ik.icol
+    FROM pg_index,
+     LATERAL unnest(pg_index.indkey) WITH ORDINALITY ik(tcol, icol)
+   WHERE ((pg_index.indexrelid = $1) AND (ik.tcol = $2));
 END
 \sf+ information_schema._pg_index_position
         CREATE OR REPLACE FUNCTION information_schema._pg_index_position(oid, smallint)
@@ -5315,12 +5314,11 @@ END
          LANGUAGE sql
          STABLE STRICT
 1       BEGIN ATOMIC
-2        SELECT (ss.a).n AS n
-3           FROM ( SELECT information_schema._pg_expandarray(pg_index.indkey) AS a
-4                   FROM pg_index
-5                  WHERE (pg_index.indexrelid = $1)) ss
-6          WHERE ((ss.a).x = $2);
-7       END
+2        SELECT ik.icol
+3           FROM pg_index,
+4            LATERAL unnest(pg_index.indkey) WITH ORDINALITY ik(tcol, icol)
+5          WHERE ((pg_index.indexrelid = $1) AND (ik.tcol = $2));
+6       END
 \sf+ interval_pl_time
         CREATE OR REPLACE FUNCTION pg_catalog.interval_pl_time(interval, time without time zone)
          RETURNS time without time zone
diff --git a/src/test/regress/sql/psql.sql b/src/test/regress/sql/psql.sql
index 3b3c6f6e29..1db9389159 100644
--- a/src/test/regress/sql/psql.sql
+++ b/src/test/regress/sql/psql.sql
@@ -1287,7 +1287,7 @@ reset work_mem;
 \df has_database_privilege oid text
 \df has_database_privilege oid text -
 \dfa bit* small*
-\df *._pg_expandarray
+\df *._pg_index_position
 \do - pg_catalog.int4
 \do && anyarray *
 
-- 
2.39.2