Thread: Converting contrib SQL functions to new style

Converting contrib SQL functions to new style

From
Tom Lane
Date:
Attached are some draft patches to convert almost all of the
contrib modules' SQL functions to use SQL-standard function bodies.
The point of this is to remove the residual search_path security
hazards that we couldn't fix in commits 7eeb1d986 et al.  Since
a SQL-style function body is fully parsed at creation time,
its object references are not subject to capture by the run-time
search path.  Possibly there are small performance benefits too,
though I've not tried to measure that.

I've not touched the documentation yet.  I suppose that we can
tone down the warnings added by 7eeb1d986 quite a bit, maybe
replacing them with just "be sure to use version x.y or later".
However I think we may still need an assumption that earthdistance
and cube are in the same schema --- any comments on that?

I'd like to propose squeezing these changes into v14, even though
we're past feature freeze.  Reason one is that this is less a
new feature than a security fix; reason two is that this provides
some non-artificial test coverage for the SQL-function-body feature.

BTW, there still remain a couple of old-style SQL functions in
contrib/adminpack and contrib/lo.  AFAICS those are unconditionally
secure, so I didn't bother with them.

Thoughts?

            regards, tom lane

diff --git a/contrib/citext/Makefile b/contrib/citext/Makefile
index a7de52928d..d879f4eb0b 100644
--- a/contrib/citext/Makefile
+++ b/contrib/citext/Makefile
@@ -4,6 +4,7 @@ MODULES = citext

 EXTENSION = citext
 DATA = citext--1.4.sql \
+    citext--1.6--1.7.sql \
     citext--1.5--1.6.sql \
     citext--1.4--1.5.sql \
     citext--1.3--1.4.sql \
diff --git a/contrib/citext/citext--1.6--1.7.sql b/contrib/citext/citext--1.6--1.7.sql
new file mode 100644
index 0000000000..51a7edf2bc
--- /dev/null
+++ b/contrib/citext/citext--1.6--1.7.sql
@@ -0,0 +1,65 @@
+/* contrib/citext/citext--1.6--1.7.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION citext UPDATE TO '1.7'" to load this file. \quit
+
+--
+-- Matching citext in string comparison functions.
+-- XXX TODO Ideally these would be implemented in C.
+--
+
+CREATE OR REPLACE FUNCTION regexp_match( citext, citext ) RETURNS TEXT[]
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN pg_catalog.regexp_match( $1::pg_catalog.text, $2::pg_catalog.text, 'i' );
+
+CREATE OR REPLACE FUNCTION regexp_match( citext, citext, text ) RETURNS TEXT[]
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN pg_catalog.regexp_match( $1::pg_catalog.text, $2::pg_catalog.text, CASE WHEN pg_catalog.strpos($3, 'c') = 0
THEN $3 || 'i' ELSE $3 END ); 
+
+CREATE OR REPLACE FUNCTION regexp_matches( citext, citext ) RETURNS SETOF TEXT[]
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE ROWS 1
+RETURN pg_catalog.regexp_matches( $1::pg_catalog.text, $2::pg_catalog.text, 'i' );
+
+CREATE OR REPLACE FUNCTION regexp_matches( citext, citext, text ) RETURNS SETOF TEXT[]
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE ROWS 10
+RETURN pg_catalog.regexp_matches( $1::pg_catalog.text, $2::pg_catalog.text, CASE WHEN pg_catalog.strpos($3, 'c') = 0
THEN $3 || 'i' ELSE $3 END ); 
+
+CREATE OR REPLACE FUNCTION regexp_replace( citext, citext, text ) returns TEXT
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN pg_catalog.regexp_replace( $1::pg_catalog.text, $2::pg_catalog.text, $3, 'i');
+
+CREATE OR REPLACE FUNCTION regexp_replace( citext, citext, text, text ) returns TEXT
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN pg_catalog.regexp_replace( $1::pg_catalog.text, $2::pg_catalog.text, $3, CASE WHEN pg_catalog.strpos($4, 'c') =
0THEN  $4 || 'i' ELSE $4 END); 
+
+CREATE OR REPLACE FUNCTION regexp_split_to_array( citext, citext ) RETURNS TEXT[]
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN pg_catalog.regexp_split_to_array( $1::pg_catalog.text, $2::pg_catalog.text, 'i' );
+
+CREATE OR REPLACE FUNCTION regexp_split_to_array( citext, citext, text ) RETURNS TEXT[]
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN pg_catalog.regexp_split_to_array( $1::pg_catalog.text, $2::pg_catalog.text, CASE WHEN pg_catalog.strpos($3,
'c')= 0 THEN  $3 || 'i' ELSE $3 END ); 
+
+CREATE OR REPLACE FUNCTION regexp_split_to_table( citext, citext ) RETURNS SETOF TEXT
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN pg_catalog.regexp_split_to_table( $1::pg_catalog.text, $2::pg_catalog.text, 'i' );
+
+CREATE OR REPLACE FUNCTION regexp_split_to_table( citext, citext, text ) RETURNS SETOF TEXT
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN pg_catalog.regexp_split_to_table( $1::pg_catalog.text, $2::pg_catalog.text, CASE WHEN pg_catalog.strpos($3,
'c')= 0 THEN  $3 || 'i' ELSE $3 END ); 
+
+CREATE OR REPLACE FUNCTION strpos( citext, citext ) RETURNS INT
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN pg_catalog.strpos( pg_catalog.lower( $1::pg_catalog.text ), pg_catalog.lower( $2::pg_catalog.text ) );
+
+CREATE OR REPLACE FUNCTION replace( citext, citext, citext ) RETURNS TEXT
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN pg_catalog.regexp_replace( $1::pg_catalog.text, pg_catalog.regexp_replace($2::pg_catalog.text,
'([^a-zA-Z_0-9])',E'\\\\\\1', 'g'), $3::pg_catalog.text, 'gi' ); 
+
+CREATE OR REPLACE FUNCTION split_part( citext, citext, int ) RETURNS TEXT
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN (pg_catalog.regexp_split_to_array( $1::pg_catalog.text, pg_catalog.regexp_replace($2::pg_catalog.text,
'([^a-zA-Z_0-9])',E'\\\\\\1', 'g'), 'i'))[$3]; 
+
+CREATE OR REPLACE FUNCTION translate( citext, citext, text ) RETURNS TEXT
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN pg_catalog.translate( pg_catalog.translate( $1::pg_catalog.text, pg_catalog.lower($2::pg_catalog.text), $3),
pg_catalog.upper($2::pg_catalog.text),$3); 
diff --git a/contrib/citext/citext.control b/contrib/citext/citext.control
index ccf445475d..f82265b334 100644
--- a/contrib/citext/citext.control
+++ b/contrib/citext/citext.control
@@ -1,6 +1,6 @@
 # citext extension
 comment = 'data type for case-insensitive character strings'
-default_version = '1.6'
+default_version = '1.7'
 module_pathname = '$libdir/citext'
 relocatable = true
 trusted = true
diff --git a/contrib/earthdistance/Makefile b/contrib/earthdistance/Makefile
index f93b7a925a..0cf3fa379a 100644
--- a/contrib/earthdistance/Makefile
+++ b/contrib/earthdistance/Makefile
@@ -3,7 +3,8 @@
 MODULES = earthdistance

 EXTENSION = earthdistance
-DATA = earthdistance--1.1.sql earthdistance--1.0--1.1.sql
+DATA = earthdistance--1.1.sql earthdistance--1.0--1.1.sql \
+    earthdistance--1.1--1.2.sql
 PGFILEDESC = "earthdistance - calculate distances on the surface of the Earth"

 REGRESS = earthdistance
diff --git a/contrib/earthdistance/earthdistance--1.1--1.2.sql b/contrib/earthdistance/earthdistance--1.1--1.2.sql
new file mode 100644
index 0000000000..7c895e2204
--- /dev/null
+++ b/contrib/earthdistance/earthdistance--1.1--1.2.sql
@@ -0,0 +1,57 @@
+/* contrib/earthdistance/earthdistance--1.1--1.2.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION earthdistance UPDATE TO '1.2'" to load this file. \quit
+
+CREATE OR REPLACE FUNCTION earth() RETURNS float8
+LANGUAGE SQL IMMUTABLE PARALLEL SAFE
+RETURN '6378168'::float8;
+
+CREATE OR REPLACE FUNCTION sec_to_gc(float8)
+RETURNS float8
+LANGUAGE SQL
+IMMUTABLE STRICT
+PARALLEL SAFE
+RETURN CASE WHEN $1 < 0 THEN 0::float8 WHEN $1/(2*earth()) > 1 THEN pi()*earth() ELSE 2*earth()*asin($1/(2*earth()))
END;
+
+CREATE OR REPLACE FUNCTION gc_to_sec(float8)
+RETURNS float8
+LANGUAGE SQL
+IMMUTABLE STRICT
+PARALLEL SAFE
+RETURN CASE WHEN $1 < 0 THEN 0::float8 WHEN $1/earth() > pi() THEN 2*earth() ELSE 2*earth()*sin($1/(2*earth())) END;
+
+CREATE OR REPLACE FUNCTION ll_to_earth(float8, float8)
+RETURNS earth
+LANGUAGE SQL
+IMMUTABLE STRICT
+PARALLEL SAFE
+RETURN
cube(cube(cube(earth()*cos(radians($1))*cos(radians($2))),earth()*cos(radians($1))*sin(radians($2))),earth()*sin(radians($1)))::earth;
+
+CREATE OR REPLACE FUNCTION latitude(earth)
+RETURNS float8
+LANGUAGE SQL
+IMMUTABLE STRICT
+PARALLEL SAFE
+RETURN CASE WHEN cube_ll_coord($1, 3)/earth() < -1 THEN -90::float8 WHEN cube_ll_coord($1, 3)/earth() > 1 THEN
90::float8ELSE degrees(asin(cube_ll_coord($1, 3)/earth())) END; 
+
+CREATE OR REPLACE FUNCTION longitude(earth)
+RETURNS float8
+LANGUAGE SQL
+IMMUTABLE STRICT
+PARALLEL SAFE
+RETURN degrees(atan2(cube_ll_coord($1, 2), cube_ll_coord($1, 1)));
+
+CREATE OR REPLACE FUNCTION earth_distance(earth, earth)
+RETURNS float8
+LANGUAGE SQL
+IMMUTABLE STRICT
+PARALLEL SAFE
+RETURN sec_to_gc(cube_distance($1, $2));
+
+CREATE OR REPLACE FUNCTION earth_box(earth, float8)
+RETURNS cube
+LANGUAGE SQL
+IMMUTABLE STRICT
+PARALLEL SAFE
+RETURN cube_enlarge($1, gc_to_sec($2), 3);
diff --git a/contrib/earthdistance/earthdistance.control b/contrib/earthdistance/earthdistance.control
index 5816d22cdd..de2465d487 100644
--- a/contrib/earthdistance/earthdistance.control
+++ b/contrib/earthdistance/earthdistance.control
@@ -1,6 +1,6 @@
 # earthdistance extension
 comment = 'calculate great-circle distances on the surface of the Earth'
-default_version = '1.1'
+default_version = '1.2'
 module_pathname = '$libdir/earthdistance'
 relocatable = true
 requires = 'cube'
diff --git a/contrib/pageinspect/pageinspect--1.8--1.9.sql b/contrib/pageinspect/pageinspect--1.8--1.9.sql
index be89a64ca1..4077f9ecee 100644
--- a/contrib/pageinspect/pageinspect--1.8--1.9.sql
+++ b/contrib/pageinspect/pageinspect--1.8--1.9.sql
@@ -135,3 +135,73 @@ CREATE FUNCTION brin_page_items(IN page bytea, IN index_oid regclass,
 RETURNS SETOF record
 AS 'MODULE_PATHNAME', 'brin_page_items'
 LANGUAGE C STRICT PARALLEL SAFE;
+
+-- Convert SQL functions to new style
+
+CREATE OR REPLACE FUNCTION heap_page_item_attrs(
+    IN page bytea,
+    IN rel_oid regclass,
+    IN do_detoast bool,
+    OUT lp smallint,
+    OUT lp_off smallint,
+    OUT lp_flags smallint,
+    OUT lp_len smallint,
+    OUT t_xmin xid,
+    OUT t_xmax xid,
+    OUT t_field3 int4,
+    OUT t_ctid tid,
+    OUT t_infomask2 integer,
+    OUT t_infomask integer,
+    OUT t_hoff smallint,
+    OUT t_bits text,
+    OUT t_oid oid,
+    OUT t_attrs bytea[]
+    )
+RETURNS SETOF record
+LANGUAGE SQL PARALLEL SAFE
+BEGIN ATOMIC
+SELECT lp,
+       lp_off,
+       lp_flags,
+       lp_len,
+       t_xmin,
+       t_xmax,
+       t_field3,
+       t_ctid,
+       t_infomask2,
+       t_infomask,
+       t_hoff,
+       t_bits,
+       t_oid,
+       tuple_data_split(
+         rel_oid,
+         t_data,
+         t_infomask,
+         t_infomask2,
+         t_bits,
+         do_detoast)
+         AS t_attrs
+  FROM heap_page_items(page);
+END;
+
+CREATE OR REPLACE FUNCTION heap_page_item_attrs(IN page bytea, IN rel_oid regclass,
+    OUT lp smallint,
+    OUT lp_off smallint,
+    OUT lp_flags smallint,
+    OUT lp_len smallint,
+    OUT t_xmin xid,
+    OUT t_xmax xid,
+    OUT t_field3 int4,
+    OUT t_ctid tid,
+    OUT t_infomask2 integer,
+    OUT t_infomask integer,
+    OUT t_hoff smallint,
+    OUT t_bits text,
+    OUT t_oid oid,
+    OUT t_attrs bytea[]
+    )
+RETURNS SETOF record
+LANGUAGE SQL PARALLEL SAFE
+BEGIN ATOMIC
+SELECT * FROM heap_page_item_attrs(page, rel_oid, false);
+END;
diff --git a/contrib/pg_freespacemap/Makefile b/contrib/pg_freespacemap/Makefile
index da40b80c7c..e09fa7ccbe 100644
--- a/contrib/pg_freespacemap/Makefile
+++ b/contrib/pg_freespacemap/Makefile
@@ -6,7 +6,9 @@ OBJS = \
     pg_freespacemap.o

 EXTENSION = pg_freespacemap
-DATA = pg_freespacemap--1.1.sql pg_freespacemap--1.1--1.2.sql \
+DATA = pg_freespacemap--1.1.sql \
+    pg_freespacemap--1.2--1.3.sql \
+    pg_freespacemap--1.1--1.2.sql \
     pg_freespacemap--1.0--1.1.sql
 PGFILEDESC = "pg_freespacemap - monitoring of free space map"

diff --git a/contrib/pg_freespacemap/pg_freespacemap--1.2--1.3.sql
b/contrib/pg_freespacemap/pg_freespacemap--1.2--1.3.sql
new file mode 100644
index 0000000000..7f92c9e92e
--- /dev/null
+++ b/contrib/pg_freespacemap/pg_freespacemap--1.2--1.3.sql
@@ -0,0 +1,13 @@
+/* contrib/pg_freespacemap/pg_freespacemap--1.2--1.3.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pg_freespacemap UPDATE TO '1.3'" to load this file. \quit
+
+CREATE OR REPLACE FUNCTION
+  pg_freespace(rel regclass, blkno OUT bigint, avail OUT int2)
+RETURNS SETOF RECORD
+LANGUAGE SQL PARALLEL SAFE
+BEGIN ATOMIC
+  SELECT blkno, pg_freespace($1, blkno) AS avail
+  FROM generate_series(0, pg_relation_size($1) / current_setting('block_size')::bigint - 1) AS blkno;
+END;
diff --git a/contrib/pg_freespacemap/pg_freespacemap.control b/contrib/pg_freespacemap/pg_freespacemap.control
index ac8fc5050a..1992320691 100644
--- a/contrib/pg_freespacemap/pg_freespacemap.control
+++ b/contrib/pg_freespacemap/pg_freespacemap.control
@@ -1,5 +1,5 @@
 # pg_freespacemap extension
 comment = 'examine the free space map (FSM)'
-default_version = '1.2'
+default_version = '1.3'
 module_pathname = '$libdir/pg_freespacemap'
 relocatable = true
diff --git a/contrib/xml2/Makefile b/contrib/xml2/Makefile
index 0d703fe0e8..8597e9aa9c 100644
--- a/contrib/xml2/Makefile
+++ b/contrib/xml2/Makefile
@@ -7,7 +7,9 @@ OBJS = \
     xslt_proc.o
 
 EXTENSION = xml2
-DATA = xml2--1.1.sql xml2--1.0--1.1.sql
+DATA = xml2--1.1.sql \
+    xml2--1.1--1.2.sql \
+    xml2--1.0--1.1.sql
 PGFILEDESC = "xml2 - XPath querying and XSLT"
 
 REGRESS = xml2
diff --git a/contrib/xml2/xml2--1.1--1.2.sql b/contrib/xml2/xml2--1.1--1.2.sql
new file mode 100644
index 0000000000..12d00064e8
--- /dev/null
+++ b/contrib/xml2/xml2--1.1--1.2.sql
@@ -0,0 +1,18 @@
+/* contrib/xml2/xml2--1.1--1.2.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION xml2 UPDATE TO '1.2'" to load this file. \quit
+
+CREATE OR REPLACE FUNCTION xpath_list(text,text) RETURNS text
+LANGUAGE SQL STRICT IMMUTABLE PARALLEL SAFE
+RETURN xpath_list($1,$2,',');
+
+CREATE OR REPLACE FUNCTION xpath_nodeset(text,text)
+RETURNS text
+LANGUAGE SQL STRICT IMMUTABLE PARALLEL SAFE
+RETURN xpath_nodeset($1,$2,'','');
+
+CREATE OR REPLACE FUNCTION xpath_nodeset(text,text,text)
+RETURNS text
+LANGUAGE SQL STRICT IMMUTABLE PARALLEL SAFE
+RETURN xpath_nodeset($1,$2,'',$3);
diff --git a/contrib/xml2/xml2.control b/contrib/xml2/xml2.control
index ba2c0599a3..b32156c949 100644
--- a/contrib/xml2/xml2.control
+++ b/contrib/xml2/xml2.control
@@ -1,6 +1,6 @@
 # xml2 extension
 comment = 'XPath querying and XSLT'
-default_version = '1.1'
+default_version = '1.2'
 module_pathname = '$libdir/pgxml'
 # XXX do we still need this to be non-relocatable?
 relocatable = false

Re: Converting contrib SQL functions to new style

From
Noah Misch
Date:
On Tue, Apr 13, 2021 at 06:26:34PM -0400, Tom Lane wrote:
> Attached are some draft patches to convert almost all of the
> contrib modules' SQL functions to use SQL-standard function bodies.
> The point of this is to remove the residual search_path security
> hazards that we couldn't fix in commits 7eeb1d986 et al.  Since
> a SQL-style function body is fully parsed at creation time,
> its object references are not subject to capture by the run-time
> search path.

Are there any inexact matches in those function/operator calls?  Will that
matter more or less than it does today?

> However I think we may still need an assumption that earthdistance
> and cube are in the same schema --- any comments on that?

That part doesn't change, indeed.

> I'd like to propose squeezing these changes into v14, even though
> we're past feature freeze.  Reason one is that this is less a
> new feature than a security fix; reason two is that this provides
> some non-artificial test coverage for the SQL-function-body feature.

Dogfooding like this is good.  What about the SQL-language functions that
initdb creates?



Re: Converting contrib SQL functions to new style

From
Tom Lane
Date:
Noah Misch <noah@leadboat.com> writes:
> On Tue, Apr 13, 2021 at 06:26:34PM -0400, Tom Lane wrote:
>> Attached are some draft patches to convert almost all of the
>> contrib modules' SQL functions to use SQL-standard function bodies.
>> The point of this is to remove the residual search_path security
>> hazards that we couldn't fix in commits 7eeb1d986 et al.  Since
>> a SQL-style function body is fully parsed at creation time,
>> its object references are not subject to capture by the run-time
>> search path.

> Are there any inexact matches in those function/operator calls?  Will that
> matter more or less than it does today?

I can't claim to have looked closely for inexact matches.  It should
matter less than today, since there's a hazard only during creation
(with a somewhat-controlled search path) and not during use.  But
that doesn't automatically eliminate the issue.

>> I'd like to propose squeezing these changes into v14, even though
>> we're past feature freeze.  Reason one is that this is less a
>> new feature than a security fix; reason two is that this provides
>> some non-artificial test coverage for the SQL-function-body feature.

> Dogfooding like this is good.  What about the SQL-language functions that
> initdb creates?

Hadn't thought about those, but converting them seems like a good idea.

            regards, tom lane



Re: Converting contrib SQL functions to new style

From
Noah Misch
Date:
On Tue, Apr 13, 2021 at 11:11:13PM -0400, Tom Lane wrote:
> Noah Misch <noah@leadboat.com> writes:
> > On Tue, Apr 13, 2021 at 06:26:34PM -0400, Tom Lane wrote:
> >> Attached are some draft patches to convert almost all of the
> >> contrib modules' SQL functions to use SQL-standard function bodies.
> >> The point of this is to remove the residual search_path security
> >> hazards that we couldn't fix in commits 7eeb1d986 et al.  Since
> >> a SQL-style function body is fully parsed at creation time,
> >> its object references are not subject to capture by the run-time
> >> search path.
> 
> > Are there any inexact matches in those function/operator calls?  Will that
> > matter more or less than it does today?
> 
> I can't claim to have looked closely for inexact matches.  It should
> matter less than today, since there's a hazard only during creation
> (with a somewhat-controlled search path) and not during use.  But
> that doesn't automatically eliminate the issue.

Once CREATE EXTENSION is over, things are a great deal safer under this
proposal, as you say.  I suspect it makes CREATE EXTENSION more hazardous.
Today, typical SQL commands in extension creation scripts don't activate
inexact argument type matching.  You were careful to make each script clear
the search_path around commands deviating from that (commit 7eeb1d9).  I think
"CREATE FUNCTION plus1dot1(int) RETURNS numeric LANGUAGE SQL RETURN $1 + 1.1;"
in a trusted extension script would constitute a security vulnerability, since
it can lock in the wrong operator.



Re: Converting contrib SQL functions to new style

From
Robert Haas
Date:
On Wed, Apr 14, 2021 at 8:58 AM Noah Misch <noah@leadboat.com> wrote:
> Once CREATE EXTENSION is over, things are a great deal safer under this
> proposal, as you say.  I suspect it makes CREATE EXTENSION more hazardous.
> Today, typical SQL commands in extension creation scripts don't activate
> inexact argument type matching.  You were careful to make each script clear
> the search_path around commands deviating from that (commit 7eeb1d9).  I think
> "CREATE FUNCTION plus1dot1(int) RETURNS numeric LANGUAGE SQL RETURN $1 + 1.1;"
> in a trusted extension script would constitute a security vulnerability, since
> it can lock in the wrong operator.

I don't understand how that can happen, unless we've failed to secure
the search_path. And, if we've failed to secure the search_path, I
think we are in a lot of trouble no matter what else we do.

-- 
Robert Haas
EDB: http://www.enterprisedb.com



Re: Converting contrib SQL functions to new style

From
Tom Lane
Date:
Robert Haas <robertmhaas@gmail.com> writes:
> On Wed, Apr 14, 2021 at 8:58 AM Noah Misch <noah@leadboat.com> wrote:
>> Once CREATE EXTENSION is over, things are a great deal safer under this
>> proposal, as you say.  I suspect it makes CREATE EXTENSION more hazardous.
>> Today, typical SQL commands in extension creation scripts don't activate
>> inexact argument type matching.  You were careful to make each script clear
>> the search_path around commands deviating from that (commit 7eeb1d9).  I think
>> "CREATE FUNCTION plus1dot1(int) RETURNS numeric LANGUAGE SQL RETURN $1 + 1.1;"
>> in a trusted extension script would constitute a security vulnerability, since
>> it can lock in the wrong operator.

> I don't understand how that can happen, unless we've failed to secure
> the search_path. And, if we've failed to secure the search_path, I
> think we are in a lot of trouble no matter what else we do.

The situation of interest is where you are trying to install an extension
into a schema that also contains malicious objects.  We've managed to make
most of the commands you might use in an extension script secure against
that situation, and Noah wants to hold SQL-function creation to that same
standard.

My concern in this patch is rendering SQL functions safe against untrusted
search_path at *time of use*, which is really an independent security
concern.

If you're willing to assume there's nothing untrustworthy in your
search_path, then there's no issue and nothing to fix.  Unfortunately,
that seems like a rather head-in-the-sand standpoint.

            regards, tom lane



Re: Converting contrib SQL functions to new style

From
Mark Dilger
Date:

> On Apr 13, 2021, at 3:26 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
>
> However I think we may still need an assumption that earthdistance
> and cube are in the same schema --- any comments on that?

This is probably not worth doing, and we are already past feature freeze, but adding syntax to look up the namespace of
anextension might help.  The problem seems to be that we can't syntactically refer to the schema of an extension.  We
haveto instead query pg_catalog.pg_extension joined against pg_catalog.pg_namespace and then interpolate the namespace
nameinto strings that get executed, which is ugly. 

This syntax is perhaps a non-starter, but conceptually something like:

-CREATE DOMAIN earth AS cube
+CREATE DOMAIN earthdistance::->earth AS cube::->cube

Then we'd perhaps extend RangeVar with an extensionname field and have either a schemaname or an extensionname be
lookedup in places where we currently lookup schemas, adding a catcache for extensions.  (Like I said, probably not
worthdoing.) 


We could get something like this working just inside the CREATE EXTENSION command if we expanded on the @extschema@
ideaa bit.  At first I thought this idea would suffer race conditions with concurrent modifications of pg_extension or
pg_namespace,but it looks like we already have a snapshot when processing the script file, so: 

-CREATE DOMAIN earth AS cube
+CREATE DOMAIN @@earthdistance@@::earth AS @@cube@@::cube

or such, with @@foo@@ being parsed out, looked up in pg_extension join pg_namespace, and substituted back in.

—
Mark Dilger
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company






Re: Converting contrib SQL functions to new style

From
Robert Haas
Date:
On Wed, Apr 14, 2021 at 10:49 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:
> The situation of interest is where you are trying to install an extension
> into a schema that also contains malicious objects.  We've managed to make
> most of the commands you might use in an extension script secure against
> that situation, and Noah wants to hold SQL-function creation to that same
> standard.

Oh, I was forgetting that the creation schema has to be first in your
search path. :-(

Does the idea of allowing the creation schema to be set separately
have any legs? Because it seems like that would help here.

-- 
Robert Haas
EDB: http://www.enterprisedb.com



Re: Converting contrib SQL functions to new style

From
Tom Lane
Date:
Mark Dilger <mark.dilger@enterprisedb.com> writes:
>> On Apr 13, 2021, at 3:26 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
>> However I think we may still need an assumption that earthdistance
>> and cube are in the same schema --- any comments on that?

> This is probably not worth doing, and we are already past feature
> freeze, but adding syntax to look up the namespace of an extension might
> help.

Yeah, that idea was discussed before (perhaps only in private
security-team threads, though).  We didn't do anything about it because
at the time there didn't seem to be pressing need, but in the context
of SQL function bodies there's an obvious use-case.

> We could get something like this working just inside the CREATE EXTENSION command if we expanded on the @extschema@
ideaa bit.  At first I thought this idea would suffer race conditions with concurrent modifications of pg_extension or
pg_namespace,but it looks like we already have a snapshot when processing the script file, so: 

> -CREATE DOMAIN earth AS cube
> +CREATE DOMAIN @@earthdistance@@::earth AS @@cube@@::cube

Right, extending the @extschema@ mechanism is what was discussed,
though I think I'd lean towards something like @extschema:cube@
to denote the schema of a referenced extension "cube".

I'm not sure this is useful enough to break feature freeze for,
but I'm +1 for investigating it for v15.

            regards, tom lane



Re: Converting contrib SQL functions to new style

From
Tom Lane
Date:
Robert Haas <robertmhaas@gmail.com> writes:
> On Wed, Apr 14, 2021 at 10:49 AM Tom Lane <tgl@sss.pgh.pa.us> wrote:
>> The situation of interest is where you are trying to install an extension
>> into a schema that also contains malicious objects.  We've managed to make
>> most of the commands you might use in an extension script secure against
>> that situation, and Noah wants to hold SQL-function creation to that same
>> standard.

> Oh, I was forgetting that the creation schema has to be first in your
> search path. :-(

> Does the idea of allowing the creation schema to be set separately
> have any legs? Because it seems like that would help here.

Doesn't help that much, because you still have to reference objects
already created by your own extension, so it's hard to see how the
target schema won't need to be in the path.

[ thinks for awhile ... ]

Could we hack things so that extension scripts are only allowed to
reference objects created (a) by the system, (b) earlier in the
same script, or (c) owned by one of the declared prerequisite
extensions?  Seems like that might provide a pretty bulletproof
defense against trojan-horse objects, though I'm not sure how much
of a pain it'd be to implement.

            regards, tom lane



Re: Converting contrib SQL functions to new style

From
Robert Haas
Date:
On Wed, Apr 14, 2021 at 1:41 PM Tom Lane <tgl@sss.pgh.pa.us> wrote:
> Doesn't help that much, because you still have to reference objects
> already created by your own extension, so it's hard to see how the
> target schema won't need to be in the path.

Oh, woops.

> Could we hack things so that extension scripts are only allowed to
> reference objects created (a) by the system, (b) earlier in the
> same script, or (c) owned by one of the declared prerequisite
> extensions?  Seems like that might provide a pretty bulletproof
> defense against trojan-horse objects, though I'm not sure how much
> of a pain it'd be to implement.

That doesn't seem like a crazy idea, but the previous idea of having
some magic syntax that means "the schema where extension FOO is" seems
like it might be easier to implement and more generally useful. If we
taught the core system that %!!**&^%?(earthdistance) means "the schema
where the earthdistance is located" that syntax might get some use
even outside of extension creation scripts, which seems like it could
be a good thing, just because code that is used more widely is more
likely to have been debugged to the point where it actually works.

-- 
Robert Haas
EDB: http://www.enterprisedb.com



Re: Converting contrib SQL functions to new style

From
Tom Lane
Date:
Robert Haas <robertmhaas@gmail.com> writes:
> On Wed, Apr 14, 2021 at 1:41 PM Tom Lane <tgl@sss.pgh.pa.us> wrote:
>> Could we hack things so that extension scripts are only allowed to
>> reference objects created (a) by the system, (b) earlier in the
>> same script, or (c) owned by one of the declared prerequisite
>> extensions?  Seems like that might provide a pretty bulletproof
>> defense against trojan-horse objects, though I'm not sure how much
>> of a pain it'd be to implement.

> That doesn't seem like a crazy idea, but the previous idea of having
> some magic syntax that means "the schema where extension FOO is" seems
> like it might be easier to implement and more generally useful.

I think that's definitely useful, but it's not a fix for the
reference-capture problem unless you care to assume that the other
extension's schema is free of trojan-horse objects.  So I'm thinking
that we really ought to pursue both ideas.

This may mean that squeezing these contrib changes into v14 is a lost
cause.  We certainly shouldn't try to do what I suggest above for
v14; but without it, these changes are just moving the security
issue to a different place rather than eradicating it completely.

            regards, tom lane



Re: Converting contrib SQL functions to new style

From
Andrew Dunstan
Date:
On 4/14/21 2:03 PM, Tom Lane wrote:
> Robert Haas <robertmhaas@gmail.com> writes:
>> On Wed, Apr 14, 2021 at 1:41 PM Tom Lane <tgl@sss.pgh.pa.us> wrote:
>>> Could we hack things so that extension scripts are only allowed to
>>> reference objects created (a) by the system, (b) earlier in the
>>> same script, or (c) owned by one of the declared prerequisite
>>> extensions?  Seems like that might provide a pretty bulletproof
>>> defense against trojan-horse objects, though I'm not sure how much
>>> of a pain it'd be to implement.
>> That doesn't seem like a crazy idea, but the previous idea of having
>> some magic syntax that means "the schema where extension FOO is" seems
>> like it might be easier to implement and more generally useful.
> I think that's definitely useful, but it's not a fix for the
> reference-capture problem unless you care to assume that the other
> extension's schema is free of trojan-horse objects.  So I'm thinking
> that we really ought to pursue both ideas.
>
> This may mean that squeezing these contrib changes into v14 is a lost
> cause.  We certainly shouldn't try to do what I suggest above for
> v14; but without it, these changes are just moving the security
> issue to a different place rather than eradicating it completely.
>
>             



Is there anything else we should be doing along the eat your own dogfood
line that don't have these security implications?


cheers


andrew


-- 

Andrew Dunstan
EDB: https://www.enterprisedb.com




Re: Converting contrib SQL functions to new style

From
Tom Lane
Date:
Andrew Dunstan <andrew@dunslane.net> writes:
> On 4/14/21 2:03 PM, Tom Lane wrote:
>> This may mean that squeezing these contrib changes into v14 is a lost
>> cause.  We certainly shouldn't try to do what I suggest above for
>> v14; but without it, these changes are just moving the security
>> issue to a different place rather than eradicating it completely.

> Is there anything else we should be doing along the eat your own dogfood
> line that don't have these security implications?

We can still convert the initdb-created SQL functions to new style,
since there's no security threat during initdb.  I'll make a patch
for that soon.

            regards, tom lane



Re: Converting contrib SQL functions to new style

From
Vik Fearing
Date:
On 4/14/21 7:36 PM, Tom Lane wrote:
> Mark Dilger <mark.dilger@enterprisedb.com> writes:
>>> On Apr 13, 2021, at 3:26 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
>>> However I think we may still need an assumption that earthdistance
>>> and cube are in the same schema --- any comments on that?
> 
>> This is probably not worth doing, and we are already past feature
>> freeze, but adding syntax to look up the namespace of an extension might
>> help.
> 
> Yeah, that idea was discussed before (perhaps only in private
> security-team threads, though).  We didn't do anything about it because
> at the time there didn't seem to be pressing need, but in the context
> of SQL function bodies there's an obvious use-case.
> 
>> We could get something like this working just inside the CREATE EXTENSION command if we expanded on the @extschema@
ideaa bit.  At first I thought this idea would suffer race conditions with concurrent modifications of pg_extension or
pg_namespace,but it looks like we already have a snapshot when processing the script file, so:
 
> 
>> -CREATE DOMAIN earth AS cube
>> +CREATE DOMAIN @@earthdistance@@::earth AS @@cube@@::cube
> 
> Right, extending the @extschema@ mechanism is what was discussed,
> though I think I'd lean towards something like @extschema:cube@
> to denote the schema of a referenced extension "cube".
> 
> I'm not sure this is useful enough to break feature freeze for,
> but I'm +1 for investigating it for v15.
Just like we have a pseudo "$user" schema, could we have a pseudo
"$extension" catalog?  That should avoid changing grammar rules too much.

CREATE TABLE unaccented_words (
    word "$extension".citext.citext,
    CHECK (word = "$extension".unaccent.unaccent(word)
);

-- 
Vik Fearing



Re: Converting contrib SQL functions to new style

From
Mark Dilger
Date:

> On Apr 14, 2021, at 2:47 PM, Vik Fearing <vik@postgresfriends.org> wrote:
>
> On 4/14/21 7:36 PM, Tom Lane wrote:
>> Mark Dilger <mark.dilger@enterprisedb.com> writes:
>>>> On Apr 13, 2021, at 3:26 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
>>>> However I think we may still need an assumption that earthdistance
>>>> and cube are in the same schema --- any comments on that?
>>
>>> This is probably not worth doing, and we are already past feature
>>> freeze, but adding syntax to look up the namespace of an extension might
>>> help.
>>
>> Yeah, that idea was discussed before (perhaps only in private
>> security-team threads, though).  We didn't do anything about it because
>> at the time there didn't seem to be pressing need, but in the context
>> of SQL function bodies there's an obvious use-case.
>>
>>> We could get something like this working just inside the CREATE EXTENSION command if we expanded on the @extschema@
ideaa bit.  At first I thought this idea would suffer race conditions with concurrent modifications of pg_extension or
pg_namespace,but it looks like we already have a snapshot when processing the script file, so: 
>>
>>> -CREATE DOMAIN earth AS cube
>>> +CREATE DOMAIN @@earthdistance@@::earth AS @@cube@@::cube
>>
>> Right, extending the @extschema@ mechanism is what was discussed,
>> though I think I'd lean towards something like @extschema:cube@
>> to denote the schema of a referenced extension "cube".
>>
>> I'm not sure this is useful enough to break feature freeze for,
>> but I'm +1 for investigating it for v15.
> Just like we have a pseudo "$user" schema, could we have a pseudo
> "$extension" catalog?  That should avoid changing grammar rules too much.
>
> CREATE TABLE unaccented_words (
>    word "$extension".citext.citext,
>    CHECK (word = "$extension".unaccent.unaccent(word)
> );

Having a single variable $extension might help in many cases, but I don't see how to use it to handle the remaining
cross-extensionreferences, such as earthdistance needing to reference cube. 

—
Mark Dilger
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company






Re: Converting contrib SQL functions to new style

From
Vik Fearing
Date:
On 4/15/21 12:18 AM, Mark Dilger wrote:
> 
> 
>> On Apr 14, 2021, at 2:47 PM, Vik Fearing <vik@postgresfriends.org> wrote:
>>
>> On 4/14/21 7:36 PM, Tom Lane wrote:
>>> Mark Dilger <mark.dilger@enterprisedb.com> writes:
>>>>> On Apr 13, 2021, at 3:26 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
>>>>> However I think we may still need an assumption that earthdistance
>>>>> and cube are in the same schema --- any comments on that?
>>>
>>>> This is probably not worth doing, and we are already past feature
>>>> freeze, but adding syntax to look up the namespace of an extension might
>>>> help.
>>>
>>> Yeah, that idea was discussed before (perhaps only in private
>>> security-team threads, though).  We didn't do anything about it because
>>> at the time there didn't seem to be pressing need, but in the context
>>> of SQL function bodies there's an obvious use-case.
>>>
>>>> We could get something like this working just inside the CREATE EXTENSION command if we expanded on the
@extschema@idea a bit.  At first I thought this idea would suffer race conditions with concurrent modifications of
pg_extensionor pg_namespace, but it looks like we already have a snapshot when processing the script file, so:
 
>>>
>>>> -CREATE DOMAIN earth AS cube
>>>> +CREATE DOMAIN @@earthdistance@@::earth AS @@cube@@::cube
>>>
>>> Right, extending the @extschema@ mechanism is what was discussed,
>>> though I think I'd lean towards something like @extschema:cube@
>>> to denote the schema of a referenced extension "cube".
>>>
>>> I'm not sure this is useful enough to break feature freeze for,
>>> but I'm +1 for investigating it for v15.
>> Just like we have a pseudo "$user" schema, could we have a pseudo
>> "$extension" catalog?  That should avoid changing grammar rules too much.
>>
>> CREATE TABLE unaccented_words (
>>    word "$extension".citext.citext,
>>    CHECK (word = "$extension".unaccent.unaccent(word)
>> );
> 
> Having a single variable $extension might help in many cases, but I don't see how to use it to handle the remaining
cross-extensionreferences, such as earthdistance needing to reference cube.
 


Sorry, I hadn't realized that was a real example so I made up my own.

Basically my idea is to use the fully qualified catalog.schema.object
syntax where the catalog is a special "$extension" value (meaning we
would have to forbid that as an actual database name) and the schema is
the name of the extension whose schema we want.  The object is then just
the object.


CREATE DOMAIN earth AS "$extension".cube.cube
  CONSTRAINT not_point check("$extension".cube.cube_is_point(value))
  CONSTRAINT not_3d check("$extension".cube.cube_dim(value <= 3)
  ...;


CREATE FUNCTION earth_box(earth, float8)
 RETURNS "$extension".cube.cube
 LANGUAGE sql
 IMMUTABLE PARALLEL SAFE STRICT
RETURN "$extension".cube.cube_enlarge($1, gc_to_sec($2), 3);


If I had my druthers, we would spell it pg_extension instead of
"$extension" because I hate double-quoting identifiers, but that's just
bikeshedding and has little to do with the concept itself.
-- 
Vik Fearing



Re: Converting contrib SQL functions to new style

From
Alvaro Herrera
Date:
On 2021-Apr-15, Vik Fearing wrote:

> CREATE DOMAIN earth AS "$extension".cube.cube
>   CONSTRAINT not_point check("$extension".cube.cube_is_point(value))
>   CONSTRAINT not_3d check("$extension".cube.cube_dim(value <= 3)
>   ...;

I find this syntax pretty weird -- here, the ".cube." part of the
identifier is acting as an argument of sorts for the preceding
$extension thingy.  This looks very surprising.

Something similar to OPERATOR() syntax may be more palatable:

 CREATE DOMAIN earth AS PG_EXTENSION_SCHEMA(cube).cube
   CONSTRAINT not_point check(PG_EXTENSION_SCHEMA(cube).cube_is_point(value))
   CONSTRAINT not_3d check(PG_EXTENSION_SCHEMA(cube).cube_dim(value <= 3)
   ...;

Here, the PG_EXTENSION_SCHEMA() construct expands into the schema of the
given extension.  This looks more natural to me, since the extension
that acts as argument to PG_EXTENSION_SCHEMA() does look like an
argument.

I don't know if the parser would like this, though.

-- 
Álvaro Herrera       Valdivia, Chile



Re: Converting contrib SQL functions to new style

From
Noah Misch
Date:
On Wed, Apr 14, 2021 at 02:03:56PM -0400, Tom Lane wrote:
> Robert Haas <robertmhaas@gmail.com> writes:
> > On Wed, Apr 14, 2021 at 1:41 PM Tom Lane <tgl@sss.pgh.pa.us> wrote:
> >> Could we hack things so that extension scripts are only allowed to
> >> reference objects created (a) by the system, (b) earlier in the
> >> same script, or (c) owned by one of the declared prerequisite
> >> extensions?  Seems like that might provide a pretty bulletproof
> >> defense against trojan-horse objects, though I'm not sure how much
> >> of a pain it'd be to implement.

Good idea.

> > That doesn't seem like a crazy idea, but the previous idea of having
> > some magic syntax that means "the schema where extension FOO is" seems
> > like it might be easier to implement and more generally useful.
> 
> I think that's definitely useful, but it's not a fix for the
> reference-capture problem unless you care to assume that the other
> extension's schema is free of trojan-horse objects.

I could see using that, perhaps in a non-SQL-language function.  I agree it
solves different problems.



Re: Converting contrib SQL functions to new style

From
Peter Eisentraut
Date:
On 14.04.21 00:26, Tom Lane wrote:
> Attached are some draft patches to convert almost all of the
> contrib modules' SQL functions to use SQL-standard function bodies.

This first patch is still the patch of record in CF 2021-09, but from 
the subsequent discussion, it seems more work is being contemplated.



Re: Converting contrib SQL functions to new style

From
Tom Lane
Date:
Peter Eisentraut <peter.eisentraut@enterprisedb.com> writes:
> On 14.04.21 00:26, Tom Lane wrote:
>> Attached are some draft patches to convert almost all of the
>> contrib modules' SQL functions to use SQL-standard function bodies.

> This first patch is still the patch of record in CF 2021-09, but from 
> the subsequent discussion, it seems more work is being contemplated.

Yeah, it looks like we already did the unconditionally-safe part
(i.e. making initdb-created SQL functions use new style, cf 767982e36).

The rest of this is stuck pending investigation of the ideas about
making new-style function creation safer when the creation-time path
isn't secure, so I suppose we should mark it RWF rather than leaving
it in the queue.  Will go do that.

            regards, tom lane



Re: Converting contrib SQL functions to new style

From
Tom Lane
Date:
Ronan Dunklau <ronan.dunklau@aiven.io> writes:
> Le mercredi 1 septembre 2021, 19:27:35 heure normale d’Europe centrale Tom
> Lane a écrit :
>> The rest of this is stuck pending investigation of the ideas about
>> making new-style function creation safer when the creation-time path
>> isn't secure, so I suppose we should mark it RWF rather than leaving
>> it in the queue.  Will go do that.

> Sorry to revive such an old thread but ... since the introduction of the
> @extschema:name@ syntax in 72a5b1fc880481914da2d4233077438dd87840ca we can now
> proceed with this or am I missing something ?

Yeah, seems like we could make it work with that.

> I've updated the previous patches to convert them to the new-style, added one
> for lo as well.

The cfbot says many of these fail regression tests --- lots of

 CREATE EXTENSION citext SCHEMA s;
+ERROR:  extension "citext" has no installation script nor update path for version "1.8"

and such.  I didn't poke into why, but maybe you missed "git add'ing"
some changes?

            regards, tom lane



Re: Converting contrib SQL functions to new style

From
Tom Lane
Date:
Michael Paquier <michael@paquier.xyz> writes:
> I was wondering what was going on here, and this patch comes down to
> switching all these definitions from that:
> CREATE FUNCTION lo_oid(lo) RETURNS pg_catalog.oid AS
> 'SELECT $1::pg_catalog.oid' LANGUAGE SQL STRICT IMMUTABLE PARALLEL SAFE;

> To that:
> +CREATE OR REPLACE FUNCTION lo_oid(lo) RETURNS pg_catalog.oid
> +LANGUAGE SQL STRICT IMMUTABLE PARALLEL SAFE
> +RETURN (SELECT $1::pg_catalog.oid);

Right.

> This makes the executions more robust run-time search_path checks.  Is
> that something that should be considered for a backpatch, actually?

No, I don't think so.  For one thing, it would not help existing
installations unless they issue "ALTER EXTENSION UPDATE", which
people are not likely to do in a minor update.  But also, we don't
know of live attacks against these functions with their current
definitions, so I don't think this is urgent.

            regards, tom lane



Re: Converting contrib SQL functions to new style

From
Ronan Dunklau
Date:
Le mercredi 6 novembre 2024, 06:52:16 heure normale d’Europe centrale Michael
Paquier a écrit :
> On Tue, Nov 05, 2024 at 08:05:16PM -0500, Tom Lane wrote:
> > No, I don't think so.  For one thing, it would not help existing
> > installations unless they issue "ALTER EXTENSION UPDATE", which
> > people are not likely to do in a minor update.  But also, we don't
> > know of live attacks against these functions with their current
> > definitions, so I don't think this is urgent.
>
> OK, thanks.

For most of them I agree, but one side effect of the current implementation is
that we have a bug when pg_upgrad'ing if earthdistance is installed:
https://www.postgresql.org/message-id/flat/
152106914669.1223.5104148605998271987%40wrigleys.postgresql.org

The new version of earthdistance fixes that problem, so this one would be worth
a back patch to 16, giving users the opportunity to update earthdistance
before running pg_upgrade.

Thanks for the tip about the CI, will make sure to have it setup before next
time.

--
Ronan Dunklau






Re: Converting contrib SQL functions to new style

From
Peter Eisentraut
Date:
On 13.11.24 09:15, Ronan Dunklau wrote:
> Le mardi 12 novembre 2024, 09:30:30 heure normale d’Europe centrale Michael
> Paquier a écrit :
>> On Thu, Nov 07, 2024 at 10:06:37AM +0900, Michael Paquier wrote:
>>> Good point.  Checking all these contrib updates one-by-one is an ant's
>>> work, but I'll see if I can get at least some of them done on HEAD.
>>
>> I've begun looking at that a bit, and there are a couple of things
>> that we could do better with xml2 in 0005 at least in the context of
>> this patch: xpath_nodeset() and xpath_list() don't have any test
>> coverage.  That's not an issue directly related to this patch, but
>> perhaps we should add something for the functions that we are
>> manipulating after this upgrade path at least?  That's one way to
>> automatically make sure that these changes work the same way as the
>> original.
>>
>> The same argument comes up with lo_oid() in 0006.
> 
> Ok, please find attached a new complete patch series including tests for the
> uncovered functions. Tests pass both before and after the move to SQL-body
> functions.

By the way, if we're going to touch all these extension script files to 
make them more modern SQL-like, we could also use named parameters more. 
  For example,

+CREATE OR REPLACE FUNCTION regexp_match(string citext, pattern citext) 
RETURNS TEXT[]
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN pg_catalog.regexp_match( $1::pg_catalog.text, 
$2::pg_catalog.text, 'i' );

could be

+CREATE OR REPLACE FUNCTION regexp_match(string citext, pattern citext) 
RETURNS TEXT[]
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN pg_catalog.regexp_match( string::pg_catalog.text, 
pattern::pg_catalog.text, 'i' );

etc.




Re: Converting contrib SQL functions to new style

From
Tom Lane
Date:
Michael Paquier <michael@paquier.xyz> writes:
> Doing that step-by-step is better than nothing, hence limiting the use
> of named parameters for only the functions whose body is rewritten is
> fine by me, as a first step, as long as the names are used rather the
> dollar parameter numbers.  I'd suggest to do take the bonus step of
> applying the same rule to all the other functions so as everything
> applies with the same extension update in a single major release.
> Perhaps on top of the patches already proposed?  There is no need for
> an extra version bump if all that is done in the same development
> cycle.

FWIW, I don't agree at all with doing argument name addition in
this patchset.  Certainly that's a fine thing to do, but it's an
independent issue and should be handled in an independent patch.

I see that the cfbot is unhappy because it doesn't understand
that some of the patches have been applied already.  I am going
to go ahead and get the earthdistance one done, because we have
a live problem report about that [1].  I'll rebase and repost
the remainder afterwards.

            regards, tom lane

[1] https://www.postgresql.org/message-id/flat/6a6439f1-8039-44e2-8fb9-59028f7f2014%40mailbox.org



Re: Converting contrib SQL functions to new style

From
Tom Lane
Date:
I wrote:
> I see that the cfbot is unhappy because it doesn't understand
> that some of the patches have been applied already.  I am going
> to go ahead and get the earthdistance one done, because we have
> a live problem report about that [1].  I'll rebase and repost
> the remainder afterwards.

Pushed the earthdistance patch, backpatching to v16 as already
discussed upthread.  While it's still fresh in mind, a few
comments about that:

The search path during execution of earthdistance's scripts
is going to be pg_catalog (implicitly first), then earthdistance's
schema, then cube's schema (if different), then pg_temp (to make
sure it's not implicitly before the others).  So that means the
ground rules for object references are:

* References to pg_catalog objects are unconditionally secure,
with or without qualification, if they have simple names (such
as datatypes).  Calls of functions and operators in pg_catalog
are unconditionally secure if the argument data types exactly match
the function or operator.  If implicit casting is required, there's
a hazard of the reference being captured by a better match further
down the search path.  That can be fixed by adding explicit schema
qualification, but I think usually it's better to make the argument
data types match by explicitly casting them.  In particular the
syntax for schema-qualifying an operator is painful enough that
nobody wants to do that.

* References to earthdistance's own objects are again unconditionally
secure for simple names (given no conflicts with pg_catalog).  For
calls of functions and operators, the only safe answer is to make the
argument data types match: schema qualification isn't enough, since
there might be hostile objects in earthdistance's installation schema.

* References to cube's objects require an @extschema:cube@
qualification to ensure they aren't captured by hostile objects
in earthdistance's schema, *and* we have to be careful about
exact argument data type matches to prevent capture by
hostile objects in cube's schema.

Ronan had added some "pg_catalog." qualifications to the previous
version of the patch, but I found all of them to be unnecessary
per these ground rules, and probably not good style because they
convey an impression of schema safety that is entirely false if
the operators aren't likewise qualified.

Also, I realized while looking at the patch that fixing the
functions wasn't sufficient, because we also had

CREATE DOMAIN earth AS cube
  CONSTRAINT not_point check(cube_is_point(value))
  CONSTRAINT not_3d check(cube_dim(value) <= 3)
  CONSTRAINT on_surface check(abs(cube_distance(value, '(0)'::cube) /
  earth() - '1'::float8) < '10e-7'::float8);

The reference to type cube is potentially subvertible, and so are the
cube function calls.  We could fix the constraint expressions (at
great cost) in the 1.2 update script by dropping and re-adding the
domain constraints, but there's nothing the update script can do to
change the domain's base type.

What I did about this was to add @extschema:cube@ decoration in this
command in the 1.1 base script.  I don't think this is a violation of
the normal rule that extension scripts mustn't change post-release,
because the intended results are the same.  (That is, really the rule
is "the results of installation mustn't change".)  This fixes things
for fresh installations including dump/restore updates.  It won't do
anything for you if you're trying to pg_upgrade an already-trojaned
earthdistance extension, but it's better than not addressing the issue
at all.

I think we could now mark earthdistance as trusted (probably only
in HEAD), but it'd be good for someone else to verify it.

            regards, tom lane



Re: Converting contrib SQL functions to new style

From
Tom Lane
Date:
Here's the remaining two patches in the current set.  This is just
to pacify the cfbot: I've not done anything to them, just verified
that they still apply and pass regression.

            regards, tom lane

>From 9c5235cd123eeb55b95b8bfd281dfcc37df197c5 Mon Sep 17 00:00:00 2001
From: Ronan Dunklau <ronan.dunklau@aiven.io>
Date: Mon, 28 Oct 2024 16:13:35 +0100
Subject: [PATCH v4 3/6] Use "new style" SQL function in citext extension

Author: Tom Lane
Discussion: https://www.postgresql.org/message-id/3395418.1618352794%40sss.pgh.pa.us
---
 contrib/citext/Makefile             |  1 +
 contrib/citext/citext--1.7--1.8.sql | 60 +++++++++++++++++++++++++++++
 contrib/citext/citext.control       |  2 +-
 contrib/citext/meson.build          |  1 +
 4 files changed, 63 insertions(+), 1 deletion(-)
 create mode 100644 contrib/citext/citext--1.7--1.8.sql

diff --git a/contrib/citext/Makefile b/contrib/citext/Makefile
index b9b3713f537..fc990607bf2 100644
--- a/contrib/citext/Makefile
+++ b/contrib/citext/Makefile
@@ -4,6 +4,7 @@ MODULES = citext

 EXTENSION = citext
 DATA = citext--1.4.sql \
+    citext--1.7--1.8.sql \
     citext--1.6--1.7.sql \
     citext--1.5--1.6.sql \
     citext--1.4--1.5.sql \
diff --git a/contrib/citext/citext--1.7--1.8.sql b/contrib/citext/citext--1.7--1.8.sql
new file mode 100644
index 00000000000..7c95a9883aa
--- /dev/null
+++ b/contrib/citext/citext--1.7--1.8.sql
@@ -0,0 +1,60 @@
+/* contrib/citext/citext--1.7--1.8.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION citext UPDATE TO '1.8'" to load this file. \quit
+
+CREATE OR REPLACE FUNCTION regexp_match(string citext, pattern citext) RETURNS TEXT[]
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN pg_catalog.regexp_match( $1::pg_catalog.text, $2::pg_catalog.text, 'i' );
+
+CREATE OR REPLACE FUNCTION regexp_match(string citext, pattern citext, flags text) RETURNS TEXT[]
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN pg_catalog.regexp_match( $1::pg_catalog.text, $2::pg_catalog.text, CASE WHEN pg_catalog.strpos($3, 'c') = 0
THEN $3 || 'i' ELSE $3 END ); 
+
+CREATE OR REPLACE FUNCTION regexp_matches(string citext, pattern citext) RETURNS SETOF TEXT[]
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE ROWS 1
+RETURN pg_catalog.regexp_matches( $1::pg_catalog.text, $2::pg_catalog.text, 'i' );
+
+CREATE OR REPLACE FUNCTION regexp_matches(string citext, pattern citext, flags text) RETURNS SETOF TEXT[]
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE ROWS 10
+RETURN pg_catalog.regexp_matches( $1::pg_catalog.text, $2::pg_catalog.text, CASE WHEN pg_catalog.strpos($3, 'c') = 0
THEN $3 || 'i' ELSE $3 END ); 
+
+CREATE OR REPLACE FUNCTION regexp_replace(string citext, pattern citext, replacement text) returns TEXT
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN pg_catalog.regexp_replace( $1::pg_catalog.text, $2::pg_catalog.text, $3, 'i');
+
+CREATE OR REPLACE FUNCTION regexp_replace(string citext, pattern citext, replacement text, flags text) returns TEXT
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN pg_catalog.regexp_replace( $1::pg_catalog.text, $2::pg_catalog.text, $3, CASE WHEN pg_catalog.strpos($4, 'c') =
0THEN  $4 || 'i' ELSE $4 END); 
+
+CREATE OR REPLACE FUNCTION regexp_split_to_array(string citext, pattern citext) RETURNS TEXT[]
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN pg_catalog.regexp_split_to_array( $1::pg_catalog.text, $2::pg_catalog.text, 'i' );
+
+CREATE OR REPLACE FUNCTION regexp_split_to_array(string citext, pattern citext, flags text) RETURNS TEXT[]
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN pg_catalog.regexp_split_to_array( $1::pg_catalog.text, $2::pg_catalog.text, CASE WHEN pg_catalog.strpos($3,
'c')= 0 THEN  $3 || 'i' ELSE $3 END ); 
+
+CREATE OR REPLACE FUNCTION regexp_split_to_table(string citext, pattern citext) RETURNS SETOF TEXT
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN pg_catalog.regexp_split_to_table( $1::pg_catalog.text, $2::pg_catalog.text, 'i' );
+
+CREATE OR REPLACE FUNCTION regexp_split_to_table(string citext, pattern citext, flags text) RETURNS SETOF TEXT
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN pg_catalog.regexp_split_to_table( $1::pg_catalog.text, $2::pg_catalog.text, CASE WHEN pg_catalog.strpos($3,
'c')= 0 THEN  $3 || 'i' ELSE $3 END ); 
+
+CREATE OR REPLACE FUNCTION strpos( citext, citext ) RETURNS INT
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN pg_catalog.strpos( pg_catalog.lower( $1::pg_catalog.text ), pg_catalog.lower( $2::pg_catalog.text ) );
+
+CREATE OR REPLACE FUNCTION replace( citext, citext, citext ) RETURNS TEXT
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN pg_catalog.regexp_replace( $1::pg_catalog.text, pg_catalog.regexp_replace($2::pg_catalog.text,
'([^a-zA-Z_0-9])',E'\\\\\\1', 'g'), $3::pg_catalog.text, 'gi' ); 
+
+CREATE OR REPLACE FUNCTION split_part( citext, citext, int ) RETURNS TEXT
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN (pg_catalog.regexp_split_to_array( $1::pg_catalog.text, pg_catalog.regexp_replace($2::pg_catalog.text,
'([^a-zA-Z_0-9])',E'\\\\\\1', 'g'), 'i'))[$3]; 
+
+CREATE OR REPLACE FUNCTION translate( citext, citext, text ) RETURNS TEXT
+LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE
+RETURN pg_catalog.translate( pg_catalog.translate( $1::pg_catalog.text, pg_catalog.lower($2::pg_catalog.text), $3),
pg_catalog.upper($2::pg_catalog.text),$3); 
diff --git a/contrib/citext/citext.control b/contrib/citext/citext.control
index f82265b3347..2b0f3fa8407 100644
--- a/contrib/citext/citext.control
+++ b/contrib/citext/citext.control
@@ -1,6 +1,6 @@
 # citext extension
 comment = 'data type for case-insensitive character strings'
-default_version = '1.7'
+default_version = '1.8'
 module_pathname = '$libdir/citext'
 relocatable = true
 trusted = true
diff --git a/contrib/citext/meson.build b/contrib/citext/meson.build
index 40cdd0d2f18..ae87445e6a4 100644
--- a/contrib/citext/meson.build
+++ b/contrib/citext/meson.build
@@ -26,6 +26,7 @@ install_data(
   'citext--1.4--1.5.sql',
   'citext--1.5--1.6.sql',
   'citext--1.6--1.7.sql',
+  'citext--1.7--1.8.sql',
   kwargs: contrib_data_args,
 )

--
2.47.0

>From ca3f87189eaf060a91486b5d2167930ecdf7b541 Mon Sep 17 00:00:00 2001
From: Ronan Dunklau <ronan.dunklau@aiven.io>
Date: Mon, 28 Oct 2024 16:25:52 +0100
Subject: [PATCH v4 5/6] Use "new style" SQL function in xml2 extension

Author: Tom Lane
Discussion: https://www.postgresql.org/message-id/3395418.1618352794%40sss.pgh.pa.us
---
 contrib/xml2/Makefile           |  4 +++-
 contrib/xml2/meson.build        |  1 +
 contrib/xml2/xml2--1.1--1.2.sql | 18 ++++++++++++++++++
 contrib/xml2/xml2.control       |  2 +-
 4 files changed, 23 insertions(+), 2 deletions(-)
 create mode 100644 contrib/xml2/xml2--1.1--1.2.sql

diff --git a/contrib/xml2/Makefile b/contrib/xml2/Makefile
index 0d703fe0e8f..8597e9aa9c5 100644
--- a/contrib/xml2/Makefile
+++ b/contrib/xml2/Makefile
@@ -7,7 +7,9 @@ OBJS = \
     xslt_proc.o

 EXTENSION = xml2
-DATA = xml2--1.1.sql xml2--1.0--1.1.sql
+DATA = xml2--1.1.sql \
+    xml2--1.1--1.2.sql \
+    xml2--1.0--1.1.sql
 PGFILEDESC = "xml2 - XPath querying and XSLT"

 REGRESS = xml2
diff --git a/contrib/xml2/meson.build b/contrib/xml2/meson.build
index 5e80e17f824..32d9ab53cbd 100644
--- a/contrib/xml2/meson.build
+++ b/contrib/xml2/meson.build
@@ -27,6 +27,7 @@ contrib_targets += xml2
 install_data(
   'xml2--1.0--1.1.sql',
   'xml2--1.1.sql',
+  'xml2--1.1--1.2.sql',
   'xml2.control',
   kwargs: contrib_data_args,
 )
diff --git a/contrib/xml2/xml2--1.1--1.2.sql b/contrib/xml2/xml2--1.1--1.2.sql
new file mode 100644
index 00000000000..12d00064e8f
--- /dev/null
+++ b/contrib/xml2/xml2--1.1--1.2.sql
@@ -0,0 +1,18 @@
+/* contrib/xml2/xml2--1.1--1.2.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION xml2 UPDATE TO '1.2'" to load this file. \quit
+
+CREATE OR REPLACE FUNCTION xpath_list(text,text) RETURNS text
+LANGUAGE SQL STRICT IMMUTABLE PARALLEL SAFE
+RETURN xpath_list($1,$2,',');
+
+CREATE OR REPLACE FUNCTION xpath_nodeset(text,text)
+RETURNS text
+LANGUAGE SQL STRICT IMMUTABLE PARALLEL SAFE
+RETURN xpath_nodeset($1,$2,'','');
+
+CREATE OR REPLACE FUNCTION xpath_nodeset(text,text,text)
+RETURNS text
+LANGUAGE SQL STRICT IMMUTABLE PARALLEL SAFE
+RETURN xpath_nodeset($1,$2,'',$3);
diff --git a/contrib/xml2/xml2.control b/contrib/xml2/xml2.control
index ba2c0599a37..b32156c949e 100644
--- a/contrib/xml2/xml2.control
+++ b/contrib/xml2/xml2.control
@@ -1,6 +1,6 @@
 # xml2 extension
 comment = 'XPath querying and XSLT'
-default_version = '1.1'
+default_version = '1.2'
 module_pathname = '$libdir/pgxml'
 # XXX do we still need this to be non-relocatable?
 relocatable = false
--
2.47.0