Re: Leakproofness of texteq()/textne() - Mailing list pgsql-hackers

From Tom Lane
Subject Re: Leakproofness of texteq()/textne()
Date
Msg-id 30973.1568739609@sss.pgh.pa.us
Whole thread Raw
In response to Re: Leakproofness of texteq()/textne()  (Peter Eisentraut <peter.eisentraut@2ndquadrant.com>)
List pgsql-hackers
Peter Eisentraut <peter.eisentraut@2ndquadrant.com> writes:
> On 2019-09-16 06:24, Tom Lane wrote:
>> So it seems that the consensus is that it's okay to mark these
>> functions leakproof, because if any of the errors they throw
>> are truly reachable for other than data-corruption reasons,
>> we would wish to try to prevent such errors.  (Maybe through
>> upstream validity checks?  Hard to say how we'd do it exactly,
>> when we don't have an idea what the problem is.)

> Yeah, it seems like as we expand our Unicode capabilities, we will see
> more cases like "it could fail here in theory, but it shouldn't happen
> for normal data", and the answer can't be to call all that untrusted or
> leaky.  It's the job of the database software to sort that out.
> Obviously, it will require careful evaluation in each case.

Here's a proposed patch to mark functions that depend on varstr_cmp
as leakproof.  I think we can apply this to HEAD and then close the
open item as "won't fix for v12".

            regards, tom lane

diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index d36156f..8c18294 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1463,6 +1463,12 @@ check_collation_set(Oid collid)
  *    to allow null-termination for inputs to strcoll().
  * Returns an integer less than, equal to, or greater than zero, indicating
  * whether arg1 is less than, equal to, or greater than arg2.
+ *
+ * Note: many functions that depend on this are marked leakproof; therefore,
+ * avoid reporting the actual contents of the input when throwing errors.
+ * All errors herein should be things that can't happen except on corrupt
+ * data, anyway; otherwise we will have trouble with indexing strings that
+ * would cause them.
  */
 int
 varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index e6645f1..5e69dee 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -681,44 +681,44 @@
   proname => 'nameeqtext', proleakproof => 't', prorettype => 'bool',
   proargtypes => 'name text', prosrc => 'nameeqtext' },
 { oid => '241',
-  proname => 'namelttext', prorettype => 'bool', proargtypes => 'name text',
-  prosrc => 'namelttext' },
+  proname => 'namelttext', proleakproof => 't', prorettype => 'bool',
+  proargtypes => 'name text', prosrc => 'namelttext' },
 { oid => '242',
-  proname => 'nameletext', prorettype => 'bool', proargtypes => 'name text',
-  prosrc => 'nameletext' },
+  proname => 'nameletext', proleakproof => 't', prorettype => 'bool',
+  proargtypes => 'name text', prosrc => 'nameletext' },
 { oid => '243',
-  proname => 'namegetext', prorettype => 'bool', proargtypes => 'name text',
-  prosrc => 'namegetext' },
+  proname => 'namegetext', proleakproof => 't', prorettype => 'bool',
+  proargtypes => 'name text', prosrc => 'namegetext' },
 { oid => '244',
-  proname => 'namegttext', prorettype => 'bool', proargtypes => 'name text',
-  prosrc => 'namegttext' },
+  proname => 'namegttext', proleakproof => 't', prorettype => 'bool',
+  proargtypes => 'name text', prosrc => 'namegttext' },
 { oid => '245',
   proname => 'namenetext', proleakproof => 't', prorettype => 'bool',
   proargtypes => 'name text', prosrc => 'namenetext' },
 { oid => '246', descr => 'less-equal-greater',
-  proname => 'btnametextcmp', prorettype => 'int4', proargtypes => 'name text',
-  prosrc => 'btnametextcmp' },
+  proname => 'btnametextcmp', proleakproof => 't', prorettype => 'int4',
+  proargtypes => 'name text', prosrc => 'btnametextcmp' },
 { oid => '247',
   proname => 'texteqname', proleakproof => 't', prorettype => 'bool',
   proargtypes => 'text name', prosrc => 'texteqname' },
 { oid => '248',
-  proname => 'textltname', prorettype => 'bool', proargtypes => 'text name',
-  prosrc => 'textltname' },
+  proname => 'textltname', proleakproof => 't', prorettype => 'bool',
+  proargtypes => 'text name', prosrc => 'textltname' },
 { oid => '249',
-  proname => 'textlename', prorettype => 'bool', proargtypes => 'text name',
-  prosrc => 'textlename' },
+  proname => 'textlename', proleakproof => 't', prorettype => 'bool',
+  proargtypes => 'text name', prosrc => 'textlename' },
 { oid => '250',
-  proname => 'textgename', prorettype => 'bool', proargtypes => 'text name',
-  prosrc => 'textgename' },
+  proname => 'textgename', proleakproof => 't', prorettype => 'bool',
+  proargtypes => 'text name', prosrc => 'textgename' },
 { oid => '251',
-  proname => 'textgtname', prorettype => 'bool', proargtypes => 'text name',
-  prosrc => 'textgtname' },
+  proname => 'textgtname', proleakproof => 't', prorettype => 'bool',
+  proargtypes => 'text name', prosrc => 'textgtname' },
 { oid => '252',
   proname => 'textnename', proleakproof => 't', prorettype => 'bool',
   proargtypes => 'text name', prosrc => 'textnename' },
 { oid => '253', descr => 'less-equal-greater',
-  proname => 'bttextnamecmp', prorettype => 'int4', proargtypes => 'text name',
-  prosrc => 'bttextnamecmp' },
+  proname => 'bttextnamecmp', proleakproof => 't', prorettype => 'int4',
+  proargtypes => 'text name', prosrc => 'bttextnamecmp' },

 { oid => '266', descr => 'concatenate name and oid',
   proname => 'nameconcatoid', prorettype => 'name', proargtypes => 'name oid',
@@ -1002,14 +1002,14 @@
   proname => 'btcharcmp', proleakproof => 't', prorettype => 'int4',
   proargtypes => 'char char', prosrc => 'btcharcmp' },
 { oid => '359', descr => 'less-equal-greater',
-  proname => 'btnamecmp', prorettype => 'int4', proargtypes => 'name name',
-  prosrc => 'btnamecmp' },
+  proname => 'btnamecmp', proleakproof => 't', prorettype => 'int4',
+  proargtypes => 'name name', prosrc => 'btnamecmp' },
 { oid => '3135', descr => 'sort support',
   proname => 'btnamesortsupport', prorettype => 'void',
   proargtypes => 'internal', prosrc => 'btnamesortsupport' },
 { oid => '360', descr => 'less-equal-greater',
-  proname => 'bttextcmp', prorettype => 'int4', proargtypes => 'text text',
-  prosrc => 'bttextcmp' },
+  proname => 'bttextcmp', proleakproof => 't', prorettype => 'int4',
+  proargtypes => 'text text', prosrc => 'bttextcmp' },
 { oid => '3255', descr => 'sort support',
   proname => 'bttextsortsupport', prorettype => 'void',
   proargtypes => 'internal', prosrc => 'bttextsortsupport' },
@@ -1230,11 +1230,11 @@
   proargmodes => '{v}', prosrc => 'pg_num_nonnulls' },

 { oid => '458', descr => 'larger of two',
-  proname => 'text_larger', prorettype => 'text', proargtypes => 'text text',
-  prosrc => 'text_larger' },
+  proname => 'text_larger', proleakproof => 't', prorettype => 'text',
+  proargtypes => 'text text', prosrc => 'text_larger' },
 { oid => '459', descr => 'smaller of two',
-  proname => 'text_smaller', prorettype => 'text', proargtypes => 'text text',
-  prosrc => 'text_smaller' },
+  proname => 'text_smaller', proleakproof => 't', prorettype => 'text',
+  proargtypes => 'text text', prosrc => 'text_smaller' },

 { oid => '460', descr => 'I/O',
   proname => 'int8in', prorettype => 'int8', proargtypes => 'cstring',
@@ -1334,17 +1334,17 @@
   prosrc => 'int28' },

 { oid => '655',
-  proname => 'namelt', prorettype => 'bool', proargtypes => 'name name',
-  prosrc => 'namelt' },
+  proname => 'namelt', proleakproof => 't', prorettype => 'bool',
+  proargtypes => 'name name', prosrc => 'namelt' },
 { oid => '656',
-  proname => 'namele', prorettype => 'bool', proargtypes => 'name name',
-  prosrc => 'namele' },
+  proname => 'namele', proleakproof => 't', prorettype => 'bool',
+  proargtypes => 'name name', prosrc => 'namele' },
 { oid => '657',
-  proname => 'namegt', prorettype => 'bool', proargtypes => 'name name',
-  prosrc => 'namegt' },
+  proname => 'namegt', proleakproof => 't', prorettype => 'bool',
+  proargtypes => 'name name', prosrc => 'namegt' },
 { oid => '658',
-  proname => 'namege', prorettype => 'bool', proargtypes => 'name name',
-  prosrc => 'namege' },
+  proname => 'namege', proleakproof => 't', prorettype => 'bool',
+  proargtypes => 'name name', prosrc => 'namege' },
 { oid => '659',
   proname => 'namene', proleakproof => 't', prorettype => 'bool',
   proargtypes => 'name name', prosrc => 'namene' },
@@ -1451,17 +1451,17 @@
   proargtypes => 'circle point', prosrc => 'dist_cpoint' },

 { oid => '740',
-  proname => 'text_lt', prorettype => 'bool', proargtypes => 'text text',
-  prosrc => 'text_lt' },
+  proname => 'text_lt', proleakproof => 't', prorettype => 'bool',
+  proargtypes => 'text text', prosrc => 'text_lt' },
 { oid => '741',
-  proname => 'text_le', prorettype => 'bool', proargtypes => 'text text',
-  prosrc => 'text_le' },
+  proname => 'text_le', proleakproof => 't', prorettype => 'bool',
+  proargtypes => 'text text', prosrc => 'text_le' },
 { oid => '742',
-  proname => 'text_gt', prorettype => 'bool', proargtypes => 'text text',
-  prosrc => 'text_gt' },
+  proname => 'text_gt', proleakproof => 't', prorettype => 'bool',
+  proargtypes => 'text text', prosrc => 'text_gt' },
 { oid => '743',
-  proname => 'text_ge', prorettype => 'bool', proargtypes => 'text text',
-  prosrc => 'text_ge' },
+  proname => 'text_ge', proleakproof => 't', prorettype => 'bool',
+  proargtypes => 'text text', prosrc => 'text_ge' },

 { oid => '745', descr => 'current user name',
   proname => 'current_user', provolatile => 's', prorettype => 'name',
@@ -2051,29 +2051,29 @@
   proname => 'bpchareq', proleakproof => 't', prorettype => 'bool',
   proargtypes => 'bpchar bpchar', prosrc => 'bpchareq' },
 { oid => '1049',
-  proname => 'bpcharlt', prorettype => 'bool', proargtypes => 'bpchar bpchar',
-  prosrc => 'bpcharlt' },
+  proname => 'bpcharlt', proleakproof => 't', prorettype => 'bool',
+  proargtypes => 'bpchar bpchar', prosrc => 'bpcharlt' },
 { oid => '1050',
-  proname => 'bpcharle', prorettype => 'bool', proargtypes => 'bpchar bpchar',
-  prosrc => 'bpcharle' },
+  proname => 'bpcharle', proleakproof => 't', prorettype => 'bool',
+  proargtypes => 'bpchar bpchar', prosrc => 'bpcharle' },
 { oid => '1051',
-  proname => 'bpchargt', prorettype => 'bool', proargtypes => 'bpchar bpchar',
-  prosrc => 'bpchargt' },
+  proname => 'bpchargt', proleakproof => 't', prorettype => 'bool',
+  proargtypes => 'bpchar bpchar', prosrc => 'bpchargt' },
 { oid => '1052',
-  proname => 'bpcharge', prorettype => 'bool', proargtypes => 'bpchar bpchar',
-  prosrc => 'bpcharge' },
+  proname => 'bpcharge', proleakproof => 't', prorettype => 'bool',
+  proargtypes => 'bpchar bpchar', prosrc => 'bpcharge' },
 { oid => '1053',
   proname => 'bpcharne', proleakproof => 't', prorettype => 'bool',
   proargtypes => 'bpchar bpchar', prosrc => 'bpcharne' },
 { oid => '1063', descr => 'larger of two',
-  proname => 'bpchar_larger', prorettype => 'bpchar',
+  proname => 'bpchar_larger', proleakproof => 't', prorettype => 'bpchar',
   proargtypes => 'bpchar bpchar', prosrc => 'bpchar_larger' },
 { oid => '1064', descr => 'smaller of two',
-  proname => 'bpchar_smaller', prorettype => 'bpchar',
+  proname => 'bpchar_smaller', proleakproof => 't', prorettype => 'bpchar',
   proargtypes => 'bpchar bpchar', prosrc => 'bpchar_smaller' },
 { oid => '1078', descr => 'less-equal-greater',
-  proname => 'bpcharcmp', prorettype => 'int4', proargtypes => 'bpchar bpchar',
-  prosrc => 'bpcharcmp' },
+  proname => 'bpcharcmp', proleakproof => 't', prorettype => 'int4',
+  proargtypes => 'bpchar bpchar', prosrc => 'bpcharcmp' },
 { oid => '3328', descr => 'sort support',
   proname => 'bpchar_sortsupport', prorettype => 'void',
   proargtypes => 'internal', prosrc => 'bpchar_sortsupport' },
@@ -6541,38 +6541,38 @@
   proargtypes => 'float8 float8', prosrc => 'aggregate_dummy' },

 { oid => '2160',
-  proname => 'text_pattern_lt', prorettype => 'bool',
+  proname => 'text_pattern_lt', proleakproof => 't', prorettype => 'bool',
   proargtypes => 'text text', prosrc => 'text_pattern_lt' },
 { oid => '2161',
-  proname => 'text_pattern_le', prorettype => 'bool',
+  proname => 'text_pattern_le', proleakproof => 't', prorettype => 'bool',
   proargtypes => 'text text', prosrc => 'text_pattern_le' },
 { oid => '2163',
-  proname => 'text_pattern_ge', prorettype => 'bool',
+  proname => 'text_pattern_ge', proleakproof => 't', prorettype => 'bool',
   proargtypes => 'text text', prosrc => 'text_pattern_ge' },
 { oid => '2164',
-  proname => 'text_pattern_gt', prorettype => 'bool',
+  proname => 'text_pattern_gt', proleakproof => 't', prorettype => 'bool',
   proargtypes => 'text text', prosrc => 'text_pattern_gt' },
 { oid => '2166', descr => 'less-equal-greater',
-  proname => 'bttext_pattern_cmp', prorettype => 'int4',
+  proname => 'bttext_pattern_cmp', proleakproof => 't', prorettype => 'int4',
   proargtypes => 'text text', prosrc => 'bttext_pattern_cmp' },
 { oid => '3332', descr => 'sort support',
   proname => 'bttext_pattern_sortsupport', prorettype => 'void',
   proargtypes => 'internal', prosrc => 'bttext_pattern_sortsupport' },

 { oid => '2174',
-  proname => 'bpchar_pattern_lt', prorettype => 'bool',
+  proname => 'bpchar_pattern_lt', proleakproof => 't', prorettype => 'bool',
   proargtypes => 'bpchar bpchar', prosrc => 'bpchar_pattern_lt' },
 { oid => '2175',
-  proname => 'bpchar_pattern_le', prorettype => 'bool',
+  proname => 'bpchar_pattern_le', proleakproof => 't', prorettype => 'bool',
   proargtypes => 'bpchar bpchar', prosrc => 'bpchar_pattern_le' },
 { oid => '2177',
-  proname => 'bpchar_pattern_ge', prorettype => 'bool',
+  proname => 'bpchar_pattern_ge', proleakproof => 't', prorettype => 'bool',
   proargtypes => 'bpchar bpchar', prosrc => 'bpchar_pattern_ge' },
 { oid => '2178',
-  proname => 'bpchar_pattern_gt', prorettype => 'bool',
+  proname => 'bpchar_pattern_gt', proleakproof => 't', prorettype => 'bool',
   proargtypes => 'bpchar bpchar', prosrc => 'bpchar_pattern_gt' },
 { oid => '2180', descr => 'less-equal-greater',
-  proname => 'btbpchar_pattern_cmp', prorettype => 'int4',
+  proname => 'btbpchar_pattern_cmp', proleakproof => 't', prorettype => 'int4',
   proargtypes => 'bpchar bpchar', prosrc => 'btbpchar_pattern_cmp' },
 { oid => '3333', descr => 'sort support',
   proname => 'btbpchar_pattern_sortsupport', prorettype => 'void',
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 33c058f..c19740e 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -526,9 +526,19 @@ int42ge(integer,smallint)
 oideq(oid,oid)
 oidne(oid,oid)
 nameeqtext(name,text)
+namelttext(name,text)
+nameletext(name,text)
+namegetext(name,text)
+namegttext(name,text)
 namenetext(name,text)
+btnametextcmp(name,text)
 texteqname(text,name)
+textltname(text,name)
+textlename(text,name)
+textgename(text,name)
+textgtname(text,name)
 textnename(text,name)
+bttextnamecmp(text,name)
 float4eq(real,real)
 float4ne(real,real)
 float4lt(real,real)
@@ -559,8 +569,12 @@ btfloat4cmp(real,real)
 btfloat8cmp(double precision,double precision)
 btoidcmp(oid,oid)
 btcharcmp("char","char")
+btnamecmp(name,name)
+bttextcmp(text,text)
 cash_cmp(money,money)
 btoidvectorcmp(oidvector,oidvector)
+text_larger(text,text)
+text_smaller(text,text)
 int8eq(bigint,bigint)
 int8ne(bigint,bigint)
 int8lt(bigint,bigint)
@@ -574,6 +588,10 @@ int84gt(bigint,integer)
 int84le(bigint,integer)
 int84ge(bigint,integer)
 oidvectorne(oidvector,oidvector)
+namelt(name,name)
+namele(name,name)
+namegt(name,name)
+namege(name,name)
 namene(name,name)
 oidvectorlt(oidvector,oidvector)
 oidvectorle(oidvector,oidvector)
@@ -582,6 +600,10 @@ oidvectorge(oidvector,oidvector)
 oidvectorgt(oidvector,oidvector)
 oidlt(oid,oid)
 oidle(oid,oid)
+text_lt(text,text)
+text_le(text,text)
+text_gt(text,text)
+text_ge(text,text)
 macaddr_eq(macaddr,macaddr)
 macaddr_lt(macaddr,macaddr)
 macaddr_le(macaddr,macaddr)
@@ -611,7 +633,14 @@ network_ne(inet,inet)
 network_cmp(inet,inet)
 lseg_eq(lseg,lseg)
 bpchareq(character,character)
+bpcharlt(character,character)
+bpcharle(character,character)
+bpchargt(character,character)
+bpcharge(character,character)
 bpcharne(character,character)
+bpchar_larger(character,character)
+bpchar_smaller(character,character)
+bpcharcmp(character,character)
 date_eq(date,date)
 date_lt(date,date)
 date_le(date,date)
@@ -707,6 +736,16 @@ timestamp_lt(timestamp without time zone,timestamp without time zone)
 timestamp_le(timestamp without time zone,timestamp without time zone)
 timestamp_ge(timestamp without time zone,timestamp without time zone)
 timestamp_gt(timestamp without time zone,timestamp without time zone)
+text_pattern_lt(text,text)
+text_pattern_le(text,text)
+text_pattern_ge(text,text)
+text_pattern_gt(text,text)
+bttext_pattern_cmp(text,text)
+bpchar_pattern_lt(character,character)
+bpchar_pattern_le(character,character)
+bpchar_pattern_ge(character,character)
+bpchar_pattern_gt(character,character)
+btbpchar_pattern_cmp(character,character)
 btint48cmp(integer,bigint)
 btint84cmp(bigint,integer)
 btint24cmp(smallint,integer)
@@ -1322,8 +1361,6 @@ WHERE o1.oprnegate = o2.oid AND p1.oid = o1.oprcode AND p2.oid = o2.oprcode AND

 -- Btree comparison operators' functions should have the same volatility
 -- and leakproofness markings as the associated comparison support function.
--- As of Postgres 12, the only exceptions are that textual equality functions
--- are marked leakproof but textual comparison/inequality functions are not.
 SELECT pp.oid::regprocedure as proc, pp.provolatile as vp, pp.proleakproof as lp,
        po.oid::regprocedure as opr, po.provolatile as vo, po.proleakproof as lo
 FROM pg_proc pp, pg_proc po, pg_operator o, pg_amproc ap, pg_amop ao
@@ -1336,16 +1373,9 @@ WHERE pp.oid = ap.amproc AND po.oid = o.oprcode AND o.oid = ao.amopopr AND
     (pp.provolatile != po.provolatile OR
      pp.proleakproof != po.proleakproof)
 ORDER BY 1;
-                   proc                    | vp | lp |              opr              | vo | lo
--------------------------------------------+----+----+-------------------------------+----+----
- btnametextcmp(name,text)                  | i  | f  | nameeqtext(name,text)         | i  | t
- bttextnamecmp(text,name)                  | i  | f  | texteqname(text,name)         | i  | t
- btnamecmp(name,name)                      | i  | f  | nameeq(name,name)             | i  | t
- bttextcmp(text,text)                      | i  | f  | texteq(text,text)             | i  | t
- bpcharcmp(character,character)            | i  | f  | bpchareq(character,character) | i  | t
- bttext_pattern_cmp(text,text)             | i  | f  | texteq(text,text)             | i  | t
- btbpchar_pattern_cmp(character,character) | i  | f  | bpchareq(character,character) | i  | t
-(7 rows)
+ proc | vp | lp | opr | vo | lo
+------+----+----+-----+----+----
+(0 rows)

 -- **************** pg_aggregate ****************
 -- Look for illegal values in pg_aggregate fields.
diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql
index 1227ef7..624bea4 100644
--- a/src/test/regress/sql/opr_sanity.sql
+++ b/src/test/regress/sql/opr_sanity.sql
@@ -806,8 +806,6 @@ WHERE o1.oprnegate = o2.oid AND p1.oid = o1.oprcode AND p2.oid = o2.oprcode AND

 -- Btree comparison operators' functions should have the same volatility
 -- and leakproofness markings as the associated comparison support function.
--- As of Postgres 12, the only exceptions are that textual equality functions
--- are marked leakproof but textual comparison/inequality functions are not.
 SELECT pp.oid::regprocedure as proc, pp.provolatile as vp, pp.proleakproof as lp,
        po.oid::regprocedure as opr, po.provolatile as vo, po.proleakproof as lo
 FROM pg_proc pp, pg_proc po, pg_operator o, pg_amproc ap, pg_amop ao

pgsql-hackers by date:

Previous
From: Robert Haas
Date:
Subject: Re: block-level incremental backup
Next
From: Peter Eisentraut
Date:
Subject: some PostgreSQL 12 release notes comments