Re: Error-safe user functions - Mailing list pgsql-hackers

From Tom Lane
Subject Re: Error-safe user functions
Date
Msg-id 3526121.1672000729@sss.pgh.pa.us
Whole thread Raw
In response to Re: Error-safe user functions  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-hackers
I got annoyed by the fact that types cid, xid, xid8 don't throw
error even for obvious garbage, because they just believe the
result of strtoul or strtoull without any checking.  That was
probably up to project standards when cidin and xidin were
written; but surely it's not anymore, especially when we can
piggyback on work already done for type oid.

Anybody have an objection to the following?  One note is that
because we already had test cases checking that xid would
accept hex input, I made the common subroutines use "0" not
"10" for strtoul's last argument, meaning that oid will accept
hex now too.

            regards, tom lane

diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index 7cded73e6e..c67a79344a 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -477,6 +477,180 @@ invalid_syntax:
                     "bigint", s)));
 }

+/*
+ * Convert input string to an unsigned 32 bit integer.
+ *
+ * Allows any number of leading or trailing whitespace characters.
+ *
+ * If endloc isn't NULL, store a pointer to the rest of the string there,
+ * so that caller can parse the rest.  Otherwise, it's an error if anything
+ * but whitespace follows.
+ *
+ * typname is what is reported in error messges.
+ *
+ * If escontext points to an ErrorSaveContext node, that is filled instead
+ * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
+ * to detect errors.
+ */
+uint32
+uint32in_subr(const char *s, char **endloc,
+              const char *typname, Node *escontext)
+{
+    uint32        result;
+    unsigned long cvt;
+    char       *endptr;
+
+    /* Ensure that empty-input is handled consistently across platforms */
+    if (*s == '\0')
+        ereturn(escontext, 0,
+                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                 errmsg("invalid input syntax for type %s: \"%s\"",
+                        typname, s)));
+
+    errno = 0;
+    cvt = strtoul(s, &endptr, 0);
+
+    /*
+     * strtoul() normally only sets ERANGE.  On some systems it also may set
+     * EINVAL, which simply means it couldn't parse the input string. This is
+     * handled by the second "if" consistent across platforms.
+     */
+    if (errno && errno != ERANGE && errno != EINVAL)
+        ereturn(escontext, 0,
+                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                 errmsg("invalid input syntax for type %s: \"%s\"",
+                        typname, s)));
+
+    if (endptr == s)
+        ereturn(escontext, 0,
+                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                 errmsg("invalid input syntax for type %s: \"%s\"",
+                        typname, s)));
+
+    if (errno == ERANGE)
+        ereturn(escontext, 0,
+                (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                 errmsg("value \"%s\" is out of range for type %s",
+                        s, typname)));
+
+    if (endloc)
+    {
+        /* caller wants to deal with rest of string */
+        *endloc = endptr;
+    }
+    else
+    {
+        /* allow only whitespace after number */
+        while (*endptr && isspace((unsigned char) *endptr))
+            endptr++;
+        if (*endptr)
+            ereturn(escontext, 0,
+                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                     errmsg("invalid input syntax for type %s: \"%s\"",
+                            typname, s)));
+    }
+
+    result = (uint32) cvt;
+
+    /*
+     * Cope with possibility that unsigned long is wider than uint32, in which
+     * case strtoul will not raise an error for some values that are out of
+     * the range of uint32.
+     *
+     * For backwards compatibility, we want to accept inputs that are given
+     * with a minus sign, so allow the input value if it matches after either
+     * signed or unsigned extension to long.
+     *
+     * To ensure consistent results on 32-bit and 64-bit platforms, make sure
+     * the error message is the same as if strtoul() had returned ERANGE.
+     */
+#if PG_UINT32_MAX != ULONG_MAX
+    if (cvt != (unsigned long) result &&
+        cvt != (unsigned long) ((int) result))
+        ereturn(escontext, 0,
+                (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                 errmsg("value \"%s\" is out of range for type %s",
+                        s, typname)));
+#endif
+
+    return result;
+}
+
+/*
+ * Convert input string to an unsigned 64 bit integer.
+ *
+ * Allows any number of leading or trailing whitespace characters.
+ *
+ * If endloc isn't NULL, store a pointer to the rest of the string there,
+ * so that caller can parse the rest.  Otherwise, it's an error if anything
+ * but whitespace follows.
+ *
+ * typname is what is reported in error messges.
+ *
+ * If escontext points to an ErrorSaveContext node, that is filled instead
+ * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
+ * to detect errors.
+ */
+uint64
+uint64in_subr(const char *s, char **endloc,
+              const char *typname, Node *escontext)
+{
+    uint64        result;
+    char       *endptr;
+
+    /* Ensure that empty-input is handled consistently across platforms */
+    if (*s == '\0')
+        ereturn(escontext, 0,
+                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                 errmsg("invalid input syntax for type %s: \"%s\"",
+                        typname, s)));
+
+    errno = 0;
+    result = strtou64(s, &endptr, 0);
+
+    /*
+     * strtoul[l]() normally only sets ERANGE.  On some systems it also may
+     * set EINVAL, which simply means it couldn't parse the input string. This
+     * is handled by the second "if" consistent across platforms.
+     */
+    if (errno && errno != ERANGE && errno != EINVAL)
+        ereturn(escontext, 0,
+                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                 errmsg("invalid input syntax for type %s: \"%s\"",
+                        typname, s)));
+
+    if (endptr == s)
+        ereturn(escontext, 0,
+                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                 errmsg("invalid input syntax for type %s: \"%s\"",
+                        typname, s)));
+
+    if (errno == ERANGE)
+        ereturn(escontext, 0,
+                (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+                 errmsg("value \"%s\" is out of range for type %s",
+                        s, typname)));
+
+    if (endloc)
+    {
+        /* caller wants to deal with rest of string */
+        *endloc = endptr;
+    }
+    else
+    {
+        /* allow only whitespace after number */
+        while (*endptr && isspace((unsigned char) *endptr))
+            endptr++;
+        if (*endptr)
+            ereturn(escontext, 0,
+                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                     errmsg("invalid input syntax for type %s: \"%s\"",
+                            typname, s)));
+    }
+
+    return result;
+}
+
 /*
  * pg_itoa: converts a signed 16-bit integer to its string representation
  * and returns strlen(a).
diff --git a/src/backend/utils/adt/oid.c b/src/backend/utils/adt/oid.c
index 9d382b5cb7..6b70b774d5 100644
--- a/src/backend/utils/adt/oid.c
+++ b/src/backend/utils/adt/oid.c
@@ -32,106 +32,13 @@
  *     USER I/O ROUTINES                                                         *
  *****************************************************************************/

-/*
- * Parse a single OID and return its value.
- *
- * If endloc isn't NULL, store a pointer to the rest of the string there,
- * so that caller can parse the rest.  Otherwise, it's an error if anything
- * but whitespace follows.
- *
- * If escontext points to an ErrorSaveContext node, that is filled instead
- * of throwing an error; the caller must check SOFT_ERROR_OCCURRED()
- * to detect errors.
- */
-static Oid
-oidin_subr(const char *s, char **endloc, Node *escontext)
-{
-    unsigned long cvt;
-    char       *endptr;
-    Oid            result;
-
-    if (*s == '\0')
-        ereturn(escontext, InvalidOid,
-                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                 errmsg("invalid input syntax for type %s: \"%s\"",
-                        "oid", s)));
-
-    errno = 0;
-    cvt = strtoul(s, &endptr, 10);
-
-    /*
-     * strtoul() normally only sets ERANGE.  On some systems it also may set
-     * EINVAL, which simply means it couldn't parse the input string. This is
-     * handled by the second "if" consistent across platforms.
-     */
-    if (errno && errno != ERANGE && errno != EINVAL)
-        ereturn(escontext, InvalidOid,
-                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                 errmsg("invalid input syntax for type %s: \"%s\"",
-                        "oid", s)));
-
-    if (endptr == s && *s != '\0')
-        ereturn(escontext, InvalidOid,
-                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                 errmsg("invalid input syntax for type %s: \"%s\"",
-                        "oid", s)));
-
-    if (errno == ERANGE)
-        ereturn(escontext, InvalidOid,
-                (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-                 errmsg("value \"%s\" is out of range for type %s",
-                        s, "oid")));
-
-    if (endloc)
-    {
-        /* caller wants to deal with rest of string */
-        *endloc = endptr;
-    }
-    else
-    {
-        /* allow only whitespace after number */
-        while (*endptr && isspace((unsigned char) *endptr))
-            endptr++;
-        if (*endptr)
-            ereturn(escontext, InvalidOid,
-                    (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                     errmsg("invalid input syntax for type %s: \"%s\"",
-                            "oid", s)));
-    }
-
-    result = (Oid) cvt;
-
-    /*
-     * Cope with possibility that unsigned long is wider than Oid, in which
-     * case strtoul will not raise an error for some values that are out of
-     * the range of Oid.
-     *
-     * For backwards compatibility, we want to accept inputs that are given
-     * with a minus sign, so allow the input value if it matches after either
-     * signed or unsigned extension to long.
-     *
-     * To ensure consistent results on 32-bit and 64-bit platforms, make sure
-     * the error message is the same as if strtoul() had returned ERANGE.
-     */
-#if OID_MAX != ULONG_MAX
-    if (cvt != (unsigned long) result &&
-        cvt != (unsigned long) ((int) result))
-        ereturn(escontext, InvalidOid,
-                (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-                 errmsg("value \"%s\" is out of range for type %s",
-                        s, "oid")));
-#endif
-
-    return result;
-}
-
 Datum
 oidin(PG_FUNCTION_ARGS)
 {
     char       *s = PG_GETARG_CSTRING(0);
     Oid            result;

-    result = oidin_subr(s, NULL, fcinfo->context);
+    result = uint32in_subr(s, NULL, "oid", fcinfo->context);
     PG_RETURN_OID(result);
 }

@@ -218,7 +125,8 @@ oidvectorin(PG_FUNCTION_ARGS)
             oidString++;
         if (*oidString == '\0')
             break;
-        result->values[n] = oidin_subr(oidString, &oidString, escontext);
+        result->values[n] = uint32in_subr(oidString, &oidString,
+                                          "oid", escontext);
         if (SOFT_ERROR_OCCURRED(escontext))
             PG_RETURN_NULL();
     }
@@ -339,7 +247,8 @@ oidparse(Node *node)
              * constants by the lexer.  Accept these if they are valid OID
              * strings.
              */
-            return oidin_subr(castNode(Float, node)->fval, NULL, NULL);
+            return uint32in_subr(castNode(Float, node)->fval, NULL,
+                                 "oid", NULL);
         default:
             elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node));
     }
diff --git a/src/backend/utils/adt/xid.c b/src/backend/utils/adt/xid.c
index e4b4952a28..6c94aadb6f 100644
--- a/src/backend/utils/adt/xid.c
+++ b/src/backend/utils/adt/xid.c
@@ -31,8 +31,10 @@ Datum
 xidin(PG_FUNCTION_ARGS)
 {
     char       *str = PG_GETARG_CSTRING(0);
+    TransactionId result;

-    PG_RETURN_TRANSACTIONID((TransactionId) strtoul(str, NULL, 0));
+    result = uint32in_subr(str, NULL, "xid", fcinfo->context);
+    PG_RETURN_TRANSACTIONID(result);
 }

 Datum
@@ -183,8 +185,10 @@ Datum
 xid8in(PG_FUNCTION_ARGS)
 {
     char       *str = PG_GETARG_CSTRING(0);
+    uint64        result;

-    PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromU64(strtou64(str, NULL, 0)));
+    result = uint64in_subr(str, NULL, "xid8", fcinfo->context);
+    PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromU64(result));
 }

 Datum
@@ -321,8 +325,10 @@ Datum
 cidin(PG_FUNCTION_ARGS)
 {
     char       *str = PG_GETARG_CSTRING(0);
+    CommandId    result;

-    PG_RETURN_COMMANDID((CommandId) strtoul(str, NULL, 0));
+    result = uint32in_subr(str, NULL, "xid", fcinfo->context);
+    PG_RETURN_COMMANDID(result);
 }

 /*
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 15373ba68f..a4a20b5a45 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -51,6 +51,10 @@ extern int32 pg_strtoint32(const char *s);
 extern int32 pg_strtoint32_safe(const char *s, Node *escontext);
 extern int64 pg_strtoint64(const char *s);
 extern int64 pg_strtoint64_safe(const char *s, Node *escontext);
+extern uint32 uint32in_subr(const char *s, char **endloc,
+                            const char *typname, Node *escontext);
+extern uint64 uint64in_subr(const char *s, char **endloc,
+                            const char *typname, Node *escontext);
 extern int    pg_itoa(int16 i, char *a);
 extern int    pg_ultoa_n(uint32 value, char *a);
 extern int    pg_ulltoa_n(uint64 value, char *a);
diff --git a/src/test/regress/expected/xid.out b/src/test/regress/expected/xid.out
index c7b8d299c8..e62f701943 100644
--- a/src/test/regress/expected/xid.out
+++ b/src/test/regress/expected/xid.out
@@ -13,29 +13,58 @@ select '010'::xid,
    8 |  42 | 4294967295 | 4294967295 |    8 |   42 | 18446744073709551615 | 18446744073709551615
 (1 row)

--- garbage values are not yet rejected (perhaps they should be)
+-- garbage values
 select ''::xid;
- xid
------
-   0
+ERROR:  invalid input syntax for type xid: ""
+LINE 1: select ''::xid;
+               ^
+select 'asdf'::xid;
+ERROR:  invalid input syntax for type xid: "asdf"
+LINE 1: select 'asdf'::xid;
+               ^
+select ''::xid8;
+ERROR:  invalid input syntax for type xid8: ""
+LINE 1: select ''::xid8;
+               ^
+select 'asdf'::xid8;
+ERROR:  invalid input syntax for type xid8: "asdf"
+LINE 1: select 'asdf'::xid8;
+               ^
+-- Also try it with non-error-throwing API
+SELECT pg_input_is_valid('42', 'xid');
+ pg_input_is_valid
+-------------------
+ t
 (1 row)

-select 'asdf'::xid;
- xid
------
-   0
+SELECT pg_input_is_valid('asdf', 'xid');
+ pg_input_is_valid
+-------------------
+ f
 (1 row)

-select ''::xid8;
- xid8
-------
-    0
+SELECT pg_input_error_message('0xffffffffff', 'xid');
+              pg_input_error_message
+---------------------------------------------------
+ value "0xffffffffff" is out of range for type xid
 (1 row)

-select 'asdf'::xid8;
- xid8
-------
-    0
+SELECT pg_input_is_valid('42', 'xid8');
+ pg_input_is_valid
+-------------------
+ t
+(1 row)
+
+SELECT pg_input_is_valid('asdf', 'xid8');
+ pg_input_is_valid
+-------------------
+ f
+(1 row)
+
+SELECT pg_input_error_message('0xffffffffffffffffffff', 'xid8');
+                    pg_input_error_message
+--------------------------------------------------------------
+ value "0xffffffffffffffffffff" is out of range for type xid8
 (1 row)

 -- equality
diff --git a/src/test/regress/sql/xid.sql b/src/test/regress/sql/xid.sql
index 2289803681..b6996588ef 100644
--- a/src/test/regress/sql/xid.sql
+++ b/src/test/regress/sql/xid.sql
@@ -10,12 +10,20 @@ select '010'::xid,
        '0xffffffffffffffff'::xid8,
        '-1'::xid8;

--- garbage values are not yet rejected (perhaps they should be)
+-- garbage values
 select ''::xid;
 select 'asdf'::xid;
 select ''::xid8;
 select 'asdf'::xid8;

+-- Also try it with non-error-throwing API
+SELECT pg_input_is_valid('42', 'xid');
+SELECT pg_input_is_valid('asdf', 'xid');
+SELECT pg_input_error_message('0xffffffffff', 'xid');
+SELECT pg_input_is_valid('42', 'xid8');
+SELECT pg_input_is_valid('asdf', 'xid8');
+SELECT pg_input_error_message('0xffffffffffffffffffff', 'xid8');
+
 -- equality
 select '1'::xid = '1'::xid;
 select '1'::xid != '1'::xid;

pgsql-hackers by date:

Previous
From: Ankit Kumar Pandey
Date:
Subject: Re: [PATCH] Improve ability to display optimizer analysis using OPTIMIZER_DEBUG
Next
From: Daniel Gustafsson
Date:
Subject: Re: pg_upgrade: Make testing different transfer modes easier