Re: bugfix: invalid bit/varbit input causes the log file to be unreadable - Mailing list pgsql-hackers

From Tom Lane
Subject Re: bugfix: invalid bit/varbit input causes the log file to be unreadable
Date
Msg-id 273506.1593364259@sss.pgh.pa.us
Whole thread Raw
In response to Re: bugfix: invalid bit/varbit input causes the log file to be unreadable  (Tom Lane <tgl@sss.pgh.pa.us>)
Responses Re: bugfix: invalid bit/varbit input causes the log file to beunreadable
List pgsql-hackers
I wrote:
> Even granting the premise, the proposed patch seems like a significant
> decrease in user-friendliness for typical cases.  I'd rather see us
> make an effort to print one valid-per-the-DB-encoding character.
> Now that we can rely on snprintf to count %s restrictions in bytes,
> I think something like this should work:
>                          errmsg("\"%.*s\" is not a valid binary digit",
>                                 pg_mblen(sp), sp)));
> But the real problem is that this is only the tip of the iceberg.
> You didn't even hit all the %c usages in varbit.c.

I went through all the %c format sequences in the backend to see which
ones could use this type of fix.  There were not as many as I'd expected,
but still a fair number.  (I skipped cases where the input was coming from
the catalogs, as well as some non-user-facing debug printouts.)  That
leads to the attached patch, which seems to do the job without breaking
anything that works today.

            regards, tom lane

PS: I failed to resist the temptation to improve some shoddy error
messages nearby in pageinspect/heapfuncs.c.

diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c
index 60bdbea46b..b3304ff844 100644
--- a/contrib/hstore/hstore_io.c
+++ b/contrib/hstore/hstore_io.c
@@ -80,7 +80,9 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped)
             }
             else if (*(state->ptr) == '=' && !ignoreeq)
             {
-                elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr -
state->begin));
+                elog(ERROR, "Syntax error near \"%.*s\" at position %d",
+                     pg_mblen(state->ptr), state->ptr,
+                     (int32) (state->ptr - state->begin));
             }
             else if (*(state->ptr) == '\\')
             {
@@ -219,7 +221,9 @@ parse_hstore(HSParser *state)
             }
             else if (!isspace((unsigned char) *(state->ptr)))
             {
-                elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr -
state->begin));
+                elog(ERROR, "Syntax error near \"%.*s\" at position %d",
+                     pg_mblen(state->ptr), state->ptr,
+                     (int32) (state->ptr - state->begin));
             }
         }
         else if (st == WGT)
@@ -234,7 +238,9 @@ parse_hstore(HSParser *state)
             }
             else
             {
-                elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr -
state->begin));
+                elog(ERROR, "Syntax error near \"%.*s\" at position %d",
+                     pg_mblen(state->ptr), state->ptr,
+                     (int32) (state->ptr - state->begin));
             }
         }
         else if (st == WVAL)
@@ -267,7 +273,9 @@ parse_hstore(HSParser *state)
             }
             else if (!isspace((unsigned char) *(state->ptr)))
             {
-                elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr -
state->begin));
+                elog(ERROR, "Syntax error near \"%.*s\" at position %d",
+                     pg_mblen(state->ptr), state->ptr,
+                     (int32) (state->ptr - state->begin));
             }
         }
         else
diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c
index 11a910184b..f04455da12 100644
--- a/contrib/pageinspect/heapfuncs.c
+++ b/contrib/pageinspect/heapfuncs.c
@@ -30,6 +30,7 @@
 #include "catalog/pg_am_d.h"
 #include "catalog/pg_type.h"
 #include "funcapi.h"
+#include "mb/pg_wchar.h"
 #include "miscadmin.h"
 #include "pageinspect.h"
 #include "port/pg_bitutils.h"
@@ -99,7 +100,8 @@ text_to_bits(char *str, int len)
         else
             ereport(ERROR,
                     (errcode(ERRCODE_DATA_CORRUPTED),
-                     errmsg("illegal character '%c' in t_bits string", str[off])));
+                     errmsg("invalid character \"%.*s\" in t_bits string",
+                            pg_mblen(str + off), str + off)));

         if (off % 8 == 7)
             bits[off / 8] = byte;
@@ -460,14 +462,13 @@ tuple_data_split(PG_FUNCTION_ARGS)
         if (!t_bits_str)
             ereport(ERROR,
                     (errcode(ERRCODE_DATA_CORRUPTED),
-                     errmsg("argument of t_bits is null, but it is expected to be null and %d character long",
-                            bits_len)));
+                     errmsg("t_bits string must not be NULL")));

         bits_str_len = strlen(t_bits_str);
         if (bits_len != bits_str_len)
             ereport(ERROR,
                     (errcode(ERRCODE_DATA_CORRUPTED),
-                     errmsg("unexpected length of t_bits %u, expected %d",
+                     errmsg("unexpected length of t_bits string: %u, expected %u",
                             bits_str_len, bits_len)));

         /* do the conversion */
@@ -478,7 +479,7 @@ tuple_data_split(PG_FUNCTION_ARGS)
         if (t_bits_str)
             ereport(ERROR,
                     (errcode(ERRCODE_DATA_CORRUPTED),
-                     errmsg("t_bits string is expected to be NULL, but instead it is %zu bytes length",
+                     errmsg("t_bits string is expected to be NULL, but instead it is %zu bytes long",
                             strlen(t_bits_str))));
     }

diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
index 61d318d93c..a609d49c12 100644
--- a/src/backend/utils/adt/encode.c
+++ b/src/backend/utils/adt/encode.c
@@ -15,6 +15,7 @@

 #include <ctype.h>

+#include "mb/pg_wchar.h"
 #include "utils/builtins.h"
 #include "utils/memutils.h"

@@ -171,17 +172,19 @@ hex_encode(const char *src, size_t len, char *dst)
 }

 static inline char
-get_hex(char c)
+get_hex(const char *cp)
 {
+    unsigned char c = (unsigned char) *cp;
     int            res = -1;

-    if (c > 0 && c < 127)
-        res = hexlookup[(unsigned char) c];
+    if (c < 127)
+        res = hexlookup[c];

     if (res < 0)
         ereport(ERROR,
                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                 errmsg("invalid hexadecimal digit: \"%c\"", c)));
+                 errmsg("invalid hexadecimal digit: \"%.*s\"",
+                        pg_mblen(cp), cp)));

     return (char) res;
 }
@@ -205,13 +208,15 @@ hex_decode(const char *src, size_t len, char *dst)
             s++;
             continue;
         }
-        v1 = get_hex(*s++) << 4;
+        v1 = get_hex(s) << 4;
+        s++;
         if (s >= srcend)
             ereport(ERROR,
                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                      errmsg("invalid hexadecimal data: odd number of digits")));

-        v2 = get_hex(*s++);
+        v2 = get_hex(s);
+        s++;
         *p++ = v1 | v2;
     }

@@ -338,7 +343,8 @@ pg_base64_decode(const char *src, size_t len, char *dst)
             if (b < 0)
                 ereport(ERROR,
                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                         errmsg("invalid symbol \"%c\" while decoding base64 sequence", (int) c)));
+                         errmsg("invalid symbol \"%.*s\" found while decoding base64 sequence",
+                                pg_mblen(s - 1), s - 1)));
         }
         /* add it to buffer */
         buf = (buf << 6) + b;
diff --git a/src/backend/utils/adt/jsonpath_gram.y b/src/backend/utils/adt/jsonpath_gram.y
index f87db8ccf6..a5b461c567 100644
--- a/src/backend/utils/adt/jsonpath_gram.y
+++ b/src/backend/utils/adt/jsonpath_gram.y
@@ -526,8 +526,8 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
                 ereport(ERROR,
                         (errcode(ERRCODE_SYNTAX_ERROR),
                          errmsg("invalid input syntax for type %s", "jsonpath"),
-                         errdetail("unrecognized flag character \"%c\" in LIKE_REGEX predicate",
-                                   flags->val[i])));
+                         errdetail("unrecognized flag character \"%.*s\" in LIKE_REGEX predicate",
+                                   pg_mblen(flags->val+i), flags->val+i)));
                 break;
         }
     }
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index 06f808652a..c70c5eeeb3 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -423,8 +423,8 @@ parse_re_flags(pg_re_flags *flags, text *opts)
                 default:
                     ereport(ERROR,
                             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                             errmsg("invalid regular expression option: \"%c\"",
-                                    opt_p[i])));
+                             errmsg("invalid regular expression option: \"%.*s\"",
+                                    pg_mblen(opt_p + i), opt_p + i)));
                     break;
             }
         }
diff --git a/src/backend/utils/adt/varbit.c b/src/backend/utils/adt/varbit.c
index f0c6a44b84..3c03459f51 100644
--- a/src/backend/utils/adt/varbit.c
+++ b/src/backend/utils/adt/varbit.c
@@ -230,8 +230,8 @@ bit_in(PG_FUNCTION_ARGS)
             else if (*sp != '0')
                 ereport(ERROR,
                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                         errmsg("\"%c\" is not a valid binary digit",
-                                *sp)));
+                         errmsg("\"%.*s\" is not a valid binary digit",
+                                pg_mblen(sp), sp)));

             x >>= 1;
             if (x == 0)
@@ -255,8 +255,8 @@ bit_in(PG_FUNCTION_ARGS)
             else
                 ereport(ERROR,
                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                         errmsg("\"%c\" is not a valid hexadecimal digit",
-                                *sp)));
+                         errmsg("\"%.*s\" is not a valid hexadecimal digit",
+                                pg_mblen(sp), sp)));

             if (bc)
             {
@@ -531,8 +531,8 @@ varbit_in(PG_FUNCTION_ARGS)
             else if (*sp != '0')
                 ereport(ERROR,
                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                         errmsg("\"%c\" is not a valid binary digit",
-                                *sp)));
+                         errmsg("\"%.*s\" is not a valid binary digit",
+                                pg_mblen(sp), sp)));

             x >>= 1;
             if (x == 0)
@@ -556,8 +556,8 @@ varbit_in(PG_FUNCTION_ARGS)
             else
                 ereport(ERROR,
                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                         errmsg("\"%c\" is not a valid hexadecimal digit",
-                                *sp)));
+                         errmsg("\"%.*s\" is not a valid hexadecimal digit",
+                                pg_mblen(sp), sp)));

             if (bc)
             {
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 2eaabd6231..df10bfb906 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -5586,8 +5586,8 @@ text_format(PG_FUNCTION_ARGS)
         if (strchr("sIL", *cp) == NULL)
             ereport(ERROR,
                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                     errmsg("unrecognized format() type specifier \"%c\"",
-                            *cp),
+                     errmsg("unrecognized format() type specifier \"%.*s\"",
+                            pg_mblen(cp), cp),
                      errhint("For a single \"%%\" use \"%%%%\".")));

         /* If indirect width was specified, get its value */
@@ -5707,8 +5707,8 @@ text_format(PG_FUNCTION_ARGS)
                 /* should not get here, because of previous check */
                 ereport(ERROR,
                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                         errmsg("unrecognized format() type specifier \"%c\"",
-                                *cp),
+                         errmsg("unrecognized format() type specifier \"%.*s\"",
+                                pg_mblen(cp), cp),
                          errhint("For a single \"%%\" use \"%%%%\".")));
                 break;
         }

pgsql-hackers by date:

Previous
From: Joe Conway
Date:
Subject: Re: pg_read_file() with virtual files returns empty string
Next
From: Tom Lane
Date:
Subject: Re: Fwd: PostgreSQL: WolfSSL support