Re: bugfix: invalid bit/varbit input causes the log file to be unreadable - Mailing list pgsql-hackers
From | Tom Lane |
---|---|
Subject | Re: bugfix: invalid bit/varbit input causes the log file to be unreadable |
Date | |
Msg-id | 273506.1593364259@sss.pgh.pa.us Whole thread Raw |
In response to | Re: bugfix: invalid bit/varbit input causes the log file to be unreadable (Tom Lane <tgl@sss.pgh.pa.us>) |
Responses |
Re: bugfix: invalid bit/varbit input causes the log file to beunreadable
|
List | pgsql-hackers |
I wrote: > Even granting the premise, the proposed patch seems like a significant > decrease in user-friendliness for typical cases. I'd rather see us > make an effort to print one valid-per-the-DB-encoding character. > Now that we can rely on snprintf to count %s restrictions in bytes, > I think something like this should work: > errmsg("\"%.*s\" is not a valid binary digit", > pg_mblen(sp), sp))); > But the real problem is that this is only the tip of the iceberg. > You didn't even hit all the %c usages in varbit.c. I went through all the %c format sequences in the backend to see which ones could use this type of fix. There were not as many as I'd expected, but still a fair number. (I skipped cases where the input was coming from the catalogs, as well as some non-user-facing debug printouts.) That leads to the attached patch, which seems to do the job without breaking anything that works today. regards, tom lane PS: I failed to resist the temptation to improve some shoddy error messages nearby in pageinspect/heapfuncs.c. diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c index 60bdbea46b..b3304ff844 100644 --- a/contrib/hstore/hstore_io.c +++ b/contrib/hstore/hstore_io.c @@ -80,7 +80,9 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped) } else if (*(state->ptr) == '=' && !ignoreeq) { - elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr - state->begin)); + elog(ERROR, "Syntax error near \"%.*s\" at position %d", + pg_mblen(state->ptr), state->ptr, + (int32) (state->ptr - state->begin)); } else if (*(state->ptr) == '\\') { @@ -219,7 +221,9 @@ parse_hstore(HSParser *state) } else if (!isspace((unsigned char) *(state->ptr))) { - elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr - state->begin)); + elog(ERROR, "Syntax error near \"%.*s\" at position %d", + pg_mblen(state->ptr), state->ptr, + (int32) (state->ptr - state->begin)); } } else if (st == WGT) @@ -234,7 +238,9 @@ parse_hstore(HSParser *state) } else { - elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr - state->begin)); + elog(ERROR, "Syntax error near \"%.*s\" at position %d", + pg_mblen(state->ptr), state->ptr, + (int32) (state->ptr - state->begin)); } } else if (st == WVAL) @@ -267,7 +273,9 @@ parse_hstore(HSParser *state) } else if (!isspace((unsigned char) *(state->ptr))) { - elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr - state->begin)); + elog(ERROR, "Syntax error near \"%.*s\" at position %d", + pg_mblen(state->ptr), state->ptr, + (int32) (state->ptr - state->begin)); } } else diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c index 11a910184b..f04455da12 100644 --- a/contrib/pageinspect/heapfuncs.c +++ b/contrib/pageinspect/heapfuncs.c @@ -30,6 +30,7 @@ #include "catalog/pg_am_d.h" #include "catalog/pg_type.h" #include "funcapi.h" +#include "mb/pg_wchar.h" #include "miscadmin.h" #include "pageinspect.h" #include "port/pg_bitutils.h" @@ -99,7 +100,8 @@ text_to_bits(char *str, int len) else ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("illegal character '%c' in t_bits string", str[off]))); + errmsg("invalid character \"%.*s\" in t_bits string", + pg_mblen(str + off), str + off))); if (off % 8 == 7) bits[off / 8] = byte; @@ -460,14 +462,13 @@ tuple_data_split(PG_FUNCTION_ARGS) if (!t_bits_str) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("argument of t_bits is null, but it is expected to be null and %d character long", - bits_len))); + errmsg("t_bits string must not be NULL"))); bits_str_len = strlen(t_bits_str); if (bits_len != bits_str_len) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("unexpected length of t_bits %u, expected %d", + errmsg("unexpected length of t_bits string: %u, expected %u", bits_str_len, bits_len))); /* do the conversion */ @@ -478,7 +479,7 @@ tuple_data_split(PG_FUNCTION_ARGS) if (t_bits_str) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("t_bits string is expected to be NULL, but instead it is %zu bytes length", + errmsg("t_bits string is expected to be NULL, but instead it is %zu bytes long", strlen(t_bits_str)))); } diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c index 61d318d93c..a609d49c12 100644 --- a/src/backend/utils/adt/encode.c +++ b/src/backend/utils/adt/encode.c @@ -15,6 +15,7 @@ #include <ctype.h> +#include "mb/pg_wchar.h" #include "utils/builtins.h" #include "utils/memutils.h" @@ -171,17 +172,19 @@ hex_encode(const char *src, size_t len, char *dst) } static inline char -get_hex(char c) +get_hex(const char *cp) { + unsigned char c = (unsigned char) *cp; int res = -1; - if (c > 0 && c < 127) - res = hexlookup[(unsigned char) c]; + if (c < 127) + res = hexlookup[c]; if (res < 0) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("invalid hexadecimal digit: \"%c\"", c))); + errmsg("invalid hexadecimal digit: \"%.*s\"", + pg_mblen(cp), cp))); return (char) res; } @@ -205,13 +208,15 @@ hex_decode(const char *src, size_t len, char *dst) s++; continue; } - v1 = get_hex(*s++) << 4; + v1 = get_hex(s) << 4; + s++; if (s >= srcend) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid hexadecimal data: odd number of digits"))); - v2 = get_hex(*s++); + v2 = get_hex(s); + s++; *p++ = v1 | v2; } @@ -338,7 +343,8 @@ pg_base64_decode(const char *src, size_t len, char *dst) if (b < 0) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("invalid symbol \"%c\" while decoding base64 sequence", (int) c))); + errmsg("invalid symbol \"%.*s\" found while decoding base64 sequence", + pg_mblen(s - 1), s - 1))); } /* add it to buffer */ buf = (buf << 6) + b; diff --git a/src/backend/utils/adt/jsonpath_gram.y b/src/backend/utils/adt/jsonpath_gram.y index f87db8ccf6..a5b461c567 100644 --- a/src/backend/utils/adt/jsonpath_gram.y +++ b/src/backend/utils/adt/jsonpath_gram.y @@ -526,8 +526,8 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern, ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("invalid input syntax for type %s", "jsonpath"), - errdetail("unrecognized flag character \"%c\" in LIKE_REGEX predicate", - flags->val[i]))); + errdetail("unrecognized flag character \"%.*s\" in LIKE_REGEX predicate", + pg_mblen(flags->val+i), flags->val+i))); break; } } diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c index 06f808652a..c70c5eeeb3 100644 --- a/src/backend/utils/adt/regexp.c +++ b/src/backend/utils/adt/regexp.c @@ -423,8 +423,8 @@ parse_re_flags(pg_re_flags *flags, text *opts) default: ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("invalid regular expression option: \"%c\"", - opt_p[i]))); + errmsg("invalid regular expression option: \"%.*s\"", + pg_mblen(opt_p + i), opt_p + i))); break; } } diff --git a/src/backend/utils/adt/varbit.c b/src/backend/utils/adt/varbit.c index f0c6a44b84..3c03459f51 100644 --- a/src/backend/utils/adt/varbit.c +++ b/src/backend/utils/adt/varbit.c @@ -230,8 +230,8 @@ bit_in(PG_FUNCTION_ARGS) else if (*sp != '0') ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("\"%c\" is not a valid binary digit", - *sp))); + errmsg("\"%.*s\" is not a valid binary digit", + pg_mblen(sp), sp))); x >>= 1; if (x == 0) @@ -255,8 +255,8 @@ bit_in(PG_FUNCTION_ARGS) else ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("\"%c\" is not a valid hexadecimal digit", - *sp))); + errmsg("\"%.*s\" is not a valid hexadecimal digit", + pg_mblen(sp), sp))); if (bc) { @@ -531,8 +531,8 @@ varbit_in(PG_FUNCTION_ARGS) else if (*sp != '0') ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("\"%c\" is not a valid binary digit", - *sp))); + errmsg("\"%.*s\" is not a valid binary digit", + pg_mblen(sp), sp))); x >>= 1; if (x == 0) @@ -556,8 +556,8 @@ varbit_in(PG_FUNCTION_ARGS) else ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("\"%c\" is not a valid hexadecimal digit", - *sp))); + errmsg("\"%.*s\" is not a valid hexadecimal digit", + pg_mblen(sp), sp))); if (bc) { diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 2eaabd6231..df10bfb906 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -5586,8 +5586,8 @@ text_format(PG_FUNCTION_ARGS) if (strchr("sIL", *cp) == NULL) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("unrecognized format() type specifier \"%c\"", - *cp), + errmsg("unrecognized format() type specifier \"%.*s\"", + pg_mblen(cp), cp), errhint("For a single \"%%\" use \"%%%%\"."))); /* If indirect width was specified, get its value */ @@ -5707,8 +5707,8 @@ text_format(PG_FUNCTION_ARGS) /* should not get here, because of previous check */ ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("unrecognized format() type specifier \"%c\"", - *cp), + errmsg("unrecognized format() type specifier \"%.*s\"", + pg_mblen(cp), cp), errhint("For a single \"%%\" use \"%%%%\"."))); break; }
pgsql-hackers by date: