From bba7744e59941d8bb2f039e631d090d0e3956d6c Mon Sep 17 00:00:00 2001
From: Jacob Champion
Date: Mon, 8 Apr 2024 15:31:17 -0700
Subject: [PATCH] json_lex_string: don't overread on bad UTF8

Inputs to pg_parse_json[_incremental] are not guaranteed to be
null-terminated, so pg_encoding_mblen_bounded (which uses strnlen) can
walk off the end of the buffer. Check against the end pointer instead.

TODO:
- pg_encoding_mblen_bounded() now has no callers; should we remove it?
- Do we really want to print incomplete UTF-8 sequences as-is once we
  know they're bad?
---
 src/common/jsonapi.c                              | 5 +++--
 src/test/modules/test_json_parser/t/002_inline.pl | 4 ++++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c
index fc0cb36974..6633503490 100644
--- a/src/common/jsonapi.c
+++ b/src/common/jsonapi.c
@@ -1689,8 +1689,9 @@ json_lex_string(JsonLexContext *lex)
 	} while (0)
 #define FAIL_AT_CHAR_END(code) \
 	do { \
-		lex->token_terminator = \
-			s + pg_encoding_mblen_bounded(lex->input_encoding, s); \
+		lex->token_terminator = s + pg_encoding_mblen(lex->input_encoding, s); \
+		if (lex->token_terminator >= end) \
+			lex->token_terminator = end; \
 		return code; \
 	} while (0)
 
diff --git a/src/test/modules/test_json_parser/t/002_inline.pl b/src/test/modules/test_json_parser/t/002_inline.pl
index f83cec03f8..0335a26f47 100644
--- a/src/test/modules/test_json_parser/t/002_inline.pl
+++ b/src/test/modules/test_json_parser/t/002_inline.pl
@@ -128,5 +128,9 @@ test(
 	"incorrect escape count",
 	'"\\\\\\\\\\\\\\"',
 	error => qr/Token ""\\\\\\\\\\\\\\"" is invalid/);
+test(
+	"incomplete UTF-8 sequence",
+	"\"\\\x{F5}",    # three bytes: double-quote, backslash, <f5>
+	error => qr/(Token|Escape sequence) ""?\\\x{F5}" is invalid/);
 
 done_testing();
-- 
2.34.1