From ef486287090daa24d51735ba9fa9585341b6e8ec Mon Sep 17 00:00:00 2001 From: John Naylor Date: Wed, 6 Jul 2022 15:35:33 +0700 Subject: [PATCH v4 3/4] Use lookahead path in json string lexing for the non-escape case too This removes some duplicated code and enables the no-escape path to be optimized in the same way. Per suggestion from Andres Freund --- src/common/jsonapi.c | 46 +++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c index ad4858c623..81e176ad8d 100644 --- a/src/common/jsonapi.c +++ b/src/common/jsonapi.c @@ -686,15 +686,6 @@ json_lex_string(JsonLexContext *lex) lex->token_terminator = s; return JSON_INVALID_TOKEN; } - else if (*s == '"') - break; - else if ((unsigned char) *s < 32) - { - /* Per RFC4627, these characters MUST be escaped. */ - /* Since *s isn't printable, exclude it from the context string */ - lex->token_terminator = s; - return JSON_ESCAPING_REQUIRED; - } else if (*s == '\\') { /* OK, we have an escape character. */ @@ -849,22 +840,41 @@ json_lex_string(JsonLexContext *lex) return JSON_ESCAPING_INVALID; } } - else if (lex->strval != NULL) + else { - /* start lookahead at next byte */ - char *p = s + 1; + /* start lookahead at current byte */ + char *p = s; if (hi_surrogate != -1) return JSON_UNICODE_LOW_SURROGATE; while (p < end) { - if (*p == '\\' || *p == '"' || (unsigned char) *p < 32) + if (*p == '\\' || *p == '"') break; + else if ((unsigned char) *p < 32) + { + /* Per RFC4627, these characters MUST be escaped. */ + /* + * Since *p isn't printable, exclude it from the context + * string + */ + lex->token_terminator = p; + return JSON_ESCAPING_REQUIRED; + } p++; } - appendBinaryStringInfo(lex->strval, s, p - s); + if (lex->strval != NULL) + appendBinaryStringInfo(lex->strval, s, p - s); + + if (*p == '"') + { + /* Hooray, we found the end of the string! 
*/ + lex->prev_token_terminator = lex->token_terminator; + lex->token_terminator = p + 1; + return JSON_SUCCESS; + } /* * s will be incremented at the top of the loop, so set it to just @@ -873,14 +883,6 @@ json_lex_string(JsonLexContext *lex) s = p - 1; } } - - if (hi_surrogate != -1) - return JSON_UNICODE_LOW_SURROGATE; - - /* Hooray, we found the end of the string! */ - lex->prev_token_terminator = lex->token_terminator; - lex->token_terminator = s + 1; - return JSON_SUCCESS; } /* -- 2.36.1