From 9a5dfd7172aaf588612fe820f26e3134270a6eec Mon Sep 17 00:00:00 2001 From: John Naylor Date: Fri, 5 Jul 2019 14:22:42 +0700 Subject: [PATCH v4 2/3] Unify xuiend and xusend into a single start condition Whether scanning a string or an identifier with Unicode escapes, we enter a single state to look for a possible UESCAPE. This shrinks the transition array to 26074. --- src/backend/parser/scan.l | 127 +++++++++++++++++++------------------- 1 file changed, 63 insertions(+), 64 deletions(-) diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index cbf3f6deca..c0aa6cd22e 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -172,9 +172,9 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner); * extended quoted strings (support backslash escape sequences) * $foo$ quoted strings * quoted identifier with Unicode escapes - * end of a quoted identifier with Unicode escapes, UESCAPE can follow * quoted string with Unicode escapes - * end of a quoted string with Unicode escapes, UESCAPE can follow + * end of a quoted string or identifier with Unicode escapes, + * UESCAPE can follow * Unicode surrogate pair in extended quoted string * * Remember to add an <<EOF>> case whenever you add a new exclusive state! @@ -190,9 +190,8 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner); %x xe %x xdolq %x xui -%x xuiend %x xus -%x xusend +%x xuend %x xeu /* @@ -591,39 +590,14 @@ other . 
yylval->str = litbufdup(yyscanner); return SCONST; case xus: - /* xusend state looks for possible UESCAPE */ - BEGIN(xusend); + /* xuend state looks for possible UESCAPE */ + BEGIN(xuend); break; default: yyerror("unhandled previous state after endquote"); } } -{whitespace} { - /* stay in xusend state over whitespace */ - } -<> | -{other} | -{xustop1} { - /* no UESCAPE after the quote, throw back everything */ - yyless(0); - BEGIN(INITIAL); - yylval->str = litbuf_udeescape('\\', yyscanner); - return SCONST; - } -{xustop2} { - /* found UESCAPE after the end quote */ - BEGIN(INITIAL); - if (!check_uescapechar(yytext[yyleng - 2])) - { - SET_YYLLOC(); - ADVANCE_YYLLOC(yyleng - 2); - yyerror("invalid Unicode escape character"); - } - yylval->str = litbuf_udeescape(yytext[yyleng - 2], - yyscanner); - return SCONST; - } {xqdouble} { addlitchar('\'', yyscanner); } @@ -776,52 +750,77 @@ other . return IDENT; } {dquote} { - yyless(1); - /* xuiend state looks for possible UESCAPE */ - BEGIN(xuiend); + /* xuend state looks for possible UESCAPE */ + yyextra->state_before_quote_stop = YYSTATE; + BEGIN(xuend); } -{whitespace} { - /* stay in xuiend state over whitespace */ + +{whitespace} { + /* stay in xuend state over whitespace */ } -<> | -{other} | -{xustop1} { +<> | +{other} | +{xustop1} { /* no UESCAPE after the quote, throw back everything */ - char *ident; - int identlen; - yyless(0); - BEGIN(INITIAL); - if (yyextra->literallen == 0) - yyerror("zero-length delimited identifier"); - ident = litbuf_udeescape('\\', yyscanner); - identlen = strlen(ident); - if (identlen >= NAMEDATALEN) - truncate_identifier(ident, identlen, true); - yylval->str = ident; - return IDENT; + if (yyextra->state_before_quote_stop == xus) + { + BEGIN(INITIAL); + yylval->str = litbuf_udeescape('\\', yyscanner); + return SCONST; + } + else if (yyextra->state_before_quote_stop == xui) + { + char *ident; + int identlen; + + BEGIN(INITIAL); + if (yyextra->literallen == 0) + yyerror("zero-length delimited 
identifier"); + ident = litbuf_udeescape('\\', yyscanner); + identlen = strlen(ident); + if (identlen >= NAMEDATALEN) + truncate_identifier(ident, identlen, true); + yylval->str = ident; + return IDENT; + } + else + yyerror("unhandled previous state in xuend"); } -{xustop2} { +{xustop2} { /* found UESCAPE after the end quote */ - char *ident; - int identlen; - - BEGIN(INITIAL); - if (yyextra->literallen == 0) - yyerror("zero-length delimited identifier"); if (!check_uescapechar(yytext[yyleng - 2])) { SET_YYLLOC(); ADVANCE_YYLLOC(yyleng - 2); yyerror("invalid Unicode escape character"); } - ident = litbuf_udeescape(yytext[yyleng - 2], yyscanner); - identlen = strlen(ident); - if (identlen >= NAMEDATALEN) - truncate_identifier(ident, identlen, true); - yylval->str = ident; - return IDENT; + + if (yyextra->state_before_quote_stop == xus) + { + BEGIN(INITIAL); + yylval->str = litbuf_udeescape(yytext[yyleng - 2], + yyscanner); + return SCONST; + } + else if (yyextra->state_before_quote_stop == xui) + { + char *ident; + int identlen; + + BEGIN(INITIAL); + if (yyextra->literallen == 0) + yyerror("zero-length delimited identifier"); + ident = litbuf_udeescape(yytext[yyleng - 2], yyscanner); + identlen = strlen(ident); + if (identlen >= NAMEDATALEN) + truncate_identifier(ident, identlen, true); + yylval->str = ident; + return IDENT; + } + else + yyerror("unhandled previous state in xuend"); } {xddouble} { addlitchar('"', yyscanner); -- 2.17.2 (Apple Git-113)