From 8295efb9994e28c8b0c9b0e4992c1ed3cf891791 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Fri, 5 Jul 2019 14:26:00 +0700 Subject: [PATCH v4 3/3] Use separate start conditions for both UESCAPE and the following character. This shrinks the transition array to 23696 elements and simplifies the uescape/uescapefail rules. --- src/backend/parser/scan.l | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index c0aa6cd22e..1837636273 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -175,6 +175,7 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner); * <xus> quoted string with Unicode escapes * <xuend> end of a quoted string or identifier with Unicode escapes, * UESCAPE can follow + * <xuchar> escape character for Unicode escapes * <xeu> Unicode surrogate pair in extended quoted string * * Remember to add an <<EOF>> case whenever you add a new exclusive state! @@ -192,6 +193,7 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner); %x xui %x xus %x xuend +%x xuchar %x xeu /* @@ -295,10 +297,14 @@ xdstop {dquote} xddouble {dquote}{dquote} xdinside [^"]+ -/* Unicode escapes */ -uescape [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote} +/* Optional UESCAPE after a quoted string or identifier with Unicode escapes. 
*/ +uescape [uU][eE][sS][cC][aA][pP][eE] /* error rule to avoid backup */ -uescapefail [uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU] +uescapefail [uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU] + +/* escape character */ +uescchar {quote}[^']{quote} +uesccharfail {quote}[^']|{other} /* Quoted identifier with Unicode escapes */ xuistart [uU]&{dquote} @@ -306,9 +312,8 @@ xuistart [uU]&{dquote} /* Quoted string with Unicode escapes */ xusstart [uU]&{quote} -/* Optional UESCAPE after a quoted string or identifier with Unicode escapes. */ -xustop1 {uescapefail}? -xustop2 {uescape} +/* End of string or identifier with Unicode escapes but no UESCAPE */ +xustop {uescapefail}? /* error rule to avoid backup */ xufailed [uU]& @@ -755,12 +760,12 @@ other . BEGIN(xuend); } -<xuend>{whitespace} { - /* stay in xuend state over whitespace */ +<xuend,xuchar>{whitespace} { + /* stay in xuend/xuchar state over whitespace */ } <xuend><<EOF>> | <xuend>{other} | -<xuend>{xustop1} { +<xuend>{xustop} { /* no UESCAPE after the quote, throw back everything */ yyless(0); @@ -788,8 +793,11 @@ other . else yyerror("unhandled previous state in xuend"); } -<xuend>{xustop2} { +<xuend>{uescape} { /* found UESCAPE after the end quote */ + BEGIN(xuchar); + } +<xuchar>{uescchar} { if (!check_uescapechar(yytext[yyleng - 2])) { SET_YYLLOC(); @@ -820,8 +828,14 @@ other . return IDENT; } else - yyerror("unhandled previous state in xuend"); + yyerror("unhandled previous state in xuchar"); + } +<xuchar><<EOF>> | +<xuchar>{uesccharfail} { + SET_YYLLOC(); + yyerror("missing or invalid Unicode escape character"); } + <xd,xui>{xddouble} { addlitchar('"', yyscanner); } -- 2.17.2 (Apple Git-113)