Re: CASE control block broken by a single line comment - Mailing list pgsql-hackers
From | Tom Lane |
---|---|
Subject | Re: CASE control block broken by a single line comment |
Date | |
Msg-id | 3571674.1712616892@sss.pgh.pa.us Whole thread Raw |
In response to | Re: CASE control block broken by a single line comment (Erik Wienhold <ewie@ewie.name>) |
Responses |
Re: CASE control block broken by a single line comment
Re: CASE control block broken by a single line comment |
List | pgsql-hackers |
Erik Wienhold <ewie@ewie.name> writes: > On 2024-04-07 06:33 +0200, Tom Lane wrote: >> I suspect it'd be much more robust if we could remove the comment from >> the expr->query string. No idea how hard that is. > I slept on it and I think this can be fixed by tracking the end of the > last token before THEN and use that instead of yylloc in the call to > plpgsql_append_source_text. We already already track the token length > in plpgsql_yyleng but don't make it available outside pl_scanner.c yet. > Attached v2 tries to do that. But it breaks other test cases, probably > because the calculation of endlocation is off. I'm missing something > here. I poked at this and found that the failures occur when the patched code decides to trim an expression like "_r.v" to just "_r", naturally breaking the semantics completely. That happens because when plpgsql_yylex recognizes a compound token, it doesn't bother to adjust the token length to include the additional word(s). I vaguely remember having thought about that when writing the lookahead logic, and deciding that it wasn't worth the trouble -- but now it is. Up to now, the only thing we did with plpgsql_yyleng was to set the cutoff point for text reported by plpgsql_yyerror. Extending the token length changes reports like this: regression=# do $$ declare r record; r.x$$; ERROR: syntax error at or near "r" LINE 1: do $$ declare r record; r.x$$; ^ to this: regression=# do $$ declare r record; r.x$$; ERROR: syntax error at or near "r.x" LINE 1: do $$ declare r record; r.x$$; ^ which seems like strictly an improvement to me (the syntax error is premature EOF, which is after the "x"); but in any case it's minor enough to not be worth worrying about. Looking around, I noticed that we *have* had a similar case in the past, which 4adead1d2 noticed and worked around by suppressing the whitespace-trimming action in read_sql_construct. We could probably reach a near-one-line fix for the current problem by passing trim=false in the CASE calls, but TBH that discovery just reinforces my feeling that we need a cleaner fix. The attached v3 reverts the make-trim-optional hack that 4adead1d2 added, since we don't need or want the manual trimming anymore. With this in mind, I find the other manual whitespace trimming logic, in make_execsql_stmt(), quite scary; but it looks somewhat nontrivial to get rid of it. (The problem is that parsing of an INTO clause will leave us with a pushed-back token as next, and then we don't know where the end of the token before that is.) Since we don't currently do anything as crazy as combining execsql statements, I think the problem is only latent, but still... Anyway, the attached works for me. regards, tom lane diff --git a/src/pl/plpgsql/src/expected/plpgsql_control.out b/src/pl/plpgsql/src/expected/plpgsql_control.out index 328bd48586..ccd4f54704 100644 --- a/src/pl/plpgsql/src/expected/plpgsql_control.out +++ b/src/pl/plpgsql/src/expected/plpgsql_control.out @@ -681,3 +681,20 @@ select case_test(13); other (1 row) +-- test line comment between WHEN and THEN +create or replace function case_comment(int) returns text as $$ +begin + case $1 + when 1 -- comment before THEN + then return 'one'; + else + return 'other'; + end case; +end; +$$ language plpgsql immutable; +select case_comment(1); + case_comment +-------------- + one +(1 row) + diff --git a/src/pl/plpgsql/src/pl_gram.y b/src/pl/plpgsql/src/pl_gram.y index bef33d58a2..a29d2dfacd 100644 --- a/src/pl/plpgsql/src/pl_gram.y +++ b/src/pl/plpgsql/src/pl_gram.y @@ -66,7 +66,6 @@ static PLpgSQL_expr *read_sql_construct(int until, RawParseMode parsemode, bool isexpression, bool valid_sql, - bool trim, int *startloc, int *endtoken); static PLpgSQL_expr *read_sql_expression(int until, @@ -895,7 +894,7 @@ stmt_perform : K_PERFORM */ new->expr = read_sql_construct(';', 0, 0, ";", RAW_PARSE_DEFAULT, - false, false, true, + false, false, &startloc, NULL); /* overwrite "perform" ... */ memcpy(new->expr->query, " SELECT", 7); @@ -981,7 +980,7 @@ stmt_assign : T_DATUM plpgsql_push_back_token(T_DATUM); new->expr = read_sql_construct(';', 0, 0, ";", pmode, - false, true, true, + false, true, NULL, NULL); $$ = (PLpgSQL_stmt *) new; @@ -1474,7 +1473,6 @@ for_control : for_variable K_IN RAW_PARSE_DEFAULT, true, false, - true, &expr1loc, &tok); @@ -1879,7 +1877,7 @@ stmt_raise : K_RAISE expr = read_sql_construct(',', ';', K_USING, ", or ; or USING", RAW_PARSE_PLPGSQL_EXPR, - true, true, true, + true, true, NULL, &tok); new->params = lappend(new->params, expr); } @@ -2016,7 +2014,7 @@ stmt_dynexecute : K_EXECUTE expr = read_sql_construct(K_INTO, K_USING, ';', "INTO or USING or ;", RAW_PARSE_PLPGSQL_EXPR, - true, true, true, + true, true, NULL, &endtoken); new = palloc(sizeof(PLpgSQL_stmt_dynexecute)); @@ -2055,7 +2053,7 @@ stmt_dynexecute : K_EXECUTE expr = read_sql_construct(',', ';', K_INTO, ", or ; or INTO", RAW_PARSE_PLPGSQL_EXPR, - true, true, true, + true, true, NULL, &endtoken); new->params = lappend(new->params, expr); } while (endtoken == ','); @@ -2640,7 +2638,7 @@ read_sql_expression(int until, const char *expected) { return read_sql_construct(until, 0, 0, expected, RAW_PARSE_PLPGSQL_EXPR, - true, true, true, NULL, NULL); + true, true, NULL, NULL); } /* Convenience routine to read an expression with two possible terminators */ @@ -2650,7 +2648,7 @@ read_sql_expression2(int until, int until2, const char *expected, { return read_sql_construct(until, until2, 0, expected, RAW_PARSE_PLPGSQL_EXPR, - true, true, true, NULL, endtoken); + true, true, NULL, endtoken); } /* Convenience routine to read a SQL statement that must end with ';' */ @@ -2659,7 +2657,7 @@ read_sql_stmt(void) { return read_sql_construct(';', 0, 0, ";", RAW_PARSE_DEFAULT, - false, true, true, NULL, NULL); + false, true, NULL, NULL); } /* @@ -2672,7 +2670,6 @@ read_sql_stmt(void) * parsemode: raw_parser() mode to use * isexpression: whether to say we're reading an "expression" or a "statement" * valid_sql: whether to check the syntax of the expr - * trim: trim trailing whitespace * startloc: if not NULL, location of first token is stored at *startloc * endtoken: if not NULL, ending token is stored at *endtoken * (this is only interesting if until2 or until3 isn't zero) @@ -2685,7 +2682,6 @@ read_sql_construct(int until, RawParseMode parsemode, bool isexpression, bool valid_sql, - bool trim, int *startloc, int *endtoken) { @@ -2693,6 +2689,7 @@ read_sql_construct(int until, StringInfoData ds; IdentifierLookup save_IdentifierLookup; int startlocation = -1; + int endlocation = -1; int parenlevel = 0; PLpgSQL_expr *expr; @@ -2743,6 +2740,8 @@ read_sql_construct(int until, expected), parser_errposition(yylloc))); } + /* Remember end+1 location of last accepted token */ + endlocation = yylloc + plpgsql_token_length(); } plpgsql_IdentifierLookup = save_IdentifierLookup; @@ -2753,7 +2752,7 @@ read_sql_construct(int until, *endtoken = tok; /* give helpful complaint about empty input */ - if (startlocation >= yylloc) + if (startlocation >= endlocation) { if (isexpression) yyerror("missing expression"); @@ -2761,14 +2760,14 @@ read_sql_construct(int until, yyerror("missing SQL statement"); } - plpgsql_append_source_text(&ds, startlocation, yylloc); - - /* trim any trailing whitespace, for neatness */ - if (trim) - { - while (ds.len > 0 && scanner_isspace(ds.data[ds.len - 1])) - ds.data[--ds.len] = '\0'; - } + /* + * We save only the text from startlocation to endlocation-1. This + * suppresses the "until" token as well as any whitespace or comments + * following the last accepted token. (We used to strip such trailing + * whitespace by hand, but that causes problems if there's a "-- comment" + * in front of said whitespace.) + */ + plpgsql_append_source_text(&ds, startlocation, endlocation); expr = palloc0(sizeof(PLpgSQL_expr)); expr->query = pstrdup(ds.data); @@ -3933,16 +3932,12 @@ read_cursor_args(PLpgSQL_var *cursor, int until) * Read the value expression. To provide the user with meaningful * parse error positions, we check the syntax immediately, instead of * checking the final expression that may have the arguments - * reordered. Trailing whitespace must not be trimmed, because - * otherwise input of the form (param -- comment\n, param) would be - * translated into a form where the second parameter is commented - * out. + * reordered. */ item = read_sql_construct(',', ')', 0, ",\" or \")", RAW_PARSE_PLPGSQL_EXPR, true, true, - false, /* do not trim */ NULL, &endtoken); argv[argpos] = item->query; diff --git a/src/pl/plpgsql/src/pl_scanner.c b/src/pl/plpgsql/src/pl_scanner.c index 101804d506..9407da51ef 100644 --- a/src/pl/plpgsql/src/pl_scanner.c +++ b/src/pl/plpgsql/src/pl_scanner.c @@ -184,6 +184,8 @@ plpgsql_yylex(void) tok1 = T_DATUM; else tok1 = T_CWORD; + /* Adjust token length to include A.B.C */ + aux1.leng = aux5.lloc - aux1.lloc + aux5.leng; } else { @@ -197,6 +199,8 @@ plpgsql_yylex(void) tok1 = T_DATUM; else tok1 = T_CWORD; + /* Adjust token length to include A.B */ + aux1.leng = aux3.lloc - aux1.lloc + aux3.leng; } } else @@ -210,6 +214,8 @@ plpgsql_yylex(void) tok1 = T_DATUM; else tok1 = T_CWORD; + /* Adjust token length to include A.B */ + aux1.leng = aux3.lloc - aux1.lloc + aux3.leng; } } else @@ -298,6 +304,17 @@ plpgsql_yylex(void) return tok1; } +/* + * Return the length of the token last returned by plpgsql_yylex(). + * + * In the case of compound tokens, the length includes all the parts. + */ +int +plpgsql_token_length(void) +{ + return plpgsql_yyleng; +} + /* * Internal yylex function. This wraps the core lexer and adds one feature: * a token pushback stack. We also make a couple of trivial single-token diff --git a/src/pl/plpgsql/src/plpgsql.h b/src/pl/plpgsql/src/plpgsql.h index 40056bb851..50c3b28472 100644 --- a/src/pl/plpgsql/src/plpgsql.h +++ b/src/pl/plpgsql/src/plpgsql.h @@ -1317,6 +1317,7 @@ extern void plpgsql_dumptree(PLpgSQL_function *func); */ extern int plpgsql_base_yylex(void); extern int plpgsql_yylex(void); +extern int plpgsql_token_length(void); extern void plpgsql_push_back_token(int token); extern bool plpgsql_token_is_unreserved_keyword(int token); extern void plpgsql_append_source_text(StringInfo buf, diff --git a/src/pl/plpgsql/src/sql/plpgsql_control.sql b/src/pl/plpgsql/src/sql/plpgsql_control.sql index ed7231134f..8e007c51dc 100644 --- a/src/pl/plpgsql/src/sql/plpgsql_control.sql +++ b/src/pl/plpgsql/src/sql/plpgsql_control.sql @@ -486,3 +486,17 @@ select case_test(1); select case_test(2); select case_test(12); select case_test(13); + +-- test line comment between WHEN and THEN +create or replace function case_comment(int) returns text as $$ +begin + case $1 + when 1 -- comment before THEN + then return 'one'; + else + return 'other'; + end case; +end; +$$ language plpgsql immutable; + +select case_comment(1); diff --git a/src/test/regress/expected/plpgsql.out b/src/test/regress/expected/plpgsql.out index 0637256676..074af8f33a 100644 --- a/src/test/regress/expected/plpgsql.out +++ b/src/test/regress/expected/plpgsql.out @@ -2390,11 +2390,9 @@ select namedparmcursor_test7(); ERROR: division by zero CONTEXT: SQL expression "42/0 AS p1, 77 AS p2" PL/pgSQL function namedparmcursor_test7() line 6 at OPEN --- check that line comments work correctly within the argument list (there --- is some special handling of this case in the code: the newline after the --- comment must be preserved when the argument-evaluating query is --- constructed, otherwise the comment effectively comments out the next --- argument, too) +-- check that line comments work correctly within the argument list +-- (this used to require a special hack in the code; it no longer does, +-- but let's keep the test anyway) create function namedparmcursor_test8() returns int4 as $$ declare c1 cursor (p1 int, p2 int) for diff --git a/src/test/regress/sql/plpgsql.sql b/src/test/regress/sql/plpgsql.sql index 9ca9449a50..18c91572ae 100644 --- a/src/test/regress/sql/plpgsql.sql +++ b/src/test/regress/sql/plpgsql.sql @@ -2047,11 +2047,9 @@ begin end $$ language plpgsql; select namedparmcursor_test7(); --- check that line comments work correctly within the argument list (there --- is some special handling of this case in the code: the newline after the --- comment must be preserved when the argument-evaluating query is --- constructed, otherwise the comment effectively comments out the next --- argument, too) +-- check that line comments work correctly within the argument list +-- (this used to require a special hack in the code; it no longer does, +-- but let's keep the test anyway) create function namedparmcursor_test8() returns int4 as $$ declare c1 cursor (p1 int, p2 int) for
pgsql-hackers by date: