Sanding down some edge cases for PL/pgSQL reserved words - Mailing list pgsql-hackers
From | Tom Lane |
---|---|
Subject | Sanding down some edge cases for PL/pgSQL reserved words |
Date | |
Msg-id | 2185258.1745617445@sss.pgh.pa.us Whole thread Raw |
Responses |
Re: Sanding down some edge cases for PL/pgSQL reserved words
|
List | pgsql-hackers |
This is a rather delayed response to the discussion of bug #18693 [1], in which I wrote: > (It's kind of annoying that "strict" has to be double-quoted > in the RAISE NOTICE, especially since you get a rather misleading > error if it isn't. But that seems like a different discussion.) As an example of that, if you don't double-quote "strict" in this usage you get regression=# do $$ declare r record; begin SELECT a, b AS STRICT INTO r FROM (SELECT 'A' AS a, 'B' AS b) AS q; RAISE NOTICE 'STRICT r.strict = %', r.strict; end $$; ERROR: record "r" has no field "strict" LINE 1: r.strict ^ QUERY: r.strict CONTEXT: PL/pgSQL function inline_code_block line 3 at RAISE which is pretty bogus because the record *does* have a field named "strict". The actual problem is that STRICT is a fully reserved PL/pgSQL keyword, which means you need to double-quote it if you want to use it this way. The attached patches provide two independent responses to that: 1. AFAICS, there is no real reason for STRICT to be a reserved rather than unreserved PL/pgSQL keyword, and for that matter not EXECUTE either. Making them unreserved does allow some ambiguity, but I don't think there's any surprises in how that ambiguity would be resolved; and certainly we've preferred ambiguity over introducing new reserved keywords in PL/pgSQL before. I think these two just escaped that treatment by dint of being ancient. 2. That "has no field" error message is flat-out wrong. The now-known way to trigger it has a different cause, and what's more, we simply do not know at this point whether the malleable record type has such a field. So in 0002 below I just changed it to assume that the problem is a reserved field name. We might find another way to reach that failure in future, but I doubt that "has no field" would be the right thing to say in any case. This is v19 material at this point, so I'll stick it on the CF queue. regards, tom lane [1] https://www.postgresql.org/message-id/flat/18693-65968418890877b4%40postgresql.org From 2d19cbfbbd9aa4166cbeb46195d7a7b04e8fda52 Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Fri, 25 Apr 2025 16:27:13 -0400 Subject: [PATCH v1 1/2] De-reserve keywords EXECUTE and STRICT in PL/pgSQL. On close inspection, there does not seem to be a strong reason why these should be fully-reserved keywords. I guess they just escaped consideration in previous attempts to minimize PL/pgSQL's list of reserved words. --- src/pl/plpgsql/src/pl_gram.y | 13 +++++++++---- src/pl/plpgsql/src/pl_reserved_kwlist.h | 2 -- src/pl/plpgsql/src/pl_scanner.c | 2 +- src/pl/plpgsql/src/pl_unreserved_kwlist.h | 2 ++ 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/pl/plpgsql/src/pl_gram.y b/src/pl/plpgsql/src/pl_gram.y index 5612e66d023..7b672ea5179 100644 --- a/src/pl/plpgsql/src/pl_gram.y +++ b/src/pl/plpgsql/src/pl_gram.y @@ -1368,7 +1368,8 @@ for_control : for_variable K_IN int tok = yylex(&yylval, &yylloc, yyscanner); int tokloc = yylloc; - if (tok == K_EXECUTE) + if (tok_is_keyword(tok, &yylval, + K_EXECUTE, "execute")) { /* EXECUTE means it's a dynamic FOR loop */ PLpgSQL_stmt_dynfors *new; @@ -2135,7 +2136,8 @@ stmt_open : K_OPEN cursor_variable yyerror(&yylloc, NULL, yyscanner, "syntax error, expected \"FOR\""); tok = yylex(&yylval, &yylloc, yyscanner); - if (tok == K_EXECUTE) + if (tok_is_keyword(tok, &yylval, + K_EXECUTE, "execute")) { int endtoken; @@ -2536,6 +2538,7 @@ unreserved_keyword : | K_ERRCODE | K_ERROR | K_EXCEPTION + | K_EXECUTE | K_EXIT | K_FETCH | K_FIRST @@ -2581,6 +2584,7 @@ unreserved_keyword : | K_SLICE | K_SQLSTATE | K_STACKED + | K_STRICT | K_TABLE | K_TABLE_NAME | K_TYPE @@ -3514,7 +3518,8 @@ make_return_query_stmt(int location, YYSTYPE *yylvalp, YYLTYPE *yyllocp, yyscan_ new->stmtid = ++plpgsql_curr_compile->nstatements; /* check for RETURN QUERY EXECUTE */ - if ((tok = yylex(yylvalp, yyllocp, yyscanner)) != K_EXECUTE) + tok = yylex(yylvalp, yyllocp, yyscanner); + if (!tok_is_keyword(tok, yylvalp, K_EXECUTE, "execute")) { /* ordinary static query */ plpgsql_push_back_token(tok, yylvalp, yyllocp, yyscanner); @@ -3597,7 +3602,7 @@ read_into_target(PLpgSQL_variable **target, bool *strict, YYSTYPE *yylvalp, YYLT *strict = false; tok = yylex(yylvalp, yyllocp, yyscanner); - if (strict && tok == K_STRICT) + if (strict && tok_is_keyword(tok, yylvalp, K_STRICT, "strict")) { *strict = true; tok = yylex(yylvalp, yyllocp, yyscanner); diff --git a/src/pl/plpgsql/src/pl_reserved_kwlist.h b/src/pl/plpgsql/src/pl_reserved_kwlist.h index ce7b0c9d331..f3ef2cbd8d7 100644 --- a/src/pl/plpgsql/src/pl_reserved_kwlist.h +++ b/src/pl/plpgsql/src/pl_reserved_kwlist.h @@ -33,7 +33,6 @@ PG_KEYWORD("case", K_CASE) PG_KEYWORD("declare", K_DECLARE) PG_KEYWORD("else", K_ELSE) PG_KEYWORD("end", K_END) -PG_KEYWORD("execute", K_EXECUTE) PG_KEYWORD("for", K_FOR) PG_KEYWORD("foreach", K_FOREACH) PG_KEYWORD("from", K_FROM) @@ -44,7 +43,6 @@ PG_KEYWORD("loop", K_LOOP) PG_KEYWORD("not", K_NOT) PG_KEYWORD("null", K_NULL) PG_KEYWORD("or", K_OR) -PG_KEYWORD("strict", K_STRICT) PG_KEYWORD("then", K_THEN) PG_KEYWORD("to", K_TO) PG_KEYWORD("using", K_USING) diff --git a/src/pl/plpgsql/src/pl_scanner.c b/src/pl/plpgsql/src/pl_scanner.c index d08187dafcb..19825e5c718 100644 --- a/src/pl/plpgsql/src/pl_scanner.c +++ b/src/pl/plpgsql/src/pl_scanner.c @@ -53,7 +53,7 @@ IdentifierLookup plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL; * We try to avoid reserving more keywords than we have to; but there's * little point in not reserving a word if it's reserved in the core grammar. * Currently, the following words are reserved here but not in the core: - * BEGIN BY DECLARE EXECUTE FOREACH IF LOOP STRICT WHILE + * BEGIN BY DECLARE FOREACH IF LOOP WHILE */ /* ScanKeywordList lookup data for PL/pgSQL keywords */ diff --git a/src/pl/plpgsql/src/pl_unreserved_kwlist.h b/src/pl/plpgsql/src/pl_unreserved_kwlist.h index 98f99ec470c..b48c5a645ff 100644 --- a/src/pl/plpgsql/src/pl_unreserved_kwlist.h +++ b/src/pl/plpgsql/src/pl_unreserved_kwlist.h @@ -58,6 +58,7 @@ PG_KEYWORD("elsif", K_ELSIF) PG_KEYWORD("errcode", K_ERRCODE) PG_KEYWORD("error", K_ERROR) PG_KEYWORD("exception", K_EXCEPTION) +PG_KEYWORD("execute", K_EXECUTE) PG_KEYWORD("exit", K_EXIT) PG_KEYWORD("fetch", K_FETCH) PG_KEYWORD("first", K_FIRST) @@ -103,6 +104,7 @@ PG_KEYWORD("scroll", K_SCROLL) PG_KEYWORD("slice", K_SLICE) PG_KEYWORD("sqlstate", K_SQLSTATE) PG_KEYWORD("stacked", K_STACKED) +PG_KEYWORD("strict", K_STRICT) PG_KEYWORD("table", K_TABLE) PG_KEYWORD("table_name", K_TABLE_NAME) PG_KEYWORD("type", K_TYPE) -- 2.43.5 From ed82817f7d72446474db18b4dac3516e57dba725 Mon Sep 17 00:00:00 2001 From: Tom Lane <tgl@sss.pgh.pa.us> Date: Fri, 25 Apr 2025 17:22:27 -0400 Subject: [PATCH v1 2/2] Improve error report for PL/pgSQL reserved word used as a field name. The current code in resolve_column_ref (dating to commits 01f7d2990 and fe24d7816) believes that not finding a RECFIELD datum is a can't-happen case, in consequence of which I didn't spend a whole lot of time considering what to do if it did happen. But it turns out that it *can* happen if the would-be field name is a fully-reserved PL/pgSQL keyword. Change the error message to describe that situation, and add a test case demonstrating it. This might need further refinement if anyone can find other ways to trigger a failure here; but without an example it's not clear what other error to throw. --- src/pl/plpgsql/src/expected/plpgsql_misc.out | 22 ++++++++++++++++++++ src/pl/plpgsql/src/pl_comp.c | 19 ++++++++++------- src/pl/plpgsql/src/sql/plpgsql_misc.sql | 16 ++++++++++++++ 3 files changed, 50 insertions(+), 7 deletions(-) diff --git a/src/pl/plpgsql/src/expected/plpgsql_misc.out b/src/pl/plpgsql/src/expected/plpgsql_misc.out index a6511df08ec..7c87029783a 100644 --- a/src/pl/plpgsql/src/expected/plpgsql_misc.out +++ b/src/pl/plpgsql/src/expected/plpgsql_misc.out @@ -65,3 +65,25 @@ do $$ declare x public.foo%rowtype; begin end $$; ERROR: relation "public.foo" does not exist CONTEXT: compilation of PL/pgSQL function "inline_code_block" near line 1 do $$ declare x public.misc_table%rowtype; begin end $$; +-- Test handling of a reserved word as a record field name +do $$ declare r record; +begin + select 1 as x, 2 as foreach into r; + raise notice 'r.x = %', r.x; + raise notice 'r.foreach = %', r.foreach; +end $$; +NOTICE: r.x = 1 +ERROR: field name "foreach" is a reserved key word +LINE 1: r.foreach + ^ +HINT: Use double quotes to quote it. +QUERY: r.foreach +CONTEXT: PL/pgSQL function inline_code_block line 5 at RAISE +do $$ declare r record; +begin + select 1 as x, 2 as foreach into r; + raise notice 'r.x = %', r.x; + raise notice 'r."foreach" = %', r."foreach"; +end $$; +NOTICE: r.x = 1 +NOTICE: r."foreach" = 2 diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c index 519f7695d7c..5589adb2474 100644 --- a/src/pl/plpgsql/src/pl_comp.c +++ b/src/pl/plpgsql/src/pl_comp.c @@ -1201,17 +1201,22 @@ resolve_column_ref(ParseState *pstate, PLpgSQL_expr *expr, } /* - * We should not get here, because a RECFIELD datum should - * have been built at parse time for every possible qualified - * reference to fields of this record. But if we do, handle - * it like field-not-found: throw error or return NULL. + * Ideally we'd never get here, because a RECFIELD datum + * should have been built at parse time for every qualified + * reference to a field of this record that appears in the + * source text. However, plpgsql_yylex will not build such a + * datum unless the field name lexes as token type IDENT. + * Hence, if the would-be field name is a PL/pgSQL reserved + * word, we lose. Assume that that's what happened and tell + * the user to quote it, unless the caller prefers we just + * return NULL. */ if (error_if_no_field) ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_COLUMN), - errmsg("record \"%s\" has no field \"%s\"", - (nnames_field == 1) ? name1 : name2, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("field name \"%s\" is a reserved key word", colname), + errhint("Use double quotes to quote it."), parser_errposition(pstate, cref->location))); } break; diff --git a/src/pl/plpgsql/src/sql/plpgsql_misc.sql b/src/pl/plpgsql/src/sql/plpgsql_misc.sql index d3a7f703a75..4c31b9d11fc 100644 --- a/src/pl/plpgsql/src/sql/plpgsql_misc.sql +++ b/src/pl/plpgsql/src/sql/plpgsql_misc.sql @@ -37,3 +37,19 @@ do $$ declare x foo.bar%rowtype; begin end $$; do $$ declare x foo.bar.baz%rowtype; begin end $$; do $$ declare x public.foo%rowtype; begin end $$; do $$ declare x public.misc_table%rowtype; begin end $$; + +-- Test handling of a reserved word as a record field name + +do $$ declare r record; +begin + select 1 as x, 2 as foreach into r; + raise notice 'r.x = %', r.x; + raise notice 'r.foreach = %', r.foreach; +end $$; + +do $$ declare r record; +begin + select 1 as x, 2 as foreach into r; + raise notice 'r.x = %', r.x; + raise notice 'r."foreach" = %', r."foreach"; +end $$; -- 2.43.5
pgsql-hackers by date: