Re: Parsing error with begin atomic syntax used in a do block - Mailing list pgsql-bugs

From Tom Lane
Subject Re: Parsing error with begin atomic syntax used in a do block
Date
Msg-id 2176911.1705545007@sss.pgh.pa.us
Whole thread Raw
In response to Re: Parsing error with begin atomic syntax used in a do block  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-bugs
I wrote:
> The problem here is that "begin" is a reserved word in plpgsql
> (and for that matter, so is "end").

Oh!  I'm mistaken, that is not the issue.  The problem is that
plpgsql assumes that the SQL command ends at the first semicolon.

psql has pretty much this same issue of "does this semicolon end
the command", and it has some heuristics for dealing with that.
Since that's survived awhile now without complaints, we could do
worse than to propagate the identical heuristics into plpgsql,
more or less as attached.

(This also fixes the same problem for multi-command CREATE RULE,
which has been broken in plpgsql since the late bronze age;
but there were not enough people complaining to prompt a fix.)

            regards, tom lane

diff --git a/src/pl/plpgsql/src/Makefile b/src/pl/plpgsql/src/Makefile
index dfb815212f..63cb96fae3 100644
--- a/src/pl/plpgsql/src/Makefile
+++ b/src/pl/plpgsql/src/Makefile
@@ -32,8 +32,9 @@ DATA = plpgsql.control plpgsql--1.0.sql

 REGRESS_OPTS = --dbname=$(PL_TESTDB)

-REGRESS = plpgsql_array plpgsql_call plpgsql_control plpgsql_copy plpgsql_domain \
-    plpgsql_record plpgsql_cache plpgsql_simple plpgsql_transaction \
+REGRESS = plpgsql_array plpgsql_cache plpgsql_call plpgsql_control \
+    plpgsql_copy plpgsql_domain plpgsql_misc \
+    plpgsql_record plpgsql_simple plpgsql_transaction \
     plpgsql_trap plpgsql_trigger plpgsql_varprops

 # where to find gen_keywordlist.pl and subsidiary files
diff --git a/src/pl/plpgsql/src/expected/plpgsql_misc.out b/src/pl/plpgsql/src/expected/plpgsql_misc.out
new file mode 100644
index 0000000000..faddba2511
--- /dev/null
+++ b/src/pl/plpgsql/src/expected/plpgsql_misc.out
@@ -0,0 +1,31 @@
+--
+-- Miscellaneous topics
+--
+-- Verify that we can parse new-style CREATE FUNCTION/PROCEDURE
+do
+$$
+  declare procedure int;  -- check we still recognize non-keywords as vars
+  begin
+  create function test1() returns int
+    begin atomic
+      select 2 + 2;
+    end;
+  create or replace procedure test2(x int)
+    begin atomic
+      select x + 2;
+    end;
+  end
+$$;
+\sf test1
+CREATE OR REPLACE FUNCTION public.test1()
+ RETURNS integer
+ LANGUAGE sql
+BEGIN ATOMIC
+ SELECT (2 + 2);
+END
+\sf test2
+CREATE OR REPLACE PROCEDURE public.test2(IN x integer)
+ LANGUAGE sql
+BEGIN ATOMIC
+ SELECT (x + 2);
+END
diff --git a/src/pl/plpgsql/src/meson.build b/src/pl/plpgsql/src/meson.build
index be213c51ee..3dd734b776 100644
--- a/src/pl/plpgsql/src/meson.build
+++ b/src/pl/plpgsql/src/meson.build
@@ -76,12 +76,13 @@ tests += {
   'regress': {
     'sql': [
       'plpgsql_array',
+      'plpgsql_cache',
       'plpgsql_call',
       'plpgsql_control',
       'plpgsql_copy',
       'plpgsql_domain',
+      'plpgsql_misc',
       'plpgsql_record',
-      'plpgsql_cache',
       'plpgsql_simple',
       'plpgsql_transaction',
       'plpgsql_trap',
diff --git a/src/pl/plpgsql/src/pl_gram.y b/src/pl/plpgsql/src/pl_gram.y
index 86680d26ce..f4ccb22e35 100644
--- a/src/pl/plpgsql/src/pl_gram.y
+++ b/src/pl/plpgsql/src/pl_gram.y
@@ -76,7 +76,8 @@ static    PLpgSQL_expr    *read_sql_expression2(int until, int until2,
                                               int *endtoken);
 static    PLpgSQL_expr    *read_sql_stmt(void);
 static    PLpgSQL_type    *read_datatype(int tok);
-static    PLpgSQL_stmt    *make_execsql_stmt(int firsttoken, int location);
+static    PLpgSQL_stmt    *make_execsql_stmt(int firsttoken, int location,
+                                           PLword *word);
 static    PLpgSQL_stmt_fetch *read_fetch_direction(void);
 static    void             complete_direction(PLpgSQL_stmt_fetch *fetch,
                                             bool *check_FROM);
@@ -1972,15 +1973,15 @@ loop_body        : proc_sect K_END K_LOOP opt_label ';'
  */
 stmt_execsql    : K_IMPORT
                     {
-                        $$ = make_execsql_stmt(K_IMPORT, @1);
+                        $$ = make_execsql_stmt(K_IMPORT, @1, NULL);
                     }
                 | K_INSERT
                     {
-                        $$ = make_execsql_stmt(K_INSERT, @1);
+                        $$ = make_execsql_stmt(K_INSERT, @1, NULL);
                     }
                 | K_MERGE
                     {
-                        $$ = make_execsql_stmt(K_MERGE, @1);
+                        $$ = make_execsql_stmt(K_MERGE, @1, NULL);
                     }
                 | T_WORD
                     {
@@ -1991,7 +1992,7 @@ stmt_execsql    : K_IMPORT
                         if (tok == '=' || tok == COLON_EQUALS ||
                             tok == '[' || tok == '.')
                             word_is_not_variable(&($1), @1);
-                        $$ = make_execsql_stmt(T_WORD, @1);
+                        $$ = make_execsql_stmt(T_WORD, @1, &($1));
                     }
                 | T_CWORD
                     {
@@ -2002,7 +2003,7 @@ stmt_execsql    : K_IMPORT
                         if (tok == '=' || tok == COLON_EQUALS ||
                             tok == '[' || tok == '.')
                             cword_is_not_variable(&($1), @1);
-                        $$ = make_execsql_stmt(T_CWORD, @1);
+                        $$ = make_execsql_stmt(T_CWORD, @1, NULL);
                     }
                 ;

@@ -2947,8 +2948,13 @@ read_datatype(int tok)
     return result;
 }

+/*
+ * Read a generic SQL statement.  We have already read its first token;
+ * firsttoken is that token's code and location its starting location.
+ * If firsttoken == T_WORD, pass its yylval value as "word", else pass NULL.
+ */
 static PLpgSQL_stmt *
-make_execsql_stmt(int firsttoken, int location)
+make_execsql_stmt(int firsttoken, int location, PLword *word)
 {
     StringInfoData ds;
     IdentifierLookup save_IdentifierLookup;
@@ -2961,9 +2967,16 @@ make_execsql_stmt(int firsttoken, int location)
     bool        have_strict = false;
     int            into_start_loc = -1;
     int            into_end_loc = -1;
+    int            paren_depth = 0;
+    int            begin_depth = 0;
+    bool        in_routine_definition = false;
+    int            token_count = 0;
+    char        tokens[4];        /* records the first few tokens */

     initStringInfo(&ds);

+    memset(tokens, 0, sizeof(tokens));
+
     /* special lookup mode for identifiers within the SQL text */
     save_IdentifierLookup = plpgsql_IdentifierLookup;
     plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_EXPR;
@@ -2972,6 +2985,12 @@ make_execsql_stmt(int firsttoken, int location)
      * Scan to the end of the SQL command.  Identify any INTO-variables
      * clause lurking within it, and parse that via read_into_target().
      *
+     * The end of the statement is defined by a semicolon ... except that
+     * semicolons within parentheses or BEGIN/END blocks don't terminate a
+     * statement.  We follow psql's lead in not recognizing BEGIN/END except
+     * after CREATE [OR REPLACE] {FUNCTION|PROCEDURE}.  END can also appear
+     * within a CASE construct, so we treat CASE/END like BEGIN/END.
+     *
      * Because INTO is sometimes used in the main SQL grammar, we have to be
      * careful not to take any such usage of INTO as a PL/pgSQL INTO clause.
      * There are currently three such cases:
@@ -2997,13 +3016,50 @@ make_execsql_stmt(int firsttoken, int location)
      * break this logic again ... beware!
      */
     tok = firsttoken;
+    if (tok == T_WORD && strcmp(word->ident, "create") == 0)
+        tokens[token_count] = 'c';
+    token_count++;
+
     for (;;)
     {
         prev_tok = tok;
         tok = yylex();
         if (have_into && into_end_loc < 0)
             into_end_loc = yylloc;        /* token after the INTO part */
-        if (tok == ';')
+        /* Detect CREATE [OR REPLACE] {FUNCTION|PROCEDURE} */
+        if (tokens[0] == 'c' && token_count < sizeof(tokens))
+        {
+            if (tok == K_OR)
+                tokens[token_count] = 'o';
+            else if (tok == T_WORD &&
+                     strcmp(yylval.word.ident, "replace") == 0)
+                tokens[token_count] = 'r';
+            else if (tok == T_WORD &&
+                     strcmp(yylval.word.ident, "function") == 0)
+                tokens[token_count] = 'f';
+            else if (tok == T_WORD &&
+                     strcmp(yylval.word.ident, "procedure") == 0)
+                tokens[token_count] = 'f';    /* treat same as "function" */
+            if (tokens[1] == 'f' ||
+                (tokens[1] == 'o' && tokens[2] == 'r' && tokens[3] == 'f'))
+                in_routine_definition = true;
+            token_count++;
+        }
+        /* Track paren nesting (needed for CREATE RULE syntax) */
+        if (tok == '(')
+            paren_depth++;
+        else if (tok == ')' && paren_depth > 0)
+            paren_depth--;
+        /* We need track BEGIN/END nesting only in a routine definition */
+        if (in_routine_definition)
+        {
+            if (tok == K_BEGIN || tok == K_CASE)
+                begin_depth++;
+            else if (tok == K_END && begin_depth > 0)
+                begin_depth--;
+        }
+        /* Command-ending semicolon? */
+        if (tok == ';' && paren_depth == 0 && begin_depth == 0)
             break;
         if (tok == 0)
             yyerror("unexpected end of function definition");
diff --git a/src/pl/plpgsql/src/sql/plpgsql_misc.sql b/src/pl/plpgsql/src/sql/plpgsql_misc.sql
new file mode 100644
index 0000000000..71a8fc232e
--- /dev/null
+++ b/src/pl/plpgsql/src/sql/plpgsql_misc.sql
@@ -0,0 +1,22 @@
+--
+-- Miscellaneous topics
+--
+
+-- Verify that we can parse new-style CREATE FUNCTION/PROCEDURE
+do
+$$
+  declare procedure int;  -- check we still recognize non-keywords as vars
+  begin
+  create function test1() returns int
+    begin atomic
+      select 2 + 2;
+    end;
+  create or replace procedure test2(x int)
+    begin atomic
+      select x + 2;
+    end;
+  end
+$$;
+
+\sf test1
+\sf test2

pgsql-bugs by date:

Previous
From: Jeff Davis
Date:
Subject: Re: [16+] subscription can end up in inconsistent state
Next
From: vignesh C
Date:
Subject: Re: [16+] subscription can end up in inconsistent state