Re: BUG #15555: Syntax errors when using the COMMENT command in plpgsql and a "comment" variable - Mailing list pgsql-bugs

From Tom Lane
Subject Re: BUG #15555: Syntax errors when using the COMMENT command in plpgsql and a "comment" variable
Date
Msg-id 23647.1545084358@sss.pgh.pa.us
Whole thread Raw
In response to Re: BUG #15555: Syntax errors when using the COMMENT command in plpgsql and a "comment" variable  (Tom Lane <tgl@sss.pgh.pa.us>)
Responses Re: BUG #15555: Syntax errors when using the COMMENT command inplpgsql and a "comment" variable
List pgsql-bugs
I wrote:
>> I wonder whether we could improve matters by adjusting the heuristic for
>> such things in pl_scanner.c:
>> 
>>         * If we are at start of statement, prefer unreserved keywords
>>         * over variable names, unless the next token is assignment or
>>         * '[', in which case prefer variable names.  (Note we need not
>>         * consider '.' as the next token; that case was handled above,
>>         * and we always prefer variable names in that case.)  If we are
>>         * not at start of statement, always prefer variable names over
>>         * unreserved keywords.

[ pokes at that for a bit ]  The logic here is a bit denser than
one could wish, but here's a draft patch that seems to get the
job done.  It passes check-world, which isn't conclusive but
at least suggests that this doesn't break anything.

I'll add this to the next CF in hopes that somebody will try to
break it.

            regards, tom lane

diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c
index 8bacc74..03e7c6c 100644
*** a/src/pl/plpgsql/src/pl_comp.c
--- b/src/pl/plpgsql/src/pl_comp.c
*************** make_datum_param(PLpgSQL_expr *expr, int
*** 1353,1358 ****
--- 1353,1361 ----
   * yytxt is the original token text; we need this to check for quoting,
   * so that later checks for unreserved keywords work properly.
   *
+  * We attempt to recognize the token as a variable only if lookup is true
+  * and the plpgsql_IdentifierLookup context permits it.
+  *
   * If recognized as a variable, fill in *wdatum and return true;
   * if not recognized, fill in *word and return false.
   * (Note: those two pointers actually point to members of the same union,
*************** make_datum_param(PLpgSQL_expr *expr, int
*** 1360,1376 ****
   * ----------
   */
  bool
! plpgsql_parse_word(char *word1, const char *yytxt,
                     PLwdatum *wdatum, PLword *word)
  {
      PLpgSQL_nsitem *ns;

      /*
!      * We should do nothing in DECLARE sections.  In SQL expressions, there's
!      * no need to do anything either --- lookup will happen when the
!      * expression is compiled.
       */
!     if (plpgsql_IdentifierLookup == IDENTIFIER_LOOKUP_NORMAL)
      {
          /*
           * Do a lookup in the current namespace stack
--- 1363,1379 ----
   * ----------
   */
  bool
! plpgsql_parse_word(char *word1, const char *yytxt, bool lookup,
                     PLwdatum *wdatum, PLword *word)
  {
      PLpgSQL_nsitem *ns;

      /*
!      * We should not lookup variables in DECLARE sections.  In SQL
!      * expressions, there's no need to do so either --- lookup will happen
!      * when the expression is compiled.
       */
!     if (lookup && plpgsql_IdentifierLookup == IDENTIFIER_LOOKUP_NORMAL)
      {
          /*
           * Do a lookup in the current namespace stack
diff --git a/src/pl/plpgsql/src/pl_scanner.c b/src/pl/plpgsql/src/pl_scanner.c
index ab18946..3e746d1 100644
*** a/src/pl/plpgsql/src/pl_scanner.c
--- b/src/pl/plpgsql/src/pl_scanner.c
*************** plpgsql_yylex(void)
*** 328,333 ****
--- 328,334 ----
                  push_back_token(tok2, &aux2);
                  if (plpgsql_parse_word(aux1.lval.str,
                                         core_yy.scanbuf + aux1.lloc,
+                                        true,
                                         &aux1.lval.wdatum,
                                         &aux1.lval.word))
                      tok1 = T_DATUM;
*************** plpgsql_yylex(void)
*** 349,401 ****
              push_back_token(tok2, &aux2);

              /*
!              * If we are at start of statement, prefer unreserved keywords
!              * over variable names, unless the next token is assignment or
!              * '[', in which case prefer variable names.  (Note we need not
!              * consider '.' as the next token; that case was handled above,
!              * and we always prefer variable names in that case.)  If we are
!              * not at start of statement, always prefer variable names over
!              * unreserved keywords.
               */
!             if (AT_STMT_START(plpgsql_yytoken) &&
!                 !(tok2 == '=' || tok2 == COLON_EQUALS || tok2 == '['))
              {
!                 /* try for unreserved keyword, then for variable name */
!                 if (core_yy.scanbuf[aux1.lloc] != '"' &&
!                     (kw = ScanKeywordLookup(aux1.lval.str,
!                                             unreserved_keywords,
!                                             num_unreserved_keywords)))
!                 {
!                     aux1.lval.keyword = kw->name;
!                     tok1 = kw->value;
!                 }
!                 else if (plpgsql_parse_word(aux1.lval.str,
!                                             core_yy.scanbuf + aux1.lloc,
!                                             &aux1.lval.wdatum,
!                                             &aux1.lval.word))
!                     tok1 = T_DATUM;
!                 else
!                     tok1 = T_WORD;
              }
              else
!             {
!                 /* try for variable name, then for unreserved keyword */
!                 if (plpgsql_parse_word(aux1.lval.str,
!                                        core_yy.scanbuf + aux1.lloc,
!                                        &aux1.lval.wdatum,
!                                        &aux1.lval.word))
!                     tok1 = T_DATUM;
!                 else if (!aux1.lval.word.quoted &&
!                          (kw = ScanKeywordLookup(aux1.lval.word.ident,
!                                                  unreserved_keywords,
!                                                  num_unreserved_keywords)))
!                 {
!                     aux1.lval.keyword = kw->name;
!                     tok1 = kw->value;
!                 }
!                 else
!                     tok1 = T_WORD;
!             }
          }
      }
      else
--- 350,389 ----
              push_back_token(tok2, &aux2);

              /*
!              * See if it matches a variable name, except in the context where
!              * we are at start of statement and the next token isn't
!              * assignment or '['.  In that case, it couldn't validly be a
!              * variable name, and skipping the lookup allows variable names to
!              * be used that would conflict with plpgsql or core keywords that
!              * introduce statements (e.g., "comment").  Without this special
!              * logic, every statement-introducing keyword would effectively be
!              * reserved in PL/pgSQL, which would be unpleasant.
!              *
!              * If it isn't a variable name, try to match against unreserved
!              * plpgsql keywords.  If not one of those either, it's T_WORD.
!              *
!              * Note: we must call plpgsql_parse_word even if we don't want to
!              * do variable lookup, because it sets up aux1.lval.word for the
!              * non-variable cases.
               */
!             if (plpgsql_parse_word(aux1.lval.str,
!                                    core_yy.scanbuf + aux1.lloc,
!                                    (!AT_STMT_START(plpgsql_yytoken) ||
!                                     (tok2 == '=' || tok2 == COLON_EQUALS ||
!                                      tok2 == '[')),
!                                    &aux1.lval.wdatum,
!                                    &aux1.lval.word))
!                 tok1 = T_DATUM;
!             else if (!aux1.lval.word.quoted &&
!                      (kw = ScanKeywordLookup(aux1.lval.word.ident,
!                                              unreserved_keywords,
!                                              num_unreserved_keywords)))
              {
!                 aux1.lval.keyword = kw->name;
!                 tok1 = kw->value;
              }
              else
!                 tok1 = T_WORD;
          }
      }
      else
diff --git a/src/pl/plpgsql/src/plpgsql.h b/src/pl/plpgsql/src/plpgsql.h
index 42177cc..962b9c2 100644
*** a/src/pl/plpgsql/src/plpgsql.h
--- b/src/pl/plpgsql/src/plpgsql.h
*************** extern PLpgSQL_function *plpgsql_compile
*** 1175,1181 ****
  extern PLpgSQL_function *plpgsql_compile_inline(char *proc_source);
  extern void plpgsql_parser_setup(struct ParseState *pstate,
                       PLpgSQL_expr *expr);
! extern bool plpgsql_parse_word(char *word1, const char *yytxt,
                     PLwdatum *wdatum, PLword *word);
  extern bool plpgsql_parse_dblword(char *word1, char *word2,
                        PLwdatum *wdatum, PLcword *cword);
--- 1175,1181 ----
  extern PLpgSQL_function *plpgsql_compile_inline(char *proc_source);
  extern void plpgsql_parser_setup(struct ParseState *pstate,
                       PLpgSQL_expr *expr);
! extern bool plpgsql_parse_word(char *word1, const char *yytxt, bool lookup,
                     PLwdatum *wdatum, PLword *word);
  extern bool plpgsql_parse_dblword(char *word1, char *word2,
                        PLwdatum *wdatum, PLcword *cword);
diff --git a/src/test/regress/expected/plpgsql.out b/src/test/regress/expected/plpgsql.out
index b9a987d..d2f68e1 100644
*** a/src/test/regress/expected/plpgsql.out
--- b/src/test/regress/expected/plpgsql.out
*************** select unreserved_test();
*** 4781,4786 ****
--- 4781,4807 ----
                43
  (1 row)

+ create or replace function unreserved_test() returns int as $$
+ declare
+   comment int := 21;
+ begin
+   comment := comment * 2;
+   comment on function unreserved_test() is 'this is a test';
+   return comment;
+ end
+ $$ language plpgsql;
+ select unreserved_test();
+  unreserved_test
+ -----------------
+               42
+ (1 row)
+
+ select obj_description('unreserved_test()'::regprocedure, 'pg_proc');
+  obj_description
+ -----------------
+  this is a test
+ (1 row)
+
  drop function unreserved_test();
  --
  -- Test FOREACH over arrays
diff --git a/src/test/regress/sql/plpgsql.sql b/src/test/regress/sql/plpgsql.sql
index 01239e2..9c8cf75 100644
*** a/src/test/regress/sql/plpgsql.sql
--- b/src/test/regress/sql/plpgsql.sql
*************** $$ language plpgsql;
*** 3892,3897 ****
--- 3892,3911 ----

  select unreserved_test();

+ create or replace function unreserved_test() returns int as $$
+ declare
+   comment int := 21;
+ begin
+   comment := comment * 2;
+   comment on function unreserved_test() is 'this is a test';
+   return comment;
+ end
+ $$ language plpgsql;
+
+ select unreserved_test();
+
+ select obj_description('unreserved_test()'::regprocedure, 'pg_proc');
+
  drop function unreserved_test();

  --

pgsql-bugs by date:

Previous
From: Tom Lane
Date:
Subject: Re: BUG #15555: Syntax errors when using the COMMENT command in plpgsql and a "comment" variable
Next
From: Hugh Ranalli
Date:
Subject: Re: BUG #15548: Unaccent does not remove combining diacritical characters