dollar quoting with flex - Mailing list pgsql-patches

From Andrew Dunstan
Subject dollar quoting with flex
Date
Msg-id 403B8C83.5030607@dunslane.net
Whole thread Raw
Responses Re: dollar quoting with flex  (Tom Lane <tgl@sss.pgh.pa.us>)
Re: dollar quoting with flex  (Tom Lane <tgl@sss.pgh.pa.us>)
List pgsql-patches
(Fourth try ;-)

Attached is a patch for dollar quoting in the backend and in psql (with
the new flex scanner). I'm fairly confident about the backend (because
this is mainly Tom's work adapted :-) ) but rather less so about psql -
I don't entirely understand all the odd states in psql's scanner. I'm
not sure that I have freed memory in all the necessary cases. Nor am
I sure what the scanner state is, or should be, if an included file ends
while still inside a dollar-quoted string, or how such a situation should
be handled. So, some extra eyeballs would be appreciated.

However - it does seem to work in my simple testing.

If this is all OK, the remaining tasks would include pg_dump and docs (Jon
Jensen says he will attack these two), plus some regression tests (any
volunteers?).

cheers

andrew
Index: src/backend/parser/scan.l
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/parser/scan.l,v
retrieving revision 1.114
diff -c -r1.114 scan.l
*** src/backend/parser/scan.l    21 Feb 2004 00:34:52 -0000    1.114
--- src/backend/parser/scan.l    24 Feb 2004 17:33:01 -0000
***************
*** 37,42 ****
--- 37,43 ----
  extern YYSTYPE yylval;

  static int        xcdepth = 0;    /* depth of nesting in slash-star comments */
+ static char    *dolqstart;      /* current $foo$ quote start string */

  /*
   * literalbuf is used to accumulate literal values when multiple rules
***************
*** 94,99 ****
--- 95,101 ----
   *  <xd> delimited identifiers (double-quoted identifiers)
   *  <xh> hexadecimal numeric string
   *  <xq> quoted strings
+  *  <dolq> $foo$ quoted strings
   */

  %x xb
***************
*** 101,106 ****
--- 103,109 ----
  %x xd
  %x xh
  %x xq
+ %x dolq

  /*
   * In order to make the world safe for Windows and Mac clients as well as
***************
*** 175,180 ****
--- 178,194 ----
  xqoctesc        [\\][0-7]{1,3}
  xqcat            {quote}{whitespace_with_newline}{quote}

+ /* $foo$ style quotes ("dollar quoting")
+  * The quoted string starts with $foo$ where "foo" is an optional string
+  * in the form of an identifier, except that it may not contain "$",
+  * and extends to the first occurrence of an identical string.
+  * There is *no* processing of the quoted text.
+  */
+ dolq_start        [A-Za-z\200-\377_]
+ dolq_cont        [A-Za-z\200-\377_0-9]
+ dolqdlm         \$({dolq_start}{dolq_cont}*)?\$
+ dolqins         [^$]+
+
  /* Double quote
   * Allows embedded spaces and other special characters into identifiers.
   */
***************
*** 242,248 ****
  other            .

  /*
!  * Quoted strings must allow some special characters such as single-quote
   *  and newline.
   * Embedded single-quotes are implemented both in the SQL standard
   *  style of two adjacent single quotes "''" and in the Postgres/Java style
--- 256,263 ----
  other            .

  /*
!  * Dollar quoted strings are totally opaque, and no escaping is done on them.
!  * Other quoted strings must allow some special characters such as single-quote
   *  and newline.
   * Embedded single-quotes are implemented both in the SQL standard
   *  style of two adjacent single quotes "''" and in the Postgres/Java style
***************
*** 390,395 ****
--- 405,439 ----
                  }
  <xq><<EOF>>        { yyerror("unterminated quoted string"); }

+ {dolqdlm}       {
+                     token_start = yytext;
+                     dolqstart = pstrdup(yytext);
+                     BEGIN(dolq);
+                     startlit();
+                 }
+ <dolq>{dolqdlm} {
+                     if (strcmp(yytext, dolqstart) == 0)
+                     {
+                         pfree(dolqstart);
+                         BEGIN(INITIAL);
+                         yylval.str = litbufdup();
+                         return SCONST;
+                     }
+                     /*
+                      * When we fail to match $...$ to dolqstart, transfer
+                      * the $... part to the output, but put back the final
+                      * $ for rescanning.  Consider $delim$...$junk$delim$
+                      */
+                     addlit(yytext, yyleng-1);
+                     yyless(yyleng-1);
+                 }
+ <dolq>{dolqins} {
+                     addlit(yytext, yyleng);
+                 }
+ <dolq>.         {
+                     addlitchar(yytext[0]);
+                 }
+ <dolq><<EOF>>   { yyerror("unterminated dollar-quoted string"); }
  {xdstart}        {
                      token_start = yytext;
                      BEGIN(xd);
***************
*** 407,413 ****
                      yylval.str = ident;
                      return IDENT;
                  }
! <xd>{xddouble} {
                      addlitchar('"');
                  }
  <xd>{xdinside}    {
--- 451,457 ----
                      yylval.str = ident;
                      return IDENT;
                  }
! <xd>{xddouble}  {
                      addlitchar('"');
                  }
  <xd>{xdinside}    {
Index: src/bin/psql/prompt.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/bin/psql/prompt.c,v
retrieving revision 1.34
diff -c -r1.34 prompt.c
*** src/bin/psql/prompt.c    25 Jan 2004 03:07:22 -0000    1.34
--- src/bin/psql/prompt.c    24 Feb 2004 17:33:19 -0000
***************
*** 85,90 ****
--- 85,91 ----
          case PROMPT_CONTINUE:
          case PROMPT_SINGLEQUOTE:
          case PROMPT_DOUBLEQUOTE:
+         case PROMPT_DOLLARQUOTE:
          case PROMPT_COMMENT:
          case PROMPT_PAREN:
              prompt_name = "PROMPT2";
***************
*** 198,203 ****
--- 199,207 ----
                              break;
                          case PROMPT_DOUBLEQUOTE:
                              buf[0] = '"';
+                             break;
+                         case PROMPT_DOLLARQUOTE:
+                             buf[0] = '$';
                              break;
                          case PROMPT_COMMENT:
                              buf[0] = '*';
Index: src/bin/psql/prompt.h
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/bin/psql/prompt.h,v
retrieving revision 1.13
diff -c -r1.13 prompt.h
*** src/bin/psql/prompt.h    29 Nov 2003 19:52:07 -0000    1.13
--- src/bin/psql/prompt.h    24 Feb 2004 17:33:19 -0000
***************
*** 15,20 ****
--- 15,21 ----
      PROMPT_COMMENT,
      PROMPT_SINGLEQUOTE,
      PROMPT_DOUBLEQUOTE,
+     PROMPT_DOLLARQUOTE,
      PROMPT_PAREN,
      PROMPT_COPY
  } promptStatus_t;
Index: src/bin/psql/psqlscan.l
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/bin/psql/psqlscan.l,v
retrieving revision 1.1
diff -c -r1.1 psqlscan.l
*** src/bin/psql/psqlscan.l    19 Feb 2004 19:40:09 -0000    1.1
--- src/bin/psql/psqlscan.l    24 Feb 2004 17:33:19 -0000
***************
*** 92,97 ****
--- 92,98 ----
      int            start_state;    /* saved YY_START */
      int            paren_depth;    /* depth of nesting in parentheses */
      int            xcdepth;        /* depth of nesting in slash-star comments */
+     char        *dolqstart;      /* current $foo$ quote start string */
  } PsqlScanStateData;

  static PsqlScanState cur_state;    /* current state while active */
***************
*** 151,156 ****
--- 152,158 ----
   *  <xd> delimited identifiers (double-quoted identifiers)
   *  <xh> hexadecimal numeric string
   *  <xq> quoted strings
+  *  <dolq> $foo$ quoted strings
   */

  %x xb
***************
*** 158,163 ****
--- 160,166 ----
  %x xd
  %x xh
  %x xq
+ %x dolq
  /* Additional exclusive states for psql only: lex backslash commands */
  %x xslashcmd
  %x xslasharg
***************
*** 241,246 ****
--- 244,260 ----
  xqoctesc        [\\][0-7]{1,3}
  xqcat            {quote}{whitespace_with_newline}{quote}

+ /* $foo$ style quotes ("dollar quoting")
+  * The quoted string starts with $foo$ where "foo" is an optional string
+  * in the form of an identifier, except that it may not contain "$",
+  * and extends to the first occurrence of an identical string.
+  * There is *no* processing of the quoted text.
+  */
+ dolq_start        [A-Za-z\200-\377_]
+ dolq_cont        [A-Za-z\200-\377_0-9]
+ dolqdlm         \$({dolq_start}{dolq_cont}*)?\$
+ dolqins         [^$]+
+
  /* Double quote
   * Allows embedded spaces and other special characters into identifiers.
   */
***************
*** 428,433 ****
--- 442,477 ----
                      ECHO;
                  }

+ {dolqdlm}       {
+                     cur_state->dolqstart = pg_strdup(yytext);
+                     BEGIN(dolq);
+                     ECHO;
+                 }
+ <dolq>{dolqdlm} {
+                     if (strcmp(yytext, cur_state->dolqstart) == 0)
+                     {
+                         free(cur_state->dolqstart);
+                         cur_state->dolqstart = NULL;
+                         BEGIN(INITIAL);
+                         ECHO;
+                     }
+                     else
+                     {
+                         /*
+                          * When we fail to match $...$ to dolqstart, transfer
+                          * the $... part to the output, but put back the final
+                          * $ for rescanning.  Consider $delim$...$junk$delim$
+                          */
+                         emit(yytext, yyleng-1);
+                         yyless(yyleng-1);
+                     }
+                 }
+ <dolq>{dolqins} {
+                     ECHO;
+                 }
+ <dolq>.         {
+                     ECHO;
+                 }
  {xdstart}        {
                      BEGIN(xd);
                      ECHO;
***************
*** 1007,1012 ****
--- 1051,1060 ----
                  case xq:
                      result = PSCAN_INCOMPLETE;
                      *prompt = PROMPT_SINGLEQUOTE;
+                     break;
+                 case dolq:
+                     result = PSCAN_INCOMPLETE;
+                     *prompt = PROMPT_DOLLARQUOTE;
                      break;
                  default:
                      /* can't get here */

pgsql-patches by date:

Previous
From: Devrim GUNDUZ
Date:
Subject: Turkish translation of FAQ
Next
From: Tom Lane
Date:
Subject: Re: dollar quoting with flex