(Fourth try ;-)
Attached is a patch for dollar quoting in the backend and in psql (with
the new flex scanner). I'm fairly confident about the backend (because
this is mainly Tom's work adapted :-) ) but rather less so about psql -
I don't entirely understand all the odd states in psql's scanner. I'm
not sure that I have freed up memory in all the necessary cases. Nor am
I sure what the state is or should be if we end an included file in a
dollar-quoting state, nor how to handle such a situation. So, some extra
eyeballs would be appreciated.
However, it does seem to work in my simple testing.
If this is all OK, the remaining tasks would include pg_dump, docs (Jon
Jensen says he will attack these two), and some regression tests (any
volunteers?).
cheers
andrew
Index: src/backend/parser/scan.l
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/backend/parser/scan.l,v
retrieving revision 1.114
diff -c -r1.114 scan.l
*** src/backend/parser/scan.l 21 Feb 2004 00:34:52 -0000 1.114
--- src/backend/parser/scan.l 24 Feb 2004 17:33:01 -0000
***************
*** 37,42 ****
--- 37,43 ----
extern YYSTYPE yylval;
static int xcdepth = 0; /* depth of nesting in slash-star comments */
+ static char *dolqstart; /* current $foo$ quote start string */
/*
* literalbuf is used to accumulate literal values when multiple rules
***************
*** 94,99 ****
--- 95,101 ----
* <xd> delimited identifiers (double-quoted identifiers)
* <xh> hexadecimal numeric string
* <xq> quoted strings
+ * <dolq> $foo$ quoted strings
*/
%x xb
***************
*** 101,106 ****
--- 103,109 ----
%x xd
%x xh
%x xq
+ %x dolq
/*
* In order to make the world safe for Windows and Mac clients as well as
***************
*** 175,180 ****
--- 178,194 ----
xqoctesc [\\][0-7]{1,3}
xqcat {quote}{whitespace_with_newline}{quote}
+ /* $foo$ style quotes ("dollar quoting")
+ * The quoted string starts with $foo$ where "foo" is an optional string
+ * in the form of an identifier, except that it may not contain "$",
+ * and extends to the first occurrence of an identical string.
+ * There is *no* processing of the quoted text.
+ */
+ dolq_start [A-Za-z\200-\377_]
+ dolq_cont [A-Za-z\200-\377_0-9]
+ dolqdlm \$({dolq_start}{dolq_cont}*)?\$
+ dolqins [^$]+
+
/* Double quote
* Allows embedded spaces and other special characters into identifiers.
*/
***************
*** 242,248 ****
other .
/*
! * Quoted strings must allow some special characters such as single-quote
* and newline.
* Embedded single-quotes are implemented both in the SQL standard
* style of two adjacent single quotes "''" and in the Postgres/Java style
--- 256,263 ----
other .
/*
! * Dollar quoted strings are totally opaque, and no escaping is done on them.
! * Other quoted strings must allow some special characters such as single-quote
* and newline.
* Embedded single-quotes are implemented both in the SQL standard
* style of two adjacent single quotes "''" and in the Postgres/Java style
***************
*** 390,395 ****
--- 405,439 ----
}
<xq><<EOF>> { yyerror("unterminated quoted string"); }
+ {dolqdlm} {
+ token_start = yytext;
+ dolqstart = pstrdup(yytext);
+ BEGIN(dolq);
+ startlit();
+ }
+ <dolq>{dolqdlm} {
+ if (strcmp(yytext, dolqstart) == 0)
+ {
+ pfree(dolqstart);
+ BEGIN(INITIAL);
+ yylval.str = litbufdup();
+ return SCONST;
+ }
+ /*
+ * When we fail to match $...$ to dolqstart, transfer
+ * the $... part to the output, but put back the final
+ * $ for rescanning. Consider $delim$...$junk$delim$
+ */
+ addlit(yytext, yyleng-1);
+ yyless(yyleng-1);
+ }
+ <dolq>{dolqins} {
+ addlit(yytext, yyleng);
+ }
+ <dolq>. {
+ addlitchar(yytext[0]);
+ }
+ <dolq><<EOF>> { yyerror("unterminated dollar-quoted string"); }
{xdstart} {
token_start = yytext;
BEGIN(xd);
***************
*** 407,413 ****
yylval.str = ident;
return IDENT;
}
! <xd>{xddouble} {
addlitchar('"');
}
<xd>{xdinside} {
--- 451,457 ----
yylval.str = ident;
return IDENT;
}
! <xd>{xddouble} {
addlitchar('"');
}
<xd>{xdinside} {
Index: src/bin/psql/prompt.c
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/bin/psql/prompt.c,v
retrieving revision 1.34
diff -c -r1.34 prompt.c
*** src/bin/psql/prompt.c 25 Jan 2004 03:07:22 -0000 1.34
--- src/bin/psql/prompt.c 24 Feb 2004 17:33:19 -0000
***************
*** 85,90 ****
--- 85,91 ----
case PROMPT_CONTINUE:
case PROMPT_SINGLEQUOTE:
case PROMPT_DOUBLEQUOTE:
+ case PROMPT_DOLLARQUOTE:
case PROMPT_COMMENT:
case PROMPT_PAREN:
prompt_name = "PROMPT2";
***************
*** 198,203 ****
--- 199,207 ----
break;
case PROMPT_DOUBLEQUOTE:
buf[0] = '"';
+ break;
+ case PROMPT_DOLLARQUOTE:
+ buf[0] = '$';
break;
case PROMPT_COMMENT:
buf[0] = '*';
Index: src/bin/psql/prompt.h
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/bin/psql/prompt.h,v
retrieving revision 1.13
diff -c -r1.13 prompt.h
*** src/bin/psql/prompt.h 29 Nov 2003 19:52:07 -0000 1.13
--- src/bin/psql/prompt.h 24 Feb 2004 17:33:19 -0000
***************
*** 15,20 ****
--- 15,21 ----
PROMPT_COMMENT,
PROMPT_SINGLEQUOTE,
PROMPT_DOUBLEQUOTE,
+ PROMPT_DOLLARQUOTE,
PROMPT_PAREN,
PROMPT_COPY
} promptStatus_t;
Index: src/bin/psql/psqlscan.l
===================================================================
RCS file: /projects/cvsroot/pgsql-server/src/bin/psql/psqlscan.l,v
retrieving revision 1.1
diff -c -r1.1 psqlscan.l
*** src/bin/psql/psqlscan.l 19 Feb 2004 19:40:09 -0000 1.1
--- src/bin/psql/psqlscan.l 24 Feb 2004 17:33:19 -0000
***************
*** 92,97 ****
--- 92,98 ----
int start_state; /* saved YY_START */
int paren_depth; /* depth of nesting in parentheses */
int xcdepth; /* depth of nesting in slash-star comments */
+ char *dolqstart; /* current $foo$ quote start string */
} PsqlScanStateData;
static PsqlScanState cur_state; /* current state while active */
***************
*** 151,156 ****
--- 152,158 ----
* <xd> delimited identifiers (double-quoted identifiers)
* <xh> hexadecimal numeric string
* <xq> quoted strings
+ * <dolq> $foo$ quoted strings
*/
%x xb
***************
*** 158,163 ****
--- 160,166 ----
%x xd
%x xh
%x xq
+ %x dolq
/* Additional exclusive states for psql only: lex backslash commands */
%x xslashcmd
%x xslasharg
***************
*** 241,246 ****
--- 244,260 ----
xqoctesc [\\][0-7]{1,3}
xqcat {quote}{whitespace_with_newline}{quote}
+ /* $foo$ style quotes ("dollar quoting")
+ * The quoted string starts with $foo$ where "foo" is an optional string
+ * in the form of an identifier, except that it may not contain "$",
+ * and extends to the first occurrence of an identical string.
+ * There is *no* processing of the quoted text.
+ */
+ dolq_start [A-Za-z\200-\377_]
+ dolq_cont [A-Za-z\200-\377_0-9]
+ dolqdlm \$({dolq_start}{dolq_cont}*)?\$
+ dolqins [^$]+
+
/* Double quote
* Allows embedded spaces and other special characters into identifiers.
*/
***************
*** 428,433 ****
--- 442,477 ----
ECHO;
}
+ {dolqdlm} {
+ cur_state->dolqstart = pg_strdup(yytext);
+ BEGIN(dolq);
+ ECHO;
+ }
+ <dolq>{dolqdlm} {
+ if (strcmp(yytext, cur_state->dolqstart) == 0)
+ {
+ free(cur_state->dolqstart);
+ cur_state->dolqstart = NULL;
+ BEGIN(INITIAL);
+ ECHO;
+ }
+ else
+ {
+ /*
+ * When we fail to match $...$ to dolqstart, transfer
+ * the $... part to the output, but put back the final
+ * $ for rescanning. Consider $delim$...$junk$delim$
+ */
+ emit(yytext, yyleng-1);
+ yyless(yyleng-1);
+ }
+ }
+ <dolq>{dolqins} {
+ ECHO;
+ }
+ <dolq>. {
+ ECHO;
+ }
{xdstart} {
BEGIN(xd);
ECHO;
***************
*** 1007,1012 ****
--- 1051,1060 ----
case xq:
result = PSCAN_INCOMPLETE;
*prompt = PROMPT_SINGLEQUOTE;
+ break;
+ case dolq:
+ result = PSCAN_INCOMPLETE;
+ *prompt = PROMPT_DOLLARQUOTE;
break;
default:
/* can't get here */