Re: [HACKERS] dollar quoting - Mailing list pgsql-patches
From | Andrew Dunstan |
---|---|
Subject | Re: [HACKERS] dollar quoting |
Date | |
Msg-id | 4027A942.1080905@dunslane.net Whole thread Raw |
In response to | Re: [HACKERS] dollar quoting (Tom Lane <tgl@sss.pgh.pa.us>) |
Responses |
Re: [HACKERS] dollar quoting
|
List | pgsql-patches |
I think the attached patch addresses Tom's comments. I ended up not using a regex, which seemed a little heavy-handed, and instead wrote a small custom recognition function that should (and I think does) mimic the pattern recognition for these tokens used by the backend lexer. This patch just puts that function in mainloop.c, but perhaps it belongs elsewhere (string_utils.c maybe?). I don't have strong opinions on that. Enjoy, andrew Tom Lane wrote: >Andrew Dunstan <andrew@dunslane.net> writes: > > >>Comments welcome. Reviewers: I am not sure I got multi-byte stuff right >>in psql/mainloop.c - please pay close attention to that. >> >> > >The i-1 stuff should generally be i-prevlen. Not sure if there are any >other pitfalls. > >A bigger problem here: > > > >>+ else if (!dol_quote && line[i] == '$' && >>+ !isdigit(line[i + thislen]) && >>+ (dol_end = strchr(line+i+1,'$')) != NULL && >>+ (i == 0 || >>+ ! ((line[i-1] & 0x80) != 0 || isalnum(line[i-1]) || >>+ line[i-1] == '_'))) >>+ { >> >> > >is that you aren't checking that what comes between the two dollar signs >looks like empty-or-an-identifier. The check for >next-char-isn't-a-digit is part of that but not the only part. > >Also I'm not sure about the positioning of these tests relative to the >in_quote and in_xcomment tests. As you have it, $foo$ will be >recognized within an xcomment, which I think is at variance with the >proposed backend lexing behavior. > >Also, the strdup should be pg_strdup. 
> > regards, tom lane > >---------------------------(end of broadcast)--------------------------- >TIP 1: subscribe and unsubscribe commands go to majordomo@postgresql.org > > > Index: src/backend/parser/scan.l =================================================================== RCS file: /projects/cvsroot/pgsql-server/src/backend/parser/scan.l,v retrieving revision 1.112 diff -c -w -r1.112 scan.l *** src/backend/parser/scan.l 29 Nov 2003 19:51:52 -0000 1.112 --- src/backend/parser/scan.l 9 Feb 2004 15:26:34 -0000 *************** *** 39,44 **** --- 39,46 ---- static int xcdepth = 0; /* depth of nesting in slash-star comments */ + static char *dolqstart; /* current $foo$ quote start string */ + /* * literalbuf is used to accumulate literal values when multiple rules * are needed to parse a single literal. Call startlit to reset buffer *************** *** 95,100 **** --- 97,103 ---- * <xd> delimited identifiers (double-quoted identifiers) * <xh> hexadecimal numeric string * <xq> quoted strings + * <dolq> $foo$-style quoted strings */ %x xb *************** *** 102,107 **** --- 105,111 ---- %x xd %x xh %x xq + %x dolq /* Bit string * It is tempting to scan the string for only those characters *************** *** 141,146 **** --- 145,159 ---- xqoctesc [\\][0-7]{1,3} xqcat {quote}{whitespace_with_newline}{quote} + /* $foo$ style quotes ("dollar quoting") + * The quoted string starts with $foo$ where "foo" is an optional string + * in the form of an identifier, except that it may not contain "$", + * and extends to the first occurrence + * of an identical string. There is *no* processing of the quoted text. + */ + dolqdelim \$([A-Za-z\200-\377][A-Za-z\200-\377_0-9]*)?\$ + dolqinside [^$]+ + /* Double quote * Allows embedded spaces and other special characters into identifiers. 
*/ *************** *** 387,392 **** --- 400,434 ---- } <xq><<EOF>> { yyerror("unterminated quoted string"); } + {dolqdelim} { + token_start = yytext; + dolqstart = pstrdup(yytext); + BEGIN(dolq); + startlit(); + } + <dolq>{dolqdelim} { + if (strcmp(yytext, dolqstart) == 0) + { + pfree(dolqstart); + BEGIN(INITIAL); + yylval.str = litbufdup(); + return SCONST; + } + /* + * When we fail to match $...$ to dolqstart, transfer + * the $... part to the output, but put back the final + * $ for rescanning. Consider $delim$...$junk$delim$ + */ + addlit(yytext, yyleng-1); + yyless(yyleng-1); + } + <dolq>{dolqinside} { + addlit(yytext, yyleng); + } + <dolq>. { + addlitchar(yytext[0]); + } + <dolq><<EOF>> { yyerror("unterminated special-quoted string"); } {xdstart} { token_start = yytext; Index: src/bin/psql/mainloop.c =================================================================== RCS file: /projects/cvsroot/pgsql-server/src/bin/psql/mainloop.c,v retrieving revision 1.61 diff -c -w -r1.61 mainloop.c *** src/bin/psql/mainloop.c 25 Jan 2004 03:07:22 -0000 1.61 --- src/bin/psql/mainloop.c 9 Feb 2004 15:26:51 -0000 *************** *** 21,26 **** --- 21,61 ---- sigjmp_buf main_loop_jmp; #endif + /* + * function to detect a valid $foo$ quote delimiter at the start of the + * parameter dquote. + */ + + static bool valid_dolquote(char * dquote) + { + int i; + + /* must start with a $ */ + if (dquote[0] != '$') + return false; + + /* empty 'identifier' case */ + if (dquote[1] == '$') + return true; + + /* first 'identifier' char must be a letter or have high bit set */ + if (!isalpha(dquote[1]) && (dquote[1] & 0x80) == 0) + return false; + + /* subsequent chars must be alphanumeric or _ or have high bit set */ + for (i = 2; dquote[i] != '$'; i++) + { + if ((dquote[i] & 0x80) == 0 && ! 
isalnum(dquote[i]) && + dquote[i] != '_') + { + /* we found an invalid character */ + return false; + } + } + + return true; + } + /* * Main processing loop for reading lines of input *************** *** 49,54 **** --- 84,92 ---- unsigned int query_start; volatile int count_eof = 0; volatile unsigned int bslash_count = 0; + volatile bool free_dolquote = false; + char *dol_quote = NULL; + int i, prevlen, *************** *** 120,125 **** --- 158,164 ---- in_quote = 0; paren_level = 0; count_eof = 0; + free_dolquote = true; slashCmdStatus = CMD_UNKNOWN; } else *************** *** 136,141 **** --- 175,190 ---- pqsignal(SIGINT, handle_sigint); /* control-C => cancel */ #endif /* not WIN32 */ + if (free_dolquote) + { + if(dol_quote) + { + free(dol_quote); + dol_quote = NULL; + } + free_dolquote = false; + } + fflush(stdout); if (slashCmdStatus == CMD_NEWEDIT) *************** *** 150,155 **** --- 199,209 ---- in_xcomment = 0; in_quote = 0; paren_level = 0; + if(dol_quote) + { + free(dol_quote); + dol_quote = NULL; + } slashCmdStatus = CMD_UNKNOWN; } *************** *** 161,167 **** { int prompt_status; ! if (in_quote && in_quote == '\'') prompt_status = PROMPT_SINGLEQUOTE; else if (in_quote && in_quote == '"') prompt_status = PROMPT_DOUBLEQUOTE; --- 215,223 ---- { int prompt_status; ! if (dol_quote) ! prompt_status = PROMPT_DOLLARQUOTE; ! else if (in_quote && in_quote == '\'') prompt_status = PROMPT_SINGLEQUOTE; else if (in_quote && in_quote == '"') prompt_status = PROMPT_DOUBLEQUOTE; *************** *** 268,273 **** --- 324,343 ---- in_quote = 0; } + /* in or end of $foo$ type quote? */ + + else if (dol_quote) + { + if (strncmp(line+i,dol_quote,strlen(dol_quote)) == 0) + { + ADVANCE_1; + while(line[i] != '$') + ADVANCE_1; + free(dol_quote); + dol_quote = NULL; + } + } + /* start of extended comment? 
*/ else if (line[i] == '/' && line[i + thislen] == '*') { *************** *** 288,297 **** else if (line[i] == '\'' || line[i] == '"') in_quote = line[i]; /* single-line comment? truncate line */ else if (line[i] == '-' && line[i + thislen] == '-') { ! line[i] = '\0'; /* remove comment */ break; } --- 358,395 ---- else if (line[i] == '\'' || line[i] == '"') in_quote = line[i]; + /* + * start of $foo$ type quote? + * + * must not be preceded by a valid identifier character + */ + + else if (!dol_quote && valid_dolquote(line+i) && + (i == 0 || + ! ((line[i-prevlen] & 0x80) != 0 || + isalnum(line[i-prevlen]) || + line[i-prevlen] == '_' || + line[i-prevlen] == '$' ))) + { + char * dol_end; + char eos; + + dol_end = strchr(line+i+1,'$'); + dol_end ++; + eos = *dol_end; + *dol_end = '\0'; + dol_quote = pg_strdup(line+i); + *dol_end = eos; + ADVANCE_1; + while(line[i] != '$') + ADVANCE_1; + + } + /* single-line comment? truncate line */ else if (line[i] == '-' && line[i + thislen] == '-') { ! line[i] = '\0'; /* removae comment */ break; } *************** *** 458,464 **** /* Put the rest of the line in the query buffer. */ ! if (in_quote || line[query_start + strspn(line + query_start, " \t\n\r")] != '\0') { if (query_buf->len > 0) appendPQExpBufferChar(query_buf, '\n'); --- 556,563 ---- /* Put the rest of the line in the query buffer. */ ! if (in_quote || dol_quote || ! 
line[query_start + strspn(line + query_start, " \t\n\r")] != '\0') { if (query_buf->len > 0) appendPQExpBufferChar(query_buf, '\n'); Index: src/bin/psql/prompt.c =================================================================== RCS file: /projects/cvsroot/pgsql-server/src/bin/psql/prompt.c,v retrieving revision 1.34 diff -c -w -r1.34 prompt.c *** src/bin/psql/prompt.c 25 Jan 2004 03:07:22 -0000 1.34 --- src/bin/psql/prompt.c 9 Feb 2004 15:26:51 -0000 *************** *** 85,90 **** --- 85,91 ---- case PROMPT_CONTINUE: case PROMPT_SINGLEQUOTE: case PROMPT_DOUBLEQUOTE: + case PROMPT_DOLLARQUOTE: case PROMPT_COMMENT: case PROMPT_PAREN: prompt_name = "PROMPT2"; *************** *** 198,203 **** --- 199,207 ---- break; case PROMPT_DOUBLEQUOTE: buf[0] = '"'; + break; + case PROMPT_DOLLARQUOTE: + buf[0] = '$'; break; case PROMPT_COMMENT: buf[0] = '*'; Index: src/bin/psql/prompt.h =================================================================== RCS file: /projects/cvsroot/pgsql-server/src/bin/psql/prompt.h,v retrieving revision 1.13 diff -c -w -r1.13 prompt.h *** src/bin/psql/prompt.h 29 Nov 2003 19:52:07 -0000 1.13 --- src/bin/psql/prompt.h 9 Feb 2004 15:26:51 -0000 *************** *** 15,20 **** --- 15,21 ---- PROMPT_COMMENT, PROMPT_SINGLEQUOTE, PROMPT_DOUBLEQUOTE, + PROMPT_DOLLARQUOTE, PROMPT_PAREN, PROMPT_COPY } promptStatus_t;
pgsql-patches by date: