Thread: Patch for Improved Syntax Error Reporting
Attached please find a patch to the input parser that yields better syntax error reporting on parse errors. For example: test=# SELECT * FRUM bob; ERROR: parser: parse error at or near "frum" becomes: test=# SELECT * FRUM bob; ERROR: parser: parse error at or near 'frum': SELECT * FRUM bob; ^ I've also modified the regression tests accordingly. I haven't made the corresponding changes to the ecpg grammar -- I'm not sure whether changes like this are desirable there. Feedback welcome. Comments? Neil -- Neil Padgett Red Hat Canada Ltd. E-Mail: npadgett@redhat.com 2323 Yonge Street, Suite #300, Toronto, ON M4P 2C9 Index: src/backend/parser/scan.l =================================================================== RCS file: /home/projects/pgsql/cvsroot/pgsql/src/backend/parser/scan.l,v retrieving revision 1.88 diff -c -p -r1.88 scan.l *** src/backend/parser/scan.l 2001/03/22 17:41:47 1.88 --- src/backend/parser/scan.l 2001/08/01 17:43:53 *************** *** 37,42 **** --- 37,43 ---- extern char *parseString; static char *parseCh; + static int parseOffset; /* some versions of lex define this as a macro */ #if defined(yywrap) *************** other . *** 254,262 **** */ %% ! {whitespace} { /* ignore */ } ! {xcstart} { xcdepth = 0; BEGIN(xc); /* Put back any characters past slash-star; see above */ --- 255,266 ---- */ %% ! {whitespace} { parseOffset += yyleng; ! /* ignore */ ! } ! {xcstart} { ! parseOffset += 2; xcdepth = 0; BEGIN(xc); /* Put back any characters past slash-star; see above */ *************** other . *** 264,293 **** } <xc>{xcstart} { xcdepth++; /* Put back any characters past slash-star; see above */ yyless(2); } <xc>{xcstop} { if (xcdepth <= 0) BEGIN(INITIAL); else xcdepth--; } ! <xc>{xcinside} { /* ignore */ } ! <xc>{op_chars} { /* ignore */ } <xc><<EOF>> { elog(ERROR, "Unterminated /* comment"); } ! {xbitstart} { BEGIN(xbit); startlit(); addlit("b", 1); } <xbit>{xbitstop} { BEGIN(INITIAL); if (literalbuf[strspn(literalbuf + 1, "01") + 1] != '\0') elog(ERROR, "invalid bit string input: '%s'", --- 268,303 ---- } <xc>{xcstart} { + parseOffset += 2; xcdepth++; /* Put back any characters past slash-star; see above */ yyless(2); } <xc>{xcstop} { + parseOffset += yyleng; if (xcdepth <= 0) BEGIN(INITIAL); else xcdepth--; } ! <xc>{xcinside} { parseOffset += yyleng; ! /* ignore */ } ! <xc>{op_chars} { parseOffset += yyleng; ! /* ignore */ } <xc><<EOF>> { elog(ERROR, "Unterminated /* comment"); } ! {xbitstart} { ! parseOffset += yyleng; BEGIN(xbit); startlit(); addlit("b", 1); } <xbit>{xbitstop} { + parseOffset += yyleng; BEGIN(INITIAL); if (literalbuf[strspn(literalbuf + 1, "01") + 1] != '\0') elog(ERROR, "invalid bit string input: '%s'", *************** other . *** 297,311 **** } <xh>{xhinside} | <xbit>{xbitinside} { addlit(yytext, yyleng); } <xh>{xhcat} | ! <xbit>{xbitcat} { /* ignore */ } <xbit><<EOF>> { elog(ERROR, "unterminated bit string literal"); } {xhstart} { BEGIN(xh); startlit(); } --- 307,324 ---- } <xh>{xhinside} | <xbit>{xbitinside} { + parseOffset += yyleng; addlit(yytext, yyleng); } <xh>{xhcat} | ! <xbit>{xbitcat} { ! parseOffset += yyleng; /* ignore */ } <xbit><<EOF>> { elog(ERROR, "unterminated bit string literal"); } {xhstart} { + parseOffset += yyleng; BEGIN(xh); startlit(); } *************** other . *** 313,318 **** --- 326,332 ---- long val; char* endptr; + parseOffset += yyleng; BEGIN(INITIAL); errno = 0; val = strtol(literalbuf, &endptr, 16); *************** other . *** 330,339 **** --- 344,355 ---- <xh><<EOF>> { elog(ERROR, "Unterminated hexadecimal integer"); } {xqstart} { + parseOffset += yyleng; BEGIN(xq); startlit(); } <xq>{xqstop} { + parseOffset += yyleng; BEGIN(INITIAL); yylval.str = scanstr(literalbuf); return SCONST; *************** other . *** 341,359 **** --- 357,379 ---- <xq>{xqdouble} | <xq>{xqinside} | <xq>{xqliteral} { + parseOffset += yyleng; addlit(yytext, yyleng); } <xq>{xqcat} { + parseOffset += yyleng; /* ignore */ } <xq><<EOF>> { elog(ERROR, "Unterminated quoted string"); } {xdstart} { + parseOffset += yyleng; BEGIN(xd); startlit(); } <xd>{xdstop} { + parseOffset += yyleng; BEGIN(INITIAL); if (strlen(literalbuf) == 0) elog(ERROR, "zero-length delimited identifier"); *************** other . *** 375,391 **** return IDENT; } <xd>{xddouble} { addlit(yytext, yyleng-1); } ! <xd>{xdinside} { addlit(yytext, yyleng); } <xd><<EOF>> { elog(ERROR, "Unterminated quoted identifier"); } ! {typecast} { return TYPECAST; } - {self} { return yytext[0]; } - {operator} { /* * Check for embedded slash-star or dash-dash; those --- 395,417 ---- return IDENT; } <xd>{xddouble} { + parseOffset += yyleng; addlit(yytext, yyleng-1); } ! <xd>{xdinside} { ! parseOffset += yyleng; addlit(yytext, yyleng); } <xd><<EOF>> { elog(ERROR, "Unterminated quoted identifier"); } ! {typecast} { ! parseOffset += yyleng; ! return TYPECAST; } ! ! {self} { ! parseOffset += yyleng; ! return yytext[0]; } {operator} { /* * Check for embedded slash-star or dash-dash; those *************** other . *** 396,401 **** --- 422,429 ---- int nchars = yyleng; char *slashstar = strstr((char*)yytext, "/*"); char *dashdash = strstr((char*)yytext, "--"); + + parseOffset += yyleng; if (slashstar && dashdash) { *************** other . *** 455,461 **** return Op; } ! {param} { yylval.ival = atol((char*)&yytext[1]); return PARAM; } --- 483,490 ---- return Op; } ! {param} { ! parseOffset += yyleng; yylval.ival = atol((char*)&yytext[1]); return PARAM; } *************** other . *** 463,469 **** {integer} { long val; char* endptr; ! errno = 0; val = strtol((char *)yytext, &endptr, 10); if (*endptr != '\0' || errno == ERANGE --- 492,499 ---- {integer} { long val; char* endptr; ! ! parseOffset += yyleng; errno = 0; val = strtol((char *)yytext, &endptr, 10); if (*endptr != '\0' || errno == ERANGE *************** other . *** 480,490 **** yylval.ival = val; return ICONST; } ! {decimal} { yylval.str = pstrdup((char*)yytext); return FCONST; } ! {real} { yylval.str = pstrdup((char*)yytext); return FCONST; } --- 510,522 ---- yylval.ival = val; return ICONST; } ! {decimal} { ! parseOffset += yyleng; yylval.str = pstrdup((char*)yytext); return FCONST; } ! {real} { ! parseOffset += yyleng; yylval.str = pstrdup((char*)yytext); return FCONST; } *************** other . *** 493,498 **** --- 525,532 ---- {identifier} { ScanKeyword *keyword; int i; + + parseOffset += yyleng; /* Is it a keyword? */ keyword = ScanKeywordLookup((char*) yytext); *************** other . *** 530,545 **** return IDENT; } ! {other} { return yytext[0]; } %% void yyerror(const char *message) { ! elog(ERROR, "parser: %s at or near \"%s\"", message, yytext); } int yywrap(void) { --- 564,634 ---- return IDENT; } ! {other} { ! parseOffset += yyleng; ! return yytext[0]; ! } %% void yyerror(const char *message) { ! int errorOffset; ! char *line; ! char *endOfLine; ! char *beginningOfLine; ! size_t buffSize; ! ! /* Calculate the error's offset from the beginning of the input */ ! ! errorOffset = parseOffset + 1 - yyleng; ! ! /* Find the beginning of the input line */ ! ! for(beginningOfLine = parseString + errorOffset; ! beginningOfLine > parseString; ! beginningOfLine--) ! if(*(beginningOfLine - 1) == '\n' || ! *(beginningOfLine - 1) == '\r' || ! *(beginningOfLine - 1) == '\f') ! break; ! ! /* Find the end of the input line */ ! ! for(endOfLine = parseString + errorOffset; ! *endOfLine != '\0'; ! endOfLine++) ! if(*endOfLine == '\n' || ! *endOfLine == '\r' || ! *endOfLine == '\f') ! break; ! ! /* Calculate the offset of the error into the input line */ ! ! errorOffset = errorOffset - (int)(beginningOfLine - parseString); ! ! /* Allocate a buffer for the line */ ! ! buffSize = (endOfLine - beginningOfLine) + 1; ! line = palloc(buffSize); ! ! /* Copy the line into the buffer */ ! ! memcpy(line, beginningOfLine, buffSize); ! *(line + buffSize - 1) = '\0'; ! ! /* Report the error */ ! ! elog(ERROR, "parser: %s at or near \"%s\":\n%s\n%*s", ! message, ! yytext, ! line, ! errorOffset, ! "^"); } + int yywrap(void) { *************** scanner_init(void) *** 557,562 **** --- 646,655 ---- because input()/myinput() checks the non-nullness of parseCh to know when to pass the string to lex/flex */ parseCh = NULL; + + /* Initialize the parse input offset -- used by enhanced syntax error reporting */ + + parseOffset = 0; /* initialize literal buffer to a reasonable but expansible size */ literalalloc = 128; Index: src/test/regress/expected/errors.out =================================================================== RCS file: /home/projects/pgsql/cvsroot/pgsql/src/test/regress/expected/errors.out,v retrieving revision 1.26 diff -c -p -r1.26 errors.out *** src/test/regress/expected/errors.out 2000/11/08 22:10:03 1.26 --- src/test/regress/expected/errors.out 2001/08/01 17:43:56 *************** select 1 *** 19,25 **** select -- no such relation select * from nonesuch; ! ERROR: parser: parse error at or near "select" -- bad name in target list select nonesuch from pg_database; ERROR: Attribute 'nonesuch' not found --- 19,27 ---- select -- no such relation select * from nonesuch; ! ERROR: parser: parse error at or near "select": ! select ! ^ -- bad name in target list select nonesuch from pg_database; ERROR: Attribute 'nonesuch' not found *************** select * from pg_database where pg_datab *** 31,37 **** ERROR: Attribute 'nonesuch' not found -- bad select distinct on syntax, distinct attribute missing select distinct on (foobar) from pg_database; ! ERROR: parser: parse error at or near "from" -- bad select distinct on syntax, distinct attribute not in target list select distinct on (foobar) * from pg_database; ERROR: Attribute 'foobar' not found --- 33,41 ---- ERROR: Attribute 'nonesuch' not found -- bad select distinct on syntax, distinct attribute missing select distinct on (foobar) from pg_database; ! ERROR: parser: parse error at or near "from": ! select distinct on (foobar) from pg_database; ! ^ -- bad select distinct on syntax, distinct attribute not in target list select distinct on (foobar) * from pg_database; ERROR: Attribute 'foobar' not found *************** ERROR: Attribute 'foobar' not found *** 39,46 **** -- DELETE -- missing relation name (this had better not wildcard!) delete from; ! ERROR: parser: parse error at or near ";" -- no such relation delete from nonesuch; ERROR: Relation 'nonesuch' does not exist --- 43,52 ---- -- DELETE -- missing relation name (this had better not wildcard!) + delete from; + ERROR: parser: parse error at or near ";": delete from; ! ^ -- no such relation delete from nonesuch; ERROR: Relation 'nonesuch' does not exist *************** ERROR: Relation 'nonesuch' does not exi *** 49,55 **** -- missing relation name (this had better not wildcard!) drop table; ! ERROR: parser: parse error at or near ";" -- no such relation drop table nonesuch; ERROR: table "nonesuch" does not exist --- 55,63 ---- -- missing relation name (this had better not wildcard!) drop table; ! ERROR: parser: parse error at or near ";": ! drop table; ! ^ -- no such relation drop table nonesuch; ERROR: table "nonesuch" does not exist *************** ERROR: table "nonesuch" does not exist *** 58,65 **** -- relation renaming -- missing relation name alter table rename; ! ERROR: parser: parse error at or near ";" -- no such relation alter table nonesuch rename to newnonesuch; ERROR: Relation "nonesuch" does not exist --- 66,75 ---- -- relation renaming -- missing relation name + alter table rename; + ERROR: parser: parse error at or near ";": alter table rename; ! ^ -- no such relation alter table nonesuch rename to newnonesuch; ERROR: Relation "nonesuch" does not exist *************** ERROR: Define: "basetype" unspecified *** 116,125 **** -- missing index name drop index; ! ERROR: parser: parse error at or near ";" -- bad index name drop index 314159; ! ERROR: parser: parse error at or near "314159" -- no such index drop index nonesuch; ERROR: index "nonesuch" does not exist --- 126,139 ---- -- missing index name drop index; ! ERROR: parser: parse error at or near ";": ! drop index; ! ^ -- bad index name + drop index 314159; + ERROR: parser: parse error at or near "314159": drop index 314159; ! ^ -- no such index drop index nonesuch; ERROR: index "nonesuch" does not exist *************** ERROR: index "nonesuch" does not exist *** 127,143 **** -- REMOVE AGGREGATE -- missing aggregate name drop aggregate; ! ERROR: parser: parse error at or near ";" -- bad aggregate name drop aggregate 314159; ! ERROR: parser: parse error at or near "314159" -- no such aggregate drop aggregate nonesuch; ! ERROR: parser: parse error at or near ";" -- missing aggregate type drop aggregate newcnt1; ! ERROR: parser: parse error at or near ";" -- bad aggregate type drop aggregate newcnt nonesuch; ERROR: RemoveAggregate: type 'nonesuch' does not exist --- 141,165 ---- -- REMOVE AGGREGATE -- missing aggregate name + drop aggregate; + ERROR: parser: parse error at or near ";": drop aggregate; ! ^ -- bad aggregate name drop aggregate 314159; ! ERROR: parser: parse error at or near "314159": ! drop aggregate 314159; ! ^ -- no such aggregate + drop aggregate nonesuch; + ERROR: parser: parse error at or near ";": drop aggregate nonesuch; ! ^ -- missing aggregate type drop aggregate newcnt1; ! ERROR: parser: parse error at or near ";": ! drop aggregate newcnt1; ! ^ -- bad aggregate type drop aggregate newcnt nonesuch; ERROR: RemoveAggregate: type 'nonesuch' does not exist *************** ERROR: RemoveAggregate: aggregate 'newc *** 148,158 **** -- REMOVE FUNCTION -- missing function name drop function (); ! ERROR: parser: parse error at or near "(" -- bad function name drop function 314159(); ! ERROR: parser: parse error at or near "314159" -- no such function drop function nonesuch(); ERROR: RemoveFunction: function 'nonesuch()' does not exist --- 170,184 ---- -- REMOVE FUNCTION -- missing function name + drop function (); + ERROR: parser: parse error at or near "(": drop function (); ! ^ -- bad function name drop function 314159(); ! ERROR: parser: parse error at or near "314159": ! drop function 314159(); ! ^ -- no such function drop function nonesuch(); ERROR: RemoveFunction: function 'nonesuch()' does not exist *************** ERROR: RemoveFunction: function 'nonesu *** 160,170 **** -- REMOVE TYPE -- missing type name drop type; ! ERROR: parser: parse error at or near ";" -- bad type name drop type 314159; ! ERROR: parser: parse error at or near "314159" -- no such type drop type nonesuch; ERROR: RemoveType: type 'nonesuch' does not exist --- 186,200 ---- -- REMOVE TYPE -- missing type name + drop type; + ERROR: parser: parse error at or near ";": drop type; ! ^ -- bad type name + drop type 314159; + ERROR: parser: parse error at or near "314159": drop type 314159; ! ^ -- no such type drop type nonesuch; ERROR: RemoveType: type 'nonesuch' does not exist *************** ERROR: RemoveType: type 'nonesuch' does *** 173,194 **** -- missing everything drop operator; ! ERROR: parser: parse error at or near ";" -- bad operator name drop operator equals; ! ERROR: parser: parse error at or near "equals" -- missing type list drop operator ===; ! ERROR: parser: parse error at or near ";" -- missing parentheses drop operator int4, int4; ! ERROR: parser: parse error at or near "int4" -- missing operator name drop operator (int4, int4); ! ERROR: parser: parse error at or near "(" -- missing type list contents drop operator === (); ! ERROR: parser: parse error at or near ")" -- no such operator drop operator === (int4); ERROR: parser: argument type missing (use NONE for unary operators) --- 203,236 ---- -- missing everything drop operator; ! ERROR: parser: parse error at or near ";": ! drop operator; ! ^ -- bad operator name + drop operator equals; + ERROR: parser: parse error at or near "equals": drop operator equals; ! ^ -- missing type list drop operator ===; ! ERROR: parser: parse error at or near ";": ! drop operator ===; ! ^ -- missing parentheses + drop operator int4, int4; + ERROR: parser: parse error at or near "int4": drop operator int4, int4; ! ^ -- missing operator name drop operator (int4, int4); ! ERROR: parser: parse error at or near "(": ! drop operator (int4, int4); ! ^ -- missing type list contents + drop operator === (); + ERROR: parser: parse error at or near ")": drop operator === (); ! ^ -- no such operator drop operator === (int4); ERROR: parser: argument type missing (use NONE for unary operators) *************** ERROR: RemoveOperator: binary operator *** 199,206 **** drop operator = (nonesuch); ERROR: parser: argument type missing (use NONE for unary operators) -- no such type1 drop operator = ( , int4); ! ERROR: parser: parse error at or near "," -- no such type1 drop operator = (nonesuch, int4); ERROR: RemoveOperator: type 'nonesuch' does not exist --- 241,250 ---- drop operator = (nonesuch); ERROR: parser: argument type missing (use NONE for unary operators) -- no such type1 + drop operator = ( , int4); + ERROR: parser: parse error at or near ",": drop operator = ( , int4); ! ^ -- no such type1 drop operator = (nonesuch, int4); ERROR: RemoveOperator: type 'nonesuch' does not exist *************** drop operator = (int4, nonesuch); *** 209,233 **** ERROR: RemoveOperator: type 'nonesuch' does not exist -- no such type2 drop operator = (int4, ); ! ERROR: parser: parse error at or near ")" -- -- DROP RULE -- missing rule name drop rule; ! ERROR: parser: parse error at or near ";" -- bad rule name drop rule 314159; ! ERROR: parser: parse error at or near "314159" -- no such rule drop rule nonesuch; ERROR: Rule or view "nonesuch" not found -- bad keyword drop tuple rule nonesuch; ! ERROR: parser: parse error at or near "tuple" -- no such rule drop instance rule nonesuch; ! ERROR: parser: parse error at or near "instance" -- no such rule drop rewrite rule nonesuch; ! ERROR: parser: parse error at or near "rewrite" --- 253,289 ---- ERROR: RemoveOperator: type 'nonesuch' does not exist -- no such type2 drop operator = (int4, ); ! ERROR: parser: parse error at or near ")": ! drop operator = (int4, ); ! ^ -- -- DROP RULE -- missing rule name + drop rule; + ERROR: parser: parse error at or near ";": drop rule; ! ^ -- bad rule name + drop rule 314159; + ERROR: parser: parse error at or near "314159": drop rule 314159; ! ^ -- no such rule drop rule nonesuch; ERROR: Rule or view "nonesuch" not found -- bad keyword drop tuple rule nonesuch; ! ERROR: parser: parse error at or near "tuple": ! drop tuple rule nonesuch; ! ^ -- no such rule + drop instance rule nonesuch; + ERROR: parser: parse error at or near "instance": drop instance rule nonesuch; ! ^ -- no such rule + drop rewrite rule nonesuch; + ERROR: parser: parse error at or near "rewrite": drop rewrite rule nonesuch; ! ^ \ No newline at end of file Index: src/test/regress/expected/strings.out =================================================================== RCS file: /home/projects/pgsql/cvsroot/pgsql/src/test/regress/expected/strings.out,v retrieving revision 1.10 diff -c -p -r1.10 strings.out *** src/test/regress/expected/strings.out 2001/06/01 17:49:17 1.10 --- src/test/regress/expected/strings.out 2001/08/01 17:43:56 *************** SELECT 'first line' *** 17,23 **** ' - next line' /* this comment is not allowed here */ ' - third line' AS "Illegal comment within continuation"; ! ERROR: parser: parse error at or near "'" -- -- test conversions between various string types -- --- 17,25 ---- ' - next line' /* this comment is not allowed here */ ' - third line' AS "Illegal comment within continuation"; ! ERROR: parser: parse error at or near "'": ! ' - third line' ! ^ -- -- test conversions between various string types -- Index: src/test/regress/output/constraints.source =================================================================== RCS file: /home/projects/pgsql/cvsroot/pgsql/src/test/regress/output/constraints.source,v retrieving revision 1.18 diff -c -p -r1.18 constraints.source *** src/test/regress/output/constraints.source 2001/02/22 05:32:56 1.18 --- src/test/regress/output/constraints.source 2001/08/01 17:43:56 *************** SELECT '' AS four, * FROM DEFAULTEXPR_TB *** 45,56 **** -- syntax errors -- test for extraneous comma CREATE TABLE error_tbl (i int DEFAULT (100, )); ! ERROR: parser: parse error at or near "," -- this will fail because gram.y uses b_expr not a_expr for defaults, -- to avoid a shift/reduce conflict that arises from NOT NULL being -- part of the column definition syntax: CREATE TABLE error_tbl (b1 bool DEFAULT 1 IN (1, 2)); ! ERROR: parser: parse error at or near "IN" -- this should work, however: CREATE TABLE error_tbl (b1 bool DEFAULT (1 IN (1, 2))); DROP TABLE error_tbl; --- 45,60 ---- -- syntax errors -- test for extraneous comma CREATE TABLE error_tbl (i int DEFAULT (100, )); ! ERROR: parser: parse error at or near ",": ! CREATE TABLE error_tbl (i int DEFAULT (100, )); ! ^ -- this will fail because gram.y uses b_expr not a_expr for defaults, -- to avoid a shift/reduce conflict that arises from NOT NULL being -- part of the column definition syntax: CREATE TABLE error_tbl (b1 bool DEFAULT 1 IN (1, 2)); ! ERROR: parser: parse error at or near "IN": ! CREATE TABLE error_tbl (b1 bool DEFAULT 1 IN (1, 2)); ! ^ -- this should work, however: CREATE TABLE error_tbl (b1 bool DEFAULT (1 IN (1, 2))); DROP TABLE error_tbl;
Tom Lane wrote: > > Neil Padgett <npadgett@redhat.com> writes: > > Attached please find a patch to the input parser that yields better > > syntax error reporting on parse errors. > > This has been discussed before (you guys really should spend more time > reading the mail list archives) and in fact you are not the first to I've just read the relevant messages. Though, I do find the list archives a bit slow for any useful browsing -- can I grab a copy of them from somewhere? Setting up a mirror might be an idea. . . > submit essentially this patch. The major objections to reporting syntax > problems this way, IIRC, are that (a) it makes unsupportable assumptions > about what the user interface looks like, for example the assumption > that the error message will be displayed in a fixed-width font; and Can you cite a client which does not use a fixed-width font at this time? I think most I've worked with use one for SQL interaction -- it is pretty much "the way things are done." I suppose, however, there could be some clients which display error messages in a dialog box or something similar, so I do agree that this is something that does need to be handled, and which the patch doesn't address. See below for a suggestion for this. > (b) it becomes essentially useless when the input query exceeds a few > lines in size. How so? I grab out the line of interest when reporting the error. > > The conclusion we had come to in previous discussion is that the error > offset ought to be delivered to the client application as a separate > component of the error report, and the client ought to be responsible > for doing something appropriate with it --- which might, for example, > include highlighting the offending word(s) if it's a GUI application > that has the input query in a window. psql couldn't do that, but might > choose to redisplay a few dozen characters around the position of the > error. Well, perhaps the error message could be changed to something like this, with a special string marking the parse error position? test=# SELECT * FRUM bob; ERROR: parser: parse error at or near 'frum': SELECT * ***FRUM bob; Or, perhaps better than a magic string: test=# SELECT * FRUM bob; ERROR: parser: parse error at or near 'frum' (index 10) The latter is probably more useful, though it does place a burden on the client to format and display an error message. But, the client program can mark out the error in any way it sees fit. In fact, it could even leave the raw message in place and still the user will get something more useful than the current output. No protocol change is required, but very useful functionality is added. Neil -- Neil Padgett Red Hat Canada Ltd. E-Mail: npadgett@redhat.com 2323 Yonge Street, Suite #300, Toronto, ON M4P 2C9
> Tom Lane wrote: > > > > Neil Padgett <npadgett@redhat.com> writes: > > > Attached please find a patch to the input parser that yields better > > > syntax error reporting on parse errors. > > > > This has been discussed before (you guys really should spend more time > > reading the mail list archives) and in fact you are not the first to Tom, it is hard to imagine how they would even find relevant stuff on this issue. The TODO.detail item is very vague. Would they start searching for keywords in the mailing list search engine? Not sure what keywords they would even use. In fact, their solution is an improvement over what is in TODO.detail/yacc now. > I've just read the relevant messages. Though, I do find the list > archives a bit slow for any useful browsing -- can I grab a copy of them > from somewhere? Setting up a mirror might be an idea. . . The whole internet seems slow today. I think it is that Code Red worm. > > submit essentially this patch. The major objections to reporting syntax > > problems this way, IIRC, are that (a) it makes unsupportable assumptions > > about what the user interface looks like, for example the assumption > > that the error message will be displayed in a fixed-width font; and > > Can you cite a client which does not use a fixed-width font at this > time? I think most I've worked with use one for SQL interaction -- it is > pretty much "the way things are done." I suppose, however, there could > be some clients which display error messages in a dialog box or > something similar, so I do agree that this is something that does need > to be handled, and which the patch doesn't address. See below for a > suggestion for this. I know some people like a client-independent way of displaying errors, but I like the direct approach of this patch, returning a string with the error line highlighted and the location marked. I don't want to push added complexity into the client, especially when we don't even have a client who has this need yet. IMHO, I am starting to see a lot of over-engineering demands made of these patches. I think it is wasting time and is of little value to average PostgreSQL users. Of course, others may disagree, but that is my opinion. So, my vote is to accept the patch as-is. When we have need for more complex reporting, we can add it. -- Bruce Momjian | http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 853-3000 + If your life is a hard drive, | 830 Blythe Avenue + Christ can be your backup. | Drexel Hill, Pennsylvania 19026
Neil Padgett <npadgett@redhat.com> writes: > I've just read the relevant messages. Though, I do find the list > archives a bit slow for any useful browsing -- can I grab a copy of them > from somewhere? Setting up a mirror might be an idea. . . The raw archives are under http://www.ca.postgresql.org/mhonarc/ in monthly files, for example http://www.ca.postgresql.org/mhonarc/pgsql-patches/archive/pgsql-patches.200108 I am not sure whether our mirror sites mirror them or not. In any case, you should talk to Marc if you want to coordinate some sort of long-term mirroring arrangement. > Can you cite a client which does not use a fixed-width font at this > time? I think most I've worked with use one for SQL interaction -- it is > pretty much "the way things are done." I'd believe that data is entered/displayed in fixed-width text; I'm less ready to assume that for error messages, however. >> (b) it becomes essentially useless when the input query exceeds a few >> lines in size. > How so? I grab out the line of interest when reporting the error. My apologies, I missed that aspect of your patch. An interesting solution. However, it doesn't handle embedded tabs, and there is still the objection that a client app might want to present the location info in a completely different fashion anyway. >> The conclusion we had come to in previous discussion is that the error >> offset ought to be delivered to the client application as a separate >> component of the error report, > Well, perhaps the error message could be changed to something like this, > with a special string marking the parse error position? > test=# SELECT * FRUM bob; > ERROR: parser: parse error at or near 'frum': > SELECT * ***FRUM bob; I was thinking something along the lines of ERROR: message string just like now POSITION: 42 OTHERSTUFF: yadda yadda ie, the error message string is now interpreted as keyworded lines, somewhat like (say) mail headers. This would be workable for new clients, wouldn't break anything at the wire-protocol level, and would not be totally useless if presented "raw" to a user by an old client. See the archives for more info --- I think the last discussion was three or four months back when Peter E. started to make noises about fixing elog for internationalization. regards, tom lane
Bruce Momjian <pgman@candle.pha.pa.us> writes: > Tom, it is hard to imagine how they would even find relevant stuff on > this issue. The TODO.detail item is very vague. I dunno that it's vague --- a quick look indicates that TODO.detail/elog has most of the recent messages on the subject. (Neil, the "recent discussion" that I referred to seems to be in there, or most of it anyway, if you didn't see it in the archives yet.) > In fact, their solution is an improvement over what is in > TODO.detail/yacc now. Agreed, the idea of pulling out just the one line is an improvement over the last patch. It's still going down the wrong path though. We should be empowering client apps to highlight syntax errors properly, not presenting edited info in a way that might be useful to humans but will be unintelligible to programs. If we go that route, it will be harder to do the right thing later. > I know some people like a client-independent way of displaying errors, > but I like the direct approach of this patch, returning a string with > the error line highlighted and the location marked. I don't want to > push added complexity into the client, especially when we don't even > have a client who has this need yet. pgAdmin, phpAdmin, pgaccess, and friends don't count? We have GUI front ends *today*, you know. regards, tom lane
> > In fact, their solution is an improvement over what is in > > TODO.detail/yacc now. > > Agreed, the idea of pulling out just the one line is an improvement over > the last patch. It's still going down the wrong path though. We should > be empowering client apps to highlight syntax errors properly, not > presenting edited info in a way that might be useful to humans but will > be unintelligible to programs. If we go that route, it will be harder > to do the right thing later. > > > I know some people like a client-independent way of displaying errors, > > but I like the direct approach of this patch, returning a string with > > the error line highlighted and the location marked. I don't want to > > push added complexity into the client, especially when we don't even > > have a client who has this need yet. > > pgAdmin, phpAdmin, pgaccess, and friends don't count? We have GUI front > ends *today*, you know. But how do they display error messages now? Can't they just continue doing that with this new code? Do we want to make them code their own error handling, and for what little benefit? Let them figure out how to display the error in fixed-width font and be done with it. I am sure they have bigger things to do than colorize error locations. -- Bruce Momjian | http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 853-3000 + If your life is a hard drive, | 830 Blythe Avenue + Christ can be your backup. | Drexel Hill, Pennsylvania 19026
> > > In fact, their solution is an improvement over what is in > > > TODO.detail/yacc now. > > > > Agreed, the idea of pulling out just the one line is an improvement over > > the last patch. It's still going down the wrong path though. We should > > be empowering client apps to highlight syntax errors properly, not > > presenting edited info in a way that might be useful to humans but will > > be unintelligible to programs. If we go that route, it will be harder > > to do the right thing later. > > > > > I know some people like a client-independent way of displaying errors, > > > but I like the direct approach of this patch, returning a string with > > > the error line highlighted and the location marked. I don't want to > > > push added complexity into the client, especially when we don't even > > > have a client who has this need yet. > > > > pgAdmin, phpAdmin, pgaccess, and friends don't count? We have GUI front > > ends *today*, you know. > > But how do they display error messages now? Can't they just continue > doing that with this new code? Do we want to make them code their own > error handling, and for what little benefit? Let them figure out how to > display the error in fixed-width font and be done with it. I am sure > they have bigger things to do than colorize error locations. A bigger question is that if we decide to output just offset information in the message, we have to be sure _all_ the clients can interpret it or the syntax information is confusing. Are we prepared to get all the clients update at the same time we add the feature? Seems we should go with a simple solution now and add later. -- Bruce Momjian | http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 853-3000 + If your life is a hard drive, | 830 Blythe Avenue + Christ can be your backup. | Drexel Hill, Pennsylvania 19026
Bruce Momjian wrote: > > > > > In fact, their solution is an improvement over what is in > > > > TODO.detail/yacc now. > > > > > > Agreed, the idea of pulling out just the one line is an improvement over > > > the last patch. It's still going down the wrong path though. We should > > > be empowering client apps to highlight syntax errors properly, not > > > presenting edited info in a way that might be useful to humans but will > > > be unintelligible to programs. If we go that route, it will be harder > > > to do the right thing later. > > > > > > > I know some people like a client-independent way of displaying errors, > > > > but I like the direct approach of this patch, returning a string with > > > > the error line highlighted and the location marked. I don't want to > > > > push added complexity into the client, especially when we don't even > > > > have a client who has this need yet. > > > > > > pgAdmin, phpAdmin, pgaccess, and friends don't count? We have GUI front > > > ends *today*, you know. > > > > But how do they display error messages now? Can't they just continue > > doing that with this new code? Do we want to make them code their own > > error handling, and for what little benefit? Let them figure out how to > > display the error in fixed-width font and be done with it. I am sure > > they have bigger things to do than colorize error locations. > > A bigger question is that if we decide to output just offset information > in the message, we have to be sure _all_ the clients can interpret it or > the syntax information is confusing. Are we prepared to get all the > clients update at the same time we add the feature? Seems we should go > with a simple solution now and add later. > If instead of printing: ERROR: A parse error near "foo" we print ERROR: A parse error near "foo" (index=10) it should not affect any of the existing clients. For the clients, this will be just text as before and they will print it as received. If some client wants, it may look for the index information and do whatever is convenient for that interface (as an enhancement). So I think this option is backward compatible. -- Fernando Nasser Red Hat - Toronto E-Mail: fnasser@redhat.com 2323 Yonge Street, Suite #300 Toronto, Ontario M4P 2C9
Fernando Nasser <fnasser@cygnus.com> writes: > If instead of printing: > ERROR: A parse error near "foo" > we print > ERROR: A parse error near "foo" (index=10) > it should not affect any of the existing clients. One objection to this idea is that it doesn't play nicely with localization of error message texts. I'd sooner do ERROR: A parse error near "foo" ERRORLOCATION: 10 which doesn't create any problems with localization (there's no particular need to translate the keywords, since a client probably wouldn't show them to the user anyway). It's just as backward compatible, and not that much uglier for an old client. regards, tom lane
Tom Lane wrote: > > Fernando Nasser <fnasser@cygnus.com> writes: > > If instead of printing: > > ERROR: A parse error near "foo" > > we print > > ERROR: A parse error near "foo" (index=10) > > it should not affect any of the existing clients. > > One objection to this idea is that it doesn't play nicely with > localization of error message texts. I'd sooner do > > ERROR: A parse error near "foo" > ERRORLOCATION: 10 > > which doesn't create any problems with localization (there's no > particular need to translate the keywords, since a client probably > wouldn't show them to the user anyway). It's just as backward > compatible, and not that much uglier for an old client. I'm not sure how this format causes a problem with localization. The trailing bracketed text isn't part of the message text -- it's just a set of fields and values. So, since the keywords aren't really intended for raw display, they don't require translation. Parsing the format is no harder, and the raw output isn't as ugly as is a multi-line list of fields and values, IMHO. (I really dislike unnecessarily having gobs of output lines in the message.) Neil -- Neil Padgett Red Hat Canada Ltd. E-Mail: npadgett@redhat.com 2323 Yonge Street, Suite #300, Toronto, ON M4P 2C9
Neil Padgett <npadgett@redhat.com> writes: > I'm not sure how this format causes a problem with localization. The > trailing bracketed text isn't part of the message text -- it's just a > set of fields and values. It *looks* like it is part of the message text --- to users, and to programs that aren't specifically aware that it isn't. A user-friendly client would need to take extra steps to strip out the (index=N) part to avoid complaints from users that their error messages aren't getting fully translated. > So, since the keywords aren't really intended > for raw display, they don't require translation. Parsing the format is > no harder, and the raw output isn't as ugly as is a multi-line list of > fields and values, IMHO. (I really dislike unnecessarily having gobs of > output lines in the message.) I don't much care for it either, and wouldn't propose it if this were the sole application. However, we have other applications, as noted in the previous discussion: --- distinguishing the actual error message from tips/hints about what to do about it. There are a fair number of these already, and right now there's just a very weak formatting convention that hints appear on a separate line. --- distinguishing a translatable (primary) error message from a maintainer error message that need not be translated. We have lots and lots of errors in the backend that could all fit under a single primary error code of "Internal error, please report this to pgsql-bugs", thus vastly reducing the burden on message translators. The maintainer error message (eg, "foobar: unexpected node type 124") still needs to appear, but it could be a secondary field. --- including backend file name and line number of the elog call, for easier debugging and unambiguous identification of an error source. --- severity level --- doubtless other ideas will occur to us once we have the capability. Given all these potential benefits, I'm willing to endure the downside of slightly ugly-looking error reports in old clients. They'll still *work*, mind you, and indeed emit info that users might like to have. To the extent that it's clutter, people will be motivated to update their clients. Doesn't seem like much of a downside. regards, tom lane
Tom Lane wrote: > > Neil Padgett <npadgett@redhat.com> writes: > > I'm not sure how this format causes a problem with localization. The > > trailing bracketed text isn't part of the message text -- it's just a > > set of fields and values. > > It *looks* like it is part of the message text --- to users, and to > programs that aren't specifically aware that it isn't. A user-friendly > client would need to take extra steps to strip out the (index=N) part > to avoid complaints from users that their error messages aren't getting > fully translated. > This is exactly what I want. If you don't have a new client, it looks like a message with some funk on the end. If you have a new, friendly client, it will strip out the field/value list at the end. Exactly the same as the multi-line list, really. And the translation complaint is equally applicable to either format. > > So, since the keywords aren't really intended > > for raw display, they don't require translation. Parsing the format is > > no harder, and the raw output isn't as ugly as is a multi-line list of > > fields and values, IMHO. (I really dislike unnecessarily having gobs of > > output lines in the message.) > > I don't much care for it either, and wouldn't propose it if this were > the sole application. However, we have other applications, as noted in > the previous discussion: > > --- distinguishing the actual error message from tips/hints about what > to do about it. There are a fair number of these already, and right > now there's just a very weak formatting convention that hints > appear on a separate line. I didn't know that such a convention exists already -- how would these hints look under your proposed new format? > > --- distinguishing a translatable (primary) error message from a > maintainer error message that need not be translated. We have lots > and lots of errors in the backend that could all fit under a single > primary error code of "Internal error, please report this to > pgsql-bugs", thus vastly reducing the burden on message translators. > The maintainer error message (eg, "foobar: unexpected node type 124") > still needs to appear, but it could be a secondary field. > Why aren't we using numerics to do this? I recall reading something (in the archives?) about them before, but I don't recall the outcome. Is anything like numerics being added to help with the localization efforts? It would seem this is the best way to handle primary errors, versus maintainer errors. > --- including backend file name and line number of the elog call, for > easier debugging and unambiguous identification of an error source. > > --- severity level > > --- doubtless other ideas will occur to us once we have the capability. Hmm... You could do any of these with either format, but I'm starting to that with this many fields, any message in my suggested format is probably going to wrap. So I'm pretty much sold on a multi-line format. (It might even be less ugly for messages with lots of fields!) > > Given all these potential benefits, I'm willing to endure the downside > of slightly ugly-looking error reports in old clients. They'll still > *work*, mind you, and indeed emit info that users might like to have. > To the extent that it's clutter, people will be motivated to update > their clients. Doesn't seem like much of a downside. > No, I don't think so either. It seems that this new format makes sense. Would the elog call be changed to support passing in a list of arguments? Or are you proposing we just hard code the field name / value lists into the messages? (a bad idea, IMHO) We should probably introduce a new call, say, eelog (for enhanced error log) that takes such a list, and then we could define elog as a macro which calls eelog with suitable defaults for use with "legacy" messages. Then, we wouldn't need to go after every error message right away. (And in fact, probably, in the case of soem rare messages. need not ever.) The question this brings up is whether a logging change can / should be tackled in this release. Specifically, with the current state of internationalization work, is it best to do it now, or later? Or, for now, should we just decide on an output format, and then hardcode the field output for just the syntax error reporting, leaving everything else to be tackled later? Neil -- Neil Padgett Red Hat Canada Ltd. E-Mail: npadgett@redhat.com 2323 Yonge Street, Suite #300, Toronto, ON M4P 2C9
Neil Padgett <npadgett@redhat.com> writes: > This is exactly what I want. If you don't have a new client, it looks > like a message with some funk on the end. If you have a new, friendly > client, it will strip out the field/value list at the end. Exactly the > same as the multi-line list, really. And the translation complaint is > equally applicable to either format. Agreed --- as long as the *only* thing you want to add is a syntax error location, that way would be better. But it doesn't scale... >> --- distinguishing the actual error message from tips/hints about what >> to do about it. There are a fair number of these already, and right >> now there's just a very weak formatting convention that hints >> appear on a separate line. > I didn't know that such a convention exists already -- how would these > hints look under your proposed new format? Well, it's a pretty weak convention, but here are a couple of examples of what I'm talking about: elog(ERROR, "_bt_getstackbuf: my bits moved right off the end of the world!" "\n\tRecreate index %s.", RelationGetRelationName(rel)); elog(ERROR, "Left hand side of operator '%s' has an unknown type" "\n\tProbably a bad attribute name", op); elog(ERROR, "Unable to identify a %s operator '%s' for type '%s'" "\n\tYou may need to add parentheses or an explicit cast", (is_left_op ? "left" : "right"), op, typeidTypeName(arg)); In all these cases, I'd call the added-on lines hints --- they're not part of the basic error message, and the hint may not be applicable to your particular situation. Without wanting to start an argument as to the validity of these particular hints, I do think that it'd be a good idea to distinguish them from the primary error message. In the first example, where I'd like to see us end up is something like: ERROR: Internal error, please report to pgsql-bugs DETAIL: my bits moved right off the end of the world! HINT: Recreate index foo CODELOCATION: _bt_getstackbuf: src/backend/access/nbtree/nbtinsert.c, line 551 I'm not wedded to these particular keywords, but hopefully this will serve as an illustration that we're cramming a lot of stuff into an error message already. Breaking it out into fields could render it more intelligible, not less so --- and with an updated client, a user could choose not to look at the info he doesn't want. Right now he has no real prospect of suppressing unwanted info. An example is the source routine name that we cram into many messages, as a poor man's substitute for accurate error location info. That's pretty much useless to non-hackers, and ought to be moved out to a secondary field. > Why aren't we using numerics to do this? Why, thanks for reminding me. Adding a standardized error code (not message) that client programs could interpret is another thing that is on the TODO list. Seems like another application for a separable field of an error report. I think we should keep such codes separate from the (localizable) message text, however. Peter E. made some cogent arguments that trying to drive localization off error numbers would be a losing proposition, as opposed to using gettext(). > Would the elog call be changed to support passing in a list of > arguments? That hadn't really been decided, but clearly some change of the elog API will be needed. I think there is more about this in the archives than you will find in TODO.detail/elog. > We should probably introduce > a new call, say, eelog (for enhanced error log) that takes such a list, > and then we could define elog as a macro which calls eelog with suitable > defaults for use with "legacy" messages. Then, we wouldn't need to go > after every error message right away. Yeah, the $64 question is how to avoid needing a "big bang" changeover of all the elog calls. Even if we wanted to try that, it'd be a continuing headache for user-added datatypes and such. I'd like to be able to leave the existing elog() API in place for a few releases, if possible. > The question this brings up is whether a logging change can / should be > tackled in this release. Specifically, with the current state of > internationalization work, is it best to do it now, or later? I'm still pointing towards 7.2 beta near the end of the month, which would be a mighty tight schedule for anything ambitious in elog rework. On the other hand, there's no harm in working out a design now with the intention of starting to implement it in the 7.3 cycle. regards, tom lane
> > But how do they display error messages now? Can't they just continue > > doing that with this new code? Do we want to make them code their own > > error handling, and for what little benefit? Let them figure out how to > > display the error in fixed-width font and be done with it. I am sure > > they have bigger things to do than colorize error locations. > > My 2c: > > Why not do tom's suggestion for the POSITION: n thing, and modify psql to > strip out that header, and output the relevant part of the sql with a caret > highlighting the error position. > > This will make it so that writers of the guis and format errors how they > like, and users of the most popular text interface (psql) get human-readable > results... > > ie. best of both worlds... OK, I withdraw my objection. Also, I like the idea of adding Hints and Function/line numbers to the output too. The offset of the error would work into that system. -- Bruce Momjian | http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 853-3000 + If your life is a hard drive, | 830 Blythe Avenue + Christ can be your backup. | Drexel Hill, Pennsylvania 19026
> But how do they display error messages now? Can't they just continue > doing that with this new code? Do we want to make them code their own > error handling, and for what little benefit? Let them figure out how to > display the error in fixed-width font and be done with it. I am sure > they have bigger things to do than colorize error locations. My 2c: Why not do tom's suggestion for the POSITION: n thing, and modify psql to strip out that header, and output the relevant part of the sql with a caret highlighting the error position. This will make it so that writers of the guis and format errors how they like, and users of the most popular text interface (psql) get human-readable results... ie. best of both worlds... Chris
Bruce Momjian wrote: > > > > My 2c: > > > > > > Why not do tom's suggestion for the POSITION: n thing, and modify psql to > > > strip out that header, and output the relevant part of the sql with a caret > > > highlighting the error position. > > > > > > This will make it so that writers of the guis and format errors how they > > > like, and users of the most popular text interface (psql) get human-readable > > > results... > > > > > > ie. best of both worlds... > > > > OK, I withdraw my objection. > > > > Also, I like the idea of adding Hints and Function/line numbers to the > > output too. The offset of the error would work into that system. > > I guess the thing that bothered me is that 90% of our interfaces are > just going to throw the carret under the error line and this patch > requires us to modify all the client interfaces to do that, just to > allow 10% to customize their display. > > Now, I know we are going to allow elog() to generate filename, line > number, and function name as optional output information. We could have > a SET paramter like: > > SET SYSOUTPUT TO "message, function, offset" > > and this displays: > > ERROR: lkjasdf > FUNCTION: lkjasdf > OFFSET: 2343 > > and we could have an option for HIGHLIGHT: > > HIGHLIGHT: FROM tab1, tab2 > HIGHLIGHT: ^^^^ > > We could control this via GUC or via the client startup code, and > clients could grab whatever they want to know about an error. I think it seems that we all have a general idea of where we want to go with this. How about the following as a plan to get this ready for 7.2: 1. Leave elog() alone. 2. For syntax error reporting, and syntax error reporting alone, output the error message in the new, multi-line format from the backend. 3. Add functionality to psql for parsing the multi-line format error messages. (Probably this will form a reusable function library that other utilities can use.) 4. Modify psql to use this new functionality, but only for processing parse errors -- all other messages will be handled as is. Thoughts? Neil -- Neil Padgett Red Hat Canada Ltd. E-Mail: npadgett@redhat.com 2323 Yonge Street, Suite #300, Toronto, ON M4P 2C9
> > My 2c: > > > > Why not do tom's suggestion for the POSITION: n thing, and modify psql to > > strip out that header, and output the relevant part of the sql with a caret > > highlighting the error position. > > > > This will make it so that writers of the guis and format errors how they > > like, and users of the most popular text interface (psql) get human-readable > > results... > > > > ie. best of both worlds... > > OK, I withdraw my objection. > > Also, I like the idea of adding Hints and Function/line numbers to the > output too. The offset of the error would work into that system. I guess the thing that bothered me is that 90% of our interfaces are just going to throw the carret under the error line and this patch requires us to modify all the client interfaces to do that, just to allow 10% to customize their display. Now, I know we are going to allow elog() to generate filename, line number, and function name as optional output information. We could have a SET paramter like: SET SYSOUTPUT TO "message, function, offset" and this displays: ERROR: lkjasdf FUNCTION: lkjasdf OFFSET: 2343 and we could have an option for HIGHLIGHT: HIGHLIGHT: FROM tab1, tab2 HIGHLIGHT: ^^^^ We could control this via GUC or via the client startup code, and clients could grab whatever they want to know about an error. -- Bruce Momjian | http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 853-3000 + If your life is a hard drive, | 830 Blythe Avenue + Christ can be your backup. | Drexel Hill, Pennsylvania 19026
> -----Original Message----- > From: Bruce Momjian [mailto:pgman@candle.pha.pa.us] > Sent: 01 August 2001 22:23 > To: Bruce Momjian > Cc: Tom Lane; Neil Padgett; pgsql-patches@postgresql.org > Subject: Re: [PATCHES] Patch for Improved Syntax Error Reporting > > > > > > In fact, their solution is an improvement over what is in > > > > TODO.detail/yacc now. > > > > > > Agreed, the idea of pulling out just the one line is an > improvement > > > over the last patch. It's still going down the wrong > path though. > > > We should be empowering client apps to highlight syntax errors > > > properly, not presenting edited info in a way that might > be useful > > > to humans but will be unintelligible to programs. If we go that > > > route, it will be harder to do the right thing later. > > > > > > > I know some people like a client-independent way of displaying > > > > errors, but I like the direct approach of this patch, > returning a > > > > string with the error line highlighted and the location > marked. I > > > > don't want to push added complexity into the client, especially > > > > when we don't even have a client who has this need yet. > > > > > > pgAdmin, phpAdmin, pgaccess, and friends don't count? We > have GUI > > > front ends *today*, you know. > > > > But how do they display error messages now? Can't they > just continue > > doing that with this new code? Do we want to make them > code their own > > error handling, and for what little benefit? Let them > figure out how > > to display the error in fixed-width font and be done with it. I am > > sure they have bigger things to do than colorize error locations. > Apologies for not responding to the original message, I only seem to have bits of this thread :-( In the case of pgAdmin, it displays the error messages as returned by the backend. I have no desire to start maintaining my own list of error codes/messages, especially as those returned by the backend are generally user friendly, and I certainly don't want to start parsing user supplied SQL statements myself - I'd rather leave that to the real parser. Personally I would welcome syntax error messages in the form Neil's original patch supplies. Regards, Dave.
> -----Original Message----- > From: Fernando Nasser [mailto:fnasser@cygnus.com] > Sent: 01 August 2001 22:34 > To: Bruce Momjian > Cc: Tom Lane; Neil Padgett; pgsql-patches@postgresql.org > Subject: Re: [PATCHES] Patch for Improved Syntax Error Reporting > > > Bruce Momjian wrote: > > > > > > > In fact, their solution is an improvement over what is in > > > > > TODO.detail/yacc now. > > > > > > > > Agreed, the idea of pulling out just the one line is an > > > > improvement over the last patch. It's still going down > the wrong > > > > path though. We should be empowering client apps to highlight > > > > syntax errors properly, not presenting edited info in a > way that > > > > might be useful to humans but will be unintelligible to > programs. > > > > If we go that route, it will be harder to do the right thing > > > > later. > > > > > > > > > I know some people like a client-independent way of > displaying > > > > > errors, but I like the direct approach of this patch, > returning > > > > > a string with the error line highlighted and the location > > > > > marked. I don't want to push added complexity into > the client, > > > > > especially when we don't even have a client who has this need > > > > > yet. > > > > > > > > pgAdmin, phpAdmin, pgaccess, and friends don't count? > We have GUI > > > > front ends *today*, you know. > > > > > > But how do they display error messages now? Can't they just > > > continue doing that with this new code? Do we want to make them > > > code their own error handling, and for what little benefit? Let > > > them figure out how to display the error in fixed-width > font and be > > > done with it. I am sure they have bigger things to do > than colorize > > > error locations. > > > > A bigger question is that if we decide to output just offset > > information in the message, we have to be sure _all_ the > clients can > > interpret it or the syntax information is confusing. Are > we prepared > > to get all the clients update at the same time we add the feature? > > Seems we should go with a simple solution now and add later. > > > > If instead of printing: > > ERROR: A parse error near "foo" > > we print > > ERROR: A parse error near "foo" (index=10) > > it should not affect any of the existing clients. > For the clients, this will be just text as before and they > will print it as received. If some client wants, it may look > for the index information and do whatever is convenient for > that interface (as an enhancement). > > So I think this option is backward compatible. I disagree. In pgAdmin's case, if the user enters an SQL query, pgAdmin will rewrite it to remove any formatting that the user has entered before it's sent to the backend. This means that the query sent may (== often is) shorter than the query entered by the user where carriage returns etc. have been removed, hence the index value would just be confusing to the user as it may well be wrong. Regards, Dave.
Neil Padgett <npadgett@redhat.com> writes: > I think it seems that we all have a general idea of where we want to go > with this. How about the following as a plan to get this ready for 7.2: > 1. Leave elog() alone. > 2. For syntax error reporting, and syntax error reporting alone, output > the error message in the new, multi-line format from the backend. > 3. Add functionality to psql for parsing the multi-line format error > messages. (Probably this will form a reusable function library that > other utilities can use.) > 4. Modify psql to use this new functionality, but only for processing > parse errors -- all other messages will be handled as is. That seems like a good plan --- forward progress, and doable within the 7.2 time frame. I think the thing we need to nail down next is the changes in the wire protocol --- specifically, how the "multi line format" of error messages will be defined. We don't necessarily need to define all the field keywords yet, but we do need to have a clear idea of the format rules and the parsing algorithm that clients will use. This might be trickier than it seems at first glance, because we need both backwards and forwards compatibility if we are to avoid a protocol version bump: not only must old clients accept the new syntax (which is a no-op), but new clients should behave reasonably well when fed messages from an old backend (which might not adhere perfectly to the new syntax definition). I'd suggest drawing up a straw-man definition and posting it on pghackers and/or pginterfaces for comment. Another thing to think about (orthogonally to the wire protocol) is where on the client side to do the parsing. IMHO it'd be a good idea to put as much of it as we can into libpq, where it'll be available automatically to non-psql applications. Again, though, compatibility is an issue. regards, tom lane
Dave Page <dpage@vale-housing.co.uk> writes: > I disagree. In pgAdmin's case, if the user enters an SQL query, pgAdmin will > rewrite it to remove any formatting that the user has entered before it's > sent to the backend. This means that the query sent may (== often is) > shorter than the query entered by the user where carriage returns etc. have > been removed, hence the index value would just be confusing to the user as > it may well be wrong. This strikes me as a perfect example of the situation where the frontend application *must* take some of the responsibility for displaying a proper location for a syntax error. Once you've rewritten the query like that, a patch such as Neil's original effort would be unlikely to produce anything particularly helpful to the user. For example: does your reformatting include collapsing out whitespace, eg reducing newlines to spaces? If so, Neil's assumption that one line surrounding the error point is the right amount of context will fail badly. I think if you want to do that sort of thing then it's up to you to maintain the mapping between what the user typed and what you actually sent to the backend, so that you can reverse it to interpret the query offset, and finally display an error that points at the right place in text that the user really typed. Memo to Neil: I believe you'll find that even psql does a certain amount of this --- IIRC, it strips out SQL comments, for instance. regards, tom lane
Dave Page <dpage@vale-housing.co.uk> writes: > In the case of pgAdmin, it displays the error messages as returned by the > backend. I have no desire to start maintaining my own list of error > codes/messages, especially as those returned by the backend are generally > user friendly, and I certainly don't want to start parsing user supplied SQL > statements myself - I'd rather leave that to the real parser. Maybe I'm confused, but I don't think any of the proposals would require you to do any of those things. The only case that requires any significant new effort from the client app is where it reformats queries before sending them on to the parser ... but that is knowledge you've already built into the client, no? You just have to remember what you did so that you can map a backend error index into the original text. regards, tom lane
> Maybe I'm confused, but I don't think any of the proposals would require > you to do any of those things. The only case that requires any > significant new effort from the client app is where it reformats queries > before sending them on to the parser ... but that is knowledge you've > already built into the client, no? You just have to remember what you > did so that you can map a backend error index into the original text. That seems particularly hard. If you collapse spaces and stuff, it is hard to figure out how it was originally formatted. I would just return the collapsed version locating the error. -- Bruce Momjian | http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 853-3000 + If your life is a hard drive, | 830 Blythe Avenue + Christ can be your backup. | Drexel Hill, Pennsylvania 19026
> -----Original Message----- > From: Tom Lane [mailto:tgl@sss.pgh.pa.us] > Sent: 02 August 2001 22:12 > To: Dave Page > Cc: 'Fernando Nasser'; Bruce Momjian; Neil Padgett; > pgsql-patches@postgresql.org > Subject: Re: [PATCHES] Patch for Improved Syntax Error Reporting > > > Dave Page <dpage@vale-housing.co.uk> writes: > > I disagree. In pgAdmin's case, if the user enters an SQL query, > > pgAdmin will rewrite it to remove any formatting that the user has > > entered before it's sent to the backend. This means that the query > > sent may (== often is) shorter than the query entered by the user > > where carriage returns etc. have been removed, hence the > index value > > would just be confusing to the user as it may well be wrong. > > This strikes me as a perfect example of the situation where > the frontend application *must* take some of the > responsibility for displaying a proper location for a syntax > error. Once you've rewritten the query like that, a patch > such as Neil's original effort would be unlikely to produce > anything particularly helpful to the user. For example: does > your reformatting include collapsing out whitespace, eg > reducing newlines to spaces? If so, Neil's assumption that > one line surrounding the error point is the right amount of > context will fail badly. > > I think if you want to do that sort of thing then it's up to > you to maintain the mapping between what the user typed and > what you actually sent to the backend, so that you can > reverse it to interpret the query offset, and finally display > an error that points at the right place in text that the user > really typed. > > Memo to Neil: I believe you'll find that even psql does a > certain amount of this --- IIRC, it strips out SQL comments, > for instance. Yes, I agree that it is partially pgAdmin's fault if it all falls over. However, pgAdmin does pretty much just the reformatting you've suggested, including stripping of comments (well that's in the new codebase anyway), so: -- Simple Query SELECT oid, relname FRUM pg_class becomes: SELECT oid, relname FRUM pg_class Giving the error ERROR: parser: parse error at or near 'frum': SELECT oid, relname FRUM pg_class ^ Which would still be useful of course. Regards, Dave.
> -----Original Message----- > From: Tom Lane [mailto:tgl@sss.pgh.pa.us] > Sent: 02 August 2001 22:16 > To: Dave Page > Cc: 'Bruce Momjian'; Neil Padgett; pgsql-patches@postgresql.org > Subject: Re: [PATCHES] Patch for Improved Syntax Error Reporting > > > Dave Page <dpage@vale-housing.co.uk> writes: > > In the case of pgAdmin, it displays the error messages as > returned by > > the backend. I have no desire to start maintaining my own list of > > error codes/messages, especially as those returned by the > backend are > > generally user friendly, and I certainly don't want to > start parsing > > user supplied SQL statements myself - I'd rather leave that to the > > real parser. > > Maybe I'm confused, but I don't think any of the proposals > would require you to do any of those things. The only case > that requires any significant new effort from the client app > is where it reformats queries before sending them on to the > parser ... but that is knowledge you've already built into > the client, no? You just have to remember what you did so > that you can map a backend error index into the original text. Like I said in another message, I seem to only have received bits of this thread - I've probably misunderstood the comments that I read before writing that.... it's been a long day and it's 11PM here now! Regards, Dave.
Dave Page <dpage@vale-housing.co.uk> writes: > However, pgAdmin does pretty much just the reformatting you've suggested, > including stripping of comments (well that's in the new codebase anyway), > so: > -- Simple Query > SELECT > oid, > relname > FRUM > pg_class > becomes: > SELECT oid, relname FRUM pg_class > Giving the error > ERROR: parser: parse error at or near 'frum': > SELECT oid, relname FRUM pg_class > ^ > Which would still be useful of course. Yes, but on that size of query you hardly need a syntax error pointer anyway. Consider what happens when the query is several thousand characters long and the error message wraps across several dozen lines (as does the spaces-and-caret pointer line). Now you really *need* that error pointer, but good luck making any use of what's displayed :-( Another reason why the simplistic approach is not all that useful is that the reformatting might itself be or mask the problem. Suppose I fat-finger my query so that I have an accidentally unterminated comment, or the code recognizes a comment where I didn't intend one. The error message produced by the backend will omit the parts of the query that the client code thought were comments, which is likely to make it harder not easier for me to figure out what I did wrong --- not least because it eliminates cues I might otherwise use to figure out which part of a large query string is being complained about. Basically, upgrading syntax-error behavior to something useful will take some cooperation from the client side as well as the backend. IMHO we shouldn't dumb down our ambitions to be merely what can be implemented without any client cooperation. regards, tom lane
Bruce Momjian <pgman@candle.pha.pa.us> writes: >> significant new effort from the client app is where it reformats queries >> before sending them on to the parser ... but that is knowledge you've >> already built into the client, no? You just have to remember what you >> did so that you can map a backend error index into the original text. > That seems particularly hard. No, it's absolutely and utterly trivial. As you collapse the query to what you send, you are copying it from an input buffer to an output workspace, no? As you do this, you build an index array that holds the input index of each output character. Then when you get an error index from the backend, you look in this array to find the source-text index you want to point at. Then you do what Neil originally proposed to do in the backend (which was what, about a dozen lines of code?) to produce an error report, if the style of error report he proposed was what makes sense for your client app. Or you do something different if that's what makes sense for your app. ninchars = strlen(inputbuf); outbuf = (char *) malloc((ninchars+1) * sizeof(char)); maparray = (int *) malloc((ninchars+1) * sizeof(int)); outindex = 0; for (inindex = 0 ; inindex < ninchars; inindex++) { if (want to send this char to backend) { outbuf[outindex] = inputbuf[inindex]; maparray[outindex] = inindex; outindex++; } } outbuf[outindex] = inputbuf[inindex]; /* terminating \0 */ maparray[outindex] = inindex; This is essentially three more lines than what you had anyway (everything except the lines mentioning maparray was there before). When you get an error index back from the backend, it takes one more line of code to map it back to an input-string index. Not exactly rocket science, IMHO. Four additional lines of code on the client side seems a *very* small price to pay for giving client apps the freedom to do the right thing for their style of user interface. regards, tom lane PS: okay, five lines. I forgot to count free(maparray) ;-)
> Basically, upgrading syntax-error behavior to something useful will take > some cooperation from the client side as well as the backend. IMHO we > shouldn't dumb down our ambitions to be merely what can be implemented > without any client cooperation. I like the idea of putting the formatting stuff in libpq. It centralizes it, and allows the client to control the formatting too. -- Bruce Momjian | http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 853-3000 + If your life is a hard drive, | 830 Blythe Avenue + Christ can be your backup. | Drexel Hill, Pennsylvania 19026
Yes, that is how I thought they would have to do it. Seems pretty complicated to me, at least, but if no one complains, it is fine by me. > Bruce Momjian <pgman@candle.pha.pa.us> writes: > >> significant new effort from the client app is where it reformats queries > >> before sending them on to the parser ... but that is knowledge you've > >> already built into the client, no? You just have to remember what you > >> did so that you can map a backend error index into the original text. > > > That seems particularly hard. > > No, it's absolutely and utterly trivial. As you collapse the query to > what you send, you are copying it from an input buffer to an output > workspace, no? As you do this, you build an index array that holds the > input index of each output character. Then when you get an error index > from the backend, you look in this array to find the source-text index > you want to point at. Then you do what Neil originally proposed to do > in the backend (which was what, about a dozen lines of code?) to produce > an error report, if the style of error report he proposed was what makes > sense for your client app. Or you do something different if that's what > makes sense for your app. > > ninchars = strlen(inputbuf); > outbuf = (char *) malloc((ninchars+1) * sizeof(char)); > maparray = (int *) malloc((ninchars+1) * sizeof(int)); > > outindex = 0; > for (inindex = 0 ; inindex < ninchars; inindex++) > { > if (want to send this char to backend) > { > outbuf[outindex] = inputbuf[inindex]; > maparray[outindex] = inindex; > outindex++; > } > } > > outbuf[outindex] = inputbuf[inindex]; /* terminating \0 */ > maparray[outindex] = inindex; > > This is essentially three more lines than what you had anyway > (everything except the lines mentioning maparray was there before). > When you get an error index back from the backend, it takes one > more line of code to map it back to an input-string index. > Not exactly rocket science, IMHO. > > Four additional lines of code on the client side seems a *very* small > price to pay for giving client apps the freedom to do the right thing > for their style of user interface. > > regards, tom lane > > PS: okay, five lines. I forgot to count free(maparray) ;-) > -- Bruce Momjian | http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 853-3000 + If your life is a hard drive, | 830 Blythe Avenue + Christ can be your backup. | Drexel Hill, Pennsylvania 19026
Bruce Momjian wrote: > > > Basically, upgrading syntax-error behavior to something useful will take > > some cooperation from the client side as well as the backend. IMHO we > > shouldn't dumb down our ambitions to be merely what can be implemented > > without any client cooperation. > > I like the idea of putting the formatting stuff in libpq. It > centralizes it, and allows the client to control the formatting too. What exactly would you put in libpq? Neil -- Neil Padgett Red Hat Canada Ltd. E-Mail: npadgett@redhat.com 2323 Yonge Street, Suite #300, Toronto, ON M4P 2C9
Neil Padgett <npadgett@redhat.com> writes: >> I like the idea of putting the formatting stuff in libpq. It >> centralizes it, and allows the client to control the formatting too. > What exactly would you put in libpq? I think we could put in code that parses the multi-line error message format, and returns preparsed data in the form of a list of field names and field values. I don't see that libpq can do anything useful with producing a syntax-error pointer, since it doesn't have access to the original user query string, only to the same string that's sent to the backend; so as far as it can know, the error index that the backend returns is gospel. Any reverse-mapping from that to a user-query index has got to be in the client app, AFAICS. regards, tom lane
> That seems like a good plan --- forward progress, and doable within the > 7.2 time frame. Just a quick question - when does stuff need to be in for 7.2? I've been sitting on this ADD UNIQUE patch for a while. There's just a few little things in its behaviour that need to be changed. Like, letting ppl add unique(a,b) and unique (b,a) plus warn if there's an existing non-unique index... I'm just finding it really hard to find time to fiddle with it - how long do I have before 7.2alpha? Chris
> Neil Padgett <npadgett@redhat.com> writes: > >> I like the idea of putting the formatting stuff in libpq. It > >> centralizes it, and allows the client to control the formatting too. > > > What exactly would you put in libpq? > > I think we could put in code that parses the multi-line error message > format, and returns preparsed data in the form of a list of field names > and field values. I don't see that libpq can do anything useful with > producing a syntax-error pointer, since it doesn't have access to the > original user query string, only to the same string that's sent to the > backend; so as far as it can know, the error index that the backend > returns is gospel. Any reverse-mapping from that to a user-query index > has got to be in the client app, AFAICS. Seems we could easily have the backend return a string with three tokens before/after the syntax error, and that would be enough. It would save the client from doing all this housekeeping just for syntax errors. -- Bruce Momjian | http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 853-3000 + If your life is a hard drive, | 830 Blythe Avenue + Christ can be your backup. | Drexel Hill, Pennsylvania 19026
Bruce Momjian <pgman@candle.pha.pa.us> writes: > Seems we could easily have the backend return a string with three tokens > before/after the syntax error, and that would be enough. I strongly disagree. If the "three tokens" are something like ")))", how's that going to help you figure out where you tanked your parenthesization in a huge query with dozens of parentheses? See also the points I made concerning errors masked by comment removal. regards, tom lane
> Bruce Momjian <pgman@candle.pha.pa.us> writes: > > Seems we could easily have the backend return a string with three tokens > > before/after the syntax error, and that would be enough. > > I strongly disagree. If the "three tokens" are something like ")))", > how's that going to help you figure out where you tanked your > parenthesization in a huge query with dozens of parentheses? See also > the points I made concerning errors masked by comment removal. I strongly disagree too. :-) (See, I can use the word "strongly" too.) Anyway, you aren't going to have three parens on both sides of the error, are you? I didn't say it was perfect, just easier for the client coders. -- Bruce Momjian | http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 853-3000 + If your life is a hard drive, | 830 Blythe Avenue + Christ can be your backup. | Drexel Hill, Pennsylvania 19026
> -----Original Message----- > From: Tom Lane [mailto:tgl@sss.pgh.pa.us] > Sent: 02 August 2001 23:12 > To: Dave Page > Cc: 'Fernando Nasser'; Bruce Momjian; Neil Padgett; > pgsql-patches@postgresql.org > Subject: Re: [PATCHES] Patch for Improved Syntax Error Reporting > > > Dave Page <dpage@vale-housing.co.uk> writes: > > However, pgAdmin does pretty much just the reformatting you've > > suggested, including stripping of comments (well that's in the new > > codebase anyway), > > so: > > > -- Simple Query > > SELECT > > oid, > > relname > > FRUM > > pg_class > > > becomes: > > > SELECT oid, relname FRUM pg_class > > > Giving the error > > > ERROR: parser: parse error at or near 'frum': > > SELECT oid, relname FRUM pg_class > > ^ > > > Which would still be useful of course. > > Yes, but on that size of query you hardly need a syntax error > pointer anyway. Consider what happens when the query is > several thousand characters long and the error message wraps > across several dozen lines (as does the spaces-and-caret > pointer line). Now you really *need* that error pointer, but > good luck making any use of what's displayed :-( Yes, point taken. > Another reason why the simplistic approach is not all that > useful is that the reformatting might itself be or mask the > problem. Suppose I fat-finger my query so that I have an > accidentally unterminated comment, or the code recognizes a > comment where I didn't intend one. The error message > produced by the backend will omit the parts of the query that > the client code thought were comments, which is likely to > make it harder not easier for me to figure out what I did > wrong --- not least because it eliminates cues I might > otherwise use to figure out which part of a large query > string is being complained about. Again, point taken. > Basically, upgrading syntax-error behavior to something > useful will take some cooperation from the client side as > well as the backend. IMHO we shouldn't dumb down our > ambitions to be merely what can be implemented without any > client cooperation. Oh, I quite agree. I'm not adverse to updating my code, I just want to avoid users getting misleading messages until I come up with those updates. Regards, Dave.
Dave Page <dpage@vale-housing.co.uk> writes: > Oh, I quite agree. I'm not adverse to updating my code, I just want to avoid > users getting misleading messages until I come up with those updates. Hmm ... if they were actively misleading then I'd share your concern. I guess what you're thinking is that the error offset reported by the backend won't correspond directly to what the user typed, and if the user tries to use the offset to manually count off characters, he may arrive at the wrong place? Good point. I'm not sure whether a message like ERROR: parser: parse error at or near 'frum'; POSITION: 42 would be likely to encourage people to try that. Thoughts? (I do think this is a good argument for not embedding the position straight into the main error message though...) One possible compromise is to combine the straight character-offset approach with a simplistic context display: ERROR: parser: parse error at or near 'frum'; POSITION: 42 ... oid,relname FRUM ... The idea is to define the "POSITION" field as an integer offset possibly followed by whitespace and noise words. An updated client would grab the offset, ignore the rest of the field, and do the right thing. A not-updated client would display the entire message, and with any luck the user would read it correctly. regards, tom lane
Neil Padgett writes: > 1. Leave elog() alone. > 2. For syntax error reporting, and syntax error reporting alone, output > the error message in the new, multi-line format from the backend. > 3. Add functionality to psql for parsing the multi-line format error > messages. (Probably this will form a reusable function library that > other utilities can use.) > 4. Modify psql to use this new functionality, but only for processing > parse errors -- all other messages will be handled as is. We've had a discussion a month or two ago about rearranging error and notice packets into key/value pairs, which would contain error codes, command string positions, and what not. I opined that this would require a wire protocol change. I do not like the alternative suggestion of separating these pairs by newlines or some such. That puts a rather peculiar (and possibly not automatically enforcable) restriction on the allowable contents of the message texts. Note that message texts can contain parse or plan trees, which can contain all kinds of funny line noise. A protocol change isn't the end of the world, so please consider it. Btw., there's something in the SQL standard about all of this. I can tell you more once I'm done reading back emails. -- Peter Eisentraut peter_e@gmx.net http://funkturm.homeip.net/~peter
> -----Original Message----- > From: Tom Lane [mailto:tgl@sss.pgh.pa.us] > Sent: 04 August 2001 17:55 > To: Dave Page > Cc: 'Fernando Nasser'; Bruce Momjian; Neil Padgett; > pgsql-patches@postgresql.org > Subject: Re: [PATCHES] Patch for Improved Syntax Error Reporting > > > Dave Page <dpage@vale-housing.co.uk> writes: > > Oh, I quite agree. I'm not adverse to updating my code, I > just want to > > avoid users getting misleading messages until I come up with those > > updates. > > Hmm ... if they were actively misleading then I'd share your concern. > > I guess what you're thinking is that the error offset > reported by the backend won't correspond directly to what the > user typed, and if the user tries to use the offset to > manually count off characters, he may arrive at the wrong > place? Good point. I'm not sure whether a message like > > ERROR: parser: parse error at or near 'frum'; > POSITION: 42 > > would be likely to encourage people to try that. Thoughts? > (I do think this is a good argument for not embedding the > position straight into the main error message though...) Yes, that is precisely what I'm thinking. > One possible compromise is to combine the straight > character-offset approach with a simplistic context display: > > ERROR: parser: parse error at or near 'frum'; > POSITION: 42 ... oid,relname FRUM ... > > The idea is to define the "POSITION" field as an integer > offset possibly followed by whitespace and noise words. An > updated client would grab the offset, ignore the rest of the > field, and do the right thing. A not-updated client would > display the entire message, and with any luck the user would > read it correctly. That sounds good, though I'll bet that M$ ADO will chop the message at the whitespace anyway! :-( Of course, that's my problem... Regards, Dave.
Neil Padgett <npadgett@redhat.com> writes: > Attached please find a patch to the input parser that yields better > syntax error reporting on parse errors. This has been discussed before (you guys really should spend more time reading the mail list archives) and in fact you are not the first to submit essentially this patch. The major objections to reporting syntax problems this way, IIRC, are that (a) it makes unsupportable assumptions about what the user interface looks like, for example the assumption that the error message will be displayed in a fixed-width font; and (b) it becomes essentially useless when the input query exceeds a few lines in size. The conclusion we had come to in previous discussion is that the error offset ought to be delivered to the client application as a separate component of the error report, and the client ought to be responsible for doing something appropriate with it --- which might, for example, include highlighting the offending word(s) if it's a GUI application that has the input query in a window. psql couldn't do that, but might choose to redisplay a few dozen characters around the position of the error. In any case, the limiting factor is not the parser change, which is trivial, but our ability/willingness to change the on-the-wire protocol to allow error reports to contain multiple components. There are some other useful things that could be done once we did that. Again, I'd recommend trawling the archives for a bit. regards, tom lane
Email added to TODO.detail. > Dave Page <dpage@vale-housing.co.uk> writes: > > Oh, I quite agree. I'm not adverse to updating my code, I just want to avoid > > users getting misleading messages until I come up with those updates. > > Hmm ... if they were actively misleading then I'd share your concern. > > I guess what you're thinking is that the error offset reported by the > backend won't correspond directly to what the user typed, and if the > user tries to use the offset to manually count off characters, he may > arrive at the wrong place? Good point. I'm not sure whether a message > like > > ERROR: parser: parse error at or near 'frum'; > POSITION: 42 > > would be likely to encourage people to try that. Thoughts? (I do think > this is a good argument for not embedding the position straight into the > main error message though...) > > One possible compromise is to combine the straight character-offset > approach with a simplistic context display: > > ERROR: parser: parse error at or near 'frum'; > POSITION: 42 ... oid,relname FRUM ... > > The idea is to define the "POSITION" field as an integer offset possibly > followed by whitespace and noise words. An updated client would grab > the offset, ignore the rest of the field, and do the right thing. A > not-updated client would display the entire message, and with any luck > the user would read it correctly. > > regards, tom lane > > ---------------------------(end of broadcast)--------------------------- > TIP 5: Have you checked our extensive FAQ? > > http://www.postgresql.org/users-lounge/docs/faq.html > -- Bruce Momjian | http://candle.pha.pa.us pgman@candle.pha.pa.us | (610) 853-3000 + If your life is a hard drive, | 830 Blythe Avenue + Christ can be your backup. | Drexel Hill, Pennsylvania 19026