Thread: Extended SERIAL parsing
Hi, after some experimentation, I came up with the attached patch, which implements parsing the following SERIAL types: SERIAL SERIAL GENERATED { ALWAYS | BY DEFAULT } SERIAL GENERATED [ ALWAYS | BY DEFAULT ] AS IDENTITY( sequence options ) The underlying type is still int4 or int8, so the problems you discussed aren't solved. But at least the current semantics is kept. It passes all regression tests, and it works, too: # create table proba (i serial generated as identity(minvalue 5 maxvalue 10) primary key, t text); NOTICE: CREATE TABLE will create implicit sequence "proba_i_seq" for serial column "proba.i" NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "proba_pkey" for table "proba" CREATE TABLE # insert into proba (t) values ('a'); INSERT 0 1 # select * from proba; i | t ---+--- 5 | a (1 row) For now, GENERATED { ALWAYS | BY DEFAULT } are just fillings. The condition (column->is_serial && column->force_default) can help enforcing GENERATED ALWAYS at INSERT time and can also help fixing the two TODO entries about SERIAL. Best regards, Zoltán Böszörményi diff -ur postgresql-8.2/src/backend/parser/analyze.c postgresql-8.2-serial/src/backend/parser/analyze.c --- postgresql-8.2/src/backend/parser/analyze.c 2006-04-30 20:30:39.000000000 +0200 +++ postgresql-8.2-serial/src/backend/parser/analyze.c 2006-06-11 23:36:22.000000000 +0200 @@ -825,40 +825,17 @@ transformColumnDefinition(ParseState *pstate, CreateStmtContext *cxt, ColumnDef *column) { - bool is_serial; bool saw_nullable; Constraint *constraint; ListCell *clist; cxt->columns = lappend(cxt->columns, column); - /* Check for SERIAL pseudo-types */ - is_serial = false; - if (list_length(column->typename->names) == 1) - { - char *typname = strVal(linitial(column->typename->names)); - - if (strcmp(typname, "serial") == 0 || - strcmp(typname, "serial4") == 0) - { - is_serial = true; - column->typename->names = NIL; - column->typename->typeid = INT4OID; - } - else if (strcmp(typname, "bigserial") == 0 || - strcmp(typname, "serial8") == 0) - { - is_serial = true; - column->typename->names = NIL; - column->typename->typeid = INT8OID; - } - } - /* Do necessary work on the column type declaration */ transformColumnType(pstate, column); /* Special actions for SERIAL pseudo-types */ - if (is_serial) + if (column->is_serial) { Oid snamespaceid; char *snamespace; @@ -898,7 +875,7 @@ */ seqstmt = makeNode(CreateSeqStmt); seqstmt->sequence = makeRangeVar(snamespace, sname); - seqstmt->options = NIL; + seqstmt->options = column->seq_opts; cxt->blist = lappend(cxt->blist, seqstmt); diff -ur postgresql-8.2/src/backend/parser/gram.y postgresql-8.2-serial/src/backend/parser/gram.y --- postgresql-8.2/src/backend/parser/gram.y 2006-05-27 19:38:45.000000000 +0200 +++ postgresql-8.2-serial/src/backend/parser/gram.y 2006-06-11 23:42:02.000000000 +0200 @@ -275,6 +275,7 @@ %type <boolean> opt_instead opt_analyze %type <boolean> index_opt_unique opt_verbose opt_full %type <boolean> opt_freeze opt_default opt_recheck +%type <boolean> ColIdGen ColOptIdGen %type <defelt> opt_binary opt_oids copy_delimiter %type <boolean> copy_from opt_hold @@ -284,8 +285,8 @@ %type <node> fetch_direction select_limit_value select_offset_value -%type <list> OptSeqList -%type <defelt> OptSeqElem +%type <list> OptSeqList OptSerialSeqList +%type <defelt> OptSeqElem OptSerialSeqElem %type <istmt> insert_rest @@ -313,7 +314,7 @@ %type <range> relation_expr_opt_alias %type <target> target_el insert_target_el update_target_el insert_column_item -%type <typnam> Typename SimpleTypename ConstTypename +%type <typnam> Typename SimpleTypename SerialTypename ConstTypename GenericType Numeric opt_float Character ConstCharacter CharacterWithLength CharacterWithoutLength @@ -357,10 +358,10 @@ /* ordinary key words in alphabetical order */ %token <keyword> ABORT_P ABSOLUTE_P ACCESS ACTION ADD_P ADMIN AFTER - AGGREGATE ALL ALSO ALTER ANALYSE ANALYZE AND ANY ARRAY AS ASC + AGGREGATE ALL ALSO ALTER ALWAYS ANALYSE ANALYZE AND ANY ARRAY AS ASC ASSERTION ASSIGNMENT ASYMMETRIC AT AUTHORIZATION - BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BINARY BIT + BACKWARD BEFORE BEGIN_P BETWEEN BIGINT BIGSERIAL BINARY BIT BOOLEAN_P BOTH BY CACHE CALLED CASCADE CASCADED CASE CAST CHAIN CHAR_P @@ -380,11 +381,11 @@ FALSE_P FETCH FIRST_P FLOAT_P FOR FORCE FOREIGN FORWARD FREEZE FROM FULL FUNCTION - GLOBAL GRANT GRANTED GREATEST GROUP_P + GENERATED GLOBAL GRANT GRANTED GREATEST GROUP_P HANDLER HAVING HEADER_P HOLD HOUR_P - IF_P ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IN_P INCLUDING INCREMENT + IDENTITY IF_P ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IN_P INCLUDING INCREMENT INDEX INHERIT INHERITS INITIALLY INNER_P INOUT INPUT_P INSENSITIVE INSERT INSTEAD INT_P INTEGER INTERSECT INTERVAL INTO INVOKER IS ISNULL ISOLATION @@ -417,7 +418,7 @@ ROLE ROLLBACK ROW ROWS RULE SAVEPOINT SCHEMA SCROLL SECOND_P SECURITY SELECT SEQUENCE - SERIALIZABLE SESSION SESSION_USER SET SETOF SHARE + SERIAL SERIALIZABLE SESSION SESSION_USER SET SETOF SHARE SHOW SIMILAR SIMPLE SMALLINT SOME STABLE START STATEMENT STATISTICS STDIN STDOUT STORAGE STRICT_P SUBSTRING SUPERUSER_P SYMMETRIC SYSID SYSTEM_P @@ -1798,17 +1799,83 @@ | TableConstraint { $$ = $1; } ; -columnDef: ColId Typename ColQualList +/* + * SERIAL is special, handle it here. Possible cases: + * ColID SERIAL + * ColID SERIAL GENERATED {ALWAYS | BY DEFAULT} + * ColID SERIAL GENERATED [ALWAYS | BY DEFAULT] AS IDENTITY(...) + */ +columnDef: ColId SerialTypename GENERATED ColOptIdGen AS IDENTITY '(' OptSerialSeqList ')' ColQualList + { + ColumnDef *n = makeNode(ColumnDef); + n->colname = $1; + n->typename = $2; + n->constraints = $10; + n->is_local = true; + n->is_serial = true; + n->force_default = $4; + n->seq_opts = $8; + $$ = (Node *)n; + } + | ColId SerialTypename GENERATED ColIdGen ColQualList + { + ColumnDef *n = makeNode(ColumnDef); + n->colname = $1; + n->typename = $2; + n->constraints = $5; + n->is_local = true; + n->is_serial = true; + n->force_default = $4; + n->seq_opts = NIL; + $$ = (Node *)n; + } + | ColId SerialTypename ColQualList + { + ColumnDef *n = makeNode(ColumnDef); + n->colname = $1; + n->typename = $2; + n->constraints = $3; + n->is_local = true; + n->is_serial = true; + n->force_default = false; + n->seq_opts = NIL; + $$ = (Node *)n; + } + | ColId Typename ColQualList { ColumnDef *n = makeNode(ColumnDef); n->colname = $1; n->typename = $2; n->constraints = $3; n->is_local = true; + n->is_serial = false; + n->force_default = false; + n->seq_opts = NIL; $$ = (Node *)n; } ; +SerialTypename: SERIAL + { + $$ = SystemTypeName("int4"); + } + | BIGSERIAL + { + $$ = SystemTypeName("int8"); + } + ; + +ColOptIdGen: + ALWAYS { $$ = true; } + | BY DEFAULT { $$ = false; } + | /*EMPTY*/ { $$ = false; } + ; + +ColIdGen: + ALWAYS { $$ = true; } + | BY DEFAULT { $$ = false; } + ; + ColQualList: ColQualList ColConstraint { $$ = lappend($1, $2); } | /*EMPTY*/ { $$ = NIL; } @@ -2319,6 +2386,48 @@ } ; +OptSerialSeqList: OptSerialSeqList OptSerialSeqElem { $$ = lappend($1, $2); } + | /*EMPTY*/ { $$ = NIL; } + ; + +OptSerialSeqElem: CYCLE + { + $$ = makeDefElem("cycle", (Node *)makeInteger(TRUE)); + } + | NO CYCLE + { + $$ = makeDefElem("cycle", (Node *)makeInteger(FALSE)); + } + | INCREMENT opt_by NumericOnly + { + $$ = makeDefElem("increment", (Node *)$3); + } + | MAXVALUE NumericOnly + { + $$ = makeDefElem("maxvalue", (Node *)$2); + } + | MINVALUE NumericOnly + { + $$ = makeDefElem("minvalue", (Node *)$2); + } + | NO MAXVALUE + { + $$ = makeDefElem("maxvalue", NULL); + } + | NO MINVALUE + { + $$ = makeDefElem("minvalue", NULL); + } + | START opt_with NumericOnly + { + $$ = makeDefElem("start", (Node *)$3); + } + | RESTART opt_with NumericOnly + { + $$ = makeDefElem("restart", (Node *)$3); + } + ; + opt_by: BY {} | /* empty */ {} ; @@ -8349,6 +8458,7 @@ | AGGREGATE | ALSO | ALTER + | ALWAYS | ASSERTION | ASSIGNMENT | AT @@ -8408,6 +8518,7 @@ | FORCE | FORWARD | FUNCTION + | GENERATED | GLOBAL | GRANTED | HANDLER @@ -8561,6 +8672,7 @@ */ col_name_keyword: BIGINT + | BIGSERIAL | BIT | BOOLEAN_P | CHAR_P @@ -8589,6 +8701,7 @@ | PRECISION | REAL | ROW + | SERIAL | SETOF | SMALLINT | SUBSTRING @@ -8676,6 +8789,7 @@ | GRANT | GROUP_P | HAVING + | IDENTITY | IN_P | INITIALLY | INTERSECT diff -ur postgresql-8.2/src/backend/parser/keywords.c postgresql-8.2-serial/src/backend/parser/keywords.c --- postgresql-8.2/src/backend/parser/keywords.c 2006-03-05 16:58:32.000000000 +0100 +++ postgresql-8.2-serial/src/backend/parser/keywords.c 2006-06-11 23:29:39.000000000 +0200 @@ -41,6 +41,7 @@ {"all", ALL}, {"also", ALSO}, {"alter", ALTER}, + {"always", ALWAYS}, {"analyse", ANALYSE}, /* British spelling */ {"analyze", ANALYZE}, {"and", AND}, @@ -58,6 +59,7 @@ {"begin", BEGIN_P}, {"between", BETWEEN}, {"bigint", BIGINT}, + {"bigserial", BIGSERIAL}, {"binary", BINARY}, {"bit", BIT}, {"boolean", BOOLEAN_P}, @@ -150,6 +152,7 @@ {"from", FROM}, {"full", FULL}, {"function", FUNCTION}, + {"generated", GENERATED}, {"global", GLOBAL}, {"grant", GRANT}, {"granted", GRANTED}, @@ -160,6 +163,7 @@ {"header", HEADER_P}, {"hold", HOLD}, {"hour", HOUR_P}, + {"identity", IDENTITY}, {"if", IF_P}, {"ilike", ILIKE}, {"immediate", IMMEDIATE}, @@ -297,6 +301,9 @@ {"security", SECURITY}, {"select", SELECT}, {"sequence", SEQUENCE}, + {"serial", SERIAL}, + {"serial4", SERIAL}, + {"serial8", BIGSERIAL}, {"serializable", SERIALIZABLE}, {"session", SESSION}, {"session_user", SESSION_USER}, diff -ur postgresql-8.2/src/include/nodes/parsenodes.h postgresql-8.2-serial/src/include/nodes/parsenodes.h --- postgresql-8.2/src/include/nodes/parsenodes.h 2006-04-30 20:30:40.000000000 +0200 +++ postgresql-8.2-serial/src/include/nodes/parsenodes.h 2006-06-11 23:37:15.000000000 +0200 @@ -394,6 +394,9 @@ char *cooked_default; /* nodeToString representation */ List *constraints; /* other constraints on column */ RangeVar *support; /* supporting relation, if any */ + bool is_serial; /* the column is SERIAL */ + bool force_default; /* the column is SERIAL GENERATED ALWAYS */ + List *seq_opts; /* the column is SERIAL ... AS IDENTITY(...) */ } ColumnDef; /*
> The condition (column->is_serial && column->force_default) > can help enforcing GENERATED ALWAYS at INSERT time > and can also help fixing the two TODO entries about SERIAL. You will need to include the insert components of the spec which allow for overriding GENERATED ALWAYS during an INSERT and extend that to COPY and teach pg_dump how to use them. --
Rod Taylor írta: >> The condition (column->is_serial && column->force_default) >> can help enforcing GENERATED ALWAYS at INSERT time >> and can also help fixing the two TODO entries about SERIAL. >> > > You will need to include the insert components of the spec which allow > for overriding GENERATED ALWAYS during an INSERT and extend that to COPY > and teach pg_dump how to use them. > OK, that's over my head at the moment. :-) Maybe the wizards here pick up my patch and complete it. (I hope.) Best regards, Zoltán Böszörményi
Zoltan Boszormenyi <zboszor@dunaweb.hu> writes: > after some experimentation, I came up with the attached patch, > which implements parsing the following SERIAL types: As has been pointed out before, it would be a seriously bad idea to implement the SQL syntax for identity columns without matching the SQL semantics for them. That would leave us behind the eight-ball when we wanted to implement the SQL semantics. Right now we have a useful but non-standard semantics, and a useful but non-standard syntax, and those two should stick together. I'm not too happy with converting SERIAL4 and SERIAL8 into reserved words, either, as I believe this patch does. Some other things missing are documentation and pg_dump support. regards, tom lane
> Zoltan Boszormenyi <zboszor@dunaweb.hu> writes: >> after some experimentation, I came up with the attached patch, >> which implements parsing the following SERIAL types: > > As has been pointed out before, it would be a seriously bad idea to > implement the SQL syntax for identity columns without matching the > SQL semantics for them. That would leave us behind the eight-ball > when we wanted to implement the SQL semantics. Right now we have > a useful but non-standard semantics, and a useful but non-standard > syntax, and those two should stick together. Well, I read all sections of 5WD-02-Foundation-2003-09.pdf where "identity" appears, here are the list of changes that will be needed for an identity column: - Only one identity column can appear in the column list. I have to check for this at CREATE, TABLE, ALTER TABLE ADD COLUMN and ALTER TABLE ALTER COLUMN. - ALTER TABLE ALTER COLUMN ... RESTART [WITH] or SET alter the sequence on the column. - If colname is SERIAL GENERATED ALWAYS, then only "UPDATE SER colname = default" may occur. Then there's the DROP default question: should PostgreSQL allow it or not? What I found about this in the standard is this: definitions of the DEFAULT clause, the identity column specification and the generation clause are mutually exclusive, see 11.4. So, if you cannot specify a DEFAULT for an identity column, you must not be able to drop it, although this isn't expressed in the standard, it's just my opinion. Is there anything else? I haven't found any. Or I can't read between the lines, which is a skill that isn't required for reading standards. :-) > I'm not too happy with converting SERIAL4 and SERIAL8 into reserved > words, either, as I believe this patch does. Not really, only IDENTITY is added to the list of reserved words, serial/serial4/serial8/bigserial are just type names: # create table serial8 (serial8 serial8 primary key); NOTICE: CREATE TABLE will create implicit sequence "serial8_serial8_seq" for serial column "serial8.serial8" NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "serial8_pkey" for table "serial8" CREATE TABLE The others (AS, GENERATED) were added to the non-reserved keyword list. > Some other things missing are documentation and pg_dump support. I am working on that. The documentation is easier. :-) Also note, that I misread the generated column syntax as part of the identity column syntax. So, the parsing should only recognize SERIAL [ GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY [ ( sequence_options ) ] ] which I already fixed here and the sequence_options list cannot be empty as with my previous attempt. Best regards, Zoltán Böszörményi
On Mon, Jun 12, 2006 at 02:27:31PM +0200, B?sz?rm?nyi Zolt?n wrote: > > Zoltan Boszormenyi <zboszor@dunaweb.hu> writes: > >> after some experimentation, I came up with the attached patch, > >> which implements parsing the following SERIAL types: > > > > As has been pointed out before, it would be a seriously bad idea to > > implement the SQL syntax for identity columns without matching the > > SQL semantics for them. That would leave us behind the eight-ball > > when we wanted to implement the SQL semantics. Right now we have > > a useful but non-standard semantics, and a useful but non-standard > > syntax, and those two should stick together. > > Well, I read all sections of 5WD-02-Foundation-2003-09.pdf > where "identity" appears, here are the list of changes that will > be needed for an identity column: Have you read the archives on the recent discussions that have taken place about whether SERIAL should be a black box or not? IIRC most of this was all hashed out in that thread. -- Jim C. Nasby, Sr. Engineering Consultant jnasby@pervasive.com Pervasive Software http://pervasive.com work: 512-231-6117 vcard: http://jim.nasby.net/pervasive.vcf cell: 512-569-9461
Böszörményi Zoltán <zboszor@dunaweb.hu> writes: > Well, I read all sections of 5WD-02-Foundation-2003-09.pdf > where "identity" appears, here are the list of changes that will > be needed for an identity column: You're missing the hard part: NEXT VALUE FOR is sufficiently different from nextval() that it will be very painful to implement. Until we have a way of doing that, I think it would be unwise to use the SQL syntax for things that don't behave the way the spec says. We might find that spec-compliant sequences need to be a completely different object type, or something equally evil. Right now, we have the freedom to do that if that's what it takes. With the spec syntax already locked down as generating PG-style sequences, we'd be hosed. >> I'm not too happy with converting SERIAL4 and SERIAL8 into reserved >> words, either, as I believe this patch does. > Not really, only IDENTITY is added to the list of reserved words, > serial/serial4/serial8/bigserial are just type names: You apparently haven't thought very hard about the consequences of what you did to keywords.c. But I'll give you a hint: mapping distinct strings to the same token is a bad idea. regards, tom lane
Hi, Jim C. Nasby írta: > On Mon, Jun 12, 2006 at 02:27:31PM +0200, B?sz?rm?nyi Zolt?n wrote: > >>> Zoltan Boszormenyi <zboszor@dunaweb.hu> writes: >>> >>>> after some experimentation, I came up with the attached patch, >>>> which implements parsing the following SERIAL types: >>>> >>> As has been pointed out before, it would be a seriously bad idea to >>> implement the SQL syntax for identity columns without matching the >>> SQL semantics for them. That would leave us behind the eight-ball >>> when we wanted to implement the SQL semantics. Right now we have >>> a useful but non-standard semantics, and a useful but non-standard >>> syntax, and those two should stick together. >>> >> Well, I read all sections of 5WD-02-Foundation-2003-09.pdf >> where "identity" appears, here are the list of changes that will >> be needed for an identity column: >> > > Have you read the archives on the recent discussions that have taken > place about whether SERIAL should be a black box or not? IIRC most of > this was all hashed out in that thread. I just read it thoroughly, and the issues I listed wasn't mentioned in the "black box" thread, at all. I am trying to implement the standard syntax ( and gradually the conformant behaviour ) along the lines of sections 4.14.7, 11.3, 11.4, 11.7, 11.11, 11.12, 11.17 and 14.8. Best regards, Zoltán Böszörményi
Tom Lane írta: > Böszörményi Zoltán <zboszor@dunaweb.hu> writes: > >> Well, I read all sections of 5WD-02-Foundation-2003-09.pdf >> where "identity" appears, here are the list of changes that will >> be needed for an identity column: >> > > You're missing the hard part: NEXT VALUE FOR is sufficiently different > from nextval() that it will be very painful to implement. Until we have > a way of doing that, I think it would be unwise to use the SQL syntax > for things that don't behave the way the spec says. We might find that > spec-compliant sequences need to be a completely different object type, > or something equally evil. Right now, we have the freedom to do that > if that's what it takes. With the spec syntax already locked down as > generating PG-style sequences, we'd be hosed. > Do you mean the allowed and denied contexts of the NEXT VALUE FOR expression in section 6.13? (As opposed to nextval() which, as being a function is allowed more broadly.) This part may still be described with grammar, unless you mean something more suble. >>> I'm not too happy with converting SERIAL4 and SERIAL8 into reserved >>> words, either, as I believe this patch does. >>> > > >> Not really, only IDENTITY is added to the list of reserved words, >> serial/serial4/serial8/bigserial are just type names: >> > > You apparently haven't thought very hard about the consequences of what > you did to keywords.c. But I'll give you a hint: mapping distinct > strings to the same token is a bad idea. > OK, point taken. Best regards, Zoltán Böszörményi
Zoltan Boszormenyi <zboszor@dunaweb.hu> writes: > Tom Lane �rta: >> You're missing the hard part: NEXT VALUE FOR is sufficiently different >> from nextval() that it will be very painful to implement. > Do you mean the allowed and denied contexts of the > NEXT VALUE FOR expression in section 6.13? No, I mean that different textual instances of NEXT VALUE FOR are required to return the same value in certain cases. See the "once per row" bits in the 6.13 General Rules. It's not real clear what "once per row" actually means, especially in cases such as references from within functions invoked by a query --- is that the same query or a different one? You could make a case for wanting either behavior, particularly when considering trigger functions. On the whole, it's a mess, and not particularly well thought out IMHO. But it's in the spec, and if we are going to adopt the spec's syntax for identity columns then we'd better provide the spec's behavior. > (As opposed to nextval() which, as being a function > is allowed more broadly.) This part may still be described > with grammar, unless you mean something more suble. I think trying to enforce the restrictions in 6.13 in the grammar would be a terrible mistake; better to do it in parse analysis. Compare the restrictions on where aggregate functions can appear; we don't try to enforce those grammatically. One reason why not is that you'd have a hard time getting the grammar to produce anything more specific than "syntax error", which is pretty unhelpful. regards, tom lane