diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 7c009d8..9f96fe3 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -12275,6 +12275,7 @@ SELECT xmlagg(x) FROM (SELECT x FROM test ORDER BY y DESC) AS tab; lag(value any [, offset integer [, default any ]]) + [ { RESPECT | IGNORE } NULLS ] @@ -12289,7 +12290,9 @@ SELECT xmlagg(x) FROM (SELECT x FROM test ORDER BY y DESC) AS tab; default are evaluated with respect to the current row. If omitted, offset defaults to 1 and - default to null + default to null. If + IGNORE NULLS is specified then the function will be evaluated + as if the rows containing nulls didn't exist. @@ -12302,6 +12305,7 @@ SELECT xmlagg(x) FROM (SELECT x FROM test ORDER BY y DESC) AS tab; lead(value any [, offset integer [, default any ]]) + [respect nulls]|[ignore nulls] @@ -12316,7 +12320,8 @@ SELECT xmlagg(x) FROM (SELECT x FROM test ORDER BY y DESC) AS tab; default are evaluated with respect to the current row. If omitted, offset defaults to 1 and - default to null + IGNORE NULLS is specified then the function will be evaluated + as if the rows containing nulls didn't exist. @@ -12410,11 +12415,10 @@ SELECT xmlagg(x) FROM (SELECT x FROM test ORDER BY y DESC) AS tab; The SQL standard defines a RESPECT NULLS or - IGNORE NULLS option for lead, lag, - first_value, last_value, and - nth_value. This is not implemented in - PostgreSQL: the behavior is always the - same as the standard's default, namely RESPECT NULLS. + IGNORE NULLS option for first_value, + last_value, and nth_value. This is not + implemented in PostgreSQL: the behavior is + always the same as the standard's default, namely RESPECT NULLS. Likewise, the standard's FROM FIRST or FROM LAST option for nth_value is not implemented: only the default FROM FIRST behavior is supported. (You can achieve diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index d9f0e79..e1a1020 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -2000,6 +2000,16 @@ WinGetCurrentPosition(WindowObject winobj) Assert(WindowObjectIsValid(winobj)); return winobj->winstate->currentpos; } +/* + * WinGetFrameOptions + * Returns the frame option flags + */ +int +WinGetFrameOptions(WindowObject winobj) +{ + Assert(WindowObjectIsValid(winobj)); + return winobj->winstate->frameOptions; +} /* * WinGetPartitionRowCount diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c index b18b7a5..70e84d1 100644 --- a/src/backend/nodes/bitmapset.c +++ b/src/backend/nodes/bitmapset.c @@ -26,9 +26,6 @@ #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD) #define BITNUM(x) ((x) % BITS_PER_BITMAPWORD) -#define BITMAPSET_SIZE(nwords) \ - (offsetof(Bitmapset, words) + (nwords) * sizeof(bitmapword)) - /*---------- * This is a well-known cute trick for isolating the rightmost one-bit * in a word. It assumes two's complement arithmetic. Consider any diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index c41f1b5..917e233 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -288,6 +288,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type TriggerEvents TriggerOneEvent %type TriggerFuncArg %type TriggerWhen +%type opt_ignore_nulls %type event_trigger_when_list event_trigger_value_list %type event_trigger_when_item @@ -545,7 +546,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); HANDLER HAVING HEADER_P HOLD HOUR_P - IDENTITY_P IF_P ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IN_P + IDENTITY_P IF_P IGNORE ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IN_P INCLUDING INCREMENT INDEX INDEXES INHERIT INHERITS INITIALLY INLINE_P INNER_P INOUT INPUT_P INSENSITIVE INSERT INSTEAD INT_P INTEGER INTERSECT INTERVAL INTO INVOKER IS ISNULL ISOLATION @@ -575,7 +576,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); RANGE READ REAL REASSIGN RECHECK RECURSIVE REF REFERENCES REFRESH REINDEX RELATIVE_P RELEASE RENAME REPEATABLE REPLACE REPLICA - RESET RESTART RESTRICT RETURNING RETURNS REVOKE RIGHT ROLE ROLLBACK + RESET RESPECT RESTART RESTRICT RETURNING RETURNS REVOKE RIGHT ROLE ROLLBACK ROW ROWS RULE SAVEPOINT SCHEMA SCROLL SEARCH SECOND_P SECURITY SELECT SEQUENCE SEQUENCES @@ -11782,16 +11783,25 @@ window_definition: } ; -over_clause: OVER window_specification - { $$ = $2; } - | OVER ColId +opt_ignore_nulls: + IGNORE NULLS_P { $$ = FRAMEOPTION_IGNORE_NULLS; } + | RESPECT NULLS_P { $$ = 0; } + | /* EMPTY */ { $$ = 0; } + ; + +over_clause: opt_ignore_nulls OVER window_specification + { + $3->frameOptions |= $1; + $$ = $3; + } + | opt_ignore_nulls OVER ColId { WindowDef *n = makeNode(WindowDef); - n->name = $2; + n->name = $3; n->refname = NULL; n->partitionClause = NIL; n->orderClause = NIL; - n->frameOptions = FRAMEOPTION_DEFAULTS; + n->frameOptions = FRAMEOPTION_DEFAULTS | $1; n->startOffset = NULL; n->endOffset = NULL; n->location = @2; @@ -12770,6 +12780,7 @@ unreserved_keyword: | HOUR_P | IDENTITY_P | IF_P + | IGNORE | IMMEDIATE | IMMUTABLE | IMPLICIT_P @@ -12858,6 +12869,7 @@ unreserved_keyword: | REPLACE | REPLICA | RESET + | RESPECT | RESTART | RESTRICT | RETURNS diff --git a/src/backend/utils/adt/windowfuncs.c b/src/backend/utils/adt/windowfuncs.c index b7c42d3..b14491c 100644 --- a/src/backend/utils/adt/windowfuncs.c +++ b/src/backend/utils/adt/windowfuncs.c @@ -13,6 +13,7 @@ */ #include "postgres.h" +#include "nodes/bitmapset.h" #include "utils/builtins.h" #include "windowapi.h" @@ -25,6 +26,13 @@ typedef struct rank_context } rank_context; /* + * lead-lag process helpers + */ + #define ISNULL_INDEX(i) (2 * (i)) + #define HAVESCANNED_INDEX(i) ((2 * (i)) + 1) + #define SET_WITHOUT_RESIZING(b, i) b->words[(i) / BITS_PER_BITMAPWORD] |= (bitmapword) 1 << (i) % BITS_PER_BITMAPWORD + +/* * ntile process information */ typedef struct @@ -280,7 +288,8 @@ window_ntile(PG_FUNCTION_ARGS) * common operation of lead() and lag() * For lead() forward is true, whereas for lag() it is false. * withoffset indicates we have an offset second argument. - * withdefault indicates we have a default third argument. + * withdefault indicates we have a default third argument. We'll only + * return this default if the offset we want is outside of the partition. */ static Datum leadlag_common(FunctionCallInfo fcinfo, @@ -290,8 +299,18 @@ leadlag_common(FunctionCallInfo fcinfo, int32 offset; bool const_offset; Datum result; - bool isnull; - bool isout; + bool isnull = false; + bool isout = false; + bool ignore_nulls; + Bitmapset* null_values; + + /* + * We want to set the markpos (the earliest tuple we can access) as + * aggressively as possible to save memory, but if the offset isn't + * constant we really need random access on the partition (so can't + * mark at all). + */ + ignore_nulls = (WinGetFrameOptions(winobj) & FRAMEOPTION_IGNORE_NULLS) != 0; if (withoffset) { @@ -305,12 +324,134 @@ leadlag_common(FunctionCallInfo fcinfo, offset = 1; const_offset = true; } + if(!forward) + { + offset = -offset; + } + + if (ignore_nulls) + { + int64 bits_needed, scanning, words_needed, current = WinGetCurrentPosition(winobj); + bool scanForward; + + /* + * This case is a little complicated; we're defining "IGNORE NULLS" as + * "run the query, and pretend the rows with nulls in them don't exist". + * This means that we'll scan from the current row an 'offset' number of + * non-null rows, and then return that one. + */ - result = WinGetFuncArgInPartition(winobj, 0, - (forward ? offset : -offset), - WINDOW_SEEK_CURRENT, - const_offset, + /* + * Accessing tuples is expensive, so we'll keep track of the ones we've + * accessed (more specifically, if they're null or not). We'll need one + * bit for whether the value is null and one bit for whether we've checked + * that tuple or not. We'll keep these two bits together (as opposed to + * having two separate bitmaps) to improve cache locality. + */ + bits_needed = 2 * WinGetPartitionRowCount(winobj); + words_needed = (bits_needed / BITS_PER_BITMAPWORD) + 1; + + null_values = (Bitmapset *) WinGetPartitionLocalMemory( + winobj, + BITMAPSET_SIZE(words_needed)); + Assert(null_values); + + /* + * We use offset >= 0 instead of just forward as the offset might be in the + * opposite direction to the way we're scanning. We'll then force offset to + * be positive to make counting down the rows easier. + */ + scanForward = offset == 0 ? forward : (offset > 0); + offset = abs(offset); + + for (scanning = current;; scanForward ? ++scanning : --scanning) + { + if (scanning < 0 || scanning >= WinGetPartitionRowCount(winobj)) + { + isout = true; + + /* + * As we're out of the window we want to return NULL or the default + * value, but not whatever's left in result. We'll use the isnull + * flag to say "ignore it"! + */ + isnull = true; + + break; + } + + /* look in the bitmap cache - do we know if this index is null? */ + if (bms_is_member(HAVESCANNED_INDEX(scanning), null_values)) + { + isnull = bms_is_member(ISNULL_INDEX(scanning), null_values); + } + else + { + /* first time we've accessed this index; let's see if it's null: */ + result = WinGetFuncArgInPartition(winobj, 0, + scanning, + WINDOW_SEEK_HEAD, + false, &isnull, &isout); + if (isout) + break; + + /* update our bitmap with this result */ + SET_WITHOUT_RESIZING(null_values, HAVESCANNED_INDEX(scanning)); + if (isnull) + { + SET_WITHOUT_RESIZING(null_values, ISNULL_INDEX(scanning)); + } + } + + /* + * Now the isnull flag is set correctly. If !isnull there's a chance + * that we may stop iterating here: + */ + if (!isnull) + { + if (offset == 0) + { + result = WinGetFuncArgInPartition(winobj, 0, + scanning, + WINDOW_SEEK_HEAD, + false, + &isnull, &isout); + break; + } + else + --offset; /* it's not null, so we're one step closer to the value we want */ + } + else if (scanning == current) + { + /* + * A slight edge case. Consider: + * + * A | lag(A, 1) + * 1 | + * 2 | 1 + * | ? + * + * Does a lag of one when the current value is null mean go back to the first + * non-null value (i.e. 2), or find the previous non-null value of the first + * non-null value (i.e. 1)? We're implementing the former semantics, so we'll + * need to correct slightly: + */ + --offset; + } + } + } + else + { + /* + * We don't care about nulls; just get the row at the required offset. + */ + result = WinGetFuncArgInPartition(winobj, 0, + offset, + WINDOW_SEEK_CURRENT, + const_offset, + &isnull, &isout); + } if (isout) { diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h index 2a4b41d..710000f 100644 --- a/src/include/nodes/bitmapset.h +++ b/src/include/nodes/bitmapset.h @@ -34,7 +34,8 @@ typedef struct Bitmapset int nwords; /* number of words in array */ bitmapword words[1]; /* really [nwords] */ } Bitmapset; /* VARIABLE LENGTH STRUCT */ - +#define BITMAPSET_SIZE(nwords) \ + (offsetof(Bitmapset, words) + (nwords) * sizeof(bitmapword)) /* result of bms_subset_compare */ typedef enum diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 6723647..71b44d5 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -435,6 +435,7 @@ typedef struct WindowDef #define FRAMEOPTION_END_VALUE_PRECEDING 0x00800 /* end is V. P. */ #define FRAMEOPTION_START_VALUE_FOLLOWING 0x01000 /* start is V. F. */ #define FRAMEOPTION_END_VALUE_FOLLOWING 0x02000 /* end is V. F. */ +#define FRAMEOPTION_IGNORE_NULLS 0x04000 #define FRAMEOPTION_START_VALUE \ (FRAMEOPTION_START_VALUE_PRECEDING | FRAMEOPTION_START_VALUE_FOLLOWING) diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index b3d72a9..dd7396e 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -179,6 +179,7 @@ PG_KEYWORD("hold", HOLD, UNRESERVED_KEYWORD) PG_KEYWORD("hour", HOUR_P, UNRESERVED_KEYWORD) PG_KEYWORD("identity", IDENTITY_P, UNRESERVED_KEYWORD) PG_KEYWORD("if", IF_P, UNRESERVED_KEYWORD) +PG_KEYWORD("ignore", IGNORE, UNRESERVED_KEYWORD) PG_KEYWORD("ilike", ILIKE, TYPE_FUNC_NAME_KEYWORD) PG_KEYWORD("immediate", IMMEDIATE, UNRESERVED_KEYWORD) PG_KEYWORD("immutable", IMMUTABLE, UNRESERVED_KEYWORD) @@ -312,6 +313,7 @@ PG_KEYWORD("repeatable", REPEATABLE, UNRESERVED_KEYWORD) PG_KEYWORD("replace", REPLACE, UNRESERVED_KEYWORD) PG_KEYWORD("replica", REPLICA, UNRESERVED_KEYWORD) PG_KEYWORD("reset", RESET, UNRESERVED_KEYWORD) +PG_KEYWORD("respect", RESPECT, UNRESERVED_KEYWORD) PG_KEYWORD("restart", RESTART, UNRESERVED_KEYWORD) PG_KEYWORD("restrict", RESTRICT, UNRESERVED_KEYWORD) PG_KEYWORD("returning", RETURNING, RESERVED_KEYWORD) diff --git a/src/include/windowapi.h b/src/include/windowapi.h index 5bbf1fa..81f5ba0 100644 --- a/src/include/windowapi.h +++ b/src/include/windowapi.h @@ -46,6 +46,8 @@ extern void *WinGetPartitionLocalMemory(WindowObject winobj, Size sz); extern int64 WinGetCurrentPosition(WindowObject winobj); extern int64 WinGetPartitionRowCount(WindowObject winobj); +extern int WinGetFrameOptions(WindowObject winobj); + extern void WinSetMarkPosition(WindowObject winobj, int64 markpos); extern bool WinRowsArePeers(WindowObject winobj, int64 pos1, int64 pos2); diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out index ecc1c2c..473c7f3 100644 --- a/src/test/regress/expected/window.out +++ b/src/test/regress/expected/window.out @@ -5,19 +5,21 @@ CREATE TEMPORARY TABLE empsalary ( depname varchar, empno bigint, salary int, - enroll_date date + enroll_date date, + term_date date, + respect text ); INSERT INTO empsalary VALUES -('develop', 10, 5200, '2007-08-01'), -('sales', 1, 5000, '2006-10-01'), -('personnel', 5, 3500, '2007-12-10'), -('sales', 4, 4800, '2007-08-08'), -('personnel', 2, 3900, '2006-12-23'), -('develop', 7, 4200, '2008-01-01'), -('develop', 9, 4500, '2008-01-01'), -('sales', 3, 4800, '2007-08-01'), -('develop', 8, 6000, '2006-10-01'), -('develop', 11, 5200, '2007-08-15'); +('develop', 10, 5200, '2007-08-01', null, null), +('sales', 1, 5000, '2006-10-01', null, 'frog'), +('personnel', 5, 3500, '2007-12-10', null, null), +('sales', 4, 4800, '2007-08-08', '2010-09-22', 'chicken'), +('personnel', 2, 3900, '2006-12-23', null, null), +('develop', 7, 4200, '2008-01-01', null, null), +('develop', 9, 4500, '2008-01-01', null, 'gorilla'), +('sales', 3, 4800, '2007-08-01', '2009-03-05', null), +('develop', 8, 6000, '2006-10-01', '2009-11-17', 'tiger'), +('develop', 11, 5200, '2007-08-15', null, null); SELECT depname, empno, salary, sum(salary) OVER (PARTITION BY depname) FROM empsalary ORDER BY depname, salary; depname | empno | salary | sum -----------+-------+--------+------- @@ -1020,5 +1022,135 @@ SELECT ntile(0) OVER (ORDER BY ten), ten, four FROM tenk1; ERROR: argument of ntile must be greater than zero SELECT nth_value(four, 0) OVER (ORDER BY ten), ten, four FROM tenk1; ERROR: argument of nth_value must be greater than zero +-- test null behaviour: (1) lags +SELECT term_date, lag(term_date) OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + term_date | lag +------------+------------ + | + | + 03-05-2009 | + 09-22-2010 | 03-05-2009 + | 09-22-2010 + | + 11-17-2009 | + | 11-17-2009 + | + | +(10 rows) + +SELECT term_date, lag(term_date) RESPECT NULLS OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + term_date | lag +------------+------------ + | + | + 03-05-2009 | + 09-22-2010 | 03-05-2009 + | 09-22-2010 + | + 11-17-2009 | + | 11-17-2009 + | + | +(10 rows) + +-- a numeric (date) column +SELECT term_date, lag(term_date) IGNORE NULLS OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + term_date | lag +------------+------------ + | + | + 03-05-2009 | + 09-22-2010 | 03-05-2009 + | 09-22-2010 + | 09-22-2010 + 11-17-2009 | 09-22-2010 + | 11-17-2009 + | 11-17-2009 + | 11-17-2009 +(10 rows) + +-- a text column +SELECT respect, lag(respect) IGNORE NULLS OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + respect | lag +---------+--------- + frog | + | frog + | frog + chicken | frog + | chicken + | chicken + tiger | chicken + gorilla | tiger + | gorilla + | gorilla +(10 rows) + +-- (2) leads +SELECT term_date, lead(term_date) OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + term_date | lead +------------+------------ + | + | 03-05-2009 + 03-05-2009 | 09-22-2010 + 09-22-2010 | + | + | 11-17-2009 + 11-17-2009 | + | + | + | +(10 rows) + +SELECT term_date, lead(term_date) RESPECT NULLS OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + term_date | lead +------------+------------ + | + | 03-05-2009 + 03-05-2009 | 09-22-2010 + 09-22-2010 | + | + | 11-17-2009 + 11-17-2009 | + | + | + | +(10 rows) + +SELECT term_date, lead(term_date) IGNORE NULLS OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + term_date | lead +------------+------------ + | 03-05-2009 + | 03-05-2009 + 03-05-2009 | 09-22-2010 + 09-22-2010 | 11-17-2009 + | 11-17-2009 + | 11-17-2009 + 11-17-2009 | + | + | + | +(10 rows) + -- cleanup DROP TABLE empsalary; +-- some more test cases +CREATE TABLE test_table ( + id serial, + val integer); +INSERT INTO test_table (val) SELECT * FROM unnest(ARRAY[1,2,3,4,NULL, NULL, NULL, 5, 6, 7]); +SELECT val, lead(val, 2) IGNORE NULLS OVER (ORDER BY id) FROM test_table ORDER BY id; + val | lead +-----+------ + 1 | 3 + 2 | 4 + 3 | 5 + 4 | 6 + | 6 + | 6 + | 6 + 5 | 7 + 6 | + 7 | +(10 rows) + +DROP TABLE test_table; diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql index 769be0f..92166d7 100644 --- a/src/test/regress/sql/window.sql +++ b/src/test/regress/sql/window.sql @@ -6,20 +6,22 @@ CREATE TEMPORARY TABLE empsalary ( depname varchar, empno bigint, salary int, - enroll_date date + enroll_date date, + term_date date, + respect text ); INSERT INTO empsalary VALUES -('develop', 10, 5200, '2007-08-01'), -('sales', 1, 5000, '2006-10-01'), -('personnel', 5, 3500, '2007-12-10'), -('sales', 4, 4800, '2007-08-08'), -('personnel', 2, 3900, '2006-12-23'), -('develop', 7, 4200, '2008-01-01'), -('develop', 9, 4500, '2008-01-01'), -('sales', 3, 4800, '2007-08-01'), -('develop', 8, 6000, '2006-10-01'), -('develop', 11, 5200, '2007-08-15'); +('develop', 10, 5200, '2007-08-01', null, null), +('sales', 1, 5000, '2006-10-01', null, 'frog'), +('personnel', 5, 3500, '2007-12-10', null, null), +('sales', 4, 4800, '2007-08-08', '2010-09-22', 'chicken'), +('personnel', 2, 3900, '2006-12-23', null, null), +('develop', 7, 4200, '2008-01-01', null, null), +('develop', 9, 4500, '2008-01-01', null, 'gorilla'), +('sales', 3, 4800, '2007-08-01', '2009-03-05', null), +('develop', 8, 6000, '2006-10-01', '2009-11-17', 'tiger'), +('develop', 11, 5200, '2007-08-15', null, null); SELECT depname, empno, salary, sum(salary) OVER (PARTITION BY depname) FROM empsalary ORDER BY depname, salary; @@ -264,5 +266,35 @@ SELECT ntile(0) OVER (ORDER BY ten), ten, four FROM tenk1; SELECT nth_value(four, 0) OVER (ORDER BY ten), ten, four FROM tenk1; +-- test null behaviour: (1) lags + +SELECT term_date, lag(term_date) OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + +SELECT term_date, lag(term_date) RESPECT NULLS OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + +-- a numeric (date) column +SELECT term_date, lag(term_date) IGNORE NULLS OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + +-- a text column +SELECT respect, lag(respect) IGNORE NULLS OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + +-- (2) leads + +SELECT term_date, lead(term_date) OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + +SELECT term_date, lead(term_date) RESPECT NULLS OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + +SELECT term_date, lead(term_date) IGNORE NULLS OVER (ORDER BY empno) FROM empsalary ORDER BY empno; + -- cleanup DROP TABLE empsalary; + +-- some more test cases +CREATE TABLE test_table ( + id serial, + val integer); +INSERT INTO test_table (val) SELECT * FROM unnest(ARRAY[1,2,3,4,NULL, NULL, NULL, 5, 6, 7]); + +SELECT val, lead(val, 2) IGNORE NULLS OVER (ORDER BY id) FROM test_table ORDER BY id; + +DROP TABLE test_table;